Beginnings of Heuristics work
[contractdashboard.git] / heuristics / metadataHeuristics.php
blob:a/heuristics/metadataHeuristics.php -> blob:b/heuristics/metadataHeuristics.php
--- a/heuristics/metadataHeuristics.php
+++ b/heuristics/metadataHeuristics.php
@@ -1,1 +1,59 @@
+ <?php
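+/* Reference query over all contract notices: every description that occurs
+   more than once, with its occurrence count, ordered by that count.
+   SELECT description, count(*) as count
+FROM contractnotice 
+group by description having count > 1 order by count
+*/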
+/* Heuristic: duplicated description.
+   Variants: most duplicated overall, and most duplicated per
+   agency/category/supplier etc. */
+$heuristics["METADATA_DUPLICATED_DESCRIPTION"] = Array( // adds an entry to $heuristics under the same name as the scoring function defined in this file
+	"description" => "" // descriptive text for this heuristic; currently left empty
+);
+/**
+ * Scores how unusual the number of contract notices sharing one description is.
+ *
+ * Counts the rows in contractnotice whose description equals the given
+ * notice's description, then expresses the distance of that count from the
+ * mean duplicate count in units of standard deviation. A description that
+ * occurs exactly once scores 0.
+ *
+ * @param array $cn Contract notice row. NOTE(review): assumed to be an array
+ *                  holding the notice text under the 'description' key -
+ *                  confirm against the callers of this heuristic.
+ * @return array Keys: heuristic_value (score), raw_value (duplicate count),
+ *               mean and stddev (the statistics the score was computed from).
+ */
+function METADATA_DUPLICATED_DESCRIPTION($cn)
+{
+	// NOTE(review): assumes the shared PDO handle lives in the global $conn - confirm.
+	global $conn;
+	$averageDuplicatedDescriptions = getAverageDuplicatedDescriptions();
+	$stddevDuplicatedDescriptions = getstddevDuplicatedDescriptions();
+	// Bind the description as a parameter: it is free text and may contain quotes.
+	$statement = $conn->prepare('select count(*) from contractnotice where description = ?');
+	$statement->execute(array($cn['description']));
+	$r = $statement->fetch(PDO::FETCH_BOTH);
+	$dupeDesc = (int) $r[0];
+	if ($dupeDesc == 1 || !$stddevDuplicatedDescriptions) {
+		// Unique description, or no usable spread to measure against:
+		// score 0 instead of dividing by zero/null.
+		$value = 0;
+	} else {
+		$value = abs($dupeDesc - $averageDuplicatedDescriptions) / $stddevDuplicatedDescriptions;
+	}
+	return Array(
+		"heuristic_value" => $value,
+		"raw_value" => $dupeDesc,
+		"mean" => $averageDuplicatedDescriptions,
+		"stddev" => $stddevDuplicatedDescriptions
+	);
+}
+// Bare expression statement: it names the variable but assigns nothing to it.
+$averageDuplicatedDescriptions;
+/**
+ * Returns the global $averageDuplicatedDescriptions, calling
+ * getStatsDuplicatedDescriptions() first when the global is still falsy.
+ *
+ * @return mixed Current value of the global $averageDuplicatedDescriptions.
+ */
+function getAverageDuplicatedDescriptions()
+{
+	global $averageDuplicatedDescriptions;
+	// Already populated: hand it back without touching the database.
+	if ($averageDuplicatedDescriptions) {
+		return $averageDuplicatedDescriptions;
+	}
+	getStatsDuplicatedDescriptions();
+	return $averageDuplicatedDescriptions;
+}
+// Bare expression statement: it names the variable but assigns nothing to it.
+$stddevDuplicatedDescriptions;
+/**
+ * Returns the global $stddevDuplicatedDescriptions, calling
+ * getStatsDuplicatedDescriptions() first when the global is still falsy.
+ *
+ * @return mixed Current value of the global $stddevDuplicatedDescriptions.
+ */
+function getstddevDuplicatedDescriptions()
+{
+	global $stddevDuplicatedDescriptions;
+	// Already populated: hand it back without touching the database.
+	if ($stddevDuplicatedDescriptions) {
+		return $stddevDuplicatedDescriptions;
+	}
+	getStatsDuplicatedDescriptions();
+	return $stddevDuplicatedDescriptions;
+}
+/**
+ * Queries the mean and standard deviation of the per-description duplicate
+ * counts (descriptions occurring more than once in contractnotice) and stores
+ * them in the globals $averageDuplicatedDescriptions and
+ * $stddevDuplicatedDescriptions.
+ *
+ * @return void
+ */
+function getStatsDuplicatedDescriptions()
+{
+	// The two statistics must be declared global here: the getter functions
+	// read them from global scope, so plain local assignments would be lost.
+	// NOTE(review): assumes the shared PDO handle lives in the global $conn - confirm.
+	global $conn, $averageDuplicatedDescriptions, $stddevDuplicatedDescriptions;
+	$query = "select avg(count),STDDEV(count) from (
+        SELECT description, count(*) as count
+FROM contractnotice 
+group by description having count > 1 
+        ) as a;";
+	$result = $conn->query($query);
+	if ($result === false) {
+		// Query failed under a non-throwing PDO error mode: leave the globals untouched.
+		return;
+	}
+	$r = $result->fetch(PDO::FETCH_BOTH);
+	$averageDuplicatedDescriptions = $r[0];
+	$stddevDuplicatedDescriptions = $r[1];
+}