Add initial date based heuristics
[contractdashboard.git] / heuristics / historyHeuristics.php
blob:a/heuristics/historyHeuristics.php -> blob:b/heuristics/historyHeuristics.php
--- a/heuristics/historyHeuristics.php
+++ b/heuristics/historyHeuristics.php
@@ -1,25 +1,126 @@
-<?php     
-    // "unusual for agency/supplier due to previous low number of transactions "
-$heuristics["HISTORY_LOW_TRANSACTIONS"] = Array(
-	"description" => "unusual for agency/supplier due to previous low number of transactions "
+<?php
+// Describe the agency transaction-count heuristic in the shared $heuristics table.
+// The array key matches the name of the HISTORY_LOW_TRANSACTIONS_AGENCY() function
+// defined directly below (presumably so it can be looked up by name; verify against caller).
+$heuristics["HISTORY_LOW_TRANSACTIONS_AGENCY"] = Array(
+	"description" => "unusual for agency due to previous low number of transactions "
 );
-function HISTORY_LOW_TRANSACTIONS($cn)
+/**
+ * Scores how far the notice's agency sits from the typical agency in terms of
+ * how many contract notices it has published.
+ *
+ * The score is the absolute distance of this agency's notice count from the
+ * mean per-agency count, expressed in standard deviations.
+ *
+ * @param array $cn Contract notice row; only $cn['agencyName'] is read.
+ * @return array "heuristic_value" (distance in standard deviations, 0 when the
+ *               standard deviation is zero or unavailable), "raw_value" (this
+ *               agency's notice count), "mean" and "stddev" (per-agency statistics).
+ */
+function HISTORY_LOW_TRANSACTIONS_AGENCY($cn)
 {
-    	$averageContractPeriod = getAverageContractPeriod();
-	$diff = strtotime($cn['contractStart']) - strtotime($cn['publishDate']);
-	$days = intval($diff / 24);
-	return ($days > 45 ? 1 : 0);
+	$thisAgencyTransactions = getAgencyTransactions($cn['agencyName']);
+	$averageAgencyTransactions = getAverageAgencyTransactions();
+	$stddevAgencyTransactions = getstddevAgencyTransactions();
+	// Compare like with like: the mean and stddev describe notice counts per
+	// agency, so the value being scored must be this agency's notice count.
+	$value = 0;
+	// A zero or missing stddev (e.g. a single agency, or a failed query) would
+	// otherwise turn the score into a division by zero.
+	if ($stddevAgencyTransactions > 0) {
+		$value = abs($thisAgencyTransactions - $averageAgencyTransactions) / $stddevAgencyTransactions;
+	}
+	return Array(
+		"heuristic_value" => $value,
+		"raw_value" => $thisAgencyTransactions,
+		"mean" => $averageAgencyTransactions,
+		"stddev" => $stddevAgencyTransactions
+	);
 }
- /*   - unusual value for time of year
-        - compare to all other records in last 2 weeks
-        - ie. many large contracts in june so takes more to standout*/
- 
- $heuristics["HISTORY_HIGH_VALUE_FOR_MONTH"] = Array(
-	"description" => "unusual value for time of year");
-function HISTORY_HIGH_VALUE_FOR_MONTH($cn)
+// Per-request cache of notice counts, keyed by agency name.
+$agencyTransactions = Array();
+/**
+ * Returns the number of contractnotice rows recorded for an agency.
+ *
+ * The count is looked up once per agency name and then served from the
+ * $agencyTransactions cache.
+ *
+ * @param string $agencyName Agency name as stored in contractnotice.agencyName.
+ * @return mixed The count as returned by MySQL (a numeric string), or null when
+ *               the query produced no row.
+ */
+function getAgencyTransactions($agencyName)
 {
-    	$averageContractPeriod = getAverageContractPeriod();
-	$diff = strtotime($cn['contractStart']) - strtotime($cn['publishDate']);
-	$days = intval($diff / 24);
-	return ($days > 45 ? 1 : 0);
+	global $agencyTransactions;
+	// isset() avoids the undefined-index notice on a first lookup and lets a
+	// legitimate count of "0" be cached instead of being re-queried every call.
+	if (!isset($agencyTransactions[$agencyName])) {
+		// Escape the name: it is data from the notice and may contain quotes.
+		$query = 'select count(*) from contractnotice where agencyName = "' . mysql_real_escape_string($agencyName) . '"';
+		$result = mysql_query($query);
+		$r = mysql_fetch_array($result, MYSQL_BOTH);
+		$agencyTransactions[$agencyName] = $r[0];
+	}
+	return $agencyTransactions[$agencyName];
 }
+// Cached mean notice count per agency. This bare statement assigns nothing;
+// the value is filled in by getStatsAgencyTransactions() on first use.
+$averageAgencyTransactions;
+/**
+ * Returns the mean number of notices per agency, loading the statistics on
+ * the first call (or whenever the cached value is still empty).
+ *
+ * @return mixed The cached mean as set by getStatsAgencyTransactions().
+ */
+function getAverageAgencyTransactions()
+{
+	global $averageAgencyTransactions;
+	// Fast path: statistics already loaded.
+	if ($averageAgencyTransactions) {
+		return $averageAgencyTransactions;
+	}
+	getStatsAgencyTransactions();
+	return $averageAgencyTransactions;
+}
+// Cached standard deviation of notice counts per agency. This bare statement
+// assigns nothing; the value is filled in by getStatsAgencyTransactions().
+$stddevAgencyTransactions;
+/**
+ * Returns the standard deviation of notices per agency, loading the
+ * statistics on the first call (or whenever the cached value is still empty).
+ *
+ * @return mixed The cached standard deviation as set by getStatsAgencyTransactions().
+ */
+function getstddevAgencyTransactions()
+{
+	global $stddevAgencyTransactions;
+	// Fast path: statistics already loaded.
+	if ($stddevAgencyTransactions) {
+		return $stddevAgencyTransactions;
+	}
+	getStatsAgencyTransactions();
+	return $stddevAgencyTransactions;
+}
+/**
+ * Queries the mean and standard deviation of notice counts grouped by agency
+ * and stores them in the $averageAgencyTransactions and
+ * $stddevAgencyTransactions globals.
+ */
+function getStatsAgencyTransactions()
+{
+	global $averageAgencyTransactions, $stddevAgencyTransactions;
+	$sql = "select avg(count), STDDEV(count) from (select count(*) as count
+                from contractnotice group by agencyName) as a;";
+	// Single aggregate row: column 0 is the mean, column 1 the standard deviation.
+	$row = mysql_fetch_array(mysql_query($sql), MYSQL_BOTH);
+	$stddevAgencyTransactions = $row[1];
+	$averageAgencyTransactions = $row[0];
+}
+// Describe the supplier transaction-count heuristic in the shared $heuristics table.
+// The array key matches the name of the HISTORY_LOW_TRANSACTIONS_SUPPLIER() function
+// defined directly below (presumably so it can be looked up by name; verify against caller).
+$heuristics["HISTORY_LOW_TRANSACTIONS_SUPPLIER"] = Array(
+	"description" => "unusual for supplier due to previous low number of transactions "
+);
+/**
+ * Scores how far the notice's supplier sits from the typical supplier in
+ * terms of how many contract notices it appears in.
+ *
+ * The score is the absolute distance of this supplier's notice count from the
+ * mean per-supplier count, expressed in standard deviations.
+ *
+ * @param array $cn Contract notice row; $cn['supplierName'] and
+ *                  $cn['supplierABN'] are read.
+ * @return array "heuristic_value" (distance in standard deviations, 0 when the
+ *               standard deviation is zero or unavailable), "raw_value" (this
+ *               supplier's notice count), "mean" and "stddev" (per-supplier statistics).
+ */
+function HISTORY_LOW_TRANSACTIONS_SUPPLIER($cn)
+{
+	$thisSupplierTransactions = getSupplierTransactions($cn['supplierName'], $cn['supplierABN']);
+	$averageSupplierTransactions = getAverageSupplierTransactions();
+	$stddevSupplierTransactions = getstddevSupplierTransactions();
+	// Compare like with like: the mean and stddev describe notice counts per
+	// supplier, so the value being scored must be this supplier's notice count.
+	$value = 0;
+	// A zero or missing stddev (e.g. a single supplier, or a failed query)
+	// would otherwise turn the score into a division by zero.
+	if ($stddevSupplierTransactions > 0) {
+		$value = abs($thisSupplierTransactions - $averageSupplierTransactions) / $stddevSupplierTransactions;
+	}
+	return Array(
+		"heuristic_value" => $value,
+		"raw_value" => $thisSupplierTransactions,
+		"mean" => $averageSupplierTransactions,
+		"stddev" => $stddevSupplierTransactions
+	);
+}
+// Per-request cache of notice counts, keyed by supplier ABN when one is
+// present and by supplier name otherwise.
+$supplierTransactions = Array();
+/**
+ * Returns the number of contractnotice rows recorded for a supplier.
+ *
+ * The supplier is identified by ABN when a non-empty, non-zero ABN is given,
+ * and by name otherwise. Results are cached in $supplierTransactions.
+ *
+ * @param string $supplierName Supplier name as stored in contractnotice.supplierName.
+ * @param mixed  $supplierABN  Supplier ABN; 0 or "" means "no ABN available".
+ * @return mixed The count as returned by MySQL (a numeric string), or null when
+ *               the query produced no row.
+ */
+function getSupplierTransactions($supplierName, $supplierABN)
+{
+	global $supplierTransactions;
+	// Pick the identifying column once so the lookup below is shared.
+	if ($supplierABN != 0 && $supplierABN != "") {
+		$column = 'supplierABN';
+		$key = $supplierABN;
+	} else {
+		$column = 'supplierName';
+		$key = $supplierName;
+	}
+	// isset() avoids the undefined-index notice on a first lookup and lets a
+	// legitimate count of "0" be cached instead of being re-queried every call.
+	if (!isset($supplierTransactions[$key])) {
+		// Escape the value: it is data from the notice and may contain quotes.
+		$query = 'select count(*) from contractnotice where ' . $column . ' = "' . mysql_real_escape_string($key) . '"';
+		$result = mysql_query($query);
+		$r = mysql_fetch_array($result, MYSQL_BOTH);
+		$supplierTransactions[$key] = $r[0];
+	}
+	return $supplierTransactions[$key];
+}
+// Cached mean notice count per supplier. This bare statement assigns nothing;
+// the value is filled in by getStatsSupplierTransactions() on first use.
+$averageSupplierTransactions;
+/**
+ * Returns the mean number of notices per supplier, loading the statistics on
+ * the first call (or whenever the cached value is still empty).
+ *
+ * @return mixed The cached mean as set by getStatsSupplierTransactions().
+ */
+function getAverageSupplierTransactions()
+{
+	global $averageSupplierTransactions;
+	// Fast path: statistics already loaded.
+	if ($averageSupplierTransactions) {
+		return $averageSupplierTransactions;
+	}
+	getStatsSupplierTransactions();
+	return $averageSupplierTransactions;
+}
+// Cached standard deviation of notice counts per supplier. This bare statement
+// assigns nothing; the value is filled in by getStatsSupplierTransactions().
+$stddevSupplierTransactions;
+/**
+ * Returns the standard deviation of notices per supplier, loading the
+ * statistics on the first call (or whenever the cached value is still empty).
+ *
+ * @return mixed The cached standard deviation as set by getStatsSupplierTransactions().
+ */
+function getstddevSupplierTransactions()
+{
+	global $stddevSupplierTransactions;
+	// Fast path: statistics already loaded.
+	if ($stddevSupplierTransactions) {
+		return $stddevSupplierTransactions;
+	}
+	getStatsSupplierTransactions();
+	return $stddevSupplierTransactions;
+}
+/**
+ * Queries the mean and standard deviation of notice counts grouped by
+ * supplier (ABN when non-empty, otherwise name) and stores them in the
+ * $averageSupplierTransactions and $stddevSupplierTransactions globals.
+ */
+function getStatsSupplierTransactions()
+{
+	global $averageSupplierTransactions, $stddevSupplierTransactions;
+	$sql = 'select avg(count), stddev(count) from (select IF(supplierABN != "",supplierABN,supplierName) as supplierID, count(*) as count from contractnotice group by supplierID) as a;';
+	// Single aggregate row: column 0 is the mean, column 1 the standard deviation.
+	$row = mysql_fetch_array(mysql_query($sql), MYSQL_BOTH);
+	$stddevSupplierTransactions = $row[1];
+	$averageSupplierTransactions = $row[0];
+}
+