Add initial date based heuristics
Add initial date based heuristics

--- a/heuristics/dateHeuristics.php
+++ b/heuristics/dateHeuristics.php
@@ -1,4 +1,92 @@
-        - long contract period (number of weeks/days?)
-            - Reported late
-        - 45 days? A late contract is a dodgy contract except maybe for variations?
-
+<?php
+//long contract period (number of weeks/days?)
+$heuristics["DATE_LONG_CONTRACT_PERIOD"] = Array(
+	"description" => "long contract period (number of weeks/days?)"
+);
+function DATE_LONG_CONTRACT_PERIOD($cn) // Scores contract notice $cn by how many standard deviations its length in days lies from the mean contract length.
+{
+	$averageContractPeriod = getAverageContractPeriod();
+	$stddevContractPeriod = getstddevContractPeriod();
+	$diff = strtotime($cn['contractEnd']) - strtotime($cn['contractStart']); // seconds between contract start and end
+	$days = intval($diff / (60 * 60 * 24)); // whole days, truncated
+	$value = ($stddevContractPeriod > 0) ? abs($days - $averageContractPeriod) / $stddevContractPeriod : 0; // score 0 rather than divide by a zero/NULL stddev
+	return Array(
+		"heuristic_value" => $value, // distance from the mean, in standard deviations
+		"raw_value" => $days,
+		"mean" => $averageContractPeriod,
+		"stddev" => $stddevContractPeriod
+	);
+}
+$averageContractPeriod;
+function getAverageContractPeriod() // Returns AVG(dateDiff(contractEnd,contractStart)) over contractnotice, reusing the global when it already holds a truthy value.
+{
+	global $averageContractPeriod;
+	if ($averageContractPeriod) {
+		return $averageContractPeriod; // cached by an earlier call
+	}
+	$sql = "select AVG(dateDiff(contractEnd,contractStart)) from contractnotice";
+	$row = mysql_fetch_array(mysql_query($sql), MYSQL_BOTH);
+	$averageContractPeriod = $row[0]; // first (only) column of the single aggregate row
+	return $averageContractPeriod;
+}
+$stddevContractPeriod;
+function getstddevContractPeriod() // Returns STDDEV(dateDiff(contractEnd,contractStart)) over contractnotice, reusing the global when it already holds a truthy value.
+{
+	global $stddevContractPeriod;
+	if ($stddevContractPeriod) {
+		return $stddevContractPeriod; // cached by an earlier call
+	}
+	$sql = "select STDDEV(dateDiff(contractEnd,contractStart)) from contractnotice";
+	$row = mysql_fetch_array(mysql_query($sql), MYSQL_BOTH);
+	$stddevContractPeriod = $row[0]; // first (only) column of the single aggregate row
+	return $stddevContractPeriod;
+}
+//Reported late, 45 days? A late contract is a dodgy contract except maybe for variations?
+$heuristics["DATE_REPORTED_LATE"] = Array(
+	"description" => "Reported late, 45 days?"
+);
+function DATE_REPORTED_LATE($cn) // Scores contract notice $cn by how many days after contractStart it was published, relative to the mean delay.
+{
+	$averageDaysLate = getAverageDaysLate();
+	$stddevDaysLate = getStddevDaysLate();
+	$diff = strtotime($cn['publishDate']) - strtotime($cn['contractStart']); // seconds from contract start to publication
+	$days = intval($diff / (60 * 60 * 24)); // whole days, truncated
+	if ($days <= 0) {
+		$value = 0; // published on or before the contract start: not late
+	}
+	else {
+		// +1 demerit for exceeding 45 day requirement; the deviation term is dropped when stddev is zero/NULL to avoid dividing by zero
+		$value = ($stddevDaysLate > 0 ? abs($days - $averageDaysLate) / $stddevDaysLate : 0) + ($days < 45 ? 0 : 1);
+	}
+	return Array(
+		"heuristic_value" => $value,
+		"raw_value" => $days,
+		"mean" => $averageDaysLate,
+		"stddev" => $stddevDaysLate
+	);
+}
+$averageDaysLate;
+function getAverageDaysLate() // Returns AVG(dateDiff(publishDate,contractStart)) over contractnotice, reusing the global when it already holds a truthy value.
+{
+	global $averageDaysLate;
+	if ($averageDaysLate) {
+		return $averageDaysLate; // cached by an earlier call
+	}
+	$sql = "select AVG(dateDiff(publishDate,contractStart)) from contractnotice";
+	$row = mysql_fetch_array(mysql_query($sql), MYSQL_BOTH);
+	$averageDaysLate = $row[0]; // first (only) column of the single aggregate row
+	return $averageDaysLate;
+}
+$stddevDaysLate;
+function getStddevDaysLate() // Returns STDDEV(dateDiff(publishDate,contractStart)) over contractnotice, reusing the global when it already holds a truthy value.
+{
+	global $stddevDaysLate;
+	if ($stddevDaysLate) {
+		return $stddevDaysLate; // cached by an earlier call
+	}
+	$sql = "select STDDEV(dateDiff(publishDate,contractStart)) from contractnotice";
+	$row = mysql_fetch_array(mysql_query($sql), MYSQL_BOTH);
+	$stddevDaysLate = $row[0]; // first (only) column of the single aggregate row
+	return $stddevDaysLate;
+}
+?>

--- /dev/null
+++ b/heuristics/heuristics.inc.php
@@ -1,1 +1,33 @@
-
+<?php
+  include_once("../lib/common.inc.php");
+$heuristics = Array();
+//each heuristic adds self to description array
+include ("dateHeuristics.php");
+//include("historyHeuristics.php");
+//include("metadataHeuristics.php");
+//include("valueHeuristics.php");
+// method signature heuristic($contractNoticeAsArray);
+function runHeuristic($heuristicName, $cn) // Calls the heuristic function named $heuristicName on contract notice $cn and inserts its result into heuristic_results.
+{
+	$hresults = call_user_func($heuristicName, $cn);
+	if (!isset($hresults["heuristic_value"]) || !isset($hresults["raw_value"]) || !isset($hresults["mean"]) || !isset($hresults["stddev"])) {
+		print_r($hresults); // dump whatever the heuristic returned to help debugging
+		die("Missing field in heuristic $heuristicName result");
+	}
+	$query = "insert into heuristic_results values('$heuristicName',
+    '{$hresults["heuristic_value"]}',
+     '{$hresults["raw_value"]}',
+      '{$hresults["mean"]}',
+       '{$hresults["stddev"]}',
+           '" . mysql_real_escape_string($cn["CNID"]) . "',
+           NOW(),
+           '" . mysql_real_escape_string($cn["publishDate"]) . "',
+           '" . mysql_real_escape_string($cn["agencyABN"]) . "',
+           '" . mysql_real_escape_string($cn["supplierID"]) . "'
+    )";
+	// save value and cn data via sql; the $cn fields are escaped because supplierID may be a free-text supplier name containing quotes
+	$result = mysql_query($query);
+	if ($result) echo "Saved $heuristicName for {$cn["CNID"]} <br>\n";
+	elseif (strpos(mysql_error() , "Duplicate entry") === false) echo $heuristicName . " failed insert.<br>" . mysql_error() . " <br>  $query <br><br>\n"; // duplicate-key failures are deliberately silent
+}
+?>

--- a/heuristics/historyHeuristics.php
+++ b/heuristics/historyHeuristics.php
@@ -1,6 +1,25 @@
-      - unusual for agency/supplier
-        - previous low number of transactions 
-          - zero ie. new agency/supplier is huge score
-    - unusual value for time of year
+<?php     
+    // "unusual for agency/supplier due to previous low number of transactions "
+$heuristics["HISTORY_LOW_TRANSACTIONS"] = Array(
+	"description" => "unusual for agency/supplier due to previous low number of transactions "
+);
+function HISTORY_LOW_TRANSACTIONS($cn) // NOTE(review): body looks like a placeholder - it compares two dates and never counts transactions.
+{
+    	$averageContractPeriod = getAverageContractPeriod(); // fetched but not used below
+	$diff = strtotime($cn['contractStart']) - strtotime($cn['publishDate']); // seconds from publishDate to contractStart
+	$days = intval($diff / 24); // NOTE(review): $diff is in seconds, so dividing by 24 does not yield days
+	return ($days > 45 ? 1 : 0); // returns a bare 1/0, not the heuristic_value/raw_value/mean/stddev array that runHeuristic() checks for
+}
+ /*   - unusual value for time of year
         - compare to all other records in last 2 weeks
-        - ie. many large contracts in june so takes more to standout
+        - ie. many large contracts in june so takes more to standout*/
+ 
+ $heuristics["HISTORY_HIGH_VALUE_FOR_MONTH"] = Array(
+	"description" => "unusual value for time of year");
+function HISTORY_HIGH_VALUE_FOR_MONTH($cn) // NOTE(review): body looks like a placeholder - it compares two dates and never reads a contract value.
+{
+    	$averageContractPeriod = getAverageContractPeriod(); // fetched but not used below
+	$diff = strtotime($cn['contractStart']) - strtotime($cn['publishDate']); // seconds from publishDate to contractStart
+	$days = intval($diff / 24); // NOTE(review): $diff is in seconds, so dividing by 24 does not yield days
+	return ($days > 45 ? 1 : 0); // returns a bare 1/0, not the heuristic_value/raw_value/mean/stddev array that runHeuristic() checks for
+}

--- a/heuristics/metadataHeuristics.php
+++ b/heuristics/metadataHeuristics.php
@@ -1,3 +1,12 @@
-      - duplicated description
-        - most duplicated overall, most duplicated per agency/category/supplier etc.
-  
+ <?php
+ /*- duplicated description
+        - most duplicated overall, most duplicated per agency/category/supplier etc. */
+   $heuristics["METADATA_DUPLICATED_DESCRIPTION"] = Array(
+	"description" => "duplicated description");
+function METADATA_DUPLICATED_DESCRIPTION($cn) // NOTE(review): body looks like a placeholder - it compares two dates and never inspects the notice description.
+{
+    	$averageContractPeriod = getAverageContractPeriod(); // fetched but not used below
+	$diff = strtotime($cn['contractStart']) - strtotime($cn['publishDate']); // seconds from publishDate to contractStart
+	$days = intval($diff / 24); // NOTE(review): $diff is in seconds, so dividing by 24 does not yield days
+	return ($days > 45 ? 1 : 0); // returns a bare 1/0, not the heuristic_value/raw_value/mean/stddev array that runHeuristic() checks for
+}

--- a/heuristics/runHeuristics.php
+++ b/heuristics/runHeuristics.php
@@ -1,6 +1,29 @@
-<?
-if agency
-if supplier
-if CN
+<?php
+include_once("heuristics.inc.php");
+$latestImportSql = "SELECT *, agency.abn as agencyABN, IF(supplierABN != '',supplierABN,supplierName) as supplierID
+FROM contractnotice JOIN agency ON contractnotice.agencyName=agency.agencyName
+WHERE  DATE(importDate) = (select * from (SELECT DATE(importDate) 
+FROM contractnotice ORDER BY importDate DESC limit 1) alias)"; // contract notices imported on the most recent import date
+$notices = mysql_query($latestImportSql);
+if (!$notices) { echo mysql_error() . $latestImportSql; }
+while ($cn = mysql_fetch_array($notices, MYSQL_BOTH)) {
+	// score this contract notice with every registered heuristic
+	foreach (array_keys($heuristics) as $heuristicName) {
+		// the registration key doubles as the name of the function to call
+		runHeuristic($heuristicName, $cn);
+	}
+	flush(); // send the progress output produced so far
+}
+/*foreach agency
+
+aggregate agency metrics
+
+foreach supplier
+
+aggregate supplier metrics
+
+foreach CN
+
+aggregate CN metrics */
 ?>
 

--- a/heuristics/valueHeuristics.php
+++ b/heuristics/valueHeuristics.php
@@ -2,9 +2,27 @@
         - large contract value
           - chi-square test for outliers / standard dev from mean/median
           - percent of total contracts for supplier/agency
-
+   $heuristics["METADATA_DUPLICATED_DESCRIPTION"] = Array(
+	"description" => "unusual value for time of year"); // NOTE(review): key and description repeat other heuristics; presumably meant for the large-contract-value heuristic noted above - confirm
+function METADATA_DUPLICATED_DESCRIPTION($cn) // NOTE(review): this name is declared again later in this file and in metadataHeuristics.php; PHP cannot redeclare a function.
+{
+    	$averageContractPeriod = getAverageContractPeriod(); // fetched but not used below
+	$diff = strtotime($cn['contractStart']) - strtotime($cn['publishDate']); // seconds from publishDate to contractStart
+	$days = intval($diff / 24); // NOTE(review): $diff is in seconds, so dividing by 24 does not yield days
+	return ($days > 45 ? 1 : 0); // returns a bare 1/0, not the heuristic_value/raw_value/mean/stddev array that runHeuristic() checks for
+}
 
         - peculiar value
         - Just under 80k, amplified if other contracts with same supplier are just under
     - unusual variation amount
         - absolute value; large reductions as well as large increases
+        
+           $heuristics["METADATA_DUPLICATED_DESCRIPTION"] = Array(
+	"description" => "unusual value for time of year"); // NOTE(review): key and description repeat other heuristics; presumably meant for the unusual-variation-amount heuristic noted above - confirm
+function METADATA_DUPLICATED_DESCRIPTION($cn) // NOTE(review): this name is already declared earlier in this file and in metadataHeuristics.php; PHP cannot redeclare a function.
+{
+    	$averageContractPeriod = getAverageContractPeriod(); // fetched but not used below
+	$diff = strtotime($cn['contractStart']) - strtotime($cn['publishDate']); // seconds from publishDate to contractStart
+	$days = intval($diff / 24); // NOTE(review): $diff is in seconds, so dividing by 24 does not yield days
+	return ($days > 45 ? 1 : 0); // returns a bare 1/0, not the heuristic_value/raw_value/mean/stddev array that runHeuristic() checks for
+}