<?php
/**
 * History heuristics: score a contract notice by how unusual the number of
 * previous transactions of its agency / supplier is, compared with the
 * distribution of transaction counts over all agencies / suppliers in the
 * `contractnotice` table.
 *
 * All database access goes through the legacy ext/mysql API on the
 * connection that the including script has already opened.
 */

$heuristics["HISTORY_LOW_TRANSACTIONS_AGENCY"] = array(
    "description" => "unusual for agency due to previous low number of transactions "
);

/**
 * Scores a contract notice by its agency's transaction count.
 *
 * @param array $cn Contract notice row; only $cn['agencyName'] is read.
 * @return array Keys "heuristic_value", "raw_value", "mean" and "stddev"
 *               (see historyTransactionsScore()).
 */
function HISTORY_LOW_TRANSACTIONS_AGENCY($cn)
{
    return historyTransactionsScore(
        getAgencyTransactions($cn['agencyName']),
        getAverageAgencyTransactions(),
        getstddevAgencyTransactions()
    );
}

/**
 * Builds the heuristic result for one transaction count.
 *
 * The score is the absolute distance of $count from $mean, measured in
 * standard deviations. abs() is used, so counts far above the mean score as
 * high as counts far below it.
 *
 * @param mixed $count  Number of transactions of the agency / supplier.
 * @param mixed $mean   Mean transaction count over all agencies / suppliers.
 * @param mixed $stddev Standard deviation of those counts.
 * @return array "heuristic_value" (float), "raw_value" (int),
 *               "mean" (float) and "stddev" (float).
 */
function historyTransactionsScore($count, $mean, $stddev)
{
    // ext/mysql hands every column back as a string (or null), so normalise.
    $count = (int) $count;
    $mean = (float) $mean;
    $stddev = (float) $stddev;

    // A zero deviation (empty table, or every group has the same count)
    // means nothing can be called unusual; it would also divide by zero.
    $value = ($stddev > 0) ? abs($count - $mean) / $stddev : 0.0;

    return array(
        "heuristic_value" => $value,
        "raw_value" => $count,
        "mean" => $mean,
        "stddev" => $stddev
    );
}

/**
 * Runs a query and returns its first row as a numerically indexed array.
 *
 * @param string $query Complete SQL statement; values must already be escaped.
 * @return array|false The first row, or false if the query failed or
 *                     returned no rows.
 */
function historyQueryRow($query)
{
    $result = mysql_query($query);
    if ($result === false) {
        // Report the failure instead of passing false to mysql_fetch_array().
        trigger_error('historyHeuristics query failed: ' . mysql_error(), E_USER_WARNING);
        return false;
    }
    $row = mysql_fetch_array($result, MYSQL_NUM);
    mysql_free_result($result);
    return $row;
}

// Per-agency transaction counts, keyed by agency name (request-level cache).
$agencyTransactions = array();

/**
 * Returns the number of contract notices recorded for an agency.
 *
 * @param string $agencyName Agency name as stored in contractnotice.agencyName.
 * @return int Transaction count; 0 if the query failed (not cached then).
 */
function getAgencyTransactions($agencyName)
{
    global $agencyTransactions;
    // isset() rather than a truthiness test: no undefined-index notice, and
    // a cached count of 0 does not trigger the query again.
    if (!isset($agencyTransactions[$agencyName])) {
        $row = historyQueryRow(
            'select count(*) from contractnotice where agencyName = "'
            . mysql_real_escape_string($agencyName) . '"'
        );
        if ($row === false) {
            return 0;
        }
        $agencyTransactions[$agencyName] = (int) $row[0];
    }
    return $agencyTransactions[$agencyName];
}

// Mean / standard deviation of per-agency transaction counts; null until
// getStatsAgencyTransactions() has loaded them.
$averageAgencyTransactions = null;
$stddevAgencyTransactions = null;

/**
 * Returns the mean number of transactions per agency, loading it on first use.
 *
 * @return float|null Mean count, or null if the statistics query failed.
 */
function getAverageAgencyTransactions()
{
    global $averageAgencyTransactions;
    if ($averageAgencyTransactions === null) {
        getStatsAgencyTransactions();
    }
    return $averageAgencyTransactions;
}

/**
 * Returns the standard deviation of transactions per agency, loading it on
 * first use.
 *
 * @return float|null Standard deviation, or null if the statistics query failed.
 */
function getstddevAgencyTransactions()
{
    global $stddevAgencyTransactions;
    if ($stddevAgencyTransactions === null) {
        getStatsAgencyTransactions();
    }
    return $stddevAgencyTransactions;
}

/**
 * Loads mean and standard deviation of the per-agency transaction counts
 * into $averageAgencyTransactions and $stddevAgencyTransactions.
 *
 * On query failure both globals are left untouched.
 */
function getStatsAgencyTransactions()
{
    global $averageAgencyTransactions, $stddevAgencyTransactions;
    $row = historyQueryRow(
        "select avg(count), STDDEV(count) from (select count(*) as count"
        . " from contractnotice group by agencyName) as a;"
    );
    if ($row === false) {
        return;
    }
    // The casts turn SQL NULL (empty table) into 0.0 so the result is cached.
    $averageAgencyTransactions = (float) $row[0];
    $stddevAgencyTransactions = (float) $row[1];
}

$heuristics["HISTORY_LOW_TRANSACTIONS_SUPPLIER"] = array(
    "description" => "unusual for supplier due to previous low number of transactions "
);

/**
 * Scores a contract notice by its supplier's transaction count.
 *
 * @param array $cn Contract notice row; $cn['supplierName'] and
 *                  $cn['supplierABN'] are read.
 * @return array Keys "heuristic_value", "raw_value", "mean" and "stddev"
 *               (see historyTransactionsScore()).
 */
function HISTORY_LOW_TRANSACTIONS_SUPPLIER($cn)
{
    return historyTransactionsScore(
        getSupplierTransactions($cn['supplierName'], $cn['supplierABN']),
        getAverageSupplierTransactions(),
        getstddevSupplierTransactions()
    );
}

// Per-supplier transaction counts, keyed by ABN when one is known and by
// supplier name otherwise (request-level cache).
$supplierTransactions = array();

/**
 * Returns the number of contract notices recorded for a supplier.
 *
 * The supplier is looked up by ABN when a non-empty, non-zero ABN is given
 * and by name otherwise.
 *
 * @param string $supplierName Supplier name as stored in contractnotice.supplierName.
 * @param mixed  $supplierABN  Supplier ABN; 0 or "" means "unknown".
 * @return int Transaction count; 0 if the query failed (not cached then).
 */
function getSupplierTransactions($supplierName, $supplierABN)
{
    global $supplierTransactions;

    // Loose comparisons are kept on purpose: they decide which column
    // identifies the supplier, exactly as before.
    if ($supplierABN != 0 && $supplierABN != "") {
        $column = 'supplierABN';
        $key = $supplierABN;
    } else {
        $column = 'supplierName';
        $key = $supplierName;
    }

    if (!isset($supplierTransactions[$key])) {
        $row = historyQueryRow(
            'select count(*) from contractnotice where ' . $column . ' = "'
            . mysql_real_escape_string($key) . '"'
        );
        if ($row === false) {
            return 0;
        }
        $supplierTransactions[$key] = (int) $row[0];
    }
    return $supplierTransactions[$key];
}

// Mean / standard deviation of per-supplier transaction counts; null until
// getStatsSupplierTransactions() has loaded them.
$averageSupplierTransactions = null;
$stddevSupplierTransactions = null;

/**
 * Returns the mean number of transactions per supplier, loading it on first use.
 *
 * @return float|null Mean count, or null if the statistics query failed.
 */
function getAverageSupplierTransactions()
{
    global $averageSupplierTransactions;
    if ($averageSupplierTransactions === null) {
        getStatsSupplierTransactions();
    }
    return $averageSupplierTransactions;
}

/**
 * Returns the standard deviation of transactions per supplier, loading it on
 * first use.
 *
 * @return float|null Standard deviation, or null if the statistics query failed.
 */
function getstddevSupplierTransactions()
{
    global $stddevSupplierTransactions;
    if ($stddevSupplierTransactions === null) {
        getStatsSupplierTransactions();
    }
    return $stddevSupplierTransactions;
}

/**
 * Loads mean and standard deviation of the per-supplier transaction counts
 * into $averageSupplierTransactions and $stddevSupplierTransactions.
 *
 * Suppliers are grouped by ABN when the ABN column is non-empty and by name
 * otherwise. On query failure both globals are left untouched.
 */
function getStatsSupplierTransactions()
{
    global $averageSupplierTransactions, $stddevSupplierTransactions;
    $row = historyQueryRow(
        'select avg(count), stddev(count) from (select IF(supplierABN != "",supplierABN,supplierName) as supplierID, count(*) as count from contractnotice group by supplierID) as a;'
    );
    if ($row === false) {
        return;
    }
    // The casts turn SQL NULL (empty table) into 0.0 so the result is cached.
    $averageSupplierTransactions = (float) $row[0];
    $stddevSupplierTransactions = (float) $row[1];
}