<?php | <?php |
//long contract period (number of weeks/days?) | //long contract period (number of weeks/days?) |
$heuristics["DATE_LONG_CONTRACT_PERIOD"] = array(
    "description" => "long contract period (number of weeks/days?)"
);
/**
 * Scores a contract notice by how far its duration, in whole days between
 * contractStart and contractEnd, lies from the mean duration, expressed in
 * standard deviations.
 *
 * @param array $cn contract notice row; reads 'contractStart' and 'contractEnd'
 * @return array keys: heuristic_value, raw_value, mean, stddev
 */
function DATE_LONG_CONTRACT_PERIOD($cn)
{
    $averageContractPeriod = getAverageContractPeriod();
    $stddevContractPeriod = getstddevContractPeriod();
    $seconds = strtotime($cn['contractEnd']) - strtotime($cn['contractStart']);
    $days = intval($seconds / (60 * 60 * 24));
    // A zero or missing standard deviation cannot be divided by; score the
    // notice as unremarkable instead of raising a division error.
    $spread = floatval($stddevContractPeriod);
    if ($spread != 0) {
        $value = abs($days - $averageContractPeriod) / $spread;
    } else {
        $value = 0;
    }
    return array(
        "heuristic_value" => $value,
        "raw_value" => $days,
        "mean" => $averageContractPeriod,
        "stddev" => $stddevContractPeriod
    );
}
$averageContractPeriod;
/**
 * Returns the cached AVG(dateDiff(contractEnd,contractStart)) over
 * contractnotice, loading it through getStddevAverageContractPeriod() the
 * first time it is needed.
 *
 * @return mixed the cached average, or null if it could not be loaded
 */
function getAverageContractPeriod()
{
    global $averageContractPeriod;
    // Compare against null rather than truthiness so a stored "0" is not
    // mistaken for "not loaded yet" and re-queried on every call.
    if ($averageContractPeriod === null) {
        getStddevAverageContractPeriod();
    }
    return $averageContractPeriod;
}
$stddevContractPeriod;
/**
 * Returns the cached STDDEV(dateDiff(contractEnd,contractStart)) over
 * contractnotice, loading it through getStddevAverageContractPeriod() the
 * first time it is needed.
 *
 * @return mixed the cached standard deviation, or null if it could not be loaded
 */
function getstddevContractPeriod()
{
    global $stddevContractPeriod;
    // Compare against null rather than truthiness: a standard deviation of
    // exactly "0" is a valid cached value and must not trigger a new query.
    if ($stddevContractPeriod === null) {
        getStddevAverageContractPeriod();
    }
    return $stddevContractPeriod;
}
/**
 * Loads the mean and standard deviation of the contract period from
 * contractnotice in a single query and stores them in the globals
 * $averageContractPeriod and $stddevContractPeriod.
 */
function getStddevAverageContractPeriod()
{
    global $averageContractPeriod, $stddevContractPeriod;
    $query = "select AVG(dateDiff(contractEnd,contractStart)),stddev(dateDiff(contractEnd,contractStart)) from contractnotice";
    $result = mysql_query($query);
    // A failed query returns false; leave the cached values untouched rather
    // than handing false to mysql_fetch_array().
    if (!$result) {
        return;
    }
    $row = mysql_fetch_array($result, MYSQL_BOTH);
    $averageContractPeriod = $row[0];
    $stddevContractPeriod = $row[1];
}
//Reported late, 45 days? A late contract is a dodgy contract except maybe for variations? | //Reported late, 45 days? A late contract is a dodgy contract except maybe for variations? |
$heuristics["DATE_REPORTED_LATE"] = array(
    "description" => "Reported late, 45 days?"
);
/**
 * Scores a contract notice by how many whole days after contractStart it was
 * published. Notices published on or before the start date score 0.
 *
 * @param array $cn contract notice row; reads 'publishDate' and 'contractStart'
 * @return array keys: heuristic_value, raw_value, mean, stddev
 */
function DATE_REPORTED_LATE($cn)
{
    $averageDaysLate = getAverageDaysLate();
    $stddevDaysLate = getStddevDaysLate();
    $seconds = strtotime($cn['publishDate']) - strtotime($cn['contractStart']);
    $days = intval($seconds / (60 * 60 * 24));
    if ($days <= 0) {
        $value = 0;
    } else {
        // A zero or missing standard deviation cannot be divided by; fall
        // back to a deviation score of 0 and keep only the demerit.
        $spread = floatval($stddevDaysLate);
        $deviation = ($spread != 0) ? abs($days - $averageDaysLate) / $spread : 0;
        // +1 demerit for reaching or exceeding the 45 day requirement
        $value = $deviation + ($days < 45 ? 0 : 1);
    }
    return array(
        "heuristic_value" => $value,
        "raw_value" => $days,
        "mean" => $averageDaysLate,
        "stddev" => $stddevDaysLate
    );
}
$averageDaysLate;
/**
 * Returns the cached AVG(dateDiff(publishDate,contractStart)) over
 * contractnotice, loading it through getDaysLate() the first time it is needed.
 *
 * @return mixed the cached average, or null if it could not be loaded
 */
function getAverageDaysLate()
{
    global $averageDaysLate;
    // Null means "not loaded"; a stored "0" is a valid value and is kept.
    if ($averageDaysLate === null) {
        getDaysLate();
    }
    return $averageDaysLate;
}
$stddevDaysLate;
/**
 * Returns the cached STDDEV(dateDiff(publishDate,contractStart)) over
 * contractnotice, loading it through getDaysLate() the first time it is needed.
 *
 * @return mixed the cached standard deviation, or null if it could not be loaded
 */
function getStddevDaysLate()
{
    global $stddevDaysLate;
    // Null means "not loaded"; a stored "0" is a valid value and is kept.
    if ($stddevDaysLate === null) {
        getDaysLate();
    }
    return $stddevDaysLate;
}
/**
 * Loads the mean and standard deviation of the publish delay from
 * contractnotice in a single query and stores them in the globals
 * $averageDaysLate and $stddevDaysLate.
 */
function getDaysLate() {
    global $averageDaysLate, $stddevDaysLate;
    $query = "select AVG(dateDiff(publishDate,contractStart)), STDDEV(dateDiff(publishDate,contractStart)) from contractnotice";
    $result = mysql_query($query);
    // A failed query returns false; leave the cached values untouched rather
    // than handing false to mysql_fetch_array().
    if (!$result) {
        return;
    }
    $row = mysql_fetch_array($result, MYSQL_BOTH);
    $averageDaysLate = $row[0];
    $stddevDaysLate = $row[1];
}
?> | ?> |
<?php | <?php |
include_once("../lib/common.inc.php"); | include_once ("../lib/common.inc.php"); |
$heuristics = Array(); | $heuristics = Array(); |
//each heuristic adds self to description array | //each heuristic adds self to description array |
include ("dateHeuristics.php"); | include ("dateHeuristics.php"); |
//include("historyHeuristics.php"); | //include ("historyHeuristics.php"); |
//include("metadataHeuristics.php"); | //include ("metadataHeuristics.php"); |
//include("valueHeuristics.php"); | //include ("valueHeuristics.php"); |
// method signature heuristic($contractNoticeAsArray); | |
/**
 * Runs one heuristic against a contract notice and stores its result in
 * heuristic_results, unless a row for that heuristic and CNID already exists.
 *
 * Heuristic signature: heuristic($contractNoticeAsArray), returning an array
 * with the keys heuristic_value, raw_value, mean and stddev. The script dies
 * if any of those keys is missing.
 *
 * @param string $heuristicName name of the heuristic function to call
 * @param array  $cn            contract notice row; reads CNID, publishDate,
 *                              agencyABN and supplierID
 */
function runHeuristic($heuristicName, $cn)
{
    // check if already ran
    $query = "select count(*) from heuristic_results where heuristic_name = '"
        . mysql_real_escape_string($heuristicName) . "' and CNID = '"
        . mysql_real_escape_string($cn["CNID"]) . "'";
    $result = mysql_query($query);
    $r = $result ? mysql_fetch_array($result) : false;
    if ($r && $r[0] != 0) {
        return;
    }
    // if not (or if the check itself failed), run now; a duplicate insert is
    // tolerated further down.
    $hresults = call_user_func($heuristicName, $cn);
    if (!isset($hresults["heuristic_value"]) || !isset($hresults["raw_value"]) || !isset($hresults["mean"]) || !isset($hresults["stddev"])) {
        print_r($hresults);
        die("Missing field in heurtistic $heuristicName result");
    }
    // Escape every interpolated value so quotes in the data cannot break the
    // statement.
    $fields = array_map('mysql_real_escape_string', array(
        $heuristicName,
        $hresults["heuristic_value"],
        $hresults["raw_value"],
        $hresults["mean"],
        $hresults["stddev"],
        $cn["CNID"],
        $cn["publishDate"],
        $cn["agencyABN"],
        $cn["supplierID"]
    ));
    $query = vsprintf("insert into heuristic_results values('%s',
                '%s',
                '%s',
                '%s',
                '%s',
                '%s',
                NOW(),
                '%s',
                '%s',
                '%s'
                )", $fields);
    // save value and cn data via sql
    $result = mysql_query($query);
    if ($result) {
        echo "Saved $heuristicName for {$cn["CNID"]} <br>\n";
    } elseif (strpos(mysql_error(), "Duplicate entry") === false) {
        // $hresults is an array; render it with print_r instead of echoing "Array".
        echo print_r($hresults, true) . " failed insert.<br>" . mysql_error() . " <br> $query <br><br>\n";
    }
}
?> | ?> |
<?php | <?php |
$heuristics["HISTORY_LOW_TRANSACTIONS_AGENCY"] = array(
    "description" => "unusual for agency due to previous low number of transactions "
);
/**
 * Scores a contract notice by how far its agency's transaction count lies
 * from the mean per-agency transaction count, in standard deviations.
 *
 * @param array $cn contract notice row; reads 'agencyName'
 * @return array keys: heuristic_value, raw_value, mean, stddev
 */
function HISTORY_LOW_TRANSACTIONS_AGENCY($cn)
{
    $thisAgencyTransactions = getAgencyTransactions($cn['agencyName']);
    $averageAgencyTransactions = getAverageAgencyTransactions();
    $stddevAgencyTransactions = getstddevAgencyTransactions();
    // Compare the agency's own transaction count with the per-agency
    // statistics. A zero or missing standard deviation yields a score of 0.
    $spread = floatval($stddevAgencyTransactions);
    if ($spread != 0) {
        $value = abs($thisAgencyTransactions - $averageAgencyTransactions) / $spread;
    } else {
        $value = 0;
    }
    return array(
        "heuristic_value" => $value,
        "raw_value" => $thisAgencyTransactions,
        "mean" => $averageAgencyTransactions,
        "stddev" => $stddevAgencyTransactions
    );
}
$agencyTransactions = array();
/**
 * Returns the number of contractnotice rows for an agency, caching the count
 * per agency name in the global $agencyTransactions.
 *
 * @param string $agencyName agency name to count rows for
 * @return mixed the count, or null if the query failed
 */
function getAgencyTransactions($agencyName)
{
    global $agencyTransactions;
    // isset() avoids an undefined-index notice on the first lookup.
    if (!isset($agencyTransactions[$agencyName])) {
        // Escape the name so quotes inside it cannot break the statement.
        $query = 'select count(*) from contractnotice where agencyName = "' . mysql_real_escape_string($agencyName) . '"';
        $result = mysql_query($query);
        if (!$result) {
            // Query failed: nothing to cache.
            return null;
        }
        $row = mysql_fetch_array($result, MYSQL_BOTH);
        $agencyTransactions[$agencyName] = $row[0];
    }
    return $agencyTransactions[$agencyName];
}
$averageAgencyTransactions;
/**
 * Returns the cached mean number of contract notices per agency, loading it
 * through getStatsAgencyTransactions() the first time it is needed.
 *
 * @return mixed the cached average, or null if it could not be loaded
 */
function getAverageAgencyTransactions()
{
    global $averageAgencyTransactions;
    // Null means "not loaded"; a stored "0" is a valid value and is kept.
    if ($averageAgencyTransactions === null) {
        getStatsAgencyTransactions();
    }
    return $averageAgencyTransactions;
}
$stddevAgencyTransactions;
/**
 * Returns the cached standard deviation of contract notices per agency,
 * loading it through getStatsAgencyTransactions() the first time it is needed.
 *
 * @return mixed the cached standard deviation, or null if it could not be loaded
 */
function getstddevAgencyTransactions()
{
    global $stddevAgencyTransactions;
    // Null means "not loaded"; a stored "0" is a valid value and is kept.
    if ($stddevAgencyTransactions === null) {
        getStatsAgencyTransactions();
    }
    return $stddevAgencyTransactions;
}
/**
 * Loads the mean and standard deviation of the per-agency notice counts in a
 * single query and stores them in the globals $averageAgencyTransactions and
 * $stddevAgencyTransactions.
 */
function getStatsAgencyTransactions()
{
    global $averageAgencyTransactions, $stddevAgencyTransactions;
    $query = "select avg(count), STDDEV(count) from (select count(*) as count
from contractnotice group by agencyName) as a;";
    $result = mysql_query($query);
    // A failed query returns false; leave the cached values untouched rather
    // than handing false to mysql_fetch_array().
    if (!$result) {
        return;
    }
    $row = mysql_fetch_array($result, MYSQL_BOTH);
    $averageAgencyTransactions = $row[0];
    $stddevAgencyTransactions = $row[1];
}
$heuristics["HISTORY_LOW_TRANSACTIONS_SUPPLIER"] = array(
    "description" => "unusual for supplier due to previous low number of transactions "
);
/**
 * Scores a contract notice by how far its supplier's transaction count lies
 * from the mean per-supplier transaction count, in standard deviations.
 *
 * @param array $cn contract notice row; reads 'supplierName' and 'supplierABN'
 * @return array keys: heuristic_value, raw_value, mean, stddev
 */
function HISTORY_LOW_TRANSACTIONS_SUPPLIER($cn)
{
    $thisSupplierTransactions = getSupplierTransactions($cn['supplierName'], $cn['supplierABN']);
    $averageSupplierTransactions = getAverageSupplierTransactions();
    $stddevSupplierTransactions = getstddevSupplierTransactions();
    // Compare the supplier's own transaction count with the per-supplier
    // statistics. A zero or missing standard deviation yields a score of 0.
    $spread = floatval($stddevSupplierTransactions);
    if ($spread != 0) {
        $value = abs($thisSupplierTransactions - $averageSupplierTransactions) / $spread;
    } else {
        $value = 0;
    }
    return array(
        "heuristic_value" => $value,
        "raw_value" => $thisSupplierTransactions,
        "mean" => $averageSupplierTransactions,
        "stddev" => $stddevSupplierTransactions
    );
}
$supplierTransactions = array();
/**
 * Returns the number of contractnotice rows for a supplier, matched by ABN
 * when one is given and by name otherwise. Counts are cached in the global
 * $supplierTransactions under the ABN or name used for the lookup.
 *
 * @param string $supplierName supplier name, used when no ABN is available
 * @param mixed  $supplierABN  supplier ABN; 0 or "" means "not available"
 * @return mixed the count, or null if the query failed
 */
function getSupplierTransactions($supplierName, $supplierABN)
{
    global $supplierTransactions;
    if ($supplierABN != 0 && $supplierABN != "") {
        $column = 'supplierABN';
        $key = $supplierABN;
    } else {
        $column = 'supplierName';
        $key = $supplierName;
    }
    // isset() avoids an undefined-index notice on the first lookup.
    if (!isset($supplierTransactions[$key])) {
        // Escape the value so quotes inside it cannot break the statement.
        $query = 'select count(*) from contractnotice where ' . $column . ' = "' . mysql_real_escape_string($key) . '"';
        $result = mysql_query($query);
        if (!$result) {
            // Query failed: nothing to cache.
            return null;
        }
        $row = mysql_fetch_array($result, MYSQL_BOTH);
        $supplierTransactions[$key] = $row[0];
    }
    return $supplierTransactions[$key];
}
$averageSupplierTransactions;
/**
 * Returns the cached mean number of contract notices per supplier, loading it
 * through getStatsSupplierTransactions() the first time it is needed.
 *
 * @return mixed the cached average, or null if it could not be loaded
 */
function getAverageSupplierTransactions()
{
    global $averageSupplierTransactions;
    // Null means "not loaded"; a stored "0" is a valid value and is kept.
    if ($averageSupplierTransactions === null) {
        getStatsSupplierTransactions();
    }
    return $averageSupplierTransactions;
}
$stddevSupplierTransactions;
/**
 * Returns the cached standard deviation of contract notices per supplier,
 * loading it through getStatsSupplierTransactions() the first time it is needed.
 *
 * @return mixed the cached standard deviation, or null if it could not be loaded
 */
function getstddevSupplierTransactions()
{
    global $stddevSupplierTransactions;
    // Null means "not loaded"; a stored "0" is a valid value and is kept.
    if ($stddevSupplierTransactions === null) {
        getStatsSupplierTransactions();
    }
    return $stddevSupplierTransactions;
}
/**
 * Loads the mean and standard deviation of the per-supplier notice counts in
 * a single query and stores them in the globals $averageSupplierTransactions
 * and $stddevSupplierTransactions. Suppliers are grouped by ABN when it is
 * non-empty and by name otherwise.
 */
function getStatsSupplierTransactions()
{
    global $averageSupplierTransactions, $stddevSupplierTransactions;
    $query = 'select avg(count), stddev(count) from (select IF(supplierABN != "",supplierABN,supplierName) as supplierID, count(*) as count from contractnotice group by supplierID) as a;';
    $result = mysql_query($query);
    // A failed query returns false; leave the cached values untouched rather
    // than handing false to mysql_fetch_array().
    if (!$result) {
        return;
    }
    $row = mysql_fetch_array($result, MYSQL_BOTH);
    $averageSupplierTransactions = $row[0];
    $stddevSupplierTransactions = $row[1];
}
<?php | <?php |
/* all
SELECT description, count(*) as count
FROM `contractnotice`
group by description having count > 1 order by count
*/
/*- duplicated description
- most duplicated overall, most duplicated per agency/category/supplier etc. */
$heuristics["METADATA_DUPLICATED_DESCRIPTION"] = array(
    "description" => "duplicated description"
);
/**
 * Scores a contract notice by how many contractnotice rows share its
 * description. A description used exactly once scores 0; otherwise the score
 * is the distance of the count from the mean duplicate count, in standard
 * deviations.
 *
 * @param array $cn contract notice row; reads 'description'
 * @return array keys: heuristic_value, raw_value, mean, stddev
 */
function METADATA_DUPLICATED_DESCRIPTION($cn)
{
    $averageDuplicatedDescriptions = getAverageDuplicatedDescriptions();
    $stddevDuplicatedDescriptions = getstddevDuplicatedDescriptions();
    // Count rows with this notice's own description, escaped so quotes in
    // the text cannot break the statement.
    $query = 'select count(*) from contractnotice where description = "' . mysql_real_escape_string($cn['description']) . '"';
    $result = mysql_query($query);
    $row = $result ? mysql_fetch_array($result, MYSQL_BOTH) : false;
    // Left as null when the query fails.
    $dupeDesc = $row ? $row[0] : null;
    $spread = floatval($stddevDuplicatedDescriptions);
    if ($dupeDesc == 1 || $spread == 0) {
        // Unique description, or no usable spread to measure against.
        $value = 0;
    } else {
        $value = abs($dupeDesc - $averageDuplicatedDescriptions) / $spread;
    }
    return array(
        "heuristic_value" => $value,
        "raw_value" => $dupeDesc,
        "mean" => $averageDuplicatedDescriptions,
        "stddev" => $stddevDuplicatedDescriptions
    );
}
$averageDuplicatedDescriptions;
/**
 * Returns the cached mean duplicate-description count, calling
 * getStatsDuplicatedDescriptions() when no value has been cached yet.
 *
 * @return mixed the cached average, or null if none has been stored
 */
function getAverageDuplicatedDescriptions()
{
    global $averageDuplicatedDescriptions;
    // Null means "not loaded"; a stored "0" is a valid value and is kept.
    if ($averageDuplicatedDescriptions === null) {
        getStatsDuplicatedDescriptions();
    }
    return $averageDuplicatedDescriptions;
}
$stddevDuplicatedDescriptions;
/**
 * Returns the cached standard deviation of the duplicate-description counts,
 * calling getStatsDuplicatedDescriptions() when no value has been cached yet.
 *
 * @return mixed the cached standard deviation, or null if none has been stored
 */
function getstddevDuplicatedDescriptions()
{
    global $stddevDuplicatedDescriptions;
    // Null means "not loaded"; a stored "0" is a valid value and is kept.
    if ($stddevDuplicatedDescriptions === null) {
        getStatsDuplicatedDescriptions();
    }
    return $stddevDuplicatedDescriptions;
}
function getStatsDuplicatedDescriptions() | |
{ | |
$query = "select avg(count),STDDEV(count) from ( | |
SELECT description, count(*) as count | |
FROM `contractnotice` | |
group by description having count > 1 | |
) as a;"; |