- long contract period (number of weeks/days?) | <?php |
- Reported late | //long contract period (number of weeks/days?) |
- 45 days? A late contract is a dodgy contract except maybe for variations? | $heuristics["DATE_LONG_CONTRACT_PERIOD"] = Array( |
"description" => "long contract period (number of weeks/days?)" | |
); | |
/**
 * Scores a contract notice by how far its contract period sits from the
 * average period, measured in standard deviations.
 *
 * @param array $cn Contract notice row; reads 'contractStart' and
 *                  'contractEnd', both parsed with strtotime().
 * @return array Keys: heuristic_value (distance from the mean in standard
 *               deviations), raw_value (period in whole days), mean, stddev.
 */
function DATE_LONG_CONTRACT_PERIOD($cn)
{
    $averageContractPeriod = getAverageContractPeriod();
    $stddevContractPeriod = getstddevContractPeriod();
    // strtotime() yields seconds; convert the difference to whole days.
    $diff = strtotime($cn['contractEnd']) - strtotime($cn['contractStart']);
    $days = intval($diff / (60 * 60 * 24));
    if ($stddevContractPeriod > 0) {
        $value = abs($days - $averageContractPeriod) / $stddevContractPeriod;
    } else {
        // No usable spread (zero or missing stddev): nothing can be called
        // an outlier, so score 0 instead of dividing by zero.
        $value = 0;
    }
    return array(
        "heuristic_value" => $value,
        "raw_value" => $days,
        "mean" => $averageContractPeriod,
        "stddev" => $stddevContractPeriod
    );
}
// Per-request cache for getAverageContractPeriod(); null means "not loaded yet".
$averageContractPeriod = null;
/**
 * Returns the average of dateDiff(contractEnd, contractStart) over the
 * contractnotice table, querying once and caching the result in a global.
 *
 * @return mixed The value as returned by MySQL, or null when the query
 *               fails or yields NULL.
 */
function getAverageContractPeriod()
{
    global $averageContractPeriod;
    // Compare against null so a legitimate "0" result is cached as well.
    if ($averageContractPeriod === null) {
        $query = "select AVG(dateDiff(contractEnd,contractStart)) from contractnotice";
        $result = mysql_query($query);
        if (!$result) {
            // Report the failure rather than passing false to mysql_fetch_array().
            trigger_error("getAverageContractPeriod query failed: " . mysql_error(), E_USER_WARNING);
            return null;
        }
        $r = mysql_fetch_array($result, MYSQL_BOTH);
        $averageContractPeriod = $r[0];
    }
    return $averageContractPeriod;
}
// Per-request cache for getstddevContractPeriod(); null means "not loaded yet".
$stddevContractPeriod = null;
/**
 * Returns the standard deviation of dateDiff(contractEnd, contractStart)
 * over the contractnotice table, querying once and caching the result in a
 * global.
 *
 * @return mixed The value as returned by MySQL, or null when the query
 *               fails or yields NULL.
 */
function getstddevContractPeriod()
{
    global $stddevContractPeriod;
    // Compare against null so a standard deviation of "0" is cached too
    // instead of being re-queried on every call.
    if ($stddevContractPeriod === null) {
        $query = "select STDDEV(dateDiff(contractEnd,contractStart)) from contractnotice";
        $result = mysql_query($query);
        if (!$result) {
            // Report the failure rather than passing false to mysql_fetch_array().
            trigger_error("getstddevContractPeriod query failed: " . mysql_error(), E_USER_WARNING);
            return null;
        }
        $r = mysql_fetch_array($result, MYSQL_BOTH);
        $stddevContractPeriod = $r[0];
    }
    return $stddevContractPeriod;
}
//Reported late, 45 days? A late contract is a dodgy contract except maybe for variations? | |
$heuristics["DATE_REPORTED_LATE"] = Array( | |
"description" => "Reported late, 45 days?" | |
); | |
/**
 * Scores a contract notice by how long after the contract start it was
 * published.
 *
 * Notices published on or before the contract start score 0. Otherwise the
 * score is the distance of the delay from the average delay in standard
 * deviations, plus one demerit when the delay is 45 days or more.
 *
 * @param array $cn Contract notice row; reads 'publishDate' and
 *                  'contractStart', both parsed with strtotime().
 * @return array Keys: heuristic_value, raw_value (delay in whole days),
 *               mean, stddev.
 */
function DATE_REPORTED_LATE($cn)
{
    $averageDaysLate = getAverageDaysLate();
    $stddevDaysLate = getStddevDaysLate();
    // strtotime() yields seconds; convert the difference to whole days.
    $diff = strtotime($cn['publishDate']) - strtotime($cn['contractStart']);
    $days = intval($diff / (60 * 60 * 24));
    if ($days <= 0) {
        $value = 0;
    } else {
        // Without a usable spread (zero or missing stddev) the deviation
        // part contributes nothing, rather than dividing by zero.
        $deviation = 0;
        if ($stddevDaysLate > 0) {
            $deviation = abs($days - $averageDaysLate) / $stddevDaysLate;
        }
        // +1 demerit for exceeding 45 day requirement
        $value = $deviation + ($days < 45 ? 0 : 1);
    }
    return array(
        "heuristic_value" => $value,
        "raw_value" => $days,
        "mean" => $averageDaysLate,
        "stddev" => $stddevDaysLate
    );
}
// Per-request cache for getAverageDaysLate(); null means "not loaded yet".
$averageDaysLate = null;
/**
 * Returns the average of dateDiff(publishDate, contractStart) over the
 * contractnotice table, querying once and caching the result in a global.
 *
 * @return mixed The value as returned by MySQL, or null when the query
 *               fails or yields NULL.
 */
function getAverageDaysLate()
{
    global $averageDaysLate;
    // Compare against null so a legitimate "0" result is cached as well.
    if ($averageDaysLate === null) {
        $query = "select AVG(dateDiff(publishDate,contractStart)) from contractnotice";
        $result = mysql_query($query);
        if (!$result) {
            // Report the failure rather than passing false to mysql_fetch_array().
            trigger_error("getAverageDaysLate query failed: " . mysql_error(), E_USER_WARNING);
            return null;
        }
        $r = mysql_fetch_array($result, MYSQL_BOTH);
        $averageDaysLate = $r[0];
    }
    return $averageDaysLate;
}
// Per-request cache for getStddevDaysLate(); null means "not loaded yet".
$stddevDaysLate = null;
/**
 * Returns the standard deviation of dateDiff(publishDate, contractStart)
 * over the contractnotice table, querying once and caching the result in a
 * global.
 *
 * @return mixed The value as returned by MySQL, or null when the query
 *               fails or yields NULL.
 */
function getStddevDaysLate()
{
    global $stddevDaysLate;
    // Compare against null so a standard deviation of "0" is cached too
    // instead of being re-queried on every call.
    if ($stddevDaysLate === null) {
        $query = "select STDDEV(dateDiff(publishDate,contractStart)) from contractnotice";
        $result = mysql_query($query);
        if (!$result) {
            // Report the failure rather than passing false to mysql_fetch_array().
            trigger_error("getStddevDaysLate query failed: " . mysql_error(), E_USER_WARNING);
            return null;
        }
        $r = mysql_fetch_array($result, MYSQL_BOTH);
        $stddevDaysLate = $r[0];
    }
    return $stddevDaysLate;
}
?> |
<?php | |
include_once("../lib/common.inc.php"); | |
$heuristics = Array(); | |
//each heuristic adds self to description array | |
include ("dateHeuristics.php"); | |
//include("historyHeuristics.php"); | |
//include("metadataHeuristics.php"); | |
//include("valueHeuristics.php"); | |
// method signature heuristic($contractNoticeAsArray); | |
/**
 * Runs one heuristic against a contract notice and stores the outcome in
 * the heuristic_results table.
 *
 * The heuristic is called by name and must return an array with the keys
 * heuristic_value, raw_value, mean and stddev; otherwise the result is
 * dumped and the script dies. A "Duplicate entry" insert error is ignored
 * silently; any other insert error is echoed together with the query.
 *
 * @param string $heuristicName Name of the heuristic function to call.
 * @param array  $cn            Contract notice row; reads 'CNID',
 *                              'publishDate', 'agencyABN' and 'supplierID'.
 */
function runHeuristic($heuristicName, $cn)
{
    $hresults = call_user_func($heuristicName, $cn);
    foreach (array("heuristic_value", "raw_value", "mean", "stddev") as $field) {
        if (!isset($hresults[$field])) {
            print_r($hresults);
            die("Missing field in heurtistic $heuristicName result");
        }
    }
    // Escape every interpolated value: supplierID may be a free-text supplier
    // name, and an embedded quote would otherwise break the statement.
    $name = mysql_real_escape_string($heuristicName);
    $heuristicValue = mysql_real_escape_string($hresults["heuristic_value"]);
    $rawValue = mysql_real_escape_string($hresults["raw_value"]);
    $mean = mysql_real_escape_string($hresults["mean"]);
    $stddev = mysql_real_escape_string($hresults["stddev"]);
    $cnid = mysql_real_escape_string($cn["CNID"]);
    $publishDate = mysql_real_escape_string($cn["publishDate"]);
    $agencyABN = mysql_real_escape_string($cn["agencyABN"]);
    $supplierID = mysql_real_escape_string($cn["supplierID"]);
    $query = "insert into heuristic_results values('$name',
    '$heuristicValue',
    '$rawValue',
    '$mean',
    '$stddev',
    '$cnid',
    NOW(),
    '$publishDate',
    '$agencyABN',
    '$supplierID'
    )";
    // save value and cn data via sql
    $result = mysql_query($query);
    if ($result) {
        echo "Saved $heuristicName for {$cn["CNID"]} <br>\n";
    } elseif (strpos(mysql_error(), "Duplicate entry") === false) {
        // Name the heuristic that failed; concatenating the result array
        // here would only print "Array".
        echo $heuristicName . " failed insert.<br>" . mysql_error() . " <br> $query <br><br>\n";
    }
}
?> |
- unusual for agency/supplier | <?php |
- previous low number of transactions | // "unusual for agency/supplier due to previous low number of transactions " |
- zero ie. new agency/supplier is huge score | $heuristics["HISTORY_LOW_TRANSACTIONS"] = Array( |
- unusual value for time of year | "description" => "unusual for agency/supplier due to previous low number of transactions " |
); | |
/**
 * Returns 1 when the contract starts more than 45 days after the notice was
 * published, otherwise 0.
 *
 * NOTE(review): this looks like a placeholder. The body does not examine
 * transaction history, and it returns a bare 0/1 rather than the
 * heuristic_value/raw_value/mean/stddev array that runHeuristic() checks for.
 *
 * @param array $cn Contract notice row; reads 'contractStart' and 'publishDate'.
 * @return int 1 or 0.
 */
function HISTORY_LOW_TRANSACTIONS($cn)
{
    $diff = strtotime($cn['contractStart']) - strtotime($cn['publishDate']);
    // strtotime() differences are in seconds, so one day is 60 * 60 * 24.
    $days = intval($diff / (60 * 60 * 24));
    return ($days > 45 ? 1 : 0);
}
/* - unusual value for time of year | |
- compare to all other records in last 2 weeks | - compare to all other records in last 2 weeks |
- ie. many large contracts in june so takes more to standout | - ie. many large contracts in june so takes more to standout*/ |
$heuristics["HISTORY_HIGH_VALUE_FOR_MONTH"] = Array( | |
"description" => "unusual value for time of year"); | |
/**
 * Returns 1 when the contract starts more than 45 days after the notice was
 * published, otherwise 0.
 *
 * NOTE(review): this looks like a placeholder. The body does not examine the
 * contract value or the time of year, and it returns a bare 0/1 rather than
 * the heuristic_value/raw_value/mean/stddev array that runHeuristic() checks
 * for.
 *
 * @param array $cn Contract notice row; reads 'contractStart' and 'publishDate'.
 * @return int 1 or 0.
 */
function HISTORY_HIGH_VALUE_FOR_MONTH($cn)
{
    $diff = strtotime($cn['contractStart']) - strtotime($cn['publishDate']);
    // strtotime() differences are in seconds, so one day is 60 * 60 * 24.
    $days = intval($diff / (60 * 60 * 24));
    return ($days > 45 ? 1 : 0);
}
- duplicated description | <?php |
- most duplicated overall, most duplicated per agency/category/supplier etc. | /*- duplicated description |
- most duplicated overall, most duplicated per agency/category/supplier etc. */ | |
$heuristics["METADATA_DUPLICATED_DESCRIPTION"] = Array( | |
"description" => "unusual value for time of year"); | |
/**
 * Returns 1 when the contract starts more than 45 days after the notice was
 * published, otherwise 0.
 *
 * NOTE(review): this looks like a placeholder. The body does not examine the
 * notice description, and it returns a bare 0/1 rather than the
 * heuristic_value/raw_value/mean/stddev array that runHeuristic() checks for.
 *
 * @param array $cn Contract notice row; reads 'contractStart' and 'publishDate'.
 * @return int 1 or 0.
 */
function METADATA_DUPLICATED_DESCRIPTION($cn)
{
    $diff = strtotime($cn['contractStart']) - strtotime($cn['publishDate']);
    // strtotime() differences are in seconds, so one day is 60 * 60 * 24.
    $days = intval($diff / (60 * 60 * 24));
    return ($days > 45 ? 1 : 0);
}
<? | <?php |
if agency | include_once("heuristics.inc.php"); |
if supplier | $query = "SELECT *, agency.abn as agencyABN, IF(supplierABN != '',supplierABN,supplierName) as supplierID |
if CN | FROM contractnotice JOIN agency ON contractnotice.agencyName=agency.agencyName |
WHERE DATE(importDate) = (select * from (SELECT DATE(importDate) | |
FROM contractnotice ORDER BY importDate DESC limit 1) alias)"; | |
$result = mysql_query($query);
if (!$result) {
    // Nothing to iterate when the query failed; report it and skip the loop
    // instead of handing false to mysql_fetch_array().
    echo mysql_error().$query;
} else {
    while ($cn = mysql_fetch_array($result, MYSQL_BOTH)) {
        //get each new CN from latest update
        foreach ($heuristics as $heuristic => $description) {
            // run all heuristics
            runHeuristic($heuristic, $cn);
        }
        // Push per-notice progress output to the client as it is produced.
        flush();
    }
}
/*foreach agency | |
aggregate agency metrics | |
foreach supplier | |
aggregate supplier metrics | |
foreach CN | |
aggregate CN metrics */ | |
?> | ?> |
- large contract value | - large contract value |
- chi-square test for outliers / standard dev from mean/median | - chi-square test for outliers / standard dev from mean/median |
- percent of total contracts for supplier/agency | - percent of total contracts for supplier/agency |
$heuristics["METADATA_DUPLICATED_DESCRIPTION"] = Array( | |
"description" => "unusual value for time of year"); | |
/**
 * Returns 1 when the contract starts more than 45 days after the notice was
 * published, otherwise 0.
 *
 * NOTE(review): this looks like a placeholder copied under the "large
 * contract value" notes. The body does not examine the contract value, and it
 * returns a bare 0/1 rather than the heuristic_value/raw_value/mean/stddev
 * array that runHeuristic() checks for. The function name is also declared
 * more than once in this text; PHP does not allow redeclaring a function, so
 * confirm the intended name before this is run as PHP.
 *
 * @param array $cn Contract notice row; reads 'contractStart' and 'publishDate'.
 * @return int 1 or 0.
 */
function METADATA_DUPLICATED_DESCRIPTION($cn)
{
    $diff = strtotime($cn['contractStart']) - strtotime($cn['publishDate']);
    // strtotime() differences are in seconds, so one day is 60 * 60 * 24.
    $days = intval($diff / (60 * 60 * 24));
    return ($days > 45 ? 1 : 0);
}
- peculiar value | - peculiar value |
- Just under 80k, amplified if other contracts with same supplier are just under | - Just under 80k, amplified if other contracts with same supplier are just under |
- unusual variation amount | - unusual variation amount |
- absolute value; large reductions as well as large increases | - absolute value; large reductions as well as large increases |
$heuristics["METADATA_DUPLICATED_DESCRIPTION"] = Array( | |
"description" => "unusual value for time of year"); | |
/**
 * Returns 1 when the contract starts more than 45 days after the notice was
 * published, otherwise 0.
 *
 * NOTE(review): this looks like a placeholder copied under the "peculiar
 * value / unusual variation amount" notes. The body does not examine the
 * contract value, and it returns a bare 0/1 rather than the
 * heuristic_value/raw_value/mean/stddev array that runHeuristic() checks for.
 * The function name is also declared more than once in this text; PHP does
 * not allow redeclaring a function, so confirm the intended name before this
 * is run as PHP.
 *
 * @param array $cn Contract notice row; reads 'contractStart' and 'publishDate'.
 * @return int 1 or 0.
 */
function METADATA_DUPLICATED_DESCRIPTION($cn)
{
    $diff = strtotime($cn['contractStart']) - strtotime($cn['publishDate']);
    // strtotime() differences are in seconds, so one day is 60 * 60 * 24.
    $days = intval($diff / (60 * 60 * 24));
    return ($days > 45 ? 1 : 0);
}