<?php | <?php |
include_once ("../lib/common.inc.php"); | include_once ("../lib/common.inc.php"); |
$heuristics = Array(); | $heuristics = Array(); |
//each heuristic adds self to description array | //each heuristic adds self to description array |
include ("dateHeuristics.php"); | include ("dateHeuristics.php"); |
include ("historyHeuristics.php"); | include ("historyHeuristics.php"); |
//include ("metadataHeuristics.php"); | //include ("metadataHeuristics.php"); |
//include ("valueHeuristics.php"); | //include ("valueHeuristics.php"); |
/**
 * Run a single heuristic against one contract notice and persist the outcome.
 *
 * Nothing is done when heuristic_results already holds a row for this
 * heuristic name and CNID. Otherwise the function named by $heuristicName is
 * called with $cn and its result is inserted into heuristic_results.
 *
 * @param string $heuristicName Name of the heuristic function to call; also stored as heuristic_name.
 * @param array  $cn            Contract notice row. CNID, publishDate and supplierID are read;
 *                              agencyABN is used when present, otherwise 0 is stored.
 * @return void Progress and failure messages are echoed. The script dies when the
 *              heuristic result lacks heuristic_value, raw_value, mean or stddev.
 */
function runHeuristic($heuristicName, $cn)
{
    global $conn;

    // check if already ran; values are bound so quotes inside them cannot alter the SQL
    $checkSql = 'select count(*) from heuristic_results where heuristic_name = ? and "CNID" = ?';
    $check = $conn->prepare($checkSql);
    $check->execute(array($heuristicName, $cn['CNID']));
    // errors raised while executing a statement are reported on the statement handle
    databaseError($check->errorInfo());
    $r = $check->fetch(PDO::FETCH_BOTH);
    if ($r[0] != 0) {
        return;
    }

    // if not, run now
    $hresults = call_user_func($heuristicName, $cn);
    if (!isset($hresults["heuristic_value"]) || !isset($hresults["raw_value"]) || !isset($hresults["mean"]) || !isset($hresults["stddev"])) {
        print_r($hresults);
        die("Missing field in heurtistic $heuristicName result");
    }

    // save value and cn data via sql; the insert is positional, so the order of values matters
    $insertSql = 'insert into heuristic_results values(?, ?, ?, ?, ?, ?, NOW(), ?, ?, ?)';
    $insert = $conn->prepare($insertSql);
    $insert->execute(array(
        $heuristicName,
        $hresults["heuristic_value"],
        $hresults["raw_value"],
        $hresults["mean"],
        $hresults["stddev"],
        $cn["CNID"],
        $cn["publishDate"],
        // the notice row may not carry agencyABN (e.g. when the agency table is not joined)
        isset($cn["agencyABN"]) ? $cn["agencyABN"] : 0,
        $cn["supplierID"]
    ));
    $errors = $insert->errorInfo();
    if ($errors[2] == "") {
        echo "Saved $heuristicName for {$cn["CNID"]} <br>\n";
    } elseif (strpos($errors[2], "Duplicate entry") === false) {
        // $hresults is an array, so dump it rather than echoing the word "Array"
        echo "$heuristicName failed insert.<br>" . print_r($hresults, true) . " <br>" . print_r($errors, true) . " <br> $insertSql <br><br>\n";
    }
}
?> | ?> |
<?php | <?php |
$heuristics["HISTORY_LOW_TRANSACTIONS_AGENCY"] = Array(
    "description" => "unusual for agency due to previous low number of transactions "
);
/**
 * Score how far this agency's contract-notice count lies from the average
 * count per agency, measured in standard deviations.
 *
 * @param array $cn Contract notice row; only agencyName is read.
 * @return array heuristic_value (distance from the mean in standard deviations),
 *               raw_value (this agency's notice count), mean and stddev.
 */
function HISTORY_LOW_TRANSACTIONS_AGENCY($cn)
{
    $thisAgencyTransactions = getAgencyTransactions($cn['agencyName']);
    $averageAgencyTransactions = getAverageAgencyTransactions();
    $stddevAgencyTransactions = getstddevAgencyTransactions();
    if (!$stddevAgencyTransactions) {
        // no spread to measure against (zero or missing stddev): report no deviation instead of dividing by zero
        $value = 0;
    } else {
        // compare the agency's own count (not the contract length) with the per-agency statistics
        $value = abs($thisAgencyTransactions - $averageAgencyTransactions) / $stddevAgencyTransactions;
    }
    return Array(
        "heuristic_value" => $value,
        "raw_value" => $thisAgencyTransactions,
        "mean" => $averageAgencyTransactions,
        "stddev" => $stddevAgencyTransactions
    );
}
// per-request cache: agency name => number of contract notices
$agencyTransactions = Array();
/**
 * Count the contract notices recorded for one agency, querying once per agency name.
 *
 * @param string $agencyName Agency name as stored in contractnotice."agencyName".
 * @return mixed The count as returned by the database driver.
 */
function getAgencyTransactions($agencyName)
{
    global $agencyTransactions, $conn;
    // isset() rather than a truthiness test, so a cached count of 0 is not queried again
    if (!isset($agencyTransactions[$agencyName])) {
        // the name is bound, so quotes inside agency names cannot break the query
        $stmt = $conn->prepare('select count(*) from contractnotice where "agencyName" = ?');
        $stmt->execute(array($agencyName));
        $r = $stmt->fetch(PDO::FETCH_BOTH);
        $agencyTransactions[$agencyName] = $r[0];
    }
    return $agencyTransactions[$agencyName];
}
// global slot filled in by getStatsAgencyTransactions()
$averageAgencyTransactions;
/**
 * Return the average notice count per agency, loading the statistics first
 * whenever the stored value is still empty.
 */
function getAverageAgencyTransactions()
{
    global $averageAgencyTransactions;
    if ($averageAgencyTransactions) {
        return $averageAgencyTransactions;
    }
    getStatsAgencyTransactions();
    return $averageAgencyTransactions;
}
// global slot filled in by getStatsAgencyTransactions()
$stddevAgencyTransactions;
/**
 * Return the standard deviation of the notice count per agency, loading the
 * statistics first whenever the stored value is still empty.
 */
function getstddevAgencyTransactions()
{
    global $stddevAgencyTransactions;
    if ($stddevAgencyTransactions) {
        return $stddevAgencyTransactions;
    }
    getStatsAgencyTransactions();
    return $stddevAgencyTransactions;
}
/**
 * Load the average and standard deviation of the number of contract notices
 * per agency into $averageAgencyTransactions and $stddevAgencyTransactions.
 *
 * @return void
 * @throws RuntimeException when the statistics query fails.
 */
function getStatsAgencyTransactions()
{
    global $averageAgencyTransactions, $stddevAgencyTransactions, $conn;
    // "agencyName" is quoted because PostgreSQL folds unquoted identifiers to lower case
    $query = 'select avg(count), stddev(count) from (select count(*) as count
        from contractnotice group by "agencyName") as a';
    $result = $conn->query($query);
    if ($result === false) {
        // fail with a readable message instead of a fatal "fetch() on bool"
        throw new RuntimeException("Agency transaction statistics query failed: " . print_r($conn->errorInfo(), true));
    }
    $r = $result->fetch(PDO::FETCH_BOTH);
    $averageAgencyTransactions = $r[0];
    $stddevAgencyTransactions = $r[1];
}
$heuristics["HISTORY_LOW_TRANSACTIONS_SUPPLIER"] = Array(
    "description" => "unusual for supplier due to previous low number of transactions "
);
/**
 * Score how far this supplier's contract-notice count lies from the average
 * count per supplier, measured in standard deviations.
 *
 * @param array $cn Contract notice row; supplierName and supplierABN are read.
 * @return array heuristic_value (distance from the mean in standard deviations),
 *               raw_value (this supplier's notice count), mean and stddev.
 */
function HISTORY_LOW_TRANSACTIONS_SUPPLIER($cn)
{
    $thisSupplierTransactions = getSupplierTransactions($cn['supplierName'], $cn['supplierABN']);
    $averageSupplierTransactions = getAverageSupplierTransactions();
    $stddevSupplierTransactions = getstddevSupplierTransactions();
    if (!$stddevSupplierTransactions) {
        // no spread to measure against (zero or missing stddev): report no deviation instead of dividing by zero
        $value = 0;
    } else {
        // compare the supplier's own count (not the contract length) with the per-supplier statistics
        $value = abs($thisSupplierTransactions - $averageSupplierTransactions) / $stddevSupplierTransactions;
    }
    return Array(
        "heuristic_value" => $value,
        "raw_value" => $thisSupplierTransactions,
        "mean" => $averageSupplierTransactions,
        "stddev" => $stddevSupplierTransactions
    );
}
// per-request cache: supplier ABN or supplier name => number of contract notices
$supplierTransactions = Array();
/**
 * Count the contract notices recorded for one supplier, querying once per supplier.
 *
 * The supplier is matched on ABN when $supplierABN is neither 0 nor empty,
 * otherwise on its name.
 *
 * @param string $supplierName Supplier name, used when no ABN is available.
 * @param mixed  $supplierABN  Supplier ABN; 0 or "" means "no ABN".
 * @return mixed The count as returned by the database driver.
 */
function getSupplierTransactions($supplierName, $supplierABN)
{
    global $supplierTransactions, $conn;
    if ($supplierABN != 0 && $supplierABN != "") {
        $column = 'supplierABN';
        $key = $supplierABN;
    } else {
        $column = 'supplierName';
        $key = $supplierName;
    }
    // isset() rather than a truthiness test, so a cached count of 0 is not queried again
    if (!isset($supplierTransactions[$key])) {
        // $column is one of the two literals above, never caller data; the value itself is bound
        $stmt = $conn->prepare('select count(*) from contractnotice where "' . $column . '" = ?');
        $stmt->execute(array($key));
        $r = $stmt->fetch(PDO::FETCH_BOTH);
        $supplierTransactions[$key] = $r[0];
    }
    return $supplierTransactions[$key];
}
// global slot filled in by getStatsSupplierTransactions()
$averageSupplierTransactions;
/**
 * Return the average notice count per supplier, loading the statistics first
 * whenever the stored value is still empty.
 */
function getAverageSupplierTransactions()
{
    global $averageSupplierTransactions;
    if ($averageSupplierTransactions) {
        return $averageSupplierTransactions;
    }
    getStatsSupplierTransactions();
    return $averageSupplierTransactions;
}
// global slot filled in by getStatsSupplierTransactions()
$stddevSupplierTransactions;
/**
 * Return the standard deviation of the notice count per supplier, loading the
 * statistics first whenever the stored value is still empty.
 */
function getstddevSupplierTransactions()
{
    global $stddevSupplierTransactions;
    if ($stddevSupplierTransactions) {
        return $stddevSupplierTransactions;
    }
    getStatsSupplierTransactions();
    return $stddevSupplierTransactions;
}
/**
 * Load the average and standard deviation of the number of contract notices
 * per supplier into $averageSupplierTransactions and $stddevSupplierTransactions.
 *
 * Suppliers are grouped by ABN when it is non-zero, otherwise by name.
 *
 * @return void
 * @throws RuntimeException when the statistics query fails.
 */
function getStatsSupplierTransactions()
{
    global $averageSupplierTransactions, $stddevSupplierTransactions, $conn;
    // CASE ... END with quoted identifiers is the PostgreSQL form; MySQL's IF() is not available there
    $query = 'select avg(count), stddev(count) from (
        select (case when "supplierABN" != 0 THEN "supplierABN"::text ELSE "supplierName" END) as "supplierID",
        count(*) as count from contractnotice group by "supplierID") as a';
    $result = $conn->query($query);
    if ($result === false) {
        // fail with a readable message instead of a fatal "fetch() on bool"
        throw new RuntimeException("Supplier transaction statistics query failed: " . print_r($conn->errorInfo(), true));
    }
    $r = $result->fetch(PDO::FETCH_BOTH);
    $averageSupplierTransactions = $r[0];
    $stddevSupplierTransactions = $r[1];
}
<?php | <?php |
// Runs every registered heuristic over the contract notices of the most
// recent import date. Only executes when the script is not run from the CLI.
if (php_sapi_name() != "cli") {
    include_once ("../lib/common.inc.php");
    auth();
    include_once("heuristics.inc.php");
    // agency table missing, hence no JOIN agency ON contractnotice."agencyName"=agency."agencyName"
    $sql = 'SELECT *, case when "supplierABN" != 0 then "supplierABN"::text else "supplierName" end as "supplierID"
        FROM contractnotice
        WHERE DATE("importDate") = (select * from (SELECT DATE("importDate")
        FROM contractnotice ORDER BY "importDate" DESC limit 1) alias) limit 100';
    $stmt = $conn->prepare($sql);
    $stmt->execute();
    // a failed execute() is reported on the statement handle, not on the connection
    databaseError($stmt->errorInfo());
    foreach ($stmt->fetchAll() as $cn) {
        //get each new CN from latest update
        foreach ($heuristics as $heuristic => $description) {
            // run all heuristics
            runHeuristic($heuristic, $cn);
        }
        // push the progress messages echoed so far to the client
        flush();
    }
    /*foreach agency
      aggregate agency metrics
      foreach supplier
      aggregate supplier metrics
      foreach CN
      aggregate CN metrics */
}
?> | ?> |
<?php | <?php |
/* - large contract value
   - standard dev from mean/median
   - percent of total contracts for supplier/agency */
$heuristics["VALUE_LARGE_CONTRACT_OVERALL"] = Array(
    "description" => "unusual value for time of year");
/**
 * Placeholder implementation: flags a notice whose contract starts more than
 * 45 days after it was published.
 *
 * NOTE(review): this returns a bare 0/1, while runHeuristic() requires an array
 * with heuristic_value, raw_value, mean and stddev - confirm the intended
 * result shape before enabling this heuristic.
 *
 * @param array $cn Contract notice row; contractStart and publishDate are read.
 * @return int 1 when the gap exceeds 45 days, otherwise 0.
 */
function VALUE_LARGE_CONTRACT_OVERALL($cn)
{
    $diff = strtotime($cn['contractStart']) - strtotime($cn['publishDate']);
    // strtotime() yields seconds, so divide by the number of seconds in a day
    $days = intval($diff / (60 * 60 * 24));
    return ($days > 45 ? 1 : 0);
}
/* - peculiar value
   - Just under 80k, amplified if other contracts with same supplier are just under
 */
// NOTE(review): no VALUE_NEAR_THRESHOLD() function is visible alongside this entry - confirm one exists before enabling it.
$heuristics["VALUE_NEAR_THRESHOLD"] = Array(
    "description" => "unusual value for time of year");
/*
 - unusual variation amount - absolute value; large reductions as well as large increases
 */
$heuristics["VALUE_LARGE_VARIATION"] = Array(
    "description" => "unusual value for time of year");
/**
 * Placeholder implementation: flags a notice whose contract starts more than
 * 45 days after it was published.
 *
 * NOTE(review): this returns a bare 0/1, while runHeuristic() requires an array
 * with heuristic_value, raw_value, mean and stddev - confirm the intended
 * result shape before enabling this heuristic.
 *
 * @param array $cn Contract notice row; contractStart and publishDate are read.
 * @return int 1 when the gap exceeds 45 days, otherwise 0.
 */
function VALUE_LARGE_VARIATION($cn)
{
    $diff = strtotime($cn['contractStart']) - strtotime($cn['publishDate']);
    // strtotime() yields seconds, so divide by the number of seconds in a day
    $days = intval($diff / (60 * 60 * 24));
    return ($days > 45 ? 1 : 0);
}
/* - unusual value for time of year
   - compare to all other records in last 2 weeks
   - ie. many large contracts in june so takes more to standout */
$heuristics["VALUE_HIGH_FOR_MONTH"] = Array(
    "description" => "unusual value for time of year"
);
/**
 * Placeholder implementation: flags a notice whose contract starts more than
 * 45 days after it was published.
 *
 * NOTE(review): this returns a bare 0/1, while runHeuristic() requires an array
 * with heuristic_value, raw_value, mean and stddev - confirm the intended
 * result shape before enabling this heuristic.
 *
 * @param array    $cn         Contract notice row; contractStart and publishDate are read.
 * @param int|null $monthAsInt Currently unused. Optional so the heuristic can be
 *                             invoked with the notice alone, as runHeuristic() does.
 * @return int 1 when the gap exceeds 45 days, otherwise 0.
 */
function VALUE_HIGH_FOR_MONTH($cn, $monthAsInt = null)
{
    $diff = strtotime($cn['contractStart']) - strtotime($cn['publishDate']);
    // strtotime() yields seconds, so divide by the number of seconds in a day
    $days = intval($diff / (60 * 60 * 24));
    return ($days > 45 ? 1 : 0);
}
$monthlyValueAverage = Array();
// The history heuristics declare a function with the same name; only define it
// here when it does not exist yet, so including both files does not trigger a
// "cannot redeclare" fatal error.
if (!function_exists('getAgencyTransactions')) {
    /**
     * Count the contract notices recorded for one agency, querying once per agency name.
     *
     * @param string $agencyName Agency name as stored in contractnotice."agencyName".
     * @return mixed The count as returned by the database driver.
     */
    function getAgencyTransactions($agencyName)
    {
        global $agencyTransactions, $conn;
        // isset() rather than a truthiness test, so a cached count of 0 is not queried again
        if (!isset($agencyTransactions[$agencyName])) {
            // the name is bound, so quotes inside agency names cannot break the query
            $stmt = $conn->prepare('select count(*) from contractnotice where "agencyName" = ?');
            $stmt->execute(array($agencyName));
            $r = $stmt->fetch(PDO::FETCH_BOTH);
            $agencyTransactions[$agencyName] = $r[0];
        }
        return $agencyTransactions[$agencyName];
    }
}
?> | ?> |
<?php | <?php |
/*// most interesting | /*// most interesting |
SELECT sum(heuristic_value) as sum, CNID | SELECT sum(heuristic_value) as sum, CNID |
FROM heuristic_results group by CNID order by sum DESC limit 30 | FROM heuristic_results group by CNID order by sum DESC limit 30 |
// spread of values | // spread of values |
select floor(sum) as val,count(*) from (SELECT sum(heuristic_value) | select floor(sum) as val,count(*) from (SELECT sum(heuristic_value) |
as sum FROM heuristic_results group by CNID) as a group by val*/ | as sum FROM heuristic_results group by "CNID") as a group by val*/ |
// Builds, per heuristic, a histogram of floor(heuristic_value) => row count and
// prints the bucket labels together with the bucket counts.
$series = Array();
include_once("../lib/common.inc.php");
$query = "select heuristic_name, floor(heuristic_value) as val,count(*) from heuristic_results group by heuristic_name, val";
$result = $conn->query($query);
foreach ($result->fetchAll() as $r) {
    // $r[2] is the count(*) column
    $series[$r["heuristic_name"]][$r["val"]] = $r[2];
}
// fixed set of buckets to report
$labels = Array(0,1,2,3,4,5);
// initialised up front so print_r() below never sees an undefined variable
$data = Array();
// NOTE(review): $data is overwritten for every series, so only the last heuristic's
// counts are printed - confirm whether one row per series was intended.
foreach ($series as $seriesName => $seriesEntry) {
    foreach ($labels as $label) {
        // buckets with no rows are reported as 0 without reading an undefined offset
        $data[$label] = (isset($seriesEntry[$label]) && $seriesEntry[$label] ? $seriesEntry[$label] : 0);
    }
}
print_r($labels);
print_r($data);
?> | ?> |
<?php | <?php |
date_default_timezone_set("Australia/ACT"); | date_default_timezone_set("Australia/ACT"); |
error_reporting(E_AL |