<?php
/*
 * Reference query: every description used by more than one contract notice,
 * ordered by how many notices share it.
 *
 * SELECT description, count(*) as count
 * FROM `contractnotice`
 * group by description having count > 1 order by count
 */

/*
 * Heuristic: duplicated description
 * - most duplicated overall, most duplicated per agency/category/supplier etc.
 */

// NOTE(review): the description is still empty. A second registration that was
// interleaved here used "unusual value for time of year", which looks copied
// from a different heuristic and was overwritten anyway -- confirm the wording.
$heuristics["METADATA_DUPLICATED_DESCRIPTION"] = array(
    "description" => ""
);
/**
 * Scores a contract notice by how unusual its description's duplicate count is.
 *
 * Counts the contract notices that share this notice's description and expresses
 * that count as its absolute distance from the mean duplicate count, measured in
 * standard deviations.
 *
 * @param array $cn Contract notice row; only $cn['description'] is read.
 * @return array Keys: "heuristic_value" (0 when the description is unique or the
 *               standard deviation is unusable, otherwise the absolute z-score),
 *               "raw_value" (number of notices with this description),
 *               "mean" and "stddev" (the statistics the score was computed from).
 */
function METADATA_DUPLICATED_DESCRIPTION($cn)
{
    // NOTE(review): assumes the including script exposes its PDO handle as the
    // global $conn -- confirm against the bootstrap code.
    global $conn;

    $averageDuplicatedDescriptions = getAverageDuplicatedDescriptions();
    $stddevDuplicatedDescriptions = getstddevDuplicatedDescriptions();

    // Bind the description instead of concatenating it into the SQL text, so
    // quotes inside a description cannot change the query.
    $statement = $conn->prepare('select count(*) from contractnotice where description = ?');
    $statement->execute(array($cn['description']));
    $r = $statement->fetch(PDO::FETCH_BOTH);
    $dupeDesc = $r[0];

    // A null or zero standard deviation (including numeric strings such as
    // "0.0000") would make the z-score a division by zero.
    $spread = (float) $stddevDuplicatedDescriptions;

    if ($dupeDesc == 1 || $spread == 0.0) {
        $value = 0;
    } else {
        $value = abs($dupeDesc - $averageDuplicatedDescriptions) / $stddevDuplicatedDescriptions;
    }

    return array(
        "heuristic_value" => $value,
        "raw_value" => $dupeDesc,
        "mean" => $averageDuplicatedDescriptions,
        "stddev" => $stddevDuplicatedDescriptions
    );
}
|
// Cache for the mean duplicate count; stays null until the statistics are loaded.
$averageDuplicatedDescriptions = null;

/**
 * Returns the cached mean duplicate count, loading the statistics on first use.
 *
 * @return mixed Whatever getStatsDuplicatedDescriptions() stored in the global
 *               $averageDuplicatedDescriptions (null if nothing was stored).
 */
function getAverageDuplicatedDescriptions()
{
    global $averageDuplicatedDescriptions;

    // Test for null rather than truthiness so a cached falsy value such as "0"
    // does not trigger another query on every call.
    if ($averageDuplicatedDescriptions === null) {
        getStatsDuplicatedDescriptions();
    }

    return $averageDuplicatedDescriptions;
}
|
// Cache for the standard deviation of duplicate counts; stays null until the
// statistics are loaded.
$stddevDuplicatedDescriptions = null;

/**
 * Returns the cached standard deviation of duplicate counts, loading the
 * statistics on first use.
 *
 * @return mixed Whatever getStatsDuplicatedDescriptions() stored in the global
 *               $stddevDuplicatedDescriptions (null if nothing was stored).
 */
function getstddevDuplicatedDescriptions()
{
    global $stddevDuplicatedDescriptions;

    // A standard deviation of "0" is a legitimate cached result; only a null
    // cache means the statistics still have to be fetched.
    if ($stddevDuplicatedDescriptions === null) {
        getStatsDuplicatedDescriptions();
    }

    return $stddevDuplicatedDescriptions;
}
|
/**
 * Loads the duplicate-description statistics into the shared caches.
 *
 * Runs one aggregate query over the descriptions used by more than one contract
 * notice and stores the average and standard deviation of their counts in the
 * globals $averageDuplicatedDescriptions and $stddevDuplicatedDescriptions.
 *
 * @return void
 */
function getStatsDuplicatedDescriptions()
{
    // The results must land in the globals that the getter functions read;
    // plain local assignments would be discarded when this function returns.
    // NOTE(review): assumes the including script exposes its PDO handle as the
    // global $conn -- confirm against the bootstrap code.
    global $conn, $averageDuplicatedDescriptions, $stddevDuplicatedDescriptions;

    $query = "select avg(count),STDDEV(count) from (
        SELECT description, count(*) as count
        FROM `contractnotice`
        group by description having count > 1
    ) as a;";

    $result = $conn->query($query);
    $r = $result->fetch(PDO::FETCH_BOTH);

    $averageDuplicatedDescriptions = $r[0];
    $stddevDuplicatedDescriptions = $r[1];
}
|
|