/* NOTE(review): this section is preceded by the tail of an earlier statement
   (a string "unusual value for time of year" closing a call); its beginning
   is not visible here, so it is not reproduced -- confirm against the full file. */

/* all
   SELECT description, count(*) as count
   FROM contractnotice
   group by description having count > 1 order by count
*/
/*- duplicated description
    - most duplicated overall, most duplicated per agency/category/supplier etc. */
$heuristics["METADATA_DUPLICATED_DESCRIPTION"] = Array(
    "description" => ""
);

/**
 * Scores how unusual it is for a contract notice's description to be shared
 * with other notices.
 *
 * Counts the rows in `contractnotice` whose description equals the notice's
 * description and expresses that count as an absolute distance from the mean
 * duplicate count, measured in standard deviations.
 *
 * @param array $cn Contract notice row; the 'description' key is read.
 * @return array Keys: "heuristic_value" (0 when the description is not
 *               duplicated or no spread is available), "raw_value" (the
 *               matching row count), "mean" and "stddev" (the cached
 *               population statistics).
 */
function METADATA_DUPLICATED_DESCRIPTION($cn)
{
    // The connection lives in the global scope; without this declaration it
    // would be an undefined local inside the function.
    global $conn;

    $averageDuplicatedDescriptions = getAverageDuplicatedDescriptions();
    $stddevDuplicatedDescriptions = getstddevDuplicatedDescriptions();

    $description = isset($cn['description']) ? $cn['description'] : '';

    // Bind the description instead of concatenating it into the SQL text:
    // descriptions are free text and routinely contain quotes.
    $statement = $conn->prepare('select count(*) from contractnotice where description = ?');
    $statement->execute(Array($description));
    $r = $statement->fetch(PDO::FETCH_NUM);
    $dupeDesc = $r ? intval($r[0]) : 0;

    if ($dupeDesc <= 1 || $stddevDuplicatedDescriptions == 0) {
        // Not duplicated at all, or no spread to measure against: dividing by
        // a zero standard deviation would fail, so report "nothing unusual".
        $value = 0;
    } else {
        $value = abs($dupeDesc - $averageDuplicatedDescriptions) / $stddevDuplicatedDescriptions;
    }

    return Array(
        "heuristic_value" => $value,
        "raw_value" => $dupeDesc,
        "mean" => $averageDuplicatedDescriptions,
        "stddev" => $stddevDuplicatedDescriptions
    );
}

// Request-level cache for the statistics below; null means "not loaded yet".
$averageDuplicatedDescriptions = null;

/**
 * Returns the mean number of notices sharing a description, taken over the
 * descriptions that occur more than once. Loaded once, then cached.
 *
 * @return float
 */
function getAverageDuplicatedDescriptions()
{
    global $averageDuplicatedDescriptions;
    // Compare against null rather than truthiness so that a legitimate 0
    // does not trigger the query again on every call.
    if ($averageDuplicatedDescriptions === null) {
        getStatsDuplicatedDescriptions();
    }
    return $averageDuplicatedDescriptions;
}

// Request-level cache, see $averageDuplicatedDescriptions.
$stddevDuplicatedDescriptions = null;

/**
 * Returns the standard deviation of the duplicate counts, taken over the
 * descriptions that occur more than once. Loaded once, then cached.
 *
 * @return float
 */
function getstddevDuplicatedDescriptions()
{
    global $stddevDuplicatedDescriptions;
    if ($stddevDuplicatedDescriptions === null) {
        getStatsDuplicatedDescriptions();
    }
    return $stddevDuplicatedDescriptions;
}

/**
 * Runs the aggregate query and stores the mean and standard deviation in the
 * global cache variables read by the two getters above.
 *
 * @return void
 */
function getStatsDuplicatedDescriptions()
{
    // All three must be declared global: otherwise $conn is undefined and the
    // results are written to locals that vanish when the function returns.
    global $conn, $averageDuplicatedDescriptions, $stddevDuplicatedDescriptions;

    $query = "select avg(count),STDDEV(count) from (
        SELECT description, count(*) as count
        FROM contractnotice
        group by description having count > 1
    ) as a;";
    $result = $conn->query($query);
    $r = $result->fetch(PDO::FETCH_NUM);

    // The aggregates come back as NULL when no description is duplicated;
    // store 0.0 so the cache counts as loaded and callers get a number.
    $averageDuplicatedDescriptions = ($r && $r[0] !== null) ? floatval($r[0]) : 0.0;
    $stddevDuplicatedDescriptions = ($r && $r[1] !== null) ? floatval($r[1]) : 0.0;
}