1 order by count */

/*- duplicated description - most duplicated overall, most duplicated per agency/category/supplier etc. */
$heuristics["METADATA_DUPLICATED_DESCRIPTION"] = Array(
    "description" => ""
);

/**
 * Scores how unusual it is for a contract notice's description to be shared
 * with other notices.
 *
 * Counts the rows in `contractnotice` that have the same description as the
 * given notice and expresses that count as an absolute distance from the mean
 * duplicate-group size, measured in standard deviations.
 *
 * @param array|object $cn The contract notice being scored.
 *                         NOTE(review): assumed to expose its text under
 *                         `description` (the column name used in the query);
 *                         confirm against the callers.
 *
 * @return array Keys: "heuristic_value" (the score, 0 when the description is
 *               not duplicated or no spread is available), "raw_value" (number
 *               of notices sharing the description), "mean" and "stddev" (the
 *               statistics the score was computed from).
 */
function METADATA_DUPLICATED_DESCRIPTION($cn)
{
    // The database handle lives in the global scope; without this declaration
    // $conn is an undefined local and every query call fails.
    global $conn;

    $averageDuplicatedDescriptions = getAverageDuplicatedDescriptions();
    $stddevDuplicatedDescriptions = getstddevDuplicatedDescriptions();

    // The original compared against an undefined $agencyName; the notice's own
    // description is what has to be matched.
    $description = '';
    if (is_array($cn) && isset($cn['description'])) {
        $description = $cn['description'];
    } elseif (is_object($cn) && isset($cn->description)) {
        $description = $cn->description;
    }

    // Bound parameter instead of string concatenation: descriptions are free
    // text and may contain quotes.
    $statement = $conn->prepare('select count(*) from contractnotice where description = ?');
    $statement->execute(Array($description));
    $row = $statement->fetch(PDO::FETCH_BOTH);
    $dupeDesc = $row ? $row[0] : 0;

    if ($dupeDesc <= 1) {
        // A description used at most once is not a duplicate.
        $value = 0;
    } elseif (!$stddevDuplicatedDescriptions) {
        // No spread available (null or zero): the distance cannot be
        // normalised, so report no signal instead of dividing by zero.
        $value = 0;
    } else {
        $value = abs($dupeDesc - $averageDuplicatedDescriptions) / $stddevDuplicatedDescriptions;
    }

    return Array(
        "heuristic_value" => $value,
        "raw_value" => $dupeDesc,
        "mean" => $averageDuplicatedDescriptions,
        "stddev" => $stddevDuplicatedDescriptions
    );
}

// Request-lifetime cache of the duplicate-description statistics; null means
// "not loaded yet".
$averageDuplicatedDescriptions = null;

/**
 * Returns the mean size of the duplicated-description groups, loading the
 * statistics on first use.
 *
 * @return mixed The AVG() value as returned by the database driver, or null
 *               when it could not be loaded.
 */
function getAverageDuplicatedDescriptions()
{
    global $averageDuplicatedDescriptions;

    // Strict null check so that a legitimately cached 0 / "0" is not reloaded.
    if ($averageDuplicatedDescriptions === null) {
        getStatsDuplicatedDescriptions();
    }

    return $averageDuplicatedDescriptions;
}

$stddevDuplicatedDescriptions = null;

/**
 * Returns the standard deviation of the duplicated-description group sizes,
 * loading the statistics on first use.
 *
 * @return mixed The STDDEV() value as returned by the database driver, or null
 *               when it could not be loaded.
 */
function getstddevDuplicatedDescriptions()
{
    global $stddevDuplicatedDescriptions;

    // Strict null check so that a legitimately cached 0 / "0" is not reloaded.
    if ($stddevDuplicatedDescriptions === null) {
        getStatsDuplicatedDescriptions();
    }

    return $stddevDuplicatedDescriptions;
}

/**
 * Queries the mean and standard deviation of the sizes of all description
 * groups that occur more than once, and stores both in the global cache
 * variables read by the two getters.
 *
 * @return void
 */
function getStatsDuplicatedDescriptions()
{
    // Without these declarations the results were written to locals and
    // discarded, so the getters always returned null.
    global $conn, $averageDuplicatedDescriptions, $stddevDuplicatedDescriptions;

    $query = "select avg(count),STDDEV(count) from ( SELECT description, count(*) as count FROM contractnotice group by description having count > 1 ) as a;";
    $result = $conn->query($query);
    $row = $result ? $result->fetch(PDO::FETCH_BOTH) : false;

    if ($row) {
        $averageDuplicatedDescriptions = $row[0];
        $stddevDuplicatedDescriptions = $row[1];
    }
}