Tidy up imports
[contractdashboard.git] / heuristics / metadataHeuristics.php
blob:a/heuristics/metadataHeuristics.php -> blob:b/heuristics/metadataHeuristics.php
<?php
/* - duplicated description
   - most duplicated overall, most duplicated per agency/category/supplier etc.
   all
  SELECT description, count(*) as count
  FROM contractnotice
  group by description having count > 1 order by count
  */
  /*- duplicated description
  - most duplicated overall, most duplicated per agency/category/supplier etc. */
  $heuristics["METADATA_DUPLICATED_DESCRIPTION"] = Array(
  "description" => ""
  );
  function METADATA_DUPLICATED_DESCRIPTION($cn)
  {
  $averageDuplicatedDescriptions = getAverageDuplicatedDescriptions();
  $stddevDuplicatedDescriptions = getstddevDuplicatedDescriptions();
  $query = 'select count(*) from contractnotice where description = "' . $agencyName . '"';
  $result = $conn->query($query);
  $r = $result->fetch(PDO::FETCH_BOTH);
  $dupeDesc = $r[0];
  if ($dupeDesc == 1) $value = 0;
  else $value = abs($dupeDesc - $averageDuplicatedDescriptions) / $stddevDuplicatedDescriptions;
  return Array(
  "heuristic_value" => $value,
  "raw_value" => $dupeDesc,
  "mean" => $averageDuplicatedDescriptions,
  "stddev" => $stddevDuplicatedDescriptions
  );
  }
  $averageDuplicatedDescriptions;
  function getAverageDuplicatedDescriptions()
  {
  global $averageDuplicatedDescriptions;
  if (!$averageDuplicatedDescriptions) {
  getStatsDuplicatedDescriptions();
  }
  return $averageDuplicatedDescriptions;
  }
  $stddevDuplicatedDescriptions;
  function getstddevDuplicatedDescriptions()
  {
  global $stddevDuplicatedDescriptions;
  if (!$stddevDuplicatedDescriptions) {
  getStatsDuplicatedDescriptions();
  }
  return $stddevDuplicatedDescriptions;
  }
  function getStatsDuplicatedDescriptions()
  {
  $query = "select avg(count),STDDEV(count) from (
  SELECT description, count(*) as count
  FROM contractnotice
  group by description having count > 1
  ) as a;";
  $result = $conn->query($query);
  $r = $result->fetch(PDO::FETCH_BOTH);
  $averageDuplicatedDescriptions = $r[0];
  $stddevDuplicatedDescriptions = $r[1];
  }