Add initial date based heuristics
[contractdashboard.git] / heuristics / metadataHeuristics.php
blob:a/heuristics/metadataHeuristics.php -> blob:b/heuristics/metadataHeuristics.php
<?php <?php
/*- duplicated description /* all
- most duplicated overall, most duplicated per agency/category/supplier etc. */ SELECT description, count(*) as count
$heuristics["METADATA_DUPLICATED_DESCRIPTION"] = Array( FROM `contractnotice`
"description" => "unusual value for time of year"); group by description having count > 1 order by count
  */
  /*- duplicated description
  - most duplicated overall, most duplicated per agency/category/supplier etc. */
  $heuristics["METADATA_DUPLICATED_DESCRIPTION"] = Array(
  "description" => ""
  );
/**
 * Heuristic: scores how unusual the number of contract notices sharing this
 * notice's description is, relative to the dataset-wide mean and standard
 * deviation of duplicated-description counts.
 *
 * @param array $cn Contract notice row. NOTE(review): assumes the description
 *                  text is available under the 'description' key, matching
 *                  the `contractnotice.description` column - confirm against
 *                  the caller.
 *
 * @return array Keys: "heuristic_value" (0 for a unique description, otherwise
 *               the absolute distance from the mean in standard deviations),
 *               "raw_value" (number of notices with this description),
 *               "mean" and "stddev" (the dataset-wide statistics used).
 */
function METADATA_DUPLICATED_DESCRIPTION($cn)
{
    // NOTE(review): assumes the shared PDO connection is exposed as the
    // global $conn - confirm against the file that creates it.
    global $conn;

    $averageDuplicatedDescriptions = getAverageDuplicatedDescriptions();
    $stddevDuplicatedDescriptions = getstddevDuplicatedDescriptions();

    // Bind the description as a parameter rather than concatenating it into
    // the SQL text; descriptions are free text and may contain quotes.
    $statement = $conn->prepare('select count(*) from contractnotice where description = ?');
    $statement->execute(array($cn['description']));
    $r = $statement->fetch(PDO::FETCH_BOTH);
    $dupeDesc = $r[0];

    if ((int) $dupeDesc === 1) {
        // A description used exactly once is not duplicated at all.
        $value = 0;
    } elseif (!$stddevDuplicatedDescriptions) {
        // No spread (or no statistics available): a z-score is undefined,
        // so report no deviation instead of dividing by zero.
        $value = 0;
    } else {
        $value = abs($dupeDesc - $averageDuplicatedDescriptions) / $stddevDuplicatedDescriptions;
    }

    return Array(
        "heuristic_value" => $value,
        "raw_value" => $dupeDesc,
        "mean" => $averageDuplicatedDescriptions,
        "stddev" => $stddevDuplicatedDescriptions
    );
}
  $averageDuplicatedDescriptions;
  function getAverageDuplicatedDescriptions()
  {
  global $averageDuplicatedDescriptions;
  if (!$averageDuplicatedDescriptions) {
  getStatsDuplicatedDescriptions();
  }
  return $averageDuplicatedDescriptions;
  }
  $stddevDuplicatedDescriptions;
  function getstddevDuplicatedDescriptions()
  {
  global $stddevDuplicatedDescriptions;
  if (!$stddevDuplicatedDescriptions) {
  getStatsDuplicatedDescriptions();
  }
  return $stddevDuplicatedDescriptions;
  }
  function getStatsDuplicatedDescriptions()
  {
  $query = "select avg(count),STDDEV(count) from (
  SELECT description, count(*) as count
  FROM `contractnotice`
  group by description having count > 1
  ) as a;";
  $result = $conn->query($query);
  $r = $result->fetch(PDO::FETCH_BOTH);
  $averageDuplicatedDescriptions = $r[0];
  $stddevDuplicatedDescriptions = $r[1];
  }