add sitemap
[contractdashboard.git] / heuristics / metadataHeuristics.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
 <?php
/* Reference query: all descriptions shared by more than one contract notice, with their counts.
   SELECT description, count(*) as count
FROM contractnotice 
group by description having count > 1 order by count
*/
/* Duplicated-description heuristic.
   Ideas: most duplicated overall; most duplicated per agency/category/supplier, etc. */
// Register this heuristic in the shared $heuristics table, keyed by the name
// of the function that implements it. The human-readable description is
// currently left blank.
$heuristics['METADATA_DUPLICATED_DESCRIPTION'] = array(
    'description' => '',
);
/**
 * Heuristic: how unusual is the number of contract notices that share this
 * notice's description?
 *
 * Counts the rows in `contractnotice` whose description equals the one on
 * $cn and expresses that count as its distance from the mean duplicate count,
 * measured in standard deviations.
 *
 * NOTE(review): SOURCE does not show the shape of $cn. This assumes it is a
 * contract-notice row (array or object) exposing a `description` field; a
 * scalar is treated as the description text itself. Confirm against the caller.
 * NOTE(review): assumes $conn is the application's global PDO connection.
 *
 * @param mixed $cn Contract notice being scored (see note above).
 * @return array Keys: "heuristic_value" (0 when the description is not
 *               duplicated or no deviation is available), "raw_value"
 *               (number of notices sharing the description), "mean" and
 *               "stddev" (population statistics used for the score).
 */
function METADATA_DUPLICATED_DESCRIPTION($cn)
{
    // The original body used $conn without importing it, which is a fatal
    // "member function on null" inside function scope.
    global $conn;

    // The original query interpolated an undefined $agencyName, so it always
    // matched the empty description regardless of the notice passed in.
    if (is_array($cn) || $cn instanceof ArrayAccess) {
        $description = isset($cn['description']) ? $cn['description'] : '';
    } elseif (is_object($cn)) {
        $description = isset($cn->description) ? $cn->description : '';
    } else {
        $description = (string) $cn;
    }

    $averageDuplicatedDescriptions = getAverageDuplicatedDescriptions();
    $stddevDuplicatedDescriptions = getstddevDuplicatedDescriptions();

    // Bound parameter instead of string concatenation: descriptions are free
    // text and may contain quotes.
    $statement = $conn->prepare('select count(*) from contractnotice where description = ?');
    $statement->execute(array($description));
    $dupeDesc = (int) $statement->fetchColumn();

    // A zero or missing standard deviation (no duplicates in the table, or all
    // duplicate groups the same size) makes the score undefined; report 0
    // rather than dividing by zero.
    $deviation = (float) $stddevDuplicatedDescriptions;
    if ($dupeDesc <= 1 || $deviation == 0.0) {
        $value = 0;
    } else {
        $value = abs($dupeDesc - $averageDuplicatedDescriptions) / $deviation;
    }

    return array(
        "heuristic_value" => $value,
        "raw_value" => $dupeDesc,
        "mean" => $averageDuplicatedDescriptions,
        "stddev" => $stddevDuplicatedDescriptions,
    );
}
// Cache for the mean duplicate count; null means "not computed yet".
// (The original bare `$averageDuplicatedDescriptions;` statement assigned nothing.)
$averageDuplicatedDescriptions = null;

/**
 * Returns the cached mean duplicate count, asking
 * getStatsDuplicatedDescriptions() to compute it when nothing is cached.
 *
 * @return mixed The value held in the global $averageDuplicatedDescriptions
 *               after the lookup; null if it could not be computed.
 */
function getAverageDuplicatedDescriptions()
{
    global $averageDuplicatedDescriptions;

    // Strict null check: a cached value that happens to be falsy must not
    // trigger another database round trip.
    if ($averageDuplicatedDescriptions === null) {
        getStatsDuplicatedDescriptions();
    }

    return $averageDuplicatedDescriptions;
}
// Cache for the standard deviation of duplicate counts; null means
// "not computed yet".
// (The original bare `$stddevDuplicatedDescriptions;` statement assigned nothing.)
$stddevDuplicatedDescriptions = null;

/**
 * Returns the cached standard deviation of duplicate counts, asking
 * getStatsDuplicatedDescriptions() to compute it when nothing is cached.
 *
 * @return mixed The value held in the global $stddevDuplicatedDescriptions
 *               after the lookup; null if it could not be computed.
 */
function getstddevDuplicatedDescriptions()
{
    global $stddevDuplicatedDescriptions;

    // Strict null check: a standard deviation of 0 (or "0" as returned by a
    // database driver) is a legitimate cached result and must not cause the
    // aggregate query to run again on every call.
    if ($stddevDuplicatedDescriptions === null) {
        getStatsDuplicatedDescriptions();
    }

    return $stddevDuplicatedDescriptions;
}
/**
 * Computes the mean and standard deviation of the per-description notice
 * counts (only descriptions used by more than one notice) and stores them in
 * the globals $averageDuplicatedDescriptions and $stddevDuplicatedDescriptions.
 *
 * If the query fails or yields no row, the globals are left untouched.
 *
 * NOTE(review): assumes $conn is the application's global PDO connection.
 *
 * @return void
 */
function getStatsDuplicatedDescriptions()
{
    // Without this declaration the original wrote its results to function
    // locals, so the cache read by the getters was never populated, and $conn
    // was undefined inside the function.
    global $conn, $averageDuplicatedDescriptions, $stddevDuplicatedDescriptions;

    $query = "select avg(count),STDDEV(count) from (
        SELECT description, count(*) as count
FROM contractnotice 
group by description having count > 1 
        ) as a;";

    $result = $conn->query($query);
    if (!$result) {
        return;
    }

    $row = $result->fetch(PDO::FETCH_NUM);
    if (!$row) {
        return;
    }

    $averageDuplicatedDescriptions = $row[0];
    $stddevDuplicatedDescriptions = $row[1];
}