beginnings of NAA data import
[disclosr.git] / admin / genericAgencyFixer.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
<?php
 
include_once("../include/common.inc.php");
require($basePath . 'lib/phpquery/phpQuery/phpQuery.php');
 
// Force the "C" locale for character classification/conversion (LC_CTYPE).
// NOTE(review): presumably needed for predictable string handling further down — confirm.
setlocale(LC_CTYPE, 'C');
 
 
// Handle to the 'disclosr-agencies' database. $server is not defined in this file;
// presumably it is provided by the common.inc.php include above — verify.
$db = $server->get_db('disclosr-agencies');
// Pass 1: remove the scrapeDepth and lastScraped fields from every agency
// document returned by the app/byCanonicalName view.
try {
    $agencies = $db->get_view("app", "byCanonicalName", null, true)->rows;
    foreach ($agencies as $agency) {
        $changed = false;
        foreach (array('scrapeDepth', 'lastScraped') as $field) {
            if (isset($agency->value->$field)) {
                unset($agency->value->$field);
                $changed = true;
            }
        }

        // Only write back documents that were actually modified; saving an
        // untouched document would just rewrite it for no reason.
        if (!$changed) {
            continue;
        }

        $db->save($agency->value);
        echo "<hr>";
        // Push progress to the browser as each document is processed.
        flush();
    }
} catch (SetteeRestClientException $e) {
    // Database/REST failures are reported through the shared handler.
    setteErrorHandler($e);
}
// Pass 2: for every agency that has a website but no stored metaTags yet,
// download the site's HTML and record its named <meta> tags on the agency
// document as a name => content map.
//
// NOTE(review): field-name cleanups jotted down by the original author,
// NOT implemented by this script:
//   "hasRestricitiveLicence" / "hasRestrictiveLicense" -> has Restrictive Licence
//   "hasYoutube" -> Tube
//   "comment" -> "comments"
try {
    $agencies = $db->get_view("app", "byCanonicalName", null, true)->rows;
    foreach ($agencies as $agency) {
        // Skip agencies that were already harvested or have nothing to fetch.
        if (isset($agency->value->metaTags) || !isset($agency->value->website)) {
            continue;
        }

        echo htmlspecialchars($agency->value->name . " " . $agency->value->website, ENT_QUOTES, 'UTF-8') . "<br />\n";

        // A single unreachable site must not abort the whole run: transport
        // errors are reported and the agency is left untouched so that a
        // later run can retry it.
        try {
            $request = Requests::get($agency->value->website);
        } catch (Requests_Exception $e) {
            echo "Unable to fetch website: " . htmlspecialchars($e->getMessage(), ENT_QUOTES, 'UTF-8') . "<br />\n<hr>";
            flush();
            continue;
        }

        // Do not store meta tags scraped from an HTTP error page; once
        // metaTags is set the agency would never be re-fetched.
        if (!$request->success) {
            echo "Unexpected HTTP status " . htmlspecialchars((string) $request->status_code, ENT_QUOTES, 'UTF-8') . "<br />\n<hr>";
            flush();
            continue;
        }

        $html = phpQuery::newDocumentHTML($request->body);
        phpQuery::selectDocument($html);

        $metaTags = array();
        foreach (pq('meta')->elements as $meta) {
            $tagName = $meta->getAttribute('name');
            $content = $meta->getAttribute('content');
            // Only <meta> elements carrying a name attribute are recorded.
            if ($tagName !== "") {
                // Names and contents come from a remote page: escape before echoing.
                echo htmlspecialchars("$tagName == $content", ENT_QUOTES, 'UTF-8') . " <br>\n";
                $metaTags[$tagName] = $content;
            }
        }

        // Stored even when empty, so the agency is not fetched again next run.
        $agency->value->metaTags = $metaTags;
        $db->save($agency->value);
        echo "<hr>";
        // Push progress to the browser as each agency is processed.
        flush();
    }
} catch (SetteeRestClientException $e) {
    // Database/REST failures are reported through the shared handler.
    setteErrorHandler($e);
}
?>