cache org ids while importing datagov
[disclosr.git] / admin / importAPSCEmployees2012.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
<?php
 
require_once '../include/common.inc.php';
require($basePath . 'lib/phpquery/phpQuery/phpQuery.php');
// Open the agencies database; $server is expected to come from common.inc.php.
$db = $server->get_db('disclosr-agencies');

// Accumulators filled in by the scraping loop further down:
//   $sums      - agency id => (time period => employee total)
//   $functions - agency id => function text taken from the scraped table
$sums = array();
$functions = array();

// Index the "app/byName" view rows by trimmed key so a scraped agency name
// can be resolved to the row's value, which is later used as a document id.
$rows = $db->get_view("app", "byName")->rows;
$nametoid = array();
foreach ($rows as $viewRow) {
    $agencyName = trim($viewRow->key);
    $nametoid[$agencyName] = $viewRow->value;
}
 
 
// Download the APSC agency table and, for every row that resolves to a known
// agency, add its employee figure to $sums[id]["2011-2012"] and remember its
// function text in $functions[id].
$request = Requests::get("http://www.apsc.gov.au/publications-and-media/parliamentary/state-of-the-service/new-sosr/appendix-2-aps-agencies");
if (!$request->success) {
    // Stop here rather than parse an error page as if it were the agency table.
    die("ERROR fetching APSC agency list: HTTP " . $request->status_code . "<br>" . PHP_EOL);
}
$doc = phpQuery::newDocumentHTML($request->body);
phpQuery::selectDocument($doc);
foreach (pq('tr')->elements as $tr) {
    $agency = "";
    $employees = "";
    $function = "";
    // childNodes lists every child node of the row, not only the cells.
    // NOTE(review): reading positions 0, 2 and 4 presumably relies on a text
    // node sitting between adjacent cells in the page markup - confirm
    // against the live page before changing these indexes.
    $i = 0;
    foreach ($tr->childNodes as $td) {
        if ($i == 0) {
            $agency = $td->nodeValue;
        }
        if ($i == 2) {
            // Drop thousands separators so the figure can be summed.
            $employees = trim(str_replace(",", "", $td->nodeValue));
        }
        if ($i == 4) {
            $function = $td->nodeValue;
        }
        $i++;
    }
    if ($agency != "" && $employees != "" && $function != "") {
        // Prefer the name exactly as scraped. Only when that is unknown fall
        // back to removing "2" characters (presumably a footnote marker fused
        // onto some names), so a name that legitimately contains a 2 is not
        // mangled when it already matches.
        $name = trim($agency);
        if (!isset($nametoid[$name])) {
            $name = trim(str_replace('2', '', $agency));
        }
        if (isset($nametoid[$name])) {
            $id = $nametoid[$name];
            // Start the total explicitly instead of hiding the undefined
            // index behind "@", and add an integer rather than a raw string.
            if (!isset($sums[$id]["2011-2012"])) {
                $sums[$id]["2011-2012"] = 0;
            }
            $sums[$id]["2011-2012"] += (int) $employees;
            $functions[$id] = $function;
        } else if ($agency != "Agency") {
            // Any unknown name other than the "Agency" header cell aborts the
            // import. The scraped text is escaped because it comes from a
            // remote page and is echoed into HTML output.
            echo "<br>ERROR NAME '" . htmlspecialchars($agency, ENT_QUOTES, 'UTF-8') . "' MISSING FROM ID LIST<br><bR>" . PHP_EOL;

            die();
        }
    } else {
        echo "skipped " . htmlspecialchars($agency, ENT_QUOTES, 'UTF-8');
    }
}
//print_r($sums);
// Push the accumulated totals into each agency document, saving a document
// only when something on it was added or altered.
foreach ($sums as $id => $periodTotals) {
    echo $id . "<br>" . PHP_EOL;
    $doc = $db->get($id);
    echo $doc->name . "<br>" . PHP_EOL;

    // Create the statistics containers if the document lacks them; creating
    // either one already counts as a change.
    $changed = false;
    if (!isset($doc->statistics)) {
        $doc->statistics = new stdClass();
        $changed = true;
    }
    if (!isset($doc->statistics->employees)) {
        $doc->statistics->employees = new stdClass();
        $changed = true;
    }

    foreach ($periodTotals as $period => $total) {
        $upToDate = isset($doc->statistics->employees->$period->value)
                && $doc->statistics->employees->$period->value == $total;
        if ($upToDate) {
            continue;
        }
        // NOTE(review): the top-level employees figure and the function
        // classification are only written when the stored total differs -
        // confirm that a changed function text alone should not trigger a save.
        $changed = true;
        $doc->statistics->employees->$period = array(
            "value" => $total,
            "source" => "http://apsc.gov.au/stateoftheservice/"
        );
        $doc->employees = $total;
        $doc->functionClassification = $functions[$id];
    }

    if (!$changed) {
        echo "not changed<br>" . PHP_EOL;
        continue;
    }
    $db->save($doc);
}
// employees: timeperiod, source = apsc state of service, value 
?>