mo scrapers and apsc2012 imoort
[disclosr.git] / admin / importAPSCEmployees2012.php
blob:a/admin/importAPSCEmployees2012.php -> blob:b/admin/importAPSCEmployees2012.php
--- a/admin/importAPSCEmployees2012.php
+++ b/admin/importAPSCEmployees2012.php
@@ -1,1 +1,86 @@
+<?php
 
+require_once '../include/common.inc.php';
+require($basePath . 'lib/phpquery/phpQuery/phpQuery.php');
+$db = $server->get_db('disclosr-agencies');
+$rows = $db->get_view("app", "byName")->rows;
+$nametoid = Array();
+$sums = Array();
+$functions = Array();
+foreach ($rows as $row) {
+    $nametoid[trim($row->key)] = $row->value;
+}
+
+
+$request = Requests::get("http://www.apsc.gov.au/publications-and-media/parliamentary/state-of-the-service/new-sosr/appendix-2-aps-agencies");
+$doc = phpQuery::newDocumentHTML($request->body);
+phpQuery::selectDocument($doc);
+foreach (pq('tr')->elements as $tr) {
+    //echo $tr->nodeValue.PHP_EOL;
+    $agency = "";
+    $employees = "";
+    $function = "";
+    $i = 0;
+    foreach ($tr->childNodes as $td) {
+        //echo  $td->nodeValue." $i <br>";
+        if ($i == 0)
+            $agency = $td->nodeValue;
+        if ($i == 2) {
+            $employees = trim(str_replace(",", "", $td->nodeValue));
+        }
+        if ($i == 4) {
+            $function = $td->nodeValue;
+        }
+        $i++;
+    }
+    if ($agency != "" && $employees != "" && $function != "") {
+        $name = trim(str_replace('2','',$agency));
+         //echo "$name<br><bR>" . PHP_EOL;
+        if (isset($nametoid[$name])) {
+            $id = $nametoid[$name];
+            //echo $id . "<br>" . PHP_EOL;
+            @$sums[$id]["2011-2012"] += $employees;
+            $functions[$id] = $function;
+        } else if ($agency != "Agency"){
+            echo "<br>ERROR NAME '$agency' MISSING FROM ID LIST<br><bR>" . PHP_EOL;
+
+            die();
+        }
+    } else {
+        echo "skipped $agency";
+    }
+}
+//print_r($sums);
+foreach ($sums as $id => $sum) {
+    echo $id . "<br>" . PHP_EOL;
+    $doc = $db->get($id);
+    echo $doc->name . "<br>" . PHP_EOL;
+    // print_r($doc);
+    $changed = false;
+    if (!isset($doc->statistics)) {
+        $changed = true;
+        $doc->statistics = new stdClass();
+    }
+    if (!isset($doc->statistics->employees)) {
+        $changed = true;
+        $doc->statistics->employees = new stdClass();
+    }
+    foreach ($sum as $timePeriod => $value) {
+        if (!isset($doc->statistics->employees->$timePeriod->value)
+                || $doc->statistics->employees->$timePeriod->value != $value) {
+            $changed = true;
+            $doc->statistics->employees->$timePeriod = Array("value" => $value, "source" => "http://apsc.gov.au/stateoftheservice/");
+            $doc->employees = $value;
+              $doc->functionClassification = $functions[$id];
+        }
+    }
+  
+    if ($changed) {
+        $db->save($doc);
+    } else {
+        echo "not changed" . "<br>" . PHP_EOL;
+    }
+}
+// employees: timeperiod, source = apsc state of service, value 
+?>
+