mo scrapers and apsc2012 import
[disclosr.git] / admin / importAPSCEmployees2012.php
blob:a/admin/importAPSCEmployees2012.php -> blob:b/admin/importAPSCEmployees2012.php
  <?php
   
  require_once '../include/common.inc.php';
  require($basePath . 'lib/phpquery/phpQuery/phpQuery.php');
  $db = $server->get_db('disclosr-agencies');
  $rows = $db->get_view("app", "byName")->rows;
  $nametoid = Array();
  $sums = Array();
  $functions = Array();
  foreach ($rows as $row) {
  $nametoid[trim($row->key)] = $row->value;
  }
   
   
  $request = Requests::get("http://www.apsc.gov.au/publications-and-media/parliamentary/state-of-the-service/new-sosr/appendix-2-aps-agencies");
  $doc = phpQuery::newDocumentHTML($request->body);
  phpQuery::selectDocument($doc);
  foreach (pq('tr')->elements as $tr) {
  //echo $tr->nodeValue.PHP_EOL;
  $agency = "";
  $employees = "";
  $function = "";
  $i = 0;
  foreach ($tr->childNodes as $td) {
  //echo $td->nodeValue." $i <br>";
  if ($i == 0)
  $agency = $td->nodeValue;
  if ($i == 2) {
  $employees = trim(str_replace(",", "", $td->nodeValue));
  }
  if ($i == 4) {
  $function = $td->nodeValue;
  }
  $i++;
  }
  if ($agency != "" && $employees != "" && $function != "") {
  $name = trim(str_replace('2','',$agency));
  //echo "$name<br><bR>" . PHP_EOL;
  if (isset($nametoid[$name])) {
  $id = $nametoid[$name];
  //echo $id . "<br>" . PHP_EOL;
  @$sums[$id]["2011-2012"] += $employees;
  $functions[$id] = $function;
  } else if ($agency != "Agency"){
  echo "<br>ERROR NAME '$agency' MISSING FROM ID LIST<br><bR>" . PHP_EOL;
   
  die();
  }
  } else {
  echo "skipped $agency";
  }
  }
  //print_r($sums);
  foreach ($sums as $id => $sum) {
  echo $id . "<br>" . PHP_EOL;
  $doc = $db->get($id);
  echo $doc->name . "<br>" . PHP_EOL;
  // print_r($doc);
  $changed = false;
  if (!isset($doc->statistics)) {
  $changed = true;
  $doc->statistics = new stdClass();
  }
  if (!isset($doc->statistics->employees)) {
  $changed = true;
  $doc->statistics->employees = new stdClass();
  }
  foreach ($sum as $timePeriod => $value) {
  if (!isset($doc->statistics->employees->$timePeriod->value)
  || $doc->statistics->employees->$timePeriod->value != $value) {
  $changed = true;
  $doc->statistics->employees->$timePeriod = Array("value" => $value, "source" => "http://apsc.gov.au/stateoftheservice/");
  $doc->employees = $value;
  $doc->functionClassification = $functions[$id];
  }
  }
   
  if ($changed) {
  $db->save($doc);
  } else {
  echo "not changed" . "<br>" . PHP_EOL;
  }
  }
  // employees: timeperiod, source = apsc state of service, value
  ?>