Employee headcount data importer
[disclosr.git] / admin / importGov2RegisterRSSFacebookTwitter.php
blob:a/admin/importGov2RegisterRSSFacebookTwitter.php -> blob:b/admin/importGov2RegisterRSSFacebookTwitter.php
<?php
   
// Shared bootstrap; presumably defines $server and $basePath used below (verify in common.inc.php).
require_once '../include/common.inc.php';
// phpQuery is needed by extractHTMLAccounts() to parse the published HTML sheets.
require($basePath . 'lib/phpquery/phpQuery/phpQuery.php');

// Create the agencies database; any REST client failure is handed to the shared error handler.
try {
    $server->create_db('disclosr-agencies');
} catch (SetteeRestClientException $e) {
    setteErrorHandler($e);
}
$db = $server->get_db('disclosr-agencies');
// NOTE(review): assumed to install the "app" design document whose byName view is queried next - confirm.
createAgencyDesignDoc();

// Build the lookup used by the extract*Accounts() functions: trimmed view key => view value.
// The values are later passed to $db->get(), i.e. they are used as document ids.
$rows = $db->get_view("app", "byName")->rows;
$nametoid = Array();
// Filled by the extract*Accounts() functions as $accounts[<id>][<accountType>][] = <account>.
$accounts = Array();
foreach ($rows as $row) {
    $nametoid[trim($row->key)] = $row->value;
}
   
// twitter https://docs.google.com/spreadsheet/fm?id=tsJVd9EYoAjbl014y3qMgWg.03918275400592898296.8568379511161083736&hl=en&fmcmd=5&gid=0
// RSS https://docs.google.com/spreadsheet/fm?id=tbqjwIDHKHiVYF_glJ93GgA.03918275400592898296.8789688748524615194&authkey=CJDP-uQG&hl=en_GB&fmcmd=5&gid=0
// facebook https://docs.google.com/spreadsheet/fm?id=tkcqoo9wrgzNWmoANuVhsBw.03918275400592898296.3040387705062056060&authkey=CKzl7r0I&hl=en_GB&fmcmd=5&gid=0

/**
 * Collect accounts from an HTML table and record them in the global $accounts.
 *
 * Every <tr> of the downloaded page is scanned. A cell whose class is s11, s10
 * or s7 supplies the agency name; a cell whose class is s6 or s9 supplies the
 * account, taken from the href of a child element when one is present and from
 * the cell text otherwise. Rows yielding both values are stored as
 * $accounts[<agency id>][$accountType][]; names that are not keys of the global
 * $nametoid are reported on standard output as "<name> missing".
 *
 * @param string $url         Address of the HTML page to download.
 * @param string $accountType Key under which the accounts are grouped (e.g. "RSS").
 */
function extractHTMLAccounts($url, $accountType) {
    global $accounts, $nametoid;
    $request = Requests::get($url);
    $doc = phpQuery::newDocumentHTML($request->body);
    phpQuery::selectDocument($doc);
    foreach (pq('tr')->elements as $tr) {
        $agency = "";
        // Kept separate from the $url parameter so the argument is not clobbered.
        $accountUrl = "";
        foreach ($tr->childNodes as $td) {
            // childNodes also yields text/comment nodes, which have no getAttribute().
            if (!($td instanceof DOMElement)) {
                continue;
            }
            $class = $td->getAttribute("class");
            if ($class == "s11" || $class == "s10" || $class == "s7") {
                $agency = $td->nodeValue;
            } else if ($class == "s6" || $class == "s9") {
                // Fall back to the cell text; a link target, when present, wins.
                $accountUrl = $td->nodeValue;
                foreach ($td->childNodes as $a) {
                    if (!($a instanceof DOMElement)) {
                        continue;
                    }
                    $href = $a->getAttribute("href");
                    if ($href != "") {
                        $accountUrl = $href;
                    }
                }
            }
        }
        if ($agency != "" && $accountUrl != "") {
            $agencyName = trim($agency);
            if (!array_key_exists($agencyName, $nametoid)) {
                echo $agencyName . " missing" . PHP_EOL;
            } else {
                $accounts[$nametoid[$agencyName]][$accountType][] = $accountUrl;
            }
        }
    }
}
   
  function extractCSVAccounts($url, $accountType, $nameField, $accountField, $filter) {
  global $accounts, $nametoid;
  $request = Requests::get($url);
  $Data = str_getcsv($request->body, "\n"); //parse the rows
  $headers = Array();
  foreach ($Data as $num => $line) {
  $Row = str_getcsv($line, ",", '"');
  if ($num == 0) {
   
  } else if ($num == 1) {
  $headers = $Row;
  //print_r($headers);
  } else {
  if (isset($Row[array_search($nameField, $headers)])) {
  $agencyName = $Row[array_search($nameField, $headers)];
  if (!$filter || $Row[array_search("State", $headers)] == "NAT") {
  if (!in_array(trim($agencyName), array_keys($nametoid))) {
  echo trim($agencyName) . " missing" . PHP_EOL;
  } else {
  // echo $Row[array_search($nameField, $headers)] . PHP_EOL;
  $accounts[$nametoid[trim($agencyName)]][$accountType][] = $Row[array_search($accountField, $headers)];
  }
  }
  } else {
  //echo "error finding agency" . $line . PHP_EOL;
  }
  }
  }
  }
   
  // http://agimo.govspace.gov.au/page/gov2register/
  // twitter
  extractCSVAccounts("https://docs.google.com/spreadsheet/pub?key=0Ap1exl80wB8OdHNKVmQ5RVlvQWpibDAxNHkzcU1nV2c&single=true&gid=0&output=csv", "Twitter", "Agency/Body/Event", "", true);
  // RSS
  extractHTMLAccounts("https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGJxandJREhLSGlWWUZfZ2xKOTNHZ0E&output=html", "RSS");
  // facebook
  extractHTMLAccounts("https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGtjcW9vOXdyZ3pOV21vQU51VmhzQnc&single=true&gid=0&output=html", "Facebook");
  foreach ($accounts as $id => $accountTypes) {
  echo $id . "<br>" . PHP_EOL;
  $doc = object_to_array($db->get($id));
  // print_r($doc);
   
  foreach ($accountTypes as $accountType => $accounts) {
  if (!isset($doc["has" . $accountType]) || !is_array($doc["has" . $accountType])) {
  $doc["has" . $accountType] = Array();
  }
  $doc["has" . $accountType] = array_unique(array_merge($doc["has" . $accountType], $accounts));
  }
  $db->save($doc);
  }
?>