<?php |
<?php |
|
|
require_once '../include/common.inc.php'; |
require_once '../include/common.inc.php'; |
require($basePath.'lib/phpquery/phpQuery/phpQuery.php'); |
require($basePath . 'lib/phpquery/phpQuery/phpQuery.php'); |
|
|
// Accumulators shared (via `global`) with the extract*Accounts() functions below.
// $nametoid maps a trimmed agency name to the value emitted by the "byName" view;
// that value is later used as the document id passed to $db->get().
// $accounts collects the scraped accounts as $accounts[id][accountType][] = account.
$nametoid = array();
$accounts = array();

$db = $server->get_db('disclosr-agencies');
$rows = $db->get_view("app", "byName")->rows;
foreach ($rows as $row) {
    $agencyName = trim($row->key);
    $nametoid[$agencyName] = $row->value;
}

/**
 * Collect agency accounts of one type from a published HTML spreadsheet.
 *
 * Fetches $url, walks every <tr> of the document and inspects the CSS class of
 * each cell: cells classed s11/s10/s7 supply the agency name, cells classed
 * s6/s9 supply the account URL (the href of a contained link wins over the
 * cell text). When a row yields both values the URL is appended to the global
 * $accounts as $accounts[id][$accountType][], where id is looked up in the
 * global $nametoid by the trimmed agency name. Names that are not a key of
 * $nametoid are echoed as "<name> missing".
 *
 * @param string $url         address of the HTML document to fetch
 * @param string $accountType label the URLs are filed under (e.g. "RSS")
 * @return void
 */
function extractHTMLAccounts($url, $accountType) {
    global $accounts, $nametoid;

    $request = Requests::get($url);
    $doc = phpQuery::newDocumentHTML($request->body);
    phpQuery::selectDocument($doc);

    foreach (pq('tr')->elements as $tr) {
        $agency = "";
        // Kept separate from the $url parameter so the fetched address is not overwritten.
        $accountUrl = "";

        foreach ($tr->childNodes as $td) {
            // childNodes also contains text/comment nodes, which have no
            // getAttribute() method; calling it on them is a fatal error.
            if (!($td instanceof DOMElement)) {
                continue;
            }
            $class = $td->getAttribute("class");
            if ($class == "s11" || $class == "s10" || $class == "s7") {
                $agency = $td->nodeValue;
            } else if ($class == "s6" || $class == "s9") {
                // Default to the cell text; a link inside the cell overrides it.
                $accountUrl = $td->nodeValue;
                foreach ($td->childNodes as $a) {
                    if (!($a instanceof DOMElement)) {
                        continue;
                    }
                    $href = $a->getAttribute("href");
                    if ($href != "") {
                        $accountUrl = $href;
                    }
                }
            }
        }

        if ($agency == "" || $accountUrl == "") {
            continue;
        }

        $agencyName = trim($agency);
        // Test the key directly: this is the same lookup the assignment below
        // performs, and it avoids a loose linear scan over array_keys().
        if (!array_key_exists($agencyName, $nametoid)) {
            echo $agencyName . " missing" . PHP_EOL;
        } else {
            $accounts[$nametoid[$agencyName]][$accountType][] = $accountUrl;
        }
    }
}

/**
 * Collect agency accounts of one type from a published CSV spreadsheet.
 *
 * Fetches $url and reads it record by record. Record 0 is ignored and record 1
 * supplies the column headers; every later record is a data row. For each data
 * row the agency name is taken from the column headed $nameField and the
 * account from the column headed $accountField. The account is appended to the
 * global $accounts as $accounts[id][$accountType][], where id is looked up in
 * the global $nametoid by the trimmed agency name. Names that are not a key of
 * $nametoid are echoed as "<name> missing".
 *
 * @param string $url          address of the CSV document to fetch
 * @param string $accountType  label the accounts are filed under (e.g. "Twitter")
 * @param string $nameField    header text of the agency-name column
 * @param string $accountField header text of the account column
 * @param bool   $filter       when truthy, only rows whose "State" column
 *                             equals "NAT" are used
 * @return void
 */
function extractCSVAccounts($url, $accountType, $nameField, $accountField, $filter) {
    global $accounts, $nametoid;

    $request = Requests::get($url);

    // Parse through a stream with fgetcsv() instead of splitting the body with
    // str_getcsv($body, "\n"): that split consumes the enclosure quotes, so the
    // second per-line parse would break quoted fields containing commas.
    $stream = fopen('php://temp', 'r+');
    if ($stream === false) {
        return;
    }
    fwrite($stream, $request->body);
    rewind($stream);

    // Column positions, resolved once from the header record. They stay false
    // when the header is absent so a missing column is never read as column 0.
    $nameColumn = false;
    $accountColumn = false;
    $stateColumn = false;

    $recordNumber = 0;
    while (($record = fgetcsv($stream, 0, ",", '"', "\\")) !== false) {
        $current = $recordNumber++;

        if ($current == 0) {
            // The first record is not a header or data row; it is skipped.
            continue;
        }
        if ($current == 1) {
            $nameColumn = array_search($nameField, $record, true);
            $accountColumn = array_search($accountField, $record, true);
            $stateColumn = array_search("State", $record, true);
            continue;
        }

        if ($nameColumn === false || !isset($record[$nameColumn])) {
            // No agency name available for this row.
            continue;
        }
        if ($filter) {
            $isNational = $stateColumn !== false
                && isset($record[$stateColumn])
                && $record[$stateColumn] === "NAT";
            if (!$isNational) {
                continue;
            }
        }

        $agencyName = trim($record[$nameColumn]);
        if (!array_key_exists($agencyName, $nametoid)) {
            echo $agencyName . " missing" . PHP_EOL;
            continue;
        }

        // Only store a value that actually exists in this row.
        if ($accountColumn !== false && isset($record[$accountColumn])) {
            $accounts[$nametoid[$agencyName]][$accountType][] = $record[$accountColumn];
        }
    }

    fclose($stream);
}

// Source register: http://agimo.govspace.gov.au/page/gov2register/

// Twitter
// NOTE(review): the account column header is passed as "" here, so the account
// is read from whichever column that empty header text resolves to — confirm
// the intended column name against the spreadsheet.
extractCSVAccounts("https://docs.google.com/spreadsheet/pub?key=0Ap1exl80wB8OdHNKVmQ5RVlvQWpibDAxNHkzcU1nV2c&single=true&gid=0&output=csv", "Twitter", "Agency/Body/Event", "", true);
// RSS
extractHTMLAccounts("https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGJxandJREhLSGlWWUZfZ2xKOTNHZ0E&output=html", "RSS");
// Facebook
extractHTMLAccounts("https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGtjcW9vOXdyZ3pOV21vQU51VmhzQnc&single=true&gid=0&output=html", "Facebook");

// Merge the collected accounts into each agency document's "has<Type>" list
// and save the document back.
foreach ($accounts as $id => $accountTypes) {
    echo $id . "<br>" . PHP_EOL;
    $doc = object_to_array($db->get($id));

    // The value variable must not be named $accounts: that would overwrite the
    // array the outer loop is iterating.
    foreach ($accountTypes as $accountType => $foundAccounts) {
        $field = "has" . $accountType;
        if (!isset($doc[$field]) || !is_array($doc[$field])) {
            $doc[$field] = array();
        }
        // array_unique() keeps the original keys, leaving gaps after duplicates
        // are dropped; array_values() restores a sequential list.
        $doc[$field] = array_values(array_unique(array_merge($doc[$field], $foundAccounts)));
    }

    $db->save($doc);
}