<?php
/**
 * Imports agency accounts (Twitter, RSS, Facebook) from published Google
 * spreadsheets and merges them into the agency documents stored in the
 * 'disclosr-agencies' database.
 *
 * Agencies are matched on the trimmed key emitted by the "app"/"byName"
 * view; spreadsheet names with no match are echoed as "<name> missing".
 *
 * NOTE(review): $server and $basePath are presumably defined by
 * common.inc.php - confirm.
 */
require_once '../include/common.inc.php';
require($basePath . 'lib/phpquery/phpQuery/phpQuery.php');

$db = $server->get_db('disclosr-agencies');
$rows = $db->get_view("app", "byName")->rows;

// Trimmed agency name => view value (used below as the document id).
$nametoid = array();
// Document id => account type => list of accounts collected during this run.
$accounts = array();
foreach ($rows as $row) {
    $nametoid[trim($row->key)] = $row->value;
}

/**
 * Records one account against an agency, or reports the agency as missing.
 *
 * Appends to the global $accounts map using the global $nametoid lookup.
 *
 * @param string $agencyName  Agency name as read from the spreadsheet (trimmed here).
 * @param string $accountType Account category, e.g. "Twitter".
 * @param string $account     Account identifier or URL to record.
 */
function recordAgencyAccount($agencyName, $accountType, $account)
{
    global $accounts, $nametoid;

    $name = trim($agencyName);
    // Direct key lookup: exact match, and no array_keys() copy per row.
    if (!array_key_exists($name, $nametoid)) {
        echo $name . " missing" . PHP_EOL;
        return;
    }
    $accounts[$nametoid[$name]][$accountType][] = $account;
}

/**
 * Collects accounts from a spreadsheet published as HTML.
 *
 * Every table row is scanned cell by cell: cells with class s11/s10/s7
 * supply the agency name, cells with class s6/s9 supply the account URL
 * (the href of a child element wins over the cell text when present).
 * NOTE(review): these class names look like generated spreadsheet styles
 * and may change if the sheet is republished - confirm.
 *
 * @param string $url         Address of the published HTML spreadsheet.
 * @param string $accountType Account category to file the results under.
 */
function extractHTMLAccounts($url, $accountType)
{
    $request = Requests::get($url);
    $doc = phpQuery::newDocumentHTML($request->body);
    phpQuery::selectDocument($doc);

    foreach (pq('tr')->elements as $tr) {
        $agency = "";
        $accountUrl = "";
        foreach ($tr->childNodes as $td) {
            // Text/comment nodes have no getAttribute(); only inspect elements.
            if (!($td instanceof DOMElement)) {
                continue;
            }
            $class = $td->getAttribute("class");
            if (in_array($class, array("s11", "s10", "s7"), true)) {
                $agency = $td->nodeValue;
            } else if (in_array($class, array("s6", "s9"), true)) {
                // Fall back to the cell text when it contains no link.
                $accountUrl = $td->nodeValue;
                foreach ($td->childNodes as $a) {
                    if (!($a instanceof DOMElement)) {
                        continue;
                    }
                    $href = $a->getAttribute("href");
                    if ($href != "") {
                        $accountUrl = $href;
                    }
                }
            }
        }
        if ($agency != "" && $accountUrl != "") {
            recordAgencyAccount($agency, $accountType, $accountUrl);
        }
    }
}

/**
 * Collects accounts from a spreadsheet published as CSV.
 *
 * The first row is ignored, the second row is taken as the header row and
 * every later row is treated as data.
 *
 * @param string $url          Address of the published CSV spreadsheet.
 * @param string $accountType  Account category to file the results under.
 * @param string $nameField    Header of the column holding the agency name.
 * @param string $accountField Header of the column holding the account.
 * @param bool   $filter       When truthy, only rows whose "State" column
 *                             equals "NAT" are imported.
 */
function extractCSVAccounts($url, $accountType, $nameField, $accountField, $filter)
{
    $request = Requests::get($url);
    // Split the body into rows by using the newline as the "delimiter".
    $lines = str_getcsv($request->body, "\n");

    // Column positions, resolved once from the header row (false = not found).
    $nameCol = false;
    $accountCol = false;
    $stateCol = false;

    foreach ($lines as $num => $line) {
        if ($num == 0) {
            continue; // first row carries no data or headers
        }
        $fields = str_getcsv($line, ",", '"');
        if ($num == 1) {
            $nameCol = array_search($nameField, $fields, true);
            $accountCol = array_search($accountField, $fields, true);
            $stateCol = array_search("State", $fields, true);
            continue;
        }

        // Without a name there is nothing to match the row against.
        if ($nameCol === false || !isset($fields[$nameCol])) {
            continue;
        }
        if ($filter) {
            $isNational = $stateCol !== false
                && isset($fields[$stateCol])
                && $fields[$stateCol] == "NAT";
            if (!$isNational) {
                continue;
            }
        }
        // A missing account column must not silently fall back to column 0.
        if ($accountCol === false || !isset($fields[$accountCol])) {
            continue;
        }
        recordAgencyAccount($fields[$nameCol], $accountType, $fields[$accountCol]);
    }
}

// http://agimo.govspace.gov.au/page/gov2register/
// twitter
// NOTE(review): the account column is looked up by an empty header name - confirm against the sheet.
extractCSVAccounts("https://docs.google.com/spreadsheet/pub?key=0Ap1exl80wB8OdHNKVmQ5RVlvQWpibDAxNHkzcU1nV2c&single=true&gid=0&output=csv", "Twitter", "Agency/Body/Event", "", true);
// RSS
extractHTMLAccounts("https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGJxandJREhLSGlWWUZfZ2xKOTNHZ0E&output=html", "RSS");
// facebook
extractHTMLAccounts("https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGtjcW9vOXdyZ3pOV21vQU51VmhzQnc&single=true&gid=0&output=html", "Facebook");

// Merge the collected accounts into each agency document and save it.
foreach ($accounts as $id => $accountTypes) {
    echo $id . "<br>" . PHP_EOL;
    $doc = object_to_array($db->get($id));
    // $newAccounts (not $accounts) so the array being iterated is not overwritten.
    foreach ($accountTypes as $accountType => $newAccounts) {
        $field = "has" . $accountType;
        if (!isset($doc[$field]) || !is_array($doc[$field])) {
            $doc[$field] = array();
        }
        // array_unique() keeps the original keys; reindex so the field stays
        // a plain sequential list once duplicates have been removed.
        $doc[$field] = array_values(array_unique(array_merge($doc[$field], $newAccounts)));
    }
    $db->save($doc);
}