Testing facebook imports
Former-commit-id: 26f7cc47bbd4c30bb9dab99a095d0363e52c3766
--- a/.gitmodules
+++ b/.gitmodules
@@ -16,4 +16,7 @@
[submodule "javascripts/flotr2"]
path = javascripts/flotr2
url = https://github.com/HumbleSoftware/Flotr2.git
+[submodule "lib/phpquery"]
+ path = lib/phpquery
+ url = https://github.com/TobiaszCudnik/phpquery.git
--- /dev/null
+++ b/admin/importAustraliaGovAuGov2.php
@@ -1,1 +1,61 @@
+<?php
+require_once '../include/common.inc.php';
+
+$db = $server->get_db('disclosr-agencies');
+$rows = $db->get_view("app", "byName")->rows;
+$nametoid = Array();
+$accounts = Array();
+foreach ($rows as $row) {
+ $nametoid[trim($row->key)] = $row->value;
+}
+
+function extractCSVAccounts($url, $nameField, $accountField, $filter) {
+ global $accounts, $nametoid;
+ $request = Requests::get($url);
+ $Data = str_getcsv($request->body, "\n"); //parse the rows
+ $headers = Array();
+ foreach ($Data as $num => $line) {
+ $Row = str_getcsv($line, ",");
+ if ($num == 0) {
+
+ } else if ($num == 1) {
+ $headers = $Row;
+ //print_r($headers);
+ } else {
+ if (isset($Row[array_search($nameField, $headers)])) {
+ $agencyName = $Row[array_search($nameField, $headers)];
+ if (!$filter || $Row[array_search("State", $headers)] == "NAT") {
+ if (!in_array(trim($agencyName), array_keys($nametoid))) {
+ echo "$agencyName missing" . PHP_EOL;
+ } else {
+ // echo $Row[array_search($nameField, $headers)] . PHP_EOL;
+ }
+ }
+ } else {
+ //echo "error finding agency" . $line . PHP_EOL;
+ }
+ }
+ }
+}
+
+// http://agimo.govspace.gov.au/page/gov2register/
+// twitter
+//extractCSVAccounts("https://docs.google.com/spreadsheet/pub?key=0Ap1exl80wB8OdHNKVmQ5RVlvQWpibDAxNHkzcU1nV2c&single=true&gid=0&output=csv", "Agency/Body/Event", "", true);
+// RSS
+// https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGJxandJREhLSGlWWUZfZ2xKOTNHZ0E&output=csv
+// facebook
+extractCSVAccounts("https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGtjcW9vOXdyZ3pOV21vQU51VmhzQnc&single=true&gid=0&output=csv","Agency","Name");
+
+/*
+ * http://australia.gov.au/news-and-media/media-release-rss-feeds
+ * http://australia.gov.au/news-and-media/social-media/blogs
+ * http://australia.gov.au/news-and-media/social-media/twitter
+ * http://australia.gov.au/news-and-media/social-media/facebook
+ * http://australia.gov.au/news-and-media/social-media/youtube
+ * http://australia.gov.au/news-and-media/social-media/flickr
+ * http://australia.gov.au/news-and-media/social-media/apps http://www.harmony.gov.au/get-involved/app-downloads.htm http://www.em.gov.au/Resources/Pages/Before-the-Storm-phone-game.aspx
+ * http://australia.gov.au/news-and-media/social-media/podcasts
+ */
+?>
+
--- a/admin/importGov2RegisterRSSFacebookTwitter.php
+++ b/admin/importGov2RegisterRSSFacebookTwitter.php
@@ -1,6 +1,7 @@
<?php
require_once '../include/common.inc.php';
+require($basePath.'lib/phpquery/phpQuery/phpQuery.php');
$db = $server->get_db('disclosr-agencies');
$rows = $db->get_view("app", "byName")->rows;
@@ -9,14 +10,50 @@
foreach ($rows as $row) {
$nametoid[trim($row->key)] = $row->value;
}
+function extractHTMLAccounts($url, $accountType) {
+ global $accounts, $nametoid;
+ $request = Requests::get($url);
+ $doc = phpQuery::newDocumentHTML($request->body);
+ phpQuery::selectDocument($doc);
+ foreach (pq('tr')->elements as $tr) {
+ //echo $tr->nodeValue.PHP_EOL;
+ $agency = "";
+ $url = "";
+ foreach ($tr->childNodes as $td) {
+ $class = $td->getAttribute("class");
+ //echo "cccc $class ".$td->nodeValue.PHP_EOL;
+ if ($class == "s11" || $class == "s10" || $class == "s7") {
+ $agency = $td->nodeValue;
+ } else if ($class == "s6" || $class == "s9"){
+ $url = $td->nodeValue;
+ foreach($td->childNodes as $a) {
+ $href = $a->getAttribute("href");
+ if ($href != "") {
+ $url = $href;
+ }
+ }
+ }
+ }
+ if ($agency != "" && $url != "") {
+ if (!in_array(trim($agency), array_keys($nametoid))) {
+ echo trim($agency)." missing" . PHP_EOL;
+ } else {
+ // echo $agency." = ".$url.PHP_EOL;
+ $accounts[$nametoid[trim($agency)]][$accountType][] = $url;
+ }
+
+ }
+ }
+
+}
-function extractCSVAccounts($url, $nameField, $accountField, $filter) {
+function extractCSVAccounts($url, $accountType, $nameField, $accountField, $filter) {
global $accounts, $nametoid;
$request = Requests::get($url);
$Data = str_getcsv($request->body, "\n"); //parse the rows
$headers = Array();
foreach ($Data as $num => $line) {
- $Row = str_getcsv($line, ",");
+ $Row = str_getcsv($line, ",",'"');
if ($num == 0) {
} else if ($num == 1) {
@@ -27,9 +64,10 @@
$agencyName = $Row[array_search($nameField, $headers)];
if (!$filter || $Row[array_search("State", $headers)] == "NAT") {
if (!in_array(trim($agencyName), array_keys($nametoid))) {
- echo "$agencyName missing" . PHP_EOL;
+ echo trim($agencyName)." missing" . PHP_EOL;
} else {
// echo $Row[array_search($nameField, $headers)] . PHP_EOL;
+ $accounts[$nametoid[trim($agencyName)]][$accountType][] = $Row[array_search($accountField, $headers)];
}
}
} else {
@@ -41,21 +79,11 @@
// http://agimo.govspace.gov.au/page/gov2register/
// twitter
-extractCSVAccounts("https://docs.google.com/spreadsheet/pub?key=0Ap1exl80wB8OdHNKVmQ5RVlvQWpibDAxNHkzcU1nV2c&single=true&gid=0&output=csv", "Agency/Body/Event", "", true);
+extractCSVAccounts("https://docs.google.com/spreadsheet/pub?key=0Ap1exl80wB8OdHNKVmQ5RVlvQWpibDAxNHkzcU1nV2c&single=true&gid=0&output=csv", "Twitter", "Agency/Body/Event", "", true);
// RSS
-// https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGJxandJREhLSGlWWUZfZ2xKOTNHZ0E&output=csv
+extractHTMLAccounts("https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGJxandJREhLSGlWWUZfZ2xKOTNHZ0E&output=html", "RSS");
// facebook
-//extractCSVAccounts("https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGtjcW9vOXdyZ3pOV21vQU51VmhzQnc&single=true&gid=0&output=csv","","");
+extractHTMLAccounts("https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGtjcW9vOXdyZ3pOV21vQU51VmhzQnc&single=true&gid=0&output=html", "Facebook");
-/*
- * http://australia.gov.au/news-and-media/media-release-rss-feeds
- * http://australia.gov.au/news-and-media/social-media/blogs
- * http://australia.gov.au/news-and-media/social-media/twitter
- * http://australia.gov.au/news-and-media/social-media/facebook
- * http://australia.gov.au/news-and-media/social-media/youtube
- * http://australia.gov.au/news-and-media/social-media/flickr
- * http://australia.gov.au/news-and-media/social-media/apps http://www.harmony.gov.au/get-involved/app-downloads.htm http://www.em.gov.au/Resources/Pages/Before-the-Storm-phone-game.aspx
- * http://australia.gov.au/news-and-media/social-media/podcasts
- */
?>
--- /dev/null
+++ b/lib/phpquery