Add AGLS function scraper
Former-commit-id: 174e97ffdeaeded56d2cc86d961d05b6f1a0c427
--- a/.gitmodules
+++ b/.gitmodules
@@ -22,4 +22,7 @@
[submodule "javascripts/bubbletree"]
path = javascripts/bubbletree
url = https://github.com/okfn/bubbletree.git
+[submodule "lib/querypath"]
+ path = lib/querypath
+ url = https://github.com/technosophos/querypath.git
--- a/admin/conflicts.php
+++ b/admin/conflicts.php
@@ -1,7 +1,7 @@
<?php
include_once('../include/common.inc.php');
-include_header();
+include_header('Fix Conflicts');
require_once '../lib/php-diff/lib/Diff.php';
require_once '../lib/php-diff/lib/Diff/Renderer/Html/SideBySide.php';
--- /dev/null
+++ b/admin/genericAgencyFixer.php
@@ -1,1 +1,44 @@
+<?php
+/**
+ * Admin maintenance script: for every agency row returned by the
+ * "byCanonicalName" view that has a website but no stored metaTags, fetch the
+ * website, collect its named <meta> tags and save them onto the agency
+ * document as a name => content map.
+ *
+ * Progress is echoed as HTML and flushed after each agency.
+ *
+ * Field notes left by the original author (meaning not confirmed here):
+ *   hasRestricitiveLicence" hasRestrictiveLicense -> has Restrictive Licence
+ *   "hasYoutube" -> Tube
+ *   "comment" -> "comments"
+ */
+include_once("../include/common.inc.php");
+// NOTE(review): $basePath and $server are not defined in this file; presumably
+// they come from common.inc.php - confirm.
+// NOTE(review): the accompanying .gitmodules change registers lib/querypath,
+// but this script loads lib/phpquery - confirm phpQuery is actually available.
+require($basePath . 'lib/phpquery/phpQuery/phpQuery.php');
+
+setlocale(LC_CTYPE, 'C');
+
+$db = $server->get_db('disclosr-agencies');
+
+try {
+    $agencies = $db->get_view("app", "byCanonicalName", null, true)->rows;
+    foreach ($agencies as $agency) {
+        // Only touch agencies that have a website and have not been scraped yet.
+        if (isset($agency->value->metaTags) || !isset($agency->value->website)) {
+            continue;
+        }
+        echo $agency->value->name . " " . $agency->value->website . "<br />\n";
+
+        // A single unreachable site must not abort the whole batch. The agency
+        // is left without metaTags so that a later run retries it.
+        try {
+            $request = Requests::get($agency->value->website);
+        } catch (Requests_Exception $e) {
+            echo "Could not fetch website: "
+                . htmlspecialchars($e->getMessage(), ENT_QUOTES, 'UTF-8') . "<br>\n";
+            echo "<hr>";
+            flush();
+            continue;
+        }
+
+        $agency->value->metaTags = array();
+        $html = phpQuery::newDocumentHTML($request->body);
+        phpQuery::selectDocument($html);
+        foreach (pq('meta')->elements as $meta) {
+            $tagName = $meta->getAttribute('name');
+            $content = $meta->getAttribute('content');
+            // Skip <meta> elements without a name (e.g. http-equiv / charset).
+            if ($tagName !== "") {
+                // Scraped values come from third-party pages: escape before echoing.
+                echo htmlspecialchars($tagName, ENT_QUOTES, 'UTF-8')
+                    . " == "
+                    . htmlspecialchars($content, ENT_QUOTES, 'UTF-8')
+                    . " <br>\n";
+                $agency->value->metaTags[$tagName] = $content;
+            }
+        }
+        $db->save($agency->value);
+        echo "<hr>";
+        flush();
+    }
+} catch (SetteeRestClientException $e) {
+    setteErrorHandler($e);
+}
+?>
+