From: Maxious Date: Sat, 01 Sep 2012 12:28:07 +0000 Subject: agls function scraper X-Git-Url: https://maxious.lambdacomplex.org/git/?p=disclosr.git&a=commitdiff&h=874dfff3f944677ce84039b23ac19a93b445d8c9 --- agls function scraper Former-commit-id: 174e97ffdeaeded56d2cc86d961d05b6f1a0c427 --- --- a/.gitmodules +++ b/.gitmodules @@ -22,4 +22,7 @@ [submodule "javascripts/bubbletree"] path = javascripts/bubbletree url = https://github.com/okfn/bubbletree.git +[submodule "lib/querypath"] + path = lib/querypath + url = https://github.com/technosophos/querypath.git --- a/admin/conflicts.php +++ b/admin/conflicts.php @@ -1,7 +1,7 @@ get_db('disclosr-agencies'); + +try { + $agencies = $db->get_view("app", "byCanonicalName", null, true)->rows; + //print_r($rows); + foreach ($agencies as $agency) { + //echo $agency->value->name . " ".$agency->value->website."
\n"; + // print_r($agency); + //hasRestricitiveLicence" hasRestrictiveLicense -> has Restrictive Licence + // "hasYoutube" -> Tube + // "comment" -> "comments" + if (!isset($agency->value->metaTags) && isset($agency->value->website)) { + echo $agency->value->name . " ".$agency->value->website."
\n"; + $agency->value->metaTags = Array(); + $request = Requests::get($agency->value->website); + $html = phpQuery::newDocumentHTML($request->body); + phpQuery::selectDocument($html); + foreach (pq('meta')->elements as $meta) { + $tagName = $meta->getAttribute('name');; + $content = $meta->getAttribute('content'); + if ($tagName != "") { +echo "$tagName == $content
\n"; + $agency->value->metaTags[$tagName] = $content; + } + } + //print_r($agency->value->metaTags); + $db->save($agency->value); + echo "
"; + flush(); + } + } +} catch (SetteeRestClientException $e) { + setteErrorHandler($e); +} +?> +