<?php
/**
 * Maintenance script for the 'disclosr-agencies' database.
 *
 * Pass 1: removes the `scrapeDepth` and `lastScraped` fields from every
 *         agency document returned by the app/byCanonicalName view.
 * Pass 2: for every agency that has a `website` but no `metaTags` field,
 *         downloads the website, collects its <meta name="..." content="...">
 *         pairs and stores them on the document as `metaTags`.
 *
 * Progress is echoed as HTML and flushed after each agency.
 *
 * Relies on $basePath, $server and setteErrorHandler() being provided by
 * ../include/common.inc.php, and on the Requests and phpQuery libraries.
 */
include_once("../include/common.inc.php");
require($basePath . 'lib/phpquery/phpQuery/phpQuery.php');

// NOTE(review): presumably pins character classification to the C locale so
// string handling in the parser is locale-independent — confirm intent.
setlocale(LC_CTYPE, 'C');

$db = $server->get_db('disclosr-agencies');

// Pass 1: strip stale scrape bookkeeping fields.
try {
    $agencies = $db->get_view("app", "byCanonicalName", null, true)->rows;
    foreach ($agencies as $agency) {
        $changed = false;
        foreach (array('scrapeDepth', 'lastScraped') as $field) {
            if (isset($agency->value->$field)) {
                unset($agency->value->$field);
                $changed = true;
            }
        }
        // Only write back documents that were actually modified; saving an
        // untouched document would just create a pointless new revision.
        if ($changed) {
            $db->save($agency->value);
        }
        echo "<hr>";
        flush();
    }
} catch (SetteeRestClientException $e) {
    setteErrorHandler($e);
}

// Pass 2: harvest meta tags for agencies that do not have them yet.
// The view is queried again so documents carry the revisions written above.
// Field-name cleanups noted by the original author (not performed here):
//   hasRestricitiveLicence / hasRestrictiveLicense -> hasRestrictiveLicence,
//   hasYoutube -> hasYouTube, comment -> comments.
try {
    $agencies = $db->get_view("app", "byCanonicalName", null, true)->rows;
    foreach ($agencies as $agency) {
        if (isset($agency->value->metaTags) || !isset($agency->value->website)) {
            continue;
        }

        // Name and URL come from stored data; escape before emitting HTML.
        echo htmlspecialchars($agency->value->name . " " . $agency->value->website, ENT_QUOTES, 'UTF-8') . "<br />\n";

        $metaTags = array();
        $document = null;
        $failure = null;
        try {
            $response = Requests::get($agency->value->website);
            $document = phpQuery::newDocumentHTML($response->body);
            phpQuery::selectDocument($document);
            foreach (pq('meta')->elements as $meta) {
                $tagName = $meta->getAttribute('name');
                $content = $meta->getAttribute('content');
                // getAttribute() yields '' when the attribute is absent.
                if ($tagName !== "") {
                    // Remote page content is untrusted: escape for display,
                    // but store the raw value.
                    echo htmlspecialchars("$tagName == $content", ENT_QUOTES, 'UTF-8') . " <br>\n";
                    $metaTags[$tagName] = $content;
                }
            }
        } catch (Exception $e) {
            // A single unreachable or unparsable site must not abort the run.
            $failure = $e;
        }

        // phpQuery keeps every loaded document in a static registry; release
        // it so memory does not grow with the number of agencies.
        if ($document !== null) {
            phpQuery::unloadDocuments($document->getDocumentID());
        }

        if ($failure !== null) {
            // Leave metaTags unset so this agency is retried on the next run.
            echo "Skipped: " . htmlspecialchars($failure->getMessage(), ENT_QUOTES, 'UTF-8') . "<br />\n<hr>";
            flush();
            continue;
        }

        $agency->value->metaTags = $metaTags;
        $db->save($agency->value);
        echo "<hr>";
        flush();
    }
} catch (SetteeRestClientException $e) {
    setteErrorHandler($e);
}