agls function scraper
agls function scraper


Former-commit-id: 174e97ffdeaeded56d2cc86d961d05b6f1a0c427

[submodule "couchdb/couchdb-lucene"] [submodule "couchdb/couchdb-lucene"]
path = couchdb/couchdb-lucene path = couchdb/couchdb-lucene
url = https://github.com/rnewson/couchdb-lucene.git url = https://github.com/rnewson/couchdb-lucene.git
[submodule "couchdb/settee"] [submodule "couchdb/settee"]
path = couchdb/settee path = couchdb/settee
url = https://github.com/inadarei/settee.git url = https://github.com/inadarei/settee.git
[submodule "lib/php-diff"] [submodule "lib/php-diff"]
path = lib/php-diff path = lib/php-diff
url = https://github.com/chrisboulton/php-diff.git url = https://github.com/chrisboulton/php-diff.git
[submodule "lib/Requests"] [submodule "lib/Requests"]
path = lib/Requests path = lib/Requests
url = https://github.com/rmccue/Requests.git url = https://github.com/rmccue/Requests.git
[submodule "javascripts/flotr2"] [submodule "javascripts/flotr2"]
path = javascripts/flotr2 path = javascripts/flotr2
url = https://github.com/HumbleSoftware/Flotr2.git url = https://github.com/HumbleSoftware/Flotr2.git
[submodule "lib/phpquery"] [submodule "lib/phpquery"]
path = lib/phpquery path = lib/phpquery
url = https://github.com/TobiaszCudnik/phpquery.git url = https://github.com/TobiaszCudnik/phpquery.git
[submodule "javascripts/sigma"] [submodule "javascripts/sigma"]
path = javascripts/sigma path = javascripts/sigma
url = https://github.com/jacomyal/sigma.js.git url = https://github.com/jacomyal/sigma.js.git
[submodule "javascripts/bubbletree"] [submodule "javascripts/bubbletree"]
path = javascripts/bubbletree path = javascripts/bubbletree
url = https://github.com/okfn/bubbletree.git url = https://github.com/okfn/bubbletree.git
  [submodule "lib/querypath"]
  path = lib/querypath
  url = https://github.com/technosophos/querypath.git
   
<?php <?php
// NOTE(review): every line in this span carries its statement twice (it looks
// like the old and new columns of a side-by-side diff were fused together).
// The only visible difference between the two copies is the include_header()
// call, which gains the 'Fix Conflicts' argument in the second copy.
//
// Purpose: list documents of the 'disclosr-agencies' database returned by the
// "app/getConflicts" view and, for each conflicting revision, print a
// "curl -X DELETE" command plus a side-by-side HTML diff against the current
// revision of the document.
   
include_once('../include/common.inc.php'); include_once('../include/common.inc.php');
include_header(); include_header('Fix Conflicts');
require_once '../lib/php-diff/lib/Diff.php'; require_once '../lib/php-diff/lib/Diff.php';
require_once '../lib/php-diff/lib/Diff/Renderer/Html/SideBySide.php'; require_once '../lib/php-diff/lib/Diff/Renderer/Html/SideBySide.php';
   
// $server is not defined in this span; presumably it comes from common.inc.php - TODO confirm.
$db = $server->get_db('disclosr-agencies'); $db = $server->get_db('disclosr-agencies');
   
try { try {
// Each row's id is used as the document id and its value is iterated as a
// list of conflicting revision identifiers.
$rows = $db->get_view("app", "getConflicts", null, true)->rows; $rows = $db->get_view("app", "getConflicts", null, true)->rows;
//print_r($rows); //print_r($rows);
foreach ($rows as $row) { foreach ($rows as $row) {
echo "<h2>".$row->id."</h2>"; echo "<h2>".$row->id."</h2>";
// Fetch the current revision over HTTP, sort its keys, and split the
// re-encoded JSON on commas so the diff library gets an array of fragments.
$request = Requests::get($serverAddr."disclosr-agencies/".$row->id); $request = Requests::get($serverAddr."disclosr-agencies/".$row->id);
$origSort = object_to_array(json_decode($request->body)); $origSort = object_to_array(json_decode($request->body));
ksort($origSort); ksort($origSort);
$origDoc = explode(",",json_encode($origSort)); $origDoc = explode(",",json_encode($origSort));
foreach($row->value as $conflictRev) { foreach($row->value as $conflictRev) {
// Same fetch/sort/split treatment for the conflicting revision (?rev=...).
$conflictURL = $serverAddr."disclosr-agencies/".$row->id."?rev=".$conflictRev; $conflictURL = $serverAddr."disclosr-agencies/".$row->id."?rev=".$conflictRev;
$request = Requests::get($conflictURL); $request = Requests::get($conflictURL);
$conflictSort = object_to_array(json_decode($request->body)); $conflictSort = object_to_array(json_decode($request->body));
ksort($conflictSort); ksort($conflictSort);
$conflictDoc = explode(",",json_encode($conflictSort)); $conflictDoc = explode(",",json_encode($conflictSort));
// Only prints the command for the operator to run; nothing is deleted here.
echo "curl -X DELETE ".$conflictURL."<br>".PHP_EOL; echo "curl -X DELETE ".$conflictURL."<br>".PHP_EOL;
// Options for generating the diff // Options for generating the diff
$options = array( $options = array(
//'ignoreWhitespace' => true, //'ignoreWhitespace' => true,
//'ignoreCase' => true, //'ignoreCase' => true,
); );
   
// Initialize the diff class // Initialize the diff class
$diff = new Diff($conflictDoc, $origDoc, $options); $diff = new Diff($conflictDoc, $origDoc, $options);
   
// Generate a side by side diff // Generate a side by side diff
$renderer = new Diff_Renderer_Html_SideBySide; $renderer = new Diff_Renderer_Html_SideBySide;
echo $diff->Render($renderer); echo $diff->Render($renderer);
} }
// NOTE(review): die() sits inside the outer foreach, so the script stops after
// the first row and include_footer() below is not reached whenever the view
// returns at least one row - confirm this one-at-a-time behaviour is intended.
die(); die();
} }
} catch (SetteeRestClientException $e) { } catch (SetteeRestClientException $e) {
setteErrorHandler($e); setteErrorHandler($e);
} }
   
include_footer(); include_footer();
?> ?>
   
  <?php
 
  include_once("../include/common.inc.php");
  require($basePath . 'lib/phpquery/phpQuery/phpQuery.php');
 
  setlocale(LC_CTYPE, 'C');
 
 
  $db = $server->get_db('disclosr-agencies');
 
  try {
  $agencies = $db->get_view("app", "byCanonicalName", null, true)->rows;
  //print_r($rows);
  foreach ($agencies as $agency) {
  //echo $agency->value->name . " ".$agency->value->website."<br />\n";
  // print_r($agency);
  //hasRestricitiveLicence" hasRestrictiveLicense -> has Restrictive Licence
  // "hasYoutube" -> Tube
  // "comment" -> "comments"
  if (!isset($agency->value->metaTags) && isset($agency->value->website)) {
  echo $agency->value->name . " ".$agency->value->website."<br />\n";
  $agency->value->metaTags = Array();
  $request = Requests::get($agency->value->website);
  $html = phpQuery::newDocumentHTML($request->body);
  phpQuery::selectDocument($html);
  foreach (pq('meta')->elements as $meta) {
  $tagName = $meta->getAttribute('name');;
  $content = $meta->getAttribute('content');
  if ($tagName != "") {
  echo "$tagName == $content <br>\n";
  $agency->value->metaTags[$tagName] = $content;
  }
  }
  //print_r($agency->value->metaTags);
  $db->save($agency->value);
  echo "<hr>";
  flush();
  }
  }
  } catch (SetteeRestClientException $e) {
  setteErrorHandler($e);
  }
  ?>