RTK import
RTK import


Former-commit-id: 831a25e0eea93541a7ab3816694f3feeda047778

<?php

include_once("../include/common.inc.php");
require($basePath . 'lib/phpquery/phpQuery/phpQuery.php');

// Force the "C" locale for character classification.
setlocale(LC_CTYPE, 'C');

$db = $server->get_db('disclosr-agencies');

// Pass 1: remove the scrapeDepth / lastScraped fields from every agency document.
try {
    $agencies = $db->get_view("app", "byCanonicalName", null, true)->rows;
    foreach ($agencies as $agency) {
        $changed = false;
        foreach (array('scrapeDepth', 'lastScraped') as $field) {
            if (isset($agency->value->$field)) {
                unset($agency->value->$field);
                $changed = true;
            }
        }
        // Only write the document back when a field was actually removed,
        // so untouched documents do not receive a pointless new revision.
        if ($changed) {
            $db->save($agency->value);
            echo "<hr>";
            flush();
        }
    }
} catch (SetteeRestClientException $e) {
    setteErrorHandler($e);
}

// Pass 2: metatags - fetch each agency website and store its named <meta> tags.
try {
    $agencies = $db->get_view("app", "byCanonicalName", null, true)->rows;
    foreach ($agencies as $agency) {
        // Field rename notes kept from the original script:
        //   hasRestricitiveLicence / hasRestrictiveLicense -> has Restrictive Licence
        //   "hasYoutube" -> Tube
        //   "comment" -> "comments"

        // Skip agencies that already have metaTags or have no website to fetch.
        if (isset($agency->value->metaTags) || !isset($agency->value->website)) {
            continue;
        }

        echo htmlspecialchars($agency->value->name . " " . $agency->value->website, ENT_QUOTES, 'UTF-8') . "<br />\n";
        $agency->value->metaTags = array();

        $request = Requests::get($agency->value->website);
        $html = phpQuery::newDocumentHTML($request->body);
        phpQuery::selectDocument($html);

        foreach (pq('meta')->elements as $meta) {
            $tagName = $meta->getAttribute('name');
            $content = $meta->getAttribute('content');
            // Meta elements without a name attribute are ignored.
            if ($tagName != "") {
                // Tag text comes from a remote page, so escape it before echoing into HTML.
                echo htmlspecialchars("$tagName == $content", ENT_QUOTES, 'UTF-8') . " <br>\n";
                $agency->value->metaTags[$tagName] = $content;
            }
        }

        $db->save($agency->value);
        echo "<hr>";
        flush();
    }
} catch (SetteeRestClientException $e) {
    setteErrorHandler($e);
}
?>
   
<?php

require_once '../include/common.inc.php';

$db = $server->get_db('disclosr-agencies');

// Build a lookup of trimmed view key => view value from the "byName" view.
// The rest of the script uses the value as the document id passed to $db->get().
$rows = $db->get_view("app", "byName")->rows;
$nametoid = Array();
// Filled by extractCSVAccounts(): document id => collected values.
$accounts = Array();
foreach ($rows as $row) {
    $nametoid[trim($row->key)] = $row->value;
}
   
/**
 * Download a CSV of authorities and collect RightToKnow data for known agencies.
 *
 * The first CSV row is treated as the header row. For each later row whose
 * $nameField value (trimmed) is a key of the global $nametoid map, the global
 * $accounts array receives, under the mapped id:
 *   - ["rtkURLs"][agency name]         = 'http://www.righttoknow.org.au/body/' . value of $accountField
 *   - ["rtkDescriptions"][agency name] = value of the "Notes" column (only when that column exists)
 * Progress and problems are echoed as the rows are processed.
 *
 * @param string $url          Address of the CSV file to fetch.
 * @param string $nameField    Header of the column holding the agency name.
 * @param string $accountField Header of the column holding the RightToKnow URL name.
 * @param mixed  $filter       Unused by this function. Optional, because the
 *                             call in this script passes only three arguments.
 * @return void
 */
function extractCSVAccounts($url, $nameField, $accountField, $filter = null) {
    global $accounts, $nametoid;

    $request = Requests::get($url);
    echo $url;

    // Parse through a stream so quoted fields containing commas or line
    // breaks stay intact (splitting the body on "\n" first mangles them).
    $stream = fopen('php://temp', 'r+');
    fwrite($stream, $request->body);
    rewind($stream);

    $headers = fgetcsv($stream, 0, ",", '"', "\\");
    if (!is_array($headers)) {
        $headers = Array();
    }
    print_r($headers);

    // Resolve the column positions once; array_search() returns false when a
    // header is absent, which must not be used as an index (it would mean column 0).
    $nameColumn = array_search($nameField, $headers);
    $accountColumn = array_search($accountField, $headers);
    $notesColumn = array_search("Notes", $headers);
    if ($nameColumn === false || $accountColumn === false) {
        fclose($stream);
        echo "error finding columns $nameField / $accountField" . PHP_EOL;
        return;
    }

    while (($Row = fgetcsv($stream, 0, ",", '"', "\\")) !== false) {
        // fgetcsv() reports a blank line as an array holding a single null.
        if ($Row === array(null)) {
            continue;
        }
        if (!isset($Row[$nameColumn]) || !isset($Row[$accountColumn])) {
            echo "error finding any agency" . implode(",", $Row) . PHP_EOL;
            continue;
        }

        $agencyName = $Row[$nameColumn];
        $lookupName = trim($agencyName);
        if (!array_key_exists($lookupName, $nametoid)) {
            echo "$agencyName missing" . PHP_EOL;
            continue;
        }

        echo $agencyName . PHP_EOL;
        $id = $nametoid[$lookupName];
        $accounts[$id]["rtkURLs"][$agencyName] = 'http://www.righttoknow.org.au/body/' . $Row[$accountColumn];
        if ($notesColumn !== false && isset($Row[$notesColumn])) {
            $accounts[$id]["rtkDescriptions"][$agencyName] = $Row[$notesColumn];
        }
    }

    fclose($stream);
}
   
// Collect RightToKnow URLs and descriptions for every agency found in the CSV.
// The fourth argument is passed explicitly because the function declares four parameters.
extractCSVAccounts("http://www.righttoknow.org.au/body/all-authorities.csv", "Agency", "URL name", null);
//print_r($accounts);

// Merge the collected values into each agency document and save it.
foreach ($accounts as $id => $allvalues) {
    echo $id . "<br>" . PHP_EOL;
    $doc = object_to_array($db->get($id));
    // print_r($doc);

    foreach ($allvalues as $valueType => $values) {
        // Start from an empty list when the document has no usable value of this type yet.
        if (!isset($doc[$valueType]) || !is_array($doc[$valueType])) {
            $doc[$valueType] = Array();
        }
        $doc[$valueType] = array_unique(array_merge($doc[$valueType], $values));

        if ($valueType == "rtkDescriptions") {
            foreach ($values as $descriptionAgency => $descriptionValue) {
                // $doc is an array here (object_to_array), so fields are read by key.
                // NOTE(review): assumes the agency name is stored under "name" - confirm.
                if (isset($doc["name"]) && trim($descriptionAgency) == $doc["name"]) {
                    $doc["description"] = $descriptionValue;
                }
            }
        }
    }
    $db->save($doc);
}
?>
   
<?php

/**
 * Database class.
 */
class SetteeDatabase {

  /**
   * Base URL of the CouchDB REST API
   */
  private $conn_url;

  /**
   * HTTP REST Client instance
   */
  protected $rest_client;

  /**
   * Name of the database
   */
  private $dbname;

  /**
   * Default constructor
   *
   * Stores the connection URL and database name and obtains the REST client
   * instance for that URL from SetteeRestClient::get_instance().
   *
   * @param $conn_url
   *    Base URL of the CouchDB REST API.
   * @param $dbname
   *    Name of the database this object operates on.
   */
  function __construct($conn_url, $dbname) {
    $this->conn_url = $conn_url;
    $this->dbname = $dbname;
    $this->rest_client = SetteeRestClient::get_instance($this->conn_url);
  }
   
  /**
/** * Get UUID from CouchDB
* Get UUID from CouchDB *
* * @return
* @return * CouchDB-generated UUID string
* CouchDB-generated UUID string *
* */
*/ function gen_uuid() {
function gen_uuid() { $ret = $this->rest_client->http_get('_uuids');
$ret = $this->rest_client->http_get('_uuids'); return $ret['decoded']->uuids[0]; // should never be empty at this point, so no checking
return $ret['decoded']->uuids[0]; // should never be empty at this point, so no checking }
}  
  /**
/** * Create or update a document database
* Create or update a document database *
* * @param $document
* @param $document * PHP object, a PHP associative array, or a JSON String representing the document to be saved. PHP Objects and arrays are JSON-encoded automatically.
* PHP object, a PHP associative array, or a JSON String representing the document to be saved. PHP Objects and arrays are JSON-encoded automatically. *
* * <p>If $document has a an "_id" property set, it will be used as document's unique id (even for "create" operation).
* <p>If $document has a an "_id" property set, it will be used as document's unique id (even for "create" operation). * If "_id" is missing, CouchDB will be used to generate a UUID.
* If "_id" is missing, CouchDB will be used to generate a UUID. *
* * <p>If $document has a "_rev" property (revision), document will be updated, rather than creating a new document.
* <p>If $document has a "_rev" property (revision), document will be updated, rather than creating a new document. * You have to provide "_rev" if you want to update an existing document, otherwise operation will be assumed to be
* You have to provide "_rev" if you want to update an existing document, otherwise operation will be assumed to be * one of creation and you will get a duplicate document exception from CouchDB. Also, you may not provide "_rev" but
* one of creation and you will get a duplicate document exception from CouchDB. Also, you may not provide "_rev" but * not provide "_id" since that is an invalid input.
* not provide "_id" since that is an invalid input. *
* * @param $allowRevAutoDetection
* @param $allowRevAutoDetection * Default: false. When true and _rev is missing from the document, save() function will auto-detect latest revision
* Default: false. When true and _rev is missing from the document, save() function will auto-detect latest revision * for a document and use it. This option is "false" by default because it involves an extra http HEAD request and
* for a document and use it. This option is "false" by default because it involves an extra http HEAD request and * therefore can make save() operation slightly slower if such auto-detection is not required.
* therefore can make save() operation slightly slower if such auto-detection is not required. *
* * @return
* @return * document object with the database id (uuid) and revision attached;
* document object with the database id (uuid) and revision attached; *
* * @throws SetteeCreateDatabaseException
* @throws SetteeCreateDatabaseException */
*/ function save($document, $allowRevAutoDetection = false) {
function save($document, $allowRevAutoDetection = false) { if (is_string($document)) {
if (is_string($document)) { $document = json_decode($document);
$document = json_decode($document); }
}  
  // Allow passing of $document as an array (for syntactic simplicity and also because in JSON world it does not matter)
// Allow passing of $document as an array (for syntactic simplicity and also because in JSON world it does not matter) if (is_array($document)) {
if(is_array($document)) { $document = (object) $document;
$document = (object) $document; }
}  
  if (empty($document->_id) && empty($document->_rev)) {
if (empty($document->_id) && empty($document->_rev)) { $id = $this->gen_uuid();
$id = $this->gen_uuid(); } elseif (empty($document->_id) && !empty($document->_rev)) {
} throw new SetteeWrongInputException("Error: You can not save a document with a revision provided, but missing id");
elseif (empty($document->_id) && !empty($document->_rev)) { } else {
throw new SetteeWrongInputException("Error: You can not save a document with a revision provided, but missing id"); $id = $document->_id;
}  
else { if ($allowRevAutoDetection) {
$id = $document->_id; try {
  $rev = $this->get_rev($id);
if ($allowRevAutoDetection) { } catch (SetteeRestClientException $e) {
try { // auto-detection may fail legitimately, if a document has never been saved before (new doc), so skipping error
$rev = $this->get_rev($id); }
} catch (SetteeRestClientException $e) { if (!empty($rev)) {
// auto-detection may fail legitimately, if a document has never been saved before (new doc), so skipping error $document->_rev = $rev;
} }
if (!empty($rev)) { }
$document->_rev = $rev; }
}  
} $full_uri = $this->dbname . "/" . $this->safe_urlencode($id);
} $document_json = json_encode($document, JSON_NUMERIC_CHECK);
   
$full_uri = $this->dbname . "/" . $this->safe_urlencode($id); $ret = $this->rest_client->http_put($full_uri, $document_json);
$document_json = json_encode($document, JSON_NUMERIC_CHECK);  
  $document->_id = $ret['decoded']->id;
$ret = $this->rest_client->http_put($full_uri, $document_json); $document->_rev = $ret['decoded']->rev;
   
$document->_id = $ret['decoded']->id; return $document;
$document->_rev = $ret['decoded']->rev; }
   
return $document; /**
} * @param $doc
  * @param $name
/** * @param $content
* @param $doc * Content of the attachment in a string-buffer format. This function will automatically base64-encode content for
* @param $name * you, so you don't have to do it.
* @param $content * @param $mime_type
* Content of the attachment in a string-buffer format. This function will automatically base64-encode content for * Optional. Will be auto-detected if not provided
* you, so you don't have to do it. * @return void
* @param $mime_type */
* Optional. Will be auto-detected if not provided public function add_attachment($doc, $name, $content, $mime_type = null) {
* @return void if (empty($doc->_attachments) || !is_object($doc->_attachments)) {
*/ $doc->_attachments = new stdClass();
public function add_attachment($doc, $name, $content, $mime_type = null) { }
if (empty($doc->_attachments) || !is_object($doc->_attachments)) {  
$doc->_attachments = new stdClass(); if (empty($mime_type)) {
} $mime_type = $this->rest_client->content_mime_type($content);
  }
if (empty($mime_type)) {  
$mime_type = $this->rest_client->content_mime_type($content); $doc->_attachments->$name = new stdClass();
} $doc->_attachments->$name->content_type = $mime_type;
  $doc->_attachments->$name->data = base64_encode($content);
$doc->_attachments->$name = new stdClass(); }
$doc->_attachments->$name->content_type = $mime_type;  
$doc->_attachments->$name->data = base64_encode($content); /**
} * @param $doc
  * @param $name
/** * @param $file
* @param $doc * Full path to a file (e.g. as returned by PHP's realpath function).
* @param $name * @param $mime_type
* @param $file * Optional. Will be auto-detected if not provided
* Full path to a file (e.g. as returned by PHP's realpath function). * @return void
* @param $mime_type */
* Optional. Will be auto-detected if not provided public function add_attachment_file($doc, $name, $file, $mime_type = null) {
* @return void $content = file_get_contents($file);
*/ $this->add_attachment($doc, $name, $content, $mime_type);
public function add_attachment_file($doc, $name, $file, $mime_type = null) { }
$content = file_get_contents($file);  
$this->add_attachment($doc, $name, $content, $mime_type); /**
} *
  * Retrieve a document from CouchDB
/** *
* * @throws SetteeWrongInputException
* Retrieve a document from CouchDB *
* * @param $id
* @throws SetteeWrongInputException * Unique ID (usually: UUID) of the document to be retrieved.
* * @return
* @param $id * database document in PHP object format.
* Unique ID (usually: UUID) of the document to be retrieved. */
* @return function get($id) {
* database document in PHP object format. if (empty($id)) {
*/ throw new SetteeWrongInputException("Error: Can't retrieve a document without a uuid.");
function get($id) { }
if (empty($id)) {  
throw new SetteeWrongInputException("Error: Can't retrieve a document without a uuid."); $full_uri = $this->dbname . "/" . $this->safe_urlencode($id);
} $full_uri = str_replace("%3Frev%3D", "?rev=", $full_uri);
  $ret = $this->rest_client->http_get($full_uri);
$full_uri = $this->dbname . "/" . $this->safe_urlencode($id); return $ret['decoded'];
$full_uri = str_replace("%3Frev%3D","?rev=",$full_uri); }
$ret = $this->rest_client->http_get($full_uri);  
return $ret['decoded']; /**
} *
  * Get the latest revision of a document with document id: $id in CouchDB.
/** *
* * @throws SetteeWrongInputException
* Get the latest revision of a document with document id: $id in CouchDB. *
* * @param $id
* @throws SetteeWrongInputException * Unique ID (usually: UUID) of the document to be retrieved.
* * @return
* @param $id * database document in PHP object format.
* Unique ID (usually: UUID) of the document to be retrieved. */
* @return function get_rev($id) {
* database document in PHP object format. if (empty($id)) {
*/ throw new SetteeWrongInputException("Error: Can't query a document without a uuid.");
function get_rev($id) { }
if (empty($id)) {