<?php
// Script with two passes over the agencies in the 'disclosr-agencies' database:
//   1. remove the scrape bookkeeping fields (scrapeDepth, lastScraped) from every agency;
//   2. fetch each agency's website and store its named <meta> tags on the agency document.
include_once("../include/common.inc.php");
require($basePath . 'lib/phpquery/phpQuery/phpQuery.php');
setlocale(LC_CTYPE, 'C');
$db = $server->get_db('disclosr-agencies');

// Pass 1: reset scrape state. Every agency is saved, whether or not a field was removed.
try {
    $agencies = $db->get_view("app", "byCanonicalName", null, true)->rows;
    foreach ($agencies as $agency) {
        if (isset($agency->value->scrapeDepth)) {
            unset($agency->value->scrapeDepth);
        }
        if (isset($agency->value->lastScraped)) {
            unset($agency->value->lastScraped);
        }
        $db->save($agency->value);
        echo "<hr>";
        flush();
    }
} catch (SetteeRestClientException $e) {
    setteErrorHandler($e);
}

// Pass 2: metatags
try {
    // The view is queried again so this pass works on the documents as saved by pass 1.
    $agencies = $db->get_view("app", "byCanonicalName", null, true)->rows;
    foreach ($agencies as $agency) {
        // Field rename notes carried over from the original script:
        //   hasRestricitiveLicence / hasRestrictiveLicense -> has Restrictive Licence
        //   "hasYoutube" -> Tube
        //   "comment" -> "comments"

        // Only agencies that have a website and no metaTags recorded yet are scraped.
        if (!isset($agency->value->metaTags) && isset($agency->value->website)) {
            // Agency data and scraped page content are untrusted text: escape before echoing into HTML.
            echo htmlspecialchars(
                $agency->value->name . " " . $agency->value->website,
                ENT_QUOTES | ENT_SUBSTITUTE,
                'UTF-8'
            ) . "<br />\n";

            $agency->value->metaTags = array();
            $request = Requests::get($agency->value->website);
            $html = phpQuery::newDocumentHTML($request->body);
            phpQuery::selectDocument($html);

            foreach (pq('meta')->elements as $meta) {
                $tagName = $meta->getAttribute('name');
                $content = $meta->getAttribute('content');
                // <meta> elements without a name attribute (charset, http-equiv, ...) are ignored.
                if ($tagName != "") {
                    echo htmlspecialchars("$tagName == $content", ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . " <br>\n";
                    $agency->value->metaTags[$tagName] = $content;
                }
            }

            $db->save($agency->value);
            echo "<hr>";
            flush();
        }
    }
} catch (SetteeRestClientException $e) {
    setteErrorHandler($e);
}
?>
<?php
// Opens the agency database and indexes the "byName" view by trimmed key
// for the CSV import that follows.
require_once '../include/common.inc.php';
$db = $server->get_db('disclosr-agencies');
$rows = $db->get_view("app", "byName")->rows;

// Values collected per agency; filled by extractCSVAccounts().
$accounts = array();

// Map: trimmed view key (agency name) => view value
// (presumably the agency document id -- confirm against the "byName" view).
$nametoid = array();
foreach ($rows as $agencyRow) {
    $agencyKey = trim($agencyRow->key);
    $nametoid[$agencyKey] = $agencyRow->value;
}
/**
 * Downloads a CSV file and, for every data row whose agency name is a key of the
 * global $nametoid map, records the agency's RightToKnow URL and notes in the
 * global $accounts array:
 *
 *   $accounts[<id>]["rtkURLs"][<agency name>]         = 'http://www.righttoknow.org.au/body/' . <account column>
 *   $accounts[<id>]["rtkDescriptions"][<agency name>] = <"Notes" column>
 *
 * Progress and problems are echoed as the rows are processed.
 *
 * @param $url
 *      Address of the CSV file; the first line must be the header row.
 * @param $nameField
 *      Header of the column holding the agency name.
 * @param $accountField
 *      Header of the column holding the RightToKnow URL name.
 * @param $filter
 *      Currently unused. Optional so that three-argument calls keep working.
 */
function extractCSVAccounts($url, $nameField, $accountField, $filter = null) {
    global $accounts, $nametoid;

    $request = Requests::get($url);
    echo $url;

    // Splitting on "\n" through str_getcsv keeps the original row parsing.
    $Data = str_getcsv($request->body, "\n"); //parse the rows

    // Column positions are resolved once from the header row. They stay false when a
    // header is absent, so a missing column is never silently read as column 0.
    $nameColumn = false;
    $accountColumn = false;
    $notesColumn = false;

    foreach ($Data as $num => $line) {
        // A trailing newline yields an empty final "row"; there is nothing to import from it.
        if ($line === null || trim($line) === '') {
            continue;
        }

        $Row = str_getcsv($line, ",");

        if ($num == 0) {
            $headers = $Row;
            print_r($headers);
            $nameColumn = array_search($nameField, $headers, true);
            $accountColumn = array_search($accountField, $headers, true);
            $notesColumn = array_search("Notes", $headers, true);
            continue;
        }

        if ($nameColumn === false || !isset($Row[$nameColumn])) {
            echo "error finding any agency" . $line . PHP_EOL;
            continue;
        }

        $agencyName = $Row[$nameColumn];
        $agencyKey = trim($agencyName);

        // Direct key lookup instead of a loose in_array() scan over array_keys().
        if (!array_key_exists($agencyKey, $nametoid)) {
            echo "$agencyName missing" . PHP_EOL;
            continue;
        }

        echo $agencyName . PHP_EOL;
        $agencyId = $nametoid[$agencyKey];

        if ($accountColumn !== false && isset($Row[$accountColumn])) {
            $accounts[$agencyId]["rtkURLs"][$agencyName] = 'http://www.righttoknow.org.au/body/' . $Row[$accountColumn];
        }
        if ($notesColumn !== false && isset($Row[$notesColumn])) {
            $accounts[$agencyId]["rtkDescriptions"][$agencyName] = $Row[$notesColumn];
        }
    }
}
// Import the RightToKnow authority list into $accounts.
// extractCSVAccounts() declares four parameters; the last one ($filter) is passed
// explicitly because omitting a required argument raises ArgumentCountError on PHP 7.1+.
extractCSVAccounts("http://www.righttoknow.org.au/body/all-authorities.csv", "Agency", "URL name", null);
print_r($accounts);

// The write-back loop below is disabled, so the script currently only prints what it collected.
// NOTE(review) before re-enabling it:
//   - $valueType and $values are never defined in this draft (the loops bind
//     $accountType and $accounts instead);
//   - the inner foreach overwrites the global $accounts that the outer foreach iterates;
//   - $doc comes from object_to_array() (presumably an array -- confirm), yet it is
//     accessed as an object via $doc->value->name / $doc->value->description.
/* foreach ($accounts as $id => $accountTypes) {
    echo $id . "<br>" . PHP_EOL;
    $doc = object_to_array($db->get($id));
    // print_r($doc);
    foreach ($accountTypes as $accountType => $accounts) {
        if (!isset($doc["has" . $accountType]) || !is_array($doc["has" . $accountType])) {
            $doc["has" . $accountType] = Array();
        }
        $doc["has" . $accountType] = array_unique(array_merge($doc["has" . $accountType], $accounts));
        if ( $valueType == "rtkDescriptions") {
            foreach ($values as $descriptionAgency => $descriptionValue) {
                if ($descriptionAgency == $doc->value->name) {
                    $doc->value->description = $descriptionValue;
                }
            }
        }
    }
    $db->save($doc);
}*/
?>
<?php | <?php |
/** | /** |
* Database class. | * Database class. |
*/ | */ |
class SetteeDatabase { | class SetteeDatabase { |
/** | /** |
* Base URL of the CouchDB REST API | * Base URL of the CouchDB REST API |
*/ | */ |
private $conn_url; | private $conn_url; |
/** | /** |
* HTTP REST Client instance | * HTTP REST Client instance |
*/ | */ |
protected $rest_client; | protected $rest_client; |
/** | /** |
* Name of the database | * Name of the database |
*/ | */ |
private $dbname; | private $dbname; |
/**
 * Creates a handle for one database on a CouchDB server.
 *
 * @param $conn_url
 *      Base URL of the CouchDB REST API
 * @param $dbname
 *      Name of the database
 */
function __construct($conn_url, $dbname) {
    $this->dbname = $dbname;
    $this->conn_url = $conn_url;
    // The REST client is obtained for the same base URL that was just stored.
    $this->rest_client = SetteeRestClient::get_instance($conn_url);
}
/**
 * Asks CouchDB for a generated UUID (GET on "_uuids").
 *
 * @return
 *      The first UUID string of the decoded response
 *
 */
function gen_uuid() {
    $response = $this->rest_client->http_get('_uuids');
    $uuids = $response['decoded']->uuids;
    // The list is expected to be non-empty at this point, so it is not checked.
    return $uuids[0];
}
/** | |
/** | * Create or update a document database |
* Create or update a document database | * |
* | * @param $document |
* @param $document | * PHP object, a PHP associative array, or a JSON String representing the document to be saved. PHP Objects and arrays are JSON-encoded automatically. |
* PHP object, a PHP associative array, or a JSON String representing the document to be saved. PHP Objects and arrays are JSON-encoded automatically. | * |
* | * <p>If $document has a an "_id" property set, it will be used as document's unique id (even for "create" operation). |
* <p>If $document has a an "_id" property set, it will be used as document's unique id (even for "create" operation). | * If "_id" is missing, CouchDB will be used to generate a UUID. |
* If "_id" is missing, CouchDB will be used to generate a UUID. | * |
* | * <p>If $document has a "_rev" property (revision), document will be updated, rather than creating a new document. |
* <p>If $document has a "_rev" property (revision), document will be updated, rather than creating a new document. | * You have to provide "_rev" if you want to update an existing document, otherwise operation will be assumed to be |
* You have to provide "_rev" if you want to update an existing document, otherwise operation will be assumed to be | * one of creation and you will get a duplicate document exception from CouchDB. Also, you may not provide "_rev" but |
* one of creation and you will get a duplicate document exception from CouchDB. Also, you may not provide "_rev" but | * not provide "_id" since that is an invalid input. |
* not provide "_id" since that is an invalid input. | * |
* | * @param $allowRevAutoDetection |
* @param $allowRevAutoDetection | * Default: false. When true and _rev is missing from the document, save() function will auto-detect latest revision |
* Default: false. When true and _rev is missing from the document, save() function will auto-detect latest revision | * for a document and use it. This option is "false" by default because it involves an extra http HEAD request and |
* for a document and use it. This option is "false" by default because it involves an extra http HEAD request and | * therefore can make save() operation slightly slower if such auto-detection is not required. |
* therefore can make save() operation slightly slower if such auto-detection is not required. | * |
* | * @return |
* @return | * document object with the database id (uuid) and revision attached; |
* document object with the database id (uuid) and revision attached; | * |
* | * @throws SetteeCreateDatabaseException |
* @throws SetteeCreateDatabaseException | */ |
*/ | function save($document, $allowRevAutoDetection = false) { |
function save($document, $allowRevAutoDetection = false) { | if (is_string($document)) { |
if (is_string($document)) { | $document = json_decode($document); |
$document = json_decode($document); | } |
} | |
// Allow passing of $document as an array (for syntactic simplicity and also because in JSON world it does not matter) | |
// Allow passing of $document as an array (for syntactic simplicity and also because in JSON world it does not matter) | if (is_array($document)) { |
if(is_array($document)) { | $document = (object) $document; |