<?php | <?php |
include_once("../include/common.inc.php"); | include_once("../include/common.inc.php"); |
require($basePath . 'lib/phpquery/phpQuery/phpQuery.php'); | require($basePath . 'lib/phpquery/phpQuery/phpQuery.php'); |
setlocale(LC_CTYPE, 'C'); | setlocale(LC_CTYPE, 'C'); |
$db = $server->get_db('disclosr-agencies'); | $db = $server->get_db('disclosr-agencies'); |
// metatags | |
try { | |
$agencies = $db->get_view("app", "byCanonicalName", null, true)->rows; | |
//print_r($rows); | |
foreach ($agencies as $agency) { | |
if (isset($agency->value->scrapeDepth)) { | |
unset($agency->value->scrapeDepth); | |
} | |
if (isset($agency->value->lastScraped)) { | |
unset($agency->value->lastScraped); | |
} | |
$db->save($agency->value); | |
echo "<hr>"; | |
flush(); | |
} | |
} catch (SetteeRestClientException $e) { | |
setteErrorHandler($e); | |
} | |
// metatags | |
try { | try { |
$agencies = $db->get_view("app", "byCanonicalName", null, true)->rows; | $agencies = $db->get_view("app", "byCanonicalName", null, true)->rows; |
//print_r($rows); | //print_r($rows); |
foreach ($agencies as $agency) { | foreach ($agencies as $agency) { |
//echo $agency->value->name . " ".$agency->value->website."<br />\n"; | //echo $agency->value->name . " ".$agency->value->website."<br />\n"; |
// print_r($agency); | // print_r($agency); |
//hasRestricitiveLicence" hasRestrictiveLicense -> has Restrictive Licence | //hasRestricitiveLicence" hasRestrictiveLicense -> has Restrictive Licence |
// "hasYoutube" -> Tube | // "hasYoutube" -> Tube |
// "comment" -> "comments" | // "comment" -> "comments" |
if (!isset($agency->value->metaTags) && isset($agency->value->website)) { | if (!isset($agency->value->metaTags) && isset($agency->value->website)) { |
echo $agency->value->name . " ".$agency->value->website."<br />\n"; | echo $agency->value->name . " " . $agency->value->website . "<br />\n"; |
$agency->value->metaTags = Array(); | $agency->value->metaTags = Array(); |
$request = Requests::get($agency->value->website); | $request = Requests::get($agency->value->website); |
$html = phpQuery::newDocumentHTML($request->body); | $html = phpQuery::newDocumentHTML($request->body); |
phpQuery::selectDocument($html); | phpQuery::selectDocument($html); |
foreach (pq('meta')->elements as $meta) { | foreach (pq('meta')->elements as $meta) { |
$tagName = $meta->getAttribute('name');; | $tagName = $meta->getAttribute('name'); |
; | |
$content = $meta->getAttribute('content'); | $content = $meta->getAttribute('content'); |
if ($tagName != "") { | if ($tagName != "") { |
echo "$tagName == $content <br>\n"; | echo "$tagName == $content <br>\n"; |
$agency->value->metaTags[$tagName] = $content; | $agency->value->metaTags[$tagName] = $content; |
} | } |
} | } |
//print_r($agency->value->metaTags); | //print_r($agency->value->metaTags); |
$db->save($agency->value); | $db->save($agency->value); |
echo "<hr>"; | echo "<hr>"; |
flush(); | flush(); |
} | } |
} | } |
} catch (SetteeRestClientException $e) { | } catch (SetteeRestClientException $e) { |
setteErrorHandler($e); | setteErrorHandler($e); |
} | } |
?> | ?> |
<?php | <?php |
require_once '../include/common.inc.php'; | require_once '../include/common.inc.php'; |
$db = $server->get_db('disclosr-agencies'); | $db = $server->get_db('disclosr-agencies'); |
$rows = $db->get_view("app", "byName")->rows; | $rows = $db->get_view("app", "byName")->rows; |
$nametoid = Array(); | $nametoid = Array(); |
$accounts = Array(); | $accounts = Array(); |
foreach ($rows as $row) { | foreach ($rows as $row) { |
$nametoid[trim($row->key)] = $row->value; | $nametoid[trim($row->key)] = $row->value; |
} | } |
function extractCSVAccounts($url, $nameField, $accountField, $filter) { | function extractCSVAccounts($url, $nameField, $accountField, $filter) { |
global $accounts, $nametoid; | global $accounts, $nametoid; |
$request = Requests::get($url); | $request = Requests::get($url); |
echo $url; | echo $url; |
$Data = str_getcsv($request->body, "\n"); //parse the rows | $Data = str_getcsv($request->body, "\n"); //parse the rows |
$headers = Array(); | $headers = Array(); |
foreach ($Data as $num => $line) { | foreach ($Data as $num => $line) { |
$Row = str_getcsv($line, ","); | $Row = str_getcsv($line, ","); |
if ($num == 0) { | if ($num == 0) { |
$headers = $Row; | $headers = $Row; |
print_r($headers); | print_r($headers); |
} else { | } else { |
if (isset($Row[array_search($nameField, $headers)])) { | if (isset($Row[array_search($nameField, $headers)])) { |
$agencyName = $Row[array_search($nameField, $headers)]; | $agencyName = $Row[array_search($nameField, $headers)]; |
if (!in_array(trim($agencyName), array_keys($nametoid))) { | if (!in_array(trim($agencyName), array_keys($nametoid))) { |
echo "$agencyName missing" . PHP_EOL; | echo "$agencyName missing" . PHP_EOL; |
} else { | } else { |
echo $Row[array_search($nameField, $headers)] . PHP_EOL; | echo $Row[array_search($nameField, $headers)] . PHP_EOL; |
$accounts[$nametoid[trim($agencyName)]]["rtkURLs"][$agencyName] = 'http://www.righttoknow.org.au/body/'.$Row[array_search($accountField, $headers)]; | $accounts[$nametoid[trim($agencyName)]]["rtkURLs"][$agencyName] = 'http://www.righttoknow.org.au/body/'.$Row[array_search($accountField, $headers)]; |
$accounts[$nametoid[trim($agencyName)]]["rtkDescriptions"][$agencyName] = $Row[array_search("Notes", $headers)]; | |
} | } |
} else { | } else { |
echo "error finding any agency" . $line . PHP_EOL; | echo "error finding any agency" . $line . PHP_EOL; |
} | } |
} | } |
} | } |
} | } |
extractCSVAccounts("http://www.righttoknow.org.au/body/all-authorities.csv","Agency","URL name"); | extractCSVAccounts("http://www.righttoknow.org.au/body/all-authorities.csv","Agency","URL name"); |
print_r($accounts); | //print_r($accounts); |
/* foreach ($accounts as $id => $accountTypes) { | foreach ($accounts as $id => $allvalues) { |
echo $id . "<br>" . PHP_EOL; | echo $id . "<br>" . PHP_EOL; |
$doc = object_to_array($db->get($id)); | $doc = object_to_array($db->get($id)); |
// print_r($doc); | // print_r($doc); |
foreach ($accountTypes as $accountType => $accounts) { | foreach ($allvalues as $valueType => $values) { |
if (!isset($doc["has" . $accountType]) || !is_array($doc["has" . $accountType])) { | if (!isset($doc[ $valueType]) || !is_array($doc[ $valueType])) { |
$doc["has" . $accountType] = Array(); | $doc[ $valueType] = Array(); |
} | } |
$doc["has" . $accountType] = array_unique(array_merge($doc["has" . $accountType], $accounts)); | $doc[ $valueType] = array_unique(array_merge($doc[ $valueType], $values)); |
if ( $valueType == "rtkDescriptions") { | |
foreach ($values as $descriptionAgency => $descriptionValue) { | |
if ($descriptionAgency == $doc->value->name) { | |
$doc->value->description = $descriptionValue; | |
} | |
} | |
} | |
} | } |
$db->save($doc); | $db->save($doc); |
}*/ | } |
?> | ?> |
<?php | <?php |
require_once '../include/common.inc.php'; | require_once '../include/common.inc.php'; |
//function createFOIDocumentsDesignDoc() { | //function createFOIDocumentsDesignDoc() { |
$foidb = $server->get_db('disclosr-foidocuments'); | $foidb = $server->get_db('disclosr-foidocuments'); |
$obj = new stdClass(); | $obj = new stdClass(); |
$obj->_id = "_design/" . urlencode("app"); | $obj->_id = "_design/" . urlencode("app"); |
$obj->language = "javascript"; | $obj->language = "javascript"; |
$obj->views->all->map = "function(doc) { emit(doc._id, doc); };"; | $obj->views->all->map = "function(doc) { emit(doc._id, doc); };"; |
$obj->views->byDate->map = "function(doc) { emit(doc.date, doc); };"; | $obj->views->byDate->map = "function(doc) { emit(doc.date, doc); };"; |
$obj->views->byDate->reduce = "_count"; | |
$obj->views->byDateMonthYear->map = "function(doc) { emit(doc.date, doc); };"; | $obj->views->byDateMonthYear->map = "function(doc) { emit(doc.date, doc); };"; |
$obj->views->byDateMonthYear->reduce = "_count"; | $obj->views->byDateMonthYear->reduce = "_count"; |
$obj->views->byAgencyID->map = "function(doc) { emit(doc.agencyID, doc); };"; | $obj->views->byAgencyID->map = "function(doc) { emit(doc.agencyID, doc); };"; |
$obj->views->byAgencyID->reduce = "_count"; | $obj->views->byAgencyID->reduce = "_count"; |
// allow safe updates (even if slightly slower due to extra: rev-detection check). | // allow safe updates (even if slightly slower due to extra: rev-detection check). |
$foidb->save($obj, true); | $foidb->save($obj, true); |
//function createDocumentsDesignDoc() { | //function createDocumentsDesignDoc() { |
$docdb = $server->get_db('disclosr-documents'); | $docdb = $server->get_db('disclosr-documents'); |
$obj = new stdClass(); | $obj = new stdClass(); |
$obj->_id = "_design/" . urlencode("app"); | $obj->_id = "_design/" . urlencode("app"); |
$obj->language = "javascript"; | $obj->language = "javascript"; |
$obj->views->web_server->map = "function(doc) {\n emit(doc.web_server, 1);\n}"; | $obj->views->web_server->map = "function(doc) {\n emit(doc.web_server, 1);\n}"; |
$obj->views->web_server->reduce = "function (key, values, rereduce) {\n return sum(values);\n}"; | $obj->views->web_server->reduce = "function (key, values, rereduce) {\n return sum(values);\n}"; |
$obj->views->byAgency->map = "function(doc) {\n emit(doc.agencyID, 1);\n}"; | $obj->views->byAgency->map = "function(doc) {\n emit(doc.agencyID, 1);\n}"; |
$obj->views->byAgency->reduce = "function (key, values, rereduce) {\n return sum(values);\n}"; | $obj->views->byAgency->reduce = "function (key, values, rereduce) {\n return sum(values);\n}"; |
$obj->views->byURL->map = "function(doc) {\n emit(doc.url, doc);\n}"; | $obj->views->byURL->map = "function(doc) {\n emit(doc.url, doc);\n}"; |
$obj->views->agency->map = "function(doc) {\n emit(doc.agencyID, doc);\n}"; | $obj->views->agency->map = "function(doc) {\n emit(doc.agencyID, doc);\n}"; |
$obj->views->byWebServer->map = "function(doc) {\n emit(doc.web_server, doc);\n}"; | $obj->views->byWebServer->map = "function(doc) {\n emit(doc.web_server, doc);\n}"; |
$obj->views->getValidationRequired = "function(doc) {\nif (doc.mime_type == \"text/html\" \n&& typeof(doc.validation) == \"undefined\") {\n emit(doc._id, doc._attachments);\n}\n}"; | $obj->views->getValidationRequired = "function(doc) {\nif (doc.mime_type == \"text/html\" \n&& typeof(doc.validation) == \"undefined\") {\n emit(doc._id, doc._attachments);\n}\n}"; |
//function createAgencyDesignDoc() { | //function createAgencyDesignDoc() { |
$db = $server->get_db('disclosr-agencies'); | $db = $server->get_db('disclosr-agencies'); |
$obj = new stdClass(); | $obj = new stdClass(); |
$obj->_id = "_design/" . urlencode("app"); | $obj->_id = "_design/" . urlencode("app"); |
$obj->language = "javascript"; | $obj->language = "javascript"; |
$obj->views->all->map = "function(doc) { emit(doc._id, doc); };"; | $obj->views->all->map = "function(doc) { emit(doc._id, doc); };"; |
$obj->views->byABN->map = "function(doc) { emit(doc.abn, doc); };"; | $obj->views->byABN->map = "function(doc) { emit(doc.abn, doc); };"; |