RTK import
Former-commit-id: 831a25e0eea93541a7ab3816694f3feeda047778
--- a/admin/genericAgencyFixer.php
+++ b/admin/genericAgencyFixer.php
@@ -7,28 +7,48 @@
$db = $server->get_db('disclosr-agencies');
+// strip the scrapeDepth and lastScraped fields from every agency document
+try {
+ $agencies = $db->get_view("app", "byCanonicalName", null, true)->rows;
+ //print_r($rows);
+ foreach ($agencies as $agency) {
+ if (isset($agency->value->scrapeDepth)) {
+ unset($agency->value->scrapeDepth);
+ }
+ if (isset($agency->value->lastScraped)) {
+ unset($agency->value->lastScraped);
+ }
+ $db->save($agency->value);
+ echo "<hr>";
+ flush();
+ }
+} catch (SetteeRestClientException $e) {
+ setteErrorHandler($e);
+}
+// metatags
try {
$agencies = $db->get_view("app", "byCanonicalName", null, true)->rows;
//print_r($rows);
foreach ($agencies as $agency) {
//echo $agency->value->name . " ".$agency->value->website."<br />\n";
- // print_r($agency);
+ // print_r($agency);
//hasRestricitiveLicence" hasRestrictiveLicense -> has Restrictive Licence
// "hasYoutube" -> Tube
// "comment" -> "comments"
if (!isset($agency->value->metaTags) && isset($agency->value->website)) {
- echo $agency->value->name . " ".$agency->value->website."<br />\n";
+ echo $agency->value->name . " " . $agency->value->website . "<br />\n";
$agency->value->metaTags = Array();
$request = Requests::get($agency->value->website);
$html = phpQuery::newDocumentHTML($request->body);
phpQuery::selectDocument($html);
foreach (pq('meta')->elements as $meta) {
- $tagName = $meta->getAttribute('name');;
+ $tagName = $meta->getAttribute('name');
+ ;
$content = $meta->getAttribute('content');
if ($tagName != "") {
-echo "$tagName == $content <br>\n";
- $agency->value->metaTags[$tagName] = $content;
+ echo "$tagName == $content <br>\n";
+ $agency->value->metaTags[$tagName] = $content;
}
}
//print_r($agency->value->metaTags);
--- a/admin/importRTKbodies.php
+++ b/admin/importRTKbodies.php
@@ -29,6 +29,7 @@
} else {
echo $Row[array_search($nameField, $headers)] . PHP_EOL;
$accounts[$nametoid[trim($agencyName)]]["rtkURLs"][$agencyName] = 'http://www.righttoknow.org.au/body/'.$Row[array_search($accountField, $headers)];
+ $accounts[$nametoid[trim($agencyName)]]["rtkDescriptions"][$agencyName] = $Row[array_search("Notes", $headers)];
}
} else {
echo "error finding any agency" . $line . PHP_EOL;
@@ -38,19 +39,26 @@
}
extractCSVAccounts("http://www.righttoknow.org.au/body/all-authorities.csv","Agency","URL name");
-print_r($accounts);
-/* foreach ($accounts as $id => $accountTypes) {
+//print_r($accounts);
+ foreach ($accounts as $id => $allvalues) {
echo $id . "<br>" . PHP_EOL;
$doc = object_to_array($db->get($id));
// print_r($doc);
- foreach ($accountTypes as $accountType => $accounts) {
- if (!isset($doc["has" . $accountType]) || !is_array($doc["has" . $accountType])) {
- $doc["has" . $accountType] = Array();
+ foreach ($allvalues as $valueType => $values) {
+ if (!isset($doc[ $valueType]) || !is_array($doc[ $valueType])) {
+ $doc[ $valueType] = Array();
}
- $doc["has" . $accountType] = array_unique(array_merge($doc["has" . $accountType], $accounts));
+ $doc[ $valueType] = array_unique(array_merge($doc[ $valueType], $values));
+        if ($valueType == "rtkDescriptions") { // use the RTK notes of the entry whose name matches this agency as its description
+            foreach ($values as $descriptionAgency => $descriptionValue) {
+                if (isset($doc["name"]) && $descriptionAgency == $doc["name"]) { // $doc is an array (object_to_array), not a view row object
+                    $doc["description"] = $descriptionValue;
+                }
+            }
+        }
}
$db->save($doc);
-}*/
+}
?>
--- a/couchdb/settee/src/classes/SetteeDatabase.class.php
+++ b/couchdb/settee/src/classes/SetteeDatabase.class.php
@@ -267,12 +267,11 @@
if ($startdocid != null) {
$data .= "&startkey_docid='$startdocid'";
}
- if ($reduce != null) {
- if ($reduce == true) {
+ if ($reduce === true) {
$data .= "&reduce=true";
- } else {
+ } else if ($reduce === false){
+
$data .= "&reduce=false";
- }
}
if ($limit) {
$data .= "&limit=" . $limit;
@@ -314,3 +313,4 @@
}
}
+
--- a/couchdb/settee/src/classes/SetteeRestClient.class.php
+++ b/couchdb/settee/src/classes/SetteeRestClient.class.php
@@ -244,3 +244,4 @@
}
class SetteeRestClientException extends Exception {}
+
--- a/documents/scrapers/0049d35216493c545ef5f7f000e6b252.py
+++ b/documents/scrapers/0049d35216493c545ef5f7f000e6b252.py
@@ -2,7 +2,14 @@
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import genericScrapers
-
+import traceback
+try:
+ import amonpy
+ amonpy.config.address = 'http://amon_instance:port'
+ amonpy.config.secret_key = 'the secret key from /etc/amon.conf'
+ amon_available = True
+except ImportError:
+ amon_available = False
class ScraperImplementation(genericScrapers.GenericPDFDisclogScraper):
@@ -15,5 +22,27 @@
genericScrapers.GenericPDFDisclogScraper)
print 'Instance:', isinstance(ScraperImplementation(),
genericScrapers.GenericPDFDisclogScraper)
- ScraperImplementation().doScrape()
+    try:
+        ScraperImplementation().doScrape()
+    except Exception, err:
+        # Best-effort reporting: the failure is printed (and sent to Amon when amonpy is importable), not re-raised.
+        backtrace = traceback.format_exc()
+        sys.stderr.write('ERROR: %s\n' % str(err))
+        print 'Error Reason: ', err.__doc__
+        print 'Exception: ', err.__class__
+        print backtrace
+        if amon_available:
+            data = {
+                'exception_class': err.__class__.__name__,
+                'url': '',
+                'backtrace': backtrace.splitlines(),
+                'enviroment': '',
+
+                # 'data' is a free-form, recursive dictionary for any extra context (request, session, ...).
+                # NOTE(review): the key spelling 'enviroment' is kept unchanged - confirm against the amonpy API before correcting it.
+                'data': {'request': '', 'session': '', 'more': ''}
+            }
+
+            amonpy.exception(data)
+
--- a/documents/template.inc.php
+++ b/documents/template.inc.php
@@ -157,8 +157,8 @@
function displayLogEntry($row, $idtoname) {
$result = "";
$result .= '<div itemscope itemtype="http://schema.org/Article">';
- $result .= '<h2> <span itemprop="datePublished">' . $row->value->date . "</span>: <span itemprop='name headline'>" . truncate($row->value->title, 120) . "</span>";
- $result .= ' (<span itemprop="author publisher creator">' . $idtoname[$row->value->agencyID] . '</span>)</h2>';
+ $result .= '<h2><a href="http://disclosurelo.gs/view.php?id='.$row->value->_id.'"> <span itemprop="datePublished">' . $row->value->date . "</span>: <span itemprop='name headline'>" . truncate($row->value->title, 120) . "</span>";
+ $result .= ' (<span itemprop="author publisher creator">' . $idtoname[$row->value->agencyID] . '</span>)</a></h2>';
$result .= "<p itemprop='description articleBody text'> Title: " . $row->value->title . "<br/>";
if (isset($row->value->description)) {
$result .= str_replace("\n", "<br>", preg_replace("/(^[\r\n]*|[\r\n]+)[\s\t]*[\r\n]+/", "",trim($row->value->description)));
--- a/getAgency.php
+++ b/getAgency.php
@@ -5,11 +5,11 @@
function displayValue($key, $value, $mode) {
global $db, $schemas;
if ($mode == "view") {
- if (strpos($key, "_") === 0 || $key == "metadata")
+ if (strpos($key, "_") === 0 || $key == "metadata" || $key == "metaTags" || $key == "statistics")
return;
echo "<tr>";
- echo "<td>";
+ echo "<td class='$key'>";
if (isset($schemas['agency']["properties"][$key])) {
echo $schemas['agency']["properties"][$key]['x-title'] . "<br><small>" . $schemas['agency']["properties"][$key]['description'] . "</small>";
}
--- a/include/common.inc.php
+++ b/include/common.inc.php
@@ -9,6 +9,7 @@
|| strstr($_SERVER['PHP_SELF'], "include/")
|| strstr($_SERVER['PHP_SELF'], "documents/")
|| $_SERVER['SERVER_NAME'] == "disclosurelo.gs"
+ || $_SERVER['SERVER_NAME'] == "www.disclosurelo.gs"
)
$basePath = "../";