columns
Former-commit-id: 82edd5f41bab243828a5febd9e00b5fdb051dc86
--- a/admin/genericAgencyFixer.php
+++ b/admin/genericAgencyFixer.php
@@ -7,28 +7,48 @@
$db = $server->get_db('disclosr-agencies');
+// cleanup: strip the scrapeDepth and lastScraped fields from every agency document
+try {
+ $agencies = $db->get_view("app", "byCanonicalName", null, true)->rows;
+ //print_r($rows);
+ foreach ($agencies as $agency) {
+ if (isset($agency->value->scrapeDepth)) {
+ unset($agency->value->scrapeDepth);
+ }
+ if (isset($agency->value->lastScraped)) {
+ unset($agency->value->lastScraped);
+ }
+ $db->save($agency->value); // saved unconditionally, even when neither field was present
+ echo "<hr>";
+ flush(); // emit one separator per processed document as progress output
+ }
+} catch (SetteeRestClientException $e) {
+ setteErrorHandler($e);
+}
+// metatags
try {
$agencies = $db->get_view("app", "byCanonicalName", null, true)->rows;
//print_r($rows);
foreach ($agencies as $agency) {
//echo $agency->value->name . " ".$agency->value->website."<br />\n";
- // print_r($agency);
+ // print_r($agency);
//hasRestricitiveLicence" hasRestrictiveLicense -> has Restrictive Licence
// "hasYoutube" -> Tube
// "comment" -> "comments"
if (!isset($agency->value->metaTags) && isset($agency->value->website)) {
- echo $agency->value->name . " ".$agency->value->website."<br />\n";
+ echo $agency->value->name . " " . $agency->value->website . "<br />\n";
$agency->value->metaTags = Array();
$request = Requests::get($agency->value->website);
$html = phpQuery::newDocumentHTML($request->body);
phpQuery::selectDocument($html);
foreach (pq('meta')->elements as $meta) {
- $tagName = $meta->getAttribute('name');;
+ $tagName = $meta->getAttribute('name');
+ ;
$content = $meta->getAttribute('content');
if ($tagName != "") {
-echo "$tagName == $content <br>\n";
- $agency->value->metaTags[$tagName] = $content;
+ echo "$tagName == $content <br>\n";
+ $agency->value->metaTags[$tagName] = $content;
}
}
//print_r($agency->value->metaTags);
--- a/admin/importRTKbodies.php
+++ b/admin/importRTKbodies.php
@@ -29,6 +29,7 @@
} else {
echo $Row[array_search($nameField, $headers)] . PHP_EOL;
$accounts[$nametoid[trim($agencyName)]]["rtkURLs"][$agencyName] = 'http://www.righttoknow.org.au/body/'.$Row[array_search($accountField, $headers)];
+ $accounts[$nametoid[trim($agencyName)]]["rtkDescriptions"][$agencyName] = $Row[array_search("Notes", $headers)];
}
} else {
echo "error finding any agency" . $line . PHP_EOL;
@@ -38,19 +39,26 @@
}
extractCSVAccounts("http://www.righttoknow.org.au/body/all-authorities.csv","Agency","URL name");
-print_r($accounts);
-/* foreach ($accounts as $id => $accountTypes) {
+//print_r($accounts);
+ foreach ($accounts as $id => $allvalues) {
echo $id . "<br>" . PHP_EOL;
$doc = object_to_array($db->get($id));
// print_r($doc);
- foreach ($accountTypes as $accountType => $accounts) {
- if (!isset($doc["has" . $accountType]) || !is_array($doc["has" . $accountType])) {
- $doc["has" . $accountType] = Array();
+ foreach ($allvalues as $valueType => $values) {
+ if (!isset($doc[ $valueType]) || !is_array($doc[ $valueType])) {
+ $doc[ $valueType] = Array();
}
- $doc["has" . $accountType] = array_unique(array_merge($doc["has" . $accountType], $accounts));
+ $doc[ $valueType] = array_unique(array_merge($doc[ $valueType], $values));
+        if ($valueType === "rtkDescriptions") { // use the RTK "Notes" text filed under the agency's own name as its description
+            foreach ($values as $descriptionAgency => $descriptionValue) {
+                if (isset($doc["name"]) && $descriptionAgency == $doc["name"]) { // $doc is an array (object_to_array), not an object
+                    $doc["description"] = $descriptionValue;
+                }
+            }
+        }
}
$db->save($doc);
-}*/
+}
?>
--- a/couchdb/settee/src/classes/SetteeDatabase.class.php
+++ b/couchdb/settee/src/classes/SetteeDatabase.class.php
@@ -1,319 +1,316 @@
<?php
/**
-* Databaase class.
-*/
+ * Database class.
+ */
class SetteeDatabase {
- /**
- * Base URL of the CouchDB REST API
- */
- private $conn_url;
-
- /**
- * HTTP REST Client instance
- */
- protected $rest_client;
-
- /**
- * Name of the database
- */
- private $dbname;
-
- /**
- * Default constructor
- */
- function __construct($conn_url, $dbname) {
- $this->conn_url = $conn_url;
- $this->dbname = $dbname;
- $this->rest_client = SetteeRestClient::get_instance($this->conn_url);
- }
-
-
- /**
- * Get UUID from CouchDB
- *
- * @return
- * CouchDB-generated UUID string
- *
- */
- function gen_uuid() {
- $ret = $this->rest_client->http_get('_uuids');
- return $ret['decoded']->uuids[0]; // should never be empty at this point, so no checking
- }
-
- /**
- * Create or update a document database
- *
- * @param $document
- * PHP object, a PHP associative array, or a JSON String representing the document to be saved. PHP Objects and arrays are JSON-encoded automatically.
- *
- * <p>If $document has a an "_id" property set, it will be used as document's unique id (even for "create" operation).
- * If "_id" is missing, CouchDB will be used to generate a UUID.
- *
- * <p>If $document has a "_rev" property (revision), document will be updated, rather than creating a new document.
- * You have to provide "_rev" if you want to update an existing document, otherwise operation will be assumed to be
- * one of creation and you will get a duplicate document exception from CouchDB. Also, you may not provide "_rev" but
- * not provide "_id" since that is an invalid input.
- *
- * @param $allowRevAutoDetection
- * Default: false. When true and _rev is missing from the document, save() function will auto-detect latest revision
- * for a document and use it. This option is "false" by default because it involves an extra http HEAD request and
- * therefore can make save() operation slightly slower if such auto-detection is not required.
- *
- * @return
- * document object with the database id (uuid) and revision attached;
- *
- * @throws SetteeCreateDatabaseException
- */
- function save($document, $allowRevAutoDetection = false) {
- if (is_string($document)) {
- $document = json_decode($document);
- }
-
- // Allow passing of $document as an array (for syntactic simplicity and also because in JSON world it does not matter)
- if(is_array($document)) {
- $document = (object) $document;
- }
-
- if (empty($document->_id) && empty($document->_rev)) {
- $id = $this->gen_uuid();
- }
- elseif (empty($document->_id) && !empty($document->_rev)) {
- throw new SetteeWrongInputException("Error: You can not save a document with a revision provided, but missing id");
- }
- else {
- $id = $document->_id;
-
- if ($allowRevAutoDetection) {
- try {
- $rev = $this->get_rev($id);
- } catch (SetteeRestClientException $e) {
- // auto-detection may fail legitimately, if a document has never been saved before (new doc), so skipping error
- }
- if (!empty($rev)) {
- $document->_rev = $rev;
- }
- }
- }
-
- $full_uri = $this->dbname . "/" . $this->safe_urlencode($id);
- $document_json = json_encode($document, JSON_NUMERIC_CHECK);
-
- $ret = $this->rest_client->http_put($full_uri, $document_json);
-
- $document->_id = $ret['decoded']->id;
- $document->_rev = $ret['decoded']->rev;
-
- return $document;
- }
-
- /**
- * @param $doc
- * @param $name
- * @param $content
- * Content of the attachment in a string-buffer format. This function will automatically base64-encode content for
- * you, so you don't have to do it.
- * @param $mime_type
- * Optional. Will be auto-detected if not provided
- * @return void
- */
- public function add_attachment($doc, $name, $content, $mime_type = null) {
- if (empty($doc->_attachments) || !is_object($doc->_attachments)) {
- $doc->_attachments = new stdClass();
- }
-
- if (empty($mime_type)) {
- $mime_type = $this->rest_client->content_mime_type($content);
- }
-
- $doc->_attachments->$name = new stdClass();
- $doc->_attachments->$name->content_type = $mime_type;
- $doc->_attachments->$name->data = base64_encode($content);
- }
-
- /**
- * @param $doc
- * @param $name
- * @param $file
- * Full path to a file (e.g. as returned by PHP's realpath function).
- * @param $mime_type
- * Optional. Will be auto-detected if not provided
- * @return void
- */
- public function add_attachment_file($doc, $name, $file, $mime_type = null) {
- $content = file_get_contents($file);
- $this->add_attachment($doc, $name, $content, $mime_type);
- }
-
- /**
- *
- * Retrieve a document from CouchDB
- *
- * @throws SetteeWrongInputException
- *
- * @param $id
- * Unique ID (usually: UUID) of the document to be retrieved.
- * @return
- * database document in PHP object format.
- */
- function get($id) {
- if (empty($id)) {
- throw new SetteeWrongInputException("Error: Can't retrieve a document without a uuid.");
- }
-
- $full_uri = $this->dbname . "/" . $this->safe_urlencode($id);
-$full_uri = str_replace("%3Frev%3D","?rev=",$full_uri);
- $ret = $this->rest_client->http_get($full_uri);
- return $ret['decoded'];
- }
-
- /**
- *
- * Get the latest revision of a document with document id: $id in CouchDB.
- *
- * @throws SetteeWrongInputException
- *
- * @param $id
- * Unique ID (usually: UUID) of the document to be retrieved.
- * @return
- * database document in PHP object format.
- */
- function get_rev($id) {
- if (empty($id)) {
- throw new SetteeWrongInputException("Error: Can't query a document without a uuid.");
- }
-
- $full_uri = $this->dbname . "/" . $this->safe_urlencode($id);
- $headers = $this->rest_client->http_head($full_uri);
- if (empty($headers['Etag'])) {
- throw new SetteeRestClientException("Error: could not retrieve revision. Server unexpectedly returned empty Etag");
- }
- $etag = str_replace('"', '', $headers['Etag']);
- return $etag;
- }
-
- /**
- * Delete a document
- *
- * @param $document
- * a PHP object or JSON representation of the document that has _id and _rev fields.
- *
- * @return void
- */
- function delete($document) {
- if (!is_object($document)) {
- $document = json_decode($document);
- }
-
- $full_uri = $this->dbname . "/" . $this->safe_urlencode($document->_id) . "?rev=" . $document->_rev;
- $this->rest_client->http_delete($full_uri);
- }
-
-
- /*----------------- View-related functions --------------*/
-
- /**
- * Create a new view or update an existing one.
- *
- * @param $design_doc
- * @param $view_name
- * @param $map_src
- * Source code of the map function in Javascript
- * @param $reduce_src
- * Source code of the reduce function in Javascript (optional)
- * @return void
- */
- function save_view($design_doc, $view_name, $map_src, $reduce_src = null) {
- $obj = new stdClass();
- $obj->_id = "_design/" . urlencode($design_doc);
- $view_name = urlencode($view_name);
- $obj->views->$view_name->map = $map_src;
- if (!empty($reduce_src)) {
- $obj->views->$view_name->reduce = $reduce_src;
- }
-
- // allow safe updates (even if slightly slower due to extra: rev-detection check).
- return $this->save($obj, true);
- }
-
- /**
- * Create a new view or update an existing one.
- *
- * @param $design_doc
- * @param $view_name
- * @param $key
- * key parameter to a view. Can be a single value or an array (for a range). If passed an array, function assumes
- * that first element is startkey, second: endkey.
- * @param $descending
- * return results in descending order. Please don't forget that if you are using a startkey/endkey, when you change
- * order you also need to swap startkey and endkey values!
- *
- * @return void
- */
- function get_view($design_doc, $view_name, $key = null, $descending = false, $limit = false, $reduce=null) {
- $id = "_design/" . urlencode($design_doc);
- $view_name = urlencode($view_name);
- $id .= "/_view/$view_name";
-
- $data = array();
- if (!empty($key)) {
- if (is_string($key)) {
- $data = "key=" . '"' . $key . '"';
- }
- elseif (is_array($key)) {
- list($startkey, $endkey) = $key;
- $data = "startkey=" . '"' . $startkey . '"&' . "endkey=" . '"' . $endkey . '"';
- }
-
- if ($descending) {
- $data .= "&descending=true";
- }
- if ($reduce != null) {
- if ($reduce == true) {
- $data .= "&reduce=true";
- } else {
- $data .= "&reduce=false";
- }
- }
- if ($limit) {
- $data .= "&limit=".$limit;
- }
- }
-
-
-
- if (empty($id)) {
- throw new SetteeWrongInputException("Error: Can't retrieve a document without a uuid.");
- }
-
- $full_uri = $this->dbname . "/" . $this->safe_urlencode($id);
-
-$full_uri = str_replace("%253Fgroup%253D","?group=",$full_uri);
-$full_uri = str_replace("%253Flimit%253D","?limit=",$full_uri);
- $ret = $this->rest_client->http_get($full_uri, $data);
- //$ret['decoded'] = str_replace("?k","&k",$ret['decoded']);
- return $ret['decoded'];
-
- }
-
- /**
- * @param $id
- * @return
- * return a properly url-encoded id.
- */
- private function safe_urlencode($id) {
- //-- System views like _design can have "/" in their URLs.
- $id = rawurlencode($id);
- if (substr($id, 0, 1) == '_') {
- $id = str_replace('%2F', '/', $id);
- }
- return $id;
- }
-
- /** Getter for a database name */
- function get_name() {
- return $this->dbname;
- }
+ /**
+ * Base URL of the CouchDB REST API
+ */
+ private $conn_url;
+
+ /**
+ * HTTP REST Client instance
+ */
+ protected $rest_client;
+
+ /**
+ * Name of the database
+ */
+ private $dbname;
+
+ /**
+ * Constructor: stores the CouchDB base URL and database name, and obtains the REST client for that URL via SetteeRestClient::get_instance().
+ */
+ function __construct($conn_url, $dbname) {
+ $this->conn_url = $conn_url;
+ $this->dbname = $dbname;
+ $this->rest_client = SetteeRestClient::get_instance($this->conn_url);
+ }
+
+ /**
+ * Get a UUID from CouchDB (GET request on the "_uuids" resource)
+ *
+ * @return
+ * CouchDB-generated UUID string (the first entry of the returned "uuids" list)
+ *
+ */
+ function gen_uuid() {
+ $ret = $this->rest_client->http_get('_uuids');
+ return $ret['decoded']->uuids[0]; // should never be empty at this point, so no checking
+ }
+
+ /**
+ * Create or update a document in the database
+ *
+ * @param $document
+ * PHP object, a PHP associative array, or a JSON String representing the document to be saved. PHP Objects and arrays are JSON-encoded automatically.
+ *
+ * <p>If $document has an "_id" property set, it will be used as document's unique id (even for "create" operation).
+ * If "_id" is missing, CouchDB will be used to generate a UUID.
+ *
+ * <p>If $document has a "_rev" property (revision), document will be updated, rather than creating a new document.
+ * You have to provide "_rev" if you want to update an existing document, otherwise operation will be assumed to be
+ * one of creation and you will get a duplicate document exception from CouchDB. Also, you may not provide "_rev" but
+ * not provide "_id" since that is an invalid input.
+ *
+ * @param $allowRevAutoDetection
+ * Default: false. When true and the document has an "_id", save() looks up the latest revision of that document and
+ * uses it as "_rev" (replacing any "_rev" supplied). This option is "false" by default because it involves an extra
+ * http HEAD request and therefore can make save() operation slightly slower if such auto-detection is not required.
+ *
+ * @return
+ * document object with the database id (uuid) and revision attached;
+ *
+ * @throws SetteeWrongInputException when "_rev" is supplied without "_id"
+ */
+ function save($document, $allowRevAutoDetection = false) {
+ if (is_string($document)) {
+ $document = json_decode($document);
+ }
+
+ // Allow passing of $document as an array (for syntactic simplicity and also because in JSON world it does not matter)
+ if (is_array($document)) {
+ $document = (object) $document;
+ }
+
+ if (empty($document->_id) && empty($document->_rev)) {
+ $id = $this->gen_uuid();
+ } elseif (empty($document->_id) && !empty($document->_rev)) {
+ throw new SetteeWrongInputException("Error: You can not save a document with a revision provided, but missing id");
+ } else {
+ $id = $document->_id;
+
+ if ($allowRevAutoDetection) {
+ try {
+ $rev = $this->get_rev($id);
+ } catch (SetteeRestClientException $e) {
+ // auto-detection may fail legitimately, if a document has never been saved before (new doc), so skipping error
+ }
+ if (!empty($rev)) {
+ $document->_rev = $rev;
+ }
+ }
+ }
+
+ $full_uri = $this->dbname . "/" . $this->safe_urlencode($id);
+ $document_json = json_encode($document, JSON_NUMERIC_CHECK); // NOTE(review): JSON_NUMERIC_CHECK encodes numeric-looking strings as numbers - confirm that is wanted for every field
+
+ $ret = $this->rest_client->http_put($full_uri, $document_json);
+
+ $document->_id = $ret['decoded']->id;
+ $document->_rev = $ret['decoded']->rev;
+
+ return $document;
+ }
+
+ /**
+ * @param $doc document object to modify in place; the document is not saved by this method
+ * @param $name name of the attachment (used as the property name under $doc->_attachments)
+ * @param $content
+ * Content of the attachment in a string-buffer format. This function will automatically base64-encode content for
+ * you, so you don't have to do it.
+ * @param $mime_type
+ * Optional. Will be auto-detected if not provided
+ * @return void
+ */
+ public function add_attachment($doc, $name, $content, $mime_type = null) {
+ if (empty($doc->_attachments) || !is_object($doc->_attachments)) {
+ $doc->_attachments = new stdClass();
+ }
+
+ if (empty($mime_type)) {
+ $mime_type = $this->rest_client->content_mime_type($content);
+ }
+
+ $doc->_attachments->$name = new stdClass();
+ $doc->_attachments->$name->content_type = $mime_type;
+ $doc->_attachments->$name->data = base64_encode($content);
+ }
+
+ /**
+ * @param $doc document object to modify in place (passed on to add_attachment())
+ * @param $name name of the attachment
+ * @param $file
+ * Full path to a file (e.g. as returned by PHP's realpath function).
+ * @param $mime_type
+ * Optional. Will be auto-detected if not provided
+ * @return void
+ */
+ public function add_attachment_file($doc, $name, $file, $mime_type = null) {
+ $content = file_get_contents($file); // NOTE(review): file_get_contents() returns false on failure and that is not checked here
+ $this->add_attachment($doc, $name, $content, $mime_type);
+ }
+
+ /**
+ *
+ * Retrieve a document from CouchDB
+ *
+ * @throws SetteeWrongInputException when $id is empty
+ *
+ * @param $id
+ * Unique ID (usually: UUID) of the document to be retrieved. A trailing "?rev=..." is passed through as a query string.
+ * @return
+ * database document in PHP object format.
+ */
+ function get($id) {
+ if (empty($id)) {
+ throw new SetteeWrongInputException("Error: Can't retrieve a document without a uuid.");
+ }
+
+ $full_uri = $this->dbname . "/" . $this->safe_urlencode($id);
+ $full_uri = str_replace("%3Frev%3D", "?rev=", $full_uri); // restore a "?rev=" that safe_urlencode() escaped
+ $ret = $this->rest_client->http_get($full_uri);
+ return $ret['decoded'];
+ }
+
+ /**
+ *
+ * Get the latest revision of a document with document id: $id in CouchDB.
+ *
+ * @throws SetteeWrongInputException when $id is empty
+ * @throws SetteeRestClientException when the HEAD response carries no Etag header
+ * @param $id
+ * Unique ID (usually: UUID) of the document whose revision is wanted.
+ * @return
+ * revision string, taken from the Etag response header with the double quotes removed.
+ */
+ function get_rev($id) {
+ if (empty($id)) {
+ throw new SetteeWrongInputException("Error: Can't query a document without a uuid.");
+ }
+
+ $full_uri = $this->dbname . "/" . $this->safe_urlencode($id);
+ $headers = $this->rest_client->http_head($full_uri);
+ if (empty($headers['Etag'])) {
+ throw new SetteeRestClientException("Error: could not retrieve revision. Server unexpectedly returned empty Etag");
+ }
+ $etag = str_replace('"', '', $headers['Etag']);
+ return $etag;
+ }
+
+ /**
+ * Delete a document (HTTP DELETE on the document URI with its revision as "?rev=")
+ *
+ * @param $document
+ * a PHP object or JSON representation of the document that has _id and _rev fields.
+ *
+ * @return void
+ */
+ function delete($document) {
+ if (!is_object($document)) {
+ $document = json_decode($document); // anything that is not an object is treated as a JSON string
+ }
+
+ $full_uri = $this->dbname . "/" . $this->safe_urlencode($document->_id) . "?rev=" . $document->_rev;
+ $this->rest_client->http_delete($full_uri);
+ }
+
+ /* ----------------- View-related functions -------------- */
+
+    /**
+     * Create a new view or update an existing one.
+     * The design document is written with this single view only.
+     * @param $design_doc  name of the design document (without the "_design/" prefix)
+     * @param $view_name   name of the view inside that design document
+     * @param $map_src
+     *  Source code of the map function in Javascript
+     * @param $reduce_src
+     *  Source code of the reduce function in Javascript (optional)
+     * @return the saved design document object, as returned by save()
+     */
+    function save_view($design_doc, $view_name, $map_src, $reduce_src = null) {
+        $obj = new stdClass();
+        $obj->_id = "_design/" . urlencode($design_doc);
+        $view_name = urlencode($view_name);
+        $view = (object) array("map" => $map_src); // built explicitly: assigning through an undefined property fails on PHP 8
+        if (!empty($reduce_src)) {
+            $view->reduce = $reduce_src;
+        }
+        $obj->views = new stdClass();
+        $obj->views->$view_name = $view;
+        return $this->save($obj, true); // rev auto-detection allows safe updates (at the cost of an extra request)
+    }
+
+    /**
+     * Query a view and return the decoded response.
+     *
+     * @param $design_doc  name of the design document (without the "_design/" prefix)
+     * @param $view_name   name of the view
+     * @param $key
+     *  key parameter to a view. Can be a single value or an array (for a range). If passed an array, function assumes
+     *  that first element is startkey, second: endkey.
+     * @param $descending
+     *  return results in descending order. Please don't forget that if you are using a startkey/endkey, when you change
+     *  order you also need to swap startkey and endkey values!
+     * @param $limit       maximum number of rows to return; false (default) sends no limit
+     * @param $reduce      true/false is sent as reduce=true/false; null (default) sends no reduce parameter
+     * @param $startdocid  document id sent as startkey_docid, to resume paging among rows sharing the start key
+     *
+     * @return decoded view response (callers read ->rows from it)
+     */
+    function get_view($design_doc, $view_name, $key = null, $descending = false, $limit = false, $reduce = null, $startdocid = null) {
+        $id = "_design/" . urlencode($design_doc) . "/_view/" . urlencode($view_name);
+
+        // Collect the query parameters one by one and join them at the end.
+        // The options below do not depend on $key being set, so they also apply to un-keyed queries.
+        $params = array();
+        if (!empty($key)) {
+            if (is_string($key)) {
+                $params[] = "key=" . '"' . $key . '"';
+            } elseif (is_array($key)) {
+                list($startkey, $endkey) = $key;
+                $params[] = "startkey=" . '"' . $startkey . '"';
+                $params[] = "endkey=" . '"' . $endkey . '"';
+            }
+        }
+        if ($descending) {
+            $params[] = "descending=true";
+        }
+        if ($startdocid !== null && $startdocid !== '') {
+            // startkey_docid is a plain document id, not a JSON value, so it must not be wrapped in quotes
+            // NOTE(review): sent unescaped, like the key values above - confirm http_get() encodes the query string
+            $params[] = "startkey_docid=" . $startdocid;
+        }
+        if ($reduce === true) {
+            $params[] = "reduce=true";
+        } elseif ($reduce === false) {
+            $params[] = "reduce=false";
+        }
+        if ($limit) {
+            $params[] = "limit=" . $limit;
+        }
+
+        // keep handing http_get() an empty array when there is nothing to send, as before
+        $data = empty($params) ? array() : implode("&", $params);
+
+        $full_uri = $this->dbname . "/" . $this->safe_urlencode($id);
+        // a "?group=" / "?limit=" suffix on the view name gets encoded twice above; turn it back into a query string
+        $full_uri = str_replace("%253Fgroup%253D", "?group=", $full_uri);
+        $full_uri = str_replace("%253Flimit%253D", "?limit=", $full_uri);
+        $ret = $this->rest_client->http_get($full_uri, $data);
+        return $ret['decoded'];
+    }
+
+ /**
+ * @param $id document id or "_design/..." path to encode
+ * @return
+ * the rawurlencode()d id; when it starts with "_" the encoded slashes are turned back into "/".
+ */
+ private function safe_urlencode($id) {
+ //-- System views like _design can have "/" in their URLs.
+ $id = rawurlencode($id);
+ if (substr($id, 0, 1) == '_') {
+ $id = str_replace('%2F', '/', $id);
+ }
+ return $id;
+ }
+
+ /** Returns the database name that was passed to the constructor. */
+ function get_name() {
+ return $this->dbname;
+ }
}
+
--- a/couchdb/settee/src/classes/SetteeRestClient.class.php
+++ b/couchdb/settee/src/classes/SetteeRestClient.class.php
@@ -244,3 +244,4 @@
}
class SetteeRestClientException extends Exception {}
+
--- a/documents/genericScrapers.py
+++ b/documents/genericScrapers.py
@@ -57,7 +57,7 @@
foidocsdb = scrape.couch['disclosr-foidocuments']
(url, mime_type, rcontent) = scrape.fetchURL(scrape.docsdb,
self.getURL(), "foidocuments", self.getAgencyID())
- content = rcontent.read()
+ content = rcontent
dochash = scrape.mkhash(content)
doc = foidocsdb.get(dochash)
if doc is None:
@@ -89,7 +89,7 @@
device = TextConverter(rsrcmgr, outfp, codec='utf-8',
laparams=laparams)
fp = StringIO()
- fp.write(content.read())
+ fp.write(content)
process_pdf(rsrcmgr, device, fp, set(), caching=True,
check_extractable=True)
--- a/documents/index.php
+++ b/documents/index.php
@@ -1,9 +1,9 @@
<?php
-
include('template.inc.php');
include_header_documents("");
include_once('../include/common.inc.php');
$endkey = (isset($_REQUEST['end_key']) ? $_REQUEST['end_key'] : '9999-99-99');
+$enddocid = (isset($_REQUEST['end_docid']) ? $_REQUEST['end_docid'] : null);
?>
<div class="headline">Read all the information released by Australian Federal Government agencies under the FOI Act in one place!</div>
<a style='float:right' href="rss.xml.php"><img src="img/feed-icon-14x14.png" alt="RSS Icon"/> All Agencies RSS Feed</a><br>
@@ -16,18 +16,20 @@
}
$foidocsdb = $server->get_db('disclosr-foidocuments');
try {
- $rows = $foidocsdb->get_view("app", "byDate", Array($endkey, '0000-00-00'), true, 20)->rows;
+ $rows = $foidocsdb->get_view("app", "byDate", Array($endkey, '0000-00-00'), true, 20,null, $enddocid)->rows;
if ($rows) {
foreach ($rows as $key => $row) {
echo displayLogEntry($row, $idtoname);
- if (!isset($startkey)) $startkey = $row->key;
+ if (!isset($startkey))
+ $startkey = $row->key;
$endkey = $row->key;
+ $enddocid = $row->value->_id;
}
}
} catch (SetteeRestClientException $e) {
setteErrorHandler($e);
}
-echo "<a class='btn btn-large btn-primary' href='?end_key=$endkey' style='float:right;'>next page <i class='icon-circle-arrow-right icon-white'></i></a>";
+echo "<a class='btn btn-large btn-primary' href='?end_key=" . urlencode((string) $endkey) . "&amp;end_docid=" . urlencode((string) $enddocid) . "' style='float:right;'>next page <i class='icon-circle-arrow-right icon-white'></i></a>"; // URL-encoded: end_key/end_docid may come straight from the request
include_footer_documents();
?>
--- a/documents/runScrapers.sh
+++ b/documents/runScrapers.sh
@@ -1,3 +1,10 @@
-for f in scrapers/*.py; do echo "Processing $f file.."; python $f; done
+for f in scrapers/*.py; do
+    [ -e "$f" ] || continue  # the glob stays literal when no scraper matches
+    echo "Processing $f file.."
+    if ! python "$f"; then  # quoted so paths with spaces reach python as one argument
+        echo "error"
+        sleep 2
+    fi
+done
--- a/documents/scrape.py<