pagination by docid
Former-commit-id: 3aa6116d88acaa6e423bf1d972cda0f2a51d9f5b
--- a/admin/refreshDesignDoc.php
+++ b/admin/refreshDesignDoc.php
@@ -9,7 +9,6 @@
$obj->language = "javascript";
$obj->views->all->map = "function(doc) { emit(doc._id, doc); };";
$obj->views->byDate->map = "function(doc) { emit(doc.date, doc); };";
-$obj->views->byDate->reduce = "_count";
$obj->views->byDateMonthYear->map = "function(doc) { emit(doc.date, doc); };";
$obj->views->byDateMonthYear->reduce = "_count";
$obj->views->byAgencyID->map = "function(doc) { emit(doc.agencyID, doc); };";
--- a/couchdb/settee/src/classes/SetteeDatabase.class.php
+++ b/couchdb/settee/src/classes/SetteeDatabase.class.php
@@ -1,310 +1,316 @@
<?php
/**
-* Databaase class.
-*/
+ * Database class.
+ */
class SetteeDatabase {
- /**
- * Base URL of the CouchDB REST API
- */
- private $conn_url;
-
- /**
- * HTTP REST Client instance
- */
- protected $rest_client;
-
- /**
- * Name of the database
- */
- private $dbname;
-
- /**
- * Default constructor
- */
- function __construct($conn_url, $dbname) {
- $this->conn_url = $conn_url;
- $this->dbname = $dbname;
- $this->rest_client = SetteeRestClient::get_instance($this->conn_url);
- }
-
-
- /**
- * Get UUID from CouchDB
- *
- * @return
- * CouchDB-generated UUID string
- *
- */
- function gen_uuid() {
- $ret = $this->rest_client->http_get('_uuids');
- return $ret['decoded']->uuids[0]; // should never be empty at this point, so no checking
- }
-
- /**
- * Create or update a document database
- *
- * @param $document
- * PHP object, a PHP associative array, or a JSON String representing the document to be saved. PHP Objects and arrays are JSON-encoded automatically.
- *
- * <p>If $document has a an "_id" property set, it will be used as document's unique id (even for "create" operation).
- * If "_id" is missing, CouchDB will be used to generate a UUID.
- *
- * <p>If $document has a "_rev" property (revision), document will be updated, rather than creating a new document.
- * You have to provide "_rev" if you want to update an existing document, otherwise operation will be assumed to be
- * one of creation and you will get a duplicate document exception from CouchDB. Also, you may not provide "_rev" but
- * not provide "_id" since that is an invalid input.
- *
- * @param $allowRevAutoDetection
- * Default: false. When true and _rev is missing from the document, save() function will auto-detect latest revision
- * for a document and use it. This option is "false" by default because it involves an extra http HEAD request and
- * therefore can make save() operation slightly slower if such auto-detection is not required.
- *
- * @return
- * document object with the database id (uuid) and revision attached;
- *
- * @throws SetteeCreateDatabaseException
- */
- function save($document, $allowRevAutoDetection = false) {
- if (is_string($document)) {
- $document = json_decode($document);
- }
-
- // Allow passing of $document as an array (for syntactic simplicity and also because in JSON world it does not matter)
- if(is_array($document)) {
- $document = (object) $document;
- }
-
- if (empty($document->_id) && empty($document->_rev)) {
- $id = $this->gen_uuid();
- }
- elseif (empty($document->_id) && !empty($document->_rev)) {
- throw new SetteeWrongInputException("Error: You can not save a document with a revision provided, but missing id");
- }
- else {
- $id = $document->_id;
-
- if ($allowRevAutoDetection) {
- try {
- $rev = $this->get_rev($id);
- } catch (SetteeRestClientException $e) {
- // auto-detection may fail legitimately, if a document has never been saved before (new doc), so skipping error
- }
- if (!empty($rev)) {
- $document->_rev = $rev;
- }
- }
- }
-
- $full_uri = $this->dbname . "/" . $this->safe_urlencode($id);
- $document_json = json_encode($document, JSON_NUMERIC_CHECK);
-
- $ret = $this->rest_client->http_put($full_uri, $document_json);
-
- $document->_id = $ret['decoded']->id;
- $document->_rev = $ret['decoded']->rev;
-
- return $document;
- }
-
- /**
- * @param $doc
- * @param $name
- * @param $content
- * Content of the attachment in a string-buffer format. This function will automatically base64-encode content for
- * you, so you don't have to do it.
- * @param $mime_type
- * Optional. Will be auto-detected if not provided
- * @return void
- */
- public function add_attachment($doc, $name, $content, $mime_type = null) {
- if (empty($doc->_attachments) || !is_object($doc->_attachments)) {
- $doc->_attachments = new stdClass();
- }
-
- if (empty($mime_type)) {
- $mime_type = $this->rest_client->content_mime_type($content);
- }
-
- $doc->_attachments->$name = new stdClass();
- $doc->_attachments->$name->content_type = $mime_type;
- $doc->_attachments->$name->data = base64_encode($content);
- }
-
- /**
- * @param $doc
- * @param $name
- * @param $file
- * Full path to a file (e.g. as returned by PHP's realpath function).
- * @param $mime_type
- * Optional. Will be auto-detected if not provided
- * @return void
- */
- public function add_attachment_file($doc, $name, $file, $mime_type = null) {
- $content = file_get_contents($file);
- $this->add_attachment($doc, $name, $content, $mime_type);
- }
-
- /**
- *
- * Retrieve a document from CouchDB
- *
- * @throws SetteeWrongInputException
- *
- * @param $id
- * Unique ID (usually: UUID) of the document to be retrieved.
- * @return
- * database document in PHP object format.
- */
- function get($id) {
- if (empty($id)) {
- throw new SetteeWrongInputException("Error: Can't retrieve a document without a uuid.");
- }
-
- $full_uri = $this->dbname . "/" . $this->safe_urlencode($id);
-$full_uri = str_replace("%3Frev%3D","?rev=",$full_uri);
- $ret = $this->rest_client->http_get($full_uri);
- return $ret['decoded'];
- }
-
- /**
- *
- * Get the latest revision of a document with document id: $id in CouchDB.
- *
- * @throws SetteeWrongInputException
- *
- * @param $id
- * Unique ID (usually: UUID) of the document to be retrieved.
- * @return
- * database document in PHP object format.
- */
- function get_rev($id) {
- if (empty($id)) {
- throw new SetteeWrongInputException("Error: Can't query a document without a uuid.");
- }
-
- $full_uri = $this->dbname . "/" . $this->safe_urlencode($id);
- $headers = $this->rest_client->http_head($full_uri);
- if (empty($headers['Etag'])) {
- throw new SetteeRestClientException("Error: could not retrieve revision. Server unexpectedly returned empty Etag");
- }
- $etag = str_replace('"', '', $headers['Etag']);
- return $etag;
- }
-
- /**
- * Delete a document
- *
- * @param $document
- * a PHP object or JSON representation of the document that has _id and _rev fields.
- *
- * @return void
- */
- function delete($document) {
- if (!is_object($document)) {
- $document = json_decode($document);
- }
-
- $full_uri = $this->dbname . "/" . $this->safe_urlencode($document->_id) . "?rev=" . $document->_rev;
- $this->rest_client->http_delete($full_uri);
- }
-
-
- /*----------------- View-related functions --------------*/
-
- /**
- * Create a new view or update an existing one.
- *
- * @param $design_doc
- * @param $view_name
- * @param $map_src
- * Source code of the map function in Javascript
- * @param $reduce_src
- * Source code of the reduce function in Javascript (optional)
- * @return void
- */
- function save_view($design_doc, $view_name, $map_src, $reduce_src = null) {
- $obj = new stdClass();
- $obj->_id = "_design/" . urlencode($design_doc);
- $view_name = urlencode($view_name);
- $obj->views->$view_name->map = $map_src;
- if (!empty($reduce_src)) {
- $obj->views->$view_name->reduce = $reduce_src;
- }
-
- // allow safe updates (even if slightly slower due to extra: rev-detection check).
- return $this->save($obj, true);
- }
-
- /**
- * Create a new view or update an existing one.
- *
- * @param $design_doc
- * @param $view_name
- * @param $key
- * key parameter to a view. Can be a single value or an array (for a range). If passed an array, function assumes
- * that first element is startkey, second: endkey.
- * @param $descending
- * return results in descending order. Please don't forget that if you are using a startkey/endkey, when you change
- * order you also need to swap startkey and endkey values!
- *
- * @return void
- */
- function get_view($design_doc, $view_name, $key = null, $descending = false, $limit = false) {
- $id = "_design/" . urlencode($design_doc);
- $view_name = urlencode($view_name);
- $id .= "/_view/$view_name";
-
- $data = array();
- if (!empty($key)) {
- if (is_string($key)) {
- $data = "key=" . '"' . $key . '"';
- }
- elseif (is_array($key)) {
- list($startkey, $endkey) = $key;
- $data = "startkey=" . '"' . $startkey . '"&' . "endkey=" . '"' . $endkey . '"';
- }
-
- if ($descending) {
- $data .= "&descending=true";
- }
- if ($limit) {
- $data .= "&limit=".$limit;
- }
- }
-
-
-
- if (empty($id)) {
- throw new SetteeWrongInputException("Error: Can't retrieve a document without a uuid.");
- }
-
- $full_uri = $this->dbname . "/" . $this->safe_urlencode($id);
-$full_uri = str_replace("%253Fgroup%253D","?group=",$full_uri);
-$full_uri = str_replace("%253Flimit%253D","?limit=",$full_uri);
- $ret = $this->rest_client->http_get($full_uri, $data);
- return $ret['decoded'];
-
- }
-
- /**
- * @param $id
- * @return
- * return a properly url-encoded id.
- */
- private function safe_urlencode($id) {
- //-- System views like _design can have "/" in their URLs.
- $id = rawurlencode($id);
- if (substr($id, 0, 1) == '_') {
- $id = str_replace('%2F', '/', $id);
- }
- return $id;
- }
-
- /** Getter for a database name */
- function get_name() {
- return $this->dbname;
- }
+ /**
+ * Base URL of the CouchDB REST API
+ */
+ private $conn_url;
+
+ /**
+ * HTTP REST Client instance
+ */
+ protected $rest_client;
+
+ /**
+ * Name of the database
+ */
+ private $dbname;
+
+    /**
+     * Bind this object to one CouchDB database.
+     *
+     * @param $conn_url
+     *      Base URL of the CouchDB REST API; also passed to SetteeRestClient::get_instance() to obtain the REST client.
+     * @param $dbname
+     *      Name of the database.
+     */
+    function __construct($conn_url, $dbname) {
+        $this->dbname = $dbname;
+        $this->conn_url = $conn_url;
+        $this->rest_client = SetteeRestClient::get_instance($conn_url);
+    }
+
+    /**
+     * Ask the server for a UUID by issuing a GET on "_uuids".
+     *
+     * @return
+     *      First entry of the "uuids" list in the decoded response. The list is not checked for emptiness.
+     */
+    function gen_uuid() {
+        $response = $this->rest_client->http_get('_uuids');
+        $uuids = $response['decoded']->uuids;
+        return $uuids[0];
+    }
+
+    /**
+     * Create or update a document in the database.
+     *
+     * @param $document
+     *      PHP object, PHP associative array, or JSON string representing the document to be saved. Objects and
+     *      arrays are JSON-encoded automatically.
+     *
+     *      <p>If $document has an "_id" property, it is used as the document's unique id (even for a "create").
+     *      If both "_id" and "_rev" are missing, a UUID is requested from CouchDB via gen_uuid().
+     *
+     *      <p>If $document has a "_rev" property (revision) it is sent along with the document. Providing "_rev"
+     *      without "_id" is invalid input and is rejected.
+     *
+     * @param $allowRevAutoDetection
+     *      Default: false. When true and the document has an "_id", the latest revision is looked up with
+     *      get_rev() (one extra HTTP HEAD request) and, if one is found, it replaces the document's "_rev".
+     *
+     * @return
+     *      the document object with "_id" and "_rev" set from the server response.
+     *
+     * @throws SetteeWrongInputException
+     *      if $document is not (and does not decode to) an object or array, or if "_rev" is given without "_id".
+     */
+    function save($document, $allowRevAutoDetection = false) {
+        if (is_string($document)) {
+            $document = json_decode($document);
+        }
+
+        // Arrays are accepted for syntactic simplicity; in the JSON world the difference does not matter.
+        if (is_array($document)) {
+            $document = (object) $document;
+        }
+
+        // json_decode() yields null for invalid JSON and scalars for scalar JSON; neither can be saved as a document.
+        if (!is_object($document)) {
+            throw new SetteeWrongInputException("Error: document must be an object, an array or a JSON string representing an object");
+        }
+
+        $has_id = !empty($document->_id);
+        $has_rev = !empty($document->_rev);
+
+        if (!$has_id && $has_rev) {
+            throw new SetteeWrongInputException("Error: You can not save a document with a revision provided, but missing id");
+        }
+
+        if (!$has_id) {
+            $id = $this->gen_uuid();
+        } else {
+            $id = $document->_id;
+
+            if ($allowRevAutoDetection) {
+                $rev = null;
+                try {
+                    $rev = $this->get_rev($id);
+                } catch (SetteeRestClientException $e) {
+                    // auto-detection may fail legitimately, if a document has never been saved before (new doc), so skipping error
+                }
+                if (!empty($rev)) {
+                    $document->_rev = $rev;
+                }
+            }
+        }
+
+        $full_uri = $this->dbname . "/" . $this->safe_urlencode($id);
+        // JSON_NUMERIC_CHECK encodes numeric strings as numbers.
+        $document_json = json_encode($document, JSON_NUMERIC_CHECK);
+
+        $ret = $this->rest_client->http_put($full_uri, $document_json);
+
+        $document->_id = $ret['decoded']->id;
+        $document->_rev = $ret['decoded']->rev;
+
+        return $document;
+    }
+
+    /**
+     * Attach inline content to a document object (the document itself is not saved here).
+     *
+     * @param $doc
+     *      Document object to modify; its "_attachments" property is created if missing or not an object.
+     * @param $name
+     *      Name of the attachment (used as the property name under "_attachments").
+     * @param $content
+     *      Content of the attachment in a string-buffer format. It is base64-encoded here, so callers must not
+     *      encode it themselves.
+     * @param $mime_type
+     *      Optional. When empty, the REST client's content_mime_type() is asked to detect it from $content.
+     * @return void
+     */
+    public function add_attachment($doc, $name, $content, $mime_type = null) {
+        $needs_container = empty($doc->_attachments) || !is_object($doc->_attachments);
+        if ($needs_container) {
+            $doc->_attachments = new stdClass();
+        }
+
+        if (empty($mime_type)) {
+            $mime_type = $this->rest_client->content_mime_type($content);
+        }
+
+        $attachment = new stdClass();
+        $attachment->content_type = $mime_type;
+        $attachment->data = base64_encode($content);
+        $doc->_attachments->$name = $attachment;
+    }
+
+    /**
+     * Attach the contents of a file to a document object (the document itself is not saved here).
+     *
+     * @param $doc
+     *      Document object to modify.
+     * @param $name
+     *      Name of the attachment.
+     * @param $file
+     *      Full path to a file (e.g. as returned by PHP's realpath function).
+     * @param $mime_type
+     *      Optional. Will be auto-detected by add_attachment() if not provided.
+     * @return void
+     *
+     * @throws SetteeWrongInputException
+     *      if the file can not be read.
+     */
+    public function add_attachment_file($doc, $name, $file, $mime_type = null) {
+        $content = file_get_contents($file);
+        // file_get_contents() returns false on failure; attaching that would silently store an empty attachment.
+        if ($content === false) {
+            throw new SetteeWrongInputException("Error: Can't read attachment file: " . $file);
+        }
+        $this->add_attachment($doc, $name, $content, $mime_type);
+    }
+
+    /**
+     * Retrieve a document from CouchDB.
+     *
+     * @param $id
+     *      Unique ID (usually: UUID) of the document to be retrieved. A "?rev=" suffix inside $id survives the
+     *      URL-encoding because it is restored afterwards.
+     * @return
+     *      decoded response of the GET request.
+     *
+     * @throws SetteeWrongInputException
+     *      if $id is empty.
+     */
+    function get($id) {
+        if (empty($id)) {
+            throw new SetteeWrongInputException("Error: Can't retrieve a document without a uuid.");
+        }
+
+        $encoded_uri = $this->dbname . "/" . $this->safe_urlencode($id);
+        // Undo the encoding of "?rev=" so it reaches the server as a query string.
+        $request_uri = str_replace("%3Frev%3D", "?rev=", $encoded_uri);
+        $response = $this->rest_client->http_get($request_uri);
+        return $response['decoded'];
+    }
+
+    /**
+     * Get the revision of the document with id $id, as reported by an HTTP HEAD request.
+     *
+     * @param $id
+     *      Unique ID (usually: UUID) of the document to be queried.
+     * @return
+     *      value of the "Etag" response header with all double quotes removed.
+     *
+     * @throws SetteeWrongInputException
+     *      if $id is empty.
+     * @throws SetteeRestClientException
+     *      if the response carries no "Etag" header.
+     */
+    function get_rev($id) {
+        if (empty($id)) {
+            throw new SetteeWrongInputException("Error: Can't query a document without a uuid.");
+        }
+
+        $headers = $this->rest_client->http_head($this->dbname . "/" . $this->safe_urlencode($id));
+        if (empty($headers['Etag'])) {
+            throw new SetteeRestClientException("Error: could not retrieve revision. Server unexpectedly returned empty Etag");
+        }
+
+        return str_replace('"', '', $headers['Etag']);
+    }
+
+    /**
+     * Delete a document.
+     *
+     * @param $document
+     *      a PHP object, or anything else that json_decode() turns into one, carrying "_id" and "_rev" fields.
+     *
+     * @return void
+     */
+    function delete($document) {
+        if (!is_object($document)) {
+            $document = json_decode($document);
+        }
+
+        $encoded_id = $this->safe_urlencode($document->_id);
+        $this->rest_client->http_delete($this->dbname . "/" . $encoded_id . "?rev=" . $document->_rev);
+    }
+
+ /* ----------------- View-related functions -------------- */
+
+    /**
+     * Create a new view or update an existing one.
+     *
+     * The design document that is saved contains only this one view, and it is saved with revision
+     * auto-detection enabled.
+     *
+     * @param $design_doc
+     *      Name of the design document (without the "_design/" prefix); it is url-encoded here.
+     * @param $view_name
+     *      Name of the view; it is url-encoded here.
+     * @param $map_src
+     *      Source code of the map function in Javascript
+     * @param $reduce_src
+     *      Source code of the reduce function in Javascript (optional)
+     * @return
+     *      the saved design document, as returned by save().
+     */
+    function save_view($design_doc, $view_name, $map_src, $reduce_src = null) {
+        $view = new stdClass();
+        $view->map = $map_src;
+        if (!empty($reduce_src)) {
+            $view->reduce = $reduce_src;
+        }
+
+        $view_name = urlencode($view_name);
+
+        // Build the nested objects explicitly: assigning through an undefined property relies on implicit
+        // object creation, which raises a warning on older PHP versions and an Error on PHP 8.
+        $obj = new stdClass();
+        $obj->_id = "_design/" . urlencode($design_doc);
+        $obj->views = new stdClass();
+        $obj->views->$view_name = $view;
+
+        // allow safe updates (even if slightly slower due to extra: rev-detection check).
+        return $this->save($obj, true);
+    }
+
+    /**
+     * Query a view of a design document and return the decoded response.
+     *
+     * @param $design_doc
+     *      Name of the design document (without the "_design/" prefix).
+     * @param $view_name
+     *      Name of the view. A "?group=..." or "?limit=..." suffix appended to the name is passed through to the
+     *      request URI.
+     * @param $key
+     *      key parameter to a view. Can be a single string or an array (for a range). If passed an array, function
+     *      assumes that first element is startkey, second: endkey. When $key is empty no query parameters are
+     *      built at all, i.e. $descending, $limit, $reduce and $startdocid are ignored.
+     * @param $descending
+     *      return results in descending order. Please don't forget that if you are using a startkey/endkey, when
+     *      you change order you also need to swap startkey and endkey values!
+     * @param $limit
+     *      Maximum number of rows to request; false (default) sends no limit parameter.
+     * @param $reduce
+     *      null (default) sends no reduce parameter; any other value sends reduce=true or reduce=false according
+     *      to its truthiness.
+     * @param $startdocid
+     *      Document id to send as startkey_docid (for pagination); null or '' sends nothing.
+     *
+     * @return
+     *      decoded response of the view request.
+     */
+    function get_view($design_doc, $view_name, $key = null, $descending = false, $limit = false, $reduce = null, $startdocid = null) {
+        $id = "_design/" . urlencode($design_doc) . "/_view/" . urlencode($view_name);
+
+        $data = array();
+        if (!empty($key)) {
+            if (is_string($key)) {
+                $data = "key=" . '"' . $key . '"';
+            } elseif (is_array($key)) {
+                list($startkey, $endkey) = $key;
+                $data = "startkey=" . '"' . $startkey . '"&' . "endkey=" . '"' . $endkey . '"';
+            }
+
+            if ($descending) {
+                $data .= "&descending=true";
+            }
+            // startkey_docid is a plain document id, not a JSON value, so it is sent without quotes.
+            // NOTE(review): like the keys above it is not url-encoded here; confirm what http_get() does with it.
+            if ($startdocid !== null && $startdocid !== '') {
+                $data .= "&startkey_docid=" . $startdocid;
+            }
+            // Strict comparison: with "!= null" an explicit false would be treated like "not given".
+            if ($reduce !== null) {
+                $data .= "&reduce=" . ($reduce ? "true" : "false");
+            }
+            if ($limit) {
+                $data .= "&limit=" . $limit;
+            }
+        }
+
+        $full_uri = $this->dbname . "/" . $this->safe_urlencode($id);
+
+        // The view name was encoded twice (urlencode above, rawurlencode in safe_urlencode), so a "?" and "="
+        // in it arrive here as "%253F" and "%253D". Restore them for the two supported suffixes.
+        $full_uri = str_replace("%253Fgroup%253D", "?group=", $full_uri);
+        $full_uri = str_replace("%253Flimit%253D", "?limit=", $full_uri);
+
+        $ret = $this->rest_client->http_get($full_uri, $data);
+        return $ret['decoded'];
+    }
+
+    /**
+     * URL-encode an id for use as a path component.
+     *
+     * @param $id
+     *      Id to encode.
+     * @return
+     *      rawurlencode()d id; when the id starts with "_" (system ids such as "_design/..."), encoded slashes
+     *      are turned back into "/".
+     */
+    private function safe_urlencode($id) {
+        $encoded = rawurlencode($id);
+        if (substr($encoded, 0, 1) != '_') {
+            return $encoded;
+        }
+        //-- System views like _design can have "/" in their URLs.
+        return str_replace('%2F', '/', $encoded);
+    }
+
+    /**
+     * Getter for the database name.
+     *
+     * @return
+     *      the name given to the constructor.
+     */
+    function get_name() {
+        $name = $this->dbname;
+        return $name;
+    }
}
--- /dev/null
+++ b/documents/about.php
@@ -1,1 +1,11 @@
+<?php
+// "About" page: header, a heading, footer.
+include 'template.inc.php';
+include_header_documents("About");
+include_once '../include/common.inc.php';
+echo "<h1>About</h1>\n";
+include_footer_documents();
+?>
+
--- /dev/null
+++ b/documents/agency.php
@@ -1,1 +1,41 @@
+<?php
+// Lists disclosure-log entries for one agency (?id=...), or an index of agencies with their record counts.
+include('template.inc.php');
+include_once('../include/common.inc.php');
+$agenciesdb = $server->get_db('disclosr-agencies');
+$idtoname = Array();
+foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) {
+    $idtoname[$row->id] = trim($row->value->name);
+}
+$foidocsdb = $server->get_db('disclosr-foidocuments');
+
+// Request parameters are untrusted: accept plain strings only and escape them wherever they are output.
+$agencyid = (isset($_REQUEST['id']) && is_string($_REQUEST['id']) ? $_REQUEST['id'] : '');
+$endkey = (isset($_REQUEST['end_key']) && is_string($_REQUEST['end_key']) ? $_REQUEST['end_key'] : '9999-99-99');
+
+// Fall back to the generic title when no agency is selected or the id is unknown.
+include_header_documents(($agencyid && isset($idtoname[$agencyid])) ? $idtoname[$agencyid] : 'Entries by Agency');
+?>
+<div class="headline">Read all the information released by Australian Federal Government agencies under the FOI Act in one place!</div>
+<a style='float:right' href="rss.xml.php"><img src="img/feed-icon-14x14.png" alt="RSS Icon"/> All Agencies RSS Feed</a><br>
+<?php
+try {
+    if ($agencyid) {
+        // reduce=false: individual entries are wanted here, not the per-agency count.
+        $rows = $foidocsdb->get_view("app", "byAgencyID", $agencyid, false, false, false)->rows;
+        foreach ($rows as $row) {
+            echo displayLogEntry($row, $idtoname);
+            if (!isset($startkey))
+                $startkey = $row->key;
+            $endkey = $row->key;
+        }
+    } else {
+        $rows = $foidocsdb->get_view("app", "byAgencyID?group=true", null, false, false, true)->rows;
+        if ($rows) {
+            foreach ($rows as $row) {
+                // Agencies missing from the name lookup are shown by their id.
+                $name = (isset($idtoname[$row->key]) ? $idtoname[$row->key] : $row->key);
+                echo '<a href="agency.php?id=' . urlencode($row->key) . '">' . htmlspecialchars($name, ENT_QUOTES, 'UTF-8') . " (" . htmlspecialchars($row->value, ENT_QUOTES, 'UTF-8') . " records)</a> <br>\n";
+            }
+        }
+    }
+} catch (SetteeRestClientException $e) {
+    setteErrorHandler($e);
+}
+// urlencode() leaves no characters that could break out of the attribute.
+echo "<a class='btn btn-large btn-primary' href='?end_key=" . urlencode($endkey) . "' style='float:right;'>next page <i class='icon-circle-arrow-right icon-white'></i></a>";
+include_footer_documents();
+?>
--- a/documents/charts.php
+++ b/documents/charts.php
@@ -1,6 +1,6 @@
<?php
include('template.inc.php');
-include_header_documents("");
+include_header_documents("Charts");
include_once('../include/common.inc.php');
$agenciesdb = $server->get_db('disclosr-agencies');
@@ -15,29 +15,28 @@
<h1><a href="about.php">Charts</a></h1>
<h4 class="subheader">Lorem ipsum.</h4>
</div>
-<div id="employees" style="width:1000px;height:900px;"></div>
+<div id="bydate" style="width:1000px;height:300px;"></div>
+<div id="byagency" style="width:1200px;height:300px;"></div>
<script id="source">
window.onload = function() {
$(document).ready(function() {
var
d1 = [],
- start = new Date("2009/01/01 01:00").getTime(),
- options,
- graph,
- i, x, o;
+ options1,
+ o1;
<?php
try {
- $rows = $foidocsdb->get_view("app", "byDate?group=true", null, true)->rows;
+ $rows = $foidocsdb->get_view("app", "byDateMonthYear?group=true",null, false,false,true)->rows;
$dataValues = Array();
foreach ($rows as $row) {
- $dataValues[$row->value] = $row->key;
+ $dataValues[$row->key] = $row->value;
}
$i = 0;
ksort($dataValues);
- foreach ($dataValues as $value => $key) {
+ foreach ($dataValues as $key => $value) {
$date = date_create_from_format('Y-m-d', $key);
if (date_format($date, 'U') != "") {
echo " d1.push([".date_format($date, 'U')."000, $value]);" . PHP_EOL;
@@ -52,7 +51,7 @@
- options = {
+ options1 = {
xaxis : {
mode : 'time',
labelsAngle : 45
@@ -68,19 +67,19 @@
function drawGraph (opts) {
// Clone the options, so the 'options' variable always keeps intact.
- o = Flotr._.extend(Flotr._.clone(options), opts || {});
+ o1 = Flotr._.extend(Flotr._.clone(options1), opts || {});
// Return a new graph.
return Flotr.draw(
- document.getElementById("employees"),
+ document.getElementById("bydate"),
[ d1 ],
- o
+ o1
);
}
graph = drawGraph();
- Flotr.EventAdapter.observe(container, 'flotr:select', function(area){
+ Flotr.EventAdapter.observe(document.getElementById("bydate"), 'flotr:select', function(area){
// Draw selected area
graph = drawGraph({
xaxis : { min : area.x1, max : area.x2, mode : 'time', labelsAngle : 45 },
@@ -89,10 +88,74 @@
});
// When graph is clicked, draw the graph with default area.
- Flotr.EventAdapter.observe(container, 'flotr:click', function () { graph = drawGraph(); });
+ Flotr.EventAdapter.observe(document.getElementById("bydate"), 'flotr:click', function () { graph = drawGraph(); });
});
};
+
+var d2 = [];
+var agencylabels = [];
+function agencytrackformatter(obj) {
+
+ return agencylabels[Math.floor(obj.x)] +" = "+obj.y;
+
+ }
+ function agencytickformatter(val, axis) {
+ if (agencylabels[Math.floor(val)]) {
+ return '<p style="margin-top:8em;-webkit-transform:rotate(-90deg);">'+(agencylabels[Math.floor(val)])+"</b>";
+
+ } else {
+ return "";
+ }
+ }
+<?php
+ try {
+ $rows = $foidocsdb->get_view("app", "byAgencyID?group=true",null, false,false,true)->rows;
+
+
+ $dataValues = Array();
+ $i = 0;
+ foreach ($rows as $row) {
+ echo " d2.push([".$i.", $row->value]);" . PHP_EOL;
+ echo " agencylabels.push(['".str_replace("'","",$idtoname[$row->key])."']);" . PHP_EOL;
+
+ $i++;
+ }
+ } catch (SetteeRestClientException $e) {
+ setteErrorHandler($e);
+ }
+ ?>
+ // Draw the graph
+ Flotr.draw(
+ document.getElementById("byagency"),
+ [d2],
+ {
+ bars : {
+ show : true,
+ horizontal : false,
+ shadowSize : 0,
+ barWidth : 0.5
+ },
+mouse : {
+ track : true,
+ relative : true,
+ trackFormatter: agencytrackformatter
+ },
+ yaxis : {
+ min : 0,
+ autoscaleMargin : 1
+ },
+ xaxis: {
+ minorTickFreq: 1,
+ noTicks: agencylabels.length,
+ showMinorLabels: true,
+ tickFormatter: agencytickformatter
+ },
+ legend: {
+ show: false
+ }
+ }
+ );
</script>
<?php
--- /dev/null
+++ b/documents/date.php
@@ -1,1 +1,34 @@
+<?php
+// "Entries by Date" page. Only the header, the headline/RSS links and the footer are rendered;
+// the entry listing further down is commented out.
+include('template.inc.php');
+include_header_documents("Entries by Date");
+include_once('../include/common.inc.php');
+// Only referenced by the commented-out listing below.
+$endkey = (isset($_REQUEST['end_key']) ? $_REQUEST['end_key'] : '9999-99-99');
+?>
+<div class="headline">Read all the information released by Australian Federal Government agencies under the FOI Act in one place!</div>
+<a style='float:right' href="rss.xml.php"><img src="img/feed-icon-14x14.png" alt="RSS Icon"/> All Agencies RSS Feed</a><br>
+<?php
+// Disabled: listing of entries from the "byDate" view (20 per request, descending from $endkey) and the
+// "next page" link.
+/*$agenciesdb = $server->get_db('disclosr-agencies');
+
+$idtoname = Array();
+foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) {
+ $idtoname[$row->id] = trim($row->value->name);
+}
+$foidocsdb = $server->get_db('disclosr-foidocuments');
+try {
+ $rows = $foidocsdb->get_view("app", "byDate", Array($endkey, '0000-00-00'), true, 20)->rows;
+ if ($rows) {
+ foreach ($rows as $key => $row) {
+ echo displayLogEntry($row, $idtoname);
+ if (!isset($startkey)) $startkey = $row->key;
+ $endkey = $row->key;
+ }
+ }
+} catch (SetteeRestClientException $e) {
+ setteErrorHandler($e);
+}
+echo "<a class='btn btn-large btn-primary' href='?end_key=$endkey' style='float:right;'>next page <i class='icon-circle-arrow-right icon-white'></i></a>";
+*/
+include_footer_documents();
+?>
+
--- a/documents/disclogsList.php
+++ b/documents/disclogsList.php
@@ -1,7 +1,7 @@
<?php
include('template.inc.php');
-include_header_documents("");
+include_header_documents("List of Disclosure Logs");
include_once('../include/common.inc.php');
echo "<table>
--- /dev/null
+++ b/documents/disclosr-documents.nja
@@ -1,1 +1,7 @@
-
+{
+ "venv": "",
+ "project-type": "Import from sources",
+ "name": "disclosr-documents",
+ "license": "GNU General Public License v3",
+ "description": ""
+}
--- a/documents/genericScrapers.py
+++ b/documents/genericScrapers.py
@@ -1,150 +1,281 @@
-import sys,os
+import sys
+import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import scrape
from bs4 import BeautifulSoup
from time import mktime
import feedparser
import abc
-import unicodedata, re
+import unicodedata
+import re
import dateutil
from dateutil.parser import *
from datetime import *
import codecs
+import difflib
+
+from StringIO import StringIO
+
+from pdfminer.pdfparser import PDFDocument, PDFParser
+from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter, process_pdf
+from pdfminer.pdfdevice import PDFDevice, TagExtractor
+from pdfminer.converter import TextConverter
+from pdfminer.cmapdb import CMapDB
+from pdfminer.layout import LAParams
+
+
class GenericDisclogScraper(object):
- __metaclass__ = abc.ABCMeta
- agencyID = None
- disclogURL = None
- def remove_control_chars(self, input):
- return "".join([i for i in input if ord(i) in range(32, 127)])
- def getAgencyID(self):
- """ disclosr agency id """
- if self.agencyID == None:
- self.agencyID = os.path.basename(sys.argv[0]).replace(".py","")
- return self.agencyID
-
- def getURL(self):
- """ disclog URL"""
- if self.disclogURL == None:
- agency = scrape.agencydb.get(self.getAgencyID())
- self.disclogURL = agency['FOIDocumentsURL']
- return self.disclogURL
-
- @abc.abstractmethod
- def doScrape(self):
- """ do the scraping """
- return
-
- @abc.abstractmethod
- def getDescription(self, content, entry, doc):
- """ get description"""
- return
-
+ __metaclass__ = abc.ABCMeta
+ agencyID = None
+ disclogURL = None
+
+ def remove_control_chars(self, input):
+ return "".join([i for i in input if ord(i) in range(32, 127)])
+
+ def getAgencyID(self):
+ """ disclosr agency id """
+ if self.agencyID is None:
+ self.agencyID = os.path.basename(sys.argv[0]).replace(".py", "")
+ return self.agencyID
+
+ def getURL(self):
+ """ disclog URL"""
+ if self.disclogURL is None:
+ agency = scrape.agencydb.get(self.getAgencyID())
+ self.disclogURL = agency['FOIDocumentsURL']
+ return self.disclogURL
+
+ @abc.abstractmethod
+ def doScrape(self):
+ """ do the scraping """
+ return
+
+class GenericHTMLDisclogScraper(GenericDisclogScraper):
+
+ def doScrape(self):
+ foidocsdb = scrape.couch['disclosr-foidocuments']
+ (url, mime_type, rcontent) = scrape.fetchURL(scrape.docsdb,
+ self.getURL(), "foidocuments", self.getAgencyID())
+ content = rcontent
+ dochash = scrape.mkhash(content)
+ doc = foidocsdb.get(dochash)
+ if doc is None:
+ print "saving " + dochash
+ description = "This log may have updated but as it was not in a table last time we viewed it, we cannot extract what has changed. Please refer to the agency's website Disclosure Log to see the most recent entries"
+ last_attach = scrape.getLastAttachment(scrape.docsdb, self.getURL())
+ if last_attach != None:
+ html_diff = difflib.HtmlDiff()
+ description = description + "\nChanges: "
+ description = description + html_diff.make_table(last_attach.read().split('\n'),
+ content.split('\n'))
+ edate = date.today().strftime("%Y-%m-%d")
+ doc = {'_id': dochash, 'agencyID': self.getAgencyID()
+ , 'url': self.getURL(), 'docID': dochash,
+ "date": edate, "title": "Disclosure Log Updated", "description": description}
+ foidocsdb.save(doc)
+ else:
+ print "already saved"
+
+class GenericPDFDisclogScraper(GenericDisclogScraper):
+
+ def doScrape(self):
+ foidocsdb = scrape.couch['disclosr-foidocuments']
+ (url, mime_type, content) = scrape.fetchURL(scrape.docsdb,
+ self.getURL(), "foidocuments", self.getAgencyID())
+ laparams = LAParams()
+ rsrcmgr = PDFResourceManager(caching=True)
+ outfp = StringIO()
+ device = TextConverter(rsrcmgr, outfp, codec='utf-8',
+ laparams=laparams)
+ fp = StringIO()
+ fp.write(content)
+
+ process_pdf(rsrcmgr, device, fp, set(), caching=True,
+ check_extractable=True)
+ description = outfp.getvalue()
+ fp.close()
+ device.close()
+ outfp.close()
+ dochash = scrape.mkhash(description)
+ doc = foidocsdb.get(dochash)
+ if doc is None:
+ print "saving " + dochash
+ edate = date.today().strftime("%Y-%m-%d")
+ doc = {'_id': dochash, 'agencyID': self.getAgencyID()
+ , 'url': self.getURL(), 'docID': dochash,
+ "date": edate, "title": "Disclosure Log Updated", "description": description}
+ foidocsdb.save(doc)
+ else:
+ print "already saved"
+
+
+class GenericDOCXDisclogScraper(GenericDisclogScraper):
+
+ def doScrape(self):
+ foidocsdb = scrape.couch['disclosr-foidocuments']
+ (url, mime_type, content) = scrape.fetchURL(scrape.docsdb
+ , self.getURL(), "foidocuments", self.getAgencyID())
+ mydoc = zipfile.ZipFile(file)
+ xmlcontent = mydoc.read('word/document.xml')
+ document = etree.fromstring(xmlcontent)
+ ## Fetch all the text out of the document we just created
+ paratextlist = getdocumenttext(document)
+ # Make explicit unicode version
+ newparatextlist = []
+ for paratext in paratextlist:
+ newparatextlist.append(paratext.encode("utf-8"))
+ ## Print our documnts test with two newlines under each paragraph
+ description = '\n\n'.join(newparatextlist).strip(' \t\n\r')
+ dochash = scrape.mkhash(description)
+ doc = foidocsdb.get(dochash)
+
+ if doc is None:
+ print "saving " + dochash
+ edate = time().strftime("%Y-%m-%d")
+ doc = {'_id': dochash, 'agencyID': self.getAgencyID()
+ , 'url': self.getURL(), 'docID': dochash,
+ "date": edate, "title": "Disclosure Log Updated", "description": description}
+ foidocsdb.save(doc)
+ else:
+ print "already saved"
class GenericRSSDisclogScraper(GenericDisclogScraper):
- def doScrape(self):
- foidocsdb = scrape.couch['disclosr-foidocuments']
- (url,mime_type,content) = scrape.fetchURL(scrape.docsdb, self.getURL(), "foidocuments", self.getAgencyID())
- feed = feedparser.parse(content)
- for entry in feed.entries:
- #print entry
- print entry.id
- hash = scrape.mkhash(entry.id)
- #print hash
- doc = foidocsdb.get(hash)
- #print doc
- if doc == None:
- print "saving "+ hash
- edate = datetime.fromtimestamp(mktime( entry.published_parsed)).strftime("%Y-%m-%d")
- doc = {'_id': hash, 'agencyID': self.getAgencyID(), 'url': entry.link, 'docID': entry.id,
- "date": edate,"title": entry.title}
- self.getDescription(entry,entry, doc)
- foidocsdb.save(doc)
+ def doScrape(self):
+ foidocsdb = scrape.couch['disclosr-foidocuments']
+ (url, mime_type, content) = scrape.fetchURL(scrape.docsdb,
+ self.getURL(), "foidocuments", self.getAgencyID())
+ feed = feedparser.parse(content)
+ for entry in feed.entries:
+ #print entry
+ print entry.id
+ dochash = scrape.mkhash(entry.id)
+ doc = foidocsdb.get(dochash)
+ #print doc
+ if doc is None:
+ print "saving " + dochash
+ edate = datetime.fromtimestamp(
+ mktime(entry.published_parsed)).strftime("%Y-%m-%d")
+ doc = {'_id': dochash, 'agencyID': self.getAgencyID(),
+ 'url': entry.link, 'docID': entry.id,
+ "date": edate, "title": entry.title}
+ self.getDescription(entry, entry, doc)
+ foidocsdb.save(doc)
+ else:
+ print "already saved"
+
+ def getDescription(self, content, entry, doc):
+ """ get description from rss entry"""
+ doc.update({'description': content.summary})
+ return
+
+
+class GenericOAICDisclogScraper(GenericDisclogScraper):
+ __metaclass__ = abc.ABCMeta
+
+ @abc.abstractmethod
+ def getColumns(self, columns):
+ """ rearranges columns if required """
+ return
+
+ def getColumnCount(self):
+ return 5
+
+ def getDescription(self, content, entry, doc):
+ """ get description from rss entry"""
+ descriptiontxt = ""
+ for string in content.stripped_strings:
+ descriptiontxt = descriptiontxt + " \n" + string
+ doc.update({'description': descriptiontxt})
+
+ def getTitle(self, content, entry, doc):
+ doc.update({'title': (''.join(content.stripped_strings))})
+
+ def getTable(self, soup):
+ return soup.table
+
+ def getRows(self, table):
+ return table.find_all('tr')
+
+ def getDate(self, content, entry, doc):
+ date = ''.join(content.stripped_strings).strip()
+ (a, b, c) = date.partition("(")
+ date = self.remove_control_chars(a.replace("Octber", "October"))
+ print date
+ edate = parse(date, dayfirst=True, fuzzy=True).strftime("%Y-%m-%d")
+ print edate
+ doc.update({'date': edate})
+ return
+
+ def getLinks(self, content, entry, doc):
+ links = []
+ for atag in entry.find_all("a"):
+ if atag.has_key('href'):
+ links.append(scrape.fullurl(content, atag['href']))
+ if links != []:
+ doc.update({'links': links})
+ return
+
+ def doScrape(self):
+ foidocsdb = scrape.couch['disclosr-foidocuments']
+ (url, mime_type, content) = scrape.fetchURL(scrape.docsdb,
+ self.getURL(), "foidocuments", self.getAgencyID())
+ if content is not None:
+ if mime_type == "text/html" or mime_type == "application/xhtml+xml" or mime_type == "application/xml":
+ # http://www.crummy.com/software/BeautifulSoup/documentation.html
+ print "parsing"
+ soup = BeautifulSoup(content)
+ table = self.getTable(soup)
+ for row in self.getRows(table):
+ columns = row.find_all('td')
+ if len(columns) is self.getColumnCount():
+ (id, date, title,
+ description, notes) = self.getColumns(columns)
+ print self.remove_control_chars(
+ ''.join(id.stripped_strings))
+ if id.string is None:
+ dochash = scrape.mkhash(
+ self.remove_control_chars(
+ url + (''.join(date.stripped_strings))))
else:
- print "already saved"
- def getDescription(self, content, entry, doc):
- """ get description from rss entry"""
- doc.update({'description': content.summary})
- return
-
-class GenericOAICDisclogScraper(GenericDisclogScraper):
- __metaclass__ = abc.ABCMeta
- @abc.abstractmethod
- def getColumns(self,columns):
- """ rearranges columns if required """
- return
- def getColumnCount(self):
- return 5
- def getDescription(self, content, entry, doc):
- """ get description from rss entry"""
- descriptiontxt = ""
- for string in content.stripped_strings:
- descriptiontxt = descriptiontxt + " \n" + string
- doc.update({'description': descriptiontxt})
- return
- def getTitle(self, content, entry, doc):
- doc.update({'title': (''.join(content.stripped_strings))})
- return
- def getTable(self, soup):
- return soup.table
- def getRows(self, table):
- return table.find_all('tr')
- def getDate(self, content, entry, doc):
- date = ''.join(content.stripped_strings).strip()
- (a,b,c) = date.partition("(")
- date = self.remove_control_chars(a.replace("Octber","October"))
- print date
- edate = parse(date, dayfirst=True, fuzzy=True).strftime("%Y-%m-%d")
- print edate
- doc.update({'date': edate})
- return
- def getLinks(self, content, entry, doc):
- links = []
- for atag in entry.find_all("a"):
- if atag.has_key('href'):
- links.append(scrape.fullurl(content,atag['href']))
- if links != []:
- doc.update({'links': links})
- return
-
- def doScrape(self):
- foidocsdb = scrape.couch['disclosr-foidocuments']
- (url,mime_type,content) = scrape.fetchURL(scrape.docsdb, self.getURL(), "foidocuments", self.getAgencyID())
- if content != None:
- if mime_type == "text/html" or mime_type == "application/xhtml+xml" or mime_type =="application/xml":
- # http://www.crummy.com/software/BeautifulSoup/documentation.html
- soup = BeautifulSoup(content)
- table = self.getTable(soup)
- for row in self.getRows(table):
- columns = row.find_all('td')
- if len(columns) == self.getColumnCount():
- (id, date, title, description, notes) = self.getColumns(columns)
- print self.remove_control_chars(''.join(id.stripped_strings))
- if id.string == None:
- hash = scrape.mkhash(self.remove_control_chars(url+(''.join(date.stripped_strings))))
- else:
- hash = scrape.mkhash(self.remove_control_chars(url+(''.join(id.stripped_strings))))
- doc = foidocsdb.get(hash)
-
- if doc == None:
- print "saving " +hash
- doc = {'_id': hash, 'agencyID': self.getAgencyID(), 'url': self.getURL(), 'docID': (''.join(id.stripped_strings))}
- self.getLinks(self.getURL(),row,doc)
- self.getTitle(title,row, doc)
- self.getDate(date,row, doc)
- self.getDescription(description,row, doc)
- if notes != None:
- doc.update({ 'notes': (''.join(notes.stripped_strings))})
- foidocsdb.save(doc)
- else:
- print "already saved "+hash
-
- elif len(row.find_all('th')) == self.getColumnCount():
- print "header row"
-
- else:
- print "ERROR number of columns incorrect"
- print row
-
+ dochash = scrape.mkhash(
+ self.remove_control_chars(
+ url + (''.join(id.stripped_strings))))
+ doc = foidocsdb.get(dochash)
+
+ if doc is None:
+ print "saving " + dochash
+ doc = {'_id': dochash,
+ 'agencyID': self.getAgencyID(),
+ 'url': self.getURL(),
+ 'docID': (''.join(id.stripped_strings))}
+ self.getLinks(self.getURL(), row, doc)
+ self.getTitle(title, row, doc)
+ self.getDate(date, row, doc)
+ self.getDescription(description, row, doc)
+ if notes is not None:
+ doc.update({ 'notes': (
+ ''.join(notes.stripped_strings))})
+ badtitles = ['-','Summary of FOI Request'
+ , 'FOI request(in summary form)'
+ , 'Summary of FOI request received by the ASC',
+'Summary of FOI request received by agency/minister',
+'Description of Documents Requested','FOI request',
+'Description of FOI Request','Summary of request','Description','Summary',
+'Summary of FOIrequest received by agency/minister','Summary of FOI request received','Description of FOI Request',"FOI request",'Results 1 to 67 of 67']
+ if doc['title'] not in badtitles\
+ and doc['description'] != '':
+ print "saving"
+ foidocsdb.save(doc)
+ else:
+ print "already saved " + dochash
+
+ elif len(row.find_all('th')) is self.getColumnCount():
+ print "header row"
+
+ else:
+ print "ERROR number of columns incorrect"
+ print row
+
Binary files /dev/null and b/documents/img/feed-icon-14x14.png differ
--- a/documents/index.php
+++ b/documents/index.php
@@ -1,12 +1,13 @@
<?php
-
include('template.inc.php');
include_header_documents("");
include_once('../include/common.inc.php');
-$startkey = (isset($_REQUEST['start_key']) ? $_REQUEST['start_key'] : '9999-99-99');
+$endkey = (isset($_REQUEST['end_key']) ? $_REQUEST['end_key'] : '9999-99-99');
+$enddocid = (isset($_REQUEST['end_docid']) ? $_REQUEST['end_docid'] : null);
?>
+<div class="headline">Read all the information released by Australian Federal Government agencies under the FOI Act in one place!</div>
+<a style='float:right' href="rss.xml.php"><img src="img/feed-icon-14x14.png" alt="RSS Icon"/> All Agencies RSS Feed</a><br>
<?php
-
$agenciesdb = $server->get_db('disclosr-agencies');
$idtoname = Array();
@@ -15,17 +16,20 @@
}
$foidocsdb = $server->get_db('disclosr-foidocuments');
try {
- $rows = $foidocsdb->get_view("app", "byDate", Array($startkey, '0000-00-00'), true, 20)->rows;
+ $rows = $foidocsdb->get_view("app", "byDate", Array($endkey, '0000-00-00'), true, 20,null, $enddocid)->rows;
if ($rows) {
foreach ($rows as $key => $row) {
echo displayLogEntry($row, $idtoname);
+ if (!isset($startkey))
+ $startkey = $row->key;
$endkey = $row->key;
+ $enddocid = $row->value->_id;
}
}
} catch (SetteeRestClientException $e) {
setteErrorHandler($e);
}
-echo "<a href='?start_key=$endkey'>next page</a>";
+// $endkey / $enddocid can come straight from the request when the view
+// returns no rows, so URL-encode them and escape the result for the
+// single-quoted href attribute before echoing.
+$nextquery = htmlspecialchars(http_build_query(array('end_key' => $endkey, 'end_docid' => $enddocid)), ENT_QUOTES);
+echo "<a class='btn btn-large btn-primary' href='?$nextquery' style='float:right;'>next page <i class='icon-circle-arrow-right icon-white'></i></a>";
include_footer_documents();
?>
--- a/documents/rss.xml.php
+++ b/documents/rss.xml.php
@@ -8,30 +8,39 @@
//Creating an instance of FeedWriter class.
$TestFeed = new RSS2FeedWriter();
//Setting the channel elements
-//Use wrapper functions for common channelelements
-$TestFeed->setTitle('Last Modified - All');
-$TestFeed->setLink('http://disclosurelo.gs/rss.xml.php');
-$TestFeed->setDescription('Latest entries');
- $TestFeed->setChannelElement('language', 'en-us');
- $TestFeed->setChannelElement('pubDate', date(DATE_RSS, time()));
-//Retriving informations from database
+//Retrieving information from database
$idtoname = Array();
$agenciesdb = $server->get_db('disclosr-agencies');
foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) {
$idtoname[$row->id] = trim($row->value->name);
}
$foidocsdb = $server->get_db('disclosr-foidocuments');
-$rows = $foidocsdb->get_view("app", "byDate", Array('9999-99-99','0000-00-00', 50), true)->rows;
+if (isset($_REQUEST['id'])) {
+ $rows = $foidocsdb->get_view("app", "byAgencyID", $_REQUEST['id'], false, false, false)->rows;
+ $title = $idtoname[$_REQUEST['id']];
+} else {
+ $rows = $foidocsdb->get_view("app", "byDate", Array('9999-99-99', '0000-00-00', 50), true)->rows;
+ $title = 'All Agencies';
+}
+//Use wrapper functions for common channelelements
+$TestFeed->setTitle('disclosurelo.gs Newest Entries - '.$title);
+$TestFeed->setLink('http://disclosurelo.gs/rss.xml.php'.(isset($_REQUEST['id'])? '?id='.$_REQUEST['id'] : ''));
+$TestFeed->setDescription('disclosurelo.gs Newest Entries - '.$title);
+$TestFeed->setChannelElement('language', 'en-us');
+$TestFeed->setChannelElement('pubDate', date(DATE_RSS, time()));
+
+
//print_r($rows);
foreach ($rows as $row) {
//Create an empty FeedItem
$newItem = $TestFeed->createNewItem();
//Add elements to the feed item
$newItem->setTitle($row->value->title);
- $newItem->setLink("view.php?id=".$row->value->_id);
- $newItem->setDate(date("c", strtotime($row->value->date)));
- $newItem->setDescription(displayLogEntry($row,$idtoname));
- $newItem->addElement('guid', $row->value->_id,array('isPermaLink'=>'true'));
+ $newItem->setLink("http://disclosurelo.gs/view.php?id=" . $row->value->_id);
+ $newItem->setDate(strtotime($row->value->date));
+ $newItem->setDescription(displayLogEntry($row, $idtoname));
+ $newItem->setAuthor($idtoname[$row->value->agencyID]);
+ $newItem->addElement('guid', "http://disclosurelo.gs/view.php?id=" . $row->value->_id, array('isPermaLink' => 'true'));
//Now add the feed item
$TestFeed->addItem($newItem);
}
--- a/documents/runScrapers.sh
+++ b/documents/runScrapers.sh
@@ -1,3 +1,10 @@
-for f in scrapers/*.py; do echo "Processing $f file.."; python $f; done
+# Run every scraper in scrapers/, pausing briefly after any that fails.
+for f in scrapers/*.py; do
+    # an unmatched glob is left as the literal pattern; skip it
+    [ -e "$f" ] || continue
+    echo "Processing $f file.."
+    # quote the path so a name containing spaces reaches python intact
+    if ! python "$f"; then
+        echo "error"
+        sleep 2
+    fi
+done
--- a/documents/scrape.py
+++ b/documents/scrape.py
@@ -8,186 +8,198 @@
import time
import os
import mimetypes
-import re
import urllib
import urlparse
def mkhash(input):
- return hashlib.md5(input).hexdigest().encode("utf-8")
+ return hashlib.md5(input).hexdigest().encode("utf-8")
def canonurl(url):
- r"""Return the canonical, ASCII-encoded form of a UTF-8 encoded URL, or ''
- if the URL looks invalid.
- >>> canonurl('\xe2\x9e\xa1.ws') # tinyarro.ws
- 'http://xn--hgi.ws/'
- """
- # strip spaces at the ends and ensure it's prefixed with 'scheme://'
- url = url.strip()
- if not url:
- return ''
- if not urlparse.urlsplit(url).scheme:
- url = 'http://' + url
-
- # turn it into Unicode
- #try:
- # url = unicode(url, 'utf-8')
- #except UnicodeDecodeError:
- # return '' # bad UTF-8 chars in URL
-
- # parse the URL into its components
- parsed = urlparse.urlsplit(url)
- scheme, netloc, path, query, fragment = parsed
-
- # ensure scheme is a letter followed by letters, digits, and '+-.' chars
- if not re.match(r'[a-z][-+.a-z0-9]*$', scheme, flags=re.I):
- return ''
- scheme = str(scheme)
-
- # ensure domain and port are valid, eg: sub.domain.<1-to-6-TLD-chars>[:port]
- match = re.match(r'(.+\.[a-z0-9]{1,6})(:\d{1,5})?$', netloc, flags=re.I)
- if not match:
- return ''
- domain, port = match.groups()
- netloc = domain + (port if port else '')
- netloc = netloc.encode('idna')
-
- # ensure path is valid and convert Unicode chars to %-encoded
- if not path:
- path = '/' # eg: 'http://google.com' -> 'http://google.com/'
- path = urllib.quote(urllib.unquote(path.encode('utf-8')), safe='/;')
-
- # ensure query is valid
- query = urllib.quote(urllib.unquote(query.encode('utf-8')), safe='=&?/')
-
- # ensure fragment is valid
- fragment = urllib.quote(urllib.unquote(fragment.encode('utf-8')))
-
- # piece it all back together, truncating it to a maximum of 4KB
- url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))
- return url[:4096]
+ r"""Return the canonical, ASCII-encoded form of a UTF-8 encoded URL, or ''
+ if the URL looks invalid.
+ >>> canonurl('\xe2\x9e\xa1.ws') # tinyarro.ws
+ 'http://xn--hgi.ws/'
+ """
+ # strip spaces at the ends and ensure it's prefixed with 'scheme://'
+ url = url.strip()
+ if not url:
+ return ''
+ if not urlparse.urlsplit(url).scheme:
+ url = 'http://' + url
+
+ # turn it into Unicode
+ #try:
+ # url = unicode(url, 'utf-8')
+ #except UnicodeDecodeError:
+ # return '' # bad UTF-8 chars in URL
+
+ # parse the URL into its components
+ parsed = urlparse.urlsplit(url)
+ scheme, netloc, path, query, fragment = parsed
+
+ # ensure scheme is a letter followed by letters, digits, and '+-.' chars
+ if not re.match(r'[a-z][-+.a-z0-9]*$', scheme, flags=re.I):
+ return ''
+ scheme = str(scheme)
+
+ # ensure domain and port are valid, eg: sub.domain.<1-to-6-TLD-chars>[:port]
+ match = re.match(r'(.+\.[a-z0-9]{1,6})(:\d{1,5})?$', netloc, flags=re.I)
+ if not match:
+ return ''
+ domain, port = match.groups()
+ netloc = domain + (port if port else '')
+ netloc = netloc.encode('idna')
+
+ # ensure path is valid and convert Unicode chars to %-encoded
+ if not path:
+ path = '/' # eg: 'http://google.com' -> 'http://google.com/'
+ path = urllib.quote(urllib.unquote(path.encode('utf-8')), safe='/;')
+
+ # ensure query is valid
+ query = urllib.quote(urllib.unquote(query.encode('utf-8')), safe='=&?/')
+
+ # ensure fragment is valid
+ fragment = urllib.quote(urllib.unquote(fragment.encode('utf-8')))
+
+ # piece it all back together, truncating it to a maximum of 4KB
+ url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))
+ return url[:4096]
def fullurl(url,href):
- href = href.replace(" ","%20")
- href = re.sub('#.*$','',href)
- return urljoin(url,href)
+ href = href.replace(" ","%20")
+ href = re.sub('#.*$','',href)
+ return urljoin(url,href)
#http://diveintopython.org/http_web_services/etags.html
-class NotModifiedHandler(urllib2.BaseHandler):
- def http_error_304(self, req, fp, code, message, headers):
- addinfourl = urllib2.addinfourl(fp, headers, req.get_full_url())
- addinfourl.code = code
- return addinfourl
+class NotModifiedHandler(urllib2.BaseHandler):
+ def http_error_304(self, req, fp, code, message, headers):
+ addinfourl = urllib2.addinfourl(fp, headers, req.get_full_url())
+ addinfourl.code = code
+ return addinfourl
+
+def getLastAttachment(docsdb,url):
+    """Return the most recently stored attachment for url, or None.
+
+    docsdb is the database of fetched pages; url is hashed with mkhash() to
+    find the page document. The result is whatever docsdb.get_attachment()
+    returns for the chosen attachment, or None when there is no document for
+    url or the document carries no attachments.
+    """
+    doc = docsdb.get(mkhash(url))
+    if doc is None:
+        return None
+    # a document saved after a failed download may have no "_attachments"
+    attachments = doc.get("_attachments")
+    if not attachments:
+        return None
+    # fetchURL names attachments "<epoch seconds>-<basename>", so the
+    # greatest name is the newest one; dict key order alone is arbitrary
+    last_attachment_fname = max(attachments.keys())
+    return docsdb.get_attachment(doc, last_attachment_fname)
def fetchURL(docsdb, url, fieldName, agencyID, scrape_again=True):
- url = canonurl(url)
- hash = mkhash(url)
- req = urllib2.Request(url)
- print "Fetching %s (%s)" % (url,hash)
- if url.startswith("mailto") or url.startswith("javascript") or url.startswith("#") or url == None or url == "":
- print "Not a valid HTTP url"
- return (None,None,None)
- doc = docsdb.get(hash)
- if doc == None:
- doc = {'_id': hash, 'agencyID': agencyID, 'url': url, 'fieldName':fieldName}
- else:
- if (('page_scraped' in doc) and (time.time() - doc['page_scraped']) < 60*24*14*1000):
- print "Uh oh, trying to scrape URL again too soon!"
- last_attachment_fname = doc["_attachments"].keys()[-1]
- last_attachment = docsdb.get_attachment(doc,last_attachment_fname)
- content = last_attachment
- return (doc['url'],doc['mime_type'],content)
- if scrape_again == False:
- print "Not scraping this URL again as requested"
- return (None,None,None)
-
- time.sleep(3) # wait 3 seconds to give webserver time to recover
-
- req.add_header("User-Agent", "Mozilla/4.0 (compatible; Prometheus webspider; owner maxious@lambdacomplex.org)")
- #if there is a previous version stored in couchdb, load caching helper tags
- if doc.has_key('etag'):
- req.add_header("If-None-Match", doc['etag'])
- if doc.has_key('last_modified'):
- req.add_header("If-Modified-Since", doc['last_modified'])
-
- opener = urllib2.build_opener(NotModifiedHandler())
- try:
- url_handle = opener.open(req)
- doc['url'] = url_handle.geturl() # may have followed a redirect to a new url
- headers = url_handle.info() # the addinfourls have the .info() too
- doc['etag'] = headers.getheader("ETag")
- doc['last_modified'] = headers.getheader("Last-Modified")
- doc['date'] = headers.getheader("Date")
- doc['page_scraped'] = time.time()
- doc['web_server'] = headers.getheader("Server")
- doc['via'] = headers.getheader("Via")
- doc['powered_by'] = headers.getheader("X-Powered-By")
- doc['file_size'] = headers.getheader("Content-Length")
- content_type = headers.getheader("Content-Type")
- if content_type != None:
- doc['mime_type'] = content_type.split(";")[0]
- else:
- (type,encoding) = mimetypes.guess_type(url)
- doc['mime_type'] = type
- if hasattr(url_handle, 'code'):
- if url_handle.code == 304:
- print "the web page has not been modified"
- return (None,None,None)
- else:
- content = url_handle.read()
- docsdb.save(doc)
- doc = docsdb.get(hash) # need to get a _rev
- docsdb.put_attachment(doc, content, str(time.time())+"-"+os.path.basename(url), doc['mime_type'])
- return (doc['url'], doc['mime_type'], content)
- #store as attachment epoch-filename
-
- except urllib2.URLError as e:
- error = ""
- if hasattr(e, 'reason'):
- error = "error %s in downloading %s" % (str(e.reason), url)
- elif hasattr(e, 'code'):
- error = "error %s in downloading %s" % (e.code, url)
- print error
- doc['error'] = error
- docsdb.save(doc)
- return (None,None,None)
+    """Fetch url and return (final url, mime type, content).
+
+    A document keyed by the hash of the canonical url is kept in docsdb with
+    the response headers, and each downloaded body is stored on it as an
+    attachment named "<epoch>-<basename>". The stored copy is returned
+    instead of downloading when the page was scraped recently, when
+    scrape_again is False and the page is already known, or when the server
+    answers 304 Not Modified.
+
+    Returns (None, None, None) when url is not a fetchable HTTP url, when
+    the download fails, or when a stored copy was wanted but none exists.
+    """
+    url = canonurl(url)
+    hash = mkhash(url)
+    print "Fetching %s (%s)" % (url,hash)
+    # canonurl() returns '' for anything it cannot turn into a web url
+    if not url or url.startswith(("mailto", "javascript", "#")):
+        print "Not a valid HTTP url"
+        return (None,None,None)
+    req = urllib2.Request(url)
+
+    def storedCopy():
+        # newest attachment saved on doc: names begin with the epoch time
+        # they were stored at, so the greatest name is the latest copy
+        attachments = doc.get("_attachments")
+        if not attachments:
+            return (None,None,None)
+        last_attachment = docsdb.get_attachment(doc, max(attachments.keys()))
+        return (doc['url'],doc.get('mime_type'),last_attachment.read())
+
+    doc = docsdb.get(hash)
+    if doc is None:
+        doc = {'_id': hash, 'agencyID': agencyID, 'url': url, 'fieldName':fieldName}
+    else:
+        # NOTE(review): 60*24*14*1000 seconds is roughly 233 days; 14 days
+        # would be 60*60*24*14 -- confirm the intended window
+        if (('page_scraped' in doc) and (time.time() - doc['page_scraped']) < 60*24*14*1000):
+            print "Uh oh, trying to scrape URL again too soon!"+hash
+            return storedCopy()
+        if scrape_again == False:
+            print "Not scraping this URL again as requested"
+            # nothing has been downloaded on this path, so serve the copy
+            # already held in the database
+            return storedCopy()
+
+    req.add_header("User-Agent", "Mozilla/4.0 (compatible; Prometheus webspider; owner maxious@lambdacomplex.org)")
+    #if there is a previous version stored in couchdb, load caching helper tags
+    # (skip headers that were stored as None on an earlier fetch)
+    if doc.get('etag'):
+        req.add_header("If-None-Match", doc['etag'])
+    if doc.get('last_modified'):
+        req.add_header("If-Modified-Since", doc['last_modified'])
+
+    opener = urllib2.build_opener(NotModifiedHandler())
+    try:
+        url_handle = opener.open(req)
+        if getattr(url_handle, 'code', None) == 304:
+            # answer before touching doc so the stored url/mime type are
+            # not replaced by values from the body-less 304 response
+            print "the web page has not been modified"+hash
+            return storedCopy()
+        doc['url'] = url_handle.geturl() # may have followed a redirect to a new url
+        headers = url_handle.info() # the addinfourls have the .info() too
+        doc['etag'] = headers.getheader("ETag")
+        doc['last_modified'] = headers.getheader("Last-Modified")
+        doc['date'] = headers.getheader("Date")
+        doc['page_scraped'] = time.time()
+        doc['web_server'] = headers.getheader("Server")
+        doc['via'] = headers.getheader("Via")
+        doc['powered_by'] = headers.getheader("X-Powered-By")
+        doc['file_size'] = headers.getheader("Content-Length")
+        content_type = headers.getheader("Content-Type")
+        if content_type is not None:
+            doc['mime_type'] = content_type.split(";")[0]
+        else:
+            # no Content-Type header: guess from the url's extension
+            (type,encoding) = mimetypes.guess_type(url)
+            doc['mime_type'] = type
+        print "new webpage loaded"
+        content = url_handle.read()
+        docsdb.save(doc)
+        doc = docsdb.get(hash) # need to get a _rev
+        #store as attachment epoch-filename
+        docsdb.put_attachment(doc, content, str(time.time())+"-"+os.path.basename(url), doc['mime_type'])
+        return (doc['url'], doc['mime_type'], content)
+
+    except urllib2.URLError as e:
+        print "error!"
+        error = ""
+        if hasattr(e, 'reason'):
+            error = "error %s in downloading %s" % (str(e.reason), url)
+        elif hasattr(e, 'code'):
+            error = "error %s in downloading %s" % (e.code, url)
+        print error
+        # record the failure on the page document so it is visible later
+        doc['error'] = error
+        docsdb.save(doc)
+        return (None,None,None)
def scrapeAndStore(docsdb, url, depth, fieldName, agencyID):
- (url,mime_type,content) = fetchURL(docsdb, url, fieldName, agencyID)
- badURLs = ["http://www.ausport.gov.au/supporting/funding/grants_and_scholarships/grant_funding_report"]
- if content != None and depth > 0 and url != "http://www.ausport.gov.au/supporting/funding/grants_and_scholarships/grant_funding_report":
- if mime_type == "text/html" or mime_type == "application/xhtml+xml" or mime_type =="application/xml":
- # http://www.crummy.com/software/BeautifulSoup/documentation.html
- soup = BeautifulSoup(content)
- navIDs = soup.findAll(id=re.compile('nav|Nav|menu|bar|left|right|sidebar|more-links|breadcrumb|footer|header'))
- for nav in navIDs:
- print "Removing element", nav['id']
- nav.extract()
- navClasses = soup.findAll(attrs={'class' : re.compile('nav|menu|bar|left|right|sidebar|more-links|breadcrumb|footer|header')})
- for nav in navClasses:
- print "Removing element", nav['class']
- nav.extract()
- links = soup.findAll('a') # soup.findAll('a', id=re.compile("^p-"))
- linkurls = set([])
- for link in links:
- if link.has_key("href"):
- if link['href'].startswith("http"):
- # lets not do external links for now
- # linkurls.add(link['href'])
- None
- if link['href'].startswith("mailto"):
- # not http
- None
- if link['href'].startswith("javascript"):
- # not http
- None
- else:
- # remove anchors and spaces in urls
- linkurls.add(fullurl(url,link['href']))
- for linkurl in linkurls:
- #print linkurl
- scrapeAndStore(docsdb, linkurl, depth-1, fieldName, agencyID)
+ (url,mime_type,content) = fetchURL(docsdb, url, fieldName, agencyID)
+ badURLs = ["http://www.ausport.gov.au/supporting/funding/grants_and_scholarships/grant_funding_report"]
+ if content != None and depth > 0 and url != "http://www.ausport.gov.au/supporting/funding/grants_and_scholarships/grant_funding_report":
+ if mime_type == "text/html" or mime_type == "application/xhtml+xml" or mime_type =="application/xml":
+ # http://www.crummy.com/software/BeautifulSoup/documentation.html
+ soup = BeautifulSoup(content)
+ navIDs = soup.findAll(id=re.compile('nav|Nav|menu|bar|left|right|sidebar|more-links|breadcrumb|footer|header'))
+ for nav in navIDs:
+ print "Removing element", nav['id']
+ nav.extract()
+ navClasses = soup.findAll(attrs={'class' : re.compile('nav|menu|bar|left|right|sidebar|more-links|breadcrumb|footer|header')})
+ for nav in navClasses:
+ print "Removing element", nav['class']
+ nav.extract()
+ links = soup.findAll('a') # soup.findAll('a', id=re.compile("^p-"))
+ linkurls = set([])
+ for link in links:
+ if link.has_key("href"):
+ if link['href'].startswith("http"):
+ # lets not do external links for now
+ # linkurls.add(link['href'])
+ None
+ if link['href'].startswith("mailto"):
+ # not http
+ None
+ if link['href'].startswith("javascript"):
+ # not http
+ None
+ else:
+ # remove anchors and spaces in urls
+ linkurls.add(fullurl(url,link['href']))
+ for linkurl in linkurls:
+ #print linkurl
+ scrapeAndStore(docsdb, linkurl, depth-1, fieldName, agencyID)
#couch = couchdb.Server('http://192.168.1.148:5984/')
couch = couchdb.Server('http://127.0.0.1:5984/')
@@ -196,20 +208,20 @@
docsdb = couch['disclosr-documents']
if __name__ == "__main__":
- for row in agencydb.view('app/getScrapeRequired'): #not recently scraped agencies view?
- agency = agencydb.get(row.id)
- print agency['name']
- for key in agency.keys():
- if key == "FOIDocumentsURL" and "status" not in agency.keys:
- scrapeAndStore(docsdb, agency[key],0,key,agency['_id'])
- if key == 'website' and False:
- scrapeAndStore(docsdb, agency[key],0,key,agency['_id'])
- agency['metadata']['lastScraped'] = time.time()
- if key.endswith('URL') and False:
- print key
- depth = 1
- if 'scrapeDepth' in agency.keys():
- depth = agency['scrapeDepth']
- scrapeAndStore(docsdb, agency[key],depth,key,agency['_id'])
- agencydb.save(agency)
-
+    # Walk the agencies returned by the getScrapeRequired view and scrape the
+    # urls stored on each one; the "and False" branches are switched off.
+    for row in agencydb.view('app/getScrapeRequired'): #not recently scraped agencies view?
+        agency = agencydb.get(row.id)
+        print agency['name']
+        for key in agency.keys():
+            # membership is tested on the document itself; "agency.keys"
+            # without a call is a method object and cannot be searched
+            if key == "FOIDocumentsURL" and "status" not in agency:
+                scrapeAndStore(docsdb, agency[key],0,key,agency['_id'])
+            if key == 'website' and False:
+                scrapeAndStore(docsdb, agency[key],0,key,agency['_id'])
+                agency['metadata']['lastScraped'] = time.time()
+            if key.endswith('URL') and False:
+                print key
+                # crawl one level deep unless the agency says otherwise
+                depth = 1
+                if 'scrapeDepth' in agency.keys():
+                    depth = agency['scrapeDepth']
+                scrapeAndStore(docsdb, agency[key],depth,key,agency['_id'])
+        agencydb.save(agency)
+
--- /dev/null
+++ b/documents/scrapers/0049d35216493c545ef5f7f000e6b252.py
@@ -1,1 +1,19 @@
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+
+class ScraperImplementation(genericScrapers.GenericPDFDisclogScraper):
+
+ def __init__(self):
+ super(ScraperImplementation, self).__init__()
+
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ScraperImplementation,
+ genericScrapers.GenericPDFDisclogScraper)
+ print 'Instance:', isinstance(ScraperImplementation(),
+ genericScrapers.GenericPDFDisclogScraper)
+ ScraperImplementation().doScrape()
+
--- a/documents/scrapers/0049d35216493c545ef5f7f000e6b252.txt
+++ /dev/null
@@ -1,2 +1,1 @@
-pdf
--- /dev/null
+++ b/documents/scrapers/00a294de663db69062ca09aede7c0487.py
@@ -1,1 +1,47 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import dateutil
+from dateutil.parser import *
+from datetime import *
+
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+
+ def __init__(self):
+ super(ScraperImplementation, self).__init__()
+ def getDate(self, content, entry, doc):
+ date = ''.join(entry.find('th').stripped_strings).strip()
+ (a, b, c) = date.partition("(")
+ date = self.remove_control_chars(a.replace("Octber", "October"))
+ print date
+ edate = parse(date, dayfirst=True, fuzzy=True).strftime("%Y-%m-%d")
+ print edate
+ doc.update({'date': edate})
+ return
+ def getColumnCount(self):
+ return 4
+
+ def getTable(self, soup):
+ return soup.find(summary="List of Defence documents released under Freedom of Information requets")
+
+ def getColumns(self, columns):
+ (id, description, access, notes) = columns
+ return (id, None, description, description, notes)
+
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+ print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+
+ nsi = ScraperImplementation()
+ nsi.disclogURL = "http://www.defence.gov.au/foi/disclosure_log_201213.cfm"
+ nsi.doScrape()
+
+ nsi.disclogURL = "http://www.defence.gov.au/foi/disclosure_log_201112.cfm"
+ nsi.doScrape()
+
+ nsi.disclogURL = "http://www.defence.gov.au/foi/disclosure_log_201011.cfm"
+ nsi.doScrape()
+
+
--- a/documents/scrapers/00a294de663db69062ca09aede7c0487.txt
+++ /dev/null
@@ -1,2 +1,1 @@
-multipage
--- /dev/null
+++ b/documents/scrapers/0ae822d1a748e60d90f0b79b97d5a3e5.py
@@ -1,1 +1,19 @@
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+
+class ScraperImplementation(genericScrapers.GenericHTMLDisclogScraper):
+
+ def __init__(self):
+ super(ScraperImplementation, self).__init__()
+
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ScraperImplementation,
+ genericScrapers.GenericHTMLDisclogScraper)
+ print 'Instance:', isinstance(ScraperImplementation(),
+ genericScrapers.GenericHTMLDisclogScraper)
+ ScraperImplementation().doScrape()
+
--- a/documents/scrapers/0ae822d1a748e60d90f0b79b97d5a3e5.txt
+++ /dev/null
@@ -1,2 +1,1 @@
-ACMA style
--- /dev/null
+++ b/documents/scrapers/1803322b27286950cab0c543168b5f21.py
@@ -1,1 +1,58 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import dateutil
+from dateutil.parser import *
+from datetime import *
+import scrape
+from bs4 import BeautifulSoup
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+ def __init__(self):
+ super(ScraperImplementation, self).__init__()
+
+ def getDescription(self,content, entry,doc):
+ link = None
+ links = []
+ description = ""
+ for atag in entry.find_all('a'):
+ if atag.has_key('href'):
+ link = scrape.fullurl(self.getURL(), atag['href'])
+ (url, mime_type, htcontent) = scrape.fetchURL(scrape.docsdb, link, "foidocuments", self.getAgencyID(), False)
+ if htcontent != None:
+ if mime_type == "text/html" or mime_type == "application/xhtml+xml" or mime_type =="application/xml":
+ soup = BeautifulSoup(htcontent)
+ row = soup.find(id="content_div_148050")
+ description = ''.join(row.stripped_strings)
+ for atag in row.find_all("a"):
+ if atag.has_key('href'):
+ links.append(scrape.fullurl(link, atag['href']))
+
+ if links != []:
+ doc.update({'links': links})
+ if description != "":
+ doc.update({ 'description': description})
+ def getColumnCount(self):
+ return 4
+
+ def getColumns(self, columns):
+ (id, date, datepub, title) = columns
+ return (id, date, title, title, None)
+
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+ print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+
+ nsi = ScraperImplementation()
+ nsi.disclogURL = "http://www.dbcde.gov.au/about_us/freedom_of_information_disclosure_log/foi_list?result_146858_result_page=1"
+ nsi.doScrape()
+ nsi.disclogURL = "http://www.dbcde.gov.au/about_us/freedom_of_information_disclosure_log/foi_list?result_146858_result_page=2"
+ nsi.doScrape()
+ nsi.disclogURL = "http://www.dbcde.gov.au/about_us/freedom_of_information_disclosure_log/foi_list?result_146858_result_page=3"
+ nsi.doScrape()
+ nsi.disclogURL = "http://www.dbcde.gov.au/about_us/freedom_of_information_disclosure_log/foi_list?result_146858_result_page=4"
+ nsi.doScrape()
+ nsi.disclogURL = "http://www.dbcde.gov.au/about_us/freedom_of_information_disclosure_log/foi_list?result_146858_result_page=5"
+ nsi.doScrape()
+
--- a/documents/scrapers/1803322b27286950cab0c543168b5f21.txt
+++ /dev/null
@@ -1,2 +1,1 @@
-multipage log
--- a/documents/scrapers/227cb6eb7d2c9f8a6e846df7447d6caa.py
+++ b/documents/scrapers/227cb6eb7d2c9f8a6e846df7447d6caa.py
@@ -21,9 +21,10 @@
for row in soup.find(class_ = "ms-rteTable-GreyAlternating").find_all('tr'):
if row != None:
rowtitle = row.find('th').string
- description = description + "\n" + rowtitle + ": "
+ if rowtitle != None:
+ description = description + "\n" + rowtitle + ": "
for text in row.find('td').stripped_strings:
- description = description + text
+ description = description + text
for atag in row.find_all("a"):
if atag.has_key('href'):
links.append(scrape.fullurl(link,atag['href']))
--- /dev/null
+++ b/documents/scrapers/31685505438d393f45a90f442b8fa27f.py
@@ -1,1 +1,19 @@
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+
+class ScraperImplementation(genericScrapers.GenericPDFDisclogScraper):
+
+ def __init__(self):
+ super(ScraperImplementation, self).__init__()
+
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ScraperImplementation,
+ genericScrapers.GenericPDFDisclogScraper)
+ print 'Instance:', isinstance(ScraperImplementation(),
+ genericScrapers.GenericPDFDisclogScraper)
+ ScraperImplementation().doScrape()
+
--- a/documents/scrapers/31685505438d393f45a90f442b8fa27f.txt
+++ /dev/null
@@ -1,2 +1,1 @@
-pdf
--- /dev/null
+++ b/documents/scrapers/3e2f110af49d62833a835bd257771ffb.py
@@ -1,1 +1,19 @@
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+
+class ScraperImplementation(genericScrapers.GenericHTMLDisclogScraper):
+
+ def __init__(self):
+ super(ScraperImplementation, self).__init__()
+
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ScraperImplementation,
+ genericScrapers.GenericHTMLDisclogScraper)
+ print 'Instance:', isinstance(ScraperImplementation(),
+ genericScrapers.GenericHTMLDisclogScraper)
+ ScraperImplementation().doScrape()
+
--- a/documents/scrapers/3e2f110af49d62833a835bd257771ffb.txt
+++ /dev/null
@@ -1,2 +1,1 @@
-no disclog
--- /dev/null
+++ b/documents/scrapers/41a166419503bb50e410c58be54c102f.py
@@ -1,1 +1,27 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+from datetime import date
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+ def getTable(self,soup):
+ return soup.find(id= "ctl00_MSO_ContentDiv").table
+
+ def getColumns(self,columns):
+ (id, title, description, notes) = columns
+ return (id, title, title, description, notes)
+ def getDate(self, content, entry, doc):
+ edate = date.today().strftime("%Y-%m-%d")
+ doc.update({'date': edate})
+ return
+ def getColumnCount(self):
+ return 4
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+ print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+ ScraperImplementation().doScrape()
+
--- a/documents/scrapers/41a166419503bb50e410c58be54c102f.txt
+++ /dev/null
@@ -1,1 +1,1 @@
-aspx
+
--- /dev/null
+++ b/documents/scrapers/4d2af2dcc72f1703bbf04b13b03720a8.py
@@ -1,1 +1,19 @@
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+
+class ScraperImplementation(genericScrapers.GenericHTMLDisclogScraper):
+
+ def __init__(self):
+ super(ScraperImplementation, self).__init__()
+
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ScraperImplementation,
+ genericScrapers.GenericHTMLDisclogScraper)
+ print 'Instance:', isinstance(ScraperImplementation(),
+ genericScrapers.GenericHTMLDisclogScraper)
+ ScraperImplementation().doScrape()
+
--- a/documents/scrapers/4d2af2dcc72f1703bbf04b13b03720a8.txt
+++ /dev/null
@@ -1,2 +1,1 @@
-no disclog
--- a/documents/scrapers/50601505ef69483121a6d130bb0515e4.txt
+++ b/documents/scrapers/50601505ef69483121a6d130bb0515e4.txt
@@ -1,1 +1,1 @@
-apsc has ACMA style disclog
+ACMA style
--- /dev/null
+++ b/documents/scrapers/525c3953187da08cd702359b2fc2997f.py
@@ -1,1 +1,19 @@
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+
+class ScraperImplementation(genericScrapers.GenericHTMLDisclogScraper):
+
+ def __init__(self):
+ super(ScraperImplementation, self).__init__()
+
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ScraperImplementation,
+ genericScrapers.GenericHTMLDisclogScraper)
+ print 'Instance:', isinstance(ScraperImplementation(),
+ genericScrapers.GenericHTMLDisclogScraper)
+ ScraperImplementation().doScrape()
+
--- a/documents/scrapers/525c3953187da08cd702359b2fc2997f.txt
+++ /dev/null
@@ -1,2 +1,1 @@
-no disclog
--- /dev/null
+++ b/documents/scrapers/627f116dfe42c9f27ad6747be0aa44e2.py
@@ -1,1 +1,19 @@
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+
+class ScraperImplementation(genericScrapers.GenericHTMLDisclogScraper):
+
+ def __init__(self):
+ super(ScraperImplementation, self).__init__()
+
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ScraperImplementation,
+ genericScrapers.GenericHTMLDisclogScraper)
+ print 'Instance:', isinstance(ScraperImplementation(),
+ genericScrapers.GenericHTMLDisclogScraper)
+ ScraperImplementation().doScrape()
+
--- a/documents/scrapers/627f116dfe42c9f27ad6747be0aa44e2.txt
+++ /dev/null
@@ -1,1 +1,1 @@
-no disclog
+
--- /dev/null
+++ b/documents/scrapers/6afdde1d4ff1ad8d8cfe1a8675ea83bd.py
@@ -1,1 +1,19 @@
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+
+class ScraperImplementation(genericScrapers.GenericPDFDisclogScraper):
+
+ def __init__(self):
+ super(ScraperImplementation, self).__init__()
+
+
+if __name__ == '__main__':
+    # Self-check against the base class this scraper actually extends.
+    print 'Subclass:', issubclass(ScraperImplementation,
+        genericScrapers.GenericPDFDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(),
+        genericScrapers.GenericPDFDisclogScraper)
+    ScraperImplementation().doScrape()
+
--- a/documents/scrapers/6afdde1d4ff1ad8d8cfe1a8675ea83bd.txt
+++ /dev/null
@@ -1,2 +1,1 @@
-PDF
--- /dev/null
+++ b/documents/scrapers/795e7a8afb39a420360aa207b0cb1306.py
@@ -1,1 +1,19 @@
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+
+class ScraperImplementation(genericScrapers.GenericHTMLDisclogScraper):
+
+ def __init__(self):
+ super(ScraperImplementation, self).__init__()
+
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ScraperImplementation,
+ genericScrapers.GenericHTMLDisclogScraper)
+ print 'Instance:', isinstance(ScraperImplementation(),
+ genericScrapers.GenericHTMLDisclogScraper)
+ ScraperImplementation().doScrape()
+
--- a/documents/scrapers/795e7a8afb39a420360aa207b0cb1306.txt
+++ /dev/null
@@ -1,2 +1,1 @@
-no disclog
--- /dev/null
+++ b/documents/scrapers/7b39ce7f362a0af9a711eaf223943eea.py
@@ -1,1 +1,19 @@
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+
+class ScraperImplementation(genericScrapers.GenericHTMLDisclogScraper):
+
+ def __init__(self):
+ super(ScraperImplementation, self).__init__()
+
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ScraperImplementation,
+ genericScrapers.GenericHTMLDisclogScraper)
+ print 'Instance:', isinstance(ScraperImplementation(),
+ genericScrapers.GenericHTMLDisclogScraper)
+ ScraperImplementation().doScrape()
+
--- a/documents/scrapers/7b39ce7f362a0af9a711eaf223943eea.txt
+++ /dev/null
@@ -1,2 +1,1 @@
-no disclog
--- /dev/null
+++ b/documents/scrapers/7c6adc1d41cf029bf1a0959e5156477a.py
@@ -1,1 +1,51 @@
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from datetime import date
+from pyquery import PyQuery as pq
+from lxml import etree
+import urllib
+import dateutil
+from dateutil.parser import *
+class ACMADisclogScraper(genericScrapers.GenericDisclogScraper):
+
+    def doScrape(self):
+        """Save one document per <table> on the disclosure log page.
+
+        The page at self.getURL() is fetched through scrape.fetchURL. For each
+        table, the <thead> text is the title and the second cell of each body
+        row supplies, in order: date, two description parts, link, deletion
+        date and notes (exactly six rows are unpacked). A document is saved to
+        the 'disclosr-foidocuments' database unless one with the same title
+        hash already exists.
+        """
+        foidocsdb = scrape.couch['disclosr-foidocuments']
+        (url, mime_type, content) = scrape.fetchURL(scrape.docsdb,
+             self.getURL(), "foidocuments", self.getAgencyID())
+
+        d = pq(content)
+        d.make_links_absolute(base_url = self.getURL())
+        for table in d('table').items():
+            title= table('thead').text()
+            print title
+            (idate,descA,descB,link,deldate,notes) = table('tbody tr').map(lambda i, e: pq(e).children().eq(1).text())
+            links = table('a').map(lambda i, e: pq(e).attr('href'))
+            description = descA+" "+descB
+            edate = parse(idate[:12], dayfirst=True, fuzzy=True).strftime("%Y-%m-%d")
+            print edate
+            dochash = scrape.mkhash(self.remove_control_chars(title))
+            doc = foidocsdb.get(dochash)
+            if doc is None:
+                print "saving " + dochash
+                # Store the date parsed from the table; it used to be
+                # overwritten with today's date at this point.
+                doc = {'_id': dochash, 'agencyID': self.getAgencyID()
+                , 'url': self.getURL(), 'docID': dochash,
+                "links": links,
+                "date": edate, "notes": notes, "title": title, "description": description}
+                #print doc
+                foidocsdb.save(doc)
+            else:
+                print "already saved"
+
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ACMADisclogScraper,
+ genericScrapers.GenericDisclogScraper)
+ print 'Instance:', isinstance(ACMADisclogScraper(),
+ genericScrapers.GenericDisclogScraper)
+ ACMADisclogScraper().doScrape()
+
--- a/documents/scrapers/7c6adc1d41cf029bf1a0959e5156477a.txt
+++ /dev/null
@@ -1,1 +1,1 @@
-acma style
+
--- /dev/null
+++ b/documents/scrapers/8317df630946937864d31a4728ad8ee8.py
@@ -1,1 +1,19 @@
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+
+class ScraperImplementation(genericScrapers.GenericPDFDisclogScraper):
+
+ def __init__(self):
+ super(ScraperImplementation, self).__init__()
+
+
+if __name__ == '__main__':
+    # Self-check against the base class this scraper actually extends.
+    print 'Subclass:', issubclass(ScraperImplementation,
+        genericScrapers.GenericPDFDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(),
+        genericScrapers.GenericPDFDisclogScraper)
+    ScraperImplementation().doScrape()
+
--- a/documents/scrapers/8317df630946937864d31a4728ad8ee8.txt
+++ /dev/null
@@ -1,2 +1,1 @@
-pdf
--- /dev/null
+++ b/documents/scrapers/8796220032faf94501bd366763263685.py
@@ -1,1 +1,37 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import dateutil
+from dateutil.parser import *
+from datetime import *
+
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+
+ def __init__(self):
+ super(ScraperImplementation, self).__init__()
+
+ def getColumnCount(self):
+ return 6
+
+ def getColumns(self, columns):
+ (id, date, title, description, datepub, notes) = columns
+ return (id, date, title, description, notes)
+
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+ print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+
+ nsi = ScraperImplementation()
+ nsi.disclogURL = "http://www.dpmc.gov.au/foi/ips/disclosure_logs/pmo/2011-12.cfm"
+ nsi.doScrape()
+ nsi.disclogURL = "http://www.dpmc.gov.au/foi/ips/disclosure_logs/dpmc/2011-12.cfm"
+ nsi.doScrape()
+ nsi.disclogURL = "http://www.dpmc.gov.au/foi/ips/disclosure_logs/dpmc/2012-13.cfm"
+ nsi.doScrape()
+ nsi.disclogURL = "http://www.dpmc.gov.au/foi/ips/disclosure_logs/omsi/2011-12.cfm"
+ nsi.doScrape()
+ nsi.disclogURL = "http://www.dpmc.gov.au/foi/ips/disclosure_logs/omps/2012-13.cfm"
+ nsi.doScrape()
+
--- a/documents/scrapers/8796220032faf94501bd366763263685.txt
+++ /dev/null
@@ -1,2 +1,1 @@
-multiple pages
--- a/documents/scrapers/8e874a2fde8aa0ccdc6d14573d766540.py
+++ b/documents/scrapers/8e874a2fde8aa0ccdc6d14573d766540.py
@@ -3,7 +3,7 @@
import genericScrapers
import scrape
from bs4 import BeautifulSoup
-import codecs
+import codecs
#http://www.doughellmann.com/PyMOTW/abc/
class NewScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
def getDescription(self,content, entry,doc):
@@ -20,7 +20,7 @@
soup = BeautifulSoup(htcontent)
for text in soup.find(id="divFullWidthColumn").stripped_strings:
description = description + text.encode('ascii', 'ignore')
-
+
for atag in soup.find(id="divFullWidthColumn").find_all("a"):
if atag.has_key('href'):
links.append(scrape.fullurl(link,atag['href']))
@@ -76,11 +76,10 @@
if __name__ == '__main__':
print 'Subclass:', issubclass(NewScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
print 'Instance:', isinstance(NewScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
- #NewScraperImplementation().doScrape()
+ NewScraperImplementation().doScrape()
print 'Subclass:', issubclass(OldScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
print 'Instance:', isinstance(OldScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
osi = OldScraperImplementation()
osi.disclogURL = "http://archive.treasury.gov.au/content/foi_publications.asp?year=-1&abstract=0&classification=&=&titl=Disclosure+Log+-+Documents+Released+Under+FOI"
osi.doScrape()
-# old site too
--- /dev/null
+++ b/documents/scrapers/9f4815bfdcb918a036e4bb43a30f8d77.py
@@ -1,1 +1,19 @@
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+
+class ScraperImplementation(genericScrapers.GenericHTMLDisclogScraper):
+
+ def __init__(self):
+ super(ScraperImplementation, self).__init__()
+
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ScraperImplementation,
+ genericScrapers.GenericHTMLDisclogScraper)
+ print 'Instance:', isinstance(ScraperImplementation(),
+ genericScrapers.GenericHTMLDisclogScraper)
+ ScraperImplementation().doScrape()
+
--- a/documents/scrapers/9f4815bfdcb918a036e4bb43a30f8d77.txt
+++ /dev/null
@@ -1,1 +1,1 @@
-no disclog
+
--- /dev/null
+++ b/documents/scrapers/b0a3281ba66efe173c5a33d5ef90ff76.py
@@ -1,1 +1,35 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import dateutil
+from dateutil.parser import *
+from datetime import *
+
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+
+ def __init__(self):
+ super(ScraperImplementation, self).__init__()
+
+ def getColumnCount(self):
+ return 2
+
+ def getColumns(self, columns):
+ (date, title) = columns
+ return (title, date, title, title, None)
+
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+ print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+
+ nsi = ScraperImplementation()
+ nsi.disclogURL = "http://www.immi.gov.au/about/foi/foi-disclosures-2012.htm"
+ nsi.doScrape()
+ nsi.disclogURL = "http://www.immi.gov.au/about/foi/foi-disclosures-2011.htm"
+ nsi.doScrape()
+ nsi.disclogURL = "http://www.immi.gov.au/about/foi/foi-disclosures-2010.htm"
+ nsi.doScrape()
+ nsi.disclogURL = "http://www.immi.gov.au/about/foi/foi-disclosures-2009.htm"
+ nsi.doScrape()
+
--- a/documents/scrapers/b0a3281ba66efe173c5a33d5ef90ff76.txt
+++ /dev/null
@@ -1,2 +1,1 @@
-multipage immi
--- /dev/null
+++ b/documents/scrapers/bb96fe4065afb7e0872136dd657f9369.py
@@ -1,1 +1,19 @@
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+
+class ScraperImplementation(genericScrapers.GenericHTMLDisclogScraper):
+
+ def __init__(self):
+ super(ScraperImplementation, self).__init__()
+
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ScraperImplementation,
+ genericScrapers.GenericHTMLDisclogScraper)
+ print 'Instance:', isinstance(ScraperImplementation(),
+ genericScrapers.GenericHTMLDisclogScraper)
+ ScraperImplementation().doScrape()
+
--- a/documents/scrapers/bb96fe4065afb7e0872136dd657f9369.txt
+++ /dev/null
@@ -1,1 +1,1 @@
-no disclog
+
--- /dev/null
+++ b/documents/scrapers/bf6e587f166040b63681cd2ff76fbfdf.py
@@ -1,1 +1,19 @@
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+
+class ScraperImplementation(genericScrapers.GenericHTMLDisclogScraper):
+
+ def __init__(self):
+ super(ScraperImplementation, self).__init__()
+
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ScraperImplementation,
+ genericScrapers.GenericHTMLDisclogScraper)
+ print 'Instance:', isinstance(ScraperImplementation(),
+ genericScrapers.GenericHTMLDisclogScraper)
+ ScraperImplementation().doScrape()
+
--- a/documents/scrapers/bf6e587f166040b63681cd2ff76fbfdf.txt
+++ /dev/null
@@ -1,1 +1,1 @@
-no disclog
+
--- a/documents/scrapers/c1302c8d7cbbd911f0d4d8a4128f8079.txt
+++ b/documents/scrapers/c1302c8d7cbbd911f0d4d8a4128f8079.txt
@@ -1,1 +1,1 @@
-uses RET disclog
+parent
--- /dev/null
+++ b/documents/scrapers/c57c0bf315ce5977e730905707a2f6a3.py
@@ -1,1 +1,19 @@
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+
+class ScraperImplementation(genericScrapers.GenericPDFDisclogScraper):
+
+ def __init__(self):
+ super(ScraperImplementation, self).__init__()
+
+
+if __name__ == '__main__':
+    # Self-check against the base class this scraper actually extends.
+    print 'Subclass:', issubclass(ScraperImplementation,
+        genericScrapers.GenericPDFDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(),
+        genericScrapers.GenericPDFDisclogScraper)
+    ScraperImplementation().doScrape()
+
--- a/documents/scrapers/c57c0bf315ce5977e730905707a2f6a3.txt
+++ /dev/null
@@ -1,3 +1,1 @@
-# pdf
-http://www.awm.gov.au/about/AWM_Disclosure_Log.pdf
--- /dev/null
+++ b/documents/scrapers/cb7f40e3495b682de6eee61bf09c1cfc.py
@@ -1,1 +1,19 @@
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+
+class ScraperImplementation(genericScrapers.GenericHTMLDisclogScraper):
+
+ def __init__(self):
+ super(ScraperImplementation, self).__init__()
+
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ScraperImplementation,
+ genericScrapers.GenericHTMLDisclogScraper)
+ print 'Instance:', isinstance(ScraperImplementation(),
+ genericScrapers.GenericHTMLDisclogScraper)
+ ScraperImplementation().doScrape()
+
--- a/documents/scrapers/cb7f40e3495b682de6eee61bf09c1cfc.txt
+++ /dev/null
@@ -1,1 +1,1 @@
-no disclog
+
--- /dev/null
+++ b/documents/scrapers/d72744fb1e5d6e87af9a5ea16cc27fa5.py
@@ -1,1 +1,49 @@
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from datetime import date
+from pyquery import PyQuery as pq
+from lxml import etree
+import urllib
+import dateutil
+from dateutil.parser import *
+class ACMADisclogScraper(genericScrapers.GenericDisclogScraper):
+
+    def doScrape(self):
+        """Save one document per '.item-list' element on the disclosure log page.
+
+        The page at self.getURL() is fetched through scrape.fetchURL. For each
+        item, the <h3> text is the title, the <ul> text is the description and
+        every <a href> becomes a link. The date is today's date. A document is
+        saved to the 'disclosr-foidocuments' database unless one with the same
+        title hash already exists.
+        """
+        foidocsdb = scrape.couch['disclosr-foidocuments']
+        (url, mime_type, content) = scrape.fetchURL(scrape.docsdb,
+             self.getURL(), "foidocuments", self.getAgencyID())
+
+        d = pq(content)
+        d.make_links_absolute(base_url = self.getURL())
+        for item in d('.item-list').items():
+            title= item('h3').text()
+            print title
+            links = item('a').map(lambda i, e: pq(e).attr('href'))
+            # Keep the <h3> title; the previous chained assignment
+            # (description = title = ...) replaced it with the list text.
+            description = item('ul').text()
+            edate = date.today().strftime("%Y-%m-%d")
+            print edate
+            dochash = scrape.mkhash(self.remove_control_chars(title))
+            doc = foidocsdb.get(dochash)
+            if doc is None:
+                print "saving " + dochash
+                doc = {'_id': dochash, 'agencyID': self.getAgencyID()
+                , 'url': self.getURL(), 'docID': dochash,
+                "links": links,
+                "date": edate, "title": title, "description": description}
+                #print doc
+                foidocsdb.save(doc)
+            else:
+                print "already saved"
+
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ACMADisclogScraper,
+ genericScrapers.GenericDisclogScraper)
+ print 'Instance:', isinstance(ACMADisclogScraper(),
+ genericScrapers.GenericDisclogScraper)
+ ACMADisclogScraper().doScrape()
+
--- a/documents/scrapers/d72744fb1e5d6e87af9a5ea16cc27fa5.txt
+++ /dev/null
@@ -1,1 +1,1 @@
-acma style
+
--- /dev/null
+++ b/documents/scrapers/e770921522a49dc77de208cc724ce134.py
@@ -1,1 +1,19 @@
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+
+class ScraperImplementation(genericScrapers.GenericHTMLDisclogScraper):
+
+ def __init__(self):
+ super(ScraperImplementation, self).__init__()
+
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ScraperImplementation,
+ genericScrapers.GenericHTMLDisclogScraper)
+ print 'Instance:', isinstance(ScraperImplementation(),
+ genericScrapers.GenericHTMLDisclogScraper)
+ ScraperImplementation().doScrape()
+
--- a/documents/scrapers/e770921522a49dc77de208cc724ce134.txt
+++ /dev/null
@@ -1,1 +1,1 @@
-no disclog
+
--- /dev/null
+++ b/documents/scrapers/f189459fc43f941e0d4ecfba52c666f3.py
@@ -1,1 +1,19 @@
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+
+class ScraperImplementation(genericScrapers.GenericHTMLDisclogScraper):
+
+ def __init__(self):
+ super(ScraperImplementation, self).__init__()
+
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ScraperImplementation,
+ genericScrapers.GenericHTMLDisclogScraper)
+ print 'Instance:', isinstance(ScraperImplementation(),
+ genericScrapers.GenericHTMLDisclogScraper)
+ ScraperImplementation().doScrape()
+
--- a/documents/scrapers/f189459fc43f941e0d4ecfba52c666f3.txt
+++ /dev/null
@@ -1,2 +1,1 @@
-no disclog
--- a/documents/sitemap.xml.php
+++ b/documents/sitemap.xml.php
@@ -10,10 +10,18 @@
if (strpos($file, ".php") !== false && $file != "index.php" && $file != "sitemap.xml.php")
echo " <url><loc>" . local_url() . "$file</loc><priority>0.6</priority></url>\n";
}
-
-$db = $server->get_db('disclosr-foidocuments');
+$agenciesdb = $server->get_db('disclosr-agencies');
try {
- $rows = $db->get_view("app", "all")->rows;
+ $rows = $agenciesdb->get_view("app", "byCanonicalName")->rows;
+ foreach ($rows as $row) {
+ echo '<url><loc>' . local_url() . 'agency.php?id=' . $row->value->_id . "</loc><priority>0.3</priority></url>\n";
+ }
+} catch (SetteeRestClientException $e) {
+ setteErrorHandler($e);
+}
+$foidocsdb = $server->get_db('disclosr-foidocuments');
+try {
+ $rows = $foidocsdb->get_view("app", "all")->rows;
foreach ($rows as $row) {
echo '<url><loc>' . local_url() . 'view.php?id=' . $row->value->_id . "</loc><priority>0.3</priority></url>\n";
}
--- a/documents/template.inc.php
+++ b/documents/template.inc.php
@@ -1,152 +1,183 @@
<?php
function include_header_documents($title) {
-?>
-<!doctype html>
-<!-- paulirish.com/2008/conditional-stylesheets-vs-css-hacks-answer-neither/ -->
-<!--[if lt IE 7]> <html class="no-js lt-ie9 lt-ie8 lt-ie7" lang="en"> <![endif]-->
-<!--[if IE 7]> <html class="no-js lt-ie9 lt-ie8" lang="en"> <![endif]-->
-<!--[if IE 8]> <html class="no-js lt-ie9" lang="en"> <![endif]-->
-<!-- Consider adding a manifest.appcache: h5bp.com/d/Offline -->
-<!--[if gt IE 8]><!--> <html class="no-js" lang="en"> <!--<![endif]-->
-<head>
- <meta charset="utf-8">
+ header('X-UA-Compatible: IE=edge,chrome=1');
+ ?>
+ <!doctype html>
+ <!-- paulirish.com/2008/conditional-stylesheets-vs-css-hacks-answer-neither/ -->
+ <!--[if lt IE 7]> <html class="no-js lt-ie9 lt-ie8 lt-ie7" lang="en"> <![endif]-->
+ <!--[if IE 7]> <html class="no-js lt-ie9 lt-ie8" lang="en"> <![endif]-->
+ <!--[if IE 8]> <html class="no-js lt-ie9" lang="en"> <![endif]-->
+ <!-- Consider adding a manifest.appcache: h5bp.com/d/Offline -->
+ <!--[if gt IE 8]><!--> <html class="no-js" lang="en"> <!--<![endif]-->
+ <head>
+ <meta charset="utf-8">
- <!-- Use the .htaccess and remove these lines to avoid edge case issues.
- More info: h5bp.com/i/378 -->
- <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
+ <title>Australian Disclosure Logs<?php if ($title != "") echo " - $title"; ?></title>
+ <meta name="description" content="">
- <title>Australian Disclosure Logs<?php if ($title != "") echo " - $title";?></title>
- <meta name="description" content="">
+ <!-- Mobile viewport optimized: h5bp.com/viewport -->
+ <meta name="viewport" content="width=device-width">
+ <link rel="alternate" type="application/rss+xml" title="Latest Disclosure Log Entries" href="rss.xml.php" />
+ <!-- Place favicon.ico and apple-touch-icon.png in the root directory: mathiasbynens.be/notes/touch-icons -->
+ <meta name="google-site-verification" content="jkknX5g2FCpQvrW030b1Nq2hyoa6mb3EDiA7kCoHNj8" />
- <!-- Mobile viewport optimized: h5bp.com/viewport -->
- <meta name="viewport" content="width=device-width">
-<link rel="alternate" type="application/rss+xml" title="Latest Disclosure Log Entries" href="rss.xml.php" />
- <!-- Place favicon.ico and apple-touch-icon.png in the root directory: mathiasbynens.be/notes/touch-icons -->
-<meta name="google-site-verification" content="jkknX5g2FCpQvrW030b1Nq2hyoa6mb3EDiA7kCoHNj8" />
+ <!-- Le styles -->
+ <link href="css/bootstrap.min.css" rel="stylesheet">
+ <style type="text/css">
+ body {
+ padding-top: 60px;
+ padding-bottom: 40px;
+ }
+ .sidebar-nav {
+ padding: 9px 0;
+ }
+ </style>
+ <link href="css/bootstrap-responsive.min.css" rel="stylesheet">
- <!-- Le styles -->
- <link href="css/bootstrap.min.css" rel="stylesheet">
- <style type="text/css">
- body {
- padding-top: 60px;
- padding-bottom: 40px;
- }
- .sidebar-nav {
- padding: 9px 0;
- }
- </style>
- <link href="css/bootstrap-responsive.min.css" rel="stylesheet">
+ <!-- HTML5 shim, for IE6-8 support of HTML5 elements -->
+ <!--[if lt IE 9]>
+ <script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
+ <![endif]-->
+ <!-- More ideas for your <head> here: h5bp.com/d/head-Tips -->
- <!-- HTML5 shim, for IE6-8 support of HTML5 elements -->
- <!--[if lt IE 9]>
- <script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
- <![endif]-->
- <!-- More ideas for your <head> here: h5bp.com/d/head-Tips -->
+ <!-- All JavaScript at the bottom, except this Modernizr build.
+ Modernizr enables HTML5 elements & feature detects for optimal performance.
+ Create your own custom Modernizr build: www.modernizr.com/download/
+ <script src="js/libs/modernizr-2.5.3.min.js"></script>-->
+ <script src="js/jquery.js"></script>
+ <script type="text/javascript" src="js/flotr2.min.js"></script>
- <!-- All JavaScript at the bottom, except this Modernizr build.
- Modernizr enables HTML5 elements & feature detects for optimal performance.
- Create your own custom Modernizr build: www.modernizr.com/download/
- <script src="js/libs/modernizr-2.5.3.min.js"></script>-->
- <script src="js/jquery.js"></script>
- <script type="text/javascript" src="js/flotr2.min.js"></script>
-
-</head>
-<body>
- <div class="navbar navbar-inverse navbar-fixed-top">
- <div class="navbar-inner">
- <div class="container-fluid">
- <a class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
- <span class="icon-bar"></span>
- <span class="icon-bar"></span>
- <span class="icon-bar"></span>
- </a>
- <a class="brand" href="#">Australian Disclosure Logs</a>
- <div class="nav-collapse collapse">
- <p class="navbar-text pull-right">
- Check out our subsites on:
-<a href="http://orgs.disclosurelo.gs">Government Agencies</a>
-• <a href="http://lobbyists.disclosurelo.gs">Political Lobbyists</a>
-• <a href="http://contracts.disclosurelo.gs">Government Contracts and Spending</a>
+ </head>
+ <body>
+ <div class="navbar navbar-inverse navbar-fixed-top">
+ <div class="navbar-inner">
+ <div class="container-fluid">
+ <a class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
+ <span class="icon-bar"></span>
+ <span class="icon-bar"></span>
+ <span class="icon-bar"></span>
+ </a>
+ <a class="brand" href="#">Australian Disclosure Logs</a>
+ <div class="nav-collapse collapse">
+ <p class="navbar-text pull-right">
+ <small>
+ Subsites on:
+ </small>
+ <a href="http://orgs.disclosurelo.gs">Government Agencies</a>
+ • <a href="http://lobbyists.disclosurelo.gs">Political Lobbyists</a>
+ • <a href="http://contracts.disclosurelo.gs">Government Contracts and Spending</a>
- </p>
- <ul class="nav">
- <li><a href="index.php">Home</a></li>
- <li><a href="disclogsList.php">List of Disclosure Logs</a></li>
- <li><a href="about.php">About</a></li>
-
- </ul>
- </div><!--/.nav-collapse -->
- </div>
- </div>
- </div>
- <div class="container">
- <?php
-}
-function include_footer_documents() {
- ?>
- </div> <!-- /container -->
- <hr>
+ </p>
+ <ul class="nav">
+ <li><a href="agency.php">By Agency</a></li>
+ <li><a href="date.php">By Date</a></li>
+ <li><a href="disclogsList.php">List of Disclosure Logs</a></li>
+ <li><a href="about.php">About</a></li>
- <footer>
- <p>© Company 2012</p>
- </footer>
- <script type="text/javascript">
+ </ul>
+ </div><!--/.nav-collapse -->
+ </div>
+ </div>
+ </div>
+ <div class="container">
+ <?php
+ }
- var _gaq = _gaq || [];
- _gaq.push(['_setAccount', 'UA-12341040-4']);
- _gaq.push(['_setDomainName', 'disclosurelo.gs']);
- _gaq.push(['_setAllowLinker', true]);
- _gaq.push(['_trackPageview']);
+ function include_footer_documents() {
+ global $ENV;
+ ?>
+ </div> <!-- /container -->
+ <hr>
- (function() {
- var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
- ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
- var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
- })();
+ <footer>
+ <p>Not affiliated with or endorsed by any government agency.</p>
+ </footer>
+ <?php
+ if ($ENV != "DEV") {
+ echo "<script type='text/javascript'>
-</script>
- <!-- Le javascript
- ================================================== -->
- <!-- Placed at the end of the document so the pages load faster -->
-<!--
- <script src="js/bootstrap-transition.js"></script>
- <script src="js/bootstrap-alert.js"></script>
- <script src="js/bootstrap-modal.js"></script>
- <script src="js/bootstrap-dropdown.js"></script>
- <script src="js/bootstrap-scrollspy.js"></script>
- <script src="js/bootstrap-tab.js"></script>
- <script src="js/bootstrap-tooltip.js"></script>
- <script src="js/bootstrap-popover.js"></script>
- <script src="js/bootstrap-button.js"></script>
- <script src="js/bootstrap-collapse.js"></script>
- <script src="js/bootstrap-carousel.js"></script>
- <script src="js/bootstrap-typeahead.js"></script>-->
+ var _gaq = _gaq || [];
+ _gaq.push(['_setAccount', 'UA-12341040-4']);
+ _gaq.push(['_setDomainName', 'disclosurelo.gs']);
+ _gaq.push(['_setAllowLinker', true]);
+ _gaq.push(['_trackPageview']);
+
+ (function() {
+ var ga = document.createElement('script');
+ ga.type = 'text/javascript';
+ ga.async = true;
+ ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+ var s = document.getElementsByTagName('script')[0];
+ s.parentNode.insertBefore(ga, s);
+ })();
+
+ </script>";
+ }
+ ?>
+ <!-- Le javascript
+ ================================================== -->
+ <!-- Placed at the end of the document so the pages load faster -->
+ <!--
+ <script src="js/bootstrap-transition.js"></script>
+ <script src="js/bootstrap-alert.js"></script>
+ <script src="js/bootstrap-modal.js"></script>
+ <script src="js/bootstrap-dropdown.js"></script>
+ <script src="js/bootstrap-scrollspy.js"></script>
+ <script src="js/bootstrap-tab.js"></script>
+ <script src="js/bootstrap-tooltip.js"></script>
+ <script src="js/bootstrap-popover.js"></script>
+ <script src="js/bootstrap-button.js"></script>
+ <script src="js/bootstrap-collapse.js"></script>
+ <script src="js/bootstrap-carousel.js"></script>
+ <script src="js/bootstrap-typeahead.js"></script>-->
- </body>
-</html>
-<?php
+ </body>
+ </html>
+ <?php
+}
+
+/**
+ * Shortens a string to at most $length bytes, ending it with '...'.
+ *
+ * Strings no longer than $length are returned unchanged. Otherwise the
+ * string is cut to $length - 3 bytes and, unless $stopanywhere is true,
+ * cut back further to the last space so no word is split.
+ *
+ * @param string $string       Text to shorten.
+ * @param int    $length       Maximum length of the result, including '...'.
+ * @param bool   $stopanywhere True to cut mid-word; false to cut at a space.
+ * @return string The original or shortened text.
+ */
+function truncate($string, $length, $stopanywhere = false) {
+    if (strlen($string) > $length) {
+        //limit hit!
+        $string = substr($string, 0, ($length - 3));
+        if (!$stopanywhere) {
+            //stop on a word, if there is one to stop on.
+            $lastSpace = strrpos($string, ' ');
+            // strrpos() returns false when no space is present; cutting at
+            // false (or at 0) would discard the whole text and leave '...'.
+            if ($lastSpace !== false && $lastSpace > 0) {
+                $string = substr($string, 0, $lastSpace);
+            }
+        }
+        $string .= '...';
+    }
+    return $string;
}
function displayLogEntry($row, $idtoname) {
$result = "";
- $result .= "<div><h2>".$row->value->date.": ".$row->value->title." (".$idtoname[$row->value->agencyID].")</h2> <p>".str_replace("\n","<br>",$row->value->description);
-if (isset($row->value->notes)) {
-$result .= " <br>Note: ".$row->value->notes;
-}
-$result .= "</p>";
+ // Builds and returns the HTML for one log entry ($row->value) marked up as a schema.org Article; $idtoname is indexed by agencyID to get the name shown as author. Field values are HTML-escaped so markup characters in the data cannot inject HTML.
+ $result .= '<div itemscope itemtype="http://schema.org/Article"><h2> <span itemprop="datePublished">' . htmlspecialchars($row->value->date) . "</span>: <span itemprop='name headline'>" . htmlspecialchars(truncate($row->value->title, 120)) . "</span>";
+ $result .= ' (<span itemprop="author publisher creator">' . htmlspecialchars($idtoname[$row->value->agencyID]) . '</span>)</h2>';
+ $result .= "<p itemprop='description articleBody text'> Title: " . htmlspecialchars($row->value->title) . "<br/>";
+ if (isset($row->value->description)) {
+ $result .= str_replace("\n", "<br>", htmlspecialchars(preg_replace("/(^[\r\n]*|[\r\n]+)[\s\t]*[\r\n]+/", "\n", trim($row->value->description)))); // collapse CRLF/blank-line runs to ONE newline (an empty replacement glued lines together), escape, then newlines become <br>
+ }
+ if (isset($row->value->notes)) {
+ $result .= " <br>Note: " . htmlspecialchars($row->value->notes);
+ }
+ $result .= "</p>";
-if (isset($row->value->links)){
-$result .= "<h3>Links/Documents</h3><ul>";
-foreach ($row->value->links as $link) {
- $result .= "<li><a href='$link'>".$link."</a></li>";
+ if (isset($row->value->links)) {
+ $result .= '<h3>Links/Documents</h3><ul itemprop="associatedMedia">';
+ foreach ($row->value->links as $link) {
+ // Each link becomes a MediaObject list item; the URL is escaped for both the href attribute and the visible text.
+ $result .= '<li itemscope itemtype="http://schema.org/MediaObject"><a href="' . htmlspecialchars ($link) . '" itemprop="url contentURL">' . htmlspecialchars ( $link) . "</a></li>";
+ }
+ $result .= "</ul>";
+ }
+ $result .= "<small><A itemprop='url' href='" . htmlspecialchars($row->value->url, ENT_QUOTES) . "'>View original source...</a> ID: " . strip_tags($row->value->docID) . "</small>"; // ENT_QUOTES because this href is single-quoted
+ $result .= "</div>\n";
+ return $result;
}
- $result .= "</ul>";
-}
- $result .= "<small><A href='".$row->value->url."'>View original source...</a> ID: ".strip_tags($row->value->docID)."</small>";
-$result .= "</div>";
-return $result;
-}
-
--- a/documents/view.php
+++ b/documents/view.php
@@ -1,6 +1,5 @@
<?php
include('template.inc.php');
-include_header_documents("");
include_once('../include/common.inc.php');
?>
<?php
@@ -17,6 +16,8 @@
try {
$obj = new stdClass();
$obj->value = $foidocsdb->get($_REQUEST['id']);
+ include_header_documents($obj->value->title);
+
echo displayLogEntry($obj,$idtoname);
} catch (SetteeRestClientException $e) {
--- /dev/null
+++ b/lib/FeedWriter/COPYING
@@ -1,1 +1,675 @@
-
+ GNU GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+ The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users. We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors. You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+ To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights. Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received. You must make sure that they, too, receive
+or can get the source code. And you must show them these terms so they
+know their rights.
+
+ Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+ For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software. For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+ Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so. This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software. The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable. Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products. If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+ Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary. To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ TERMS AND CONDITIONS
+
+ 0. Definitions.
+
+ "This License" refers to version 3 of the GNU General Public License.
+
+ "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+ "The Program" refers to any copyrightable work licensed under this
+License. Each licensee is addressed as "you". "Licensees" and
+"recipients" may be individuals or organizations.
+
+ To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy. The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+ A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+ To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy. Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+ To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+ An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License. If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+ 1. Source Code.
+
+ The "source code" for a work means the preferred form of the work
+for making modifications to it. "Object code" means any non-source
+form of a work.
+
+ A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+ The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form. A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+ The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities. However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+ The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+ The Corresponding Source for a work in source code form is that
+same work.
+
+ 2. Basic Permissions.
+
+ All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met. This License explicitly affirms your unlimited
+permission to run the unmodified Program. The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work. This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+ You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force. You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright. Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+ Conveying under any other circumstances is permitted solely under
+the conditions stated below. Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+ No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+ When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+ 4. Conveying Verbatim Copies.
+
+ You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+ You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+ 5. Conveying Modified Source Versions.
+
+ You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+ a) The work must carry prominent notices stating that you modified
+ it, and giving a relevant date.
+
+ b) The work must carry prominent notices stating that it is
+ released under this License and any conditions added under section
+ 7. This requirement modifies the requirement in section 4 to
+ "keep intact all notices".
+
+ c) You must license the entire work, as a whole, under this
+ License to anyone who comes into possession of a copy. This
+ License will therefore apply, along with any applicable section 7
+ additional terms, to the whole of the work, and all its parts,
+ regardless of how they are packaged. This License gives no
+ permission to license the work in any other way, but it does not
+ invalidate such permission if you have separately received it.
+
+ d) If the work has interactive user interfaces, each must display
+ Appropriate Legal Notices; however, if the Program has interactive
+ interfaces that do not display Appropriate Legal Notices, your
+ work need not make them do so.
+
+ A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+ 6. Conveying Non-Source Forms.
+
+ You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+ a) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by the
+ Corresponding Source fixed on a durable physical medium
+ customarily used for software interchange.
+
+ b) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by a
+ written offer, valid for at least three years and valid for as
+ long as you offer spare parts or customer support for that product
+ model, to give anyone who possesses the object code either (1) a
+ copy of the Corresponding Source for all the software in the
+ product that is covered by this License, on a durable physical
+ medium customarily used for software interchange, for a price no
+ more than your reasonable cost of physically performing this
+ conveying of source, or (2) access to copy the
+ Corresponding Source from a network server at no charge.
+
+ c) Convey individual copies of the object code with a copy of the
+ written offer to provide the Corresponding Source. This
+ alternative is allowed only occasionally and noncommercially, and
+ only if you received the object code with such an offer, in accord
+ with subsection 6b.
+
+ d) Convey the object code by offering access from a designated
+ place (gratis or for a charge), and offer equivalent access to the
+ Corresponding Source in the same way through the same place at no
+ further charge. You need not require recipients to copy the
+ Corresponding Source along with the object code. If the place to
+ copy the object code is a network server, the Corresponding Source
+ may be on a different server (operated by you or a third party)
+ that supports equivalent copying facilities, provided you maintain
+ clear directions next to the object code saying where to find the
+ Corresponding Source. Regardless of what server hosts the
+ Corresponding Source, you remain obligated to ensure that it is
+ available for as long as needed to satisfy these requirements.
+
+ e) Convey the object code using peer-to-peer transmission, provided
+ you inform other peers where the object code and Corresponding
+ Source of the work are being offered to the general public at no
+ charge under subsection 6d.
+
+ A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+ A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling. In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage. For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product. A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+ "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source. The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+ If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information. But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+ The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed. Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+ Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+ 7. Additional Terms.
+
+ "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law. If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+ When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it. (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.) You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+ Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+ a) Disclaiming warranty or limiting liability differently from the
+ terms of sections 15 and 16 of this License; or
+
+ b) Requiring preservation of specified reasonable legal notices or
+ author attributions in that material or in the Appropriate Legal
+ Notices displayed by works containing it; or
+
+ c) Prohibiting misrepresentation of the origin of that material, or
+ requiring that modified versions of such material be marked in
+ reasonable ways as different from the original version; or
+
+ d) Limiting the use for publicity purposes of names of licensors or
+ authors of the material; or
+
+ e) Declining to grant rights under trademark law for use of some
+ trade names, trademarks, or service marks; or
+
+ f) Requiring indemnification of licensors and authors of that
+ material by anyone who conveys the material (or modified versions of
+ it) with contractual assumptions of liability to the recipient, for
+ any liability that these contractual assumptions directly impose on
+ those licensors and authors.
+
+ All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10. If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term. If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+ If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+ Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+ 8. Termination.
+
+ You may not propagate or modify a covered work except as expressly
+provided under this License. Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+ However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+ Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+ Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+ 9. Acceptance Not Required for Having Copies.
+
+ You are not required to accept this License in order to receive or
+run a copy of the Program. Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance. However,
+nothing other than this License grants you permission to propagate or
+modify any covered work. These actions infringe copyright if you do
+not accept this License. Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+ 10. Automatic Licensing of Downstream Recipients.
+
+ Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License. You are not responsible
+for enforcing compliance by third parties with this License.
+
+ An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations. If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+ You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License. For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+ 11. Patents.
+
+ A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The
+work thus licensed is called the contributor's "contributor version".
+
+ A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version. For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+ In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement). To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+ If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients. "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+ If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+ A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License. You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+ Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+ 12. No Surrender of Others' Freedom.
+
+ If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all. For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+ 13. Use with the GNU Affero General Public License.
+
+ Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work. The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+ 14. Revised Versions of this License.
+
+ The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation. If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+ If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+ Later license versions may give you additional or different
+permissions. However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+ 15. Disclaimer of Warranty.
+
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. Limitation of Liability.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+ 17. Interpretation of Sections 15 and 16.
+
+ If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+ If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+ <program> Copyright (C) <year> <name of author>
+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+ You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<http://www.gnu.org/licenses/>.
+
+ The GNU General Public License does not permit incorporating your program
+into proprietary programs. If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License. But first, please read
+<http://www.gnu.org/philosophy/why-not-lgpl.html>.
+
--- /dev/null
+++ b/lib/FeedWriter/FeedItem.php
@@ -1,1 +1,251 @@
-
+<?php
+
+/*
+ * Copyright (C) 2008 Anis uddin Ahmad <anisniit@gmail.com>
+ * Copyright (C) 2010-2012 Michael Bemmerl <mail@mx-server.de>
+ *
+ * This file is part of the "Universal Feed Writer" project.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+*
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+ /**
+ * Universal Feed Writer
+ *
+ * FeedItem class - Used as feed element in FeedWriter class
+ *
+ * @package UniversalFeedWriter
+ * @author Anis uddin Ahmad <anisniit@gmail.com>
+ * @link http://www.ajaxray.com/projects/rss
+ */
+class FeedItem
+{
+ private $elements = array(); // Feed elements of this item, keyed by tag name
+ private $version;            // Feed type constant this item was created for
+
+ /**
+  * Create an empty feed item for the given feed type.
+  *
+  * @param string $version One of the RSS1/RSS2/ATOM constants; RSS2 is the default.
+  */
+ public function __construct($version = RSS2)
+ {
+     $this->version = $version;
+ }
+
+ /**
+  * Add an element to the elements array.
+  *
+  * @access public
+  * @param string  $elementName The tag name of the element
+  * @param mixed   $content     The content of the tag
+  * @param array   $attributes  Attributes (if any) in 'attrName' => 'attrValue' format
+  * @param boolean $overwrite   Whether an already existing element is replaced
+  * @return void
+  */
+ public function addElement($elementName, $content, $attributes = null, $overwrite = FALSE)
+ {
+     // Keep the existing element unless the caller explicitly asked to overwrite it.
+     if (isset($this->elements[$elementName]) && !$overwrite)
+         return;
+
+     $this->elements[$elementName]['name'] = $elementName;
+     $this->elements[$elementName]['content'] = $content;
+     $this->elements[$elementName]['attributes'] = $attributes;
+ }
+
+ /**
+  * Set multiple feed elements from an array.
+  * Elements which have attributes cannot be added by this method.
+  *
+  * @access public
+  * @param array $elementArray Elements in 'tagName' => 'tagContent' format; non-arrays are ignored
+  * @return void
+  */
+ public function addElementArray($elementArray)
+ {
+     if (!is_array($elementArray))
+         return;
+
+     foreach ($elementArray as $elementName => $content)
+     {
+         $this->addElement($elementName, $content);
+     }
+ }
+
+ /**
+  * Return the collection of elements in this feed item.
+  *
+  * @access public
+  * @return array Map of tag name to array('name' => ..., 'content' => ..., 'attributes' => ...)
+  */
+ public function getElements()
+ {
+     return $this->elements;
+ }
+
+ /**
+  * Return the type of this feed item.
+  *
+  * @access public
+  * @return string The feed type, as defined in FeedWriter.php
+  */
+ public function getVersion()
+ {
+     return $this->version;
+ }
+
+ // Wrapper functions ------------------------------------------------------
+
+ /**
+  * Set the 'description' element of the feed item ('summary' for ATOM).
+  *
+  * @access public
+  * @param string $description The content of the 'description' or 'summary' element
+  * @return void
+  */
+ public function setDescription($description)
+ {
+     $tag = ($this->version == ATOM) ? 'summary' : 'description';
+     $this->addElement($tag, $description);
+ }
+
+ /**
+  * Set the 'title' element of the feed item.
+  * @access public
+  * @param string $title The content of the 'title' element
+  * @return void
+  */
+ public function setTitle($title)
+ {
+     $this->addElement('title', $title);
+ }
+
+ /**
+  * Set the date element of the feed item ('updated', 'pubDate' or 'dc:date' depending on the feed type).
+  *
+  * @access public
+  * @param mixed $date UNIX timestamp, date/time object, or any string strtotime() understands
+  * @return void
+  */
+ public function setDate($date)
+ {
+     if (!is_numeric($date))
+     {
+         // DateTimeImmutable is not a DateTime, so test the common interface too;
+         // instanceof against a class unknown to an old PHP version is simply false.
+         if ($date instanceof DateTime || $date instanceof DateTimeInterface)
+         {
+             // format('U') yields the UNIX timestamp on every PHP version.
+             $date = (int) $date->format('U');
+         }
+         else
+             $date = strtotime($date);
+     }
+
+     if ($this->version == ATOM)
+     {
+         $tag = 'updated';
+         $value = date(DATE_ATOM, $date);
+     }
+     elseif ($this->version == RSS2)
+     {
+         $tag = 'pubDate';
+         $value = date(DATE_RSS, $date);
+     }
+     else
+     {
+         $tag = 'dc:date';
+         $value = date("Y-m-d", $date);
+     }
+
+     $this->addElement($tag, $value);
+ }
+
+ /**
+  * Set the 'link' element of the feed item. For ATOM an 'id' is derived from the link as well.
+  *
+  * @access public
+  * @param string $link The URL the item points to
+  * @return void
+  */
+ public function setLink($link)
+ {
+     if ($this->version == RSS2 || $this->version == RSS1)
+     {
+         $this->addElement('link', $link);
+     }
+     else
+     {
+         $this->addElement('link', '', array('href' => $link));
+         $this->addElement('id', FeedWriter::uuid($link, 'urn:uuid:'));
+     }
+ }
+
+ /**
+  * Set the 'enclosure' element of the feed item.
+  * For RSS 2.0 only; the call is ignored for other feed types.
+  *
+  * @access public
+  * @param string $url    The url attribute of the enclosure tag
+  * @param string $length The length attribute of the enclosure tag
+  * @param string $type   The type attribute of the enclosure tag
+  * @return void
+  */
+ public function setEncloser($url, $length, $type)
+ {
+     if ($this->version != RSS2)
+         return;
+
+     $attributes = array('url' => $url, 'length' => $length, 'type' => $type);
+     $this->addElement('enclosure', '', $attributes);
+ }
+
+ /**
+  * Set the 'author' element of the feed item.
+  * For ATOM only; the call is ignored for other feed types.
+  *
+  * @access public
+  * @param string $author The name of the author of this item
+  * @return void
+  */
+ public function setAuthor($author)
+ {
+     if ($this->version != ATOM)
+         return;
+     // Nested array, so the writer emits a real <name> child instead of escaped markup.
+     $this->addElement('author', array('name' => $author));
+ }
+
+ /**
+  * Set the unique identifier of the item ('guid' for RSS 2.0; 'id' for ATOM, replacing an existing id).
+  *
+  * @access public
+  * @param string $id The unique identifier of this item
+  * @return void
+  */
+ public function setId($id)
+ {
+     if ($this->version == RSS2)
+     {
+         $this->addElement('guid', $id, array('isPermaLink' => 'false'));
+     }
+     else if ($this->version == ATOM)
+     {
+         $this->addElement('id', FeedWriter::uuid($id, 'urn:uuid:'), NULL, TRUE);
+     }
+ }
+
+ } // end of class FeedItem
+
--- /dev/null
+++ b/lib/FeedWriter/FeedTypes.php
@@ -1,1 +1,63 @@
+<?php
+/*
+ * Copyright (C) 2012 Michael Bemmerl <mail@mx-server.de>
+ *
+ * This file is part of the "Universal Feed Writer" project.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+if (!class_exists('FeedWriter')) // load the base class (and the RSS1/RSS2/ATOM constants it defines) only when missing
+ require dirname(__FILE__) . '/FeedWriter.php';
+
+/**
+ * Feed writer preconfigured for the RSS 1.0 format.
+ *
+ * @package UniversalFeedWriter
+ */
+class RSS1FeedWriter extends FeedWriter
+{
+ public function __construct()
+ {
+     parent::__construct(RSS1);
+ }
+}
+
+/**
+ * Feed writer preconfigured for the RSS 2.0 format.
+ *
+ * @package UniversalFeedWriter
+ */
+class RSS2FeedWriter extends FeedWriter
+{
+ public function __construct()
+ {
+     parent::__construct(RSS2);
+ }
+}
+
+/**
+ * Feed writer preconfigured for the ATOM format.
+ *
+ * @package UniversalFeedWriter
+ */
+class ATOMFeedWriter extends FeedWriter
+{
+ public function __construct()
+ {
+     parent::__construct(ATOM);
+ }
+}
+
--- /dev/null
+++ b/lib/FeedWriter/FeedWriter.php
@@ -1,1 +1,514 @@
-
+<?php
+
+/*
+ * Copyright (C) 2008 Anis uddin Ahmad <anisniit@gmail.com>
+ * Copyright (C) 2010-2012 Michael Bemmerl <mail@mx-server.de>
+ *
+ * This file is part of the "Universal Feed Writer" project.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+// RSS 0.90 Officially obsoleted by 1.0
+// RSS 0.91, 0.92, 0.93 and 0.94 Officially obsoleted by 2.0
+// So, define constants for RSS 1.0, RSS 2.0 and ATOM
+// Declared case-sensitive: the case-insensitive flag of define() is deprecated and ignored by PHP 8.
+define('RSS1', 'RSS 1.0');
+define('RSS2', 'RSS 2.0');
+define('ATOM', 'ATOM');
+
+if (!class_exists('FeedItem'))
+ require dirname(__FILE__) . '/FeedItem.php';
+
+/**
+ * Universal Feed Writer class
+ *
+ * Generate RSS 1.0, RSS2.0 and ATOM Feeds
+ *
+ * @package UniversalFeedWriter
+ * @author Anis uddin Ahmad <anisniit@gmail.com>
+ * @link http://www.ajaxray.com/projects/rss
+ */
+abstract class FeedWriter
+{
+ private $channels      = array(); // Collection of channel elements
+ private $items         = array(); // Collection of items as objects of the FeedItem class
+ private $data          = array(); // Version-specific extra data (e.g. 'ChannelAbout')
+ private $CDATAEncoding = array(); // Tag names whose content is wrapped in CDATA
+
+ private $version = null;          // Feed type constant (RSS1/RSS2/ATOM)
+
+ /**
+  * Constructor; the class is abstract, so it is only reachable through a subclass.
+  *
+  * @param string $version The version constant (RSS1/RSS2/ATOM).
+  */
+ protected function __construct($version = RSS2)
+ {
+     $this->version = $version;
+
+     // Default values for the essential channel elements
+     $this->channels['title'] = $version . ' Feed';
+     $this->channels['link'] = 'http://www.ajaxray.com/blog';
+
+     // Tag names to encode in CDATA
+     $this->CDATAEncoding = array('description', 'content:encoded', 'summary');
+ }
+
+ // Start # public functions ---------------------------------------------
+
+ /**
+  * Set a channel element, replacing any previous value stored under the same name.
+  * @access public
+  * @param string $elementName Name of the channel tag
+  * @param mixed  $content     Content of the channel tag (string, or array of nested tags)
+  * @return void
+  */
+ public function setChannelElement($elementName, $content)
+ {
+     $this->channels[$elementName] = $content;
+ }
+
+ /**
+  * Set multiple channel elements from an array. Array elements
+  * should be in 'channelName' => 'channelContent' format.
+  *
+  * @access public
+  * @param array $elementArray Array of channels; anything that is not an array is ignored
+  * @return void
+  */
+ public function setChannelElementsFromArray($elementArray)
+ {
+     if (!is_array($elementArray))
+         return;
+
+     foreach ($elementArray as $elementName => $content)
+     {
+         $this->setChannelElement($elementName, $content);
+     }
+ }
+
+ /**
+  * Generate the actual RSS/ATOM document: sends the Content-Type header and echoes the XML.
+  *
+  * @access public
+  * @param bool $useGenericContentType FALSE to send the feed-specific media type, TRUE for "text/xml"
+  * @return void
+  */
+ public function generateFeed($useGenericContentType = FALSE)
+ {
+     $contentType = "text/xml";
+
+     if (!$useGenericContentType)
+     {
+         switch ($this->version)
+         {
+             case RSS2 : $contentType = "application/rss+xml";
+                 break;
+             case RSS1 : $contentType = "application/rdf+xml";
+                 break;
+             case ATOM : $contentType = "application/atom+xml";
+                 break;
+         }
+     }
+
+     header("Content-Type: " . $contentType);
+
+     $this->printHeader();
+     $this->printChannels();
+     $this->printItems();
+     $this->printFooter();
+ }
+
+ /**
+  * Create a new FeedItem of the same feed type as this writer.
+  *
+  * @access public
+  * @return FeedItem A new, empty item; it still has to be passed to addItem()
+  */
+ public function createNewItem()
+ {
+     $Item = new FeedItem($this->version);
+     return $Item;
+ }
+
+ /**
+  * Add a FeedItem to this feed. Terminates the script if the item's feed type differs.
+  *
+  * @access public
+  * @param FeedItem $feedItem Item created for the same feed type as this writer
+  * @return void
+  */
+ public function addItem(FeedItem $feedItem)
+ {
+     if ($feedItem->getVersion() != $this->version)
+         die('Feed type mismatch: This instance can handle ' . $this->version . ' feeds only, but item with type ' . $feedItem->getVersion() . ' given.');
+
+     $this->items[] = $feedItem;
+ }
+
+
+ // Wrapper functions -------------------------------------------------------------------
+
+ /**
+  * Set the 'title' channel element.
+  *
+  * @access public
+  * @param string $title Value of the 'title' channel tag
+  * @return void
+  */
+ public function setTitle($title)
+ {
+     $this->setChannelElement('title', $title);
+ }
+
+ /**
+  * Set the 'updated' channel element of an ATOM feed; ignored for other feed types.
+  *
+  * @access public
+  * @param mixed $date Date/time object, UNIX timestamp, or any string strtotime() understands
+  * @return void
+  */
+ public function setDate($date)
+ {
+     if ($this->version != ATOM)
+         return;
+
+     // Also accept DateTimeImmutable; instanceof on a class unknown to an old PHP version is just false.
+     if ($date instanceof DateTime || $date instanceof DateTimeInterface)
+         $date = $date->format(DATE_ATOM);
+     else if (is_numeric($date))
+         $date = date(DATE_ATOM, $date);
+     else
+         $date = date(DATE_ATOM, strtotime($date));
+     $this->setChannelElement('updated', $date);
+ }
+
+ /**
+  * Set the 'description' channel element; ignored for ATOM feeds.
+  *
+  * @access public
+  * @param string $desciption Value of the 'description' channel tag
+  * @return void
+  */
+ public function setDescription($desciption)
+ {
+     if ($this->version != ATOM)
+         $this->setChannelElement('description', $desciption);
+ }
+
+ /**
+  * Set the 'link' channel element.
+  *
+  * @access public
+  * @param string $link Value of the 'link' channel tag
+  * @return void
+  */
+ public function setLink($link)
+ {
+     $this->setChannelElement('link', $link);
+ }
+
+ /**
+  * Set the 'image' channel element.
+  *
+  * @access public
+  * @param string $title Title of the image
+  * @param string $link  URL the image links to
+  * @param string $url   URL of the image file itself
+  * @return void
+  */
+ public function setImage($title, $link, $url)
+ {
+     $this->setChannelElement('image', array('title' => $title, 'link' => $link, 'url' => $url));
+ }
+
+ /**
+  * Set the 'about' channel element. Only for RSS 1.0
+  *
+  * @access public
+  * @param string $url Value of the 'about' attribute of the channel tag
+  * @return void
+  */
+ public function setChannelAbout($url)
+ {
+     $this->data['ChannelAbout'] = $url;
+ }
+
+ /**
+  * Generate a UUID-formatted string from the MD5 hash of a key.
+  * @author Anis uddin Ahmad <admin@ajaxray.com>
+  * @param string $key    Value the id is derived from; a random one is used when null or empty
+  * @param string $prefix An optional prefix
+  * @return string The formatted uuid
+  */
+ public static function uuid($key = null, $prefix = '')
+ {
+     $key = ($key == null) ? uniqid(rand()) : $key;
+     $chars = md5($key);
+     $uuid  = substr($chars, 0, 8) . '-';
+     $uuid .= substr($chars, 8, 4) . '-';
+     $uuid .= substr($chars, 12, 4) . '-';
+     $uuid .= substr($chars, 16, 4) . '-';
+     $uuid .= substr($chars, 20, 12);
+     return $prefix . $uuid;
+ }
+ // End # public functions ----------------------------------------------
+
+ // Start # private functions ----------------------------------------------
+
+ /**
+  * Print the XML declaration and the opening root tag with its namespaces.
+  *
+  * @access private
+  * @return void
+  */
+ private function printHeader()
+ {
+     $out = '<?xml version="1.0" encoding="utf-8"?>' . PHP_EOL;
+
+     if ($this->version == RSS2)
+     {
+         $out .= '<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:wfw="http://wellformedweb.org/CommentAPI/">';
+     }
+     elseif ($this->version == RSS1)
+     {
+         $out .= '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/">';
+     }
+     else if ($this->version == ATOM)
+     {
+         $out .= '<feed xmlns="http://www.w3.org/2005/Atom">';
+     }
+
+     $out .= PHP_EOL;
+
+     echo $out;
+ }
+
+ /**
+  * Close the tags that are still open at the end of the document.
+  *
+  * @access private
+  * @return void
+  */
+ private function printFooter()
+ {
+     if ($this->version == RSS2)
+     {
+         echo '</channel>' . PHP_EOL . '</rss>';
+     }
+     elseif ($this->version == RSS1)
+     {
+         echo '</rdf:RDF>';
+     }
+     else if ($this->version == ATOM)
+     {
+         echo '</feed>';
+     }
+ }
+
+ /**
+  * Create a single node in XML format.
+  *
+  * @access private
+  * @param string $tagName    Name of the tag
+  * @param mixed  $tagContent Tag value as string, or array of nested tags in 'tagName' => 'tagValue' format
+  * @param array  $attributes Attributes (if any) in 'attrName' => 'attrValue' format
+  * @return string The formatted XML tag
+  */
+ private function makeNode($tagName, $tagContent, $attributes = null)
+ {
+     $nodeText = '';
+     $attrText = '';
+     // Strict comparison: a numeric array key must not loosely match a tag name.
+     $useCDATA = in_array($tagName, $this->CDATAEncoding, true);
+
+     if (is_array($attributes) && count($attributes) > 0)
+     {
+         foreach ($attributes as $key => $value)
+         {
+             $value = htmlspecialchars($value);
+             $attrText .= " $key=\"$value\" ";
+         }
+         // Get rid of the last whitespace
+         $attrText = substr($attrText, 0, strlen($attrText) - 1);
+     }
+
+     if (is_array($tagContent) && $this->version == RSS1)
+     {
+         $attrText = ' rdf:parseType="Resource"';
+     }
+
+     $attrText .= ($useCDATA && $this->version == ATOM) ? ' type="html"' : '';
+     $nodeText .= "<{$tagName}{$attrText}>";
+     $nodeText .= $useCDATA ? '<![CDATA[' : '';
+
+     if (is_array($tagContent))
+     {
+         foreach ($tagContent as $key => $value)
+         {
+             $nodeText .= $this->makeNode($key, $value);
+         }
+     }
+     else
+     {
+         $nodeText .= $useCDATA ? $this->sanitizeCDATA($tagContent) : htmlspecialchars($tagContent);
+     }
+
+     $nodeText .= $useCDATA ? ']]>' : '';
+     $nodeText .= "</$tagName>" . PHP_EOL;
+     return $nodeText;
+ }
+
+ /**
+  * Print the channel elements (and, for RSS 1.0, the <items> sequence).
+  * @access private
+  * @return void
+  */
+ private function printChannels()
+ {
+     // Start channel tag; the URL is escaped because it ends up inside an XML attribute
+     switch ($this->version)
+     {
+         case RSS2:
+             echo '<channel>' . PHP_EOL;
+             break;
+         case RSS1:
+             echo '<channel rdf:about="' . htmlspecialchars(isset($this->data['ChannelAbout']) ? $this->data['ChannelAbout'] : $this->channels['link']) . '">';
+             break;
+     }
+
+     // Print the elements of the channel
+     foreach ($this->channels as $key => $value)
+     {
+         if ($this->version == ATOM && $key == 'link')
+         {
+             // ATOM prints link element as href attribute
+             echo $this->makeNode($key, '', array('href' => $value));
+             // Add the id for ATOM
+             echo $this->makeNode('id', FeedWriter::uuid($value, 'urn:uuid:'));
+         }
+         else
+         {
+             echo $this->makeNode($key, $value);
+         }
+     }
+
+     // RSS 1.0 has a special <rdf:Seq> tag listing the items inside the channel
+     if ($this->version == RSS1)
+     {
+         echo "<items>" . PHP_EOL . "<rdf:Seq>" . PHP_EOL;
+         foreach ($this->items as $item)
+         {
+             $thisItems = $item->getElements();
+             $resource = isset($thisItems['link']['content']) ? $thisItems['link']['content'] : '';
+             echo '<rdf:li resource="' . htmlspecialchars($resource) . '"/>' . PHP_EOL;
+         }
+         echo "</rdf:Seq>" . PHP_EOL . "</items>" . PHP_EOL . "</channel>" . PHP_EOL;
+     }
+ }
+
+ /**
+  * Print all feed items, each wrapped in the item tag of the feed type.
+  *
+  * @access private
+  * @return void
+  */
+ private function printItems()
+ {
+     foreach ($this->items as $item)
+     {
+         $thisItems = $item->getElements();
+
+         // The link becomes the rdf:about attribute of the item in RSS 1.0; an item may lack one elsewhere.
+         $this->startItem(isset($thisItems['link']['content']) ? $thisItems['link']['content'] : false);
+
+         foreach ($thisItems as $feedItem)
+         {
+             echo $this->makeNode($feedItem['name'], $feedItem['content'], $feedItem['attributes']);
+         }
+         $this->endItem();
+     }
+ }
+
+ /**
+  * Print the opening tag of a feed item.
+  *
+  * @access private
+  * @param string $about Value of the rdf:about attribute (used, escaped, for RSS 1.0 only)
+  * @return void
+  */
+ private function startItem($about = false)
+ {
+     if ($this->version == RSS2)
+     {
+         echo '<item>' . PHP_EOL;
+     }
+     else if ($this->version == RSS1)
+     {
+         if ($about)
+         {
+             echo '<item rdf:about="' . htmlspecialchars($about) . '">' . PHP_EOL;
+         }
+         else
+         {
+             die("link element is not set." . PHP_EOL . "It's required for RSS 1.0 to be used as the about attribute of the item tag.");
+         }
+     }
+     else if ($this->version == ATOM)
+     {
+         echo "<entry>" . PHP_EOL;
+     }
+ }
+
+ /**
+  * Print the closing tag of a feed item.
+  *
+  * @access private
+  * @return void
+  */
+ private function endItem()
+ {
+     if ($this->version == RSS2 || $this->version == RSS1)
+     {
+         echo '</item>' . PHP_EOL;
+     }
+     else if ($this->version == ATOM)
+     {
+         echo "</entry>" . PHP_EOL;
+     }
+ }
+
+ /**
+  * Sanitize data which will later be emitted inside a CDATA section of the feed.
+  *
+  * A "]]>" respectively "<![CDATA[" in the data would break the CDATA section in the
+  * XML, so the angle bracket of each marker is converted to its HTML entity.
+  *
+  * @access private
+  * @param string $text Data to be sanitized
+  * @return string Sanitized data
+  */
+ private function sanitizeCDATA($text)
+ {
+     $text = str_replace("]]>", "]]&gt;", $text);
+     $text = str_replace("<![CDATA[", "&lt;![CDATA[", $text);
+
+     return $text;
+ }
+
+ // End # private functions ----------------------------------------------
+
+} // end of class FeedWriter
+
--- /dev/null
+++ b/lib/FeedWriter/README
@@ -1,1 +1,20 @@
+This package can be used to generate feeds in either RSS 1.0, RSS 2.0 or ATOM
+formats.
+There are three main classes that abstract the feed information and another to
+encapsulate the feed item information.
+
+Applications can create a feed writer object and several feed item objects, set
+several types of properties of both feeds and feed items, and add items to
+the feed.
+
+Once a feed is fully composed with its items, the feed writer class can generate
+the necessary XML structure to describe the feed in the RSS or ATOM formats.
+The feed is generated as part of the current script output.
+
+
+Requirements
+============
+
+PHP >= 5.0
+
--- /dev/null
+++ b/lib/FeedWriter/examples/example_atom.php
@@ -1,1 +1,60 @@
+<?php
+/*
+ * Copyright (C) 2008 Anis uddin Ahmad <anisniit@gmail.com>
+ *
+ * This file is part of the "Universal Feed Writer" project.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+*
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+ include("../FeedTypes.php");
+
+ // IMPORTANT : No need to add id for feed or channel. It will be automatically created from link.
+
+ //Creating an instance of ATOMFeedWriter class.
+ //The ATOM version is selected by the subclass itself, so no constant has to be passed
+ $TestFeed = new ATOMFeedWriter();
+
+ //Setting the channel elements
+ //Use wrapper functions for common elements
+ $TestFeed->setTitle('Testing the RSS writer class');
+ $TestFeed->setLink('http://www.ajaxray.com/rss2/channel/about');
+
+ //For other channel elements, use setChannelElement() function
+ $TestFeed->setChannelElement('updated', date(DATE_ATOM , time()));
+ $TestFeed->setChannelElement('author', array('name'=>'Anis uddin Ahmad'));
+
+ //Adding a feed item. Generally this portion will be in a loop that adds all items.
+
+ //Create an empty FeedItem
+ $newItem = $TestFeed->createNewItem();
+
+ //Add elements to the feed item
+ //Use wrapper functions to add common feed elements
+ $newItem->setTitle('The first feed');
+ $newItem->setLink('http://www.yahoo.com');
+ $newItem->setDate(time());
+ //Internally changed to "summary" tag for ATOM feed
+ $newItem->setDescription('This is a test of adding CDATA encoded description by the php <b>Universal Feed Writer</b> class');
+
+ //Now add the feed item
+ $TestFeed->addItem($newItem);
+
+ //OK. Everything is done. Now generate the feed.
+ $TestFeed->generateFeed();
+
+?>
+
--- /dev/null
+++ b/lib/FeedWriter/examples/example_minimum.php
@@ -1,1 +1,59 @@
+<?php
+/*
+ * Copyright (C) 2008 Anis uddin Ahmad <anisniit@gmail.com>
+ *
+ * This file is part of the "Universal Feed Writer" project.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+*
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+ // This is a minimal example of using the class
+ include("../FeedTypes.php");
+
+ //Creating an instance of RSS2FeedWriter class.
+ $TestFeed = new RSS2FeedWriter();
+
+ //Setting the channel elements
+ //Use wrapper functions for common channel elements
+ $TestFeed->setTitle('Testing & Checking the RSS writer class');
+ $TestFeed->setLink('http://www.ajaxray.com/projects/rss');
+ $TestFeed->setDescription('This is a test of creating a RSS 2.0 feed Universal Feed Writer');
+
+ //Image title and link must match the 'title' and 'link' channel elements for valid RSS 2.0
+ $TestFeed->setImage('Testing & Checking the RSS writer class','http://www.ajaxray.com/projects/rss','http://www.rightbrainsolution.com/_resources/img/logo.png');
+
+ //Let's add some feed items: Create two empty FeedItem instances
+ $itemOne = $TestFeed->createNewItem();
+ $itemTwo = $TestFeed->createNewItem();
+
+ //Add item details
+ $itemOne->setTitle('The title of the first entry.');
+ $itemOne->setLink('http://www.google.de');
+ $itemOne->setDate(time());
+ $itemOne->setDescription('And here\'s the description of the entry.');
+ $itemTwo->setTitle('Lorem ipsum');
+ $itemTwo->setLink('http://www.example.com');
+ $itemTwo->setDate(1234567890);
+ $itemTwo->setDescription('Lorem ipsum dolor sit amet, consectetur, adipisci velit');
+
+ //Now add the feed items
+ $TestFeed->addItem($itemOne);
+ $TestFeed->addItem($itemTwo);
+
+ //OK. Everything is done. Now generate the feed.
+ $TestFeed->generateFeed();
+
+?>
+
--- /dev/null
+++ b/lib/FeedWriter/examples/example_rss1.php
@@ -1,1 +1,67 @@
+<?php
+/*
+ * Copyright (C) 2008 Anis uddin Ahmad <anisniit@gmail.com>
+ *
+ * This file is part of the "Universal Feed Writer" project.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+*
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+ include("../FeedTypes.php");
+
+ //Minimal RSS 1.0 example: create an instance of the RSS1FeedWriter class.
+ //No version argument is passed; the version-specific class is instantiated directly.
+ $TestFeed = new RSS1FeedWriter();
+
+ //Setting the channel elements
+ //Use wrapper functions for common elements
+ //For other optional channel elements, use the setChannelElement() function
+ $TestFeed->setTitle('Testing the RSS writer class');
+ $TestFeed->setLink('http://www.ajaxray.com/rss2/channel/about');
+ $TestFeed->setDescription('This is test of creating a RSS 1.0 feed by Universal Feed Writer');
+
+ //It's important for RSS 1.0
+ $TestFeed->setChannelAbout('http://www.ajaxray.com/rss2/channel/about');
+
+ //Adding a feed item. Generally this portion will be in a loop that adds all items.
+
+ //Create an empty FeedItem
+ $newItem = $TestFeed->createNewItem();
+
+ //Add elements to the feed item
+ //Use wrapper functions to add common feed elements
+ $newItem->setTitle('The first feed');
+ $newItem->setLink('http://www.yahoo.com');
+ //The parameter of setDate() is a timestamp; time() supplies the current one
+ $newItem->setDate(time());
+ $newItem->setDescription('This is test of adding CDATA encoded description by the php <b>Universal Feed Writer</b> class');
+ //Use the core addElement() function for other supported optional elements
+ $newItem->addElement('dc:subject', 'Nothing but test');
+
+ //Now add the feed item
+ $TestFeed->addItem($newItem);
+
+ //Adding multiple elements at once from an array
+ //Elements which have an attribute cannot be added this way
+ $newItem = $TestFeed->createNewItem();
+ $newItem->addElementArray(array('title'=>'The 2nd feed', 'link'=>'http://www.google.com', 'description'=>'This is a test of the FeedWriter class'));
+ $TestFeed->addItem($newItem);
+
+ //OK. Everything is done. Now generate the feed.
+ $TestFeed->generateFeed();
+
+?>
+
--- /dev/null
+++ b/lib/FeedWriter/examples/example_rss2.php
@@ -1,1 +1,73 @@
+<?php
+/*
+ * Copyright (C) 2008 Anis uddin Ahmad <anisniit@gmail.com>
+ *
+ * This file is part of the "Universal Feed Writer" project.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+*
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+ include("../FeedTypes.php");
+
+ //RSS 2.0 example: create an instance of the RSS2FeedWriter class.
+ //No version argument is passed; the version-specific class is instantiated directly.
+ $TestFeed = new RSS2FeedWriter();
+
+ //Setting the channel elements
+ //Use wrapper functions for common channel elements
+ $TestFeed->setTitle('Testing & Checking the RSS writer class');
+ $TestFeed->setLink('http://www.ajaxray.com/projects/rss');
+ $TestFeed->setDescription('This is a test of creating a RSS 2.0 feed with Universal Feed Writer');
+
+ //Image title and link must match the 'title' and 'link' channel elements for RSS 2.0, so the same values are repeated here
+ $TestFeed->setImage('Testing & Checking the RSS writer class','http://www.ajaxray.com/projects/rss','http://www.rightbrainsolution.com/_resources/img/logo.png');
+
+ //Use the core setChannelElement() function for other optional channel elements
+ $TestFeed->setChannelElement('language', 'en-us');
+ $TestFeed->setChannelElement('pubDate', date(DATE_RSS, time()));
+
+ //Adding a feed item. Generally this portion will be in a loop that adds all items.
+
+ //Create an empty FeedItem
+ $newItem = $TestFeed->createNewItem();
+
+ //Add elements to the feed item
+ //Use wrapper functions to add common feed elements
+ $newItem->setTitle('The first feed');
+ $newItem->setLink('http://www.yahoo.com');
+ //The parameter of setDate() is a timestamp; time() supplies the current one
+ $newItem->setDate(time());
+ $newItem->setDescription('This is a test of adding CDATA encoded description by the php <b>Universal Feed Writer</b> class');
+ $newItem->setEncloser('http://www.attrtest.com', '1283629', 'audio/mpeg');
+ //Use the core addElement() function for other supported optional elements
+ $newItem->addElement('author', 'admin@ajaxray.com (Anis uddin Ahmad)');
+ //Attributes have to be passed as an array in the 3rd parameter
+ $newItem->addElement('guid', 'http://www.ajaxray.com',array('isPermaLink'=>'true'));
+
+ //Now add the feed item
+ $TestFeed->addItem($newItem);
+
+ //Another way to add an item: set several elements at once from an array
+ //Elements which have an attribute cannot be added this way
+ $newItem = $TestFeed->createNewItem();
+ $newItem->addElementArray(array('title'=>'The 2nd feed', 'link'=>'http://www.google.com', 'description'=>'This is a test of the FeedWriter class'));
+ $TestFeed->addItem($newItem);
+
+ //OK. Everything is done. Now generate the feed.
+ $TestFeed->generateFeed();
+
+?>
+