From: Maxious
Date: Sat, 01 Dec 2012 07:50:43 +0000
Subject: rm settee
X-Git-Url: https://maxious.lambdacomplex.org/git/?p=disclosr.git&a=commitdiff&h=5efdc83287d1abe104a141b533c11000bf51bbb5
---
rm settee

Former-commit-id: 2fe3d86753e524fca7ee4e095a794727c0556d79
---

--- a/.gitmodules
+++ b/.gitmodules
@@ -1,9 +1,6 @@
 [submodule "couchdb/couchdb-lucene"]
 	path = couchdb/couchdb-lucene
 	url = https://github.com/rnewson/couchdb-lucene.git
-[submodule "couchdb/settee"]
-	path = couchdb/settee
-	url = https://github.com/inadarei/settee.git
 [submodule "lib/php-diff"]
 	path = lib/php-diff
 	url = https://github.com/chrisboulton/php-diff.git
@@ -31,4 +28,7 @@
 [submodule "documents/lib/parsedatetime"]
 	path = documents/lib/parsedatetime
 	url = git://github.com/bear/parsedatetime.git
+[submodule "lib/FeedWriter"]
+	path = lib/FeedWriter
+	url = https://github.com/mibe/FeedWriter

--- a/admin/exportEmployees.csv.php
+++ b/admin/exportEmployees.csv.php
@@ -4,7 +4,8 @@
 $format = "csv";
 //$format = "json";
-if (isset($_REQUEST['format'])) $format = $_REQUEST['format'];
+if (isset($_REQUEST['format']))
+    $format = $_REQUEST['format'];
 setlocale(LC_CTYPE, 'C');
 if ($format == "csv") {
     $headers = Array("name");
@@ -21,7 +22,6 @@
         if (isset($row->value->statistics->employees)) {
             $headers = array_unique(array_merge($headers, array_keys(object_to_array($row->value->statistics->employees))));
-
         }
     }
 } catch (SetteeRestClientException $e) {
@@ -40,15 +40,14 @@
     fputcsv($fp, $headers);
 } else if ($format == "json") {
     echo '{
-    "labels" : ["' . implode('","', $headers) . '"],'.PHP_EOL;
+    "labels" : ["' . implode('","', $headers) . '"],' . PHP_EOL;
 }
 try {
     $agencies = $db->get_view("app", "all", null, true)->rows;
     //print_r($agencies);
     $first = true;
     if ($format == "json") {
-        echo '"data" : ['.PHP_EOL;
-
+        echo '"data" : [' . PHP_EOL;
     }
     foreach ($agencies as $agency) {
@@ -56,25 +55,35 @@
             $row = Array();
             $agencyEmployeesArray = object_to_array($agency->value->statistics->employees);
             foreach ($headers as $i => $fieldName) {
+                if ($format == "csv") {
+                    if (isset($agencyEmployeesArray[$fieldName])) {
+                        $row[] = $agencyEmployeesArray[$fieldName]["value"];
+                    } else if ($i == 0) {
+                        $row[] = $agency->value->name;
+                    } else {
+                        $row[] = 0;
+                    }
+                } else if ($format == "json") {
                     if (isset($agencyEmployeesArray[$fieldName])) {
-                        $row[] = '['.$i.','.$agencyEmployeesArray[$fieldName]["value"].']';
+                        $row[] = '[' . $i . ',' . $agencyEmployeesArray[$fieldName]["value"] . ']';
                     } else {
-                        $row[] = '['.$i.',0]';
+                        $row[] = '[' . $i . ',0]';
                     }
+                }
             }
             if ($format == "csv") {
                 fputcsv($fp, array_values($row));
             } else if ($format == "json") {
-                if (!$first) echo ",";
-                echo '{"data" : [' . implode(",", array_values($row)) . '], "label": "'.$agency->value->name.'", "lines" : { "show" : true }, "points" : { "show" : true }}'.PHP_EOL;
+                if (!$first)
+                    echo ",";
+                echo '{"data" : [' . implode(",", array_values($row)) . '], "label": "' . $agency->value->name . '", "lines" : { "show" : true }, "points" : { "show" : true }}' . PHP_EOL;
                 $first = false;
             }
         }
     }
     if ($format == "json") {
-        echo ']
-    }'.PHP_EOL;
-
+        echo ']
+    }' . PHP_EOL;
     }
 } catch (SetteeRestClientException $e) {
     setteErrorHandler($e);

--- a/admin/importAPSCEmployees.php
+++ b/admin/importAPSCEmployees.php
@@ -47,13 +47,17 @@
         $changed = false;
         if (!isset($doc->statistics)) {
             $changed = true;
-            $doc->statistics = Array();
+            $doc->statistics = new stdClass();
+        }
+        if (!isset($doc->statistics->employees)) {
+            $changed = true;
+            $doc->statistics->employees = new stdClass();
         }
         foreach ($sum as $timePeriod => $value) {
             if (!isset($doc->statistics->employees->$timePeriod->value)
                     || $doc->statistics->employees->$timePeriod->value != $value) {
                 $changed = true;
-                $doc->statistics["employees"][$timePeriod] = Array("value" => $value, "source" => "http://apsc.gov.au/stateoftheservice/");
+                $doc->statistics->employees->$timePeriod = Array("value" => $value, "source" => "http://apsc.gov.au/stateoftheservice/");
             }
         }
         if ($changed) {
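Not part of the patch: the JSON branch of exportEmployees.csv.php above emits what looks like flot-style chart series. A minimal Python sketch of the same output shape, with made-up values (the field names come from the PHP; "flot" and the numbers are assumptions):

    import json

    headers = ["name", "2010-2011", "2011-2012"]   # CSV/JSON column labels
    series = [{"data": [[1, 4018], [2, 3995]],     # [column index, employee count]
               "label": "Example Agency",
               "lines": {"show": True}, "points": {"show": True}}]
    print json.dumps({"labels": headers, "data": series})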
"; + if ($i == 0) + $agency = $td->nodeValue; + if ($i == 2) { + $employees = trim(str_replace(",", "", $td->nodeValue)); + } + if ($i == 4) { + $function = $td->nodeValue; + } + $i++; + } + if ($agency != "" && $employees != "" && $function != "") { + $name = trim(str_replace('2','',$agency)); + //echo "$name

" . PHP_EOL; + if (isset($nametoid[$name])) { + $id = $nametoid[$name]; + //echo $id . "
" . PHP_EOL; + @$sums[$id]["2011-2012"] += $employees; + $functions[$id] = $function; + } else if ($agency != "Agency"){ + echo "
ERROR NAME '$agency' MISSING FROM ID LIST

" . PHP_EOL; + + die(); + } + } else { + echo "skipped $agency"; + } +} +//print_r($sums); +foreach ($sums as $id => $sum) { + echo $id . "
" . PHP_EOL; + $doc = $db->get($id); + echo $doc->name . "
" . PHP_EOL; + // print_r($doc); + $changed = false; + if (!isset($doc->statistics)) { + $changed = true; + $doc->statistics = new stdClass(); + } + if (!isset($doc->statistics->employees)) { + $changed = true; + $doc->statistics->employees = new stdClass(); + } + foreach ($sum as $timePeriod => $value) { + if (!isset($doc->statistics->employees->$timePeriod->value) + || $doc->statistics->employees->$timePeriod->value != $value) { + $changed = true; + $doc->statistics->employees->$timePeriod = Array("value" => $value, "source" => "http://apsc.gov.au/stateoftheservice/"); + $doc->employees = $value; + $doc->functionClassification = $functions[$id]; + } + } + + if ($changed) { + $db->save($doc); + } else { + echo "not changed" . "
" . PHP_EOL; + } +} +// employees: timeperiod, source = apsc state of service, value +?> + --- a/admin/refreshDesignDoc.php +++ b/admin/refreshDesignDoc.php @@ -4,74 +4,62 @@ //function createFOIDocumentsDesignDoc() { $foidb = $server->get_db('disclosr-foidocuments'); - $obj = new stdClass(); - $obj->_id = "_design/" . urlencode("app"); - $obj->language = "javascript"; - $obj->views->all->map = "function(doc) { emit(doc._id, doc); };"; - $obj->views->byDate->map = "function(doc) { emit(doc.date, doc); };"; - $obj->views->byDate->reduce = "_count"; - $obj->views->byAgencyID->map = "function(doc) { emit(doc.agencyID, doc); };"; - $obj->views->byAgencyID->reduce = "_count"; +$obj = new stdClass(); +$obj->_id = "_design/" . urlencode("app"); +$obj->language = "javascript"; +$obj->views->all->map = "function(doc) { emit(doc._id, doc); };"; +$obj->views->byDate->map = "function(doc) { emit(doc.date, doc); };"; +$obj->views->byDate->reduce = "_count"; +$obj->views->byDateMonthYear->map = "function(doc) { emit(doc.date, doc); };"; +$obj->views->byDateMonthYear->reduce = "_count"; +$obj->views->byAgencyID->map = "function(doc) { emit(doc.agencyID, doc); };"; +$obj->views->byAgencyID->reduce = "_count"; - // allow safe updates (even if slightly slower due to extra: rev-detection check). - $foidb->save($obj, true); +// allow safe updates (even if slightly slower due to extra: rev-detection check). +$foidb->save($obj, true); -function createDocumentsDesignDoc() { - /* - global $db; - $obj = new stdClass(); - $obj->_id = "_design/" . urlencode("app"); - $obj->language = "javascript"; - $obj->views->all->map = "function(doc) { emit(doc._id, doc); };"; - $obj->views->byABN->map = "function(doc) { emit(doc.abn, doc); };"; - "views": { - "web_server": { - "map": "function(doc) {\n emit(doc.web_server, 1);\n}", - "reduce": "function (key, values, rereduce) {\n return sum(values);\n}" - }, - "byAgency": { - "map": "function(doc) {\n emit(doc.agencyID, 1);\n}", - "reduce": "function (key, values, rereduce) {\n return sum(values);\n}" - }, - "byURL": { - "map": "function(doc) {\n emit(doc.url, doc);\n}" - }, - "agency": { - "map": "function(doc) {\n emit(doc.agencyID, doc);\n}" - }, - "byWebServer": { - "map": "function(doc) {\n emit(doc.web_server, doc);\n}" - }, - "getValidationRequired": { - "map": "function(doc) {\nif (doc.mime_type == \"text/html\" \n&& typeof(doc.validation) == \"undefined\") {\n emit(doc._id, doc._attachments);\n}\n}" - } - } */ -} +//function createDocumentsDesignDoc() { +$docdb = $server->get_db('disclosr-documents'); + +$obj = new stdClass(); +$obj->_id = "_design/" . 
urlencode("app"); +$obj->language = "javascript"; +$obj->views->web_server->map = "function(doc) {\n emit(doc.web_server, 1);\n}"; +$obj->views->web_server->reduce = "function (key, values, rereduce) {\n return sum(values);\n}"; +$obj->views->byAgency->map = "function(doc) {\n emit(doc.agencyID, 1);\n}"; +$obj->views->byAgency->reduce = "function (key, values, rereduce) {\n return sum(values);\n}"; +$obj->views->byURL->map = "function(doc) {\n emit(doc.url, doc);\n}"; +$obj->views->agency->map = "function(doc) {\n emit(doc.agencyID, doc);\n}"; +$obj->views->byWebServer->map = "function(doc) {\n emit(doc.web_server, doc);\n}"; +$obj->views->getValidationRequired = "function(doc) {\nif (doc.mime_type == \"text/html\" \n&& typeof(doc.validation) == \"undefined\") {\n emit(doc._id, doc._attachments);\n}\n}"; + + + //function createAgencyDesignDoc() { $db = $server->get_db('disclosr-agencies'); - $obj = new stdClass(); - $obj->_id = "_design/" . urlencode("app"); - $obj->language = "javascript"; - $obj->views->all->map = "function(doc) { emit(doc._id, doc); };"; - $obj->views->byABN->map = "function(doc) { emit(doc.abn, doc); };"; - $obj->views->byCanonicalName->map = "function(doc) { +$obj = new stdClass(); +$obj->_id = "_design/" . urlencode("app"); +$obj->language = "javascript"; +$obj->views->all->map = "function(doc) { emit(doc._id, doc); };"; +$obj->views->byABN->map = "function(doc) { emit(doc.abn, doc); };"; +$obj->views->byCanonicalName->map = "function(doc) { if (doc.parentOrg || doc.orgType == 'FMA-DepartmentOfState') { emit(doc.name, doc); } };"; - $obj->views->byDeptStateName->map = "function(doc) { +$obj->views->byDeptStateName->map = "function(doc) { if (doc.orgType == 'FMA-DepartmentOfState') { emit(doc.name, doc._id); } };"; - $obj->views->parentOrgs->map = "function(doc) { +$obj->views->parentOrgs->map = "function(doc) { if (doc.parentOrg) { emit(doc._id, doc.parentOrg); } };"; - $obj->views->byName->map = 'function(doc) { +$obj->views->byName->map = 'function(doc) { if (typeof(doc["status"]) == "undefined" || doc["status"] != "suspended") { emit(doc.name, doc._id); if (typeof(doc.shortName) != "undefined" && doc.shortName != doc.name) { @@ -95,14 +83,14 @@ } };'; - $obj->views->foiEmails->map = "function(doc) { +$obj->views->foiEmails->map = "function(doc) { emit(doc._id, doc.foiEmail); };"; - $obj->views->byLastModified->map = "function(doc) { emit(doc.metadata.lastModified, doc); }"; - $obj->views->getActive->map = 'function(doc) { if (doc.status == "active") { emit(doc._id, doc); } };'; - $obj->views->getSuspended->map = 'function(doc) { if (doc.status == "suspended") { emit(doc._id, doc); } };'; - $obj->views->getScrapeRequired->map = "function(doc) { +$obj->views->byLastModified->map = "function(doc) { emit(doc.metadata.lastModified, doc); }"; +$obj->views->getActive->map = 'function(doc) { if (doc.status == "active") { emit(doc._id, doc); } };'; +$obj->views->getSuspended->map = 'function(doc) { if (doc.status == "suspended") { emit(doc._id, doc); } };'; +$obj->views->getScrapeRequired->map = "function(doc) { var lastScrape = Date.parse(doc.metadata.lastScraped); @@ -113,14 +101,14 @@ } };"; - $obj->views->showNamesABNs->map = "function(doc) { emit(doc._id, {name: doc.name, abn: doc.abn}); };"; - $obj->views->getConflicts->map = "function(doc) { +$obj->views->showNamesABNs->map = "function(doc) { emit(doc._id, {name: doc.name, abn: doc.abn}); };"; +$obj->views->getConflicts->map = "function(doc) { if (doc._conflicts) { emit(null, [doc._rev].concat(doc._conflicts)); } 
}"; - // http://stackoverflow.com/questions/646628/javascript-startswith - $obj->views->score->map = 'if(!String.prototype.startsWith){ +// http://stackoverflow.com/questions/646628/javascript-startswith +$obj->views->score->map = 'if(!String.prototype.startsWith){ String.prototype.startsWith = function (str) { return !this.indexOf(str); } @@ -144,7 +132,7 @@ emit(count+doc._id, {id:doc._id, name: doc.name, score:count, orgType: doc.orgType, portfolio:portfolio}); } }'; - $obj->views->scoreHas->map = 'if(!String.prototype.startsWith){ +$obj->views->scoreHas->map = 'if(!String.prototype.startsWith){ String.prototype.startsWith = function (str) { return !this.indexOf(str); } @@ -164,22 +152,20 @@ emit("total", 1); } }'; - $obj->views->scoreHas->reduce = 'function (key, values, rereduce) { +$obj->views->scoreHas->reduce = 'function (key, values, rereduce) { return sum(values); }'; - $obj->views->fieldNames->map = ' +$obj->views->fieldNames->map = ' function(doc) { for(var propName in doc) { emit(propName, doc._id); } }'; - $obj->views->fieldNames->reduce = 'function (key, values, rereduce) { +$obj->views->fieldNames->reduce = 'function (key, values, rereduce) { return values.length; }'; - // allow safe updates (even if slightly slower due to extra: rev-detection check). - $db->save($obj, true); - - +// allow safe updates (even if slightly slower due to extra: rev-detection check). +$db->save($obj, true); ?> --- a/couchdb/SetteeDatabase.class.php +++ /dev/null @@ -1,306 +1,1 @@ -conn_url = $conn_url; - $this->dbname = $dbname; - $this->rest_client = SetteeRestClient::get_instance($this->conn_url); - } - - - /** - * Get UUID from CouchDB - * - * @return - * CouchDB-generated UUID string - * - */ - function gen_uuid() { - $ret = $this->rest_client->http_get('_uuids'); - return $ret['decoded']->uuids[0]; // should never be empty at this point, so no checking - } - - /** - * Create or update a document database - * - * @param $document - * PHP object, a PHP associative array, or a JSON String representing the document to be saved. PHP Objects and arrays are JSON-encoded automatically. - * - *
--- a/couchdb/SetteeDatabase.class.php
+++ /dev/null
@@ -1,306 +1,1 @@
-<?php
-
-class SetteeDatabase {
-
-  function __construct($conn_url, $dbname) {
-    $this->conn_url = $conn_url;
-    $this->dbname = $dbname;
-    $this->rest_client = SetteeRestClient::get_instance($this->conn_url);
-  }
-
-
-  /**
-   * Get UUID from CouchDB
-   *
-   * @return
-   *     CouchDB-generated UUID string
-   *
-   */
-  function gen_uuid() {
-    $ret = $this->rest_client->http_get('_uuids');
-    return $ret['decoded']->uuids[0]; // should never be empty at this point, so no checking
-  }
-
-  /**
-   * Create or update a document database
-   *
-   * @param $document
-   *     PHP object, a PHP associative array, or a JSON String representing the document to be saved. PHP Objects and arrays are JSON-encoded automatically.
-   *
-   * <br /><br />If $document has a an "_id" property set, it will be used as document's unique id (even for "create" operation).
-   * If "_id" is missing, CouchDB will be used to generate a UUID.
-   *
-   * <br /><br />If $document has a "_rev" property (revision), document will be updated, rather than creating a new document.
-   * You have to provide "_rev" if you want to update an existing document, otherwise operation will be assumed to be
-   * one of creation and you will get a duplicate document exception from CouchDB. Also, you may not provide "_rev" but
-   * not provide "_id" since that is an invalid input.
-   *
-   * @param $allowRevAutoDetection
-   *   Default: false. When true and _rev is missing from the document, save() function will auto-detect latest revision
-   *   for a document and use it. This option is "false" by default because it involves an extra http HEAD request and
-   *   therefore can make save() operation slightly slower if such auto-detection is not required.
-   *
-   * @return
-   *     document object with the database id (uuid) and revision attached;
-   *
-   * @throws SetteeCreateDatabaseException
-   */
-  function save($document, $allowRevAutoDetection = false) {
-    if (is_string($document)) {
-      $document = json_decode($document);
-    }
-
-    // Allow passing of $document as an array (for syntactic simplicity and also because in JSON world it does not matter)
-    if(is_array($document)) {
-      $document = (object) $document;
-    }
-
-    if (empty($document->_id) && empty($document->_rev)) {
-      $id = $this->gen_uuid();
-    }
-    elseif (empty($document->_id) && !empty($document->_rev)) {
-      throw new SetteeWrongInputException("Error: You can not save a document with a revision provided, but missing id");
-    }
-    else {
-      $id = $document->_id;
-
-      if ($allowRevAutoDetection) {
-        try {
-          $rev = $this->get_rev($id);
-        } catch (SetteeRestClientException $e) {
-          // auto-detection may fail legitimately, if a document has never been saved before (new doc), so skipping error
-        }
-        if (!empty($rev)) {
-          $document->_rev = $rev;
-        }
-      }
-    }
-
-    $full_uri = $this->dbname . "/" . $this->safe_urlencode($id);
-    $document_json = json_encode($document, JSON_NUMERIC_CHECK);
-
-    $ret = $this->rest_client->http_put($full_uri, $document_json);
-
-    $document->_id = $ret['decoded']->id;
-    $document->_rev = $ret['decoded']->rev;
-
-    return $document;
-  }
-
-  /**
-   * @param  $doc
-   * @param  $name
-   * @param  $content
-   *    Content of the attachment in a string-buffer format. This function will automatically base64-encode content for
-   *    you, so you don't have to do it.
-   * @param  $mime_type
-   *    Optional. Will be auto-detected if not provided
-   * @return void
-   */
-  public function add_attachment($doc, $name, $content, $mime_type = null) {
-    if (empty($doc->_attachments) || !is_object($doc->_attachments)) {
-      $doc->_attachments = new stdClass();
-    }
-
-    if (empty($mime_type)) {
-      $mime_type = $this->rest_client->content_mime_type($content);
-    }
-
-    $doc->_attachments->$name = new stdClass();
-    $doc->_attachments->$name->content_type = $mime_type;
-    $doc->_attachments->$name->data = base64_encode($content);
-  }
-
-  /**
-   * @param  $doc
-   * @param  $name
-   * @param  $file
-   *    Full path to a file (e.g. as returned by PHP's realpath function).
-   * @param  $mime_type
-   *    Optional. Will be auto-detected if not provided
-   * @return void
-   */
-  public function add_attachment_file($doc, $name, $file, $mime_type = null) {
-    $content = file_get_contents($file);
-    $this->add_attachment($doc, $name, $content, $mime_type);
-  }
-
-  /**
-   *
-   * Retrieve a document from CouchDB
-   *
-   * @throws SetteeWrongInputException
-   *
-   * @param  $id
-   *    Unique ID (usually: UUID) of the document to be retrieved.
-   * @return
-   *    database document in PHP object format.
-   */
-  function get($id) {
-    if (empty($id)) {
-      throw new SetteeWrongInputException("Error: Can't retrieve a document without a uuid.");
-    }
-
-    $full_uri = $this->dbname . "/" . $this->safe_urlencode($id);
-$full_uri = str_replace("%3Frev%3D","?rev=",$full_uri);
-    $ret = $this->rest_client->http_get($full_uri);
-    return $ret['decoded'];
-  }
-
-  /**
-   *
-   * Get the latest revision of a document with document id: $id in CouchDB.
-   *
-   * @throws SetteeWrongInputException
-   *
-   * @param  $id
-   *    Unique ID (usually: UUID) of the document to be retrieved.
-   * @return
-   *    database document in PHP object format.
-   */
-  function get_rev($id) {
-    if (empty($id)) {
-      throw new SetteeWrongInputException("Error: Can't query a document without a uuid.");
-    }
-
-    $full_uri = $this->dbname . "/" . $this->safe_urlencode($id);
-    $headers = $this->rest_client->http_head($full_uri);
-    if (empty($headers['Etag'])) {
-      throw new SetteeRestClientException("Error: could not retrieve revision. Server unexpectedly returned empty Etag");
-    }
-    $etag = str_replace('"', '', $headers['Etag']);
-    return $etag;
-  }
-
-  /**
-   * Delete a document
-   *
-   * @param $document
-   *    a PHP object or JSON representation of the document that has _id and _rev fields.
-   *
-   * @return void
-   */
-  function delete($document) {
-    if (!is_object($document)) {
-      $document = json_decode($document);
-    }
-
-    $full_uri = $this->dbname . "/" . $this->safe_urlencode($document->_id) . "?rev=" . $document->_rev;
-    $this->rest_client->http_delete($full_uri);
-  }
-
-
-  /*-----------------  View-related functions --------------*/
-
-  /**
-   * Create a new view or update an existing one.
-   *
-   * @param $design_doc
-   * @param $view_name
-   * @param $map_src
-   *    Source code of the map function in Javascript
-   * @param $reduce_src
-   *    Source code of the reduce function in Javascript (optional)
-   * @return void
-   */
-  function save_view($design_doc, $view_name, $map_src, $reduce_src = null) {
-    $obj = new stdClass();
-    $obj->_id = "_design/" . urlencode($design_doc);
-    $view_name = urlencode($view_name);
-    $obj->views->$view_name->map = $map_src;
-    if (!empty($reduce_src)) {
-      $obj->views->$view_name->reduce = $reduce_src;
-    }
-
-    // allow safe updates (even if slightly slower due to extra: rev-detection check).
-    return $this->save($obj, true);
-  }
-
-  /**
-   * Create a new view or update an existing one.
-   *
-   * @param $design_doc
-   * @param $view_name
-   * @param $key
-   *    key parameter to a view. Can be a single value or an array (for a range). If passed an array, function assumes
-   *    that first element is startkey, second: endkey.
-   * @param $descending
-   *    return results in descending order. Please don't forget that if you are using a startkey/endkey, when you change
-   *    order you also need to swap startkey and endkey values!
-   *
-   * @return void
-   */
-  function get_view($design_doc, $view_name, $key = null, $descending = false) {
-    $id = "_design/" . urlencode($design_doc);
-    $view_name = urlencode($view_name);
-    $id .= "/_view/$view_name";
-
-    $data = array();
-    if (!empty($key)) {
-      if (is_string($key)) {
-        $data = "key=" . '"' . $key . '"';
-      }
-      elseif (is_array($key)) {
-        list($startkey, $endkey) = $key;
-        $data = "startkey=" . '"' . $startkey . '"&' . "endkey=" . '"' . $endkey . '"';
-      }
-
-      if ($descending) {
-        $data .= "&descending=true";
-      }
-    }
-
-
-
-    if (empty($id)) {
-      throw new SetteeWrongInputException("Error: Can't retrieve a document without a uuid.");
-    }
-
-    $full_uri = $this->dbname . "/" . $this->safe_urlencode($id);
-$full_uri = str_replace("%253Fgroup%253Dtrue","?group=true",$full_uri);
-    $ret = $this->rest_client->http_get($full_uri, $data);
-    return $ret['decoded'];
-
-  }
-
-  /**
-   * @param  $id
-   * @return
-   *    return a properly url-encoded id.
-   */
-  private function safe_urlencode($id) {
-    //-- System views like _design can have "/" in their URLs.
-    $id = rawurlencode($id);
-    if (substr($id, 0, 1) == '_') {
-      $id = str_replace('%2F', '/', $id);
-    }
-    return $id;
-  }
-
-  /** Getter for a database name */
-  function get_name() {
-    return $this->dbname;
-  }
-
-}

--- a/couchdb/settee
+++ /dev/null

--- /dev/null
+++ b/documents/.gitignore
@@ -1,1 +1,2 @@
+*.pyc
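Not part of the patch: what the removed SetteeDatabase::save($doc, true) rev auto-detection boils down to against the raw CouchDB API, sketched with python-requests (the library choice and server URL are assumptions):

    import json
    import requests

    def save_doc(base_url, dbname, doc):
        url = "%s/%s/%s" % (base_url, dbname, doc["_id"])
        if "_rev" not in doc:
            head = requests.head(url)          # the extra HEAD the docblock warns about
            if head.status_code == 200:        # document exists: reuse its latest rev
                doc["_rev"] = head.headers["ETag"].strip('"')
        return requests.put(url, data=json.dumps(doc)).json()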
--- a/documents/disclogsList.php
+++ b/documents/disclogsList.php
@@ -8,46 +8,69 @@
 Agency NameDisclosure Log URL recorded?Do we monitor this URL?";
 $agenciesdb = $server->get_db('disclosr-agencies');
 
 $docsdb = $server->get_db('disclosr-documents');
+$agencies = 0;
+$disclogs = 0;
+$red = 0;
+$green = 0;
+$yellow = 0;
+$orange = 0;
 try {
     $rows = $agenciesdb->get_view("app", "byCanonicalName", null, true)->rows;
 
     if ($rows) {
         foreach ($rows as $row) {
+            if ((!isset($row->value->status) || $row->value->status != "suspended") && isset($row->value->foiEmail)) {
+                echo "";
+                if (isset($row->value->website)) echo "";
+                echo "" . $row->value->name . "";
+                if (isset($row->value->website)) echo "";
+                if ($ENV == "DEV")
+                    echo "<br>(" . $row->id . ")";
+                echo "\n";
+                $agencies++;
-            echo "" . $row->value->name . "";
-            if ($ENV == "DEV")
-                echo "<br>(" . $row->id . ")";
-            echo "\n";
-
-
-            echo "";
-            if (isset($row->value->FOIDocumentsURL)) {
-                echo '' . $row->value->FOIDocumentsURL . '';
-                if ($ENV == "DEV")
-                    echo '<br>(' . 'view local copy)';
-            } else {
-                echo "";
+                echo "";
+                if (isset($row->value->FOIDocumentsURL)) {
+                    $disclogs++;
+                    echo '' . $row->value->FOIDocumentsURL . '';
+                    if ($ENV == "DEV")
+                        echo '<br>(' . 'view local copy)';
+                } else {
+                    echo "";
+                }
+                echo "\n";
+                if (isset($row->value->FOIDocumentsURL)) {
+                    if (file_exists("./scrapers/" . $row->id . '.py')) {
+                        echo "";
+                        $green++;
+                    } else if (file_exists("./scrapers/" . $row->id . '.txt')) {
+                        if (trim(file_get_contents("./scrapers/" . $row->id . '.txt')) == "no disclog") {
+                            echo "";
+                            $yellow++;
+                        } else {
+                            echo file_get_contents("./scrapers/" . $row->id . '.txt');
+                            echo "";
+                            $orange++;
+                        }
+                    } else {
+                        echo "";
+                        $red++;
+                    }
+                }
+                echo "\n";
+            }
-            echo "\n";
-            if (isset($row->value->FOIDocumentsURL)) {
-                if (file_exists("./scrapers/" . $row->id . '.py')) {
-                    echo "";
-                } else if (file_exists("./scrapers/" . $row->id . '.txt')) {
-                    echo "";
-                } else {
-                    echo "";
-                }
-            }
-            echo "\n";
         }
     }
 } catch (SetteeRestClientException $e) {
     setteErrorHandler($e);
 }
 echo "";
+echo $agencies . " agencies, " . round(($disclogs / $agencies) * 100) . "% with disclosure logs; "
+ . round(($green / $disclogs) * 100) . "% logs with scrapers " . round(($red / $disclogs) * 100) . "% logs without scrapers " . round(($orange / $disclogs) * 100) . "% logs Work-In-Progress scrapers ";
+
 include_footer_documents();
 ?>

--- a/documents/genericScrapers.py
+++ b/documents/genericScrapers.py
@@ -9,6 +9,7 @@
 import dateutil
 from dateutil.parser import *
 from datetime import *
+import codecs
 
 class GenericDisclogScraper(object):
     __metaclass__ = abc.ABCMeta
@@ -84,12 +85,18 @@
         doc.update({'description': descriptiontxt})
         return
     def getTitle(self, content, entry, doc):
-        doc.update({'title': content.string})
+        doc.update({'title': (''.join(content.stripped_strings))})
         return
     def getTable(self, soup):
         return soup.table
+    def getRows(self, table):
+        return table.find_all('tr')
     def getDate(self, content, entry, doc):
-        edate = parse(''.join(content.stripped_strings).strip(), dayfirst=True, fuzzy=True).strftime("%Y-%m-%d")
+        date = ''.join(content.stripped_strings).strip()
+        (a,b,c) = date.partition("(")
+        date = self.remove_control_chars(a.replace("Octber","October"))
+        print date
+        edate = parse(date, dayfirst=True, fuzzy=True).strftime("%Y-%m-%d")
         print edate
         doc.update({'date': edate})
         return
@@ -110,11 +117,11 @@
         # http://www.crummy.com/software/BeautifulSoup/documentation.html
         soup = BeautifulSoup(content)
         table = self.getTable(soup)
-        for row in table.find_all('tr'):
+        for row in self.getRows(table):
             columns = row.find_all('td')
             if len(columns) == self.getColumnCount():
-                (id, date, description, title, notes) = self.getColumns(columns)
-                print ''.join(id.stripped_strings)
+                (id, date, title, description, notes) = self.getColumns(columns)
+                print self.remove_control_chars(''.join(id.stripped_strings))
                 if id.string == None:
                     hash = scrape.mkhash(self.remove_control_chars(url+(''.join(date.stripped_strings))))
                 else:
@@ -123,13 +130,13 @@
                 if doc == None:
                     print "saving " +hash
-                    doc = {'_id': hash, 'agencyID': self.getAgencyID(), 'url': self.getURL(), 'docID': id.string}
+                    doc = {'_id': hash, 'agencyID': self.getAgencyID(), 'url': self.getURL(), 'docID': (''.join(id.stripped_strings))}
                     self.getLinks(self.getURL(),row,doc)
                     self.getTitle(title,row, doc)
                     self.getDate(date,row, doc)
                     self.getDescription(description,row, doc)
                     if notes != None:
-                        doc.update({ 'notes': notes.string})
+                        doc.update({ 'notes': (''.join(notes.stripped_strings))})
                     foidocsdb.save(doc)
                 else:
                     print "already saved "+hash
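Not part of the patch: the date cleanup that getDate() now performs, as a standalone Python 2 snippet ("Octber" and trailing "(...)" qualifiers occur in real disclosure-log cells; the cell contents here are made up; dayfirst=True matches Australian d/m/Y dates):

    from dateutil.parser import parse

    raw = "3 Octber 2012 (documents attached)"   # hypothetical cell contents
    cleaned = raw.partition("(")[0].replace("Octber", "October")
    print parse(cleaned, dayfirst=True, fuzzy=True).strftime("%Y-%m-%d")  # 2012-10-03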
--- a/documents/index.php
+++ b/documents/index.php
@@ -1,11 +1,11 @@
 get_db('disclosr-agencies');
@@ -15,17 +15,17 @@
 }
 $foidocsdb = $server->get_db('disclosr-foidocuments');
 try {
-    $rows = $foidocsdb->get_view("app", "byDate", Array('9999-99-99','0000-00-00'), true)->rows;
-
-
+    $rows = $foidocsdb->get_view("app", "byDate", Array($startkey, '0000-00-00'), true, 20)->rows;
     if ($rows) {
-        foreach ($rows as $row) {
-displayLogEntry($row,$idtoname);
+        foreach ($rows as $key => $row) {
+            echo displayLogEntry($row, $idtoname);
+            $endkey = $row->key;
         }
     }
 } catch (SetteeRestClientException $e) {
     setteErrorHandler($e);
 }
+echo "next page";
 include_footer_documents();
 ?>

--- a/documents/robots.txt
+++ b/documents/robots.txt
@@ -2,4 +2,5 @@
 # http://code.google.com/web/controlcrawlindex/
 User-agent: *
-
+Disallow: /admin/
+Sitemap: http://disclosurelo.gs/sitemap.xml.php

--- a/documents/rss.xml.php
+++ b/documents/rss.xml.php
@@ -3,28 +3,38 @@
 // Agency X updated Y, new files, diff of plain text/link text,
 // feed for just one agency or all
 // This is a minimum example of using the Universal Feed Generator Class
-include("lib/FeedWriter.php");
+include("../lib/FeedWriter/FeedTypes.php");
+include_once('../include/common.inc.php');
 //Creating an instance of FeedWriter class.
-$TestFeed = new FeedWriter(RSS2);
+$TestFeed = new RSS2FeedWriter();
 //Setting the channel elements
 //Use wrapper functions for common channelelements
 $TestFeed->setTitle('Last Modified - All');
-$TestFeed->setLink('http://disclosr.lambdacomplex.org/rss.xml.php');
+$TestFeed->setLink('http://disclosurelo.gs/rss.xml.php');
 $TestFeed->setDescription('This is test of creating a RSS 2.0 feed Universal Feed Writer');
+
 $TestFeed->setChannelElement('language', 'en-us');
+
 $TestFeed->setChannelElement('pubDate', date(DATE_RSS, time()));
 //Retriving informations from database
-$rows = $db->get_view("app", "byLastModified")->rows;
+$idtoname = Array();
+$agenciesdb = $server->get_db('disclosr-agencies');
+foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) {
+    $idtoname[$row->id] = trim($row->value->name);
+}
+$foidocsdb = $server->get_db('disclosr-foidocuments');
+$rows = $foidocsdb->get_view("app", "byDate", Array('9999-99-99','0000-00-00'), true)->rows;
 //print_r($rows);
 foreach ($rows as $row) {
     //Create an empty FeedItem
     $newItem = $TestFeed->createNewItem();
     //Add elements to the feed item
-    $newItem->setTitle($row['name']);
-    $newItem->setLink($row['id']);
-    $newItem->setDate(date("c", $row['metadata']['lastModified']));
-    $newItem->setDescription($row['name']);
+    $newItem->setTitle($row->value->title);
+    $newItem->setLink("view.php?id=".$row->value->docID);
+    $newItem->setDate(date("c", strtotime($row->value->date)));
+    $newItem->setDescription(displayLogEntry($row,$idtoname));
+    $newItem->addElement('guid', $row->value->_id,array('isPermaLink'=>'true'));
     //Now add the feed item
     $TestFeed->addItem($newItem);
 }
 //OK. Everything is done. Now genarate the feed.
-$TestFeed->genarateFeed();
+$TestFeed->generateFeed();
 ?>

--- a/documents/scrape.py
+++ b/documents/scrape.py
@@ -204,12 +204,12 @@
                 scrapeAndStore(docsdb, agency[key],0,key,agency['_id'])
             if key == 'website' and False:
                 scrapeAndStore(docsdb, agency[key],0,key,agency['_id'])
+                agency['metadata']['lastScraped'] = time.time()
             if key.endswith('URL') and False:
                 print key
                 depth = 1
                 if 'scrapeDepth' in agency.keys():
                     depth = agency['scrapeDepth']
                 scrapeAndStore(docsdb, agency[key],depth,key,agency['_id'])
-    agency['metadata']['lastScraped'] = time.time()
     agencydb.save(agency)

--- /dev/null
+++ b/documents/scrapers/0049d35216493c545ef5f7f000e6b252.txt
@@ -1,1 +1,2 @@
+pdf

--- /dev/null
+++ b/documents/scrapers/00a294de663db69062ca09aede7c0487.txt
@@ -1,1 +1,2 @@
+multipage

--- /dev/null
+++ b/documents/scrapers/0324e4b1654fd6dd651307abcef67094.py
@@ -1,1 +1,19 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+    def getColumnCount(self):
+        return 6
+    def getColumns(self,columns):
+        (id, date, title, description, notes,link) = columns
+        return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/0372b19123076338d483f624c433727b.txt
@@ -1,1 +1,2 @@
+docx

--- /dev/null
+++ b/documents/scrapers/0ae822d1a748e60d90f0b79b97d5a3e5.txt
@@ -1,1 +1,2 @@
+ACMA style

--- /dev/null
+++ b/documents/scrapers/0ced9dd2de36100c3cabdb7fd8e843a9.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+    #def getTable(self,soup):
+    #    return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table
+    def getColumnCount(self):
+        return 5
+    def getColumns(self,columns):
+        (id, date, title, description,notes) = columns
+        return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- a/documents/scrapers/1097fa8afdcf5db89d212d0979226667.py
+++ b/documents/scrapers/1097fa8afdcf5db89d212d0979226667.py
@@ -8,7 +8,7 @@
 class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
     def getColumns(self,columns):
         (id, date, title, description, notes) = columns
-        return (id, date, description, title, notes)
+        return (id, date, title, description, notes)
 
 if __name__ == '__main__':
     print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)

--- a/documents/scrapers/157cbe654bdaa0a48e6650152ae34489.py
+++ b/documents/scrapers/157cbe654bdaa0a48e6650152ae34489.py
@@ -10,7 +10,7 @@
         return 5
     def getColumns(self,columns):
         (id, date, title, description, notes) = columns
-        return (id, date, description, title, notes)
+        return (id, date, title, description, notes)
 
     def getTable(self,soup):
         return soup.find_all('table')[4]
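Not part of the patch: the red/green/yellow/orange convention the dashboard derives from these scraper files, restated in Python (the paths and the "no disclog" marker are taken from disclogsList.php above):

    import os

    def scraper_status(agency_id, scraper_dir="./scrapers"):
        if os.path.exists(os.path.join(scraper_dir, agency_id + ".py")):
            return "green"                     # scraper implemented
        txt = os.path.join(scraper_dir, agency_id + ".txt")
        if os.path.exists(txt):
            note = open(txt).read().strip()
            return "yellow" if note == "no disclog" else "orange"  # noted / work in progress
        return "red"                           # disclosure log recorded, no scraper yet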
--- /dev/null
+++ b/documents/scrapers/1803322b27286950cab0c543168b5f21.txt
@@ -1,1 +1,2 @@
+multipage log

--- /dev/null
+++ b/documents/scrapers/1d404c4934f74feacd00dcb434e7c10a.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+    #def getTable(self,soup):
+    #    return soup.find(id = "cphMain_C001_Col01").table
+    def getColumnCount(self):
+        return 5
+    def getColumns(self,columns):
+        (id, date, title, description,notes) = columns
+        return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/24bd71114d3975ed9a63ad29624c62c9.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+    def getTable(self,soup):
+        return soup.find(id = "inner_content")
+    def getColumnCount(self):
+        return 2
+    def getColumns(self,columns):
+        (date, title) = columns
+        return (date, date, title, title, None)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/2cac2cd1f42687db2d04fa20b5b6a538.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+    #def getTable(self,soup):
+    #    return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table
+    def getColumnCount(self):
+        return 3
+    def getColumns(self,columns):
+        (id, title, date) = columns
+        return (id, date, title, title, None)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/31685505438d393f45a90f442b8fa27f.txt
@@ -1,1 +1,2 @@
+pdf

--- /dev/null
+++ b/documents/scrapers/38ca99d2790975a40dde3fae41dbdc3d.py
@@ -1,1 +1,32 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+import dateutil
+from dateutil.parser import *
+from datetime import *
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+    def getColumnCount(self):
+        return 3
+    def getColumns(self,columns):
+        (date, title, description) = columns
+        return (date, date, title, description, None)
+    def getTitle(self, content, entry, doc):
+        i = 0
+        title = ""
+        for string in content.stripped_strings:
+            if i < 2:
+                title = title + string
+            i = i+1
+        doc.update({'title': title})
+        print title
+        return
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/3b54190e3f409380e109fae29e1917aa.py
@@ -1,1 +1,19 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+    def getColumnCount(self):
+        return 7
+    def getColumns(self,columns):
+        (id, date, title, description, link, deldate,notes) = columns
+        return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- a/documents/scrapers/3cd40b1240e987cbcd3f0e67054ce259.py
+++ b/documents/scrapers/3cd40b1240e987cbcd3f0e67054ce259.py
@@ -7,7 +7,7 @@
 class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
     def getColumns(self,columns):
         (id, date, description, title, notes) = columns
-        return (id, date, description, title, notes)
+        return (id, date, title, description, notes)
 
 if __name__ == '__main__':
     print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)

--- /dev/null
+++ b/documents/scrapers/3d5871a44abbbc81ef5b3a420070755d.py
@@ -1,1 +1,47 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+import dateutil
+from dateutil.parser import *
+from datetime import *
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+    def getTable(self,soup):
+        return soup.find(class_ = "inner-column").table
+    def getRows(self,table):
+        return table.tbody.find_all('tr',recursive=False)
+    def getColumnCount(self):
+        return 3
+    def getColumns(self,columns):
+        (date, title, description) = columns
+        return (date, date, title, description, None)
+    def getDate(self, content, entry, doc):
+        i = 0
+        date = ""
+        for string in content.stripped_strings:
+            if i ==1:
+                date = string
+            i = i+1
+        edate = parse(date, dayfirst=True, fuzzy=True).strftime("%Y-%m-%d")
+        print edate
+        doc.update({'date': edate})
+        return
+    def getTitle(self, content, entry, doc):
+        i = 0
+        title = ""
+        for string in content.stripped_strings:
+            if i < 2:
+                title = title + string
+            i = i+1
+        doc.update({'title': title})
+        #print title
+        return
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/3e2f110af49d62833a835bd257771ffb.txt
@@ -1,1 +1,2 @@
+no disclog

--- /dev/null
+++ b/documents/scrapers/41a166419503bb50e410c58be54c102f.txt
@@ -1,1 +1,1 @@
-
+aspx

--- /dev/null
+++ b/documents/scrapers/4934000fddd6a5b1094f398798341290.py
@@ -1,1 +1,23 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+import dateutil
+from dateutil.parser import *
+from datetime import *
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+    def getColumnCount(self):
+        return 5
+    def getColumns(self,columns):
+        (id, date, title, description, notes) = columns
+        return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    si = ScraperImplementation()
+    si.doScrape()
+

--- /dev/null
+++ b/documents/scrapers/4c57389dda9bd454bcb08bc1e5ed87bf.txt
@@ -1,1 +1,2 @@
+parent

--- /dev/null
+++ b/documents/scrapers/4d2af2dcc72f1703bbf04b13b03720a8.txt
@@ -1,1 +1,2 @@
+no disclog

--- /dev/null
+++ b/documents/scrapers/525c3953187da08cd702359b2fc2997f.txt
@@ -1,1 +1,2 @@
+no disclog

--- /dev/null
+++ b/documents/scrapers/53b14397c8f27c29ff07b6319f7a0ec5.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+    #def getTable(self,soup):
+    #    return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table
+    def getColumnCount(self):
+        return 5
+    def getColumns(self,columns):
+        (id, date, title, description,notes) = columns
+        return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- a/documents/scrapers/53d2884f8afd026096a27bd5051ec50e.py
+++ b/documents/scrapers/53d2884f8afd026096a27bd5051ec50e.py
@@ -10,7 +10,7 @@
         return soup.find(class_ = "ms-rtestate-field").table
     def getColumns(self,columns):
         (id, date, title, description, notes) = columns
-        return (id, date, description, title, notes)
+        return (id, date, title, description, notes)
 
     def getLinks(self, content, entry, doc):
         link = None

--- /dev/null
+++ b/documents/scrapers/54cbb3439276062b7a9f007f9f69d1f6.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+    #def getTable(self,soup):
+    #    return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table
+    def getColumnCount(self):
+        return 4
+    def getColumns(self,columns):
+        (id, date, title, description) = columns
+        return (id, date, title, description, None)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/55b69726fde4b4898ecf6d7217d1d1d2.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+    #def getTable(self,soup):
+    #    return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table
+    def getColumnCount(self):
+        return 4
+    def getColumns(self,columns):
+        (id, date, title, description) = columns
+        return (id, date, title, description, None)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/5716ce0aacfe98f7d638b7a66b7f1040.py
@@ -1,1 +1,19 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+    def getColumnCount(self):
+        return 4
+    def getColumns(self,columns):
+        (date, id, title, description) = columns
+        return (id, date, title, description, None)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/5d05365e981d87e746b596d63e35b1dc.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+    def getTable(self,soup):
+        return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table
+    def getColumnCount(self):
+        return 5
+    def getColumns(self,columns):
+        (id, date, title, description,notes) = columns
+        return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/601aedeef4344638d635bdd761e9fdba.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+    #def getTable(self,soup):
+    #    return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table
+    def getColumnCount(self):
+        return 4
+    def getColumns(self,columns):
+        (date, title, description,notes) = columns
+        return (title, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/627f116dfe42c9f27ad6747be0aa44e2.txt
@@ -1,1 +1,1 @@
-
+no disclog

--- /dev/null
+++ b/documents/scrapers/649b053f5e2884906ddc7174c2cd4b38.py
@@ -1,1 +1,28 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+import dateutil
+from dateutil.parser import *
+from datetime import *
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+    def getColumnCount(self):
+        return 5
+    def getColumns(self,columns):
+        (id, date, title, description, notes) = columns
+        return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    si = ScraperImplementation()
+    si.doScrape()
+    si.disclogURL = "http://www.fahcsia.gov.au/disclosure-log-2011-12-financial-year"
+    si.doScrape()
+    si.disclogURL = "http://www.fahcsia.gov.au/disclosure-log-2010-11-financial-year"
+    si.doScrape()
+
+

--- /dev/null
+++ b/documents/scrapers/655d4d67333536bda18d68265dfe7e80.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+    def getTable(self,soup):
+        return soup.find(id="node-30609")
+    def getColumnCount(self):
+        return 5
+    def getColumns(self,columns):
+        (id, date, title, description,notes) = columns
+        return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/656f7bb1884f4b9d31ebe2a5f5f58064.txt
@@ -1,1 +1,2 @@
+list style

--- /dev/null
+++ b/documents/scrapers/65ec17101b00519e6d88c5a9f33c2c46.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+    #def getTable(self,soup):
+    #    return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table
+    def getColumnCount(self):
+        return 3
+    def getColumns(self,columns):
+        (id, date, description) = columns
+        return (id, date, description, description, None)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/69d59284ef0ccd2677394d82d3292abc.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+    def getTable(self,soup):
+        return soup.find(id = "centercontent").table
+    def getColumnCount(self):
+        return 5
+    def getColumns(self,columns):
+        (id, date, title, description,notes) = columns
+        return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/6ac74a939f420c6194ae29224809734a.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+    #def getTable(self,soup):
+    #    return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table
+    def getColumnCount(self):
+        return 5
+    def getColumns(self,columns):
+        (id, date, title, description,notes) = columns
+        return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/6afdde1d4ff1ad8d8cfe1a8675ea83bd.txt
@@ -1,1 +1,2 @@
+PDF

--- /dev/null
+++ b/documents/scrapers/6cf3870aedeeecfd6394b5c0abed4c55.py
@@ -1,1 +1,23 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+import dateutil
+from dateutil.parser import *
+from datetime import *
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+    def getColumnCount(self):
+        return 5
+    def getColumns(self,columns):
+        (id, date, title, description, notes) = columns
+        return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    si = ScraperImplementation()
+    si.doScrape()
+

--- /dev/null
+++ b/documents/scrapers/6fa04af95fbe7de96daa2c7560e0aad3.py
@@ -1,1 +1,19 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+    def getTable(self,soup):
+        return soup.find(id = "content_div_50269").table
+    def getColumns(self,columns):
+        (id, date, title, description, notes) = columns
+        return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- a/documents/scrapers/6fa04af95fbe7de96daa2c7560e0aad3.txt
+++ /dev/null
@@ -1,19 +1,1 @@
-import sys,os
-sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
-import genericScrapers
-import scrape
-from bs4 import BeautifulSoup
-#http://www.doughellmann.com/PyMOTW/abc/
-class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
-    def getTable(self,soup):
-        return soup.find(id = "content_div_50269").table
-    def getColumns(self,columns):
-        (id, date, title, description, notes) = columns
-        return (id, date, title, description, notes)
-
-if __name__ == '__main__':
-    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
-    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
-    ScraperImplementation().doScrape()
-

--- a/documents/scrapers/6fe3c812a99d486963133459b2768cf6.py
+++ b/documents/scrapers/6fe3c812a99d486963133459b2768cf6.py
@@ -8,7 +8,7 @@
 class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
     def getColumns(self,columns):
         (id, date, title, description, notes) = columns
-        return (id, date, description, title, notes)
+        return (id, date, title, description, notes)
 
 if __name__ == '__main__':
     print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)

--- /dev/null
+++ b/documents/scrapers/72a295f10734d64e8185f651fd2b39ea.txt
@@ -1,1 +1,2 @@
+weird div based log with tables of links

--- /dev/null
+++ b/documents/scrapers/75d8f1c605ef9da0c2590264b7aa046b.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+    def getTable(self,soup):
+        return soup.find(id = "content-middle").table
+    def getColumnCount(self):
+        return 5
+    def getColumns(self,columns):
+        (id, date, title, description,notes) = columns
+        return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)