--- a/.gitmodules
+++ b/.gitmodules
@@ -31,4 +31,7 @@
 [submodule "documents/lib/parsedatetime"]
 	path = documents/lib/parsedatetime
 	url = git://github.com/bear/parsedatetime.git
+[submodule "lib/FeedWriter"]
+	path = lib/FeedWriter
+	url = https://github.com/mibe/FeedWriter
 


--- /dev/null
+++ b/documents/.gitignore
@@ -1,1 +1,2 @@
 
+*.pyc


--- a/documents/disclogsList.php
+++ b/documents/disclogsList.php
@@ -19,8 +19,8 @@
 
     if ($rows) {
         foreach ($rows as $row) {
-
-            echo "<tr><td><b>" . $row->value->name . "</b>";
+if (!isset($row->value->status) || $row->value->status != "suspended") {
+            echo "<tr><td><a href='" . $row->value->website ."'><b>". $row->value->name . "</b></a>";
             if ($ENV == "DEV")
                 echo "<br>(" . $row->id . ")";
             echo "</td>\n";
@@ -35,29 +35,31 @@
                     echo '<br><small>(<a href="viewDocument.php?hash=' . md5($row->value->FOIDocumentsURL) . '">'
                     . 'view local copy</a>)</small>';
             } else {
-                echo "<font color='red'>✘</font>";
+                echo "<font color='red'><abbr title='No'>✘</abbr></font>";
             }
             echo "</td>\n<td>";
             if (isset($row->value->FOIDocumentsURL)) {
                 if (file_exists("./scrapers/" . $row->id . '.py')) {
-                    echo "<font color='green'>✔</font>";
+                    echo "<font color='green'><abbr title='Yes'>✔</abbr></font>";
                     $green++;
                 } else if (file_exists("./scrapers/" . $row->id . '.txt')) {
-                    echo "<font color='orange'><b>▬</b></font>";
+                    echo "<font color='orange'><abbr title='Work in progress'><b>▬</b></abbr></font>";
                     $orange++;
                 } else {
-                    echo "<font color='red'>✘</font>";
+                    echo "<font color='red'><abbr title='No'>✘</abbr></font>";
                     $red++;
                 }
             }
             echo "</td></tr>\n";
         }
     }
+}
 } catch (SetteeRestClientException $e) {
     setteErrorHandler($e);
 }
 echo "</table>";
-echo $agencies." agencies ".(($disclogs/$agencies)*100)."% with disclosure logs, ".(($green/$disclogs)*100)."% with scrapers ".(($red/$disclogs)*100)."% without scrapers ".(($orange/$disclogs)*100)."% WIP scrapers ";
+echo $agencies." agencies, ".round(($disclogs/max($agencies,1))*100)."% with disclosure logs; " // summary line; max(..., 1) avoids division by zero when nothing was counted (numerators are 0 then, so 0% is printed)
+.round(($green/max($disclogs,1))*100)."% logs with scrapers ".round(($red/max($disclogs,1))*100)."% logs without scrapers ".round(($orange/max($disclogs,1))*100)."% logs Work-In-Progress scrapers ";
 
 include_footer_documents();
 ?>


--- a/documents/genericScrapers.py
+++ b/documents/genericScrapers.py
@@ -92,7 +92,9 @@
 		return table.find_all('tr')
 	def getDate(self, content, entry, doc):
 		date = ''.join(content.stripped_strings).strip()
-		date = str.replace("Octber","October",date)
+		(a,b,c) = date.partition("(")
+		date = a.replace("Octber","October")
+		print date
 		edate = parse(date, dayfirst=True, fuzzy=True).strftime("%Y-%m-%d")
 		print edate
 		doc.update({'date': edate})


--- a/documents/index.php
+++ b/documents/index.php
@@ -20,7 +20,7 @@
 
     if ($rows) {
         foreach ($rows as $row) {
-displayLogEntry($row,$idtoname);
+echo displayLogEntry($row,$idtoname);
         }
     }
 } catch (SetteeRestClientException $e) {


--- a/documents/robots.txt
+++ b/documents/robots.txt
@@ -2,4 +2,5 @@
 # http://code.google.com/web/controlcrawlindex/
 
 User-agent: *
-
+Disallow: /admin/
+Sitemap: http://disclosurelo.gs/sitemap.xml.php


--- a/documents/rss.xml.php
+++ b/documents/rss.xml.php
@@ -3,28 +3,38 @@
 // Agency X updated Y,  new files, diff of plain text/link text,
 // feed for just one agency or all
 // This is a minimum example of using the Universal Feed Generator Class
-include("lib/FeedWriter.php");
+include("../lib/FeedWriter/FeedTypes.php");
+include_once('../include/common.inc.php');
 //Creating an instance of FeedWriter class.
-$TestFeed = new FeedWriter(RSS2);
+$TestFeed = new RSS2FeedWriter();
 //Setting the channel elements
 //Use wrapper functions for common channelelements
 $TestFeed->setTitle('Last Modified - All');
-$TestFeed->setLink('http://disclosr.lambdacomplex.org/rss.xml.php');
+$TestFeed->setLink('http://disclosurelo.gs/rss.xml.php');
 $TestFeed->setDescription('This is test of creating a RSS 2.0 feed Universal Feed Writer');
+  $TestFeed->setChannelElement('language', 'en-us');
+  $TestFeed->setChannelElement('pubDate', date(DATE_RSS, time()));
 //Retriving informations from database
-$rows = $db->get_view("app", "byLastModified")->rows;
+$idtoname = Array();
+$agenciesdb = $server->get_db('disclosr-agencies');
+foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) {
+    $idtoname[$row->id] = trim($row->value->name);
+}
+$foidocsdb = $server->get_db('disclosr-foidocuments');
+$rows = $foidocsdb->get_view("app", "byDate", Array('9999-99-99','0000-00-00'), true)->rows;
 //print_r($rows);
 foreach ($rows as $row) {
     //Create an empty FeedItem
     $newItem = $TestFeed->createNewItem();
     //Add elements to the feed item
-    $newItem->setTitle($row['name']);
-    $newItem->setLink($row['id']);
-    $newItem->setDate(date("c", $row['metadata']['lastModified']));
-    $newItem->setDescription($row['name']);
+    $newItem->setTitle($row->value->title);
+    $newItem->setLink("http://disclosurelo.gs/view.php?id=".$row->value->docID); // absolute URL (same host as the channel link): feed readers cannot resolve a relative item link
+    $newItem->setDate(date("c", strtotime($row->value->date)));
+    $newItem->setDescription(displayLogEntry($row,$idtoname));
+    $newItem->addElement('guid', $row->value->_id,array('isPermaLink'=>'false')); // the guid is a bare document id, not a URL, so it must not be flagged as a permalink
     //Now add the feed item
     $TestFeed->addItem($newItem);
 }
 //OK. Everything is done. Now genarate the feed.
-$TestFeed->genarateFeed();
+$TestFeed->generateFeed();
 ?>


--- /dev/null
+++ b/documents/scrapers/0049d35216493c545ef5f7f000e6b252.txt
@@ -1,1 +1,2 @@
+pdf
 


--- /dev/null
+++ b/documents/scrapers/00a294de663db69062ca09aede7c0487.txt
@@ -1,1 +1,2 @@
+multipage
 


--- /dev/null
+++ b/documents/scrapers/0372b19123076338d483f624c433727b.txt
@@ -1,1 +1,2 @@
+docx
 


--- /dev/null
+++ b/documents/scrapers/0ae822d1a748e60d90f0b79b97d5a3e5.txt
@@ -1,1 +1,2 @@
+ACMA style
 


--- /dev/null
+++ b/documents/scrapers/0ced9dd2de36100c3cabdb7fd8e843a9.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        #def getTable(self,soup):
+        #        return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table       
+        def getColumnCount(self):
+                return 5
+        def getColumns(self,columns):
+                (id, date, title, description,notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+


--- /dev/null
+++ b/documents/scrapers/1803322b27286950cab0c543168b5f21.txt
@@ -1,1 +1,2 @@
+multipage log
 


--- /dev/null
+++ b/documents/scrapers/24bd71114d3975ed9a63ad29624c62c9.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getTable(self,soup):
+                return soup.find(id = "inner_content")       
+        def getColumnCount(self):
+                return 2
+        def getColumns(self,columns):
+                (date, title) = columns
+                return (date, date, title, title, None)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+


--- /dev/null
+++ b/documents/scrapers/2cac2cd1f42687db2d04fa20b5b6a538.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        #def getTable(self,soup):
+        #        return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table       
+        def getColumnCount(self):
+                return 3
+        def getColumns(self,columns):
+                (id, title, date) = columns
+                return (id, date, title, title, None)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+


--- /dev/null
+++ b/documents/scrapers/31685505438d393f45a90f442b8fa27f.txt
@@ -1,1 +1,2 @@
+pdf
 


--- /dev/null
+++ b/documents/scrapers/3e2f110af49d62833a835bd257771ffb.txt
@@ -1,1 +1,2 @@
+no disclog
 


--- /dev/null
+++ b/documents/scrapers/41a166419503bb50e410c58be54c102f.txt
@@ -1,1 +1,1 @@
-
+aspx


--- /dev/null
+++ b/documents/scrapers/4934000fddd6a5b1094f398798341290.py
@@ -1,1 +1,23 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+import dateutil
+from dateutil.parser import *
+from datetime import *
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumnCount(self):
+                return 5
+        def getColumns(self,columns):
+                (id, date, title, description, notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    si = ScraperImplementation()
+    si.doScrape()
+


--- /dev/null
+++ b/documents/scrapers/4c57389dda9bd454bcb08bc1e5ed87bf.txt
@@ -1,1 +1,2 @@
+parent
 


--- /dev/null
+++ b/documents/scrapers/4d2af2dcc72f1703bbf04b13b03720a8.txt
@@ -1,1 +1,2 @@
+no disclog
 


--- /dev/null
+++ b/documents/scrapers/525c3953187da08cd702359b2fc2997f.txt
@@ -1,1 +1,2 @@
+no disclog
 


--- /dev/null
+++ b/documents/scrapers/54cbb3439276062b7a9f007f9f69d1f6.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        #def getTable(self,soup):
+        #        return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table       
+        def getColumnCount(self):
+                return 4
+        def getColumns(self,columns):
+                (id, date, title, description) = columns
+                return (id, date, title, description, None)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+


--- /dev/null
+++ b/documents/scrapers/601aedeef4344638d635bdd761e9fdba.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        #def getTable(self,soup):
+        #        return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table       
+        def getColumnCount(self):
+                return 4
+        def getColumns(self,columns):
+                (date, title, description,notes) = columns
+                return (title, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+


--- /dev/null
+++ b/documents/scrapers/649b053f5e2884906ddc7174c2cd4b38.py
@@ -1,1 +1,28 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+import dateutil
+from dateutil.parser import *
+from datetime import *
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumnCount(self):
+                return 5
+        def getColumns(self,columns):
+                (id, date, title, description, notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    si = ScraperImplementation()
+    si.doScrape()
+    si.disclogURL = "http://www.fahcsia.gov.au/disclosure-log-2011-12-financial-year"
+    si.doScrape()
+    si.disclogURL = "http://www.fahcsia.gov.au/disclosure-log-2010-11-financial-year"
+    si.doScrape()
+
+


--- /dev/null
+++ b/documents/scrapers/655d4d67333536bda18d68265dfe7e80.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getTable(self,soup):
+                return soup.find(id="node-30609")       
+        def getColumnCount(self):
+                return 5
+        def getColumns(self,columns):
+                (id, date, title, description,notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+


--- /dev/null
+++ b/documents/scrapers/656f7bb1884f4b9d31ebe2a5f5f58064.txt
@@ -1,1 +1,2 @@
+list style
 


--- /dev/null
+++ b/documents/scrapers/65ec17101b00519e6d88c5a9f33c2c46.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        #def getTable(self,soup):
+        #        return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table       
+        def getColumnCount(self):
+                return 3
+        def getColumns(self,columns):
+                (id, date, description) = columns
+                return (id, date, description, description, None)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+


--- /dev/null
+++ b/documents/scrapers/69d59284ef0ccd2677394d82d3292abc.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getTable(self,soup):
+                return soup.find(id = "centercontent").table       
+        def getColumnCount(self):
+                return 5
+        def getColumns(self,columns):
+                (id, date, title, description,notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+


--- /dev/null
+++ b/documents/scrapers/6ac74a939f420c6194ae29224809734a.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        #def getTable(self,soup):
+        #        return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table       
+        def getColumnCount(self):
+                return 5
+        def getColumns(self,columns):
+                (id, date, title, description,notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+


--- /dev/null
+++ b/documents/scrapers/6afdde1d4ff1ad8d8cfe1a8675ea83bd.txt
@@ -1,1 +1,2 @@
+PDF
 


--- /dev/null
+++ b/documents/scrapers/6cf3870aedeeecfd6394b5c0abed4c55.py
@@ -1,1 +1,23 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+import dateutil
+from dateutil.parser import *
+from datetime import *
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumnCount(self):
+                return 5
+        def getColumns(self,columns):
+                (id, date, title, description, notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    si = ScraperImplementation()
+    si.doScrape()
+


--- /dev/null
+++ b/documents/scrapers/72a295f10734d64e8185f651fd2b39ea.txt
@@ -1,1 +1,2 @@
+weird div based log with tables of links
 


--- /dev/null
+++ b/documents/scrapers/75d8f1c605ef9da0c2590264b7aa046b.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getTable(self,soup):
+                return soup.find(id = "content-middle").table       
+        def getColumnCount(self):
+                return 5
+        def getColumns(self,columns):
+                (id, date, title, description,notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+


--- /dev/null
+++ b/documents/scrapers/768bbbfb34115873af361af8519b38a9.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        #def getTable(self,soup):
+        #        return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table       
+        def getColumnCount(self):
+                return 5
+        def getColumns(self,columns):
+                (id, date, title, description,notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+


--- /dev/null
+++ b/documents/scrapers/795e7a8afb39a420360aa207b0cb1306.txt
@@ -1,1 +1,2 @@
+no disclog
 


--- /dev/null
+++ b/documents/scrapers/7b39ce7f362a0af9a711eaf223943eea.txt
@@ -1,1 +1,2 @@
+no disclog
 


--- /dev/null
+++ b/documents/scrapers/7ec28d7d97fcf493b1350acd03e3642e.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        #def getTable(self,soup):
+        #        return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table       
+        def getColumnCount(self):
+                return 3
+        def getColumns(self,columns):
+                (date, title, description) = columns
+                return (date, date, title, description, None)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+


--- /dev/null
+++ b/documents/scrapers/7f55a3c42ad7460254906aa043a6e324.py
@@ -1,1 +1,24 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        #def getTable(self,soup):
+        #        return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table       
+        def getTitle(self, content, entry, doc):  # stores the title for this log entry in doc
+                doc.update({'title': next(content.stripped_strings)})  # only the first text fragment of the cell; builtin next() also works on Python 3
+                return
+        def getColumnCount(self):
+                return 3
+        def getColumns(self,columns):
+                (date, id, description) = columns
+                return (id, date, description, description, None)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+


--- /dev/null
+++ b/documents/scrapers/8317df630946937864d31a4728ad8ee8.txt
@@ -1,1 +1,2 @@
+pdf
 


--- /dev/null
+++ b/documents/scrapers/8796220032faf94501bd366763263685.txt
@@ -1,1 +1,2 @@
+multiple pages
 


--- /dev/null
+++ b/documents/scrapers/8aae1c28db7f3ce10f232a0137be6bb2.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        #def getTable(self,soup):
+        #        return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table       
+        def getColumnCount(self):
+                return 5
+        def getColumns(self,columns):
+                (id, date, title, description,notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+


--- /dev/null
+++ b/documents/scrapers/8e874a2fde8aa0ccdc6d14573d766540.txt
@@ -1,1 +1,49 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getDescription(self,content, entry,doc):
+		link = None
+                links = []
+                description = ""
+		for atag in entry.find_all('a'):
+			if atag.has_key('href'):
+				link = scrape.fullurl(self.getURL(),atag['href'])			
+                                (url,mime_type,htcontent) = scrape.fetchURL(scrape.docsdb, link, "foidocuments", self.getAgencyID(), False)
+                                if htcontent != None:
+                                        if mime_type == "text/html" or mime_type == "application/xhtml+xml" or mime_type =="application/xml":
+                                        # http://www.crummy.com/software/BeautifulSoup/documentation.html
+                                                soup = BeautifulSoup(htcontent)
+                                                for row in soup.find(class_ = "ms-rteTable-GreyAlternating").find_all('tr'):
+                                                        if row != None:
+								rowtitle = row.find('th').string
+                                                                description = description + "\n" + rowtitle + ": "
+                                                                for text in row.find('td').stripped_strings:
+                                                                        description = description + text
+                                                     		for atag in row.find_all("a"):
+                                                                	if atag.has_key('href'):
+                                                                        	links.append(scrape.fullurl(link,atag['href']))
+
+		if links != []:
+                 	doc.update({'links': links})
+                if description != "":
+                        doc.update({ 'description': description})
+
+	def getColumnCount(self):
+		return 2
+	def getTable(self,soup):
+		return soup.find(class_ = "ms-rteTable-GreyAlternating")
+	def getColumns(self,columns):
+		(date, title) = columns
+		return (title, date, title, title, None)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+# old site too http://archive.treasury.gov.au/content/foi_publications.asp
+


--- /dev/null
+++ b/documents/scrapers/905a1c409b6afb1de0074b13a5559560.py
@@ -1,1 +1,23 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+import dateutil
+from dateutil.parser import *
+from datetime import *
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumnCount(self):
+                return 5
+        def getColumns(self,columns):
+                (id, date, title, description, notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    si = ScraperImplementation()
+    si.doScrape()
+


--- /dev/null
+++ b/documents/scrapers/9282306e244040c9e4ae5705f06f9548.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        #def getTable(self,soup):
+        #        return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table       
+        def getColumnCount(self):
+                return 4
+        def getColumns(self,columns):
+                (id, date, title, description) = columns
+                return (id, date, title, description, None)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+


--- /dev/null
+++ b/documents/scrapers/93ce83e46f5c2c4ca1b7f199b59b4bd2.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        #def getTable(self,soup):
+        #        return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table       
+        def getColumnCount(self):
+                return 4
+        def getColumns(self,columns):
+                (id, date,logdate, description) = columns
+                return (id, date, description, description, None)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+


--- /dev/null
+++ b/documents/scrapers/99328d76c8efb56ff3f1da79b9d1b17f.txt
@@ -1,1 +1,2 @@
+acma style
 


--- /dev/null
+++ b/documents/scrapers/9961dc45e046288ad1431941653af20c.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        #def getTable(self,soup):
+        #        return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table       
+        def getColumnCount(self):
+                return 5
+        def getColumns(self,columns):
+                (id, date, title, description,notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+


--- /dev/null
+++ b/documents/scrapers/a1ab9c80ab473958676c62c1a25dd502.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        """Scraper whose getColumns() takes five column values; getTable() is not overridden."""
+        # Define getTable(self, soup) here if the page needs a specific table selected.
+        def getColumnCount(self):
+                return 5
+        def getColumns(self, columns):
+                (entry_id, entry_date, title, description, notes) = columns
+                return (entry_id, entry_date, title, description, notes)
+
+if __name__ == '__main__':  # run directly: print two class checks, then call doScrape()
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+


--- /dev/null
+++ b/documents/scrapers/a43467fe82b840a353b380c4d7462a4c.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        """Scraper whose getColumns() takes three column values; getTable() is not overridden."""
+        # No id value is unpacked, so the date is returned in the first slot as well.
+        def getColumnCount(self):
+                return 3
+        def getColumns(self, columns):
+                (entry_date, title, description) = columns
+                return (entry_date, entry_date, title, description, None)
+
+if __name__ == '__main__':  # run directly: print two class checks, then call doScrape()
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+


--- /dev/null
+++ b/documents/scrapers/a687a9eaab9e10e9e118d3fd7cf0e13a.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getTable(self, soup):  # first <table> inside the element with this id
+                return soup.find(id="ctl00_ContentPlaceHolderMainNoAjax_EdtrTD1494_2").table
+        def getColumnCount(self):
+                return 4
+        def getColumns(self, columns):  # first value is ignored; title fills the third and fourth slots
+                (_blank, entry_id, title, entry_date) = columns
+                return (entry_id, entry_date, title, title, None)
+
+if __name__ == '__main__':  # run directly: print two class checks, then call doScrape()
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+


--- /dev/null
+++ b/documents/scrapers/b0a3281ba66efe173c5a33d5ef90ff76.txt
@@ -1,1 +1,2 @@
+multipage immi
 


--- /dev/null
+++ b/documents/scrapers/b0fb402314e685238537105ee0e70c84.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        """Scraper whose getColumns() takes five column values; getTable() is not overridden."""
+        # Define getTable(self, soup) here if the page needs a specific table selected.
+        def getColumnCount(self):
+                return 5
+        def getColumns(self, columns):
+                (entry_id, entry_date, title, description, notes) = columns
+                return (entry_id, entry_date, title, description, notes)
+
+if __name__ == '__main__':  # run directly: print two class checks, then call doScrape()
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+


--- /dev/null
+++ b/documents/scrapers/b7770c4584332cff42bb6abb3326e564.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getTable(self, soup):  # first <table> inside the element with this id
+                return soup.find(id="ctl00_PlaceHolderMain_Content__ControlWrapper_RichHtmlField").table
+        def getColumnCount(self):
+                return 5
+        def getColumns(self, columns):  # the five values are returned in the order received
+                (entry_id, entry_date, title, description, notes) = columns
+                return (entry_id, entry_date, title, description, notes)
+
+if __name__ == '__main__':  # run directly: print two class checks, then call doScrape()
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+


--- /dev/null
+++ b/documents/scrapers/b91f866928eb61959dbbab56313214fc.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        """Scraper whose getColumns() takes five column values; getTable() is not overridden."""
+        # Define getTable(self, soup) here if the page needs a specific table selected.
+        def getColumnCount(self):
+                return 5
+        def getColumns(self, columns):
+                (entry_id, entry_date, title, description, notes) = columns
+                return (entry_id, entry_date, title, description, notes)
+
+if __name__ == '__main__':  # run directly: print two class checks, then call doScrape()
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+


--- /dev/null
+++ b/documents/scrapers/bc91b878e2317fa231cc2c512e2027f0.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        """Scraper whose getColumns() takes four column values; getTable() is not overridden."""
+        # No notes value is unpacked, so None is returned in the fifth slot.
+        def getColumnCount(self):
+                return 4
+        def getColumns(self, columns):
+                (entry_id, entry_date, title, description) = columns
+                return (entry_id, entry_date, title, description, None)
+
+if __name__ == '__main__':  # run directly: print two class checks, then call doScrape()
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+


--- /dev/null
+++ b/documents/scrapers/cca17a34bd490474a316fe0a1ca03c25.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getTable(self, soup):  # first <table> inside the element with this id
+                return soup.find(id="ctl00_PlaceHolderMain_ctl01__ControlWrapper_RichHtmlField").table
+        def getColumnCount(self):
+                return 5
+        def getColumns(self, columns):  # the five values are returned in the order received
+                (entry_id, entry_date, title, description, notes) = columns
+                return (entry_id, entry_date, title, description, notes)
+
+if __name__ == '__main__':  # run directly: print two class checks, then call doScrape()
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+


--- /dev/null
+++ b/documents/scrapers/cde8eb4a2e40abb18d8b28d3b85bc9b0.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getTable(self, soup):  # the element whose summary attribute has exactly this text
+                return soup.find(summary="This table lists the schedule of upcoming courses.")
+        def getColumnCount(self):
+                return 7
+        def getColumns(self, columns):  # the fifth and sixth values (link, deldate) are not returned
+                (entry_id, entry_date, title, description, _link, _deldate, notes) = columns
+                return (entry_id, entry_date, title, description, notes)
+
+if __name__ == '__main__':  # run directly: print two class checks, then call doScrape()
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+


--- /dev/null
+++ b/documents/scrapers/ce34d1e9b55911e4272d2d388821f311.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        """Scraper whose getColumns() takes five column values; getTable() is not overridden."""
+        # Define getTable(self, soup) here if the page needs a specific table selected.
+        def getColumnCount(self):
+                return 5
+        def getColumns(self, columns):
+                (entry_id, entry_date, title, description, notes) = columns
+                return (entry_id, entry_date, title, description, notes)
+
+if __name__ == '__main__':  # run directly: print two class checks, then call doScrape()
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+


--- /dev/null
+++ b/documents/scrapers/d1296c366287f7a9faedf235c7e6df01.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getTable(self, soup):  # first <table> inside the element with id "main"
+                return soup.find(id="main").table
+        def getColumnCount(self):
+                return 7
+        def getColumns(self, columns):  # the fifth and sixth values (link, deldate) are not returned
+                (entry_id, entry_date, title, description, _link, _deldate, notes) = columns
+                return (entry_id, entry_date, title, description, notes)
+
+if __name__ == '__main__':  # run directly: print two class checks, then call doScrape()
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+


--- /dev/null
+++ b/documents/scrapers/e64c71f4986f78675a252104c5a5f359.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        """Scraper whose getColumns() takes five column values; getTable() is not overridden."""
+        # Define getTable(self, soup) here if the page needs a specific table selected.
+        def getColumnCount(self):
+                return 5
+        def getColumns(self, columns):
+                (entry_id, entry_date, title, description, notes) = columns
+                return (entry_id, entry_date, title, description, notes)
+
+if __name__ == '__main__':  # run directly: print two class checks, then call doScrape()
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+


--- /dev/null
+++ b/documents/scrapers/e770921522a49dc77de208cc724ce134.txt
@@ -1,1 +1,2 @@
+ce n'est pas une table
 


--- /dev/null
+++ b/documents/scrapers/e90b1b7cbb83e3eed0b5f849c7e3af79.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getTable(self, soup):  # the element with this id is returned itself (no .table lookup)
+                return soup.find(id="inner_content")
+        def getColumnCount(self):
+                return 2
+        def getColumns(self, columns):  # date fills slots one and two, title slots three and four
+                (entry_date, title) = columns
+                return (entry_date, entry_date, title, title, None)
+
+if __name__ == '__main__':  # run directly: print two class checks, then call doScrape()
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+


--- /dev/null
+++ b/documents/scrapers/ee30aad97f0bb32e74c4587404b67ce4.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        """Scraper whose getColumns() takes four column values; getTable() is not overridden."""
+        # Title arrives before date here; the two are swapped in the returned tuple.
+        def getColumnCount(self):
+                return 4
+        def getColumns(self, columns):
+                (entry_id, title, entry_date, description) = columns
+                return (entry_id, entry_date, title, description, None)
+
+if __name__ == '__main__':  # run directly: print two class checks, then call doScrape()
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+


--- /dev/null
+++ b/documents/scrapers/f189459fc43f941e0d4ecfba52c666f3.txt
@@ -1,1 +1,2 @@
+no disclog
 


--- /dev/null
+++ b/documents/scrapers/f5ce2d1651739704634eb8ca4b2b46d3.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getTable(self, soup):  # first <table> inside the element with this id
+                return soup.find(id="ctl00_PlaceHolderMain_PublishingPageContent__ControlWrapper_RichHtmlField").table
+        def getColumnCount(self):
+                return 7
+        def getColumns(self, columns):  # the fifth and sixth values (link, deldate) are not returned
+                (entry_id, entry_date, title, description, _link, _deldate, notes) = columns
+                return (entry_id, entry_date, title, description, notes)
+
+if __name__ == '__main__':  # run directly: print two class checks, then call doScrape()
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+


--- /dev/null
+++ b/documents/sitemap.xml.php
@@ -1,1 +1,31 @@
+<?php
 
+// Emits an XML sitemap for this directory: index.php, every other page
+// script found here, and one view.php URL per row of the "all" view in the
+// disclosr-foidocuments database.
+include ('../include/common.inc.php');
+header("Content-Type: text/xml");
+echo "<?xml version='1.0' encoding='UTF-8'?>";
+echo '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";
+echo " <url><loc>" . local_url() . "index.php</loc><priority>1.0</priority></url>\n";
+foreach (scandir("./") as $file) {
+    // A page script must END in ".php" (not merely contain it), and
+    // include-only fragments (*.inc.php) are not pages in their own right.
+    $isPage = substr($file, -4) === ".php" && substr($file, -8) !== ".inc.php";
+    if ($isPage && $file != "index.php" && $file != "sitemap.xml.php")
+        echo " <url><loc>" . local_url() . rawurlencode($file) . "</loc><priority>0.6</priority></url>\n";
+}
+
+$db = $server->get_db('disclosr-foidocuments');
+try {
+    $rows = $db->get_view("app", "all")->rows;
+    foreach ($rows as $row) {
+        // Percent-encode the id so it cannot break the query string or the XML.
+        echo '<url><loc>' . local_url() . 'view.php?id=' . urlencode($row->value->_id) . "</loc><priority>0.3</priority></url>\n";
+    }
+} catch (SetteeRestClientException $e) {
+    setteErrorHandler($e);
+}
+echo '</urlset>';
+?>
+


--- a/documents/template.inc.php
+++ b/documents/template.inc.php
@@ -130,21 +130,38 @@
 }
 
+/**
+ * Build the HTML for one disclosure-log entry and return it as a string.
+ *
+ * Nothing is echoed here, so the caller decides where the markup goes.
+ * URLs are HTML-escaped before being placed in href attributes; title,
+ * description and notes are inserted exactly as stored.
+ * NOTE(review): confirm whether those fields can carry untrusted markup.
+ *
+ * @param object $row      Entry wrapper; fields are read from $row->value.
+ * @param array  $idtoname Agency names keyed by agency id.
+ * @return string The entry markup wrapped in a <div>.
+ */
 function displayLogEntry($row, $idtoname) {
-    echo "<div><h2>".$row->value->date.": ".$row->value->title." (".$idtoname[$row->value->agencyID].")</h2> <p>".str_replace("\n","<br>",$row->value->description);
+    $result = "";
+    $result .= "<div><h2>".$row->value->date.": ".$row->value->title." (".$idtoname[$row->value->agencyID].")</h2> <p>".str_replace("\n","<br>",$row->value->description);
 if (isset($row->value->notes)) {
-echo " <br>Note: ".$row->value->notes;
+$result .= " <br>Note: ".$row->value->notes;
 }
-echo "</p>";
+$result .= "</p>";
 
 if (isset($row->value->links)){
-echo "<h3>Links/Documents</h3><ul>";
+$result .= "<h3>Links/Documents</h3><ul>";
 foreach ($row->value->links as $link) {
-    echo "<li><a href='$link'>".$link."</a></li>";
+    // Escape once (no double-encoding) so a quote or angle bracket in the URL cannot break out of the attribute.
+    $safeLink = htmlspecialchars($link, ENT_QUOTES, 'UTF-8', false);
+    $result .= "<li><a href='$safeLink'>".$safeLink."</a></li>";
 }
 
-        echo "</ul>";
+        $result .= "</ul>";
 }
-        echo "<small><A href='".$row->value->url."'>View original source...</a> ID: ".$row->value->docID."</small>";
-echo"</div>";
+        $safeUrl = htmlspecialchars($row->value->url, ENT_QUOTES, 'UTF-8', false);
+        $result .= "<small><A href='".$safeUrl."'>View original source...</a> ID: ".$row->value->docID."</small>";
+$result .= "</div>";
+return $result;
 }
 


--- /dev/null
+++ b/documents/view.php
@@ -1,1 +1,32 @@
+<?php
+include('template.inc.php');
+include_header_documents("");
+include_once('../include/common.inc.php');
+?>
+<?php
 
+// Shows a single disclosure-log entry, looked up by the "id" request parameter.
+
+$agenciesdb = $server->get_db('disclosr-agencies');
+
+// Map each agency row id to its trimmed name so the entry can show the agency.
+$idtoname = Array();
+foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) {
+    $idtoname[$row->id] = trim($row->value->name);
+}
+$foidocsdb = $server->get_db('disclosr-foidocuments');
+try {
+    // Without an id there is nothing to fetch; say so instead of querying with null.
+    if (!isset($_REQUEST['id']) || !is_string($_REQUEST['id']) || $_REQUEST['id'] === '') {
+        echo "<p>No document id was supplied.</p>";
+    } else {
+        $obj = new stdClass();
+        $obj->value = $foidocsdb->get($_REQUEST['id']);
+        echo displayLogEntry($obj, $idtoname);
+    }
+} catch (SetteeRestClientException $e) {
+    setteErrorHandler($e);
+}
+include_footer_documents();
+?>
+


--- a/lib/FeedItem.php
+++ /dev/null
@@ -1,168 +1,1 @@
-<?php

- /**

- * Univarsel Feed Writer

- * 

- * FeedItem class - Used as feed element in FeedWriter class

- *

- * @package         UnivarselFeedWriter

- * @author          Anis uddin Ahmad <anisniit@gmail.com>

- * @link            http://www.ajaxray.com/projects/rss

- */

- class FeedItem

- {

-	private $elements = array();    //Collection of feed elements

-	private $version;

-	

-	/**

-	* Constructor 

-	* 

-	* @param    contant     (RSS1/RSS2/ATOM) RSS2 is default. 

-	*/ 

-	function __construct($version = RSS2)

-	{    

-		$this->version = $version;

-	}

-	

-	/**

-	* Add an element to elements array

-	* 

-	* @access   public

-	* @param    srting  The tag name of an element

-	* @param    srting  The content of tag

-	* @param    array   Attributes(if any) in 'attrName' => 'attrValue' format

-	* @return   void

-	*/

-	public function addElement($elementName, $content, $attributes = null)

-	{

-		$this->elements[$elementName]['name']       = $elementName;

-		$this->elements[$elementName]['content']    = $content;

-		$this->elements[$elementName]['attributes'] = $attributes;

-	}

-	

-	/**

-	* Set multiple feed elements from an array. 

-	* Elements which have attributes cannot be added by this method

-	* 

-	* @access   public

-	* @param    array   array of elements in 'tagName' => 'tagContent' format.

-	* @return   void

-	*/

-	public function addElementArray($elementArray)

-	{

-		if(! is_array($elementArray)) return;

-		foreach ($elementArray as $elementName => $content) 

-		{

-			$this->addElement($elementName, $content);

-		}

-	}

-	

-	/**

-	* Return the collection of elements in this feed item

-	* 

-	* @access   public

-	* @return   array

-	*/

-	public function getElements()

-	{

-		return $this->elements;

-	}

-	

-	// Wrapper functions ------------------------------------------------------

-	

-	/**

-	* Set the 'dscription' element of feed item

-	* 

-	* @access   public

-	* @param    string  The content of 'description' element

-	* @return   void

-	*/

-	public function setDescription($description) 

-	{

-		$tag = ($this->version == ATOM)? 'summary' : 'description'; 

-		$this->addElement($tag, $description);

-	}

-	

-	/**

-	* @desc     Set the 'title' element of feed item

-	* @access   public

-	* @param    string  The content of 'title' element

-	* @return   void

-	*/

-	public function setTitle($title) 

-	{

-		$this->addElement('title', $title);  	

-	}

-	

-	/**

-	* Set the 'date' element of feed item

-	* 

-	* @access   public

-	* @param    string  The content of 'date' element

-	* @return   void

-	*/

-	public function setDate($date) 

-	{

-		if(! is_numeric($date))

-		{

-			$date = strtotime($date);

-		}

-		

-		if($this->version == ATOM)

-		{

-			$tag    = 'updated';

-			$value  = date(DATE_ATOM, $date);

-		}        

-		elseif($this->version == RSS2) 

-		{

-			$tag    = 'pubDate';

-			$value  = date(DATE_RSS, $date);

-		}

-		else                                

-		{

-			$tag    = 'dc:date';

-			$value  = date("Y-m-d", $date);

-		}

-		

-		$this->addElement($tag, $value);    

-	}

-	

-	/**

-	* Set the 'link' element of feed item

-	* 

-	* @access   public

-	* @param    string  The content of 'link' element

-	* @return   void

-	*/

-	public function setLink($link) 

-	{

-		if($this->version == RSS2 || $this->version == RSS1)

-		{

-			$this->addElement('link', $link);

-		}

-		else

-		{

-			$this->addElement('link','',array('href'=>$link));

-			$this->addElement('id', FeedWriter::uuid($link,'urn:uuid:'));

-		} 

-		

-	}

-	

-	/**

-	* Set the 'encloser' element of feed item

-	* For RSS 2.0 only

-	* 

-	* @access   public

-	* @param    string  The url attribute of encloser tag

-	* @param    string  The length attribute of encloser tag

-	* @param    string  The type attribute of encloser tag

-	* @return   void

-	*/

-	public function setEncloser($url, $length, $type)

-	{

-		$attributes = array('url'=>$url, 'length'=>$length, 'type'=>$type);

-		$this->addElement('enclosure','',$attributes);

-	}

-	

- } // end of class FeedItem

-?>

 


--- a/lib/FeedWriter.php
+++ /dev/null
@@ -1,435 +1,1 @@
-<?php

-// RSS 0.90  Officially obsoleted by 1.0

-// RSS 0.91, 0.92, 0.93 and 0.94  Officially obsoleted by 2.0

-// So, define constants for RSS 1.0, RSS 2.0 and ATOM 	

-

-	define('RSS1', 'RSS 1.0', true);

-	define('RSS2', 'RSS 2.0', true);

-	define('ATOM', 'ATOM', true);

-

- /**

- * Univarsel Feed Writer class

- *

- * Genarate RSS 1.0, RSS2.0 and ATOM Feed

- *                             

- * @package     UnivarselFeedWriter

- * @author      Anis uddin Ahmad <anisniit@gmail.com>

- * @link        http://www.ajaxray.com/projects/rss

- */

- class FeedWriter

- {

-	 private $channels      = array();  // Collection of channel elements

-	 private $items         = array();  // Collection of items as object of FeedItem class.

-	 private $data          = array();  // Store some other version wise data

-	 private $CDATAEncoding = array();  // The tag names which have to encoded as CDATA

-	 

-	 private $version   = null; 

-	

-	/**

-	* Constructor

-	* 

-	* @param    constant    the version constant (RSS1/RSS2/ATOM).       

-	*/ 

-	function __construct($version = RSS2)

-	{	

-		$this->version = $version;

-			

-		// Setting default value for assential channel elements

-		$this->channels['title']        = $version . ' Feed';

-		$this->channels['link']         = 'http://www.ajaxray.com/blog';

-				

-		//Tag names to encode in CDATA

-		$this->CDATAEncoding = array('description', 'content:encoded', 'summary');

-	}

-

-	// Start # public functions ---------------------------------------------

-	

-	/**

-	* Set a channel element

-	* @access   public

-	* @param    srting  name of the channel tag

-	* @param    string  content of the channel tag

-	* @return   void

-	*/

-	public function setChannelElement($elementName, $content)

-	{

-		$this->channels[$elementName] = $content ;

-	}

-	

-	/**

-	* Set multiple channel elements from an array. Array elements 

-	* should be 'channelName' => 'channelContent' format.

-	* 

-	* @access   public

-	* @param    array   array of channels

-	* @return   void

-	*/

-	public function setChannelElementsFromArray($elementArray)

-	{

-		if(! is_array($elementArray)) return;

-		foreach ($elementArray as $elementName => $content) 

-		{

-			$this->setChannelElement($elementName, $content);

-		}

-	}

-	

-	/**

-	* Genarate the actual RSS/ATOM file

-	* 

-	* @access   public

-	* @return   void

-	*/ 

-	public function genarateFeed()

-	{

-		header("Content-type: text/xml");

-		

-		$this->printHead();

-		$this->printChannels();

-		$this->printItems();

-		$this->printTale();

-	}

-	

-	/**

-	* Create a new FeedItem.

-	* 

-	* @access   public

-	* @return   object  instance of FeedItem class

-	*/

-	public function createNewItem()

-	{

-		$Item = new FeedItem($this->version);

-		return $Item;

-	}

-	

-	/**

-	* Add a FeedItem to the main class

-	* 

-	* @access   public

-	* @param    object  instance of FeedItem class

-	* @return   void

-	*/

-	public function addItem($feedItem)

-	{

-		$this->items[] = $feedItem;    

-	}

-	

-	

-	// Wrapper functions -------------------------------------------------------------------

-	

-	/**

-	* Set the 'title' channel element

-	* 

-	* @access   public

-	* @param    srting  value of 'title' channel tag

-	* @return   void

-	*/

-	public function setTitle($title)

-	{

-		$this->setChannelElement('title', $title);

-	}

-	

-	/**

-	* Set the 'description' channel element

-	* 

-	* @access   public

-	* @param    srting  value of 'description' channel tag

-	* @return   void

-	*/

-	public function setDescription($desciption)

-	{

-		$this->setChannelElement('description', $desciption);

-	}

-	

-	/**

-	* Set the 'link' channel element

-	* 

-	* @access   public

-	* @param    srting  value of 'link' channel tag

-	* @return   void

-	*/

-	public function setLink($link)

-	{

-		$this->setChannelElement('link', $link);

-	}

-	

-	/**

-	* Set the 'image' channel element

-	* 

-	* @access   public

-	* @param    srting  title of image

-	* @param    srting  link url of the imahe

-	* @param    srting  path url of the image

-	* @return   void

-	*/

-	public function setImage($title, $link, $url)

-	{

-		$this->setChannelElement('image', array('title'=>$title, 'link'=>$link, 'url'=>$url));

-	}

-	

-	/**

-	* Set the 'about' channel element. Only for RSS 1.0

-	* 

-	* @access   public

-	* @param    srting  value of 'about' channel tag

-	* @return   void

-	*/

-	public function setChannelAbout($url)

-	{

-		$this->data['ChannelAbout'] = $url;    

-	}

-	

-  /**

-  * Genarates an UUID

-  * @author     Anis uddin Ahmad <admin@ajaxray.com>

-  * @param      string  an optional prefix

-  * @return     string  the formated uuid

-  */

-  public function uuid($key = null, $prefix = '') 

-  {

-	$key = ($key == null)? uniqid(rand()) : $key;

-	$chars = md5($key);

-	$uuid  = substr($chars,0,8) . '-';

-	$uuid .= substr($chars,8,4) . '-';

-	$uuid .= substr($chars,12,4) . '-';

-	$uuid .= substr($chars,16,4) . '-';

-	$uuid .= substr($chars,20,12);

-

-	return $prefix . $uuid;

-  }

-	// End # public functions ----------------------------------------------

-	

-	// Start # private functions ----------------------------------------------

-	

-	/**

-	* Prints the xml and rss namespace

-	* 

-	* @access   private

-	* @return   void

-	*/

-	private function printHead()

-	{

-		$out  = '<?xml version="1.0" encoding="utf-8"?>' . "\n";

-		

-		if($this->version == RSS2)

-		{

-			$out .= '<rss version="2.0"

-					xmlns:content="http://purl.org/rss/1.0/modules/content/"

-					xmlns:wfw="http://wellformedweb.org/CommentAPI/"

-				  >' . PHP_EOL;

-		}    

-		elseif($this->version == RSS1)

-		{

-			$out .= '<rdf:RDF 

-					 xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"

-					 xmlns="http://purl.org/rss/1.0/"

-					 xmlns:dc="http://purl.org/dc/elements/1.1/"

-					>' . PHP_EOL;;

-		}

-		else if($this->version == ATOM)

-		{

-			$out .= '<feed xmlns="http://www.w3.org/2005/Atom">' . PHP_EOL;;

-		}

-		echo $out;

-	}

-	

-	/**

-	* Closes the open tags at the end of file

-	* 

-	* @access   private

-	* @return   void

-	*/

-	private function printTale()

-	{

-		if($this->version == RSS2)

-		{

-			echo '</channel>' . PHP_EOL . '</rss>'; 

-		}    

-		elseif($this->version == RSS1)

-		{

-			echo '</rdf:RDF>';

-		}

-		else if($this->version == ATOM)

-		{

-			echo '</feed>';

-		}

-	  

-	}

-

-	/**

-	* Creates a single node as xml format

-	* 

-	* @access   private

-	* @param    srting  name of the tag

-	* @param    mixed   tag value as string or array of nested tags in 'tagName' => 'tagValue' format

-	* @param    array   Attributes(if any) in 'attrName' => 'attrValue' format

-	* @return   string  formatted xml tag

-	*/

-	private function makeNode($tagName, $tagContent, $attributes = null)

-	{        

-		$nodeText = '';

-		$attrText = '';

-

-		if(is_array($attributes))

-		{

-			foreach ($attributes as $key => $value) 

-			{

-				$attrText .= " $key=\"$value\" ";

-			}

-		}

-		

-		if(is_array($tagContent) && $this->version == RSS1)

-		{

-			$attrText = ' rdf:parseType="Resource"';

-		}

-		

-		

-		$attrText .= (in_array($tagName, $this->CDATAEncoding) && $this->version == ATOM)? ' type="html" ' : '';

-		$nodeText .= (in_array($tagName, $this->CDATAEncoding))? "<{$tagName}{$attrText}><![CDATA[" : "<{$tagName}{$attrText}>";

-		 

-		if(is_array($tagContent))

-		{ 

-			foreach ($tagContent as $key => $value) 

-			{

-				$nodeText .= $this->makeNode($key, $value);

-			}

-		}

-		else

-		{

-			$nodeText .= (in_array($tagName, $this->CDATAEncoding))? $tagContent : htmlentities($tagContent);

-		}           

-			

-		$nodeText .= (in_array($tagName, $this->CDATAEncoding))? "]]></$tagName>" : "</$tagName>";

-

-		return $nodeText . PHP_EOL;

-	}

-	

-	/**

-	* @desc     Print channels

-	* @access   private

-	* @return   void

-	*/

-	private function printChannels()

-	{

-		//Start channel tag

-		switch ($this->version) 

-		{

-		   case RSS2: 

-				echo '<channel>' . PHP_EOL;        

-				break;

-		   case RSS1: 

-				echo (isset($this->data['ChannelAbout']))? "<channel rdf:about=\"{$this->data['ChannelAbout']}\">" : "<channel rdf:about=\"{$this->channels['link']}\">";

-				break;

-		}

-		

-		//Print Items of channel

-		foreach ($this->channels as $key => $value) 

-		{

-			if($this->version == ATOM && $key == 'link') 

-			{

-				// ATOM prints link element as href attribute

-				echo $this->makeNode($key,'',array('href'=>$value));

-				//Add the id for ATOM

-				echo $this->makeNode('id',$this->uuid($value,'urn:uuid:'));

-			}

-			else

-			{

-				echo $this->makeNode($key, $value);

-			}    

-			

-		}

-		

-		//RSS 1.0 have special tag <rdf:Seq> with channel 

-		if($this->version == RSS1)

-		{

-			echo "<items>" . PHP_EOL . "<rdf:Seq>" . PHP_EOL;

-			foreach ($this->items as $item) 

-			{

-				$thisItems = $item->getElements();

-				echo "<rdf:li resource=\"{$thisItems['link']['content']}\"/>" . PHP_EOL;

-			}

-			echo "</rdf:Seq>" . PHP_EOL . "</items>" . PHP_EOL . "</channel>" . PHP_EOL;

-		}

-	}

-	

-	/**

-	* Prints formatted feed items

-	* 

-	* @access   private

-	* @return   void

-	*/

-	private function printItems()

-	{    

-		foreach ($this->items as $item) 

-		{

-			$thisItems = $item->getElements();

-			

-			//the argument is printed as rdf:about attribute of item in rss 1.0 

-			echo $this->startItem($thisItems['link']['content']);

-			

-			foreach ($thisItems as $feedItem ) 

-			{

-				echo $this->makeNode($feedItem['name'], $feedItem['content'], $feedItem['attributes']); 

-			}

-			echo $this->endItem();

-		}

-	}

-	

-	/**

-	* Make the starting tag of channels

-	* 

-	* @access   private

-	* @param    srting  The vale of about tag which is used for only RSS 1.0

-	* @return   void

-	*/

-	private function startItem($about = false)

-	{

-		if($this->version == RSS2)

-		{

-			echo '<item>' . PHP_EOL; 

-		}    

-		elseif($this->version == RSS1)

-		{

-			if($about)

-			{

-				echo "<item rdf:about=\"$about\">" . PHP_EOL;

-			}

-			else

-			{

-				die('link element is not set .\n It\'s required for RSS 1.0 to be used as about attribute of item');

-			}

-		}

-		else if($this->version == ATOM)

-		{

-			echo "<entry>" . PHP_EOL;

-		}    

-	}

-	

-	/**

-	* Closes feed item tag

-	* 

-	* @access   private

-	* @return   void

-	*/

-	private function endItem()

-	{

-		if($this->version == RSS2 || $this->version == RSS1)

-		{

-			echo '</item>' . PHP_EOL; 

-		}    

-		else if($this->version == ATOM)

-		{

-			echo "</entry>" . PHP_EOL;

-		}

-	}

-	

-

-	

-	// End # private functions ----------------------------------------------

-	

- } // end of class FeedWriter

- 

-// autoload classes

-function __autoload($class_name) 

-{

-	require_once $class_name . '.php';

-}
+