more scrapers, multipage scrapers
more scrapers, multipage scrapers


Former-commit-id: d70c13dcae735d252b687b917ec530695be41419

--- a/documents/disclogsList.php
+++ b/documents/disclogsList.php
@@ -8,6 +8,11 @@
     <tr><th>Agency Name</th><th>Disclosure Log URL recorded?</th><th>Do we monitor this URL?</th></tr>";
 $agenciesdb = $server->get_db('disclosr-agencies');
 $docsdb = $server->get_db('disclosr-documents');
+$agencies = 0;
+$disclogs = 0;
+$red = 0;
+$green = 0;
+$orange = 0;
 try {
     $rows = $agenciesdb->get_view("app", "byCanonicalName", null, true)->rows;
 
@@ -19,10 +24,11 @@
             if ($ENV == "DEV")
                 echo "<br>(" . $row->id . ")";
             echo "</td>\n";
-
+$agencies++;
 
             echo "<td>";
             if (isset($row->value->FOIDocumentsURL)) {
+                $disclogs++;
                 echo '<a href="' . $row->value->FOIDocumentsURL . '">'
                 . $row->value->FOIDocumentsURL . '</a>';
                 if ($ENV == "DEV")
@@ -35,10 +41,13 @@
             if (isset($row->value->FOIDocumentsURL)) {
                 if (file_exists("./scrapers/" . $row->id . '.py')) {
                     echo "<font color='green'>✔</font>";
+                    $green++;
                 } else if (file_exists("./scrapers/" . $row->id . '.txt')) {
                     echo "<font color='orange'><b>▬</b></font>";
+                    $orange++;
                 } else {
                     echo "<font color='red'>✘</font>";
+                    $red++;
                 }
             }
             echo "</td></tr>\n";
@@ -48,6 +57,8 @@
     setteErrorHandler($e);
 }
 echo "</table>";
+echo $agencies." agencies ".(($disclogs/$agencies)*100)."% with disclosure logs, ".(($green/$disclogs)*100)."% with scrapers ".(($red/$disclogs)*100)."% without scrapers ".(($orange/$disclogs)*100)."% WIP scrapers ";
+
 include_footer_documents();
 ?>
 

--- a/documents/genericScrapers.py
+++ b/documents/genericScrapers.py
@@ -84,12 +84,17 @@
                 doc.update({'description': descriptiontxt})
 		return
         def getTitle(self, content, entry, doc):
-                doc.update({'title': content.string})
+                doc.update({'title': (''.join(content.stripped_strings))})
 		return
 	def getTable(self, soup):
 		return soup.table
+	def getRows(self, table):
+		return table.find_all('tr')
 	def getDate(self, content, entry, doc):
-		edate = parse(''.join(content.stripped_strings).strip(), dayfirst=True, fuzzy=True).strftime("%Y-%m-%d")
+		date = ''.join(content.stripped_strings).strip()
+		date = str.replace("Octber","October",date)
+		print date
+		edate = parse(date, dayfirst=True, fuzzy=True).strftime("%Y-%m-%d")
 		print edate
 		doc.update({'date': edate})
 		return
@@ -110,10 +115,10 @@
 			# http://www.crummy.com/software/BeautifulSoup/documentation.html
 				soup = BeautifulSoup(content)
 				table = self.getTable(soup)
-				for row in table.find_all('tr'):
+				for row in self.getRows(table):
 					columns = row.find_all('td')
 					if len(columns) == self.getColumnCount():
-						(id, date, description, title, notes) = self.getColumns(columns)
+						(id, date, title, description, notes) = self.getColumns(columns)
 						print ''.join(id.stripped_strings)
 						if id.string == None:
 							hash = scrape.mkhash(self.remove_control_chars(url+(''.join(date.stripped_strings))))
@@ -123,13 +128,13 @@
 							
 						if doc == None:
 							print "saving " +hash
-							doc = {'_id': hash, 'agencyID': self.getAgencyID(), 'url': self.getURL(), 'docID': id.string}
+							doc = {'_id': hash, 'agencyID': self.getAgencyID(), 'url': self.getURL(), 'docID': (''.join(id.stripped_strings))}
 							self.getLinks(self.getURL(),row,doc)
                                 			self.getTitle(title,row, doc)
                                 			self.getDate(date,row, doc)
 							self.getDescription(description,row, doc)
 							if notes != None:
-                                        			doc.update({ 'notes': notes.string})
+                                        			doc.update({ 'notes': (''.join(notes.stripped_strings))})
 							foidocsdb.save(doc)
 						else:
 							print "already saved "+hash

--- a/documents/scrape.py
+++ b/documents/scrape.py
@@ -204,12 +204,12 @@
 				scrapeAndStore(docsdb, agency[key],0,key,agency['_id'])
 			if key == 'website' and False:
 				scrapeAndStore(docsdb, agency[key],0,key,agency['_id'])
+                                agency['metadata']['lastScraped'] = time.time()
 			if key.endswith('URL') and False:
 				print key 
 				depth = 1
 				if 'scrapeDepth' in agency.keys():
 					depth = agency['scrapeDepth']
 				scrapeAndStore(docsdb, agency[key],depth,key,agency['_id'])
-		agency['metadata']['lastScraped'] = time.time()
 		agencydb.save(agency)
 

--- /dev/null
+++ b/documents/scrapers/00a294de663db69062ca09aede7c0487.txt
@@ -1,1 +1,2 @@
+multipage
 

--- /dev/null
+++ b/documents/scrapers/0324e4b1654fd6dd651307abcef67094.py
@@ -1,1 +1,19 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumnCount(self):
+                return 6
+        def getColumns(self,columns):
+                (id, date, title, description, notes,link) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- a/documents/scrapers/1097fa8afdcf5db89d212d0979226667.py
+++ b/documents/scrapers/1097fa8afdcf5db89d212d0979226667.py
@@ -8,7 +8,7 @@
 class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
         def getColumns(self,columns):
                 (id, date, title, description, notes) = columns
-                return (id, date, description, title, notes)
+                return (id, date, title, description, notes)
 
 if __name__ == '__main__':
     print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)

--- a/documents/scrapers/157cbe654bdaa0a48e6650152ae34489.py
+++ b/documents/scrapers/157cbe654bdaa0a48e6650152ae34489.py
@@ -10,7 +10,7 @@
                 return 5
         def getColumns(self,columns):
                 (id, date, title, description, notes) = columns
-                return (id, date, description, title, notes)
+                return (id, date, title, description, notes)
         def getTable(self,soup):
                 return soup.find_all('table')[4]
 

--- /dev/null
+++ b/documents/scrapers/1803322b27286950cab0c543168b5f21.txt
@@ -1,1 +1,2 @@
+multipage log
 

--- /dev/null
+++ b/documents/scrapers/38ca99d2790975a40dde3fae41dbdc3d.py
@@ -1,1 +1,32 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+import dateutil
+from dateutil.parser import *
+from datetime import *
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumnCount(self):
+                return 3
+        def getColumns(self,columns):
+                (date, title, description) = columns
+                return (date, date, title, description, None)
+   	def getTitle(self, content, entry, doc):
+		i = 0
+		title = ""
+		for string in content.stripped_strings:
+    			if i < 2:
+				title = title + string
+			i = i+1
+                doc.update({'title': title})
+		print title
+                return
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/3b54190e3f409380e109fae29e1917aa.py
@@ -1,1 +1,19 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumnCount(self):
+                return 7
+        def getColumns(self,columns):
+                (id, date, title, description, link, deldate,notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- a/documents/scrapers/3cd40b1240e987cbcd3f0e67054ce259.py
+++ b/documents/scrapers/3cd40b1240e987cbcd3f0e67054ce259.py
@@ -7,7 +7,7 @@
 class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
 	def getColumns(self,columns):
 		(id, date, description, title, notes) = columns
-		return (id, date, description, title, notes)
+		return (id, date, title, description, notes)
 
 if __name__ == '__main__':
     print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)

--- /dev/null
+++ b/documents/scrapers/3d5871a44abbbc81ef5b3a420070755d.py
@@ -1,1 +1,47 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+import dateutil
+from dateutil.parser import *
+from datetime import *
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getTable(self,soup):
+                return soup.find(class_ = "inner-column").table       
+        def getRows(self,table):
+                return table.tbody.find_all('tr',recursive=False)
+        def getColumnCount(self):
+                return 3
+        def getColumns(self,columns):
+                (date, title, description) = columns
+                return (date, date, title, description, None)
+        def getDate(self, content, entry, doc):
+		i = 0
+		date = ""
+		for string in content.stripped_strings:
+    			if i ==1:
+				date = string
+			i = i+1
+                edate = parse(date, dayfirst=True, fuzzy=True).strftime("%Y-%m-%d")
+                print edate
+                doc.update({'date': edate})
+                return
+   	def getTitle(self, content, entry, doc):
+		i = 0
+		title = ""
+		for string in content.stripped_strings:
+    			if i < 2:
+				title = title + string
+			i = i+1
+                doc.update({'title': title})
+		#print title
+                return
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/4934000fddd6a5b1094f398798341290.py
@@ -1,1 +1,23 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+import dateutil
+from dateutil.parser import *
+from datetime import *
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumnCount(self):
+                return 5
+        def getColumns(self,columns):
+                (id, date, title, description, notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    si = ScraperImplementation()
+    si.doScrape()
+

--- /dev/null
+++ b/documents/scrapers/53b14397c8f27c29ff07b6319f7a0ec5.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        #def getTable(self,soup):
+        #        return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table       
+        def getColumnCount(self):
+                return 5
+        def getColumns(self,columns):
+                (id, date, title, description,notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- a/documents/scrapers/53d2884f8afd026096a27bd5051ec50e.py
+++ b/documents/scrapers/53d2884f8afd026096a27bd5051ec50e.py
@@ -10,7 +10,7 @@
                 return soup.find(class_ = "ms-rtestate-field").table
         def getColumns(self,columns):
                 (id, date, title, description, notes) = columns
-                return (id, date, description, title, notes)
+                return (id, date, title, description, notes)
 
         def getLinks(self, content, entry, doc):
 		link = None

--- /dev/null
+++ b/documents/scrapers/55b69726fde4b4898ecf6d7217d1d1d2.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        #def getTable(self,soup):
+        #        return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table       
+        def getColumnCount(self):
+                return 4
+        def getColumns(self,columns):
+                (id, date, title, description) = columns
+                return (id, date, title, description, None)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/5716ce0aacfe98f7d638b7a66b7f1040.py
@@ -1,1 +1,19 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumnCount(self):
+                return 4
+        def getColumns(self,columns):
+                (date, id, title, description) = columns
+                return (id, date, title, description, None)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/5d05365e981d87e746b596d63e35b1dc.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getTable(self,soup):
+                return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table       
+        def getColumnCount(self):
+                return 5
+        def getColumns(self,columns):
+                (id, date, title, description,notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/627f116dfe42c9f27ad6747be0aa44e2.txt
@@ -1,1 +1,2 @@
+see parent dhs
 

--- /dev/null
+++ b/documents/scrapers/649b053f5e2884906ddc7174c2cd4b38.py
@@ -1,1 +1,28 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+import dateutil
+from dateutil.parser import *
+from datetime import *
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumnCount(self):
+                return 5
+        def getColumns(self,columns):
+                (id, date, title, description, notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    si = ScraperImplementation()
+    si.doScrape()
+    si.disclogURL = "http://www.fahcsia.gov.au/disclosure-log-2011-12-financial-year"
+    si.doScrape()
+    si.disclogURL = "http://www.fahcsia.gov.au/disclosure-log-2010-11-financial-year"
+    si.doScrape()
+
+

--- /dev/null
+++ b/documents/scrapers/6cf3870aedeeecfd6394b5c0abed4c55.py
@@ -1,1 +1,23 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+import dateutil
+from dateutil.parser import *
+from datetime import *
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumnCount(self):
+                return 5
+        def getColumns(self,columns):
+                (id, date, title, description, notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    si = ScraperImplementation()
+    si.doScrape()
+

--- /dev/null
+++ b/documents/scrapers/6fa04af95fbe7de96daa2c7560e0aad3.py
@@ -1,1 +1,19 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getTable(self,soup):
+                return soup.find(id = "content_div_50269").table
+        def getColumns(self,columns):
+                (id, date, title, description, notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- a/documents/scrapers/6fa04af95fbe7de96daa2c7560e0aad3.txt
+++ /dev/null
@@ -1,19 +1,1 @@
-import sys,os
-sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
-import genericScrapers
-import scrape
-from bs4 import BeautifulSoup
 
-#http://www.doughellmann.com/PyMOTW/abc/
-class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
-        def getTable(self,soup):
-                return soup.find(id = "content_div_50269").table
-        def getColumns(self,columns):
-                (id, date, title, description, notes) = columns
-                return (id, date, title, description, notes)
-
-if __name__ == '__main__':
-    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
-    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
-    ScraperImplementation().doScrape()
-

--- a/documents/scrapers/6fe3c812a99d486963133459b2768cf6.py
+++ b/documents/scrapers/6fe3c812a99d486963133459b2768cf6.py
@@ -8,7 +8,7 @@
 class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
         def getColumns(self,columns):
                 (id, date, title, description, notes) = columns
-                return (id, date, description, title, notes)
+                return (id, date, title, description, notes)
 
 if __name__ == '__main__':
     print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)

--- /dev/null
+++ b/documents/scrapers/77f02f713e3c37bff73882fb90828379.py
@@ -1,1 +1,22 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getTable(self,soup):
+                return soup.find("table",width="571")
+#findAll("table")[3]
+        def getColumnCount(self):
+                return 7
+        def getColumns(self,columns):
+                (id, date, title, description,link,deldate,notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- a/documents/scrapers/795c33ed030293dbdb155c909ea47e37.py
+++ b/documents/scrapers/795c33ed030293dbdb155c909ea47e37.py
@@ -10,7 +10,7 @@
                 return 7
         def getColumns(self,columns):
                 (id, date, title, description, notes, deletedate, otherinfo) = columns
-                return (id, date, description, title, notes)
+                return (id, date, title, description, notes)
         #def getTable(self,soup):
         #        return soup.find(class_ = "box").table
 

--- a/documents/scrapers/8c9421f852c441910bf1d93a57b31d64.py
+++ b/documents/scrapers/8c9421f852c441910bf1d93a57b31d64.py
@@ -7,7 +7,7 @@
 class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
         def getColumns(self,columns):
                 (id, date, title, description, notes) = columns
-                return (id, date, description, title, notes)
+                return (id, date, title, description, notes)
 
 if __name__ == '__main__':
     print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)

--- a/documents/scrapers/8ef0e5802f99800f514b3a148e013b75.py
+++ b/documents/scrapers/8ef0e5802f99800f514b3a148e013b75.py
@@ -12,7 +12,7 @@
                 return soup.find(class_ = "content").table
         def getColumns(self,columns):
                 (id, date, title, description) = columns
-                return (id, date, description, title, None)
+                return (id, date, title, description, None)
 
 if __name__ == '__main__':
     print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)

--- /dev/null
+++ b/documents/scrapers/905a1c409b6afb1de0074b13a5559560.py
@@ -1,1 +1,23 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+import dateutil
+from dateutil.parser import *
+from datetime import *
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumnCount(self):
+                return 5
+        def getColumns(self,columns):
+                (id, date, title, description, notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    si = ScraperImplementation()
+    si.doScrape()
+

--- /dev/null
+++ b/documents/scrapers/9f5cd66dea3e2ec958c17e28b27e60a7.txt
@@ -1,1 +1,2 @@
+acma style
 

--- /dev/null
+++ b/documents/scrapers/ad033512610d8e36886ab6a795f26561.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getTable(self,soup):
+                return soup.find(id = "_ctl0__ctl0_MainContentPlaceHolder_MainContentPlaceHolder_ContentSpan").findAll("table")[3]
+        def getColumnCount(self):
+                return 5
+        def getColumns(self,columns):
+                (id, date, title, description,notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/b0a3281ba66efe173c5a33d5ef90ff76.txt
@@ -1,1 +1,2 @@
+multipage immi
 

--- /dev/null
+++ b/documents/scrapers/b506b87c8ee9e3a7ea8007914078c741.py
@@ -1,1 +1,19 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumnCount(self):
+                return 6
+        def getColumns(self,columns):
+                (id, date, title, description,link,notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/c25f628f9f38d889485d7a4bff873b23.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getTable(self,soup):
+                return soup.find(class_ = "ms-rtestate-field").table       
+        def getColumnCount(self):
+                return 4
+        def getColumns(self,columns):
+                (id, date, title, description) = columns
+                return (id, date, title, description, None)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- a/documents/scrapers/c25f628f9f38d889485d7a4bff873b23.txt
+++ /dev/null
@@ -1,20 +1,1 @@
-import sys,os
-sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
-import genericScrapers
-import scrape
-from bs4 import BeautifulSoup
 
-#http://www.doughellmann.com/PyMOTW/abc/
-class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
-       
-        def getColumnCount(self):
-                return 4
-        def getColumns(self,columns):
-                (id, date, title, description) = columns
-                return (id, date, title, description, None)
-
-if __name__ == '__main__':
-    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
-    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
-    ScraperImplementation().doScrape()
-

--- a/documents/scrapers/c43ca6780764f4e61918e8836be74420.py
+++ b/documents/scrapers/c43ca6780764f4e61918e8836be74420.py
@@ -7,7 +7,7 @@
 class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
 	def getColumns(self,columns):
 		(id, date, title,description,notes) = columns
-		return (id, date, description, title, notes)
+		return (id, date, title, description, notes)
 
 if __name__ == '__main__':
     print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)

--- /dev/null
+++ b/documents/scrapers/cb7f40e3495b682de6eee61bf09c1cfc.txt
@@ -1,1 +1,2 @@
+no log
 

--- /dev/null
+++ b/documents/scrapers/dae7e934f1c341ccc9547a89a8af917e.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        #def getTable(self,soup):
+        #        return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table       
+        def getColumnCount(self):
+                return 5
+        def getColumns(self,columns):
+                (id, date, title, description,notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- a/documents/scrapers/dfd7414bb0c21a0076ab559901ae0588.py
+++ b/documents/scrapers/dfd7414bb0c21a0076ab559901ae0588.py
@@ -8,7 +8,7 @@
 class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
         def getColumns(self,columns):
                 (id, date, title, description, notes) = columns
-                return (id, date, description, title, notes)
+                return (id, date, title, description, notes)
         def getTable(self,soup):
                 return soup.find(class_ = "content")
 

--- a/documents/scrapers/e2a845e55bc9986e6c75c5ad2c508b8d.py
+++ b/documents/scrapers/e2a845e55bc9986e6c75c5ad2c508b8d.py
@@ -7,7 +7,7 @@
 class ScraperImplementation(genericScrapers.GenericRSSDisclogScraper):
         def getColumns(self,columns):
                 (id, date, title, description, notes) = columns
-                return (id, date, description, title, notes)
+                return (id, date, title, description, notes)
 
 if __name__ == '__main__':
     print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericRSSDisclogScraper)

--- /dev/null
+++ b/documents/scrapers/f0caafbcf292c90e7b8ad18ddcf9afc3.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getTable(self,soup):
+                return soup.find(id = "genericContent").table.tbody
+        def getColumnCount(self):
+                return 3
+        def getColumns(self,columns):
+                (id,  date,title, description, notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- a/documents/scrapers/f0caafbcf292c90e7b8ad18ddcf9afc3.txt
+++ /dev/null
@@ -1,21 +1,1 @@
-import sys,os
-sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
-import genericScrapers
-import scrape
-from bs4 import BeautifulSoup
 
-#http://www.doughellmann.com/PyMOTW/abc/
-class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
-        def getTable(self,soup):
-                return soup.find(id = "genericContent").table.tbody
-        def getColumnCount(self):
-                return 5
-        def getColumns(self,columns):
-                (id,  date,title, description, notes) = columns
-                return (id, date, title, description, notes)
-
-if __name__ == '__main__':
-    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
-    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
-    ScraperImplementation().doScrape()
-

--- a/documents/scrapers/f2ab2908d8ee56ed8d995ef4187e75e6.py
+++ b/documents/scrapers/f2ab2908d8ee56ed8d995ef4187e75e6.py
@@ -8,7 +8,7 @@
 class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
         def getColumns(self,columns):
                 (id, date, title, description, notes) = columns
-                return (id, date, description, title, notes)
+                return (id, date, title, description, notes)
         def getTable(self,soup):
                 return soup.find(id = "content").table
 

--- a/documents/scrapers/rtk.py
+++ b/documents/scrapers/rtk.py
@@ -7,7 +7,7 @@
 class ScraperImplementation(genericScrapers.GenericRSSDisclogScraper):
         def getColumns(self,columns):
                 (id, date, title, description, notes) = columns
-                return (id, date, description, title, notes)
+                return (id, date, title, description, notes)
 
 if __name__ == '__main__':
     print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericRSSDisclogScraper)

--- a/documents/viewDocument.php
+++ b/documents/viewDocument.php
@@ -3,7 +3,13 @@
 include_once('../include/common.inc.php');
 $hash = $_REQUEST['hash'];
 $docsdb = $server->get_db('disclosr-documents');
+try {
 $doc = object_to_array($docsdb->get($hash));
+
+} catch (SetteeRestClientException $e) {
+    setteErrorHandler($e);
+}
+
 
 if (!isset($doc['_attachments']) || count($doc['_attachments']) == 0) die ("no attachments");
 $attachments = $doc['_attachments'];
@@ -13,3 +19,4 @@
 //echo $url;
 $request = Requests::get($url);
 echo ($request->body);
+

--- a/include/couchdb.inc.php
+++ b/include/couchdb.inc.php
@@ -10,6 +10,7 @@
 if (php_uname('n') == "KYUUBEY") {
 
     $serverAddr = 'http://192.168.1.148:5984/';
+    $serverAddr = 'http://127.0.0.1:5984/';
 } else {
     $serverAddr = 'http://127.0.0.1:5984/';
 }