Add innovation scraper
Former-commit-id: 80558a9217d1bcad0766200d0e1d42aa022ff501
--- /dev/null
+++ b/documents/scrapers/41a166419503bb50e410c58be54c102f.py
@@ -1,1 +1,27 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+from datetime import date
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+ def getTable(self,soup):
+ return soup.find(id= "ctl00_MSO_ContentDiv").table
+
+ def getColumns(self,columns):
+ (id, title, description, notes) = columns
+ return (id, title, title, description, notes)
+ def getDate(self, content, entry, doc):
+ edate = date.today().strftime("%Y-%m-%d")
+ doc.update({'date': edate})
+ return
+ def getColumnCount(self):
+ return 4
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+ print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+ ScraperImplementation().doScrape()
+
--- a/documents/scrapers/41a166419503bb50e410c58be54c102f.txt
+++ /dev/null
@@ -1,1 +1,1 @@
-aspx
+
--- a/documents/scrapers/d72744fb1e5d6e87af9a5ea16cc27fa5.py
+++ b/documents/scrapers/d72744fb1e5d6e87af9a5ea16cc27fa5.py
@@ -30,7 +30,6 @@
doc = foidocsdb.get(dochash)
if doc is None:
print "saving " + dochash
- edate = date.today().strftime("%Y-%m-%d")
doc = {'_id': dochash, 'agencyID': self.getAgencyID()
, 'url': self.getURL(), 'docID': dochash,
"links": links,
--- a/documents/scrapers/d72744fb1e5d6e87af9a5ea16cc27fa5.txt
+++ /dev/null
@@ -1,1 +1,1 @@
-acma style
+