more scrapers
Former-commit-id: 680ff767fd0ca810aa4b2cbe789740758373acf2
--- /dev/null
+++ b/documents/scrapers/0603dfcc930a791efaa64f31ae5fceda.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+ def getTable(self,soup):
+ return soup.find(id="body-content").table
+ def getColumnCount(self):
+ return 5
+ def getColumns(self,columns):
+ (id, date, title, description, notes) = columns
+ return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+ print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+ ScraperImplementation().doScrape()
+
--- /dev/null
+++ b/documents/scrapers/1fda9544d2a3fa4cd92aec4b206a6763.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+ def getTable(self,soup):
+ return soup.find(_class = "article-content").table
+ def getColumnCount(self):
+ return 5
+ def getColumns(self,columns):
+ (id, title, date, description, notes) = columns
+ return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+ print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+ ScraperImplementation().doScrape()
+
--- /dev/null
+++ b/documents/scrapers/3d426eb8c85c8f04b814eee597efd866.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+ def getTable(self,soup):
+ return soup.find(id = "primary").table
+ def getColumnCount(self):
+ return 5
+ def getColumns(self,columns):
+ (id, date, title, description, notes) = columns
+ return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+ print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+ ScraperImplementation().doScrape()
+
--- /dev/null
+++ b/documents/scrapers/50601505ef69483121a6d130bb0515e4.txt
@@ -1,1 +1,1 @@
-
+apsc has ACMA style disclog
--- /dev/null
+++ b/documents/scrapers/6fa04af95fbe7de96daa2c7560e0aad3.txt
@@ -1,1 +1,19 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+ def getTable(self,soup):
+ return soup.find(id = "content_div_50269").table
+ def getColumns(self,columns):
+ (id, date, title, description, notes) = columns
+ return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+ print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+ ScraperImplementation().doScrape()
+
--- /dev/null
+++ b/documents/scrapers/bf6e587f166040b63681cd2ff76fbfdf.txt
@@ -1,1 +1,1 @@
-
+no disclog yet
--- /dev/null
+++ b/documents/scrapers/c1302c8d7cbbd911f0d4d8a4128f8079.txt
@@ -1,1 +1,1 @@
-
+uses RET disclog
--- /dev/null
+++ b/documents/scrapers/c25f628f9f38d889485d7a4bff873b23.txt
@@ -1,1 +1,20 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+
+ def getColumnCount(self):
+ return 4
+ def getColumns(self,columns):
+ (id, date, title, description) = columns
+ return (id, date, title, description, None)
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+ print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+ ScraperImplementation().doScrape()
+
--- /dev/null
+++ b/documents/scrapers/f0caafbcf292c90e7b8ad18ddcf9afc3.txt
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+ def getTable(self,soup):
+ return soup.find(id = "genericContent").table.tbody
+ def getColumnCount(self):
+ return 5
+ def getColumns(self,columns):
+ (id, date,title, description, notes) = columns
+ return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+ print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+ ScraperImplementation().doScrape()
+