more scrapers
[disclosr.git] / documents / scrapers / 8e874a2fde8aa0ccdc6d14573d766540.py
blob:a/documents/scrapers/8e874a2fde8aa0ccdc6d14573d766540.py -> blob:b/documents/scrapers/8e874a2fde8aa0ccdc6d14573d766540.py
--- a/documents/scrapers/8e874a2fde8aa0ccdc6d14573d766540.py
+++ b/documents/scrapers/8e874a2fde8aa0ccdc6d14573d766540.py
@@ -3,7 +3,7 @@
 import genericScrapers
 import scrape
 from bs4 import BeautifulSoup
-import codecs 
+import codecs
 #http://www.doughellmann.com/PyMOTW/abc/
 class NewScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
         def getDescription(self,content, entry,doc):
@@ -20,7 +20,7 @@
                                                 soup = BeautifulSoup(htcontent)
                                                 for text in soup.find(id="divFullWidthColumn").stripped_strings:
                                                     description = description + text.encode('ascii', 'ignore')
-                                                
+
                                                 for atag in soup.find(id="divFullWidthColumn").find_all("a"):
                                                       	if atag.has_key('href'):
                                                               	links.append(scrape.fullurl(link,atag['href']))
@@ -76,11 +76,10 @@
 if __name__ == '__main__':
     print 'Subclass:', issubclass(NewScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
     print 'Instance:', isinstance(NewScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
-    #NewScraperImplementation().doScrape()
+    NewScraperImplementation().doScrape()
     print 'Subclass:', issubclass(OldScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
     print 'Instance:', isinstance(OldScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
     osi = OldScraperImplementation()
     osi.disclogURL = "http://archive.treasury.gov.au/content/foi_publications.asp?year=-1&abstract=0&classification=&=&titl=Disclosure+Log+-+Documents+Released+Under+FOI"
     osi.doScrape()
-# old site too