From: Maxious Date: Sun, 25 Nov 2012 22:43:31 +0000 Subject: add ausaid again X-Git-Url: https://maxious.lambdacomplex.org/git/?p=disclosr.git&a=commitdiff&h=7816ac6af5ed23e3f105e802868b58bad4e895e4 --- add ausaid again Former-commit-id: ccbe1ad50acae0133a4d29c3431230936af84f95 --- --- /dev/null +++ b/documents/scrapers/ac6cf1ebaa79694e2cc3705bc83c89b3.py @@ -1,1 +1,31 @@ +import sys,os +sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) +import genericScrapers +import scrape +from bs4 import BeautifulSoup +from datetime import datetime +from datetime import date +#http://www.doughellmann.com/PyMOTW/abc/ +class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): + def getColumnCount(self): + return 3 + def getTable(self,soup): + return soup.find(class_ = "internal") + def getColumns(self,columns): + (id, date, description) = columns + return (id, date, description, description, None) + def getTitle(self, content, entry, doc): + doc.update({'title': content.stripped_strings.next()}) + return + def getDate(self, content, entry, doc): + edate = datetime.strptime(content.string.strip(), "%d/%m/%Y").strftime("%Y-%m-%d") + print edate + doc.update({'date': edate}) + return + +if __name__ == '__main__': + print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) + print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) + ScraperImplementation().doScrape() +