"""Disclosure-log scraper for the pages under www.dpmc.gov.au/foi/ips/disclosure_logs.

The scraping itself lives in ``genericScrapers.GenericOAICDisclogScraper``;
this module only declares the column layout of the tables it reads and, when
run as a script, points one scraper instance at each disclosure-log URL in
turn.
"""
import sys
import os

# genericScrapers lives in the parent directory, so that directory must be on
# the import path *before* the import below is attempted.
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import genericScrapers
import dateutil
from dateutil.parser import *
from datetime import *

# Disclosure-log pages scraped, in this order, when run as a script.
DISCLOG_URLS = (
    "http://www.dpmc.gov.au/foi/ips/disclosure_logs/pmo/2011-12.cfm",
    "http://www.dpmc.gov.au/foi/ips/disclosure_logs/dpmc/2011-12.cfm",
    "http://www.dpmc.gov.au/foi/ips/disclosure_logs/dpmc/2012-13.cfm",
    "http://www.dpmc.gov.au/foi/ips/disclosure_logs/omsi/2011-12.cfm",
    "http://www.dpmc.gov.au/foi/ips/disclosure_logs/omps/2012-13.cfm",
)


class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Scraper for six-column disclosure-log tables.

    Only the column layout is defined here; everything else is inherited from
    ``genericScrapers.GenericOAICDisclogScraper``.
    """

    def __init__(self):
        # Explicit two-argument super() keeps this working on Python 2 too.
        super(ScraperImplementation, self).__init__()

    def getColumnCount(self):
        """Return the number of columns in a table row (always 6).

        NOTE(review): presumably the base class uses this to pick out the
        rows it passes to getColumns() -- confirm in genericScrapers.
        """
        return 6

    def getColumns(self, columns):
        """Reduce a six-cell row to the five fields that are kept.

        Args:
            columns: sequence of exactly six cells, in the order
                (id, date, title, description, datepub, notes).

        Returns:
            The tuple (id, date, title, description, notes); the fifth cell
            (datepub) is dropped.

        Raises:
            ValueError: if ``columns`` does not hold exactly six items
                (raised by the tuple unpacking).
        """
        # ``record_id`` rather than ``id`` so the builtin is not shadowed;
        # the leading underscore marks the cell that is deliberately dropped.
        (record_id, date, title, description, _datepub, notes) = columns
        return (record_id, date, title, description, notes)


def main():
    """Print two sanity checks, then scrape every URL in DISCLOG_URLS."""
    base = genericScrapers.GenericOAICDisclogScraper
    # Single-argument print() with %-formatting emits the same text on
    # Python 2 and Python 3.
    print('Subclass: %s' % issubclass(ScraperImplementation, base))
    print('Instance: %s' % isinstance(ScraperImplementation(), base))

    # One scraper instance is reused; only its target URL changes per pass.
    nsi = ScraperImplementation()
    for url in DISCLOG_URLS:
        nsi.disclogURL = url
        nsi.doScrape()


if __name__ == '__main__':
    main()