|
import sys,os |
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) |
|
import genericScrapers |
|
import scrape |
|
from bs4 import BeautifulSoup |
|
import dateutil |
|
from dateutil.parser import * |
|
from datetime import * |
|
|
|
#http://www.doughellmann.com/PyMOTW/abc/ |
|
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Scraper specialisation that works with five-column rows.

    All scraping machinery comes from
    ``genericScrapers.GenericOAICDisclogScraper``; this subclass only
    supplies the column count and the column ordering.
    """

    def getColumnCount(self):
        """Return the number of columns this scraper handles (always 5)."""
        return 5

    def getColumns(self, columns):
        """Unpack ``columns`` into the five expected fields and return them.

        ``columns`` must yield exactly five items, taken in order as
        (id, date, title, description, notes); any other length raises
        ``ValueError`` from the unpacking. The values are returned
        unchanged as a 5-tuple in that same order.
        """
        # Local names avoid shadowing the builtin ``id`` and the ``date``
        # name pulled in by ``from datetime import *``.
        entry_id, entry_date, title, description, notes = columns
        return (entry_id, entry_date, title, description, notes)
|
|
|
if __name__ == '__main__':
    # Script entry point: report that ScraperImplementation satisfies the
    # generic scraper base class, then scrape with the instance's initial
    # disclogURL followed by each archived financial-year page.
    base_class = genericScrapers.GenericOAICDisclogScraper

    # Single-argument print(...) with %-formatting is valid on both
    # Python 2 and Python 3 and emits exactly what the former
    # ``print 'Subclass:', value`` statements did.
    print('Subclass: %s' % issubclass(ScraperImplementation, base_class))
    print('Instance: %s' % isinstance(ScraperImplementation(), base_class))

    si = ScraperImplementation()

    # First pass uses whatever disclogURL the instance starts with
    # (set outside this file -- NOTE(review): confirm where it comes from).
    si.doScrape()

    # Remaining passes point the same instance at the older
    # financial-year disclosure-log pages, in the original order.
    archived_urls = (
        "http://www.fahcsia.gov.au/disclosure-log-2011-12-financial-year",
        "http://www.fahcsia.gov.au/disclosure-log-2010-11-financial-year",
    )
    for archived_url in archived_urls:
        si.disclogURL = archived_url
        si.doScrape()
|
|
|
|