cache org ids while importing datagov
[disclosr.git] / documents / scrapers / 41a166419503bb50e410c58be54c102f.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import sys,os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import genericScrapers
import scrape
from bs4 import BeautifulSoup
from datetime import date
 
#http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Scraper for an OAIC disclosure log published as a 4-column HTML table.

    The source table carries no date column, so getDate stamps every entry
    with the date the scrape was run.
    """

    def getTable(self, soup):
        # The disclosure-log table lives inside the SharePoint content div.
        return soup.find(id="ctl00_MSO_ContentDiv").table

    def getColumns(self, columns):
        # Source row cells: (id, title, description, notes).
        # The title value is duplicated into the second slot of the returned
        # 5-tuple — presumably the date slot consumed by the base class
        # (verify against GenericOAICDisclogScraper); getDate overwrites the
        # stored date regardless, so the placeholder value is never kept.
        # Renamed the first cell to row_id to avoid shadowing builtin id().
        (row_id, title, description, notes) = columns
        return (row_id, title, title, description, notes)

    def getDate(self, content, entry, doc):
        # No date is published in the table; record today's date instead.
        edate = date.today().strftime("%Y-%m-%d")
        doc.update({'date': edate})
        return

    def getColumnCount(self):
        # Number of cells expected per table row.
        return 4
 
if __name__ == '__main__':
    # Sanity-check the class hierarchy, then run the scrape.
    # Single-argument print(...) calls are valid on both Python 2 and 3 and
    # produce identical output (the original py2-only print statements are a
    # SyntaxError under Python 3).
    print('Subclass: %s' % issubclass(ScraperImplementation,
                                      genericScrapers.GenericOAICDisclogScraper))
    print('Instance: %s' % isinstance(ScraperImplementation(),
                                      genericScrapers.GenericOAICDisclogScraper))
    ScraperImplementation().doScrape()