Merge branch 'master' of ssh://apples.lambdacomplex.org/git/disclosr
[disclosr.git] / documents / scrapers / 5716ce0aacfe98f7d638b7a66b7f1040.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import sys,os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import genericScrapers
import scrape
from bs4 import BeautifulSoup
 
# Reference: http://www.doughellmann.com/PyMOTW/abc/ (PyMOTW article on Python's abc module)
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Specialisation of GenericOAICDisclogScraper for a source whose row
    columns arrive in the order (date, id, title, description).
    """

    def getDocHash(self, id, date, url):
        """Return a hash built only from the text of the ``id`` cell.

        The url changes on every request, so it is deliberately left out of
        the hash input; ``date`` is not used either.

        ``id`` must expose ``stripped_strings`` (presumably a BeautifulSoup
        element -- confirm against the base class caller).
        """
        id_text = ''.join(id.stripped_strings)
        # remove_control_chars is not defined in this class; presumably it
        # is inherited from the generic scraper base.
        cleaned_text = self.remove_control_chars(id_text)
        return scrape.mkhash(cleaned_text)

    def getColumnCount(self):
        """Return 4, matching the four values that getColumns unpacks."""
        return 4

    def getColumns(self, columns):
        """Reorder a four-item row into a five-item tuple.

        Input order:  (date, id, title, description)
        Output order: (id, date, title, description, None)

        Unpacking raises ValueError when ``columns`` does not hold exactly
        four items.
        """
        when, ref, heading, summary = columns
        return (ref, when, heading, summary, None)
 
if __name__ == '__main__':
    # Report whether the class and an instance of it are recognised as the
    # generic scraper type, then run the scrape.
    #
    # Each message is formatted into ONE string and passed to print inside
    # parentheses: under Python 2 that is the print statement applied to a
    # parenthesised expression, under Python 3 it is the print function.
    # Both emit exactly the same text as the former `print 'x', y` form,
    # and the module no longer fails to parse on Python 3.
    print('Subclass: %s' % issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper))
    print('Instance: %s' % isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper))
    # A fresh instance is created for the actual run, as before.
    ScraperImplementation().doScrape()