1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 | import sys,os sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) import genericScrapers import scrape from bs4 import BeautifulSoup import dateutil from dateutil.parser import * from datetime import * #http://www.doughellmann.com/PyMOTW/abc/ class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): def getColumnCount(self): return 3 def getColumns(self,columns): (date, title, description) = columns return (date, date, title, description, None) def getTitle(self, content, entry, doc): i = 0 title = "" for string in content.stripped_strings: if i < 2: title = title + string i = i+1 title = self.remove_control_chars(title) doc.update({'title': title}) print title return if __name__ == '__main__': print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) ScraperImplementation().doScrape() |