import sys,os | |
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) | |
import genericScrapers | |
import scrape | |
from bs4 import BeautifulSoup | |
#http://www.doughellmann.com/PyMOTW/abc/ | |
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Scraper subclass for a page whose log lives in a five-column table.

    Only the three hooks below are defined here; everything else (including
    ``doScrape``) is inherited from ``GenericOAICDisclogScraper``.
    """

    def getColumnCount(self):
        """Return 5, the number of cells ``getColumns`` unpacks per row."""
        return 5

    def getColumns(self, columns):
        """Return the five cells of a row with the third and fourth swapped.

        ``columns`` must hold exactly five items, otherwise the unpacking
        raises ``ValueError``.
        """
        # NOTE(review): the page order is read as (id, date, title,
        # description, notes); the swap presumably maps it onto the order
        # the base class expects -- confirm against genericScrapers.
        entry_id, entry_date, title, description, notes = columns
        return (entry_id, entry_date, description, title, notes)

    def getTable(self, soup):
        """Return the fifth ``<table>`` element of the parsed page."""
        tables = soup.find_all('table')
        return tables[4]
if __name__ == '__main__':
    # Script entry point: report whether ScraperImplementation is a subclass
    # and an instance of the generic scraper, then run the inherited scrape.
    # Single-argument print(...) with %-formatting emits exactly the text the
    # Python 2 print statement produced, and also parses on Python 3.
    print('Subclass: %s' % issubclass(
        ScraperImplementation, genericScrapers.GenericOAICDisclogScraper))
    print('Instance: %s' % isinstance(
        ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper))
    ScraperImplementation().doScrape()
import sys,os | |
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) | |
import genericScrapers | |
import scrape | |
from bs4 import BeautifulSoup | |
#http://www.doughellmann.com/PyMOTW/abc/ | |
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Scraper subclass for a page whose log rows have three cells.

    Only the two hooks below are defined here; everything else (including
    ``doScrape``) is inherited from ``GenericOAICDisclogScraper``.
    """

    def getColumnCount(self):
        """Return 3, the number of cells ``getColumns`` unpacks per row."""
        return 3

    def getColumns(self, columns):
        """Expand a three-cell row into a five-item tuple.

        The third cell is repeated so that it fills the last three
        positions. ``columns`` must hold exactly three items, otherwise the
        unpacking raises ``ValueError``.
        """
        entry_id, entry_date, title = columns
        return (entry_id, entry_date, title, title, title)
if __name__ == '__main__':
    # Script entry point: report whether ScraperImplementation is a subclass
    # and an instance of the generic scraper, then run the inherited scrape.
    # Single-argument print(...) with %-formatting emits exactly the text the
    # Python 2 print statement produced, and also parses on Python 3.
    print('Subclass: %s' % issubclass(
        ScraperImplementation, genericScrapers.GenericOAICDisclogScraper))
    print('Instance: %s' % isinstance(
        ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper))
    ScraperImplementation().doScrape()
import sys,os | |
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) | |
import genericScrapers | |
import scrape | |
from bs4 import BeautifulSoup | |
#http://www.doughellmann.com/PyMOTW/abc/ | |
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Scraper subclass for a page whose log rows have seven cells.

    Only the two hooks below are defined here; everything else (including
    ``doScrape``) is inherited from ``GenericOAICDisclogScraper``.
    """

    def getColumnCount(self):
        """Return 7, the number of cells ``getColumns`` unpacks per row."""
        return 7

    def getColumns(self, columns):
        """Reduce a seven-cell row to a five-item tuple.

        The last two cells are discarded and the third and fourth are
        swapped. ``columns`` must hold exactly seven items, otherwise the
        unpacking raises ``ValueError``.
        """
        # The trailing two names are unused; they stay in the unpacking so
        # the seven-item length check is preserved.
        (entry_id, entry_date, title, description, notes,
         _delete_date, _other_info) = columns
        return (entry_id, entry_date, description, title, notes)
#def getTable(self,soup): | |
# return soup.find(class_ = "box").table | |
if __name__ == '__main__':
    # Script entry point: report whether ScraperImplementation is a subclass
    # and an instance of the generic scraper, then run the inherited scrape.
    # Single-argument print(...) with %-formatting emits exactly the text the
    # Python 2 print statement produced, and also parses on Python 3.
    print('Subclass: %s' % issubclass(
        ScraperImplementation, genericScrapers.GenericOAICDisclogScraper))
    print('Instance: %s' % isinstance(
        ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper))
    ScraperImplementation().doScrape()
import sys,os | |
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) | |
import genericScrapers | |
import scrape | |
from bs4 import BeautifulSoup | |
from datetime import datetime | |
from datetime import date | |
#http://www.doughellmann.com/PyMOTW/abc/ | |
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Scraper subclass for a three-column log table with class "internal".

    Besides the table/column hooks, this class supplies its own title and
    date extraction. Everything else (including ``doScrape``) is inherited
    from ``GenericOAICDisclogScraper``.
    """

    def getColumnCount(self):
        """Return 3, the number of cells ``getColumns`` unpacks per row."""
        return 3

    def getTable(self, soup):
        """Return the first element in the page whose class is "internal"."""
        return soup.find(class_="internal")

    def getColumns(self, columns):
        """Expand a three-cell row into a five-item tuple.

        The third cell is used for both the third and fourth positions and
        the fifth position is ``None``. ``columns`` must hold exactly three
        items, otherwise the unpacking raises ``ValueError``.
        """
        # Local names avoid shadowing the builtin ``id`` and the ``date``
        # class imported at the top of the file.
        entry_id, entry_date, description = columns
        return (entry_id, entry_date, description, description, None)

    def getTitle(self, content, entry, doc):
        """Store the first stripped string of ``content`` as ``doc['title']``.

        ``entry`` is accepted but not used. Raises ``StopIteration`` when
        ``content`` yields no strings.
        """
        # Builtin next() works on Python 2.6+ and Python 3, unlike the
        # Python 2-only generator method .next().
        doc.update({'title': next(content.stripped_strings)})
        return

    def getDate(self, content, entry, doc):
        """Store the date in ``content`` as ``doc['date']`` in ISO form.

        The stripped text of ``content.string`` is parsed as day/month/year
        (``%d/%m/%Y``) and rewritten as ``%Y-%m-%d``. The result is also
        printed. ``entry`` is accepted but not used. Raises ``ValueError``
        when the text does not match the expected format.
        """
        # NOTE(review): content.string is presumably None for cells with
        # more than one child node, which would raise AttributeError here --
        # confirm against the scraped page.
        parsed = datetime.strptime(content.string.strip(), "%d/%m/%Y")
        edate = parsed.strftime("%Y-%m-%d")
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(edate)
        doc.update({'date': edate})
        return
if __name__ == '__main__':
    # Script entry point: report whether ScraperImplementation is a subclass
    # and an instance of the generic scraper, then run the inherited scrape.
    # Single-argument print(...) with %-formatting emits exactly the text the
    # Python 2 print statement produced, and also parses on Python 3.
    print('Subclass: %s' % issubclass(
        ScraperImplementation, genericScrapers.GenericOAICDisclogScraper))
    print('Instance: %s' % isinstance(
        ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper))
    ScraperImplementation().doScrape()