--- a/documents/scrapers/227cb6eb7d2c9f8a6e846df7447d6caa.py
+++ b/documents/scrapers/227cb6eb7d2c9f8a6e846df7447d6caa.py
@@ -11,7 +11,7 @@
         links = []
         description = ""
         for atag in entry.find_all('a'):
-            if atag.has_key('href'):
+            if atag.has_attr('href'):
                 link = scrape.fullurl(self.getURL(),atag['href'])
                 (url,mime_type,htcontent) = scrape.fetchURL(scrape.docsdb, link, "foidocuments", self.getAgencyID(), False)
                 if htcontent != None:
@@ -26,20 +26,23 @@
                             for text in row.stripped_strings:
                                 description = description + text + "\n"
                             for atag in row.find_all("a"):
-                                if atag.has_key('href'):
+                                if atag.has_attr('href'):
                                     links.append(scrape.fullurl(link,atag['href']))
         if links != []:
             doc.update({'links': links})
         if description != "":
             doc.update({ 'description': description})
 
-
+    def getRows(self, table):
+        return table.find_all(class_ = "dl-row")
+    def findColumns(self, table):
+        return table.find_all('div')
     def getColumnCount(self):
         return 2
     def getTable(self,soup):
-        return soup.find(class_ = "ms-rteTable-default")
+        return soup.find(class_ = "foi-dl-list")
     def getColumns(self,columns):
-        (date, title) = columns
+        (title, date) = columns
         return (title, date, title, title, None)
 
 if __name__ == '__main__':
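
Illustrative sketch, not part of the patch: the change above swaps BeautifulSoup 3's Tag.has_key() for the BeautifulSoup 4 Tag.has_attr() call and retargets the scraper at a "foi-dl-list" / "dl-row" page layout with the title column before the date column. The snippet below shows that usage in isolation; the HTML fragment, the column order, and the printed output are assumptions made up to mirror what the new getTable()/getRows()/getColumns() overrides appear to expect.

# Standalone sketch; the HTML is invented to mirror the structure
# the new selectors assume.
from bs4 import BeautifulSoup

html = """
<div class="foi-dl-list">
  <div class="dl-row">
    <div>Sample document title</div>
    <div>1 January 2013</div>
    <a href="/foi/sample.pdf">Download</a>
  </div>
</div>
"""

soup = BeautifulSoup(html, "html.parser")
table = soup.find(class_="foi-dl-list")        # same lookup as the new getTable()
for row in table.find_all(class_="dl-row"):    # same lookup as the new getRows()
    # first column is the title, second the date, matching the new getColumns()
    title, date = [d.get_text(strip=True) for d in row.find_all('div')][:2]
    # BeautifulSoup 4 replaces Tag.has_key() with Tag.has_attr()
    links = [a['href'] for a in row.find_all('a') if a.has_attr('href')]
    print(title, date, links)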