--- a/documents/scrapers/8e874a2fde8aa0ccdc6d14573d766540.py +++ b/documents/scrapers/8e874a2fde8aa0ccdc6d14573d766540.py @@ -11,18 +11,18 @@ links = [] description = "" for atag in entry.find_all('a'): - if atag.has_key('href'): + if atag.has_attr('href'): link = scrape.fullurl(self.getURL(),atag['href']) (url,mime_type,htcontent) = scrape.fetchURL(scrape.docsdb, link, "foidocuments", self.getAgencyID(), False) if htcontent != None: if mime_type == "text/html" or mime_type == "application/xhtml+xml" or mime_type =="application/xml": # http://www.crummy.com/software/BeautifulSoup/documentation.html soup = BeautifulSoup(htcontent) - for text in soup.find(id="divFullWidthColumn").stripped_strings: + for text in soup.find(class_ = "mainContent").stripped_strings: description = description + text.encode('ascii', 'ignore') - for atag in soup.find(id="divFullWidthColumn").find_all("a"): - if atag.has_key('href'): + for atag in soup.find(id="SortingTable").find_all("a"): + if atag.has_attr('href'): links.append(scrape.fullurl(link,atag['href'])) if links != []: @@ -43,7 +43,7 @@ links = [] description = "" for atag in entry.find_all('a'): - if atag.has_key('href'): + if atag.has_attr('href'): link = scrape.fullurl(self.getURL(),atag['href']) (url,mime_type,htcontent) = scrape.fetchURL(scrape.docsdb, link, "foidocuments", self.getAgencyID(), False) if htcontent != None: @@ -53,7 +53,7 @@ for text in soup.find(id="content-item").stripped_strings: description = description + text + " \n" for atag in soup.find(id="content-item").find_all("a"): - if atag.has_key('href'): + if atag.has_attr('href'): links.append(scrape.fullurl(link,atag['href'])) if links != []: doc.update({'links': links})