derive agencyID and disclog url from filename
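The commit message points at helpers on the generic scraper base class that turn the scraper's own filename (an md5 hash) into the agency ID and the disclosure-log URL. A minimal sketch of that derivation follows; the agencydb handle and the "FOIDocumentsURL" key are assumptions not confirmed by this diff, and this is not the real genericScrapers code:

import os
import sys

import scrape  # disclosr's fetch/couchdb helper module

class FilenameDerivedScraper:
    # sketch only, not the actual genericScrapers base class
    def getAgencyID(self):
        # the scraper's filename (e.g. be9996f0ac58f71f23d074e82d44ead3.py)
        # doubles as the agency ID once ".py" is stripped
        return os.path.basename(sys.argv[0]).replace(".py", "")

    def getURL(self):
        # assumed: the agency document in couchdb stores the disclosure
        # log URL under a key such as "FOIDocumentsURL"
        agency = scrape.agencydb.get(self.getAgencyID())
        return agency["FOIDocumentsURL"]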
--- a/documents/scrapers/be9996f0ac58f71f23d074e82d44ead3.py
+++ b/documents/scrapers/be9996f0ac58f71f23d074e82d44ead3.py
@@ -6,9 +6,9 @@
 from bs4 import BeautifulSoup
 #http://www.doughellmann.com/PyMOTW/abc/
 class ScraperImplementation(genericScrapers.GenericRSSDisclogScraper):
-	def getDescription(self,entry,doc):
-		(url,mime_type,content) = scrape.fetchURL(scrape.docsdb, entry.link, "foidocuments", self.getAgencyID(), False)
-                if content != None:
+	def getDescription(self,content, entry,doc):
+		(url,mime_type,htcontent) = scrape.fetchURL(scrape.docsdb, entry.link, "foidocuments", self.getAgencyID(), False)
+                if htcontent != None:
                         if mime_type == "text/html" or mime_type == "application/xhtml+xml" or mime_type =="application/xml":
                         # http://www.crummy.com/software/BeautifulSoup/documentation.html
-                                	soup = BeautifulSoup(content)
+                                	soup = BeautifulSoup(htcontent)
@@ -17,7 +17,7 @@
 				dldivs = soup.find('div',class_="download")
 				if dldivs != None:
                              		for atag in dldivs.find_all("a"):
-                                		if atag.has_key('href'):
+                                		if atag.has_attr('href'):
                                         		links.append(scrape.fullurl(url,atag['href']))
 				nodldivs = soup.find('div',class_="incompleteNotification")
 				if nodldivs != None and nodldivs.stripped_strings != None:
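After both hunks, getDescription fetches the page the RSS entry links to into htcontent, so the new content argument supplied by the base class is no longer shadowed, and it uses bs4's has_attr in place of the BeautifulSoup 3 has_key. A cleaned-up sketch of the method as patched, with the mixed tab/space indentation untangled; the links initialisation sits between the hunks and is reconstructed here:

import genericScrapers
import scrape
from bs4 import BeautifulSoup

class ScraperImplementation(genericScrapers.GenericRSSDisclogScraper):
    def getDescription(self, content, entry, doc):
        # fetch and cache the linked page; htcontent avoids shadowing
        # the content argument passed in by the base class
        (url, mime_type, htcontent) = scrape.fetchURL(
            scrape.docsdb, entry.link, "foidocuments",
            self.getAgencyID(), False)
        if htcontent is not None:
            if mime_type in ("text/html", "application/xhtml+xml",
                             "application/xml"):
                soup = BeautifulSoup(htcontent)
                links = []
                dldivs = soup.find('div', class_="download")
                if dldivs is not None:
                    for atag in dldivs.find_all("a"):
                        # bs4 renamed Tag.has_key to Tag.has_attr
                        if atag.has_attr('href'):
                            links.append(scrape.fullurl(url, atag['href']))
                nodldivs = soup.find('div', class_="incompleteNotification")
                if nodldivs is not None and nodldivs.stripped_strings is not None:
                    pass  # the "incomplete" handling continues past this hunk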