Derive agencyID and disclog URL from the scraper script's filename
[disclosr.git] / documents / genericScrapers.py
blob:a/documents/genericScrapers.py -> blob:b/documents/genericScrapers.py
--- a/documents/genericScrapers.py
+++ b/documents/genericScrapers.py
@@ -8,18 +8,31 @@
 import feedparser
 import abc
 
-class GenericRSSDisclogScraper(object):
+class GenericDisclogScraper(object):  # common base: supplies agency id / disclog URL lookup, subclasses implement doScrape()
         __metaclass__ = abc.ABCMeta
-        @abc.abstractmethod
+	agencyID = None  # cache for getAgencyID(); stays None until the first call
+	disclogURL = None  # cache for getURL(); stays None until the first call
         def getAgencyID(self):
                 """ disclosr agency id """
-                return
+		if self.agencyID == None:  # first call: take the id from the running script's file name (sys.argv[0]) with ".py" removed
+			self.agencyID = os.path.basename(sys.argv[0]).replace(".py","")  # NOTE(review): replace() drops every ".py" occurrence, not only the suffix
+                return self.agencyID
 
-        @abc.abstractmethod
         def getURL(self):
                 """ disclog URL"""
-                return
+		if self.disclogURL == None:  # first call: read the URL from the agency record
+			agency = scrape.agencydb.get(self.getAgencyID())  # NOTE(review): `scrape` is presumably imported above this hunk; if get() can return None the next line raises - confirm
+			self.disclogURL = agency['FOIDocumentsURL']
+                return self.disclogURL
 
+	@abc.abstractmethod
+	def doScrape(self):
+		""" do the scraping; abstract, each concrete scraper must override this """
+		return
+
+
+
+class GenericRSSDisclogScraper(GenericDisclogScraper):
         def getDescription(self, entry, doc):
                 """ get description from rss entry"""
                 doc['description'] = entry.summary
@@ -46,18 +59,8 @@
                         else:
                         	print "already saved"			
 
-class GenericOAICDisclogScraper(object):
-	__metaclass__ = abc.ABCMeta
-	@abc.abstractmethod
-	def getAgencyID(self):
-		""" disclosr agency id """
-		return
-
-	@abc.abstractmethod
-	def getURL(self):
-		""" disclog URL"""
-		return
-
+class GenericOAICDisclogScraper(GenericDisclogScraper):
+        __metaclass__ = abc.ABCMeta
 	@abc.abstractmethod
 	def getColumns(self,columns):
 		""" rearranges columns if required """