[disclosr.git] / documents / genericScrapers.py
--- a/documents/genericScrapers.py
+++ b/documents/genericScrapers.py
@@ -198,6 +198,19 @@
 
     def getRows(self, table):
         return table.find_all('tr')
+    def findColumns(self, row):  # cell lookup for one row; the row loop calls it via self
+        return row.find_all('td')  # all <td> elements found under this row
+
+    def getDocHash(self, id, date, url):
+        """Hash url + the id cell text, falling back to the date cell text."""
+        if id.string is None:
+            # id.string is None, so the date cell supplies the text instead.
+            print "no id, using date as hash"
+            cell = date
+        else:
+            cell = id
+        return scrape.mkhash(self.remove_control_chars(
+            url + ''.join(cell.stripped_strings)))
 
     def getDate(self, content, entry, doc):
         strdate = ''.join(content.stripped_strings).strip()
@@ -234,20 +247,13 @@
                 soup = BeautifulSoup(content)
                 table = self.getTable(soup)
                 for row in self.getRows(table):
-                    columns = row.find_all('td')
+                    columns = self.findColumns(row)
                     if len(columns) is self.getColumnCount():
                         (id, date, title,
                          description, notes) = self.getColumns(columns)
                         print self.remove_control_chars(
                             ''.join(id.stripped_strings))
-                        if id.string is None:
-                            dochash = scrape.mkhash(
-                                self.remove_control_chars(
-                                    url + (''.join(date.stripped_strings))))
-                        else:
-                            dochash = scrape.mkhash(
-                                self.remove_control_chars(
-                                    url + (''.join(id.stripped_strings))))
+                        dochash = self.getDocHash(id,date,url)
                         doc = foidocsdb.get(dochash)
 
                         if doc is None: