add ACMA scraper
--- a/documents/scrapers/7c6adc1d41cf029bf1a0959e5156477a.py
+++ b/documents/scrapers/7c6adc1d41cf029bf1a0959e5156477a.py
@@ -16,16 +16,21 @@
         foidocsdb = scrape.couch['disclosr-foidocuments']
         (url, mime_type, content) = scrape.fetchURL(scrape.docsdb,
              self.getURL(), "foidocuments", self.getAgencyID())
-        
-        d = pq(content.read())
+
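+        # fetchURL returns the page body itself, so it is passed straight to pq()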
+        d = pq(content)
         d.make_links_absolute(base_url = self.getURL())
         for table in d('table').items():
             title= table('thead').text()
-            print title
+            print self.remove_control_chars(title)
             (idate,descA,descB,link,deldate,notes) = table('tbody tr').map(lambda i, e: pq(e).children().eq(1).text())
             links = table('a').map(lambda i, e: pq(e).attr('href'))
             description = descA+" "+descB
-            edate = parse(idate[:12], dayfirst=True, fuzzy=True).strftime("%Y-%m-%d")
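+            # fall back to today's date when the cell isn't a parseable date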
+            try:
+                edate = parse(idate[:12], dayfirst=True, fuzzy=True).strftime("%Y-%m-%d")
+            except ValueError:
+                edate = date.today().strftime("%Y-%m-%d")
             print edate
             dochash = scrape.mkhash(self.remove_control_chars(title))
             doc = foidocsdb.get(dochash)
@@ -35,7 +40,7 @@
                 doc = {'_id': dochash, 'agencyID': self.getAgencyID()
                 , 'url': self.getURL(), 'docID': dochash,
                 "links": links,
-                "date": edate, "notes": notes, "title": "Disclosure Log Updated", "description": description}
+                "date": edate, "notes": notes, "title": title, "description": description}
                 #print doc
                 foidocsdb.save(doc)
             else:
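
A standalone sketch of the date fallback introduced above, for reference; it
assumes python-dateutil is installed, and the sample cell strings are
invented. Note the new except branch also relies on "from datetime import
date" being imported at the top of the file, which this hunk does not show.

    from datetime import date
    from dateutil.parser import parse

    def parse_entry_date(cell):
        # Only the first 12 characters are parsed so trailing notes in the
        # cell don't throw off the fuzzy parser (mirrors idate[:12] above).
        try:
            return parse(cell[:12], dayfirst=True, fuzzy=True).strftime("%Y-%m-%d")
        except ValueError:
            # Unparseable cells (e.g. "TBA") fall back to today's date.
            return date.today().strftime("%Y-%m-%d")

    print(parse_entry_date("3 July 2012"))   # -> 2012-07-03
    print(parse_entry_date("TBA"))           # -> today's date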