pdf scrapers
[disclosr.git] / documents / scrape.py
blob:a/documents/scrape.py -> blob:b/documents/scrape.py
--- a/documents/scrape.py
+++ b/documents/scrape.py
@@ -76,6 +76,16 @@
         addinfourl = urllib2.addinfourl(fp, headers, req.get_full_url())
         addinfourl.code = code
         return addinfourl
+
+def getLastAttachment(docsdb,url):
+    hash = mkhash(url)
+    doc = docsdb.get(hash)
+    if doc != None:
+        last_attachment_fname = doc["_attachments"].keys()[-1]
+        last_attachment = docsdb.get_attachment(doc,last_attachment_fname)
+        return last_attachment
+    else:
+        return None
 
 def fetchURL(docsdb, url, fieldName, agencyID, scrape_again=True):
     url = canonurl(url)