Merge branch 'master' of ssh://apples.lambdacomplex.org/git/disclosr
[disclosr.git] / documents / scrape.py
blob:a/documents/scrape.py -> blob:b/documents/scrape.py
--- a/documents/scrape.py
+++ b/documents/scrape.py
@@ -112,7 +112,7 @@
     else:
         if (('page_scraped' in doc) and ((time.time() - doc['page_scraped']) < 60 * 24 * 14) or (scrape_again == False)):
             print "Uh oh, trying to scrape URL again too soon!" + hash
-	    if "_attachments" in doc.keys():
+	    if (not doc.has_key('file_size') or doc["file_size"] != "0") and "_attachments" in doc.keys():
 	            last_attachment_fname = doc["_attachments"].keys()[-1]
 	            last_attachment = docsdb.get_attachment(doc, last_attachment_fname)
         	    content = last_attachment.read()