--- a/documents/scrape.py
+++ b/documents/scrape.py
@@ -104,10 +104,10 @@
             last_attachment_fname = doc["_attachments"].keys()[-1]
             last_attachment = docsdb.get_attachment(doc,last_attachment_fname)
             content = last_attachment
-            return (doc['url'],doc['mime_type'],content)
+            return (doc['url'],doc['mime_type'],content.read())
         if scrape_again == False:
             print "Not scraping this URL again as requested"
-            return (None,None,None)
+            return (doc['url'],doc['mime_type'],content.read())
 
     req.add_header("User-Agent", "Mozilla/4.0 (compatible; Prometheus webspider; owner maxious@lambdacomplex.org)")
     #if there is a previous version stored in couchdb, load caching helper tags
@@ -141,7 +141,7 @@
             last_attachment_fname = doc["_attachments"].keys()[-1]
             last_attachment = docsdb.get_attachment(doc,last_attachment_fname)
             content = last_attachment
-            return (doc['url'],doc['mime_type'],content)
+            return (doc['url'],doc['mime_type'],content.read())
         else:
             print "new webpage loaded"
             content = url_handle.read()
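
Note on the change (not part of the patch): assuming this scraper uses couchdb-python, Database.get_attachment() returns a file-like object rather than the raw attachment bytes, which is why the cached-copy returns above add .read(). A minimal sketch of that contract, using a hypothetical helper name that is not in scrape.py:

    # Sketch only -- assumes couchdb-python, where get_attachment() yields a
    # file-like object; cached_page() is a hypothetical helper, not in scrape.py.
    def cached_page(docsdb, doc):
        """Return (url, mime_type, content) for the newest cached attachment."""
        last_attachment_fname = doc["_attachments"].keys()[-1]
        last_attachment = docsdb.get_attachment(doc, last_attachment_fname)
        # read() turns the file-like attachment into the actual page bytes,
        # so callers receive data in the third tuple slot rather than a handle.
        return (doc['url'], doc['mime_type'], last_attachment.read())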