--- a/documents/genericScrapers.py +++ b/documents/genericScrapers.py @@ -57,7 +57,7 @@ foidocsdb = scrape.couch['disclosr-foidocuments'] (url, mime_type, rcontent) = scrape.fetchURL(scrape.docsdb, self.getURL(), "foidocuments", self.getAgencyID()) - content = rcontent.read() + content = rcontent dochash = scrape.mkhash(content) doc = foidocsdb.get(dochash) if doc is None: @@ -66,9 +66,9 @@ last_attach = scrape.getLastAttachment(scrape.docsdb, self.getURL()) if last_attach != None: html_diff = difflib.HtmlDiff() - description = description + "\nChanges: " - description = description + html_diff.make_table(last_attach.read().split('\n'), - content.split('\n')) + #description = description + "\nChanges: " + #description = description + html_diff.make_table(last_attach.read().split('\n'), + # content.split('\n')) edate = date.today().strftime("%Y-%m-%d") doc = {'_id': dochash, 'agencyID': self.getAgencyID() , 'url': self.getURL(), 'docID': dochash, @@ -89,7 +89,7 @@ device = TextConverter(rsrcmgr, outfp, codec='utf-8', laparams=laparams) fp = StringIO() - fp.write(content.read()) + fp.write(content) process_pdf(rsrcmgr, device, fp, set(), caching=True, check_extractable=True)