more scrapers
[disclosr.git] / documents / genericScrapers.py
blob:a/documents/genericScrapers.py -> blob:b/documents/genericScrapers.py
--- a/documents/genericScrapers.py
+++ b/documents/genericScrapers.py
@@ -57,7 +57,7 @@
         foidocsdb = scrape.couch['disclosr-foidocuments']
         (url, mime_type, rcontent) = scrape.fetchURL(scrape.docsdb,
              self.getURL(), "foidocuments", self.getAgencyID())
-        content = rcontent.read()
+        content = rcontent
         dochash = scrape.mkhash(content)
         doc = foidocsdb.get(dochash)
         if doc is None:
@@ -66,9 +66,9 @@
             last_attach = scrape.getLastAttachment(scrape.docsdb, self.getURL())
             if last_attach != None:
                 html_diff = difflib.HtmlDiff()
-                description = description + "\nChanges: "
-                description = description + html_diff.make_table(last_attach.read().split('\n'),
-                           content.split('\n'))
+                #description = description + "\nChanges: "
+                #description = description + html_diff.make_table(last_attach.read().split('\n'),
+                #           content.split('\n'))
             edate = date.today().strftime("%Y-%m-%d")
             doc = {'_id': dochash, 'agencyID': self.getAgencyID()
             , 'url': self.getURL(), 'docID': dochash,
@@ -89,7 +89,7 @@
         device = TextConverter(rsrcmgr, outfp, codec='utf-8',
              laparams=laparams)
         fp = StringIO()
-        fp.write(content.read())
+        fp.write(content)
 
         process_pdf(rsrcmgr, device, fp, set(), caching=True,
              check_extractable=True)