Merge branch 'master' of ssh://maxious.lambdacomplex.org/git/disclosr
Merge branch 'master' of ssh://maxious.lambdacomplex.org/git/disclosr


Former-commit-id: 55934ce0d2e316959277c6b26c613794f09d5ee2

--- a/documents/genericScrapers.py
+++ b/documents/genericScrapers.py
@@ -57,7 +57,7 @@
         foidocsdb = scrape.couch['disclosr-foidocuments']
         (url, mime_type, rcontent) = scrape.fetchURL(scrape.docsdb,
              self.getURL(), "foidocuments", self.getAgencyID())
-        content = rcontent.read()
+        content = rcontent
         dochash = scrape.mkhash(content)
         doc = foidocsdb.get(dochash)
         if doc is None:
@@ -89,7 +89,7 @@
         device = TextConverter(rsrcmgr, outfp, codec='utf-8',
              laparams=laparams)
         fp = StringIO()
-        fp.write(content.read())
+        fp.write(content)
 
         process_pdf(rsrcmgr, device, fp, set(), caching=True,
              check_extractable=True)

--- a/documents/runScrapers.sh
+++ b/documents/runScrapers.sh
@@ -1,3 +1,10 @@
-for f in scrapers/*.py; do echo "Processing $f file.."; python $f; done
+for f in scrapers/*.py; 
+	do echo "Processing $f file.."; 
+	python $f; 
+	if [ "$?" -ne "0" ]; then
+		echo "error";
+		sleep 2; 
+	fi
+done
 
 

--- a/documents/scrape.py
+++ b/documents/scrape.py
@@ -104,10 +104,10 @@
             last_attachment_fname = doc["_attachments"].keys()[-1]
             last_attachment = docsdb.get_attachment(doc,last_attachment_fname)
             content = last_attachment
-            return (doc['url'],doc['mime_type'],content)
+            return (doc['url'],doc['mime_type'],content.read())
         if scrape_again == False:
             print "Not scraping this URL again as requested"
-            return (None,None,None)
+            return (doc['url'],doc['mime_type'],content.read())
 
     req.add_header("User-Agent", "Mozilla/4.0 (compatible; Prometheus webspider; owner maxious@lambdacomplex.org)")
     #if there is a previous version stored in couchdb, load caching helper tags
@@ -141,7 +141,7 @@
                 last_attachment_fname = doc["_attachments"].keys()[-1]
                 last_attachment = docsdb.get_attachment(doc,last_attachment_fname)
                 content = last_attachment
-                return (doc['url'],doc['mime_type'],content)
+                return (doc['url'],doc['mime_type'],content.read())
             else:
                 print "new webpage loaded"
                 content = url_handle.read()

--- a/documents/scrapers/7c6adc1d41cf029bf1a0959e5156477a.py
+++ b/documents/scrapers/7c6adc1d41cf029bf1a0959e5156477a.py
@@ -17,7 +17,7 @@
         (url, mime_type, content) = scrape.fetchURL(scrape.docsdb,
              self.getURL(), "foidocuments", self.getAgencyID())
 
-        d = pq(content.read())
+        d = pq(content)
         d.make_links_absolute(base_url = self.getURL())
         for table in d('table').items():
             title= table('thead').text()

--- a/documents/scrapers/d72744fb1e5d6e87af9a5ea16cc27fa5.py
+++ b/documents/scrapers/d72744fb1e5d6e87af9a5ea16cc27fa5.py
@@ -17,7 +17,7 @@
         (url, mime_type, content) = scrape.fetchURL(scrape.docsdb,
              self.getURL(), "foidocuments", self.getAgencyID())
 
-        d = pq(content.read())
+        d = pq(content)
         d.make_links_absolute(base_url = self.getURL())
         for item in d('.item-list').items():
             title= item('h3').text()

--- a/documents/template.inc.php
+++ b/documents/template.inc.php
@@ -61,7 +61,9 @@
                         <a class="brand" href="#">Australian Disclosure Logs</a>
                         <div class="nav-collapse collapse">
                             <p class="navbar-text pull-right">
-                                Check out our subsites on: 
+                                <small>
+                                Subsites on: 
+       </small> 
                                 <a href="http://orgs.disclosurelo.gs">Government Agencies</a>
                                 • <a href="http://lobbyists.disclosurelo.gs">Political Lobbyists</a>
                                 • <a href="http://contracts.disclosurelo.gs">Government Contracts and Spending</a>
@@ -169,7 +171,7 @@
     if (isset($row->value->links)) {
         $result .= '<h3>Links/Documents</h3><ul itemprop="associatedMedia">';
         foreach ($row->value->links as $link) {
-            $result .= '<li itemscope itemtype="http://schema.org/MediaObject"><a href=' . htmlspecialchars ($link) . '" itemprop="url contentURL">' . htmlspecialchars ( $link) . "</a></li>";
+            $result .= '<li itemscope itemtype="http://schema.org/MediaObject"><a href="' . htmlspecialchars ($link) . '" itemprop="url contentURL">' . htmlspecialchars ( $link) . "</a></li>";
         }
 
         $result .= "</ul>";