fix scraper interval
Former-commit-id: 3028cb06efbf3c9d8a9f903943fab75d89156535
--- a/documents/scrape.py
+++ b/documents/scrape.py
@@ -105,7 +105,7 @@
if doc == None:
doc = {'_id': hash, 'agencyID': agencyID, 'url': url, 'fieldName': fieldName, 'type': 'website'}
else:
- if (('page_scraped' in doc) and (time.time() - doc['page_scraped']) < 60 * 24 * 14 * 1000):
+                if (('page_scraped' in doc) and (time.time() - doc['page_scraped']) < 60 * 60 * 24 * 14):
print "Uh oh, trying to scrape URL again too soon!" + hash
last_attachment_fname = doc["_attachments"].keys()[-1]
last_attachment = docsdb.get_attachment(doc, last_attachment_fname)
@@ -209,8 +209,8 @@
scrapeAndStore(docsdb, linkurl, depth - 1, fieldName, agencyID)
#couch = couchdb.Server('http://192.168.1.148:5984/')
-couch = couchdb.Server('http://192.168.1.113:5984/')
-#couch = couchdb.Server('http://127.0.0.1:5984/')
+#couch = couchdb.Server('http://192.168.1.113:5984/')
+couch = couchdb.Server('http://127.0.0.1:5984/')
# select database
agencydb = couch['disclosr-agencies']
docsdb = couch['disclosr-documents']
--- a/documents/scrapers/0049d35216493c545ef5f7f000e6b252.py
+++ b/documents/scrapers/0049d35216493c545ef5f7f000e6b252.py
@@ -26,8 +26,8 @@
ScraperImplementation().doScrape()
except Exception, err:
sys.stderr.write('ERROR: %s\n' % str(err))
- print ‘Error Reason: ‘, err.__doc__
- print ‘Exception: ‘, err.__class__
+ print "Error Reason: ", err.__doc__
+ print "Exception: ", err.__class__
print traceback.format_exc()
if amon_available:
data = {
--- a/documents/scrapers/e2a845e55bc9986e6c75c5ad2c508b8d.py
+++ b/documents/scrapers/e2a845e55bc9986e6c75c5ad2c508b8d.py
@@ -14,5 +14,3 @@
print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericRSSDisclogScraper)
ScraperImplementation().doScrape()
-www.finance.gov.au/foi/disclosure-log/foi-rss.xml
-