--- a/documents/scrape.py +++ b/documents/scrape.py @@ -7,14 +7,15 @@ from urlparse import urljoin import time import os +import sys import mimetypes import urllib import urlparse import socket #couch = couchdb.Server('http://192.168.1.148:5984/') -couch = couchdb.Server('http://192.168.1.113:5984/') -#couch = couchdb.Server('http://127.0.0.1:5984/') +#couch = couchdb.Server('http://192.168.1.113:5984/') +couch = couchdb.Server('http://127.0.0.1:5984/') def mkhash(input): @@ -103,7 +104,7 @@ req = urllib2.Request(url) print "Fetching %s (%s)" % (url, hash) if url.startswith("mailto") or url.startswith("javascript") or url.startswith("#") or url == None or url == "": - print "Not a valid HTTP url" + print >> sys.stderr, "Not a valid HTTP url" return (None, None, None) doc = docsdb.get(hash) if doc == None: @@ -159,13 +160,13 @@ #store as attachment epoch-filename except (urllib2.URLError, socket.timeout) as e: - print "error!" + print >> sys.stderr,"error!" error = "" if hasattr(e, 'reason'): error = "error %s in downloading %s" % (str(e.reason), url) elif hasattr(e, 'code'): error = "error %s in downloading %s" % (e.code, url) - print error + print >> sys.stderr, error doc['error'] = error docsdb.save(doc) return (None, None, None)