--- a/documents/scrape.py +++ b/documents/scrape.py @@ -7,6 +7,7 @@ from urlparse import urljoin import time import os +import sys import mimetypes import urllib import urlparse @@ -103,7 +104,7 @@ req = urllib2.Request(url) print "Fetching %s (%s)" % (url, hash) if url.startswith("mailto") or url.startswith("javascript") or url.startswith("#") or url == None or url == "": - print "Not a valid HTTP url" + print >> sys.stderr, "Not a valid HTTP url" return (None, None, None) doc = docsdb.get(hash) if doc == None: @@ -159,13 +160,13 @@ #store as attachment epoch-filename except (urllib2.URLError, socket.timeout) as e: - print "error!" + print >> sys.stderr,"error!" error = "" if hasattr(e, 'reason'): error = "error %s in downloading %s" % (str(e.reason), url) elif hasattr(e, 'code'): error = "error %s in downloading %s" % (e.code, url) - print error + print >> sys.stderr, error doc['error'] = error docsdb.save(doc) return (None, None, None)