--- a/documents/datagov-resourcereport.py +++ b/documents/datagov-resourcereport.py @@ -5,11 +5,9 @@ import urllib import urlparse import httplib2 +import httplib import csv -import ssl -context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) -context.verify_mode = ssl.CERT_NONE def url_fix(s, charset='utf-8'): """Sometimes you get an URL by a user that just isn't a real @@ -71,9 +69,13 @@ h = httplib2.Http(disable_ssl_certificate_validation=True) resp = h.request(url_fix(resource['href']), 'HEAD') content_type = resp[0]['content-type'] if 'content-type' in resp[0].keys() else "" - out.writerow([pkg_name, url_fix(resource['href']), name,format, resp[0]['status'], content_type]) + out.writerow([pkg_name.encode('ascii', 'ignore'), url_fix(resource['href']).encode('ascii', 'ignore'), name.encode('ascii', 'ignore'),format, resp[0]['status'], content_type]) except httplib2.ServerNotFoundError: - out.writerow([pkg_name, url_fix(resource['href']), name,format, "500","badurl"]) + out.writerow([pkg_name.encode('ascii', 'ignore'), url_fix(resource['href']).encode('ascii', 'ignore'), name.encode('ascii', 'ignore'),format, "500","badurl"]) + except httplib.InvalidURL: + out.writerow([pkg_name.encode('ascii', 'ignore'), url_fix(resource['href']).encode('ascii', 'ignore'), name.encode('ascii', 'ignore'),format, "500","badurl"]) + except httplib2.RelativeURIError: + out.writerow([pkg_name.encode('ascii', 'ignore'), url_fix(resource['href']).encode('ascii', 'ignore'), name.encode('ascii', 'ignore'),format, "500","badurl"]) else: - out.writerow([pkg_name]) + out.writerow([pkg_name.encode('ascii', 'ignore')])