Load datasets into scrapr, then into the CKAN filestore
[disclosr.git] / documents / datagov-resourcereport.py
--- a/documents/datagov-resourcereport.py
+++ b/documents/datagov-resourcereport.py
@@ -5,11 +5,9 @@
 import urllib
 import urlparse
 import httplib2
+import httplib
 import csv
-import ssl
 
-context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
-context.verify_mode = ssl.CERT_NONE
 
 def url_fix(s, charset='utf-8'):
     """Sometimes you get an URL by a user that just isn't a real
@@ -71,9 +69,13 @@
 					h = httplib2.Http(disable_ssl_certificate_validation=True)
 					resp = h.request(url_fix(resource['href']), 'HEAD')
 					content_type = resp[0]['content-type'] if 'content-type' in resp[0].keys() else ""
-					out.writerow([pkg_name, url_fix(resource['href']), name,format, resp[0]['status'], content_type])
+					out.writerow([pkg_name.encode('ascii', 'ignore'), url_fix(resource['href']).encode('ascii', 'ignore'), name.encode('ascii', 'ignore'),format, resp[0]['status'], content_type])
 				    except httplib2.ServerNotFoundError:
-					out.writerow([pkg_name, url_fix(resource['href']), name,format, "500","badurl"])
+					out.writerow([pkg_name.encode('ascii', 'ignore'), url_fix(resource['href']).encode('ascii', 'ignore'), name.encode('ascii', 'ignore'),format, "500","badurl"])
+				    except httplib.InvalidURL:
+					out.writerow([pkg_name.encode('ascii', 'ignore'), url_fix(resource['href']).encode('ascii', 'ignore'), name.encode('ascii', 'ignore'),format, "500","badurl"])
+				    except httplib2.RelativeURIError:
+					out.writerow([pkg_name.encode('ascii', 'ignore'), url_fix(resource['href']).encode('ascii', 'ignore'), name.encode('ascii', 'ignore'),format, "500","badurl"])
 	    else:
-		out.writerow([pkg_name])
+		out.writerow([pkg_name.encode('ascii', 'ignore')])
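
The second hunk addresses two runtime failures in the resource loop: Python 2's csv writer raises UnicodeEncodeError on non-ASCII package or resource names, so every string field is lossily coerced with .encode('ascii', 'ignore'), and malformed hrefs that raise httplib.InvalidURL or httplib2.RelativeURIError now produce the same "500"/"badurl" row as unresolvable hosts. A hedged Python 2 sketch of the equivalent logic; the ascii_row helper, the tuple except clause, the input values, and the output filename are illustrative consolidations, not what the commit literally does:

    import csv
    import httplib
    import httplib2

    def ascii_row(*fields):
        # Python 2's csv writer cannot handle unicode, so lossily coerce
        # every unicode field to ASCII, mirroring the patch's
        # .encode('ascii', 'ignore') calls. (Hypothetical helper.)
        return [f.encode('ascii', 'ignore') if isinstance(f, unicode) else f
                for f in fields]

    # Hypothetical inputs standing in for one resource record from the loop.
    pkg_name, name, format = u'example-package', u'Example resource', 'CSV'
    url = 'https://data.gov.au/example.csv'  # hypothetical URL
    out = csv.writer(open('resource-report.csv', 'wb'))  # hypothetical output file

    h = httplib2.Http(disable_ssl_certificate_validation=True)
    try:
        resp = h.request(url, 'HEAD')
        content_type = resp[0].get('content-type', '')
        out.writerow(ascii_row(pkg_name, url, name, format,
                               resp[0]['status'], content_type))
    except (httplib2.ServerNotFoundError, httplib.InvalidURL,
            httplib2.RelativeURIError):
        # Unresolvable hosts and malformed hrefs all get the same sentinel
        # row, as in the patch.
        out.writerow(ascii_row(pkg_name, url, name, format, '500', 'badurl'))

Folding the three handlers into one tuple except clause keeps the sentinel row in a single place; the patch itself repeats the writerow call once per exception type.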