Add KML/KMZ support master
Add KML/KMZ support

file:a/README.md -> file:b/README.md
--- a/README.md
+++ b/README.md
@@ -24,6 +24,12 @@
   sudo -u postgres psql $db -c "CREATE EXTENSION postgis;"
 
   sudo -u postgres psql $db -c "ALTER TABLE geometry_columns OWNER TO $owner; ALTER TABLE spatial_ref_sys OWNER TO $owner;"
+grant select on table geometry_columns to ckandga_data; -- grant read access to the read-only user
+
 
 INSERT into spatial_ref_sys (srid, auth_name, auth_srid, proj4text, srtext) values ( 96643, 'sr-org', 6643, '', 'PROJCS["Albers134",GEOGCS["GCS_GDA_1994",DATUM["D_GDA_1994",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.017453292519943295]],PROJECTION["Albers"],PARAMETER["False_Easting",0.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",134.0],PARAMETER["Standard_Parallel_1",-18.0],PARAMETER["Standard_Parallel_2",-36.0],PARAMETER["Latitude_Of_Origin",0.0],UNIT["Meter",1.0]]');
 
+
+Example run:
+python dga-spatialingestor.py '{"dbname":"geodatastore", "user":"postgres", "password":"snmc", "host":"localhost"}' http://localhost:5000 256fa905-cf92-4d6c-8714-95e3da2ea3c2 geodataset
+

--- a/dga-spatialingestor.py
+++ b/dga-spatialingestor.py
@@ -1,42 +1,59 @@
+#!/usr/bin/python
+# coding=utf-8
+'''
+spatial ingestor for data.gov.au
+<alex.sadleir@linkdigital.com.au>
+1.0  28/11/2013  initial implementation
+'''
 import ckanapi #https://github.com/open-data/ckanapi
 import errno, os, shutil, sys, glob
 from pprint import pprint 
 from email.mime.text import MIMEText
 from subprocess import Popen, PIPE
 import tempfile
+import smtplib
 from zipfile import ZipFile
+from datetime import datetime
 import urllib
 import fileinput
 import json
 import psycopg2
 import requests
-
-geoserver_addr = "http://localhost:8080/geoserver/"
+from dateutil import parser
+import lxml.etree as et
+
+geoserver_addr = "http://data.gov.au/geoserver/"  # base URL; 'rest/...' and per-workspace OWS paths are appended to it
 geoserver_user = "admin"
-geoserver_passwd = "geoserver"
-email_addr = "maxious@lambdacomplex.org"
-shp2pgsql = "/usr/lib/postgresql/9.2/bin/shp2pgsql"
-omitted_orgs = []
+geoserver_passwd = "oRu7chan"  # NOTE(review): credential committed in source - rotate it and load from env/config instead
+email_addr = "alex.sadleir@linkdigital.com.au"  # recipient ("To") of the status mails sent by email()
+shp2pgsql = "/usr/bin/shp2pgsql"  # path of the shp2pgsql binary launched via Popen
+omitted_orgs = ['launcestoncitycouncil','gcc']  # organization names whose datasets are skipped
 
 def email(subject, body):
 	msg = MIMEText(body)
-	msg["From"] = "ckan@localhost"
+	msg["From"] = "datagovau@gmail.com"  # sender address; same account that logs in to the SMTP server below
 	msg["To"] = email_addr
 	msg["Subject"] = subject
 	# Send the message via our own SMTP server, but don't include the
 	# envelope header.
-	s = smtplib.SMTP('localhost')
+	#p = Popen(["/usr/sbin/sendmail", "-t"], stdin=PIPE)
+	#p.communicate(msg.as_string())
+	s = smtplib.SMTP('smtp.gmail.com',587)  # port 587; the session is upgraded to TLS by starttls() below
+	s.ehlo()
+	s.starttls()
+	s.ehlo()  # actually call ehlo (was a bare attribute reference, i.e. a no-op) to re-greet after STARTTLS
+	s.login('datagovau@gmail.com','3P4ELm9kjNAmKUL')  # NOTE(review): credential committed in source - rotate it and load from env/config
 	s.sendmail(msg["From"], [msg["To"]], msg.as_string())
 	s.quit()
 
 def success(msg):
 	print "Completed!"
-#	email("geodata success",msg)
+	email("geodata success",msg)  # mail the report; NOTE(review): the exit below uses errno.EACCES, so success also ends with a non-zero status - confirm intended
 	sys.exit(errno.EACCES)
 
 def failure(msg):
 	print "ERROR -"+msg
-#	email("geodata error",str(sys.argv)+msg)
+	email("geodata error",str(sys.argv)+msg)  # NOTE(review): sys.argv is mailed verbatim; the README example passes the DB password in argv[1] - confirm acceptable
 	sys.exit(errno.EACCES)
 
 def get_cursor(db_settings):
@@ -62,6 +79,7 @@
 		cur.execute(sql)
 		sql = ""
 	if sql != "":
+		#print sql
 		cur.execute(sql)
         cur.close()
         conn.close()
@@ -77,8 +95,8 @@
 dataset = ckan.action.package_show(id=dataset_id)
 print "loaded dataset"+dataset['name']
 #pprint(dataset)
-if dataset['owner_org'] in omitted_orgs:
-	print(dataset.owner_org + " in omitted_orgs")
+if dataset['organization']['name'] in omitted_orgs:
+	print(dataset['organization']['name'] + " in omitted_orgs")
 	sys.exit(0);
 
 ows_resources = []
@@ -92,29 +110,31 @@
 		else:
 			ows_resources += [resource]
 		
-	if "kml" in resource['format']:
+	if "kml" in resource['format'] or "kmz" in resource['format']:
+		data_modified_date = resource['last_modified']
 		kml_resources += [resource]
 	if "shp" in resource['format']:
+		data_modified_date = resource['last_modified']
 		shp_resources += [resource]
 
-if len(shp_resources) == 0:
+if len(shp_resources) + len(kml_resources) == 0:
 	print "No geodata format files detected"
 	sys.exit(0);
 
 #if geoserver api link does not exist or api link is out of date with data, continue
-if len(ows_resources) > 0 and data_modified_date <= wms_resources[0]['last_modified']:
+if len(ows_resources) > 0 and parser.parse(data_modified_date) <= parser.parse(ows_resources[0]['last_modified']):  # last_modified values are strings: parse both sides, and index the list the guard just checked
 	print "Already up to date"
         sys.exit(0);
 
-#email("geodata processing started for "+dataset['id'], str(sys.argv))
-msg = ""
+email("geodata processing started for "+dataset['id'], "")
+msg = dataset['id']
 #download resource to tmpfile
 
 #check filesize limit
 
 (cur,conn) = get_cursor(db_settings)
 table_name = dataset['id'].replace("-","_")
-cur.execute("DROP TABLE IF EXISTS "+table_name)
+cur.execute('DROP TABLE IF EXISTS "'+table_name+'"')
 cur.close()
 conn.close()
 
@@ -133,20 +153,43 @@
 	prjfiles = glob.glob("*.[pP][rR][jJ]")
 	if len(shpfiles) == 0:
 		failure("no shp files found in zip "+shp_resources[0]['url'])
-	print "converting to pgsql "+shpfiles[0]
+	print "converting to pgsql "+table_name+" "+shpfiles[0]
 	process = Popen([shp2pgsql,shpfiles[0], table_name], stdout=PIPE, stderr=PIPE)
 	psql_load(process)
 	if len(prjfiles) > 0:
 		nativeCRS = open(prjfiles[0], 'r').read()
-#else:
-#	print "using KML file "+kml_resources[0]['url']
-#	#if kml ogr2ogr http://gis.stackexchange.com/questions/33102/how-to-import-kml-file-with-custom-data-to-postgres-postgis-database
-#	(filepath,headers) = urllib.urlretrieve(kml_resources[0]['url'], "input.kml")
-
+else:
+	print "using KML file "+kml_resources[0]['url']
+	nativeCRS = None  # no .prj is read on this path; the feature type later falls back to EPSG:4326 when this is None
+	#if kml ogr2ogr http://gis.stackexchange.com/questions/33102/how-to-import-kml-file-with-custom-data-to-postgres-postgis-database
+	if kml_resources[0]['format'] == "kmz":  # NOTE(review): exact match here, but the resource scan uses a substring test ("kmz" in format) - confirm formats are normalised
+		(filepath,headers) = urllib.urlretrieve(kml_resources[0]['url'], "input.zip" )  # KMZ is treated as a zip archive: download, then unpack into the current directory
+		with ZipFile(filepath, 'r') as myzip:
+			myzip.extractall()
+		print "kmz unziped"
+		kmlfiles = glob.glob("*.[kK][mM][lL]")
+		if len(kmlfiles) == 0:
+			failure("no kml files found in zip "+kml_resources[0]['url'])
+		else:
+			kml_file = kmlfiles[0]  # only the first .kml found after extraction is used
+	else:  # any other KML resource is downloaded as-is
+		(filepath,headers) = urllib.urlretrieve(kml_resources[0]['url'], "input.kml")
+		kml_file = "input.kml"
+	print "changing kml folder name"
+	tree = et.parse(kml_file)
+	element = tree.xpath('//kml:Folder/kml:name', namespaces={'kml': "http://www.opengis.net/kml/2.2"})
+	element[0].text = table_name  # rename the first Folder to table_name (presumably so ogr2ogr names the imported table after it - verify); NOTE(review): IndexError if there is no kml:Folder/kml:name
+	with open(table_name+".kml", "w") as ofile:  # write the renamed document next to the download
+		ofile.write(et.tostring(tree))
+	print "converting to pgsql "+table_name+".kml"
+	pargs = ['ogr2ogr','-f','PostgreSQL',"--config" ,"PG_USE_COPY","YES",'PG:dbname=\''+ db_settings['dbname']+'\' host=\''+db_settings['host']+'\' user=\''+db_settings['user']+ '\' password=\''+db_settings['password']+'\'' ,table_name+".kml",'-lco','GEOMETRY_NAME=geom']  # geometry column is named geom, matching the ST_Extent(geom) query run afterwards
+	pprint(pargs)  # NOTE(review): this prints the connection string, DB password included
+	p = Popen(pargs)#, stdout=PIPE, stderr=PIPE)
+	p.communicate()  # wait for ogr2ogr to finish; its exit status is not checked
 
 #load bounding boxes
 (cur,conn) = get_cursor(db_settings)
-cur.execute('SELECT ST_Extent(geom) as box,ST_AsGeoJSON(ST_Extent(geom)) as geojson from '+table_name)
+cur.execute('SELECT ST_Extent(geom) as box,ST_AsGeoJSON(ST_Extent(geom)) as geojson from "'+table_name+'"')
 (bbox,bgjson) = cur.fetchone()
 cur.close()
 conn.close()
@@ -156,7 +199,7 @@
 #create geoserver dataset http://boundlessgeo.com/2012/10/adding-layers-to-geoserver-using-the-rest-api/
 # name workspace after dataset
 workspace = dataset['name']
-ws = requests.post(geoserver_addr+'rest/workspaces', data=json.dumps({'workspace': {'name': workspace} }), headers={'Content-type': 'application/json'}, auth=('admin', 'geoserver'))
+ws = requests.post(geoserver_addr+'rest/workspaces', data=json.dumps({'workspace': {'name': workspace} }), headers={'Content-type': 'application/json'}, auth=(geoserver_user, geoserver_passwd))
 pprint(ws)
 #echo ws.status_code
 #echo ws.text
@@ -168,39 +211,60 @@
      'port':5432,
      'database': db_settings['dbname'],
      'schema':'public',
-     'user':db_settings['user'],
+     'user':db_settings['user'] + "_data", #use read only user
      'passwd':db_settings['password'],
      'dbtype':'postgis'
 
                 }}})
-#print dsdata
-r = requests.post(geoserver_addr+'rest/workspaces/'+workspace+'/datastores', data=dsdata, headers={'Content-type': 'application/json'}, auth=('admin', 'geoserver'))
+print dsdata
+r = requests.post(geoserver_addr+'rest/workspaces/'+workspace+'/datastores', data=dsdata, headers={'Content-type': 'application/json'}, auth=(geoserver_user, geoserver_passwd))
 pprint(r)
 #echo r.status_code
 #echo r.text
 
 # name layer after resource title
-ftdata = {'featureType':{'name':table_name, 'title': resource['name']}}
+ftdata = {'featureType':{'name':table_name, 'title': dataset['title']}}
+(minx,miny, maxx, maxy) = bbox.replace("BOX","").replace("(","").replace(")","").replace(","," ").split(" ")
+bbox_obj = { 'minx': minx,'maxx': maxx,'miny': miny,'maxy': maxy }
+
 if nativeCRS != None:
 	ftdata['featureType']['nativeCRS'] = nativeCRS
 else:
-	(minx,miny, maxx, maxy) = bbox.replace("BOX","").replace("(","").replace(")","").replace(","," ").split(" ")
-	bbox_obj = { 'minx': minx,'maxx': maxx,'miny': miny,'maxy': maxy }
 	ftdata['featureType']['nativeBoundingBox'] = bbox_obj
 	ftdata['featureType']['latLonBoundingBox'] = bbox_obj
 	ftdata['featureType']['srs'] = "EPSG:4326"
 ftdata = json.dumps(ftdata)
-r = requests.post(geoserver_addr+'rest/workspaces/'+workspace+'/datastores/'+datastore+"/featuretypes", data= ftdata, headers={'Content-Type': 'application/json'}, auth=('admin', 'geoserver'))
+print geoserver_addr+'rest/workspaces/'+workspace+'/datastores/'+datastore+"/featuretypes"
+print ftdata
+r = requests.post(geoserver_addr+'rest/workspaces/'+workspace+'/datastores/'+datastore+"/featuretypes", data= ftdata, headers={'Content-Type': 'application/json'}, auth=(geoserver_user, geoserver_passwd))
 pprint(r)
 
 #generate wms/wfs api links, kml, png resources and add to package
 print bgjson
 dataset['spatial'] = bgjson
-# ckan.action.resource_update(id=resource['id'],url=resource['url'],name=resource['name'],     last_modified=datetime.now().isoformat())
-#/geodatasetws/wms
-#/geodatasetws/wfs
-#http://cloudnine.lambdacomplex.org:8080/geoserver/tiger/ows?service=WFS&version=1.0.0&request=GetFeature&typeName=tiger:tiger_roads&outputFormat=application/json
-#http://cloudnine.lambdacomplex.org:8080/geoserver/tiger/ows?service=WFS&version=1.0.0&request=GetFeature&typeName=tiger:tiger_roads&outputFormat=csv
+
+#TODO update or append
+ws_addr = geoserver_addr+dataset['name']+"/"
+for format in ['image/png','kml']:
+      url = ws_addr+"wms?request=GetMap&layers="+table_name+"&bbox="+bbox_obj['minx']+","+bbox_obj['miny']+","+bbox_obj['maxx']+","+bbox_obj['maxy']+"&width=512&height=512&format="+urllib.quote(format)
+      if format == "image/png":
+              dataset['resources'].append({"name":dataset['title'] + " Preview Image","description":"View overview image of this dataset" ,"format":format,"url":url, "last_modified": datetime.now().isoformat()})
+      if format == "kml":
+              dataset['resources'].append({"name":dataset['title'] + " KML","description":"For use in web and desktop spatial data tools including Google Earth" ,"format":format,"url":url, "last_modified": datetime.now().isoformat()})
+for format in ['csv','json']:
+       url = ws_addr+"wfs?request=GetFeature&typeName="+table_name+"&outputFormat="+urllib.quote(format)
+       if format == "csv":
+               dataset['resources'].append({"name": dataset['title'] + " CSV","description":"For summary of the objects/data in this collection","format":format,"url":url, "last_modified": datetime.now().isoformat()})
+       if format == "json":
+               dataset['resources'].append({"name":dataset['title'] + " GeoJSON","description":"For use in web-based data visualisation of this collection","format":format,"url":url, "last_modified": datetime.now().isoformat()})
+dataset['resources'].append({"name":dataset['title'] + " - Preview this Dataset (WMS)","description":"View the data in this datasets online via web-based WMS viewer","format":"wms",
+    "url":ws_addr+"wms?request=GetCapabilities", "last_modified": datetime.now().isoformat()})
+dataset['resources'].append({"name":dataset['title'] + " WFS Link","description":"WFS Link for use of live data in Desktop GIS tools","format":"wfs",
+    "url":ws_addr+"wfs?request=GetCapabilities", "last_modified": datetime.now().isoformat()})
+
+pprint(dataset)
+#ckan.action.package_update(id=dataset['id'],spatial=dataset['spatial'],resources=dataset['resources'])
+
 
 #delete tempdir
 shutil.rmtree(tempdir)