From: CKAN data.gov.au Date: Mon, 02 Dec 2013 06:06:13 +0000 Subject: Add KML/KMZ support X-Git-Url: https://maxious.lambdacomplex.org/git/?p=dga-spatialingestor.git&a=commitdiff&h=e8d573cf05f51c66075230e40353a2856fabc18f --- Add KML/KMZ support --- --- a/README.md +++ b/README.md @@ -24,6 +24,8 @@ sudo -u postgres psql $db -c "CREATE EXTENSION postgis;" sudo -u postgres psql $db -c "ALTER TABLE geometry_columns OWNER TO $owner; ALTER TABLE spatial_ref_sys OWNER TO $owner;" +grant select on table geometry_columns to ckandga_data; # grant select to read only user + INSERT into spatial_ref_sys (srid, auth_name, auth_srid, proj4text, srtext) values ( 96643, 'sr-org', 6643, '', 'PROJCS["Albers134",GEOGCS["GCS_GDA_1994",DATUM["D_GDA_1994",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.017453292519943295]],PROJECTION["Albers"],PARAMETER["False_Easting",0.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",134.0],PARAMETER["Standard_Parallel_1",-18.0],PARAMETER["Standard_Parallel_2",-36.0],PARAMETER["Latitude_Of_Origin",0.0],UNIT["Meter",1.0]]'); --- a/dga-spatialingestor.py +++ b/dga-spatialingestor.py @@ -19,22 +19,30 @@ import json import psycopg2 import requests - -geoserver_addr = "http://data.disclosurelo.gs:8080/geoserver/" +from dateutil import parser +import lxml.etree as et + +geoserver_addr = "http://data.gov.au/geoserver/" geoserver_user = "admin" -geoserver_passwd = "geoserver" -email_addr = "maxious@lambdacomplex.org" -shp2pgsql = "/usr/lib/postgresql/9.2/bin/shp2pgsql" -omitted_orgs = [] +geoserver_passwd = "oRu7chan" +email_addr = "alex.sadleir@linkdigital.com.au" +shp2pgsql = "/usr/bin/shp2pgsql" +omitted_orgs = ['launcestoncitycouncil','gcc'] def email(subject, body): msg = MIMEText(body) - msg["From"] = "ckan@localhost" + msg["From"] = "datagovau@gmail.com" msg["To"] = email_addr msg["Subject"] = subject # Send the message via our own SMTP server, but don't include the # envelope header. - s = smtplib.SMTP('localhost') + #p = Popen(["/usr/sbin/sendmail", "-t"], stdin=PIPE) + #p.communicate(msg.as_string()) + s = smtplib.SMTP('smtp.gmail.com',587) + s.ehlo() + s.starttls() + s.ehlo + s.login('datagovau@gmail.com','3P4ELm9kjNAmKUL') s.sendmail(msg["From"], [msg["To"]], msg.as_string()) s.quit() @@ -71,6 +79,7 @@ cur.execute(sql) sql = "" if sql != "": + #print sql cur.execute(sql) cur.close() conn.close() @@ -86,8 +95,8 @@ dataset = ckan.action.package_show(id=dataset_id) print "loaded dataset"+dataset['name'] #pprint(dataset) -if dataset['owner_org'] in omitted_orgs: - print(dataset.owner_org + " in omitted_orgs") +if dataset['organization']['name'] in omitted_orgs: + print(dataset['organization']['name'] + " in omitted_orgs") sys.exit(0); ows_resources = [] @@ -101,29 +110,31 @@ else: ows_resources += [resource] - if "kml" in resource['format']: + if "kml" in resource['format'] or "kmz" in resource['format']: + data_modified_date = resource['last_modified'] kml_resources += [resource] if "shp" in resource['format']: + data_modified_date = resource['last_modified'] shp_resources += [resource] -if len(shp_resources) == 0: +if len(shp_resources) + len(kml_resources) == 0: print "No geodata format files detected" sys.exit(0); #if geoserver api link does not exist or api link is out of date with data, continue -if len(ows_resources) > 0 and data_modified_date <= wms_resources[0]['last_modified']: +if len(ows_resources) > 0 and parser.parse(data_modified_date) <= wms_resources[0]['last_modified']: print "Already up to date" sys.exit(0); -email("geodata processing started for "+dataset['id'], str(sys.argv)) -msg = "" +email("geodata processing started for "+dataset['id'], "") +msg = dataset['id'] #download resource to tmpfile #check filesize limit (cur,conn) = get_cursor(db_settings) table_name = dataset['id'].replace("-","_") -cur.execute("DROP TABLE IF EXISTS "+table_name) +cur.execute('DROP TABLE IF EXISTS "'+table_name+'"') cur.close() conn.close() @@ -142,20 +153,43 @@ prjfiles = glob.glob("*.[pP][rR][jJ]") if len(shpfiles) == 0: failure("no shp files found in zip "+shp_resources[0]['url']) - print "converting to pgsql "+shpfiles[0] + print "converting to pgsql "+table_name+" "+shpfiles[0] process = Popen([shp2pgsql,shpfiles[0], table_name], stdout=PIPE, stderr=PIPE) psql_load(process) if len(prjfiles) > 0: nativeCRS = open(prjfiles[0], 'r').read() -#else: -# print "using KML file "+kml_resources[0]['url'] -# #if kml ogr2ogr http://gis.stackexchange.com/questions/33102/how-to-import-kml-file-with-custom-data-to-postgres-postgis-database -# (filepath,headers) = urllib.urlretrieve(kml_resources[0]['url'], "input.kml") - +else: + print "using KML file "+kml_resources[0]['url'] + nativeCRS = None + #if kml ogr2ogr http://gis.stackexchange.com/questions/33102/how-to-import-kml-file-with-custom-data-to-postgres-postgis-database + if kml_resources[0]['format'] == "kmz": + (filepath,headers) = urllib.urlretrieve(kml_resources[0]['url'], "input.zip" ) + with ZipFile(filepath, 'r') as myzip: + myzip.extractall() + print "kmz unziped" + kmlfiles = glob.glob("*.[kK][mM][lL]") + if len(kmlfiles) == 0: + failure("no kml files found in zip "+kml_resources[0]['url']) + else: + kml_file = kmlfiles[0] + else: + (filepath,headers) = urllib.urlretrieve(kml_resources[0]['url'], "input.kml") + kml_file = "input.kml" + print "changing kml folder name" + tree = et.parse(kml_file) + element = tree.xpath('//kml:Folder/kml:name', namespaces={'kml': "http://www.opengis.net/kml/2.2"}) + element[0].text = table_name + with open(table_name+".kml", "w") as ofile: + ofile.write(et.tostring(tree)) + print "converting to pgsql "+table_name+".kml" + pargs = ['ogr2ogr','-f','PostgreSQL',"--config" ,"PG_USE_COPY","YES",'PG:dbname=\''+ db_settings['dbname']+'\' host=\''+db_settings['host']+'\' user=\''+db_settings['user']+ '\' password=\''+db_settings['password']+'\'' ,table_name+".kml",'-lco','GEOMETRY_NAME=geom'] + pprint(pargs) + p = Popen(pargs)#, stdout=PIPE, stderr=PIPE) + p.communicate() #load bounding boxes (cur,conn) = get_cursor(db_settings) -cur.execute('SELECT ST_Extent(geom) as box,ST_AsGeoJSON(ST_Extent(geom)) as geojson from '+table_name) +cur.execute('SELECT ST_Extent(geom) as box,ST_AsGeoJSON(ST_Extent(geom)) as geojson from "'+table_name+'"') (bbox,bgjson) = cur.fetchone() cur.close() conn.close() @@ -165,7 +199,7 @@ #create geoserver dataset http://boundlessgeo.com/2012/10/adding-layers-to-geoserver-using-the-rest-api/ # name workspace after dataset workspace = dataset['name'] -ws = requests.post(geoserver_addr+'rest/workspaces', data=json.dumps({'workspace': {'name': workspace} }), headers={'Content-type': 'application/json'}, auth=('admin', 'geoserver')) +ws = requests.post(geoserver_addr+'rest/workspaces', data=json.dumps({'workspace': {'name': workspace} }), headers={'Content-type': 'application/json'}, auth=(geoserver_user, geoserver_passwd)) pprint(ws) #echo ws.status_code #echo ws.text @@ -177,19 +211,19 @@ 'port':5432, 'database': db_settings['dbname'], 'schema':'public', - 'user':db_settings['user'], + 'user':db_settings['user'] + "_data", #use read only user 'passwd':db_settings['password'], 'dbtype':'postgis' }}}) -#print dsdata -r = requests.post(geoserver_addr+'rest/workspaces/'+workspace+'/datastores', data=dsdata, headers={'Content-type': 'application/json'}, auth=('admin', 'geoserver')) +print dsdata +r = requests.post(geoserver_addr+'rest/workspaces/'+workspace+'/datastores', data=dsdata, headers={'Content-type': 'application/json'}, auth=(geoserver_user, geoserver_passwd)) pprint(r) #echo r.status_code #echo r.text # name layer after resource title -ftdata = {'featureType':{'name':table_name, 'title': resource['name']}} +ftdata = {'featureType':{'name':table_name, 'title': dataset['title']}} (minx,miny, maxx, maxy) = bbox.replace("BOX","").replace("(","").replace(")","").replace(","," ").split(" ") bbox_obj = { 'minx': minx,'maxx': maxx,'miny': miny,'maxy': maxy } @@ -200,13 +234,16 @@ ftdata['featureType']['latLonBoundingBox'] = bbox_obj ftdata['featureType']['srs'] = "EPSG:4326" ftdata = json.dumps(ftdata) -r = requests.post(geoserver_addr+'rest/workspaces/'+workspace+'/datastores/'+datastore+"/featuretypes", data= ftdata, headers={'Content-Type': 'application/json'}, auth=('admin', 'geoserver')) +print geoserver_addr+'rest/workspaces/'+workspace+'/datastores/'+datastore+"/featuretypes" +print ftdata +r = requests.post(geoserver_addr+'rest/workspaces/'+workspace+'/datastores/'+datastore+"/featuretypes", data= ftdata, headers={'Content-Type': 'application/json'}, auth=(geoserver_user, geoserver_passwd)) pprint(r) #generate wms/wfs api links, kml, png resources and add to package print bgjson dataset['spatial'] = bgjson +#TODO update or append ws_addr = geoserver_addr+dataset['name']+"/" for format in ['image/png','kml']: url = ws_addr+"wms?request=GetMap&layers="+table_name+"&bbox="+bbox_obj['minx']+","+bbox_obj['miny']+","+bbox_obj['maxx']+","+bbox_obj['maxy']+"&width=512&height=512&format="+urllib.quote(format) @@ -220,13 +257,13 @@ dataset['resources'].append({"name": dataset['title'] + " CSV","description":"For summary of the objects/data in this collection","format":format,"url":url, "last_modified": datetime.now().isoformat()}) if format == "json": dataset['resources'].append({"name":dataset['title'] + " GeoJSON","description":"For use in web-based data visualisation of this collection","format":format,"url":url, "last_modified": datetime.now().isoformat()}) -dataset['resources'].append({"name":dataset['name'] + " - Preview this Dataset (WMS)","description":"View the data in this datasets online via web-based WMS viewer","format":"wms", +dataset['resources'].append({"name":dataset['title'] + " - Preview this Dataset (WMS)","description":"View the data in this datasets online via web-based WMS viewer","format":"wms", "url":ws_addr+"wms?request=GetCapabilities", "last_modified": datetime.now().isoformat()}) dataset['resources'].append({"name":dataset['title'] + " WFS Link","description":"WFS Link for use of live data in Desktop GIS tools","format":"wfs", "url":ws_addr+"wfs?request=GetCapabilities", "last_modified": datetime.now().isoformat()}) pprint(dataset) -ckan.action.package_update(id=dataset['id'],spatial=dataset['spatial'],resources=dataset['resources']) +#ckan.action.package_update(id=dataset['id'],spatial=dataset['spatial'],resources=dataset['resources']) #delete tempdir