From: CKAN data.gov.au Date: Mon, 02 Dec 2013 04:11:42 +0000 Subject: data.gov.au customisation X-Git-Url: https://maxious.lambdacomplex.org/git/?p=dga-spatialingestor.git&a=commitdiff&h=e98c36cbf8d14892c1da8e4a807e7672434a39c5 --- data.gov.au customisation --- --- a/README.md +++ b/README.md @@ -24,6 +24,12 @@ sudo -u postgres psql $db -c "CREATE EXTENSION postgis;" sudo -u postgres psql $db -c "ALTER TABLE geometry_columns OWNER TO $owner; ALTER TABLE spatial_ref_sys OWNER TO $owner;" +grant select on table geometry_columns to ckandga_data; # grant select to read only user + INSERT into spatial_ref_sys (srid, auth_name, auth_srid, proj4text, srtext) values ( 96643, 'sr-org', 6643, '', 'PROJCS["Albers134",GEOGCS["GCS_GDA_1994",DATUM["D_GDA_1994",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.017453292519943295]],PROJECTION["Albers"],PARAMETER["False_Easting",0.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",134.0],PARAMETER["Standard_Parallel_1",-18.0],PARAMETER["Standard_Parallel_2",-36.0],PARAMETER["Latitude_Of_Origin",0.0],UNIT["Meter",1.0]]'); + +example run +python dga-spatialingestor.py '{"dbname":"geodatastore", "user":"postgres", "password":"snmc", "host":"localhost"}' http://localhost:5000 256fa905-cf92-4d6c-8714-95e3da2ea3c2 geodataset + --- a/dga-spatialingestor.py +++ b/dga-spatialingestor.py @@ -1,42 +1,57 @@ +#!/usr/bin/python +# coding=utf-8 +''' +spatial ingestor for data.gov.au + +1.0 28/11/2013 initial implementation +''' import ckanapi #https://github.com/open-data/ckanapi import errno, os, shutil, sys, glob from pprint import pprint from email.mime.text import MIMEText from subprocess import Popen, PIPE import tempfile +import smtplib from zipfile import ZipFile +from datetime import datetime import urllib import fileinput import json import psycopg2 import requests -geoserver_addr = "http://localhost:8080/geoserver/" +geoserver_addr = "http://data.gov.au/geoserver/" geoserver_user = "admin" -geoserver_passwd = "geoserver" -email_addr = "maxious@lambdacomplex.org" -shp2pgsql = "/usr/lib/postgresql/9.2/bin/shp2pgsql" -omitted_orgs = [] +geoserver_passwd = "oRu7chan" +email_addr = "alex.sadleir@linkdigital.com.au" +shp2pgsql = "/usr/bin/shp2pgsql" +omitted_orgs = ['launcestoncitycouncil','gcc'] def email(subject, body): msg = MIMEText(body) - msg["From"] = "ckan@localhost" + msg["From"] = "datagovau@gmail.com" msg["To"] = email_addr msg["Subject"] = subject # Send the message via our own SMTP server, but don't include the # envelope header. - s = smtplib.SMTP('localhost') + #p = Popen(["/usr/sbin/sendmail", "-t"], stdin=PIPE) + #p.communicate(msg.as_string()) + s = smtplib.SMTP('smtp.gmail.com',587) + s.ehlo() + s.starttls() + s.ehlo + s.login('datagovau@gmail.com','3P4ELm9kjNAmKUL') s.sendmail(msg["From"], [msg["To"]], msg.as_string()) s.quit() def success(msg): print "Completed!" -# email("geodata success",msg) + email("geodata success",msg) sys.exit(errno.EACCES) def failure(msg): print "ERROR -"+msg -# email("geodata error",str(sys.argv)+msg) + email("geodata error",str(sys.argv)+msg) sys.exit(errno.EACCES) def get_cursor(db_settings): @@ -77,8 +92,8 @@ dataset = ckan.action.package_show(id=dataset_id) print "loaded dataset"+dataset['name'] #pprint(dataset) -if dataset['owner_org'] in omitted_orgs: - print(dataset.owner_org + " in omitted_orgs") +if dataset['organization']['name'] in omitted_orgs: + print(dataset['organization']['name'] + " in omitted_orgs") sys.exit(0); ows_resources = [] @@ -92,9 +107,11 @@ else: ows_resources += [resource] - if "kml" in resource['format']: - kml_resources += [resource] +# if "kml" in resource['format']: +# data_modified_date = resource['last_modified'] +# kml_resources += [resource] if "shp" in resource['format']: + data_modified_date = resource['last_modified'] shp_resources += [resource] if len(shp_resources) == 0: @@ -106,15 +123,16 @@ print "Already up to date" sys.exit(0); -#email("geodata processing started for "+dataset['id'], str(sys.argv)) -msg = "" +email("geodata processing started for "+dataset['id'], "") +msg = dataset['id'] #download resource to tmpfile #check filesize limit (cur,conn) = get_cursor(db_settings) -table_name = dataset['id'].replace("-","_") -cur.execute("DROP TABLE IF EXISTS "+table_name) +table_name = dataset['id'] +#.replace("-","_") +cur.execute('DROP TABLE IF EXISTS "'+table_name+'"') cur.close() conn.close() @@ -146,7 +164,7 @@ #load bounding boxes (cur,conn) = get_cursor(db_settings) -cur.execute('SELECT ST_Extent(geom) as box,ST_AsGeoJSON(ST_Extent(geom)) as geojson from '+table_name) +cur.execute('SELECT ST_Extent(geom) as box,ST_AsGeoJSON(ST_Extent(geom)) as geojson from "'+table_name+'"') (bbox,bgjson) = cur.fetchone() cur.close() conn.close() @@ -156,7 +174,7 @@ #create geoserver dataset http://boundlessgeo.com/2012/10/adding-layers-to-geoserver-using-the-rest-api/ # name workspace after dataset workspace = dataset['name'] -ws = requests.post(geoserver_addr+'rest/workspaces', data=json.dumps({'workspace': {'name': workspace} }), headers={'Content-type': 'application/json'}, auth=('admin', 'geoserver')) +ws = requests.post(geoserver_addr+'rest/workspaces', data=json.dumps({'workspace': {'name': workspace} }), headers={'Content-type': 'application/json'}, auth=(geoserver_user, geoserver_passwd)) pprint(ws) #echo ws.status_code #echo ws.text @@ -168,39 +186,59 @@ 'port':5432, 'database': db_settings['dbname'], 'schema':'public', - 'user':db_settings['user'], + 'user':db_settings['user'] + "_data", #use read only user 'passwd':db_settings['password'], 'dbtype':'postgis' }}}) -#print dsdata -r = requests.post(geoserver_addr+'rest/workspaces/'+workspace+'/datastores', data=dsdata, headers={'Content-type': 'application/json'}, auth=('admin', 'geoserver')) +print dsdata +r = requests.post(geoserver_addr+'rest/workspaces/'+workspace+'/datastores', data=dsdata, headers={'Content-type': 'application/json'}, auth=(geoserver_user, geoserver_passwd)) pprint(r) #echo r.status_code #echo r.text # name layer after resource title -ftdata = {'featureType':{'name':table_name, 'title': resource['name']}} +ftdata = {'featureType':{'name':table_name, 'title': dataset['title']}} +(minx,miny, maxx, maxy) = bbox.replace("BOX","").replace("(","").replace(")","").replace(","," ").split(" ") +bbox_obj = { 'minx': minx,'maxx': maxx,'miny': miny,'maxy': maxy } + if nativeCRS != None: ftdata['featureType']['nativeCRS'] = nativeCRS else: - (minx,miny, maxx, maxy) = bbox.replace("BOX","").replace("(","").replace(")","").replace(","," ").split(" ") - bbox_obj = { 'minx': minx,'maxx': maxx,'miny': miny,'maxy': maxy } ftdata['featureType']['nativeBoundingBox'] = bbox_obj ftdata['featureType']['latLonBoundingBox'] = bbox_obj ftdata['featureType']['srs'] = "EPSG:4326" ftdata = json.dumps(ftdata) -r = requests.post(geoserver_addr+'rest/workspaces/'+workspace+'/datastores/'+datastore+"/featuretypes", data= ftdata, headers={'Content-Type': 'application/json'}, auth=('admin', 'geoserver')) +print geoserver_addr+'rest/workspaces/'+workspace+'/datastores/'+datastore+"/featuretypes" +print ftdata +r = requests.post(geoserver_addr+'rest/workspaces/'+workspace+'/datastores/'+datastore+"/featuretypes", data= ftdata, headers={'Content-Type': 'application/json'}, auth=(geoserver_user, geoserver_passwd)) pprint(r) #generate wms/wfs api links, kml, png resources and add to package print bgjson dataset['spatial'] = bgjson -# ckan.action.resource_update(id=resource['id'],url=resource['url'],name=resource['name'], last_modified=datetime.now().isoformat()) -#/geodatasetws/wms -#/geodatasetws/wfs -#http://cloudnine.lambdacomplex.org:8080/geoserver/tiger/ows?service=WFS&version=1.0.0&request=GetFeature&typeName=tiger:tiger_roads&outputFormat=application/json -#http://cloudnine.lambdacomplex.org:8080/geoserver/tiger/ows?service=WFS&version=1.0.0&request=GetFeature&typeName=tiger:tiger_roads&outputFormat=csv + +ws_addr = geoserver_addr+dataset['name']+"/" +for format in ['image/png','kml']: + url = ws_addr+"wms?request=GetMap&layers="+table_name+"&bbox="+bbox_obj['minx']+","+bbox_obj['miny']+","+bbox_obj['maxx']+","+bbox_obj['maxy']+"&width=512&height=512&format="+urllib.quote(format) + if format == "image/png": + dataset['resources'].append({"name":dataset['title'] + " Preview Image","description":"View overview image of this dataset" ,"format":format,"url":url, "last_modified": datetime.now().isoformat()}) + if format == "kml": + dataset['resources'].append({"name":dataset['title'] + " KML","description":"For use in web and desktop spatial data tools including Google Earth" ,"format":format,"url":url, "last_modified": datetime.now().isoformat()}) +for format in ['csv','json']: + url = ws_addr+"wfs?request=GetFeature&typeName="+table_name+"&outputFormat="+urllib.quote(format) + if format == "csv": + dataset['resources'].append({"name": dataset['title'] + " CSV","description":"For summary of the objects/data in this collection","format":format,"url":url, "last_modified": datetime.now().isoformat()}) + if format == "json": + dataset['resources'].append({"name":dataset['title'] + " GeoJSON","description":"For use in web-based data visualisation of this collection","format":format,"url":url, "last_modified": datetime.now().isoformat()}) +dataset['resources'].append({"name":dataset['title'] + " - Preview this Dataset (WMS)","description":"View the data in this datasets online via web-based WMS viewer","format":"wms", + "url":ws_addr+"wms?request=GetCapabilities", "last_modified": datetime.now().isoformat()}) +dataset['resources'].append({"name":dataset['title'] + " WFS Link","description":"WFS Link for use of live data in Desktop GIS tools","format":"wfs", + "url":ws_addr+"wfs?request=GetCapabilities", "last_modified": datetime.now().isoformat()}) + +pprint(dataset) +#ckan.action.package_update(id=dataset['id'],spatial=dataset['spatial'],resources=dataset['resources']) + #delete tempdir shutil.rmtree(tempdir)