#!/usr/bin/python
# coding=utf-8
'''
spatial ingestor for data.gov.au
<alex.sadleir@linkdigital.com.au>
1.0  28/11/2013  initial implementation

Loads the first shapefile (or, failing that, the first KML/KMZ) resource of a
CKAN dataset into PostGIS, publishes the resulting table through the GeoServer
REST API and builds the list of WMS/WFS-derived resources for the dataset.

Command line: db_settings_json api_url api_key dataset_id
  db_settings_json is a JSON object with the keys dbname, user, password, host.

This script targets Python 2 (urllib.urlretrieve / urllib.quote).
'''
import ckanapi  # https://github.com/open-data/ckanapi
import errno
import os
import shutil
import sys
import glob
from pprint import pprint
from email.mime.text import MIMEText
from subprocess import Popen, PIPE
import tempfile
import smtplib
from zipfile import ZipFile
from datetime import datetime
import urllib
import fileinput
import json
import psycopg2
import requests
from dateutil import parser
import lxml.etree as et

geoserver_addr = "http://data.gov.au/geoserver/"
geoserver_user = "admin"
# NOTE(security): credentials are committed in source. They can now be
# overridden from the environment; the literals remain only as a fallback so
# existing deployments keep working. Rotate them and drop the fallbacks.
geoserver_passwd = os.environ.get("GEOSERVER_PASSWD", "oRu7chan")
smtp_user = "datagovau@gmail.com"
smtp_passwd = os.environ.get("DATAGOVAU_SMTP_PASSWD", "3P4ELm9kjNAmKUL")
email_addr = "alex.sadleir@linkdigital.com.au"
shp2pgsql = "/usr/bin/shp2pgsql"
omitted_orgs = ['launcestoncitycouncil', 'gcc']


def email(subject, body):
    """Send a plain-text notification mail to ``email_addr`` via Gmail SMTP.

    subject -- value of the Subject header
    body    -- message text
    """
    msg = MIMEText(body)
    msg["From"] = smtp_user
    msg["To"] = email_addr
    msg["Subject"] = subject
    s = smtplib.SMTP('smtp.gmail.com', 587)
    try:
        s.ehlo()
        s.starttls()
        # Re-identify over the encrypted channel. The original wrote ``s.ehlo``
        # without parentheses, which never made the call.
        s.ehlo()
        s.login(smtp_user, smtp_passwd)
        s.sendmail(msg["From"], [msg["To"]], msg.as_string())
    finally:
        # Close the session even when login/sendmail raises.
        s.quit()


def success(msg):
    """Report a completed run by mail and terminate with exit status 0."""
    print("Completed!")
    email("geodata success", msg)
    # The original exited with errno.EACCES here, so a successful run looked
    # like a failure to whatever invoked the script.
    sys.exit(0)


def failure(msg):
    """Report an error by mail and terminate with a non-zero exit status.

    NOTE(security): the mail body includes sys.argv, which carries the
    database password and the CKAN API key.
    """
    print("ERROR -" + msg)
    email("geodata error", str(sys.argv) + msg)
    sys.exit(errno.EACCES)


def quote_ident(name):
    """Return ``name`` as a double-quoted SQL identifier."""
    return '"' + name.replace('"', '""') + '"'


def get_cursor(db_settings):
    """Open a connection described by ``db_settings`` and return (cursor, connection).

    db_settings -- dict with the keys dbname, user, password, host

    Calls failure() (which exits the process) when the connection cannot be
    established. The connection is switched to isolation level 0 (autocommit).
    """
    try:
        conn = psycopg2.connect(dbname=db_settings['dbname'],
                                user=db_settings['user'],
                                password=db_settings['password'],
                                host=db_settings['host'])
    except psycopg2.Error:
        failure("I am unable to connect to the database.")
    cur = conn.cursor()
    conn.set_isolation_level(0)
    return (cur, conn)


def psql_load(proc):
    """Execute the SQL that ``proc`` writes to its stdout against the database.

    proc -- a Popen object created with stdout=PIPE

    Lines are accumulated until one ends with ';' and the accumulated text is
    then executed as one command. Uses the module-level ``db_settings``.
    """
    (cur, conn) = get_cursor(db_settings)
    try:
        pending = []
        for line in iter(proc.stdout.readline, ''):
            pending.append(line)
            # readline() keeps the trailing newline, so the terminator has to
            # be looked for after stripping it. Without this the check never
            # matched and the whole dump was buffered into a single execute().
            # NOTE(review): a text value containing a line that ends in ';'
            # would be split mid-statement - confirm this cannot occur.
            if line.rstrip().endswith(';'):
                cur.execute("".join(pending))
                pending = []
        remainder = "".join(pending)
        if remainder.strip() != "":
            cur.execute(remainder)
    finally:
        cur.close()
        conn.close()


def geoserver_resource(name, description, fmt, url):
    """Build a CKAN resource dict stamped with the current time."""
    return {"name": name,
            "description": description,
            "format": fmt,
            "url": url,
            "last_modified": datetime.now().isoformat()}


if len(sys.argv) != 5:
    print("spatial ingester. command line: postgis_url api_url api_key dataset_id")
    sys.exit(errno.EACCES)

(path, db_settings_json, api_url, api_key, dataset_id) = sys.argv
db_settings = json.loads(db_settings_json)

ckan = ckanapi.RemoteCKAN(address=api_url, apikey=api_key)
dataset = ckan.action.package_show(id=dataset_id)
print("loaded dataset" + dataset['name'])

# Datasets without an organization carry None here.
organization = dataset.get('organization') or {}
if organization.get('name') in omitted_orgs:
    print(organization['name'] + " in omitted_orgs")
    sys.exit(0)

ows_resources = []
kml_resources = []
shp_resources = []
data_modified_date = None
for resource in dataset['resources']:
    resource_format = resource.get('format') or ''
    resource_url = resource.get('url') or ''
    on_geoserver = geoserver_addr in resource_url
    if "wms" in resource_format or "wfs" in resource_format:
        if not on_geoserver:
            failure(dataset['id'] + " already has geo api")
        else:
            ows_resources += [resource]
    # Resources served by our own GeoServer were generated by an earlier run
    # of this script (e.g. the KML link) and are not source data.
    if on_geoserver:
        continue
    if "kml" in resource_format or "kmz" in resource_format:
        data_modified_date = resource['last_modified']
        kml_resources += [resource]
    if "shp" in resource_format:
        data_modified_date = resource['last_modified']
        shp_resources += [resource]

if len(shp_resources) + len(kml_resources) == 0:
    print("No geodata format files detected")
    sys.exit(0)

# If the geoserver api link does not exist or is out of date with the data,
# continue; otherwise there is nothing to do.
# The original referenced an undefined ``wms_resources`` and compared a
# datetime with an unparsed string.
if ows_resources and data_modified_date and ows_resources[0].get('last_modified'):
    api_modified_date = parser.parse(ows_resources[0]['last_modified'])
    if parser.parse(data_modified_date) <= api_modified_date:
        print("Already up to date")
        sys.exit(0)

email("geodata processing started for " + dataset['id'], "")
msg = dataset['id']

# Drop any table left over from a previous ingest of this dataset.
(cur, conn) = get_cursor(db_settings)
table_name = dataset['id'].replace("-", "_")
cur.execute('DROP TABLE IF EXISTS ' + quote_ident(table_name))
cur.close()
conn.close()

# Work inside a scratch directory; downloads and extracted files land here.
tempdir = tempfile.mkdtemp(dataset['id'])
os.chdir(tempdir)
print(tempdir + " created")

# Stays None unless a .prj file supplies the native CRS. The original left the
# name undefined for shapefile zips without a .prj.
nativeCRS = None

if len(shp_resources) > 0:
    # load esri shapefiles
    print("using SHP file " + shp_resources[0]['url'])
    (filepath, headers) = urllib.urlretrieve(shp_resources[0]['url'], "input.zip")
    print("shp downlaoded")
    with ZipFile(filepath, 'r') as myzip:
        myzip.extractall()
    print("shp unziped")
    shpfiles = glob.glob("*.[sS][hH][pP]")
    prjfiles = glob.glob("*.[pP][rR][jJ]")
    if len(shpfiles) == 0:
        failure("no shp files found in zip " + shp_resources[0]['url'])
    print("converting to pgsql " + table_name + " " + shpfiles[0])
    # stderr is inherited rather than piped: a piped stderr that nobody reads
    # can fill up and block shp2pgsql forever.
    process = Popen([shp2pgsql, shpfiles[0], table_name], stdout=PIPE)
    psql_load(process)
    if process.wait() != 0:
        failure("shp2pgsql failed for " + shpfiles[0])
    if len(prjfiles) > 0:
        with open(prjfiles[0], 'r') as prjfile:
            nativeCRS = prjfile.read()
else:
    print("using KML file " + kml_resources[0]['url'])
    # if kml ogr2ogr http://gis.stackexchange.com/questions/33102/how-to-import-kml-file-with-custom-data-to-postgres-postgis-database
    if kml_resources[0]['format'] == "kmz":
        (filepath, headers) = urllib.urlretrieve(kml_resources[0]['url'], "input.zip")
        with ZipFile(filepath, 'r') as myzip:
            myzip.extractall()
        print("kmz unziped")
        kmlfiles = glob.glob("*.[kK][mM][lL]")
        if len(kmlfiles) == 0:
            failure("no kml files found in zip " + kml_resources[0]['url'])
        else:
            kml_file = kmlfiles[0]
    else:
        (filepath, headers) = urllib.urlretrieve(kml_resources[0]['url'], "input.kml")
        kml_file = "input.kml"
    print("changing kml folder name")
    tree = et.parse(kml_file)
    element = tree.xpath('//kml:Folder/kml:name',
                         namespaces={'kml': "http://www.opengis.net/kml/2.2"})
    if len(element) == 0:
        failure("no Folder name found in kml " + kml_resources[0]['url'])
    # Rename the first folder to the table name before handing it to ogr2ogr.
    element[0].text = table_name
    with open(table_name + ".kml", "w") as ofile:
        ofile.write(et.tostring(tree))
    print("converting to pgsql " + table_name + ".kml")
    # NOTE(security): the connection string (with password) is passed on the
    # command line and echoed by pprint below.
    pargs = ['ogr2ogr', '-f', 'PostgreSQL', "--config", "PG_USE_COPY", "YES",
             'PG:dbname=\'' + db_settings['dbname'] +
             '\' host=\'' + db_settings['host'] +
             '\' user=\'' + db_settings['user'] +
             '\' password=\'' + db_settings['password'] + '\'',
             table_name + ".kml", '-lco', 'GEOMETRY_NAME=geom']
    pprint(pargs)
    p = Popen(pargs)
    p.communicate()
    if p.returncode != 0:
        failure("ogr2ogr failed for " + table_name + ".kml")

# load bounding boxes
(cur, conn) = get_cursor(db_settings)
cur.execute('SELECT ST_Extent(geom) as box,ST_AsGeoJSON(ST_Extent(geom)) as geojson from ' +
            quote_ident(table_name))
(bbox, bgjson) = cur.fetchone()
cur.close()
conn.close()
print(bbox)
if bbox is None:
    # ST_Extent yields NULL when the table holds no geometries.
    failure("no geometry loaded into table " + table_name)

# create geoserver dataset http://boundlessgeo.com/2012/10/adding-layers-to-geoserver-using-the-rest-api/
# name workspace after dataset
workspace = dataset['name']
ws = requests.post(geoserver_addr + 'rest/workspaces',
                   data=json.dumps({'workspace': {'name': workspace}}),
                   headers={'Content-type': 'application/json'},
                   auth=(geoserver_user, geoserver_passwd))
pprint(ws)

datastore = dataset['name'] + 'ds'
dsdata = json.dumps({'dataStore': {'name': datastore,
    'connectionParameters': {
        'host': db_settings['host'],
        'port': 5432,
        'database': db_settings['dbname'],
        'schema': 'public',
        'user': db_settings['user'] + "_data",  # use read only user
        'passwd': db_settings['password'],
        'dbtype': 'postgis'
    }}})
print(dsdata)
r = requests.post(geoserver_addr + 'rest/workspaces/' + workspace + '/datastores',
                  data=dsdata,
                  headers={'Content-type': 'application/json'},
                  auth=(geoserver_user, geoserver_passwd))
pprint(r)

# name layer after resource title
ftdata = {'featureType': {'name': table_name, 'title': dataset['title']}}
# bbox looks like "BOX(minx miny,maxx maxy)"; reduce it to four tokens.
(minx, miny, maxx, maxy) = bbox.replace("BOX", "").replace("(", "").replace(")", "").replace(",", " ").split(" ")
bbox_obj = {'minx': minx, 'maxx': maxx, 'miny': miny, 'maxy': maxy}

if nativeCRS is not None:
    ftdata['featureType']['nativeCRS'] = nativeCRS
else:
    ftdata['featureType']['nativeBoundingBox'] = bbox_obj
    ftdata['featureType']['latLonBoundingBox'] = bbox_obj
    ftdata['featureType']['srs'] = "EPSG:4326"
ftdata = json.dumps(ftdata)
featuretypes_url = (geoserver_addr + 'rest/workspaces/' + workspace +
                    '/datastores/' + datastore + "/featuretypes")
print(featuretypes_url)
print(ftdata)
r = requests.post(featuretypes_url,
                  data=ftdata,
                  headers={'Content-Type': 'application/json'},
                  auth=(geoserver_user, geoserver_passwd))
pprint(r)

# generate wms/wfs api links, kml, png resources and add to package
print(bgjson)
dataset['spatial'] = bgjson

# TODO update or append
ws_addr = geoserver_addr + dataset['name'] + "/"
title = dataset['title']
getmap_url = (ws_addr + "wms?request=GetMap&layers=" + table_name +
              "&bbox=" + bbox_obj['minx'] + "," + bbox_obj['miny'] + "," +
              bbox_obj['maxx'] + "," + bbox_obj['maxy'] +
              "&width=512&height=512&format=")
getfeature_url = (ws_addr + "wfs?request=GetFeature&typeName=" + table_name +
                  "&outputFormat=")

dataset['resources'].append(geoserver_resource(
    title + " Preview Image",
    "View overview image of this dataset",
    "image/png", getmap_url + urllib.quote("image/png")))
dataset['resources'].append(geoserver_resource(
    title + " KML",
    "For use in web and desktop spatial data tools including Google Earth",
    "kml", getmap_url + urllib.quote("kml")))
dataset['resources'].append(geoserver_resource(
    title + " CSV",
    "For summary of the objects/data in this collection",
    "csv", getfeature_url + urllib.quote("csv")))
dataset['resources'].append(geoserver_resource(
    title + " GeoJSON",
    "For use in web-based data visualisation of this collection",
    "json", getfeature_url + urllib.quote("json")))
dataset['resources'].append(geoserver_resource(
    title + " - Preview this Dataset (WMS)",
    "View the data in this datasets online via web-based WMS viewer",
    "wms", ws_addr + "wms?request=GetCapabilities"))
dataset['resources'].append(geoserver_resource(
    title + " WFS Link",
    "WFS Link for use of live data in Desktop GIS tools",
    "wfs", ws_addr + "wfs?request=GetCapabilities"))

pprint(dataset)
# The update call is disabled in the original; the dataset is only printed.
#ckan.action.package_update(id=dataset['id'],spatial=dataset['spatial'],resources=dataset['resources'])

# delete tempdir
shutil.rmtree(tempdir)
success(msg)