initial commit

file:b/README.md (new)
  ===================================
  dga-spatialingestor
  ===================================
 
Provides spatial ingestion capability for data.gov.au; ingestion can be triggered on dataset update by https://github.com/datagovuk/ckanext-os
 
 
  CKAN Configuration:
 
  ckan.plugins = os_wfs_server
 
  ckanext-os.spatial-datastore.jdbc.url = {"dbname":"geodatastore", "user":"user", "password":"password", "host":"localhost"}
 
  ckanext-os.spatial-ingester.filepath = /home/co/pyenv_dgu/src/os-spatialingester/spatial.ingester
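
The ingester expects the database settings (JSON), the CKAN API URL, an API key and a dataset id on the command line. A manual test run might look like the following (the API URL, key and dataset id are placeholders):

python /home/co/pyenv_dgu/src/os-spatialingester/spatial.ingester '{"dbname":"geodatastore", "user":"user", "password":"password", "host":"localhost"}' http://localhost my-api-key my-dataset-id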
 
  Creating the PostGIS database:
 
  owner=user
 
  db=geodatastore
 
  sudo -u postgres createdb -E UTF8 -O $owner $db
 
  sudo -u postgres psql $db -c "CREATE EXTENSION postgis;"
 
  sudo -u postgres psql $db -c "ALTER TABLE geometry_columns OWNER TO $owner; ALTER TABLE spatial_ref_sys OWNER TO $owner;"
 
Register the custom Albers134 SRID (run this SQL in psql against $db):

INSERT into spatial_ref_sys (srid, auth_name, auth_srid, proj4text, srtext) values ( 96643, 'sr-org', 6643, '', 'PROJCS["Albers134",GEOGCS["GCS_GDA_1994",DATUM["D_GDA_1994",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.017453292519943295]],PROJECTION["Albers"],PARAMETER["False_Easting",0.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",134.0],PARAMETER["Standard_Parallel_1",-18.0],PARAMETER["Standard_Parallel_2",-36.0],PARAMETER["Latitude_Of_Origin",0.0],UNIT["Meter",1.0]]');
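
To confirm the custom SRID was registered:

sudo -u postgres psql $db -c "SELECT srid, auth_name, auth_srid FROM spatial_ref_sys WHERE srid = 96643;"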
 
import ckanapi  # https://github.com/open-data/ckanapi
import errno, os, shutil, sys, glob
from pprint import pprint
from email.mime.text import MIMEText
from subprocess import Popen, PIPE
import smtplib
import tempfile
from zipfile import ZipFile
import urllib
import fileinput
import json
import psycopg2
import requests
 
  geoserver_addr = "http://localhost:8080/geoserver/"
  geoserver_user = "admin"
  geoserver_passwd = "geoserver"
  email_addr = "maxious@lambdacomplex.org"
  shp2pgsql = "/usr/lib/postgresql/9.2/bin/shp2pgsql"
  omitted_orgs = []
 
def email(subject, body):
    msg = MIMEText(body)
    msg["From"] = "ckan@localhost"
    msg["To"] = email_addr
    msg["Subject"] = subject
    # Send the message via our own SMTP server, but don't include the
    # envelope header.
    s = smtplib.SMTP('localhost')
    s.sendmail(msg["From"], [msg["To"]], msg.as_string())
    s.quit()
 
def success(msg):
    print "Completed!"
    # email("geodata success", msg)
    sys.exit(0)

def failure(msg):
    print "ERROR - " + msg
    # email("geodata error", str(sys.argv) + msg)
    sys.exit(errno.EACCES)
 
def get_cursor(db_settings):
    # Connect to an existing database
    try:
        conn = psycopg2.connect(dbname=db_settings['dbname'], user=db_settings['user'],
                                password=db_settings['password'], host=db_settings['host'])
    except:
        failure("I am unable to connect to the database.")
    # Open a cursor to perform database operations, in autocommit mode
    cur = conn.cursor()
    conn.set_isolation_level(0)
    #cur.execute("create extension postgis")
    return (cur, conn)

def psql_load(proc):
    # Feed shp2pgsql output to PostgreSQL one statement at a time
    (cur, conn) = get_cursor(db_settings)
    sql = ""
    for line in iter(proc.stdout.readline, ''):
        sql += line
        # readline keeps the trailing newline, so strip before checking for ';'
        if sql.rstrip().endswith(';'):
            cur.execute(sql)
            sql = ""
    if sql.strip() != "":
        cur.execute(sql)
    cur.close()
    conn.close()
 
if len(sys.argv) != 5:
    print "spatial ingester. command line: db_settings_json api_url api_key dataset_id"
    sys.exit(errno.EACCES)
else:
    (path, db_settings_json, api_url, api_key, dataset_id) = sys.argv
    db_settings = json.loads(db_settings_json)

ckan = ckanapi.RemoteCKAN(address=api_url, apikey=api_key)
dataset = ckan.action.package_show(id=dataset_id)
print "loaded dataset " + dataset['name']
#pprint(dataset)
if dataset['owner_org'] in omitted_orgs:
    print(dataset['owner_org'] + " in omitted_orgs")
    sys.exit(0)
 
ows_resources = []
kml_resources = []
shp_resources = []
data_modified_date = None
for resource in dataset['resources']:
    if "wms" in resource['format'] or "wfs" in resource['format']:
        if geoserver_addr not in resource['url']:
            failure(dataset['id'] + " already has geo api")
        else:
            ows_resources += [resource]

    if "kml" in resource['format']:
        kml_resources += [resource]
    if "shp" in resource['format']:
        shp_resources += [resource]

if len(shp_resources) == 0:
    print "No geodata format files detected"
    sys.exit(0)

# if geoserver api link does not exist or api link is out of date with data, continue
if len(ows_resources) > 0 and data_modified_date <= ows_resources[0]['last_modified']:
    print "Already up to date"
    sys.exit(0)
 
  #email("geodata processing started for "+dataset['id'], str(sys.argv))
  msg = ""
  #download resource to tmpfile
 
  #check filesize limit
 
  (cur,conn) = get_cursor(db_settings)
  table_name = dataset['id'].replace("-","_")
  cur.execute("DROP TABLE IF EXISTS "+table_name)
  cur.close()
  conn.close()
 
tempdir = tempfile.mkdtemp(dataset['id'])
os.chdir(tempdir)
print tempdir + " created"

# load esri shapefiles
nativeCRS = None  # stays None when the zip has no .prj file
if len(shp_resources) > 0:
    print "using SHP file " + shp_resources[0]['url']
    (filepath, headers) = urllib.urlretrieve(shp_resources[0]['url'], "input.zip")
    print "shp downloaded"
    with ZipFile(filepath, 'r') as myzip:
        myzip.extractall()
    print "shp unzipped"
    shpfiles = glob.glob("*.[sS][hH][pP]")
    prjfiles = glob.glob("*.[pP][rR][jJ]")
    if len(shpfiles) == 0:
        failure("no shp files found in zip " + shp_resources[0]['url'])
    print "converting to pgsql " + shpfiles[0]
    process = Popen([shp2pgsql, shpfiles[0], table_name], stdout=PIPE, stderr=PIPE)
    psql_load(process)
    if len(prjfiles) > 0:
        nativeCRS = open(prjfiles[0], 'r').read()
#else:
#    print "using KML file " + kml_resources[0]['url']
#    # if kml, use ogr2ogr http://gis.stackexchange.com/questions/33102/how-to-import-kml-file-with-custom-data-to-postgres-postgis-database
#    (filepath, headers) = urllib.urlretrieve(kml_resources[0]['url'], "input.kml")
 
 
  #load bounding boxes
  (cur,conn) = get_cursor(db_settings)
  cur.execute('SELECT ST_Extent(geom) as box,ST_AsGeoJSON(ST_Extent(geom)) as geojson from '+table_name)
  (bbox,bgjson) = cur.fetchone()
  cur.close()
  conn.close()
  print bbox
 
 
  #create geoserver dataset http://boundlessgeo.com/2012/10/adding-layers-to-geoserver-using-the-rest-api/
  # name workspace after dataset
  workspace = dataset['name']
ws = requests.post(geoserver_addr + 'rest/workspaces', data=json.dumps({'workspace': {'name': workspace}}), headers={'Content-type': 'application/json'}, auth=(geoserver_user, geoserver_passwd))
  pprint(ws)
  #echo ws.status_code
  #echo ws.text
 
datastore = dataset['name'] + 'ds'
dsdata = json.dumps({'dataStore': {'name': datastore,
                                   'connectionParameters': {
                                       'host': db_settings['host'],
                                       'port': 5432,
                                       'database': db_settings['dbname'],
                                       'schema': 'public',
                                       'user': db_settings['user'],
                                       'passwd': db_settings['password'],
                                       'dbtype': 'postgis'
                                   }}})
#print dsdata
r = requests.post(geoserver_addr + 'rest/workspaces/' + workspace + '/datastores', data=dsdata, headers={'Content-type': 'application/json'}, auth=(geoserver_user, geoserver_passwd))
  pprint(r)
  #echo r.status_code
  #echo r.text
 
# name layer after the title of the ingested shapefile resource
ftdata = {'featureType': {'name': table_name, 'title': shp_resources[0]['name']}}
if nativeCRS != None:
    ftdata['featureType']['nativeCRS'] = nativeCRS
else:
    (minx, miny, maxx, maxy) = bbox.replace("BOX", "").replace("(", "").replace(")", "").replace(",", " ").split(" ")
    bbox_obj = {'minx': minx, 'maxx': maxx, 'miny': miny, 'maxy': maxy}
    ftdata['featureType']['nativeBoundingBox'] = bbox_obj
    ftdata['featureType']['latLonBoundingBox'] = bbox_obj
    ftdata['featureType']['srs'] = "EPSG:4326"
ftdata = json.dumps(ftdata)
r = requests.post(geoserver_addr + 'rest/workspaces/' + workspace + '/datastores/' + datastore + "/featuretypes", data=ftdata, headers={'Content-Type': 'application/json'}, auth=(geoserver_user, geoserver_passwd))
  pprint(r)
 
  #generate wms/wfs api links, kml, png resources and add to package
  print bgjson
  dataset['spatial'] = bgjson
  # ckan.action.resource_update(id=resource['id'],url=resource['url'],name=resource['name'], last_modified=datetime.now().isoformat())
  #/geodatasetws/wms
  #/geodatasetws/wfs
  #http://cloudnine.lambdacomplex.org:8080/geoserver/tiger/ows?service=WFS&version=1.0.0&request=GetFeature&typeName=tiger:tiger_roads&outputFormat=application/json
  #http://cloudnine.lambdacomplex.org:8080/geoserver/tiger/ows?service=WFS&version=1.0.0&request=GetFeature&typeName=tiger:tiger_roads&outputFormat=csv
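
# A possible sketch (not wired in) of publishing the GeoServer endpoints back to CKAN via
# the action API; the per-workspace /wms and /wfs URLs below are illustrative assumptions:
#ckan.action.package_update(**dataset)  # persist dataset['spatial']
#ckan.action.resource_create(package_id=dataset['id'], format='wms',
#    name=dataset['name'] + ' WMS', url=geoserver_addr + workspace + '/wms?request=GetCapabilities')
#ckan.action.resource_create(package_id=dataset['id'], format='wfs',
#    name=dataset['name'] + ' WFS', url=geoserver_addr + workspace + '/wfs?request=GetCapabilities')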
 
  #delete tempdir
  shutil.rmtree(tempdir)
  success(msg)