autoupdate
autoupdate

  #!/usr/bin/python
  # coding=utf-8
  '''
  auto update batch job for ckan
  <alex.sadleir@linkdigital.com.au>
  1.0 26/05/2014 initial implementation
   
  TODO
  - archive files in filestore
  - update frequency reduced based on dataset metadata
  - emails on HTTP errors
  '''
   
import requests import requests
import ckanapi import ckanapi
# copy (select id,url,format,extras from resource where extras like '%"autoupdate": "active"%') TO STDOUT WITH CSV;  
# f759e4b6-723c-4863-8a26-1529d689cad8,http://data.gov.au/geoserver/geelong-roofprints-kml/wms?request=GetCapabilities,wms,"{""autoupdate"": ""active""}"  
import fileinput  
import csv import csv
  import sys
  import psycopg2
  import json
  from subprocess import Popen, PIPE
   
def updateresource(resource_id, dataset_id, timeout=30):
    """Re-check one auto-updating resource and re-ingest it if its link changed.

    Sends a conditional HEAD request to the resource URL using the validators
    stored on the resource (``etag`` / ``file_last_modified``).  When the
    server reports a change, the new validators are saved back to CKAN and the
    resource is handed either to the spatial ingestor (``shp``/``kml``) or to
    the datapusher (everything else).

    Reads the module-level ``api_url``, ``api_key`` and ``db_settings_json``
    values that the script parses from the command line.

    Args:
        resource_id: id of the CKAN resource to check.
        dataset_id: id of the dataset owning the resource; forwarded to the
            spatial ingestor.
        timeout: seconds to wait for the HEAD request before giving up.
    """
    print(' --- ')
    ckan = ckanapi.RemoteCKAN(api_url, api_key)
    resource = ckan.action.resource_show(id=resource_id)
    # %-formatting instead of '+' so a missing/None name cannot raise.
    print('updating %s(%s, %s)' % (resource.get('name'), resource_id, dataset_id))
    print(resource)
    url = resource['url']

    # Only send validators that actually hold a value.
    headers = {}
    if resource.get('etag'):
        headers['If-None-Match'] = resource['etag']
    if resource.get('file_last_modified'):
        headers['If-Modified-Since'] = resource['file_last_modified']
    print(headers)

    # A timeout keeps one unresponsive host from stalling the whole batch.
    r = requests.head(url, headers=headers, timeout=timeout)
    if r.status_code == 304:
        print('not modified')
        return

    print(r.status_code)
    print(r.headers)

    # An error response is not a new version of the file: do not overwrite
    # the stored validators or trigger an ingest for it.  405/501 are let
    # through because they only mean the server refuses HEAD requests.
    head_unsupported = (405, 501)
    if r.status_code >= 400 and r.status_code not in head_unsupported:
        print('skipping %s: HTTP error %d' % (url, r.status_code))
        return

    if 'last-modified' in r.headers:
        resource['file_last_modified'] = r.headers['last-modified']
    if 'etag' in r.headers:
        resource['etag'] = r.headers['etag']

    # save updated resource
    print(resource)
    ckan.call_action('resource_update', resource)

    # format may be absent or None on a resource.
    resource_format = (resource.get('format') or '').lower()
    if resource_format in ('shp', 'kml'):
        print("geoingest!")
        pargs = ['dga-spatialingestor.py', db_settings_json, api_url, api_key, dataset_id]
        # Do not print pargs: it carries the database password and API key.
        print('running %s for dataset %s' % (pargs[0], dataset_id))
        p = Popen(pargs)
        p.communicate()
        if p.returncode != 0:
            print('%s exited with status %s' % (pargs[0], p.returncode))
    else:
        print("datapusher!")
        # https://github.com/ckan/ckan/blob/master/ckanext/datapusher/logic/action.py#L19
        ckan.action.datapusher_submit(resource_id=resource_id)
# Command line: <db_settings_json> <api_url> <api_key>
# db_settings_json is a JSON object; the keys read below are dbname, user,
# password, host and datastore_dbname.
if len(sys.argv) != 4:
    print("autoupdate ingester. command line: db_settings_json api_url api_key")
    sys.exit(-1)

(path, db_settings_json, api_url, api_key) = sys.argv
db_settings = json.loads(db_settings_json)
# Same credentials, pointed at the datastore database.
datastore_db_settings = dict(db_settings)
datastore_db_settings['dbname'] = db_settings['datastore_dbname']
datastore_db_settings_json = json.dumps(datastore_db_settings)

# until https://github.com/ckan/ckan/pull/1732 is merged, use database directly
try:
    conn = psycopg2.connect(dbname=db_settings['dbname'], user=db_settings['user'], password=db_settings['password'], host=db_settings['host'])
except psycopg2.Error as e:
    print("I am unable to connect to the database: %s" % e)
    sys.exit(1)

# Collect every (resource_id, dataset_id) pair flagged for auto-update, then
# release the connection before the slow per-resource network work starts.
try:
    cur = conn.cursor()
    conn.set_isolation_level(0)
    cur.execute('select resource.id resource_id, package.id dataset_id from resource inner join resource_group on resource.resource_group_id = resource_group.id inner join package on resource_group.package_id = package.id where resource.extras like \'%"autoupdate": "active"%\';')
    rows = cur.fetchall()
    cur.close()
finally:
    conn.close()

# One broken resource must not stop the remaining ones from being checked;
# failures are reported and reflected in the exit status instead.
failed = 0
for resource_id, dataset_id in rows:
    try:
        updateresource(resource_id, dataset_id)
    except Exception as e:
        failed += 1
        print('failed to update %s: %s' % (resource_id, e))

if failed:
    sys.exit(1)
   
{% ckan_extends %}

{% block basic_fields_url %}
  {#
    URL / upload field of the resource form, extended with an "autoupdate"
    selector rendered under the URL input.  The selector defaults to "active"
    and shows the previously submitted value when one is present in `data`.
  #}
  {% set is_upload = (data.url_type == 'upload') %}
  {% set field_url='url' %}
  {% set field_upload='upload' %}
  {% set field_clear='clear_upload' %}
  {% set is_upload_enabled=h.uploads_enabled() %}
  {% set is_url=data.url and not is_upload %}
  {% set upload_label=_('File') %}
  {% set url_label=_('URL') %}

  {% set placeholder = placeholder if placeholder else _('http://example.com/my-image.jpg') %}
  {% set url_label = url_label or _('Image URL') %}
  {% set upload_label = upload_label or _('Image') %}

  {% if is_upload_enabled %}
  <div class="image-upload" data-module="image-upload" data-module-is_url="{{ 'true' if is_url else 'false' }}" data-module-is_upload="{{ 'true' if is_upload else 'false' }}"
       data-module-field_url="{{ field_url }}" data-module-field_upload="{{ field_upload }}" data-module-field_clear="{{ field_clear }}" data-module-upload_label="{{ upload_label }}">
  {% endif %}

  {% call form.input(field_url, label=url_label, id='field-image-url', placeholder=placeholder, value=data.get(field_url), error=errors.get(field_url), classes=['control-full']) %}
    <span id="autoupdate_form">
      {# `selected` has to be an option *value* ('active'/'inactive'), not its display text. #}
      {% call form.select('autoupdate', label=_('Generate API from this Link'), options= [{'value': 'active', 'text': 'Active'}, {'value': 'inactive', 'text': 'Inactive'}], selected=data.get('autoupdate') or 'active', error=errors.autoupdate) %}
        <br/>
        Where a file is compatible with either CKAN or GeoServer we will attempt to make a functional end-point for this resource. The link provided above will also be checked for a new version based on the update frequency as set at the dataset level.
    </span>
      {% endcall %}
  {% endcall %}

  {% if is_upload_enabled %}
    {{ form.input(field_upload, label=upload_label, id='field-image-upload', type='file', placeholder='', value='', error='', classes=['control-full']) }}
    {% if is_upload %}
      {{form.checkbox(field_clear, label=_('Clear Upload'), id='field-clear-upload', value='true', error='', classes=['control-full']) }}
    {% endif %}
  {% endif %}

  {% if is_upload_enabled %}</div>{% endif %}


{% endblock %}