#!/usr/bin/python
# coding=utf-8
'''
auto update batch job for ckan
<alex.sadleir@linkdigital.com.au>
1.0 26/05/2014 initial implementation
TODO
- archive files in filestore
- update frequency reduced based on dataset metadata
- emails on HTTP errors
'''
import requests | import requests |
import ckanapi | import ckanapi |
import csv | import csv |
import sys | import sys |
import psycopg2 | import psycopg2 |
import json | import json |
from subprocess import Popen, PIPE | from subprocess import Popen, PIPE |
def updateresource(resource_id, dataset_id):
    """Check one resource's source URL and re-ingest it if it has changed.

    Sends a conditional HEAD request to the resource URL using the ETag and
    Last-Modified values stored on the CKAN resource (if any).  On a 304 the
    resource is left alone.  Otherwise the new validators are saved back to
    CKAN and the resource is handed to the spatial ingestor (SHP/KML) or to
    the datapusher (everything else).

    Reads the module-level ``api_url``, ``api_key`` and ``db_settings_json``.

    Args:
        resource_id: id of the CKAN resource to check.
        dataset_id: id of the dataset owning the resource; passed on to the
            spatial ingestor.

    Returns:
        None.  A failed HEAD request is reported and the resource skipped so
        that the remaining resources in the batch are still processed.
    """
    head_timeout = 60  # seconds; without a timeout a dead host hangs the job

    print(' --- ')
    ckan = ckanapi.RemoteCKAN(api_url, api_key)
    resource = ckan.action.resource_show(id=resource_id)
    # %-formatting instead of '+' so that a resource whose name is None does
    # not raise TypeError.
    print('updating %s(%s, %s)' % (resource.get('name'), resource_id, dataset_id))
    print(resource)
    url = resource['url']

    # Conditional-request headers built from what was stored on the last run,
    # e.g. etag '"1393206509.38-638"',
    #      file_last_modified 'Mon, 24 Feb 2014 01:48:29 GMT'.
    headers = {}
    if 'etag' in resource:
        headers['If-None-Match'] = resource['etag']
    if 'file_last_modified' in resource:
        headers['If-Modified-Since'] = resource['file_last_modified']
    print(headers)

    try:
        r = requests.head(url, headers=headers, timeout=head_timeout)
    except requests.exceptions.RequestException as exc:
        # Unreachable host, timeout, bad URL, ...: skip just this resource.
        print('HEAD request for %s failed: %s' % (url, exc))
        return

    if r.status_code == 304:
        print('not modified')
        return

    # NOTE(review): 4xx/5xx responses are still treated as "modified" here,
    # exactly as before - confirm whether they should be skipped instead.
    print(r.status_code)
    print(r.headers)
    if 'last-modified' in r.headers:
        resource['file_last_modified'] = r.headers['last-modified']
    if 'etag' in r.headers:
        resource['etag'] = r.headers['etag']

    # save updated resource
    print(resource)
    ckan.call_action('resource_update', resource)

    # A missing or None format falls through to the datapusher branch.
    resource_format = (resource.get('format') or '').lower()
    if resource_format in ('shp', 'kml'):
        print("geoingest!")
        pargs = ['dga-spatialingestor.py', db_settings_json, api_url, api_key, dataset_id]
        # NOTE(review): this prints the API key to stdout - confirm that is acceptable.
        print(pargs)
        p = Popen(pargs)
        p.communicate()  # block until the ingestor has finished
    else:
        print("datapusher!")
        # https://github.com/ckan/ckan/blob/master/ckanext/datapusher/logic/action.py#L19
        ckan.action.datapusher_submit(resource_id=resource_id)
# Command line: <db_settings_json> <api_url> <api_key>
# The parsed values are kept as module-level names because updateresource()
# reads api_url, api_key and db_settings_json as globals.
if len(sys.argv) != 4:
    # The first argument is parsed with json.loads below, so the usage text
    # names it accordingly.
    print("autoupdate ingester. command line: db_settings_json api_url api_key")
    sys.exit(-1)
else:
    (path, db_settings_json, api_url, api_key) = sys.argv
    db_settings = json.loads(db_settings_json)
    # Same settings, but pointing at the datastore database.
    datastore_db_settings = dict(db_settings)
    datastore_db_settings['dbname'] = db_settings['datastore_dbname']
    datastore_db_settings_json = json.dumps(datastore_db_settings)

# until https://github.com/ckan/ckan/pull/1732 is merged, use database directly
try:
    conn = psycopg2.connect(dbname=db_settings['dbname'],
                            user=db_settings['user'],
                            password=db_settings['password'],
                            host=db_settings['host'])
except psycopg2.Error as exc:
    # The script used to call an undefined failure() here, which turned a
    # connection problem into a NameError.
    sys.stderr.write("I am unable to connect to the database.\n%s\n" % exc)
    sys.exit(1)

try:
    # Open a cursor to perform database operations
    cur = conn.cursor()
    try:
        conn.set_isolation_level(0)
        # Every resource whose extras contain "autoupdate": "active",
        # together with the id of the dataset it belongs to.
        cur.execute('''
            select resource.id resource_id, package.id dataset_id
            from resource
            inner join resource_group on resource.resource_group_id = resource_group.id
            inner join package on resource_group.package_id = package.id
            where resource.extras like '%"autoupdate": "active"%';
        ''')
        for row in cur:
            updateresource(row[0], row[1])
    finally:
        cur.close()
finally:
    # Release the connection even when a resource update raises.
    conn.close()
{% ckan_extends %}

{#
  Account links rendered in the header: an optional sysadmin link, the
  profile link with gravatar, the dashboard link with the count of new
  activities, and the overridable "edit settings" and "log out" links.
  Each icon carries a visible text label next to it.
#}
{% block header_account_logged %}
  {% if c.userobj.sysadmin %}
    <li>
      <a href="{{ h.url_for(controller='admin', action='index') }}" title="{{ _('Sysadmin settings') }}">
        <i class="icon-legal"> {{ _('Sysadmin settings') }}</i>
      </a>
    </li>
  {% endif %}
  <li>
    <a href="{{ h.url_for(controller='user', action='read', id=c.userobj.name) }}" class="image" title="{{ _('View profile') }}">
      {{ h.gravatar((c.userobj.email_hash if c and c.userobj else ''), size=22) }}
      <span class="username">{{ c.userobj.display_name }}</span>
    </a>
  </li>
  {% set new_activities = h.new_activities() %}
  <li class="notifications {% if new_activities > 0 %}notifications-important{% endif %}">
    {% set notifications_tooltip = ngettext('Dashboard (%(num)d new item)', 'Dashboard (%(num)d new items)', new_activities) %}
    <a href="{{ h.url_for(controller='user', action='dashboard') }}" title="{{ notifications_tooltip }}">
      {# Output the tooltip text itself; this used to print the bare variable name. #}
      <i class="icon-dashboard"> {{ notifications_tooltip }}</i>
      <span>{{ new_activities }}</span>
    </a>
  </li>
  {% block header_account_settings_link %}
    <li>
      <a href="{{ h.url_for(controller='user', action='edit', id=c.userobj.name) }}" title="{{ _('Edit settings') }}">
        <i class="icon-cog"> {{ _('Edit settings') }}</i>
      </a>
    </li>
  {% endblock %}
  {% block header_account_log_out_link %}
    <li>
      <a href="{{ h.url_for('/user/_logout') }}" title="{{ _('Log out') }}">
        <i class="icon-signout"> {{ _('Log out') }}</i>
      </a>
    </li>
  {% endblock %}
{# Close header_account_logged; it was left open before the next top-level block. #}
{% endblock %}
{#
  Main site navigation: the tabs built by h.build_nav_main plus two
  hand-written links.  The hand-written labels go through _() like the
  generated tabs so that they are translated as well.
#}
{% block header_site_navigation %}
  <nav class="section navigation">
    <ul class="nav nav-pills">
      {% block header_site_navigation_tabs %}
        {{ h.build_nav_main(
          ('search', _('Datasets')),
          ('organizations_index', _('Organizations')),
          ('about', _('About')),
          ('stats', _('Site Statistics'))
        ) }}
        <li><a href="/related">{{ _('Use Cases') }}</a></li>
        <li><a href="https://datagovau.ideascale.com/">{{ _('Feedback/Request Data') }}</a></li>
      {% endblock %}
    </ul>
  </nav>
{% endblock %}
{% ckan_extends %}

{#
  URL / upload field of the resource form, extended with an "autoupdate"
  select that is rendered as extra content of the URL input.
#}
{% block basic_fields_url %}
  {% set is_upload = (data.url_type == 'upload') %}
  {% set field_url='url' %}
  {% set field_upload='upload' %}
  {% set field_clear='clear_upload' %}
  {% set is_upload_enabled=h.uploads_enabled() %}
  {% set is_url=data.url and not is_upload %}
  {% set upload_label=_('File') %}
  {% set url_label=_('URL') %}
  {% set placeholder = placeholder if placeholder else _('http://example.com/my-image.jpg') %}
  {% set url_label = url_label or _('Image URL') %}
  {% set upload_label = upload_label or _('Image') %}

  {% if is_upload_enabled %}
    <div class="image-upload" data-module="image-upload" data-module-is_url="{{ 'true' if is_url else 'false' }}" data-module-is_upload="{{ 'true' if is_upload else 'false' }}"
      data-module-field_url="{{ field_url }}" data-module-field_upload="{{ field_upload }}" data-module-field_clear="{{ field_clear }}" data-module-upload_label="{{ upload_label }}">
  {% endif %}

  {% call form.input(field_url, label=url_label, id='field-image-url', placeholder=placeholder, value=data.get(field_url), error=errors.get(field_url), classes=['control-full']) %}
    <span id="autoupdate_form">
      {#
        "selected" has to equal one of the option values ('active' /
        'inactive') to take effect, so the stored value is used and
        'inactive' is the fallback.
        NOTE(review): confirm 'inactive' is the intended default for new resources.
      #}
      {% call form.select('autoupdate', label=_('Generate API from this Link'), options=[{'value': 'active', 'text': 'Active'}, {'value': 'inactive', 'text': 'Inactive'}], selected=(data.get('autoupdate') or 'inactive'), error=errors.autoupdate) %}
        <br/>
        Where a file is compatible with either CKAN or GeoServer we will attempt to make a functional end-point for this resource. The link provided above will also be checked for a new version based on the update frequency as set at the dataset level.
      {% endcall %}
    {# The span is closed at the level it was opened at, outside the select's call body. #}
    </span>
  {% endcall %}

  {% if is_upload_enabled %}
    {{ form.input(field_upload, label=upload_label, id='field-image-upload', type='file', placeholder='', value='', error='', classes=['control-full']) }}
    {% if is_upload %}
      {{ form.checkbox(field_clear, label=_('Clear Upload'), id='field-clear-upload', value='true', error='', classes=['control-full']) }}
    {% endif %}
  {% endif %}

  {% if is_upload_enabled %}</div>{% endif %}
{% endblock %}