#!/usr/bin/python | |
# coding=utf-8 | |
''' | |
auto update batch job for ckan | |
<alex.sadleir@linkdigital.com.au> | |
1.0 26/05/2014 initial implementation | |
TODO | |
- archive files in filestore | |
- update frequency reduced based on dataset metadata | |
- emails on HTTP errors | |
''' | |
import requests | import requests |
import ckanapi | import ckanapi |
import csv | import csv |
import sys | import sys |
import psycopg2 | import psycopg2 |
import json | import json |
from subprocess import Popen, PIPE | from subprocess import Popen, PIPE |
def updateresource(resource_id, dataset_id, timeout=60):
    '''Re-ingest one CKAN resource if its remote file appears to have changed.

    A conditional HEAD request is sent to the resource URL, using the
    ``etag`` and ``file_last_modified`` values stored on the resource (when
    present).  A 304 response means nothing is done.  For any other response
    the ``Last-Modified``/``ETag`` response headers are saved back onto the
    resource with ``resource_update`` and the data is ingested again: SHP and
    KML resources are passed to ``dga-spatialingestor.py`` in a subprocess,
    every other format is submitted to the DataPusher.

    Reads the module-level ``api_url``, ``api_key`` and ``db_settings_json``
    values taken from the command line.

    :param resource_id: id of the CKAN resource to check.
    :param dataset_id: id of the dataset that owns the resource; forwarded to
        the spatial ingestor.
    :param timeout: seconds to wait for the HEAD request before giving up on
        this resource.
    :returns: None.
    '''
    print(' --- ')
    ckan = ckanapi.RemoteCKAN(api_url, api_key)
    resource = ckan.action.resource_show(id=resource_id)
    print('updating ' + resource['name'] + '(' + resource_id + ', ' + dataset_id + ')')
    print(resource)
    url = resource['url']

    # Build the conditional request from the validators saved on the last run,
    # e.g. If-Modified-Since: 'Mon, 24 Feb 2014 01:48:29 GMT' and
    # If-None-Match: '"1393206509.38-638"'.
    headers = {}
    if 'etag' in resource:
        headers['If-None-Match'] = resource['etag']
    if 'file_last_modified' in resource:
        headers["If-Modified-Since"] = resource['file_last_modified']
    print(headers)

    # A single unreachable or hanging URL must not abort (or stall) the
    # update of every remaining resource, so report the failure and move on.
    try:
        r = requests.head(url, headers=headers, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print('HEAD request failed, skipping resource: ' + str(e))
        return

    if r.status_code == 304:
        print('not modified')
        return

    # NOTE(review): every non-304 status, including 4xx/5xx, is treated as
    # "modified" here, exactly as before -- see the TODO about HTTP errors.
    print(r.status_code)
    print(r.headers)
    if 'last-modified' in r.headers:
        resource['file_last_modified'] = r.headers['last-modified']
    if 'etag' in r.headers:
        resource['etag'] = r.headers['etag']

    # Save the new validators so the next run can send a conditional request.
    print(resource)
    ckan.call_action('resource_update', resource)

    if resource['format'].lower() in ('shp', 'kml'):
        print("geoingest!")
        pargs = ['dga-spatialingestor.py', db_settings_json, api_url, api_key, dataset_id]
        print(pargs)
        # communicate() blocks until the ingestor subprocess has finished.
        Popen(pargs).communicate()
    else:
        print("datapusher!")
        # https://github.com/ckan/ckan/blob/master/ckanext/datapusher/logic/action.py#L19
        ckan.action.datapusher_submit(resource_id=resource_id)
# Command line: <database settings as JSON> <CKAN API url> <CKAN API key>.
# NOTE(review): the usage text calls the first argument "postgis_url", but it
# is parsed with json.loads() below.
if len(sys.argv) != 4:
    print("autoupdate ingester. command line: postgis_url api_url api_key")
    sys.exit(-1)

# db_settings_json, api_url and api_key are module-level on purpose:
# updateresource() reads them as globals.
(path, db_settings_json, api_url, api_key) = sys.argv
db_settings = json.loads(db_settings_json)
datastore_db_settings = dict(db_settings)
datastore_db_settings['dbname'] = db_settings['datastore_dbname']
datastore_db_settings_json = json.dumps(datastore_db_settings)

#until https://github.com/ckan/ckan/pull/1732 is merged, use database directly
try:
    conn = psycopg2.connect(dbname=db_settings['dbname'],
                            user=db_settings['user'],
                            password=db_settings['password'],
                            host=db_settings['host'])
except psycopg2.Error as e:
    # Exit with the message on stderr; there is nothing to do without a
    # connection, and continuing would only fail on the unbound ``conn``.
    sys.exit("I am unable to connect to the database. " + str(e))

try:
    # Open a cursor to perform database operations
    cur = conn.cursor()
    conn.set_isolation_level(0)
    try:
        cur.execute('select resource.id resource_id, package.id dataset_id from resource inner join resource_group on resource.resource_group_id = resource_group.id inner join package on resource_group.package_id = package.id where resource.extras like \'%"autoupdate": "active"%\';')
        # Each row is (resource_id, dataset_id) for a resource flagged for
        # automatic updates.
        row = cur.fetchone()
        while row is not None:
            updateresource(row[0], row[1])
            row = cur.fetchone()
    finally:
        cur.close()
finally:
    # Release the connection even when an update raises part-way through.
    conn.close()
import logging | import logging |
import ckan.plugins as plugins | import ckan.plugins as plugins |
import ckan.lib as lib | import ckan.lib as lib |
import ckan.lib.dictization.model_dictize as model_dictize | import ckan.lib.dictization.model_dictize as model_dictize |
import ckan.plugins.toolkit as tk | import ckan.plugins.toolkit as tk |
import ckan.model as model | import ckan.model as model |
from pylons import config | from pylons import config |
from sqlalchemy import orm | from sqlalchemy import orm |
import ckan.model | import ckan.model |
def get_user_datasets(user_dict):
    '''Return the datasets a user created plus those in their activity list.

    The datasets in ``user_dict['datasets']`` are combined with every
    ``data['package']`` entry found in the user's ``user_activity_list``
    result, and de-duplicated by dataset ``id``.  When an id occurs more than
    once the first occurrence wins, with created datasets considered first.

    :param user_dict: user dict providing ``id`` and ``datasets``.
    :returns: the values of an id-keyed dict of dataset dicts (order is not
        guaranteed).
    '''
    created_datasets = user_dict['datasets']
    activities = lib.helpers.get_action('user_activity_list', {'id': user_dict['id']})

    # setdefault keeps the first dataset seen for each id and does a single
    # hash lookup, instead of scanning a freshly built key list every time.
    datasets_by_id = {}
    for dataset in created_datasets:
        datasets_by_id.setdefault(dataset['id'], dataset)
    for activity in activities:
        package = activity['data'].get('package')
        if package:
            datasets_by_id.setdefault(package['id'], package)
    return datasets_by_id.values()
def get_related_dataset(related_id):
    '''Return the dataset linked to a related item, or None if there is none.

    Looks up the first ``dataset_id`` recorded for ``related_id`` in the
    ``related_dataset`` table and returns the ``package_show`` result for it.

    :param related_id: id of the related item.
    :returns: the ``package_show`` result, or ``None`` when no row in
        ``related_dataset`` matches ``related_id``.
    '''
    # Imported here so the block does not depend on a module-level import.
    from sqlalchemy import text

    # Bind the id as a parameter rather than concatenating it into the SQL:
    # the value comes from a template call and must not be able to alter
    # the statement.
    row = model.Session.execute(
        text("select dataset_id from related_dataset "
             "where related_id = :related_id limit 1"),
        {'related_id': related_id}).first()
    if row is None:
        # Previously this raised TypeError from indexing None.
        return None
    # NOTE(review): package_show may presumably raise for datasets the
    # current user cannot see -- confirm how callers should handle that.
    return lib.helpers.get_action('package_show', {'id': row[0]})
def related_create(context, data_dict=None):
    '''Auth function that refuses every request to create a related item.

    Both arguments are accepted and ignored; the same denial is returned for
    every caller.
    '''
    denial = {
        'success': False,
        'msg': 'No one is allowed to create related items',
    }
    return denial
class DataGovAuPlugin(plugins.SingletonPlugin,
                      tk.DefaultDatasetForm):
    '''CKAN plugin wiring up the customisations defined in this module.

    It registers the extension's template, public and resource directories,
    exposes ``get_user_datasets`` and ``get_related_dataset`` as template
    helpers, and replaces the ``related_create`` auth function.
    '''
    plugins.implements(plugins.IConfigurer, inherit=False)
    plugins.implements(plugins.ITemplateHelpers, inherit=False)
    plugins.implements(plugins.IAuthFunctions)

    def get_auth_functions(self):
        '''Return the auth functions supplied by this plugin, keyed by name.'''
        auth_functions = dict(related_create=related_create)
        return auth_functions

    def update_config(self, config):
        '''Register this plugin's templates and static files with CKAN.'''
        public_dir = 'theme/public'
        # The paths are passed exactly as written here; no directory is
        # computed from __file__ in this method.
        tk.add_template_directory(config, 'templates')
        tk.add_public_directory(config, public_dir)
        tk.add_resource(public_dir, 'ckanext-datagovau')

    def get_helpers(self):
        '''Return the template helpers supplied by this plugin, keyed by name.'''
        helpers = dict(
            get_user_datasets=get_user_datasets,
            get_related_dataset=get_related_dataset,
        )
        return helpers
{% ckan_extends %}
{#
Overrides the basic_fields_url block: renders the resource URL input, the
"Generate API from this Link" (autoupdate) select and, when uploads are
enabled, the file upload controls.
#}
{% block basic_fields_url %}
  {% set is_upload = (data.url_type == 'upload') %}
  {% set field_url='url' %}
  {% set field_upload='upload' %}
  {% set field_clear='clear_upload' %}
  {% set is_upload_enabled=h.uploads_enabled() %}
  {% set is_url=data.url and not is_upload %}
  {% set upload_label=_('File') %}
  {% set url_label=_('URL') %}
  {% set placeholder = placeholder if placeholder else _('http://example.com/my-image.jpg') %}
  {% set url_label = url_label or _('Image URL') %}
  {% set upload_label = upload_label or _('Image') %}
  {% if is_upload_enabled %}
  <div class="image-upload" data-module="image-upload" data-module-is_url="{{ 'true' if is_url else 'false' }}" data-module-is_upload="{{ 'true' if is_upload else 'false' }}"
       data-module-field_url="{{ field_url }}" data-module-field_upload="{{ field_upload }}" data-module-field_clear="{{ field_clear }}" data-module-upload_label="{{ upload_label }}">
  {% endif %}
  {% call form.input(field_url, label=url_label, id='field-image-url', placeholder=placeholder, value=data.get(field_url), error=errors.get(field_url), classes=['control-full']) %}
    <span id="autoupdate_form">
    {# `selected` has to equal one of the option *values* ('active' /
       'inactive'); use the value already stored on the resource so editing
       does not silently reset it, and fall back to 'active' for new ones. #}
    {% call form.select('autoupdate', label=_('Generate API from this Link'), options= [{'value': 'active', 'text': 'Active'}, {'value': 'inactive', 'text': 'Inactive'}], selected=data.autoupdate or 'active', error=errors.autoupdate) %}
      <br/>
      Where a file is compatible with either CKAN or GeoServer we will attempt to make a functional end-point for this resource. The link provided above will also be checked for a new version based on the update frequency as set at the dataset level.
    {# NOTE(review): this span is opened outside the select call but closed
       inside its body; left as-is. #}
    </span>
    {% endcall %}
  {% endcall %}
  {% if is_upload_enabled %}
    {{ form.input(field_upload, label=upload_label, id='field-image-upload', type='file', placeholder='', value='', error='', classes=['control-full']) }}
    {% if is_upload %}
      {{form.checkbox(field_clear, label=_('Clear Upload'), id='field-clear-upload', value='true', error='', classes=['control-full']) }}
    {% endif %}
  {% endif %}
  {% if is_upload_enabled %}</div>{% endif %}
{% endblock %}
{#
Displays a single related item.

related  - The related item dict.
pkg_id   - The id of the owner package. If present the edit button will be
           displayed.
position - Position of this item in the list being rendered; a clearfix
           element is emitted after every third item.

Example (the snippet path is a placeholder -- use this file's own path):

  {% snippet 'path/to/this_snippet.html', related=related, pkg_id=pkg.id, position=loop.index %}

#}
{% set placeholder_map = {
  'application': h.url_for_static('/base/images/placeholder-application.png')
} %}
{% set tooltip = _('Go to {related_item_type}').format(related_item_type=related.type|replace('_', ' ')|title) %}
{# Look the dataset up once and reuse it: each helper call runs a query. #}
{% set related_dataset = h.get_related_dataset(related.id) %}
<li class="related-item media-item" data-module="related-item">
  <img src="{{ related.image_url or placeholder_map[related.type] or h.url_for_static('/base/images/placeholder-image.png') }}" alt="{{ related.title }}" class="media-image">
  <h3 class="media-heading">{{ related.title }}</h3>
  {% if related.description %}
    <div class="prose">
      {{ h.render_markdown(related.description) }}
    </div>
  {% endif %}
  {% if related_dataset %}
    <small>Using dataset: {{ related_dataset.title }}</small>
  {% endif %}
  {# rel="noopener" stops the externally supplied page from reaching back
     into this window through window.opener. #}
  <a class="media-view" href="{{ related.url }}" target="_blank" rel="noopener" title="{{ tooltip }}">
    <span>{{ tooltip }}</span>
    <span class="banner">
      {%- if related.type == 'application' -%}
        app
      {%- elif related.type == 'visualization' -%}
        viz
      {%- else -%}
        {{ related.type | replace('news_', '') }}
      {%- endif -%}
    </span>
  </a>
  {% if pkg_id %}
    {{ h.nav_link(_('Edit'), controller='related', action='edit', id=pkg_id, related_id=related.id, class_='btn btn-primary btn-small media-edit') }}
  {% endif %}
</li>
{% if position is divisibleby 3 %}
  <li class="clearfix js-hide"></li>
{% endif %}