autoupdate
autoupdate

  #!/usr/bin/python
  # coding=utf-8
  '''
  auto update batch job for ckan
  <alex.sadleir@linkdigital.com.au>
  1.0 26/05/2014 initial implementation
   
  TODO
  - archive files in filestore
  - update frequency reduced based on dataset metadata
  - emails on HTTP errors
  '''
   
import requests import requests
import ckanapi import ckanapi
import csv import csv
import sys import sys
import psycopg2 import psycopg2
import json import json
from subprocess import Popen, PIPE from subprocess import Popen, PIPE
   
def updateresource(resource_id, dataset_id):
    '''Re-ingest one CKAN resource if its remote file has changed.

    Fetches the resource record through the CKAN API and issues a
    conditional HTTP HEAD against the resource URL, using the validators
    stored on the record ('etag' and 'file_last_modified').  On
    "304 Not Modified" nothing else happens.  For any other status the
    validators found in the response are written back with resource_update
    and the data is re-ingested: 'shp' and 'kml' formats run
    dga-spatialingestor.py as a subprocess, everything else is submitted
    to the DataPusher.

    Reads the module-level names api_url, api_key and db_settings_json.

    resource_id -- id of the resource to check.
    dataset_id -- id of the owning dataset; passed to the spatial ingestor.
    '''
    print(' --- ')
    ckan = ckanapi.RemoteCKAN(api_url, api_key)
    resource = ckan.action.resource_show(id=resource_id)
    print('updating ' + resource['name'] + '(' + resource_id + ', ' + dataset_id + ')')
    print(resource)
    url = resource['url']

    # Conditional request headers built from the validators saved on the
    # previous run, e.g. If-Modified-Since: 'Mon, 24 Feb 2014 01:48:29 GMT'
    # and If-None-Match: '"1393206509.38-638"'.
    headers = {}
    if 'etag' in resource:
        headers['If-None-Match'] = resource['etag']
    if 'file_last_modified' in resource:
        headers["If-Modified-Since"] = resource['file_last_modified']
    print(headers)

    r = requests.head(url, headers=headers)
    if r.status_code == 304:
        print('not modified')
        return

    # NOTE(review): every non-304 status, including 4xx/5xx errors, is
    # treated as "changed" and triggers a re-ingest -- confirm whether error
    # responses should be skipped (the file header lists HTTP error handling
    # as a TODO).
    print(r.status_code)
    print(r.headers)
    if 'last-modified' in r.headers:
        resource['file_last_modified'] = r.headers['last-modified']
    if 'etag' in r.headers:
        resource['etag'] = r.headers['etag']

    # Save the refreshed validators so the next run can send them.
    print(resource)
    ckan.call_action('resource_update', resource)

    # A missing or null format must not crash the batch; it falls through
    # to the DataPusher branch like any other non-spatial format.
    resource_format = (resource.get('format') or '').lower()
    if resource_format in ('shp', 'kml'):
        print("geoingest!")
        pargs = ['dga-spatialingestor.py', db_settings_json, api_url, api_key, dataset_id]
        print(pargs)
        # Output is inherited from this process; communicate() waits for exit.
        p = Popen(pargs)
        p.communicate()
    else:
        print("datapusher!")
        # https://github.com/ckan/ckan/blob/master/ckanext/datapusher/logic/action.py#L19
        ckan.action.datapusher_submit(resource_id=resource_id)
# Command line: <db settings as a JSON object> <CKAN API url> <CKAN API key>
if len(sys.argv) != 4:
    # The first argument is parsed with json.loads below, so it is named
    # accordingly here rather than as a URL.
    print("autoupdate ingester. command line: db_settings_json api_url api_key")
    sys.exit(-1)

(path, db_settings_json, api_url, api_key) = sys.argv
db_settings = json.loads(db_settings_json)

# Same connection settings, but pointed at the datastore database.
datastore_db_settings = dict(db_settings)
datastore_db_settings['dbname'] = db_settings['datastore_dbname']
datastore_db_settings_json = json.dumps(datastore_db_settings)

# until https://github.com/ckan/ckan/pull/1732 is merged, use database directly

try:
    conn = psycopg2.connect(dbname=db_settings['dbname'],
                            user=db_settings['user'],
                            password=db_settings['password'],
                            host=db_settings['host'])
except psycopg2.Error as err:
    # Report and stop: nothing below can run without a connection.
    sys.exit("I am unable to connect to the database. (%s)" % err)

try:
    # Open a cursor to perform database operations
    cur = conn.cursor()
    conn.set_isolation_level(0)
    try:
        # Every resource whose extras mark it as autoupdate-active, together
        # with the id of the dataset that owns it.
        cur.execute(
            'select resource.id resource_id, package.id dataset_id '
            'from resource '
            'inner join resource_group on resource.resource_group_id = resource_group.id '
            'inner join package on resource_group.package_id = package.id '
            'where resource.extras like \'%"autoupdate": "active"%\';')
        for row in cur:
            updateresource(row[0], row[1])
    finally:
        cur.close()
finally:
    # Release the connection even when an update raises part-way through.
    conn.close()
   
import logging import logging
   
import ckan.plugins as plugins import ckan.plugins as plugins
import ckan.lib as lib import ckan.lib as lib
import ckan.lib.dictization.model_dictize as model_dictize import ckan.lib.dictization.model_dictize as model_dictize
import ckan.plugins.toolkit as tk import ckan.plugins.toolkit as tk
import ckan.model as model import ckan.model as model
from pylons import config from pylons import config
   
from sqlalchemy import orm from sqlalchemy import orm
import ckan.model import ckan.model
   
def get_user_datasets(user_dict):
    '''Return the datasets a user created and those they have edited.

    Combines user_dict['datasets'] with the 'package' entry of every item
    in the user's activity list that has one, and removes duplicates by
    dataset id.  When an id occurs more than once the first occurrence is
    kept, so entries from user_dict['datasets'] take precedence.

    user_dict -- user dict providing the 'id' and 'datasets' keys.

    Returns a list of dataset dicts.
    '''
    created_datasets_list = user_dict['datasets']
    activity_list = lib.helpers.get_action('user_activity_list',
                                           {'id': user_dict['id']})
    active_datasets_list = [activity['data']['package']
                            for activity in activity_list
                            if activity['data'].get('package')]

    filtered_dict = {}
    for dataset in created_datasets_list + active_datasets_list:
        # setdefault keeps the first dataset seen for each id.
        filtered_dict.setdefault(dataset['id'], dataset)
    return list(filtered_dict.values())
   
def get_related_dataset(related_id):
    '''Return the id of the dataset linked to a related item.

    Looks up the first matching row of the related_dataset table.

    related_id -- id of the related item.

    Returns the dataset id, or None when the related item has no linked
    dataset.
    '''
    # Imported here so the block does not depend on the module's import list.
    from sqlalchemy import text

    # related_id is passed as a bound parameter rather than concatenated
    # into the statement, so it cannot alter the SQL.
    row = model.Session.execute(
        text('select dataset_id from related_dataset '
             'where related_id = :related_id limit 1'),
        {'related_id': related_id}).first()
    if row is None:
        return None
    return row[0]
   
def related_create(context, data_dict=None):
    '''Auth function that denies creation of related items to everyone.

    context -- auth context; not consulted.
    data_dict -- request data; not consulted.

    Returns a dict with 'success' set to False and an explanatory 'msg'.
    '''
    return {'success': False, 'msg': 'No one is allowed to create related items'}
   
class DataGovAuPlugin(plugins.SingletonPlugin,
                      tk.DefaultDatasetForm):
    '''CKAN plugin providing the ckanext-datagovau customisations.

    Implements three plugin interfaces:

    - IConfigurer: registers this extension's template, public and
      resource directories.
    - ITemplateHelpers: exposes get_user_datasets and get_related_dataset
      to templates.
    - IAuthFunctions: overrides the related_create auth function.
    '''
    plugins.implements(plugins.IConfigurer, inherit=False)
    plugins.implements(plugins.ITemplateHelpers, inherit=False)
    plugins.implements(plugins.IAuthFunctions)

    def get_auth_functions(self):
        '''Return the auth functions this plugin overrides, keyed by name.'''
        return {'related_create': related_create}

    def update_config(self, config):
        '''Register this extension's directories with CKAN.

        config -- the CKAN config object handed to IConfigurer plugins.
        '''
        # Add this plugin's templates dir to CKAN's extra_template_paths, so
        # that CKAN will use this plugin's custom templates.
        tk.add_template_directory(config, 'templates')
        tk.add_public_directory(config, 'theme/public')
        tk.add_resource('theme/public', 'ckanext-datagovau')
        # config['licenses_group_url'] = 'http://%(ckan.site_url)/licenses.json'

    def get_helpers(self):
        '''Return the template helpers this plugin provides, keyed by name.'''
        return {'get_user_datasets': get_user_datasets,
                'get_related_dataset': get_related_dataset}
   
   
{#
Displays a single related item.

related  - The related item dict.
pkg_id   - The id of the owner package. If present the edit button will be
           displayed.
position - Position of the item in the list being rendered. A clearfix
           element is emitted after every third item.

Example (the snippet path is an assumption -- use the path of this file):

  {% snippet 'related/snippets/related_item.html', related=related, pkg_id=pkg.id, position=loop.index %}

#}
{% set placeholder_map = {
  'application': h.url_for_static('/base/images/placeholder-application.png')
} %}
{% set tooltip = _('Go to {related_item_type}').format(related_item_type=related.type|replace('_', ' ')|title) %}
{# Look the linked dataset up once; the helper runs a database query. #}
{% set related_dataset_id = h.get_related_dataset(related.id) %}
<li class="related-item media-item" data-module="related-item">
  <img src="{{ related.image_url or placeholder_map[related.type] or h.url_for_static('/base/images/placeholder-image.png') }}" alt="{{ related.title }}" class="media-image">
  <h3 class="media-heading">{{ related.title }}</h3>
  {% if related.description %}
    <div class="prose">
      {{ h.render_markdown(related.description) }}
    </div>
  {% endif %}
  {# Only name the dataset when one is linked and the current user may see it. #}
  {% if related_dataset_id and h.check_access('package_show', {"id": related_dataset_id}) %}
    <small>Using dataset: {{ h.get_action('package_show', {"id": related_dataset_id}).title }}</small>
  {% endif %}

  {# rel="noopener" stops the externally hosted page from reaching window.opener. #}
  <a class="media-view" href="{{ related.url }}" target="_blank" rel="noopener" title="{{ tooltip }}">
    <span>{{ tooltip }}</span>
    <span class="banner">
      {%- if related.type == 'application' -%}
        app
      {%- elif related.type == 'visualization' -%}
        viz
      {%- else -%}
        {{ related.type | replace('news_', '') }}
      {%- endif -%}
    </span>
  </a>
  {% if pkg_id %}
    {{ h.nav_link(_('Edit'), controller='related', action='edit', id=pkg_id, related_id=related.id, class_='btn btn-primary btn-small media-edit') }}
  {% endif %}
</li>
{% if position is divisibleby 3 %}
  <li class="clearfix js-hide"></li>
{% endif %}