autoupdate
autoupdate

  #!/usr/bin/python
  # coding=utf-8
  '''
  auto update batch job for ckan
  <alex.sadleir@linkdigital.com.au>
  1.0 26/05/2014 initial implementation
 
  TODO
  - archive files in filestore
  - update frequency reduced based on dataset metadata
  - emails on HTTP errors
  '''
 
  import requests
  import ckanapi
  import csv
  import sys
  import psycopg2
  import json
  from subprocess import Popen, PIPE
 
  def updateresource(resource_id, dataset_id):
  print ' --- '
  ckan = ckanapi.RemoteCKAN(api_url,api_key)
  #ckan = ckanapi.RemoteCKAN('http://demo.ckan.org')
  resource = ckan.action.resource_show(id=resource_id)
  print 'updating '+resource['name']+'('+resource_id+', '+dataset_id+')'
  print resource
  url = resource['url']
  #last_modified= 'Mon, 24 Feb 2014 01:48:29 GMT'
  #etag='"1393206509.38-638"'
  headers={}
  if 'etag' in resource:
  headers['If-None-Match'] = resource['etag']
  if 'file_last_modified' in resource:
  headers["If-Modified-Since"] = resource['file_last_modified']
  print headers
  r = requests.head(url, headers=headers)
  if r.status_code == 304:
  print 'not modified'
  return
  else:
  print r.status_code
  print r.headers
  if 'last-modified' in r.headers:
  resource['file_last_modified'] = r.headers['last-modified']
  if 'etag' in r.headers:
  resource['etag'] = r.headers['etag']
  #save updated resource
  print resource
  result = ckan.call_action('resource_update',resource)
  if resource['format'].lower() == 'shp' or resource['format'].lower() == 'kml':
  print "geoingest!"
  pargs= ['dga-spatialingestor.py', db_settings_json, api_url, api_key, dataset_id]
  print pargs
  p = Popen(pargs)#, stdout=PIPE, stderr=PIPE)
  p.communicate()
  else:
  print "datapusher!"
  # https://github.com/ckan/ckan/blob/master/ckanext/datapusher/logic/action.py#L19
  ckan.action.datapusher_submit(resource_id=resource_id)
 
  if len(sys.argv) != 4:
  print "autoupdate ingester. command line: postgis_url api_url api_key"
  sys.exit(-1)
  else:
  (path, db_settings_json, api_url, api_key) = sys.argv
  db_settings = json.loads(db_settings_json)
  datastore_db_settings = dict(db_settings)
  datastore_db_settings['dbname'] = db_settings['datastore_dbname']
  datastore_db_settings_json = json.dumps(datastore_db_settings)
 
  #until https://github.com/ckan/ckan/pull/1732 is merged, use database directly
 
  try:
  conn = psycopg2.connect(dbname=db_settings['dbname'], user=db_settings['user'], password=db_settings['password'], host=db_settings['host'])
  except:
  failure("I am unable to connect to the database.")
  # Open a cursor to perform database operations
  cur = conn.cursor()
  conn.set_isolation_level(0)
  cur.execute('select resource.id resource_id, package.id dataset_id from resource inner join resource_group on resource.resource_group_id = resource_group.id inner join package on resource_group.package_id = package.id where resource.extras like \'%"autoupdate": "active"%\';')
  row = cur.fetchone()
  while row is not None:
  updateresource(row[0],row[1])
  # process
  row = cur.fetchone()
  cur.close()
  conn.close()
 
file:b/admin/start.sh (new)
  export NEW_RELIC_CONFIG_FILE="newrelic.ini"
  export VIRTUAL_ENV="/var/lib/ckan/dga/pyenv"
  export PATH="/var/lib/ckan/dga/pyenv:/var/lib/ckan/dga/pyenv/bin:$PATH"
  cd /var/lib/ckan/dga/pyenv/src/ckan
  newrelic-admin run-program paster serve development.ini
 
import logging import logging
   
import ckan.plugins as plugins import ckan.plugins as plugins
import ckan.lib as lib import ckan.lib as lib
import ckan.lib.dictization.model_dictize as model_dictize import ckan.lib.dictization.model_dictize as model_dictize
import ckan.plugins.toolkit as tk import ckan.plugins.toolkit as tk
import ckan.model as model import ckan.model as model
from pylons import config from pylons import config
   
from sqlalchemy import orm from sqlalchemy import orm
import ckan.model import ckan.model
   
# get user created datasets and those they have edited # get user created datasets and those they have edited
def get_user_datasets(user_dict):
    '''Return the datasets a user created plus those in their activity list.

    The result is de-duplicated by dataset id.  When the same id appears more
    than once the first occurrence is kept, so a dataset from
    ``user_dict['datasets']`` takes precedence over the copy found in the
    activity stream.

    :param user_dict: user dictionary providing the ``id`` and ``datasets`` keys
    :returns: the values of an id -> dataset mapping (order not guaranteed)
    '''
    created_datasets_list = user_dict['datasets']
    # Only activities that carry a package payload refer to a dataset.
    active_datasets_list = [
        x['data']['package']
        for x in lib.helpers.get_action('user_activity_list', {'id': user_dict['id']})
        if x['data'].get('package')
    ]
    filtered_dict = {}
    for dataset in created_datasets_list + active_datasets_list:
        # setdefault keeps the first dataset seen for an id, without the
        # per-iteration key list that "in filtered_dict.keys()" builds.
        filtered_dict.setdefault(dataset['id'], dataset)
    return filtered_dict.values()
   
  def get_related_dataset(related_id):
  result = model.Session.execute("select dataset_id from related_dataset where related_id =\'"+related_id+"\' limit 1;").first()[0]
  return result
   
  def related_create(context, data_dict=None):
  return {'success': False, 'msg': 'No one is allowed to create related items'}
   
class DataGovAuPlugin(plugins.SingletonPlugin,
                      tk.DefaultDatasetForm):
    '''CKAN plugin bundling this extension's site customisations.

    It registers the extension's template and public directories
    (IConfigurer), exposes extra template helpers (ITemplateHelpers) and
    overrides the ``related_create`` auth function (IAuthFunctions).

    '''
    plugins.implements(plugins.IConfigurer, inherit=False)
    plugins.implements(plugins.ITemplateHelpers, inherit=False)
    plugins.implements(plugins.IAuthFunctions)

    def get_auth_functions(self):
        '''Map auth function names to the overrides this plugin supplies.'''
        overrides = dict(related_create=related_create)
        return overrides

    def update_config(self, config):
        '''Register this plugin's templates and static files with CKAN.'''
        # Custom templates shipped with the plugin.
        tk.add_template_directory(config, 'templates')
        # The same directory is added as a public directory and registered
        # as the 'ckanext-datagovau' resource.
        static_dir = 'theme/public'
        tk.add_public_directory(config, static_dir)
        tk.add_resource(static_dir, 'ckanext-datagovau')

    def get_helpers(self):
        '''Map helper names to the functions made available to templates.'''
        helpers = dict(get_user_datasets=get_user_datasets,
                       get_related_dataset=get_related_dataset)
        return helpers
   
   
{% ckan_extends %} {% ckan_extends %}
   
{% block header_site_navigation %} {% block header_site_navigation %}
<nav class="section navigation"> <nav class="section navigation">
<ul class="nav nav-pills"> <ul class="nav nav-pills">
{% block header_site_navigation_tabs %} {% block header_site_navigation_tabs %}
{{ h.build_nav_main( {{ h.build_nav_main(
('search', _('Datasets')), ('search', _('Datasets')),
('organizations_index', _('Organizations')), ('organizations_index', _('Organizations')),
('about', _('About')) ('about', _('About')),
  ('stats', _('Site Statistics'))
) }} ) }}
<li><a href="//data.gov.au/stats">Site Statistics</a></li> <li><a href="/related">Use Cases</a></li>
<li><a href="https://datagovau.ideascale.com/">Feedback/Request Data</a></li> <li><a href="https://datagovau.ideascale.com/">Feedback/Request Data</a></li>
{% endblock %} {% endblock %}
</ul> </ul>
</nav> </nav>
{% endblock %} {% endblock %}
   
{% ckan_extends %} {% ckan_extends %}
{% block home_search %} {% block home_secondary_content %}
  <script type="text/javascript" src="//www.google.com/jsapi">
<div class="hero-secondary-inner">  
<script type="text/javascript" src="http://www.google.com/jsapi">  
</script> </script>
<script type="text/javascript"> <script type="text/javascript">
google.load("feeds", "1") //Load Google Ajax Feed API (version 1) google.load("feeds", "1") //Load Google Ajax Feed API (version 1)
</script> </script>
   
<div id="feeddiv"> <div id="feeddiv"></div>
</div>  
</div>  
   
   
   
<script type="text/javascript"> <script type="text/javascript">
   
var feedcontainer=document.getElementById("feeddiv") var feedcontainer=document.getElementById("feeddiv")
var feedurl="http://www.finance.gov.au/taxonomy/term/1274/feed" var feedurl="http://www.finance.gov.au/taxonomy/term/1274/feed"
var feedlimit=4 var feedlimit=3
var rssoutput="<div class='module module-shallow module-narrow module-dark info box' style='color:black'><h2>Latest data.gov.au News</h2><i class='ckan-icon ckan-icon-feed'></i><a href='http://www.finance.gov.au/taxonomy/term/1274/feed/'>&nbsp;Subscribe to the blog </a></div>" var rssoutput="<div class='module module-shallow module-narrow module-dark info box' style='color:black'><h2>Latest data.gov.au News</h2><i class='ckan-icon ckan-icon-feed'></i><a href='http://www.finance.gov.au/taxonomy/term/1274/feed/'>&nbsp;Subscribe to the blog </a></div>"
   
   
function rssfeedsetup(){
  // Ask the Google Feed API for up to `feedlimit` entries of `feedurl`;
  // displayfeed receives the result.
  var feed = new google.feeds.Feed(feedurl);
  feed.setNumEntries(feedlimit);
  feed.load(displayfeed);
}
   
function displayfeed(result){
  // Feed API callback: append one box per entry to the markup already held
  // in `rssoutput` and write it into the feed container, or alert on error.
  if (result.error){
    alert("Error fetching feeds!");
    return;
  }
  var entries = result.feed.entries;
  for (var idx = 0; idx < entries.length; idx++) {
    var entry = entries[idx];
    rssoutput += "<div class='module module-shallow module-narrow module-dark info box'><h3><a href='" + entry.link + "'>" + entry.title + "</a></h3>";
    rssoutput += " <font color='black'>Posted on " + new Date(entry.publishedDate).toDateString() + "</font></div>";
  }
  feedcontainer.innerHTML = rssoutput;
}
   
window.onload=function(){ window.onload=function(){
rssfeedsetup() rssfeedsetup()
} }
   
</script> </script>
   
<form class="module-content search-form" method="get" action="{% url_for controller='package', action='search' %}"> {{ super() }}
<h3 class="heading">{{ _("Search Your Data") }}</h3>  
<div class="search-input control-group search-giant">  
<input type="text" class="search" name="q" value="{{ c.q }}" autocomplete="off" placeholder="{{ _('eg. Gold Prices') }}" />  
<button type="submit">  
<i class="icon-search"></i>  
<span>{{ _('Search') }}</span>  
</button>  
</div>  
</form>  
{% endblock %} {% endblock %}
   
  {% set intro = g.site_intro_text %}
 
  <div class="module-content box">
  <header>
  {% if intro %}
  {{ h.render_markdown(intro) }}
  {% else %}
  <h1 class="page-heading">{{ _("Welcome to CKAN") }}</h1>
  <p>
  {% trans %}This is a nice introductory paragraph about CKAN or the site
  in general. We don't have any copy to go here yet but soon we will
  {% endtrans %}
  </p>
  {% endif %}
  </header>
  </div>
 
 
<div class="hero-secondary-inner">  
<script type="text/javascript" src="http://www.google.com/jsapi">  
</script>  
<script type="text/javascript">  
google.load("feeds", "1") //Load Google Ajax Feed API (version 1)  
</script>  
 
<div id="feeddiv">  
</div>  
</div>  
 
 
 
<script type="text/javascript">  
 
var feedcontainer=document.getElementById("feeddiv")  
var feedurl="http://www.finance.gov.au/taxonomy/term/1274/feed"  
var feedlimit=4  
var rssoutput="<div class='module module-shallow module-narrow module-dark info box' style='color:black'><h2>Latest data.gov.au News</h2><i class='ckan-icon ckan-icon-feed'></i><a href='http://www.finance.gov.au/taxonomy/term/1274/feed/'>&nbsp;Subscribe to the blog </a></div>"  
 
 
function rssfeedsetup(){
  // Ask the Google Feed API for up to `feedlimit` entries of `feedurl`;
  // displayfeed receives the result.
  var feed = new google.feeds.Feed(feedurl);
  feed.setNumEntries(feedlimit);
  feed.load(displayfeed);
}
 
function displayfeed(result){
  // Feed API callback: append one box per entry to the markup already held
  // in `rssoutput` and write it into the feed container, or alert on error.
  if (result.error){
    alert("Error fetching feeds!");
    return;
  }
  var entries = result.feed.entries;
  for (var idx = 0; idx < entries.length; idx++) {
    var entry = entries[idx];
    rssoutput += "<div class='module module-shallow module-narrow module-dark info box'><h3><a href='" + entry.link + "'>" + entry.title + "</a></h3>";
    rssoutput += " <font color='black'>Posted on " + new Date(entry.publishedDate).toDateString() + "</font></div>";
  }
  feedcontainer.innerHTML = rssoutput;
}
 
window.onload=function(){  
rssfeedsetup()  
}  
 
</script>  
 
{% set tags = h.get_facet_items_dict('tags', limit=3) %}  
{% set placeholder = _('eg. Gold Prices') %}  
 
<div class="module module-search module-narrow module-shallow box">  
<form class="module-content search-form" method="get" action="{% url_for controller='package', action='search' %}">  
<h3 class="heading">{{ _("Search Your Data") }}</h3>  
<div class="search-input control-group search-giant">  
<input type="text" class="search" name="q" value="" autocomplete="off" placeholder="{{ placeholder }}" />  
<button type="submit">  
<i class="icon-search"></i>  
<span>{{ _('Search') }}</span>  
</button>  
</div>  
</form>  
<div class="tags">  
<h3>{{ _('Popular Tags') }}</h3>  
{% for tag in tags %}  
<a class="tag" href="{% url_for controller='package', action='search', tags=tag.name %}">{{ h.truncate(tag.display_name, 22) }}</a>  
{% endfor %}  
</div>  
</div>  
 
 
  {% set stats = h.get_site_statistics() %}
 
  <div class="box stats">
  <div class="inner">
  <h3>{{ _('{0} statistics').format(g.site_title) }}</h3>
  <ul>
  <li>