import requests |
import requests |
import ckanapi |
import ckanapi |
# copy (select id,url,format,extras from resource where extras like '%"autoupdate": "active"%') TO STDOUT WITH CSV; |
|
# f759e4b6-723c-4863-8a26-1529d689cad8,http://data.gov.au/geoserver/geelong-roofprints-kml/wms?request=GetCapabilities,wms,"{""autoupdate"": ""active""}" |
|
import fileinput |
|
import csv |
import csv |
|
import sys |
|
import psycopg2 |
|
import json |
|
from subprocess import Popen, PIPE |
|
|
def updateresource(resource_id): |
def updateresource(resource_id, dataset_id): |
url = 'http://data.disclosurelo.gs' |
print ' --- ' |
api_key = '' |
ckan = ckanapi.RemoteCKAN(api_url,api_key) |
db_credentials = '' |
|
print resource_id |
|
ckan = ckanapi.RemoteCKAN('http://data.disclosurelo.gs') |
|
#ckan = ckanapi.RemoteCKAN('http://demo.ckan.org') |
#ckan = ckanapi.RemoteCKAN('http://demo.ckan.org') |
resource = ckan.action.resource_show(id=resource_id) |
resource = ckan.action.resource_show(id=resource_id) |
|
print 'updating '+resource['name']+'('+resource_id+', '+dataset_id+')' |
print resource |
print resource |
url = resource['url'] |
url = resource['url'] |
#last_modified= 'Mon, 24 Feb 2014 01:48:29 GMT' |
#last_modified= 'Mon, 24 Feb 2014 01:48:29 GMT' |
#etag='"1393206509.38-638"' |
#etag='"1393206509.38-638"' |
headers={} |
headers={} |
if 'etag' in resource: |
if 'etag' in resource: |
headers['If-None-Match'] = resource['etag'] |
headers['If-None-Match'] = resource['etag'] |
if 'file_last_modified' in resource: |
if 'file_last_modified' in resource: |
headers["If-Modified-Since"] = resource['file_last_modified'] |
headers["If-Modified-Since"] = resource['file_last_modified'] |
|
print headers |
r = requests.head(url, headers=headers) |
r = requests.head(url, headers=headers) |
if r.status_code == 304: |
if r.status_code == 304: |
print 'not modified' |
print 'not modified' |
return |
return |
else: |
else: |
print r.status_code |
print r.status_code |
print r.headers |
print r.headers |
if 'last-modified' in r.headers: |
if 'last-modified' in r.headers: |
resource['file_last_modified'] = r.headers['last-modified'] |
resource['file_last_modified'] = r.headers['last-modified'] |
if 'etag' in r.headers: |
if 'etag' in r.headers: |
resource['etag'] = r.headers['etag'] |
resource['etag'] = r.headers['etag'] |
#save updated resource |
#save updated resource |
# result = ckan.action.resource_update(id,resource) |
print resource |
if 'format' == 'shp': |
result = ckan.call_action('resource_update',resource) |
print "geoingest!" db_settings_json, api_url, api_key, resource['dataset_id'] |
if resource['format'].lower() == 'shp' or resource['format'].lower() == 'kml': |
|
print "geoingest!" |
|
pargs= ['dga-spatialingestor.py', db_settings_json, api_url, api_key, dataset_id] |
|
print pargs |
|
p = Popen(pargs)#, stdout=PIPE, stderr=PIPE) |
|
p.communicate() |
else: |
else: |
ckan.action.datapusher_submit(resource_id=id) |
print "datapusher!" |
|
# https://github.com/ckan/ckan/blob/master/ckanext/datapusher/logic/action.py#L19 |
|
ckan.action.datapusher_submit(resource_id=resource_id) |
|
|
if len(sys.argv) != 5: |
if len(sys.argv) != 4: |
print "spatial ingester. command line: postgis_url api_url api_key" |
print "autoupdate ingester. command line: postgis_url api_url api_key" |
sys.exit(errno.EACCES) |
sys.exit(-1) |
else: |
else: |
(path, db_settings_json, api_url, api_key) = sys.argv |
(path, db_settings_json, api_url, api_key) = sys.argv |
db_settings = json.loads(db_settings_json) |
db_settings = json.loads(db_settings_json) |
|
datastore_db_settings = dict(db_settings) |
|
datastore_db_settings['dbname'] = db_settings['datastore_dbname'] |
|
datastore_db_settings_json = json.dumps(datastore_db_settings) |
|
|
for line in fileinput.input(): |
try: |
row = csv.reader(line) |
conn = psycopg2.connect(dbname=db_settings['dbname'], user=db_settings['user'], password=db_settings['password'], host=db_settings['host']) |
updateresource(line.replace("\n","")) |
except: |
|
failure("I am unable to connect to the database.") |
|
# Open a cursor to perform database operations |
|
cur = conn.cursor() |
|
conn.set_isolation_level(0) |
|
cur.execute('select resource.id resource_id, package.id dataset_id from resource inner join resource_group on resource.resource_group_id = resource_group.id inner join package on resource_group.package_id = package.id where resource.extras like \'%"autoupdate": "active"%\';') |
|
row = cur.fetchone() |
|
while row is not None: |
|
updateresource(row[0],row[1]) |
|
# process |
|
row = cur.fetchone() |
|
cur.close() |
|
conn.close() |
|
|