|
import os
import re

import ckanclient
import couchdb
from ckanclient import CkanApiError
|
class LoaderError(Exception):
    """Raised when posting a package to CKAN fails with an unexpected status."""
|
# Reference: https://github.com/okfn/ckanext-importlib
# (presumably the origin of the name-munging helpers below -- confirm)
|
# Instantiate the CKAN client.
#
# The API endpoint and key default to the values below (a CKAN API on
# localhost) and can be overridden without editing this file by setting the
# CKAN_API_URL and CKAN_API_KEY environment variables.
# (use your own api_key from http://thedatahub.org/user/me )
# NOTE(review): an API key is a credential; prefer supplying it through the
# environment rather than committing a real one here.
ckan = ckanclient.CkanClient(
    base_location=os.environ.get('CKAN_API_URL', 'http://localhost:5000/api'),
    api_key=os.environ.get('CKAN_API_KEY',
                           'b47b24cd-591d-40c1-8677-d73101d56d1b'))
|
|
|
def munge(name):
    """Return *name* reduced to a lowercase slug.

    The result contains only ASCII lowercase letters, digits, '-' and '_'.

    Args:
        name: The text to convert.

    Returns:
        The munged string; may be empty if nothing in *name* is allowed.
    """
    # Lowercase first so that characters whose lowercase form is plain ASCII
    # survive the filtering step below.
    name = name.lower()
    # Spaces become underscores.
    name = name.replace(' ', '_')
    # ':' becomes '_-' and '/' becomes '-'.
    name = name.replace(':', '_-').replace('/', '-')
    # Drop every character that is not allowed.
    name = re.sub(r'[^a-z0-9_-]', '', name)
    # Collapse each run of underscores to a single one.  Replacing only the
    # literal pair '__' would leave doubles behind for runs of three or more.
    name = re.sub(r'_{2,}', '_', name)
    return name
|
def name_munge(input_name):
    """Return a package name derived from *input_name*.

    Spaces are removed, '.' becomes '_' and '&' becomes 'and'; the result is
    then passed through munge() for the final clean-up.
    """
    prepared = input_name
    # Order matters only in that it mirrors the original replacement chain.
    for old, new in ((' ', ''), ('.', '_'), ('&', 'and')):
        prepared = prepared.replace(old, new)
    return munge(prepared)
|
|
|
# Handles on the CouchDB server and on the database whose 'app/datasets'
# view the export loop iterates.
COUCH_SERVER_URL = 'http://127.0.0.1:5984/'
DOCUMENTS_DB_NAME = 'disclosr-documents'

couch = couchdb.Server(COUCH_SERVER_URL)
docsdb = couch[DOCUMENTS_DB_NAME]
|
|
|
def build_package_entity(pkg_name, metadata):
    """Return the CKAN package dict for one dataset document.

    Args:
        pkg_name: Package name to register the dataset under.
        metadata: Mapping holding the document's metadata.  It must contain
            the 'DCTERMS.Title', 'DCTERMS.Source.URI', 'Keywords / Tags',
            'DCTERMS.Creator', 'DCTERMS.License' and 'Description' keys.

    Returns:
        A dict suitable for passing to ``ckan.package_register_post``.

    Raises:
        KeyError: If one of the required metadata keys is missing.
    """
    return {
        'name': pkg_name,
        'title': metadata['DCTERMS.Title'],
        'url': metadata['DCTERMS.Source.URI'],
        # TODO: tags must be alphanumeric characters or symbols
        'tags': metadata['Keywords / Tags'],
        'author': metadata['DCTERMS.Creator'],
        'maintainer': metadata['DCTERMS.Creator'],
        # CKAN spells this package field 'license_id'; the previous
        # 'licence_id' key is not a CKAN package field.
        'license_id': metadata['DCTERMS.License'],
        'notes': metadata['Description'],
    }


if __name__ == "__main__":
    # Single-argument print(...) and "except ... as" behave the same on
    # Python 2.6+ and Python 3.
    for doc in docsdb.view('app/datasets'):
        print(doc.id)
        # Documents whose url is exactly this one are not exported.
        if doc.value['url'] == "http://data.gov.au/data/":
            continue

        # Collect the package metadata.
        metadata = doc.value['metadata']
        # Only the first 100 characters of the title feed the package name.
        pkg_name = name_munge(metadata['DCTERMS.Title'][:100])
        package_entity = build_package_entity(pkg_name, metadata)

        try:
            ckan.package_register_post(package_entity)
        except CkanApiError as e:
            # Status 409 is treated as "package already registered" and is
            # not an error; anything else aborts the run.
            if ckan.last_status == 409:
                print("already exists")
            else:
                raise LoaderError(
                    'Unexpected status %s checking for package under \'%s\': %r'
                    % (ckan.last_status, doc.id, e.args))

        # NOTE(review): the original indentation of the two statements below
        # was lost; they are placed after the try/except so they run for every
        # exported document -- confirm.
        print(package_entity)
        # NOTE(review): attaches a fixed placeholder resource to every
        # package -- presumably to be replaced by the dataset's real
        # resources.
        ckan.add_package_resource(pkg_name, 'http://example.org/',
                                  name='Foo', resource_type='data',
                                  format='csv')
|
|