export
[disclosr.git] / documents / datagov-export.py
blob:a/documents/datagov-export.py -> blob:b/documents/datagov-export.py
  import os
  import re

  import ckanclient
  import couchdb
  from ckanclient import CkanApiError
  class LoaderError(Exception):
  pass
  # https://github.com/okfn/ckanext-importlib
  # Instantiate the CKAN client.
  ckan = ckanclient.CkanClient(base_location='http://localhost:5000/api',
  api_key='b47b24cd-591d-40c1-8677-d73101d56d1b')
  # (use your own api_key from http://thedatahub.org/user/me )
   
  def munge(name):
  # convert spaces to underscores
  name = re.sub(' ', '_', name).lower()
  # convert symbols to dashes
  name = re.sub('[:]', '_-', name).lower()
  name = re.sub('[/]', '-', name).lower()
  # take out not-allowed characters
  name = re.sub('[^a-zA-Z0-9-_]', '', name).lower()
  # remove double underscores
  name = re.sub('__', '_', name).lower()
  return name
  def name_munge(input_name):
  return munge(input_name.replace(' ', '').replace('.', '_').replace('&', 'and'))
  #return input_name.replace(' ', '').replace('.', '_').replace('&', 'and')
   
  couch = couchdb.Server('http://127.0.0.1:5984/')
  docsdb = couch['disclosr-documents']
   
  if __name__ == "__main__":
  for doc in docsdb.view('app/datasets'):
  print doc.id
  if doc.value['url'] != "http://data.gov.au/data/":
  # Collect the package metadata.
  pkg_name = name_munge(doc.value['metadata']['DCTERMS.Title'][:100])
  package_entity = {
  'name': pkg_name,
  'title': doc.value['metadata']['DCTERMS.Title'],
  'url': doc.value['metadata']['DCTERMS.Source.URI'],
  'tags': doc.value['metadata']["Keywords / Tags"], #todo must be alphanumeric characters or symbols
   
  'author': doc.value['metadata']["DCTERMS.Creator"],
  'maintainer': doc.value['metadata']["DCTERMS.Creator"],
  'licence_id': doc.value['metadata']['DCTERMS.License'],
  'notes': doc.value['metadata']['Description'],
  }
  try:
  ckan.package_register_post(package_entity)
  except CkanApiError, e:
  if ckan.last_status == 409:
  print "already exists"
  else:
  raise LoaderError('Unexpected status %s checking for package under \'%s\': %r' % (ckan.last_status, doc.id, e.args))
   
  print package_entity
  ckan.add_package_resource(pkg_name, 'http://example.org/', name='Foo', resource_type='data', format='csv')