Merge branch 'master' of ssh://apples.lambdacomplex.org/git/disclosr
[disclosr.git] / documents / datagov-export.py
blob:a/documents/datagov-export.py -> blob:b/documents/datagov-export.py
--- a/documents/datagov-export.py
+++ b/documents/datagov-export.py
@@ -135,15 +135,26 @@
                               doc.value['url'].replace("http://data.gov.au/dataset/", '').replace('/', '')[:100]);
             print pkg_name
             #add to or create organization using direct API
-            org_name = name_munge(doc.value['metadata']["Agency"][:100])
+            agency = doc.value['metadata']["Agency"]
+            if agency == "APS":
+                agency = "Australian Public Service Commission"
+            if agency == "Shared Services, Treasury Directorate":
+                agency = "Shared Services Procurement, Treasury Directorate"
+            if agency == "Treasury - Shared Services":
+                agency = "Shared Services Procurement, Treasury Directorate"
+            if agency == "Territory and Municipal Services (TAMS)":
+                agency = "Territory and Municipal Services Directorate"
+            if agency == "State Library of NSW":
+                agency = "State Library of New South Wales"
+            org_name = name_munge(agency[:100])
             if org_name not in orgs_list:
                 orgs_list = ckandirect.action.organization_list()['result']
                 #print orgs_list
                 if org_name not in orgs_list:
                     try:
                         print "org not found, creating " + org_name
-                        ckandirect.action.organization_create(name=org_name, title=doc.value['metadata']["Agency"],
-                                                              description=doc.value['metadata']["Agency"])
+                        ckandirect.action.organization_create(name=org_name, title=agency,
+                                                              description=agency)
                         orgs_list.append(org_name)
                     except ckanapi.ValidationError, e:
                         print e
@@ -158,6 +169,7 @@
             org_id = orgs_ids[org_name]
             print "org id is "+org_id
             tags = []
+            creator = doc.value['metadata']["DCTERMS.Creator"]
             if doc.value['agencyID'] == "AGIMO":
                 if len(doc.value['metadata']["Keywords / Tags"]) > 0:
                     if hasattr(doc.value['metadata']["Keywords / Tags"], '__iter__'):
@@ -167,20 +179,25 @@
 
                 tags = [re.sub('[^a-zA-Z0-9-_.]', '', tag.replace('&', 'and')).lower() for tag in tags if tag]
                 #print tags
+                extras = []
+
+                for extra_key in doc.value['metadata'].keys():
+                    if extra_key != "Keywords / Tags" and extra_key != "data.gov.au Category" and extra_key != "Download" :
+                        extras.append({'key':extra_key, 'value':doc.value['metadata'][extra_key]})
+
                 package_entity = {
                     'name': pkg_name,
                     'title': doc.value['metadata']['DCTERMS.Title'],
                     'url': doc.value['metadata']['DCTERMS.Source.URI'],
                     'tags': tags, #tags are mandatory?
-                    'author': doc.value['metadata']["DCTERMS.Creator"],
-                    'maintainer': doc.value['metadata']["DCTERMS.Creator"],
+                    'author': creator,
+                    'maintainer': creator,
                     'licence_id': get_licence_id(doc.value['metadata']['DCTERMS.License']),
                     'notes': html2text.html2text(doc.value['metadata']['Description']),
-                    'owner_org': org_id
-                    #todo add missing key values like jurasdiction
+                    'owner_org': org_id,
+                    'extras': extras
                 }
-            if doc.value['agencyID'] == "qld":
-                package_entity = doc.value['metadata']
+
 
             try:
                 #print package_entity
@@ -193,6 +210,7 @@
                     raise LoaderError('Unexpected status %s checking for package under \'%s\': %r' % (
                         ckan.last_status, pkg_name, e.args))
             pkg = ckan.package_entity_get(pkg_name)
+
 
             # add resources (downloadable data files)
             if 'Download' in doc.value['metadata'].keys():