cache org ids while importing datagov
[disclosr.git] / documents / datagov-export.py
blob:a/documents/datagov-export.py -> blob:b/documents/datagov-export.py
--- a/documents/datagov-export.py
+++ b/documents/datagov-export.py
@@ -92,12 +92,9 @@
     name = re.sub('__', '_', name).lower()
     return name
 
-#todo "{'name': [u'Url must be purely lowercase alphanumeric (ascii) characters and these symbols: -_']}"
-# http://data.gov.au/dataset/australian-domestic-regional-and-international-airline-activity-%E2%80%93-time-series/
+
 def name_munge(input_name):
     return munge(input_name.replace(' ', '').replace('.', '_').replace('&', 'and'))
-    #[:100]
-    #return input_name.replace(' ', '').replace('.', '_').replace('&', 'and')
 
 
 def get_licence_id(licencename):
@@ -123,7 +120,9 @@
 
 if __name__ == "__main__":
     orgs_list = []
+    orgs_ids = {}
     for doc in docsdb.view('app/datasets'):
+        print "   ---   "
         print doc.id
 
         if doc.value['url'] != "http://data.gov.au/data/" and doc.value['agencyID'] != "qld":
@@ -150,8 +149,12 @@
                 else:
                     print "org found, adding dataset to " + org_name
 
-            org = ckandirect.action.organization_show(id=org_name)
-            # todo cache org names -> id mapping
+            # cache org names -> id mapping
+            if org_name not in orgs_ids:
+                org = ckandirect.action.organization_show(id=org_name)
+                orgs_ids[org_name] = org["result"]["id"]
+            org_id = orgs_ids[org_name]
+            print "org id is "+org_id
             tags = []
             if doc.value['agencyID'] == "AGIMO":
                 if len(doc.value['metadata']["Keywords / Tags"]) > 0:
@@ -171,7 +174,7 @@
                     'maintainer': doc.value['metadata']["DCTERMS.Creator"],
                     'licence_id': get_licence_id(doc.value['metadata']['DCTERMS.License']),
                     'notes': html2text.html2text(doc.value['metadata']['Description']),
-                    'owner_org': org["result"]["id"]
+                    'owner_org': org_id
                     #todo add missing key values like jurisdiction
                 }
             if doc.value['agencyID'] == "qld":
@@ -201,10 +204,8 @@
             for group_name in groups:
                 group_url = name_munge(group_name[:100])
                 try:
-                    print ckan.group_entity_get(group_url)
-
                     # Update the group details
-                    group_entity = ckan.last_message
+                    group_entity = ckan.group_entity_get(group_url)
                     print "group "+group_name+" exists"
                     if 'packages' in group_entity.keys():
                         group_entity['packages'] = list(set(group_entity['packages'] + [pkg_name]))