cache org ids while importing datagov
[disclosr.git] / documents / datagov-export.py
blob:a/documents/datagov-export.py -> blob:b/documents/datagov-export.py
--- a/documents/datagov-export.py
+++ b/documents/datagov-export.py
@@ -10,8 +10,7 @@
     pass
 
 # Instantiate the CKAN client.
-#ckan = ckanclient.CkanClient(base_location='http://localhost:5000/api',    api_key='b47b24cd-591d-40c1-8677-d73101d56d1b')
-api_key = 'b3ab75e4-afbb-465b-a09d-8171c8c69a7a'
+api_key = 'ff34526e-f794-4068-8235-fcbba38cd8bc'
 ckan = ckanclient.CkanClient(base_location='http://data.disclosurelo.gs/api',
                              api_key=api_key)
 ckandirect = ckanapi.RemoteCKAN('http://data.disclosurelo.gs', api_key=api_key)
@@ -57,6 +56,9 @@
           ...
       ValueError: can't interpret '12 foo'
     """
+    if s == None:
+	return 0
+    s = s.replace(',', '')
     init = s
     num = ""
     while s and s[0:1].isdigit() or s[0:1] == '.':
@@ -192,43 +194,6 @@
                         ckan.last_status, pkg_name, e.args))
             pkg = ckan.package_entity_get(pkg_name)
 
-            # add dataset to group(s)
-            groups = []
-            if 'data.gov.au Category' in doc.value['metadata'].keys() and len(
-                    doc.value['metadata']['data.gov.au Category']) > 0:
-                if hasattr(doc.value['metadata']['data.gov.au Category'], '__iter__'):
-                    groups = groups + doc.value['metadata']['data.gov.au Category']
-                else:
-                    groups = groups + [doc.value['metadata']['data.gov.au Category']]
-
-            for group_name in groups:
-                group_url = name_munge(group_name[:100])
-                try:
-                    # Update the group details
-                    group_entity = ckan.group_entity_get(group_url)
-                    print "group "+group_name+" exists"
-                    if 'packages' in group_entity.keys():
-                        group_entity['packages'] = list(set(group_entity['packages'] + [pkg_name]))
-                    else:
-                        group_entity['packages'] = [pkg_name]
-                    ckan.group_entity_put(group_entity)
-                except CkanApiError, e:
-                    if ckan.last_status == 404:
-                        print "group "+group_name+" does not exist, creating"
-                        group_entity = {
-                            'name': group_url,
-                            'title': group_name,
-                            'description': group_name,
-                            'packages': [pkg_name]
-                        }
-                        #print group_entity
-                        ckan.group_register_post(group_entity)
-                    elif ckan.last_status == 409:
-                        print "group already exists"
-                    else:
-                        raise LoaderError('Unexpected status %s adding to group under \'%s\': %r' % (
-                            ckan.last_status, pkg_name, e.args))
-
             # add resources (downloadable data files)
             if 'Download' in doc.value['metadata'].keys():
                 try:
@@ -244,13 +209,17 @@
                                 format = 'xml'
                             if resource['format'] == '(CSV/XLS)':
                                 format = 'csv'
+                            if resource['format'] == '(Shapefile)':
+                                format = 'shp'
+                            if resource['format'] == '(KML/KMZ)':
+                                format = 'kml'
                             name = resource['href']
                             if 'name' in resource.keys():
                                 name = resource['name']
                             print resource
                             ckan.add_package_resource(pkg_name, resource['href'], name=name, resource_type='data',
                                                       format=format,
-                                                      size=human2bytes(resource['size'].replace(',', '')))
+                                                      size=human2bytes(resource.get('size','0B')))
                     else:
                         print "resources already exist"
                 except CkanApiError, e: