fix licence and format for datagov file upload
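Pass format and license_id through to ckan.add_package_resource when uploading
dataset resources, and force the format to "xls" when the URL contains "xls" or
the fetched MIME type is an Excel type. Rename get_licence_id to get_license_id
and the package field 'licence_id' to 'license_id', and map the CC BY-ND and
CC BY-NC-ND licences to 'cc-by-nd' and 'cc-nc-nd' instead of 'other-closed'.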


Former-commit-id: cf8e294aa2638dd43963d93dc03caf2e301fce41

--- a/documents/datagov-export.py
+++ b/documents/datagov-export.py
@@ -13,9 +13,14 @@
     pass
 
 import tempfile
-def add_package_resource_cachedurl(ckan, package_name, url, name, format, size, **kwargs):
+def add_package_resource_cachedurl(ckan, package_name, url, name, format, license_id, size,**kwargs):
+    if "xls" in url:
+	format = "xls"
     (returned_url, mime_type, content) = scrape.fetchURL(scrape.docsdb,
                                                 url, "dataset_resource", "AGIMO", False)
+    if mime_type in ["application/vnd.ms-excel","application/msexcel","application/x-msexcel","application/x-ms-excel","application/x-excel","application/x-dos_ms_excel","application/xls","application/x-xls","application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"]:
+	format = "xls"
+
     if content != None:
 	    tf = tempfile.NamedTemporaryFile(delete=False)
 	    tfName = os.path.abspath(tf.name)
@@ -23,12 +28,12 @@
 	    tf.seek(0)
 	    tf.write(content)
 	    tf.flush()
-	    ckan.add_package_resource (package_name, tfName, name=name)
+	    ckan.add_package_resource (package_name, tfName, name=name, format=format, license_id=license_id)
     else:
 	print "fetch error"
-	ckan.add_package_resource(package_name, url, name=name, resource_type='data',
+	return ckan.add_package_resource(package_name, url, name=name, resource_type='data',
                                                       format=format,
-                                                      size=size)
+                                                      size=size, mimetype=mime_type, license_id=license_id)
 
 # Instantiate the CKAN client.
 api_key = 'ff34526e-f794-4068-8235-fcbba38cd8bc'
@@ -147,15 +152,15 @@
     return munge(input_name.replace(' ', '').replace('.', '_').replace('&', 'and'))
 
 
-def get_licence_id(licencename):
+def get_license_id(licencename):
     map = {
         "Creative Commons - Attribution-Share Alike 2.0 Australia (CC-SA)\nThe downloadable version of the database is licensed under CC-BY-SA Creative Commons Attribution Share Alike and contains only the database fields that are released under that license. These fields are object title, object number, object description as well as temporal, spatial and dimension details. It also contains a persistent URL for each record.": 'cc-by-sa',
         "CreativeCommonsAttributionNonCommercial30AustraliaCCBYNC30": 'cc-nc',
         'Otherpleasespecify': 'notspecified',
         '': 'notspecified',
         "Publicly available data": 'notspecified',
-        "CreativeCommonsAttributionNoDerivativeWorks30AustraliaCCBYND30": "other-closed",
-        "CreativeCommonsAttributionNonCommercialNoDerivs30AustraliaCCBYNCND30": "other-closed",
+        "CreativeCommonsAttributionNoDerivativeWorks30AustraliaCCBYND30": "cc-by-nd",
+        "CreativeCommonsAttributionNonCommercialNoDerivs30AustraliaCCBYNCND30": "cc-nc-nd",
         'CreativeCommonsAttribution30AustraliaCCBY30': 'cc-by',
         "Creative Commons - Attribution 2.5 Australia (CC-BY)": 'cc-by',
         'CreativeCommonsAttributionCCBY25': 'cc-by',
@@ -248,7 +253,7 @@
                         'tags': tags, #tags are mandatory?
                         'author': creator,
                         'maintainer': creator,
-                        'licence_id': get_licence_id(doc.value['metadata']['DCTERMS.License']),
+                        'license_id': get_license_id(doc.value['metadata']['DCTERMS.License']),
                         'notes': html2text.html2text(doc.value['metadata']['Description']),
                         'owner_org': org_id,
                         'extras': extras,
@@ -292,9 +297,8 @@
                                     name = resource['name']
                                 print resource
                                 add_package_resource_cachedurl(ckan, pkg_name, url_fix(resource['href']), name,
-                                                          format,
-                                                          human2bytes(resource.get('size', '0B')),
-                                                          resource_type='data')
+                                                          format, get_license_id(doc.value['metadata']['DCTERMS.License']),
+                                                          human2bytes(resource.get('size', '0B')))
                         else:
                             print "resources already exist"
                     except CkanApiError, e: