add dataqld
[disclosr.git] / documents / dataqld.py
blob:a/documents/dataqld.py -> blob:b/documents/dataqld.py
--- a/documents/dataqld.py
+++ b/documents/dataqld.py
@@ -1,1 +1,28 @@
+import sys, os
+import time
+import scrape
+from bs4 import BeautifulSoup
 
+from unidecode import unidecode
+import ckanclient
+
+# Instantiate the CKAN client against the data.qld.gov.au API.
+ckan = ckanclient.CkanClient(base_location='https://data.qld.gov.au/api')
+
+# Get the package list; each entry is appended to the dataset URL below.
+package_list = ckan.package_register_get()
+for package_name in package_list:
+    # Fetch the dataset's public page via scrape.fetchURL. NOTE(review): the meaning of "data"/"qld"/False and whether fetchURL stores a record are defined in scrape.py -- confirm there.
+    (url, mime_type, html) = scrape.fetchURL(scrape.docsdb,
+        "https://data.qld.gov.au/dataset/"+package_name , "data", "qld", False)
+    hash = scrape.mkhash(scrape.canonurl(url))  # key used for the docsdb lookup below; NOTE: this rebinds the built-in hash()
+    print hash
+    doc = scrape.docsdb.get(hash)  # presumably the record stored for this URL; a None result would fail on doc.keys() below -- TODO confirm
+    if "metadata" not in doc.keys() or True:  # "or True" makes this always true, so metadata is re-fetched and overwritten on every run
+        ckan.package_entity_get(package_name)  # return value is discarded; the response is read from ckan.last_message instead
+        package_entity = ckan.last_message
+        doc['type'] = "dataset"
+        doc['metadata'] = package_entity
+        print package_entity
+        scrape.docsdb.save(doc)
+