--- a/documents/dataqld.py +++ b/documents/dataqld.py @@ -1,1 +1,28 @@ +import sys, os +import time +import scrape +from bs4 import BeautifulSoup +from unidecode import unidecode +import ckanclient + +# Instantiate the CKAN client. +ckan = ckanclient.CkanClient(base_location='https://data.qld.gov.au/api') + +# Get the package list. +package_list = ckan.package_register_get() +for package_name in package_list: +# Get the details of a package. + (url, mime_type, html) = scrape.fetchURL(scrape.docsdb, + "https://data.qld.gov.au/dataset/"+package_name , "data", "qld", False) + hash = scrape.mkhash(scrape.canonurl(url)) + print hash + doc = scrape.docsdb.get(hash) + if "metadata" not in doc.keys() or True: + ckan.package_entity_get(package_name) + package_entity = ckan.last_message + doc['type'] = "dataset" + doc['metadata'] = package_entity + print package_entity + scrape.docsdb.save(doc) +