update semantic markup, add publicbodies.org csv export
[disclosr.git] / documents / dataqld.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import sys, os
import time
import scrape
from bs4 import BeautifulSoup
 
from unidecode import unidecode
import ckanclient
 
# Open a CKAN client against the API endpoint at data.qld.gov.au.
ckan = ckanclient.CkanClient(base_location='https://data.qld.gov.au/api')

# Walk every package name in the CKAN register; for each one, fetch its
# dataset page through the shared scraper and attach the CKAN package
# record to the matching document in scrape.docsdb.
package_list = ckan.package_register_get()
for package_name in package_list:
    dataset_url = "https://data.qld.gov.au/dataset/" + package_name
    (url, mime_type, html) = scrape.fetchURL(
        scrape.docsdb, dataset_url, "data", "qld", False)

    # The document id is the hash of the canonicalised URL returned by the
    # fetch (not of the URL that was requested).
    doc_id = scrape.mkhash(scrape.canonurl(url))
    print(doc_id)
    doc = scrape.docsdb.get(doc_id)

    # The trailing `or True` makes this test always pass, so the metadata is
    # rewritten on every run whether or not the document already has it.
    # NOTE(review): looks like a force-refresh switch -- confirm before
    # removing it.
    lacks_metadata = "metadata" not in doc.keys()
    if not (lacks_metadata or True):
        continue

    # package_entity_get() leaves its result on the client; the value that is
    # stored is read back from ckan.last_message.
    ckan.package_entity_get(package_name)
    package_entity = ckan.last_message
    doc['type'] = "dataset"
    doc['metadata'] = package_entity
    print(package_entity)
    scrape.docsdb.save(doc)