--- a/documents/datagov.py +++ b/documents/datagov.py @@ -13,7 +13,7 @@ if atag.has_key('href'): url = scrape.fullurl(listurl, atag['href']) (url, mime_type, html) = scrape.fetchURL(scrape.docsdb, - url, "data", "AGIMO") + url, "data", "AGIMO", False) hash = scrape.mkhash(scrape.canonurl(url)) doc = scrape.docsdb.get(hash) if "metadata" not in doc.keys() or True: @@ -34,15 +34,17 @@ if last_title == "Description": doc['metadata'][last_title] = unidecode(str(child)).encode('ascii', 'ignore') elif last_title == "Download": - doc['metadata'][last_title] = {} + doc['metadata'][last_title] = [] for item in child.find_all("li"): link = item.find("a") format = item.find(property="dc:format") linkobj = {"href":link['href'].replace("/bye?","").strip(), - "format": format.string.strip(), "size": format.next_sibling.string.strip()} + "format": format.string.strip()} + if format.next_sibling.string != None: + linkobj["size"] = format.next_sibling.string.strip() if link.string != None: linkobj["name"] = link.string.strip() - doc['metadata'][last_title][] = linkobj + doc['metadata'][last_title].append(linkobj) else: atags = child.find_all('a')