--- a/documents/datagov.py +++ b/documents/datagov.py @@ -13,7 +13,7 @@ if atag.has_key('href'): url = scrape.fullurl(listurl, atag['href']) (url, mime_type, html) = scrape.fetchURL(scrape.docsdb, - url, "data", "AGIMO") + url, "data", "AGIMO", False) hash = scrape.mkhash(scrape.canonurl(url)) doc = scrape.docsdb.get(hash) if "metadata" not in doc.keys() or True: @@ -39,7 +39,9 @@ link = item.find("a") format = item.find(property="dc:format") linkobj = {"href":link['href'].replace("/bye?","").strip(), - "format": format.string.strip(), "size": format.next_sibling.string.strip()} + "format": format.string.strip()} + if format.next_sibling.string != None: + linkobj["size"] = format.next_sibling.string.strip() if link.string != None: linkobj["name"] = link.string.strip() doc['metadata'][last_title].append(linkobj)