--- a/admin/metadata.py +++ b/admin/metadata.py @@ -1,1 +1,22 @@ +#http://packages.python.org/CouchDB/client.html +import couchdb +from BeautifulSoup import BeautifulSoup +couch = couchdb.Server('http://127.0.0.1:5984/') + +# select database +docsdb = couch['disclosr-documents'] + +for row in docsdb.view('app/getMetadataExtractRequired'): + print row.id + html = docsdb.get_attachment(row.id,row.value.iterkeys().next()).read() + metadata = [] + # http://www.crummy.com/software/BeautifulSoup/documentation.html + soup = BeautifulSoup(html) +metatags = soup.meta + for metatag in metatags: + print metatag['name'] + doc = docsdb.get(row.id) + //doc['metadata'] = metadata + //docsdb.save(doc) +