# http://packages.python.org/CouchDB/client.html
import couchdb
from BeautifulSoup import BeautifulSoup

# For every row returned by the 'app/getMetadataExtractRequired' view, fetch
# one attachment of the document, parse it as HTML, and print the ``name``
# attribute of each <meta> tag found in it.
couch = couchdb.Server('http://127.0.0.1:5984/')

# select database
docsdb = couch['disclosr-documents']

for row in docsdb.view('app/getMetadataExtractRequired'):
    print(row.id)

    # The first key of the view value is used as the attachment filename.
    attachment_name = next(iter(row.value))
    attachment = docsdb.get_attachment(row.id, attachment_name)
    if attachment is None:
        # Attachment not found (get_attachment returned its default);
        # nothing to parse for this row.
        continue
    html = attachment.read()

    metadata = []
    # http://www.crummy.com/software/BeautifulSoup/documentation.html
    soup = BeautifulSoup(html)
    # ``soup.meta`` would only give the first <meta> tag; findAll visits
    # every one of them.
    for metatag in soup.findAll('meta'):
        # Not every <meta> carries a ``name`` (e.g. http-equiv / charset).
        name = metatag.get('name')
        if name is not None:
            print(name)

    doc = docsdb.get(row.id)
    # Writing the extracted metadata back to the document is disabled.
    # NOTE(review): ``metadata`` is never populated above, so enabling the
    # two lines below as-is would store an empty list.
    # doc['metadata'] = metadata
    # docsdb.save(doc)