Add start of metadata extract
Former-commit-id: 51210e2d4385a4942d9d7a380a4c16c16811f98b
--- /dev/null
+++ b/admin/metadata.py
@@ -1,1 +1,22 @@
+#http://packages.python.org/CouchDB/client.html
+import couchdb
+from BeautifulSoup import BeautifulSoup
+couch = couchdb.Server('http://127.0.0.1:5984/')
+
+# select database
+docsdb = couch['disclosr-documents']
+
+# For every row of the "metadata extract required" view, fetch an attachment
+# of that document, parse it as HTML and print the name of each named <meta> tag.
+for row in docsdb.view('app/getMetadataExtractRequired'):
+    print row.id
+    # The first key of the view row's value is used as the attachment name.
+    html = docsdb.get_attachment(row.id, row.value.iterkeys().next()).read()
+    metadata = []
+    # http://www.crummy.com/software/BeautifulSoup/documentation.html
+    soup = BeautifulSoup(html)
+    # soup.meta would give only the first <meta> tag (or None), so collect
+    # them all with findAll. The filter must go through attrs because "name"
+    # is findAll's own first argument; True keeps only tags that actually
+    # carry a name attribute, so the lookup below cannot raise KeyError.
+    for metatag in soup.findAll('meta', attrs={'name': True}):
+        print metatag['name']
+    doc = docsdb.get(row.id)
+    # TODO: fill `metadata` from the tags above, then enable the save:
+    # doc['metadata'] = metadata
+    # docsdb.save(doc)
+