Add start of metadata extract
Add start of metadata extract


Former-commit-id: 51210e2d4385a4942d9d7a380a4c16c16811f98b

file:b/admin/metadata.py (new)
  #http://packages.python.org/CouchDB/client.html
  import couchdb
  from BeautifulSoup import BeautifulSoup
 
  couch = couchdb.Server('http://127.0.0.1:5984/')
 
  # select database
  docsdb = couch['disclosr-documents']
 
  for row in docsdb.view('app/getMetadataExtractRequired'):
  print row.id
  html = docsdb.get_attachment(row.id,row.value.iterkeys().next()).read()
  metadata = []
  # http://www.crummy.com/software/BeautifulSoup/documentation.html
  soup = BeautifulSoup(html)
  metatags = soup.meta
  for metatag in metatags:
  print metatag['name']
  doc = docsdb.get(row.id)
  //doc['metadata'] = metadata
  //docsdb.save(doc)