Add start of metadata extract
Add start of metadata extract


Former-commit-id: 51210e2d4385a4942d9d7a380a4c16c16811f98b

file:b/admin/metadata.py (new)
  #http://packages.python.org/CouchDB/client.html
  import couchdb
  from BeautifulSoup import BeautifulSoup
 
  couch = couchdb.Server('http://127.0.0.1:5984/')
 
  # select database
  docsdb = couch['disclosr-documents']
 
  for row in docsdb.view('app/getMetadataExtractRequired'):
  print row.id
  html = docsdb.get_attachment(row.id,row.value.iterkeys().next()).read()
  metadata = []
  # http://www.crummy.com/software/BeautifulSoup/documentation.html
  soup = BeautifulSoup(html)
  metatags = soup.meta
  for metatag in metatags:
  print metatag['name']
  doc = docsdb.get(row.id)
  //doc['metadata'] = metadata
  //docsdb.save(doc)
 
file:b/webserver.php (new)
  <?php
 
  include_once('include/common.inc.php');
  include_header();
 
  echo "<table>
  <tr><th>name</th><th>webserver</th><th>accessiblity errors</th></tr>";
  $agenciesdb = $server->get_db('disclosr-agencies');
  $docsdb = $server->get_db('disclosr-documents');
  try {
  $rows = $agenciesdb->get_view("app", "all", null, true)->rows;
 
 
  if ($rows) {
  foreach ($rows as $row) {
 
  echo "<tr><td>" . $row->value->name . "</td>";
  if (isset($row->value->website)) {
  try {
  $website = $docsdb->get(md5($row->value->website));
  $serverParts = explode(" ",$website->web_server);
  echo "<td>" . $serverParts[0] . "</td>";
  echo "<td>" . $website->mime_type . "</td>";
  } catch (SetteeRestClientException $e) {
  // setteErrorHandler($e);
  }
  }
  echo "</tr>";
  }
  }
  } catch (SetteeRestClientException $e) {
  setteErrorHandler($e);
  }
  include_footer();
  ?>