Add start of metadata extract
Add start of metadata extract


Former-commit-id: 51210e2d4385a4942d9d7a380a4c16c16811f98b

file:b/admin/metadata.py (new)
--- /dev/null
+++ b/admin/metadata.py
@@ -1,1 +1,22 @@
+#http://packages.python.org/CouchDB/client.html
+import couchdb
+from BeautifulSoup import BeautifulSoup
 
+couch = couchdb.Server('http://127.0.0.1:5984/')
+
+# select database
+docsdb = couch['disclosr-documents']
+
+for row in docsdb.view('app/getMetadataExtractRequired'): 
+    print row.id
+    html = docsdb.get_attachment(row.id,row.value.iterkeys().next()).read()
+    metadata = []
+     # http://www.crummy.com/software/BeautifulSoup/documentation.html
+            soup = BeautifulSoup(html)
+metatags = soup.meta
+    for metatag in metatags:
+        print metatag['name']
+    doc = docsdb.get(row.id)
+    //doc['metadata'] = metadata
+    //docsdb.save(doc)
+

file:b/webserver.php (new)
--- /dev/null
+++ b/webserver.php
@@ -1,1 +1,35 @@
+<?php
 
+include_once('include/common.inc.php');
+include_header();
+// Per agency: name, web server, and (despite the header) website MIME type.
+echo "<table>
+    <tr><th>name</th><th>webserver</th><th>accessiblity errors</th></tr>";
+$agenciesdb = $server->get_db('disclosr-agencies');
+$docsdb = $server->get_db('disclosr-documents');
+try {
+    $rows = $agenciesdb->get_view("app", "all", null, true)->rows;
+    if ($rows) {
+        foreach ($rows as $row) {
+            echo "<tr><td>" . htmlspecialchars($row->value->name) . "</td>";
+            // Default to empty cells so every row keeps three columns.
+            $cells = "<td></td><td></td>";
+            if (isset($row->value->website)) {
+                try {
+                    $website = $docsdb->get(md5($row->value->website));
+                    $serverParts = explode(" ", $website->web_server);
+                    $cells = "<td>" . htmlspecialchars($serverParts[0]) . "</td>"
+                        . "<td>" . htmlspecialchars($website->mime_type) . "</td>";
+                } catch (SetteeRestClientException $e) {
+                    // Lookup failed: leave this agency's cells empty.
+                }
+            }
+            echo $cells . "</tr>";
+        }
+    }
+} catch (SetteeRestClientException $e) {
+    setteErrorHandler($e);
+}
+echo "</table>";
+include_footer();
+?>