Merge branch 'master' of ssh://apples.lambdacomplex.org/git/disclosr
Merge branch 'master' of ssh://apples.lambdacomplex.org/git/disclosr


Former-commit-id: ef55e235f9403e74df59b4377eba5481ff98488f

--- a/admin/importGov2RegisterRSSFacebookTwitter.php
+++ b/admin/importGov2RegisterRSSFacebookTwitter.php
@@ -1,7 +1,7 @@
 <?php
 
 require_once '../include/common.inc.php';
-require($basePath.'lib/phpquery/phpQuery/phpQuery.php');
+require($basePath . 'lib/phpquery/phpQuery/phpQuery.php');
 
 $db = $server->get_db('disclosr-agencies');
 $rows = $db->get_view("app", "byName")->rows;
@@ -10,41 +10,40 @@
 foreach ($rows as $row) {
     $nametoid[trim($row->key)] = $row->value;
 }
+
 function extractHTMLAccounts($url, $accountType) {
     global $accounts, $nametoid;
     $request = Requests::get($url);
     $doc = phpQuery::newDocumentHTML($request->body);
     phpQuery::selectDocument($doc);
     foreach (pq('tr')->elements as $tr) {
-       //echo $tr->nodeValue.PHP_EOL;
-       $agency = "";
-       $url = "";
-       foreach ($tr->childNodes as $td) {
-           $class = $td->getAttribute("class");
-           //echo "cccc $class ".$td->nodeValue.PHP_EOL;
-           if ($class == "s11" || $class == "s10" || $class == "s7") {
-               $agency = $td->nodeValue;
-           } else if ($class == "s6" || $class == "s9"){
-               $url = $td->nodeValue;
-               foreach($td->childNodes as $a) {
-                   $href = $a->getAttribute("href");
-                   if ($href != "") {
-                       $url = $href;
-                   }
-               }
-           }
-       }
-       if ($agency != "" && $url != "") {
-           if (!in_array(trim($agency), array_keys($nametoid))) {
-                        echo trim($agency)." missing" . PHP_EOL;
-                    } else {
-                     //   echo $agency." = ".$url.PHP_EOL;
-                        $accounts[$nametoid[trim($agency)]][$accountType][] = $url;
+        //echo $tr->nodeValue.PHP_EOL;
+        $agency = "";
+        $url = "";
+        foreach ($tr->childNodes as $td) {
+            $class = $td->getAttribute("class");
+            //echo "cccc $class ".$td->nodeValue.PHP_EOL;
+            if ($class == "s11" || $class == "s10" || $class == "s7") {
+                $agency = $td->nodeValue;
+            } else if ($class == "s6" || $class == "s9") {
+                $url = $td->nodeValue;
+                foreach ($td->childNodes as $a) {
+                    $href = $a->getAttribute("href");
+                    if ($href != "") {
+                        $url = $href;
                     }
-       
-       }
+                }
+            }
+        }
+        if ($agency != "" && $url != "") {
+            if (!in_array(trim($agency), array_keys($nametoid))) {
+                echo trim($agency) . " missing" . PHP_EOL;
+            } else {
+                //   echo $agency." = ".$url.PHP_EOL;
+                $accounts[$nametoid[trim($agency)]][$accountType][] = $url;
+            }
+        }
     }
-    
 }
 
 function extractCSVAccounts($url, $accountType, $nameField, $accountField, $filter) {
@@ -53,7 +52,7 @@
     $Data = str_getcsv($request->body, "\n"); //parse the rows 
     $headers = Array();
     foreach ($Data as $num => $line) {
-        $Row = str_getcsv($line, ",",'"');
+        $Row = str_getcsv($line, ",", '"');
         if ($num == 0) {
             
         } else if ($num == 1) {
@@ -64,7 +63,7 @@
                 $agencyName = $Row[array_search($nameField, $headers)];
                 if (!$filter || $Row[array_search("State", $headers)] == "NAT") {
                     if (!in_array(trim($agencyName), array_keys($nametoid))) {
-                        echo trim($agencyName)." missing" . PHP_EOL;
+                        echo trim($agencyName) . " missing" . PHP_EOL;
                     } else {
                         // echo $Row[array_search($nameField, $headers)] . PHP_EOL;
                         $accounts[$nametoid[trim($agencyName)]][$accountType][] = $Row[array_search($accountField, $headers)];
@@ -84,6 +83,18 @@
 extractHTMLAccounts("https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGJxandJREhLSGlWWUZfZ2xKOTNHZ0E&output=html", "RSS");
 // facebook 
 extractHTMLAccounts("https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGtjcW9vOXdyZ3pOV21vQU51VmhzQnc&single=true&gid=0&output=html", "Facebook");
+foreach ($accounts as $id => $accountTypes) {
+    echo $id . "<br>" . PHP_EOL;
+    $doc = object_to_array($db->get($id));
+    // print_r($doc);
 
+    foreach ($accountTypes as $accountType => $accounts) {
+        if (!isset($doc["has" . $accountType]) || !is_array($doc["has" . $accountType])) {
+            $doc["has" . $accountType] = Array();
+        }
+        $doc["has" . $accountType] = array_unique(array_merge($doc["has" . $accountType], $accounts));
+    }
+    $db->save($doc);
+}
 ?>
 

--- /dev/null
+++ b/admin/validation.py
@@ -1,1 +1,30 @@
+#http://packages.python.org/CouchDB/client.html
+import couchdb
+import json
+import pprint
+import re
+from tidylib import tidy_document
 
+couch = couchdb.Server('http://127.0.0.1:5984/')
+
+# select database
+docsdb = couch['disclosr-documents']
+
+def f(x):
+	invalid = re.compile(r"ensure|testing|flicker|updating|longdesc|Accessibility Checks|not recognized")
+	valid = re.compile(r"line")
+	return (not invalid.search(x)) and valid.search(x) and x != ''
+
+for row in docsdb.view('app/getValidationRequired'): 
+    print row.id
+    html = docsdb.get_attachment(row.id,row.value.iterkeys().next()).read()
+    #print html
+    document, errors = tidy_document(html,options={'accessibility-check':1,'show-warnings':0,'markup':0},keep_doc=True)
+    #http://www.aprompt.ca/Tidy/accessibilitychecks.html
+    #print document
+    errors = '\n'.join(filter(f,errors.split('\n')))
+    #print errors
+    doc = docsdb.get(row.id)
+    doc['validation'] = errors
+    docsdb.save(doc)
+

directory:a/couchdb/settee -> directory:b/couchdb/settee
--- a/couchdb/settee
+++ b/couchdb/settee

file:a/graph.php -> file:b/graph.php
--- a/graph.php
+++ b/graph.php
@@ -6,36 +6,46 @@
     $format = $_REQUEST['format'];
 }
 
-function add_node($id, $label) {
+function add_node($id, $label, $parent="") {
     global $format;
     if ($format == "html") {
-        echo "nodes[\"$id\"] = graph.newNode({label: \"$label\"});" . PHP_EOL;
+       // echo "nodes[\"$id\"] = graph.newNode({label: \"$label\"});" . PHP_EOL;
     }
      if ($format == "dot" && $label != "") {
          echo "$id [label=\"$label\"];". PHP_EOL;
      }
+      if ($format == "gexf") {
+          echo "<node id='$id' label=\"".htmlentities($label,ENT_XML1)."\" ".($parent != ""? "pid='$parent'><viz:size value='1'/>":"><viz:size value='2'/>")
+              ."<viz:color b='".rand(0,255)."' g='".rand(0,255)."' r='".rand(0,255)."'/>"
+                  ."</node>". PHP_EOL;
+      }
 }
 
 function add_edge($from, $to, $color) {
     global $format;
     if ($format == "html") {
-        echo "graph.newEdge(nodes[\"$from\"], nodes['$to'], {color: '$color'});" . PHP_EOL;
+     //   echo "graph.newEdge(nodes[\"$from\"], nodes['$to'], {color: '$color'});" . PHP_EOL;
     }
     if ($format == "dot") {
         echo "$from -> $to ".($color != ""? "[color=$color]":"").";". PHP_EOL;
     }
+     if ($format == "gexf") {
+          echo "<edge id='$from$to' source='$from' target='$to' />". PHP_EOL;
+      }
+}
+if ($format == "gexf") {
+    //header('Content-Type: text/xml');
+     header('Content-Type: application/gexf+xml');
+echo '<?xml version="1.0" encoding="UTF-8"?>
+<gexf xmlns="http://www.gexf.net/1.2draft" xmlns:viz="http://www.gexf.net/1.2draft/viz" version="1.2">
+    <meta lastmodifieddate="2009-03-20">
+        <creator>Gexf.net</creator>
+        <description>A hello world! file</description>
+    </meta>
+    <graph mode="static" defaultedgetype="directed">
+        <nodes>'. PHP_EOL;
 }
 
-if ($format == "html") {
-    ?>
-    <script src="http://ajax.googleapis.com/ajax/libs/jquery/1.3.2/jquery.min.js"></script>
-    <script src="lib/springy/springy.js"></script>
-    <script src="lib/springy/springyui.js"></script>
-    <script>
-        var graph = new Graph();
-        var nodes = [];
-    <?php
-}
 if ($format == "dot") {
     echo 'digraph g {'. PHP_EOL;
 }
@@ -50,7 +60,10 @@
 } catch (SetteeRestClientException $e) {
     setteErrorHandler($e);
 }
-
+if ($format == "gexf") {
+echo '</nodes>
+        <edges>'. PHP_EOL;
+}
 try {
     $rows = $db->get_view("app", "byDeptStateName", null, true)->rows;
 //print_r($rows);
@@ -72,21 +85,59 @@
 }
 if ($format == "html") {
     ?>
-        window.onload = function() {
-            $(document).ready(function() {
-                var springy = $('#springydemo').springy({
-                    graph: graph
-                });
-            });
-        };
-    </script>
+ <div id="sigma-example" width="960" style="min-height:800px;background-color: #333;"></div>
+  <script src="javascripts/sigma.min.js"></script>
+  <script src="javascripts/sigma/plugins/sigma.parseGexf.js"></script>
+  <script src="javascripts/sigma/plugins/sigma.forceatlas2.js"></script>
+  <script type="text/javascript">function init() {
+  // Instantiate sigma.js and customize rendering:
+  var sigInst = sigma.init(document.getElementById('sigma-example')).drawingProperties({
+    defaultLabelColor: '#fff',
+    defaultLabelSize: 14,
+    defaultLabelBGColor: '#fff',
+    defaultLabelHoverColor: '#000',
+    labelThreshold: 6,
+    defaultEdgeType: 'curve'
+  }).graphProperties({
+    minNodeSize: 0.5,
+    maxNodeSize: 5,
+    minEdgeSize: 5,
+    maxEdgeSize: 5
+  }).mouseProperties({
+    maxRatio: 32
+  });
 
-    <canvas id="springydemo" width="1260" height="680" />
+  // Parse a GEXF encoded file to fill the graph
+  // (requires "sigma.parseGexf.js" to be included)
+  sigInst.parseGexf('graph.php?format=gexf');
+ sigInst.bind('downnodes',function(event){
+    var nodes = event.content;
+ });
+  // Draw the graph:
+  sigInst.draw();
+  // Start the ForceAtlas2 algorithm
+  // (requires "sigma.forceatlas2.js" to be included)
+  sigInst.startForceAtlas2();
+  
+}
+
+if (document.addEventListener) {
+  document.addEventListener("DOMContentLoaded", init, false);
+} else {
+  window.onload = init;
+}
+</script>
+
     <?php
 }
 if ($format == "dot") {
     echo "}";
 }
+if ($format == "gexf") {
+echo ' </edges>
+    </graph>
+</gexf>'. PHP_EOL;
+}
 //include_footer();
 ?>
 

--- a/include/couchdb.inc.php
+++ b/include/couchdb.inc.php
@@ -22,7 +22,10 @@
        },
        "byWebServer": {
            "map": "function(doc) {\n  emit(doc.web_server, doc);\n}"
-       }
+       },
+  "getValidationRequired": {
+       "map": "function(doc) {\nif (doc.mime_type == \"text/html\" \n&& typeof(doc.validation) == \"undefined\") {\n  emit(doc._id, doc._attachments);\n}\n}"
+   }
    }*/
 }
 
@@ -167,3 +170,4 @@
 function setteErrorHandler($e) {
     echo $e->getMessage() . "<br>" . PHP_EOL;
 }
+

directory:a/lib/springy (deleted)
--- a/lib/springy
+++ /dev/null