Parse complete directory.gov.au export into gexf
Parse complete directory.gov.au export into gexf


Former-commit-id: 7be06add579fbd14042ea9084bc738168a62dcdf

--- /dev/null
+++ b/admin/directory.gexf.php
@@ -1,1 +1,59 @@
+<?php
 
+// Graph accumulators that addNode() and addEdge() append to through `global`.
+// The root "gov" node is seeded here; it is the parent id handed to the first
+// addChildren() call further down.
+$nodes = [
+    ["id" => "gov", "label" => "Federal Government"],
+];
+$edges = [];
+
+/**
+ * Append one edge to the global $edges list.
+ *
+ * The edge id is the MD5 hex digest of the source id immediately followed by
+ * the target id.
+ *
+ * @param mixed $source id of the node the edge starts at
+ * @param mixed $target id of the node the edge ends at
+ */
+function addEdge($source, $target) {
+    global $edges;
+    $edgeId = md5($source . $target);
+    $edges[] = ["id" => $edgeId, "source" => $source, "target" => $target];
+}
+
+/**
+ * Append one node to the global $nodes list.
+ *
+ * @param mixed $id    identifier of the node
+ * @param mixed $label text stored as the node's label
+ * @param mixed $pid   identifier of the node's parent
+ */
+function addNode($id, $label, $pid) {
+    global $nodes;
+    $entry = ["id" => $id, "label" => $label, "pid" => $pid];
+    $nodes[] = $entry;
+}
+
+/**
+ * Recursively add the organization, organizationalUnit and person elements
+ * found directly under $parentXML to the graph.
+ *
+ * Each matching element becomes a node (id = its UUID attribute) with an edge
+ * from the element to $parentID, and is then searched for children of its own.
+ * Elements with any other name are skipped together with their subtree.
+ *
+ * @param string           $parentID  id of the node the children hang off
+ * @param SimpleXMLElement $parentXML element whose children are inspected
+ */
+function addChildren($parentID, $parentXML) {
+    foreach ($parentXML as $childXML) {
+        $type = $childXML->getName();
+        // Organizations and units carry their label in <name>, people in <fullName>.
+        if ($type == "organization" || $type == "organizationalUnit") {
+            $label = (string) $childXML->name;
+        } else if ($type == "person") {
+            $label = (string) $childXML->fullName;
+        } else {
+            continue;
+        }
+        $attr = $childXML->attributes();
+        // Cast to plain strings so the graph arrays do not hold on to
+        // SimpleXMLElement objects.
+        $id = (string) $attr['UUID'];
+        addNode($id, $label, $parentID);
+        addEdge($id, $parentID);
+        addChildren($id, $childXML);
+    }
+}
+
+// Build the graph from the directory export; without it there is nothing to emit.
+if (!file_exists('directoryexport.xml')) {
+    exit('Failed to open directoryexport.xml');
+}
+$xml = simplexml_load_file('directoryexport.xml');
+if ($xml === false) {
+    // simplexml_load_file() returns false when the file cannot be parsed.
+    exit('Failed to parse directoryexport.xml');
+}
+addChildren("gov", $xml);
+
+// Escape a value for use inside a double-quoted XML attribute. ENT_QUOTES is
+// required here: ENT_XML1 on its own leaves quote characters untouched, which
+// would let a label containing " terminate the attribute early.
+$xmlAttr = function ($value) {
+    return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE | ENT_XML1, 'UTF-8');
+};
+
+header('Content-Type: application/gexf+xml');
+echo '<?xml version="1.0" encoding="UTF-8"?>
+<gexf xmlns="http://www.gexf.net/1.2draft" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd" version="1.2">
+    <graph mode="static" defaultedgetype="directed">
+        <nodes>';
+foreach ($nodes as $node) {
+    // The seeded root node has no "pid" key, so the attribute is optional.
+    $pid = isset($node['pid']) ? 'pid="' . $xmlAttr($node['pid']) . '"' : "";
+    echo '          <node id="' . $xmlAttr($node['id']) . '" label="' . $xmlAttr($node['label']) . '" ' . $pid . ' />';
+}
+echo '</nodes>
+        <edges>';
+foreach ($edges as $edge) {
+    // Edge ids are MD5 hex digests and need no escaping.
+    echo '            <edge id="' . $edge['id'] . '" source="' . $xmlAttr($edge['source']) . '" target="' . $xmlAttr($edge['target']) . '" />';
+}
+echo '</edges>
+    </graph>
+</gexf>';
+

--- /dev/null
+++ b/admin/directoryexport.xml

--- a/admin/importAPSCEmployees.php
+++ b/admin/importAPSCEmployees.php
@@ -32,23 +32,35 @@
                 @$sums[$id][$timePeriod] += $data[1];
             } else {
                 echo "<br>ERROR NAME MISSING FROM ID LIST<br><bR>" . PHP_EOL;
-                
+
                 die();
-               
             }
         }
         fclose($handle);
     }
 }
 foreach ($sums as $id => $sum) {
-    echo $id. "<br>" . PHP_EOL;
+    echo $id . "<br>" . PHP_EOL;
     $doc = $db->get($id);
-   // print_r($doc);
-    if (isset($doc->statistics)) $doc->statistics = Array();
+    echo $doc->name . "<br>" . PHP_EOL;
+    // print_r($doc);
+    // The comparison below reads the statistics tree with object syntax, so it
+    // is also created and written as objects; mixing in array syntax fails on
+    // documents whose statistics property is already an object.
+    $changed = false;
+    if (!isset($doc->statistics)) {
+        $changed = true;
+        $doc->statistics = new stdClass();
+    }
+    if (!isset($doc->statistics->employees)) {
+        $doc->statistics->employees = new stdClass();
+    }
     foreach ($sum as $timePeriod => $value) {
-        $doc->statistics["employees"][$timePeriod] = Array("value"=>$value, "source"=>"http://apsc.gov.au/stateoftheservice/");
+        // Only rewrite a time period whose stored value is missing or different.
+        if (!isset($doc->statistics->employees->$timePeriod->value)
+                || $doc->statistics->employees->$timePeriod->value != $value) {
+            $changed = true;
+            $doc->statistics->employees->$timePeriod = (object) Array("value" => $value, "source" => "http://apsc.gov.au/stateoftheservice/");
+        }
     }
-    $db->save($doc);
+    // Skip the save when nothing differs from what is already stored.
+    if ($changed) {
+        $db->save($doc);
+    } else {
+        echo "not changed" . "<br>" . PHP_EOL;
+    }
 }
 // employees: timeperiod, source = apsc state of service, value 
 ?>

file:b/admin/metadata.py (new)
--- /dev/null
+++ b/admin/metadata.py
@@ -1,1 +1,22 @@
+#http://packages.python.org/CouchDB/client.html
+import couchdb
+from BeautifulSoup import BeautifulSoup
 
+couch = couchdb.Server('http://127.0.0.1:5984/')
+
+# select database
+docsdb = couch['disclosr-documents']
+
+# For every row of the getMetadataExtractRequired view, fetch one attachment of
+# the document, parse it as HTML and print the name of each named <meta> tag.
+for row in docsdb.view('app/getMetadataExtractRequired'):
+    print(row.id)
+    # presumably row.value maps attachment names to attachment stubs; the first
+    # key is used as the attachment to read -- confirm against the view definition
+    attachment_name = next(iter(row.value))
+    attachment = docsdb.get_attachment(row.id, attachment_name)
+    if attachment is None:
+        # get_attachment() returns its default (None) when the attachment is missing
+        continue
+    html = attachment.read()
+    # placeholder for the extracted values; only referenced by the disabled save below
+    metadata = []
+    # http://www.crummy.com/software/BeautifulSoup/documentation.html
+    soup = BeautifulSoup(html)
+    # soup.meta would be only the first <meta> tag; findAll returns all of them
+    for metatag in soup.findAll('meta'):
+        name = metatag.get('name')
+        if name is None:
+            # e.g. <meta http-equiv=...> or <meta charset=...> have no name attribute
+            continue
+        print(name)
+    doc = docsdb.get(row.id)
+    # saving is intentionally still disabled:
+    # doc['metadata'] = metadata
+    # docsdb.save(doc)
+

--- a/bubbletree.php
+++ b/bubbletree.php
@@ -24,7 +24,7 @@
 $color = new Lux_Color();
 
 $portfolios = Array();
-
+$total = 0;
 $db = $server->get_db('disclosr-agencies');
 try {
     $rows = $db->get_view("app", "byDeptStateName", null, true)->rows;
@@ -42,10 +42,15 @@
     foreach ($rows as $row) {
 	$employees = 0;
 	$portfolioid = 0;
-	if ($row->value->employees) $employees = $row->value->employees;
+	if (isset($row->value->employees)) $employees = $row->value->employees;
 	if (isset($row->value->statistics->employees)) {
 $agencyEmployeesArray = object_to_array($row->value->statistics->employees);
+if (isset($agencyEmployeesArray["2010-2011"]["value"])) {
  $employees = $agencyEmployeesArray["2010-2011"]["value"];
+} else {
+    // get last year that is recorded? throw error?
+    continue;
+}
 }
 if (!($employees > 0)) $employees =0;
 	if (isset($row->value->parentOrg)) $portfolioid = $row->value->parentOrg;

--- a/include/couchdb.inc.php
+++ b/include/couchdb.inc.php
@@ -5,28 +5,28 @@
 require ($basePath . 'couchdb/settee/src/settee.php');
 
 function createDocumentsDesignDoc() {
-    /*"views": {
-       "web_server": {
-           "map": "function(doc) {\n  emit(doc.web_server, 1);\n}",
-           "reduce": "function (key, values, rereduce) {\n    return sum(values);\n}"
-       },
-       "byAgency": {
-           "map": "function(doc) {\n  emit(doc.agencyID, 1);\n}",
-           "reduce": "function (key, values, rereduce) {\n    return sum(values);\n}"
-       },
-       "byURL": {
-           "map": "function(doc) {\n  emit(doc.url, doc);\n}"
-       },
-       "agency": {
-           "map": "function(doc) {\n  emit(doc.agencyID, doc);\n}"
-       },
-       "byWebServer": {
-           "map": "function(doc) {\n  emit(doc.web_server, doc);\n}"
-       },
-  "getValidationRequired": {
-       "map": "function(doc) {\nif (doc.mime_type == \"text/html\" \n&& typeof(doc.validation) == \"undefined\") {\n  emit(doc._id, doc._attachments);\n}\n}"
-   }
-   }*/
+    /* "views": {
+      "web_server": {
+      "map": "function(doc) {\n  emit(doc.web_server, 1);\n}",
+      "reduce": "function (key, values, rereduce) {\n    return sum(values);\n}"
+      },
+      "byAgency": {
+      "map": "function(doc) {\n  emit(doc.agencyID, 1);\n}",
+      "reduce": "function (key, values, rereduce) {\n    return sum(values);\n}"
+      },
+      "byURL": {
+      "map": "function(doc) {\n  emit(doc.url, doc);\n}"
+      },
+      "agency": {
+      "map": "function(doc) {\n  emit(doc.agencyID, doc);\n}"
+      },
+      "byWebServer": {
+      "map": "function(doc) {\n  emit(doc.web_server, doc);\n}"
+      },
+      "getValidationRequired": {
+      "map": "function(doc) {\nif (doc.mime_type == \"text/html\" \n&& typeof(doc.validation) == \"undefined\") {\n  emit(doc._id, doc._attachments);\n}\n}"
+      }
+      } */
 }
 
 function createAgencyDesignDoc() {
@@ -95,7 +95,7 @@
   }
 }";
     // http://stackoverflow.com/questions/646628/javascript-startswith
-$obj->views->score->map =  'if(!String.prototype.startsWith){
+    $obj->views->score->map = 'if(!String.prototype.startsWith){
     String.prototype.startsWith = function (str) {
         return !this.indexOf(str);
     }
@@ -119,7 +119,7 @@
         emit(count+doc._id, {id:doc._id, name: doc.name, score:count, orgType: doc.orgType, portfolio:portfolio});
     }
 }';
-        $obj->views->scoreHas->map = 'if(!String.prototype.startsWith){
+    $obj->views->scoreHas->map = 'if(!String.prototype.startsWith){
     String.prototype.startsWith = function (str) {
         return !this.indexOf(str);
     }
@@ -142,7 +142,7 @@
     $obj->views->scoreHas->reduce = 'function (key, values, rereduce) {
     return sum(values);
 }';
-        $obj->views->fieldNames->map = '
+    $obj->views->fieldNames->map = '
 function(doc) {
 for(var propName in doc) {
      	emit(propName, doc._id);
@@ -157,16 +157,16 @@
 }
 
 if (php_uname('n') == "vanille") {
-$serverAddr = 'http://192.168.178.21:5984/';
-   
+    $serverAddr = 'http://192.168.178.21:5984/';
 } else
 if (php_uname('n') == "KYUUBEY") {
 
-    $serverAddr = 'http://192.168.1.148:5984/';
+    $serverAddr = 'http://127.0.0.1:5984/';
 } else {
     $serverAddr = 'http://127.0.0.1:5984/';
 }
- $server = new SetteeServer($serverAddr);
+$server = new SetteeServer($serverAddr);
+
 function setteErrorHandler($e) {
     echo $e->getMessage() . "<br>" . PHP_EOL;
 }

file:b/webserver.php (new)
--- /dev/null
+++ b/webserver.php
@@ -1,1 +1,43 @@
+<?php
 
+include_once('include/common.inc.php');
+include_header();
+
+// Escape a value before embedding it in the HTML table.
+$h = function ($value) {
+    return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8');
+};
+
+// One row per agency: its name, the first token of the recorded web server
+// string, and the stored validation output for the agency's website document.
+echo "<table>
+    <tr><th>name</th><th>webserver</th><th>accessibility errors</th></tr>";
+$agenciesdb = $server->get_db('disclosr-agencies');
+$docsdb = $server->get_db('disclosr-documents');
+try {
+    $rows = $agenciesdb->get_view("app", "all", null, true)->rows;
+
+    if ($rows) {
+        foreach ($rows as $row) {
+            echo "<tr><td>" . $h($row->value->name) . "</td>";
+            if (isset($row->value->website)) {
+                try {
+                    // Website documents are looked up by the MD5 of the website URL.
+                    $website = $docsdb->get(md5($row->value->website));
+                    // Only the part of the server string before the first space is shown.
+                    $serverParts = explode(" ", isset($website->web_server) ? $website->web_server : "");
+                    echo "<td>" . $h($serverParts[0]) . "</td>";
+                    if (!isset($website->validation)) {
+                        echo "<td>?</td>";
+                    } else if ($website->validation == "") {
+                        echo "<td>No error</td>";
+                    } else {
+                        echo "<td><pre>" . $h($website->validation) . "</pre></td>";
+                    }
+                } catch (SetteeRestClientException $e) {
+                    // Best effort: a failed lookup leaves the row with only its name cell.
+                    // setteErrorHandler($e);
+                }
+            }
+            echo "</tr>";
+        }
+    }
+} catch (SetteeRestClientException $e) {
+    setteErrorHandler($e);
+}
+echo "</table>";
+include_footer();