Parse complete directory.gov.au export into gexf
Parse complete directory.gov.au export into gexf


Former-commit-id: 7be06add579fbd14042ea9084bc738168a62dcdf

--- /dev/null
+++ b/admin/directory.gexf.php
@@ -1,1 +1,105 @@
+<?php
 
+/**
+ * Converts the directory.gov.au XML export into a GEXF 1.2 graph.
+ *
+ * Reads directoryexport.xml from the current working directory and echoes a
+ * GEXF document. Every organization, organizationalUnit and person element
+ * becomes a node whose "pid" is the id of the enclosing element, plus one edge
+ * from that node to its parent. Top-level elements hang off the "gov" node.
+ */
+$nodes = Array(Array("id" => "gov", "label" => "Federal Government"));
+$edges = Array();
+
+/**
+ * Escapes a value for use inside a double-quoted XML attribute.
+ *
+ * ENT_QUOTES is needed because ENT_XML1 on its own leaves quote characters
+ * unescaped, so a label containing " would end the attribute early.
+ *
+ * @param mixed $value String or SimpleXMLElement; cast to string before escaping.
+ * @return string Text that is safe between double quotes in XML.
+ */
+function xmlAttribute($value) {
+    return htmlspecialchars((string) $value, ENT_QUOTES | ENT_XML1, 'UTF-8');
+}
+
+/**
+ * Appends an edge to the global $edges list.
+ *
+ * @param mixed $source Id of the node the edge starts from.
+ * @param mixed $target Id of the node the edge points to.
+ */
+function addEdge($source, $target) {
+    global $edges;
+    // Keep plain strings in the list, not SimpleXMLElement attribute objects.
+    $source = (string) $source;
+    $target = (string) $target;
+    // The edge id is the md5 of both endpoint ids concatenated.
+    $edges[] = Array("id" => md5($source . $target), "source" => $source, "target" => $target);
+}
+
+/**
+ * Appends a node to the global $nodes list.
+ *
+ * @param mixed $id    Node identifier.
+ * @param mixed $label Display label.
+ * @param mixed $pid   Identifier of the parent node.
+ */
+function addNode($id, $label, $pid) {
+    global $nodes;
+    $nodes[] = Array("id" => (string) $id, "label" => (string) $label, "pid" => (string) $pid);
+}
+
+/**
+ * Recursively registers a node and a child-to-parent edge for every
+ * organization, organizationalUnit and person element below $parentXML.
+ *
+ * Elements with any other name are skipped together with their descendants.
+ *
+ * @param mixed            $parentID  Id the direct children are attached to.
+ * @param SimpleXMLElement $parentXML Element whose children are walked.
+ */
+function addChildren($parentID, $parentXML) {
+    foreach ($parentXML as $childXML) {
+        $type = $childXML->getName();
+        if ($type == "organization" || $type == "organizationalUnit") {
+            $label = $childXML->name;
+        } else if ($type == "person") {
+            $label = $childXML->fullName;
+        } else {
+            continue;
+        }
+        $attr = $childXML->attributes();
+        $id = (string) $attr['UUID'];
+        addNode($id, $label, $parentID);
+        addEdge($id, $parentID);
+        addChildren($id, $childXML);
+    }
+}
+
+// simplexml_load_file() returns false for a file that exists but cannot be parsed.
+$xml = file_exists('directoryexport.xml') ? simplexml_load_file('directoryexport.xml') : false;
+if ($xml === false) {
+    exit('Failed to open directoryexport.xml');
+}
+addChildren("gov", $xml);
+
+header('Content-Type: application/gexf+xml');
+echo '<?xml version="1.0" encoding="UTF-8"?>
+<gexf xmlns="http://www.gexf.net/1.2draft" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd" version="1.2">
+    <graph mode="static" defaultedgetype="directed">
+        <nodes>';
+foreach ($nodes as $node) {
+    echo '          <node id="' . xmlAttribute($node['id']) . '" label="' . xmlAttribute($node['label']) . '" ' . (isset($node['pid']) ? 'pid="' . xmlAttribute($node['pid']) . '"' : "") . ' />';
+}
+echo '</nodes>
+        <edges>';
+foreach ($edges as $edge) {
+    echo '            <edge id="' . $edge['id'] . '" source="' . xmlAttribute($edge['source']) . '" target="' . xmlAttribute($edge['target']) . '" />';
+}
+echo '</edges>
+    </graph>
+</gexf>';
+?>
+

--- /dev/null
+++ b/admin/directoryexport.xml

--- a/admin/importAPSCEmployees.php
+++ b/admin/importAPSCEmployees.php
@@ -32,23 +32,41 @@
                 @$sums[$id][$timePeriod] += $data[1];
             } else {
                 echo "<br>ERROR NAME MISSING FROM ID LIST<br><bR>" . PHP_EOL;
-                
+
                 die();
-               
             }
         }
         fclose($handle);
     }
 }
 foreach ($sums as $id => $sum) {
-    echo $id. "<br>" . PHP_EOL;
+    echo $id . "<br>" . PHP_EOL;
     $doc = $db->get($id);
-   // print_r($doc);
-    if (isset($doc->statistics)) $doc->statistics = Array();
+    echo $doc->name . "<br>" . PHP_EOL;
+    // print_r($doc);
+    // Save only when a figure is new or differs from the stored one.
+    $changed = false;
+    if (!isset($doc->statistics)) {
+        $changed = true;
+        $doc->statistics = new stdClass();
+    }
+    // Documents are read through object properties, so build the missing
+    // levels as objects too instead of mixing in array writes.
+    if (!isset($doc->statistics->employees)) {
+        $doc->statistics->employees = new stdClass();
+    }
     foreach ($sum as $timePeriod => $value) {
-        $doc->statistics["employees"][$timePeriod] = Array("value"=>$value, "source"=>"http://apsc.gov.au/stateoftheservice/");
+        if (!isset($doc->statistics->employees->$timePeriod->value)
+                || $doc->statistics->employees->$timePeriod->value != $value) {
+            $changed = true;
+            $doc->statistics->employees->$timePeriod = (object) Array("value" => $value, "source" => "http://apsc.gov.au/stateoftheservice/");
+        }
     }
-    $db->save($doc);
+    if ($changed) {
+        $db->save($doc);
+    } else {
+        echo "not changed" . "<br>" . PHP_EOL;
+    }
 }
 // employees: timeperiod, source = apsc state of service, value 
 ?>

file:b/admin/metadata.py (new)
--- /dev/null
+++ b/admin/metadata.py
@@ -1,1 +1,30 @@
+#http://packages.python.org/CouchDB/client.html
+import couchdb
+from BeautifulSoup import BeautifulSoup
 
+couch = couchdb.Server('http://127.0.0.1:5984/')
+
+# select database
+docsdb = couch['disclosr-documents']
+
+# Print the name attribute of every <meta> tag in each document returned by
+# the app/getMetadataExtractRequired view.
+for row in docsdb.view('app/getMetadataExtractRequired'):
+    print(row.id)
+    # The first key of row.value is used as the attachment name
+    # (presumably the view emits doc._attachments -- TODO confirm).
+    html = docsdb.get_attachment(row.id, next(iter(row.value))).read()
+    metadata = []
+    # http://www.crummy.com/software/BeautifulSoup/documentation.html
+    soup = BeautifulSoup(html)
+    # soup.meta is only the first <meta> tag; findAll() returns all of them.
+    for metatag in soup.findAll('meta'):
+        # Not every <meta> tag has a name (e.g. http-equiv), so avoid KeyError.
+        name = metatag.get('name')
+        if name is not None:
+            print(name)
+    doc = docsdb.get(row.id)
+    # Saving is still disabled; these were "//" lines, which is not valid Python.
+    # doc['metadata'] = metadata
+    # docsdb.save(doc)
+

--- a/bubbletree.php
+++ b/bubbletree.php
@@ -24,7 +24,7 @@
 $color = new Lux_Color();
 
 $portfolios = Array();
-
+$total = 0;
 $db = $server->get_db('disclosr-agencies');
 try {
     $rows = $db->get_view("app", "byDeptStateName", null, true)->rows;
@@ -42,10 +42,15 @@
     foreach ($rows as $row) {
 	$employees = 0;
 	$portfolioid = 0;
-	if ($row->value->employees) $employees = $row->value->employees;
+	if (isset($row->value->employees)) $employees = $row->value->employees;
 	if (isset($row->value->statistics->employees)) {
 $agencyEmployeesArray = object_to_array($row->value->statistics->employees);
+if (isset($agencyEmployeesArray["2010-2011"]["value"])) {
  $employees = $agencyEmployeesArray["2010-2011"]["value"];
+} else {
+    // get last year that is recorded? throw error?
+    continue;
+}
 }
 if (!($employees > 0)) $employees =0;
 	if (isset($row->value->parentOrg)) $portfolioid = $row->value->parentOrg;

file:a/graph.php -> file:b/graph.php
--- a/graph.php
+++ b/graph.php
@@ -113,12 +113,12 @@
  sigInst.bind('downnodes',function(event){
     var nodes = event.content;
  });
-  // Draw the graph :
-  sigInst.draw();
   // Start the ForceAtlas2 algorithm
   // (requires "sigma.forceatlas2.js" to be included)
   sigInst.startForceAtlas2();
   
+  // Draw the graph :
+  sigInst.draw();
 }
 
 if (document.addEventListener) {

--- a/include/couchdb.inc.php
+++ b/include/couchdb.inc.php
@@ -5,28 +5,28 @@
 require ($basePath . 'couchdb/settee/src/settee.php');
 
 function createDocumentsDesignDoc() {
-    /*"views": {
-       "web_server": {
-           "map": "function(doc) {\n  emit(doc.web_server, 1);\n}",
-           "reduce": "function (key, values, rereduce) {\n    return sum(values);\n}"
-       },
-       "byAgency": {
-           "map": "function(doc) {\n  emit(doc.agencyID, 1);\n}",
-           "reduce": "function (key, values, rereduce) {\n    return sum(values);\n}"
-       },
-       "byURL": {
-           "map": "function(doc) {\n  emit(doc.url, doc);\n}"
-       },
-       "agency": {
-           "map": "function(doc) {\n  emit(doc.agencyID, doc);\n}"
-       },
-       "byWebServer": {
-           "map": "function(doc) {\n  emit(doc.web_server, doc);\n}"
-       },
-  "getValidationRequired": {
-       "map": "function(doc) {\nif (doc.mime_type == \"text/html\" \n&& typeof(doc.validation) == \"undefined\") {\n  emit(doc._id, doc._attachments);\n}\n}"
-   }
-   }*/
+    /* "views": {
+      "web_server": {
+      "map": "function(doc) {\n  emit(doc.web_server, 1);\n}",
+      "reduce": "function (key, values, rereduce) {\n    return sum(values);\n}"
+      },
+      "byAgency": {
+      "map": "function(doc) {\n  emit(doc.agencyID, 1);\n}",
+      "reduce": "function (key, values, rereduce) {\n    return sum(values);\n}"
+      },
+      "byURL": {
+      "map": "function(doc) {\n  emit(doc.url, doc);\n}"
+      },
+      "agency": {
+      "map": "function(doc) {\n  emit(doc.agencyID, doc);\n}"
+      },
+      "byWebServer": {
+      "map": "function(doc) {\n  emit(doc.web_server, doc);\n}"
+      },
+      "getValidationRequired": {
+      "map": "function(doc) {\nif (doc.mime_type == \"text/html\" \n&& typeof(doc.validation) == \"undefined\") {\n  emit(doc._id, doc._attachments);\n}\n}"
+      }
+      } */
 }
 
 function createAgencyDesignDoc() {
@@ -95,7 +95,7 @@
   }
 }";
     // http://stackoverflow.com/questions/646628/javascript-startswith
-$obj->views->score->map =  'if(!String.prototype.startsWith){
+    $obj->views->score->map = 'if(!String.prototype.startsWith){
     String.prototype.startsWith = function (str) {
         return !this.indexOf(str);
     }
@@ -119,7 +119,7 @@
         emit(count+doc._id, {id:doc._id, name: doc.name, score:count, orgType: doc.orgType, portfolio:portfolio});
     }
 }';
-        $obj->views->scoreHas->map = 'if(!String.prototype.startsWith){
+    $obj->views->scoreHas->map = 'if(!String.prototype.startsWith){
     String.prototype.startsWith = function (str) {
         return !this.indexOf(str);
     }
@@ -142,7 +142,7 @@
     $obj->views->scoreHas->reduce = 'function (key, values, rereduce) {
     return sum(values);
 }';
-        $obj->views->fieldNames->map = '
+    $obj->views->fieldNames->map = '
 function(doc) {
 for(var propName in doc) {
      	emit(propName, doc._id);
@@ -157,16 +157,16 @@
 }
 
 if (php_uname('n') == "vanille") {
-$serverAddr = 'http://192.168.178.21:5984/';
-   
+    $serverAddr = 'http://192.168.178.21:5984/';
 } else
 if (php_uname('n') == "KYUUBEY") {
 
-    $serverAddr = 'http://192.168.1.148:5984/';
+    $serverAddr = 'http://127.0.0.1:5984/';
 } else {
     $serverAddr = 'http://127.0.0.1:5984/';
 }
- $server = new SetteeServer($serverAddr);
+$server = new SetteeServer($serverAddr);
+
 function setteErrorHandler($e) {
     echo $e->getMessage() . "<br>" . PHP_EOL;
 }

--- a/unimplemented/foundation.html
+++ /dev/null
@@ -1,137 +1,1 @@
-<!DOCTYPE html>
 
-<!-- paulirish.com/2008/conditional-stylesheets-vs-css-hacks-answer-neither/ -->
-<!--[if lt IE 7]> <html class="no-js lt-ie9 lt-ie8 lt-ie7" lang="en"> <![endif]-->
-<!--[if IE 7]>    <html class="no-js lt-ie9 lt-ie8" lang="en"> <![endif]-->
-<!--[if IE 8]>    <html class="no-js lt-ie9" lang="en"> <![endif]-->
-<!--[if gt IE 8]><!--> <html lang="en"> <!--<![endif]-->
-<head>
-	<meta charset="utf-8" />
-
-	<!-- Set the viewport width to device width for mobile -->
-	<meta name="viewport" content="width=device-width" />
-
-	<title>Welcome to Foundation</title>
-  
-	<!-- Included CSS Files -->
-	<link rel="stylesheet" href="stylesheets/foundation.css">
-	<link rel="stylesheet" href="stylesheets/app.css">
-
-	<!--[if lt IE 9]>
-		<link rel="stylesheet" href="stylesheets/ie.css">
-	<![endif]-->
-
-
-	<!-- IE Fix for HTML5 Tags -->
-	<!--[if lt IE 9]>
-		<script src="http://html5shiv.googlecode.com/svn/trunk/html5.js"></script>
-	<![endif]-->
-
-</head>
-<body>
-
-	<!-- container -->
-	<div class="container">
-
-		<div class="row">
-			<div class="twelve columns">
-				<h2>Welcome to Foundation</h2>
-				<p>This is version 2.1.4 released on December 19, 2011</p>
-				<hr />
-			</div>
-		</div>
-
-		<div class="row">
-			<div class="eight columns">
-				<h3>The Grid</h3>
-
-				<!-- Grid Example -->
-				<div class="row">
-					<div class="twelve columns">
-						<div class="panel">
-							<p>This is a twelve column section in a row. Each of these includes a div.panel element so you can see where the columns are - it's not required at all for the grid.</p>
-						</div>
-					</div>
-				</div>
-				<div class="row">
-					<div class="six columns">
-						<div class="panel">
-							<p>Six columns</p>
-						</div>
-					</div>
-					<div class="six columns">
-						<div class="panel">
-							<p>Six columns</p>
-						</div>
-					</div>
-				</div>
-				<div class="row">
-					<div class="four columns">
-						<div class="panel">
-							<p>Four columns</p>
-						</div>
-					</div>
-					<div class="four columns">
-						<div class="panel">
-							<p>Four columns</p>
-						</div>
-					</div>
-					<div class="four columns">
-						<div class="panel">
-							<p>Four columns</p>
-						</div>
-					</div>
-				</div>
-
-				<h3>Tabs</h3>
-				<dl class="tabs">
-					<dd><a href="#simple1" class="active">Simple Tab 1</a></dd>
-					<dd><a href="#simple2">Simple Tab 2</a></dd>
-					<dd><a href="#simple3">Simple Tab 3</a></dd>
-				</dl>
-
-				<ul class="tabs-content">
-					<li class="active" id="simple1Tab">This is simple tab 1's content. Pretty neat, huh?</li>
-					<li id="simple2Tab">This is simple tab 2's content. Now you see it!</li>
-					<li id="simple3Tab">This is simple tab 3's content. It's, you know...okay.</li>
-				</ul>
-
-				<h3>Buttons</h3>
-
-				<p><a href="#" class="small blue button">Small Blue Button</a></p>
-				<p><a href="#" class="blue button">Medium Blue Button</a></p>
-				<p><a href="#" class="large blue button">Large Blue Button</a></p>
-
-				<p><a href="#" class="nice radius small blue button">Nice Blue Button</a></p>
-				<p><a href="#" class="nice radius blue button">Nice Blue Button</a></p>
-				<p><a href="#" class="nice radius large blue button">Nice Blue Button</a></p>
-
-			</div>
-
-			<div class="four columns">			
-				<h4>Getting Started</h4>
-				<p>We're stoked you want to try Foundation! To get going, this file (index.html) includes some basic styles you can modify, play around with, or totally destroy to get going.</p>
-
-				<h4>Other Resources</h4>
-				<p>Once you've exhausted the fun in this document, you should check out:</p>
-				<ul class="disc">
-					<li><a href="http://foundation.zurb.com/docs">Foundation Documentation</a><br />Everything you need to know about using the framework.</li>
-					<li><a href="http://github.com/zurb/foundation">Foundation on Github</a><br />Latest code, issue reports, feature requests and more.</li>
-					<li><a href="http://twitter.com/foundationzurb">@foundationzurb</a><br />Ping us on Twitter if you have questions. If you build something with this we'd love to see it (and send you a totally boss sticker).</li>
-				</ul>
-			</div>
-		</div>
-
-	</div>
-	<!-- container -->
-
-
-
-
-	<!-- Included JS Files -->
-	<script src="javascripts/foundation.js"></script>
-	<script src="javascripts/app.js"></script>
-
-</body>
-</html>
-

--- a/unimplemented/humans.txt
+++ /dev/null
@@ -1,8 +1,1 @@
-/* Foundation was made by ZURB, an interaction design and design strategy firm in Campbell, CA */
-/* zurb.com */
-/* humanstxt.org */
 
-/* SITE */
-  Standards: HTML5, CSS3
-  Components: jQuery, Orbit, Reveal
-  Software: Coda, Textmate, Git

--- a/unimplemented/lastUpdated.php
+++ /dev/null
@@ -1,2 +1,1 @@
-for each agency, record when last changed (number of days too) and show a couple of URLs that were in that change
 

--- a/unimplemented/validation.php
+++ /dev/null

--- a/unimplemented/webservers.php
+++ /dev/null
@@ -1,1 +1,1 @@
-for each agency, find a scrapped document and read the webserver off it
+

file:b/webserver.php (new)
--- /dev/null
+++ b/webserver.php
@@ -1,1 +1,53 @@
+<?php
 
+/**
+ * Renders a table of agencies showing, for each agency website, the first word
+ * of its recorded web_server value and its recorded validation result.
+ */
+include_once('include/common.inc.php');
+include_header();
+
+echo "<table>
+    <tr><th>name</th><th>webserver</th><th>accessiblity errors</th></tr>";
+$agenciesdb = $server->get_db('disclosr-agencies');
+$docsdb = $server->get_db('disclosr-documents');
+try {
+    $rows = $agenciesdb->get_view("app", "all", null, true)->rows;
+
+    if ($rows) {
+        foreach ($rows as $row) {
+            // Empty placeholders keep every row at three cells, even when the agency
+            // has no website or its document cannot be fetched.
+            $serverCell = "<td></td>";
+            $validationCell = "<td></td>";
+            if (isset($row->value->website)) {
+                try {
+                    // The document is looked up by the md5 of the website URL.
+                    $website = $docsdb->get(md5($row->value->website));
+                    if (isset($website->web_server)) {
+                        $serverParts = explode(" ", $website->web_server);
+                        $serverCell = "<td>" . htmlspecialchars($serverParts[0], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "</td>";
+                    }
+                    if (!isset($website->validation)) {
+                        $validationCell = "<td>?</td>";
+                    } else if ($website->validation == "") {
+                        $validationCell = "<td>No error</td>";
+                    } else {
+                        // Escape all markup characters, not just "<".
+                        $validationCell = "<td><pre>" . htmlspecialchars($website->validation, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "</pre></td>";
+                    }
+                } catch (SetteeRestClientException $e) {
+                    // Best effort: the lookup failed, so the placeholders stay.
+                }
+            }
+            $name = isset($row->value->name) ? $row->value->name : "";
+            echo "<tr><td>" . htmlspecialchars($name, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "</td>" . $serverCell . $validationCell . "</tr>";
+        }
+    }
+} catch (SetteeRestClientException $e) {
+    setteErrorHandler($e);
+}
+// Close the table opened above, even when the view request failed.
+echo "</table>";
+include_footer();
+?>