Parse complete directory.gov.au export into gexf
Parse complete directory.gov.au export into gexf


Former-commit-id: 7be06add579fbd14042ea9084bc738168a62dcdf

[submodule "couchdb/couchdb-lucene"] [submodule "couchdb/couchdb-lucene"]
path = couchdb/couchdb-lucene path = couchdb/couchdb-lucene
url = https://github.com/rnewson/couchdb-lucene.git url = https://github.com/rnewson/couchdb-lucene.git
[submodule "couchdb/settee"] [submodule "couchdb/settee"]
path = couchdb/settee path = couchdb/settee
url = https://github.com/inadarei/settee.git url = https://github.com/inadarei/settee.git
[submodule "lib/php-diff"] [submodule "lib/php-diff"]
path = lib/php-diff path = lib/php-diff
url = https://github.com/chrisboulton/php-diff.git url = https://github.com/chrisboulton/php-diff.git
[submodule "lib/Requests"] [submodule "lib/Requests"]
path = lib/Requests path = lib/Requests
url = https://github.com/rmccue/Requests.git url = https://github.com/rmccue/Requests.git
[submodule "javascripts/flotr2"] [submodule "javascripts/flotr2"]
path = javascripts/flotr2 path = javascripts/flotr2
url = https://github.com/HumbleSoftware/Flotr2.git url = https://github.com/HumbleSoftware/Flotr2.git
[submodule "lib/phpquery"] [submodule "lib/phpquery"]
path = lib/phpquery path = lib/phpquery
url = https://github.com/TobiaszCudnik/phpquery.git url = https://github.com/TobiaszCudnik/phpquery.git
[submodule "javascripts/sigma"] [submodule "javascripts/sigma"]
path = javascripts/sigma path = javascripts/sigma
url = https://github.com/jacomyal/sigma.js.git url = https://github.com/jacomyal/sigma.js.git
  [submodule "javascripts/bubbletree"]
  path = javascripts/bubbletree
  url = https://github.com/okfn/bubbletree.git
   
  <?php
 
  $nodes = Array(Array("id" => "gov", "label" => "Federal Government"));
  $edges = Array();
 
  function addEdge($source, $target) {
  global $edges;
  $edges[] = Array("id" => md5($source . $target), "source" => $source, "target" => $target);
  }
 
  function addNode($id, $label, $pid) {
  global $nodes;
  $nodes[] = Array("id" => $id, "label" => $label , "pid" => $pid);
  }
 
  function addChildren($parentID, $parentXML) {
  foreach ($parentXML as $childXML) {
 
  if ($childXML->getName() == "organization" || $childXML->getName() == "organizationalUnit" || $childXML->getName() == "person") {
  $attr = $childXML->attributes();
  $id = $attr['UUID'];
  if ($childXML->getName() == "organization" || $childXML->getName() == "organizationalUnit") {
 
  $label = $childXML->name;
  } else if ($childXML->getName() == "person") {
  $label = $childXML->fullName;
  }
  addNode($id, $label, $parentID);
  addEdge($id, $parentID);
  addChildren($id, $childXML);
  }
  }
  }
 
  if (file_exists('directoryexport.xml')) {
  $xml = simplexml_load_file('directoryexport.xml');
 
  addChildren("gov", $xml);
  } else {
  exit('Failed to open directoryexport.xml');
  }
  header('Content-Type: application/gexf+xml');
  echo '<?xml version="1.0" encoding="UTF-8"?>
  <gexf xmlns="http://www.gexf.net/1.2draft" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd" version="1.2">
  <graph mode="static" defaultedgetype="directed">
  <nodes>';
  foreach ($nodes as $node) {
  echo ' <node id="' . $node['id'] . '" label="' . htmlentities($node['label'],ENT_XML1) . '" ' . (isset($node['pid']) ? 'pid="' . $node['pid'] . '"' : "") . ' />';
  }
  echo '</nodes>
  <edges>';
  foreach ($edges as $edge) {
  echo ' <edge id="' . $edge['id'] . '" source="' . $edge['source'] . '" target="' . $edge['target'] . '" />';
  }
  echo '</edges>
  </graph>
  </gexf>';
  ?>
 
<?php <?php
   
require_once '../include/common.inc.php'; require_once '../include/common.inc.php';
   
$db = $server->get_db('disclosr-agencies'); $db = $server->get_db('disclosr-agencies');
$rows = $db->get_view("app", "byName")->rows; $rows = $db->get_view("app", "byName")->rows;
$nametoid = Array(); $nametoid = Array();
$sums = Array(); $sums = Array();
foreach ($rows as $row) { foreach ($rows as $row) {
$nametoid[trim($row->key)] = $row->value; $nametoid[trim($row->key)] = $row->value;
} }
$employeeCSVs = Array("2002-2003" => "0203apsemployees.csv", $employeeCSVs = Array("2002-2003" => "0203apsemployees.csv",
"2003-2004" => "0304apsemployees.csv", "2003-2004" => "0304apsemployees.csv",
"2004-2005" => "0405apsemployees.csv", "2004-2005" => "0405apsemployees.csv",
"2005-2006" => "0506apsemployees.csv", "2005-2006" => "0506apsemployees.csv",
"2006-2007" => "0607apsemployees.csv", "2006-2007" => "0607apsemployees.csv",
"2007-2008" => "0708apsemployees.csv", "2007-2008" => "0708apsemployees.csv",
"2008-2009" => "0809apsemployees.csv", "2008-2009" => "0809apsemployees.csv",
"2009-2010" => "0910apsemployees.csv", "2009-2010" => "0910apsemployees.csv",
"2010-2011" => "1011apsemployees.csv" "2010-2011" => "1011apsemployees.csv"
); );
foreach ($employeeCSVs as $timePeriod => $employeeCSV) { foreach ($employeeCSVs as $timePeriod => $employeeCSV) {
echo $employeeCSV . "<br>" . PHP_EOL; echo $employeeCSV . "<br>" . PHP_EOL;
$row = 1; $row = 1;
if (($handle = fopen($employeeCSV, "r")) !== FALSE) { if (($handle = fopen($employeeCSV, "r")) !== FALSE) {
while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) { while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) {
//print_r($data); //print_r($data);
$name = trim($data[0]); $name = trim($data[0]);
if (isset($nametoid[$name])) { if (isset($nametoid[$name])) {
$id = $nametoid[$name]; $id = $nametoid[$name];
//echo $id . "<br>" . PHP_EOL; //echo $id . "<br>" . PHP_EOL;
@$sums[$id][$timePeriod] += $data[1]; @$sums[$id][$timePeriod] += $data[1];
} else { } else {
echo "<br>ERROR NAME MISSING FROM ID LIST<br><bR>" . PHP_EOL; echo "<br>ERROR NAME MISSING FROM ID LIST<br><bR>" . PHP_EOL;
   
die(); die();
   
} }
} }
fclose($handle); fclose($handle);
} }
} }
foreach ($sums as $id => $sum) { foreach ($sums as $id => $sum) {
echo $id. "<br>" . PHP_EOL; echo $id . "<br>" . PHP_EOL;
$doc = $db->get($id); $doc = $db->get($id);
// print_r($doc); echo $doc->name . "<br>" . PHP_EOL;
if (isset($doc->statistics)) $doc->statistics = Array(); // print_r($doc);
  $changed = false;
  if (!isset($doc->statistics)) {
  $changed = true;
  $doc->statistics = Array();
  }
foreach ($sum as $timePeriod => $value) { foreach ($sum as $timePeriod => $value) {
$doc->statistics["employees"][$timePeriod] = Array("value"=>$value, "source"=>"http://apsc.gov.au/stateoftheservice/"); if (!isset($doc->statistics->employees->$timePeriod->value)
  || $doc->statistics->employees->$timePeriod->value != $value) {
  $changed = true;
  $doc->statistics["employees"][$timePeriod] = Array("value" => $value, "source" => "http://apsc.gov.au/stateoftheservice/");
  }
} }
$db->save($doc); if ($changed) {
  $db->save($doc);
  } else {
  echo "not changed" . "<br>" . PHP_EOL;
  }
} }
// employees: timeperiod, source = apsc state of service, value // employees: timeperiod, source = apsc state of service, value
?> ?>
   
file:b/admin/metadata.py (new)
  #http://packages.python.org/CouchDB/client.html
  import couchdb
  from BeautifulSoup import BeautifulSoup
 
  couch = couchdb.Server('http://127.0.0.1:5984/')
 
  # select database
  docsdb = couch['disclosr-documents']
 
  for row in docsdb.view('app/getMetadataExtractRequired'):
  print row.id
  html = docsdb.get_attachment(row.id,row.value.iterkeys().next()).read()
  metadata = []
  # http://www.crummy.com/software/BeautifulSoup/documentation.html
  soup = BeautifulSoup(html)
  metatags = soup.meta
  for metatag in metatags:
  print metatag['name']
  doc = docsdb.get(row.id)
  //doc['metadata'] = metadata
  //docsdb.save(doc)
 
file:b/bubbletree.php (new)
 
  <!DOCTYPE html>
  <html xmlns="http://www.w3.org/1999/xhtml">
  <head>
  <meta charset="UTF-8"/>
  <title>Minimal BubbleTree Demo</title>
  <script type="text/javascript" src="http://code.jquery.com/jquery-1.7.2.js"></script>
  <script type="text/javascript" src="javascripts/bubbletree/lib/jquery.history.js"></script>
  <script type="text/javascript" src="javascripts/bubbletree/lib/raphael.js"></script>
  <script type="text/javascript" src="javascripts/bubbletree/lib/vis4.js"></script>
  <script type="text/javascript" src="javascripts/bubbletree/lib/Tween.js"></script>
  <script type="text/javascript" src="javascripts/bubbletree/build/bubbletree.js"></script>
  <link rel="stylesheet" type="text/css" href="javascripts/bubbletree/build/bubbletree.css" />
  <script type="text/javascript" src="javascripts/bubbletree/styles/cofog.js"></script>
 
 
  <script type="text/javascript">
 
  $(function() {
  <?php
  include_once('include/common.inc.php');
 
  include("lib/Color.php");
  $color = new Lux_Color();
 
  $portfolios = Array();
  $total = 0;
  $db = $server->get_db('disclosr-agencies');
  try {
  $rows = $db->get_view("app", "byDeptStateName", null, true)->rows;
  foreach ($rows as $row) {
  $portfolios[trim(str_replace(Array("Department of","Department","the","'","`"),"",$row->key))] = $row->value;
  }
  } catch (SetteeRestClientException $e) {
  setteErrorHandler($e);
  }
 
  $agencies = Array();
  try {
  $rows = $db->get_view("app", "byCanonicalName", null, true)->rows;
  //print_r($rows);
  foreach ($rows as $row) {
  $employees = 0;
  $portfolioid = 0;
  if (isset($row->value->employees)) $employees = $row->value->employees;
  if (isset($row->value->statistics->employees)) {
  $agencyEmployeesArray = object_to_array($row->value->statistics->employees);
  if (isset($agencyEmployeesArray["2010-2011"]["value"])) {
  $employees = $agencyEmployeesArray["2010-2011"]["value"];
  } else {
  // get last year that is recorded? throw error?
  continue;
  }
  }
  if (!($employees > 0)) $employees =0;
  if (isset($row->value->parentOrg)) $portfolioid = $row->value->parentOrg;
  if (isset($row->value->orgType) && $row->value->orgType == "FMA-DepartmentOfState") $portfolioid = $row->id;
  $agencies[$portfolioid][$row->value->name] = $employees;
  }
  } catch (SetteeRestClientException $e) {
  setteErrorHandler($e);
  }
  //print_r($portfolios);
  //print_r($agencies);
  $i = 0;
  foreach ($portfolios as $portfolioName => $portfolioID) {
  $i++;
  $portfolioColor = $color->hsv2hex(Array($i/10, .7, abs(($i*(1/10))-.5) + .5));
  $subnodes = Array();
  $portfolioEmployees = 0;
  foreach ($agencies[$portfolioID] as $agencyName => $agencyEmployees) {
  $agencyColor = $color->hsv2hex(Array($i/10, rand(1,10)/10, abs(($i*(1/10))-.5) + .5));
  $subnodes[] = Array(
  "label" => str_replace(Array("'","`"),"",$agencyName),
  "amount" => $agencyEmployees,
  "color" => "#".$agencyColor
  );
  $portfolioEmployees += $agencyEmployees;
  }
  $nodes[] = Array(
  "label" => $portfolioName,
  "amount" => $portfolioEmployees,
  "color" => "#".$portfolioColor,
  "children" => $subnodes
  );
  $total += $portfolioEmployees;
  }
  $data = Array(
  "label" => "Australian Federal Government",
  "amount" => $total,
  "color" => "#000000",
  "children" => $nodes
  );
  echo "var data =eval('('+'" . json_encode($data) . "'+')');";
  ?>
 
  new BubbleTree({
  data: data,
  container: '.bubbletree'
  });
 
 
  });
 
  </script>
  </head>
  <body>
  <div class="bubbletree-wrapper">
  <div class="bubbletree"></div>
  </div>
  </body>
  </html>
 
file:a/graph.php -> file:b/graph.php
<?php <?php
include_once('include/common.inc.php'); include_once('include/common.inc.php');
//include_header(); //include_header();
$format = "html"; $format = "html";
if (isset($_REQUEST['format'])) { if (isset($_REQUEST['format'])) {
$format = $_REQUEST['format']; $format = $_REQUEST['format'];
} }
   
function add_node($id, $label, $parent="") { function add_node($id, $label, $parent="") {
global $format; global $format;
if ($format == "html") { if ($format == "html") {
// echo "nodes[\"$id\"] = graph.newNode({label: \"$label\"});" . PHP_EOL; // echo "nodes[\"$id\"] = graph.newNode({label: \"$label\"});" . PHP_EOL;
} }
if ($format == "dot" && $label != "") { if ($format == "dot" && $label != "") {
echo "$id [label=\"$label\"];". PHP_EOL; echo "$id [label=\"$label\"];". PHP_EOL;
} }
if ($format == "gexf") { if ($format == "gexf") {
echo "<node id='$id' label=\"".htmlentities($label,ENT_XML1)."\" ".($parent != ""? "pid='$parent'><viz:size value='1'/>":"><viz:size value='2'/>") echo "<node id='$id' label=\"".htmlentities($label,ENT_XML1)."\" ".($parent != ""? "pid='$parent'><viz:size value='1'/>":"><viz:size value='2'/>")
."<viz:color b='".rand(0,255)."' g='".rand(0,255)."' r='".rand(0,255)."'/>" ."<viz:color b='".rand(0,255)."' g='".rand(0,255)."' r='".rand(0,255)."'/>"
."</node>". PHP_EOL; ."</node>". PHP_EOL;
} }
} }
   
function add_edge($from, $to, $color) { function add_edge($from, $to, $color) {
global $format; global $format;
if ($format == "html") { if ($format == "html") {
// echo "graph.newEdge(nodes[\"$from\"], nodes['$to'], {color: '$color'});" . PHP_EOL; // echo "graph.newEdge(nodes[\"$from\"], nodes['$to'], {color: '$color'});" . PHP_EOL;
} }
if ($format == "dot") { if ($format == "dot") {
echo "$from -> $to ".($color != ""? "[color=$color]":"").";". PHP_EOL; echo "$from -> $to ".($color != ""? "[color=$color]":"").";". PHP_EOL;
} }
if ($format == "gexf") { if ($format == "gexf") {
echo "<edge id='$from$to' source='$from' target='$to' />". PHP_EOL; echo "<edge id='$from$to' source='$from' target='$to' />". PHP_EOL;
} }
} }
if ($format == "gexf") { if ($format == "gexf") {
//header('Content-Type: text/xml'); //header('Content-Type: text/xml');
header('Content-Type: application/gexf+xml'); header('Content-Type: application/gexf+xml');
echo '<?xml version="1.0" encoding="UTF-8"?> echo '<?xml version="1.0" encoding="UTF-8"?>
<gexf xmlns="http://www.gexf.net/1.2draft" xmlns:viz="http://www.gexf.net/1.2draft/viz" version="1.2"> <gexf xmlns="http://www.gexf.net/1.2draft" xmlns:viz="http://www.gexf.net/1.2draft/viz" version="1.2">
<meta lastmodifieddate="2009-03-20"> <meta lastmodifieddate="2009-03-20">
<creator>Gexf.net</creator> <creator>Gexf.net</creator>
<description>A hello world! file</description> <description>A hello world! file</description>
</meta> </meta>
<graph mode="static" defaultedgetype="directed"> <graph mode="static" defaultedgetype="directed">
<nodes>'. PHP_EOL; <nodes>'. PHP_EOL;
} }
   
if ($format == "dot") { if ($format ==