Parse complete directory.gov.au export into gexf
Parse complete directory.gov.au export into gexf


Former-commit-id: 7be06add579fbd14042ea9084bc738168a62dcdf

  <?php
 
  $nodes = Array(Array("id" => "gov", "label" => "Federal Government"));
  $edges = Array();
 
  function addEdge($source, $target) {
  global $edges;
  $edges[] = Array("id" => md5($source . $target), "source" => $source, "target" => $target);
  }
 
  function addNode($id, $label, $pid) {
  global $nodes;
  $nodes[] = Array("id" => $id, "label" => $label , "pid" => $pid);
  }
 
  function addChildren($parentID, $parentXML) {
  foreach ($parentXML as $childXML) {
 
  if ($childXML->getName() == "organization" || $childXML->getName() == "organizationalUnit" || $childXML->getName() == "person") {
  $attr = $childXML->attributes();
  $id = $attr['UUID'];
  if ($childXML->getName() == "organization" || $childXML->getName() == "organizationalUnit") {
 
  $label = $childXML->name;
  } else if ($childXML->getName() == "person") {
  $label = $childXML->fullName;
  }
  addNode($id, $label, $parentID);
  addEdge($id, $parentID);
  addChildren($id, $childXML);
  }
  }
  }
 
  if (file_exists('directoryexport.xml')) {
  $xml = simplexml_load_file('directoryexport.xml');
 
  addChildren("gov", $xml);
  } else {
  exit('Failed to open directoryexport.xml');
  }
  header('Content-Type: application/gexf+xml');
  echo '<?xml version="1.0" encoding="UTF-8"?>
  <gexf xmlns="http://www.gexf.net/1.2draft" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd" version="1.2">
  <graph mode="static" defaultedgetype="directed">
  <nodes>';
  foreach ($nodes as $node) {
  echo ' <node id="' . $node['id'] . '" label="' . htmlentities($node['label'],ENT_XML1) . '" ' . (isset($node['pid']) ? 'pid="' . $node['pid'] . '"' : "") . ' />';
  }
  echo '</nodes>
  <edges>';
  foreach ($edges as $edge) {
  echo ' <edge id="' . $edge['id'] . '" source="' . $edge['source'] . '" target="' . $edge['target'] . '" />';
  }
  echo '</edges>
  </graph>
  </gexf>';
  ?>
 
<?php <?php
   
require_once '../include/common.inc.php'; require_once '../include/common.inc.php';
   
$db = $server->get_db('disclosr-agencies'); $db = $server->get_db('disclosr-agencies');
$rows = $db->get_view("app", "byName")->rows; $rows = $db->get_view("app", "byName")->rows;
$nametoid = Array(); $nametoid = Array();
$sums = Array(); $sums = Array();
foreach ($rows as $row) { foreach ($rows as $row) {
$nametoid[trim($row->key)] = $row->value; $nametoid[trim($row->key)] = $row->value;
} }
$employeeCSVs = Array("2002-2003" => "0203apsemployees.csv", $employeeCSVs = Array("2002-2003" => "0203apsemployees.csv",
"2003-2004" => "0304apsemployees.csv", "2003-2004" => "0304apsemployees.csv",
"2004-2005" => "0405apsemployees.csv", "2004-2005" => "0405apsemployees.csv",
"2005-2006" => "0506apsemployees.csv", "2005-2006" => "0506apsemployees.csv",
"2006-2007" => "0607apsemployees.csv", "2006-2007" => "0607apsemployees.csv",
"2007-2008" => "0708apsemployees.csv", "2007-2008" => "0708apsemployees.csv",
"2008-2009" => "0809apsemployees.csv", "2008-2009" => "0809apsemployees.csv",
"2009-2010" => "0910apsemployees.csv", "2009-2010" => "0910apsemployees.csv",
"2010-2011" => "1011apsemployees.csv" "2010-2011" => "1011apsemployees.csv"
); );
foreach ($employeeCSVs as $timePeriod => $employeeCSV) { foreach ($employeeCSVs as $timePeriod => $employeeCSV) {
echo $employeeCSV . "<br>" . PHP_EOL; echo $employeeCSV . "<br>" . PHP_EOL;
$row = 1; $row = 1;
if (($handle = fopen($employeeCSV, "r")) !== FALSE) { if (($handle = fopen($employeeCSV, "r")) !== FALSE) {
while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) { while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) {
//print_r($data); //print_r($data);
$name = trim($data[0]); $name = trim($data[0]);
if (isset($nametoid[$name])) { if (isset($nametoid[$name])) {
$id = $nametoid[$name]; $id = $nametoid[$name];
//echo $id . "<br>" . PHP_EOL; //echo $id . "<br>" . PHP_EOL;
@$sums[$id][$timePeriod] += $data[1]; @$sums[$id][$timePeriod] += $data[1];
} else { } else {
echo "<br>ERROR NAME MISSING FROM ID LIST<br><bR>" . PHP_EOL; echo "<br>ERROR NAME MISSING FROM ID LIST<br><bR>" . PHP_EOL;
   
die(); die();
   
} }
} }
fclose($handle); fclose($handle);
} }
} }
foreach ($sums as $id => $sum) { foreach ($sums as $id => $sum) {
echo $id. "<br>" . PHP_EOL; echo $id . "<br>" . PHP_EOL;
$doc = $db->get($id); $doc = $db->get($id);
// print_r($doc); echo $doc->name . "<br>" . PHP_EOL;
if (isset($doc->statistics)) $doc->statistics = Array(); // print_r($doc);
  $changed = false;
  if (!isset($doc->statistics)) {
  $changed = true;
  $doc->statistics = Array();
  }
foreach ($sum as $timePeriod => $value) { foreach ($sum as $timePeriod => $value) {
$doc->statistics["employees"][$timePeriod] = Array("value"=>$value, "source"=>"http://apsc.gov.au/stateoftheservice/"); if (!isset($doc->statistics->employees->$timePeriod->value)
  || $doc->statistics->employees->$timePeriod->value != $value) {
  $changed = true;
  $doc->statistics["employees"][$timePeriod] = Array("value" => $value, "source" => "http://apsc.gov.au/stateoftheservice/");
  }
} }
$db->save($doc); if ($changed) {
  $db->save($doc);
  } else {
  echo "not changed" . "<br>" . PHP_EOL;
  }
} }
// employees: timeperiod, source = apsc state of service, value // employees: timeperiod, source = apsc state of service, value
?> ?>
   
file:b/admin/metadata.py (new)
  #http://packages.python.org/CouchDB/client.html
  import couchdb
  from BeautifulSoup import BeautifulSoup
 
  couch = couchdb.Server('http://127.0.0.1:5984/')
 
  # select database
  docsdb = couch['disclosr-documents']
 
  for row in docsdb.view('app/getMetadataExtractRequired'):
  print row.id
  html = docsdb.get_attachment(row.id,row.value.iterkeys().next()).read()
  metadata = []
  # http://www.crummy.com/software/BeautifulSoup/documentation.html
  soup = BeautifulSoup(html)
  metatags = soup.meta
  for metatag in metatags:
  print metatag['name']
  doc = docsdb.get(row.id)
  //doc['metadata'] = metadata
  //docsdb.save(doc)
 
   
<!DOCTYPE html> <!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml"> <html xmlns="http://www.w3.org/1999/xhtml">
<head> <head>
<meta charset="UTF-8"/> <meta charset="UTF-8"/>
<title>Minimal BubbleTree Demo</title> <title>Minimal BubbleTree Demo</title>
<script type="text/javascript" src="http://code.jquery.com/jquery-1.7.2.js"></script> <script type="text/javascript" src="http://code.jquery.com/jquery-1.7.2.js"></script>
<script type="text/javascript" src="javascripts/bubbletree/lib/jquery.history.js"></script> <script type="text/javascript" src="javascripts/bubbletree/lib/jquery.history.js"></script>
<script type="text/javascript" src="javascripts/bubbletree/lib/raphael.js"></script> <script type="text/javascript" src="javascripts/bubbletree/lib/raphael.js"></script>
<script type="text/javascript" src="javascripts/bubbletree/lib/vis4.js"></script> <script type="text/javascript" src="javascripts/bubbletree/lib/vis4.js"></script>
<script type="text/javascript" src="javascripts/bubbletree/lib/Tween.js"></script> <script type="text/javascript" src="javascripts/bubbletree/lib/Tween.js"></script>
<script type="text/javascript" src="javascripts/bubbletree/build/bubbletree.js"></script> <script type="text/javascript" src="javascripts/bubbletree/build/bubbletree.js"></script>
<link rel="stylesheet" type="text/css" href="javascripts/bubbletree/build/bubbletree.css" /> <link rel="stylesheet" type="text/css" href="javascripts/bubbletree/build/bubbletree.css" />
<script type="text/javascript" src="javascripts/bubbletree/styles/cofog.js"></script> <script type="text/javascript" src="javascripts/bubbletree/styles/cofog.js"></script>
<script type="text/javascript"> <script type="text/javascript">
$(function() { $(function() {
<?php <?php
include_once('include/common.inc.php'); include_once('include/common.inc.php');
   
include("lib/Color.php"); include("lib/Color.php");
$color = new Lux_Color(); $color = new Lux_Color();
   
$portfolios = Array(); $portfolios = Array();
  $total = 0;
$db = $server->get_db('disclosr-agencies'); $db = $server->get_db('disclosr-agencies');
try { try {
$rows = $db->get_view("app", "byDeptStateName", null, true)->rows; $rows = $db->get_view("app", "byDeptStateName", null, true)->rows;
foreach ($rows as $row) { foreach ($rows as $row) {
$portfolios[trim(str_replace(Array("Department of","Department","the","'","`"),"",$row->key))] = $row->value; $portfolios[trim(str_replace(Array("Department of","Department","the","'","`"),"",$row->key))] = $row->value;
} }
} catch (SetteeRestClientException $e) { } catch (SetteeRestClientException $e) {
setteErrorHandler($e); setteErrorHandler($e);
} }
   
$agencies = Array(); $agencies = Array();
try { try {
$rows = $db->get_view("app", "byCanonicalName", null, true)->rows; $rows = $db->get_view("app", "byCanonicalName", null, true)->rows;
//print_r($rows); //print_r($rows);
foreach ($rows as $row) { foreach ($rows as $row) {
$employees = 0; $employees = 0;
$portfolioid = 0; $portfolioid = 0;
if ($row->value->employees) $employees = $row->value->employees; if (isset($row->value->employees)) $employees = $row->value->employees;
if (isset($row->value->statistics->employees)) { if (isset($row->value->statistics->employees)) {
$agencyEmployeesArray = object_to_array($row->value->statistics->employees); $agencyEmployeesArray = object_to_array($row->value->statistics->employees);
  if (isset($agencyEmployeesArray["2010-2011"]["value"])) {
$employees = $agencyEmployeesArray["2010-2011"]["value"]; $employees = $agencyEmployeesArray["2010-2011"]["value"];
  } else {
  // get last year that is recorded? throw error?
  continue;
  }
} }
if (!($employees > 0)) $employees =0; if (!($employees > 0)) $employees =0;
if (isset($row->value->parentOrg)) $portfolioid = $row->value->parentOrg; if (isset($row->value->parentOrg)) $portfolioid = $row->value->parentOrg;
if (isset($row->value->orgType) && $row->value->orgType == "FMA-DepartmentOfState") $portfolioid = $row->id; if (isset($row->value->orgType) && $row->value->orgType == "FMA-DepartmentOfState") $portfolioid = $row->id;
$agencies[$portfolioid][$row->value->name] = $employees; $agencies[$portfolioid][$row->value->name] = $employees;
} }
} catch (SetteeRestClientException $e) { } catch (SetteeRestClientException $e) {
setteErrorHandler($e); setteErrorHandler($e);
} }
//print_r($portfolios); //print_r($portfolios);
//print_r($agencies); //print_r($agencies);
$i = 0; $i = 0;
foreach ($portfolios as $portfolioName => $portfolioID) { foreach ($portfolios as $portfolioName => $portfolioID) {
$i++; $i++;
$portfolioColor = $color->hsv2hex(Array($i/10, .7, abs(($i*(1/10))-.5) + .5)); $portfolioColor = $color->hsv2hex(Array($i/10, .7, abs(($i*(1/10))-.5) + .5));
$subnodes = Array(); $subnodes = Array();
$portfolioEmployees = 0; $portfolioEmployees = 0;
foreach ($agencies[$portfolioID] as $agencyName => $agencyEmployees) { foreach ($agencies[$portfolioID] as $agencyName => $agencyEmployees) {
$agencyColor = $color->hsv2hex(Array($i/10, rand(1,10)/10, abs(($i*(1/10))-.5) + .5)); $agencyColor = $color->hsv2hex(Array($i/10, rand(1,10)/10, abs(($i*(1/10))-.5) + .5));
$subnodes[] = Array( $subnodes[] = Array(
"label" => str_replace(Array("'","`"),"",$agencyName), "label" => str_replace(Array("'","`"),"",$agencyName),
"amount" => $agencyEmployees, "amount" => $agencyEmployees,
"color" => "#".$agencyColor "color" => "#".$agencyColor
); );
$portfolioEmployees += $agencyEmployees; $portfolioEmployees += $agencyEmployees;
} }
$nodes[] = Array( $nodes[] = Array(
"label" => $portfolioName, "label" => $portfolioName,
"amount" => $portfolioEmployees, "amount" => $portfolioEmployees,
"color" => "#".$portfolioColor, "color" => "#".$portfolioColor,
"children" => $subnodes "children" => $subnodes
); );
$total += $portfolioEmployees; $total += $portfolioEmployees;
} }
$data = Array( $data = Array(
"label" => "Australian Federal Government", "label" => "Australian Federal Government",
"amount" => $total, "amount" => $total,
"color" => "#000000", "color" => "#000000",
"children" => $nodes "children" => $nodes
); );
echo "var data =eval('('+'" . json_encode($data) . "'+')');"; echo "var data =eval('('+'" . json_encode($data) . "'+')');";
?> ?>
   
new BubbleTree({ new BubbleTree({
data: data, data: data,
container: '.bubbletree' container: '.bubbletree'
}); });
}); });
</script> </script>
</head> </head>
<body> <body>
<div class="bubbletree-wrapper"> <div class="bubbletree-wrapper">
<div class="bubbletree"></div> <div class="bubbletree"></div>
</div> </div>
</body> </body>
</html> </html>
   
<?php <?php
   
include $basePath . "schemas/schemas.inc.php"; include $basePath . "schemas/schemas.inc.php";
   
require ($basePath . 'couchdb/settee/src/settee.php'); require ($basePath . 'couchdb/settee/src/settee.php');
   
function createDocumentsDesignDoc() { function createDocumentsDesignDoc() {
/*"views": { /* "views": {
"web_server": { "web_server": {
"map": "function(doc) {\n emit(doc.web_server, 1);\n}", "map": "function(doc) {\n emit(doc.web_server, 1);\n}",
"reduce": "function (key, values, rereduce) {\n return sum(values);\n}" "reduce": "function (key, values, rereduce) {\n return sum(values);\n}"
}, },
"byAgency": { "byAgency": {
"map": "function(doc) {\n emit(doc.agencyID, 1);\n}", "map": "function(doc) {\n emit(doc.agencyID, 1);\n}",
"reduce": "function (key, values, rereduce) {\n return sum(values);\n}" "reduce": "function (key, values, rereduce) {\n return sum(values);\n}"
}, },
"byURL": { "byURL": {
"map": "function(doc) {\n emit(doc.url, doc);\n}" "map": "function(doc) {\n emit(doc.url, doc);\n}"
}, },
"agency": { "agency": {
"map": "function(doc) {\n emit(doc.agencyID, doc);\n}" "map": "function(doc) {\n emit(doc.agencyID, doc);\n}"
}, },
"byWebServer": { "byWebServer": {
"map": "function(doc) {\n emit(doc.web_server, doc);\n}" "map": "function(doc) {\n emit(doc.web_server, doc);\n}"
}, },
"getValidationRequired": { "getValidationRequired": {
"map": "function(doc) {\nif (doc.mime_type == \"text/html\" \n&& typeof(doc.validation) == \"undefined\") {\n emit(doc._id, doc._attachments);\n}\n}" "map": "function(doc) {\nif (doc.mime_type == \"text/html\" \n&& typeof(doc.validation) == \"undefined\") {\n emit(doc._id, doc._attachments);\n}\n}"
} }
}*/ } */
} }
   
function createAgencyDesignDoc() { function createAgencyDesignDoc() {
global $db; global $db;
$obj = new stdClass(); $obj = new stdClass();
$obj->_id = "_design/" . urlencode("app"); $obj->_id = "_design/" . urlencode("app");
$obj->language = "javascript"; $obj->language = "javascript";
$obj->views->all->map = "function(doc) { emit(doc._id, doc); };"; $obj->views->all->map = "function(doc) { emit(doc._id, doc); };";
$obj->views->byABN->map = "function(doc) { emit(doc.abn, doc); };"; $obj->views->byABN->map = "function(doc) { emit(doc.abn, doc); };";
$obj->views->byCanonicalName->map = "function(doc) { $obj->views->byCanonicalName->map = "function(doc) {
if (doc.parentOrg || doc.orgType == 'FMA-DepartmentOfState') { if (doc.parentOrg || doc.orgType == 'FMA-DepartmentOfState') {
emit(doc.name, doc); emit(doc.name, doc);
} }
};"; };";
$obj->views->byDeptStateName->map = "function(doc) { $obj->views->byDeptStateName->map = "function(doc) {
if (doc.orgType == 'FMA-DepartmentOfState') { if (doc.orgType == 'FMA-DepartmentOfState') {
emit(doc.name, doc._id); emit(doc.name, doc._id);
} }
};"; };";
$obj->views->parentOrgs->map = "function(doc) { $obj->views->parentOrgs->map = "function(doc) {
if (doc.parentOrg) { if (doc.parentOrg) {
emit(doc._id, doc.parentOrg); emit(doc._id, doc.parentOrg);
} }
};"; };";
$obj->views->byName->map = 'function(doc) { $obj->views->byName->map = 'function(doc) {
if (typeof(doc["status"]) == "undefined" || doc["status"] != "suspended") { if (typeof(doc["status"]) == "undefined" || doc["status"] != "suspended") {
emit(doc.name, doc._id); emit(doc.name, doc._id);
if (typeof(doc.shortName) != "undefined" && doc.shortName != doc.name) { if (typeof(doc.shortName) != "undefined" && doc.shortName != doc.name) {
emit(doc.shortName, doc._id); emit(doc.shortName, doc._id);
} }
for (name in doc.otherNames) { for (name in doc.otherNames) {
if (doc.otherNames[name] != "" && doc.otherNames[name] != doc.name) { if (doc.otherNames[name] != "" && doc.otherNames[name] != doc.name) {
emit(doc.otherNames[name], doc._id); emit(doc.otherNames[name], doc._id);
} }
} }
for (name in doc.foiBodies) { for (name in doc.foiBodies) {