Make apsc stats importer nondestructive
Make apsc stats importer nondestructive


Former-commit-id: 1eae8d1c89fabed230ce5d3f02595c29a41b7f5e

[submodule "couchdb/couchdb-lucene"] [submodule "couchdb/couchdb-lucene"]
path = couchdb/couchdb-lucene path = couchdb/couchdb-lucene
url = https://github.com/rnewson/couchdb-lucene.git url = https://github.com/rnewson/couchdb-lucene.git
[submodule "couchdb/settee"] [submodule "couchdb/settee"]
path = couchdb/settee path = couchdb/settee
url = https://github.com/inadarei/settee.git url = https://github.com/inadarei/settee.git
[submodule "lib/springy"]  
path = lib/springy  
url = https://github.com/dhotson/springy.git  
[submodule "lib/php-diff"] [submodule "lib/php-diff"]
path = lib/php-diff path = lib/php-diff
url = https://github.com/chrisboulton/php-diff.git url = https://github.com/chrisboulton/php-diff.git
[submodule "lib/Requests"] [submodule "lib/Requests"]
path = lib/Requests path = lib/Requests
url = https://github.com/rmccue/Requests.git url = https://github.com/rmccue/Requests.git
[submodule "javascripts/flotr2"] [submodule "javascripts/flotr2"]
path = javascripts/flotr2 path = javascripts/flotr2
url = https://github.com/HumbleSoftware/Flotr2.git url = https://github.com/HumbleSoftware/Flotr2.git
[submodule "lib/phpquery"] [submodule "lib/phpquery"]
path = lib/phpquery path = lib/phpquery
url = https://github.com/TobiaszCudnik/phpquery.git url = https://github.com/TobiaszCudnik/phpquery.git
  [submodule "javascripts/sigma"]
  path = javascripts/sigma
  url = https://github.com/jacomyal/sigma.js.git
  [submodule "javascripts/bubbletree"]
  path = javascripts/bubbletree
  url = https://github.com/okfn/bubbletree.git
   
<?php <?php
   
require_once '../include/common.inc.php'; require_once '../include/common.inc.php';
   
$db = $server->get_db('disclosr-agencies'); $db = $server->get_db('disclosr-agencies');
$rows = $db->get_view("app", "byName")->rows; $rows = $db->get_view("app", "byName")->rows;
$nametoid = Array(); $nametoid = Array();
$sums = Array(); $sums = Array();
foreach ($rows as $row) { foreach ($rows as $row) {
$nametoid[trim($row->key)] = $row->value; $nametoid[trim($row->key)] = $row->value;
} }
$employeeCSVs = Array("2002-2003" => "0203apsemployees.csv", $employeeCSVs = Array("2002-2003" => "0203apsemployees.csv",
"2003-2004" => "0304apsemployees.csv", "2003-2004" => "0304apsemployees.csv",
"2004-2005" => "0405apsemployees.csv", "2004-2005" => "0405apsemployees.csv",
"2005-2006" => "0506apsemployees.csv", "2005-2006" => "0506apsemployees.csv",
"2006-2007" => "0607apsemployees.csv", "2006-2007" => "0607apsemployees.csv",
"2007-2008" => "0708apsemployees.csv", "2007-2008" => "0708apsemployees.csv",
"2008-2009" => "0809apsemployees.csv", "2008-2009" => "0809apsemployees.csv",
"2009-2010" => "0910apsemployees.csv", "2009-2010" => "0910apsemployees.csv",
"2010-2011" => "1011apsemployees.csv" "2010-2011" => "1011apsemployees.csv"
); );
foreach ($employeeCSVs as $timePeriod => $employeeCSV) { foreach ($employeeCSVs as $timePeriod => $employeeCSV) {
echo $employeeCSV . "<br>" . PHP_EOL; echo $employeeCSV . "<br>" . PHP_EOL;
$row = 1; $row = 1;
if (($handle = fopen($employeeCSV, "r")) !== FALSE) { if (($handle = fopen($employeeCSV, "r")) !== FALSE) {
while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) { while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) {
//print_r($data); //print_r($data);
$name = trim($data[0]); $name = trim($data[0]);
if (isset($nametoid[$name])) { if (isset($nametoid[$name])) {
$id = $nametoid[$name]; $id = $nametoid[$name];
//echo $id . "<br>" . PHP_EOL; //echo $id . "<br>" . PHP_EOL;
@$sums[$id][$timePeriod] += $data[1]; @$sums[$id][$timePeriod] += $data[1];
} else { } else {
echo "<br>ERROR NAME MISSING FROM ID LIST<br><bR>" . PHP_EOL; echo "<br>ERROR NAME MISSING FROM ID LIST<br><bR>" . PHP_EOL;
   
die(); die();
   
} }
} }
fclose($handle); fclose($handle);
} }
} }
foreach ($sums as $id => $sum) { foreach ($sums as $id => $sum) {
echo $id. "<br>" . PHP_EOL; echo $id . "<br>" . PHP_EOL;
$doc = $db->get($id); $doc = $db->get($id);
// print_r($doc); echo $doc->name . "<br>" . PHP_EOL;
if (isset($doc->statistics)) $doc->statistics = Array(); // print_r($doc);
  $changed = false;
  if (!isset($doc->statistics)) {
  $changed = true;
  $doc->statistics = Array();
  }
foreach ($sum as $timePeriod => $value) { foreach ($sum as $timePeriod => $value) {
$doc->statistics["employees"][$timePeriod] = Array("value"=>$value, "source"=>"http://apsc.gov.au/stateoftheservice/"); if (!isset($doc->statistics->employees->$timePeriod->value)
  || $doc->statistics->employees->$timePeriod->value != $value) {
  $changed = true;
  $doc->statistics["employees"][$timePeriod] = Array("value" => $value, "source" => "http://apsc.gov.au/stateoftheservice/");
  }
} }
$db->save($doc); if ($changed) {
  $db->save($doc);
  } else {
  echo "not changed" . "<br>" . PHP_EOL;
  }
} }
// employees: timeperiod, source = apsc state of service, value // employees: timeperiod, source = apsc state of service, value
?> ?>
   
file:b/admin/metadata.py (new)
  #http://packages.python.org/CouchDB/client.html
  import couchdb
  from BeautifulSoup import BeautifulSoup
 
  couch = couchdb.Server('http://127.0.0.1:5984/')
 
  # select database
  docsdb = couch['disclosr-documents']
 
  for row in docsdb.view('app/getMetadataExtractRequired'):
  print row.id
  html = docsdb.get_attachment(row.id,row.value.iterkeys().next()).read()
  metadata = []
  # http://www.crummy.com/software/BeautifulSoup/documentation.html
  soup = BeautifulSoup(html)
  metatags = soup.meta
  for metatag in metatags:
  print metatag['name']
  doc = docsdb.get(row.id)
  //doc['metadata'] = metadata
  //docsdb.save(doc)
 
file:b/bubbletree.php (new)
 
  <!DOCTYPE html>
  <html xmlns="http://www.w3.org/1999/xhtml">
  <head>
  <meta charset="UTF-8"/>
  <title>Minimal BubbleTree Demo</title>
  <script type="text/javascript" src="http://code.jquery.com/jquery-1.7.2.js"></script>
  <script type="text/javascript" src="javascripts/bubbletree/lib/jquery.history.js"></script>
  <script type="text/javascript" src="javascripts/bubbletree/lib/raphael.js"></script>
  <script type="text/javascript" src="javascripts/bubbletree/lib/vis4.js"></script>
  <script type="text/javascript" src="javascripts/bubbletree/lib/Tween.js"></script>
  <script type="text/javascript" src="javascripts/bubbletree/build/bubbletree.js"></script>
  <link rel="stylesheet" type="text/css" href="javascripts/bubbletree/build/bubbletree.css" />
  <script type="text/javascript" src="javascripts/bubbletree/styles/cofog.js"></script>
 
 
  <script type="text/javascript">
 
  $(function() {
  <?php
  include_once('include/common.inc.php');
 
  include("lib/Color.php");
  $color = new Lux_Color();
 
  $portfolios = Array();
 
  $db = $server->get_db('disclosr-agencies');
  try {
  $rows = $db->get_view("app", "byDeptStateName", null, true)->rows;
  foreach ($rows as $row) {
  $portfolios[trim(str_replace(Array("Department of","Department","the","'","`"),"",$row->key))] = $row->value;
  }
  } catch (SetteeRestClientException $e) {
  setteErrorHandler($e);
  }
 
  $agencies = Array();
  try {
  $rows = $db->get_view("app", "byCanonicalName", null, true)->rows;
  //print_r($rows);
  foreach ($rows as $row) {
  $employees = 0;
  $portfolioid = 0;
  if ($row->value->employees) $employees = $row->value->employees;
  if (isset($row->value->statistics->employees)) {
  $agencyEmployeesArray = object_to_array($row->value->statistics->employees);
  $employees = $agencyEmployeesArray["2010-2011"]["value"];
  }
  if (!($employees > 0)) $employees =0;
  if (isset($row->value->parentOrg)) $portfolioid = $row->value->parentOrg;
  if (isset($row->value->orgType) && $row->value->orgType == "FMA-DepartmentOfState") $portfolioid = $row->id;
  $agencies[$portfolioid][$row->value->name] = $employees;
  }
  } catch (SetteeRestClientException $e) {
  setteErrorHandler($e);
  }
  //print_r($portfolios);
  //print_r($agencies);
  $i = 0;
  foreach ($portfolios as $portfolioName => $portfolioID) {
  $i++;
  $portfolioColor = $color->hsv2hex(Array($i/10, .7, abs(($i*(1/10))-.5) + .5));
  $subnodes = Array();
  $portfolioEmployees = 0;
  foreach ($agencies[$portfolioID] as $agencyName => $agencyEmployees) {
  $agencyColor = $color->hsv2hex(Array($i/10, rand(1,10)/10, abs(($i*(1/10))-.5) + .5));
  $subnodes[] = Array(
  "label" => str_replace(Array("'","`"),"",$agencyName),
  "amount" => $agencyEmployees,
  "color" => "#".$agencyColor
  );
  $portfolioEmployees += $agencyEmployees;
  }
  $nodes[] = Array(
  "label" => $portfolioName,
  "amount" => $portfolioEmployees,
  "color" => "#".$portfolioColor,
  "children" => $subnodes
  );
  $total += $portfolioEmployees;
  }
  $data = Array(
  "label" => "Australian Federal Government",
  "amount" => $total,
  "color" => "#000000",
  "children" => $nodes
  );
  echo "var data =eval('('+'" . json_encode($data) . "'+')');";
  ?>
 
  new BubbleTree({
  data: data,
  container: '.bubbletree'
  });
 
 
  });
 
  </script>
  </head>
  <body>
  <div class="bubbletree-wrapper">
  <div class="bubbletree"></div>
  </div>
  </body>
  </html>
 
file:a/graph.php -> file:b/graph.php
<?php <?php
include_once('include/common.inc.php'); include_once('include/common.inc.php');
//include_header(); //include_header();
$format = "html"; $format = "html";
if (isset($_REQUEST['format'])) { if (isset($_REQUEST['format'])) {
$format = $_REQUEST['format']; $format = $_REQUEST['format'];
} }
   
function add_node($id, $label) { function add_node($id, $label, $parent="") {
global $format; global $format;
if ($format == "html") { if ($format == "html") {
echo "nodes[\"$id\"] = graph.newNode({label: \"$label\"});" . PHP_EOL; // echo "nodes[\"$id\"] = graph.newNode({label: \"$label\"});" . PHP_EOL;
} }
if ($format == "dot" && $label != "") { if ($format == "dot" && $label != "") {
echo "$id [label=\"$label\"];". PHP_EOL; echo "$id [label=\"$label\"];". PHP_EOL;
} }
  if ($format == "gexf") {
  echo "<node id='$id' label=\"".htmlentities($label,ENT_XML1)."\" ".($parent != ""? "pid='$parent'><viz:size value='1'/>":"><viz:size value='2'/>")
  ."<viz:color b='".rand(0,255)."' g='".rand(0,255)."' r='".rand(0,255)."'/>"
  ."</node>". PHP_EOL;
  }
} }
   
function add_edge($from, $to, $color) { function add_edge($from, $to, $color) {
global $format; global $format;
if ($format == "html") { if ($format == "html") {
echo "graph.newEdge(nodes[\"$from\"], nodes['$to'], {color: '$color'});" . PHP_EOL; // echo "graph.newEdge(nodes[\"$from\"], nodes['$to'], {color: '$color'});" . PHP_EOL;
} }
if ($format == "dot") { if ($format == "dot") {
echo "$from -> $to ".($color != ""? "[color=$color]":"").";". PHP_EOL; echo "$from -> $to ".($color != ""? "[color=$color]":"").";". PHP_EOL;
} }
  if ($format == "gexf") {
  echo "<edge id='$from$to' source='$from' target='$to' />". PHP_EOL;
  }
  }
  if ($format == "gexf") {
  //header('Content-Type: text/xml');
  header('Content-Type: application/gexf+xml');
  echo '<?xml version="1.0" encoding="UTF-8"?>
  <gexf xmlns="http://www.gexf.net/1.2draft" xmlns:viz="http://www.gexf.net/1.2draft/viz" version="1.2">
  <meta lastmodifieddate="2009-03-20">
  <creator>Gexf.net</creator>
  <description>A hello world! file</description>
  </meta>
  <graph mode="static" defaultedgetype="directed">
  <nodes>'. PHP_EOL;
} }
   
if ($format == "html") {  
?>  
<script src="http://ajax.googleapis.com/ajax/libs/jquery/1.3.2/jquery.min.js"></script>  
<script src="lib/springy/springy.js"></script>  
<script src="lib/springy/springyui.js"></script>  
<script>  
var graph = new Graph();  
var nodes = [];  
<?php  
}  
if ($format == "dot") { if ($format == "dot") {
echo 'digraph g {'. PHP_EOL; echo 'digraph g {'. PHP_EOL;
} }
$db = $server->get_db('disclosr-agencies'); $db = $server->get_db('disclosr-agencies');
add_node("fedg","Federal Government - Commonwealth of Australia"); add_node("fedg","Federal Government - Commonwealth of Australia");
try { try {
$rows = $db->get_view("app", "byCanonicalName", null, true)->rows; $rows = $db->get_view("app", "byCanonicalName", null, true)->rows;
//print_r($rows); //print_r($rows);
foreach ($rows as $row) { foreach ($rows as $row) {
add_node($row->id, $row->key); add_node($row->id, $row->key);
} }
} catch (SetteeRestClientException $e) { } catch (SetteeRestClientException $e) {
setteErrorHandler($e); setteErrorHandler($e);
} }
  if ($format == "gexf") {
  echo '</nodes>
  <edges>'. PHP_EOL;
  }
try { try {
$rows = $db->get_view("app", "byDeptStateName", null, true)->rows; $rows = $db->get_view("app", "byDeptStateName", null, true)->rows;
//print_r($rows); //print_r($rows);
foreach ($rows as $row) { foreach ($rows as $row) {
add_edge("fedg", $row->value, 'yellow'); add_edge("fedg", $row->value, 'yellow');
} }
} catch (SetteeRestClientException $e) { } catch (SetteeRestClientException $e) {
setteErrorHandler($e); setteErrorHandler($e);
} }
   
try { try {
$rows = $db->get_view("app", "parentOrgs", null, true)->rows; $rows = $db->get_view("app", "parentOrgs", null, true)->rows;
// print_r($rows); // print_r($rows);
foreach ($rows as $row) { foreach ($rows as $row) {
add_edge($row->key, $row->value, 'blue'); add_edge($row->key, $row->value, 'blue');
} }
} catch (SetteeRestClientException $e) { } catch (SetteeRestClientException $e) {
setteErrorHandler($e); setteErrorHandler($e);
} }
if ($format == "html") { if ($format == "html") {
?> ?>
window.onload = function() { <div id="sigma-example" width="960" style="min-height:800px;background-color: #333;"></div>
$(document).ready(function() { <script src="javascripts/sigma.min.js"></script>
var springy = $('#springydemo').springy({ <script src="javascripts/sigma/plugins/sigma.parseGexf.js"></script>
graph: graph <script src="javascripts/sigma/plugins/sigma.forceatlas2.js"></script>
}); <script type="text/javascript">function init() {
}); // Instanciate sigma.js and customize rendering :
}; var sigInst = sigma.init(document.getElementById('sigma-example')).drawingProperties({
</script> defaultLabelColor: '#fff',
  defaultLabelSize: 14,
  defaultLabelBGColor: '#fff',
  defaultLabelHoverColor: '#000',
  labelThreshold: 6,
  defaultEdgeType: 'curve'
  }).graphProperties({
  minNodeSize: 0.5,
  maxNodeSize: 5,