Remove bubbletree colors
Remove bubbletree colors


Former-commit-id: 91129056b556973816d6200d698809bba6c57d9c

[submodule "couchdb/couchdb-lucene"] [submodule "couchdb/couchdb-lucene"]
path = couchdb/couchdb-lucene path = couchdb/couchdb-lucene
url = https://github.com/rnewson/couchdb-lucene.git url = https://github.com/rnewson/couchdb-lucene.git
[submodule "couchdb/settee"] [submodule "couchdb/settee"]
path = couchdb/settee path = couchdb/settee
url = https://github.com/inadarei/settee.git url = https://github.com/inadarei/settee.git
[submodule "lib/springy"]  
path = lib/springy  
url = https://github.com/dhotson/springy.git  
[submodule "lib/php-diff"] [submodule "lib/php-diff"]
path = lib/php-diff path = lib/php-diff
url = https://github.com/chrisboulton/php-diff.git url = https://github.com/chrisboulton/php-diff.git
[submodule "lib/Requests"] [submodule "lib/Requests"]
path = lib/Requests path = lib/Requests
url = https://github.com/rmccue/Requests.git url = https://github.com/rmccue/Requests.git
[submodule "javascripts/flotr2"] [submodule "javascripts/flotr2"]
path = javascripts/flotr2 path = javascripts/flotr2
url = https://github.com/HumbleSoftware/Flotr2.git url = https://github.com/HumbleSoftware/Flotr2.git
[submodule "lib/phpquery"] [submodule "lib/phpquery"]
path = lib/phpquery path = lib/phpquery
url = https://github.com/TobiaszCudnik/phpquery.git url = https://github.com/TobiaszCudnik/phpquery.git
  [submodule "javascripts/sigma"]
  path = javascripts/sigma
  url = https://github.com/jacomyal/sigma.js.git
  [submodule "javascripts/bubbletree"]
  path = javascripts/bubbletree
  url = https://github.com/okfn/bubbletree.git
   
  <?php
 
  $nodes = Array(Array("id" => "gov", "label" => "Federal Government"));
  $edges = Array();
 
  function addEdge($source, $target) {
  global $edges;
  $edges[] = Array("id" => md5($source . $target), "source" => $source, "target" => $target);
  }
 
  function addNode($id, $label, $pid) {
  global $nodes;
  $nodes[] = Array("id" => $id, "label" => $label , "pid" => $pid);
  }
 
  function addChildren($parentID, $parentXML) {
  foreach ($parentXML as $childXML) {
 
  if ($childXML->getName() == "organization" || $childXML->getName() == "organizationalUnit" || $childXML->getName() == "person") {
  $attr = $childXML->attributes();
  $id = $attr['UUID'];
  if ($childXML->getName() == "organization" || $childXML->getName() == "organizationalUnit") {
 
  $label = $childXML->name;
  } else if ($childXML->getName() == "person") {
  $label = $childXML->fullName;
  }
  addNode($id, $label, $parentID);
  addEdge($id, $parentID);
  addChildren($id, $childXML);
  }
  }
  }
 
  if (file_exists('directoryexport.xml')) {
  $xml = simplexml_load_file('directoryexport.xml');
 
  addChildren("gov", $xml);
  } else {
  exit('Failed to open directoryexport.xml');
  }
  header('Content-Type: application/gexf+xml');
  echo '<?xml version="1.0" encoding="UTF-8"?>
  <gexf xmlns="http://www.gexf.net/1.2draft" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd" version="1.2">
  <graph mode="static" defaultedgetype="directed">
  <nodes>';
  foreach ($nodes as $node) {
  echo ' <node id="' . $node['id'] . '" label="' . htmlentities($node['label'],ENT_XML1) . '" ' . (isset($node['pid']) ? 'pid="' . $node['pid'] . '"' : "") . ' />';
  }
  echo '</nodes>
  <edges>';
  foreach ($edges as $edge) {
  echo ' <edge id="' . $edge['id'] . '" source="' . $edge['source'] . '" target="' . $edge['target'] . '" />';
  }
  echo '</edges>
  </graph>
  </gexf>';
  ?>
 
<?php <?php
   
require_once '../include/common.inc.php'; require_once '../include/common.inc.php';
   
// Sum the employee figures per agency and time period from the yearly CSV
// files, then store them under statistics -> employees on each agency document.
$db = $server->get_db('disclosr-agencies');
$rows = $db->get_view("app", "byName")->rows;

// Trimmed view key (agency name) => view value (used below as the document id).
$nametoid = Array();
foreach ($rows as $row) {
    $nametoid[trim($row->key)] = $row->value;
}

// Time period => CSV file holding that period's figures.
$employeeCSVs = Array(
    "2002-2003" => "0203apsemployees.csv",
    "2003-2004" => "0304apsemployees.csv",
    "2004-2005" => "0405apsemployees.csv",
    "2005-2006" => "0506apsemployees.csv",
    "2006-2007" => "0607apsemployees.csv",
    "2007-2008" => "0708apsemployees.csv",
    "2008-2009" => "0809apsemployees.csv",
    "2009-2010" => "0910apsemployees.csv",
    "2010-2011" => "1011apsemployees.csv"
);

// $sums[document id][time period] = total of CSV column 1 over all rows for that agency.
$sums = Array();
foreach ($employeeCSVs as $timePeriod => $employeeCSV) {
    echo $employeeCSV . "<br>" . PHP_EOL;
    if (($handle = fopen($employeeCSV, "r")) !== FALSE) {
        while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) {
            // fgetcsv() reports a blank line as array(null); skip it instead of
            // treating the empty name as an unknown agency and aborting.
            if ($data === Array(null)) {
                continue;
            }
            $name = trim($data[0]);
            if (isset($nametoid[$name])) {
                $id = $nametoid[$name];
                // Initialise explicitly rather than hiding the undefined-index notice with @.
                if (!isset($sums[$id][$timePeriod])) {
                    $sums[$id][$timePeriod] = 0;
                }
                $sums[$id][$timePeriod] += isset($data[1]) ? $data[1] : 0;
            } else {
                echo "<br>ERROR NAME MISSING FROM ID LIST<br><bR>" . PHP_EOL;
                die();
            }
        }
        fclose($handle);
    }
}

foreach ($sums as $id => $sum) {
    echo $id . "<br>" . PHP_EOL;
    $doc = $db->get($id);
    echo $doc->name . "<br>" . PHP_EOL;

    // A document without statistics always needs saving.
    $changed = !isset($doc->statistics);

    // Work on a plain-array copy so reads and writes use the same access style.
    // NOTE(review): assumes $db->get() may hand back nested JSON objects as
    // stdClass, on which the array-style write would fail - confirm against the client library.
    $statistics = $changed ? Array() : json_decode(json_encode($doc->statistics), true);
    if (!is_array($statistics)) {
        $statistics = Array();
    }

    foreach ($sum as $timePeriod => $value) {
        if (!isset($statistics["employees"][$timePeriod]["value"])
                || $statistics["employees"][$timePeriod]["value"] != $value) {
            $changed = true;
            $statistics["employees"][$timePeriod] = Array("value" => $value, "source" => "http://apsc.gov.au/stateoftheservice/");
        }
    }

    if ($changed) {
        $doc->statistics = $statistics;
        $db->save($doc);
    } else {
        echo "not changed" . "<br>" . PHP_EOL;
    }
}
// employees: timeperiod, source = apsc state of service, value // employees: timeperiod, source = apsc state of service, value
?> ?>
   
<?php <?php
   
require_once '../include/common.inc.php';
// require_once so phpQuery is never loaded a second time.
require_once($basePath . 'lib/phpquery/phpQuery/phpQuery.php');

$db = $server->get_db('disclosr-agencies');
$rows = $db->get_view("app", "byName")->rows;

// Trimmed view key (agency name) => view value (used later as the document id).
$nametoid = Array();
// Filled by extractHTMLAccounts()/extractCSVAccounts(): [document id][account type][] = account.
$accounts = Array();
foreach ($rows as $row) {
    $nametoid[trim($row->key)] = $row->value;
}
   
/**
 * Collect agency/account pairs from the table rows of an HTML page.
 *
 * Every <tr> is scanned: a cell with class s11, s10 or s7 supplies the agency
 * name, and a cell with class s6 or s9 supplies the account (the cell text, or
 * the href of a child element when one is present). Known agencies are appended
 * to the global $accounts under [document id][$accountType]; unknown ones are
 * reported on output.
 *
 * @param string $url         page to download
 * @param string $accountType key under which found accounts are stored (e.g. "RSS")
 */
function extractHTMLAccounts($url, $accountType) {
    global $accounts, $nametoid;
    $request = Requests::get($url);
    $doc = phpQuery::newDocumentHTML($request->body);
    phpQuery::selectDocument($doc);
    foreach (pq('tr')->elements as $tr) {
        $agency = "";
        // Kept separate from the $url parameter, which the original overwrote.
        $accountUrl = "";
        foreach ($tr->childNodes as $td) {
            // childNodes also yields text/comment nodes, which have no getAttribute().
            if (!($td instanceof DOMElement)) {
                continue;
            }
            $class = $td->getAttribute("class");
            if ($class == "s11" || $class == "s10" || $class == "s7") {
                $agency = $td->nodeValue;
            } else if ($class == "s6" || $class == "s9") {
                $accountUrl = $td->nodeValue;
                foreach ($td->childNodes as $a) {
                    if (!($a instanceof DOMElement)) {
                        continue;
                    }
                    $href = $a->getAttribute("href");
                    if ($href != "") {
                        // A link target takes precedence over the cell text.
                        $accountUrl = $href;
                    }
                }
            }
        }
        if ($agency != "" && $accountUrl != "") {
            $agencyName = trim($agency);
            if (!array_key_exists($agencyName, $nametoid)) {
                echo $agencyName . " missing" . PHP_EOL;
            } else {
                $accounts[$nametoid[$agencyName]][$accountType][] = $accountUrl;
            }
        }
    }
}
   
/**
 * Collect agency/account pairs from a downloaded CSV document.
 *
 * Row 0 is ignored, row 1 is taken as the header row, and every later row is
 * data. For each data row whose agency is known, the value of the account
 * column is appended to the global $accounts under [document id][$accountType];
 * unknown agencies are reported on output.
 *
 * @param string $url          CSV document to download
 * @param string $accountType  key under which found accounts are stored (e.g. "Twitter")
 * @param string $nameField    header of the column holding the agency name
 * @param string $accountField header of the column holding the account
 * @param bool   $filter       when truthy, keep only rows whose "State" column equals "NAT"
 */
function extractCSVAccounts($url, $accountType, $nameField, $accountField, $filter) {
    global $accounts, $nametoid;
    $request = Requests::get($url);
    $Data = str_getcsv($request->body, "\n"); //parse the rows

    // Column positions, resolved once when the header row is read.
    // NOTE(review): array_search() returns false for a missing header, and
    // false used as an array index addresses column 0. That behaviour is kept
    // because a caller in this file passes "" as $accountField - confirm intent.
    $nameIndex = false;
    $accountIndex = false;
    $stateIndex = false;

    foreach ($Data as $num => $line) {
        $Row = str_getcsv($line, ",", '"');
        if ($num == 0) {
            continue;
        }
        if ($num == 1) {
            $headers = $Row;
            $nameIndex = array_search($nameField, $headers);
            $accountIndex = array_search($accountField, $headers);
            $stateIndex = array_search("State", $headers);
            continue;
        }
        if (!isset($Row[$nameIndex])) {
            // Row has no agency name cell; nothing to record.
            continue;
        }
        if ($filter && $Row[$stateIndex] != "NAT") {
            continue;
        }
        $agencyName = trim($Row[$nameIndex]);
        if (!array_key_exists($agencyName, $nametoid)) {
            echo $agencyName . " missing" . PHP_EOL;
        } else {
            $accounts[$nametoid[$agencyName]][$accountType][] = $Row[$accountIndex];
        }
    }
}
   
// http://agimo.govspace.gov.au/page/gov2register/
// twitter
extractCSVAccounts("https://docs.google.com/spreadsheet/pub?key=0Ap1exl80wB8OdHNKVmQ5RVlvQWpibDAxNHkzcU1nV2c&single=true&gid=0&output=csv", "Twitter", "Agency/Body/Event", "", true);
// RSS
extractHTMLAccounts("https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGJxandJREhLSGlWWUZfZ2xKOTNHZ0E&output=html", "RSS");
// facebook
extractHTMLAccounts("https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGtjcW9vOXdyZ3pOV21vQU51VmhzQnc&single=true&gid=0&output=html", "Facebook");

// Merge the collected accounts into each agency document as has<AccountType> lists.
foreach ($accounts as $id => $accountTypes) {
    echo $id . "<br>" . PHP_EOL;
    $doc = object_to_array($db->get($id));
    // print_r($doc);

    // $typeAccounts rather than $accounts: the inner loop must not reuse the
    // name of the array the outer loop is iterating.
    foreach ($accountTypes as $accountType => $typeAccounts) {
        $field = "has" . $accountType;
        if (!isset($doc[$field]) || !is_array($doc[$field])) {
            $doc[$field] = Array();
        }
        // array_unique() keeps the original keys, so removed duplicates leave
        // gaps; array_values() renumbers so the list JSON-encodes as an array.
        $doc[$field] = array_values(array_unique(array_merge($doc[$field], $typeAccounts)));
    }
    $db->save($doc);
}
?> ?>
   
file:b/admin/metadata.py (new)
  #http://packages.python.org/CouchDB/client.html
  import couchdb
  from BeautifulSoup import BeautifulSoup
 
  couch = couchdb.Server('http://127.0.0.1:5984/')
 
  # select database
  docsdb = couch['disclosr-documents']
 
  for row in docsdb.view('app/getMetadataExtractRequired'):
  print row.id
  html = docsdb.get_attachment(row.id,row.value.iterkeys().next()).read()
  metadata = []
  # http://www.crummy.com/software/BeautifulSoup/documentation.html
  soup = BeautifulSoup(html)
  metatags = soup.meta
  for metatag in metatags:
  print metatag['name']
  doc = docsdb.get(row.id)
  //doc['metadata'] = metadata
  //docsdb.save(doc)
 
  #http://packages.python.org/CouchDB/client.html
  import couchdb
  import json
  import pprint
  import re
  from tidylib import tidy_document
 
  # Connect to the CouchDB server at 127.0.0.1:5984.
  couch = couchdb.Server('http://127.0.0.1:5984/')

  # select database
  docsdb = couch['disclosr-documents']
 
  def f(x):
  invalid = re.compile(r"ensure|testing|flicker|updating|longdesc|Accessibility Checks|not recognized")
  valid = re.compile(r"line")
  return (not invalid.search(x)) and valid.search(x) and x != ''
 
  for row in docsdb.view('app/getValidationRequired'):
  print row.id
  html = docsdb.get_attachment(row.id,row.value.iterkeys().next()).read()
  #print html
  document, errors = tidy_document(html,options={'accessibility-check':1,'show-warnings':0,'markup':0},keep_doc=True)
  #http://www.aprompt.ca/Tidy/accessibilitychec