Merge branch 'master' of ssh://apples.lambdacomplex.org/git/disclosr
Merge branch 'master' of ssh://apples.lambdacomplex.org/git/disclosr


Former-commit-id: 7d0a555ac7ae7f206decb2ebd82cabed1f051edf

[submodule "couchdb/couchdb-lucene"] [submodule "couchdb/couchdb-lucene"]
path = couchdb/couchdb-lucene path = couchdb/couchdb-lucene
url = https://github.com/rnewson/couchdb-lucene.git url = https://github.com/rnewson/couchdb-lucene.git
[submodule "couchdb/settee"] [submodule "couchdb/settee"]
path = couchdb/settee path = couchdb/settee
url = https://github.com/inadarei/settee.git url = https://github.com/inadarei/settee.git
[submodule "lib/php-diff"] [submodule "lib/php-diff"]
path = lib/php-diff path = lib/php-diff
url = https://github.com/chrisboulton/php-diff.git url = https://github.com/chrisboulton/php-diff.git
[submodule "lib/Requests"] [submodule "lib/Requests"]
path = lib/Requests path = lib/Requests
url = https://github.com/rmccue/Requests.git url = https://github.com/rmccue/Requests.git
[submodule "javascripts/flotr2"] [submodule "javascripts/flotr2"]
path = javascripts/flotr2 path = javascripts/flotr2
url = https://github.com/HumbleSoftware/Flotr2.git url = https://github.com/HumbleSoftware/Flotr2.git
[submodule "lib/phpquery"] [submodule "lib/phpquery"]
path = lib/phpquery path = lib/phpquery
url = https://github.com/TobiaszCudnik/phpquery.git url = https://github.com/TobiaszCudnik/phpquery.git
[submodule "javascripts/sigma"] [submodule "javascripts/sigma"]
path = javascripts/sigma path = javascripts/sigma
url = https://github.com/jacomyal/sigma.js.git url = https://github.com/jacomyal/sigma.js.git
  [submodule "javascripts/bubbletree"]
  path = javascripts/bubbletree
  url = https://github.com/okfn/bubbletree.git
   
<?php <?php
   
require_once '../include/common.inc.php'; require_once '../include/common.inc.php';
require($basePath.'lib/phpquery/phpQuery/phpQuery.php'); require($basePath . 'lib/phpquery/phpQuery/phpQuery.php');
   
// Start with empty result maps, then index every name emitted by the "byName"
// view by its trimmed form so spreadsheet rows can be resolved to the id of
// the matching agency document.
$accounts = Array();
$nametoid = Array();
$db = $server->get_db('disclosr-agencies');
$rows = $db->get_view("app", "byName")->rows;
foreach ($rows as $row) {
    $nametoid[trim($row->key)] = $row->value;
}
   
/**
 * Scrape agency account links from a published spreadsheet HTML page.
 *
 * Every table row is scanned cell by cell: a cell whose class is s11, s10 or
 * s7 supplies the agency name, and a cell whose class is s6 or s9 supplies
 * the account URL (the href of an element inside the cell wins over the cell
 * text). Rows that yield both values are appended to the global $accounts map
 * as $accounts[<agency id>][$accountType][]; agency names that are not keys of
 * the global $nametoid map are echoed as "<name> missing" instead.
 *
 * @param string $url         Address of the HTML page to download.
 * @param string $accountType Key under which found accounts are stored (e.g. "RSS").
 * @return void
 */
function extractHTMLAccounts($url, $accountType) {
    global $accounts, $nametoid;
    $request = Requests::get($url);
    $doc = phpQuery::newDocumentHTML($request->body);
    phpQuery::selectDocument($doc);
    foreach (pq('tr')->elements as $tr) {
        $agency = "";
        // Kept separate from $url so the page address parameter is not clobbered.
        $accountUrl = "";
        foreach ($tr->childNodes as $td) {
            // Text and comment nodes between cells have no getAttribute() method.
            if (!($td instanceof DOMElement)) {
                continue;
            }
            $class = $td->getAttribute("class");
            if ($class == "s11" || $class == "s10" || $class == "s7") {
                $agency = $td->nodeValue;
            } else if ($class == "s6" || $class == "s9") {
                $accountUrl = $td->nodeValue;
                foreach ($td->childNodes as $a) {
                    // The cell's own text is a DOMText child; only elements can carry an href.
                    if (!($a instanceof DOMElement)) {
                        continue;
                    }
                    $href = $a->getAttribute("href");
                    if ($href != "") {
                        $accountUrl = $href;
                    }
                }
            }
        }
        if ($agency != "" && $accountUrl != "") {
            $agencyName = trim($agency);
            // Direct key lookup instead of scanning array_keys() for every row.
            if (!array_key_exists($agencyName, $nametoid)) {
                echo $agencyName . " missing" . PHP_EOL;
            } else {
                $accounts[$nametoid[$agencyName]][$accountType][] = $accountUrl;
            }
        }
    }
}
   
/**
 * Collect agency accounts from a published CSV spreadsheet.
 *
 * The first line of the download is ignored and the second line is taken as
 * the header row; every later line is a data row. For each data row the agency
 * name is read from the $nameField column and, when the trimmed name is a key
 * of the global $nametoid map, the value of the $accountField column is
 * appended to the global $accounts map as
 * $accounts[<agency id>][$accountType][]. Unknown agency names are echoed as
 * "<name> missing".
 *
 * @param string $url          Address of the CSV export to download.
 * @param string $accountType  Key under which found accounts are stored (e.g. "Twitter").
 * @param string $nameField    Header of the column holding the agency name.
 * @param string $accountField Header of the column holding the account value.
 * @param bool   $filter       When true, keep only rows whose "State" column equals "NAT".
 * @return void
 */
function extractCSVAccounts($url, $accountType, $nameField, $accountField, $filter) {
    global $accounts, $nametoid;
    $request = Requests::get($url);
    $lines = str_getcsv($request->body, "\n"); // split the body into rows
    $nameColumn = false;
    $accountColumn = false;
    $stateColumn = false;
    foreach ($lines as $num => $line) {
        if ($num == 0) {
            // The line before the header row carries nothing that is read here.
            continue;
        }
        $row = str_getcsv($line, ",", '"');
        if ($num == 1) {
            // Resolve the column positions once instead of searching on every row.
            $nameColumn = array_search($nameField, $row);
            $accountColumn = array_search($accountField, $row);
            $stateColumn = array_search("State", $row);
            if ($nameColumn === false || ($filter && $stateColumn === false)) {
                // Without these columns no row can be matched; using false as
                // an index would silently read column 0 instead.
                echo "required column missing in " . $url . PHP_EOL;
                return;
            }
            if ($accountColumn === false) {
                // NOTE(review): previously a missing account header fell through
                // to column 0 because false is cast to index 0. That outcome is
                // kept (now explicitly) since a caller may rely on it - confirm.
                $accountColumn = 0;
            }
            continue;
        }
        if (!isset($row[$nameColumn])) {
            // Short or empty line: no agency name to work with.
            continue;
        }
        if ($filter && (!isset($row[$stateColumn]) || $row[$stateColumn] != "NAT")) {
            continue;
        }
        $agencyName = trim($row[$nameColumn]);
        if (!array_key_exists($agencyName, $nametoid)) {
            echo $agencyName . " missing" . PHP_EOL;
            continue;
        }
        // Skip rows too short to contain the account cell rather than storing null.
        if (isset($row[$accountColumn])) {
            $accounts[$nametoid[$agencyName]][$accountType][] = $row[$accountColumn];
        }
    }
}
   
// http://agimo.govspace.gov.au/page/gov2register/
// twitter
extractCSVAccounts("https://docs.google.com/spreadsheet/pub?key=0Ap1exl80wB8OdHNKVmQ5RVlvQWpibDAxNHkzcU1nV2c&single=true&gid=0&output=csv", "Twitter", "Agency/Body/Event", "", true);
// RSS
extractHTMLAccounts("https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGJxandJREhLSGlWWUZfZ2xKOTNHZ0E&output=html", "RSS");
// facebook
extractHTMLAccounts("https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGtjcW9vOXdyZ3pOV21vQU51VmhzQnc&single=true&gid=0&output=html", "Facebook");

// Merge the harvested accounts into each agency document under the
// "has<AccountType>" field and write the document back.
foreach ($accounts as $id => $accountTypes) {
    echo $id . "<br>" . PHP_EOL;
    $doc = object_to_array($db->get($id));
    // $found rather than $accounts: the outer loop is iterating $accounts.
    foreach ($accountTypes as $accountType => $found) {
        $field = "has" . $accountType;
        if (!isset($doc[$field]) || !is_array($doc[$field])) {
            $doc[$field] = Array();
        }
        // array_unique() preserves keys, so removing a duplicate leaves a gap;
        // an array with gaps is JSON-encoded as an object instead of a list.
        // array_values() reindexes from 0 so the field stays a list.
        $doc[$field] = array_values(array_unique(array_merge($doc[$field], $found)));
    }
    $db->save($doc);
}
?> ?>
   
  #http://packages.python.org/CouchDB/client.html
  import couchdb
  import json
  import pprint
  import re
  from tidylib import tidy_document
 
  # Connect to the CouchDB server listening on 127.0.0.1:5984.
  couch = couchdb.Server('http://127.0.0.1:5984/')
 
  # select the database whose 'app/getValidationRequired' view is processed below
  docsdb = couch['disclosr-documents']
 
  def f(x):
  invalid = re.compile(r"ensure|testing|flicker|updating|longdesc|Accessibility Checks|not recognized")
  valid = re.compile(r"line")
  return (not invalid.search(x)) and valid.search(x) and x != ''
 
  for row in docsdb.view('app/getValidationRequired'):
  print row.id
  html = docsdb.get_attachment(row.id,row.value.iterkeys().next()).read()
  #print html
  document, errors = tidy_document(html,options={'accessibility-check':1,'show-warnings':0,'markup':0},keep_doc=True)
  #http://www.aprompt.ca/Tidy/accessibilitychecks.html
  #print document
  errors = '\n'.join(filter(f,errors.split('\n')))
  #print errors
  doc = docsdb.get(row.id)
  doc['validation'] = errors
  docsdb.save(doc)
 
file:b/bubbletree.php (new)
 
  <!DOCTYPE html>
  <html xmlns="http://www.w3.org/1999/xhtml">
  <head>
  <meta charset="UTF-8"/>
  <title>Minimal BubbleTree Demo</title>
  <script type="text/javascript" src="http://code.jquery.com/jquery-1.7.2.js"></script>
  <script type="text/javascript" src="javascripts/bubbletree/lib/jquery.history.js"></script>
  <script type="text/javascript" src="javascripts/bubbletree/lib/raphael.js"></script>
  <script type="text/javascript" src="javascripts/bubbletree/lib/vis4.js"></script>
  <script type="text/javascript" src="javascripts/bubbletree/lib/Tween.js"></script>
  <script type="text/javascript" src="javascripts/bubbletree/build/bubbletree.js"></script>
  <link rel="stylesheet" type="text/css" href="javascripts/bubbletree/build/bubbletree.css" />
  <script type="text/javascript" src="javascripts/bubbletree/styles/cofog.js"></script>
 
 
  <script type="text/javascript">
 
  $(function() {
  <?php
  include_once('include/common.inc.php');
 
  include("lib/Color.php");
  $color = new Lux_Color();
 
  $portfolios = Array();
 
  $db = $server->get_db('disclosr-agencies');
  try {
  $rows = $db->get_view("app", "byDeptStateName", null, true)->rows;
  foreach ($rows as $row) {
  $portfolios[trim(str_replace(Array("Department of","Department","the","'","`"),"",$row->key))] = $row->value;
  }
  } catch (SetteeRestClientException $e) {
  setteErrorHandler($e);
  }
 
  $agencies = Array();
  try {
  $rows = $db->get_view("app", "byCanonicalName", null, true)->rows;
  //print_r($rows);
  foreach ($rows as $row) {
  $employees = 0;
  $portfolioid = 0;
  if ($row->value->employees) $employees = $row->value->employees;
  if (isset($row->value->statistics->employees)) {
  $agencyEmployeesArray = object_to_array($row->value->statistics->employees);
  $employees = $agencyEmployeesArray["2010-2011"]["value"];
  }
  if (!($employees > 0)) $employees =0;
  if (isset($row->value->parentOrg)) $portfolioid = $row->value->parentOrg;
  if (isset($row->value->orgType) && $row->value->orgType == "FMA-DepartmentOfState") $portfolioid = $row->id;
  $agencies[$portfolioid][$row->value->name] = $employees;
  }
  } catch (SetteeRestClientException $e) {
  setteErrorHandler($e);
  }
  //print_r($portfolios);
  //print_r($agencies);
  $i = 0;
  foreach ($portfolios as $portfolioName => $portfolioID) {
  $i++;
  $portfolioColor = $color->hsv2hex(Array($i/10, .7, abs(($i*(1/10))-.5) + .5));
  $subnodes = Array();
  $portfolioEmployees = 0;
  foreach ($agencies[$portfolioID] as $agencyName => $agencyEmployees) {
  $agencyColor = $color->hsv2hex(Array($i/10, rand(1,10)/10, abs(($i*(1/10))-.5) + .5));
  $subnodes[] = Array(
  "label" => str_replace(Array("'","`"),"",$agencyName),
  "amount" => $agencyEmployees,
  "color" => "#".$agencyColor
  );
  $portfolioEmployees += $agencyEmployees;
  }
  $nodes[] = Array(
  "label" => $portfolioName,
  "amount" => $portfolioEmployees,
  "color" => "#".$portfolioColor,
  "children" => $subnodes
  );
  $total += $portfolioEmployees;
  }
  $data = Array(
  "label" => "Australian Federal Government",
  "amount" => $total,
  "color" => "#000000",
  "children" => $nodes
  );
  echo "var data =eval('('+'" . json_encode($data) . "'+')');";
  ?>
 
  new BubbleTree({
  data: data,
  container: '.bubbletree'
  });
 
 
  });
 
  </script>
  </head>
  <body>
  <div class="bubbletree-wrapper">
  <div class="bubbletree"></div>
  </div>
  </body>
  </html>
 
directory:a/couchdb/settee -> directory:b/couchdb/settee
   
<?php <?php
   
include $basePath . "schemas/schemas.inc.php"; include $basePath . "schemas/schemas.inc.php";
   
require ($basePath . 'couchdb/settee/src/settee.php'); require ($basePath . 'couchdb/settee/src/settee.php');
   
/**
 * Placeholder for the design document that holds the document views.
 *
 * The function body contains no statements: the view definitions (web_server,
 * byAgency, byURL, agency, byWebServer, getValidationRequired) are only
 * recorded in the block comment below, and nothing is sent to the database.
 * NOTE(review): presumably these views are installed by hand - confirm.
 */
function createDocumentsDesignDoc() { function createDocumentsDesignDoc() {
/*"views": { /*"views": {
"web_server": { "web_server": {
"map": "function(doc) {\n emit(doc.web_server, 1);\n}", "map": "function(doc) {\n emit(doc.web_server, 1);\n}",
"reduce": "function (key, values, rereduce) {\n return sum(values);\n}" "reduce": "function (key, values, rereduce) {\n return sum(values);\n}"
}, },
"byAgency": { "byAgency": {
"map": "function(doc) {\n emit(doc.agencyID, 1);\n}", "map": "function(doc) {\n emit(doc.agencyID, 1);\n}",
"reduce": "function (key, values, rereduce) {\n return sum(values);\n}" "reduce": "function (key, values, rereduce) {\n return sum(values);\n}"
}, },
"byURL": { "byURL": {
"map": "function(doc) {\n emit(doc.url, doc);\n}" "map": "function(doc) {\n emit(doc.url, doc);\n}"
}, },
"agency": { "agency": {
"map": "function(doc) {\n emit(doc.agencyID, doc);\n}" "map": "function(doc) {\n emit(doc.agencyID, doc);\n}"
}, },
"byWebServer": { "byWebServer": {
"map": "function(doc) {\n emit(doc.web_server, doc);\n}" "map": "function(doc) {\n emit(doc.web_server, doc);\n}"
} },
  "getValidationRequired": {
  "map": "function(doc) {\nif (doc.mime_type == \"text/html\" \n&& typeof(doc.validation) == \"undefined\") {\n emit(doc._id, doc._attachments);\n}\n}"
  }
}*/ }*/
} }
   
function createAgencyDesignDoc() { function createAgencyDesignDoc() {
global $db; global $db;
$obj = new stdClass(); $obj = new stdClass();
$obj->_id = "_design/" . urlencode("app"); $obj->_id = "_design/" . urlencode("app");
$obj->language = "javascript"; $obj->language = "javascript";
$obj->views->all->map = "function(doc) { emit(doc._id, doc); };"; $obj->views->all->map = "function(doc) { emit(doc._id, doc); };";
$obj->views->byABN->map = "function(doc) { emit(doc.abn, doc); };"; $obj->views->byABN->map = "function(doc) { emit(doc.abn, doc); };";
$obj->views->byCanonicalName->map = "function(doc) { $obj->views->byCanonicalName->map = "function(doc) {
if (doc.parentOrg || doc.orgType == 'FMA-DepartmentOfState') { if (doc.parentOrg || doc.orgType == 'FMA-DepartmentOfState') {
emit(doc.name, doc); emit(doc.name, doc);
} }
};"; };";
$obj->views->byDeptStateName->map = "function(doc) { $obj->views->byDeptStateName->map = "function(doc) {
if (doc.orgType == 'FMA-DepartmentOfState') { if (doc.orgType == 'FMA-DepartmentOfState') {
emit(doc.name, doc._id); emit(doc.name, doc._id);
} }
};"; };";
$obj->views->parentOrgs->map = "function(doc) { $obj->views->parentOrgs->map = "function(doc) {
if (doc.parentOrg) { if (doc.parentOrg) {
emit(doc._id, doc.parentOrg); emit(doc._id, doc.parentOrg);
} }
};"; };";
$obj->views->byName->map = 'function(doc) { $obj->views->byName->map = 'function(doc) {
if (typeof(doc["status"]) == "undefined" || doc["status"] != "suspended") { if (typeof(doc["status"]) == "undefined" || doc["status"] != "suspended") {
emit(doc.name, doc._id); emit(doc.name, doc._id);
if (typeof(doc.shortName) != "undefined" && doc.shortName != doc.name) { if (typeof(doc.shortName) != "undefined" && doc.shortName != doc.name) {
emit(doc.shortName, doc._id); emit(doc.shortName, doc._id);
} }
for (name in doc.otherNames) { for (name in doc.otherNames) {
if (doc.otherNames[name] != "" && doc.otherNames[name] != doc.name) { if (doc.otherNames[name] != "" && doc.otherNames[name] != doc.name) {
emit(doc.otherNames[name], doc._id); emit(doc.otherNames[name], doc._id);
}