[submodule "couchdb/couchdb-lucene"] | [submodule "couchdb/couchdb-lucene"] |
path = couchdb/couchdb-lucene | path = couchdb/couchdb-lucene |
url = https://github.com/rnewson/couchdb-lucene.git | url = https://github.com/rnewson/couchdb-lucene.git |
[submodule "couchdb/settee"] | [submodule "couchdb/settee"] |
path = couchdb/settee | path = couchdb/settee |
url = https://github.com/inadarei/settee.git | url = https://github.com/inadarei/settee.git |
[submodule "lib/php-diff"] | [submodule "lib/php-diff"] |
path = lib/php-diff | path = lib/php-diff |
url = https://github.com/chrisboulton/php-diff.git | url = https://github.com/chrisboulton/php-diff.git |
[submodule "lib/Requests"] | [submodule "lib/Requests"] |
path = lib/Requests | path = lib/Requests |
url = https://github.com/rmccue/Requests.git | url = https://github.com/rmccue/Requests.git |
[submodule "javascripts/flotr2"] | [submodule "javascripts/flotr2"] |
path = javascripts/flotr2 | path = javascripts/flotr2 |
url = https://github.com/HumbleSoftware/Flotr2.git | url = https://github.com/HumbleSoftware/Flotr2.git |
[submodule "lib/phpquery"] | [submodule "lib/phpquery"] |
path = lib/phpquery | path = lib/phpquery |
url = https://github.com/TobiaszCudnik/phpquery.git | url = https://github.com/TobiaszCudnik/phpquery.git |
[submodule "javascripts/sigma"] | [submodule "javascripts/sigma"] |
path = javascripts/sigma | path = javascripts/sigma |
url = https://github.com/jacomyal/sigma.js.git | url = https://github.com/jacomyal/sigma.js.git |
[submodule "javascripts/bubbletree"] | |
path = javascripts/bubbletree | |
url = https://github.com/okfn/bubbletree.git | |
<?php
// Shared project setup, then the phpQuery HTML parser used by extractHTMLAccounts().
require_once '../include/common.inc.php';
require($basePath . 'lib/phpquery/phpQuery/phpQuery.php');

// Filled by the extract*Accounts() functions below.
$accounts = Array();

// Agency database handle; rows of the "app/byName" view are indexed by
// their trimmed key so scraped agency names can be resolved to view values.
$db = $server->get_db('disclosr-agencies');
$rows = $db->get_view("app", "byName")->rows;
$nametoid = Array();
foreach ($rows as $row) {
    $nametoid[trim($row->key)] = $row->value;
}
/**
 * Scrape agency accounts from an HTML table and record them in $accounts.
 *
 * Fetches $url, walks every <tr>, and takes the agency name from cells with
 * class s11/s10/s7 and the account from cells with class s6/s9 (the cell text,
 * or the href of a child element when one is present). Rows whose trimmed
 * agency name is a key of the global $nametoid are appended to
 * $accounts[<id>][$accountType]; other names are echoed as "<name> missing".
 *
 * @param string $url         Address of the published HTML spreadsheet.
 * @param string $accountType Key under which found accounts are stored (e.g. "RSS").
 * @return void
 */
function extractHTMLAccounts($url, $accountType) {
    global $accounts, $nametoid;
    $request = Requests::get($url);
    $doc = phpQuery::newDocumentHTML($request->body);
    phpQuery::selectDocument($doc);
    foreach (pq('tr')->elements as $tr) {
        $agency = "";
        // Kept separate from the $url parameter so the request address is not overwritten.
        $accountUrl = "";
        foreach ($tr->childNodes as $td) {
            // Text and comment nodes have no getAttribute(); only inspect elements.
            if (!($td instanceof DOMElement)) {
                continue;
            }
            $class = $td->getAttribute("class");
            if ($class == "s11" || $class == "s10" || $class == "s7") {
                $agency = $td->nodeValue;
            } else if ($class == "s6" || $class == "s9") {
                $accountUrl = $td->nodeValue;
                foreach ($td->childNodes as $a) {
                    if (!($a instanceof DOMElement)) {
                        continue;
                    }
                    $href = $a->getAttribute("href");
                    if ($href != "") {
                        // Prefer the link target over the visible cell text.
                        $accountUrl = $href;
                    }
                }
            }
        }
        if ($agency != "" && $accountUrl != "") {
            $agencyName = trim($agency);
            // Direct key lookup instead of a loose in_array() over array_keys().
            if (!array_key_exists($agencyName, $nametoid)) {
                echo $agencyName . " missing" . PHP_EOL;
            } else {
                $accounts[$nametoid[$agencyName]][$accountType][] = $accountUrl;
            }
        }
    }
}
/**
 * Read agency accounts from a CSV export and record them in $accounts.
 *
 * The first line of the CSV is ignored, the second line is the header row,
 * and every later line is a data row. For each data row whose $nameField
 * value (trimmed) is a key of the global $nametoid, the $accountField value
 * is appended to $accounts[<id>][$accountType]; unknown names are echoed as
 * "<name> missing".
 *
 * @param string $url          Address of the CSV document.
 * @param string $accountType  Key under which found accounts are stored (e.g. "Twitter").
 * @param string $nameField    Header of the column holding the agency name.
 * @param string $accountField Header of the column holding the account. When the
 *                             header is not found the first column is used, which
 *                             is what the previous implicit false-to-0 index did.
 * @param bool   $filter       When true, only rows whose "State" column is "NAT" are used.
 * @return void
 */
function extractCSVAccounts($url, $accountType, $nameField, $accountField, $filter) {
    global $accounts, $nametoid;
    $request = Requests::get($url);
    $Data = str_getcsv($request->body, "\n"); //parse the rows

    // Column positions are resolved once from the header row, not per data row.
    $nameCol = false;
    $stateCol = false;
    $accountCol = 0;

    foreach ($Data as $num => $line) {
        if ($num == 0) {
            // Line 0 precedes the header row and carries no data.
            continue;
        }
        $Row = str_getcsv($line, ",", '"');
        if ($num == 1) {
            $nameCol = array_search($nameField, $Row);
            $stateCol = array_search("State", $Row);
            $accountCol = array_search($accountField, $Row);
            if ($accountCol === false) {
                // Explicit form of the legacy fallback: a call in this file
                // passes "" as $accountField, so keep resolving to column 0.
                $accountCol = 0;
            }
            continue;
        }

        // Without a name column (or a name cell) there is nothing to match on;
        // previously a missing header silently read column 0 as the name.
        if ($nameCol === false || !isset($Row[$nameCol])) {
            continue;
        }
        if ($filter && ($stateCol === false || !isset($Row[$stateCol]) || $Row[$stateCol] != "NAT")) {
            continue;
        }

        $agencyName = trim($Row[$nameCol]);
        if (!array_key_exists($agencyName, $nametoid)) {
            echo $agencyName . " missing" . PHP_EOL;
        } else if (isset($Row[$accountCol])) {
            // Short rows without an account cell are skipped rather than stored as null.
            $accounts[$nametoid[$agencyName]][$accountType][] = $Row[$accountCol];
        }
    }
}
// http://agimo.govspace.gov.au/page/gov2register/
extractCSVAccounts("https://docs.google.com/spreadsheet/pub?key=0Ap1exl80wB8OdHNKVmQ5RVlvQWpibDAxNHkzcU1nV2c&single=true&gid=0&output=csv", "Twitter", "Agency/Body/Event", "", true);
// RSS
extractHTMLAccounts("https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGJxandJREhLSGlWWUZfZ2xKOTNHZ0E&output=html", "RSS");
extractHTMLAccounts("https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGtjcW9vOXdyZ3pOV21vQU51VmhzQnc&single=true&gid=0&output=html", "Facebook");

// Merge the collected accounts into each agency document's "has<Type>" list.
foreach ($accounts as $id => $accountTypes) {
    echo $id . "<br>" . PHP_EOL;
    $doc = object_to_array($db->get($id));
    // print_r($doc);
    // The loop variable is deliberately not called $accounts, so the
    // collection being iterated by the outer loop is never overwritten.
    foreach ($accountTypes as $accountType => $typeAccounts) {
        $field = "has" . $accountType;
        if (!isset($doc[$field]) || !is_array($doc[$field])) {
            $doc[$field] = Array();
        }
        // array_unique() keeps the original keys; reindex so the result stays
        // a sequential list and is encoded as a JSON array, not an object.
        $doc[$field] = array_values(array_unique(array_merge($doc[$field], $typeAccounts)));
    }
    $db->save($doc);
}
?>
#http://packages.python.org/CouchDB/client.html
import couchdb
from BeautifulSoup import BeautifulSoup

couch = couchdb.Server('http://127.0.0.1:5984/')

# select database
docsdb = couch['disclosr-documents']

# Print the name of every named <meta> tag found in each document returned by
# the 'app/getMetadataExtractRequired' view.
for row in docsdb.view('app/getMetadataExtractRequired'):
    print(row.id)
    # The first key of row.value is passed as the attachment filename
    # (presumably row.value maps attachment names to their info -- confirm).
    html = docsdb.get_attachment(row.id, next(iter(row.value))).read()
    metadata = []
    # http://www.crummy.com/software/BeautifulSoup/documentation.html
    soup = BeautifulSoup(html)
    # soup.meta is only the first <meta> tag (or None); findAll returns them all.
    for metatag in soup.findAll('meta'):
        name = metatag.get('name')
        # Tags such as <meta charset=...> have no name attribute.
        if name is not None:
            print(name)
    doc = docsdb.get(row.id)
    # Saving is still disabled; these lines previously used '//', which is
    # not a Python comment marker and made the script a syntax error.
    #doc['metadata'] = metadata
    #docsdb.save(doc)
#http://packages.python.org/CouchDB/client.html | |
import couchdb | |
import json | |
import pprint | |
import re | |
from tidylib import tidy_document | |
# Connect to the CouchDB server running on this machine.
couch = couchdb.Server('http://127.0.0.1:5984/')
# Database whose documents are run through the validation loop below.
docsdb = couch['disclosr-documents']
# Messages containing any of these phrases are dropped from the report.
_INVALID_RE = re.compile(r"ensure|testing|flicker|updating|longdesc|Accessibility Checks|not recognized")
# Only messages containing "line" are kept.
_VALID_RE = re.compile(r"line")


def f(x):
    """Return True when message ``x`` should be kept in the validation report.

    A message is kept when it contains "line" and none of the excluded
    phrases. The empty string never contains "line", so it is rejected
    without a separate check. Always returns a real bool.
    """
    return _VALID_RE.search(x) is not None and _INVALID_RE.search(x) is None
# Run tidy's accessibility check over each document returned by the
# 'app/getValidationRequired' view and store the filtered messages on it.
for row in docsdb.view('app/getValidationRequired'):
    print(row.id)
    # The first key of row.value is used as the attachment filename.
    attachment_name = next(iter(row.value))
    html = docsdb.get_attachment(row.id, attachment_name).read()
    #print html
    tidy_options = {'accessibility-check': 1, 'show-warnings': 0, 'markup': 0}
    #http://www.aprompt.ca/Tidy/accessibilitychecks.html
    document, errors = tidy_document(html, options=tidy_options, keep_doc=True)
    #print document
    # Keep only the messages accepted by f(), one per line.
    kept_messages = [message for message in errors.split('\n') if f(message)]
    errors = '\n'.join(kept_messages)
    #print errors
    doc = docsdb.get(row.id)
    doc['validation'] = errors
    docsdb.save(doc)
<!DOCTYPE html> | |
<html xmlns="http://www.w3.org/1999/xhtml"> | |
<head> | |
<meta charset="UTF-8"/> | |
<title>Minimal BubbleTree Demo</title> | |
<script type="text/javascript" src="http://code.jquery.com/jquery-1.7.2.js"></script> | |
<script type="text/javascript" src="javascripts/bubbletree/lib/jquery.history.js"></script> | |
<script type="text/javascript" src="javascripts/bubbletree/lib/raphael.js"></script> | |
<script type="text/javascript" src="javascripts/bubbletree/lib/vis4.js"></script> | |
<script type="text/javascript" src="javascripts/bubbletree/lib/Tween.js"></script> | |
<script type="text/javascript" src="javascripts/bubbletree/build/bubbletree.js"></script> | |
<link rel="stylesheet" type="text/css" href="javascripts/bubbletree/build/bubbletree.css" /> | |
<script type="text/javascript" src="javascripts/bubbletree/styles/cofog.js"></script> | |
<script type="text/javascript"> | |
$(function() { | |
<?php
// Builds the BubbleTree data set: one node per portfolio with its agencies as
// children, sized by employee count, and emits it as the JS variable `data`.
include_once('include/common.inc.php');
include("lib/Color.php");
$color = new Lux_Color();

// Portfolio display name (key of the byDeptStateName view with "Department of",
// "Department", "the" and quote characters removed) => view value.
$portfolios = Array();
$db = $server->get_db('disclosr-agencies');
try {
    $rows = $db->get_view("app", "byDeptStateName", null, true)->rows;
    foreach ($rows as $row) {
        $portfolios[trim(str_replace(Array("Department of","Department","the","'","`"),"",$row->key))] = $row->value;
    }
} catch (SetteeRestClientException $e) {
    setteErrorHandler($e);
}

// Portfolio id => (agency name => employee count).
$agencies = Array();
try {
    $rows = $db->get_view("app", "byCanonicalName", null, true)->rows;
    //print_r($rows);
    foreach ($rows as $row) {
        $employees = 0;
        $portfolioid = 0;
        // !empty() has the same truthiness as the bare property read but does
        // not raise a notice when the property is absent.
        if (!empty($row->value->employees)) {
            $employees = $row->value->employees;
        }
        if (isset($row->value->statistics->employees)) {
            $agencyEmployeesArray = object_to_array($row->value->statistics->employees);
            // When statistics exist they take precedence; a missing 2010-2011
            // figure counts as 0, as it effectively did before.
            $employees = isset($agencyEmployeesArray["2010-2011"]["value"])
                ? $agencyEmployeesArray["2010-2011"]["value"]
                : 0;
        }
        if (!($employees > 0)) {
            $employees = 0;
        }
        if (isset($row->value->parentOrg)) {
            $portfolioid = $row->value->parentOrg;
        }
        // A department of state is filed under its own id.
        if (isset($row->value->orgType) && $row->value->orgType == "FMA-DepartmentOfState") {
            $portfolioid = $row->id;
        }
        $agencies[$portfolioid][$row->value->name] = $employees;
    }
} catch (SetteeRestClientException $e) {
    setteErrorHandler($e);
}
//print_r($portfolios);
//print_r($agencies);

// Initialised up front so an empty result set still yields valid output.
$nodes = Array();
$total = 0;
$i = 0;
foreach ($portfolios as $portfolioName => $portfolioID) {
    $i++;
    $portfolioColor = $color->hsv2hex(Array($i/10, .7, abs(($i*(1/10))-.5) + .5));
    $subnodes = Array();
    $portfolioEmployees = 0;
    // A portfolio may have no agencies recorded against it.
    $portfolioAgencies = isset($agencies[$portfolioID]) ? $agencies[$portfolioID] : Array();
    foreach ($portfolioAgencies as $agencyName => $agencyEmployees) {
        // Same hue and value as the portfolio, with a random saturation per agency.
        $agencyColor = $color->hsv2hex(Array($i/10, rand(1,10)/10, abs(($i*(1/10))-.5) + .5));
        $subnodes[] = Array(
            "label" => str_replace(Array("'","`"),"",$agencyName),
            "amount" => $agencyEmployees,
            "color" => "#".$agencyColor
        );
        $portfolioEmployees += $agencyEmployees;
    }
    $nodes[] = Array(
        "label" => $portfolioName,
        "amount" => $portfolioEmployees,
        "color" => "#".$portfolioColor,
        "children" => $subnodes
    );
    $total += $portfolioEmployees;
}
$data = Array(
    "label" => "Australian Federal Government",
    "amount" => $total,
    "color" => "#000000",
    "children" => $nodes
);
// JSON is itself a valid JavaScript literal, so it is emitted directly rather
// than wrapped in a single-quoted string and passed to eval(), which broke on
// backslashes or apostrophes. The JSON_HEX_* flags keep <, >, &, ' and " out
// of the inline <script> body.
echo "var data = " . json_encode($data, JSON_HEX_TAG | JSON_HEX_APOS | JSON_HEX_QUOT | JSON_HEX_AMP) . ";";
?>
new BubbleTree({ | |
data: data, | |
container: '.bubbletree' | |
}); | |
}); | |
</script> | |
</head> | |
<body> | |
<div class="bubbletree-wrapper"> | |
<div class="bubbletree"></div> | |
</div> | |
</body> | |
</html> | |
<?php
include_once('include/common.inc.php');
//include_header();

// Output format: "html" unless the request carries a "format" parameter.
$format = isset($_REQUEST['format']) ? $_REQUEST['format'] : "html";
/**
 * Emit one graph node in the output format selected by the global $format.
 *
 * - "html": nothing is emitted (the generating line is commented out).
 * - "dot":  `<id> [label="<label>"];`, only when $label is non-empty.
 * - "gexf": a <node> element with a size of 1 (and a pid attribute) when
 *           $parent is given, otherwise a size of 2, plus a random viz:color.
 *
 * @param string $id     Node identifier.
 * @param string $label  Human-readable node label.
 * @param string $parent Identifier of the parent node, or "" for none.
 * @return void
 */
function add_node($id, $label, $parent="") {
    global $format;
    if ($format == "html") {
        // echo "nodes[\"$id\"] = graph.newNode({label: \"$label\"});" . PHP_EOL;
    }
    if ($format == "dot" && $label != "") {
        // A double quote inside the label would end the quoted dot string early.
        $dotLabel = str_replace('"', '\\"', $label);
        echo "$id [label=\"$dotLabel\"];" . PHP_EOL;
    }
    if ($format == "gexf") {
        // ENT_XML1 on its own leaves quotes unescaped; ENT_QUOTES is required
        // because these values are placed inside quoted XML attributes.
        $xmlFlags = ENT_QUOTES | ENT_XML1;
        $xmlId = htmlentities($id, $xmlFlags);
        $xmlLabel = htmlentities($label, $xmlFlags);
        if ($parent != "") {
            $sizePart = "pid='" . htmlentities($parent, $xmlFlags) . "'><viz:size value='1'/>";
        } else {
            $sizePart = "><viz:size value='2'/>";
        }
        echo "<node id='$xmlId' label=\"" . $xmlLabel . "\" " . $sizePart
            . "<viz:color b='" . rand(0,255) . "' g='" . rand(0,255) . "' r='" . rand(0,255) . "'/>"
            . "</node>" . PHP_EOL;
    }
}
function add_edge($from, $to, $color) { | function add_edge($from, $to, $color) { |
global $format; | global $format; |
if ($format == "html") { | if ($format == "html") { |
// echo "graph.newEdge(nodes[\"$from\"], nodes['$to'], {color: '$color'});" . PHP_EOL; | // echo "graph.newEdge(nodes[\"$from\"], nodes['$to'], {color: '$color'});" . PHP_EOL; |
} | } |
if ($format == "dot") { | if ($format == "dot") { |
echo "$from -> $to ".($color != ""? "[color=$color]":"").";". PHP_EOL; | echo "$from -> $to ".($color != ""? "[color=$color]":"").";". PHP_EOL; |
} | } |
if |