[submodule "couchdb/couchdb-lucene"] | [submodule "couchdb/couchdb-lucene"] |
path = couchdb/couchdb-lucene | path = couchdb/couchdb-lucene |
url = https://github.com/rnewson/couchdb-lucene.git | url = https://github.com/rnewson/couchdb-lucene.git |
[submodule "couchdb/settee"] | [submodule "couchdb/settee"] |
path = couchdb/settee | path = couchdb/settee |
url = https://github.com/inadarei/settee.git | url = https://github.com/inadarei/settee.git |
[submodule "lib/springy"] | |
path = lib/springy | |
url = https://github.com/dhotson/springy.git | |
[submodule "lib/php-diff"] | [submodule "lib/php-diff"] |
path = lib/php-diff | path = lib/php-diff |
url = https://github.com/chrisboulton/php-diff.git | url = https://github.com/chrisboulton/php-diff.git |
[submodule "lib/Requests"] | [submodule "lib/Requests"] |
path = lib/Requests | path = lib/Requests |
url = https://github.com/rmccue/Requests.git | url = https://github.com/rmccue/Requests.git |
[submodule "javascripts/flotr2"] | [submodule "javascripts/flotr2"] |
path = javascripts/flotr2 | path = javascripts/flotr2 |
url = https://github.com/HumbleSoftware/Flotr2.git | url = https://github.com/HumbleSoftware/Flotr2.git |
[submodule "lib/phpquery"] | [submodule "lib/phpquery"] |
path = lib/phpquery | path = lib/phpquery |
url = https://github.com/TobiaszCudnik/phpquery.git | url = https://github.com/TobiaszCudnik/phpquery.git |
[submodule "javascripts/sigma"] | |
path = javascripts/sigma | |
url = https://github.com/jacomyal/sigma.js.git | |
<?php
// Bootstrap for the social-media account scraper: loads the shared include
// and phpQuery, then builds the agency-name lookup used by the extract*
// functions further down this script.
require_once '../include/common.inc.php';
require($basePath . 'lib/phpquery/phpQuery/phpQuery.php');

$db = $server->get_db('disclosr-agencies');
$rows = $db->get_view("app", "byName")->rows;

// Trimmed view key (agency name) => view value. The value is later used as
// the first-level key of $accounts.
$nametoid = Array();
// Filled by the extract* functions as [$nametoid value][account type][] = account.
$accounts = Array();

foreach ($rows as $agencyRow) {
    $agencyName = trim($agencyRow->key);
    $nametoid[$agencyName] = $agencyRow->value;
}
/**
 * Collect agency accounts from a spreadsheet published as an HTML table.
 *
 * Every <tr> of the fetched page is inspected. A cell whose class is s7, s10
 * or s11 supplies the agency name; a cell whose class is s6 or s9 supplies
 * the account (the href of an element inside the cell when one is present,
 * otherwise the cell text). When a row yields both, the account is appended
 * to the global $accounts under [$nametoid[name]][$accountType]. Names that
 * are not keys of the global $nametoid are reported with "<name> missing".
 *
 * @param string $url         Address of the HTML page to fetch.
 * @param string $accountType Key the found accounts are filed under (e.g. "RSS").
 * @return void
 */
function extractHTMLAccounts($url, $accountType) {
    global $accounts, $nametoid;

    $request = Requests::get($url);
    $doc = phpQuery::newDocumentHTML($request->body);
    phpQuery::selectDocument($doc);

    foreach (pq('tr')->elements as $tr) {
        $agency = "";
        // Kept separate from the $url parameter so the argument is not clobbered.
        $account = "";

        foreach ($tr->childNodes as $td) {
            // childNodes also yields text/comment nodes, which have no
            // getAttribute(); calling it on them is a fatal error.
            if (!($td instanceof DOMElement)) {
                continue;
            }
            $class = $td->getAttribute("class");
            if ($class == "s11" || $class == "s10" || $class == "s7") {
                $agency = $td->nodeValue;
            } else if ($class == "s6" || $class == "s9") {
                // Default to the cell text; a link target inside the cell wins.
                $account = $td->nodeValue;
                foreach ($td->childNodes as $a) {
                    if (!($a instanceof DOMElement)) {
                        continue;
                    }
                    $href = $a->getAttribute("href");
                    if ($href != "") {
                        $account = $href;
                    }
                }
            }
        }

        if ($agency == "" || $account == "") {
            continue;
        }

        $agencyName = trim($agency);
        // Exact key test instead of a loose in_array() scan over array_keys().
        if (!array_key_exists($agencyName, $nametoid)) {
            echo $agencyName . " missing" . PHP_EOL;
        } else {
            $accounts[$nametoid[$agencyName]][$accountType][] = $account;
        }
    }
}
/**
 * Collect agency accounts from a spreadsheet published as CSV.
 *
 * The first record is skipped and the second record is taken as the header
 * row. For each later record the $nameField column supplies the agency name
 * and the $accountField column the account; matches are appended to the
 * global $accounts under [$nametoid[name]][$accountType]. Names that are not
 * keys of the global $nametoid are reported with "<name> missing".
 *
 * @param string $url          Address of the CSV export to fetch.
 * @param string $accountType  Key the found accounts are filed under (e.g. "Twitter").
 * @param string $nameField    Header text of the agency-name column.
 * @param string $accountField Header text of the account column.
 * @param bool   $filter       When truthy, keep only records whose "State" column is "NAT".
 * @return void
 */
function extractCSVAccounts($url, $accountType, $nameField, $accountField, $filter) {
    global $accounts, $nametoid;

    $request = Requests::get($url);

    // Parse through a stream with fgetcsv(): splitting the body with
    // str_getcsv($body, "\n") consumes the quoting, so a quoted field that
    // contains a comma would be split again by the per-line parse.
    $stream = fopen('php://temp', 'r+');
    fwrite($stream, (string) $request->body);
    rewind($stream);

    $nameCol = false;
    $accountCol = false;
    $stateCol = false;
    $num = -1;

    while (($row = fgetcsv($stream, 0, ",", '"')) !== false) {
        $num++;
        if ($num == 0) {
            // First record is not data (skipped by the original code as well).
            continue;
        }
        if ($num == 1) {
            // Resolve the column positions once instead of once per record.
            $nameCol = array_search($nameField, $row, true);
            $accountCol = array_search($accountField, $row, true);
            $stateCol = array_search("State", $row, true);
            if ($nameCol === false || $accountCol === false) {
                // array_search() returned false; indexing with it would
                // silently read column 0, so stop rather than store wrong data.
                break;
            }
            continue;
        }

        if (!isset($row[$nameCol])) {
            // Blank or short record.
            continue;
        }
        if ($filter && ($stateCol === false || !isset($row[$stateCol]) || $row[$stateCol] != "NAT")) {
            continue;
        }

        $agencyName = trim($row[$nameCol]);
        if (!array_key_exists($agencyName, $nametoid)) {
            echo $agencyName . " missing" . PHP_EOL;
            continue;
        }
        if (isset($row[$accountCol])) {
            $accounts[$nametoid[$agencyName]][$accountType][] = $row[$accountCol];
        }
    }

    fclose($stream);
}
// Source register: http://agimo.govspace.gov.au/page/gov2register/
extractCSVAccounts("https://docs.google.com/spreadsheet/pub?key=0Ap1exl80wB8OdHNKVmQ5RVlvQWpibDAxNHkzcU1nV2c&single=true&gid=0&output=csv", "Twitter", "Agency/Body/Event", "", true);
// RSS
extractHTMLAccounts("https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGJxandJREhLSGlWWUZfZ2xKOTNHZ0E&output=html", "RSS");
extractHTMLAccounts("https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGtjcW9vOXdyZ3pOV21vQU51VmhzQnc&single=true&gid=0&output=html", "Facebook");

// Merge everything that was scraped into the stored agency documents, one
// "has<AccountType>" list per account type.
foreach ($accounts as $id => $accountTypes) {
    echo $id . "<br>" . PHP_EOL;
    $doc = object_to_array($db->get($id));
    // Loop variable is named $found: reusing $accounts here would overwrite
    // the array the outer loop is iterating.
    foreach ($accountTypes as $accountType => $found) {
        $field = "has" . $accountType;
        if (!isset($doc[$field]) || !is_array($doc[$field])) {
            $doc[$field] = Array();
        }
        // array_unique() keeps the original keys, which leaves gaps after
        // duplicates are dropped; re-index so the value stays a plain list
        // (a gapped array is encoded by json_encode() as an object).
        $doc[$field] = array_values(array_unique(array_merge($doc[$field], $found)));
    }
    $db->save($doc);
}
?> | ?> |
#http://packages.python.org/CouchDB/client.html | |
import couchdb | |
import json | |
import pprint | |
import re | |
from tidylib import tidy_document | |
couch = couchdb.Server('http://127.0.0.1:5984/') | |
# select database | |
docsdb = couch['disclosr-documents'] | |
def f(x):
    """Filter predicate for one line of validator output.

    Returns False when the line mentions any of the ignored phrases, None
    when it does not contain the text "line", and True otherwise.
    """
    ignored = r"ensure|testing|flicker|updating|longdesc|Accessibility Checks|not recognized"
    if re.search(ignored, x):
        return False
    located = re.search(r"line", x)
    if not located:
        # Propagate the failed search result (None), as the original and-chain did.
        return located
    return x != ''
# For every row of the getValidationRequired view: fetch one attachment of the
# document, run Tidy's accessibility checks over it and store the filtered
# messages in the document's 'validation' field.
for row in docsdb.view('app/getValidationRequired'):
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(row.id)
    # row.value is presumably a mapping keyed by attachment name -- TODO confirm
    # against the view. next(iter(...)) replaces the Python-2-only
    # .iterkeys().next() and picks the same first key.
    attachment_name = next(iter(row.value))
    html = docsdb.get_attachment(row.id, attachment_name).read()
    #print html
    document, errors = tidy_document(html,options={'accessibility-check':1,'show-warnings':0,'markup':0},keep_doc=True)
    #http://www.aprompt.ca/Tidy/accessibilitychecks.html
    #print document
    # Keep only the messages accepted by f().
    errors = '\n'.join(filter(f,errors.split('\n')))
    #print errors
    doc = docsdb.get(row.id)
    doc['validation'] = errors
    docsdb.save(doc)
<?php | <?php |
include_once('include/common.inc.php'); | include_once('include/common.inc.php'); |
//include_header(); | //include_header(); |
$format = "html"; | $format = "html"; |
if (isset($_REQUEST['format'])) { | if (isset($_REQUEST['format'])) { |
$format = $_REQUEST['format']; | $format = $_REQUEST['format']; |
} | } |
/**
 * Print one graph node in the output format selected by the global $format.
 *
 * - "html": prints nothing; the html page loads its graph from
 *   graph.php?format=gexf on the client side.
 * - "dot":  prints `<id> [label="<label>"];` (skipped when the label is empty).
 * - "gexf": prints a <node> element with a random viz:color; a node with a
 *   parent gets a pid attribute and viz:size 1, a top-level node viz:size 2.
 *
 * @param string $id     Node identifier.
 * @param string $label  Human-readable node label.
 * @param string $parent Identifier of the parent node, or "" for none (gexf only).
 * @return void
 */
function add_node($id, $label, $parent="") {
    global $format;

    if ($format == "dot" && $label != "") {
        // A bare double quote would terminate the quoted DOT label early.
        $dotLabel = str_replace('"', '\\"', $label);
        echo "$id [label=\"$dotLabel\"];" . PHP_EOL;
    }

    if ($format == "gexf") {
        // ENT_XML1 on its own means ENT_NOQUOTES, which leaves '"' unescaped
        // inside the double-quoted label attribute; ENT_QUOTES fixes that.
        $xmlFlags = ENT_XML1 | ENT_QUOTES;
        $xmlId = htmlspecialchars($id, $xmlFlags, 'UTF-8');
        $xmlLabel = htmlentities($label, $xmlFlags, 'UTF-8');

        if ($parent != "") {
            $xmlParent = htmlspecialchars($parent, $xmlFlags, 'UTF-8');
            $opening = "pid='$xmlParent'><viz:size value='1'/>";
        } else {
            $opening = "><viz:size value='2'/>";
        }

        echo "<node id='$xmlId' label=\"" . $xmlLabel . "\" " . $opening
            . "<viz:color b='" . rand(0, 255) . "' g='" . rand(0, 255) . "' r='" . rand(0, 255) . "'/>"
            . "</node>" . PHP_EOL;
    }
}
/**
 * Print one directed edge in the output format selected by the global $format.
 *
 * - "html": prints nothing; the html page loads its graph from
 *   graph.php?format=gexf on the client side.
 * - "dot":  prints `<from> -> <to> [color=<color>];` (no attribute list when
 *   $color is empty).
 * - "gexf": prints an <edge> element whose id is $from and $to concatenated.
 *
 * @param string $from  Identifier of the source node.
 * @param string $to    Identifier of the target node.
 * @param string $color Edge colour; only used by the dot output.
 * @return void
 */
function add_edge($from, $to, $color) {
    global $format;

    if ($format == "dot") {
        $attributes = ($color != "") ? "[color=$color]" : "";
        echo "$from -> $to " . $attributes . ";" . PHP_EOL;
    }

    if ($format == "gexf") {
        // Escape the identifiers so that '&', '<' or a quote in one of them
        // cannot produce malformed XML.
        $xmlFrom = htmlspecialchars($from, ENT_XML1 | ENT_QUOTES, 'UTF-8');
        $xmlTo = htmlspecialchars($to, ENT_XML1 | ENT_QUOTES, 'UTF-8');
        echo "<edge id='$xmlFrom$xmlTo' source='$xmlFrom' target='$xmlTo' />" . PHP_EOL;
    }
}
if ($format == "gexf") { | |
//header('Content-Type: text/xml'); | |
header('Content-Type: application/gexf+xml'); | |
echo '<?xml version="1.0" encoding="UTF-8"?> | |
<gexf xmlns="http://www.gexf.net/1.2draft" xmlns:viz="http://www.gexf.net/1.2draft/viz" version="1.2"> | |
<meta lastmodifieddate="2009-03-20"> | |
<creator>Gexf.net</creator> | |
<description>A hello world! file</description> | |
</meta> | |
<graph mode="static" defaultedgetype="directed"> | |
<nodes>'. PHP_EOL; | |
} | } |
if ($format == "html") { | |
?> | |
<script src="http://ajax.googleapis.com/ajax/libs/jquery/1.3.2/jquery.min.js"></script> | |
<script src="lib/springy/springy.js"></script> | |
<script src="lib/springy/springyui.js"></script> | |
<script> | |
var graph = new Graph(); | |
var nodes = []; | |
<?php | |
} | |
if ($format == "dot") { | if ($format == "dot") { |
echo 'digraph g {'. PHP_EOL; | echo 'digraph g {'. PHP_EOL; |
} | } |
$db = $server->get_db('disclosr-agencies'); | $db = $server->get_db('disclosr-agencies'); |
add_node("fedg","Federal Government - Commonwealth of Australia"); | add_node("fedg","Federal Government - Commonwealth of Australia"); |
try { | try { |
$rows = $db->get_view("app", "byCanonicalName", null, true)->rows; | $rows = $db->get_view("app", "byCanonicalName", null, true)->rows; |
//print_r($rows); | //print_r($rows); |
foreach ($rows as $row) { | foreach ($rows as $row) { |
add_node($row->id, $row->key); | add_node($row->id, $row->key); |
} | } |
} catch (SetteeRestClientException $e) { | } catch (SetteeRestClientException $e) { |
setteErrorHandler($e); | setteErrorHandler($e); |
} | } |
if ($format == "gexf") { | |
echo '</nodes> | |
<edges>'. PHP_EOL; | |
} | |
try { | try { |
$rows = $db->get_view("app", "byDeptStateName", null, true)->rows; | $rows = $db->get_view("app", "byDeptStateName", null, true)->rows; |
//print_r($rows); | //print_r($rows); |
foreach ($rows as $row) { | foreach ($rows as $row) { |
add_edge("fedg", $row->value, 'yellow'); | add_edge("fedg", $row->value, 'yellow'); |
} | } |
} catch (SetteeRestClientException $e) { | } catch (SetteeRestClientException $e) { |
setteErrorHandler($e); | setteErrorHandler($e); |
} | } |
try { | try { |
$rows = $db->get_view("app", "parentOrgs", null, true)->rows; | $rows = $db->get_view("app", "parentOrgs", null, true)->rows; |
// print_r($rows); | // print_r($rows); |
foreach ($rows as $row) { | foreach ($rows as $row) { |
add_edge($row->key, $row->value, 'blue'); | add_edge($row->key, $row->value, 'blue'); |
} | } |
} catch (SetteeRestClientException $e) { | } catch (SetteeRestClientException $e) { |
setteErrorHandler($e); | setteErrorHandler($e); |
} | } |
if ($format == "html") { | if ($format == "html") { |
?> | ?> |
window.onload = function() { | <div id="sigma-example" width="960" style="min-height:800px;background-color: #333;"></div> |
$(document).ready(function() { | <script src="javascripts/sigma.min.js"></script> |
var springy = $('#springydemo').springy({ | <script src="javascripts/sigma/plugins/sigma.parseGexf.js"></script> |
graph: graph | <script src="javascripts/sigma/plugins/sigma.forceatlas2.js"></script> |
}); | <script type="text/javascript">function init() { |
}); | // Instanciate sigma.js and customize rendering : |
}; | var sigInst = sigma.init(document.getElementById('sigma-example')).drawingProperties({ |
</script> | defaultLabelColor: '#fff', |
defaultLabelSize: 14, | |
defaultLabelBGColor: '#fff', | |
defaultLabelHoverColor: '#000', | |
labelThreshold: 6, | |
defaultEdgeType: 'curve' | |
}).graphProperties({ | |
minNodeSize: 0.5, | |
maxNodeSize: 5, | |
minEdgeSize: 5, | |
maxEdgeSize: 5 | |
}).mouseProperties({ | |
maxRatio: 32 | |
}); | |
<canvas id="springydemo" width="1260" height="680" /> | // Parse a GEXF encoded file to fill the graph |
// (requires "sigma.parseGexf.js" to be included) | |
sigInst.parseGexf('graph.php?format=gexf'); | |
sigInst.bind('downnodes',function(event){ | |
var nodes = event.content; | |
}); | |
// Start the ForceAtlas2 algorithm | |
// (requires "sigma.forceatlas2.js" to be included) | |
sigInst.startForceAtlas2(); | |
// Draw the graph : | |
sigInst.draw(); | |
} | |
if (document.addEventListener) { | |
document.addEventListener("DOMContentLoaded", init, false); | |
} else { | |
window.onload = init; | |
} | |
</script> | |
<?php | <?php |
} | } |
if ($format == "dot") { | if ($format == "dot") { |
echo "}"; | echo "}"; |
} | } |
if ($format == "gexf") { | |
echo ' </edges> | |
</graph> | |
</gexf>'. PHP_EOL; | |
} | |
//include_footer(); | //include_footer(); |
?> | ?> |