--- a/.gitmodules +++ b/.gitmodules @@ -4,9 +4,6 @@ [submodule "couchdb/settee"] path = couchdb/settee url = https://github.com/inadarei/settee.git -[submodule "lib/springy"] - path = lib/springy - url = https://github.com/dhotson/springy.git [submodule "lib/php-diff"] path = lib/php-diff url = https://github.com/chrisboulton/php-diff.git @@ -19,4 +16,10 @@ [submodule "lib/phpquery"] path = lib/phpquery url = https://github.com/TobiaszCudnik/phpquery.git +[submodule "javascripts/sigma"] + path = javascripts/sigma + url = https://github.com/jacomyal/sigma.js.git +[submodule "javascripts/bubbletree"] + path = javascripts/bubbletree + url = https://github.com/okfn/bubbletree.git
--- a/about.php +++ b/about.php @@ -1,16 +1,16 @@ <?php include_once('include/common.inc.php'); -include_header(); +include_header('About'); ?> <div class="foundation-header"> <h1><a href="about.php">About/FAQ</a></h1> <h4 class="subheader">Lorem ipsum.</h4> </div> <h2> What is this? </h2> -Disclosr is a project to monitor Australian Federal Government agencies +Disclo.gs is a project to monitor Australian Federal Government agencies' compliance with their <a href="http://www.oaic.gov.au/publications/other_operational/foi_policy_frequently_asked_questions.html#_Toc291837571">"proactive disclosure requirements"</a>. -OGRE (Open Government Realization Evaluation) is a ranking of compliance with these requirements. -Prometheus is the agent which polls agency websites to assess compliance. +<h2> Attributions </h2> +National Archives of Australia, Australian Governments’ Interactive Functions Thesaurus, 2nd edition, September 2005, published at http://www.naa.gov.au/recordkeeping/thesaurus/index.htm. <h2> Open everything </h2> All documents released CC-BY 3 AU
--- /dev/null
+++ b/admin/conflicts.php
@@ -1,1 +1,60 @@
+<?php
+/**
+ * Admin page: renders CouchDB revision conflicts in the disclosr-agencies
+ * database as side-by-side diffs, and prints a curl command for deleting
+ * each conflicting revision.
+ */
+include_once('../include/common.inc.php');
+include_header();
+require_once '../lib/php-diff/lib/Diff.php';
+require_once '../lib/php-diff/lib/Diff/Renderer/Html/SideBySide.php';
+
+$db = $server->get_db('disclosr-agencies');
+
+try {
+    $rows = $db->get_view("app", "getConflicts", null, true)->rows;
+    foreach ($rows as $row) {
+        echo "<h2>" . htmlspecialchars($row->id, ENT_QUOTES, 'UTF-8') . "</h2>";
+
+        // Document as served without a rev parameter; keys are sorted and the
+        // JSON is split on commas so both sides diff in the same order.
+        $request = Requests::get($serverAddr . "disclosr-agencies/" . $row->id);
+        $origSort = object_to_array(json_decode($request->body));
+        ksort($origSort);
+        $origDoc = explode(",", json_encode($origSort));
+
+        // Each entry of the view value is used as a ?rev= parameter.
+        foreach ($row->value as $conflictRev) {
+            $conflictURL = $serverAddr . "disclosr-agencies/" . $row->id . "?rev=" . $conflictRev;
+            $request = Requests::get($conflictURL);
+            $conflictSort = object_to_array(json_decode($request->body));
+            ksort($conflictSort);
+            $conflictDoc = explode(",", json_encode($conflictSort));
+
+            // The URL is escaped for display only; the request above uses it raw.
+            echo "curl -X DELETE " . htmlspecialchars($conflictURL, ENT_QUOTES, 'UTF-8') . "<br>" . PHP_EOL;
+
+            // Options for generating the diff
+            $options = array(
+                //'ignoreWhitespace' => true,
+                //'ignoreCase' => true,
+            );
+
+            // Generate a side by side diff of the conflicting revision against the current one
+            $diff = new Diff($conflictDoc, $origDoc, $options);
+            $renderer = new Diff_Renderer_Html_SideBySide;
+            echo $diff->Render($renderer);
+        }
+
+        // Only the first conflicted document is shown per page load. Use break
+        // rather than die() so include_footer() still closes the page.
+        break;
+    }
+} catch (SetteeRestClientException $e) {
+    setteErrorHandler($e);
+}
+
+include_footer();
+?>
+
--- /dev/null
+++ b/admin/directory.gexf.php
@@ -1,1 +1,87 @@
+<?php
+/**
+ * Emits a GEXF 1.2 graph built from directoryexport.xml (looked up relative
+ * to the current working directory). Every organization, organizationalUnit
+ * and person element becomes a node with an edge pointing at its parent
+ * element; top-level elements hang off a synthetic "gov" root node.
+ */
+$nodes = Array(Array("id" => "gov", "label" => "Federal Government"));
+$edges = Array();
+
+/**
+ * Appends a directed edge to the global $edges list.
+ * The edge id is the md5 of the concatenated source and target ids.
+ */
+function addEdge($source, $target) {
+    global $edges;
+    $edges[] = Array("id" => md5($source . $target), "source" => $source, "target" => $target);
+}
+
+/**
+ * Appends a node, remembering its parent node id as "pid", to the global $nodes list.
+ */
+function addNode($id, $label, $pid) {
+    global $nodes;
+    $nodes[] = Array("id" => $id, "label" => $label, "pid" => $pid);
+}
+
+/**
+ * Recursively registers the recognised children of $parentXML as nodes linked
+ * to $parentID. Elements with any other name are skipped together with
+ * everything below them.
+ */
+function addChildren($parentID, $parentXML) {
+    foreach ($parentXML as $childXML) {
+        $type = $childXML->getName();
+        if ($type == "organization" || $type == "organizationalUnit") {
+            $label = $childXML->name;
+        } else if ($type == "person") {
+            $label = $childXML->fullName;
+        } else {
+            continue;
+        }
+        $attr = $childXML->attributes();
+        // Cast to plain strings so the arrays do not hold on to SimpleXMLElement objects.
+        $id = (string) $attr['UUID'];
+        addNode($id, (string) $label, $parentID);
+        addEdge($id, $parentID);
+        addChildren($id, $childXML);
+    }
+}
+
+/**
+ * Escapes a value for use inside a double-quoted XML attribute.
+ * ENT_XML1 on its own carries no quote flag, so quotes would be left as-is;
+ * ENT_QUOTES is required to keep a label containing " from breaking the tag.
+ */
+function gexfAttr($value) {
+    return htmlspecialchars((string) $value, ENT_QUOTES | ENT_XML1, 'UTF-8');
+}
+
+// simplexml_load_file() returns false on unreadable or malformed XML.
+$xml = file_exists('directoryexport.xml') ? simplexml_load_file('directoryexport.xml') : false;
+if ($xml === false) {
+    exit('Failed to open directoryexport.xml');
+}
+addChildren("gov", $xml);
+
+header('Content-Type: application/gexf+xml');
+echo '<?xml version="1.0" encoding="UTF-8"?>
+<gexf xmlns="http://www.gexf.net/1.2draft" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd" version="1.2">
+    <graph mode="static" defaultedgetype="directed">
+        <nodes>';
+foreach ($nodes as $node) {
+    echo ' <node id="' . gexfAttr($node['id']) . '" label="' . gexfAttr($node['label']) . '" '
+        . (isset($node['pid']) ? 'pid="' . gexfAttr($node['pid']) . '"' : "") . ' />';
+}
+echo '</nodes>
+        <edges>';
+foreach ($edges as $edge) {
+    echo ' <edge id="' . $edge['id'] . '" source="' . gexfAttr($edge['source']) . '" target="' . gexfAttr($edge['target']) . '" />';
+}
+echo '</edges>
+    </graph>
+</gexf>';
+?>
+
--- /dev/null +++ b/admin/directoryexport.xml
--- a/admin/exportEmployees.csv.php +++ b/admin/exportEmployees.csv.php @@ -5,7 +5,6 @@ $format = "csv"; //$format = "json"; if (isset($_REQUEST['format'])) $format = $_REQUEST['format']; - setlocale(LC_CTYPE, 'C'); if ($format == "csv") { $headers = Array("name"); @@ -22,6 +21,7 @@ if (isset($row->value->statistics->employees)) { $headers = array_unique(array_merge($headers, array_keys(object_to_array($row->value->statistics->employees)))); + } } } catch (SetteeRestClientException $e) {
--- /dev/null
+++ b/admin/exportScore.csv.php
@@ -1,1 +1,79 @@
+<?php
+/**
+ * Exports the rows of the score/score view of the disclosr-agencies database.
+ *
+ * format=csv (the default) streams a CSV download; format=json prints one
+ * JSON object of the form {"labels": [...], "data": [...]}. The column list
+ * is the key list of the first row; a field that is unset or null in a later
+ * row is exported as an empty string.
+ */
+include_once("../include/common.inc.php");
+
+$db = $server->get_db('disclosr-agencies');
+$format = "csv";
+if (isset($_REQUEST['format'])) {
+    $format = $_REQUEST['format'];
+}
+
+setlocale(LC_CTYPE, 'C');
+
+$headers = Array();
+
+$fp = fopen('php://output', 'w');
+if ($fp && $db) {
+    if ($format == "csv") {
+        header('Content-Type: text/csv; charset=utf-8');
+        header('Content-Disposition: attachment; filename="export.score.' . date("c") . '.csv"');
+    } else if ($format == "json") {
+        header('Content-Type: application/json; charset=utf-8');
+    }
+    header('Pragma: no-cache');
+    header('Expires: 0');
+
+    try {
+        $agencies = $db->get_view("score", "score", null, true)->rows;
+        $first = true;
+        $series = Array();
+        foreach ($agencies as $agency) {
+            $agencyArray = object_to_array($agency->value);
+            if ($first) {
+                // The first row decides which columns are exported.
+                $headers = array_keys($agencyArray);
+                if ($format == "csv") {
+                    fputcsv($fp, $headers);
+                }
+                $first = false;
+            }
+
+            $row = Array();
+            foreach ($headers as $fieldName) {
+                $row[] = isset($agencyArray[$fieldName]) ? $agencyArray[$fieldName] : '';
+            }
+
+            if ($format == "csv") {
+                fputcsv($fp, $row);
+            } else if ($format == "json") {
+                $series[] = Array(
+                    "data" => $row,
+                    "label" => isset($agency->value->name) ? $agency->value->name : '',
+                    "lines" => Array("show" => true),
+                    "points" => Array("show" => true)
+                );
+            }
+        }
+
+        if ($format == "json") {
+            // Encode the whole response in one call so that nesting, quoting
+            // and escaping always form valid JSON.
+            echo json_encode(Array("labels" => $headers, "data" => $series)) . PHP_EOL;
+        }
+    } catch (SetteeRestClientException $e) {
+        setteErrorHandler($e);
+    }
+
+    fclose($fp);
+    die;
+}
+?>
+
--- a/admin/importAPSCEmployees.php +++ b/admin/importAPSCEmployees.php @@ -32,23 +32,35 @@ @$sums[$id][$timePeriod] += $data[1]; } else { echo "<br>ERROR NAME MISSING FROM ID LIST<br><bR>" . PHP_EOL; - + die(); - } } fclose($handle); } } foreach ($sums as $id => $sum) { - echo $id. "<br>" . PHP_EOL; + echo $id . "<br>" . PHP_EOL; $doc = $db->get($id); - // print_r($doc); - if (isset($doc->statistics)) $doc->statistics = Array(); + echo $doc->name . "<br>" . PHP_EOL; + // print_r($doc); + $changed = false; + if (!isset($doc->statistics)) { + $changed = true; + $doc->statistics = Array(); + } foreach ($sum as $timePeriod => $value) { - $doc->statistics["employees"][$timePeriod] = Array("value"=>$value, "source"=>"http://apsc.gov.au/stateoftheservice/"); + if (!isset($doc->statistics->employees->$timePeriod->value) + || $doc->statistics->employees->$timePeriod->value != $value) { + $changed = true; + $doc->statistics["employees"][$timePeriod] = Array("value" => $value, "source" => "http://apsc.gov.au/stateoftheservice/"); + } } - $db->save($doc); + if ($changed) { + $db->save($doc); + } else { + echo "not changed" . "<br>" . PHP_EOL; + } } // employees: timeperiod, source = apsc state of service, value ?>
--- a/admin/importGov2RegisterRSSFacebookTwitter.php +++ b/admin/importGov2RegisterRSSFacebookTwitter.php @@ -1,7 +1,7 @@ <?php require_once '../include/common.inc.php'; -require($basePath.'lib/phpquery/phpQuery/phpQuery.php'); +require($basePath . 'lib/phpquery/phpQuery/phpQuery.php'); $db = $server->get_db('disclosr-agencies'); $rows = $db->get_view("app", "byName")->rows; @@ -10,41 +10,40 @@ foreach ($rows as $row) { $nametoid[trim($row->key)] = $row->value; } + function extractHTMLAccounts($url, $accountType) { global $accounts, $nametoid; $request = Requests::get($url); $doc = phpQuery::newDocumentHTML($request->body); phpQuery::selectDocument($doc); foreach (pq('tr')->elements as $tr) { - //echo $tr->nodeValue.PHP_EOL; - $agency = ""; - $url = ""; - foreach ($tr->childNodes as $td) { - $class = $td->getAttribute("class"); - //echo "cccc $class ".$td->nodeValue.PHP_EOL; - if ($class == "s11" || $class == "s10" || $class == "s7") { - $agency = $td->nodeValue; - } else if ($class == "s6" || $class == "s9"){ - $url = $td->nodeValue; - foreach($td->childNodes as $a) { - $href = $a->getAttribute("href"); - if ($href != "") { - $url = $href; - } - } - } - } - if ($agency != "" && $url != "") { - if (!in_array(trim($agency), array_keys($nametoid))) { - echo trim($agency)." missing" . PHP_EOL; - } else { - // echo $agency." 
= ".$url.PHP_EOL; - $accounts[$nametoid[trim($agency)]][$accountType][] = $url; + //echo $tr->nodeValue.PHP_EOL; + $agency = ""; + $url = ""; + foreach ($tr->childNodes as $td) { + $class = $td->getAttribute("class"); + //echo "cccc $class ".$td->nodeValue.PHP_EOL; + if ($class == "s11" || $class == "s10" || $class == "s7") { + $agency = $td->nodeValue; + } else if ($class == "s6" || $class == "s9") { + $url = $td->nodeValue; + foreach ($td->childNodes as $a) { + $href = $a->getAttribute("href"); + if ($href != "") { + $url = $href; } - - } + } + } + } + if ($agency != "" && $url != "") { + if (!in_array(trim($agency), array_keys($nametoid))) { + echo trim($agency) . " missing" . PHP_EOL; + } else { + // echo $agency." = ".$url.PHP_EOL; + $accounts[$nametoid[trim($agency)]][$accountType][] = $url; + } + } } - } function extractCSVAccounts($url, $accountType, $nameField, $accountField, $filter) { @@ -53,7 +52,7 @@ $Data = str_getcsv($request->body, "\n"); //parse the rows $headers = Array(); foreach ($Data as $num => $line) { - $Row = str_getcsv($line, ",",'"'); + $Row = str_getcsv($line, ",", '"'); if ($num == 0) { } else if ($num == 1) { @@ -64,7 +63,7 @@ $agencyName = $Row[array_search($nameField, $headers)]; if (!$filter || $Row[array_search("State", $headers)] == "NAT") { if (!in_array(trim($agencyName), array_keys($nametoid))) { - echo trim($agencyName)." missing" . PHP_EOL; + echo trim($agencyName) . " missing" . PHP_EOL; } else { // echo $Row[array_search($nameField, $headers)] . 
PHP_EOL; $accounts[$nametoid[trim($agencyName)]][$accountType][] = $Row[array_search($accountField, $headers)]; @@ -84,6 +83,18 @@ extractHTMLAccounts("https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGJxandJREhLSGlWWUZfZ2xKOTNHZ0E&output=html", "RSS"); // facebook extractHTMLAccounts("https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGtjcW9vOXdyZ3pOV21vQU51VmhzQnc&single=true&gid=0&output=html", "Facebook"); +foreach ($accounts as $id => $accountTypes) { + echo $id . "<br>" . PHP_EOL; + $doc = object_to_array($db->get($id)); + // print_r($doc); + foreach ($accountTypes as $accountType => $accounts) { + if (!isset($doc["has" . $accountType]) || !is_array($doc["has" . $accountType])) { + $doc["has" . $accountType] = Array(); + } + $doc["has" . $accountType] = array_unique(array_merge($doc["has" . $accountType], $accounts)); + } + $db->save($doc); +} ?>
--- /dev/null +++ b/admin/metadata.py @@ -1,1 +1,22 @@ +#http://packages.python.org/CouchDB/client.html +import couchdb +from BeautifulSoup import BeautifulSoup +couch = couchdb.Server('http://127.0.0.1:5984/') + +# select database +docsdb = couch['disclosr-documents'] + +for row in docsdb.view('app/getMetadataExtractRequired'): + print row.id + html = docsdb.get_attachment(row.id,row.value.iterkeys().next()).read() + metadata = [] + # http://www.crummy.com/software/BeautifulSoup/documentation.html + soup = BeautifulSoup(html) +metatags = soup.meta + for metatag in metatags: + print metatag['name'] + doc = docsdb.get(row.id) + //doc['metadata'] = metadata + //docsdb.save(doc) +
Binary files /dev/null and b/admin/naa-agift.7z differ
--- /dev/null +++ b/admin/naa-agift.json @@ -1,1 +1,2232 @@ - +[ + "Home", + "index.htm", + [ + "BUSINESS SUPPORT AND REGULATION", + "000411.htm", + [ + "Association registration", + "000396.htm" + ], + [ + "Business process auditing", + "000413.htm" + ], + [ + "Business registration and licensing", + "000414.htm" + ], + [ + "Business sponsorship", + "001371.htm" + ], + [ + "Consumer protection", + "000439.htm" + ], + [ + "Fair trading compliance", + "000482.htm", + [ + "Prices surveillance", + "000533.htm" + ], + [ + "Product safety", + "000534.htm" + ], + [ + "Trade practices compliance", + "000583.htm" + ] + ], + [ + "Financial institutions regulation", + "000485.htm" + ], + [ + "Government procurement regulation", + "000491.htm" + ], + [ + "Industry assistance schemes", + "000499.htm" + ], + [ + "Industry development", + "000500.htm" + ], + [ + "Insurance regulation", + "000501.htm", + [ + "General insurance", + "000489.htm" + ], + [ + "Life insurance", + "000505.htm" + ] + ], + [ + "Professional accreditation", + "000535.htm" + ], + [ + "Small business services", + "000565.htm", + [ + "Small business advocacy", + "000563.htm" + ], + [ + "Small business development", + "000564.htm" + ] + ], + [ + "Stock market regulation", + "000572.htm" + ] + ], + [ + "CIVIC INFRASTRUCTURE", + "000424.htm", + [ + "Civic management", + "000425.htm", + [ + "Architectural services", + "000387.htm" + ], + [ + "Building approval services", + "000406.htm" + ], + [ + "Building regulations and standards", + "000408.htm" + ], + [ + "Engineering services", + "000477.htm" + ], + [ + "Town planning", + "000582.htm" + ] + ], + [ + "Energy supply", + "000476.htm" + ], + [ + "Integrated services planning", + "000503.htm" + ], + [ + "Public housing", + "000536.htm", + [ + "Public housing construction", + "000537.htm" + ], + [ + "Public housing design", + "000538.htm" + ], + [ + "Public housing maintenance", + "000540.htm" + ] + ], + [ + "Public land management", + "000541.htm", + [ + "Burial 
ground management", + "000410.htm" + ], + [ + "Cultural centre management", + "000444.htm" + ], + [ + "Garden management", + "000488.htm" + ], + [ + "Memorial maintenance", + "000509.htm" + ], + [ + "Recreational park management", + "000549.htm" + ], + [ + "Sporting facilities management", + "000571.htm" + ] + ], + [ + "Regional development", + "000552.htm" + ], + [ + "Transport network maintenance", + "000586.htm" + ], + [ + "Waste management", + "001349.htm" + ] + ], + [ + "COMMUNICATIONS", + "000433.htm", + [ + "Advertising standards", + "000381.htm" + ], + [ + "Broadcasting", + "000403.htm", + [ + "Broadcasting standards", + "000404.htm" + ], + [ + "Radio broadcasting", + "000546.htm" + ], + [ + "Television broadcasting", + "000580.htm" + ] + ], + [ + "Call centre administration", + "000417.htm" + ], + [ + "Electronic commerce", + "000468.htm", + [ + "Authentication", + "000399.htm" + ], + [ + "Online transaction standards", + "000526.htm" + ] + ], + [ + "Government media", + "000490.htm" + ], + [ + "Information management standards", + "000283.htm", + [ + "Data management", + "000448.htm" + ], + [ + "Information dissemination", + "000502.htm" + ], + [ + "Information technology standards", + "000282.htm" + ] + ], + [ + "Media ownership regulation", + "000508.htm" + ], + [ + "Postal services", + "000530.htm", + [ + "Courier services", + "000441.htm" + ], + [ + "Electronic postal services", + "000469.htm" + ], + [ + "Retail postal services", + "000554.htm" + ] + ], + [ + "Publishing", + "000543.htm", + [ + "Electronic publishing", + "000470.htm" + ], + [ + "Publishing standards", + "000544.htm" + ], + [ + "Website development", + "000591.htm" + ] + ], + [ + "Radio communication", + "000547.htm", + [ + "Apparatus licensing", + "000385.htm" + ], + [ + "Spectrum management", + "000570.htm" + ] + ], + [ + "Satellite communication", + "000560.htm" + ], + [ + "Telecommunications", + "000578.htm", + [ + "Carriage service providers", + "000420.htm" + ], + [ + "Carrier 
licensing", + "000421.htm" + ], + [ + "Equipment licensing", + "000480.htm" + ], + [ + "Mobile telephone services", + "000516.htm" + ], + [ + "Telephone services", + "000579.htm" + ] + ] + ], + [ + "COMMUNITY SERVICES", + "000435.htm", + [ + "Accommodation services", + "000377.htm", + [ + "Defence housing", + "000458.htm" + ], + [ + "Emergency accommodation", + "000471.htm" + ], + [ + "Public housing entitlements", + "000539.htm" + ], + [ + "Refuge support", + "000551.htm" + ] + ], + [ + "Community support", + "000436.htm", + [ + "Adoption services", + "000378.htm" + ], + [ + "Aged care services", + "000382.htm" + ], + [ + "Child and youth support", + "000422.htm" + ], + [ + "Child-care services", + "000423.htm" + ], + [ + "Defence community programs", + "000452.htm" + ], + [ + "Family reunion programs", + "000483.htm" + ], + [ + "Veterans\" entitlements", + "000587.htm" + ] + ], + [ + "Counselling services", + "000440.htm" + ], + [ + "Emergency services", + "000474.htm", + [ + "Ambulance services", + "000384.htm" + ], + [ + "Emergency funding", + "000472.htm" + ], + [ + "Firefighting services", + "000487.htm" + ] + ], + [ + "Financial assistance", + "000484.htm", + [ + "Benefits", + "000402.htm" + ], + [ + "Income support schemes", + "000494.htm" + ] + ], + [ + "Natural disas