[submodule "couchdb/couchdb-lucene"] | [submodule "couchdb/couchdb-lucene"] |
path = couchdb/couchdb-lucene | path = couchdb/couchdb-lucene |
url = https://github.com/rnewson/couchdb-lucene.git | url = https://github.com/rnewson/couchdb-lucene.git |
[submodule "couchdb/settee"] | [submodule "couchdb/settee"] |
path = couchdb/settee | path = couchdb/settee |
url = https://github.com/inadarei/settee.git | url = https://github.com/inadarei/settee.git |
[submodule "lib/springy"] | |
path = lib/springy | |
url = https://github.com/dhotson/springy.git | |
[submodule "lib/php-diff"] | [submodule "lib/php-diff"] |
path = lib/php-diff | path = lib/php-diff |
url = https://github.com/chrisboulton/php-diff.git | url = https://github.com/chrisboulton/php-diff.git |
[submodule "lib/Requests"] | [submodule "lib/Requests"] |
path = lib/Requests | path = lib/Requests |
url = https://github.com/rmccue/Requests.git | url = https://github.com/rmccue/Requests.git |
[submodule "javascripts/flotr2"] | [submodule "javascripts/flotr2"] |
path = javascripts/flotr2 | path = javascripts/flotr2 |
url = https://github.com/HumbleSoftware/Flotr2.git | url = https://github.com/HumbleSoftware/Flotr2.git |
[submodule "lib/phpquery"] | [submodule "lib/phpquery"] |
path = lib/phpquery | path = lib/phpquery |
url = https://github.com/TobiaszCudnik/phpquery.git | url = https://github.com/TobiaszCudnik/phpquery.git |
[submodule "javascripts/sigma"] | |
path = javascripts/sigma | |
url = https://github.com/jacomyal/sigma.js.git | |
[submodule "javascripts/bubbletree"] | |
path = javascripts/bubbletree | |
url = https://github.com/okfn/bubbletree.git | |
<?php | <?php |
include_once("../include/common.inc.php"); | include_once("../include/common.inc.php"); |
$format = "csv"; | $format = "csv"; |
//$format = "json"; | //$format = "json"; |
if (isset($_REQUEST['format'])) $format = $_REQUEST['format']; | if (isset($_REQUEST['format'])) $format = $_REQUEST['format']; |
setlocale(LC_CTYPE, 'C'); | setlocale(LC_CTYPE, 'C'); |
if ($format == "csv") { | if ($format == "csv") { |
$headers = Array("name"); | $headers = Array("name"); |
} else { | } else { |
$headers = Array(); | $headers = Array(); |
} | } |
$db = $server->get_db('disclosr-agencies'); | $db = $server->get_db('disclosr-agencies'); |
try { | try { |
$rows = $db->get_view("app", "all", null, true)->rows; | $rows = $db->get_view("app", "all", null, true)->rows; |
$dataValues = Array(); | $dataValues = Array(); |
foreach ($rows as $row) { | foreach ($rows as $row) { |
if (isset($row->value->statistics->employees)) { | if (isset($row->value->statistics->employees)) { |
$headers = array_unique(array_merge($headers, array_keys(object_to_array($row->value->statistics->employees)))); | $headers = array_unique(array_merge($headers, array_keys(object_to_array($row->value->statistics->employees)))); |
} | } |
} | } |
} catch (SetteeRestClientException $e) { | } catch (SetteeRestClientException $e) { |
setteErrorHandler($e); | setteErrorHandler($e); |
} | } |
$fp = fopen('php://output', 'w'); | $fp = fopen('php://output', 'w'); |
if ($fp && $db) { | if ($fp && $db) { |
if ($format == "csv") { | if ($format == "csv") { |
header('Content-Type: text/csv; charset=utf-8'); | header('Content-Type: text/csv; charset=utf-8'); |
header('Content-Disposition: attachment; filename="export.employeestats.' . date("c") . '.csv"'); | header('Content-Disposition: attachment; filename="export.employeestats.' . date("c") . '.csv"'); |
} | } |
header('Pragma: no-cache'); | header('Pragma: no-cache'); |
header('Expires: 0'); | header('Expires: 0'); |
if ($format == "csv") { | if ($format == "csv") { |
fputcsv($fp, $headers); | fputcsv($fp, $headers); |
} else if ($format == "json") { | } else if ($format == "json") { |
echo '{ | echo '{ |
"labels" : ["' . implode('","', $headers) . '"],'.PHP_EOL; | "labels" : ["' . implode('","', $headers) . '"],'.PHP_EOL; |
} | } |
try { | try { |
$agencies = $db->get_view("app", "all", null, true)->rows; | $agencies = $db->get_view("app", "all", null, true)->rows; |
//print_r($agencies); | //print_r($agencies); |
$first = true; | $first = true; |
if ($format == "json") { | if ($format == "json") { |
echo '"data" : ['.PHP_EOL; | echo '"data" : ['.PHP_EOL; |
} | } |
foreach ($agencies as $agency) { | foreach ($agencies as $agency) { |
if (isset($agency->value->statistics->employees)) { | if (isset($agency->value->statistics->employees)) { |
$row = Array(); | $row = Array(); |
$agencyEmployeesArray = object_to_array($agency->value->statistics->employees); | $agencyEmployeesArray = object_to_array($agency->value->statistics->employees); |
foreach ($headers as $i => $fieldName) { | foreach ($headers as $i => $fieldName) { |
if (isset($agencyEmployeesArray[$fieldName])) { | if (isset($agencyEmployeesArray[$fieldName])) { |
$row[] = '['.$i.','.$agencyEmployeesArray[$fieldName]["value"].']'; | $row[] = '['.$i.','.$agencyEmployeesArray[$fieldName]["value"].']'; |
} else { | } else { |
$row[] = '['.$i.',0]'; | $row[] = '['.$i.',0]'; |
} | } |
} | } |
if ($format == "csv") { | if ($format == "csv") { |
fputcsv($fp, array_values($row)); | fputcsv($fp, array_values($row)); |
} else if ($format == "json") { | } else if ($format == "json") { |
if (!$first) echo ","; | if (!$first) echo ","; |
echo '{"data" : [' . implode(",", array_values($row)) . '], "label": "'.$agency->value->name.'", "lines" : { "show" : true }, "points" : { "show" : true }}'.PHP_EOL; | echo '{"data" : [' . implode(",", array_values($row)) . '], "label": "'.$agency->value->name.'", "lines" : { "show" : true }, "points" : { "show" : true }}'.PHP_EOL; |
$first = false; | $first = false; |
} | } |
} | } |
} | } |
if ($format == "json") { | if ($format == "json") { |
echo '] | echo '] |
}'.PHP_EOL; | }'.PHP_EOL; |
} | } |
} catch (SetteeRestClientException $e) { | } catch (SetteeRestClientException $e) { |
setteErrorHandler($e); | setteErrorHandler($e); |
} | } |
die; | die; |
} | } |
?> | ?> |
<?php | |
include_once("../include/common.inc.php"); | |
$db = $server->get_db('disclosr-agencies'); | |
$format = "csv"; | |
//$format = "json"; | |
if (isset($_REQUEST['format'])) $format = $_REQUEST['format']; | |
setlocale(LC_CTYPE, 'C'); | |
$headers = Array(); | |
$fp = fopen('php://output', 'w'); | |
if ($fp && $db) { | |
if ($format == "csv") { | |
header('Content-Type: text/csv; charset=utf-8'); | |
header('Content-Disposition: attachment; filename="export.score.' . date("c") . '.csv"'); | |
} | |
header('Pragma: no-cache'); | |
header('Expires: 0'); | |
try { | |
$agencies = $db->get_view("score", "score", null, true)->rows; | |
//print_r($agencies); | |
$first = true; | |
if ($format == "json") { | |
echo '"data" : ['.PHP_EOL; | |
} | |
foreach ($agencies as $agency) { | |
$agencyArray = object_to_array($agency->value); | |
if ($first) { | |
$headers = array_keys($agencyArray); | |
if ($format == "csv") { | |
fputcsv($fp, $headers); | |
} else if ($format == "json") { | |
echo '{ | |
"labels" : ["' . implode('","', $headers) . '"],'.PHP_EOL; | |
} | |
} | |
$row = Array(); | |
foreach ($headers as $i => $fieldName) { | |
if (isset($agencyArray[$fieldName])) { | |
$row[] = $agencyArray[$fieldName]; | |
} else { | |
$row[] = ''; | |
} | |
} | |
if ($format == "csv") { | |
fputcsv($fp, array_values($row)); | |
} else if ($format == "json") { | |
if (!$first) echo ","; | |
echo '{"data" : [' . implode(",", array_values($row)) . '], "label": "'.$agency->value->name.'", "lines" : { "show" : true }, "points" : { "show" : true }}'.PHP_EOL; | |
} | |
$first = false; | |
} | |
if ($format == "json") { | |
echo '] | |
}'.PHP_EOL; | |
} | |
} catch (SetteeRestClientException $e) { | |
setteErrorHandler($e); | |
} | |
die; | |
} | |
?> | |
<?php | <?php |
require_once '../include/common.inc.php'; | require_once '../include/common.inc.php'; |
require($basePath.'lib/phpquery/phpQuery/phpQuery.php'); | require($basePath . 'lib/phpquery/phpQuery/phpQuery.php'); |
$db = $server->get_db('disclosr-agencies'); | $db = $server->get_db('disclosr-agencies'); |
$rows = $db->get_view("app", "byName")->rows; | $rows = $db->get_view("app", "byName")->rows; |
$nametoid = Array(); | $nametoid = Array(); |
$accounts = Array(); | $accounts = Array(); |
foreach ($rows as $row) { | foreach ($rows as $row) { |
$nametoid[trim($row->key)] = $row->value; | $nametoid[trim($row->key)] = $row->value; |
} | } |
function extractHTMLAccounts($url, $accountType) { | function extractHTMLAccounts($url, $accountType) { |
global $accounts, $nametoid; | global $accounts, $nametoid; |
$request = Requests::get($url); | $request = Requests::get($url); |
$doc = phpQuery::newDocumentHTML($request->body); | $doc = phpQuery::newDocumentHTML($request->body); |
phpQuery::selectDocument($doc); | phpQuery::selectDocument($doc); |
foreach (pq('tr')->elements as $tr) { | foreach (pq('tr')->elements as $tr) { |
//echo $tr->nodeValue.PHP_EOL; | //echo $tr->nodeValue.PHP_EOL; |
$agency = ""; | $agency = ""; |
$url = ""; | $url = ""; |
foreach ($tr->childNodes as $td) { | foreach ($tr->childNodes as $td) { |
$class = $td->getAttribute("class"); | $class = $td->getAttribute("class"); |
//echo "cccc $class ".$td->nodeValue.PHP_EOL; | //echo "cccc $class ".$td->nodeValue.PHP_EOL; |
if ($class == "s11" || $class == "s10" || $class == "s7") { | if ($class == "s11" || $class == "s10" || $class == "s7") { |
$agency = $td->nodeValue; | $agency = $td->nodeValue; |
} else if ($class == "s6" || $class == "s9"){ | } else if ($class == "s6" || $class == "s9") { |
$url = $td->nodeValue; | $url = $td->nodeValue; |
foreach($td->childNodes as $a) { | foreach ($td->childNodes as $a) { |
$href = $a->getAttribute("href"); | $href = $a->getAttribute("href"); |
if ($href != "") { | if ($href != "") { |
$url = $href; | $url = $href; |
} | |
} | |
} | |
} | |
if ($agency != "" && $url != "") { | |
if (!in_array(trim($agency), array_keys($nametoid))) { | |
echo trim($agency)." missing" . PHP_EOL; | |
} else { | |
// echo $agency." = ".$url.PHP_EOL; | |
$accounts[$nametoid[trim($agency)]][$accountType][] = $url; | |
} | } |
} | |
} | } |
} | |
if ($agency != "" && $url != "") { | |
if (!in_array(trim($agency), array_keys($nametoid))) { | |
echo trim($agency) . " missing" . PHP_EOL; | |
} else { | |
// echo $agency." = ".$url.PHP_EOL; | |
$accounts[$nametoid[trim($agency)]][$accountType][] = $url; | |
} | |
} | |
} | } |
} | } |
function extractCSVAccounts($url, $accountType, $nameField, $accountField, $filter) { | function extractCSVAccounts($url, $accountType, $nameField, $accountField, $filter) { |
global $accounts, $nametoid; | global $accounts, $nametoid; |
$request = Requests::get($url); | $request = Requests::get($url); |
$Data = str_getcsv($request->body, "\n"); //parse the rows | $Data = str_getcsv($request->body, "\n"); //parse the rows |
$headers = Array(); | $headers = Array(); |
foreach ($Data as $num => $line) { | foreach ($Data as $num => $line) { |
$Row = str_getcsv($line, ",",'"'); | $Row = str_getcsv($line, ",", '"'); |
if ($num == 0) { | if ($num == 0) { |
} else if ($num == 1) { | } else if ($num == 1) { |
$headers = $Row; | $headers = $Row; |
//print_r($headers); | //print_r($headers); |
} else { | } else { |
if (isset($Row[array_search($nameField, $headers)])) { | if (isset($Row[array_search($nameField, $headers)])) { |
$agencyName = $Row[array_search($nameField, $headers)]; | $agencyName = $Row[array_search($nameField, $headers)]; |
if (!$filter || $Row[array_search("State", $headers)] == "NAT") { | if (!$filter || $Row[array_search("State", $headers)] == "NAT") { |
if (!in_array(trim($agencyName), array_keys($nametoid))) { | if (!in_array(trim($agencyName), array_keys($nametoid))) { |
echo trim($agencyName)." missing" . PHP_EOL; | echo trim($agencyName) . " missing" . PHP_EOL; |
} else { | } else { |
// echo $Row[array_search($nameField, $headers)] . PHP_EOL; | // echo $Row[array_search($nameField, $headers)] . PHP_EOL; |
$accounts[$nametoid[trim($agencyName)]][$accountType][] = $Row[array_search($accountField, $headers)]; | $accounts[$nametoid[trim($agencyName)]][$accountType][] = $Row[array_search($accountField, $headers)]; |
} | } |
} | } |
} else { | } else { |
//echo "error finding agency" . $line . PHP_EOL; | //echo "error finding agency" . $line . PHP_EOL; |
} | } |
} | } |
} | } |
} | } |
// http://agimo.govspace.gov.au/page/gov2register/ | // http://agimo.govspace.gov.au/page/gov2register/ |
extractCSVAccounts("https://docs.google.com/spreadsheet/pub?key=0Ap1exl80wB8OdHNKVmQ5RVlvQWpibDAxNHkzcU1nV2c&single=true&gid=0&output=csv", "Twitter", "Agency/Body/Event", "", true); | extractCSVAccounts("https://docs.google.com/spreadsheet/pub?key=0Ap1exl80wB8OdHNKVmQ5RVlvQWpibDAxNHkzcU1nV2c&single=true&gid=0&output=csv", "Twitter", "Agency/Body/Event", "", true); |
// RSS | // RSS |
extractHTMLAccounts("https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGJxandJREhLSGlWWUZfZ2xKOTNHZ0E&output=html", "RSS"); | extractHTMLAccounts("https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGJxandJREhLSGlWWUZfZ2xKOTNHZ0E&output=html", "RSS"); |
extractHTMLAccounts("https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGtjcW9vOXdyZ3pOV21vQU51VmhzQnc&single=true&gid=0&output=html", "Facebook"); | extractHTMLAccounts("https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGtjcW9vOXdyZ3pOV21vQU51VmhzQnc&single=true&gid=0&output=html", "Facebook"); |
foreach ($accounts as $id => $accountTypes) { | |
echo $id . "<br>" . PHP_EOL; | |
$doc = object_to_array($db->get($id)); | |
// print_r($doc); | |
foreach ($accountTypes as $accountType => $accounts) { | |
if (!isset($doc["has" . $accountType]) || !is_array($doc["has" . $accountType])) { | |
$doc["has" . $accountType] = Array(); | |
} | |
$doc["has" . $accountType] = array_unique(array_merge($doc["has" . $accountType], $accounts)); | |
} | |
$db->save($doc); | |
} | |
?> | ?> |
#http://packages.python.org/CouchDB/client.html | |
import couchdb | |
import json | |
import pprint | |
import re | |
from tidylib import tidy_document | |
couch = couchdb.Server('http://127.0.0.1:5984/') | |
# select database | |
docsdb = couch['disclosr-documents'] | |
def f(x): | |
invalid = re.compile(r"ensure|testing|flicker|updating|longdesc|Accessibility Checks|not recognized") | |
valid = re.compile(r"line") | |
return (not invalid.search(x)) and valid.search(x) and x != '' | |
for row in docsdb.view('app/getValidationRequired'): | |
print row.id | |
html = docsdb.get_attachment(row.id,row.value.iterkeys().next()).read() | |
#print html | |
document, errors = tidy_document(html,options={'accessibility-check |