[submodule "couchdb/couchdb-lucene"] | [submodule "couchdb/couchdb-lucene"] |
path = couchdb/couchdb-lucene | path = couchdb/couchdb-lucene |
url = https://github.com/rnewson/couchdb-lucene.git | url = https://github.com/rnewson/couchdb-lucene.git |
[submodule "couchdb/settee"] | [submodule "couchdb/settee"] |
path = couchdb/settee | path = couchdb/settee |
url = https://github.com/inadarei/settee.git | url = https://github.com/inadarei/settee.git |
[submodule "lib/springy"] | |
path = lib/springy | |
url = https://github.com/dhotson/springy.git | |
[submodule "lib/php-diff"] | [submodule "lib/php-diff"] |
path = lib/php-diff | path = lib/php-diff |
url = https://github.com/chrisboulton/php-diff.git | url = https://github.com/chrisboulton/php-diff.git |
[submodule "lib/Requests"] | [submodule "lib/Requests"] |
path = lib/Requests | path = lib/Requests |
url = https://github.com/rmccue/Requests.git | url = https://github.com/rmccue/Requests.git |
[submodule "javascripts/flotr2"] | [submodule "javascripts/flotr2"] |
path = javascripts/flotr2 | path = javascripts/flotr2 |
url = https://github.com/HumbleSoftware/Flotr2.git | url = https://github.com/HumbleSoftware/Flotr2.git |
[submodule "lib/phpquery"] | [submodule "lib/phpquery"] |
path = lib/phpquery | path = lib/phpquery |
url = https://github.com/TobiaszCudnik/phpquery.git | url = https://github.com/TobiaszCudnik/phpquery.git |
[submodule "javascripts/sigma"] | |
path = javascripts/sigma | |
url = https://github.com/jacomyal/sigma.js.git | |
<?php
// Bootstrap: load the shared include and phpQuery, then build the lookup tables
// used by the extract* functions further down.
require_once '../include/common.inc.php';
require($basePath . 'lib/phpquery/phpQuery/phpQuery.php');

$db = $server->get_db('disclosr-agencies');
$rows = $db->get_view("app", "byName")->rows;

// Filled in by the extract* functions: agency id => account type => list of accounts.
$accounts = Array();

// Trimmed key of each byName view row => that row's value (later used as a document id).
$nametoid = Array();
foreach ($rows as $row) {
    $nametoid[trim($row->key)] = $row->value;
}
/**
 * Collect agency accounts from a published HTML spreadsheet.
 *
 * Fetches $url and walks every <tr>. Within a row the agency name is the text of
 * the cell whose class is s11, s10 or s7, and the account is taken from the cell
 * whose class is s6 or s9 (the href of an element inside that cell wins over the
 * cell text). When both are non-empty and the trimmed name is a key of the global
 * $nametoid, the account is appended to the global
 * $accounts[<agency id>][$accountType]; otherwise "<name> missing" is echoed.
 *
 * @param string $url         address of the HTML export to download
 * @param string $accountType key the accounts are filed under (e.g. "RSS")
 * @return void
 */
function extractHTMLAccounts($url, $accountType) {
    global $accounts, $nametoid;
    $request = Requests::get($url);
    $doc = phpQuery::newDocumentHTML($request->body);
    phpQuery::selectDocument($doc);
    foreach (pq('tr')->elements as $tr) {
        $agency = "";
        // Deliberately not called $url, so the function parameter is not clobbered.
        $accountUrl = "";
        foreach ($tr->childNodes as $td) {
            // childNodes also yields text/comment nodes, which have no getAttribute().
            if (!($td instanceof DOMElement)) {
                continue;
            }
            $class = $td->getAttribute("class");
            if ($class == "s11" || $class == "s10" || $class == "s7") {
                $agency = $td->nodeValue;
            } else if ($class == "s6" || $class == "s9") {
                $accountUrl = $td->nodeValue;
                foreach ($td->childNodes as $a) {
                    if (!($a instanceof DOMElement)) {
                        continue;
                    }
                    $href = $a->getAttribute("href");
                    if ($href != "") {
                        $accountUrl = $href;
                    }
                }
            }
        }
        // Evaluated once per table row, after all of its cells have been read.
        if ($agency != "" && $accountUrl != "") {
            $name = trim($agency);
            if (!array_key_exists($name, $nametoid)) {
                echo $name . " missing" . PHP_EOL;
            } else {
                $accounts[$nametoid[$name]][$accountType][] = $accountUrl;
            }
        }
    }
}
/**
 * Collect agency accounts from a published CSV spreadsheet.
 *
 * Fetches $url and splits the body into lines. Line 0 is ignored, line 1 is the
 * header row, and every later line is a data row. For each data row whose name
 * column holds a key of the global $nametoid (after trimming), the value of the
 * account column is appended to the global $accounts[<agency id>][$accountType];
 * unknown names are reported by echoing "<name> missing".
 *
 * Column positions are resolved once from the header row. If the name or account
 * header cannot be found, nothing is imported (previously a failed lookup fell
 * through to column 0 and stored whatever was there).
 *
 * @param string $url          address of the CSV export to download
 * @param string $accountType  key the accounts are filed under (e.g. "Twitter")
 * @param string $nameField    header text of the column holding the agency name
 * @param string $accountField header text of the column holding the account
 * @param bool   $filter       when true, keep only rows whose "State" column is "NAT"
 * @return void
 */
function extractCSVAccounts($url, $accountType, $nameField, $accountField, $filter) {
    global $accounts, $nametoid;
    $request = Requests::get($url);
    // NOTE(review): splitting on "\n" this way breaks rows whose quoted fields
    // contain newlines; kept as-is because the exports in use are line-per-row.
    $Data = str_getcsv($request->body, "\n"); //parse the rows
    $nameCol = false;
    $accountCol = false;
    $stateCol = false;
    foreach ($Data as $num => $line) {
        if ($num == 0) {
            // first line is not the header row; skip it
            continue;
        }
        $Row = str_getcsv($line, ",", '"');
        if ($num == 1) {
            // header row: remember where each column of interest lives
            $nameCol = array_search($nameField, $Row, true);
            $accountCol = array_search($accountField, $Row, true);
            $stateCol = array_search("State", $Row, true);
            continue;
        }
        if ($nameCol === false || $accountCol === false) {
            continue;
        }
        if (!isset($Row[$nameCol]) || !isset($Row[$accountCol])) {
            //echo "error finding agency" . $line . PHP_EOL;
            continue;
        }
        if ($filter && ($stateCol === false || !isset($Row[$stateCol]) || $Row[$stateCol] != "NAT")) {
            continue;
        }
        $agencyName = trim($Row[$nameCol]);
        if (!array_key_exists($agencyName, $nametoid)) {
            echo $agencyName . " missing" . PHP_EOL;
        } else {
            $accounts[$nametoid[$agencyName]][$accountType][] = $Row[$accountCol];
        }
    }
}
// http://agimo.govspace.gov.au/page/gov2register/
extractCSVAccounts("https://docs.google.com/spreadsheet/pub?key=0Ap1exl80wB8OdHNKVmQ5RVlvQWpibDAxNHkzcU1nV2c&single=true&gid=0&output=csv", "Twitter", "Agency/Body/Event", "", true);
// RSS
extractHTMLAccounts("https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGJxandJREhLSGlWWUZfZ2xKOTNHZ0E&output=html", "RSS");
// Facebook
extractHTMLAccounts("https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGtjcW9vOXdyZ3pOV21vQU51VmhzQnc&single=true&gid=0&output=html", "Facebook");

// Merge what was scraped into each agency document under "has<AccountType>".
foreach ($accounts as $id => $accountTypes) {
    echo $id . "<br>" . PHP_EOL;
    $doc = object_to_array($db->get($id));
    // print_r($doc);
    // $foundAccounts rather than $accounts, so the array being iterated is not shadowed.
    foreach ($accountTypes as $accountType => $foundAccounts) {
        $field = "has" . $accountType;
        if (!isset($doc[$field]) || !is_array($doc[$field])) {
            $doc[$field] = Array();
        }
        // array_unique() keeps the keys of the surviving entries, leaving gaps when
        // duplicates are dropped; re-index so the field stays a plain list
        // (a gapped list is JSON-encoded as an object - presumably what save() does).
        $doc[$field] = array_values(array_unique(array_merge($doc[$field], $foundAccounts)));
    }
    $db->save($doc);
}
?>
#http://packages.python.org/CouchDB/client.html | |
import couchdb | |
import json | |
import pprint | |
import re | |
from tidylib import tidy_document | |
# Handle on the CouchDB server listening on the loopback interface.
couch = couchdb.Server('http://127.0.0.1:5984/')
# Database whose documents are validated by the loop below.
docsdb = couch['disclosr-documents']
# Compiled once at import time rather than on every call to f().
_IGNORED_MESSAGE = re.compile(r"ensure|testing|flicker|updating|longdesc|Accessibility Checks|not recognized")
_LOCATED_MESSAGE = re.compile(r"line")


def f(x):
    """Return True when one line of tidy output should be kept.

    A line is kept when it contains "line" and none of the phrases in the
    ignore pattern. The empty string never contains "line", so it is rejected
    without a separate check. Always returns a real bool.
    """
    return bool(_LOCATED_MESSAGE.search(x)) and not _IGNORED_MESSAGE.search(x)
# For every document the view reports, run tidy's accessibility check over its
# first attachment and store the filtered messages on the document.
for row in docsdb.view('app/getValidationRequired'):
    print(row.id)
    # row.value is iterated for attachment names; only the first one is checked.
    attachment_name = next(iter(row.value or {}), None)
    if attachment_name is None:
        # nothing attached - nothing to validate
        continue
    attachment = docsdb.get_attachment(row.id, attachment_name)
    if attachment is None:
        # get_attachment() returns its default (None) when the attachment is missing
        continue
    html = attachment.read()
    #print html
    document, errors = tidy_document(html, options={'accessibility-check': 1, 'show-warnings': 0, 'markup': 0}, keep_doc=True)
    #http://www.aprompt.ca/Tidy/accessibilitychecks.html
    #print document
    # keep only the message lines accepted by f()
    errors = '\n'.join(filter(f, errors.split('\n')))
    #print errors
    doc = docsdb.get(row.id)
    doc['validation'] = errors
    docsdb.save(doc)
<?php
// Load the shared include, then call include_header() (defined outside this view;
// presumably it renders the page header - confirm).
include_once 'include/common.inc.php';
include_header();
/**
 * Echo one agency field as HTML.
 *
 * In "view" mode a table row is written: the schema's x-title and description in
 * the first cell, and the value (an ordered list for arrays, a span otherwise) in
 * the second. In "edit" mode form controls are written: one text input per array
 * element, a hidden input for keys starting with "_", a <select> built from the
 * byDeptStateName view for "parentOrg", and a text input for everything else.
 * Any other mode writes nothing.
 *
 * Keys and values are HTML-escaped before being written, because they come from
 * stored documents. Schema strings (x-title, description, x-property) are written
 * as-is. Keys missing from the schema get an empty title and description.
 *
 * @param string       $key   field name
 * @param string|array $value field value; arrays are rendered element by element
 * @param string       $mode  "view" or "edit"
 * @return void
 */
function displayValue($key, $value, $mode) {
    global $db, $schemas;
    $property = isset($schemas['agency']["properties"][$key]) ? $schemas['agency']["properties"][$key] : Array();
    $safeKey = htmlspecialchars((string) $key, ENT_QUOTES, 'UTF-8');

    if ($mode == "view") {
        $title = isset($property['x-title']) ? $property['x-title'] : "";
        $description = isset($property['description']) ? $property['description'] : "";
        $propertyAttr = isset($property['x-property']) ? ' property="' . $property['x-property'] . '"' : "";
        echo "<tr>";
        echo "<td>" . $title . "<br><small>" . $description . "</small></td><td>";
        if (is_array($value)) {
            echo "<ol>";
            foreach ($value as $subvalue) {
                echo "<li" . $propertyAttr . ">" . htmlspecialchars((string) $subvalue, ENT_QUOTES, 'UTF-8') . "</li>";
            }
            // the cell and row are closed once, below
            echo "</ol>";
        } else {
            $safeValue = htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8');
            echo "<span" . $propertyAttr . ">";
            // "> 0" (not "!== false"): only keys with "URL" after the first character count
            if ((strpos($key, "URL") > 0 || $key == 'website') && $value != "") {
                echo "<a href='" . $safeValue . "'>view</a>";
            } else {
                echo $safeValue;
            }
            echo "</span>";
        }
        echo "</td></tr>";
    }

    if ($mode == "edit") {
        if (is_array($value)) {
            echo '<div class="row">' . "\n"
                . '<div class="seven columns">' . "\n"
                . '<fieldset>' . "\n"
                . '<h5>' . $safeKey . '</h5>';
            foreach ($value as $subkey => $subvalue) {
                $safeSubkey = htmlspecialchars((string) $subkey, ENT_QUOTES, 'UTF-8');
                $safeSubvalue = htmlspecialchars((string) $subvalue, ENT_QUOTES, 'UTF-8');
                echo "<label>" . $safeSubkey . "</label>"
                    . "<input class='input-text' type='text' id='" . $safeKey . $safeSubkey . "'"
                    . " name='" . $safeKey . "[" . $safeSubkey . "]' value='" . $safeSubvalue . "'/>";
            }
            echo "</fieldset>\n</div>\n</div>";
        } else {
            $safeValue = htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8');
            if (strpos($key, "_") === 0) {
                // keys starting with "_" are carried through the form unseen
                echo "<input type='hidden' id='" . $safeKey . "' name='" . $safeKey . "' value='" . $safeValue . "'/>";
            } else if ($key == "parentOrg") {
                echo "<label for='" . $safeKey . "'>" . $safeKey . "</label><select id='" . $safeKey . "' name='" . $safeKey . "'><option value=''> Select... </option>";
                $rows = $db->get_view("app", "byDeptStateName")->rows;
                foreach ($rows as $row) {
                    $label = str_replace("Department of ", "", $row->key);
                    echo "<option value='" . htmlspecialchars((string) $row->value, ENT_QUOTES, 'UTF-8') . "'"
                        . (($row->value == $value) ? " SELECTED" : "")
                        . " >" . htmlspecialchars((string) $label, ENT_QUOTES, 'UTF-8') . "</option>";
                }
                echo " </select>";
            } else {
                echo "<label>" . $safeKey . "</label><input class='input-text' type='text' id='" . $safeKey . "' name='" . $safeKey . "' value='" . $safeValue . "'/>";
                if ((strpos($key, "URL") > 0 || $key == 'website') && $value != "") {
                    echo "<a href='" . $safeValue . "'>view</a>";
                }
                if ($key == 'abn') {
                    echo "<a href='http://www.abr.business.gov.au/SearchByAbn.aspx?SearchText=" . urlencode((string) $value) . "'>view abn</a>";
                }
            }
        }
    }
}
/**
 * Pad an agency row with the fields declared in $schemas['agency']['properties'].
 *
 * - A field that is not set on the row becomes "" when its schema type is
 *   "string" and Array("") when its schema type is "array".
 * - A field that is set and has schema type "array" gets trailing empty strings:
 *   three are appended to an existing list, while a scalar value is wrapped in a
 *   list followed by two empty strings.
 * - Schema properties that declare no type are left alone (reading the missing
 *   'type' used to raise an undefined-index notice).
 *
 * @param array $row agency record keyed by field name
 * @return array the padded record
 */
function addDefaultFields($row) {
    global $schemas;
    foreach ($schemas['agency']['properties'] as $field => $spec) {
        $type = isset($spec['type']) ? $spec['type'] : null;
        if (!isset($row[$field])) {
            if ($type == "string") {
                $row[$field] = "";
            } else if ($type == "array") {
                $row[$field] = Array("");
            }
        } else if ($type == "array") {
            if (is_array($row[$field])) {
                // three trailing blanks on an existing list
                array_push($row[$field], "", "", "");
            } else {
                // promote the scalar to a list, then add two blanks
                $row[$field] = Array($row[$field], "", "");
            }
        }
    }
    return $row;
}
$db = $server->get_db('disclosr-agencies'); | $db = $server->get_db('disclosr-agencies'); |
if (isset($_REQUEST['id'])) { | if (isset($_REQUEST['id'])) { |
//get an agency record as json/html, search by name/abn/id | //get an agency record as json/html, search by name/abn/id |
// by name = startkey="Ham"&endkey="Ham\ufff0" | // by name = startkey="Ham"&endkey="Ham\ufff0" |
// edit? | // edit? |
$row = $db->get($_REQUEST['id']); | $obj = $db->get($_REQUEST['id']); |
//print_r($row); | //print_r($row); |
if (sizeof($_POST) > 0) { | if (sizeof($_POST) > 0) { |
//print_r($_POST); | //print_r($_POST); |
foreach ($_POST as $postkey => $postvalue) { | foreach ($_POST as $postkey => $postvalue) { |
if ($postvalue == "") { | if ($postvalue == "") { |
unset($_POST[$postkey]); | unset($_POST[$postkey]); |
} | } |
if (is_array($postvalue)) { | if (is_array($postvalue)) { |
if (count($postvalue) == 1 && $postvalue[0] == "") { | if (count($postvalue) == 1 && $postvalue[0] == "") { |
unset($_POST[$postkey]); | unset($_POST[$postkey]); |
} else { | } else { |
foreach ($_POST[$postkey] as $key => &$value) { | foreach ($_POST[$postkey] as $key => &$value) { |
if ($value == "") { | if ($value == "") { |
unset($_POST[$postkey][$key]); | unset($_POST[$postkey][$key]); |
} | } |
} | } |
} | } |
} | } |
} | } |
if (isset($_POST['_id']) && $db->get_rev($_POST['_id']) == $_POST['_rev']) { | if (isset($_POST['_id']) && $db->get_rev($_POST['_id']) == $_POST['_rev']) { |
echo "Edited version was latest version, continue saving"; | echo "Edited version was latest version, continue saving"; |
$newdoc = $_POST; | $newdoc = $_POST; |
$newdoc['metadata']['lastModified'] = time(); | $newdoc['metadata']['lastModified'] = time(); |