Make apsc stats importer nondestructive
Make apsc stats importer nondestructive


Former-commit-id: 1eae8d1c89fabed230ce5d3f02595c29a41b7f5e

<?php

/*
 * Imports APSC employee head counts from the yearly CSV files into the
 * "statistics" section of each agency document in the disclosr-agencies
 * database.
 *
 * The import is non-destructive: existing statistics are kept, only the
 * employees/<time period> entries whose value differs are rewritten, and a
 * document is saved only when something actually changed.
 */

require_once '../include/common.inc.php';

$db = $server->get_db('disclosr-agencies');

// Every name the byName view emits (trimmed) mapped to the view's value,
// which is used below as the agency document id.
$nametoid = array();
foreach ($db->get_view("app", "byName")->rows as $row) {
    $nametoid[trim($row->key)] = $row->value;
}

// Time period label => CSV file holding that period's figures.
$employeeCSVs = array(
    "2002-2003" => "0203apsemployees.csv",
    "2003-2004" => "0304apsemployees.csv",
    "2004-2005" => "0405apsemployees.csv",
    "2005-2006" => "0506apsemployees.csv",
    "2006-2007" => "0607apsemployees.csv",
    "2007-2008" => "0708apsemployees.csv",
    "2008-2009" => "0809apsemployees.csv",
    "2009-2010" => "0910apsemployees.csv",
    "2010-2011" => "1011apsemployees.csv"
);

// $sums[<document id>][<time period>] = total of column 1 over all CSV rows
// whose column 0 resolves to that document.
$sums = array();
foreach ($employeeCSVs as $timePeriod => $employeeCSV) {
    echo $employeeCSV . "<br>" . PHP_EOL;
    if (($handle = fopen($employeeCSV, "r")) !== FALSE) {
        while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) {
            // fgetcsv() reports a blank line as array(null); skip it rather
            // than treating it as an unknown agency and aborting the import.
            if ($data === array(null)) {
                continue;
            }
            $name = trim($data[0]);
            if (!isset($nametoid[$name])) {
                echo "<br>ERROR NAME MISSING FROM ID LIST<br><bR>" . PHP_EOL;

                die();
            }
            $id = $nametoid[$name];
            // Initialise explicitly instead of hiding the undefined-index
            // notice behind the @ operator.
            if (!isset($sums[$id][$timePeriod])) {
                $sums[$id][$timePeriod] = 0;
            }
            // A missing or non-numeric count contributes nothing.
            $count = isset($data[1]) ? trim($data[1]) : "";
            $sums[$id][$timePeriod] += is_numeric($count) ? $count + 0 : 0;
        }
        fclose($handle);
    }
}

foreach ($sums as $id => $sum) {
    echo $id . "<br>" . PHP_EOL;
    $doc = $db->get($id);
    echo $doc->name . "<br>" . PHP_EOL;
    // print_r($doc);
    $changed = false;
    if (!isset($doc->statistics)) {
        $changed = true;
        $doc->statistics = new stdClass();
    } elseif (is_array($doc->statistics)) {
        // The document is read through object properties everywhere else in
        // this script, so normalise an array-valued section to an object.
        $doc->statistics = (object) $doc->statistics;
    }
    if (!isset($doc->statistics->employees)) {
        $doc->statistics->employees = new stdClass();
    } elseif (is_array($doc->statistics->employees)) {
        $doc->statistics->employees = (object) $doc->statistics->employees;
    }
    foreach ($sum as $timePeriod => $value) {
        if (!isset($doc->statistics->employees->$timePeriod->value)
                || $doc->statistics->employees->$timePeriod->value != $value) {
            $changed = true;
            // Written through object properties to match the reads above;
            // indexing the decoded stdClass with [] is a fatal error.
            $entry = new stdClass();
            $entry->value = $value;
            $entry->source = "http://apsc.gov.au/stateoftheservice/";
            $doc->statistics->employees->$timePeriod = $entry;
        }
    }
    if ($changed) {
        $db->save($doc);
    } else {
        echo "not changed" . "<br>" . PHP_EOL;
    }
}
// employees: timeperiod, source = apsc state of service, value
?>
   
<?php <?php
   
include $basePath . "schemas/schemas.inc.php"; include $basePath . "schemas/schemas.inc.php";
   
require ($basePath . 'couchdb/settee/src/settee.php'); require ($basePath . 'couchdb/settee/src/settee.php');
   
/**
 * Placeholder for creating the design document of the documents database.
 *
 * The function currently has no executable statements; its body only keeps
 * the intended view definitions as a reference comment.
 */
function createDocumentsDesignDoc() {
    /* "views": {
      "web_server": {
      "map": "function(doc) {\n emit(doc.web_server, 1);\n}",
      "reduce": "function (key, values, rereduce) {\n return sum(values);\n}"
      },
      "byAgency": {
      "map": "function(doc) {\n emit(doc.agencyID, 1);\n}",
      "reduce": "function (key, values, rereduce) {\n return sum(values);\n}"
      },
      "byURL": {
      "map": "function(doc) {\n emit(doc.url, doc);\n}"
      },
      "agency": {
      "map": "function(doc) {\n emit(doc.agencyID, doc);\n}"
      },
      "byWebServer": {
      "map": "function(doc) {\n emit(doc.web_server, doc);\n}"
      },
      "getValidationRequired": {
      "map": "function(doc) {\nif (doc.mime_type == \"text/html\" \n&& typeof(doc.validation) == \"undefined\") {\n emit(doc._id, doc._attachments);\n}\n}"
      }
      } */
}
   
/**
 * Builds the "_design/app" design document for the agencies database and
 * saves it through the global $db handle.
 *
 * Each view is a JavaScript source string stored under views-><name>->map
 * (plus ->reduce for scoreHas and fieldNames).
 *
 * @return mixed whatever $db->save() returns
 */
function createAgencyDesignDoc() {
    global $db;
    $obj = new stdClass();
    $obj->_id = "_design/" . urlencode("app");
    $obj->language = "javascript";

    // Create the container objects up front. Assigning through a property
    // chain that does not exist yet ($obj->views->all->map = ...) relies on
    // implicit object creation, which raises an Error on PHP 8.
    $obj->views = new stdClass();
    $viewNames = array(
        "all", "byABN", "byCanonicalName", "byDeptStateName", "parentOrgs",
        "byName", "foiEmails", "byLastModified", "getActive", "getSuspended",
        "getScrapeRequired", "showNamesABNs", "getConflicts", "score",
        "scoreHas", "fieldNames"
    );
    foreach ($viewNames as $viewName) {
        $obj->views->$viewName = new stdClass();
    }

    $obj->views->all->map = "function(doc) { emit(doc._id, doc); };";
    $obj->views->byABN->map = "function(doc) { emit(doc.abn, doc); };";
    $obj->views->byCanonicalName->map = "function(doc) {
if (doc.parentOrg || doc.orgType == 'FMA-DepartmentOfState') {
emit(doc.name, doc);
}
};";
    $obj->views->byDeptStateName->map = "function(doc) {
if (doc.orgType == 'FMA-DepartmentOfState') {
emit(doc.name, doc._id);
}
};";
    $obj->views->parentOrgs->map = "function(doc) {
if (doc.parentOrg) {
emit(doc._id, doc.parentOrg);
}
};";
    // Emits the document id under its name, short name, other names and FOI
    // body names, skipping suspended documents.
    $obj->views->byName->map = 'function(doc) {
if (typeof(doc["status"]) == "undefined" || doc["status"] != "suspended") {
emit(doc.name, doc._id);
if (typeof(doc.shortName) != "undefined" && doc.shortName != doc.name) {
emit(doc.shortName, doc._id);
}
for (name in doc.otherNames) {
if (doc.otherNames[name] != "" && doc.otherNames[name] != doc.name) {
emit(doc.otherNames[name], doc._id);
}
}
for (name in doc.foiBodies) {
if (doc.foiBodies[name] != "" && doc.foiBodies[name] != doc.name) {
emit(doc.foiBodies[name], doc._id);
}
}
}
};';

    $obj->views->foiEmails->map = "function(doc) {
emit(doc._id, doc.foiEmail);
};";

    $obj->views->byLastModified->map = "function(doc) { emit(doc.metadata.lastModified, doc); }";
    $obj->views->getActive->map = 'function(doc) { if (doc.status == "active") { emit(doc._id, doc); } };';
    $obj->views->getSuspended->map = 'function(doc) { if (doc.status == "suspended") { emit(doc._id, doc); } };';
    // NOTE(review): Date.parse() returns a number in standard JavaScript, so
    // lastScrape.getTime() looks like it cannot work as written - confirm
    // against the CouchDB view server before relying on this view.
    $obj->views->getScrapeRequired->map = "function(doc) {

var lastScrape = Date.parse(doc.metadata.lastScraped);

var today = new Date();

if (!lastScrape || lastScrape.getTime() + 1000 != today.getTime()) {
emit(doc._id, doc);
}

};";
    $obj->views->showNamesABNs->map = "function(doc) { emit(doc._id, {name: doc.name, abn: doc.abn}); };";
    $obj->views->getConflicts->map = "function(doc) {
if (doc._conflicts) {
emit(null, [doc._rev].concat(doc._conflicts));
}
}";
    // http://stackoverflow.com/questions/646628/javascript-startswith
    // NOTE(review): this source (and scoreHas below) has statements before
    // the function expression - verify the view server accepts that form.
    $obj->views->score->map = 'if(!String.prototype.startsWith){
String.prototype.startsWith = function (str) {
return !this.indexOf(str);
}
}

function(doc) {
count = 0;
if (doc["status"] != "suspended") {
for(var propName in doc) {
if(typeof(doc[propName]) != "undefined" && doc[propName] != "") {
count++;
}
}
portfolio = doc.parentOrg;
if (doc.orgType == "FMA-DepartmentOfState") {
portfolio = doc._id;
}
if (doc.orgType == "Court-Commonwealth" || doc.orgType == "FMA-DepartmentOfParliament") {
portfolio = doc.orgType;
}
emit(count+doc._id, {id:doc._id, name: doc.name, score:count, orgType: doc.orgType, portfolio:portfolio});
}
}';
    $obj->views->scoreHas->map = 'if(!String.prototype.startsWith){
String.prototype.startsWith = function (str) {
return !this.indexOf(str);
}
}
if(!String.prototype.endsWith){
String.prototype.endsWith = function(suffix) {
    return this.indexOf(suffix, this.length - suffix.length) !== -1;
};
}
function(doc) {
if (typeof(doc["status"]) == "undefined" || doc["status"] != "suspended") {
for(var propName in doc) {
if(typeof(doc[propName]) != "undefined" && (propName.startsWith("has") || propName.endsWith("URL"))) {
emit(propName, 1);
}
}
emit("total", 1);
}
}';
    $obj->views->scoreHas->reduce = 'function (key, values, rereduce) {
return sum(values);
}';
    $obj->views->fieldNames->map = '
function(doc) {
for(var propName in doc) {
emit(propName, doc._id);
}
}';
    $obj->views->fieldNames->reduce = 'function (key, values, rereduce) {
return values.length;
}';
    // allow safe updates (even if slightly slower due to extra: rev-detection check).
    return $db->save($obj, true);
}
   
// Choose the CouchDB endpoint from the name of the host this code runs on.
$hostName = php_uname('n');
if ($hostName == "vanille") {
    $serverAddr = 'http://192.168.178.21:5984/';
} else {
    // Every other host, including "KYUUBEY", uses the CouchDB on localhost.
    $serverAddr = 'http://127.0.0.1:5984/';
}
$server = new SetteeServer($serverAddr);
   
/**
 * Prints an exception's message as one line of HTML output.
 *
 * The message is HTML-escaped before it is echoed so that markup inside it
 * is displayed as text instead of being interpreted by the browser.
 *
 * @param Exception $e the exception whose message is reported
 */
function setteErrorHandler($e) {
    $message = htmlspecialchars($e->getMessage(), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    echo $message . "<br>" . PHP_EOL;
}
   
<?php

/*
 * Renders a table with one row per agency: the agency name, the first word
 * of the web server string recorded for its website, and the stored
 * validation output for that website ("?" when none is recorded,
 * "No error" when it is empty).
 */

include_once('include/common.inc.php');
include_header();

// NOTE(review): the table is never explicitly closed in this script -
// confirm whether include_footer() or the browser is expected to close it.
echo "<table>
<tr><th>name</th><th>webserver</th><th>accessiblity errors</th></tr>";
$agenciesdb = $server->get_db('disclosr-agencies');
$docsdb = $server->get_db('disclosr-documents');
try {
    $rows = $agenciesdb->get_view("app", "all", null, true)->rows;

    if ($rows) {
        foreach ($rows as $row) {
            $agencyName = isset($row->value->name) ? $row->value->name : "";
            echo "<tr><td>" . htmlspecialchars($agencyName, ENT_QUOTES, 'UTF-8') . "</td>";

            // Build the remaining two cells first so that every row has the
            // same number of columns as the header, even when the agency has
            // no website or its website document cannot be fetched.
            $cells = "<td></td><td></td>";
            if (isset($row->value->website)) {
                try {
                    // Website documents are looked up by the MD5 of the URL.
                    $website = $docsdb->get(md5($row->value->website));
                    $webServer = isset($website->web_server) ? $website->web_server : "";
                    $serverParts = explode(" ", $webServer);
                    $cells = "<td>" . htmlspecialchars($serverParts[0], ENT_QUOTES, 'UTF-8') . "</td>";
                    if (!isset($website->validation)) {
                        $cells .= "<td>?</td>";
                    } elseif ($website->validation == "") {
                        $cells .= "<td>No error</td>";
                    } else {
                        // Escape the whole validation output, not only "<".
                        $cells .= "<td><pre>" . htmlspecialchars($website->validation, ENT_QUOTES, 'UTF-8') . "</pre></td>";
                    }
                } catch (SetteeRestClientException $e) {
                    // Deliberately silent: a missing website document just
                    // leaves the two cells empty.
                    // setteErrorHandler($e);
                }
            }
            echo $cells;
            echo "</tr>";
        }
    }
} catch (SetteeRestClientException $e) {
    setteErrorHandler($e);
}
include_footer();
?>