FOI stats importer fixed
FOI stats importer fixed


Former-commit-id: 81a6a149848e27565b7a7052d2a7ff4e5aaa9310

<?php <?php
   
require_once '../include/common.inc.php'; require_once '../include/common.inc.php';
   
$db = $server->get_db('disclosr-agencies'); $db = $server->get_db('disclosr-agencies');
$rows = $db->get_view("app", "byName")->rows; $rows = $db->get_view("app", "byName")->rows;
$nametoid = Array(); $nametoid = Array();
$stats = Array(); $stats = Array();
foreach ($rows as $row) { foreach ($rows as $row) {
$nametoid[trim($row->key)] = $row->value; $nametoid[trim($row->key)] = $row->value;
} }
$row = 0; $row = 0;
$headers = Array(); $headers = Array();
// source: http://data.gov.au/dataset/freedom-of-information-quarterly-request-and-review-statistical-data-2011-12/ // source: http://data.gov.au/dataset/freedom-of-information-quarterly-request-and-review-statistical-data-2011-12/
if (($handle = fopen("FOI-quarterly-requests-and-reviews-2011-12.csv", "r")) !== FALSE) { if (($handle = fopen("FOI-quarterly-requests-and-reviews-2011-12.csv", "r")) !== FALSE) {
while (($data = fgetcsv($handle, 10000, ",")) !== FALSE) { while (($data = fgetcsv($handle, 10000, ",")) !== FALSE) {
if ($row >= 1) { if ($row >= 1) {
// print_r($data); // print_r($data);
$name = trim($data[2]); $name = trim($data[2]);
echo "$name <br>"; // echo "$name <br>";
if ($data[0] != "TOTALS" && $data[0] != "") { if ($data[0] != "TOTALS" && $data[0] != "") {
if (isset($nametoid[$name])) { if (isset($nametoid[$name])) {
$id = $nametoid[$name]; $id = $nametoid[$name];
$timePeriod = $data[0] . "-Q" . $data[1]; $timePeriod = $data[0] . "-Q" . $data[1];
   
echo "$timePeriod <br>"; // echo "$timePeriod <br>";
unset($data[0]); unset($data[0]);
unset($data[1]); unset($data[1]);
unset($data[2]); unset($data[2]);
unset($data[3]); unset($data[3]);
unset($data[4]); unset($data[4]);
unset($data[5]); unset($data[5]);
unset($data[6]); unset($data[6]);
unset($data[7]); unset($data[7]);
unset($data[8]); unset($data[8]);
   
//echo $id . "<br>" . PHP_EOL; //echo $id . "<br>" . PHP_EOL;
$result = Array("source" => "http://data.gov.au/dataset/freedom-of-information-quarterly-request-and-review-statistical-data-2011-12/"); $result = Array("source" => "http://data.gov.au/dataset/freedom-of-information-quarterly-request-and-review-statistical-data-2011-12/");
foreach ($data as $key => $datum) { foreach ($data as $key => $datum) {
if ($datum != 0) { if ($datum != 0) {
  // tODO prefix header with "FOI"
  if (isset($stats[$id][$timePeriod][$key])) $datum += $stats[$id][$timePeriod][$key];
$result[trim($headers[$key])] = $datum; $result[trim($headers[$key])] = $datum;
} }
} }
$stats[$id][$timePeriod] = $result; $stats[$id][$timePeriod] = $result;
  // TODO merge if already exists
//print_r($stats); //print_r($stats);
} else { } else {
echo "<br>ERROR NAME MISSING FROM ID LIST<br><bR> $row" . PHP_EOL; echo "<br>ERROR NAME MISSING FROM ID LIST<br><bR> $row" . PHP_EOL;
print_r($data); print_r($data);
die(); die();
} }
} }
} else { } else {
$headers = $data; $headers = $data;
//print_r($headers); //print_r($headers);
} }
$row++; $row++;
} }
fclose($handle); fclose($handle);
} }
  echo "all stats loaded successfuly";
foreach ($stats as $id => $stat) { foreach ($stats as $id => $stat) {
echo $id . "<br>" . PHP_EOL; echo $id . "<br>" . PHP_EOL;
$doc = $db->get($id); $doc = $db->get($id);
echo $doc->name . "<br>" . PHP_EOL; echo $doc->name . "<br>" . PHP_EOL;
print_r($stat); // print_r($stat);
die();  
// print_r($doc); // print_r($doc);
$changed = false; $changed = false;
if (!isset($doc->statistics)) { if (!isset($doc->statistics)) {
$changed = true; $changed = true;
$doc->statistics = Array(); $doc->statistics = Array();
  } else {
  $doc->statistics = object_to_array($doc->statistics);
} }
foreach ($stat as $timePeriod => $value) { foreach ($stat as $timePeriod => $value) {
if (!isset($doc->statistics->foiRequests->$timePeriod) if (!isset($doc->statistics["foiRequests"][$timePeriod])
|| $doc->statistics->foiRequests->$timePeriod != $value) { || $doc->statistics["foiRequests"][$timePeriod] != $value
  ) {
$changed = true; $changed = true;
$doc->statistics["foiRequests"][$timePeriod] = $value; $doc->statistics["foiRequests"][$timePeriod] = $value;
} }
} }
if ($changed) { if ($changed) {
$db->save($doc); $db->save($doc);
} else { } else {
echo "not changed" . "<br>" . PHP_EOL; echo "not changed" . "<br>" . PHP_EOL;
} }
  //print_r($doc);die();
} }
?> ?>
   
  for line in `curl "http://localhost:5984/disclosr-foidocuments/_design/app/_view/byAgencyID?reduce=false&keys=%5B\"5716ce0aacfe98f7d638b7a66b7f1040\"%5D&limit=600" | xargs -L1`; do
  # echo $line
  id=`echo $line | grep -Po '_id:.*?[^\\\],' | perl -pe 's/_id://; s/^//; s/,$//'`
  rev=`echo $line | grep -Po 'rev:.*?[^\\\],'| perl -pe 's/rev://; s/^//; s/,$//'`
  if [ -n "$id" ]; then
  echo "curl -X DELETE http://localhost:5984/disclosr-foidocuments/$id?rev=$rev"
  curl -X DELETE http://localhost:5984/disclosr-foidocuments/$id?rev=$rev
  fi
  done;
 
<?php <?php
   
require_once '../include/common.inc.php'; require_once '../include/common.inc.php';
//function createFOIDocumentsDesignDoc() { //function createFOIDocumentsDesignDoc() {
   
$foidb = $server->get_db('disclosr-foidocuments'); $foidb = $server->get_db('disclosr-foidocuments');
$obj = new stdClass(); $obj = new stdClass();
$obj->_id = "_design/" . urlencode("app"); $obj->_id = "_design/" . urlencode("app");
$obj->language = "javascript"; $obj->language = "javascript";
$obj->views->all->map = "function(doc) { emit(doc._id, doc); };"; $obj->views->all->map = "function(doc) { emit(doc._id, doc); };";
$obj->views->byDate->map = "function(doc) { if (doc.title != \"Disclosure Log Updated\") { emit(doc.date, doc); } };"; $obj->views->byDate->map = "function(doc) { if (doc.title != \"Disclosure Log Updated\") { emit(doc.date, doc); } };";
$obj->views->byDateMonthYear->map = "function(doc) { emit(doc.date, doc); };"; $obj->views->byDateMonthYear->map = "function(doc) { emit(doc.date, doc); };";
$obj->views->byDateMonthYear->reduce = "_count"; $obj->views->byDateMonthYear->reduce = "_count";
$obj->views->byAgencyID->map = "function(doc) { emit(doc.agencyID, doc); };"; $obj->views->byAgencyID->map = "function(doc) { emit(doc.agencyID, doc); };";
$obj->views->byAgencyID->reduce = "_count"; $obj->views->byAgencyID->reduce = "_count";
$obj->views->fieldNames->map = 'function(doc) { for(var propName in doc) { emit(propName, doc._id); }}'; $obj->views->fieldNames->map = 'function(doc) { for(var propName in doc) { emit(propName, doc._id); }}';
$obj->views->fieldNames->reduce = 'function (key, values, rereduce) { return values.length; }'; $obj->views->fieldNames->reduce = 'function (key, values, rereduce) { return values.length; }';
// allow safe updates (even if slightly slower due to extra: rev-detection check). // allow safe updates (even if slightly slower due to extra: rev-detection check).
$foidb->save($obj, true); $foidb->save($obj, true);
   
   
//function createDocumentsDesignDoc() { //function createDocumentsDesignDoc() {
$docdb = $server->get_db('disclosr-documents'); $docdb = $server->get_db('disclosr-documents');
   
$obj = new stdClass(); $obj = new stdClass();
$obj->_id = "_design/" . urlencode("app"); $obj->_id = "_design/" . urlencode("app");
$obj->language = "javascript"; $obj->language = "javascript";
$obj->views->web_server->map = "function(doc) {\n emit(doc.web_server, 1);\n}"; $obj->views->web_server->map = "function(doc) {\n emit(doc.web_server, 1);\n}";
$obj->views->web_server->reduce = "_sum"; $obj->views->web_server->reduce = "_sum";
$obj->views->byAgency->map = "function(doc) {\n emit(doc.agencyID, 1);\n}"; $obj->views->byAgency->map = "function(doc) {\n emit(doc.agencyID, 1);\n}";
$obj->views->byAgency->reduce = "_sum"; $obj->views->byAgency->reduce = "_sum";
$obj->views->byURL->map = "function(doc) {\n emit(doc.url, doc);\n}"; $obj->views->byURL->map = "function(doc) {\n emit(doc.url, doc);\n}";
$obj->views->agency->map = "function(doc) {\n emit(doc.agencyID, doc);\n}"; $obj->views->agency->map = "function(doc) {\n emit(doc.agencyID, doc);\n}";
$obj->views->byWebServer->map = "function(doc) {\n emit(doc.web_server, doc);\n}"; $obj->views->byWebServer->map = "function(doc) {\n emit(doc.web_server, doc);\n}";
   
$obj->views->datasets->map = "function(doc) {\nif (doc.fieldName == \"data\") {\n emit(doc._id, doc);\n}\n}"; $obj->views->datasets->map = "function(doc) {\nif (doc.fieldName == \"data\") {\n emit(doc._id, doc);\n}\n}";
$obj->views->datasetGroups->map = "function(doc) {\nif (doc.fieldName == \"data\") {\n doc.metadata[\"data.gov.au Category\"] && doc.metadata[\"data.gov.au Category\"].forEach(function(tag) {\n emit(tag, doc.url); \n });\n}\n}"; $obj->views->datasetGroups->map = "function(doc) {\nif (doc.fieldName == \"data\") {\n doc.metadata[\"data.gov.au Category\"] && doc.metadata[\"data.gov.au Category\"].forEach(function(tag) {\n emit(tag, doc.url); \n });\n}\n}";
$obj->views->getValidationRequired->map = "function(doc) {\nif (doc.mime_type == \"text/html\" \n&& typeof(doc.validation) == \"undefined\") {\n emit(doc._id, doc._attachments);\n}\n}"; $obj->views->getValidationRequired->map = "function(doc) {\nif (doc.mime_type == \"text/html\" \n&& typeof(doc.validation) == \"undefined\") {\n emit(doc._id, doc._attachments);\n}\n}";
$docdb->save($obj, true); $docdb->save($obj, true);
   
   
   
   
//function createAgencyDesignDoc() { //function createAgencyDesignDoc() {
$db = $server->get_db('disclosr-agencies'); $db = $server->get_db('disclosr-agencies');
$obj = new stdClass(); $obj = new stdClass();
$obj->_id = "_design/" . urlencode("app"); $obj->_id = "_design/" . urlencode("app");
$obj->language = "javascript"; $obj->language = "javascript";
$obj->views->all->map = "function(doc) { emit(doc._id, doc); };"; $obj->views->all->map = "function(doc) { emit(doc._id, doc); };";
$obj->views->byABN->map = "function(doc) { emit(doc.abn, doc); };"; $obj->views->byABN->map = "function(doc) { emit(doc.abn, doc); };";
$obj->views->byCanonicalName->map = "function(doc) { $obj->views->byCanonicalName->map = "function(doc) {
if (doc.parentOrg || doc.orgType == 'FMA-DepartmentOfState') { if (doc.parentOrg || doc.orgType == 'FMA-DepartmentOfState') {
emit(doc.name, doc); emit(doc.name, doc);
} }
};"; };";
$obj->views->byDeptStateName->map = "function(doc) { $obj->views->byDeptStateName->map = "function(doc) {
if (doc.orgType == 'FMA-DepartmentOfState') { if (doc.orgType == 'FMA-DepartmentOfState') {
emit(doc.name, doc._id); emit(doc.name, doc._id);
} }
};"; };";
$obj->views->parentOrgs->map = "function(doc) { $obj->views->parentOrgs->map = "function(doc) {
if (doc.parentOrg) { if (doc.parentOrg) {
emit(doc._id, doc.parentOrg); emit(doc._id, doc.parentOrg);
} }
};"; };";
$obj->views->byName->map = 'function(doc) { $obj->views->byName->map = 'function(doc) {
if (typeof(doc["status"]) == "undefined" || doc["status"] != "suspended") { if (typeof(doc["status"]) == "undefined" || doc["status"] != "suspended") {
emit(doc.name, doc._id); emit(doc.name, doc._id);
if (typeof(doc.shortName) != "undefined" && doc.shortName != doc.name) { if (typeof(doc.shortName) != "undefined" && doc.shortName != doc.name) {
emit(doc.shortName, doc._id); emit(doc.shortName, doc._id);
} }
for (name in doc.otherNames) { for (name in doc.otherNames) {
if (doc.otherNames[name] != "" && doc.otherNames[name] != doc.name) { if (doc.otherNames[name] != "" && doc.otherNames[name] != doc.name) {
emit(doc.otherNames[name], doc._id); emit(doc.otherNames[name], doc._id);
} }
} }
for (name in doc.foiBodies) { for (name in doc.foiBodies) {
if (doc.foiBodies[name] != "" && doc.foiBodies[name] != doc.name) { if (doc.foiBodies[name] != "" && doc.foiBodies[name] != doc.name) {
emit(doc.foiBodies[name], doc._id); emit(doc.foiBodies[name], doc._id);
} }
} }
for (name in doc.positions) { for (name in doc.positions) {
if (doc.positions[name] != "" && doc.positions[name] != doc.name) { if (doc.positions[name] != "" && doc.positions[name] != doc.name) {
emit(doc.positions[name], doc._id); emit(doc.positions[name], doc._id);
} }
} }
} }
};'; };';
   
$obj->views->foiEmails->map = "function(doc) { $obj->views->foiEmails->map = "function(doc) {
emit(doc._id, doc.foiEmail); emit(doc._id, doc.foiEmail);
};"; };";
   
$obj->views->byLastModified->map = "function(doc) { emit(doc.metadata.lastModified, doc); }"; $obj->views->byLastModified->map = "function(doc) { emit(doc.metadata.lastModified, doc); }";
$obj->views->getActive->map = 'function(doc) { if (doc.status == "active") { emit(doc._id, doc); } };'; $obj->views->getActive->map = 'function(doc) { if (doc.status == "active") { emit(doc._id, doc); } };';
$obj->views->getSuspended->map = 'function(doc) { if (doc.status == "suspended") { emit(doc._id, doc); } };'; $obj->views->getSuspended->map = 'function(doc) { if (doc.status == "suspended") { emit(doc._id, doc); } };';
$obj->views->getScrapeRequired->map = "function(doc) { $obj->views->getScrapeRequired->map = "function(doc) {
   
var lastScrape = Date.parse(doc.metadata.lastScraped); var lastScrape = Date.parse(doc.metadata.lastScraped);
   
var today = new Date(); var today = new Date();
   
if (!lastScrape || lastScrape.getTime() + 1000 != today.getTime()) { if (!lastScrape || lastScrape.getTime() + 1000 != today.getTime()) {
emit(doc._id, doc); emit(doc._id, doc);
} }
   
};"; };";
$obj->views->showNamesABNs->map = "function(doc) { emit(doc._id, {name: doc.name, abn: doc.abn}); };"; $obj->views->showNamesABNs->map = "function(doc) { emit(doc._id, {name: doc.name, abn: doc.abn}); };";
$obj->views->getConflicts->map = "function(doc) { $obj->views->getConflicts->map = "function(doc) {
if (doc._conflicts) { if (doc._conflicts) {
emit(null, [doc._rev].concat(doc._conflicts)); emit(null, [doc._rev].concat(doc._conflicts));
} }
}"; }";
$obj->views->getStatistics->map = $obj->views->getStatistics->map =
"function(doc) { "
if (doc.statistics) { function (doc) {
for (var statisticSet in doc.statistics) { if (doc.statistics) {
for (var statisticPeriod in doc.statistics[statisticSet]) { for (var statisticSet in doc.statistics) {
emit([statisticSet,statisticPeriod], doc.statistics[statisticSet][statisticPeriod]['value']); for (var statisticPeriod in doc.statistics[statisticSet]) {
  if (doc.statistics[statisticSet][statisticPeriod]['value']) {
  emit([statisticSet, statisticPeriod], doc.statistics[statisticSet][statisticPeriod]['value']);
  } else {
  for (var statisticSubSet in doc.statistics[statisticSet][statisticPeriod]) {
  if (statisticSubSet != 'source' && statisticSubSet != 'value') {
  emit([statisticSubSet, statisticPeriod], doc.statistics[statisticSet][statisticPeriod][statisticSubSet]);
  }
  }
  }
  }
  }
  }
} }
} ";
}  
}";  
$obj->views->getStatistics->reduce = '_sum'; $obj->views->getStatistics->reduce = '_sum';
// http://stackoverflow.com/questions/646628/javascript-startswith // http://stackoverflow.com/questions/646628/javascript-startswith
$obj->views->score->map = 'if(!String.prototype.startsWith){ $obj->views->score->map = 'if(!String.prototype.startsWith){
String.prototype.startsWith = function (str) { String.prototype.startsWith = function (str) {
return !this.indexOf(str); return !this.indexOf(str);
} }
} }
   
function(doc) { function(doc) {
count = 0; count = 0;
if (doc["status"] != "suspended") { if (doc["status"] != "suspended") {
for(var propName in doc) { for(var propName in doc) {
if(typeof(doc[propName]) != "undefined" && doc[propName] != "") { if(typeof(doc[propName]) != "undefined" && doc[propName] != "") {
count++; count++;
} }
} }
portfolio = doc.parentOrg; portfolio = doc.parentOrg;
if (doc.orgType == "FMA-DepartmentOfState") { if (doc.orgType == "FMA-DepartmentOfState") {
portfolio = doc._id; portfolio = doc._id;
} }
if (doc.orgType == "Court-Commonwealth" || doc.orgType == "FMA-DepartmentOfParliament") { if (doc.orgType == "Court-Commonwealth" || doc.orgType == "FMA-DepartmentOfParliament") {
portfolio = doc.orgType; portfolio = doc.orgType;
} }
emit(count+doc._id, {id:doc._id, name: doc.name, score:count, orgType: doc.orgType, portfolio:portfolio}); emit(count+doc._id, {id:doc._id, name: doc.name, score:count, orgType: doc.orgType, portfolio:portfolio});
} }
}'; }';
$obj->views->scoreHas->map = 'if(!String.prototype.startsWith){ $obj->views->scoreHas->map = 'if(!String.prototype.startsWith){
String.prototype.startsWith = function (str) { String.prototype.startsWith = function (str) {
return !this.indexOf(str); return !this.indexOf(str);
} }
} }
if(!String.prototype.endsWith){ if(!String.prototype.endsWith){
String.prototype.endsWith = function(suffix) { String.prototype.endsWith = function(suffix) {
    return this.indexOf(suffix, this.length - suffix.length) !== -1;     return this.indexOf(suffix, this.length - suffix.length) !== -1;
}; };
} }
function(doc) { function(doc) {
if (typeof(doc["status"]) == "undefined" || doc["status"] != "suspended") { if (typeof(doc["status"]) == "undefined" || doc["status"] != "suspended") {
for(var propName in doc) { for(var propName in doc) {
if(typeof(doc[propName]) != "undefined" && (propName.startsWith("has") || propName.endsWith("URL"))) { if(typeof(doc[propName]) != "undefined" && (propName.startsWith("has") || propName.endsWith("URL"))) {
emit(propName, 1); emit(propName, 1);
} }
} }
emit("total", 1); emit("total", 1);
} }
}'; }';
$obj->views->scoreHas->reduce = '_sum'; $obj->views->scoreHas->reduce = '_sum';
$obj->views->fieldNames->map = ' $obj->views->fieldNames->map = '
function(doc) { function(doc) {
for(var propName in doc) { for(var propName in doc) {
emit(propName, doc._id); emit(propName, doc._id);
} }
}'; }';
$obj->views->fieldNames->reduce = '_count'; $obj->views->fieldNames->reduce = '_count';
// allow safe updates (even if slightly slower due to extra: rev-detection check). // allow safe updates (even if slightly slower due to extra: rev-detection check).
$db->save($obj, true); $db->save($obj, true);
?> ?>
   
file:a/documents/about.php -> file: