beginning semantic markup
beginning semantic markup


Former-commit-id: a5eba7337e3796c4f37731406127e5df85c440fa

[submodule "couchdb/couchdb-lucene"] [submodule "couchdb/couchdb-lucene"]
path = couchdb/couchdb-lucene path = couchdb/couchdb-lucene
url = https://github.com/rnewson/couchdb-lucene.git url = https://github.com/rnewson/couchdb-lucene.git
[submodule "couchdb/settee"]  
path = couchdb/settee  
url = https://github.com/inadarei/settee.git  
[submodule "lib/php-diff"] [submodule "lib/php-diff"]
path = lib/php-diff path = lib/php-diff
url = https://github.com/chrisboulton/php-diff.git url = https://github.com/chrisboulton/php-diff.git
[submodule "lib/Requests"] [submodule "lib/Requests"]
path = lib/Requests path = lib/Requests
url = https://github.com/rmccue/Requests.git url = https://github.com/rmccue/Requests.git
[submodule "js/flotr2"] [submodule "js/flotr2"]
path = js/flotr2 path = js/flotr2
url = https://github.com/HumbleSoftware/Flotr2.git url = https://github.com/HumbleSoftware/Flotr2.git
[submodule "lib/phpquery"] [submodule "lib/phpquery"]
path = lib/phpquery path = lib/phpquery
url = https://github.com/TobiaszCudnik/phpquery.git url = https://github.com/TobiaszCudnik/phpquery.git
[submodule "js/sigma"] [submodule "js/sigma"]
path = js/sigma path = js/sigma
url = https://github.com/jacomyal/sigma.js.git url = https://github.com/jacomyal/sigma.js.git
[submodule "js/bubbletree"] [submodule "js/bubbletree"]
path = js/bubbletree path = js/bubbletree
url = https://github.com/okfn/bubbletree.git url = https://github.com/okfn/bubbletree.git
[submodule "lib/querypath"] [submodule "lib/querypath"]
path = lib/querypath path = lib/querypath
url = https://github.com/technosophos/querypath.git url = https://github.com/technosophos/querypath.git
[submodule "lib/amon-php"] [submodule "lib/amon-php"]
path = lib/amon-php path = lib/amon-php
url = https://github.com/martinrusev/amon-php.git url = https://github.com/martinrusev/amon-php.git
[submodule "documents/lib/parsedatetime"] [submodule "documents/lib/parsedatetime"]
path = documents/lib/parsedatetime path = documents/lib/parsedatetime
url = git://github.com/bear/parsedatetime.git url = git://github.com/bear/parsedatetime.git
[submodule "lib/FeedWriter"] [submodule "lib/FeedWriter"]
path = lib/FeedWriter path = lib/FeedWriter
url = https://github.com/mibe/FeedWriter url = https://github.com/mibe/FeedWriter
   
<?php <?php
   
require_once '../include/common.inc.php'; require_once '../include/common.inc.php';
//function createFOIDocumentsDesignDoc() { //function createFOIDocumentsDesignDoc() {
   
$foidb = $server->get_db('disclosr-foidocuments'); $foidb = $server->get_db('disclosr-foidocuments');
$obj = new stdClass(); $obj = new stdClass();
$obj->_id = "_design/" . urlencode("app"); $obj->_id = "_design/" . urlencode("app");
$obj->language = "javascript"; $obj->language = "javascript";
$obj->views->all->map = "function(doc) { emit(doc._id, doc); };"; $obj->views->all->map = "function(doc) { emit(doc._id, doc); };";
$obj->views->byDate->map = "function(doc) { emit(doc.date, doc); };"; $obj->views->byDate->map = "function(doc) { emit(doc.date, doc); };";
$obj->views->byDate->reduce = "_count"; $obj->views->byDateMonthYear->map = "function(doc) { emit(doc.date, doc); };";
$obj->views->byAgencyID->map = "function(doc) { emit(doc.agencyID, doc); };"; $obj->views->byDateMonthYear->reduce = "_count";
$obj->views->byAgencyID->reduce = "_count"; $obj->views->byAgencyID->map = "function(doc) { emit(doc.agencyID, doc); };";
  $obj->views->byAgencyID->reduce = "_count";
   
// allow safe updates (even if slightly slower due to extra: rev-detection check). // allow safe updates (even if slightly slower due to extra: rev-detection check).
$foidb->save($obj, true); $foidb->save($obj, true);
   
   
function createDocumentsDesignDoc() { //function createDocumentsDesignDoc() {
/* $docdb = $server->get_db('disclosr-documents');
global $db;  
$obj = new stdClass(); $obj = new stdClass();
$obj->_id = "_design/" . urlencode("app"); $obj->_id = "_design/" . urlencode("app");
$obj->language = "javascript"; $obj->language = "javascript";
$obj->views->all->map = "function(doc) { emit(doc._id, doc); };"; $obj->views->web_server->map = "function(doc) {\n emit(doc.web_server, 1);\n}";
$obj->views->byABN->map = "function(doc) { emit(doc.abn, doc); };"; $obj->views->web_server->reduce = "function (key, values, rereduce) {\n return sum(values);\n}";
"views": { $obj->views->byAgency->map = "function(doc) {\n emit(doc.agencyID, 1);\n}";
"web_server": { $obj->views->byAgency->reduce = "function (key, values, rereduce) {\n return sum(values);\n}";
"map": "function(doc) {\n emit(doc.web_server, 1);\n}", $obj->views->byURL->map = "function(doc) {\n emit(doc.url, doc);\n}";
"reduce": "function (key, values, rereduce) {\n return sum(values);\n}" $obj->views->agency->map = "function(doc) {\n emit(doc.agencyID, doc);\n}";
}, $obj->views->byWebServer->map = "function(doc) {\n emit(doc.web_server, doc);\n}";
"byAgency": { $obj->views->getValidationRequired = "function(doc) {\nif (doc.mime_type == \"text/html\" \n&& typeof(doc.validation) == \"undefined\") {\n emit(doc._id, doc._attachments);\n}\n}";
"map": "function(doc) {\n emit(doc.agencyID, 1);\n}",  
"reduce": "function (key, values, rereduce) {\n return sum(values);\n}"  
},  
"byURL": {  
"map": "function(doc) {\n emit(doc.url, doc);\n}"  
},  
"agency": {  
"map": "function(doc) {\n emit(doc.agencyID, doc);\n}"  
},  
"byWebServer": {  
"map": "function(doc) {\n emit(doc.web_server, doc);\n}"  
},  
"getValidationRequired": {  
"map": "function(doc) {\nif (doc.mime_type == \"text/html\" \n&& typeof(doc.validation) == \"undefined\") {\n emit(doc._id, doc._attachments);\n}\n}"  
}  
} */  
}  
   
//function createAgencyDesignDoc() { //function createAgencyDesignDoc() {
$db = $server->get_db('disclosr-agencies'); $db = $server->get_db('disclosr-agencies');
$obj = new stdClass(); $obj = new stdClass();
$obj->_id = "_design/" . urlencode("app"); $obj->_id = "_design/" . urlencode("app");
$obj->language = "javascript"; $obj->language = "javascript";
$obj->views->all->map = "function(doc) { emit(doc._id, doc); };"; $obj->views->all->map = "function(doc) { emit(doc._id, doc); };";
$obj->views->byABN->map = "function(doc) { emit(doc.abn, doc); };"; $obj->views->byABN->map = "function(doc) { emit(doc.abn, doc); };";
$obj->views->byCanonicalName->map = "function(doc) { $obj->views->byCanonicalName->map = "function(doc) {
if (doc.parentOrg || doc.orgType == 'FMA-DepartmentOfState') { if (doc.parentOrg || doc.orgType == 'FMA-DepartmentOfState') {
emit(doc.name, doc); emit(doc.name, doc);
} }
};"; };";
$obj->views->byDeptStateName->map = "function(doc) { $obj->views->byDeptStateName->map = "function(doc) {
if (doc.orgType == 'FMA-DepartmentOfState') { if (doc.orgType == 'FMA-DepartmentOfState') {
emit(doc.name, doc._id); emit(doc.name, doc._id);
} }
};"; };";
$obj->views->parentOrgs->map = "function(doc) { $obj->views->parentOrgs->map = "function(doc) {
if (doc.parentOrg) { if (doc.parentOrg) {
emit(doc._id, doc.parentOrg); emit(doc._id, doc.parentOrg);
} }
};"; };";
$obj->views->byName->map = 'function(doc) { $obj->views->byName->map = 'function(doc) {
if (typeof(doc["status"]) == "undefined" || doc["status"] != "suspended") { if (typeof(doc["status"]) == "undefined" || doc["status"] != "suspended") {
emit(doc.name, doc._id); emit(doc.name, doc._id);
if (typeof(doc.shortName) != "undefined" && doc.shortName != doc.name) { if (typeof(doc.shortName) != "undefined" && doc.shortName != doc.name) {
emit(doc.shortName, doc._id); emit(doc.shortName, doc._id);
} }
for (name in doc.otherNames) { for (name in doc.otherNames) {
if (doc.otherNames[name] != "" && doc.otherNames[name] != doc.name) { if (doc.otherNames[name] != "" && doc.otherNames[name] != doc.name) {
emit(doc.otherNames[name], doc._id); emit(doc.otherNames[name], doc._id);
} }
} }
for (name in doc.foiBodies) { for (name in doc.foiBodies) {
if (doc.foiBodies[name] != "" && doc.foiBodies[name] != doc.name) { if (doc.foiBodies[name] != "" && doc.foiBodies[name] != doc.name) {
emit(doc.foiBodies[name], doc._id); emit(doc.foiBodies[name], doc._id);
} }
} }
for (name in doc.positions) { for (name in doc.positions) {
if (doc.positions[name] != "" && doc.positions[name] != doc.name) { if (doc.positions[name] != "" && doc.positions[name] != doc.name) {
emit(doc.positions[name], doc._id); emit(doc.positions[name], doc._id);
} }
} }
} }
};'; };';
   
$obj->views->foiEmails->map = "function(doc) { $obj->views->foiEmails->map = "function(doc) {
emit(doc._id, doc.foiEmail); emit(doc._id, doc.foiEmail);
};"; };";
   
$obj->views->byLastModified->map = "function(doc) { emit(doc.metadata.lastModified, doc); }"; $obj->views->byLastModified->map = "function(doc) { emit(doc.metadata.lastModified, doc); }";
$obj->views->getActive->map = 'function(doc) { if (doc.status == "active") { emit(doc._id, doc); } };'; $obj->views->getActive->map = 'function(doc) { if (doc.status == "active") { emit(doc._id, doc); } };';
$obj->views->getSuspended->map = 'function(doc) { if (doc.status == "suspended") { emit(doc._id, doc); } };'; $obj->views->getSuspended->map = 'function(doc) { if (doc.status == "suspended") { emit(doc._id, doc); } };';
$obj->views->getScrapeRequired->map = "function(doc) { $obj->views->getScrapeRequired->map = "function(doc) {
   
var lastScrape = Date.parse(doc.metadata.lastScraped); var lastScrape = Date.parse(doc.metadata.lastScraped);
   
var today = new Date(); var today = new Date();
   
if (!lastScrape || lastScrape.getTime() + 1000 != today.getTime()) { if (!lastScrape || lastScrape.getTime() + 1000 != today.getTime()) {
emit(doc._id, doc); emit(doc._id, doc);
} }
   
};"; };";
$obj->views->showNamesABNs->map = "function(doc) { emit(doc._id, {name: doc.name, abn: doc.abn}); };"; $obj->views->showNamesABNs->map = "function(doc) { emit(doc._id, {name: doc.name, abn: doc.abn}); };";
$obj->views->getConflicts->map = "function(doc) { $obj->views->getConflicts->map = "function(doc) {
if (doc._conflicts) { if (doc._conflicts) {
emit(null, [doc._rev].concat(doc._conflicts)); emit(null, [doc._rev].concat(doc._conflicts));
} }
}"; }";
// http://stackoverflow.com/questions/646628/javascript-startswith // http://stackoverflow.com/questions/646628/javascript-startswith
$obj->views->score->map = 'if(!String.prototype.startsWith){ $obj->views->score->map = 'if(!String.prototype.startsWith){
String.prototype.startsWith = function (str) { String.prototype.startsWith = function (str) {
return !this.indexOf(str); return !this.indexOf(str);
} }
} }
   
function(doc) { function(doc) {
count = 0; count = 0;
if (doc["status"] != "suspended") { if (doc["status"] != "suspended") {
for(var propName in doc) { for(var propName in doc) {
if(typeof(doc[propName]) != "undefined" && doc[propName] != "") { if(typeof(doc[propName]) != "undefined" && doc[propName] != "") {
count++; count++;
} }
} }
portfolio = doc.parentOrg; portfolio = doc.parentOrg;
if (doc.orgType == "FMA-DepartmentOfState") { if (doc.orgType == "FMA-DepartmentOfState") {
portfolio = doc._id; portfolio = doc._id;
} }
if (doc.orgType == "Court-Commonwealth" || doc.orgType == "FMA-DepartmentOfParliament") { if (doc.orgType == "Court-Commonwealth" || doc.orgType == "FMA-DepartmentOfParliament") {
portfolio = doc.orgType; portfolio = doc.orgType;
} }
emit(count+doc._id, {id:doc._id, name: doc.name, score:count, orgType: doc.orgType, portfolio:portfolio}); emit(count+doc._id, {id:doc._id, name: doc.name, score:count, orgType: doc.orgType, portfolio:portfolio});
} }
}'; }';
$obj->views->scoreHas->map = 'if(!String.prototype.startsWith){ $obj->views->scoreHas->map = 'if(!String.prototype.startsWith){
String.prototype.startsWith = function (str) { String.prototype.startsWith = function (str) {
return !this.indexOf(str); return !this.indexOf(str);
} }
} }
if(!String.prototype.endsWith){ if(!String.prototype.endsWith){
String.prototype.endsWith = function(suffix) { String.prototype.endsWith = function(suffix) {
    return this.indexOf(suffix, this.length - suffix.length) !== -1;     return this.indexOf(suffix, this.length - suffix.length) !== -1;
}; };
} }
function(doc) { function(doc) {
if (typeof(doc["status"]) == "undefined" || doc["status"] != "suspended") { if (typeof(doc["status"]) == "undefined" || doc["status"] != "suspended") {
for(var propName in doc) { for(var propName in doc) {
if(typeof(doc[propName]) != "undefined" && (propName.startsWith("has") || propName.endsWith("URL"))) { if(typeof(doc[propName]) != "undefined" && (propName.startsWith("has") || propName.endsWith("URL"))) {
emit(propName, 1); emit(propName, 1);
} }
} }
emit("total", 1); emit("total", 1);
} }
}'; }';
$obj->views->scoreHas->reduce = 'function (key, values, rereduce) { $obj->views->scoreHas->reduce = 'function (key, values, rereduce) {
return sum(values); return sum(values);
}'; }';
$obj->views->fieldNames->map = ' $obj->views->fieldNames->map = '
function(doc) { function(doc) {
for(var propName in doc) { for(var propName in doc) {
emit(propName, doc._id); emit(propName, doc._id);
} }
}'; }';
$obj->views->fieldNames->reduce = 'function (key, values, rereduce) { $obj->views->fieldNames->reduce = 'function (key, values, rereduce) {
return values.length; return values.length;
}'; }';
// allow safe updates (even if slightly slower due to extra: rev-detection check). // allow safe updates (even if slightly slower due to extra: rev-detection check).
$db->save($obj, true); $db->save($obj, true);
   
   
?> ?>
   
<?php  
 
/**  
* Databaase class.  
*/  
class SetteeDatabase {  
 
/**  
* Base URL of the CouchDB REST API  
*/  
private $conn_url;  
 
/**  
* HTTP REST Client instance  
*/  
protected $rest_client;  
 
/**  
* Name of the database  
*/  
private $dbname;  
 
/**  
* Default constructor  
*/  
function __construct($conn_url, $dbname) {  
$this->conn_url = $conn_url;  
$this->dbname = $dbname;  
$this->rest_client = SetteeRestClient::get_instance($this->conn_url);  
}  
 
 
/**  
* Get UUID from CouchDB  
*  
* @return  
* CouchDB-generated UUID string  
*  
*/  
function gen_uuid() {  
$ret = $this->rest_client->http_get('_uuids');  
return $ret['decoded']->uuids[0]; // should never be empty at this point, so no checking  
}  
 
/**  
* Create or update a document database  
*  
* @param $document  
* PHP object, a PHP associative array, or a JSON String representing the document to be saved. PHP Objects and arrays are JSON-encoded automatically.  
*  
* <p>If $document has a an "_id" property set, it will be used as document's unique id (even for "create" operation).  
* If "_id" is missing, CouchDB will be used to generate a UUID.  
*  
* <p>If $document has a "_rev" property (revision), document will be updated, rather than creating a new document.  
* You have to provide "_rev" if you want to update an existing document, otherwise operation will be assumed to be  
* one of creation and you will get a duplicate document exception from CouchDB. Also, you may not provide "_rev" but  
* not provide "_id" since that is an invalid input.  
*  
* @param $allowRevAutoDetection  
* Default: false. When true and _rev is missing from the document, save() function will auto-detect latest revision  
* for a document and use it. This option is "false" by default because it involves an extra http HEAD request and  
* therefore can make save() operation slightly slower if such auto-detection is not required.  
*  
* @return  
* document object with the database id (uuid) and revision