pagination
pagination


Former-commit-id: 65ec7105144f184adf61844dc454c637d8b350ea

[submodule "couchdb/couchdb-lucene"] [submodule "couchdb/couchdb-lucene"]
path = couchdb/couchdb-lucene path = couchdb/couchdb-lucene
url = https://github.com/rnewson/couchdb-lucene.git url = https://github.com/rnewson/couchdb-lucene.git
[submodule "couchdb/settee"]  
path = couchdb/settee  
url = https://github.com/inadarei/settee.git  
[submodule "lib/php-diff"] [submodule "lib/php-diff"]
path = lib/php-diff path = lib/php-diff
url = https://github.com/chrisboulton/php-diff.git url = https://github.com/chrisboulton/php-diff.git
[submodule "lib/Requests"] [submodule "lib/Requests"]
path = lib/Requests path = lib/Requests
url = https://github.com/rmccue/Requests.git url = https://github.com/rmccue/Requests.git
[submodule "js/flotr2"] [submodule "js/flotr2"]
path = js/flotr2 path = js/flotr2
url = https://github.com/HumbleSoftware/Flotr2.git url = https://github.com/HumbleSoftware/Flotr2.git
[submodule "lib/phpquery"] [submodule "lib/phpquery"]
path = lib/phpquery path = lib/phpquery
url = https://github.com/TobiaszCudnik/phpquery.git url = https://github.com/TobiaszCudnik/phpquery.git
[submodule "js/sigma"] [submodule "js/sigma"]
path = js/sigma path = js/sigma
url = https://github.com/jacomyal/sigma.js.git url = https://github.com/jacomyal/sigma.js.git
[submodule "js/bubbletree"] [submodule "js/bubbletree"]
path = js/bubbletree path = js/bubbletree
url = https://github.com/okfn/bubbletree.git url = https://github.com/okfn/bubbletree.git
[submodule "lib/querypath"] [submodule "lib/querypath"]
path = lib/querypath path = lib/querypath
url = https://github.com/technosophos/querypath.git url = https://github.com/technosophos/querypath.git
[submodule "lib/amon-php"] [submodule "lib/amon-php"]
path = lib/amon-php path = lib/amon-php
url = https://github.com/martinrusev/amon-php.git url = https://github.com/martinrusev/amon-php.git
[submodule "documents/lib/parsedatetime"] [submodule "documents/lib/parsedatetime"]
path = documents/lib/parsedatetime path = documents/lib/parsedatetime
url = git://github.com/bear/parsedatetime.git url = git://github.com/bear/parsedatetime.git
  [submodule "lib/FeedWriter"]
  path = lib/FeedWriter
  url = https://github.com/mibe/FeedWriter
   
<?php <?php
   
include_once("../include/common.inc.php"); include_once("../include/common.inc.php");
   
$format = "csv"; $format = "csv";
//$format = "json"; //$format = "json";
if (isset($_REQUEST['format'])) $format = $_REQUEST['format']; if (isset($_REQUEST['format']))
  $format = $_REQUEST['format'];
setlocale(LC_CTYPE, 'C'); setlocale(LC_CTYPE, 'C');
if ($format == "csv") { if ($format == "csv") {
$headers = Array("name"); $headers = Array("name");
} else { } else {
$headers = Array(); $headers = Array();
} }
   
$db = $server->get_db('disclosr-agencies'); $db = $server->get_db('disclosr-agencies');
try { try {
$rows = $db->get_view("app", "all", null, true)->rows; $rows = $db->get_view("app", "all", null, true)->rows;
   
$dataValues = Array(); $dataValues = Array();
foreach ($rows as $row) { foreach ($rows as $row) {
if (isset($row->value->statistics->employees)) { if (isset($row->value->statistics->employees)) {
   
$headers = array_unique(array_merge($headers, array_keys(object_to_array($row->value->statistics->employees)))); $headers = array_unique(array_merge($headers, array_keys(object_to_array($row->value->statistics->employees))));
   
} }
} }
} catch (SetteeRestClientException $e) { } catch (SetteeRestClientException $e) {
setteErrorHandler($e); setteErrorHandler($e);
} }
   
$fp = fopen('php://output', 'w'); $fp = fopen('php://output', 'w');
if ($fp && $db) { if ($fp && $db) {
if ($format == "csv") { if ($format == "csv") {
header('Content-Type: text/csv; charset=utf-8'); header('Content-Type: text/csv; charset=utf-8');
header('Content-Disposition: attachment; filename="export.employeestats.' . date("c") . '.csv"'); header('Content-Disposition: attachment; filename="export.employeestats.' . date("c") . '.csv"');
} }
header('Pragma: no-cache'); header('Pragma: no-cache');
header('Expires: 0'); header('Expires: 0');
if ($format == "csv") { if ($format == "csv") {
fputcsv($fp, $headers); fputcsv($fp, $headers);
} else if ($format == "json") { } else if ($format == "json") {
echo '{ echo '{
"labels" : ["' . implode('","', $headers) . '"],'.PHP_EOL; "labels" : ["' . implode('","', $headers) . '"],' . PHP_EOL;
} }
try { try {
$agencies = $db->get_view("app", "all", null, true)->rows; $agencies = $db->get_view("app", "all", null, true)->rows;
//print_r($agencies); //print_r($agencies);
$first = true; $first = true;
if ($format == "json") { if ($format == "json") {
echo '"data" : ['.PHP_EOL; echo '"data" : [' . PHP_EOL;
   
} }
foreach ($agencies as $agency) { foreach ($agencies as $agency) {
   
if (isset($agency->value->statistics->employees)) { if (isset($agency->value->statistics->employees)) {
$row = Array(); $row = Array();
$agencyEmployeesArray = object_to_array($agency->value->statistics->employees); $agencyEmployeesArray = object_to_array($agency->value->statistics->employees);
foreach ($headers as $i => $fieldName) { foreach ($headers as $i => $fieldName) {
  if ($format == "csv") {
  if (isset($agencyEmployeesArray[$fieldName])) {
  $row[] = $agencyEmployeesArray[$fieldName]["value"] ;
  } else if ($i == 0) {
  $row[] = $agency->value->name;
  } else {
  $row[] = 0;
  }
  } else if ($format == "json") {
if (isset($agencyEmployeesArray[$fieldName])) { if (isset($agencyEmployeesArray[$fieldName])) {
$row[] = '['.$i.','.$agencyEmployeesArray[$fieldName]["value"].']'; $row[] = '[' . $i . ',' . $agencyEmployeesArray[$fieldName]["value"] . ']';
} else { } else {
$row[] = '['.$i.',0]'; $row[] = '[' . $i . ',0]';
} }
  }
} }
if ($format == "csv") { if ($format == "csv") {
fputcsv($fp, array_values($row)); fputcsv($fp, array_values($row));
} else if ($format == "json") { } else if ($format == "json") {
if (!$first) echo ","; if (!$first)
echo '{"data" : [' . implode(",", array_values($row)) . '], "label": "'.$agency->value->name.'", "lines" : { "show" : true }, "points" : { "show" : true }}'.PHP_EOL; echo ",";
  echo '{"data" : [' . implode(",", array_values($row)) . '], "label": "' . $agency->value->name . '", "lines" : { "show" : true }, "points" : { "show" : true }}' . PHP_EOL;
$first = false; $first = false;
} }
} }
} }
if ($format == "json") { if ($format == "json") {
echo '] echo ']
}'.PHP_EOL; }' . PHP_EOL;
   
} }
} catch (SetteeRestClientException $e) { } catch (SetteeRestClientException $e) {
setteErrorHandler($e); setteErrorHandler($e);
} }
   
die; die;
} }
?> ?>
   
<?php <?php
   
require_once '../include/common.inc.php'; require_once '../include/common.inc.php';
   
$db = $server->get_db('disclosr-agencies'); $db = $server->get_db('disclosr-agencies');
$rows = $db->get_view("app", "byName")->rows; $rows = $db->get_view("app", "byName")->rows;
$nametoid = Array(); $nametoid = Array();
$sums = Array(); $sums = Array();
foreach ($rows as $row) { foreach ($rows as $row) {
$nametoid[trim($row->key)] = $row->value; $nametoid[trim($row->key)] = $row->value;
} }
$employeeCSVs = Array("2002-2003" => "0203apsemployees.csv", $employeeCSVs = Array("2002-2003" => "0203apsemployees.csv",
"2003-2004" => "0304apsemployees.csv", "2003-2004" => "0304apsemployees.csv",
"2004-2005" => "0405apsemployees.csv", "2004-2005" => "0405apsemployees.csv",
"2005-2006" => "0506apsemployees.csv", "2005-2006" => "0506apsemployees.csv",
"2006-2007" => "0607apsemployees.csv", "2006-2007" => "0607apsemployees.csv",
"2007-2008" => "0708apsemployees.csv", "2007-2008" => "0708apsemployees.csv",
"2008-2009" => "0809apsemployees.csv", "2008-2009" => "0809apsemployees.csv",
"2009-2010" => "0910apsemployees.csv", "2009-2010" => "0910apsemployees.csv",
"2010-2011" => "1011apsemployees.csv" "2010-2011" => "1011apsemployees.csv"
); );
foreach ($employeeCSVs as $timePeriod => $employeeCSV) { foreach ($employeeCSVs as $timePeriod => $employeeCSV) {
echo $employeeCSV . "<br>" . PHP_EOL; echo $employeeCSV . "<br>" . PHP_EOL;
$row = 1; $row = 1;
if (($handle = fopen($employeeCSV, "r")) !== FALSE) { if (($handle = fopen($employeeCSV, "r")) !== FALSE) {
while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) { while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) {
//print_r($data); //print_r($data);
$name = trim($data[0]); $name = trim($data[0]);
if (isset($nametoid[$name])) { if (isset($nametoid[$name])) {
$id = $nametoid[$name]; $id = $nametoid[$name];
//echo $id . "<br>" . PHP_EOL; //echo $id . "<br>" . PHP_EOL;
@$sums[$id][$timePeriod] += $data[1]; @$sums[$id][$timePeriod] += $data[1];
} else { } else {
echo "<br>ERROR NAME MISSING FROM ID LIST<br><bR>" . PHP_EOL; echo "<br>ERROR NAME MISSING FROM ID LIST<br><bR>" . PHP_EOL;
   
die(); die();
} }
} }
fclose($handle); fclose($handle);
} }
} }
foreach ($sums as $id => $sum) { foreach ($sums as $id => $sum) {
echo $id . "<br>" . PHP_EOL; echo $id . "<br>" . PHP_EOL;
$doc = $db->get($id); $doc = $db->get($id);
echo $doc->name . "<br>" . PHP_EOL; echo $doc->name . "<br>" . PHP_EOL;
// print_r($doc); // print_r($doc);
$changed = false; $changed = false;
if (!isset($doc->statistics)) { if (!isset($doc->statistics)) {
$changed = true; $changed = true;
$doc->statistics = Array(); $doc->statistics = new stdClass();
  }
  if (!isset($doc->statistics->employees)) {
  $changed = true;
  $doc->statistics->employees = new stdClass();
} }
foreach ($sum as $timePeriod => $value) { foreach ($sum as $timePeriod => $value) {
if (!isset($doc->statistics->employees->$timePeriod->value) if (!isset($doc->statistics->employees->$timePeriod->value)
|| $doc->statistics->employees->$timePeriod->value != $value) { || $doc->statistics->employees->$timePeriod->value != $value) {
$changed = true; $changed = true;
$doc->statistics["employees"][$timePeriod] = Array("value" => $value, "source" => "http://apsc.gov.au/stateoftheservice/"); $doc->statistics->employees->$timePeriod = Array("value" => $value, "source" => "http://apsc.gov.au/stateoftheservice/");
} }
} }
if ($changed) { if ($changed) {
$db->save($doc); $db->save($doc);
} else { } else {
echo "not changed" . "<br>" . PHP_EOL; echo "not changed" . "<br>" . PHP_EOL;
} }
} }
// employees: timeperiod, source = apsc state of service, value // employees: timeperiod, source = apsc state of service, value
?> ?>
   
<?php

// One-off importer: scrape the 2011-2012 APS agency employee counts from the
// APSC "State of the Service" appendix table and fold them into the
// disclosr-agencies CouchDB documents.
require_once '../include/common.inc.php';
require($basePath . 'lib/phpquery/phpQuery/phpQuery.php');

$db = $server->get_db('disclosr-agencies');

// Build a name -> document-id lookup from the byName view (keys include
// short names and other aliases emitted by that view).
$agencyIdsByName = Array();
foreach ($db->get_view("app", "byName")->rows as $viewRow) {
    $agencyIdsByName[trim($viewRow->key)] = $viewRow->value;
}

$employeeTotals = Array();  // doc id => (time period => employee count)
$functionsById = Array();   // doc id => function classification text

$request = Requests::get("http://www.apsc.gov.au/publications-and-media/parliamentary/state-of-the-service/new-sosr/appendix-2-aps-agencies");
$htmlDoc = phpQuery::newDocumentHTML($request->body);
phpQuery::selectDocument($htmlDoc);

foreach (pq('tr')->elements as $tableRow) {
    // Columns of interest: 0 = agency name, 2 = headcount, 4 = function.
    $agency = "";
    $employees = "";
    $function = "";
    $cellIndex = 0;
    foreach ($tableRow->childNodes as $cell) {
        if ($cellIndex == 0) {
            $agency = $cell->nodeValue;
        } else if ($cellIndex == 2) {
            // Strip thousands separators so the count adds numerically.
            $employees = trim(str_replace(",", "", $cell->nodeValue));
        } else if ($cellIndex == 4) {
            $function = $cell->nodeValue;
        }
        $cellIndex++;
    }
    if ($agency != "" && $employees != "" && $function != "") {
        // Drop the '2' footnote marker appended to some agency names.
        $name = trim(str_replace('2', '', $agency));
        if (isset($agencyIdsByName[$name])) {
            $id = $agencyIdsByName[$name];
            // '@' suppresses the undefined-index notice on first accumulation.
            @$employeeTotals[$id]["2011-2012"] += $employees;
            $functionsById[$id] = $function;
        } else if ($agency != "Agency") {
            // Anything other than the header row must resolve to a known agency.
            echo "<br>ERROR NAME '$agency' MISSING FROM ID LIST<br><bR>" . PHP_EOL;

            die();
        }
    } else {
        echo "skipped $agency";
    }
}
//print_r($employeeTotals);

// Write the scraped totals back into each agency document, but only
// save when something actually changed.
foreach ($employeeTotals as $id => $totalsByPeriod) {
    echo $id . "<br>" . PHP_EOL;
    $agencyDoc = $db->get($id);
    echo $agencyDoc->name . "<br>" . PHP_EOL;
    $changed = false;
    if (!isset($agencyDoc->statistics)) {
        $changed = true;
        $agencyDoc->statistics = new stdClass();
    }
    if (!isset($agencyDoc->statistics->employees)) {
        $changed = true;
        $agencyDoc->statistics->employees = new stdClass();
    }
    foreach ($totalsByPeriod as $timePeriod => $value) {
        if (!isset($agencyDoc->statistics->employees->$timePeriod->value)
                || $agencyDoc->statistics->employees->$timePeriod->value != $value) {
            $changed = true;
            $agencyDoc->statistics->employees->$timePeriod = Array("value" => $value, "source" => "http://apsc.gov.au/stateoftheservice/");
            // Also denormalise the latest figures onto the document root.
            $agencyDoc->employees = $value;
            $agencyDoc->functionClassification = $functionsById[$id];
        }
    }

    if ($changed) {
        $db->save($agencyDoc);
    } else {
        echo "not changed" . "<br>" . PHP_EOL;
    }
}
// employees: timeperiod, source = apsc state of service, value
?>
 
<?php <?php
   
require_once '../include/common.inc.php'; require_once '../include/common.inc.php';
//function createFOIDocumentsDesignDoc() { //function createFOIDocumentsDesignDoc() {
   
$foidb = $server->get_db('disclosr-foidocuments'); $foidb = $server->get_db('disclosr-foidocuments');
$obj = new stdClass(); $obj = new stdClass();
$obj->_id = "_design/" . urlencode("app"); $obj->_id = "_design/" . urlencode("app");
$obj->language = "javascript"; $obj->language = "javascript";
$obj->views->all->map = "function(doc) { emit(doc._id, doc); };"; $obj->views->all->map = "function(doc) { emit(doc._id, doc); };";
$obj->views->byDate->map = "function(doc) { emit(doc.date, doc); };"; $obj->views->byDate->map = "function(doc) { emit(doc.date, doc); };";
$obj->views->byDate->reduce = "_count"; $obj->views->byDate->reduce = "_count";
$obj->views->byAgencyID->map = "function(doc) { emit(doc.agencyID, doc); };"; $obj->views->byDateMonthYear->map = "function(doc) { emit(doc.date, doc); };";
$obj->views->byAgencyID->reduce = "_count"; $obj->views->byDateMonthYear->reduce = "_count";
  $obj->views->byAgencyID->map = "function(doc) { emit(doc.agencyID, doc); };";
  $obj->views->byAgencyID->reduce = "_count";
   
// allow safe updates (even if slightly slower due to extra: rev-detection check). // allow safe updates (even if slightly slower due to extra: rev-detection check).
$foidb->save($obj, true); $foidb->save($obj, true);
   
   
function createDocumentsDesignDoc() { //function createDocumentsDesignDoc() {
/* $docdb = $server->get_db('disclosr-documents');
global $db;  
$obj = new stdClass(); $obj = new stdClass();
$obj->_id = "_design/" . urlencode("app"); $obj->_id = "_design/" . urlencode("app");
$obj->language = "javascript"; $obj->language = "javascript";
$obj->views->all->map = "function(doc) { emit(doc._id, doc); };"; $obj->views->web_server->map = "function(doc) {\n emit(doc.web_server, 1);\n}";
$obj->views->byABN->map = "function(doc) { emit(doc.abn, doc); };"; $obj->views->web_server->reduce = "function (key, values, rereduce) {\n return sum(values);\n}";
"views": { $obj->views->byAgency->map = "function(doc) {\n emit(doc.agencyID, 1);\n}";
"web_server": { $obj->views->byAgency->reduce = "function (key, values, rereduce) {\n return sum(values);\n}";
"map": "function(doc) {\n emit(doc.web_server, 1);\n}", $obj->views->byURL->map = "function(doc) {\n emit(doc.url, doc);\n}";
"reduce": "function (key, values, rereduce) {\n return sum(values);\n}" $obj->views->agency->map = "function(doc) {\n emit(doc.agencyID, doc);\n}";
}, $obj->views->byWebServer->map = "function(doc) {\n emit(doc.web_server, doc);\n}";
"byAgency": { $obj->views->getValidationRequired = "function(doc) {\nif (doc.mime_type == \"text/html\" \n&& typeof(doc.validation) == \"undefined\") {\n emit(doc._id, doc._attachments);\n}\n}";
"map": "function(doc) {\n emit(doc.agencyID, 1);\n}",  
"reduce": "function (key, values, rereduce) {\n return sum(values);\n}"  
},  
"byURL": {  
"map": "function(doc) {\n emit(doc.url, doc);\n}"  
},  
"agency": {  
"map": "function(doc) {\n emit(doc.agencyID, doc);\n}"  
},  
"byWebServer": {  
"map": "function(doc) {\n emit(doc.web_server, doc);\n}"  
},  
"getValidationRequired": {  
"map": "function(doc) {\nif (doc.mime_type == \"text/html\" \n&& typeof(doc.validation) == \"undefined\") {\n emit(doc._id, doc._attachments);\n}\n}"  
}  
} */  
}  
   
//function createAgencyDesignDoc() { //function createAgencyDesignDoc() {
$db = $server->get_db('disclosr-agencies'); $db = $server->get_db('disclosr-agencies');
$obj = new stdClass(); $obj = new stdClass();
$obj->_id = "_design/" . urlencode("app"); $obj->_id = "_design/" . urlencode("app");
$obj->language = "javascript"; $obj->language = "javascript";
$obj->views->all->map = "function(doc) { emit(doc._id, doc); };"; $obj->views->all->map = "function(doc) { emit(doc._id, doc); };";
$obj->views->byABN->map = "function(doc) { emit(doc.abn, doc); };"; $obj->views->byABN->map = "function(doc) { emit(doc.abn, doc); };";
$obj->views->byCanonicalName->map = "function(doc) { $obj->views->byCanonicalName->map = "function(doc) {
if (doc.parentOrg || doc.orgType == 'FMA-DepartmentOfState') { if (doc.parentOrg || doc.orgType == 'FMA-DepartmentOfState') {
emit(doc.name, doc); emit(doc.name, doc);
} }
};"; };";
$obj->views->byDeptStateName->map = "function(doc) { $obj->views->byDeptStateName->map = "function(doc) {
if (doc.orgType == 'FMA-DepartmentOfState') { if (doc.orgType == 'FMA-DepartmentOfState') {
emit(doc.name, doc._id); emit(doc.name, doc._id);
} }
};"; };";
$obj->views->parentOrgs->map = "function(doc) { $obj->views->parentOrgs->map = "function(doc) {
if (doc.parentOrg) { if (doc.parentOrg) {
emit(doc._id, doc.parentOrg); emit(doc._id, doc.parentOrg);
} }
};"; };";
$obj->views->byName->map = 'function(doc) { $obj->views->byName->map = 'function(doc) {
if (typeof(doc["status"]) == "undefined" || doc["status"] != "suspended") { if (typeof(doc["status"]) == "undefined" || doc["status"] != "suspended") {
emit(doc.name, doc._id); emit(doc.name, doc._id);
if (typeof(doc.shortName) != "undefined" && doc.shortName != doc.name) { if (typeof(doc.shortName) != "undefined" && doc.shortName != doc.name) {
emit(doc.shortName, doc._id); emit(doc.shortName, doc._id);
} }
for (name in doc.otherNames) { for (name in doc.otherNames) {
if (doc.otherNames[name] != "" && doc.otherNames[name] != doc.name) { if (doc.otherNames[name] != "" && doc.otherNames[name] != doc.name) {
emit(doc.otherNames[name], doc._id); emit(doc.otherNames[name], doc._id);
} }
} }
for (name in doc.foiBodies) { for (name in doc.foiBodies) {
if (doc.foiBodies[name] != "" && doc.foiBodies[name] != doc.name) { if (doc.foiBodies[name] != "" && doc.foiBodies[name] != doc.name) {
emit(doc.foiBodies[name], doc._id); emit(doc.foiBodies[name], doc._id);
} }
} }
for (name in doc.positions) { for (name in doc.positions) {
if (doc.positions[name] != "" && doc.positions[name] != doc.name) { if (doc.positions[name] != "" && doc.positions[name] != doc.name) {
emit(doc.positions[name], doc._id); emit(doc.positions[name], doc._id);
} }
} }
} }
};'; };';
   
$obj->views->foiEmails->map = "function(doc) { $obj->views->foiEmails->map = "function(doc) {
emit(doc._id, doc.foiEmail); emit(doc._id, doc.foiEmail);
};"; };";
   
$obj->views->byLastModified->map = "function(doc) { emit(doc.metadata.lastModified, doc); }"; $obj->views->byLastModified->map = "function(doc) { emit(doc.metadata.lastModified, doc); }";
$obj->views->getActive->map = 'function(doc) { if (doc.status == "active") { emit(doc._id, doc); } };'; $obj->views->getActive->map = 'function(doc) { if (doc.status == "active") { emit(doc._id, doc); } };';
$obj->views->getSuspended->map = 'function(doc) { if (doc.status == "suspended") { emit(doc._id, doc); } };'; $obj->views->getSuspended->map = 'function(doc) { if (doc.status == "suspended") { emit(doc._id, doc); } };';
$obj->views->getScrapeRequired->map = "function(doc) { $obj->views->getScrapeRequired->map = "function(doc) {
   
var lastScrape = Date.parse(doc.metadata.lastScraped); var lastScrape = Date.parse(doc.metadata.lastScraped);
   
var today = new Date(); var today = new Date();
   
if (!lastScrape || lastScrape.getTime() + 1000 != today.getTime()) { if (!lastScrape || lastScrape.getTime() + 1000 != today.getTime()) {
emit(doc._id, doc); emit(doc._id, doc);
} }
   
};"; };";
$obj->views->showNamesABNs->map = "function(doc) { emit(doc._id, {name: doc.name, abn: doc.abn}); };"; $obj->views->showNamesABNs->map = "function(doc) { emit(doc._id, {name: doc.name, abn: doc.abn}); };";
$obj->views->getConflicts->map = "function(doc) { $obj->views->getConflicts->map = "function(doc) {
if (doc._conflicts) { if (doc._conflicts) {
emit(null, [doc._rev].concat(doc._conflicts)); emit(null, [doc._rev].concat(doc._conflicts));
} }
}"; }";
// http://stackoverflow.com/questions/646628/javascript-startswith // http://stackoverflow.com/questions/646628/javascript-startswith
$obj->views->score->map = 'if(!String.prototype.startsWith){ $obj->views->score->map = 'if(!String.prototype.startsWith){
String.prototype.startsWith = function (str) { String.prototype.startsWith = function (str) {
return !this.indexOf(str); return !this.indexOf(str);
} }
} }
   
function(doc) { function(doc) {
count = 0; count = 0;
if (doc["status"] != "suspended") { if (doc["status"] != "suspended") {
for(var propName in doc) { for(var propName in doc) {
if(typeof(doc[propName]) != "undefined" && doc[propName] != "") { if(typeof(doc[propName]) != "undefined" && doc[propName] != "") {
count++; count++;
} }
} }
portfolio = doc.parentOrg; portfolio = doc.parentOrg;
if (doc.orgType == "FMA-DepartmentOfState") { if (doc.orgType == "FMA-DepartmentOfState") {
portfolio = doc._id; portfolio = doc._id;
} }
if (doc.orgType == "Court-Commonwealth" || doc.orgType == "FMA-DepartmentOfParliament") { if (doc.orgType == "Court-Commonwealth" || doc.orgType == "FMA-DepartmentOfParliament") {
portfolio = doc.orgType; portfolio = doc.orgType;
} }
emit(count+doc._id, {id:doc._id, name: doc.name, score:count, orgType: doc.orgType, portfolio:portfolio}); emit(count+doc._id, {id:doc._id, name: doc.name, score:count, orgType: doc.orgType, portfolio:portfolio});
} }
}'; }';
$obj->views->scoreHas->map = 'if(!String.prototype.startsWith){ $obj->views->scoreHas->map = 'if(!String.prototype.startsWith){
String.prototype.startsWith = function (str) { String.prototype.startsWith = function (str) {
return !this.indexOf(str); return !this.indexOf(str);
} }
} }
if(!String.prototype.endsWith){ if(!String.prototype.endsWith){
String.prototype.endsWith = function(suffix) { String.prototype.endsWith = function(suffix) {
    return this.indexOf(suffix, this.length - suffix.length) !== -1;     return this.indexOf(suffix, this.length - suffix.length) !== -1;
}; };
} }
function(doc) { function(doc) {
if (typeof(doc["status"]) == "undefined" || doc["status"] != "suspended") { if (typeof(doc["status"]) == "undefined" || doc["status"] != "suspended") {
for(var propName in doc) { for(var propName in doc) {
if(typeof(doc[propName]) != "undefined" && (propName.startsWith("has") || propName.endsWith("URL"))) { if(typeof(doc[propName]) != "undefined" && (propName.startsWith("has") || propName.endsWith("URL"))) {
emit(propName, 1); emit(propName, 1);
} }
} }
emit("total", 1); emit("total", 1);
} }
}'; }';
$obj->views->scoreHas->reduce = 'function (key, values, rereduce) { $obj->views->scoreHas->reduce = 'function (key, values, rereduce) {
return sum(values); return sum(values);
}'; }';
$obj->views->fieldNames->map = ' $obj->views->fieldNames->map = '
function(doc) { function(doc) {
for(var propName in doc) { for(var propName in doc) {
emit(propName, doc._id); emit(propName, doc._id);
} }
}'; }';
$obj->views->fieldNames->reduce = 'function (key, values, rereduce) { $obj->views->fieldNames->reduce = 'function (key, values, rereduce) {
return values.length; return values.length;
}'; }';
// allow safe updates (even if slightly slower due to extra: rev-detection check). // allow safe updates (even if slightly slower due to extra: rev-detection check).
$db->save($obj, true); $db->save($obj, true);
   
   
?> ?>
   
<?php  
 
/**  
* Databaase class.  
*/  
class SetteeDatabase {  
 
/**  
* Base URL of the CouchDB REST API  
*/  
private $conn_url;  
 
/**  
* HTTP REST Client instance  
*/  
protected $rest_client;  
 
/**  
* Name of the database  
*/  
private $dbname;  
 
/**  
* Default constructor  
*/  
function __construct($conn_url, $dbname) {  
$this->conn_url = $conn_url;  
$this->dbname = $dbname;  
$this->rest_client = SetteeRestClient::get_instance($this->conn_url);  
}  
 
 
/**  
* Get UUID from CouchDB  
*  
* @return  
* CouchDB-generated UUID string  
*  
*/  
function gen_uuid() {  
$ret = $this->rest_client->http_get('_uuids');  
return $ret['decoded']->uuids[0]; // should never be empty at this point, so no checking  
}  
 
/**  
* Create or update a document database  
*  
* @param $document  
* PHP object, a PHP associative array, or a JSON String representing the document to be saved. PHP Objects and arrays are JSON-encoded automatically.  
*  
* <p>If $document has a an "_id" property set, it will be used as document's unique id (even for "create" operation).  
* If "_id" is missing, CouchDB will be used to generate a UUID.  
*  
* <p>If $document has a "_rev" property (revision), document will be updated, rather than creating a new document.  
* You have to provide "_rev" if you want to update an existing document, otherwise operation will be assumed to be  
* one of creation and you will get a duplicate document exception from CouchDB. Also, you may not provide "_rev" but  
* not provide "_id" since that is an invalid input.  
*  
* @param $allowRevAutoDetection  
* Default: false. When true and _rev is missing from the document, save() function will auto-detect latest revision  
* for a document and use it. This option is "false" by default because it involves an extra http HEAD request and  
* therefore can make save() operation slightly slower if such auto-detection is not required.  
*  
* @return  
* document object with the database id (uuid) and revision attached;  
*  
* @throws SetteeCreateDatabaseException  
*/  
function save($document, $allowRevAutoDetection = false) {  
if (is_string($document)) {  
$document = json_decode($document);  
}  
 
// Allow passing of $document as an array (for syntactic simplicity and also because in JSON world it does not matter)  
if(is_array($document)) {  
$document = (object) $document;  
}  
 
if (empty($document->_id) && empty($document->_rev)) {  
$id = $this->gen_uuid();  
}  
elseif (empty($document->_id) && !empty($document->_rev)) {  
throw new SetteeWrongInputException("Error: You can not save a document with a revision provided, but missing id");  
}  
else {  
$id = $document->_id;  
 
if ($allowRevAutoDetection) {  
try {  
$rev = $this->get_rev($id);  
} catch (SetteeRestClientException $e) {  
// auto-detection may fail legitimately, if a document has never been saved before (new doc), so skipping error  
}  
if (!empty($rev)) {  
$document->_rev = $rev;  
}  
}  
}  
 
$full_uri = $this->dbname . "/" . $this->safe_urlencode($id);  
$document_json = json_encode($document, JSON_NUMERIC_CHECK);  
 
$ret = $this->rest_client->http_put($full_uri, $document_json);  
 
$document->_id = $ret['decoded']->id;  
$document->_rev = $ret['decoded']->rev;  
 
return $document;  
}  
 
/**  
* @param $doc  
* @param $name  
* @param $content  
* Content of the attachment in a string-buffer format. This function will automatically base64-encode content for  
* you, so you don't have to do it.  
* @param $mime_type  
* Optional. Will be auto-detected if not provided  
* @return void  
*/  
public function add_attachment($doc, $name, $content, $mime_type = null) {  
if (empty($doc->_attachments) || !is_object($doc->_attachments)) {  
$doc->_attachments = new stdClass();  
}  
 
if (empty($mime_type)) {  
$mime_type = $this->rest_client->content_mime_type($content);  
}  
 
$doc->_attachments->$name = new stdClass();  
$doc->_attachments->$name->content_type = $mime_type;  
$doc->_attachments->$name->data = base64_encode($content);  
}  
 
/**  
* @param $doc  
* @param $name  
* @param $file  
* Full path to a file (e.g. as returned by PHP's realpath function).  
* @param $mime_type  
* Optional. Will be auto-detected if not provided  
* @return void  
*/  
public function add_attachment_file($doc, $name, $file, $mime_type = null) {  
$content = file_get_contents($file);  
$this->add_attachment($doc, $name, $content, $mime_type);  
}  
 
/**  
*  
* Retrieve a document from CouchDB  
*  
* @throws SetteeWrongInputException  
*  
* @param $id  
* Unique ID (usually: UUID) of the document to be retrieved.  
* @return  
* database document in PHP object format.  
*/  
function get($id) {  
if (empty($id)) {  
throw new SetteeWrongInputException("Error: Can't retrieve a document without a uuid.");  
}  
 
$full_uri = $this->dbname . "/" . $this->safe_urlencode($id);  
$full_uri = str_replace("%3Frev%3D","?rev=",$full_uri);  
$ret = $this->rest_client->http_get($full_uri);  
return $ret['decoded'];  
}  
 
/**  
*  
* Get the latest revision of a document with document id: $id in CouchDB.  
*  
* @throws SetteeWrongInputException  
*  
* @param $id  
* Unique ID (usually: UUID) of the document to be retrieved.  
* @return  
* database document in PHP object format.  
*/  
function get_rev($id) {  
if (empty($id)) {  
throw new SetteeWrongInputException("Error: Can't query a document without a uuid.");  
}  
 
$full_uri = $this->dbname . "/" . $this->safe_urlencode($id);  
$headers = $this->rest_client->http_head($full_uri);  
if (empty($headers['Etag'])) {  
throw new SetteeRestClientException("Error: could not retrieve revision. Server unexpectedly returned empty Etag");  
}  
$etag = str_replace('"', '', $headers['Etag']);  
return $etag;  
}  
 
/**  
* Delete a document  
*  
* @param $document  
* a PHP object or JSON representation of the document that has _id and _rev fields.  
*  
* @return void  
*/  
function delete($document) {  
if (!is_object($document)) {  
$document = json_decode($document);  
}  
 
$full_uri = $this->dbname . "/" . $this->safe_urlencode($document->_id) . "?rev=" . $document->_rev;  
$this->rest_client->http_delete($full_uri);  
}  
 
 
/*----------------- View-related functions --------------*/  
 
/**  
* Create a new view or update an existing one.  
*  
* @param $design_doc  
* @param $view_name  
* @param $map_src  
* Source code of the map function in Javascript  
* @param $reduce_src  
* Source code of the reduce function in Javascript (optional)  
* @return void  
*/  
function save_view($design_doc, $view_name, $map_src, $reduce_src = null) {  
$obj = new stdClass();  
$obj->_id = "_design/" . urlencode($design_doc);  
$view_name = urlencode($view_name);  
$obj->views->$view_name->map = $map_src;  
if (!empty($reduce_src)) {  
$obj->views->$view_name->reduce = $reduce_src;  
}  
 
// allow safe updates (even if slightly slower due to extra: rev-detection check).  
return $this->save($obj, true);  
}  
 
/**
 * Retrieve the result rows of a view. (Original docblock said "Create a new
 * view" — that was a copy/paste error; this method only reads.)
 *
 * @param $design_doc
 *   Name of the design document the view belongs to.
 * @param $view_name
 *   Name of the view.
 * @param $key
 *   key parameter to a view. Can be a single value or an array (for a range). If passed an array, function assumes
 *   that first element is startkey, second: endkey.
 * @param $descending
 *   return results in descending order. Please don't forget that if you are using a startkey/endkey, when you change
 *   order you also need to swap startkey and endkey values!
 *
 * @return
 *   Decoded CouchDB response (object with a ->rows member), not void.
 */
function get_view($design_doc, $view_name, $key = null, $descending = false) {
    $id = "_design/" . urlencode($design_doc);
    $view_name = urlencode($view_name);
    $id .= "/_view/$view_name";

    // $data stays an empty array when no key filter is requested; otherwise
    // it is rebuilt as a raw query string (the REST client accepts both).
    $data = array();
    if (!empty($key)) {
        if (is_string($key)) {
            $data = "key=" . '"' . $key . '"';
        }
        elseif (is_array($key)) {
            // Two-element array is treated as a startkey/endkey range.
            list($startkey, $endkey) = $key;
            $data = "startkey=" . '"' . $startkey . '"&' . "endkey=" . '"' . $endkey . '"';
        }

        if ($descending) {
            $data .= "&descending=true";
        }
    }



    // NOTE(review): $id is always non-empty at this point (built above), so
    // this guard appears unreachable; kept as-is to preserve behavior.
    if (empty($id)) {
        throw new SetteeWrongInputException("Error: Can't retrieve a document without a uuid.");
    }

    $full_uri = $this->dbname . "/" . $this->safe_urlencode($id);
    // Undo the double-encoding produced by safe_urlencode() so a literal
    // "?group=true" suffix embedded in the view name survives as a real
    // query parameter on the request URI.
    $full_uri = str_replace("%253Fgroup%253Dtrue","?group=true",$full_uri);
    $ret = $this->rest_client->http_get($full_uri, $data);
    return $ret['decoded'];

}
 
/**
 * URL-encode a document id while keeping system-document paths intact.
 *
 * @param $id
 *   Raw document id.
 * @return
 *   A properly url-encoded id. For ids beginning with "_" (system documents
 *   such as "_design/..."), the "/" separators are preserved because those
 *   URLs legitimately contain slashes.
 */
private function safe_urlencode($id) {
    $encoded = rawurlencode($id);
    if (strpos($encoded, '_') === 0) {
        // Restore the path separators that rawurlencode() escaped.
        $encoded = str_replace('%2F', '/', $encoded);
    }
    return $encoded;
}
 
/** Getter for the name of the database this instance operates on. */
function get_name() {
    return $this->dbname;
}
 
}  
 
  *.pyc
<?php <?php
   
include('template.inc.php'); include('template.inc.php');
include_header_documents(""); include_header_documents("");
include_once('../include/common.inc.php'); include_once('../include/common.inc.php');
   
echo "<table> echo "<table>
<tr><th>Agency Name</th><th>Disclosure Log URL recorded?</th><th>Do we monitor this URL?</th></tr>"; <tr><th>Agency Name</th><th>Disclosure Log URL recorded?</th><th>Do we monitor this URL?</th></tr>";
$agenciesdb = $server->get_db('disclosr-agencies'); $agenciesdb = $server->get_db('disclosr-agencies');
$docsdb = $server->get_db('disclosr-documents'); $docsdb = $server->get_db('disclosr-documents');
$agencies = 0; $agencies = 0;
$disclogs = 0; $disclogs = 0;
$red = 0; $red = 0;
$green = 0; $green = 0;
  $yellow = 0;
$orange = 0; $orange = 0;
try { try {
$rows = $agenciesdb->get_view("app", "byCanonicalName", null, true)->rows; $rows = $agenciesdb->get_view("app", "byCanonicalName", null, true)->rows;
   
   
if ($rows) { if ($rows) {
foreach ($rows as $row) { foreach ($rows as $row) {
  if ((!isset($row->value->status) || $row->value->status != "suspended") && isset($row->value->foiEmail)) {
  echo "<tr><td>";
  if (isset($row->value->website)) echo "<a href='" . $row->value->website . "'>";
  echo "<b>" . $row->value->name . "</b>";
  if (isset($row->value->website)) echo "</a>";
  if ($ENV == "DEV")
  echo "<br>(" . $row->id . ")";
  echo "</td>\n";
  $agencies++;
   
echo "<tr><td><b>" . $row->value->name . "</b>"; echo "<td>";
if ($ENV == "DEV") if (isset($row->value->FOIDocumentsURL)) {
echo "<br>(" . $row->id . ")"; $disclogs++;
echo "</td>\n"; echo '<a href="' . $row->value->FOIDocumentsURL . '">'
$agencies++; . $row->value->FOIDocumentsURL . '</a>';
  if ($ENV == "DEV")
echo "<td>"; echo '<br><small>(<a href="viewDocument.php?hash=' . md5($row->value->FOIDocumentsURL) . '">'
if (isset($row->value->FOIDocumentsURL)) { . 'view local copy</a>)</small>';
$disclogs++; } else {
echo '<a href="' . $row->value->FOIDocumentsURL . '">' echo "<font color='red'><abbr title='No'>✘</abbr></font>";
. $row->value->FOIDocumentsURL . '</a>'; }
if ($ENV == "DEV") echo "</td>\n<td>";
echo '<br><small>(<a href="viewDocument.php?hash=' . md5($row->value->FOIDocumentsURL) . '">' if (isset($row->value->FOIDocumentsURL)) {
. 'view local copy</a>)</small>'; if (file_exists("./scrapers/" . $row->id . '.py')) {
} else { echo "<font color='green'><abbr title='Yes'>✔</abbr></font>";
echo "<font color='red'>✘</font>"; $green++;
  } else if (file_exists("./scrapers/" . $row->id . '.txt')) {
  if (trim(file_get_contents("./scrapers/" . $row->id . '.txt')) == "no disclog") {
  echo "<font color='yellow'><abbr title='No log table exists at URL to scrape'><b>◎</b></abbr></font>";
  $yellow++;
  } else {
  echo file_get_contents("./scrapers/" . $row->id . '.txt');
  echo "<font color='orange'><abbr title='Work in progress'><b>▬</b></abbr></font>";
  $orange++;
  }
  } else {
  echo "<font color='red'><abbr title='No'>✘</abbr></font>";
  $red++;
  }
  }
  echo "</td></tr>\n";
} }
echo "</td>\n<td>";  
if (isset($row->value->FOIDocumentsURL)) {  
if (file_exists("./scrapers/" . $row->id . '.py')) {  
echo "<font color='green'>✔</font>";  
$green++;  
} else if (file_exists("./scrapers/" . $row->id . '.txt')) {  
echo "<font color='orange'><b>▬</b></font>";  
$orange++;  
} else {  
echo "<font color='red'>✘</font>";  
$red++;  
}  
}  
echo "</td></tr>\n";  
} }
} }
} catch (SetteeRestClientException $e) { } catch (SetteeRestClientException $e) {
setteErrorHandler($e); setteErrorHandler($e);
} }
echo "</table>"; echo "</table>";
echo $agencies." agencies ".(($disclogs/$agencies)*100)."% with disclosure logs, ".(($green/$disclogs)*100)."% with scrapers ".(($red/$disclogs)*100)."% without scrapers ".(($orange/$disclogs)*100)."% WIP scrapers "; echo $agencies . " agencies, " . round(($disclogs / $agencies) * 100) . "% with disclosure logs; "
  . round(($green / $disclogs) * 100) . "% logs with scrapers " . round(($red / $disclogs) * 100) . "% logs without scrapers " . round(($orange / $disclogs) * 100) . "% logs Work-In-Progress scrapers ";
   
include_footer_documents(); include_footer_documents();
?> ?>
   
import sys,os import sys,os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import scrape import scrape
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from time import mktime from time import mktime
import feedparser import feedparser
import abc import abc
import unicodedata, re import unicodedata, re
import dateutil import dateutil
from dateutil.parser import * from dateutil.parser import *
from datetime import * from datetime import *
  import codecs
   
class GenericDisclogScraper(object): class GenericDisclogScraper(object):
__metaclass__ = abc.ABCMeta __metaclass__ = abc.ABCMeta
agencyID = None agencyID = None
disclogURL = None disclogURL = None
def remove_control_chars(self, input): def remove_control_chars(self, input):
return "".join([i for i in input if ord(i) in range(32, 127)]) return "".join([i for i in input if ord(i) in range(32, 127)])
def getAgencyID(self): def getAgencyID(self):
""" disclosr agency id """ """ disclosr agency id """
if self.agencyID == None: if self.agencyID == None:
self.agencyID = os.path.basename(sys.argv[0]).replace(".py","") self.agencyID = os.path.basename(sys.argv[0]).replace(".py","")
return self.agencyID return self.agencyID
   
def getURL(self): def getURL(self):
""" disclog URL""" """ disclog URL"""
if self.disclogURL == None: if self.disclogURL == None:
agency = scrape.agencydb.get(self.getAgencyID()) agency = scrape.agencydb.get(self.getAgencyID())
self.disclogURL = agency['FOIDocumentsURL'] self.disclogURL = agency['FOIDocumentsURL']
return self.disclogURL return self.disclogURL
   
@abc.abstractmethod @abc.abstractmethod
def doScrape(self): def doScrape(self):
""" do the scraping """ """ do the scraping """
return return
   
@abc.abstractmethod @abc.abstractmethod
def getDescription(self, content, entry, doc): def getDescription(self, content, entry, doc):
""" get description""" """ get description"""
return return
   
   
   
class GenericRSSDisclogScraper(GenericDisclogScraper): class GenericRSSDisclogScraper(GenericDisclogScraper):
   
def doScrape(self): def doScrape(self):
foidocsdb = scrape.couch['disclosr-foidocuments'] foidocsdb = scrape.couch['disclosr-foidocuments']
(url,mime_type,content) = scrape.fetchURL(scrape.docsdb, self.getURL(), "foidocuments", self.getAgencyID()) (url,mime_type,content) = scrape.fetchURL(scrape.docsdb, self.getURL(), "foidocuments", self.getAgencyID())
feed = feedparser.parse(content) feed = feedparser.parse(content)
for entry in feed.entries: for entry in feed.entries:
#print entry #print entry
print entry.id print entry.id
hash = scrape.mkhash(entry.id) hash = scrape.mkhash(entry.id)
#print hash #print hash
doc = foidocsdb.get(hash) doc = foidocsdb.get(hash)
#print doc #print doc
if doc == None: if doc == None:
print "saving "+ hash print "saving "+ hash
edate = datetime.fromtimestamp(mktime( entry.published_parsed)).strftime("%Y-%m-%d") edate = datetime.fromtimestamp(mktime( entry.published_parsed)).strftime("%Y-%m-%d")
doc = {'_id': hash, 'agencyID': self.getAgencyID(), 'url': entry.link, 'docID': entry.id, doc = {'_id': hash, 'agencyID': self.getAgencyID(), 'url': entry.link, 'docID': entry.id,
"date": edate,"title": entry.title} "date": edate,"title": entry.title}
self.getDescription(entry,entry, doc) self.getDescription(entry,entry, doc)
foidocsdb.save(doc) foidocsdb.save(doc)
else: else:
print "already saved" print "already saved"
def getDescription(self, content, entry, doc): def getDescription(self, content, entry, doc):
""" get description from rss entry""" """ get description from rss entry"""
doc.update({'description': content.summary}) doc.update({'description': content.summary})
return return
   
class GenericOAICDisclogScraper(GenericDisclogScraper): class GenericOAICDisclogScraper(GenericDisclogScraper):
__metaclass__ = abc.ABCMeta __metaclass__ = abc.ABCMeta
@abc.abstractmethod @abc.abstractmethod
def getColumns(self,columns): def getColumns(self,columns):
""" rearranges columns if required """ """ rearranges columns if required """
return return
def getColumnCount(self): def getColumnCount(self):
return 5 return 5
def getDescription(self, content, entry, doc): def getDescription(self, content, entry, doc):
""" get description from rss entry""" """ get description from rss entry"""
descriptiontxt = "" descriptiontxt = ""
for string in content.stripped_strings: for string in content.stripped_strings:
descriptiontxt = descriptiontxt + " \n" + string descriptiontxt = descriptiontxt + " \n" + string
doc.update({'description': descriptiontxt}) doc.update({'description': descriptiontxt})
return return
def getTitle(self, content, entry, doc): def getTitle(self, content, entry, doc):
doc.update({'title': (''.join(content.stripped_strings))}) doc.update({'title': (''.join(content.stripped_strings))})
return return
def getTable(self, soup): def getTable(self, soup):
return soup.table return soup.table
def getRows(self, table): def getRows(self, table):
return table.find_all('tr') return table.find_all('tr')
def getDate(self, content, entry, doc): def getDate(self, content, entry, doc):
date = ''.join(content.stripped_strings).strip() date = ''.join(content.stripped_strings).strip()
date = str.replace("Octber","October",date) (a,b,c) = date.partition("(")
  date = self.remove_control_chars(a.replace("Octber","October"))
  print date
edate = parse(date, dayfirst=True, fuzzy=True).strftime("%Y-%m-%d") edate = parse(date, dayfirst=True, fuzzy=True).strftime("%Y-%m-%d")
print edate print edate
doc.update({'date': edate}) doc.update({'date': edate})
return return
def getLinks(self, content, entry, doc): def getLinks(self, content, entry, doc):
links = [] links = []
for atag in entry.find_all("a"): for atag in entry.find_all("a"):
if atag.has_key('href'): if atag.has_key('href'):
links.append(scrape.fullurl(content,atag['href'])) links.append(scrape.fullurl(content,atag['href']))
if links != []: if links != []:
doc.update({'links': links}) doc.update({'links': links})
return return
   
def doScrape(self): def doScrape(self):
foidocsdb = scrape.couch['disclosr-foidocuments'] foidocsdb = scrape.couch['disclosr-foidocuments']
(url,mime_type,content) = scrape.fetchURL(scrape.docsdb, self.getURL(), "foidocuments", self.getAgencyID()) (url,mime_type,content) = scrape.fetchURL(scrape.docsdb, self.getURL(), "foidocuments", self.getAgencyID())
if content != None: if content != None:
if mime_type == "text/html" or mime_type == "application/xhtml+xml" or mime_type =="application/xml": if mime_type == "text/html" or mime_type == "application/xhtml+xml" or mime_type =="application/xml":
# http://www.crummy.com/software/BeautifulSoup/documentation.html # http://www.crummy.com/software/BeautifulSoup/documentation.html
soup = BeautifulSoup(content) soup = BeautifulSoup(content)
table = self.getTable(soup) table = self.getTable(soup)
for row in self.getRows(table): for row in self.getRows(table):
columns = row.find_all('td') columns = row.find_all('td')
if len(columns) == self.getColumnCount(): if len(columns) == self.getColumnCount():
(id, date, title, description, notes) = self.getColumns(columns) (id, date, title, description, notes) = self.getColumns(columns)
print ''.join(id.stripped_strings) print self.remove_control_chars(''.join(id.stripped_strings))
if id.string == None: if id.string == None:
hash = scrape.mkhash(self.remove_control_chars(url+(''.join(date.stripped_strings)))) hash = scrape.mkhash(self.remove_control_chars(url+(''.join(date.stripped_strings))))
else: else:
hash = scrape.mkhash(self.remove_control_chars(url+(''.join(id.stripped_strings)))) hash = scrape.mkhash(self.remove_control_chars(url+(''.join(id.stripped_strings))))
doc = foidocsdb.get(hash) doc = foidocsdb.get(hash)
if doc == None: if doc == None:
print "saving " +hash print "saving " +hash
doc = {'_id': hash, 'agencyID': self.getAgencyID(), 'url': self.getURL(), 'docID': (''.join(id.stripped_strings))} doc = {'_id': hash, 'agencyID': self.getAgencyID(), 'url': self.getURL(), 'docID': (''.join(id.stripped_strings))}
self.getLinks(self.getURL(),row,doc) self.getLinks(self.getURL(),row,doc)
self.getTitle(title,row, doc) self.getTitle(title,row, doc)
self.getDate(date,row, doc) self.getDate(date,row, doc)
self.getDescription(description,row, doc) self.getDescription(description,row, doc)
if notes != None: if notes != None:
doc.update({ 'notes': (''.join(notes.stripped_strings))}) doc.update({ 'notes': (''.join(notes.stripped_strings))})
foidocsdb.save(doc) foidocsdb.save(doc)
else: else:
print "already saved "+hash print "already saved "+hash
elif len(row.find_all('th')) == self.getColumnCount(): elif len(row.find_all('th')) == self.getColumnCount():
print "header row" print "header row"
else: else:
print "ERROR number of columns incorrect" print "ERROR number of columns incorrect"
print row print row
   
<?php <?php
   
include('template.inc.php'); include('template.inc.php');
include_header_documents(""); include_header_documents("");
include_once('../include/common.inc.php'); include_once('../include/common.inc.php');
  $startkey = (isset($_REQUEST['start_key']) ? $_REQUEST['start_key'] : '9999-99-99');
?> ?>
<?php <?php
   
   
   
$agenciesdb = $server->get_db('disclosr-agencies'); $agenciesdb = $server->get_db('disclosr-agencies');
   
$idtoname = Array(); $idtoname = Array();
foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) { foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) {
$idtoname[$row->id] = trim($row->value->name); $idtoname[$row->id] = trim($row->value->name);
} }
$foidocsdb = $server->get_db('disclosr-foidocuments'); $foidocsdb = $server->get_db('disclosr-foidocuments');
try { try {
$rows = $foidocsdb->get_view("app", "byDate", Array('9999-99-99','0000-00-00'), true)->rows; $rows = $foidocsdb->get_view("app", "byDate", Array($startkey, '0000-00-00'), true, 20)->rows;
   
   
if ($rows) { if ($rows) {
foreach ($rows as $row) { foreach ($rows as $key => $row) {
displayLogEntry($row,$idtoname); echo displayLogEntry($row, $idtoname);
  $endkey = $row->key;
} }
} }
} catch (SetteeRestClientException $e) { } catch (SetteeRestClientException $e) {
setteErrorHandler($e); setteErrorHandler($e);
} }
  echo "<a href='?start_key=$endkey'>next page</a>";
include_footer_documents(); include_footer_documents();
?> ?>
   
# www.robotstxt.org/ # www.robotstxt.org/
# http://code.google.com/web/controlcrawlindex/ # http://code.google.com/web/controlcrawlindex/
   
User-agent: * User-agent: *
  Disallow: /admin/
  Sitemap: http://disclosurelo.gs/sitemap.xml.php
<?php <?php
   
// Agency X updated Y, new files, diff of plain text/link text, // Agency X updated Y, new files, diff of plain text/link text,
// feed for just one agency or all // feed for just one agency or all
// This is a minimum example of using the Universal Feed Generator Class // This is a minimum example of using the Universal Feed Generator Class
include("lib/FeedWriter.php"); include("../lib/FeedWriter/FeedTypes.php");
  include_once('../include/common.inc.php');
//Creating an instance of FeedWriter class. //Creating an instance of FeedWriter class.
$TestFeed = new FeedWriter(RSS2); $TestFeed = new RSS2FeedWriter();
//Setting the channel elements //Setting the channel elements
//Use wrapper functions for common channelelements //Use wrapper functions for common channelelements
$TestFeed->setTitle('Last Modified - All'); $TestFeed->setTitle('Last Modified - All');
$TestFeed->setLink('http://disclosr.lambdacomplex.org/rss.xml.php'); $TestFeed->setLink('http://disclosurelo.gs/rss.xml.php');
$TestFeed->setDescription('This is test of creating a RSS 2.0 feed Universal Feed Writer'); $TestFeed->setDescription('This is test of creating a RSS 2.0 feed Universal Feed Writer');
  $TestFeed->setChannelElement('language', 'en-us');
  $TestFeed->setChannelElement('pubDate', date(DATE_RSS, time()));
//Retriving informations from database //Retriving informations from database
$rows = $db->get_view("app", "byLastModified")->rows; $idtoname = Array();
  $agenciesdb = $server->get_db('disclosr-agencies');
  foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) {
  $idtoname[$row->id] = trim($row->value->name);
  }
  $foidocsdb = $server->get_db('disclosr-foidocuments');
  $rows = $foidocsdb->get_view("app", "byDate", Array('9999-99-99','0000-00-00'), true)->rows;
//print_r($rows); //print_r($rows);
foreach ($rows as $row) { foreach ($rows as $row) {
//Create an empty FeedItem //Create an empty FeedItem
$newItem = $TestFeed->createNewItem(); $newItem = $TestFeed->createNewItem();
//Add elements to the feed item //Add elements to the feed item
$newItem->setTitle($row['name']); $newItem->setTitle($row->value->title);
$newItem->setLink($row['id']); $newItem->setLink("view.php?id=".$row->value->docID);
$newItem->setDate(date("c", $row['metadata']['lastModified'])); $newItem->setDate(date("c", strtotime($row->value->date)));
$newItem->setDescription($row['name']); $newItem->setDescription(displayLogEntry($row,$idtoname));
  $newItem->addElement('guid', $row->value->_id,array('isPermaLink'=>'true'));
//Now add the feed item //Now add the feed item
$TestFeed->addItem($newItem); $TestFeed->addItem($newItem);
} }
//OK. Everything is done. Now genarate the feed. //OK. Everything is done. Now genarate the feed.
$TestFeed->genarateFeed(); $TestFeed->generateFeed();
?> ?>
# FOI disclosure-log scraper: five-column HTML table handled entirely by the
# generic OAIC-style base class. Agency identity is derived from this file's
# name by GenericDisclogScraper.getAgencyID().
import sys,os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import genericScrapers
import scrape
from bs4 import BeautifulSoup

#http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    #def getTable(self,soup):
    #        return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table
    def getColumnCount(self):
        # Rows with exactly five <td> cells are treated as log entries.
        return 5
    def getColumns(self,columns):
        # Cells already arrive in the canonical order expected by doScrape().
        (id, date, title, description,notes) = columns
        return (id, date, title, description, notes)

if __name__ == '__main__':
    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
    ScraperImplementation().doScrape()
 
# FOI disclosure-log scraper: five-column HTML table, default table lookup
# (the site-specific getTable override is left commented out).
import sys,os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import genericScrapers
import scrape
from bs4 import BeautifulSoup

#http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    #def getTable(self,soup):
    #        return soup.find(id = "cphMain_C001_Col01").table
    def getColumnCount(self):
        # Rows with exactly five <td> cells are treated as log entries.
        return 5
    def getColumns(self,columns):
        # Cells already arrive in the canonical order expected by doScrape().
        (id, date, title, description,notes) = columns
        return (id, date, title, description, notes)

if __name__ == '__main__':
    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
    ScraperImplementation().doScrape()
 
# FOI disclosure-log scraper: two-column table inside the "inner_content"
# element. The date cell doubles as the row id and the title cell doubles as
# the description; there is no notes column.
import sys,os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import genericScrapers
import scrape
from bs4 import BeautifulSoup

#http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    def getTable(self,soup):
        # NOTE(review): returns the container div, not a <table>; the base
        # class only calls find_all('tr') on it, so this works either way.
        return soup.find(id = "inner_content")
    def getColumnCount(self):
        return 2
    def getColumns(self,columns):
        # Map (date, title) onto (id, date, title, description, notes).
        (date, title) = columns
        return (date, date, title, title, None)

if __name__ == '__main__':
    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
    ScraperImplementation().doScrape()
 
# FOI disclosure-log scraper: three-column table laid out as (id, title,
# date). The title cell doubles as the description; there is no notes column.
import sys,os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import genericScrapers
import scrape
from bs4 import BeautifulSoup

#http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    #def getTable(self,soup):
    #        return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table
    def getColumnCount(self):
        return 3
    def getColumns(self,columns):
        # Map (id, title, date) onto (id, date, title, description, notes).
        (id, title, date) = columns
        return (id, date, title, title, None)

if __name__ == '__main__':
    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
    ScraperImplementation().doScrape()
 
# FOI disclosure-log scraper: five-column table with non-trivial date text
# (hence the dateutil imports, used by the base class's date parsing path).
import sys,os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import genericScrapers
import scrape
from bs4 import BeautifulSoup
import dateutil
from dateutil.parser import *
from datetime import *

#http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    def getColumnCount(self):
        # Rows with exactly five <td> cells are treated as log entries.
        return 5
    def getColumns(self,columns):
        # Cells already arrive in the canonical order expected by doScrape().
        (id, date, title, description, notes) = columns
        return (id, date, title, description, notes)

if __name__ == '__main__':
    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
    si = ScraperImplementation()
    si.doScrape()
 
# FOI disclosure-log scraper: four-column table (id, date, title,
# description); there is no notes column.
import sys,os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import genericScrapers
import scrape
from bs4 import BeautifulSoup

#http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    #def getTable(self,soup):
    #        return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table
    def getColumnCount(self):
        return 4
    def getColumns(self,columns):
        # Map (id, date, title, description) onto the five-slot form.
        (id, date, title, description) = columns
        return (id, date, title, description, None)

if __name__ == '__main__':
    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
    ScraperImplementation().doScrape()
 
# FOI disclosure-log scraper: four-column table with no dedicated id column;
# the title cell is reused as the row id for hashing/deduplication.
import sys,os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import genericScrapers
import scrape
from bs4 import BeautifulSoup

#http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    #def getTable(self,soup):
    #        return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table
    def getColumnCount(self):
        return 4
    def getColumns(self,columns):
        # Map (date, title, description, notes) onto the five-slot form,
        # using title as the id.
        (date, title, description,notes) = columns
        return (title, date, title, description, notes)

if __name__ == '__main__':
    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
    ScraperImplementation().doScrape()
 
# FOI disclosure-log scraper (FaHCSIA): five-column table. The main block
# scrapes the current-year log first, then re-points disclogURL at the two
# previous financial-year pages and scrapes those as well.
import sys,os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import genericScrapers
import scrape
from bs4 import BeautifulSoup
import dateutil
from dateutil.parser import *
from datetime import *

#http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    def getColumnCount(self):
        # Rows with exactly five <td> cells are treated as log entries.
        return 5
    def getColumns(self,columns):
        # Cells already arrive in the canonical order expected by doScrape().
        (id, date, title, description, notes) = columns
        return (id, date, title, description, notes)

if __name__ == '__main__':
    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
    si = ScraperImplementation()
    si.doScrape()
    # Older financial-year logs live on separate pages; reuse the same
    # scraper instance with the URL swapped out.
    si.disclogURL = "http://www.fahcsia.gov.au/disclosure-log-2011-12-financial-year"
    si.doScrape()
    si.disclogURL = "http://www.fahcsia.gov.au/disclosure-log-2010-11-financial-year"
    si.doScrape()
 
 
# FOI disclosure-log scraper: five-column table located inside the element
# with id "node-30609".
import sys,os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import genericScrapers
import scrape
from bs4 import BeautifulSoup

#http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    def getTable(self,soup):
        # NOTE(review): returns the container node itself; the base class
        # only calls find_all('tr') on it, so a <table> child is not required.
        return soup.find(id="node-30609")
    def getColumnCount(self):
        return 5
    def getColumns(self,columns):
        # Cells already arrive in the canonical order expected by doScrape().
        (id, date, title, description,notes) = columns
        return (id, date, title, description, notes)

if __name__ == '__main__':
    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
    ScraperImplementation().doScrape()
 
# FOI disclosure-log scraper: three-column table (id, date, description).
# The description cell doubles as the title; there is no notes column.
import sys,os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import genericScrapers
import scrape
from bs4 import BeautifulSoup

#http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    #def getTable(self,soup):
    #        return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table
    def getColumnCount(self):
        return 3
    def getColumns(self,columns):
        # Map (id, date, description) onto the five-slot form.
        (id, date, description) = columns
        return (id, date, description, description, None)

if __name__ == '__main__':
    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
    ScraperImplementation().doScrape()
 
# FOI disclosure-log scraper: five-column table nested under the
# "centercontent" element.
import sys,os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import genericScrapers
import scrape
from bs4 import BeautifulSoup

#http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    def getTable(self,soup):
        # First <table> inside the #centercontent container.
        return soup.find(id = "centercontent").table
    def getColumnCount(self):
        return 5
    def getColumns(self,columns):
        # Cells already arrive in the canonical order expected by doScrape().
        (id, date, title, description,notes) = columns
        return (id, date, title, description, notes)

if __name__ == '__main__':
    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
    ScraperImplementation().doScrape()
 
  import sys,os
  sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
  import genericScrapers
  import scrape
  from bs4 import BeautifulSoup
 
  #http://www.doughellmann.com/PyMOTW/abc/
  class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
  #def getTable(self,soup):
  # return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table
  def getColumnCount(self):
  return 5
  def getColumns(self,columns):
  (id, date, title, description,notes) = columns
  return (id, date, title, description, notes)
 
  if __name__ == '__main__':
  print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
  print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
  ScraperImplementation().doScrape()
 
# FOI disclosure-log scraper: five-column table with non-trivial date text
# (hence the dateutil imports, used by the base class's date parsing path).
import sys,os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import genericScrapers
import scrape
from bs4 import BeautifulSoup
import dateutil
from dateutil.parser import *
from datetime import *

#http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    def getColumnCount(self):
        # Rows with exactly five <td> cells are treated as log entries.
        return 5
    def getColumns(self,columns):
        # Cells already arrive in the canonical order expected by doScrape().
        (id, date, title, description, notes) = columns
        return (id, date, title, description, notes)

if __name__ == '__main__':
    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
    si = ScraperImplementation()
    si.doScrape()
 
  weird div based log with tables of links
 
# FOI disclosure-log scraper: five-column table nested under the
# "content-middle" element.
import sys,os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import genericScrapers
import scrape
from bs4 import BeautifulSoup

#http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    def getTable(self,soup):
        # First <table> inside the #content-middle container.
        return soup.find(id = "content-middle").table
    def getColumnCount(self):
        return 5
    def getColumns(self,columns):
        # Cells already arrive in the canonical order expected by doScrape().
        (id, date, title, description,notes) = columns
        return (id, date, title, description, notes)

if __name__ == '__main__':
    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
    ScraperImplementation().doScrape()
 
  import sys,os
  sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
  import genericScrapers
  import scrape
  from bs4 import BeautifulSoup
 
  #http://www.doughellmann.com/PyMOTW/abc/
  class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
  #def getTable(self,soup):
  # return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table
  def getColumnCount(self):
  return 5
  def getColumns(self,columns):
  (id, date, title, description,notes) = columns
  return (id, date, title, description, notes)
 
  if __name__ == '__main__':
  print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
  print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
  ScraperImplementation().doScrape()
 
  import sys,os
  sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
  import genericScrapers
  import scrape
  from bs4 import BeautifulSoup
 
  #http://www.doughellmann.com/PyMOTW/abc/
  class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
  def getTable(self,soup):
  return soup.find(id = "cphMain_C001_Col01").table
  def getColumnCount(self):
  return 5
  def getColumns(self,columns):
  (id, date, title, description,notes) = columns
  return (id, date, title, description, notes)
 
  if __name__ == '__main__':
  print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
  print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
  ScraperImplementation().doScrape()
 
  import sys,os
  sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
  import genericScrapers
  import scrape
  from bs4 import BeautifulSoup
 
  #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Three-column disclog (date, title, description); the date doubles
    as the row id and there are no notes."""

    def getColumnCount(self):
        return 3

    def getColumns(self, columns):
        date, title, description = columns
        # Reuse the date as the id; no notes column exists.
        return date, date, title, description, None
 
  if __name__ == '__main__':
  print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
  print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
  ScraperImplementation().doScrape()
 
  import sys,os
  sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
  import genericScrapers
  import scrape
  from bs4 import BeautifulSoup
 
  #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Three-column disclog (date, id, description); the description
    doubles as the title and the title cell's first text fragment is
    used as the document title."""

    def getTitle(self, content, entry, doc):
        # Only the first stripped string of the cell becomes the title.
        doc.update({'title': content.stripped_strings.next()})
        return

    def getColumnCount(self):
        return 3

    def getColumns(self, columns):
        date, id, description = columns
        return id, date, description, description, None
 
  if __name__ == '__main__':
  print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
  print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
  ScraperImplementation().doScrape()
 
  import sys,os
  sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
  import genericScrapers
  import scrape
  from bs4 import BeautifulSoup
 
  #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Disclosure-log scraper for a five-column table:
    (id, date, title, description, notes)."""

    def getColumnCount(self):
        # Five cells per row.
        return 5

    def getColumns(self, columns):
        # Cells already match the generic scraper's expected order.
        id, date, title, description, notes = columns
        return id, date, title, description, notes
 
  if __name__ == '__main__':
  print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
  print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
  ScraperImplementation().doScrape()
 
  import sys,os
  sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
  import genericScrapers
  import scrape
  from bs4 import BeautifulSoup
  import codecs
  #http://www.doughellmann.com/PyMOTW/abc/
class NewScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    # Scraper for the new-style disclosure log: follows each row's link and
    # harvests the linked page's text into the description.

    def getDescription(self,content, entry,doc):
        # Fetch every page the row links to; concatenate its visible text
        # as the description and collect any further links found there.
        link = None
        links = []
        description = ""
        for atag in entry.find_all('a'):
            if atag.has_key('href'):
                link = scrape.fullurl(self.getURL(),atag['href'])
                (url,mime_type,htcontent) = scrape.fetchURL(scrape.docsdb, link, "foidocuments", self.getAgencyID(), False)
                if htcontent != None:
                    if mime_type == "text/html" or mime_type == "application/xhtml+xml" or mime_type =="application/xml":
                        # http://www.crummy.com/software/BeautifulSoup/documentation.html
                        soup = BeautifulSoup(htcontent)
                        # Non-ASCII text is silently dropped here.
                        for text in soup.find(id="divFullWidthColumn").stripped_strings:
                            description = description + text.encode('ascii', 'ignore')

                        for atag in soup.find(id="divFullWidthColumn").find_all("a"):
                            if atag.has_key('href'):
                                links.append(scrape.fullurl(link,atag['href']))

        # Only record what was actually found.
        if links != []:
            doc.update({'links': links})
        if description != "":
            doc.update({ 'description': description})

    def getColumnCount(self):
        # Two columns: title then date.
        return 2

    def getTable(self,soup):
        # NOTE(review): returns the "TwoColumnSorting" container itself, not
        # a .table child as most scrapers do — presumably it is a table.
        return soup.find(id = "TwoColumnSorting")

    def getColumns(self,columns):
        # Title doubles as both id and description; there are no notes.
        ( title, date) = columns
        return (title, date, title, title, None)
class OldScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Scraper for the archived (old-site) Treasury disclosure log.

    Rows have two columns (date, title); the description is assembled by
    fetching each page the row links to and concatenating the text of its
    "content-item" pane.
    """

    def getDescription(self, content, entry, doc):
        # Follow each link in the row; harvest the linked page's text and
        # any onward links it contains.
        link = None
        links = []
        description = ""
        for atag in entry.find_all('a'):
            if atag.has_key('href'):
                link = scrape.fullurl(self.getURL(), atag['href'])
                (url, mime_type, htcontent) = scrape.fetchURL(scrape.docsdb, link, "foidocuments", self.getAgencyID(), False)
                if htcontent != None:
                    if mime_type == "text/html" or mime_type == "application/xhtml+xml" or mime_type == "application/xml":
                        # http://www.crummy.com/software/BeautifulSoup/documentation.html
                        soup = BeautifulSoup(htcontent)
                        for text in soup.find(id="content-item").stripped_strings:
                            description = description + text + " \n"
                        for atag in soup.find(id="content-item").find_all("a"):
                            if atag.has_key('href'):
                                links.append(scrape.fullurl(link, atag['href']))
        # Fix: the original contained a second, identical copy of these two
        # update blocks; updating doc once after the loop is equivalent.
        if links != []:
            doc.update({'links': links})
        if description != "":
            doc.update({'description': description})

    def getColumnCount(self):
        # Two columns: date then title.
        return 2

    def getTable(self, soup):
        # The old site marks the disclog table with class "doc-list".
        return soup.find(class_="doc-list")

    def getColumns(self, columns):
        # Title doubles as both id and description; there are no notes.
        (date, title) = columns
        return (title, date, title, title, None)
 
if __name__ == '__main__':
    # Sanity-check both scraper classes, then scrape only the old archived
    # site (the new-site scrape is disabled below).
    print 'Subclass:', issubclass(NewScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
    print 'Instance:', isinstance(NewScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
    #NewScraperImplementation().doScrape()
    print 'Subclass:', issubclass(OldScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
    print 'Instance:', isinstance(OldScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
    osi = OldScraperImplementation()
    # Point the scraper at the archived Treasury disclosure-log URL.
    osi.disclogURL = "http://archive.treasury.gov.au/content/foi_publications.asp?year=-1&abstract=0&classification=&=&titl=Disclosure+Log+-+Documents+Released+Under+FOI"
    osi.doScrape()
    # old site too
 
  import sys,os
  sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
  import genericScrapers
  import scrape
  from bs4 import BeautifulSoup
  import dateutil
  from dateutil.parser import *
  from datetime import *
 
  #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Disclosure-log scraper for a five-column table:
    (id, date, title, description, notes)."""

    def getColumnCount(self):
        # Five cells per row.
        return 5

    def getColumns(self, columns):
        # Cells already match the generic scraper's expected order.
        id, date, title, description, notes = columns
        return id, date, title, description, notes
 
  if __name__ == '__main__':
  print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
  print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
  si = ScraperImplementation()
  si.doScrape()
 
  import sys,os
  sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
  import genericScrapers
  import scrape
  from bs4 import BeautifulSoup
 
  #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Four-column disclog (id, date, title, description); no notes."""

    def getColumnCount(self):
        return 4

    def getColumns(self, columns):
        id, date, title, description = columns
        # There is no notes column.
        return id, date, title, description, None
 
  if __name__ == '__main__':
  print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
  print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
  ScraperImplementation().doScrape()
 
  import sys,os
  sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
  import genericScrapers
  import scrape
  from bs4 import BeautifulSoup
 
  #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Four-column disclog (id, date, logdate, description); the
    description doubles as the title, and logdate is unused."""

    def getColumnCount(self):
        return 4

    def getColumns(self, columns):
        id, date, logdate, description = columns
        # Description stands in for the title; no notes column.
        return id, date, description, description, None
 
  if __name__ == '__main__':
  print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
  print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
  ScraperImplementation().doScrape()
 
  import sys,os
  sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
  import genericScrapers
  import scrape
  from bs4 import BeautifulSoup
 
  #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Disclosure-log scraper for a five-column table:
    (id, date, title, description, notes)."""

    def getColumnCount(self):
        # Five cells per row.
        return 5

    def getColumns(self, columns):
        # Cells already match the generic scraper's expected order.
        id, date, title, description, notes = columns
        return id, date, title, description, notes
 
  if __name__ == '__main__':
  print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
  print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
  ScraperImplementation().doScrape()
 
  import sys,os
  sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
  import genericScrapers
  import scrape
  from bs4 import BeautifulSoup
 
  #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Disclosure-log scraper for a five-column table:
    (id, date, title, description, notes)."""

    def getColumnCount(self):
        # Five cells per row.
        return 5

    def getColumns(self, columns):
        # Cells already match the generic scraper's expected order.
        id, date, title, description, notes = columns
        return id, date, title, description, notes
 
  if __name__ == '__main__':
  print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
  print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
  ScraperImplementation().doScrape()
 
  import sys,os
  sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
  import genericScrapers
  import scrape
  from bs4 import BeautifulSoup
 
  #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Three-column disclog (date, title, description); the date doubles
    as the row id and there are no notes."""

    def getColumnCount(self):
        return 3

    def getColumns(self, columns):
        date, title, description = columns
        # Reuse the date as the id; no notes column exists.
        return date, date, title, description, None
 
  if __name__ == '__main__':
  print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
  print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
  ScraperImplementation().doScrape()
 
  import sys,os
  sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
  import genericScrapers
  import scrape
  from bs4 import BeautifulSoup
 
  #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Four-column disclog (blank, id, title, date) found in a specific
    ASP.NET content placeholder; the title doubles as description."""

    def getTable(self, soup):
        # The disclog table is the first table in this placeholder div.
        return soup.find(id="ctl00_ContentPlaceHolderMainNoAjax_EdtrTD1494_2").table

    def getColumnCount(self):
        return 4

    def getColumns(self, columns):
        blank, id, title, date = columns
        # Leading cell is empty; title stands in for the description.
        return id, date, title, title, None
 
  if __name__ == '__main__':
  print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
  print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
  ScraperImplementation().doScrape()
 
  import sys,os
  sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
  import genericScrapers
  import scrape
  from bs4 import BeautifulSoup
 
  #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Disclosure-log scraper for a five-column table:
    (id, date, title, description, notes)."""

    def getColumnCount(self):
        # Five cells per row.
        return 5

    def getColumns(self, columns):
        # Cells already match the generic scraper's expected order.
        id, date, title, description, notes = columns
        return id, date, title, description, notes
 
  if __name__ == '__main__':
  print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
  print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
  ScraperImplementation().doScrape()
 
  import sys,os
  sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
  import genericScrapers
  import scrape
  from bs4 import BeautifulSoup
 
  #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Five-column disclog (id, date, title, description, notes) inside a
    SharePoint rich-HTML field."""

    def getTable(self, soup):
        # First table inside the SharePoint rich-HTML control wrapper.
        return soup.find(id="ctl00_PlaceHolderMain_Content__ControlWrapper_RichHtmlField").table

    def getColumnCount(self):
        return 5

    def getColumns(self, columns):
        id, date, title, description, notes = columns
        return id, date, title, description, notes
 
  if __name__ == '__main__':
  print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
  print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
  ScraperImplementation().doScrape()
 
  import sys,os
  sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
  import genericScrapers
  import scrape
  from bs4 import BeautifulSoup
 
  #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Disclosure-log scraper for a five-column table:
    (id, date, title, description, notes)."""

    def getColumnCount(self):
        # Five cells per row.
        return 5

    def getColumns(self, columns):
        # Cells already match the generic scraper's expected order.
        id, date, title, description, notes = columns
        return id, date, title, description, notes
 
  if __name__ == '__main__':
  print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
  print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
  ScraperImplementation().doScrape()
 
# This agency's page has no disclosure-log entries and no disclog table to scrape.
   
  import sys,os
  sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
  import genericScrapers
  import scrape
  from bs4 import BeautifulSoup
 
  #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Four-column disclog (id, date, title, description); no notes."""

    def getColumnCount(self):
        return 4

    def getColumns(self, columns):
        id, date, title, description = columns
        # There is no notes column.
        return id, date, title, description, None
 
  if __name__ == '__main__':
  print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
  print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
  ScraperImplementation().doScrape()
 
  import sys,os
  sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
  import genericScrapers
  import scrape
  from bs4 import BeautifulSoup
 
  #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Five-column disclog located by its table summary attribute."""

    def getTable(self, soup):
        # The table is identified by its accessibility summary text.
        return soup.find(summary="This table shows every FOI request to date.")

    def getColumnCount(self):
        return 5

    def getColumns(self, columns):
        id, date, title, description, notes = columns
        return id, date, title, description, notes
 
  if __name__ == '__main__':
  print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
  print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
  ScraperImplementation().doScrape()
 
  import sys,os
  sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
  import genericScrapers
  import scrape
  from bs4 import BeautifulSoup
 
  #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Five-column disclog (id, date, title, description, notes) inside a
    SharePoint rich-HTML field."""

    def getTable(self, soup):
        # First table inside the SharePoint rich-HTML control wrapper.
        return soup.find(id="ctl00_PlaceHolderMain_ctl01__ControlWrapper_RichHtmlField").table

    def getColumnCount(self):
        return 5

    def getColumns(self, columns):
        id, date, title, description, notes = columns
        return id, date, title, description, notes
 
  if __name__ == '__main__':
  print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
  print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
  ScraperImplementation().doScrape()
 
  import sys,os
  sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
  import genericScrapers
  import scrape
  from bs4 import BeautifulSoup
 
  #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Seven-column disclog located by its table summary attribute; the
    link and deletion-date cells are discarded."""

    def getTable(self, soup):
        # NOTE(review): the summary text mentions courses, not FOI — the
        # agency appears to have reused a template; kept as the site has it.
        return soup.find(summary="This table lists the schedule of upcoming courses.")

    def getColumnCount(self):
        return 7

    def getColumns(self, columns):
        id, date, title, description, link, deldate, notes = columns
        # link and deldate are not propagated.
        return id, date, title, description, notes
 
  if __name__ == '__main__':
  print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
  print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
  ScraperImplementation().doScrape()
 
  import sys,os
  sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
  import genericScrapers
  import scrape
  from bs4 import BeautifulSoup
 
  #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Disclosure-log scraper for a five-column table:
    (id, date, title, description, notes)."""

    def getColumnCount(self):
        # Five cells per row.
        return 5

    def getColumns(self, columns):
        # Cells already match the generic scraper's expected order.
        id, date, title, description, notes = columns
        return id, date, title, description, notes
 
  if __name__ == '__main__':
  print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
  print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
  ScraperImplementation().doScrape()
 
  import sys,os
  sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
  import genericScrapers
  import scrape
  from bs4 import BeautifulSoup
 
  #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Seven-column disclog inside the page's "main" element; link and
    deletion-date cells are discarded."""

    def getTable(self, soup):
        # First table inside the element with id "main".
        return soup.find(id="main").table

    def getColumnCount(self):
        return 7

    def getColumns(self, columns):
        id, date, title, description, link, deldate, notes = columns
        # link and deldate are not propagated.
        return id, date, title, description, notes
 
  if __name__ == '__main__':
  print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
  print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
  ScraperImplementation().doScrape()
 
  import sys,os
  sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
  import genericScrapers
  import scrape
  from bs4 import BeautifulSoup
 
  #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Six-column disclog (id, date, title, description, deldate, notes);
    the deletion-date cell is discarded."""

    def getColumnCount(self):
        return 6

    def getColumns(self, columns):
        id, date, title, description, deldate, notes = columns
        # deldate is not propagated.
        return id, date, title, description, notes
 
  if __name__ == '__main__':
  print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
  print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
  ScraperImplementation().doScrape()
 
  import sys,os
  sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
  import genericScrapers
  import scrape
  from bs4 import BeautifulSoup
 
  #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Disclosure-log scraper for a five-column table:
    (id, date, title, description, notes)."""

    def getColumnCount(self):
        # Five cells per row.
        return 5

    def getColumns(self, columns):
        # Cells already match the generic scraper's expected order.
        id, date, title, description, notes = columns
        return id, date, title, description, notes
 
  if __name__ == '__main__':
  print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
  print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
  ScraperImplementation().doScrape()
 
  import sys,os
  sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
  import genericScrapers
  import scrape
  from bs4 import BeautifulSoup
 
  #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Two-column disclog (date, title) inside the "inner_content"
    container; date doubles as id, title doubles as description."""

    def getTable(self, soup):
        # NOTE(review): returns the container itself rather than a .table
        # child as most scrapers do — presumably it is a table.
        return soup.find(id="inner_content")

    def getColumnCount(self):
        return 2

    def getColumns(self, columns):
        date, title = columns
        return date, date, title, title, None
 
  if __name__ == '__main__':
  print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
  print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
  ScraperImplementation().doScrape()
 
  import sys,os
  sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
  import genericScrapers
  import scrape
  from bs4 import BeautifulSoup
 
  #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Four-column disclog whose cells arrive as (id, title, date,
    description); reordered to the generic (id, date, title, ...) form."""

    def getColumnCount(self):
        return 4

    def getColumns(self, columns):
        id, title, date, description = columns
        # Swap title/date into the generic order; no notes column.
        return id, date, title, description, None
 
  if __name__ == '__main__':
  print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
  print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
  ScraperImplementation().doScrape()
 
  import sys,os
  sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
  import genericScrapers
  import scrape
  from bs4 import BeautifulSoup
 
  #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Seven-column disclog inside a SharePoint rich-HTML field; link and
    deletion-date cells are discarded."""

    def getTable(self, soup):
        # First table inside the SharePoint publishing-content wrapper.
        return soup.find(id="ctl00_PlaceHolderMain_PublishingPageContent__ControlWrapper_RichHtmlField").table

    def getColumnCount(self):
        return 7

    def getColumns(self, columns):
        id, date, title, description, link, deldate, notes = columns
        # link and deldate are not propagated.
        return id, date, title, description, notes
 
  if __name__ == '__main__':
  print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
  print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
  ScraperImplementation().doScrape()
 
<?php

// Emits an XML sitemap for the disclosure-log site: the index page, every
// top-level PHP page, and one low-priority entry per scraped FOI document.
include ('../include/common.inc.php');
// NOTE(review): $last_updated is computed but never used below — confirm
// whether a <lastmod> element was intended.
$last_updated = date('Y-m-d', @filemtime('cbrfeed.zip'));
header("Content-Type: text/xml");
echo "<?xml version='1.0' encoding='UTF-8'?>";
echo '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";
// Site index gets top priority.
echo " <url><loc>" . local_url() . "index.php</loc><priority>1.0</priority></url>\n";
foreach (scandir("./") as $file) {
    // Every other PHP page in this directory, except the sitemap itself.
    if (strpos($file, ".php") !== false && $file != "index.php" && $file != "sitemap.xml.php")
        echo " <url><loc>" . local_url() . "$file</loc><priority>0.6</priority></url>\n";
}

// One URL per document stored in the CouchDB "all" view.
$db = $server->get_db('disclosr-foidocuments');
try {
    $rows = $db->get_view("app", "all")->rows;
    foreach ($rows as $row) {
        echo '<url><loc>' . local_url() . 'view.php?id=' . $row->value->_id . "</loc><priority>0.3</priority></url>\n";
    }
} catch (SetteeRestClientException $e) {
    setteErrorHandler($e);
}
echo '</urlset>';
?>
 
<?php <?php
   
function include_header_documents($title) { function include_header_documents($title) {
?> ?>
<!doctype html> <!doctype html>
<!-- paulirish.com/2008/conditional-stylesheets-vs-css-hacks-answer-neither/ --> <!-- paulirish.com/2008/conditional-stylesheets-vs-css-hacks-answer-neither/ -->
<!--[if lt IE 7]> <html class="no-js lt-ie9 lt-ie8 lt-ie7" lang="en"> <![endif]--> <!--[if lt IE 7]> <html class="no-js lt-ie9 lt-ie8 lt-ie7" lang="en"> <![endif]-->
<!--[if IE 7]> <html class="no-js lt-ie9 lt-ie8" lang="en"> <![endif]--> <!--[if IE 7]> <html class="no-js lt-ie9 lt-ie8" lang="en"> <![endif]-->
<!--[if IE 8]> <html class="no-js lt-ie9" lang="en"> <![endif]--> <!--[if IE 8]> <html class="no-js lt-ie9" lang="en"> <![endif]-->
<!-- Consider adding a manifest.appcache: h5bp.com/d/Offline --> <!-- Consider adding a manifest.appcache: h5bp.com/d/Offline -->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en"> <!--<![endif]--> <!--[if gt IE 8]><!--> <html class="no-js" lang="en"> <!--<![endif]-->
<head> <head>
<meta charset="utf-8"> <meta charset="utf-8">
   
<!-- Use the .htaccess and remove these lines to avoid edge case issues. <!-- Use the .htaccess and remove these lines to avoid edge case issues.
More info: h5bp.com/i/378 --> More info: h5bp.com/i/378 -->
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1"> <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
   
<title>Australian Disclosure Logs<?php if ($title != "") echo " - $title";?></title> <title>Australian Disclosure Logs<?php if ($title != "") echo " - $title";?></title>
<meta name="description" content=""> <meta name="description" content="">
   
<!-- Mobile viewport optimized: h5bp.com/viewport --> <!-- Mobile viewport optimized: h5bp.com/viewport -->
<meta name="viewport" content="width=device-width"> <meta name="viewport" content="width=device-width">
   
<!-- Place favicon.ico and apple-touch-icon.png in the root directory: mathiasbynens.be/notes/touch-icons --> <!-- Place favicon.ico and apple-touch-icon.png in the root directory: mathiasbynens.be/notes/touch-icons -->
<meta name="google-site-verification" content="jkknX5g2FCpQvrW030b1Nq2hyoa6mb3EDiA7kCoHNj8" /> <meta name="google-site-verification" content="jkknX5g2FCpQvrW030b1Nq2hyoa6mb3EDiA7kCoHNj8" />
   
<!-- Le styles --> <!-- Le styles -->
<link href="css/bootstrap.min.css" rel="stylesheet"> <link href="css/bootstrap.min.css" rel="stylesheet">
<style type="text/css"> <style type="text/css">
body { body {
padding-top: 60px; padding-top: 60px;
padding-bottom: 40px; padding-bottom: 40px;
} }
.sidebar-nav { .sidebar-nav {
padding: 9px 0; padding: 9px 0;
} }
</style> </style>
<link href="css/bootstrap-responsive.min.css" rel="stylesheet"> <link href="css/bootstrap-responsive.min.css" rel="stylesheet">
   
<!-- HTML5 shim, for IE6-8 support of HTML5 elements --> <!-- HTML5 shim, for IE6-8 support of HTML5 elements -->
<!--[if lt IE 9]> <!--[if lt IE 9]>
<script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script> <script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
<![endif]--> <![endif]-->
<!-- More ideas for your <head> here: h5bp.com/d/head-Tips --> <!-- More ideas for your <head> here: h5bp.com/d/head-Tips -->
   
<!-- All JavaScript at the bottom, except this Modernizr build. <!-- All JavaScript at the bottom, except this Modernizr build.
Modernizr enables HTML5 elements & feature detects for optimal performance. Modernizr enables HTML5 elements & feature detects for optimal performance.
Create your own custom Modernizr build: www.modernizr.com/download/ Create your own custom Modernizr build: www.modernizr.com/download/
<script src="js/libs/modernizr-2.5.3.min.js"></script>--> <script src="js/libs/modernizr-2.5.3.min.js"></script>-->
<script src="js/jquery.js"></script> <script src="js/jquery.js"></script>
<script type="text/javascript" src="js/flotr2.min.js"></script> <script type="text/javascript" src="js/flotr2.min.js"></script>
</head> </head>
<body> <body>
<div class="navbar navbar-inverse navbar-fixed-top"> <div class="navbar navbar-inverse navbar-fixed-top">
<div class="navbar-inner"> <div class="navbar-inner">
<div class="container-fluid"> <div class="container-fluid">
<a class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse"> <a class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
<span class="icon-bar"></span> <span class="icon-bar"></span>
<span class="icon-bar"></span> <span class="icon-bar"></span>
<span class="icon-bar"></span> <span class="icon-bar"></span>
</a> </a>
<a class="brand" href="#">Australian Disclosure Logs</a> <a class="brand" href="#">Australian Disclosure Logs</a>
<div class="nav-collapse collapse"> <div class="nav-collapse collapse">
<p class="navbar-text pull-right"> <p class="navbar-text pull-right">
Check out our subsites on: Check out our subsites on:
<a href="http://orgs.disclosurelo.gs">Government Agencies</a> <a href="http://orgs.disclosurelo.gs">Government Agencies</a>
• <a href="http://lobbyists.disclosurelo.gs">Political Lobbyists</a> • <a href="http://lobbyists.disclosurelo.gs">Political Lobbyists</a>
• <a href="http://contracts.disclosurelo.gs">Government Contracts and Spending</a> • <a href="http://contracts.disclosurelo.gs">Government Contracts and Spending</a>
   
</p> </p>
<ul class="nav"> <ul class="nav">
<li><a href="index.php">Home</a></li> <li><a href="index.php">Home</a></li>
<li><a href="disclogsList.php">List of Disclosure Logs</a></li> <li><a href="disclogsList.php">List of Disclosure Logs</a></li>
<li><a href="about.php">About</a></li> <li><a href="about.php">About</a></li>
</ul> </ul>
</div><!--/.nav-collapse --> </div><!--/.nav-collapse -->
</div> </div>
</div> </div>
</div> </div>
<div class="container"> <div class="container">
<?php <?php
} }
function include_footer_documents() {
    // Closes the page <div class="container"> opened by
    // include_header_documents(), prints the footer, the Google Analytics
    // snippet, and the closing </body></html> tags.
    // NOTE(review): the source chunk was a side-by-side diff dump with every
    // line duplicated; this is the reconstructed single-column markup.
    ?>
    </div> <!-- /container -->
    <hr>

    <footer>
        <p>&copy; Company 2012</p>
    </footer>
    <script type="text/javascript">

        var _gaq = _gaq || [];
        _gaq.push(['_setAccount', 'UA-12341040-4']);
        _gaq.push(['_setDomainName', 'disclosurelo.gs']);
        _gaq.push(['_setAllowLinker', true]);
        _gaq.push(['_trackPageview']);

        (function() {
            var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
            ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
            var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
        })();

    </script>
    <!-- Le javascript
    ================================================== -->
    <!-- Placed at the end of the document so the pages load faster -->
    <!--
    <script src="js/bootstrap-transition.js"></script>
    <script src="js/bootstrap-alert.js"></script>
    <script src="js/bootstrap-modal.js"></script>
    <script src="js/bootstrap-dropdown.js"></script>
    <script src="js/bootstrap-scrollspy.js"></script>
    <script src="js/bootstrap-tab.js"></script>
    <script src="js/bootstrap-tooltip.js"></script>
    <script src="js/bootstrap-popover.js"></script>
    <script src="js/bootstrap-button.js"></script>
    <script src="js/bootstrap-collapse.js"></script>
    <script src="js/bootstrap-carousel.js"></script>
    <script src="js/bootstrap-typeahead.js"></script>-->


</body>
</html>
<?php
}
   
/**
 * Render one FOI disclosure-log entry as an HTML fragment.
 *
 * @param object $row      view row with a ->value document (date, title,
 *                         agencyID, description, url, docID, and optionally
 *                         notes and links)
 * @param array  $idtoname map of agency document id => agency display name
 * @return string HTML <div> for the entry
 *
 * NOTE(review): the entry fields come from the CouchDB document and are
 * emitted unescaped (only docID is passed through strip_tags) — if any of
 * these values can contain user-supplied markup this is an XSS risk; consider
 * htmlspecialchars() on each interpolated value.
 */
function displayLogEntry($row, $idtoname) {
    $result = "";
    $result .= "<div><h2>" . $row->value->date . ": " . $row->value->title . " (" . $idtoname[$row->value->agencyID] . ")</h2> <p>" . str_replace("\n", "<br>", $row->value->description);
    if (isset($row->value->notes)) {
        $result .= " <br>Note: " . $row->value->notes;
    }
    $result .= "</p>";

    if (isset($row->value->links)) {
        $result .= "<h3>Links/Documents</h3><ul>";
        foreach ($row->value->links as $link) {
            $result .= "<li><a href='$link'>" . $link . "</a></li>";
        }
        $result .= "</ul>";
    }
    $result .= "<small><A href='" . $row->value->url . "'>View original source...</a> ID: " . strip_tags($row->value->docID) . "</small>";
    $result .= "</div>";
    return $result;
}
   
<?php
// Render a single FOI disclosure-log entry identified by $_REQUEST['id'].
// NOTE(review): reconstructed from a side-by-side diff dump; relies on
// $server, SetteeRestClientException and setteErrorHandler() provided by
// ../include/common.inc.php.
include('template.inc.php');
include_header_documents("");
include_once('../include/common.inc.php');

// Build a map of agency document id => display name for displayLogEntry().
$agenciesdb = $server->get_db('disclosr-agencies');
$idtoname = Array();
foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) {
    $idtoname[$row->id] = trim($row->value->name);
}

$foidocsdb = $server->get_db('disclosr-foidocuments');
try {
    // displayLogEntry() expects a view-row-shaped object (->value holds the
    // document), so wrap the raw document fetched by id.
    $obj = new stdClass();
    $obj->value = $foidocsdb->get($_REQUEST['id']);
    echo displayLogEntry($obj, $idtoname);
} catch (SetteeRestClientException $e) {
    setteErrorHandler($e);
}
include_footer_documents();
?>
 
file:a/lib/FeedItem.php (deleted)
<?php  
/**
 * Universal Feed Writer
 *
 * FeedItem - a single feed entry (RSS <item> / ATOM <entry>) collected and
 * printed by the FeedWriter class.
 *
 * @package UnivarselFeedWriter
 * @author Anis uddin Ahmad <anisniit@gmail.com>
 * @link http://www.ajaxray.com/projects/rss
 */
class FeedItem
{
    // 'tagName' => array('name' => ..., 'content' => ..., 'attributes' => ...)
    private $elements = array();
    // Feed format this item belongs to (RSS1/RSS2/ATOM constant).
    private $version;

    /**
     * Constructor
     *
     * @param constant (RSS1/RSS2/ATOM) RSS2 is default.
     */
    function __construct($version = RSS2)
    {
        $this->version = $version;
    }

    /**
     * Add (or overwrite) a single element of this item.
     *
     * @access public
     * @param string The tag name of an element
     * @param string The content of tag
     * @param array Attributes (if any) in 'attrName' => 'attrValue' format
     * @return void
     */
    public function addElement($elementName, $content, $attributes = null)
    {
        $this->elements[$elementName] = array(
            'name'       => $elementName,
            'content'    => $content,
            'attributes' => $attributes,
        );
    }

    /**
     * Set multiple feed elements from an array.
     * Elements which have attributes cannot be added by this method.
     *
     * @access public
     * @param array array of elements in 'tagName' => 'tagContent' format.
     * @return void
     */
    public function addElementArray($elementArray)
    {
        if (!is_array($elementArray)) {
            return;
        }
        foreach ($elementArray as $tagName => $tagContent) {
            $this->addElement($tagName, $tagContent);
        }
    }

    /**
     * Return the collection of elements in this feed item.
     *
     * @access public
     * @return array
     */
    public function getElements()
    {
        return $this->elements;
    }

    // Wrapper functions ------------------------------------------------------

    /**
     * Set the 'description' element of feed item
     * (stored as 'summary' for ATOM feeds).
     *
     * @access public
     * @param string The content of 'description' element
     * @return void
     */
    public function setDescription($description)
    {
        if ($this->version == ATOM) {
            $this->addElement('summary', $description);
        } else {
            $this->addElement('description', $description);
        }
    }

    /**
     * @desc Set the 'title' element of feed item
     * @access public
     * @param string The content of 'title' element
     * @return void
     */
    public function setTitle($title)
    {
        $this->addElement('title', $title);
    }

    /**
     * Set the date element of the feed item, using the tag and date format
     * appropriate for the feed version (updated/pubDate/dc:date).
     *
     * @access public
     * @param string The content of 'date' element (timestamp or parseable string)
     * @return void
     */
    public function setDate($date)
    {
        $timestamp = is_numeric($date) ? $date : strtotime($date);

        if ($this->version == ATOM) {
            $this->addElement('updated', date(DATE_ATOM, $timestamp));
        } elseif ($this->version == RSS2) {
            $this->addElement('pubDate', date(DATE_RSS, $timestamp));
        } else {
            $this->addElement('dc:date', date("Y-m-d", $timestamp));
        }
    }

    /**
     * Set the 'link' element of feed item. ATOM feeds get the link as an
     * href attribute plus a derived 'id' element.
     *
     * @access public
     * @param string The content of 'link' element
     * @return void
     */
    public function setLink($link)
    {
        if ($this->version == RSS2 || $this->version == RSS1) {
            $this->addElement('link', $link);
            return;
        }
        $this->addElement('link', '', array('href' => $link));
        $this->addElement('id', FeedWriter::uuid($link, 'urn:uuid:'));
    }

    /**
     * Set the 'enclosure' element of feed item.
     * For RSS 2.0 only.
     *
     * @access public
     * @param string The url attribute of enclosure tag
     * @param string The length attribute of enclosure tag
     * @param string The type attribute of enclosure tag
     * @return void
     */
    public function setEncloser($url, $length, $type)
    {
        $this->addElement('enclosure', '', array(
            'url'    => $url,
            'length' => $length,
            'type'   => $type,
        ));
    }

} // end of class FeedItem
?>  
 
file:a/lib/FeedWriter.php (deleted)
<?php
// RSS 0.90 Officially obsoleted by 1.0
// RSS 0.91, 0.92, 0.93 and 0.94 Officially obsoleted by 2.0
// So, define constants for RSS 1.0, RSS 2.0 and ATOM.
// The third ($case_insensitive = true) argument previously passed to define()
// was deprecated in PHP 7.3 and removed in PHP 8.0, so it has been dropped;
// the constants are referenced in their exact case throughout this library.
define('RSS1', 'RSS 1.0');
define('RSS2', 'RSS 2.0');
define('ATOM', 'ATOM');
 
/**
 * Universal Feed Writer class
 *
 * Generates RSS 1.0, RSS 2.0 and ATOM feeds (selected via the RSS1/RSS2/ATOM
 * constants) from channel elements and FeedItem objects.
 *
 * NOTE(review): output is produced with header()/echo calls in genarateFeed(),
 * so it must run before any other output has been sent.
 *
 * @package UnivarselFeedWriter
 * @author Anis uddin Ahmad <anisniit@gmail.com>
 * @link http://www.ajaxray.com/projects/rss
 */
class FeedWriter
{
private $channels = array(); // Collection of channel elements ('tagName' => content)
private $items = array(); // Collection of items as object of FeedItem class.
private $data = array(); // Store some other version wise data (e.g. 'ChannelAbout' for RSS 1.0)
private $CDATAEncoding = array(); // The tag names which have to be encoded as CDATA

private $version = null; // One of the RSS1/RSS2/ATOM constants.

/**
 * Constructor
 *
 * @param constant the version constant (RSS1/RSS2/ATOM).
 */
function __construct($version = RSS2)
{
$this->version = $version;

// Setting default values for essential channel elements
$this->channels['title'] = $version . ' Feed';
$this->channels['link'] = 'http://www.ajaxray.com/blog';

//Tag names to encode in CDATA
$this->CDATAEncoding = array('description', 'content:encoded', 'summary');
}

// Start # public functions ---------------------------------------------

/**
 * Set a channel element
 * @access public
 * @param string name of the channel tag
 * @param string content of the channel tag
 * @return void
 */
public function setChannelElement($elementName, $content)
{
$this->channels[$elementName] = $content ;
}

/**
 * Set multiple channel elements from an array. Array elements
 * should be 'channelName' => 'channelContent' format.
 *
 * @access public
 * @param array array of channels
 * @return void
 */
public function setChannelElementsFromArray($elementArray)
{
if(! is_array($elementArray)) return;
foreach ($elementArray as $elementName => $content)
{
$this->setChannelElement($elementName, $content);
}
}

/**
 * Generate the actual RSS/ATOM file: sends the XML content-type header and
 * echoes head, channel elements, items and closing tags in order.
 * (Method name typo is preserved for backward compatibility.)
 *
 * @access public
 * @return void
 */
public function genarateFeed()
{
header("Content-type: text/xml");

$this->printHead();
$this->printChannels();
$this->printItems();
$this->printTale();
}

/**
 * Create a new FeedItem bound to this writer's feed version.
 *
 * @access public
 * @return object instance of FeedItem class
 */
public function createNewItem()
{
$Item = new FeedItem($this->version);
return $Item;
}

/**
 * Add a FeedItem to the main class
 *
 * @access public
 * @param object instance of FeedItem class
 * @return void
 */
public function addItem($feedItem)
{
$this->items[] = $feedItem;
}


// Wrapper functions -------------------------------------------------------------------

/**
 * Set the 'title' channel element
 *
 * @access public
 * @param string value of 'title' channel tag
 * @return void
 */
public function setTitle($title)
{
$this->setChannelElement('title', $title);
}

/**
 * Set the 'description' channel element
 *
 * @access public
 * @param string value of 'description' channel tag
 * @return void
 */
public function setDescription($desciption)
{
$this->setChannelElement('description', $desciption);
}

/**
 * Set the 'link' channel element
 *
 * @access public
 * @param string value of 'link' channel tag
 * @return void
 */
public function setLink($link)
{
$this->setChannelElement('link', $link);
}

/**
 * Set the 'image' channel element
 *
 * @access public
 * @param string title of image
 * @param string link url of the image
 * @param string path url of the image
 * @return void
 */
public function setImage($title, $link, $url)
{
$this->setChannelElement('image', array('title'=>$title, 'link'=>$link, 'url'=>$url));
}

/**
 * Set the 'about' channel element. Only for RSS 1.0
 * (used as the rdf:about attribute of <channel> in printChannels()).
 *
 * @access public
 * @param string value of 'about' channel tag
 * @return void
 */
public function setChannelAbout($url)
{
$this->data['ChannelAbout'] = $url;
}

/**
 * Generates a UUID-shaped string from the md5 of the key
 * (not an RFC 4122 UUID — it is a hash formatted with dashes).
 * @author Anis uddin Ahmad <admin@ajaxray.com>
 * @param string an optional prefix
 * @return string the formatted uuid
 */
public function uuid($key = null, $prefix = '')
{
$key = ($key == null)? uniqid(rand()) : $key;
$chars = md5($key);
$uuid = substr($chars,0,8) . '-';
$uuid .= substr($chars,8,4) . '-';
$uuid .= substr($chars,12,4) . '-';
$uuid .= substr($chars,16,4) . '-';
$uuid .= substr($chars,20,12);

return $prefix . $uuid;
}
// End # public functions ----------------------------------------------

// Start # private functions ----------------------------------------------

/**
 * Prints the xml declaration and the version-specific root element
 * with its namespaces.
 *
 * @access private
 * @return void
 */
private function printHead()
{
$out = '<?xml version="1.0" encoding="utf-8"?>' . "\n";

if($this->version == RSS2)
{
$out .= '<rss version="2.0"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:wfw="http://wellformedweb.org/CommentAPI/"
>' . PHP_EOL;
}
elseif($this->version == RSS1)
{
$out .= '<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns="http://purl.org/rss/1.0/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
>' . PHP_EOL;;
}
else if($this->version == ATOM)
{
$out .= '<feed xmlns="http://www.w3.org/2005/Atom">' . PHP_EOL;;
}
echo $out;
}

/**
 * Closes the open root tags at the end of file.
 * (For RSS 2.0 </channel> is closed here; for RSS 1.0 it was already
 * closed by printChannels().)
 *
 * @access private
 * @return void
 */
private function printTale()
{
if($this->version == RSS2)
{
echo '</channel>' . PHP_EOL . '</rss>';
}
elseif($this->version == RSS1)
{
echo '</rdf:RDF>';
}
else if($this->version == ATOM)
{
echo '</feed>';
}

}

/**
 * Creates a single node as xml format. Content is either CDATA-wrapped
 * (for tag names listed in $CDATAEncoding) or run through htmlentities().
 *
 * @access private
 * @param string name of the tag
 * @param mixed tag value as string or array of nested tags in 'tagName' => 'tagValue' format
 * @param array Attributes(if any) in 'attrName' => 'attrValue' format
 * @return string formatted xml tag
 */
private function makeNode($tagName, $tagContent, $attributes = null)
{
$nodeText = '';
$attrText = '';

if(is_array($attributes))
{
foreach ($attributes as $key => $value)
{
$attrText .= " $key=\"$value\" ";
}
}

// NOTE(review): for RSS 1.0 an array value REPLACES any attributes built
// above with rdf:parseType="Resource" — existing behavior, kept as-is.
if(is_array($tagContent) && $this->version == RSS1)
{
$attrText = ' rdf:parseType="Resource"';
}


$attrText .= (in_array($tagName, $this->CDATAEncoding) && $this->version == ATOM)? ' type="html" ' : '';
$nodeText .= (in_array($tagName, $this->CDATAEncoding))? "<{$tagName}{$attrText}><![CDATA[" : "<{$tagName}{$attrText}>";

if(is_array($tagContent))
{
foreach ($tagContent as $key => $value)
{
$nodeText .= $this->makeNode($key, $value);
}
}
else
{
$nodeText .= (in_array($tagName, $this->CDATAEncoding))? $tagContent : htmlentities($tagContent);
}

$nodeText .= (in_array($tagName, $this->CDATAEncoding))? "]]></$tagName>" : "</$tagName>";

return $nodeText . PHP_EOL;
}

/**
 * @desc Print the channel opening tag and the channel elements
 * @access private
 * @return void
 */
private function printChannels()
{
//Start channel tag
switch ($this->version)
{
case RSS2:
echo '<channel>' . PHP_EOL;
break;
case RSS1:
echo (isset($this->data['ChannelAbout']))? "<channel rdf:about=\"{$this->data['ChannelAbout']}\">" : "<channel rdf:about=\"{$this->channels['link']}\">";
break;
}

//Print Items of channel
foreach ($this->channels as $key => $value)
{
if($this->version == ATOM && $key == 'link')
{
// ATOM prints link element as href attribute
echo $this->makeNode($key,'',array('href'=>$value));
//Add the id for ATOM
echo $this->makeNode('id',$this->uuid($value,'urn:uuid:'));
}
else
{
echo $this->makeNode($key, $value);
}

}

//RSS 1.0 has a special <items><rdf:Seq> table of contents inside channel
if($this->version == RSS1)
{
echo "<items>" . PHP_EOL . "<rdf:Seq>" . PHP_EOL;
foreach ($this->items as $item)
{
$thisItems = $item->getElements();
echo "<rdf:li resource=\"{$thisItems['link']['content']}\"/>" . PHP_EOL;
}
echo "</rdf:Seq>" . PHP_EOL . "</items>" . PHP_EOL . "</channel>" . PHP_EOL;
}
}

/**
 * Prints formatted feed items
 *
 * @access private
 * @return void
 */
private function printItems()
{
foreach ($this->items as $item)
{
$thisItems = $item->getElements();

//the argument is printed as rdf:about attribute of item in rss 1.0
echo $this->startItem($thisItems['link']['content']);

foreach ($thisItems as $feedItem )
{
echo $this->makeNode($feedItem['name'], $feedItem['content'], $feedItem['attributes']);
}
echo $this->endItem();
}
}

/**
 * Make the starting tag of an item/entry.
 * Dies if no link is set for an RSS 1.0 item (it is required there as
 * the rdf:about attribute).
 *
 * @access private
 * @param string The value of the about attribute, used only for RSS 1.0
 * @return void
 */
private function startItem($about = false)
{
if($this->version == RSS2)
{
echo '<item>' . PHP_EOL;
}
elseif($this->version == RSS1)
{
if($about)
{
echo "<item rdf:about=\"$about\">" . PHP_EOL;
}
else
{
die('link element is not set .\n It\'s required for RSS 1.0 to be used as about attribute of item');
}
}
else if($this->version == ATOM)
{
echo "<entry>" . PHP_EOL;
}
}

/**
 * Closes feed item tag
 *
 * @access private
 * @return void
 */
private function endItem()
{
if($this->version == RSS2 || $this->version == RSS1)
{
echo '</item>' . PHP_EOL;
}
else if($this->version == ATOM)
{
echo "</entry>" . PHP_EOL;
}
}



// End # private functions ----------------------------------------------

} // end of class FeedWriter
 
// autoload classes
// Declaring a function named __autoload() is a fatal error since PHP 8.0
// (the magic hook was deprecated in 7.2 and removed in 8.0), so register the
// same require-by-class-name loader through spl_autoload_register() instead.
spl_autoload_register(function ($class_name) {
    require_once $class_name . '.php';
});