Merge branch 'master' of ssh://apples.lambdacomplex.org/git/disclosr


Former-commit-id: 49a3d73556fe6378d4b2863cbfa4d74190523969

file:a/about.php -> file:b/about.php
<?php
include_once('include/common.inc.php');
include_header('About');
?>
<div class="foundation-header">
    <h1><a href="about.php">About/FAQ</a></h1>
</div>
<h2> What is this? </h2>
Disclo.gs is a project to monitor Australian Federal Government agencies'
compliance with their <a href="http://www.oaic.gov.au/publications/other_operational/foi_policy_frequently_asked_questions.html#_Toc291837571">"proactive disclosure requirements"</a> and to build a transparency league table, as suggested by the Gov 2.0 Taskforce: http://gov2.net.au/blog/2009/09/19/a-league-ladder-of-psi-openness/.
<h2> Attributions </h2>
National Archives of Australia, Australian Governments' Interactive Functions Thesaurus, 2nd edition, September 2005, published at http://www.naa.gov.au/recordkeeping/thesaurus/index.htm <br/>
data.gov.au http://data.gov.au/dataset/directory-gov-au-full-data-export/ <br/>
directory.gov.au <br/>
australia.gov.au http://australia.gov.au/about/copyright <br/>
<h2> Open everything </h2>
All documents released CC-BY 3.0 AU.<br/>
Open source git @

<h2>Organisational Data Sources</h2>

http://www.comlaw.gov.au/Browse/Results/ByTitle/AdministrativeArrangementsOrders/Current/Ad/0 defines departments.
Agencies can be found in the Schedule to an Appropriation Bill (budget), the Schedule to the FMA Regulations and/or the Public Service Act.<br>

http://www.finance.gov.au/publications/flipchart/docs/FMACACFlipchart.pdf summarises these. view-source:https://www.tenders.gov.au/?event=public.advancedsearch.home is great for the suspended/active status.<br>

Fraud in government departments is reported by Fairfax Media: http://www.smh.com.au/national/public-service-keeps-fraud-cases-private-20110923-1kpdr.html <br>

When defining the hierarchy, this system is designed towards monitoring accountability. Thus large agencies that have registered their own ABN
and have their own accountability mechanisms/website receive a separate record as a child of their department.<br>
Some small agencies will choose to simply rely on their parent department's accountability measures.<br>

This flows through to organisation names and other/past names. A department that completely accounts for an agency will list that agency as an "other child" name.<br>
As agencies themselves shift between departments, there may be scope for recording time ranges, but typically only the newest hierarchy will be recorded.<br>
A department/agency name will be the newest active name assigned to that ABN.<br>

ABN information is derived from the ABR. This is the definitive umpire on which former name should be linked to which current name. <br>
For example, "Department of Transport and Regional Services" became "Department of Infrastructure, Transport, Regional Development and Local Government" (same ABN);
however, it later split into "Department of Infrastructure and Transport" (same ABN)
and "Department of Regional Australia, Regional Development and Local Government" (new ABN).<br>

Statistical information comes from http://www.apsc.gov.au/stateoftheservice/1011/statsbulletin/section1.html#t2total https://www.apsedii.gov.au/apsedii/CustomQueryx33.shtml
and individual annual reports.<br>
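As a rough illustration of the hierarchy rules above, an agency record in the disclosr-agencies CouchDB database looks something like the following sketch. The values are invented; the field names are the ones the views in the admin code below rely on.

    # Hypothetical agency record (invented values) showing the fields the
    # couchdb views use: name/ABN, parent linkage, past names and status.
    agency = {
        "_id": "dit",                # invented document id
        "name": "Department of Infrastructure and Transport",
        "shortName": "DIT",
        "otherNames": ["Department of Transport and Regional Services"],
        "orgType": "FMA-DepartmentOfState",
        "parentOrg": None,           # child agencies point at their department's _id
        "abn": "00 000 000 000",     # placeholder; real values come from the ABR
        "status": "active",          # or "suspended"
    }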
   
<h2>Webpage Assessment</h2>
Due care has been taken to correctly record disclosure URLs. Typically the "About", "Corporate", "Publications" and "Sitemap" sections are checked at the very least.
Occasionally it is necessary to use a site or Google search. In several rare cases, there is a secret "Disclosure" navigation menu you can find if you find one of the mandatory publishing obligations in that category (seriously).<br>
Some rules about leniency:<br>
<ul>
<li>An empty FOI disclosure log counts; a page outlining what the FOI Act is does not.</li>
<li>A disclosure log in PDF or Word format counts :(</li>
<li>An empty File/Record list counts (although it is very minimalistic to claim to have no files, electronic or paper).</li>
<li>Only a current Information Publication Scheme page counts, not a s.9 FOI Act page or an organisation chart.</li>
<li>If there isn't a page easily listing all current and past Annual Reports, the most current one (HTML, PDF) counts.</li>
<li>Consultancy contracts might not need their own webpage (if in the Annual Report), and grants/appointments might not apply to all organisations, but Legal Services Expenditure (and all other obligations) does need a webpage.</li>
</ul>
   
<h2>Open Government Scoring</h2>
+1 point for every true Has... attribute<br>
-1 point for every false Has... (i.e. Has Not) attribute<br>

Don't like this? Make your own score; suggest a better scoring mechanism.<br>
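A minimal Python sketch of that rule, for illustration only (the site's actual tally is computed by the scoreHas couchdb view in the admin scripts below, and the attribute names here are illustrative, not the live schema):

    def score(agency):
        # +1 for every true "has..." attribute, -1 for every false one.
        points = 0
        for key, value in agency.items():
            if key.startswith("has"):
                points += 1 if value else -1
        return points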
   
<?php
include_footer();
?>
   
<?php

require_once '../include/common.inc.php';
//function createFOIDocumentsDesignDoc() {

$foidb = $server->get_db('disclosr-foidocuments');
$obj = new stdClass();
$obj->_id = "_design/" . urlencode("app");
$obj->language = "javascript";
$obj->views->all->map = "function(doc) { emit(doc._id, doc); };";
$obj->views->byDate->map = "function(doc) { emit(doc.date, doc); };";
$obj->views->byDateMonthYear->map = "function(doc) { emit(doc.date, doc); };";
$obj->views->byDateMonthYear->reduce = "_count";
$obj->views->byAgencyID->map = "function(doc) { emit(doc.agencyID, doc); };";
$obj->views->byAgencyID->reduce = "_count";
$obj->views->fieldNames->map = '
function(doc) {
    for (var propName in doc) {
        emit(propName, doc._id);
    }
}';
$obj->views->fieldNames->reduce = 'function (key, values, rereduce) {
    return values.length;
}';
// allow safe updates (even if slightly slower due to extra: rev-detection check).
$foidb->save($obj, true);


//function createDocumentsDesignDoc() {
$docdb = $server->get_db('disclosr-documents');

$obj = new stdClass();
$obj->_id = "_design/" . urlencode("app");
$obj->language = "javascript";
$obj->views->web_server->map = "function(doc) {\n emit(doc.web_server, 1);\n}";
$obj->views->web_server->reduce = "_sum";
$obj->views->byAgency->map = "function(doc) {\n emit(doc.agencyID, 1);\n}";
$obj->views->byAgency->reduce = "_sum";
$obj->views->byURL->map = "function(doc) {\n emit(doc.url, doc);\n}";
$obj->views->agency->map = "function(doc) {\n emit(doc.agencyID, doc);\n}";
$obj->views->byWebServer->map = "function(doc) {\n emit(doc.web_server, doc);\n}";

$obj->views->datasets->map = "function(doc) {\nif (doc.fieldName == \"data\") {\n emit(doc._id, doc);\n}\n}";
$obj->views->datasetGroups->map = "function(doc) {\nif (doc.fieldName == \"data\") {\n doc.metadata[\"data.gov.au Category\"] && doc.metadata[\"data.gov.au Category\"].forEach(function(tag) {\n emit(tag, doc.url); \n });\n}\n}";
$obj->views->getValidationRequired->map = "function(doc) {\nif (doc.mime_type == \"text/html\" \n&& typeof(doc.validation) == \"undefined\") {\n emit(doc._id, doc._attachments);\n}\n}";
$docdb->save($obj, true);




//function createAgencyDesignDoc() {
$db = $server->get_db('disclosr-agencies');
$obj = new stdClass();
$obj->_id = "_design/" . urlencode("app");
$obj->language = "javascript";
$obj->views->all->map = "function(doc) { emit(doc._id, doc); };";
$obj->views->byABN->map = "function(doc) { emit(doc.abn, doc); };";
$obj->views->byCanonicalName->map = "function(doc) {
    if (doc.parentOrg || doc.orgType == 'FMA-DepartmentOfState') {
        emit(doc.name, doc);
    }
};";
$obj->views->byDeptStateName->map = "function(doc) {
    if (doc.orgType == 'FMA-DepartmentOfState') {
        emit(doc.name, doc._id);
    }
};";
$obj->views->parentOrgs->map = "function(doc) {
    if (doc.parentOrg) {
        emit(doc._id, doc.parentOrg);
    }
};";
$obj->views->byName->map = 'function(doc) {
    if (typeof(doc["status"]) == "undefined" || doc["status"] != "suspended") {
        emit(doc.name, doc._id);
        if (typeof(doc.shortName) != "undefined" && doc.shortName != doc.name) {
            emit(doc.shortName, doc._id);
        }
        for (name in doc.otherNames) {
            if (doc.otherNames[name] != "" && doc.otherNames[name] != doc.name) {
                emit(doc.otherNames[name], doc._id);
            }
        }
        for (name in doc.foiBodies) {
            if (doc.foiBodies[name] != "" && doc.foiBodies[name] != doc.name) {
                emit(doc.foiBodies[name], doc._id);
            }
        }
        for (name in doc.positions) {
            if (doc.positions[name] != "" && doc.positions[name] != doc.name) {
                emit(doc.positions[name], doc._id);
            }
        }
    }
};';

$obj->views->foiEmails->map = "function(doc) {
    emit(doc._id, doc.foiEmail);
};";

$obj->views->byLastModified->map = "function(doc) { emit(doc.metadata.lastModified, doc); }";
$obj->views->getActive->map = 'function(doc) { if (doc.status == "active") { emit(doc._id, doc); } };';
$obj->views->getSuspended->map = 'function(doc) { if (doc.status == "suspended") { emit(doc._id, doc); } };';
$obj->views->getScrapeRequired->map = "function(doc) {

    // Date.parse() returns a timestamp in milliseconds (or NaN), not a Date
    // object, so compare the number directly rather than calling getTime() on it.
    var lastScrape = Date.parse(doc.metadata.lastScraped);

    var today = new Date();

    if (!lastScrape || lastScrape + 1000 != today.getTime()) {
        emit(doc._id, doc);
    }

};";
$obj->views->showNamesABNs->map = "function(doc) { emit(doc._id, {name: doc.name, abn: doc.abn}); };";
$obj->views->getConflicts->map = "function(doc) {
    if (doc._conflicts) {
        emit(null, [doc._rev].concat(doc._conflicts));
    }
}";
$obj->views->getStatistics->map =
    "function(doc) {
    if (doc.statistics) {
        for (var statisticSet in doc.statistics) {
            for (var statisticPeriod in doc.statistics[statisticSet]) {
                emit([statisticSet, statisticPeriod], doc.statistics[statisticSet][statisticPeriod]['value']);
            }
        }
    }
}";
$obj->views->getStatistics->reduce = '_sum';
// http://stackoverflow.com/questions/646628/javascript-startswith
$obj->views->score->map = 'if(!String.prototype.startsWith){
    String.prototype.startsWith = function (str) {
        return !this.indexOf(str);
    }
}

function(doc) {
    count = 0;
    if (doc["status"] != "suspended") {
        for (var propName in doc) {
            if (typeof(doc[propName]) != "undefined" && doc[propName] != "") {
                count++;
            }
        }
        portfolio = doc.parentOrg;
        if (doc.orgType == "FMA-DepartmentOfState") {
            portfolio = doc._id;
        }
        if (doc.orgType == "Court-Commonwealth" || doc.orgType == "FMA-DepartmentOfParliament") {
            portfolio = doc.orgType;
        }
        emit(count + doc._id, {id: doc._id, name: doc.name, score: count, orgType: doc.orgType, portfolio: portfolio});
    }
}';
$obj->views->scoreHas->map = 'if(!String.prototype.startsWith){
    String.prototype.startsWith = function (str) {
        return !this.indexOf(str);
    }
}
if(!String.prototype.endsWith){
    String.prototype.endsWith = function(suffix) {
        return this.indexOf(suffix, this.length - suffix.length) !== -1;
    };
}
function(doc) {
    if (typeof(doc["status"]) == "undefined" || doc["status"] != "suspended") {
        for (var propName in doc) {
            if (typeof(doc[propName]) != "undefined" && (propName.startsWith("has") || propName.endsWith("URL"))) {
                emit(propName, 1);
            }
        }
        emit("total", 1);
    }
}';
$obj->views->scoreHas->reduce = '_sum';
$obj->views->fieldNames->map = '
function(doc) {
    for (var propName in doc) {
        emit(propName, doc._id);
    }
}';
$obj->views->fieldNames->reduce = '_count';
// allow safe updates (even if slightly slower due to extra: rev-detection check).
$db->save($obj, true);
?>
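
Once these design documents are saved, the views can be queried from any CouchDB client. A minimal sketch using the couchdb-python library that the scrapers below already use (the local server URL matches the scraper scripts; this is illustrative, not part of the admin code):

    import couchdb

    # Assumed local CouchDB, as in the scraper scripts.
    couch = couchdb.Server('http://127.0.0.1:5984/')
    agencydb = couch['disclosr-agencies']

    # List every active agency using the app/getActive view defined above.
    for row in agencydb.view('app/getActive'):
        print row.id

    # Aggregate the scoreHas view: one row per has.../...URL field with its count.
    for row in agencydb.view('app/scoreHas', group=True):
        print row.key, row.value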
   
file:a/bubbletree.php (deleted)
 
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
    <meta charset="UTF-8"/>
    <title>Minimal BubbleTree Demo</title>
    <script type="text/javascript" src="http://code.jquery.com/jquery-1.7.2.js"></script>
    <script type="text/javascript" src="js/bubbletree/lib/jquery.history.js"></script>
    <script type="text/javascript" src="js/bubbletree/lib/raphael.js"></script>
    <script type="text/javascript" src="js/bubbletree/lib/vis4.js"></script>
    <script type="text/javascript" src="js/bubbletree/lib/Tween.js"></script>
    <script type="text/javascript" src="js/bubbletree/build/bubbletree.js"></script>
    <link rel="stylesheet" type="text/css" href="js/bubbletree/build/bubbletree.css" />
    <script type="text/javascript" src="js/bubbletree/styles/cofog.js"></script>


    <script type="text/javascript">

        $(function() {
<?php
include_once('include/common.inc.php');

include("lib/Color.php");
$color = new Lux_Color();

$portfolios = Array();
$total = 0;
$db = $server->get_db('disclosr-agencies');
try {
    $rows = $db->get_view("app", "byDeptStateName", null, true)->rows;
    foreach ($rows as $row) {
        $portfolios[trim(str_replace(Array("Department of", "Department", "the", "'", "`"), "", $row->key))] = $row->value;
    }
} catch (SetteeRestClientException $e) {
    setteErrorHandler($e);
}

$agencies = Array();
try {
    $rows = $db->get_view("app", "byCanonicalName", null, true)->rows;
    //print_r($rows);
    foreach ($rows as $row) {
        $employees = 0;
        $portfolioid = 0;
        if (isset($row->value->employees)) {
            $employees = $row->value->employees;
        }
        if (isset($row->value->statistics->employees)) {
            $agencyEmployeesArray = object_to_array($row->value->statistics->employees);
            if (isset($agencyEmployeesArray["2010-2011"]["value"])) {
                $employees = $agencyEmployeesArray["2010-2011"]["value"];
            } else {
                // bail out for agencies that are closed for business
                continue;
            }
        }
        if (!($employees > 0)) {
            $employees = 0;
        }
        if (isset($row->value->parentOrg)) {
            $portfolioid = $row->value->parentOrg;
        }
        if (isset($row->value->orgType) && $row->value->orgType == "FMA-DepartmentOfState") {
            $portfolioid = $row->id;
        }
        $agencies[$portfolioid][$row->value->name] = $employees;
    }
} catch (SetteeRestClientException $e) {
    setteErrorHandler($e);
}
//print_r($portfolios);
//print_r($agencies);

// http://martin.ankerl.com/2009/12/09/how-to-create-random-colors-programmatically/
$golden_ratio_conjugate = 0.618033988749895;
$h = 0.00 + rand(0, 10) / 10; # use random start value
foreach ($portfolios as $portfolioName => $portfolioID) {
    $h += $golden_ratio_conjugate;

    $h = fmod($h, 1);
    $portfolioColor = $color->hsv2hex(Array($h, .3, .99));
    $subnodes = Array();
    $portfolioEmployees = 0;
    foreach ($agencies[$portfolioID] as $agencyName => $agencyEmployees) {
        $agencyColor = $color->hsv2hex(Array($h / 10, rand(1, 10) / 10, abs(($h * (1 / 10)) - .5) + .5));
        $subnodes[] = Array(
            "label" => str_replace(Array("'", "`"), "", $agencyName),
            "amount" => $agencyEmployees,
            //"color" => "#" . $agencyColor
        );
        $portfolioEmployees += $agencyEmployees;
    }
    $nodes[] = Array(
        "label" => $portfolioName,
        "amount" => $portfolioEmployees,
        //"color" => "#" . $portfolioColor,
        "children" => $subnodes
    );
    $total += $portfolioEmployees;
}
$data = Array(
    "label" => "Australian Federal Government",
    "amount" => $total,
    //"color" => "#000000",
    "children" => $nodes
);
echo "var data =eval('('+'" . json_encode($data) . "'+')');";
?>

            new BubbleTree({
                data: data,
                container: '.bubbletree'
            });


        });

    </script>
</head>
<body>
<div class="bubbletree-wrapper">
    <div class="bubbletree"></div>
</div>
</body>
</html>
 
file:b/budget.php (new)
 
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
    <meta charset="UTF-8"/>
    <title>Minimal BubbleTree Demo</title>
    <script type="text/javascript" src="http://code.jquery.com/jquery-1.7.2.js"></script>
    <script type="text/javascript" src="js/bubbletree/lib/jquery.history.js"></script>
    <script type="text/javascript" src="js/bubbletree/lib/raphael.js"></script>
    <script type="text/javascript" src="js/bubbletree/lib/vis4.js"></script>
    <script type="text/javascript" src="js/bubbletree/lib/Tween.js"></script>
    <script type="text/javascript" src="js/bubbletree/build/bubbletree.js"></script>
    <link rel="stylesheet" type="text/css" href="js/bubbletree/build/bubbletree.css" />
    <script type="text/javascript" src="js/bubbletree/styles/cofog.js"></script>


    <script type="text/javascript">

        $(function() {
<?php
include_once('include/common.inc.php');

include("lib/Color.php");
$color = new Lux_Color();

$portfolios = Array();
$total = 0;
$db = $server->get_db('disclosr-agencies');
try {
    $rows = $db->get_view("app", "byDeptStateName", null, true)->rows;
    foreach ($rows as $row) {
        $portfolios[trim(str_replace(Array("Department of", "Department", "the", "'", "`"), "", $row->key))] = $row->value;
    }
} catch (SetteeRestClientException $e) {
    setteErrorHandler($e);
}

$agencies = Array();
try {
    $rows = $db->get_view("app", "byCanonicalName", null, true)->rows;
    //print_r($rows);
    foreach ($rows as $row) {
        $employees = 0;
        $portfolioid = 0;
        if (isset($row->value->statistics->budget)) {
            $agencyEmployeesArray = object_to_array($row->value->statistics->budget);
            //print_r($agencyEmployeesArray);
            if (isset($agencyEmployeesArray["2011-2012"]["value"])) {
                $employees = $agencyEmployeesArray["2011-2012"]["value"];
            } else {
                // bail out for agencies that are closed for business
                continue;
            }
        }
        if (!($employees > 0)) {
            $employees = 0;
        }
        if (isset($row->value->parentOrg)) {
            $portfolioid = $row->value->parentOrg;
        }
        if (isset($row->value->orgType) && $row->value->orgType == "FMA-DepartmentOfState") {
            $portfolioid = $row->id;
        }
        $agencies[$portfolioid][$row->value->name] = $employees;
    }
} catch (SetteeRestClientException $e) {
    setteErrorHandler($e);
}
//print_r($portfolios);
//print_r($agencies);

// http://martin.ankerl.com/2009/12/09/how-to-create-random-colors-programmatically/
$golden_ratio_conjugate = 0.618033988749895;
$h = 0.00 + rand(0, 10) / 10; # use random start value
foreach ($portfolios as $portfolioName => $portfolioID) {
    $h += $golden_ratio_conjugate;

    $h = fmod($h, 1);
    $portfolioColor = $color->hsv2hex(Array($h, .3, .99));
    $subnodes = Array();
    $portfolioEmployees = 0;
    foreach ($agencies[$portfolioID] as $agencyName => $agencyEmployees) {
        $agencyColor = $color->hsv2hex(Array($h / 10, rand(1, 10) / 10, abs(($h * (1 / 10)) - .5) + .5));
        $subnodes[] = Array(
            "label" => str_replace(Array("'", "`"), "", $agencyName),
            "amount" => $agencyEmployees,
            //"color" => "#" . $agencyColor
        );
        $portfolioEmployees += $agencyEmployees;
    }
    $nodes[] = Array(
        "label" => $portfolioName,
        "amount" => $portfolioEmployees,
        //"color" => "#" . $portfolioColor,
        "children" => $subnodes
    );
    $total += $portfolioEmployees;
}
$data = Array(
    "label" => "Australian Federal Government",
    "amount" => $total,
    //"color" => "#000000",
    "children" => $nodes
);
echo "var data =eval('('+'" . json_encode($data) . "'+')');";
?>

            new BubbleTree({
                data: data,
                container: '.bubbletree'
            });


        });

    </script>
</head>
<body>
<div class="bubbletree-wrapper">
    <div class="bubbletree"></div>
</div>
</body>
</html>
 
file:b/disclosr.iml (new)
<?xml version="1.0" encoding="UTF-8"?>
<module type="WEB_MODULE" version="4">
  <component name="FacetManager">
    <facet type="Python" name="Python">
      <configuration sdkName="" />
    </facet>
  </component>
  <component name="NewModuleRootManager" inherit-compiler-output="true">
    <exclude-output />
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>
 
 
import ckanclient
import couchdb
from ckanclient import CkanApiError
import re


class LoaderError(Exception):
    pass

# Instantiate the CKAN client.
#ckan = ckanclient.CkanClient(base_location='http://localhost:5000/api', api_key='b47b24cd-591d-40c1-8677-d73101d56d1b')
api_key = 'b3ab75e4-afbb-465b-a09d-8171c8c69a7a'
ckan = ckanclient.CkanClient(base_location='http://data.disclosurelo.gs/api',
                             api_key=api_key)
couch = couchdb.Server('http://127.0.0.1:5984/')
#couch = couchdb.Server('http://192.168.1.113:5984/')

# https://github.com/okfn/ckanext-importlib
def munge(name):
    # convert spaces to underscores
    name = re.sub(' ', '_', name).lower()
    # convert symbols to dashes
    name = re.sub('[:]', '_-', name).lower()
    name = re.sub('[/]', '-', name).lower()
    # take out not-allowed characters
    name = re.sub('[^a-zA-Z0-9-_]', '', name).lower()
    # remove double underscores
    name = re.sub('__', '_', name).lower()
    return name


def name_munge(input_name):
    return munge(input_name.replace(' ', '').replace('.', '_').replace('&', 'and'))
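
# For example (illustrative inputs, not from the live data):
#   munge("Spatial Data: Roads/Paths")  -> "spatial_data_-_roads-paths"
#   name_munge("Dept. of Foo & Bar")    -> "dept_offooandbar"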
 
 
docsdb = couch['disclosr-documents']

if __name__ == "__main__":
    groups = {}
    for doc in docsdb.view('app/datasetGroups'):
        group_name = doc.key
        pkg_name = filter(lambda x: x in '0123456789abcdefghijklmnopqrstuvwxyz-_',
                          doc.value.replace("http://data.gov.au/dataset/", '').replace('/', '')[:100])
        if group_name in groups.keys():
            groups[group_name] = list(set(groups[group_name] + [pkg_name]))
        else:
            groups[group_name] = [pkg_name]

    # add dataset to group(s)
    for group_name in groups.keys():
        group_url = name_munge(group_name[:100])
        print group_name
        print groups[group_name]
        try:
            # Update the group details
            group_entity = ckan.group_entity_get(group_url)
            print "group " + group_name + " exists"
            if 'packages' in group_entity.keys():
                group_entity['packages'] = list(set(group_entity['packages'] + groups[group_name]))
            else:
                group_entity['packages'] = groups[group_name]
            ckan.group_entity_put(group_entity)
        except CkanApiError, e:
            if ckan.last_status == 404:
                print "group " + group_name + " does not exist, creating"
                group_entity = {
                    'name': group_url,
                    'title': group_name,
                    'description': group_name,
                    'packages': groups[group_name]
                }
                #print group_entity
                ckan.group_register_post(group_entity)
            elif ckan.last_status == 409:
                print "group already exists"
            else:
                raise LoaderError('Unexpected status %s adding to group under \'%s\': %r' % (
                    ckan.last_status, pkg_name, e.args))
 
import ckanclient
import couchdb
from ckanclient import CkanApiError
import re
import html2text  # aaronsw :(
import ckanapi  # https://github.com/open-data/ckanapi


class LoaderError(Exception):
    pass

# Instantiate the CKAN client.
api_key = 'ff34526e-f794-4068-8235-fcbba38cd8bc'
ckan = ckanclient.CkanClient(base_location='http://data.disclosurelo.gs/api',
                             api_key=api_key)
ckandirect = ckanapi.RemoteCKAN('http://data.disclosurelo.gs', api_key=api_key)
couch = couchdb.Server('http://127.0.0.1:5984/')
#couch = couchdb.Server('http://192.168.1.113:5984/')

# http://code.activestate.com/recipes/578019-bytes-to-human-human-to-bytes-converter/
SYMBOLS = {
    'customary': ('B', 'KB', 'MB', 'GB', 'T', 'P', 'E', 'Z', 'Y'),
    'customary_ext': ('byte', 'kilo', 'mega', 'giga', 'tera', 'peta', 'exa',
                      'zetta', 'iotta'),
    'iec': ('Bi', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi', 'Yi'),
    'iec_ext': ('byte', 'kibi', 'mebi', 'gibi', 'tebi', 'pebi', 'exbi',
                'zebi', 'yobi'),
}


def human2bytes(s):
    """
    Attempts to guess the string format based on default symbols
    set and return the corresponding bytes as an integer.
    When unable to recognize the format ValueError is raised.

    >>> human2bytes('0 B')
    0
    >>> human2bytes('1 K')
    1024
    >>> human2bytes('1 M')
    1048576
    >>> human2bytes('1 Gi')
    1073741824
    >>> human2bytes('1 tera')
    1099511627776

    >>> human2bytes('0.5kilo')
    512
    >>> human2bytes('0.1 byte')
    0
    >>> human2bytes('1 k')  # k is an alias for K
    1024
    >>> human2bytes('12 foo')
    Traceback (most recent call last):
        ...
    ValueError: can't interpret '12 foo'
    """
    if s == None:
        return 0
    s = s.replace(',', '')
    init = s
    num = ""
    while s and s[0:1].isdigit() or s[0:1] == '.':
        num += s[0]
        s = s[1:]
    num = float(num)
    letter = s.strip()
    for name, sset in SYMBOLS.items():
        if letter in sset:
            break
    else:
        if letter == 'k':
            # treat 'k' as an alias for 'K' as per: http://goo.gl/kTQMs
            sset = SYMBOLS['customary']
            letter = letter.upper()
        else:
            raise ValueError("can't interpret %r" % init)
    prefix = {sset[0]: 1}
    for i, s in enumerate(sset[1:]):
        prefix[s] = 1 << (i + 1) * 10
    return int(num * prefix[letter])

# https://github.com/okfn/ckanext-importlib
def munge(name):
    # convert spaces to underscores
    name = re.sub(' ', '_', name).lower()
    # convert symbols to dashes
    name = re.sub('[:]', '_-', name).lower()
    name = re.sub('[/]', '-', name).lower()
    # take out not-allowed characters
    name = re.sub('[^a-zA-Z0-9-_]', '', name).lower()
    # remove double underscores
    name = re.sub('__', '_', name).lower()
    return name


def name_munge(input_name):
    return munge(input_name.replace(' ', '').replace('.', '_').replace('&', 'and'))


def get_licence_id(licencename):
    map = {
        "Creative Commons - Attribution-Share Alike 2.0 Australia (CC-SA)\nThe downloadable version of the database is licensed under CC-BY-SA Creative Commons Attribution Share Alike and contains only the database fields that are released under that license. These fields are object title, object number, object description as well as temporal, spatial and dimension details. It also contains a persistent URL for each record.": 'cc-by-sa',
        "CreativeCommonsAttributionNonCommercial30AustraliaCCBYNC30": 'cc-nc',
        'Otherpleasespecify': 'notspecified',
        '': 'notspecified',
        "Publicly available data": 'notspecified',
        "CreativeCommonsAttributionNoDerivativeWorks30AustraliaCCBYND30": "other-closed",
        "CreativeCommonsAttributionNonCommercialNoDerivs30AustraliaCCBYNCND30": "other-closed",
        'CreativeCommonsAttribution30AustraliaCCBY30': 'cc-by',
        "Creative Commons - Attribution 2.5 Australia (CC-BY)": 'cc-by',
        'CreativeCommonsAttributionCCBY25': 'cc-by',
        "PublicDomain": 'other-pd',
    }
    if licencename not in map.keys():
        raise Exception(licencename + " not found")
    return map[licencename]


docsdb = couch['disclosr-documents']

if __name__ == "__main__":
    orgs_list = []
    orgs_ids = {}
    for doc in docsdb.view('app/datasets'):
        print " --- "
        print doc.id

        if doc.value['url'] != "http://data.gov.au/data/" and doc.value['agencyID'] != "qld":

            # Collect the package metadata.
            pkg_name = filter(lambda x: x in '0123456789abcdefghijklmnopqrstuvwxyz-_',
                              doc.value['url'].replace("http://data.gov.au/dataset/", '').replace('/', '')[:100])
            print pkg_name
            # add to or create organization using direct API
            org_name = name_munge(doc.value['metadata']["Agency"][:100])
            if org_name not in orgs_list:
                orgs_list = ckandirect.action.organization_list()['result']
                #print orgs_list
                if org_name not in orgs_list:
                    try:
                        print "org not found, creating " + org_name
                        ckandirect.action.organization_create(name=org_name, title=doc.value['metadata']["Agency"],
                                                              description=doc.value['metadata']["Agency"])
                        orgs_list.append(org_name)
                    except ckanapi.ValidationError, e:
                        print e
                        raise LoaderError('Unexpected status')
                else:
                    print "org found, adding dataset to " + org_name

            # cache org names -> id mapping
            if org_name not in orgs_ids:
                org = ckandirect.action.organization_show(id=org_name)
                orgs_ids[org_name] = org["result"]["id"]
            org_id = orgs_ids[org_name]
            print "org id is " + org_id
            tags = []
            if doc.value['agencyID'] == "AGIMO":
                if len(doc.value['metadata']["Keywords / Tags"]) > 0:
                    if hasattr(doc.value['metadata']["Keywords / Tags"], '__iter__'):
                        tags = tags + doc.value['metadata']["Keywords / Tags"]
                    else:
                        tags = tags + [doc.value['metadata']["Keywords / Tags"]]

            tags = [re.sub('[^a-zA-Z0-9-_.]', '', tag.replace('&', 'and')).lower() for tag in tags if tag]
            #print tags
            package_entity = {
                'name': pkg_name,
                'title': doc.value['metadata']['DCTERMS.Title'],
                'url': doc.value['metadata']['DCTERMS.Source.URI'],
                'tags': tags,  # tags are mandatory?
                'author': doc.value['metadata']["DCTERMS.Creator"],
                'maintainer': doc.value['metadata']["DCTERMS.Creator"],
                'licence_id': get_licence_id(doc.value['metadata']['DCTERMS.License']),
                'notes': html2text.html2text(doc.value['metadata']['Description']),
                'owner_org': org_id
                # todo add missing key values like jurisdiction
            }
            if doc.value['agencyID'] == "qld":
                package_entity = doc.value['metadata']

            try:
                #print package_entity
                ckan.package_register_post(package_entity)
            except CkanApiError, e:
                if ckan.last_message == "{\"name\": [\"That URL is already in use.\"]}":
                    print "package already exists"
                else:
                    print ckan.last_message
                    raise LoaderError('Unexpected status %s checking for package under \'%s\': %r' % (
                        ckan.last_status, pkg_name, e.args))
            pkg = ckan.package_entity_get(pkg_name)

            # add resources (downloadable data files)
            if 'Download' in doc.value['metadata'].keys():
                try:
                    resources = pkg.get('resources', [])
                    if len(resources) < len(doc.value['metadata']['Download']):
                        for resource in doc.value['metadata']['Download']:

                            # http://docs.ckan.org/en/ckan-1.7/domain-model-resource.html
                            # (KML/KMZ) / (Shapefile) / (Other)
                            format = "plain"
                            if resource['format'] == '(XML)':
                                format = 'xml'
                            if resource['format'] == '(CSV/XLS)':
                                format = 'csv'
                            if resource['format'] == '(Shapefile)':
                                format = 'shp'
                            if resource['format'] == '(KML/KMZ)':
                                format = 'kml'
                            name = resource['href']
                            if 'name' in resource.keys():
                                name = resource['name']
                            print resource
                            ckan.add_package_resource(pkg_name, resource['href'], name=name, resource_type='data',
                                                      format=format,
                                                      size=human2bytes(resource.get('size', '0B')))
                    else:
                        print "resources already exist"
                except CkanApiError, e:
                    if ckan.last_status == 404:
                        print "parent dataset does not exist"
                    else:
                        raise LoaderError('Unexpected status %s checking for package under \'%s\': %r' % (
                            ckan.last_status, pkg_name, e.args))
 
<?php

include_once("../include/common.inc.php");


setlocale(LC_CTYPE, 'C');

$db = $server->get_db('disclosr-documents');
$datasets = Array();
try {
    $rows = $db->get_view("app", "datasets", null, true)->rows;

    foreach ($rows as $row) {
        //print_r($row);
        if ($row->value->url != "http://data.gov.au/data/")
            $datasets[str_replace(Array("http://data.gov.au/dataset/", "/"), "", $row->value->url)] = $row->id;
    }
} catch (SetteeRestClientException $e) {
    setteErrorHandler($e);
}
ksort($datasets);
foreach ($datasets as $datasetname => $datasetkey) {
    print "$datasetname => $datasetkey<br>\n";
}
?>
 
import sys, os
import time
import scrape
from bs4 import BeautifulSoup

from unidecode import unidecode

listurl = "http://data.gov.au/data/"
(url, mime_type, datasetlisthtml) = scrape.fetchURL(scrape.docsdb,
                                                    listurl, "data", "AGIMO")
soup = BeautifulSoup(datasetlisthtml)
for atag in soup.find_all(class_='result-title'):
    if atag.has_key('href'):
        url = scrape.fullurl(listurl, atag['href'])
        (url, mime_type, html) = scrape.fetchURL(scrape.docsdb,
                                                 url, "data", "AGIMO", False)
        hash = scrape.mkhash(scrape.canonurl(url))
        doc = scrape.docsdb.get(hash)
        if "metadata" not in doc.keys() or True:
            doc['type'] = "dataset"
            doc['metadata'] = {}
            soup = BeautifulSoup(html)
            for metatag in soup.find_all('meta'):
                if metatag.has_key('name'):
                    doc['metadata'][metatag['name']] = metatag['content']
            for list in soup.find_all('dl'):
                last_title = ""
                for child in list.children:
                    if str(type(child)) != "<class 'bs4.element.NavigableString'>":
                        if child.name == 'dt' and child.string != None:
                            last_title = child.string.strip()
                        if child.name == 'dd':
                            #print last_title
                            if last_title == "Description":
                                doc['metadata'][last_title] = unidecode(str(child)).encode('ascii', 'ignore')
                            elif last_title == "Download":
                                doc['metadata'][last_title] = []
                                for item in child.find_all("li"):
                                    link = item.find("a")
                                    format = item.find(property="dc:format")
                                    linkobj = {"href": link['href'].replace("/bye?", "").strip(),
                                               "format": format.string.strip()}
                                    if format.next_sibling.string != None:
                                        linkobj["size"] = format.next_sibling.string.strip()
                                    if link.string != None:
                                        linkobj["name"] = link.string.strip()
                                    doc['metadata'][last_title].append(linkobj)

                            else:
                                atags = child.find_all('a')
                                if len(atags) < 2:
                                    [s.extract() for s in child(class_='viewAll')]
                                    doc['metadata'][last_title] = ''.join(child.stripped_strings).strip()
                                else:
                                    doc['metadata'][last_title] = [item.string.replace(",", "").strip() for item in atags]
            print doc['metadata']
            scrape.docsdb.save(doc)
            #time.sleep(2)
   
import sys, os
import time
import scrape
from bs4 import BeautifulSoup

from unidecode import unidecode
import ckanclient

# Instantiate the CKAN client.
ckan = ckanclient.CkanClient(base_location='https://data.qld.gov.au/api')

# Get the package list.
package_list = ckan.package_register_get()
for package_name in package_list:
    # Get the details of a package.
    (url, mime_type, html) = scrape.fetchURL(scrape.docsdb,
                                             "https://data.qld.gov.au/dataset/" + package_name, "data", "qld", False)
    hash = scrape.mkhash(scrape.canonurl(url))
    print hash
    doc = scrape.docsdb.get(hash)
    if "metadata" not in doc.keys() or True:
        ckan.package_entity_get(package_name)
        package_entity = ckan.last_message
        doc['type'] = "dataset"
        doc['metadata'] = package_entity
        print package_entity
        scrape.docsdb.save(doc)
 
import sys, os
import time
import scrape
from bs4 import BeautifulSoup

from unidecode import unidecode

items = 3950
items = 1
while True:
    print str(items) + " (" + str(items / 25) + " screens to go)"
    listurl = "http://gazettes.ag.gov.au/portal/govgazonline.nsf/publications?OpenView&Start=" + str(items)
    (listurl, mime_type, listhtml) = scrape.fetchURL(scrape.docsdb,
                                                     listurl, "gazette", "AGD", False)
    for line in listhtml.split('\n'):
        soup = BeautifulSoup(line)
        #print line
        for row in soup.find_all('tr'):
            print line
            if row.has_key('valign'):
                i = 0
                date = ""
                id = ""
                type = ""
                description = ""
                name = ""
                url = ""
                for col in soup.find_all('td'):
                    #print ''.join(col.stripped_strings)
                    if i == 0:
                        date = ''.join(col.stripped_strings)
                    if i == 1:
                        id = ''.join(col.stripped_strings)
                    if i == 2:
                        type = ''.join(col.stripped_strings)
                    if i == 3:
                        description = ''.join(col.stripped_strings)
                        for link in col.findAll('a'):
                            if link.has_key("href"):
                                url = link['href']
                                name = ''.join(link.stripped_strings)
                                print str(items) + " (" + str(items / 25) + " screens to go)"
                                print [date, id, type, description, name, url]
                                itemurl = scrape.fullurl(listurl, url)
                                (itemurl, mime_type, html) = scrape.fetchURL(scrape.docsdb,
                                                                             itemurl, "gazette", "AGD", False)
                                hash = scrape.mkhash(scrape.canonurl(itemurl))
                                doc = scrape.docsdb.get(hash)
                                # record the gazette id under its own key (was a duplicate "date" key)
                                doc['metadata'] = {"date": date, "id": id, "type": type, "description": description, "name": name, "url": url}
                                scrape.docsdb.save(doc)
                                #time.sleep(2)
                    i = i + 1

    items = items - 25
    if items <= 0:
        break
   
import sys
import os

sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import scrape
from bs4 import BeautifulSoup
from time import mktime
import feedparser
import abc
import unicodedata
import re
import dateutil
from dateutil.parser import *
from datetime import *
import codecs
import zipfile  # needed by the DOCX scraper below
from lxml import etree  # needed by the DOCX scraper below
from docx import getdocumenttext  # helper from docx.py (assumed available)

import difflib

from StringIO import StringIO

from pdfminer.pdfparser import PDFDocument, PDFParser
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter, process_pdf
from pdfminer.pdfdevice import PDFDevice, TagExtractor
from pdfminer.converter import TextConverter
from pdfminer.cmapdb import CMapDB
from pdfminer.layout import LAParams


class GenericDisclogScraper(object):
    __metaclass__ = abc.ABCMeta
    agencyID = None
    disclogURL = None

    def remove_control_chars(self, input):
        return "".join([i for i in input if ord(i) in range(32, 127)])

    def getAgencyID(self):
        """ disclosr agency id """
        if self.agencyID is None:
            self.agencyID = os.path.basename(sys.argv[0]).replace(".py", "")
        return self.agencyID

    def getURL(self):
        """ disclog URL"""
        if self.disclogURL is None:
            agency = scrape.agencydb.get(self.getAgencyID())
            self.disclogURL = agency['FOIDocumentsURL']
        return self.disclogURL

    @abc.abstractmethod
    def doScrape(self):
        """ do the scraping """
        return


class GenericHTMLDisclogScraper(GenericDisclogScraper):
    def doScrape(self):
        foidocsdb = scrape.couch['disclosr-foidocuments']
        (url, mime_type, rcontent) = scrape.fetchURL(scrape.docsdb,
                                                     self.getURL(), "foidocuments", self.getAgencyID())
        content = rcontent
        dochash = scrape.mkhash(content)
        doc = foidocsdb.get(dochash)
        if doc is None:
            print "saving " + dochash
            description = "This log may have updated but as it was not in a table last time we viewed it, we cannot extract what has changed. Please refer to the agency's website Disclosure Log to see the most recent entries"
            diff = ""  # default when there is no previous attachment to diff against
            last_attach = scrape.getLastAttachment(scrape.docsdb, self.getURL())
            if last_attach != None:
                html_diff = difflib.HtmlDiff()
                diff = html_diff.make_table(last_attach.read().split('\n'),
                                            content.split('\n'))
            edate = date.today().strftime("%Y-%m-%d")
            doc = {'_id': dochash, 'agencyID': self.getAgencyID()
                , 'url': self.getURL(), 'docID': dochash,
                   "date": edate, "title": "Disclosure Log Updated",
                   "description": self.remove_control_chars(description), "diff": self.remove_control_chars(diff)}
            foidocsdb.save(doc)
        else:
            print "already saved"


class GenericPDFDisclogScraper(GenericDisclogScraper):
    def doScrape(self):
        foidocsdb = scrape.couch['disclosr-foidocuments']
        (url, mime_type, content) = scrape.fetchURL(scrape.docsdb,
                                                    self.getURL(), "foidocuments", self.getAgencyID())
        laparams = LAParams()
        rsrcmgr = PDFResourceManager(caching=True)
        outfp = StringIO()
        device = TextConverter(rsrcmgr, outfp, codec='utf-8',
                               laparams=laparams)
        fp = StringIO()
        fp.write(content)

        process_pdf(rsrcmgr, device, fp, set(), caching=True,
                    check_extractable=True)
        description = outfp.getvalue()
        fp.close()
        device.close()
        outfp.close()
        dochash = scrape.mkhash(description)
        doc = foidocsdb.get(dochash)
        if doc is None:
            print "saving " + dochash
            edate = date.today().strftime("%Y-%m-%d")
            doc = {'_id': dochash, 'agencyID': self.getAgencyID()
                , 'url': self.getURL(), 'docID': dochash,
                   "date": edate, "title": "Disclosure Log Updated", "description": self.remove_control_chars(description)}
            foidocsdb.save(doc)
        else:
            print "already saved"


class GenericDOCXDisclogScraper(GenericDisclogScraper):
    def doScrape(self):
        foidocsdb = scrape.couch['disclosr-foidocuments']
        (url, mime_type, content) = scrape.fetchURL(scrape.docsdb
            , self.getURL(), "foidocuments", self.getAgencyID())
        # read the fetched bytes as a .docx (zip) archive; the original code
        # mistakenly passed the builtin "file" here
        mydoc = zipfile.ZipFile(StringIO(content))
        xmlcontent = mydoc.read('word/document.xml')
        document = etree.fromstring(xmlcontent)
        ## Fetch all the text out of the document we just created
        paratextlist = getdocumenttext(document)
        # Make explicit unicode version
        newparatextlist = []
        for paratext in paratextlist:
            newparatextlist.append(paratext.encode("utf-8"))
        ## Print our document's text with two newlines under each paragraph
        description = '\n\n'.join(newparatextlist).strip(' \t\n\r')
        dochash = scrape.mkhash(description)
        doc = foidocsdb.get(dochash)

        if doc is None:
            print "saving " + dochash
            edate = date.today().strftime("%Y-%m-%d")
            doc = {'_id': dochash, 'agencyID': self.getAgencyID()
                , 'url': self.getURL(), 'docID': dochash,
                   "date": edate, "title": "Disclosure Log Updated", "description": description}
            foidocsdb.save(doc)
        else:
            print "already saved"


class GenericRSSDisclogScraper(GenericDisclogScraper):
    def doScrape(self):
        foidocsdb = scrape.couch['disclosr-foidocuments']
        (url, mime_type, content) = scrape.fetchURL(scrape.docsdb,
                                                    self.getURL(), "foidocuments", self.getAgencyID())
        feed = feedparser.parse(content)
        for entry in feed.entries:
            #print entry
            print entry.id
            dochash = scrape.mkhash(entry.id)
            doc = foidocsdb.get(dochash)
            #print doc
            if doc is None:
                print "saving " + dochash
                edate = datetime.fromtimestamp(
                    mktime(entry.published_parsed)).strftime("%Y-%m-%d")
                doc = {'_id': dochash, 'agencyID': self.getAgencyID(),
                       'url': entry.link, 'docID': entry.id,
                       "date": edate, "title": entry.title}
                self.getDescription(entry, entry, doc)
                foidocsdb.save(doc)
            else:
                print "already saved"

    def getDescription(self, content, entry, doc):
        """ get description from rss entry"""
        doc.update({'description': content.summary})

        return
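
# A hypothetical concrete scraper, e.g. scrapers/exampleagency.py (the file
# name supplies the agencyID via getAgencyID() above). It assumes the matching
# agency record in disclosr-agencies has an FOIDocumentsURL pointing at an RSS
# disclosure log; subclasses of the other Generic*DisclogScraper classes work
# the same way.
#
#     import genericScrapers
#
#     class ScraperImplementation(genericScrapers.GenericRSSDisclogScraper):
#         pass
#
#     if __name__ == '__main__':
#         ScraperImplementation().doScrape()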
   
   
class GenericOAICDisclogScraper(GenericDisclogScraper): class GenericOAICDisclogScraper(GenericDisclogScraper):
__metaclass__ = abc.ABCMeta __metaclass__ = abc.ABCMeta
   
@abc.abstractmethod @abc.abstractmethod
def getColumns(self, columns): def getColumns(self, columns):
""" rearranges columns if required """ """ rearranges columns if required """
return return
   
def getColumnCount(self): def getColumnCount(self):
return 5 return 5
   
def getDescription(self, content, entry, doc): def getDescription(self, content, entry, doc):
""" get description from rss entry""" """ get description from rss entry"""
descriptiontxt = "" descriptiontxt = ""
for string in content.stripped_strings: for string in content.stripped_strings:
descriptiontxt = descriptiontxt + " \n" + string descriptiontxt = descriptiontxt + " \n" + string
doc.update({'description': descriptiontxt}) doc.update({'description': descriptiontxt})
   
def getTitle(self, content, entry, doc): def getTitle(self, content, entry, doc):
doc.update({'title': (''.join(content.stripped_strings))}) doc.update({'title': (''.join(content.stripped_strings))})
   
def getTable(self, soup): def getTable(self, soup):
return soup.table return soup.table
   
def getRows(self, table): def getRows(self, table):
return table.find_all('tr') return table.find_all('tr')
   
def getDate(self, content, entry, doc): def getDate(self, content, entry, doc):
date = ''.join(content.stripped_strings).strip() strdate = ''.join(content.stripped_strings).strip()
(a, b, c) = date.partition("(") (a, b, c) = strdate.partition("(")
strdate = self.remove_control_chars(a.replace("Octber", "October").replace("Janrurary", "January").replace("1012", "2012"))
print date print strdate
edate = parse(date, dayfirst=True, fuzzy=True).strftime("%Y-%m-%d") try:
  edate = parse(strdate, dayfirst=True, fuzzy=True).strftime("%Y-%m-%d")
  except ValueError:
  print >> sys.stderr, "ERROR date invalid %s " % strdate
  print >> sys.stderr, "ERROR date originally %s " % ''.join(content.stripped_strings).strip()
  edate = date.today().strftime("%Y-%m-%d")
print edate print edate
doc.update({'date': edate}) doc.update({'date': edate})
return return
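# dateutil does the heavy lifting here; for instance
#   parse("3 October 2012 (PDF)".partition("(")[0], dayfirst=True, fuzzy=True)
# yields datetime(2012, 10, 3), serialised as "2012-10-03"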
   
def getLinks(self, content, entry, doc): def getLinks(self, content, entry, doc):
links = [] links = []
for atag in entry.find_all("a"): for atag in entry.find_all("a"):
if atag.has_key('href'): if atag.has_key('href'):
links.append(scrape.fullurl(content, atag['href'])) links.append(scrape.fullurl(content, atag['href']))
if links != []: if links != []:
doc.update({'links': links}) doc.update({'links': links})
return return
   
def doScrape(self): def doScrape(self):
foidocsdb = scrape.couch['disclosr-foidocuments'] foidocsdb = scrape.couch['disclosr-foidocuments']
(url, mime_type, content) = scrape.fetchURL(scrape.docsdb, (url, mime_type, content) = scrape.fetchURL(scrape.docsdb,
self.getURL(), "foidocuments", self.getAgencyID()) self.getURL(), "foidocuments", self.getAgencyID())
if content is not None: if content is not None:
if mime_type == "text/html" or mime_type == "application/xhtml+xml" or mime_type == "application/xml": if mime_type == "text/html" or mime_type == "application/xhtml+xml" or mime_type == "application/xml":
# http://www.crummy.com/software/BeautifulSoup/documentation.html # http://www.crummy.com/software/BeautifulSoup/documentation.html
print "parsing" print "parsing"
soup = BeautifulSoup(content) soup = BeautifulSoup(content)
table = self.getTable(soup) table = self.getTable(soup)
for row in self.getRows(table): for row in self.getRows(table):
columns = row.find_all('td') columns = row.find_all('td')
if len(columns) == self.getColumnCount():
(id, date, title, (id, date, title,
description, notes) = self.getColumns(columns) description, notes) = self.getColumns(columns)
print self.remove_control_chars( print self.remove_control_chars(
''.join(id.stripped_strings)) ''.join(id.stripped_strings))
if id.string is None: if id.string is None:
dochash = scrape.mkhash( dochash = scrape.mkhash(
self.remove_control_chars( self.remove_control_chars(
url + (''.join(date.stripped_strings)))) url + (''.join(date.stripped_strings))))
else: else:
dochash = scrape.mkhash( dochash = scrape.mkhash(
self.remove_control_chars( self.remove_control_chars(
url + (''.join(id.stripped_strings)))) url + (''.join(id.stripped_strings))))
doc = foidocsdb.get(dochash) doc = foidocsdb.get(dochash)
   
if doc is None: if doc is None:
print "saving " + dochash print "saving " + dochash
doc = {'_id': dochash, doc = {'_id': dochash,
'agencyID': self.getAgencyID(), 'agencyID': self.getAgencyID(),
'url': self.getURL(), 'url': self.getURL(),
'docID': (''.join(id.stripped_strings))} 'docID': (''.join(id.stripped_strings))}
self.getLinks(self.getURL(), row, doc) self.getLinks(self.getURL(), row, doc)
self.getTitle(title, row, doc) self.getTitle(title, row, doc)
self.getDate(date, row, doc) self.getDate(date, row, doc)
self.getDescription(description, row, doc) self.getDescription(description, row, doc)
if notes is not None: if notes is not None:
doc.update({'notes': ( doc.update({'notes': (
''.join(notes.stripped_strings))}) ''.join(notes.stripped_strings))})
badtitles = ['-', 'Summary of FOI Request' badtitles = ['-', 'Summary of FOI Request'
, 'FOI request(in summary form)' , 'FOI request(in summary form)'
, 'Summary of FOI request received by the ASC', , 'Summary of FOI request received by the ASC',
'Summary of FOI request received by agency/minister', 'Summary of FOI request received by agency/minister',
'Description of Documents Requested', 'FOI request', 'Description of Documents Requested', 'FOI request',
'Description of FOI Request', 'Summary of request', 'Description', 'Summary', 'Description of FOI Request', 'Summary of request', 'Description', 'Summary',
'Summary of FOIrequest received by agency/minister', 'Summary of FOIrequest received by agency/minister',
'Summary of FOI request received', 'Description of FOI Request', 'Summary of FOI request received', 'Description of FOI Request',
"FOI request", 'Results 1 to 67 of 67'] "FOI request", 'Results 1 to 67 of 67']
if doc['title'] not in badtitles\ if doc['title'] not in badtitles and 'description' in doc.keys() and doc['description'] != '':
and doc['description'] != '':  
print "saving" print "saving"
foidocsdb.save(doc) foidocsdb.save(doc)
else: else:
print "already saved " + dochash print "already saved " + dochash
   
elif len(row.find_all('th')) == self.getColumnCount():
print "header row" print "header row"
   
else: else:
print "ERROR number of columns incorrect" print >> sys.stderr, "ERROR number of columns incorrect"
print row print row
   
for f in scrapers/*.py; DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
do echo "Processing $f file.."; cd $DIR
python $f; echo "" > /tmp/disclosr-error
  for f in scrapers/*.py; do
  echo "Processing $f file..";
  md5=`md5sum /tmp/disclosr-error`
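# 3>&1 1>&2 2>&3 swaps stdout and stderr, so only the scraper's stderr
# output is piped into tee and appended to the error log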
python $f 3>&1 1>&2 2>&3 | tee --append /tmp/disclosr-error;
status=${PIPESTATUS[0]}
md52=`md5sum /tmp/disclosr-error`
if [ "$md5" != "$md52" ]; then
echo "^^^^^^^^^^^^^^ $f" >> /tmp/disclosr-error;
fi
if [ "$status" -ne "0" ]; then
echo "error"; echo "error";
sleep 2; sleep 1;
fi fi
done done
  if [ -s /tmp/disclosr-error ] ; then
  echo "emailling logs..";
  mail -E -s "Disclosr errors" maxious@lambdacomplex.org < /tmp/disclosr-error ;
  fi
   
   
#http://packages.python.org/CouchDB/client.html #http://packages.python.org/CouchDB/client.html
import couchdb import couchdb
import urllib2 import urllib2
from BeautifulSoup import BeautifulSoup from BeautifulSoup import BeautifulSoup
import re import re
import hashlib import hashlib
from urlparse import urljoin from urlparse import urljoin
import time import time
import os import os
  import sys
import mimetypes import mimetypes
import urllib import urllib
import urlparse import urlparse
import socket import socket
   
  #couch = couchdb.Server('http://192.168.1.148:5984/')
  #couch = couchdb.Server('http://192.168.1.113:5984/')
  couch = couchdb.Server('http://127.0.0.1:5984/')
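# minimal couchdb-python usage sketch (the server URL above is deployment-specific,
# and some_id is a placeholder):
#   docsdb = couch['disclosr-documents']   # open a database
#   doc = docsdb.get(some_id)              # returns None when the id is absent
#   docsdb.save({'_id': some_id})          # create or update a document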
   
   
def mkhash(input): def mkhash(input):
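    # hex md5 digest of the input, used throughout as a CouchDB document id,
    # e.g. mkhash("") == "d41d8cd98f00b204e9800998ecf8427e"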
return hashlib.md5(input).hexdigest().encode("utf-8") return hashlib.md5(input).hexdigest().encode("utf-8")
   
   
def canonurl(url): def canonurl(url):
r"""Return the canonical, ASCII-encoded form of a UTF-8 encoded URL, or '' r"""Return the canonical, ASCII-encoded form of a UTF-8 encoded URL, or ''
if the URL looks invalid. if the URL looks invalid.
>>> canonurl('\xe2\x9e\xa1.ws') # tinyarro.ws >>> canonurl('\xe2\x9e\xa1.ws') # tinyarro.ws
'http://xn--hgi.ws/' 'http://xn--hgi.ws/'
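    A scheme and a trailing slash are added when missing:
    >>> canonurl('google.com')
    'http://google.com/'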
""" """
# strip spaces at the ends and ensure it's prefixed with 'scheme://' # strip spaces at the ends and ensure it's prefixed with 'scheme://'
url = url.strip() url = url.strip()
if not url: if not url:
return '' return ''
if not urlparse.urlsplit(url).scheme: if not urlparse.urlsplit(url).scheme:
url = 'http://' + url url = 'http://' + url
   
# turn it into Unicode # turn it into Unicode
#try: #try:
# url = unicode(url, 'utf-8') # url = unicode(url, 'utf-8')
#except UnicodeDecodeError: #except UnicodeDecodeError:
# return '' # bad UTF-8 chars in URL # return '' # bad UTF-8 chars in URL
   
# parse the URL into its components # parse the URL into its components
parsed = urlparse.urlsplit(url) parsed = urlparse.urlsplit(url)
scheme, netloc, path, query, fragment = parsed scheme, netloc, path, query, fragment = parsed
   
# ensure scheme is a letter followed by letters, digits, and '+-.' chars # ensure scheme is a letter followed by letters, digits, and '+-.' chars
if not re.match(r'[a-z][-+.a-z0-9]*$', scheme, flags=re.I): if not re.match(r'[a-z][-+.a-z0-9]*$', scheme, flags=re.I):
return '' return ''
scheme = str(scheme) scheme = str(scheme)
   
# ensure domain and port are valid, eg: sub.domain.<1-to-6-TLD-chars>[:port] # ensure domain and port are valid, eg: sub.domain.<1-to-6-TLD-chars>[:port]
match = re.match(r'(.+\.[a-z0-9]{1,6})(:\d{1,5})?$', netloc, flags=re.I) match = re.match(r'(.+\.[a-z0-9]{1,6})(:\d{1,5})?$', netloc, flags=re.I)
if not match: if not match:
return '' return ''
domain, port = match.groups() domain, port = match.groups()
netloc = domain + (port if port else '') netloc = domain + (port if port else '')
netloc = netloc.encode('idna') netloc = netloc.encode('idna')
   
# ensure path is valid and convert Unicode chars to %-encoded # ensure path is valid and convert Unicode chars to %-encoded
if not path: if not path:
path = '/' # eg: 'http://google.com' -> 'http://google.com/' path = '/' # eg: 'http://google.com' -> 'http://google.com/'
path = urllib.quote(urllib.unquote(path.encode('utf-8')), safe='/;') path = urllib.quote(urllib.unquote(path.encode('utf-8')), safe='/;')
   
# ensure query is valid # ensure query is valid
query = urllib.quote(urllib.unquote(query.encode('utf-8')), safe='=&?/') query = urllib.quote(urllib.unquote(query.encode('utf-8')), safe='=&?/')
   
# ensure fragment is valid # ensure fragment is valid
fragment = urllib.quote(urllib.unquote(fragment.encode('utf-8'))) fragment = urllib.quote(urllib.unquote(fragment.encode('utf-8')))
   
# piece it all back together, truncating it to a maximum of 4KB # piece it all back together, truncating it to a maximum of 4KB
url = urlparse.urlunsplit((scheme, netloc, path, query, fragment)) url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))
return url[:4096] return url[:4096]
   
   
def fullurl(url, href): def fullurl(url, href):
href = href.replace(" ", "%20") href = href.replace(" ", "%20")
href = re.sub('#.*$', '', href) href = re.sub('#.*$', '', href)
return urljoin(url, href) return urljoin(url, href)
   
#http://diveintopython.org/http_web_services/etags.html #http://diveintopython.org/http_web_services/etags.html
class NotModifiedHandler(urllib2.BaseHandler): class NotModifiedHandler(urllib2.BaseHandler):
def http_error_304(self, req, fp, code, message, headers): def http_error_304(self, req, fp, code, message, headers):
addinfourl = urllib2.addinfourl(fp, headers, req.get_full_url()) addinfourl = urllib2.addinfourl(fp, headers, req.get_full_url())
addinfourl.code = code addinfourl.code = code
return addinfourl return addinfourl
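# A minimal usage sketch (assuming the urllib2 stack used below): installing this
# handler makes a 304 Not Modified response come back as an ordinary response
# object with .code == 304 instead of raising, so fetchURL() can reuse the
# previously stored attachment.
#
#   opener = urllib2.build_opener(NotModifiedHandler())
#   resp = opener.open(urllib2.Request("http://example.com/"))
#   if hasattr(resp, 'code') and resp.code == 304:
#       pass  # serve the cached copy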
   
   
def getLastAttachment(docsdb, url): def getLastAttachment(docsdb, url):
hash = mkhash(url) hash = mkhash(url)
doc = docsdb.get(hash) doc = docsdb.get(hash)
if doc != None: if doc != None and "_attachments" in doc.keys():
last_attachment_fname = doc["_attachments"].keys()[-1] last_attachment_fname = doc["_attachments"].keys()[-1]
last_attachment = docsdb.get_attachment(doc, last_attachment_fname) last_attachment = docsdb.get_attachment(doc, last_attachment_fname)
return last_attachment return last_attachment
else: else:
return None return None
   
   
def fetchURL(docsdb, url, fieldName, agencyID, scrape_again=True): def fetchURL(docsdb, url, fieldName, agencyID, scrape_again=True):
url = canonurl(url) url = canonurl(url)
hash = mkhash(url) hash = mkhash(url)
req = urllib2.Request(url) req = urllib2.Request(url)
print "Fetching %s (%s)" % (url, hash) print "Fetching %s (%s)" % (url, hash)
if url.startswith("mailto") or url.startswith("javascript") or url.startswith("#") or url == None or url == "": if url.startswith("mailto") or url.startswith("javascript") or url.startswith("#") or url == None or url == "":
print "Not a valid HTTP url" print >> sys.stderr, "Not a valid HTTP url"
return (None, None, None) return (None, None, None)
doc = docsdb.get(hash) doc = docsdb.get(hash)
if doc == None: if doc == None:
doc = {'_id': hash, 'agencyID': agencyID, 'url': url, 'fieldName': fieldName, 'type': 'website'} doc = {'_id': hash, 'agencyID': agencyID, 'url': url, 'fieldName': fieldName, 'type': 'website'}
else: else:
if ((('page_scraped' in doc) and ((time.time() - doc['page_scraped']) < 60 * 60 * 24 * 14)) or (scrape_again == False)):  # re-scrape window: 14 days in seconds
print "Uh oh, trying to scrape URL again too soon!" + hash print "Uh oh, trying to scrape URL again too soon!" + hash
last_attachment_fname = doc["_attachments"].keys()[-1] if "_attachments" in doc.keys():
last_attachment = docsdb.get_attachment(doc, last_attachment_fname) last_attachment_fname = doc["_attachments"].keys()[-1]
content = last_attachment last_attachment = docsdb.get_attachment(doc, last_attachment_fname)
return (doc['url'], doc['mime_type'], content.read()) content = last_attachment.read()
if scrape_again == False: mime_type = doc['mime_type']
print "Not scraping this URL again as requested" else:
return (doc['url'], doc['mime_type'], content.read()) content = None
  mime_type = None
  return (doc['url'], mime_type, content)
   
req.add_header("User-Agent", "Mozilla/4.0 (compatible; Prometheus webspider; owner maxious@lambdacomplex.org)") req.add_header("User-Agent", "Mozilla/4.0 (compatible; Prometheus webspider; owner maxious@lambdacomplex.org)")
#if there is a previous version stored in couchdb, load caching helper tags #if there is a previous version stored in couchdb, load caching helper tags
if doc.has_key('etag'): if doc.has_key('etag'):
req.add_header("If-None-Match", doc['etag']) req.add_header("If-None-Match", doc['etag'])
if doc.has_key('last_modified'): if doc.has_key('last_modified'):
req.add_header("If-Modified-Since", doc['last_modified']) req.add_header("If-Modified-Since", doc['last_modified'])
   
opener = urllib2.build_opener(NotModifiedHandler()) opener = urllib2.build_opener(NotModifiedHandler())
try: try:
url_handle = opener.open(req, None, 20) url_handle = opener.open(req, None, 20)
doc['url'] = url_handle.geturl() # may have followed a redirect to a new url doc['url'] = url_handle.geturl() # may have followed a redirect to a new url
headers = url_handle.info() # the addinfourls have the .info() too headers = url_handle.info() # the addinfourls have the .info() too
doc['etag'] = headers.getheader("ETag") doc['etag'] = headers.getheader("ETag")
doc['last_modified'] = headers.getheader("Last-Modified") doc['last_modified'] = headers.getheader("Last-Modified")
doc['date'] = headers.getheader("Date") doc['date'] = headers.getheader("Date")
doc['page_scraped'] = time.time() doc['page_scraped'] = time.time()
doc['web_server'] = headers.getheader("Server") doc['web_server'] = headers.getheader("Server")
doc['via'] = headers.getheader("Via") doc['via'] = headers.getheader("Via")
doc['powered_by'] = headers.getheader("X-Powered-By") doc['powered_by'] = headers.getheader("X-Powered-By")
doc['file_size'] = headers.getheader("Content-Length") doc['file_size'] = headers.getheader("Content-Length")
content_type = headers.getheader("Content-Type") content_type = headers.getheader("Content-Type")
if content_type != None: if content_type != None:
doc['mime_type'] = content_type.split(";")[0] doc['mime_type'] = content_type.split(";")[0]
else: else:
(type, encoding) = mimetypes.guess_type(url) (type, encoding) = mimetypes.guess_type(url)
doc['mime_type'] = type doc['mime_type'] = type
if hasattr(url_handle, 'code'): if hasattr(url_handle, 'code'):
if url_handle.code == 304: if url_handle.code == 304:
print "the web page has not been modified" + hash print "the web page has not been modified" + hash
last_attachment_fname = doc["_attachments"].keys()[-1] last_attachment_fname = doc["_attachments"].keys()[-1]
last_attachment = docsdb.get_attachment(doc, last_attachment_fname) last_attachment = docsdb.get_attachment(doc, last_attachment_fname)
content = last_attachment content = last_attachment
return (doc['url'], doc['mime_type'], content.read()) return (doc['url'], doc['mime_type'], content.read())
else: else:
print "new webpage loaded" print "new webpage loaded"
content = url_handle.read() content = url_handle.read()
docsdb.save(doc) docsdb.save(doc)
doc = docsdb.get(hash) # need to get a _rev doc = docsdb.get(hash) # need to get a _rev
docsdb.put_attachment(doc, content, str(time.time()) + "-" + os.path.basename(url), doc['mime_type']) docsdb.put_attachment(doc, content, str(time.time()) + "-" + os.path.basename(url), doc['mime_type'])
return (doc['url'], doc['mime_type'], content) return (doc['url'], doc['mime_type'], content)
#store as attachment epoch-filename #store as attachment epoch-filename
   
except (urllib2.URLError, socket.timeout) as e: except (urllib2.URLError, socket.timeout) as e:
print "error!" print >> sys.stderr,"error!"
error = "" error = ""
if hasattr(e, 'reason'): if hasattr(e, 'reason'):
error = "error %s in downloading %s" % (str(e.reason), url) error = "error %s in downloading %s" % (str(e.reason), url)
elif hasattr(e, 'code'): elif hasattr(e, 'code'):
error = "error %s in downloading %s" % (e.code, url) error = "error %s in downloading %s" % (e.code, url)
print error print >> sys.stderr, error
doc['error'] = error doc['error'] = error
docsdb.save(doc) docsdb.save(doc)
return (None, None, None) return (None, None, None)
   
   
def scrapeAndStore(docsdb, url, depth, fieldName, agencyID): def scrapeAndStore(docsdb, url, depth, fieldName, agencyID):
(url, mime_type, content) = fetchURL(docsdb, url, fieldName, agencyID) (url, mime_type, content) = fetchURL(docsdb, url, fieldName, agencyID)
badURLs = ["http://www.ausport.gov.au/supporting/funding/grants_and_scholarships/grant_funding_report"] badURLs = ["http://www.ausport.gov.au/supporting/funding/grants_and_scholarships/grant_funding_report"]
if content != None and depth > 0 and url not in badURLs:
if mime_type == "text/html" or mime_type == "application/xhtml+xml" or mime_type == "application/xml": if mime_type == "text/html" or mime_type == "application/xhtml+xml" or mime_type == "application/xml":
# http://www.crummy.com/software/BeautifulSoup/documentation.html # http://www.crummy.com/software/BeautifulSoup/documentation.html
soup = BeautifulSoup(content) soup = BeautifulSoup(content)
navIDs = soup.findAll( navIDs = soup.findAll(
id=re.compile('nav|Nav|menu|bar|left|right|sidebar|more-links|breadcrumb|footer|header')) id=re.compile('nav|Nav|menu|bar|left|right|sidebar|more-links|breadcrumb|footer|header'))
for nav in navIDs: for nav in navIDs:
print "Removing element", nav['id'] print "Removing element", nav['id']
nav.extract() nav.extract()
navClasses = soup.findAll( navClasses = soup.findAll(
attrs={'class': re.compile('nav|menu|bar|left|right|sidebar|more-links|breadcrumb|footer|header')}) attrs={'class': re.compile('nav|menu|bar|left|right|sidebar|more-links|breadcrumb|footer|header')})
for nav in navClasses: for nav in navClasses:
print "Removing element", nav['class'] print "Removing element", nav['class']
nav.extract() nav.extract()
links = soup.findAll('a') # soup.findAll('a', id=re.compile("^p-")) links = soup.findAll('a') # soup.findAll('a', id=re.compile("^p-"))
linkurls = set([]) linkurls = set([])
for link in links: for link in links:
if link.has_key("href"): if link.has_key("href"):
if link['href'].startswith("http"): if link['href'].startswith("http"):
# lets not do external links for now # lets not do external links for now
# linkurls.add(link['href']) # linkurls.add(link['href'])
None None
if link['href'].startswith("mailto"): if link['href'].startswith("mailto"):
# not http # not http
None None
if link['href'].startswith("javascript"): if link['href'].startswith("javascript"):
# not http # not http
None None
else: else:
# remove anchors and spaces in urls # remove anchors and spaces in urls
linkurls.add(fullurl(url, link['href'])) linkurls.add(fullurl(url, link['href']))
for linkurl in linkurls: for linkurl in linkurls:
#print linkurl #print linkurl
scrapeAndStore(docsdb, linkurl, depth - 1, fieldName, agencyID) scrapeAndStore(docsdb, linkurl, depth - 1, fieldName, agencyID)
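# usage sketch (hypothetical values): crawl an agency website two levels deep
#   scrapeAndStore(docsdb, "http://www.example.gov.au/", 2, "website", "exampleAgencyID")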
   
#couch = couchdb.Server('http://192.168.1.148:5984/')  
couch = couchdb.Server('http://192.168.1.113:5984/')  
#couch = couchdb.Server('http://127.0.0.1:5984/')  
# select database # select database
agencydb = couch['disclosr-agencies'] agencydb = couch['disclosr-agencies']
docsdb = couch['disclosr-documents'] docsdb = couch['disclosr-documents']
   
if __name__ == "__main__": if __name__ == "__main__":
for row in agencydb.view('app/all'): #not recently scraped agencies view? for row in agencydb.view('app/all'): #not recently scraped agencies view?
agency = agencydb.get(row.id) agency = agencydb.get(row.id)
print agency['name'] print agency['name']
for key in agency.keys(): for key in agency.keys():
if key == "FOIDocumentsURL" and "status" not in agency.keys() and False: if key == "FOIDocumentsURL" and "status" not in agency.keys() and False:
scrapeAndStore(docsdb, agency[key], 0, key, agency['_id']) scrapeAndStore(docsdb, agency[key], 0, key, agency['_id'])
if key == 'website' and True: if key == 'website' and True:
scrapeAndStore(docsdb, agency[key], 0, key, agency['_id']) scrapeAndStore(docsdb, agency[key], 0, key, agency['_id'])
if "metadata" not in agency.keys(): if "metadata" not in agency.keys():
agency['metadata'] = {} agency['metadata'] = {}
agency['metadata']['lastScraped'] = time.time() agency['metadata']['lastScraped'] = time.time()
if key.endswith('URL') and False: if key.endswith('URL') and False:
print key print key
depth = 1 depth = 1
if 'scrapeDepth' in agency.keys(): if 'scrapeDepth' in agency.keys():
depth = agency['scrapeDepth'] depth = agency['scrapeDepth']
scrapeAndStore(docsdb, agency[key], depth, key, agency['_id']) scrapeAndStore(docsdb, agency[key], depth, key, agency['_id'])
agencydb.save(agency) agencydb.save(agency)
   
import sys import sys
import os import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import genericScrapers import genericScrapers
import traceback import traceback
try: try:
import amonpy import amonpy
amonpy.config.address = 'http://amon_instance:port' amonpy.config.address = 'http://amon_instance:port'
amonpy.config.secret_key = 'the secret key from /etc/amon.conf' amonpy.config.secret_key = 'the secret key from /etc/amon.conf'
amon_available = True amon_available = True
except ImportError: except ImportError:
amon_available = False amon_available = False
   
class ScraperImplementation(genericScrapers.GenericPDFDisclogScraper): class ScraperImplementation(genericScrapers.GenericPDFDisclogScraper):
   
def __init__(self): def __init__(self):
super(ScraperImplementation, self).__init__() super(ScraperImplementation, self).__init__()
   
   
if __name__ == '__main__': if __name__ == '__main__':
print 'Subclass:', issubclass(ScraperImplementation, print 'Subclass:', issubclass(ScraperImplementation,
genericScrapers.GenericPDFDisclogScraper) genericScrapers.GenericPDFDisclogScraper)
print 'Instance:', isinstance(ScraperImplementation(), print 'Instance:', isinstance(ScraperImplementation(),
genericScrapers.GenericPDFDisclogScraper) genericScrapers.GenericPDFDisclogScraper)
try: try:
ScraperImplementation().doScrape() ScraperImplementation().doScrape()
except Exception, err: except Exception, err:
sys.stderr.write('ERROR: %s\n' % str(err)) sys.stderr.write('ERROR: %s\n' % str(err))
print ‘Error Reason: ‘, err.__doc__ print "Error Reason: ", err.__doc__
print ‘Exception: ‘, err.__class__ print "Exception: ", err.__class__
print traceback.format_exc() print traceback.format_exc()
if amon_available: if amon_available:
data = { data = {
'exception_class': '', 'exception_class': '',
'url': '', 'url': '',
'backtrace': ['exception line ', 'another exception line'], 'backtrace': ['exception line ', 'another exception line'],
'enviroment': '', 'enviroment': '',
# In 'data' you can add request information, session variables - it's a recursive # In 'data' you can add request information, session variables - it's a recursive
# dictionary, so you can literally add everything important for your specific case # dictionary, so you can literally add everything important for your specific case
# The dictionary doesn't have a specified structure; the keys below are only examples
'data': {'request': '', 'session': '', 'more': ''} 'data': {'request': '', 'session': '', 'more': ''}
   
} }
  #amonpy.exception(data)
amonpy.exception(data)  
pass pass
   
import sys,os import sys,os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import genericScrapers import genericScrapers
import scrape import scrape
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
   
#http://www.doughellmann.com/PyMOTW/abc/ #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
def getDescription(self,content, entry,doc): def getDescription(self,content, entry,doc):
link = None link = None
links = [] links = []
description = "" description = ""
for atag in entry.find_all('a'): for atag in entry.find_all('a'):
if atag.has_key('href'): if atag.has_key('href'):
link = scrape.fullurl(self.getURL(),atag['href']) link = scrape.fullurl(self.getURL(),atag['href'])
(url,mime_type,htcontent) = scrape.fetchURL(scrape.docsdb, link, "foidocuments", self.getAgencyID(), False) (url,mime_type,htcontent) = scrape.fetchURL(scrape.docsdb, link, "foidocuments", self.getAgencyID(), False)
if htcontent != None: if htcontent != None:
if mime_type == "text/html" or mime_type == "application/xhtml+xml" or mime_type =="application/xml": if mime_type == "text/html" or mime_type == "application/xhtml+xml" or mime_type =="application/xml":
# http://www.crummy.com/software/BeautifulSoup/documentation.html # http://www.crummy.com/software/BeautifulSoup/documentation.html
soup = BeautifulSoup(htcontent) soup = BeautifulSoup(htcontent)
for row in soup.find(class_ = "ms-rteTable-GreyAlternating").find_all('tr'): rowtitle = soup.find(class_ = "wc-title").find("h1").string
  if rowtitle != None:
  description = rowtitle + ": "
  for row in soup.find(class_ ="wc-content").find_all('td'):
if row != None: if row != None:
rowtitle = row.find('th').string for text in row.stripped_strings:
if rowtitle != None: description = description + text + "\n"
description = description + "\n" + rowtitle + ": "  
for text in row.find('td').stripped_strings:  
description = description + text  
for atag in row.find_all("a"): for atag in row.find_all("a"):
if atag.has_key('href'): if atag.has_key('href'):
links.append(scrape.fullurl(link,atag['href'])) links.append(scrape.fullurl(link,atag['href']))
   
if links != []: if links != []:
doc.update({'links': links}) doc.update({'links': links})
if description != "": if description != "":
doc.update({ 'description': description}) doc.update({ 'description': description})
   
def getColumnCount(self): def getColumnCount(self):
return 2 return 2
def getTable(self,soup): def getTable(self,soup):
return soup.find(class_ = "ms-rteTable-GreyAlternating") return soup.find(class_ = "ms-rteTable-default")
def getColumns(self,columns): def getColumns(self,columns):
(date, title) = columns (date, title) = columns
return (title, date, title, title, None) return (title, date, title, title, None)
   
if __name__ == '__main__': if __name__ == '__main__':
print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
ScraperImplementation().doScrape() ScraperImplementation().doScrape()
   
import sys,os import sys,os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import genericScrapers import genericScrapers
import scrape import scrape
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
   
#http://www.doughellmann.com/PyMOTW/abc/ #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
def getTable(self,soup): def getTable(self,soup):
return soup.find(id = "inner_content") return soup.find(class_="tborder")
def getColumnCount(self): def getColumnCount(self):
return 2 return 2
def getColumns(self,columns): def getColumns(self,columns):
(date, title) = columns (date, title) = columns
return (date, date, title, title, None) return (date, date, title, title, None)
   
if __name__ == '__main__': if __name__ == '__main__':
print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
ScraperImplementation().doScrape() ScraperImplementation().doScrape()
   
import sys,os import sys,os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import genericScrapers import genericScrapers
import scrape import scrape
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import dateutil import dateutil
from dateutil.parser import * from dateutil.parser import *
from datetime import * from datetime import *
   
#http://www.doughellmann.com/PyMOTW/abc/ #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): class ScraperImplementation(genericScrapers.GenericHTMLDisclogScraper):
def getTable(self,soup):  
return soup.find(class_ = "inner-column").table  
def getRows(self,table):  
return table.tbody.find_all('tr',recursive=False)  
def getColumnCount(self): def getColumnCount(self):
return 3 return 0
def getColumns(self,columns):  
(date, title, description) = columns  
return (date, date, title, description, None)  
def getDate(self, content, entry, doc):  
i = 0  
date = ""  
for string in content.stripped_strings:  
if i ==1:  
date = string  
i = i+1  
edate = parse(date, dayfirst=True, fuzzy=True).strftime("%Y-%m-%d")  
print edate  
doc.update({'date': edate})  
return  
def getTitle(self, content, entry, doc):  
i = 0  
title = ""  
for string in content.stripped_strings:  
if i < 2:  
title = title + string  
i = i+1  
doc.update({'title': title})  
#print title  
return  
   
if __name__ == '__main__': if __name__ == '__main__':
print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) #http://www.csiro.au/Portals/About-CSIRO/How-we-work/Governance/FOI-Request-Disclosure-Log-2012-13.aspx
print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) #http://www.csiro.au/Portals/About-CSIRO/How-we-work/Governance/FOI-Request-Disclosure-Log-2011-12.aspx
  print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericHTMLDisclogScraper)
  print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericHTMLDisclogScraper)
ScraperImplementation().doScrape() ScraperImplementation().doScrape()
   
import sys,os import sys,os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import genericScrapers import genericScrapers
import scrape import scrape
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
   
#http://www.doughellmann.com/PyMOTW/abc/ #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
def getTable(self,soup):  
return soup.find(id = "content_div_50269").table  
def getColumns(self,columns): def getColumns(self,columns):
(id, date, title, description, notes) = columns (id, date, title, description, notes) = columns
return (id, date, title, description, notes) return (id, date, title, description, notes)
   
if __name__ == '__main__': if __name__ == '__main__':
print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
ScraperImplementation().doScrape() ScraperImplementation().doScrape()
   
import sys import sys
import os import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import genericScrapers import genericScrapers
import scrape import scrape
from datetime import date from datetime import date
from pyquery import PyQuery as pq from pyquery import PyQuery as pq
from lxml import etree from lxml import etree
import urllib import urllib
import dateutil import dateutil
from dateutil.parser import * from dateutil.parser import *
   
class ACMADisclogScraper(genericScrapers.GenericDisclogScraper): class ACMADisclogScraper(genericScrapers.GenericDisclogScraper):
   
def doScrape(self): def doScrape(self):
foidocsdb = scrape.couch['disclosr-foidocuments'] foidocsdb = scrape.couch['disclosr-foidocuments']
(url, mime_type, content) = scrape.fetchURL(scrape.docsdb, (url, mime_type, content) = scrape.fetchURL(scrape.docsdb,
self.getURL(), "foidocuments", self.getAgencyID()) self.getURL(), "foidocuments", self.getAgencyID())
   
d = pq(content) d = pq(content)
d.make_links_absolute(base_url = self.getURL()) d.make_links_absolute(base_url = self.getURL())
for table in d('table').items(): for table in d('table').items():
title= table('thead').text() title= table('thead').text()
print title print self.remove_control_chars(title)
(idate,descA,descB,link,deldate,notes) = table('tbody tr').map(lambda i, e: pq(e).children().eq(1).text()) (idate,descA,descB,link,deldate,notes) = table('tbody tr').map(lambda i, e: pq(e).children().eq(1).text())
links = table('a').map(lambda i, e: pq(e).attr('href')) links = table('a').map(lambda i, e: pq(e).attr('href'))
description = descA+" "+descB description = descA+" "+descB
edate = parse(idate[:12], dayfirst=True, fuzzy=True).strftime("%Y-%m-%d") try:
  edate = parse(idate[:12], dayfirst=True, fuzzy=True).strftime("%Y-%m-%d")
  except ValueError:
  edate = date.today().strftime("%Y-%m-%d")
print edate print edate
dochash = scrape.mkhash(self.remove_control_chars(title)) dochash = scrape.mkhash(self.remove_control_chars(title))
doc = foidocsdb.get(dochash) doc = foidocsdb.get(dochash)
if doc is None: if doc is None:
print "saving " + dochash print "saving " + dochash
doc = {'_id': dochash, 'agencyID': self.getAgencyID() doc = {'_id': dochash, 'agencyID': self.getAgencyID()
, 'url': self.getURL(), 'docID': dochash, , 'url': self.getURL(), 'docID': dochash,
"links": links, "links": links,
"date": edate, "notes": notes, "title": title, "description": description} "date": edate, "notes": notes, "title": title, "description": description}
#print doc #print doc
foidocsdb.save(doc) foidocsdb.save(doc)
else: else:
print "already saved" print "already saved"
   
   
if __name__ == '__main__': if __name__ == '__main__':
print 'Subclass:', issubclass(ACMADisclogScraper, print 'Subclass:', issubclass(ACMADisclogScraper,
genericScrapers.GenericDisclogScraper) genericScrapers.GenericDisclogScraper)
print 'Instance:', isinstance(ACMADisclogScraper(), print 'Instance:', isinstance(ACMADisclogScraper(),
genericScrapers.GenericDisclogScraper) genericScrapers.GenericDisclogScraper)
ACMADisclogScraper().doScrape() ACMADisclogScraper().doScrape()
   
import sys,os import sys,os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import genericScrapers import genericScrapers
import scrape import scrape
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import codecs import codecs
#http://www.doughellmann.com/PyMOTW/abc/ #http://www.doughellmann.com/PyMOTW/abc/
class NewScraperImplementation(genericScrapers.GenericOAICDisclogScraper): class NewScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
def getDescription(self,content, entry,doc): def getDescription(self,content, entry,doc):
link = None link = None
links = [] links = []
description = "" description = ""
for atag in entry.find_all('a'): for atag in entry.find_all('a'):
if atag.has_key('href'): if atag.has_key('href'):
link = scrape.fullurl(self.getURL(),atag['href']) link = scrape.fullurl(self.getURL(),atag['href'])
(url,mime_type,htcontent) = scrape.fetchURL(scrape.docsdb, link, "foidocuments", self.getAgencyID(), False) (url,mime_type,htcontent) = scrape.fetchURL(scrape.docsdb, link, "foidocuments", self.getAgencyID(), False)
if htcontent != None: if htcontent != None:
if mime_type == "text/html" or mime_type == "application/xhtml+xml" or mime_type =="application/xml": if mime_type == "text/html" or mime_type == "application/xhtml+xml" or mime_type =="application/xml":
# http://www.crummy.com/software/BeautifulSoup/documentation.html # http://www.crummy.com/software/BeautifulSoup/documentation.html
soup = BeautifulSoup(htcontent) soup = BeautifulSoup(htcontent)
for text in soup.find(id="divFullWidthColumn").stripped_strings: for text in soup.find(class_ = "mainContent").stripped_strings:
description = description + text.encode('ascii', 'ignore') description = description + text.encode('ascii', 'ignore')
   
for atag in soup.find(id="divFullWidthColumn").find_all("a"): for atag in soup.find(id="SortingTable").find_all("a"):
if atag.has_key('href'): if atag.has_key('href'):
links.append(scrape.fullurl(link,atag['href'])) links.append(scrape.fullurl(link,atag['href']))
   
if links != []: if links != []:
doc.update({'links': links}) doc.update({'links': links})
if description != "": if description != "":
doc.update({ 'description': description}) doc.update({ 'description': description})
   
def getColumnCount(self): def getColumnCount(self):
return 2 return 2
def getTable(self,soup): def getTable(self,soup):
return soup.find(id = "TwoColumnSorting") return soup.find(id = "TwoColumnSorting")
def getColumns(self,columns): def getColumns(self,columns):
( title, date) = columns ( title, date) = columns
return (title, date, title, title, None) return (title, date, title, title, None)
class OldScraperImplementation(genericScrapers.GenericOAICDisclogScraper): class OldScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
def getDescription(self,content, entry,doc): def getDescription(self,content, entry,doc):
link = None link = None
links = [] links = []
description = "" description = ""
for atag in entry.find_all('a'): for atag in entry.find_all('a'):
if atag.has_key('href'): if atag.has_key('href'):
link = scrape.fullurl(self.getURL(),atag['href']) link = scrape.fullurl(self.getURL(),atag['href'])
(url,mime_type,htcontent) = scrape.fetchURL(scrape.docsdb, link, "foidocuments", self.getAgencyID(), False) (url,mime_type,htcontent) = scrape.fetchURL(scrape.docsdb, link, "foidocuments", self.getAgencyID(), False)
if htcontent != None: if htcontent != None:
if mime_type == "text/html" or mime_type == "application/xhtml+xml" or mime_type =="application/xml": if mime_type == "text/html" or mime_type == "application/xhtml+xml" or mime_type =="application/xml":
# http://www.crummy.com/software/BeautifulSoup/documentation.html # http://www.crummy.com/software/BeautifulSoup/documentation.html
soup = BeautifulSoup(htcontent) soup = BeautifulSoup(htcontent)
for text in soup.find(id="content-item").stripped_strings: for text in soup.find(id="content-item").stripped_strings:
description = description + text + " \n" description = description + text + " \n"
for atag in soup.find(id="content-item").find_all("a"): for atag in soup.find(id="content-item").find_all("a"):
if atag.has_key('href'): if atag.has_key('href'):
links.append(scrape.fullurl(link,atag['href'])) links.append(scrape.fullurl(link,atag['href']))
if links != []: if links != []:
doc.update({'links': links}) doc.update({'links': links})
if description != "": if description != "":
doc.update({ 'description': description}) doc.update({ 'description': description})
   
if links != []: if links != []:
doc.update({'links': links}) doc.update({'links': links})
if description != "": if description != "":
doc.update({ 'description': description}) doc.update({ 'description': description})
   
def getColumnCount(self): def getColumnCount(self):
return 2 return 2
def getTable(self,soup): def getTable(self,soup):
return soup.find(class_ = "doc-list") return soup.find(class_ = "doc-list")
def getColumns(self,columns): def getColumns(self,columns):
(date, title) = columns (date, title) = columns
return (title, date, title, title, None) return (title, date, title, title, None)
   
if __name__ == '__main__': if __name__ == '__main__':
print 'Subclass:', issubclass(NewScraperImplementation, genericScrapers.GenericOAICDisclogScraper) print 'Subclass:', issubclass(NewScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
print 'Instance:', isinstance(NewScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) print 'Instance:', isinstance(NewScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
NewScraperImplementation().doScrape() NewScraperImplementation().doScrape()
print 'Subclass:', issubclass(OldScraperImplementation, genericScrapers.GenericOAICDisclogScraper) print 'Subclass:', issubclass(OldScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
print 'Instance:', isinstance(OldScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) print 'Instance:', isinstance(OldScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
osi = OldScraperImplementation() osi = OldScraperImplementation()
osi.disclogURL = "http://archive.treasury.gov.au/content/foi_publications.asp?year=-1&abstract=0&classification=&=&titl=Disclosure+Log+-+Documents+Released+Under+FOI" osi.disclogURL = "http://archive.treasury.gov.au/content/foi_publications.asp?year=-1&abstract=0&classification=&=&titl=Disclosure+Log+-+Documents+Released+Under+FOI"
osi.doScrape() osi.doScrape()
   
import sys,os import sys,os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import genericScrapers import genericScrapers
import scrape import scrape
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
   
#http://www.doughellmann.com/PyMOTW/abc/ #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
def getTable(self,soup): def getTable(self,soup):
return soup.find(id="ctl00_ContentPlaceHolderMainNoAjax_EdtrTD1494_2").table return soup.find(id="int-content").table
def getColumnCount(self): def getColumnCount(self):
return 4 return 3
def getColumns(self,columns): def getColumns(self,columns):
(blank,id, title,date) = columns (id, title,date) = columns
return (id, date, title, title, None) return (id, date, title, title, None)
   
if __name__ == '__main__': if __name__ == '__main__':
print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
ScraperImplementation().doScrape() ScraperImplementation().doScrape()
   
  import sys,os
  sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
  import genericScrapers
  import scrape
  from bs4 import BeautifulSoup
 
  #http://www.doughellmann.com/PyMOTW/abc/
  class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
  def getColumns(self,columns):
  (id, date, title, description, notes) = columns
  return (id, date, title, description, notes)
 
  if __name__ == '__main__':
  print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
  print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
  ScraperImplementation().doScrape()
 
import sys,os import sys,os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import genericScrapers import genericScrapers
import scrape import scrape
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
   
#http://www.doughellmann.com/PyMOTW/abc/ #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
def getColumns(self,columns): def getColumns(self,columns):
(id, date, title, description, notes) = columns (id, date, title, description, notes) = columns
return (id, date, title, description, notes) return (id, date, title, description, notes)
def getTable(self,soup): def getTable(self,soup):
return soup.find(class_ = "content") return soup.find(class_ = "simpletable")
   
if __name__ == '__main__': if __name__ == '__main__':
print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
ScraperImplementation().doScrape() ScraperImplementation().doScrape()
   
import sys,os import sys,os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import genericScrapers import genericScrapers
#RSS feed not detailed #RSS feed not detailed
   
#http://www.doughellmann.com/PyMOTW/abc/ #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericRSSDisclogScraper): class ScraperImplementation(genericScrapers.GenericRSSDisclogScraper):
def getColumns(self,columns): def getColumns(self,columns):
(id, date, title, description, notes) = columns (id, date, title, description, notes) = columns
return (id, date, title, description, notes) return (id, date, title, description, notes)
   
if __name__ == '__main__': if __name__ == '__main__':
print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericRSSDisclogScraper) print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericRSSDisclogScraper)
print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericRSSDisclogScraper) print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericRSSDisclogScraper)
ScraperImplementation().doScrape() ScraperImplementation().doScrape()
   
www.finance.gov.au/foi/disclosure-log/foi-rss.xml  
   
import sys,os import sys,os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import genericScrapers import genericScrapers
import scrape import scrape
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
   
#http://www.doughellmann.com/PyMOTW/abc/ #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
def getColumns(self,columns): def getColumns(self,columns):
(id, date, title, description, notes) = columns (id, date, title, description, notes) = columns
return (id, date, title, description, notes) return (id, date, title, description, notes)
def getTable(self,soup): def getTable(self,soup):
return soup.find(id = "content").table return soup.find("table")
   
if __name__ == '__main__': if __name__ == '__main__':
print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
ScraperImplementation().doScrape() ScraperImplementation().doScrape()
   
<?php <?php
   
include_once('include/common.inc.php'); include_once('include/common.inc.php');
   
function displayValue($key, $value, $mode) { function displayValue($key, $value, $mode) {
global $db, $schemas; global $db, $schemas;
$ignoreKeys = Array("metadata" ,"metaTags", "statistics","rtkURLs","rtkDescriptions"); $ignoreKeys = Array("metadata" ,"metaTags", "statistics","rtkURLs","rtkDescriptions");
if ($mode == "view") { if ($mode == "view") {
if (strpos($key, "_") === 0 || in_array($key,$ignoreKeys)) if (strpos($key, "_") === 0 || in_array($key,$ignoreKeys))
return; return;
echo "<tr>"; echo "<tr>";
   
echo "<td class='$key'>"; echo "<td class='$key'>";
if (isset($schemas['agency']["properties"][$key])) { if (isset($schemas['agency']["properties"][$key])) {
echo $schemas['agency']["properties"][$key]['x-title'] . "<br><small>" . $schemas['agency']["properties"][$key]['description'] . "</small>"; echo $schemas['agency']["properties"][$key]['x-title'] . "<br><small>" . $schemas['agency']["properties"][$key]['description'] . "</small>";
} }
echo "</td><td>"; echo "</td><td>";
if (is_array($value)) { if (is_array($value)) {
echo "<ol>"; echo "<ol>";
foreach ($value as $subkey => $subvalue) { foreach ($value as $subkey => $subvalue) {
   
echo "<li "; echo "<li ";
if (isset($schemas['agency']["properties"][$key]['x-property'])) { if (isset($schemas['agency']["properties"][$key]['x-property'])) {
echo ' property="' . $schemas['agency']["properties"][$key]['x-property'] . '" '; echo ' property="' . $schemas['agency']["properties"][$key]['x-property'] . '" ';
} if (isset($schemas['agency']["properties"][$key]['x-itemprop'])) { } if (isset($schemas['agency']["properties"][$key]['x-itemprop'])) {
echo ' itemprop="' . $schemas['agency']["properties"][$key]['x-itemprop'] . '" '; echo ' itemprop="' . $schemas['agency']["properties"][$key]['x-itemprop'] . '" ';
} }
echo " >"; echo " >";
   
echo "$subvalue</li>"; echo "$subvalue</li>";
} }
echo "</ol></td></tr>"; echo "</ol></td></tr>";
} else { } else {
if (isset($schemas['agency']["properties"][$key]['x-property'])) { if (isset($schemas['agency']["properties"][$key]['x-property'])) {
echo '<span property="' . $schemas['agency']["properties"][$key]['x-property'] . '">'; echo '<span property="' . $schemas['agency']["properties"][$key]['x-property'] . '">';
} else { } else {
echo "<span>"; echo "<span>";
} }
   
if ((strpos($key, "URL") > 0 || $key == 'website') && $value != "") { if ((strpos($key, "URL") > 0 || $key == 'website') && $value != "") {
echo "<a " . ($key == 'website' ? 'itemprop="url"' : '') . " href='$value'>$value</a>"; echo "<a " . ($key == 'website' ? 'itemprop="url"' : '') . " href='$value'>$value</a>";
} else if ($key == 'abn') { } else if ($key == 'abn') {
echo "<a href='http://www.abr.business.gov.au/SearchByAbn.aspx?SearchText=$value'>$value</a>"; echo "<a href='http://www.abr.business.gov.au/SearchByAbn.aspx?SearchText=$value'>$value</a>";
} else { } else {
echo "$value"; echo "$value";
} }
echo "</span>"; echo "</span>";
} }
echo "</td></tr>"; echo "</td></tr>";
} }
if ($mode == "edit") { if ($mode == "edit") {
if (is_array($value)) { if (is_array($value)) {
echo '<div class="row"> echo '<div class="row">
<div class="seven columns"> <div class="seven columns">
<fieldset> <fieldset>
<h5>' . $key . '</h5>'; <h5>' . $key . '</h5>';
foreach ($value as $subkey => $subvalue) { foreach ($value as $subkey => $subvalue) {
echo "<label>$subkey</label><input class='input-text' type='text' id='$key$subkey' name='$key" . '[' . $subkey . "]' value='$subvalue'/></tr>"; echo "<label>$subkey</label><input class='input-text' type='text' id='$key$subkey' name='$key" . '[' . $subkey . "]' value='$subvalue'/></tr>";
} }
echo "</fieldset> echo "</fieldset>
</div> </div>
</div>"; </div>";
} else { } else {
if (strpos($key, "_") === 0) { if (strpos($key, "_") === 0) {
echo"<input type='hidden' id='$key' name='$key' value='$value'/>"; echo"<input type='hidden' id='$key' name='$key' value='$value'/>";
} else if ($key == "parentOrg") { } else if ($key == "parentOrg") {
echo "<label for='$key'>$key</label><select id='$key' name='$key'><option value=''> Select... </option>"; echo "<label for='$key'>$key</label><select id='$key' name='$key'><option value=''> Select... </option>";
$rows = $db->get_view("app", "byDeptStateName")->rows; $rows = $db->get_view("app", "byDeptStateName")->rows;
//print_r($rows); //print_r($rows);
foreach ($rows as $row) { foreach ($rows as $row) {
echo "<option value='{$row->value}'" . (($row->value == $value) ? "SELECTED" : "") . " >" . str_replace("Department of ", "", $row->key) . "</option>"; echo "<option value='{$row->value}'" . (($row->value == $value) ? "SELECTED" : "") . " >" . str_replace("Department of ", "", $row->key) . "</option>";
} }
echo" </select>"; echo" </select>";
} else { } else {
echo "<label>$key</label><input class='input-text' type='text' id='$key' name='$key' value='$value'/>"; echo "<label>$key</label><input class='input-text' type='text' id='$key' name='$key' value='$value'/>";
if ((strpos($key, "URL") > 0 || $key == 'website') && $value != "") { if ((strpos($key, "URL") > 0 || $key == 'website') && $value != "") {
echo "<a " . ($key == 'website' ? 'itemprop="url"' : '') . " href='$value'>view</a>"; echo "<a " . ($key == 'website' ? 'itemprop="url"' : '') . " href='$value'>view</a>";
} }
if ($key == 'abn') { if ($key == 'abn') {
echo "<a href='http://www.abr.business.gov.au/SearchByAbn.aspx?SearchText=$value'>view abn</a>"; echo "<a href='http://www.abr.business.gov.au/SearchByAbn.aspx?SearchText=$value'>view abn</a>";
} }
} }
} }
} }
// //
} }
   
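// addDefaultFields() backfills any schema-declared agency properties that are
// missing from a record (as a blank string or a single blank array entry) and
// pads existing array fields with a few blank slots so the edit form renders
// empty inputs for adding new values.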
function addDefaultFields($row) { function addDefaultFields($row) {
global $schemas; global $schemas;
$defaultFields = array_keys($schemas['agency']['properties']); $defaultFields = array_keys($schemas['agency']['properties']);
foreach ($defaultFields as $defaultField) { foreach ($defaultFields as $defaultField) {
if (!isset($row[$defaultField])) { if (!isset($row[$defaultField])) {
if ($schemas['agency']['properties'][$defaultField]['type'] == "string") { if ($schemas['agency']['properties'][$defaultField]['type'] == "string") {
$row[$defaultField] = ""; $row[$defaultField] = "";
} }
if ($schemas['agency']['properties'][$defaultField]['type'] == "array") { if ($schemas['agency']['properties'][$defaultField]['type'] == "array") {
$row[$defaultField] = Array(""); $row[$defaultField] = Array("");
} }
} else if ($schemas['agency']['properties'][$defaultField]['type'] == "array") { } else if ($schemas['agency']['properties'][$defaultField]['type'] == "array") {
if (is_array($row[$defaultField])) { if (is_array($row[$defaultField])) {
$row[$defaultField][] = ""; $row[$defaultField][] = "";
$row[$defaultField][] = ""; $row[$defaultField][] = "";
$row[$defaultField][] = ""; $row[$defaultField][] = "";
} else { } else {
$value = $row[$defaultField]; $value = $row[$defaultField];
$row[$defaultField] = Array($value); $row[$defaultField] = Array($value);
$row[$defaultField][] = ""; $row[$defaultField][] = "";
$row[$defaultField][] = ""; $row[$defaultField][] = "";
} }
} }
} }
return $row; return $row;
} }
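// A hedged sketch of what addDefaultFields() does to a sparse record (field names illustrative; the real list comes from $schemas['agency']): // A hedged sketch of what addDefaultFields() does to a sparse record (field names illustrative; the real list comes from $schemas['agency']):
//   in:  Array('name' => 'Example Agency') //   in:  Array('name' => 'Example Agency')
//   out: Array('name' => 'Example Agency', 'website' => '', 'otherNames' => Array('')) //   out: Array('name' => 'Example Agency', 'website' => '', 'otherNames' => Array(''))
// Records that already hold an array field gain three blank slots so the edit form always renders spare inputs. // Records that already hold an array field gain three blank slots so the edit form always renders spare inputs.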
   
$db = $server->get_db('disclosr-agencies'); $db = $server->get_db('disclosr-agencies');
   
if (isset($_REQUEST['id'])) { if (isset($_REQUEST['id'])) {
//get an agency record as json/html, search by name/abn/id //get an agency record as json/html, search by name/abn/id
// by name = startkey="Ham"&endkey="Ham\ufff0" // by name = startkey="Ham"&endkey="Ham\ufff0"
// edit? // edit?
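// For reference, a sketch of that prefix search over raw HTTP (assumes CouchDB's standard startkey/endkey view parameters and a local server; not wired up here): // For reference, a sketch of that prefix search over raw HTTP (assumes CouchDB's standard startkey/endkey view parameters and a local server; not wired up here):
//   $prefix = "Ham"; //   $prefix = "Ham";
//   $url = "http://localhost:5984/disclosr-agencies/_design/app/_view/byCanonicalName" //   $url = "http://localhost:5984/disclosr-agencies/_design/app/_view/byCanonicalName"
//        . '?startkey="' . $prefix . '"&endkey="' . $prefix . "\xEF\xBF\xB0" . '"'; //        . '?startkey="' . $prefix . '"&endkey="' . $prefix . "\xEF\xBF\xB0" . '"';
//   $matches = json_decode(file_get_contents($url))->rows; //   $matches = json_decode(file_get_contents($url))->rows;
//   ("\xEF\xBF\xB0" is UTF-8 for the \ufff0 high-key sentinel mentioned above) //   ("\xEF\xBF\xB0" is UTF-8 for the \ufff0 high-key sentinel mentioned above)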
   
$obj = $db->get($_REQUEST['id']); $obj = $db->get($_REQUEST['id']);
include_header(isset($obj->name) ? $obj->name : ""); include_header(isset($obj->name) ? $obj->name : "");
//print_r($row); //print_r($row);
if (sizeof($_POST) > 0) { if (sizeof($_POST) > 0) {
//print_r($_POST); //print_r($_POST);
foreach ($_POST as $postkey => $postvalue) { foreach ($_POST as $postkey => $postvalue) {
if ($postvalue == "") { if ($postvalue == "") {
unset($_POST[$postkey]); unset($_POST[$postkey]);
} }
if (is_array($postvalue)) { if (is_array($postvalue)) {
if (count($postvalue) == 1 && reset($postvalue) == "") { if (count($postvalue) == 1 && reset($postvalue) == "") {
unset($_POST[$postkey]); unset($_POST[$postkey]);
} else { } else {
foreach ($_POST[$postkey] as $key => &$value) { foreach ($_POST[$postkey] as $key => &$value) {
if ($value == "") { if ($value == "") {
unset($_POST[$postkey][$key]); unset($_POST[$postkey][$key]);
} }
} }
} }
} }
} }
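// Optimistic concurrency: CouchDB only accepts writes carrying the latest _rev, so check the submitted revision first to give a friendlier conflict message. // Optimistic concurrency: CouchDB only accepts writes carrying the latest _rev, so check the submitted revision first to give a friendlier conflict message.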
if (isset($_POST['_id']) && $db->get_rev($_POST['_id']) == $_POST['_rev']) { if (isset($_POST['_id']) && $db->get_rev($_POST['_id']) == $_POST['_rev']) {
echo "Edited version was latest version, continue saving"; echo "Edited version was latest version, continue saving";
$newdoc = $_POST; $newdoc = $_POST;
$newdoc['metadata']['lastModified'] = time(); $newdoc['metadata']['lastModified'] = time();
$obj = $db->save($newdoc); $obj = $db->save($newdoc);
} else { } else {
echo "ALERT doc revised by someone else while editing. Document not saved."; echo "ALERT doc revised by someone else while editing. Document not saved.";
} }
} }
   
$mode = "view"; $mode = "view";
$rowArray = object_to_array($obj); $rowArray = object_to_array($obj);
ksort($rowArray); ksort($rowArray);
if ($mode == "edit") { if ($mode == "edit") {
$row = addDefaultFields($rowArray); $row = addDefaultFields($rowArray);
} else { } else {
$row = $rowArray; $row = $rowArray;
} }
   
if ($mode == "view") { if ($mode == "view") {
echo ' <div class="container-fluid"> echo ' <div class="container-fluid">
<div class="row-fluid"> <div class="row-fluid">
<div class="span3"> <div class="span3">
<div class="well sidebar-nav"> <div class="well sidebar-nav">
<ul class="nav nav-list"> <ul class="nav nav-list">
<li class="nav-header">Statistics</li>'; <li class="nav-header">Statistics</li>';
if (isset($row['statistics']['employees'])) { if (isset($row['statistics']['employees'])) {
echo '<div><i class="icon-user" style="float:left"></i><p style="margin-left:16px;">'; echo '<div><i class="icon-user" style="float:left"></i><p style="margin-left:16px;">';
$keys = array_keys($row['statistics']['employees']); $keys = array_keys($row['statistics']['employees']);
$lastkey = $keys[count($keys)-1]; $lastkey = $keys[count($keys)-1];
echo $row['statistics']['employees'][$lastkey]['value'].' employees <small>('.$lastkey.')</small>'; echo $row['statistics']['employees'][$lastkey]['value'].' employees <small>('.$lastkey.')</small>';
echo '</p></div>'; echo '</p></div>';
} }
if (isset($row['statistics']['budget'])) { if (isset($row['statistics']['budget'])) {
echo '<div><i class="icon-shopping-cart" style="float:left"></i><p style="margin-left:16px;">'; echo '<div><i class="icon-shopping-cart" style="float:left"></i><p style="margin-left:16px;">';
$keys = array_keys($row['statistics']['budget']); $keys = array_keys($row['statistics']['budget']);
$lastkey = $keys[count($keys)-1]; $lastkey = $keys[count($keys)-1];
echo money_format("%#10i",(float)$row['statistics']['budget'][$lastkey]['value']).' <small>('.$lastkey.' budget)</small>'; echo "$".number_format(floatval($row['statistics']['budget'][$lastkey]['value'])).' <small>('.$lastkey.' budget)</small>';
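// number_format() is used here because money_format() is not available on all platforms // number_format() is used here because money_format() is not available on all platforms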
echo '</p></div>'; echo '</p></div>';
} }
echo ' </ul> echo ' </ul>
</div><!--/.well --> </div><!--/.well -->
</div><!--/span--> </div><!--/span-->
<div class="span9">'; <div class="span9">';
echo '<div itemscope itemtype="http://schema.org/GovernmentOrganization" typeof="schema:GovernmentOrganization" about="#' . $row['_id'] . '">'; echo '<div itemscope itemtype="http://schema.org/GovernmentOrganization" typeof="schema:GovernmentOrganization" about="#' . $row['_id'] . '">';
echo '<div class="hero-unit"> echo '<div class="hero-unit">
<h1 itemprop="name">' . $row['name'] . '</h1>'; <h1 itemprop="name">' . $row['name'] . '</h1>';
if (isset($row['description'])) { if (isset($row['description'])) {
echo '<p>'.$row['description'].'</p>'; echo '<p>'.$row['description'].'</p>';
} }
echo '</div><table width="100%">'; echo '</div><table width="100%">';
echo "<tr><th>Field Name</th><th>Field Value</th></tr>"; echo "<tr><th>Field Name</th><th>Field Value</th></tr>";
} }
if ($mode == "edit") { if ($mode == "edit") {
?> ?>
<input id="addfield" type="button" value="Add Field"/> <input id="addfield" type="button" value="Add Field"/>
<script> <script>
$(document).ready(function() { $(document).ready(function() {
// dynamically add a named text input to the edit form // dynamically add a named text input to the edit form
// http://charlie.griefer.com/blog/2009/09/17/jquery-dynamically-adding-form-elements/ // http://charlie.griefer.com/blog/2009/09/17/jquery-dynamically-adding-form-elements/
$('#addfield').click(function() { $('#addfield').click(function() {
var field_name = window.prompt("fieldname?",""); var field_name = window.prompt("fieldname?","");
// guard against cancel (null) as well as empty input // guard against cancel (null) as well as empty input
if (field_name) { if (field_name) {
$('#submitbutton').before($('<span></span>') $('#submitbutton').before($('<span></span>')
.append("<label>"+field_name+"</label>") .append("<label>"+field_name+"</label>")
.append("<input class='input-text' type='text' id='"+field_name+"' name='"+field_name+"'/>") .append("<input class='input-text' type='text' id='"+field_name+"' name='"+field_name+"'/>")
); );
} }
}); });
}); });
</script> </script>
<form id="editform" class="nice" method="post"> <form id="editform" class="nice" method="post">
<?php <?php
   
} }
foreach ($row as $key => $value) { foreach ($row as $key => $value) {
// displayValue() echoes its markup directly, so no echo of its return value is needed // displayValue() echoes its markup directly, so no echo of its return value is needed
displayValue($key, $value, $mode); displayValue($key, $value, $mode);
} }
if ($mode == "view") { if ($mode == "view") {
echo "</table></div>"; echo "</table></div>";
echo ' </div><!--/span--> echo ' </div><!--/span-->
</div><!--/row--> </div><!--/row-->
</div><!--/span--> </div><!--/span-->
</div><!--/row-->'; </div><!--/row-->';
} }
if ($mode == "edit") { if ($mode == "edit") {
echo '<input id="submitbutton" type="submit"/></form>'; echo '<input id="submitbutton" type="submit"/></form>';
} }
} else { } else {
// show all list // show all list
include_header('Agencies'); include_header('Agencies');
echo ' <div class="container-fluid"> echo ' <div class="container-fluid">
<div class="row-fluid"> <div class="row-fluid">
<div class="span3"> <div class="span3">
<div class="well sidebar-nav"> <div class="well sidebar-nav">
<ul class="nav nav-list"> <ul class="nav nav-list">
<li class="nav-header">Sidebar</li>'; <li class="nav-header">Sidebar</li>';
echo ' </ul> echo ' </ul>
</div><!--/.well --> </div><!--/.well -->
</div><!--/span--> </div><!--/span-->
<div class="span9"> <div class="span9">
<div class="hero-unit"> <div class="hero-unit">
<h1>Hello, world!</h1> <h1>Australian Government Agencies</h1>
<p>This is a template for a simple marketing or informational website. It includes a large callout called the hero unit and three supporting pieces of content. Use it as a starting point to create something more unique.</p> <p>Explore collected information about Australian Government Agencies below.</p>
<p><a class="btn btn-primary btn-large">Learn more &raquo;</a></p>  
</div> </div>
<div class="row-fluid"> <div class="row-fluid">
<div class="span4">'; <div class="span4">';
try { try {
$rows = $db->get_view("app", "byCanonicalName")->rows; $rows = $db->get_view("app", "byCanonicalName")->rows;
//print_r($rows); //print_r($rows);
$rowCount = count($rows); $rowCount = count($rows);
foreach ($rows as $i => $row) { foreach ($rows as $i => $row) {
// start a new column after each third of the list // start a new column after each third of the list
if ($i > 0 && $i % ceil($rowCount / 3) == 0) echo '</div><div class="span4">'; if ($i > 0 && $i % ceil($rowCount / 3) == 0) echo '</div><div class="span4">';
// print_r($row); // print_r($row);
echo '<span itemscope itemtype="http://schema.org/GovernmentOrganization" typeof="schema:GovernmentOrganization foaf:Organization" about="getAgency.php?id=' . $row->value->_id . '"> echo '<span itemscope itemtype="http://schema.org/GovernmentOrganization" typeof="schema:GovernmentOrganization foaf:Organization" about="getAgency.php?id=' . $row->value->_id . '">
<a href="getAgency.php?id=' . $row->value->_id . '" rel="schema:url foaf:page" property="schema:name foaf:name" itemprop="url"><span itemprop="name">' . <a href="getAgency.php?id=' . $row->value->_id . '" rel="schema:url foaf:page" property="schema:name foaf:name" itemprop="url"><span itemprop="name">' .
(isset($row->value->name) ? $row->value->name : "ERROR NAME MISSING") (isset($row->value->name) ? $row->value->name : "ERROR NAME MISSING")
. '</span></a></span><br><br>'; . '</span></a></span><br><br>';
} }
} catch (SetteeRestClientException $e) { } catch (SetteeRestClientException $e) {
setteErrorHandler($e); setteErrorHandler($e);
} }
echo ' </div><!--/span--> echo ' </div><!--/span-->
</div><!--/row--> </div><!--/row-->
</div><!--/span--> </div><!--/span-->
</div><!--/row-->'; </div><!--/row-->';
} }
   
include_footer(); include_footer();
?> ?>
   
file:b/headcount.php (new)
 
  <!DOCTYPE html>
  <html xmlns="http://www.w3.org/1999/xhtml">
  <head>
  <meta charset="UTF-8"/>
<title>Employee Headcount - Disclosr</title>
  <script type="text/javascript" src="http://code.jquery.com/jquery-1.7.2.js"></script>
  <script type="text/javascript" src="js/bubbletree/lib/jquery.history.js"></script>
  <script type="text/javascript" src="js/bubbletree/lib/raphael.js"></script>
  <script type="text/javascript" src="js/bubbletree/lib/vis4.js"></script>
  <script type="text/javascript" src="js/bubbletree/lib/Tween.js"></script>
  <script type="text/javascript" src="js/bubbletree/build/bubbletree.js"></script>
  <link rel="stylesheet" type="text/css" href="js/bubbletree/build/bubbletree.css" />
  <script type="text/javascript" src="js/bubbletree/styles/cofog.js"></script>
 
 
  <script type="text/javascript">
 
  $(function() {
  <?php
  include_once('include/common.inc.php');
 
  include("lib/Color.php");
  $color = new Lux_Color();
 
  $portfolios = Array();
  $total = 0;
  $db = $server->get_db('disclosr-agencies');
  try {
  $rows = $db->get_view("app", "byDeptStateName", null, true)->rows;
  foreach ($rows as $row) {
  $portfolios[trim(str_replace(Array("Department of", "Department", "the", "'", "`"), "", $row->key))] = $row->value;
  }
  } catch (SetteeRestClientException $e) {
  setteErrorHandler($e);
  }
 
  $agencies = Array();
  try {
  $rows = $db->get_view("app", "byCanonicalName", null, true)->rows;
  //print_r($rows);
  foreach ($rows as $row) {
  $employees = 0;
  $portfolioid = 0;
  if (isset($row->value->employees)) {
  $employees = $row->value->employees;
  }
  if (isset($row->value->statistics->employees)) {
  $agencyEmployeesArray = object_to_array($row->value->statistics->employees);
  if (isset($agencyEmployeesArray["2010-2011"]["value"])) {
  $employees = $agencyEmployeesArray["2010-2011"]["value"];
  } else {
  // bailout for agencies that are closed for business
  continue;
  }
  }
  if (!($employees > 0)) {
  $employees = 0;
  }
  if (isset($row->value->parentOrg)) {
  $portfolioid = $row->value->parentOrg;
  }
  if (isset($row->value->orgType) && $row->value->orgType == "FMA-DepartmentOfState") {
  $portfolioid = $row->id;
  }
  $agencies[$portfolioid][$row->value->name] = $employees;
  }
  } catch (SetteeRestClientException $e) {
  setteErrorHandler($e);
  }
  //print_r($portfolios);
  //print_r($agencies);
 
  // http://martin.ankerl.com/2009/12/09/how-to-create-random-colors-programmatically/
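// stepping the hue by the golden-ratio conjugate (mod 1) spreads successive
// portfolio colours roughly evenly around the hue wheel without clustering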
$golden_ratio_conjugate = 0.618033988749895;
$h = rand(0, 10) / 10; // random start hue
$nodes = Array(); // initialise before the loop appends portfolio nodes
foreach ($portfolios as $portfolioName => $portfolioID) {
  $h += $golden_ratio_conjugate;
 
  $h = fmod($h,1);
  $portfolioColor = $color->hsv2hex(Array($h, .3, .99));
  $subnodes = Array();
  $portfolioEmployees = 0;
if (!isset($agencies[$portfolioID])) {
$agencies[$portfolioID] = Array(); // portfolio with no agency records yet
}
foreach ($agencies[$portfolioID] as $agencyName => $agencyEmployees) {
  $agencyColor = $color->hsv2hex(Array($h / 10, rand(1, 10) / 10, abs(($h * (1 / 10)) - .5) + .5));
  $subnodes[] = Array(
  "label" => str_replace(Array("'", "`"), "", $agencyName),
  "amount" => $agencyEmployees,
  //"color" => "#" . $agencyColor
  );
  $portfolioEmployees += $agencyEmployees;
  }
  $nodes[] = Array(
  "label" => $portfolioName,
  "amount" => $portfolioEmployees,
  //"color" => "#" . $portfolioColor,
  "children" => $subnodes
  );
  $total += $portfolioEmployees;
  }
  $data = Array(
  "label" => "Australian Federal Government",
  "amount" => $total,
  //"color" => "#000000",
  "children" => $nodes
  );
  echo "var data =eval('('+'" . json_encode($data) . "'+')');";
  ?>
 
  new BubbleTree({
  data: data,
  container: '.bubbletree'
  });
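
// For reference, the data built above follows BubbleTree's recursive shape:
//   { label: "Australian Federal Government", amount: <total headcount>,
//     children: [ { label: <portfolio>, amount: <n>, children: [ ... ] } ] }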
 
 
  });
 
  </script>
  </head>
  <body>
  <div class="bubbletree-wrapper">
  <div class="bubbletree"></div>
  </div>
  </body>
  </html>
 
<?php <?php
   
function include_header($title) { function include_header($title) {
global $basePath; global $basePath;
?> ?>
<!DOCTYPE html> <!DOCTYPE html>
   
<!-- paulirish.com/2008/conditional-stylesheets-vs-css-hacks-answer-neither/ --> <!-- paulirish.com/2008/conditional-stylesheets-vs-css-hacks-answer-neither/ -->
<!--[if lt IE 7]> <html class="no-js lt-ie9 lt-ie8 lt-ie7" lang="en"> <![endif]--> <!--[if lt IE 7]> <html class="no-js lt-ie9 lt-ie8 lt-ie7" lang="en"> <![endif]-->
<!--[if IE 7]> <html class="no-js lt-ie9 lt-ie8" lang="en"> <![endif]--> <!--[if IE 7]> <html class="no-js lt-ie9 lt-ie8" lang="en"> <![endif]-->
<!--[if IE 8]> <html class="no-js lt-ie9" lang="en"> <![endif]--> <!--[if IE 8]> <html class="no-js lt-ie9" lang="en"> <![endif]-->
<!--[if gt IE 8]><!--> <html lang="en"> <!--<![endif]--> <!--[if gt IE 8]><!--> <html lang="en"> <!--<![endif]-->
<head> <head>
<meta charset="utf-8" /> <meta charset="utf-8" />
   
<!-- Set the viewport width to device width for mobile --> <!-- Set the viewport width to device width for mobile -->
<meta name="viewport" content="width=device-width" /> <meta name="viewport" content="width=device-width" />
   
<title><?php echo $title; ?> - Disclosr</title> <title><?php echo $title; ?> - Disclosr</title>
   
<!-- Included CSS Files --> <!-- Included CSS Files -->
<link href="<?php echo $basePath ?>css/bootstrap.min.css" rel="stylesheet"> <link href="<?php echo $basePath ?>css/bootstrap.min.css" rel="stylesheet">
<style type="text/css"> <style type="text/css">
body { body {
padding-top: 60px; padding-top: 60px;
padding-bottom: 40px; padding-bottom: 40px;
} }
.sidebar-nav { .sidebar-nav {
padding: 9px 0; padding: 9px 0;
} }
</style> </style>
<link href="<?php echo $basePath ?>css/bootstrap-responsive.min.css" rel="stylesheet"> <link href="<?php echo $basePath ?>css/bootstrap-responsive.min.css" rel="stylesheet">
<!--[if lt IE 9]> <!--[if lt IE 9]>
<link rel="stylesheet" href="<?php echo $basePath ?>stylesheets/ie.css"> <link rel="stylesheet" href="<?php echo $basePath ?>stylesheets/ie.css">
<![endif]--> <![endif]-->
   
   
<!-- IE Fix for HTML5 Tags --> <!-- IE Fix for HTML5 Tags -->
<!--[if lt IE 9]> <!--[if lt IE 9]>
<script src="http://html5shiv.googlecode.com/svn/trunk/html5.js"></script> <script src="http://html5shiv.googlecode.com/svn/trunk/html5.js"></script>
<![endif]--> <![endif]-->
   
</head> </head>
<body xmlns:schema="http://schema.org/" xmlns:foaf="http://xmlns.com/foaf/0.1/"> <body xmlns:schema="http://schema.org/" xmlns:foaf="http://xmlns.com/foaf/0.1/">
<div class="navbar navbar-inverse navbar-fixed-top"> <div class="navbar navbar-inverse navbar-fixed-top">
<div class="navbar-inner"> <div class="navbar-inner">
<div class="container-fluid"> <div class="container-fluid">
<a class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse"> <a class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
<span class="icon-bar"></span> <span class="icon-bar"></span>
<span class="icon-bar"></span> <span class="icon-bar"></span>
<span class="icon-bar"></span> <span class="icon-bar"></span>
</a> </a>
<a class="brand" href="#">Disclosr</a> <a class="brand" href="#">Disclosr</a>
<div class="nav-collapse collapse"> <div class="nav-collapse collapse">
<ul class="nav"> <ul class="nav">
<li><a href="getAgency.php">Agencies</a></li> <li><a href="getAgency.php">Agencies</a></li>
  <li><a href="headcount.php">Employee Headcount Graph</a></li>
  <li><a href="budget.php">Budget Graph</a></li>
<li><a href="about.php">About/FAQ</a></li> <li><a href="about.php">About/FAQ</a></li>
</ul> </ul>
</div><!--/.nav-collapse --> </div><!--/.nav-collapse -->
</div> </div>
</div> </div>
</div> </div>
   
<div class="container-fluid"> <div class="container-fluid">
<?php } <?php }
   
function include_footer() { function include_footer() {
global $basePath; global $basePath;
?> ?>
</div> <!-- /container --> </div> <!-- /container -->
<hr> <hr>
   
<footer> <footer>
<p>Not affiliated with or endorsed by any government agency.</p> <p>Not affiliated with or endorsed by any government agency.</p>
</footer> </footer>
   
   
<!-- Included JS Files --> <!-- Included JS Files -->
<script src="http://code.jquery.com/jquery-1.7.1.min.js"></script> <script src="http://code.jquery.com/jquery-1.7.1.min.js"></script>
<script type="text/javascript" src="<?php echo $basePath ?>js/flotr2/flotr2.js"></script> <script type="text/javascript" src="<?php echo $basePath ?>js/flotr2/flotr2.js"></script>
<?php <?php
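// only emit Google Analytics on the production host (disclo.gs) // only emit Google Analytics on the production host (disclo.gs)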
if (strpos($_SERVER['SERVER_NAME'], ".gs") !== false) { if (strpos($_SERVER['SERVER_NAME'], ".gs") !== false) {
?> ?>
<script type="text/javascript"> <script type="text/javascript">
   
var _gaq = _gaq || []; var _gaq = _gaq || [];
_gaq.push(['_setAccount', 'UA-12341040-2']); _gaq.push(['_setAccount', 'UA-12341040-2']);
_gaq.push(['_trackPageview']); _gaq.push(['_trackPageview']);
   
(function() { (function() {
var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true; var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js'; ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
})(); })();
   
</script> </script>
   
<?php } ?> <?php } ?>
   
</body> </body>
</html> </html>
   
<?php } <?php }