<?php | <?php |
include_once('include/common.inc.php'); | include_once('include/common.inc.php'); |
include_header('About'); | include_header('About'); |
?> | ?> |
<div class="foundation-header"> | <div class="foundation-header"> |
<h1><a href="about.php">About/FAQ</a></h1> | <h1><a href="about.php">About/FAQ</a></h1> |
<h4 class="subheader">Lorem ipsum.</h4> | |
</div> | </div> |
<h2> What is this? </h2> | |
Disclo.gs is a project to monitor Australian Federal Government agencies' | |
compliance with their <a href="http://www.oaic.gov.au/publications/other_operational/foi_policy_frequently_asked_questions.html#_Toc291837571">"proactive disclosure requirements"</a>, in order to make a transparency league table <a href="http://gov2.net.au/blog/2009/09/19/a-league-ladder-of-psi-openness/">as suggested by the Gov 2.0 Taskforce</a>. | |
<h2> Attributions </h2> | <h2> Attributions </h2> |
National Archives of Australia, Australian Governments’ Interactive Functions Thesaurus, 2nd edition, September 2005, published at http://www.naa.gov.au/recordkeeping/thesaurus/index.htm <br/> | National Archives of Australia, Australian Governments’ Interactive Functions Thesaurus, 2nd edition, September 2005, published at http://www.naa.gov.au/recordkeeping/thesaurus/index.htm <br/> |
data.gov.au http://data.gov.au/dataset/directory-gov-au-full-data-export/ <br/> | data.gov.au http://data.gov.au/dataset/directory-gov-au-full-data-export/ <br/> |
directory.gov.au <br/> | directory.gov.au <br/> |
australia.gov.au http://australia.gov.au/about/copyright <br/> | australia.gov.au http://australia.gov.au/about/copyright <br/> |
<h2> Open everything </h2> | |
All documents released CC-BY 3 AU | |
Open source git @ | |
<h2>Organisational Data Sources</h2> | <h2>Organisational Data Sources</h2> |
http://www.comlaw.gov.au/Browse/Results/ByTitle/AdministrativeArrangementsOrders/Current/Ad/0 defines departments | http://www.comlaw.gov.au/Browse/Results/ByTitle/AdministrativeArrangementsOrders/Current/Ad/0 defines departments |
Agencies can be found in the Schedule to an Appropriation Bill (budget), Schedule to FMA Regulations and/or Public Service Act.<br> | Agencies can be found in the Schedule to an Appropriation Bill (budget), Schedule to FMA Regulations and/or Public Service Act.<br> |
http://www.finance.gov.au/publications/flipchart/docs/FMACACFlipchart.pdf summarises these. view-source:https://www.tenders.gov.au/?event=public.advancedsearch.home is great for the suspended/active status<br> | http://www.finance.gov.au/publications/flipchart/docs/FMACACFlipchart.pdf summarises these. view-source:https://www.tenders.gov.au/?event=public.advancedsearch.home is great for the suspended/active status<br> |
Fraud in gov depts by Fairfax Media http://www.smh.com.au/national/public-service-keeps-fraud-cases-private-20110923-1kpdr.html | Fraud in gov depts by Fairfax Media http://www.smh.com.au/national/public-service-keeps-fraud-cases-private-20110923-1kpdr.html <br> |
When defining the hierarchy, this system is designed towards monitoring accountability. Thus large agencies that have registered their own ABN | When defining the hierarchy, this system is designed towards monitoring accountability. Thus large agencies that have registered their own ABN |
and have their own accountability mechanisms/website receive a separate record as a child of their department. | and have their own accountability mechanisms/website receive a separate record as a child of their department.<br> |
Some small agencies will choose to simply rely on their parent department's accountability measures.<br> | Some small agencies will choose to simply rely on their parent department's accountability measures.<br> |
This flows through to organisation name and other/past names. A department that completely accounts for an agency will list that agency as an other child name. | This flows through to organisation name and other/past names. A department that completely accounts for an agency will list that agency as an other child name.<br> |
As agencies themselves shift between departments, there may be scope for providing time ranges but typically the newest hierarchy will be the one recorded. | As agencies themselves shift between departments, there may be scope for providing time ranges but typically the newest hierarchy will be the one recorded.<br> |
A department/agency name will be the newest active name assigned to that ABN.<br> | A department/agency name will be the newest active name assigned to that ABN.<br> |
ABN information is derived from the ABR. This is the definitive umpire about which former name should be linked to which current name. | ABN information is derived from the ABR. This is the definitive umpire about which former name should be linked to which current name. <br> |
For example "Department of Transport and Regional Services" became "Department of Infrastructure, Transport, Regional Development and Local Government" (same ABN) | For example "Department of Transport and Regional Services" became "Department of Infrastructure, Transport, Regional Development and Local Government" (same ABN) |
however it later split into "Department of Infrastructure and Transport" (same ABN) | however it later split into "Department of Infrastructure and Transport" (same ABN) |
and "Department of Regional Australia, Regional Development and Local Government" (new ABN).<br> | and "Department of Regional Australia, Regional Development and Local Government" (new ABN).<br> |
Statistical information from http://www.apsc.gov.au/stateoftheservice/1011/statsbulletin/section1.html#t2total https://www.apsedii.gov.au/apsedii/CustomQueryx33.shtml | Statistical information from http://www.apsc.gov.au/stateoftheservice/1011/statsbulletin/section1.html#t2total https://www.apsedii.gov.au/apsedii/CustomQueryx33.shtml |
and individual annual reports.<br> | and individual annual reports.<br> |
<h2>Webpage Assessment</h2> | |
Much due care has been put into correctly recording disclosure URLs. Typically the "About", "Corporate", "Publications" and "Sitemap" sections are checked at the very least. | |
Occasionally it is necessary to use a site search or a Google search. In a few rare cases, there is a hidden "Disclosure" navigation menu that you can only find once you have found one of the mandatory publishing obligations in that category (seriously).<br> | |
Some rules about leniency:<br> | |
<ul> | |
<li>An empty FOI disclosure log counts, a page outlining what the FOI Act is does not.</li> | |
<li>A disclosure log in PDF or Word format counts :(</li> | |
<li>An empty File/Record list counts (although claiming to have no files at all, electronic or paper, is very minimalistic)</li> | |
<li>Only a current information publication scheme page counts, not a s.9 FOI Act page or an organisation chart.</li> | |
<li>If there isn't a page easily listing all current and past Annual Reports, the most current one (html, pdf) counts.</li> | |
<li>Consultancy contracts might not need their own webpage (if in the Annual Report), and grants/appointments might not apply to all organisations, but Legal Services Expenditure (and all other obligations) does need a webpage.</li> | |
</ul> | |
<h2>Open Government Scoring</h2> | |
+1 point for every true Has... attribute<br> | |
-1 point for every false Has... (i.e. Has Not) attribute<br/> | |
Don't like this? Make your own score, or suggest a better scoring mechanism.<br/> | |
<?php | <?php |
include_footer(); | include_footer(); |
?> | ?> |
<!DOCTYPE html> | |
<html xmlns="http://www.w3.org/1999/xhtml"> | |
<head> | |
<meta charset="UTF-8"/> | |
<title>Minimal BubbleTree Demo</title> | |
<script type="text/javascript" src="http://code.jquery.com/jquery-1.7.2.js"></script> | |
<script type="text/javascript" src="js/bubbletree/lib/jquery.history.js"></script> | |
<script type="text/javascript" src="js/bubbletree/lib/raphael.js"></script> | |
<script type="text/javascript" src="js/bubbletree/lib/vis4.js"></script> | |
<script type="text/javascript" src="js/bubbletree/lib/Tween.js"></script> | |
<script type="text/javascript" src="js/bubbletree/build/bubbletree.js"></script> | |
<link rel="stylesheet" type="text/css" href="js/bubbletree/build/bubbletree.css" /> | |
<script type="text/javascript" src="js/bubbletree/styles/cofog.js"></script> | |
<script type="text/javascript"> | |
$(function() { | |
<?php
/*
 * Builds the BubbleTree data set for this page and prints it as the
 * JavaScript variable `data`.
 *
 * Shape of the emitted tree:
 *   root ("Australian Federal Government", amount = grand total)
 *     -> one node per portfolio (amount = sum of its agencies)
 *          -> one node per agency (amount = employee count)
 *
 * NOTE(review): $server, object_to_array() and setteErrorHandler() are not
 * defined in this block; presumably they come from include/common.inc.php.
 */
include_once('include/common.inc.php');

$db = $server->get_db('disclosr-agencies');

// Cleaned-up department name => value emitted by the byDeptStateName view.
// That value is used below as the key into $agencies, so it is presumably the
// department's document id -- confirm against the view definition.
$portfolios = array();
try {
    $rows = $db->get_view("app", "byDeptStateName", null, true)->rows;
    foreach ($rows as $row) {
        $portfolios[trim(str_replace(array("Department of", "Department", "the", "'", "`"), "", $row->key))] = $row->value;
    }
} catch (SetteeRestClientException $e) {
    setteErrorHandler($e);
}

// $agencies[<portfolio id>][<agency name>] = employee count.
$agencies = array();
try {
    $rows = $db->get_view("app", "byCanonicalName", null, true)->rows;
    foreach ($rows as $row) {
        // Start from the flat "employees" field, if the record has one.
        $employees = 0;
        if (isset($row->value->employees)) {
            $employees = $row->value->employees;
        }

        // Records carrying per-year statistics must have a 2010-2011 figure;
        // that figure overrides the flat field.
        if (isset($row->value->statistics->employees)) {
            $employeeStats = object_to_array($row->value->statistics->employees);
            if (!isset($employeeStats["2010-2011"]["value"])) {
                // bailout for agencies that are closed for business
                continue;
            }
            $employees = $employeeStats["2010-2011"]["value"];
        }

        // Anything that is not a positive number counts as zero.
        if (!($employees > 0)) {
            $employees = 0;
        }

        // An agency is filed under its parent organisation; a department of
        // state is filed under its own id so it shows up in its own portfolio.
        $portfolioid = 0;
        if (isset($row->value->parentOrg)) {
            $portfolioid = $row->value->parentOrg;
        }
        if (isset($row->value->orgType) && $row->value->orgType == "FMA-DepartmentOfState") {
            $portfolioid = $row->id;
        }

        $agencies[$portfolioid][$row->value->name] = $employees;
    }
} catch (SetteeRestClientException $e) {
    setteErrorHandler($e);
}

$total = 0;
// Initialised up front so an empty portfolio list yields "children": []
// instead of an undefined-variable notice and "children": null.
$nodes = array();
foreach ($portfolios as $portfolioName => $portfolioID) {
    $subnodes = array();
    $portfolioEmployees = 0;

    // A portfolio may have no agencies on record (or the agency query may have
    // failed); iterating a missing key would print a PHP warning into the
    // middle of the surrounding <script> element.
    $portfolioAgencies = isset($agencies[$portfolioID]) ? $agencies[$portfolioID] : array();
    foreach ($portfolioAgencies as $agencyName => $agencyEmployees) {
        $subnodes[] = array(
            // Quote stripping is kept so labels match what the page has
            // always displayed.
            "label" => str_replace(array("'", "`"), "", $agencyName),
            "amount" => $agencyEmployees,
        );
        $portfolioEmployees += $agencyEmployees;
    }

    $nodes[] = array(
        "label" => $portfolioName,
        "amount" => $portfolioEmployees,
        "children" => $subnodes,
    );
    $total += $portfolioEmployees;
}

$data = array(
    "label" => "Australian Federal Government",
    "amount" => $total,
    "children" => $nodes,
);

// JSON is already a valid JavaScript literal, so it is emitted directly rather
// than being wrapped in a quoted string and eval()'d (backslash escapes and
// quotes in the data would corrupt that string). The JSON_HEX_* flags keep
// characters such as "<" and "&" from terminating or altering the enclosing
// <script> element.
echo "var data = " . json_encode($data, JSON_HEX_TAG | JSON_HEX_AMP | JSON_HEX_APOS | JSON_HEX_QUOT) . ";";
?>
new BubbleTree({ | |
data: data, | |
container: '.bubbletree' | |
}); | |
}); | |
</script> | |
</head> | |
<body> | |
<div class="bubbletree-wrapper"> | |
<div class="bubbletree"></div> | |
</div> | |
</body> | |
</html> | |
<!DOCTYPE html> | |
<html xmlns="http://www.w3.org/1999/xhtml"> | |
<head> | |
<meta charset="UTF-8"/> | |
<title>Minimal BubbleTree Demo</title> | |
<script type="text/javascript" src="http://code.jquery.com/jquery-1.7.2.js"></script> | |
<script type="text/javascript" src="js/bubbletree/lib/jquery.history.js"></script> | |
<script type="text/javascript" src="js/bubbletree/lib/raphael.js"></script> | |
<script type="text/javascript" src="js/bubbletree/lib/vis4.js"></script> | |
<script type="text/javascript" src="js/bubbletree/lib/Tween.js"></script> | |
<script type="text/javascript" src="js/bubbletree/build/bubbletree.js"></script> | |
<link rel="stylesheet" type="text/css" href="js/bubbletree/build/bubbletree.css" /> | |
<script type="text/javascript" src="js/bubbletree/styles/cofog.js"></script> | |
<script type="text/javascript"> | |
$(function() { | |
<?php
/*
 * Builds the BubbleTree data set for this page and prints it as the
 * JavaScript variable `data`.
 *
 * Shape of the emitted tree:
 *   root ("Australian Federal Government", amount = grand total)
 *     -> one node per portfolio (amount = sum of its agencies)
 *          -> one node per agency (amount = 2011-2012 budget value)
 *
 * NOTE(review): $server, object_to_array() and setteErrorHandler() are not
 * defined in this block; presumably they come from include/common.inc.php.
 */
include_once('include/common.inc.php');

$db = $server->get_db('disclosr-agencies');

// Cleaned-up department name => value emitted by the byDeptStateName view.
// That value is used below as the key into $agencies, so it is presumably the
// department's document id -- confirm against the view definition.
$portfolios = array();
try {
    $rows = $db->get_view("app", "byDeptStateName", null, true)->rows;
    foreach ($rows as $row) {
        $portfolios[trim(str_replace(array("Department of", "Department", "the", "'", "`"), "", $row->key))] = $row->value;
    }
} catch (SetteeRestClientException $e) {
    setteErrorHandler($e);
}

// $agencies[<portfolio id>][<agency name>] = budget value.
$agencies = array();
try {
    $rows = $db->get_view("app", "byCanonicalName", null, true)->rows;
    foreach ($rows as $row) {
        $budget = 0;

        // Records carrying per-year budget statistics must have a 2011-2012
        // figure; records without any budget statistics stay at zero.
        if (isset($row->value->statistics->budget)) {
            $budgetStats = object_to_array($row->value->statistics->budget);
            if (!isset($budgetStats["2011-2012"]["value"])) {
                // bailout for agencies that are closed for business
                continue;
            }
            $budget = $budgetStats["2011-2012"]["value"];
        }

        // Anything that is not a positive number counts as zero.
        if (!($budget > 0)) {
            $budget = 0;
        }

        // An agency is filed under its parent organisation; a department of
        // state is filed under its own id so it shows up in its own portfolio.
        $portfolioid = 0;
        if (isset($row->value->parentOrg)) {
            $portfolioid = $row->value->parentOrg;
        }
        if (isset($row->value->orgType) && $row->value->orgType == "FMA-DepartmentOfState") {
            $portfolioid = $row->id;
        }

        $agencies[$portfolioid][$row->value->name] = $budget;
    }
} catch (SetteeRestClientException $e) {
    setteErrorHandler($e);
}

$total = 0;
// Initialised up front so an empty portfolio list yields "children": []
// instead of an undefined-variable notice and "children": null.
$nodes = array();
foreach ($portfolios as $portfolioName => $portfolioID) {
    $subnodes = array();
    $portfolioBudget = 0;

    // A portfolio may have no agencies on record (or the agency query may have
    // failed); iterating a missing key would print a PHP warning into the
    // middle of the surrounding <script> element.
    $portfolioAgencies = isset($agencies[$portfolioID]) ? $agencies[$portfolioID] : array();
    foreach ($portfolioAgencies as $agencyName => $agencyBudget) {
        $subnodes[] = array(
            // Quote stripping is kept so labels match what the page has
            // always displayed.
            "label" => str_replace(array("'", "`"), "", $agencyName),
            "amount" => $agencyBudget,
        );
        $portfolioBudget += $agencyBudget;
    }

    $nodes[] = array(
        "label" => $portfolioName,
        "amount" => $portfolioBudget,
        "children" => $subnodes,
    );
    $total += $portfolioBudget;
}

$data = array(
    "label" => "Australian Federal Government",
    "amount" => $total,
    "children" => $nodes,
);

// JSON is already a valid JavaScript literal, so it is emitted directly rather
// than being wrapped in a quoted string and eval()'d (backslash escapes and
// quotes in the data would corrupt that string). The JSON_HEX_* flags keep
// characters such as "<" and "&" from terminating or altering the enclosing
// <script> element.
echo "var data = " . json_encode($data, JSON_HEX_TAG | JSON_HEX_AMP | JSON_HEX_APOS | JSON_HEX_QUOT) . ";";
?>
new BubbleTree({ | |
data: data, | |
container: '.bubbletree' | |
}); | |
}); | |
</script> | |
</head> | |
<body> | |
<div class="bubbletree-wrapper"> | |
<div class="bubbletree"></div> | |
</div> | |
</body> | |
</html> | |
import sys | import sys |
import os | import os |
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) | sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) |
import scrape | import scrape |
from bs4 import BeautifulSoup | from bs4 import BeautifulSoup |
from time import mktime | from time import mktime |
import feedparser | import feedparser |
import abc | import abc |
import unicodedata | import unicodedata |
import re | import re |
import dateutil | import dateutil |
from dateutil.parser import * | from dateutil.parser import * |
from datetime import * | from datetime import * |
import codecs | import codecs |
import difflib | import difflib |
from StringIO import StringIO | from StringIO import StringIO |
from pdfminer.pdfparser import PDFDocument, PDFParser | from pdfminer.pdfparser import PDFDocument, PDFParser |
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter, process_pdf | from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter, process_pdf |
from pdfminer.pdfdevice import PDFDevice, TagExtractor | from pdfminer.pdfdevice import PDFDevice, TagExtractor |
from pdfminer.converter import TextConverter | from pdfminer.converter import TextConverter |
from pdfminer.cmapdb import CMapDB | from pdfminer.cmapdb import CMapDB |
from pdfminer.layout import LAParams | from pdfminer.layout import LAParams |
class GenericDisclogScraper(object): | class GenericDisclogScraper(object): |
__metaclass__ = abc.ABCMeta | __metaclass__ = abc.ABCMeta |