<?php
/**
 * Export the employee statistics of every agency in the 'disclosr-agencies' database.
 *
 * The "format" request parameter selects the output:
 *   - "csv" (default): a CSV attachment, one header row ("name" followed by every
 *     statistics key found) and one row per agency that has employee statistics.
 *   - "json": an object with "labels" (the statistics keys) and "data" (one series
 *     per agency, each point being [label index, value]).
 * Any other value sends only the cache headers and no body.
 */
include_once("../include/common.inc.php");

$format = "csv";
if (isset($_REQUEST['format'])) {
    $format = $_REQUEST['format'];
}
setlocale(LC_CTYPE, 'C');

$db = $server->get_db('disclosr-agencies');

// Read the view once; the same rows feed both the header scan and the export.
$agencies = array();
try {
    $agencies = $db->get_view("app", "all", null, true)->rows;
} catch (SetteeRestClientException $e) {
    setteErrorHandler($e);
}

// The CSV gets a leading "name" column; the JSON labels are the statistics keys only.
$headers = ($format == "csv") ? array("name") : array();
foreach ($agencies as $agency) {
    if (isset($agency->value->statistics->employees)) {
        $headers = array_merge($headers, array_keys(object_to_array($agency->value->statistics->employees)));
    }
}
// array_unique() preserves the original keys, which leaves gaps after duplicates are
// dropped. Re-index so that a header's key is also its position: the JSON points use
// the key as their x value and must line up with the "labels" list.
$headers = array_values(array_unique($headers));

$fp = fopen('php://output', 'w');
if ($fp && $db) {
    if ($format == "csv") {
        header('Content-Type: text/csv; charset=utf-8');
        header('Content-Disposition: attachment; filename="export.employeestats.' . date("c") . '.csv"');
    } else if ($format == "json") {
        header('Content-Type: application/json; charset=utf-8');
    }
    header('Pragma: no-cache');
    header('Expires: 0');

    if ($format == "csv") {
        fputcsv($fp, $headers);
    }

    $series = array();
    foreach ($agencies as $agency) {
        if (!isset($agency->value->statistics->employees)) {
            continue;
        }
        $employees = object_to_array($agency->value->statistics->employees);

        if ($format == "csv") {
            $row = array();
            foreach ($headers as $i => $fieldName) {
                if (isset($employees[$fieldName])) {
                    $row[] = $employees[$fieldName]["value"];
                } else if ($i == 0) {
                    // Column 0 is the synthetic "name" header.
                    $row[] = $agency->value->name;
                } else {
                    $row[] = 0;
                }
            }
            fputcsv($fp, $row);
        } else if ($format == "json") {
            $points = array();
            foreach ($headers as $i => $fieldName) {
                // Missing or non-numeric values are plotted as 0 so the output stays valid JSON.
                $value = 0;
                if (isset($employees[$fieldName]["value"]) && is_numeric($employees[$fieldName]["value"])) {
                    $value = $employees[$fieldName]["value"] + 0;
                }
                $points[] = array($i, $value);
            }
            $series[] = array(
                "data" => $points,
                "label" => $agency->value->name,
                "lines" => array("show" => true),
                "points" => array("show" => true)
            );
        }
    }

    if ($format == "json") {
        // json_encode() escapes quotes and control characters in agency names and labels.
        echo json_encode(array("labels" => $headers, "data" => $series)) . PHP_EOL;
    }
    exit;
}
?>
<?php
/**
 * Import per-agency employee counts from the yearly APS employee CSV files.
 *
 * Each CSV row is read as (agency name, employee count). Counts are summed per agency
 * document id and time period, then written to
 * $doc->statistics->employees->{period} = array("value" => ..., "source" => ...).
 * A document is saved only when something actually changed.
 *
 * The script aborts as soon as a CSV names an agency that the "byName" view does not know.
 */
require_once '../include/common.inc.php';

$db = $server->get_db('disclosr-agencies');
$rows = $db->get_view("app", "byName")->rows;
$nametoid = array();
$sums = array();
foreach ($rows as $row) {
    $nametoid[trim($row->key)] = $row->value;
}

$employeeCSVs = array(
    "2002-2003" => "0203apsemployees.csv",
    "2003-2004" => "0304apsemployees.csv",
    "2004-2005" => "0405apsemployees.csv",
    "2005-2006" => "0506apsemployees.csv",
    "2006-2007" => "0607apsemployees.csv",
    "2007-2008" => "0708apsemployees.csv",
    "2008-2009" => "0809apsemployees.csv",
    "2009-2010" => "0910apsemployees.csv",
    "2010-2011" => "1011apsemployees.csv"
);

foreach ($employeeCSVs as $timePeriod => $employeeCSV) {
    echo $employeeCSV . "<br>" . PHP_EOL;
    $handle = fopen($employeeCSV, "r");
    if ($handle === FALSE) {
        // A file that cannot be opened is skipped, as before.
        continue;
    }
    // Length 0 removes the 1000-byte cap that would split an over-long line into two records.
    while (($data = fgetcsv($handle, 0, ",")) !== FALSE) {
        if ($data === array(null)) {
            // fgetcsv() reports a blank line as array(null); there is nothing to import.
            continue;
        }
        $name = trim($data[0]);
        if (isset($nametoid[$name])) {
            $id = $nametoid[$name];
            // Thousands separators are stripped so "1,234" counts as 1234; anything
            // that is still not numeric contributes 0.
            $count = isset($data[1]) ? trim(str_replace(",", "", $data[1])) : "";
            if (!isset($sums[$id][$timePeriod])) {
                $sums[$id][$timePeriod] = 0;
            }
            $sums[$id][$timePeriod] += is_numeric($count) ? $count + 0 : 0;
        } else {
            echo "<br>ERROR NAME MISSING FROM ID LIST<br><bR>" . PHP_EOL;
            die();
        }
    }
    fclose($handle);
}

foreach ($sums as $id => $sum) {
    echo $id . "<br>" . PHP_EOL;
    $doc = $db->get($id);
    echo $doc->name . "<br>" . PHP_EOL;
    $changed = false;
    // Both containers are objects: the comparison and the assignment below use
    // property access, which fails on a PHP array.
    if (!isset($doc->statistics)) {
        $changed = true;
        $doc->statistics = new stdClass();
    }
    if (!isset($doc->statistics->employees)) {
        $changed = true;
        $doc->statistics->employees = new stdClass();
    }
    foreach ($sum as $timePeriod => $value) {
        if (!isset($doc->statistics->employees->$timePeriod->value)
                || $doc->statistics->employees->$timePeriod->value != $value) {
            $changed = true;
            $doc->statistics->employees->$timePeriod = array("value" => $value, "source" => "http://apsc.gov.au/stateoftheservice/");
        }
    }
    if ($changed) {
        $db->save($doc);
    } else {
        echo "not changed" . "<br>" . PHP_EOL;
    }
}
// employees: timeperiod, source = apsc state of service, value
?>
<?php
/**
 * Import 2011-2012 employee counts and function classifications from the APSC
 * "Appendix 2 - APS agencies" page.
 *
 * Every <tr> of the fetched page is read positionally: child node 0 is the agency name,
 * node 2 the employee count and node 4 the function. Counts are summed per agency
 * document id and written to $doc->statistics->employees->{"2011-2012"}; when that
 * value changes, $doc->employees and $doc->functionClassification are updated too.
 *
 * The script aborts when a row names an agency that the "byName" view does not know.
 */
require_once '../include/common.inc.php';
require($basePath . 'lib/phpquery/phpQuery/phpQuery.php');

$db = $server->get_db('disclosr-agencies');
$rows = $db->get_view("app", "byName")->rows;
$nametoid = array();
$sums = array();
$functions = array();
foreach ($rows as $row) {
    $nametoid[trim($row->key)] = $row->value;
}

$request = Requests::get("http://www.apsc.gov.au/publications-and-media/parliamentary/state-of-the-service/new-sosr/appendix-2-aps-agencies");
// Kept in its own variable so it is not confused with the CouchDB documents loaded below.
$page = phpQuery::newDocumentHTML($request->body);
phpQuery::selectDocument($page);

foreach (pq('tr')->elements as $tr) {
    $agency = "";
    $employees = "";
    $function = "";
    $i = 0;
    // NOTE(review): indices 0/2/4 presumably skip whitespace text nodes between the
    // cells - confirm against the page markup.
    foreach ($tr->childNodes as $td) {
        if ($i == 0) {
            $agency = $td->nodeValue;
        }
        if ($i == 2) {
            $employees = trim(str_replace(",", "", $td->nodeValue));
        }
        if ($i == 4) {
            $function = $td->nodeValue;
        }
        $i++;
    }

    if ($agency != "" && $employees != "" && $function != "") {
        // NOTE(review): this removes every "2" in the name, presumably to drop a
        // footnote marker; a name that legitimately contains a 2 would be mangled.
        $name = trim(str_replace('2', '', $agency));
        if (isset($nametoid[$name])) {
            $id = $nametoid[$name];
            if (!isset($sums[$id]["2011-2012"])) {
                $sums[$id]["2011-2012"] = 0;
            }
            // A non-numeric cell contributes 0 instead of raising an arithmetic error.
            $sums[$id]["2011-2012"] += is_numeric($employees) ? $employees + 0 : 0;
            $functions[$id] = $function;
        } else if ($agency != "Agency") {
            // "Agency" is the table's header row and is ignored.
            echo "<br>ERROR NAME '$agency' MISSING FROM ID LIST<br><bR>" . PHP_EOL;
            die();
        }
    } else {
        echo "skipped $agency";
    }
}

foreach ($sums as $id => $sum) {
    echo $id . "<br>" . PHP_EOL;
    $doc = $db->get($id);
    echo $doc->name . "<br>" . PHP_EOL;
    $changed = false;
    if (!isset($doc->statistics)) {
        $changed = true;
        $doc->statistics = new stdClass();
    }
    if (!isset($doc->statistics->employees)) {
        $changed = true;
        $doc->statistics->employees = new stdClass();
    }
    foreach ($sum as $timePeriod => $value) {
        if (!isset($doc->statistics->employees->$timePeriod->value)
                || $doc->statistics->employees->$timePeriod->value != $value) {
            $changed = true;
            $doc->statistics->employees->$timePeriod = array("value" => $value, "source" => "http://apsc.gov.au/stateoftheservice/");
            $doc->employees = $value;
            $doc->functionClassification = $functions[$id];
        }
    }
    if ($changed) {
        $db->save($doc);
    } else {
        echo "not changed" . "<br>" . PHP_EOL;
    }
}
// employees: timeperiod, source = apsc state of service, value
?>
<?php
/**
 * List every non-suspended agency with its disclosure log URL and scraper status.
 *
 * The table has three columns: the agency name (linked to its website when one is
 * recorded), the recorded disclosure log URL (or a red cross), and whether a scraper
 * exists for it: "./scrapers/<id>.py" counts as yes, "./scrapers/<id>.txt" as work in
 * progress, neither as no. A summary line with percentages follows the table.
 */
include('template.inc.php');
include_header_documents("");
include_once('../include/common.inc.php');

echo "<table>
<tr><th>Agency Name</th><th>Disclosure Log URL recorded?</th><th>Do we monitor this URL?</th></tr>";
$agenciesdb = $server->get_db('disclosr-agencies');
$docsdb = $server->get_db('disclosr-documents');
$agencies = 0;
$disclogs = 0;
$red = 0;
$green = 0;
$orange = 0;
try {
    $rows = $agenciesdb->get_view("app", "byCanonicalName", null, true)->rows;
    if ($rows) {
        foreach ($rows as $row) {
            if (isset($row->value->status) && $row->value->status == "suspended") {
                continue;
            }
            $agencies++;

            // Database values are escaped before they are placed in markup or attributes.
            $name = htmlspecialchars($row->value->name, ENT_QUOTES, 'UTF-8');
            echo "<tr><td>";
            if (isset($row->value->website)) {
                echo "<a href='" . htmlspecialchars($row->value->website, ENT_QUOTES, 'UTF-8') . "'><b>" . $name . "</b></a>";
            } else {
                // No website recorded: show the name without a link.
                echo "<b>" . $name . "</b>";
            }
            if ($ENV == "DEV") {
                echo "<br>(" . htmlspecialchars($row->id, ENT_QUOTES, 'UTF-8') . ")";
            }
            echo "</td>\n";

            $hasDisclog = isset($row->value->FOIDocumentsURL);
            echo "<td>";
            if ($hasDisclog) {
                $disclogs++;
                $url = htmlspecialchars($row->value->FOIDocumentsURL, ENT_QUOTES, 'UTF-8');
                echo '<a href="' . $url . '">' . $url . '</a>';
                if ($ENV == "DEV") {
                    echo '<br><small>(<a href="viewDocument.php?hash=' . md5($row->value->FOIDocumentsURL) . '">'
                        . 'view local copy</a>)</small>';
                }
            } else {
                echo "<font color='red'><abbr title='No'>✘</abbr></font>";
            }

            echo "</td>\n<td>";
            if ($hasDisclog) {
                if (file_exists("./scrapers/" . $row->id . '.py')) {
                    echo "<font color='green'><abbr title='Yes'>✔</abbr></font>";
                    $green++;
                } else if (file_exists("./scrapers/" . $row->id . '.txt')) {
                    echo "<font color='orange'><abbr title='Work in progress'><b>▬</b></abbr></font>";
                    $orange++;
                } else {
                    echo "<font color='red'><abbr title='No'>✘</abbr></font>";
                    $red++;
                }
            }
            echo "</td></tr>\n";
        }
    }
} catch (SetteeRestClientException $e) {
    setteErrorHandler($e);
}
echo "</table>";

// With no agencies or no disclosure logs the ratios are undefined; report 0% rather
// than dividing by zero.
$disclogPercent = ($agencies > 0) ? round(($disclogs / $agencies) * 100) : 0;
$greenPercent = ($disclogs > 0) ? round(($green / $disclogs) * 100) : 0;
$redPercent = ($disclogs > 0) ? round(($red / $disclogs) * 100) : 0;
$orangePercent = ($disclogs > 0) ? round(($orange / $disclogs) * 100) : 0;
echo $agencies . " agencies, " . $disclogPercent . "% with disclosure logs; "
    . $greenPercent . "% logs with scrapers " . $redPercent . "% logs without scrapers " . $orangePercent . "% logs Work-In-Progress scrapers ";

include_footer_documents();
?>
import sys,os | |
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) | |
import genericScrapers | |
import scrape | |
from bs4 import BeautifulSoup | |
#http://www.doughellmann.com/PyMOTW/abc/ | |
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Disclosure-log scraper for a five-cell row layout.

    getTable is not overridden here (the override below is left commented out),
    so whatever the base class provides is used.
    """

    #def getTable(self,soup):
    #    return soup.find(id = "cphMain_C001_Col01").table

    def getColumnCount(self):
        """Return 5 (presumably the cell count the base scraper expects per row - confirm)."""
        return 5

    def getColumns(self, columns):
        """Unpack exactly five cells and return them as (id, date, title, description, notes)."""
        ref, published, heading, summary, remarks = columns
        return (ref, published, heading, summary, remarks)
if __name__ == '__main__':
    # A single parenthesised argument prints the same text under Python 2's print
    # statement and Python 3's print function, so the script parses on both.
    print('Subclass: %s' % issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper))
    print('Instance: %s' % isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper))
    ScraperImplementation().doScrape()
import sys,os | |
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) | |
import genericScrapers | |
import scrape | |
from bs4 import BeautifulSoup | |
#http://www.doughellmann.com/PyMOTW/abc/ | |
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Disclosure-log scraper whose table sits inside the element with id "cphMain_C001_Col01"."""

    def getTable(self, soup):
        """Return the first <table> under the element with id "cphMain_C001_Col01"."""
        container = soup.find(id="cphMain_C001_Col01")
        return container.table

    def getColumnCount(self):
        """Return 5 (presumably the cell count the base scraper expects per row - confirm)."""
        return 5

    def getColumns(self, columns):
        """Unpack exactly five cells and return them as (id, date, title, description, notes)."""
        ref, published, heading, summary, remarks = columns
        return (ref, published, heading, summary, remarks)
if __name__ == '__main__':
    # A single parenthesised argument prints the same text under Python 2's print
    # statement and Python 3's print function, so the script parses on both.
    print('Subclass: %s' % issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper))
    print('Instance: %s' % isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper))
    ScraperImplementation().doScrape()
no disclog |
import sys,os | |
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) | |
import genericScrapers | |
import scrape | |
from bs4 import BeautifulSoup | |
#http://www.doughellmann.com/PyMOTW/abc/ | |
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Disclosure-log scraper that locates its table by the table's summary attribute."""

    def getTable(self, soup):
        """Return the first element whose summary attribute matches the FOI request table."""
        table_summary = "This table shows every FOI request to date."
        return soup.find(summary=table_summary)

    def getColumnCount(self):
        """Return 5 (presumably the cell count the base scraper expects per row - confirm)."""
        return 5

    def getColumns(self, columns):
        """Unpack exactly five cells and return them as (id, date, title, description, notes)."""
        ref, published, heading, summary, remarks = columns
        return (ref, published, heading, summary, remarks)
if __name__ == '__main__':
    # A single parenthesised argument prints the same text under Python 2's print
    # statement and Python 3's print function, so the script parses on both.
    print('Subclass: %s' % issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper))
    print('Instance: %s' % isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper))
    ScraperImplementation().doScrape()
import sys,os | |
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) | |
import genericScrapers | |
import scrape | |
from bs4 import BeautifulSoup | |
#http://www.doughellmann.com/PyMOTW/abc/ | |
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    """Disclosure-log scraper for a six-cell row layout.

    getTable is not overridden here (the override below is left commented out),
    so whatever the base class provides is used.
    """

    #def getTable(self,soup):
    #    return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table

    def getColumnCount(self):
        """Return 6 (presumably the cell count the base scraper expects per row - confirm)."""
        return 6

    def getColumns(self, columns):
        """Unpack exactly six cells and return five of them.

        The fifth cell (named deldate in the unpacking) is dropped; the result is
        (id, date, title, description, notes).
        """
        ref, published, heading, summary, _deldate, remarks = columns
        return (ref, published, heading, summary, remarks)
if __name__ == '__main__':
    # A single parenthesised argument prints the same text under Python 2's print
    # statement and Python 3's print function, so the script parses on both.
    print('Subclass: %s' % issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper))
    print('Instance: %s' % isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper))
    ScraperImplementation().doScrape()