[submodule "couchdb/couchdb-lucene"] | [submodule "couchdb/couchdb-lucene"] |
path = couchdb/couchdb-lucene | path = couchdb/couchdb-lucene |
url = https://github.com/rnewson/couchdb-lucene.git | url = https://github.com/rnewson/couchdb-lucene.git |
[submodule "couchdb/settee"] | [submodule "couchdb/settee"] |
path = couchdb/settee | path = couchdb/settee |
url = https://github.com/inadarei/settee.git | url = https://github.com/inadarei/settee.git |
[submodule "lib/springy"] | [submodule "lib/springy"] |
path = lib/springy | path = lib/springy |
url = https://github.com/dhotson/springy.git | url = https://github.com/dhotson/springy.git |
[submodule "lib/php-diff"] | [submodule "lib/php-diff"] |
path = lib/php-diff | path = lib/php-diff |
url = https://github.com/chrisboulton/php-diff.git | url = https://github.com/chrisboulton/php-diff.git |
[submodule "javascripts/flot"] | |
path = javascripts/flot | |
url = https://github.com/paradoxxxzero/flot.git | |
<?php | <?php |
include_once('include/common.inc.php'); | include_once('include/common.inc.php'); |
include_header(); | include_header(); |
?> | ?> |
<div class="foundation-header"> | <div class="foundation-header"> |
<h1><a href="about.php">About/FAQ</a></h1> | <h1><a href="about.php">About/FAQ</a></h1> |
<h4 class="subheader">Lorem ipsum.</h4> | <h4 class="subheader">Lorem ipsum.</h4> |
</div> | </div> |
<h2> What is this? </h2> | <h2> What is this? </h2> |
Disclosr is a project to monitor Australian Federal Government agencies' | Disclosr is a project to monitor Australian Federal Government agencies' |
compliance with their <a href="http://www.oaic.gov.au/publications/other_operational/foi_policy_frequently_asked_questions.html#_Toc291837571">"proactive disclosure requirements"</a>. | compliance with their <a href="http://www.oaic.gov.au/publications/other_operational/foi_policy_frequently_asked_questions.html#_Toc291837571">"proactive disclosure requirements"</a>. |
OGRE (Open Government Realization Evaluation) is a ranking of compliance with these requirements. | OGRE (Open Government Realization Evaluation) is a ranking of compliance with these requirements. |
Prometheus is the agent which polls agency websites to assess compliance. | Prometheus is the agent which polls agency websites to assess compliance. |
<h2> Open everything </h2> | <h2> Open everything </h2> |
all documents released CC-BY 3 AU | All documents released CC-BY 3 AU |
Open source git @ | Open source git @ |
<h2>Organisational Data Sources</h2> | <h2>Organisational Data Sources</h2> |
http://www.comlaw.gov.au/Browse/Results/ByTitle/AdministrativeArrangementsOrders/Current/Ad/0 defines departments | http://www.comlaw.gov.au/Browse/Results/ByTitle/AdministrativeArrangementsOrders/Current/Ad/0 defines departments |
Agencies can be found in the Schedule to an Appropriation Bill (budget), Schedule to FMA Regulations and/or Public Service Act. | Agencies can be found in the Schedule to an Appropriation Bill (budget), Schedule to FMA Regulations and/or Public Service Act.<br> |
http://www.finance.gov.au/publications/flipchart/docs/FMACACFlipchart.pdf summarises these. view-source:https://www.tenders.gov.au/?event=public.advancedsearch.home is great for the suspended/active status | http://www.finance.gov.au/publications/flipchart/docs/FMACACFlipchart.pdf summarises these. view-source:https://www.tenders.gov.au/?event=public.advancedsearch.home is great for the suspended/active status<br> |
When defining the hierarchy, this system is designed towards monitoring accountability. Thus large agencies that have registered their own ABN | When defining the hierarchy, this system is designed towards monitoring accountability. Thus large agencies that have registered their own ABN |
and have their own accountability mechanisms/website receive a separate record as a child of their department. | and have their own accountability mechanisms/website receive a separate record as a child of their department. |
Some small agencies will choose to simply rely on their parent department's accountability measures. | Some small agencies will choose to simply rely on their parent department's accountability measures.<br> |
This flows through to organisation name and other/past names. A department that completely accounts for an agency will list that agency as an other child name. | This flows through to organisation name and other/past names. A department that completely accounts for an agency will list that agency as an other child name. |
As agencies themselves shift between departments, there may be scope for providing time ranges but typically the newest hierarchy will be the one recorded. | As agencies themselves shift between departments, there may be scope for providing time ranges but typically the newest hierarchy will be the one recorded. |
A department/agency name will be the newest active name assigned to that ABN. | A department/agency name will be the newest active name assigned to that ABN.<br> |
ABN information is derived from the ABR. This is the definitive umpire about which former name should be linked to which current name. | ABN information is derived from the ABR. This is the definitive umpire about which former name should be linked to which current name. |
For example "Department of Transport and Regional Services" became "Department of Infrastructure, Transport, Regional Development and Local Government" (same ABN) | For example "Department of Transport and Regional Services" became "Department of Infrastructure, Transport, Regional Development and Local Government" (same ABN) |
however it later split into "Department of Infrastructure and Transport" (same ABN) | however it later split into "Department of Infrastructure and Transport" (same ABN) |
and "Department of Regional Australia, Regional Development and Local Government" (new ABN). | and "Department of Regional Australia, Regional Development and Local Government" (new ABN).<br> |
Statistical information from http://www.apsc.gov.au/stateoftheservice/1011/statsbulletin/section1.html#t2total https://www.apsedii.gov.au/apsedii/CustomQueryx33.shtml | Statistical information from http://www.apsc.gov.au/stateoftheservice/1011/statsbulletin/section1.html#t2total https://www.apsedii.gov.au/apsedii/CustomQueryx33.shtml |
and individual annual reports. | and individual annual reports.<br> |
Webpage Assessment | <h2>Webpage Assessment</h2> |
Much due care has been put into correctly recording disclosure URLs. Typically the "About", "Corporate", "Publications" and "Sitemap" sections are checked at the very least. | Much due care has been put into correctly recording disclosure URLs. Typically the "About", "Corporate", "Publications" and "Sitemap" sections are checked at the very least. |
Occasionally it is necessary to use a site or Google search. In several rare cases, there is a secret "Disclosure" navigation menu you can find if you find one of the mandatory publishing obligations in that category (seriously). | Occasionally it is necessary to use a site or Google search. In several rare cases, there is a secret "Disclosure" navigation menu you can find if you find one of the mandatory publishing obligations in that category (seriously).<br> |
Some rules about leniency: | Some rules about leniency:<br> |
An empty FOI disclosure log counts, a page outlining what the FOI Act is does not. | <ul> |
A disclosure log in PDF or Word format counts :( | <li>An empty FOI disclosure log counts, a page outlining what the FOI Act is does not.</li> |
An empty File/Record list counts (although that's very minimalistic that you have no files, electronic or paper) | <li>A disclosure log in PDF or Word format counts :(</li> |
Only a current information publication scheme page counts, not a s.9 FOI Act page or an organisation chart. | <li>An empty File/Record list counts (although that's very minimalistic that you have no files, electronic or paper)</li> |
If there isn't a page easily listing all current and past Annual Reports, the most current one (html, pdf) counts. | <li>Only a current information publication scheme page counts, not a s.9 FOI Act page or an organisation chart.</li> |
Consultancy contracts might not need their own webpage (if in Annual Report), grants/appointments might not apply to all organisations but Legal Services Expenditure (and all other obligations) does need a webpage. | <li>If there isn't a page easily listing all current and past Annual Reports, the most current one (html, pdf) counts.</li> |
<li>Consultancy contracts might not need their own webpage (if in Annual Report), grants/appointments might not apply to all organisations but Legal Services Expenditure (and all other obligations) does need a webpage. </li> | |
<h2>Open Government Scoring</h2> | <h2>Open Government Scoring</h2> |
+1 point for every true Has... attribute | +1 point for every true Has... attribute<br> |
-1 point for every false Has... (ie. Has Not) attribute | -1 point for every false Has... (ie. Has Not) attribute<br> |
Don't like this? Make your own score, suggest a better scoring mechanism. | Don't like this? Make your own score, suggest a better scoring mechanism.<br> |
<?php | <?php |
include_footer(); | include_footer(); |
?> | ?> |
<?php | <?php |
include_once("../include/common.inc.php"); | include_once("../include/common.inc.php"); |
// Build a compact short name from a full organisation/office name.
//
// @param string $name  Full name; surrounding whitespace is trimmed first.
// @return string       For names containing "Minister ", "Treasurer" or
//                      "Parliamentary Secretary": the name with the filler
//                      phrases/words, commas, spaces, apostrophes and backticks
//                      listed in $badWords removed. Otherwise: the name's
//                      uppercase A-Z letters concatenated in order.
function shortName($name) { | |
$name = trim($name); | |
if (strstr($name,"Minister ") || strstr($name,"Treasurer") || strstr($name,"Parliamentary Secretary")) { | |
// Entries are applied in array order, so the longer phrases and the
// space-delimited filler words are removed before the bare " " entry
// strips every remaining space.
$badWords = Array ("Assisting the Prime Minister on","Assisting on"," the "," of "," for "," on "," and "," to ",","," ","'","`"); | |
return str_replace($badWords,"",$name); | |
} | |
else { | |
// Acronym path: $out[0] holds every single-character match of [A-Z].
$out = Array(); | |
preg_match_all('/[A-Z]/', $name, $out); | |
return implode("", $out[0]); | |
} | |
} | |
setlocale(LC_CTYPE, 'C'); | setlocale(LC_CTYPE, 'C'); |
$headers = Array("#id", "name", "request_email", "short_name", "notes", "publication_scheme", "home_page", "tag_string"); | $headers = Array("#id", "name", "request_email", "short_name", "notes", "publication_scheme", "home_page", "tag_string"); |
$db = $server->get_db('disclosr-agencies'); | $db = $server->get_db('disclosr-agencies'); |
$tag = Array(); | $tag = Array(); |
try { | try { |
$rows = $db->get_view("app", "byDeptStateName", null, true)->rows; | $rows = $db->get_view("app", "byDeptStateName", null, true)->rows; |
//print_r($rows); | //print_r($rows); |
foreach ($rows as $row) { | foreach ($rows as $row) { |
$tag[$row->value] = phrase_to_tag(dept_to_portfolio($row->key)); | $tag[$row->id] = phrase_to_tag(dept_to_portfolio($row->key)); |
} | } |
} catch (SetteeRestClientException $e) { | } catch (SetteeRestClientException $e) { |
setteErrorHandler($e); | setteErrorHandler($e); |
die(); | |
} | |
$foiEmail = Array(); | |
try { | |
$rows = $db->get_view("app", "foiEmails", null, true)->rows; | |
//print_r($rows); | |
foreach ($rows as $row) { | |
$foiEmail[$row->key] = $row->value; | |
} | |
} catch (SetteeRestClientException $e) { | |
setteErrorHandler($e); | |
die(); | |
} | } |
$fp = fopen('php://output', 'w'); | $fp = fopen('php://output', 'w'); |
if ($fp && $db) { | if ($fp && $db) { |
header('Content-Type: text/csv'); | header('Content-Type: text/csv; charset=utf-8'); |
header('Content-Disposition: attachment; filename="export.' . date("c") . '.csv"'); | header('Content-Disposition: attachment; filename="export.' . date("c") . '.csv"'); |
header('Pragma: no-cache'); | header('Pragma: no-cache'); |
header('Expires: 0'); | header('Expires: 0'); |
fputcsv($fp, $headers); | fputcsv($fp, $headers); |
try { | try { |
$agencies = $db->get_view("app", "byCanonicalName", null, true)->rows; | $agencies = $db->get_view("app", "byCanonicalName", null, true)->rows; |
//print_r($rows); | //print_r($rows); |
foreach ($agencies as $agency) { | foreach ($agencies as $agency) { |
// print_r($agency); | // print_r($agency); |
$row = Array(); | |
$row["#id"] = $agency->value->_id; | if (isset($agency->value->foiEmail) && $agency->value->foiEmail != "null" && !isset($agency->value->status)) { |
$row["name"] = $agency->value->name; | $row = Array(); |
if (isset($agency->value->foi_email)) { | $row["#id"] = $agency->id; |
$row["request_email"] = $agency->value->foiEmail; | $row["name"] = trim($agency->value->name); |
} else { | if (isset($agency->value->foiEmail)) { |
$row["request_email"] = "foi@".GetDomain($agency->value->website); | $row["request_email"] = $agency->value->foiEmail; |
// | } else { |
} | if ($agency->value->orgType == "FMA-DepartmentOfState") { |
if (isset($agency->value->shortName)) { | $row["request_email"] = "foi@" . GetDomain($agency->value->website); |
$row["short_name"] = $agency->value->shortName; | } else { |
} else { | $row["request_email"] = $foiEmail[$agency->value->parentOrg]; |
$out = Array(); | } |
preg_match_all('/[A-Z]/', $agency->value->name, $out); | } |
$row["short_name"] = implode("",$out[0]); | if (isset($agency->value->shortName)) { |
} | $row["short_name"] = $agency->value->shortName; |
$row["notes"] = ""; | } else { |
$row["publication_scheme"] = $agency->value->infoPublicationSchemeURL; | $row["short_name"] = shortName($agency->value->name); |
$row["home_page"] = $agency->value->website; | } |
if ($agency->value->orgType == "FMA-DepartmentOfState") { | $row["notes"] = ""; |
$row["tag_string"] = $tag[$agency->value->_id]; | $row["publication_scheme"] = (isset($agency->value->infoPublicationSchemeURL) ? $agency->value->infoPublicationSchemeURL : ""); |
} else { | $row["home_page"] = (isset($agency->value->website) ? $agency->value->website : ""); |
$row["tag_string"] = $tag[$agency->value->parentOrg];; | if ($agency->value->orgType == "FMA-DepartmentOfState") { |
} | $row["tag_string"] = $tag[$agency->value->_id] . " " . $agency->value->orgType; |
} else { | |
fputcsv($fp, array_values($row)); | $row["tag_string"] = $tag[$agency->value->parentOrg] . " " . $agency->value->orgType; |
} | |
if (isset($agency->value->foiBodies)) { | |
foreach ($agency->value->foiBodies as $foiBody) { | fputcsv($fp, array_values($row)); |
$row['name'] = $foiBody; | |
$row['short_name'] = ""; | if (isset($agency->value->foiBodies)) { |
fputcsv($fp, array_values($row)); | foreach ($agency->value->foiBodies as $foiBody) { |
$row['name'] = iconv("UTF-8", "ASCII//TRANSLIT",$foiBody); | |
$row["short_name"] = shortName($foiBody); | |
fputcsv($fp, array_values($row)); | |
} | |
} | } |
} | } |
} | } |
} catch (SetteeRestClientException $e) { | } catch (SetteeRestClientException $e) { |
setteErrorHandler($e); | setteErrorHandler($e); |
} | } |
die; | die; |
} | } |
?> | ?> |
<?php | <?php |
include_once("../include/common.inc.php"); | include_once("../include/common.inc.php"); |
setlocale(LC_CTYPE, 'C'); | setlocale(LC_CTYPE, 'C'); |
header('Content-Type: text/csv'); | header('Content-Type: text/csv'); |
header('Content-Disposition: attachment; filename="public_body_categories_en.rb"'); | header('Content-Disposition: attachment; filename="public_body_categories_en.rb"'); |
header('Pragma: no-cache'); | header('Pragma: no-cache'); |
header('Expires: 0'); | header('Expires: 0'); |
echo 'PublicBodyCategories.add(:en, ['.PHP_EOL; | echo 'PublicBodyCategories.add(:en, [' . PHP_EOL; |
echo ' "Portfolios",'.PHP_EOL; | echo ' "Portfolios",' . PHP_EOL; |
$db = $server->get_db('disclosr-agencies'); | $db = $server->get_db('disclosr-agencies'); |
try { | try { |
$rows = $db->get_view("app", "byDeptStateName", null, true)->rows; | $rows = $db->get_view("app", "byDeptStateName", null, true)->rows; |
//print_r($rows); | //print_r($rows); |
foreach ($rows as $row) { | foreach ($rows as $row) { |
echo ' [ "'.phrase_to_tag(dept_to_portfolio($row->key)).'","'. dept_to_portfolio($row->key).'","part of the '.dept_to_portfolio($row->key).' portfolio" ],'.PHP_EOL; | echo ' [ "' . phrase_to_tag(dept_to_portfolio($row->key)) . '","' . dept_to_portfolio($row->key) . '","part of the ' . dept_to_portfolio($row->key) . ' portfolio" ],' . PHP_EOL; |
} | } |
} catch (SetteeRestClientException $e) { | } catch (SetteeRestClientException $e) { |
setteErrorHandler($e); | setteErrorHandler($e); |
} | } |
echo '])'; | echo '])'; |
?> | ?> |
<?php | |
include_once('include/common.inc.php'); | |
include_header(); | |
$db = $server->get_db('disclosr-agencies'); | |
?> | |
<div class="foundation-header"> | |
<h1><a href="about.php">Charts</a></h1> | |
<h4 class="subheader">Lorem ipsum.</h4> | |
</div> | |
<div id="placeholder" style="width:900px;height:600px;"></div> | |
<script id="source"> | |
window.onload = function() { | |
$(document).ready(function() { | |
var d1 = []; | |
var labels = []; | |
<?php | |
try { | |
$rows = $db->get_view("app", "scoreHas?group=true", null, true)->rows; | |
/*foreach ($rows as $key => $row) { | |
echo " d1.push([$key, {$row->value}]);".PHP_EOL; | |
echo " labels.push('{$row->key}');".PHP_EOL; | |
}*/ | |
$dataValues = Array(); | |
foreach ($rows as $row) { | |
$dataValues[$row->value] = $row->key; | |
} | |
$i = 0; | |
ksort($dataValues); | |
foreach($dataValues as $value => $key) { | |
echo " d1.push([$i, $value]);".PHP_EOL; | |
echo " labels.push('$key');".PHP_EOL; | |
$i++; | |
} | |
} catch (SetteeRestClientException $e) { | |
setteErrorHandler($e); | |
} | |
?> | |
$.plot($("#placeholder"), [ d1], { | |
grid: { hoverable: true }, | |
series: { | |
bars: { show: true, barWidth: 0.6 } | |
}, | |
xaxis: { | |
tickFormatter: function formatter(val, axis) { | |
if (labels[val]) { | |
return(labels[val]); | |
} else { | |
return ""; | |
} | |
}, | |
labelAngle: 90 | |
} | |
}); | |
var previousPoint = null; | |
$("#placeholder").bind("plothover", function (event, pos, item) { | |
if (item) { | |
if (previousPoint != item.datapoint) { | |
previousPoint = item.datapoint; | |
$("#tooltip").remove(); | |
var x = item.datapoint[0], | |
y = item.datapoint[1] - item.datapoint[2]; | |
showTooltip(item.pageX, item.pageY, y ); | |
} | |
} | |
else { | |
$("#tooltip").remove(); | |
previousPoint = null; | |
} | |
}); | |
}); | |
}; | |
function showTooltip(x, y, contents) { | |
$('<div id="tooltip">' + contents + '</div>').css( { | |
position: 'absolute', | |
display: 'none', | |
top: y + 5, | |
left: x + 5, | |
border: '1px solid #fdd', | |
padding: '2px', | |
'background-color': '#fee', | |
opacity: 0.80 | |
}).appendTo("body").fadeIn(200); | |
} | |
</script> | |
<?php | |
include_footer(); | |
?> |
<?php | <?php |
include_once('include/common.inc.php'); | include_once('include/common.inc.php'); |
include_header(); | include_header(); |
function displayValue($key, $value, $mode) { | function displayValue($key, $value, $mode) { |
global $db; | global $db, $schemas; |
if ($mode == "view") { | if ($mode == "view") { |
echo "<tr>"; | |
echo "<td>" . $schemas['agency']["properties"][$key]['x-title'] . "<br><small>" . $schemas['agency']["properties"][$key]['description'] . "</small></td><td>"; | |
if (is_array($value)) { | if (is_array($value)) { |
echo "<tr><td>$key</td><td><ol>"; | echo "<ol>"; |
foreach ($value as $subkey => $subvalue) { | foreach ($value as $subkey => $subvalue) { |
echo "<li>$subvalue</li>"; | if (isset($schemas['agency']["properties"][$key]['x-itemprop'])) { |
echo '<li itemprop="' . $schemas['agency']["properties"][$key]['x-itemprop'] . '">'; | |
} else { | |
echo "<li>"; | |
} | |
echo "$subvalue</li>"; | |
} | } |
echo "</ol></td></tr>"; | echo "</ol></td></tr>"; |
} else { | } else { |
echo "<tr><td>$key</td><td>$value</td></tr>"; | if (isset($schemas['agency']["properties"][$key]['x-itemprop'])) { |
echo '<span itemprop="' . $schemas['agency']["properties"][$key]['x-itemprop'] . '">'; | |
} else { | |
echo "<span>"; | |
} | |
if ((strpos($key, "URL") > 0 || $key == 'website') && $value != "") { | |
echo "<a href='$value'>view</a></span>"; | |
} else { | |
echo "$value</span>"; | |
} | |
} | } |
echo "</td></tr>"; | |
} | } |
if ($mode == "edit") { | if ($mode == "edit") { |
if (is_array($value)) { | if (is_array($value)) { |
echo '<div class="row"> | echo '<div class="row"> |
<div class="seven columns"> | <div class="seven columns"> |
<fieldset> | <fieldset> |
<h5>' . $key . '</h5>'; | <h5>' . $key . '</h5>'; |
foreach ($value as $subkey => $subvalue) { | foreach ($value as $subkey => $subvalue) { |
echo "<label>$subkey</label><input class='input-text' type='text' id='$key$subkey' name='$key" . '[' . $subkey . "]' value='$subvalue'/></tr>"; | echo "<label>$subkey</label><input class='input-text' type='text' id='$key$subkey' name='$key" . '[' . $subkey . "]' value='$subvalue'/></tr>"; |
} | } |
echo "</fieldset> | echo "</fieldset> |
</div> | </div> |
</div>"; | </div>"; |
} else { | } else { |
if (strpos($key, "_") === 0) { | if (strpos($key, "_") === 0) { |
echo"<input type='hidden' id='$key' name='$key' value='$value'/>"; | echo"<input type='hidden' id='$key' name='$key' value='$value'/>"; |
} else if ($key == "parentOrg") { | } else if ($key == "parentOrg") { |
echo "<label for='$key'>$key</label><select id='$key' name='$key'><option value=''> Select... </option>"; | echo "<label for='$key'>$key</label><select id='$key' name='$key'><option value=''> Select... </option>"; |
$rows = $db->get_view("app", "byDeptStateName")->rows; | $rows = $db->get_view("app", "byDeptStateName")->rows; |
//print_r($rows); | //print_r($rows); |
foreach ($rows as $row) { | foreach ($rows as $row) { |
echo "<option value='{$row->value}'".(($row->value == $value) ? "SELECTED":"")." >".str_replace("Department of ","",$row->key)."</option>"; | echo "<option value='{$row->value}'" . (($row->value == $value) ? "SELECTED" : "") . " >" . str_replace("Department of ", "", $row->key) . "</option>"; |
} | } |
echo" </select>"; | echo" </select>"; |
} else if (strpos($key, "has") === 0) { | } else if (strpos($key, "has") === 0) { |
echo "<label for='$key'><input type='checkbox' id='$key' name='$key' ".(($value=='on' || $value=='true')?"checked='$value'":"")."> $key</label>"; | echo "<label for='$key'><input type='checkbox' id='$key' name='$key' " . (($value == 'on' || $value == 'true') ? "checked='$value'" : "") . "> $key</label>"; |
} else { | } else { |
echo "<label>$key</label><input class='input-text' type='text' id='$key' name='$key' value='$value'/>"; | echo "<label>$key</label><input class='input-text' type='text' id='$key' name='$key' value='$value'/>"; |
if ((strpos($key,"URL") > 0 || $key == 'website')&& $value != "") { | if ((strpos($key, "URL") > 0 || $key == 'website') && $value != "") { |
echo "<a href='$value'>view</a>"; | echo "<a href='$value'>view</a>"; |
} | } |
if ($key == 'abn') { | if ($key == 'abn') { |
echo "<a href='http://www.abr.business.gov.au/SearchByAbn.aspx?SearchText=$value'>view abn</a>"; | echo "<a href='http://www.abr.business.gov.au/SearchByAbn.aspx?SearchText=$value'>view abn</a>"; |
} | } |
} | } |
} | } |
} | } |
// | // |
} | } |
// Fill in placeholder values for every agency-schema property missing from $row.
//
// Reads the global $schemas and walks the keys of $schemas['agency']['properties'].
// A property that is not set in $row (isset() is also false for null values) gets:
//   - "false"     when its schema type is "string" and its name starts with "has"
//   - ""          for any other "string" property
//   - Array("")   when its schema type is "array"
// Properties of any other type are left unset.
//
// @param array $row  Agency record as an associative array.
// @return array      The same record with the missing properties added.
function addDefaultFields($row) { | function addDefaultFields($row) { |
global $schemas; | global $schemas; |
$defaultFields = array_keys($schemas['agency']['properties']); | $defaultFields = array_keys($schemas['agency']['properties']); |
foreach ($defaultFields as $defaultField) { | foreach ($defaultFields as $defaultField) { |
if (!isset($row[$defaultField])) { | if (!isset($row[$defaultField])) { |
if ($schemas['agency']['properties'][$defaultField]['type'] == "string") { | if ($schemas['agency']['properties'][$defaultField]['type'] == "string") { |
// strpos(...) === 0 means the property name begins with "has".
if (strpos($defaultField, "has") === 0) { | if (strpos($defaultField, "has") === 0) { |
$row[$defaultField] = "false"; | $row[$defaultField] = "false"; |
} else { | } else { |
$row[$defaultField] = ""; | $row[$defaultField] = ""; |
} | } |
} | } |
if ($schemas['agency']['properties'][$defaultField]['type'] == "array") { | if ($schemas['agency']['properties'][$defaultField]['type'] == "array") { |
// One empty-string element rather than an empty array.
$row[$defaultField] = Array(""); | $row[$defaultField] = Array(""); |
} | } |
} | } |
} | } |
return $row; | return $row; |
} | } |
$db = $server->get_db('disclosr-agencies'); | $db = $server->get_db('disclosr-agencies'); |
if (isset($_REQUEST['id'])) { | if (isset($_REQUEST['id'])) { |
//get an agency record as json/html, search by name/abn/id | //get an agency record as json/html, search by name/abn/id |
// by name = startkey="Ham"&endkey="Ham\ufff0" | // by name = startkey="Ham"&endkey="Ham\ufff0" |
// edit? | // edit? |
$row = $db->get($_REQUEST['id']); | $row = $db->get($_REQUEST['id']); |
//print_r($row); | //print_r($row); |
if (sizeof($_POST) > 0) { | if (sizeof($_POST) > 0) { |
//print_r($_POST); | //print_r($_POST); |
foreach ($_POST as $postkey => $postvalue) { | foreach ($_POST as $postkey => $postvalue) { |
if ($postvalue == "") { | if ($postvalue == "") { |
unset($_POST[$postkey]); | unset($_POST[$postkey]); |
} | } |
if (is_array($postvalue) && count($postvalue) == 1 && $postvalue[0] == "") { | if (is_array($postvalue) && count($postvalue) == 1 && $postvalue[0] == "") { |
unset($_POST[$postkey]); | unset($_POST[$postkey]); |
} | } |
} | } |
if (isset($_POST['_id']) && $db->get_rev($_POST['_id']) == $_POST['_rev']) { | if (isset($_POST['_id']) && $db->get_rev($_POST['_id']) == $_POST['_rev']) { |
echo "Edited version was latest version, continue saving"; | echo "Edited version was latest version, continue saving"; |
$newdoc = $_POST; | $newdoc = $_POST; |
$newdoc['metadata']['lastModified'] = time(); | $newdoc['metadata']['lastModified'] = time(); |
$row = $db->save($newdoc); | $row = $db->save($newdoc); |
} else { | } else { |
echo "ALERT doc revised by someone else while editing. Document not saved."; | echo "ALERT doc revised by someone else while editing. Document not saved."; |
} | } |
} | } |
$mode = "edit"; | $mode = "view"; |
$row = addDefaultFields(object_to_array($row)); | if ($mode == "edit") { |
$row = addDefaultFields(object_to_array($row)); | |
} else { | |
$row = object_to_array($row); | |
} | |
if ($mode == "view") { | if ($mode == "view") { |
echo '<table width="100%">'; | echo '<div itemscope itemtype ="http://schema.org/GovernmentOrganisation"><table width="100%">'; |
echo '<tr> <td colspan="2"><h3>' . $row['name'] . "</h3></td></tr>"; | echo '<tr> <td colspan="2"><h3>' . $row['name'] . "</h3></td></tr>"; |
echo "<tr><th>Field Name</th><th>Field Value</th></tr>"; | echo "<tr><th>Field Name</th><th>Field Value</th></tr>"; |
} | } |
if ($mode == "edit") { | if ($mode == "edit") { |
?> | ?> |
<input id="addfield" type="button" value="Add Field"/> | <input id="addfield" type="button" value="Add Field"/> |
<script> | <script> |
window.onload = function() { | window.onload = function() { |
$(document).ready(function() { | $(document).ready(function() { |
// put all your jQuery goodness in here. | // put all your jQuery goodness in here. |
// http://charlie.griefer.com/blog/2009/09/17/jquery-dynamically-adding-form-elements/ | // http://charlie.griefer.com/blog/2009/09/17/jquery-dynamically-adding-form-elements/ |
$('#addfield').click(function() { | $('#addfield').click(function() { |
var field_name=window.prompt("fieldname?",""); | var field_name=window.prompt("fieldname?",""); |
if (field_name !="") { | if (field_name !="") { |
$('#submitbutton').before($('<span></span>') | $('#submitbutton').before($('<span></span>') |
.append("<label>"+field_name+"</label>") | .append("<label>"+field_name+"</label>") |
.append("<input class='input-text' type='text' id='"+field_name+"' name='"+field_name+"'/>") | .append("<input class='input-text' type='text' id='"+field_name+"' name='"+field_name+"'/>") |
); | ); |
} | } |
}); | }); |
}); | }); |
}; | }; |
</script> | </script> |
<form id="editform" class="nice" method="post"> | <form id="editform" class="nice" method="post"> |
<?php | <?php |
} | |
foreach ($row as $key => $value) { | |
echo displayValue($key, $value, $mode); | |
} | |
if ($mode == "view") { | |
echo "</table></div>"; | |
} | |
if ($mode == "edit") { | |
echo '<input id="submitbutton" type="submit"/></form>'; | |
} | |
} else { | |
try { | |
/* $rows = $db->get_view("app", "showNamesABNs")->rows; | |
//print_r($rows); | |
foreach ($rows as $row) { | |
// print_r($row); | |
echo '<li><a href="getAgency.php?id=' . $row->key . '">' . | |
(isset($row->value->name) && $row->value->name != "" ? $row->value->name : "NO NAME " . $row->value->abn) | |
. '</a></li>'; | |
} */ | |
$rows = $db->get_view("app", "byName")->rows; | |
//print_r($rows); | |
foreach ($rows as $row) { | |
// print_r($row); | |
echo '<li itemscope itemtype="http://schema.org/GovernmentOrganization"><a href="getAgency.php?id=' . $row->value . '" itemprop="url"><span itemprop="name">' . | |
$row->key | |
. '</span></a></li>'; | |
} | } |
foreach ($row as $key => $value) { | } catch (SetteeRestClientException $e) { |
echo displayValue($key, $value, $mode); | setteErrorHandler($e); |
} | |
if ($mode == "view") { | |
echo "</table>"; | |
} | |
if ($mode == "edit") { | |
echo '<input id="submitbutton" type="submit"/></form>'; | |
} | |
} else { | |
try { | |
/*$rows = $db->get_view("app", "showNamesABNs")->rows; | |
//print_r($rows); | |
foreach ($rows as $row) { | |
// print_r($row); | |
echo '<li><a href="getAgency.php?id=' . $row->key . '">' . | |
(isset($row->value->name) && $row->value->name != "" ? $row->value->name : "NO NAME " . $row->value->abn) | |
. '</a></li>'; | |
}*/ | |
$rows = $db->get_view("app", "byName")->rows; | |
//print_r($rows); | |
foreach ($rows as $row) { | |
// print_r($row); | |
echo '<li><a href="getAgency.php?id=' . $row->value . '">' . | |
$row->key | |
. '</a></li>'; | |
} | |
} catch (SetteeRestClientException $e) { | |
setteErrorHandler($e); | |
} | |
} | } |
include_footer(); | } |
?> | include_footer(); |
?> |
<?php | <?php |
include_once('include/common.inc.php'); | include_once('include/common.inc.php'); |
//include_header(); | //include_header(); |
?> | $format = "html"; |
<script src="http://ajax.googleapis.com/ajax/libs/jquery/1.3.2/jquery.min.js"></script> | if (isset($_REQUEST['format'])) { |
<script src="lib/springy/springy.js"></script> | $format = $_REQUEST['format']; |
<script src="lib/springy/springyui.js"></script> | } |
<script> | |
var graph = new Graph(); | function add_node($id, $label) { |
var nodes = []; | global $format; |
<?php | if ($format == "html") { |
echo "nodes[\"$id\"] = graph.newNode({label: \"$label\"});" . PHP_EOL; | |
} | |
if ($format == "dot" && $label != "") { | |
echo "$id [label=\"$label\"];". PHP_EOL; | |
} | |
} | |
function add_edge($from, $to, $color) { | |
global $format; | |
if ($format == "html") { | |
echo "graph.newEdge(nodes[\"$from\"], nodes['$to'], {color: '$color'});" . PHP_EOL; | |
} | |
if ($format == "dot") { | |
echo "$from -> $to ".($color != ""? "[color=$color]":"").";". PHP_EOL; | |
} | |
} | |
if ($format == "html") { | |
?> | |
<script src="http://ajax.googleapis.com/ajax/libs/jquery/1.3.2/jquery.min.js"></script> | |
<script src="lib/springy/springy.js"></script> | |
<script src="lib/springy/springyui.js"></script> | |
<script> | |
var graph = new Graph(); | |
var nodes = []; | |
<?php | |
} | |
if ($format == "dot") { | |
echo 'digraph g {'. PHP_EOL; | |
} | |
$db = $server->get_db('disclosr-agencies'); | $db = $server->get_db('disclosr-agencies'); |
echo "nodes[\"fedg\"] = graph.newNode({label: \"Federal Government - Commonwealth of Australia\"});" . PHP_EOL; | add_node("fedg","Federal Government - Commonwealth of Australia"); |
try { | try { |
$rows = $db->get_view("app", "byCanonicalName", null, true)->rows; | $rows = $db->get_view("app", "byCanonicalName", null, true)->rows; |
//print_r($rows); | //print_r($rows); |
foreach ($rows as $row) { | foreach ($rows as $row) { |
echo "nodes[\"{$row->id}\"] = graph.newNode({label: \"{$row->key}\"});" . PHP_EOL; | add_node($row->id, $row->key); |
} | } |
} catch (SetteeRestClientException $e) { | } catch (SetteeRestClientException $e) { |
setteErrorHandler($e); | setteErrorHandler($e); |
} | } |
try { | try { |
$rows = $db->get_view("app", "byDeptStateName", null, true)->rows; | $rows = $db->get_view("app", "byDeptStateName", null, true)->rows; |
//print_r($rows); | //print_r($rows); |
foreach ($rows as $row) { | foreach ($rows as $row) { |
echo "graph.newEdge(nodes[\"fedg\"], nodes['{$row->value}'], {color: '#00A0B0'});" . PHP_EOL; | add_edge("fedg", $row->value, 'yellow'); |
} | } |
} catch (SetteeRestClientException $e) { | } catch (SetteeRestClientException $e) { |
setteErrorHandler($e); | setteErrorHandler($e); |
} | } |
try { | try { |
$rows = $db->get_view("app", "parentOrgs", null, true)->rows; | $rows = $db->get_view("app", "parentOrgs", null, true)->rows; |
// print_r($rows); | // print_r($rows); |
foreach ($rows as $row) { | foreach ($rows as $row) { |
echo "graph.newEdge(nodes[\"{$row->key}\"], nodes['{$row->value}'], {color: '#FFA0B0'});" . PHP_EOL; | add_edge($row->key, $row->value, 'blue'); |
} | } |
} catch (SetteeRestClientException $e) { | } catch (SetteeRestClientException $e) { |
setteErrorHandler($e); | setteErrorHandler($e); |
} | } |
?> | if ($format == "html") { |
window.onload = function() { | ?> |
$(document).ready(function() { | window.onload = function() { |
var springy = $('#springydemo').springy({ | $(document).ready(function() { |
graph: graph | var springy = $('#springydemo').springy({ |
graph: graph | |
}); | |
}); | }); |
}); | }; |
}; | </script> |
</script> | |
<canvas id="springydemo" width="1260" height="680" /> | <canvas id="springydemo" width="1260" height="680" /> |
<?php | <?php |
} | |
if ($format == "dot") { | |
echo "}"; | |
} | |
//include_footer(); | //include_footer(); |
?> | ?> |
<?php | <?php |
// Date/time functions run in Sydney local time.
date_default_timezone_set("Australia/Sydney");

// $basePath is the relative prefix back to the site root. Scripts whose path
// contains one of the sub-directories below get "../"; all others get "".
$basePath = "";
if (preg_match('#alaveteli/|admin/|lib/|include/#', $_SERVER['PHP_SELF'])) {
    $basePath = "../";
}

include_once ('couchdb.inc.php');
include_once ('template.inc.php');
/**
 * Recursively convert a stdClass object into an array.
 * Source: http://www.php.net/manual/en/language.types.object.php#102735
 *
 * Only property values whose class is exactly stdClass are converted;
 * every other value (scalars, arrays, other objects) is copied unchanged.
 *
 * @param stdClass $Class object to convert
 * @return array the object's properties as key => value pairs
 */
function object_to_array(stdClass $Class) {
    // The (array) cast turns the top-level properties into array entries.
    $converted = (array) $Class;

    foreach ($converted as $property => $member) {
        if (!is_object($member) || get_class($member) !== 'stdClass') {
            continue;
        }
        // Nested plain objects get the same treatment.
        $converted[$property] = object_to_array($member);
    }

    return $converted;
}
/**
 * Recursively convert an array into a stdClass object.
 * Source: http://www.php.net/manual/en/language.types.object.php#102735
 *
 * Every value that is itself an array is converted first, then the whole
 * array is cast to an object.
 *
 * @param array $array array to convert
 * @return stdClass object with one property per array key
 */
function array_to_object(array $array) {
    foreach (array_keys($array) as $index) {
        if (is_array($array[$index])) {
            $array[$index] = array_to_object($array[$index]);
        }
    }

    // The (object) cast turns the remaining array into a stdClass.
    $asObject = (object) $array;
    return $asObject;
}
/**
 * Strip the "Department of" / "Department of the" wording from a department name.
 *
 * @param string $deptName department name, e.g. "Department of the Treasury"
 * @return string the remaining text with surrounding whitespace trimmed
 */
function dept_to_portfolio($deptName) {
    // Collapse the "of the" form first so one replacement handles both forms.
    $withoutArticle = str_replace("Department of the", "Department of", $deptName);
    $withoutPrefix = str_replace("Department of", "", $withoutArticle);
    return trim($withoutPrefix);
}
/**
 * Turn a phrase into a tag: lower-cased, commas and apostrophes removed,
 * spaces replaced by underscores.
 *
 * @param string $phrase free text
 * @return string the tag form of $phrase
 */
function phrase_to_tag ($phrase) {
    $lowered = strtolower($phrase);
    // str_replace applies the pairs in order: "," and "'" are dropped, then " " becomes "_".
    return str_replace(array(",", "'", " "), array("", "", "_"), $lowered);
}
/**
 * Return the host part of a URL with every literal "www." removed.
 *
 * When parse_url() finds no host (for example a scheme-less "example.gov.au"),
 * the path component is returned instead.
 *
 * @param string $url absolute URL, or a bare host/path
 * @return string|null the host, else the path; null when parse_url() yields neither
 */
function GetDomain($url)
{
    // ereg_replace() was deprecated in PHP 5.3 and removed in PHP 7. The old
    // pattern 'www\.' only ever matched the literal text "www.", so a plain
    // str_replace() gives the same result on every PHP version.
    $nowww = str_replace('www.', '', $url);
    $domain = parse_url($nowww);

    // parse_url() returns false for seriously malformed URLs.
    if (!is_array($domain)) {
        return null;
    }

    if (!empty($domain["host"])) {
        return $domain["host"];
    }

    // Guard the lookup: inputs such as "?q=1" have neither host nor path.
    return isset($domain["path"]) ? $domain["path"] : null;
}
?> | |
<?php | <?php |
include $basePath."schemas/schemas.inc.php"; | include $basePath . "schemas/schemas.inc.php"; |
require ($basePath.'couchdb/settee/src/settee.php'); | require ($basePath . 'couchdb/settee/src/settee.php'); |
// Builds the CouchDB design document "_design/app" and saves it through the
// global $db. Each $obj->views-><name>->map assignment below stores the
// JavaScript source of one map function as a string.
// Returns whatever $db->save() returns.
// NOTE(review): $obj->views is never initialised before the nested property
// assignments, so they rely on PHP creating the intermediate objects
// implicitly - confirm this still works on the PHP version in use.
function createAgencyDesignDoc() { | function createAgencyDesignDoc() { |
global $db; | global $db; |
$obj = new stdClass(); | $obj = new stdClass(); |
$obj->_id = "_design/" . urlencode("app"); | $obj->_id = "_design/" . urlencode("app"); |
$obj->language = "javascript"; | $obj->language = "javascript"; |
// all / byABN: every document, keyed by _id and by abn respectively.
$obj->views->all->map = "function(doc) { emit(doc._id, doc); };"; | $obj->views->all->map = "function(doc) { emit(doc._id, doc); };"; |
$obj->views->byABN->map = "function(doc) { emit(doc.abn, doc); };"; | $obj->views->byABN->map = "function(doc) { emit(doc.abn, doc); };"; |
// byCanonicalName: documents that have a parentOrg or whose orgType is
// 'FMA-DepartmentOfState', keyed by name.
$obj->views->byCanonicalName->map = "function(doc) { | $obj->views->byCanonicalName->map = "function(doc) { |
if (doc.parentOrg || doc.orgType == 'FMA-DepartmentOfState') { | if (doc.parentOrg || doc.orgType == 'FMA-DepartmentOfState') { |
emit(doc.name, doc._id); | emit(doc.name, doc); |
} | } |
};"; | };"; |
// byDeptStateName: only orgType 'FMA-DepartmentOfState', name => _id.
$obj->views->byDeptStateName->map = "function(doc) { | $obj->views->byDeptStateName->map = "function(doc) { |
if (doc.orgType == 'FMA-DepartmentOfState') { | if (doc.orgType == 'FMA-DepartmentOfState') { |
emit(doc.name, doc._id); | emit(doc.name, doc._id); |
} | } |
};"; | };"; |
// parentOrgs: documents with a parentOrg, _id => parentOrg.
$obj->views->parentOrgs->map = "function(doc) { | $obj->views->parentOrgs->map = "function(doc) { |
if (doc.parentOrg) { | if (doc.parentOrg) { |
emit(doc._id, doc.parentOrg); | emit(doc._id, doc.parentOrg); |
} | } |
};"; | };"; |
// byName: emits the document under its name and under every otherNames
// entry that is non-empty and differs from name.
$obj->views->byName->map = "function(doc) { | $obj->views->byName->map = "function(doc) { |
emit(doc.name, doc._id); | emit(doc.name, doc._id); |
for (name in doc.otherNames) { | for (name in doc.otherNames) { |
if (doc.otherNames[name] != '' && doc.otherNames[name] != doc.name) { | if (doc.otherNames[name] != '' && doc.otherNames[name] != doc.name) { |
emit(doc.otherNames[name], doc._id); | emit(doc.otherNames[name], doc._id); |
} | } |
} | } |
};"; | };"; |
// foiEmails: _id => foiEmail for every document.
$obj->views->foiEmails->map = "function(doc) { | |
emit(doc._id, doc.foiEmail); | |
};"; | |
$obj->views->byLastModified->map = "function(doc) { emit(doc.metadata.lastModified, doc); }"; | $obj->views->byLastModified->map = "function(doc) { emit(doc.metadata.lastModified, doc); }"; |
// getActive / getSuspended: documents whose status is "active" / "suspended".
$obj->views->getActive->map = 'function(doc) { if (doc.status == "active") { emit(doc._id, doc); } };'; | $obj->views->getActive->map = 'function(doc) { if (doc.status == "active") { emit(doc._id, doc); } };'; |
$obj->views->getSuspended->map = 'function(doc) { if (doc.status == "suspended") { emit(doc._id, doc); } };'; | $obj->views->getSuspended->map = 'function(doc) { if (doc.status == "suspended") { emit(doc._id, doc); } };'; |
// NOTE(review): in the multi-line getScrapeRequired map, Date.parse() yields a
// number in JavaScript, so calling getTime() on lastScrape looks like it
// would throw - confirm against the deployed view.
$obj->views->getScrapeRequired->map = "function(doc) { emit(doc.abn, doc); };"; | $obj->views->getScrapeRequired->map = "function(doc) { |
var lastScrape = Date.parse(doc.metadata.lastScraped); | |
var today = new Date(); | |
if (!lastScrape || lastScrape.getTime() + 1000 != today.getTime()) { | |
emit(doc._id, doc); | |
} | |
};"; | |
$obj->views->showNamesABNs->map = "function(doc) { emit(doc._id, {name: doc.name, abn: doc.abn}); };"; | $obj->views->showNamesABNs->map = "function(doc) { emit(doc._id, {name: doc.name, abn: doc.abn}); };"; |
// getConflicts: for documents with _conflicts, the current _rev followed by
// the conflicting revisions, under a null key.
$obj->views->getConflicts->map = "function(doc) { | $obj->views->getConflicts->map = "function(doc) { |
if (doc._conflicts) { | if (doc._conflicts) { |
emit(null, [doc._rev].concat(doc._conflicts)); | emit(null, [doc._rev].concat(doc._conflicts)); |
} | } |
}"; | }"; |
// http://stackoverflow.com/questions/646628/javascript-startswith | // http://stackoverflow.com/questions/646628/javascript-startswith |
// scoreHas: for non-suspended documents, emits (propName, 1) for each defined
// property whose name starts with "has" or ends with "URL", plus ("total", 1).
$obj->views->scoreHas->map = 'if(!String.prototype.startsWith){ | |
String.prototype.startsWith = function (str) { | |
return !this.indexOf(str); | |
} | |
} | |
if(!String.prototype.endsWith){ | |
String.prototype.endsWith = function(suffix) { | |
return this.indexOf(suffix, this.length - suffix.length) !== -1; | |
}; | |
} | |
function(doc) { | |
if (typeof(doc["status"]) == "undefined" || doc["status"] != "suspended") { | |
for(var propName in doc) { | |
if(typeof(doc[propName]) != "undefined" && (propName.startsWith("has") || propName.endsWith("URL"))) { | |
emit(propName, 1); | |
} | |
} | |
emit("total", 1); | |
} | |
}'; | |
// score: for non-suspended documents, counts the defined properties whose
// name starts with "l"; the key is that count concatenated with the _id.
$obj->views->score->map = 'if(!String.prototype.startsWith){ | $obj->views->score->map = 'if(!String.prototype.startsWith){ |
String.prototype.startsWith = function (str) { | String.prototype.startsWith = function (str) { |
return !this.indexOf(str); | return !this.indexOf(str); |
} | } |
} | } |
function(doc) { | function(doc) { |
count = 0; | count = 0; |
if (typeof(doc["status"]) == "undefined" || doc["status"] != "suspended") { | if (typeof(doc["status"]) == "undefined" || doc["status"] != "suspended") { |
for(var propName in doc) { | for(var propName in doc) { |
if(typeof(doc[propName]) != "undefined" && propName.startsWith("l")) { | if(typeof(doc[propName]) != "undefined" && propName.startsWith("l")) { |
count++ | count++ |
} | } |
} | } |
emit(count+doc._id, {id:doc._id, name: doc.name, score:count}); | emit(count+doc._id, {id:doc._id, name: doc.name, score:count}); |
} | } |
}'; | }'; |
// allow safe updates (even if slightly slower due to extra: rev-detection check). | // allow safe updates (even if slightly slower due to extra: rev-detection check). |
return $db->save($obj, true); | return $db->save($obj, true); |
} | } |
// Choose the CouchDB server by the machine's host name (php_uname('n')):
// "vanille" and "KYUUBEY" each get a fixed LAN address, every other host
// uses the local instance on 127.0.0.1:5984. The result is stored in $server.
if (php_uname('n') == "vanille") { | |
if( php_uname('n') == "vanille") { | $server = new SetteeServer('http://192.168.178.21:5984'); |
} else | |
if (php_uname('n') == "KYUUBEY") { | |
$server = new SetteeServer('http://192.168.178.21:5984'); | $server = new SetteeServer('http://192.168.1.148:5984'); |
} else | |
if( php_uname('n') == "KYUUBEY") { | |
$server = new SetteeServer('http://192.168.1.148:5984'); | |
} else { | } else { |
$server = new SetteeServer('http://127.0.0.1:5984'); | $server = new SetteeServer('http://127.0.0.1:5984'); |
} | } |
/**
 * Print an exception's message followed by an HTML line break and a newline.
 *
 * @param Exception $e the exception to report (anything with getMessage())
 */
function setteErrorHandler($e) {
    $line = $e->getMessage() . "<br>" . PHP_EOL;
    print $line;
}
?> | |
<?php | <?php |
// Prints the top of every page: doctype, <head> with the stylesheets, the
// navigation bar, and the opening <div class="container">.
// Stylesheet URLs are prefixed with the global $basePath.
function include_header() { | function include_header() { |
global $basePath; | global $basePath; |
?> | ?> |
<!DOCTYPE html> | <!DOCTYPE html> |
<!-- paulirish.com/2008/conditional-stylesheets-vs-css-hacks-answer-neither/ --> | <!-- paulirish.com/2008/conditional-stylesheets-vs-css-hacks-answer-neither/ --> |
<!--[if lt IE 7]> <html class="no-js lt-ie9 lt-ie8 lt-ie7" lang="en"> <![endif]--> | <!--[if lt IE 7]> <html class="no-js lt-ie9 lt-ie8 lt-ie7" lang="en"> <![endif]--> |
<!--[if IE 7]> <html class="no-js lt-ie9 lt-ie8" lang="en"> <![endif]--> | <!--[if IE 7]> <html class="no-js lt-ie9 lt-ie8" lang="en"> <![endif]--> |
<!--[if IE 8]> <html class="no-js lt-ie9" lang="en"> <![endif]--> | <!--[if IE 8]> <html class="no-js lt-ie9" lang="en"> <![endif]--> |
<!--[if gt IE 8]><!--> <html lang="en"> <!--<![endif]--> | <!--[if gt IE 8]><!--> <html lang="en"> <!--<![endif]--> |
<head> | <head> |
<meta charset="utf-8" /> | <meta charset="utf-8" /> |
<!-- Set the viewport width to device width for mobile --> | <!-- Set the viewport width to device width for mobile --> |
<meta name="viewport" content="width=device-width" /> | <meta name="viewport" content="width=device-width" /> |
<title>Disclosr</title> | <title>Disclosr</title> |
<!-- Included CSS Files --> | <!-- Included CSS Files --> |
<link rel="stylesheet" href="<?php echo $basePath ?>stylesheets/foundation.css"> | <link rel="stylesheet" href="<?php echo $basePath ?>stylesheets/foundation.css"> |
<link rel="stylesheet" href="<?php echo $basePath ?>stylesheets/app.css"> | <link rel="stylesheet" href="<?php echo $basePath ?>stylesheets/app.css"> |
<!--[if lt IE 9]> | <!--[if lt IE 9]> |
<link rel="stylesheet" href="<?php echo $basePath ?>stylesheets/ie.css"> | <link rel="stylesheet" href="<?php echo $basePath ?>stylesheets/ie.css"> |
<![endif]--> | <![endif]--> |
<!-- IE Fix for HTML5 Tags --> | <!-- IE Fix for HTML5 Tags --> |
<!--[if lt IE 9]> | <!--[if lt IE 9]> |
<script src="http://html5shiv.googlecode.com/svn/trunk/html5.js"></script> | <script src="http://html5shiv.googlecode.com/svn/trunk/html5.js"></script> |
<![endif]--> | <![endif]--> |
</head> | </head> |
<body> | <body> |
<!-- navBar --> | <!-- navBar --> |
<div id="navbar" class="container"> | <div id="navbar" class="container"> |
<div class="row"> | <div class="row"> |
<div class="four columns"> | <div class="four columns"> |
<h1><a href="/">Disclosr</a></h1> | <h1><a href="/">Disclosr</a></h1> |
</div> | </div> |
<div class="eight columns hide-on-phones"> | <div class="eight columns hide-on-phones"> |
<strong class="right"> | <strong class="right"> |
<a href="getAgency.php">Agencies</a> | <a href="getAgency.php">Agencies</a> |
<a href="about.php">About/FAQ</a> | <a href="about.php">About/FAQ</a> |
</strong> | </strong> |
</div> | </div> |
</div> | </div> |
</div> | </div> |
<!-- /navBar --> | <!-- /navBar --> |
<!-- container --> | <!-- container --> |
<div class="container"> | <div class="container"> |
<?php } | <?php } |
// Prints the bottom of every page: closes the container <div>, emits the
// JavaScript includes (foundation.js and app.js are prefixed with the global
// $basePath) and closes <body> and <html>.
// NOTE(review): the jQuery <script> tag comes after foundation.js and app.js
// in this output - confirm those scripts do not need jQuery at load time.
function include_footer() { | function include_footer() { |
global $basePath; | global $basePath; |
?> | ?> |
</div> | </div> |
<!-- container --> | <!-- container --> |
<!-- Included JS Files --> | <!-- Included JS Files --> |
<script src="<?php echo $basePath; ?>javascripts/foundation.js"></script> | <script src="<?php echo $basePath; ?>javascripts/foundation.js"></script> |
<script src="<?php echo $basePath; ?>javascripts/app.js"></script> | <script src="<?php echo $basePath; ?>javascripts/app.js"></script> |
<script src="http://code.jquery.com/jquery-1.7.1.min.js"></script> | <script src="http://code.jquery.com/jquery-1.7.1.min.js"></script> |
<!--<script language="javascript" type="text/javascript" src="javascripts/jquery.js"></script>--> | |
<script language="javascript" type="text/javascript" src="javascripts/flot/jquery.flot.js"></script> | |
</body> | </body> |
</html> | </html> |
<?php } | <?php } |
?> | |
/* Foundation v2.1.4 http://foundation.zurb.com */ | /* Foundation v2.1.4 http://foundation.zurb.com */ |
// Runs once the DOM is ready and wires up three Foundation widgets:
// tabs (dl.tabs), dismissible alert boxes (.alert-box) and the drop-down
// navigation (.nav-bar).
$(document).ready(function () { | $(document).ready(function () { |
/* Use this js doc for all application specific JS */ | /* Use this js doc for all application specific JS */ |
/* TABS --------------------------------- */ | /* TABS --------------------------------- */ |
/* Remove if you don't need :) */ | /* Remove if you don't need :) */ |
// Marks $tab as the active tab in its <dl> and shows the content element
// whose selector is the tab's href with "Tab" appended; sibling <li>
// panes in the same .tabs-content are hidden first.
function activateTab($tab) { | function activateTab($tab) { |
var $activeTab = $tab.closest('dl').find('a.active'), | var $activeTab = $tab.closest('dl').find('a.active'), |
contentLocation = $tab.attr("href") + 'Tab'; | contentLocation = $tab.attr("href") + 'Tab'; |
//Make Tab Active | //Make Tab Active |
$activeTab.removeClass('active'); | $activeTab.removeClass('active'); |
$tab.addClass('active'); | $tab.addClass('active'); |
//Show Tab Content | //Show Tab Content |
$(contentLocation).closest('.tabs-content').children('li').hide(); | $(contentLocation).closest('.tabs-content').children('li').hide(); |
$(contentLocation).show(); | $(contentLocation).show(); |
} | } |
$('dl.tabs').each(function () { | $('dl.tabs').each(function () { |
//Get all tabs | //Get all tabs |
var tabs = $(this).children('dd').children('a'); | var tabs = $(this).children('dd').children('a'); |
tabs.click(function (e) { | tabs.click(function (e) { |
activateTab($(this)); | activateTab($(this)); |
}); | }); |
}); | }); |
// A URL fragment selects the tab whose href equals that fragment.
if (window.location.hash) { | if (window.location.hash) { |
activateTab($('a[href="' + window.location.hash + '"]')); | activateTab($('a[href="' + window.location.hash + '"]')); |
} | } |
/* ALERT BOXES ------------ */ | /* ALERT BOXES ------------ */ |
// Clicking a.close fades the enclosing .alert-box out and then removes it.
$(".alert-box").delegate("a.close", "click", function(event) { | $(".alert-box").delegate("a.close", "click", function(event) { |
event.preventDefault(); | event.preventDefault(); |
$(this).closest(".alert-box").fadeOut(function(event){ | $(this).closest(".alert-box").fadeOut(function(event){ |
$(this).remove(); | $(this).remove(); |
}); | }); |
}); | }); |
/* PLACEHOLDER FOR FORMS ------------- */ | /* PLACEHOLDER FOR FORMS ------------- */ |
/* Remove this and jquery.placeholder.min.js if you don't need :) */ | /* Remove this and jquery.placeholder.min.js if you don't need :) */ |
$('input, textarea').placeholder(); | //$('input, textarea').placeholder(); |
/* UNCOMMENT THE LINE YOU WANT BELOW IF YOU WANT IE6/7/8 SUPPORT AND ARE USING .block-grids */ | /* UNCOMMENT THE LINE YOU WANT BELOW IF YOU WANT IE6/7/8 SUPPORT AND ARE USING .block-grids */ |
// $('.block-grid.two-up>li:nth-child(2n+1)').css({clear: 'left'}); | // $('.block-grid.two-up>li:nth-child(2n+1)').css({clear: 'left'}); |
// $('.block-grid.three-up>li:nth-child(3n+1)').css({clear: 'left'}); | // $('.block-grid.three-up>li:nth-child(3n+1)').css({clear: 'left'}); |
// $('.block-grid.four-up>li:nth-child(4n+1)').css({clear: 'left'}); | // $('.block-grid.four-up>li:nth-child(4n+1)').css({clear: 'left'}); |
// $('.block-grid.five-up>li:nth-child(5n+1)').css({clear: 'left'}); | // $('.block-grid.five-up>li:nth-child(5n+1)').css({clear: 'left'}); |
/* DROPDOWN NAV ------------- */ | /* DROPDOWN NAV ------------- */ |
// Index of the nav link clicked most recently; null until the first click.
var currentFoundationDropdown = null; | var currentFoundationDropdown = null; |
$('.nav-bar li a, .nav-bar li a:after').each(function() { | $('.nav-bar li a, .nav-bar li a:after').each(function() { |
$(this).data('clicks', 0); | $(this).data('clicks', 0); |
}); | }); |
// Each link keeps a per-element 'clicks' counter that is reset whenever a
// different link index is clicked.
$('.nav-bar li a, .nav-bar li a:after').live('click', function(e) { | $('.nav-bar li a, .nav-bar li a:after').live('click', function(e) { |
e.preventDefault(); | e.preventDefault(); |
if (currentFoundationDropdown !== $(this).index() || currentFoundationDropdown === null) { | if (currentFoundationDropdown !== $(this).index() || currentFoundationDropdown === null) { |
$(this).data('clicks', 0); | $(this).data('clicks', 0); |
currentFoundationDropdown = $(this).index(); | currentFoundationDropdown = $(this).index(); |
} | } |
$(this).data('clicks', ($(this).data('clicks') + 1)); | $(this).data('clicks', ($(this).data('clicks') + 1)); |
var f = $(this).siblings('.flyout'); | var f = $(this).siblings('.flyout'); |
// NOTE(review): $(this) is a single element, so .parent('.has-flyout') can
// hold at most one element and "length > 1" looks like it can never be
// true - confirm whether "> 0" was intended.
if (!f.is(':visible') && $(this).parent('.has-flyout').length > 1) { | if (!f.is(':visible') && $(this).parent('.has-flyout').length > 1) { |
$('.nav-bar li .flyout').hide(); | $('.nav-bar li .flyout').hide(); |
f.show(); | f.show(); |
} else if (($(this).data('clicks') > 1) || ($(this).parent('.has-flyout').length < 1)) { | } else if (($(this).data('clicks') > 1) || ($(this).parent('.has-flyout').length < 1)) { |
window.location = $(this).attr('href'); | window.location = $(this).attr('href'); |
} | } |
}); | }); |
// Clicks inside the nav bar do not propagate further; clicks on or inside a
// .flyout also have their default action cancelled.
$('.nav-bar').live('click', function(e) { | $('.nav-bar').live('click', function(e) { |
e.stopPropagation(); | e.stopPropagation(); |
if ($(e.target).parents().is('.flyout') || $(e.target).is('.flyout')) { | if ($(e.target).parents().is('.flyout') || $(e.target).is('.flyout')) { |
e.preventDefault(); | e.preventDefault(); |
} | } |
}); | }); |
// $('body').bind('touchend', function(e) { | // $('body').bind('touchend', function(e) { |
// if (!$(e.target).parents().is('.nav-bar') || !$(e.target).is('.nav-bar')) { | // if (!$(e.target).parents().is('.nav-bar') || !$(e.target).is('.nav-bar')) { |
// $('.nav-bar li .flyout').is(':visible').hide(); | // $('.nav-bar li .flyout').is(':visible').hide(); |
// } | // } |
// }); | // }); |
/* DISABLED BUTTONS ------------- */ | /* DISABLED BUTTONS ------------- */ |
/* Gives elements with a class of 'disabled' a return: false; */ | /* Gives elements with a class of 'disabled' a return: false; */ |
}); | }); |
<?php | <?php |
// Schema for an agency document. Each entry under "properties" describes one
// field: its "type", whether it is "required", a display title ("x-title"),
// an optional "x-itemprop", and a "description".
$schemas['agency'] = Array( | $schemas['agency'] = Array( |
"description" => "Representation of government agency and online transparency measures", | "description" => "Representation of government agency and online transparency measures", |
"type" => "object", | "type" => "object", |
"properties" => Array( | "properties" => Array( |
"name" => Array("type" => "string", "required" => true, "x-title" => "Name", "description" => "Name, most recent and broadest"), | "name" => Array("type" => "string", "required" => true, "x-itemprop" => "name", "x-title" => "Name", "description" => "Name, most recent and broadest"), |
"shortName" => Array("type" => "string", "required" => false, "x-title" => "Short Name", "description" => "Name shortened, usually to an acronym"), | "shortName" => Array("type" => "string", "required" => false, "x-title" => "Short Name", "description" => "Name shortened, usually to an acronym"), |
"foiEmail" => Array("type" => "string", "required" => false, "x-title" => "FOI Contact Email", "description" => "FOI contact email if not foi@"), | "foiEmail" => Array("type" => "string", "required" => false, "x-title" => "FOI Contact Email", "description" => "FOI contact email if not foi@"), |
"sameAs" => Array("type" => "array", "required" => false, "x-itemprop"=>"http://www.w3.org/2002/07/owl#sameAs","x-title" => "Same As", "description" => "Same as other URLs/URIs for this entity", | |
"items" => Array("type" => "string")), | |
"otherNames" => Array("type" => "array", "required" => true, "x-title" => "Past/Other Names", "description" => "Other names for organisation", | "otherNames" => Array("type" => "array", "required" => true, "x-title" => "Past/Other Names", "description" => "Other names for organisation", |
"items" => Array("type" => "string")), | "items" => Array("type" => "string")), |
"foiBodies" => Array("type" => "array", "required" => true, "x-title" => "FOI Bodies", "description" => "Organisational units within this agency that are subject to FOI Act but are not autonomous", | "foiBodies" => Array("type" => "array", "required" => true, "x-title" => "FOI Bodies","x-itemprop"=>"members", "description" => "Organisational units within this agency that are subject to FOI Act but are not autonomous", |
"items" => Array("type" => "string")), | "items" => Array("type" => "string")), |
"orgType" => Array("type" => "string", "required" => true, "x-title" => "Organisation Type", "description" => "Org type based on legal formation via FMA/CAC legislation etc."), | "orgType" => Array("type" => "string", "required" => true, "x-title" => "Organisation Type", "description" => "Org type based on legal formation via FMA/CAC legislation etc."), |
"parentOrg" => Array("type" => "string", "required" => true, "x-title" => "Parent Organisation", "description" => "Parent organisation, usually a department of state"), | "parentOrg" => Array("type" => "string", "required" => true, "x-title" => "Parent Organisation", "description" => "Parent organisation, usually a department of state"), |
"website" => Array("type" => "string", "required" => true, "x-title" => "Website", "description" => "Website URL"), | "website" => Array("type" => "string", "required" => true, "x-title" => "Website", "x-itemprop" => "url", "description" => "Website URL"), |
"abn" => Array("type" => "string", "required" => true, "x-title" => "Australian Business Number", "description" => "ABN from business register"), | "abn" => Array("type" => "string", "required" => true, "x-title" => "Australian Business Number", "description" => "ABN from business register"), |
"contractListURL" => Array("type" => "string", "required" => true, "x-title" => "Contract Listing", "description" => "Departmental and agency contracts, mandated by the Senate @ http://www.aph.gov.au/senate/pubs/standing_orders/d05.htm"), | "contractListURL" => Array("type" => "string", "required" => true, "x-title" => "Contract Listing", "description" => "Departmental and agency contracts, <a href='http://www.aph.gov.au/senate/pubs/standing_orders/d05.htm'>mandated by the Senate</a>"), |
"grantsReportingURL" => Array("type" => "string", "required" => true, "x-title" => "Grants Awarded", | "grantsReportingURL" => Array("type" => "string", "required" => true, "x-title" => "Grants Awarded", |
"description" => "Departmental and agency grants mandated by the Senate @ http://www.aph.gov.au/senate/pubs/standing_orders/d05.htm and Commonwealth grants guidelines http://www.finance.gov.au/publications/fmg-series/23-commonwealth-grant-guidelines.html"), | "description" => "Departmental and agency grants <a href='http://www.aph.gov.au/senate/pubs/standing_orders/d05.htm'>mandated by the Senate</a> and <a href='http://www.finance.gov.au/publications/fmg-series/23-commonwealth-grant-guidelines.html'>Commonwealth grants guidelines</a> "), |
"annualReportURL" => Array("type" => "string", "required" => true, "x-title" => "Annual Report(s)", "description" => ""), | "annualReportURL" => Array("type" => "string", "required" => true, "x-title" => "Annual Report(s)", "description" => ""), |
"consultanciesURL" => Array("type" => "string", "required" => true, "x-title" => "Consultants Hired", "description" => ""), | "consultanciesURL" => Array("type" => "string", "required" => true, "x-title" => "Consultants Hired", "description" => ""), |
"legalExpenditureURL" => Array("type" => "string", "required" => true, "x-title" => "Legal Services Expenditure", "description" => "Legal Services Expenditure mandated by Legal Services Directions 2005"), | "legalExpenditureURL" => Array("type" => "string", "required" => true, "x-title" => "Legal Services Expenditure", "description" => "Legal Services Expenditure mandated by Legal Services Directions 2005"), |
"recordsListURL" => Array("type" => "string", "required" => true, "x-title" => "Files/Records Held", "description" => "Indexed lists of departmental and agency files, mandated by the Senate @ http://www.aph.gov.au/senate/pubs/standing_orders/d05.htm"), | "recordsListURL" => Array("type" => "string", "required" => true, "x-title" => "Files/Records Held", "description" => "Indexed lists of departmental and agency files, <a href='http://www.aph.gov.au/senate/pubs/standing_orders/d05.htm'>mandated by the Senate</a>"), |
"FOIDocumentsURL" => Array("type" => "string", "required" => true, "x-title" => "FOI Documents Released", "description" => ""), | "FOIDocumentsURL" => Array("type" => "string", "required" => true, "x-title" => "FOI Documents Released", "description" => ""), |
"infoPublicationSchemeURL" => Array("type" => "string", "required" => true, "x-title" => "Information Publication Scheme", "description" => ""), | "infoPublicationSchemeURL" => Array("type" => "string", "required" => true, "x-title" => "Information Publication Scheme", "description" => ""), |
"appointmentsURL" => Array("type" => "string", "required" => true, "x-title" => "Agency Appointments/Boards", "description" => "Departmental and agency appointments and vacancies , mandated by the Senate @ http://www.aph.gov.au/senate/pubs/standing_orders/d05.htm"), | "appointmentsURL" => Array("type" => "string", "required" => true, "x-title" => "Agency Appointments/Boards", "description" => "Departmental and agency appointments and vacancies , <a href='http://www.aph.gov.au/senate/pubs/standing_orders/d05.htm'>mandated by the Senate</a>"), |
"advertisingURL" => Array("type" => "string", "required" => true, "x-title" => "Approved Advertising Campaigns", "description" => " Agency advertising and public information projects, mandated by the Senate @ http://www.aph.gov.au/senate/pubs/standing_orders/d05.htm "), | "advertisingURL" => Array("type" => "string", "required" => true, "x-title" => "Approved Advertising Campaigns", "description" => " Agency advertising and public information projects, <a href='http://www.aph.gov.au/senate/pubs/standing_orders/d05.htm'>mandated by the Senate</a> "), |
"hasRSS" => Array("type" => "string", "required" => true, "x-title" => "Has RSS", "description" => ""), | "hasRSS" => Array("type" => "string", "required" => true, "x-title" => "Has RSS", "description" => ""), |
"hasMailingList" => Array("type" => "string", "required" => true, "x-title" => "Has Mailing List", "description" => ""), | "hasMailingList" => Array("type" => "string", "required" => true, "x-title" => "Has Mailing List", "description" => ""), |
"hasTwitter" => Array("type" => "string", "required" => true, "x-title" => "Has Twitter", "description" => ""), | "hasTwitter" => Array("type" => "string", "required" => true, "x-title" => "Has Twitter", "description" => ""), |
"hasFacebook" => Array("type" => "string", "required" => true, "x-title" => "Has Facebook", "description" => ""), | "hasFacebook" => Array("type" => "string", "required" => true, "x-title" => "Has Facebook", "description" => ""), |
"hasYouTube" => Array("type" => "string", "required" => true, "x-title" => "Has YouTube", "description" => ""), | "hasYouTube" => Array("type" => "string", "required" => true, "x-title" => "Has YouTube", "description" => ""), |
// NOTE(review): "hasFlickr" is listed twice below, and the second entry's
// x-title reads "Has YouTube". PHP keeps the last value for a repeated
// array key, so the "Has Flickr" title would be overwritten - confirm
// which entry is meant to stay.
"hasFlickr" => Array("type" => "string", "required" => true, "x-title" => "Has Flickr", "description" => ""), | |
"hasFlickr" => Array("type" => "string", "required" => true, "x-title" => "Has YouTube", "description" => ""), | |
"hasCCBY" => Array("type" => "string", "required" => true, "x-title" => "Has CC-BY", "description" => "Has any page licenced Creative Commons - Attribution"), | "hasCCBY" => Array("type" => "string", "required" => true, "x-title" => "Has CC-BY", "description" => "Has any page licenced Creative Commons - Attribution"), |
), | ), |
/* "org":{"type":"object", | /* "org":{"type":"object", |
"properties":{ | "properties":{ |
"organizationName":{"type":"string"}, | "organizationName":{"type":"string"}, |
"organizationUnit":{"type":"string"}}, | "organizationUnit":{"type":"string"}}, |
} | } |
} */ | } */ |
); | ); |
?> | ?> |
#http://packages.python.org/CouchDB/client.html | |
import couchdb | |
import urllib2 | |
from BeautifulSoup import BeautifulSoup | |
import re | |
#http://diveintopython.org/http_web_services/etags.html | |
class NotModifiedHandler(urllib2.BaseHandler):
    """urllib2 handler for HTTP 304 replies.

    Wraps the reply in an ``addinfourl`` object whose ``code`` attribute is
    set to the status code, and returns that object to the caller.
    """

    def http_error_304(self, req, fp, code, message, headers):
        """Return the 304 reply as a response object carrying ``code``."""
        response = urllib2.addinfourl(fp, headers, req.get_full_url())
        response.code = code
        return response
def scrapeAndStore(URL, depth, agency):
    """Fetch a page with conditional-GET headers and print the links it contains.

    Prints a notice when the server answers 304 (not modified); otherwise
    parses the body with BeautifulSoup and prints the ``href`` of every
    anchor that has one. Returns None.

    URL    -- address to fetch (currently overridden, see the note below)
    depth  -- not used by the code below
    agency -- not used by the code below
    """
    # NOTE(review): the URL argument is replaced by a fixed test address and
    # the caching tags are placeholder values; both look like scaffolding
    # left in place until previously stored values are loaded from CouchDB.
    URL = "http://www.google.com"
    req = urllib2.Request(URL)
    etag = 'y'
    last_modified = 'y'
    # If there is a previous version stored in couchdb, load caching helper tags.
    if etag:
        req.add_header("If-None-Match", etag)
    if last_modified:
        req.add_header("If-Modified-Since", last_modified)

    opener = urllib2.build_opener(NotModifiedHandler())
    url_handle = opener.open(req)

    # Response metadata; read here but not used by the code below yet.
    headers = url_handle.info()  # the addinfourls have the .info() too
    etag = headers.getheader("ETag")
    last_modified = headers.getheader("Last-Modified")
    web_server = headers.getheader("Server")
    file_size = headers.getheader("Content-Length")
    mime_type = headers.getheader("Content-Type")

    if not hasattr(url_handle, 'code'):
        # This branch runs only when the response has no status code, so the
        # message must not read url_handle.code. The values are applied with
        # the % operator; passing them as extra print arguments printed the
        # raw format string instead.
        print("error %s in downloading %s" % ("(no status code)", URL))
        # record/alert error to error database
        return

    if url_handle.code == 304:
        print("the web page has not been modified")
        return

    # do scraping
    html = url_handle.read()
    # http://www.crummy.com/software/BeautifulSoup/documentation.html
    soup = BeautifulSoup(html)
    # href=True skips anchors without an href attribute, which would
    # otherwise raise KeyError on link['href'].
    links = soup.findAll('a', href=True)  # soup.findAll('a', id=re.compile("^p-"))
    for link in links:
        print(link['href'])
    # for each unique link
    # if html mimetype
    #   go down X levels,
    #   diff with last stored attachment, store in document
    # if not
    #   remember to save parentURL and title (link text that lead to document)
    # store as attachment epoch-filename
# CouchDB server that holds the agency documents.
couch = couchdb.Server('http://192.168.1.148:5984/')

# select database
agencydb = couch['disclosr-agencies']

# List the agencies returned by the view (not recently scraped agencies view?).
for row in agencydb.view('app/getScrapeRequired'):
    agency = agencydb.get(row.id)
    print(agency['name'])

# Trial run with dummy arguments.
scrapeAndStore("A", 1, 1)
<?php | |
/**
 * Streams every contract notice whose "childCN" is null to the browser as a
 * CSV download, one row per notice, with the three epoch date columns
 * rendered as Y-m-d.
 *
 * Uses the PDO connection in $conn, which is presumably created by
 * lib/common.inc.php (not visible from here).
 */
include_once("./lib/common.inc.php");
setlocale(LC_CTYPE, 'C');
// source: http://stackoverflow.com/questions/81934/easy-way-to-export-a-sql-table-without-access-to-the-server-or-phpmyadmin#81951

// Lookup table: UNSPSC codes ending in 00000 => category title.
$unspsc = Array();
$unspscresult = $conn->prepare('select * from "UNSPSCcategories" where "UNSPSC"::text like \'%00000\';');
$unspscresult->execute();
foreach ($unspscresult->fetchAll() as $row) {
    $unspsc[$row['UNSPSC']] = $row['Title'];
}

// PDO::ATTR_CURSOR expects a PDO::CURSOR_* constant; forward-only is all the
// sequential fetch loop below needs.
$query = $conn->prepare('
SELECT "CNID",contractnotice."agencyName",agency_nametoabn.abn as "agencyABN",
EXTRACT(EPOCH FROM "publishDate") as "publishDate",
EXTRACT(EPOCH FROM "contractStart") as "contractStart",
EXTRACT(EPOCH FROM "contractEnd") as "contractEnd",
value,description,category,
"supplierName",(case when "supplierABN" != 0 THEN "supplierABN"::text ELSE "supplierName" END) as supplierID,
(\'https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN\'::text || "CNID"::text) as sourceURL
FROM contractnotice join agency_nametoabn on contractnotice."agencyName"=agency_nametoabn."agencyName"
where "childCN" is null'
, array(PDO::ATTR_CURSOR => PDO::CURSOR_FWDONLY));

// prepare() returns false on failure when PDO is not in exception mode.
if ($query === false) {
    die("Export terminated, db error" . print_r($conn->errorInfo(), true));
}

$query->execute();
// Failures of execute() are reported on the statement handle, not on $conn.
$errors = $query->errorInfo();
if (!empty($errors[2])) {
    die("Export terminated, db error" . print_r($errors, true));
}

$num_fields = $query->columnCount();
$headers = Array();
for ($i = 0; $i < $num_fields; $i++) { // for each column in query, make a CSV header
    $meta = $query->getColumnMeta($i);
    $headers[] = $meta['name'];
}

$dateColumns = array("publishDate", "contractStart", "contractEnd");
// None of these names is selected by the query above, so the lookup is
// currently inactive; kept for when category columns are added.
$categoryColumns = array("cat1", "cat2", "cat3");

$fp = fopen('php://output', 'w');
if ($fp && $query) {
    header('Content-Type: text/csv');
    header('Content-Disposition: attachment; filename="export.' . date("c") . '.csv"');
    header('Pragma: no-cache');
    header('Expires: 0');
    fputcsv($fp, $headers);
    while ($row = $query->fetch(PDO::FETCH_NUM, PDO::FETCH_ORI_NEXT)) {
        // Iterate by value and write back explicitly, so no reference to the
        // last cell outlives the loop.
        foreach ($row as $key => $colvalue) {
            // Drop non-printable characters that would corrupt the CSV.
            $colvalue = preg_replace('/[^[:print:]]/', '', utf8_encode((string) $colvalue));
            if (in_array($headers[$key], $dateColumns)) {
                // A NULL date arrives as '' and must stay empty rather than
                // being formatted as 1970-01-01.
                $colvalue = ($colvalue === '') ? '' : date("Y-m-d", (int) $colvalue);
            }
            /* if ($headers[$key] == "CNID") {
              $colvalue = str_replace("A","", $colvalue);
              } */
            if (in_array($headers[$key], $categoryColumns) && isset($unspsc[$colvalue])) {
                $colvalue = $unspsc[$colvalue];
            }
            $row[$key] = $colvalue;
        }
        fputcsv($fp, array_values($row));
    }
    die;
}
?> | |
#http://packages.python.org/CouchDB/client.html | |
import couchdb | |
import urllib2 | |
from BeautifulSoup import BeautifulSoup | |
import re | |
# Assumes CouchDB is listening on localhost:5984.
# If your CouchDB server is running elsewhere, set it up like this:
#   couch = couchdb.Server('http://example.com:5984/')
couch = couchdb.Server()

# select database
agencydb = couch['disclosr-agencies']

# List the agencies returned by the view (not recently scraped agencies view?).
for row in agencydb.view('app/getScrapeRequired'):
    agency = agencydb.get(row.id)
    print(agency['agencyName'])
#http://diveintopython.org/http_web_services/etags.html | |
class NotModifiedHandler(urllib2.BaseHandler):
    """urllib2 handler that passes "304 Not Modified" replies through to the caller."""

    def http_error_304(self, req, fp, code, message, headers):
        """Return the 304 reply as a response object whose ``code`` is set.

        The response is assembled from the reply's file object, its headers
        and the request's full URL; ``message`` is ignored.
        """
        not_modified = urllib2.addinfourl(fp, headers, req.get_full_url())
        not_modified.code = code
        return not_modified
def scrapeAndStore(URL, depth, agency): | |
URL = "http://www.hole.fi/jajvirta/weblog/" | |
req = urllib2.Request(URL) | |
#if there is a previous version stored in couchdb, load caching helper tags | |
if etag: | |
req.add_header("If-None-Match", etag) | |
if last_modified: | |
req.add_header("If-Modified-Since", last_modified) | |
opener = urllib2.build_opener(NotModifiedHandler()) | |
url_handle = opener.open(req) | |
headers = url_handle.info() # the addinfourls have the .info() too | |
etag = headers.getheader("ETag") | |
last_modified = headers.getheader("Last-Modified") | |
web_server = headers.getheader("Server") | |
file_size = headers.getheader("Content-Length") | |
mime_type = headers.getheader("Content-Type") | |
if hasattr(url_handle, 'code') and url_handle.code == 304: | |
print "the web page has not been modified" | |
else: | |
print "error %s in downloading %s", url_handle.code, URL | |
#record/alert error to error database | |
#do scraping | |
html = ? | |
# http://www.crummy.com/software/BeautifulSoup/documentation.html | |
soup = BeautifulSoup(html) | |
links = soup.findAll('a') # soup.findAll('a', id=re.compile("^p-")) | |
for link in links: | |
print link['href'] | |
#for each unique link | |
#if html mimetype | |
# go down X levels, | |
# diff with last stored attachment, store in document | |
#if not | |
# remember to save parentURL and title (link text that lead to document) | |
#store as attachment epoch-filename |