for line in `curl "http://localhost:5984/disclosr-foidocuments/_design/app/_view/byAgencyID?reduce=false&keys=%5B\"5716ce0aacfe98f7d638b7a66b7f1040\"%5D&limit=600" | xargs -L1`; do | |
# echo $line | |
id=`echo $line | grep -Po '_id:.*?[^\\\],' | perl -pe 's/_id://; s/^//; s/,$//'` | |
rev=`echo $line | grep -Po 'rev:.*?[^\\\],'| perl -pe 's/rev://; s/^//; s/,$//'` | |
if [ -n "$id" ]; then | |
echo "curl -X DELETE http://localhost:5984/disclosr-foidocuments/$id?rev=$rev" | |
curl -X DELETE http://localhost:5984/disclosr-foidocuments/$id?rev=$rev | |
fi | |
done; | |
<?php | <?php |
include('template.inc.php'); | include('template.inc.php'); |
include_header_documents("About"); | include_header_documents("About"); |
include_once('../include/common.inc.php'); | include_once('../include/common.inc.php'); |
?> | ?> |
<h1>About</h1> | <h1>About</h1> |
Written and managed by Alex Sadleir (maxious [at] lambdacomplex.org) | |
<?php | <?php |
include_footer_documents(); | include_footer_documents(); |
?> | ?> |
<?php | <?php |
include('template.inc.php'); | include('template.inc.php'); |
include_once('../include/common.inc.php'); | include_once('../include/common.inc.php'); |
$agenciesdb = $server->get_db('disclosr-agencies'); | $agenciesdb = $server->get_db('disclosr-agencies'); |
$idtoname = Array(); | $idtoname = Array(); |
foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) { | foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) { |
$idtoname[$row->id] = trim($row->value->name); | $idtoname[$row->id] = trim($row->value->name); |
} | } |
$foidocsdb = $server->get_db('disclosr-foidocuments'); | $foidocsdb = $server->get_db('disclosr-foidocuments'); |
include_header_documents((isset($_REQUEST['id']) ? $idtoname[$_REQUEST['id']] : 'Entries by Agency')); | include_header_documents((isset($_REQUEST['id']) ? $idtoname[$_REQUEST['id']] : 'Entries by Agency')); |
$endkey = (isset($_REQUEST['end_key']) ? $_REQUEST['end_key'] : '9999-99-99'); | $endkey = (isset($_REQUEST['end_key']) ? $_REQUEST['end_key'] : '9999-99-99'); |
?> | ?> |
<div class="headline">Read all the information released by Australian Federal Government agencies under the FOI Act | <div class="headline">Read all the information released by Australian Federal Government agencies under the FOI Act |
in one place! | in one place! |
</div> | </div> |
<a style='float:right' href="rss.xml.php"><img src="img/feed-icon-14x14.png" alt="RSS Icon"/> All Agencies RSS Feed</a> | <a style='float:right' href="rss.xml.php"><img src="img/feed-icon-14x14.png" alt="RSS Icon"/> All Agencies RSS Feed</a> |
<br> | <br> |
<?php | <?php |
try { | try { |
if (isset($_REQUEST['id'])) { | if (isset($_REQUEST['id'])) { |
$rows = $foidocsdb->get_view("app", "byAgencyID", $_REQUEST['id'], false, false, false)->rows; | $rows = $foidocsdb->get_view("app", "byAgencyID", $_REQUEST['id'], false, false, false)->rows; |
foreach ($rows as $row) { | foreach ($rows as $row) { |
//print_r($rows); | //print_r($rows); |
echo displayLogEntry($row, $idtoname); | echo displayLogEntry($row, $idtoname); |
if (!isset($startkey)) | if (!isset($startkey)) |
$startkey = $row->key; | $startkey = $row->key; |
$endkey = $row->key; | $endkey = $row->key; |
} | } |
} else { | } else { |
$rows = $foidocsdb->get_view("app", "byAgencyID?group=true", null, false, false, true)->rows; | $rows = $foidocsdb->get_view("app", "byAgencyID?group=true", null, false, false, true)->rows; |
if ($rows) { | if ($rows) { |
function cmp($a, $b) | |
{ | |
global $idtoname; | |
return strcmp($idtoname[$a->key], $idtoname[$b->key]); | |
} | |
usort($rows, "cmp"); | |
foreach ($rows as $row) { | foreach ($rows as $row) { |
echo '<a href="agency.php?id=' . $row->key . '">' . $idtoname[$row->key] . " (" . $row->value . " records)</a> <br>\n"; | echo '<a href="agency.php?id=' . $row->key . '">' . $idtoname[$row->key] . " (" . $row->value . " records)</a> <br>\n"; |
} | } |
} | } |
} | } |
} catch (SetteeRestClientException $e) { | } catch (SetteeRestClientException $e) { |
setteErrorHandler($e); | setteErrorHandler($e); |
} | } |
echo "<a class='btn btn-large btn-primary' href='?end_key=$endkey' style='float:right;'>next page <i class='icon-circle-arrow-right icon-white'></i></a>"; | echo "<a class='btn btn-large btn-primary' href='?end_key=$endkey' style='float:right;'>next page <i class='icon-circle-arrow-right icon-white'></i></a>"; |
include_footer_documents(); | include_footer_documents(); |
?> | ?> |
<?php | <?php |
include('template.inc.php'); | include('template.inc.php'); |
include_header_documents("Charts"); | include_header_documents("Charts"); |
include_once('../include/common.inc.php'); | include_once('../include/common.inc.php'); |
$agenciesdb = $server->get_db('disclosr-agencies'); | $agenciesdb = $server->get_db('disclosr-agencies'); |
$idtoname = Array(); | $idtoname = Array(); |
foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) { | foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) { |
$idtoname[$row->id] = trim($row->value->name); | $idtoname[$row->id] = trim($row->value->name); |
} | } |
$foidocsdb = $server->get_db('disclosr-foidocuments'); | $foidocsdb = $server->get_db('disclosr-foidocuments'); |
?> | ?> |
<div class="foundation-header"> | <div class="foundation-header"> |
<h1><a href="about.php">Charts</a></h1> | <h1><a href="about.php">Charts</a></h1> |
<h4 class="subheader"></h4> | <h4 class="subheader"></h4> |
</div> | </div> |
<div id="bydate" style="width:1000px;height:300px;"></div> | <div id="bydate" style="width:1000px;height:300px;"></div> |
<div id="byagency" style="width:1000px;height:1400px;"></div> | <div id="byagency" style="width:1000px;height:1400px;"></div> |
<script id="source"> | <script id="source"> |
window.onload = function () { | window.onload = function () { |
$(document).ready(function () { | $(document).ready(function () { |
var | var |
d1 = [], | d1 = [], |
options1, | options1, |
o1; | o1; |
<?php | <?php |
try { | try { |
$rows = $foidocsdb->get_view("app", "byDateMonthYear?group=true",null, false,false,true)->rows; | $rows = $foidocsdb->get_view("app", "byDateMonthYear?group=true",null, false,false,true)->rows; |
$dataValues = Array(); | $dataValues = Array(); |
foreach ($rows as $row) { | foreach ($rows as $row) { |
$dataValues[$row->key] = $row->value; | $dataValues[$row->key] = $row->value; |
} | } |
$i = 0; | $i = 0; |
ksort($dataValues); | ksort($dataValues); |
foreach ($dataValues as $key => $value) { | foreach ($dataValues as $key => $value) { |
$date = date_create_from_format('Y-m-d', $key); | $date = date_create_from_format('Y-m-d', $key); |
if (date_format($date, 'U') != "") { | if (date_format($date, 'U') != "") { |
echo " d1.push([".date_format($date, 'U')."000, $value]);" . PHP_EOL; | echo " d1.push([".date_format($date, 'U')."000, $value]);" . PHP_EOL; |
// echo " emplabels.push('$key');" . PHP_EOL; | // echo " emplabels.push('$key');" . PHP_EOL; |
$i++; | $i++; |
} | } |
} | } |
} catch (SetteeRestClientException $e) { | } catch (SetteeRestClientException $e) { |
setteErrorHandler($e); | setteErrorHandler($e); |
} | } |
?> | ?> |
options1 = { | options1 = { |
xaxis: { | xaxis: { |
mode: 'time', | mode: 'time', |
labelsAngle: 45 | labelsAngle: 45 |
}, | }, |
selection: { | selection: { |
mode: 'x' | mode: 'x' |
}, | }, |
HtmlText: false, | HtmlText: false, |
title: 'Disclosure Log entries added by Date' | title: 'Disclosure Log entries added by Date' |
}; | }; |
// Draw graph with default options, overwriting with passed options | // Draw graph with default options, overwriting with passed options |
function drawGraph(opts) { | function drawGraph(opts) { |
// Clone the options, so the 'options' variable always keeps intact. | // Clone the options, so the 'options' variable always keeps intact. |
o1 = Flotr._.extend(Flotr._.clone(options1), opts || {}); | o1 = Flotr._.extend(Flotr._.clone(options1), opts || {}); |
// Return a new graph. | // Return a new graph. |
return Flotr.draw( | return Flotr.draw( |
document.getElementById("bydate"), | document.getElementById("bydate"), |
[ d1 ], | [ d1 ], |
o1 | o1 |
); | ); |
} | } |
graph = drawGraph(); | graph = drawGraph(); |
Flotr.EventAdapter.observe(document.getElementById("bydate"), 'flotr:select', function (area) { | Flotr.EventAdapter.observe(document.getElementById("bydate"), 'flotr:select', function (area) { |
// Draw selected area | // Draw selected area |
graph = drawGraph({ | graph = drawGraph({ |
xaxis: { min: area.x1, max: area.x2, mode: 'time', labelsAngle: 45 }, | xaxis: { min: area.x1, max: area.x2, mode: 'time', labelsAngle: 45 }, |
yaxis: { min: area.y1, max: area.y2 } | yaxis: { min: area.y1, max: area.y2 } |
}); | }); |
}); | }); |
// When graph is clicked, draw the graph with default area. | // When graph is clicked, draw the graph with default area. |
Flotr.EventAdapter.observe(document.getElementById("bydate"), 'flotr:click', function () { | Flotr.EventAdapter.observe(document.getElementById("bydate"), 'flotr:click', function () { |
graph = drawGraph(); | graph = drawGraph(); |
}); | }); |
}); | }); |
}; | }; |
var d2 = []; | var d2 = []; |
var agencylabels = []; | var agencylabels = []; |
function agencytrackformatter(obj) { | function agencytrackformatter(obj) { |
return agencylabels[Math.floor(obj.y)] + " = " + obj.x; | return agencylabels[Math.floor(obj.y)] + " = " + obj.x; |
} | } |
function agencytickformatter(val, axis) { | function agencytickformatter(val, axis) { |
if (agencylabels[Math.floor(val)]) { | if (agencylabels[Math.floor(val)]) { |
return (agencylabels[Math.floor(val)]) ; | return (agencylabels[Math.floor(val)]) ; |
} else { | } else { |
return ""; | return ""; |
} | } |
} | } |
<?php | <?php |
try { | try { |
$rows = $foidocsdb->get_view("app", "byAgencyID?group=true",null, false,false,true)->rows; | $rows = $foidocsdb->get_view("app", "byAgencyID?group=true",null, false,false,true)->rows; |
function cmp($a, $b) | |
{ | |
return $a->value > $b->value; | |
} | |
usort($rows, "cmp"); | |
$dataValues = Array(); | $dataValues = Array(); |
$i = 0; | $i = 0; |
foreach ($rows as $row) { | foreach ($rows as $row) { |
echo " d2.push([ $row->value,$i]);" . PHP_EOL; | echo " d2.push([ $row->value,$i]);" . PHP_EOL; |
echo " agencylabels.push(['".str_replace("'","",$idtoname[$row->key])."']);" . PHP_EOL; | echo " agencylabels.push(['".str_replace("'","",$idtoname[$row->key])."']);" . PHP_EOL; |
$i++; | $i++; |
} | } |
} catch (SetteeRestClientException $e) { | } catch (SetteeRestClientException $e) { |
setteErrorHandler($e); | setteErrorHandler($e); |
} | } |
?> | ?> |
// Draw the graph | // Draw the graph |
Flotr.draw( | Flotr.draw( |
document.getElementById("byagency"), | document.getElementById("byagency"), |
[d2], | [d2], |
{ | { |
title: "Disclosure Log entries by Agency", | title: "Disclosure Log entries by Agency", |
bars: { | bars: { |
show: true, | show: true, |
horizontal: true, | horizontal: true, |
shadowSize: 0, | shadowSize: 0, |
barWidth: 0.5 | barWidth: 0.5 |
}, | }, |
mouse: { | mouse: { |
track: true, | track: true, |
relative: true, | relative: true, |
trackFormatter: agencytrackformatter | trackFormatter: agencytrackformatter |
}, | }, |
yaxis: { | yaxis: { |
minorTickFreq: 1, | minorTickFreq: 1, |
noTicks: agencylabels.length, | noTicks: agencylabels.length, |
showMinorLabels: true, | showMinorLabels: true, |
tickFormatter: agencytickformatter | tickFormatter: agencytickformatter |
}, | }, |
xaxis: { | xaxis: { |
min: 0, | min: 0, |
autoscaleMargin: 1 | autoscaleMargin: 1 |
}, | }, |
legend: { | legend: { |
show: false | show: false |
} | } |
} | } |
); | ); |
</script> | </script> |
<?php | <?php |
include_footer_documents(); | include_footer_documents(); |
?> | ?> |
<?php | <?php |
include('template.inc.php'); | include('template.inc.php'); |
include_header_documents(""); | include_header_documents(""); |
include_once('../include/common.inc.php'); | include_once('../include/common.inc.php'); |
$endkey = (isset($_REQUEST['end_key']) ? $_REQUEST['end_key'] : '9999-99-99'); | $endkey = (isset($_REQUEST['end_key']) ? $_REQUEST['end_key'] : '9999-99-99'); |
$enddocid = (isset($_REQUEST['end_docid']) ? $_REQUEST['end_docid'] : null); | $enddocid = (isset($_REQUEST['end_docid']) ? $_REQUEST['end_docid'] : null); |
?> | ?> |
<div class="headline">Read all the information released by Australian Federal Government agencies under the FOI Act in | <div class="headline">Read all the information released by Australian Federal Government agencies under the FOI Act in |
one place! | one place! |
</div> | </div> |
<a style='float:right' href="rss.xml.php"><img src="img/feed-icon-14x14.png" alt="RSS Icon"/> All Agencies RSS Feed</a> | <a style='float:right' href="rss.xml.php"><img src="img/feed-icon-14x14.png" alt="RSS Icon"/> All Agencies RSS Feed</a> |
<br> | <br> |
<?php | <?php |
$agenciesdb = $server->get_db('disclosr-agencies'); | $agenciesdb = $server->get_db('disclosr-agencies'); |
$idtoname = Array(); | $idtoname = Array(); |
foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) { | foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) { |
$idtoname[$row->id] = trim($row->value->name); | $idtoname[$row->id] = trim($row->value->name); |
} | } |
$foidocsdb = $server->get_db('disclosr-foidocuments'); | $foidocsdb = $server->get_db('disclosr-foidocuments'); |
//print_r($foidocsdb); | |
try { | try { |
$rows = $foidocsdb->get_view("app", "byDate", Array($endkey, '0000-00-00'), true, 20, null, $enddocid)->rows; | $rows = $foidocsdb->get_view("app", "byDate", Array($endkey, '0000-00-00'), true, 20, null, $enddocid)->rows; |
if ($rows) { | if ($rows) { |
foreach ($rows as $key => $row) { | foreach ($rows as $key => $row) { |
echo displayLogEntry($row, $idtoname); | echo displayLogEntry($row, $idtoname); |
if (!isset($startkey)) | if (!isset($startkey)) |
$startkey = $row->key; | $startkey = $row->key; |
$endkey = $row->key; | $endkey = $row->key; |
$enddocid = $row->value->_id; | $enddocid = $row->value->_id; |
} | } |
} | } |
} catch (SetteeRestClientException $e) { | } catch (SetteeRestClientException $e) { |
setteErrorHandler($e); | setteErrorHandler($e); |
} | } |
echo "<a class='btn btn-large btn-primary' href='?end_key=$endkey&end_docid=$enddocid' style='float:right;'>next page <i class='icon-circle-arrow-right icon-white'></i></a>"; | echo "<a class='btn btn-large btn-primary' href='?end_key=$endkey&end_docid=$enddocid' style='float:right;'>next page <i class='icon-circle-arrow-right icon-white'></i></a>"; |
include_footer_documents(); | include_footer_documents(); |
?> | ?> |
<?php | <?php |
// Agency X updated Y, new files, diff of plain text/link text, | // Agency X updated Y, new files, diff of plain text/link text, |
// feed for just one agency or all | // feed for just one agency or all |
// This is a minimum example of using the Universal Feed Generator Class | // This is a minimum example of using the Universal Feed Generator Class |
include("../lib/FeedWriter/FeedTypes.php"); | include("../lib/FeedWriter/FeedTypes.php"); |
include_once('../include/common.inc.php'); | include_once('../include/common.inc.php'); |
//Creating an instance of FeedWriter class. | //Creating an instance of FeedWriter class. |
$TestFeed = new RSS2FeedWriter(); | $TestFeed = new RSS2FeedWriter(); |
//Setting the channel elements | //Setting the channel elements |
////Retriving informations from database | ////Retriving informations from database |
$idtoname = Array(); | $idtoname = Array(); |
$agenciesdb = $server->get_db('disclosr-agencies'); | $agenciesdb = $server->get_db('disclosr-agencies'); |
foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) { | foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) { |
$idtoname[$row->id] = trim($row->value->name); | $idtoname[$row->id] = trim($row->value->name); |
} | } |
$foidocsdb = $server->get_db('disclosr-foidocuments'); | $foidocsdb = $server->get_db('disclosr-foidocuments'); |
if (isset($_REQUEST['id'])) { | if (isset($_REQUEST['id'])) { |
$rows = $foidocsdb->get_view("app", "byAgencyID", $_REQUEST['id'], false, false, false)->rows; | $rows = $foidocsdb->get_view("app", "byAgencyID", $_REQUEST['id'], false, false, false)->rows; |
$title = $idtoname[$_REQUEST['id']]; | $title = $idtoname[$_REQUEST['id']]; |
} else { | } else { |
$rows = $foidocsdb->get_view("app", "byDate", Array('9999-99-99', '0000-00-00', 50), true)->rows; | $rows = $foidocsdb->get_view("app", "byDate", Array('9999-99-99', '0000-00-00', 50), true)->rows; |
$title = 'All Agencies'; | $title = 'All Agencies'; |
} | } |
//Use wrapper functions for common channelelements | //Use wrapper functions for common channelelements |
$TestFeed->setTitle('disclosurelo.gs Newest Entries - ' . $title); | $TestFeed->setTitle('disclosurelo.gs Newest Entries - ' . $title); |
$TestFeed->setLink('http://disclosurelo.gs/rss.xml.php' . (isset($_REQUEST['id']) ? '?id=' . $_REQUEST['id'] : '')); | $TestFeed->setLink('http://disclosurelo.gs/rss.xml.php' . (isset($_REQUEST['id']) ? '?id=' . $_REQUEST['id'] : '')); |
$TestFeed->setDescription('disclosurelo.gs Newest Entries - ' . $title); | $TestFeed->setDescription('disclosurelo.gs Newest Entries - ' . $title); |
$TestFeed->setChannelElement('language', 'en-us'); | $TestFeed->setChannelElement('language', 'en-us'); |
$TestFeed->setChannelElement('pubDate', date(DATE_RSS, time())); | $TestFeed->setChannelElement('pubDate', date(DATE_RSS, time())); |
//print_r($rows); | //print_r($rows); |
$i =0; | |
foreach ($rows as $row) { | foreach ($rows as $row) { |
//Create an empty FeedItem | //Create an empty FeedItem |
$newItem = $TestFeed->createNewItem(); | $newItem = $TestFeed->createNewItem(); |
//Add elements to the feed item | //Add elements to the feed item |
$newItem->setTitle($row->value->title); | $newItem->setTitle(preg_replace('/[\x00-\x1F\x80-\xFF]/', '', $row->value->title)); |
$newItem->setLink("http://disclosurelo.gs/view.php?id=" . $row->value->_id); | $newItem->setLink("http://disclosurelo.gs/view.php?id=" . $row->value->_id); |
$newItem->setDate(strtotime($row->value->date)); | $newItem->setDate(strtotime($row->value->date)); |
$newItem->setDescription(displayLogEntry($row, $idtoname)); | $newItem->setDescription(displayLogEntry($row, $idtoname)); |
$newItem->setAuthor($idtoname[$row->value->agencyID]); | $newItem->setAuthor($idtoname[$row->value->agencyID]); |
$newItem->addElement('guid', "http://disclosurelo.gs/view.php?id=" . $row->value->_id, array('isPermaLink' => 'true')); | $newItem->addElement('guid', "http://disclosurelo.gs/view.php?id=" . $row->value->_id, array('isPermaLink' => 'true')); |
//Now add the feed item | //Now add the feed item |
$TestFeed->addItem($newItem); | $TestFeed->addItem($newItem); |
$i++; | |
if ($i > 50) break; | |
} | } |
//OK. Everything is done. Now genarate the feed. | //OK. Everything is done. Now genarate the feed. |
$TestFeed->generateFeed(); | $TestFeed->generateFeed(); |
?> | ?> |
import sys,os | import sys,os |
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) | sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) |
import genericScrapers | import genericScrapers |
import scrape | import scrape |
from bs4 import BeautifulSoup | from bs4 import BeautifulSoup |
#http://www.doughellmann.com/PyMOTW/abc/ | #http://www.doughellmann.com/PyMOTW/abc/ |
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): | class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): |
#def getTable(self,soup): | def getTable(self,soup): |
# return soup.find(id = "cphMain_C001_Col01").table | return soup.findAll('table')[1] |
def getColumnCount(self): | def getColumnCount(self): |
return 5 | return 5 |
def getColumns(self,columns): | def getColumns(self,columns): |
(id, date, title, description,notes) = columns | (id, date, title, description,notes) = columns |
return (id, date, title, description, notes) | return (id, date, title, description, notes) |
if __name__ == '__main__': | if __name__ == '__main__': |
print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) | print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) |
print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) | print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) |
ScraperImplementation().doScrape() | ScraperImplementation().doScrape() |
import sys,os | import sys,os |
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) | sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) |
import genericScrapers | import genericScrapers |
import scrape | import scrape |
from bs4 import BeautifulSoup | from bs4 import BeautifulSoup |
from datetime import date | from datetime import date |
#http://www.doughellmann.com/PyMOTW/abc/ | #http://www.doughellmann.com/PyMOTW/abc/ |
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): | class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): |
def getTable(self,soup): | def getTable(self,soup): |
return soup.find(id= "ctl00_MSO_ContentDiv").table | return soup.find(class_ = "rgMasterTable") |
def getColumns(self,columns): | def getColumns(self,columns): |
(id, title, description, notes) = columns | (id, title, description, notes) = columns |
return (id, title, title, description, notes) | return (id, title, title, description, notes) |
def getDate(self, content, entry, doc): | def getDate(self, content, entry, doc): |
edate = date.today().strftime("%Y-%m-%d") | edate = date.today().strftime("%Y-%m-%d") |
doc.update({'date': edate}) | doc.update({'date': edate}) |
return | return |
def getColumnCount(self): | def getColumnCount(self): |
return 4 | return 4 |
if __name__ == '__main__': | if __name__ == '__main__': |
print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) | print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) |
print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) | print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) |
ScraperImplementation().doScrape() | ScraperImplementation().doScrape() |
import sys,os | import sys,os |
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) | sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) |
import genericScrapers | import genericScrapers |
import scrape | import scrape |
from bs4 import BeautifulSoup | from bs4 import BeautifulSoup |
#http://www.doughellmann.com/PyMOTW/abc/ | #http://www.doughellmann.com/PyMOTW/abc/ |
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): | class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): |
#def getTable(self,soup): | def getTable(self,soup): |
# return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table | return soup.find(id = "main").table |
def getColumnCount(self): | def getColumnCount(self): |
return 4 | return 4 |
def getColumns(self,columns): | def getColumns(self,columns): |
(date, title, description,notes) = columns | (date, title, description,notes) = columns |
return (title, date, title, description, notes) | return (title, date, title, description, notes) |
if __name__ == '__main__': | if __name__ == '__main__': |
print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) | print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) |
print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) | print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) |
ScraperImplementation().doScrape() | ScraperImplementation().doScrape() |
import sys,os | import sys,os |
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) | sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) |
import genericScrapers | import genericScrapers |
#RSS feed not detailed | #RSS feed not detailed |
#http://www.doughellmann.com/PyMOTW/abc/ | #http://www.doughellmann.com/PyMOTW/abc/ |
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): | class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): |
def getTable(self,soup): | |
return soup.find(id = "page_content").table | |
def getColumns(self,columns): | def getColumns(self,columns): |
(id, date, title, description, notes) = columns | (id, date, title, description, notes) = columns |
return (id, date, title, description, notes) | return (id, date, title, description, notes) |
if __name__ == '__main__': | if __name__ == '__main__': |
print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) | print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) |
print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) | print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) |
ScraperImplementation().doScrape() | ScraperImplementation().doScrape() |
import sys,os | import sys,os |
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) | sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) |
import genericScrapers | import genericScrapers |
import scrape | import scrape |
from bs4 import BeautifulSoup | from bs4 import BeautifulSoup |
#http://www.doughellmann.com/PyMOTW/abc/ | #http://www.doughellmann.com/PyMOTW/abc/ |
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): | class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): |
def getTable(self,soup): | # def getTable(self,soup): |
return soup.find(id = "_ctl0__ctl0_MainContentPlaceHolder_MainContentPlaceHolder_ContentSpan").findAll("table")[3] | # return soup.find(_class = "content").table |
def getColumnCount(self): | def getColumnCount(self): |
return 5 | return 5 |
def getColumns(self,columns): | def getColumns(self,columns): |
(id, date, title, description,notes) = columns | (id, date, title, description,notes) = columns |
return (id, date, title, description, notes) | return (id, date, title, description, notes) |
if __name__ == '__main__': | if __name__ == '__main__': |
print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) | print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) |
print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) | print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) |
ScraperImplementation().doScrape() | ScraperImplementation().doScrape() |
import sys,os | import sys,os |
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) | sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) |
import genericScrapers | import genericScrapers |
#RSS feed not detailed | import dateutil |
from dateutil.parser import * | |
from datetime import * | |
import scrape | |
from bs4 import BeautifulSoup | |
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): | |
#http://www.doughellmann.com/PyMOTW/abc/ | def __init__(self): |
class ScraperImplementation(genericScrapers.GenericRSSDisclogScraper): | super(ScraperImplementation, self).__init__() |
def getColumns(self,columns): | def getTable(self, soup): |
(id, date, title, description, notes) = columns | return soup.find(id='content') |
return (id, date, title, description, notes) | |
def getDescription(self,content, entry,doc): | |
link = None | |
links = [] | |
description = "" | |
for atag in entry.find_all('a'): | |
if atag.has_attr('href'): | |
link = scrape.fullurl(self.getURL(), atag['href']) | |
(url, mime_type, htcontent) = scrape.fetchURL(scrape.docsdb, link, "foidocuments", self.getAgencyID(), False) | |
if htcontent != None: | |
if mime_type == "text/html" or mime_type == "application/xhtml+xml" or mime_type =="application/xml": | |
soup = BeautifulSoup(htcontent) | |
row = soup.find(id="foidetails") | |
if row == None: | |
row = soup.find(id="content").table | |
if row == None: | |
row = soup.find(id="content") | |
description = ''.join(row.stripped_strings) | |
for atag in row.find_all("a"): | |
if atag.has_attr('href'): | |
links.append(scrape.fullurl(link, atag['href'])) | |
if links != []: | |
doc.update({'links': links}) | |
if description != "": | |
doc.update({ 'description': description}) | |
def getColumnCount(self): | |
return 3 | |
def getColumns(self, columns): | |
(id, title, date) = columns | |
return (id, date, title, title, None) | |
if __name__ == '__main__': | if __name__ == '__main__': |
print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericRSSDisclogScraper) | print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) |
print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericRSSDisclogScraper) | print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) |
ScraperImplementation().doScrape() | ScraperImplementation().doScrape() |
<?php | <?php |
/**
 * Send the X-UA-Compatible header and print the page head, the fixed
 * navigation bar and the opening <div class="container"> tag.
 *
 * The container is left open on purpose; include_footer_documents() closes it.
 *
 * @param string $title Page-specific title. When non-empty it is appended to
 *                      the site name as " - <title>". The value is HTML-escaped
 *                      because callers pass names read from the database.
 */
function include_header_documents($title)
{
    header('X-UA-Compatible: IE=edge,chrome=1');
    // Build the optional title suffix once, escaped for use inside <title>.
    $titleSuffix = '';
    if ((string) $title !== '') {
        $titleSuffix = ' - ' . htmlspecialchars((string) $title, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    }
    ?>
<!doctype html>
<!-- paulirish.com/2008/conditional-stylesheets-vs-css-hacks-answer-neither/ -->
<!--[if lt IE 7]>
<html class="no-js lt-ie9 lt-ie8 lt-ie7" lang="en"> <![endif]-->
<!--[if IE 7]>
<html class="no-js lt-ie9 lt-ie8" lang="en"> <![endif]-->
<!--[if IE 8]>
<html class="no-js lt-ie9" lang="en"> <![endif]-->
<!-- Consider adding a manifest.appcache: h5bp.com/d/Offline -->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en"> <!--<![endif]-->
<head>
<meta charset="utf-8">
<title>Australian Disclosure Logs<?php echo $titleSuffix; ?></title>
<meta name="description" content="">
<!-- Mobile viewport optimized: h5bp.com/viewport -->
<meta name="viewport" content="width=device-width">
<link rel="alternate" type="application/rss+xml" title="Latest Disclosure Log Entries" href="rss.xml.php"/>
<!-- Place favicon.ico and apple-touch-icon.png in the root directory: mathiasbynens.be/notes/touch-icons -->
<meta name="google-site-verification" content="jkknX5g2FCpQvrW030b1Nq2hyoa6mb3EDiA7kCoHNj8"/>
<!-- Le styles -->
<link href="css/bootstrap.min.css" rel="stylesheet">
<style type="text/css">
body {
padding-top: 60px;
padding-bottom: 40px;
}
.sidebar-nav {
padding: 9px 0;
}
</style>
<link href="css/bootstrap-responsive.min.css" rel="stylesheet">
<!-- HTML5 shim, for IE6-8 support of HTML5 elements -->
<!--[if lt IE 9]>
<script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
<![endif]-->
<!-- More ideas for your <head> here: h5bp.com/d/head-Tips -->
<!-- All JavaScript at the bottom, except this Modernizr build.
Modernizr enables HTML5 elements & feature detects for optimal performance.
Create your own custom Modernizr build: www.modernizr.com/download/
<script src="js/libs/modernizr-2.5.3.min.js"></script>-->
<script src="js/jquery.js"></script>
<script type="text/javascript" src="js/flotr2.min.js"></script>
</head>
<body>
<div class="navbar navbar-inverse navbar-fixed-top">
<div class="navbar-inner">
<div class="container-fluid">
<!--<a class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</a> -->
<a class="brand" href="#">Australian Disclosure Logs</a>
<div class="nav-collapse collapse">
<p class="navbar-text pull-right">
<small>
Subsites on:
</small>
<a href="http://orgs.disclosurelo.gs">Government Agencies</a>
• <a href="http://lobbyists.disclosurelo.gs">Political Lobbyists</a>
• <a href="http://contracts.disclosurelo.gs">Government Contracts and Spending</a>
</p>
<ul class="nav">
<li><a href="agency.php">By Agency</a></li>
<li><a href="date.php">By Date</a></li>
<li><a href="disclogsList.php">List of Disclosure Logs</a></li>
<li><a href="charts.php">Charts</a></li>
<li><a href="about.php">About</a></li>
</ul>
</div>
<!--/.nav-collapse -->
</div>
</div>
</div>
<div class="container">
    <?php
}
/**
 * Print the page footer: closes the container opened by the header, prints
 * the disclaimer, emits the Google Analytics loader unless the global $ENV
 * is "DEV", and closes <body> and <html>.
 */
function include_footer_documents()
{
    global $ENV;
    // Analytics is emitted for every environment other than DEV.
    $trackingEnabled = ($ENV != "DEV");
    ?>
</div> <!-- /container -->
<hr>
<footer>
<p>Not affiliated with or endorsed by any government agency.</p>
</footer>
    <?php
    if ($trackingEnabled) {
        $analyticsSnippet = "<script type='text/javascript'>
var _gaq = _gaq || [];
_gaq.push(['_setAccount', 'UA-12341040-4']);
_gaq.push(['_setDomainName', 'disclosurelo.gs']);
_gaq.push(['_setAllowLinker', true]);
_gaq.push(['_trackPageview']);
(function() {
var ga = document.createElement('script');
ga.type = 'text/javascript';
ga.async = true;
ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
var s = document.getElementsByTagName('script')[0];
s.parentNode.insertBefore(ga, s);
})();
</script>";
        echo $analyticsSnippet;
    }
    ?>
<!-- Le javascript
================================================== -->
<!-- Placed at the end of the document so the pages load faster -->
<!--
<script src="js/bootstrap-transition.js"></script>
<script src="js/bootstrap-alert.js"></script>
<script src="js/bootstrap-modal.js"></script>
<script src="js/bootstrap-dropdown.js"></script>
<script src="js/bootstrap-scrollspy.js"></script>
<script src="js/bootstrap-tab.js"></script>
<script src="js/bootstrap-tooltip.js"></script>
<script src="js/bootstrap-popover.js"></script>
<script src="js/bootstrap-button.js"></script>
<script src="js/bootstrap-collapse.js"></script>
<script src="js/bootstrap-carousel.js"></script>
<script src="js/bootstrap-typeahead.js"></script>-->
</body>
</html>
    <?php
}
/**
 * Shorten a string to at most $length bytes, ending it with "..." when cut.
 *
 * Strings no longer than $length are returned unchanged. Otherwise three
 * bytes are reserved for the ellipsis and the text is cut either exactly
 * there ($stopanywhere = true) or back at the last space before that point.
 *
 * Lengths are measured with strlen()/substr(), i.e. in bytes, not characters.
 *
 * @param string $string       Text to shorten.
 * @param int    $length       Maximum length of the result, ellipsis included.
 * @param bool   $stopanywhere Cut mid-word instead of at the last space.
 * @return string The original or the shortened text.
 */
function truncate($string, $length, $stopanywhere = false)
{
    if (strlen($string) <= $length) {
        return $string;
    }
    // Reserve room for the ellipsis. Clamp at zero: a negative substr()
    // length would strip from the END and return more than $length bytes.
    $cut = substr($string, 0, max(0, $length - 3));
    if (!$stopanywhere) {
        $lastSpace = strrpos($cut, ' ');
        // Only back up to a word boundary when there is one to keep. With no
        // space (false) or only a leading space (0) the old code discarded
        // the whole text and returned a bare "...".
        if ($lastSpace !== false && $lastSpace > 0) {
            $cut = substr($cut, 0, $lastSpace);
        }
    }
    return $cut . '...';
}
/**
 * Render one disclosure-log entry as an HTML fragment with schema.org
 * Article microdata.
 *
 * Reads _id, date, title, agencyID, url and docID from $row->value, plus the
 * optional description, notes and links. The field values are scraped from
 * third-party sites, so everything placed into the markup is HTML-escaped
 * (docID keeps its existing strip_tags() treatment).
 *
 * @param object $row      View row whose ->value holds the entry document.
 * @param array  $idtoname Map of agency ID => agency display name.
 * @return string HTML for the entry.
 */
function displayLogEntry($row, $idtoname)
{
    // ENT_QUOTES because some attributes below are single-quoted;
    // ENT_SUBSTITUTE so invalid UTF-8 in scraped text does not blank the field.
    $esc = function ($text) {
        return htmlspecialchars((string) $text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    };
    $entry = $row->value;
    // An entry may reference an agency that is missing from the map.
    $agencyName = isset($idtoname[$entry->agencyID]) ? $idtoname[$entry->agencyID] : '';

    $result = "";
    $result .= '<div itemscope itemtype="http://schema.org/Article">';
    $result .= '<h2><a href="http://disclosurelo.gs/view.php?id=' . urlencode($entry->_id) . '"> <span itemprop="datePublished">' . $esc($entry->date) . "</span>: <span itemprop='name headline'>" . $esc(truncate($entry->title, 120)) . "</span>";
    $result .= ' (<span itemprop="author publisher creator">' . $esc($agencyName) . '</span>)</a></h2>';
    $result .= "<p itemprop='description articleBody text'> Title: " . $esc($entry->title) . "<br/>";
    if (isset($entry->description)) {
        // Escape first, then drop blank-line runs and turn the remaining
        // newlines into <br> so only our own markup reaches the page.
        $description = $esc(trim($entry->description));
        $result .= str_replace("\n", "<br>", preg_replace("/(^[\r\n]*|[\r\n]+)[\s\t]*[\r\n]+/", "", $description));
    }
    if (isset($entry->notes)) {
        $result .= " <br>Note: " . $esc($entry->notes);
    }
    $result .= "</p>";
    if (isset($entry->links)) {
        $result .= '<h3>Links/Documents</h3><ul itemprop="associatedMedia">';
        foreach ($entry->links as $link) {
            $result .= '<li itemscope itemtype="http://schema.org/MediaObject"><a href="' . $esc($link) . '" itemprop="url contentURL">' . $esc($link) . "</a></li>";
        }
        $result .= "</ul>";
    }
    $result .= "<small><A itemprop='url' href='" . $esc($entry->url) . "'>View original source...</a> ID: " . strip_tags($entry->docID) . "</small>";
    $result .= "</div>\n";
    return $result;
}