scraper and sort order updates
scraper and sort order updates


Former-commit-id: c8bfc5c3ecbee616fa6dd8bfdd147bedf4d64646

<?php <?php
include('template.inc.php'); include('template.inc.php');
include_header_documents("Charts"); include_header_documents("Charts");
include_once('../include/common.inc.php'); include_once('../include/common.inc.php');
$agenciesdb = $server->get_db('disclosr-agencies'); $agenciesdb = $server->get_db('disclosr-agencies');
   
$idtoname = Array(); $idtoname = Array();
foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) { foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) {
$idtoname[$row->id] = trim($row->value->name); $idtoname[$row->id] = trim($row->value->name);
} }
$foidocsdb = $server->get_db('disclosr-foidocuments'); $foidocsdb = $server->get_db('disclosr-foidocuments');
   
?> ?>
<div class="foundation-header"> <div class="foundation-header">
<h1><a href="about.php">Charts</a></h1> <h1><a href="about.php">Charts</a></h1>
<h4 class="subheader"></h4> <h4 class="subheader"></h4>
</div> </div>
<div id="bydate" style="width:1000px;height:300px;"></div> <div id="bydate" style="width:1000px;height:300px;"></div>
<div id="byagency" style="width:1000px;height:1400px;"></div> <div id="byagency" style="width:1000px;height:1400px;"></div>
<script id="source"> <script id="source">
window.onload = function () { window.onload = function () {
$(document).ready(function () { $(document).ready(function () {
var var
d1 = [], d1 = [],
options1, options1,
o1; o1;
   
<?php <?php
try { try {
$rows = $foidocsdb->get_view("app", "byDateMonthYear?group=true",null, false,false,true)->rows; $rows = $foidocsdb->get_view("app", "byDateMonthYear?group=true",null, false,false,true)->rows;
   
   
$dataValues = Array(); $dataValues = Array();
foreach ($rows as $row) { foreach ($rows as $row) {
$dataValues[$row->key] = $row->value; $dataValues[$row->key] = $row->value;
} }
$i = 0; $i = 0;
ksort($dataValues); ksort($dataValues);
foreach ($dataValues as $key => $value) { foreach ($dataValues as $key => $value) {
$date = date_create_from_format('Y-m-d', $key); $date = date_create_from_format('Y-m-d', $key);
if (date_format($date, 'U') != "") { if (date_format($date, 'U') != "") {
echo " d1.push([".date_format($date, 'U')."000, $value]);" . PHP_EOL; echo " d1.push([".date_format($date, 'U')."000, $value]);" . PHP_EOL;
// echo " emplabels.push('$key');" . PHP_EOL; // echo " emplabels.push('$key');" . PHP_EOL;
$i++; $i++;
} }
} }
} catch (SetteeRestClientException $e) { } catch (SetteeRestClientException $e) {
setteErrorHandler($e); setteErrorHandler($e);
} }
?> ?>
   
   
options1 = { options1 = {
xaxis: { xaxis: {
mode: 'time', mode: 'time',
labelsAngle: 45 labelsAngle: 45
}, },
selection: { selection: {
mode: 'x' mode: 'x'
}, },
HtmlText: false, HtmlText: false,
title: 'Disclosure Log entries added by Date' title: 'Disclosure Log entries added by Date'
}; };
   
// Draw graph with default options, overwriting with passed options // Draw graph with default options, overwriting with passed options
function drawGraph(opts) { function drawGraph(opts) {
   
// Clone the options, so the 'options' variable always keeps intact. // Clone the options, so the 'options' variable always keeps intact.
o1 = Flotr._.extend(Flotr._.clone(options1), opts || {}); o1 = Flotr._.extend(Flotr._.clone(options1), opts || {});
   
// Return a new graph. // Return a new graph.
return Flotr.draw( return Flotr.draw(
document.getElementById("bydate"), document.getElementById("bydate"),
[ d1 ], [ d1 ],
o1 o1
); );
} }
   
graph = drawGraph(); graph = drawGraph();
   
Flotr.EventAdapter.observe(document.getElementById("bydate"), 'flotr:select', function (area) { Flotr.EventAdapter.observe(document.getElementById("bydate"), 'flotr:select', function (area) {
// Draw selected area // Draw selected area
graph = drawGraph({ graph = drawGraph({
xaxis: { min: area.x1, max: area.x2, mode: 'time', labelsAngle: 45 }, xaxis: { min: area.x1, max: area.x2, mode: 'time', labelsAngle: 45 },
yaxis: { min: area.y1, max: area.y2 } yaxis: { min: area.y1, max: area.y2 }
}); });
}); });
   
// When graph is clicked, draw the graph with default area. // When graph is clicked, draw the graph with default area.
Flotr.EventAdapter.observe(document.getElementById("bydate"), 'flotr:click', function () { Flotr.EventAdapter.observe(document.getElementById("bydate"), 'flotr:click', function () {
graph = drawGraph(); graph = drawGraph();
}); });
   
}); });
}; };
   
var d2 = []; var d2 = [];
var agencylabels = []; var agencylabels = [];
function agencytrackformatter(obj) { function agencytrackformatter(obj) {
   
return agencylabels[Math.floor(obj.y)] + " = " + obj.x; return agencylabels[Math.floor(obj.y)] + " = " + obj.x;
   
} }
function agencytickformatter(val, axis) { function agencytickformatter(val, axis) {
if (agencylabels[Math.floor(val)]) { if (agencylabels[Math.floor(val)]) {
return (agencylabels[Math.floor(val)]) ; return (agencylabels[Math.floor(val)]) ;
   
} else { } else {
return ""; return "";
} }
} }
<?php <?php
try { try {
$rows = $foidocsdb->get_view("app", "byAgencyID?group=true",null, false,false,true)->rows; $rows = $foidocsdb->get_view("app", "byAgencyID?group=true",null, false,false,true)->rows;
  function cmp($a, $b)
  {
  return $a->value > $b->value;
  }
  usort($rows, "cmp");
   
$dataValues = Array(); $dataValues = Array();
$i = 0; $i = 0;
foreach ($rows as $row) { foreach ($rows as $row) {
echo " d2.push([ $row->value,$i]);" . PHP_EOL; echo " d2.push([ $row->value,$i]);" . PHP_EOL;
echo " agencylabels.push(['".str_replace("'","",$idtoname[$row->key])."']);" . PHP_EOL; echo " agencylabels.push(['".str_replace("'","",$idtoname[$row->key])."']);" . PHP_EOL;
   
$i++; $i++;
} }
} catch (SetteeRestClientException $e) { } catch (SetteeRestClientException $e) {
setteErrorHandler($e); setteErrorHandler($e);
} }
?> ?>
// Draw the graph // Draw the graph
Flotr.draw( Flotr.draw(
document.getElementById("byagency"), document.getElementById("byagency"),
[d2], [d2],
{ {
title: "Disclosure Log entries by Agency", title: "Disclosure Log entries by Agency",
bars: { bars: {
show: true, show: true,
horizontal: true, horizontal: true,
shadowSize: 0, shadowSize: 0,
barWidth: 0.5 barWidth: 0.5
}, },
mouse: { mouse: {
track: true, track: true,
relative: true, relative: true,
trackFormatter: agencytrackformatter trackFormatter: agencytrackformatter
}, },
yaxis: { yaxis: {
minorTickFreq: 1, minorTickFreq: 1,
noTicks: agencylabels.length, noTicks: agencylabels.length,
showMinorLabels: true, showMinorLabels: true,
tickFormatter: agencytickformatter tickFormatter: agencytickformatter
}, },
xaxis: { xaxis: {
min: 0, min: 0,
autoscaleMargin: 1 autoscaleMargin: 1
}, },
legend: { legend: {
show: false show: false
} }
} }
); );
</script> </script>
   
<?php <?php
include_footer_documents(); include_footer_documents();
?> ?>
   
   
<?php <?php
include('template.inc.php'); include('template.inc.php');
include_header_documents(""); include_header_documents("");
include_once('../include/common.inc.php'); include_once('../include/common.inc.php');
$endkey = (isset($_REQUEST['end_key']) ? $_REQUEST['end_key'] : '9999-99-99'); $endkey = (isset($_REQUEST['end_key']) ? $_REQUEST['end_key'] : '9999-99-99');
$enddocid = (isset($_REQUEST['end_docid']) ? $_REQUEST['end_docid'] : null); $enddocid = (isset($_REQUEST['end_docid']) ? $_REQUEST['end_docid'] : null);
?> ?>
<div class="headline">Read all the information released by Australian Federal Government agencies under the FOI Act in <div class="headline">Read all the information released by Australian Federal Government agencies under the FOI Act in
one place! one place!
</div> </div>
<a style='float:right' href="rss.xml.php"><img src="img/feed-icon-14x14.png" alt="RSS Icon"/> All Agencies RSS Feed</a> <a style='float:right' href="rss.xml.php"><img src="img/feed-icon-14x14.png" alt="RSS Icon"/> All Agencies RSS Feed</a>
<br> <br>
<?php <?php
$agenciesdb = $server->get_db('disclosr-agencies'); $agenciesdb = $server->get_db('disclosr-agencies');
   
$idtoname = Array(); $idtoname = Array();
foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) { foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) {
$idtoname[$row->id] = trim($row->value->name); $idtoname[$row->id] = trim($row->value->name);
} }
$foidocsdb = $server->get_db('disclosr-foidocuments'); $foidocsdb = $server->get_db('disclosr-foidocuments');
  //print_r($foidocsdb);
try { try {
$rows = $foidocsdb->get_view("app", "byDate", Array($endkey, '0000-00-00'), true, 20, null, $enddocid)->rows; $rows = $foidocsdb->get_view("app", "byDate", Array($endkey, '0000-00-00'), true, 20, null, $enddocid)->rows;
if ($rows) { if ($rows) {
foreach ($rows as $key => $row) { foreach ($rows as $key => $row) {
echo displayLogEntry($row, $idtoname); echo displayLogEntry($row, $idtoname);
if (!isset($startkey)) if (!isset($startkey))
$startkey = $row->key; $startkey = $row->key;
$endkey = $row->key; $endkey = $row->key;
$enddocid = $row->value->_id; $enddocid = $row->value->_id;
} }
} }
} catch (SetteeRestClientException $e) { } catch (SetteeRestClientException $e) {
setteErrorHandler($e); setteErrorHandler($e);
} }
echo "<a class='btn btn-large btn-primary' href='?end_key=$endkey&amp;end_docid=$enddocid' style='float:right;'>next page <i class='icon-circle-arrow-right icon-white'></i></a>"; echo "<a class='btn btn-large btn-primary' href='?end_key=$endkey&amp;end_docid=$enddocid' style='float:right;'>next page <i class='icon-circle-arrow-right icon-white'></i></a>";
include_footer_documents(); include_footer_documents();
?> ?>
   
<?php <?php
   
// Agency X updated Y, new files, diff of plain text/link text, // Agency X updated Y, new files, diff of plain text/link text,
// feed for just one agency or all // feed for just one agency or all
// This is a minimum example of using the Universal Feed Generator Class // This is a minimum example of using the Universal Feed Generator Class
include("../lib/FeedWriter/FeedTypes.php"); include("../lib/FeedWriter/FeedTypes.php");
include_once('../include/common.inc.php'); include_once('../include/common.inc.php');
//Creating an instance of FeedWriter class. //Creating an instance of FeedWriter class.
$TestFeed = new RSS2FeedWriter(); $TestFeed = new RSS2FeedWriter();
//Setting the channel elements //Setting the channel elements
////Retrieving information from database ////Retrieving information from database
$idtoname = Array(); $idtoname = Array();
$agenciesdb = $server->get_db('disclosr-agencies'); $agenciesdb = $server->get_db('disclosr-agencies');
foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) { foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) {
$idtoname[$row->id] = trim($row->value->name); $idtoname[$row->id] = trim($row->value->name);
} }
$foidocsdb = $server->get_db('disclosr-foidocuments'); $foidocsdb = $server->get_db('disclosr-foidocuments');
if (isset($_REQUEST['id'])) { if (isset($_REQUEST['id'])) {
$rows = $foidocsdb->get_view("app", "byAgencyID", $_REQUEST['id'], false, false, false)->rows; $rows = $foidocsdb->get_view("app", "byAgencyID", $_REQUEST['id'], false, false, false)->rows;
$title = $idtoname[$_REQUEST['id']]; $title = $idtoname[$_REQUEST['id']];
} else { } else {
$rows = $foidocsdb->get_view("app", "byDate", Array('9999-99-99', '0000-00-00', 50), true)->rows; $rows = $foidocsdb->get_view("app", "byDate", Array('9999-99-99', '0000-00-00', 50), true)->rows;
$title = 'All Agencies'; $title = 'All Agencies';
} }
//Use wrapper functions for common channelelements //Use wrapper functions for common channelelements
$TestFeed->setTitle('disclosurelo.gs Newest Entries - ' . $title); $TestFeed->setTitle('disclosurelo.gs Newest Entries - ' . $title);
$TestFeed->setLink('http://disclosurelo.gs/rss.xml.php' . (isset($_REQUEST['id']) ? '?id=' . $_REQUEST['id'] : '')); $TestFeed->setLink('http://disclosurelo.gs/rss.xml.php' . (isset($_REQUEST['id']) ? '?id=' . $_REQUEST['id'] : ''));
$TestFeed->setDescription('disclosurelo.gs Newest Entries - ' . $title); $TestFeed->setDescription('disclosurelo.gs Newest Entries - ' . $title);
$TestFeed->setChannelElement('language', 'en-us'); $TestFeed->setChannelElement('language', 'en-us');
$TestFeed->setChannelElement('pubDate', date(DATE_RSS, time())); $TestFeed->setChannelElement('pubDate', date(DATE_RSS, time()));
   
   
//print_r($rows); //print_r($rows);
  $i =0;
foreach ($rows as $row) { foreach ($rows as $row) {
//Create an empty FeedItem //Create an empty FeedItem
$newItem = $TestFeed->createNewItem(); $newItem = $TestFeed->createNewItem();
//Add elements to the feed item //Add elements to the feed item
$newItem->setTitle($row->value->title); $newItem->setTitle(preg_replace('/[\x00-\x1F\x80-\xFF]/', '', $row->value->title));
$newItem->setLink("http://disclosurelo.gs/view.php?id=" . $row->value->_id); $newItem->setLink("http://disclosurelo.gs/view.php?id=" . $row->value->_id);
$newItem->setDate(strtotime($row->value->date)); $newItem->setDate(strtotime($row->value->date));
$newItem->setDescription(displayLogEntry($row, $idtoname)); $newItem->setDescription(displayLogEntry($row, $idtoname));
$newItem->setAuthor($idtoname[$row->value->agencyID]); $newItem->setAuthor($idtoname[$row->value->agencyID]);
$newItem->addElement('guid', "http://disclosurelo.gs/view.php?id=" . $row->value->_id, array('isPermaLink' => 'true')); $newItem->addElement('guid', "http://disclosurelo.gs/view.php?id=" . $row->value->_id, array('isPermaLink' => 'true'));
//Now add the feed item //Now add the feed item
$TestFeed->addItem($newItem); $TestFeed->addItem($newItem);
  $i++;
  if ($i > 50) break;
} }
//OK. Everything is done. Now generate the feed. //OK. Everything is done. Now generate the feed.
$TestFeed->generateFeed(); $TestFeed->generateFeed();
?> ?>
   
import sys,os import sys,os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import genericScrapers import genericScrapers
import scrape import scrape
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
   
#http://www.doughellmann.com/PyMOTW/abc/ #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
#def getTable(self,soup): def getTable(self,soup):
# return soup.find(id = "cphMain_C001_Col01").table return soup.findAll('table')[1]
def getColumnCount(self): def getColumnCount(self):
return 5 return 5
def getColumns(self,columns): def getColumns(self,columns):
(id, date, title, description,notes) = columns (id, date, title, description,notes) = columns
return (id, date, title, description, notes) return (id, date, title, description, notes)
   
if __name__ == '__main__': if __name__ == '__main__':
print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
ScraperImplementation().doScrape() ScraperImplementation().doScrape()
   
import sys,os import sys,os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import genericScrapers import genericScrapers
import scrape import scrape
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from datetime import date from datetime import date
   
#http://www.doughellmann.com/PyMOTW/abc/ #http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
def getTable(self,soup): def getTable(self,soup):
return soup.find(id= "ctl00_MSO_ContentDiv").table return soup.find(class_ = "rgMasterTable")
   
def getColumns(self,columns): def getColumns(self,columns):
(id, title, description, notes) = columns (id, title, description, notes) = columns
return (id, title, title, description, notes) return (id, title, title, description, notes)
def getDate(self, content, entry, doc): def getDate(self, content, entry, doc):
edate = date.today().strftime("%Y-%m-%d") edate = date.today().strftime("%Y-%m-%d")
doc.update({'date': edate}) doc.update({'date': edate})
return return
def getColumnCount(self): def getColumnCount(self):
return 4 return 4
   
if __name__ == '__main__': if __name__ == '__main__':
print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
ScraperImplementation().doScrape() ScraperImplementation().doScrape()