scraper and sort order updates
Former-commit-id: c8bfc5c3ecbee616fa6dd8bfdd147bedf4d64646
--- a/documents/charts.php
+++ b/documents/charts.php
@@ -112,7 +112,11 @@
<?php
try {
$rows = $foidocsdb->get_view("app", "byAgencyID?group=true",null, false,false,true)->rows;
-
+function cmp($a, $b)
+{
+ return $a->value > $b->value ? 1 : ($a->value < $b->value ? -1 : 0); // ascending by value; usort needs an int, not a bool
+}
+usort($rows, "cmp");
$dataValues = Array();
$i = 0;
--- a/documents/index.php
+++ b/documents/index.php
@@ -18,6 +18,7 @@
$idtoname[$row->id] = trim($row->value->name);
}
$foidocsdb = $server->get_db('disclosr-foidocuments');
+//print_r($foidocsdb);
try {
$rows = $foidocsdb->get_view("app", "byDate", Array($endkey, '0000-00-00'), true, 20, null, $enddocid)->rows;
if ($rows) {
--- a/documents/rss.xml.php
+++ b/documents/rss.xml.php
@@ -31,11 +31,12 @@
//print_r($rows);
+$i = 0; // counts items added to the feed
foreach ($rows as $row) {
//Create an empty FeedItem
$newItem = $TestFeed->createNewItem();
//Add elements to the feed item
- $newItem->setTitle($row->value->title);
+ $newItem->setTitle(preg_replace('/[\x00-\x1F\x80-\xFF]/', '', $row->value->title)); // strip control characters and non-ASCII bytes that invalidate the XML
$newItem->setLink("http://disclosurelo.gs/view.php?id=" . $row->value->_id);
$newItem->setDate(strtotime($row->value->date));
$newItem->setDescription(displayLogEntry($row, $idtoname));
@@ -43,6 +44,8 @@
$newItem->addElement('guid', "http://disclosurelo.gs/view.php?id=" . $row->value->_id, array('isPermaLink' => 'true'));
//Now add the feed item
$TestFeed->addItem($newItem);
+    $i++;
+    if ($i >= 50) break; // emit at most 50 items
}
//OK. Everything is done. Now generate the feed.
$TestFeed->generateFeed();
--- a/documents/scrapers/1d404c4934f74feacd00dcb434e7c10a.py
+++ b/documents/scrapers/1d404c4934f74feacd00dcb434e7c10a.py
@@ -6,8 +6,8 @@
#http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
- #def getTable(self,soup):
- # return soup.find(id = "cphMain_C001_Col01").table
+ def getTable(self,soup):
+ return soup.findAll('table')[1]
def getColumnCount(self):
return 5
def getColumns(self,columns):
--- a/documents/scrapers/41a166419503bb50e410c58be54c102f.py
+++ b/documents/scrapers/41a166419503bb50e410c58be54c102f.py
@@ -8,7 +8,7 @@
#http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
def getTable(self,soup):
- return soup.find(id= "ctl00_MSO_ContentDiv").table
+ return soup.find(class_ = "rgMasterTable")
def getColumns(self,columns):
(id, title, description, notes) = columns
--- a/documents/scrapers/601aedeef4344638d635bdd761e9fdba.py
+++ b/documents/scrapers/601aedeef4344638d635bdd761e9fdba.py
@@ -6,8 +6,8 @@
#http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
- #def getTable(self,soup):
- # return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table
+ def getTable(self,soup):
+ return soup.find(id = "main").table
def getColumnCount(self):
return 4
def getColumns(self,columns):
--- a/documents/scrapers/8c9421f852c441910bf1d93a57b31d64.py
+++ b/documents/scrapers/8c9421f852c441910bf1d93a57b31d64.py
@@ -5,6 +5,8 @@
#http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+ def getTable(self,soup):
+ return soup.find(id = "page_content").table
def getColumns(self,columns):
(id, date, title, description, notes) = columns
return (id, date, title, description, notes)
--- a/documents/scrapers/ad033512610d8e36886ab6a795f26561.py
+++ b/documents/scrapers/ad033512610d8e36886ab6a795f26561.py
@@ -6,8 +6,8 @@
#http://www.doughellmann.com/PyMOTW/abc/
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
- def getTable(self,soup):
- return soup.find(id = "_ctl0__ctl0_MainContentPlaceHolder_MainContentPlaceHolder_ContentSpan").findAll("table")[3]
+# def getTable(self,soup):
+# return soup.find(class_ = "content").table
def getColumnCount(self):
return 5
def getColumns(self,columns):