Add more disclosure-log scrapers; fix (id, date, title, description) column ordering in generic and per-agency scrapers; add APSC 2012 employee import; switch RSS feed to the mibe/FeedWriter submodule; add yellow "no disclog" state to the scraper status page
Former-commit-id: a1b071fe60c375afcb0527b3c5a86844b8f0035b
--- a/.gitmodules
+++ b/.gitmodules
@@ -31,4 +31,7 @@
[submodule "documents/lib/parsedatetime"]
path = documents/lib/parsedatetime
url = git://github.com/bear/parsedatetime.git
+[submodule "lib/FeedWriter"]
+ path = lib/FeedWriter
+ url = https://github.com/mibe/FeedWriter
--- a/admin/exportEmployees.csv.php
+++ b/admin/exportEmployees.csv.php
@@ -4,7 +4,8 @@
$format = "csv";
//$format = "json";
-if (isset($_REQUEST['format'])) $format = $_REQUEST['format'];
+if (isset($_REQUEST['format']))
+ $format = $_REQUEST['format'];
setlocale(LC_CTYPE, 'C');
if ($format == "csv") {
$headers = Array("name");
@@ -21,7 +22,6 @@
if (isset($row->value->statistics->employees)) {
$headers = array_unique(array_merge($headers, array_keys(object_to_array($row->value->statistics->employees))));
-
}
}
} catch (SetteeRestClientException $e) {
@@ -40,15 +40,14 @@
fputcsv($fp, $headers);
} else if ($format == "json") {
echo '{
- "labels" : ["' . implode('","', $headers) . '"],'.PHP_EOL;
+ "labels" : ["' . implode('","', $headers) . '"],' . PHP_EOL;
}
try {
$agencies = $db->get_view("app", "all", null, true)->rows;
//print_r($agencies);
$first = true;
if ($format == "json") {
- echo '"data" : ['.PHP_EOL;
-
+ echo '"data" : [' . PHP_EOL;
}
foreach ($agencies as $agency) {
@@ -56,25 +55,35 @@
$row = Array();
$agencyEmployeesArray = object_to_array($agency->value->statistics->employees);
foreach ($headers as $i => $fieldName) {
+ if ($format == "csv") {
+ if (isset($agencyEmployeesArray[$fieldName])) {
+ $row[] = $agencyEmployeesArray[$fieldName]["value"] ;
+ } else if ($i == 0) {
+ $row[] = $agency->value->name;
+ } else {
+ $row[] = 0;
+ }
+ } else if ($format == "json") {
if (isset($agencyEmployeesArray[$fieldName])) {
- $row[] = '['.$i.','.$agencyEmployeesArray[$fieldName]["value"].']';
+ $row[] = '[' . $i . ',' . $agencyEmployeesArray[$fieldName]["value"] . ']';
} else {
- $row[] = '['.$i.',0]';
+ $row[] = '[' . $i . ',0]';
}
+ }
}
if ($format == "csv") {
fputcsv($fp, array_values($row));
} else if ($format == "json") {
- if (!$first) echo ",";
- echo '{"data" : [' . implode(",", array_values($row)) . '], "label": "'.$agency->value->name.'", "lines" : { "show" : true }, "points" : { "show" : true }}'.PHP_EOL;
+ if (!$first)
+ echo ",";
+ echo '{"data" : [' . implode(",", array_values($row)) . '], "label": "' . $agency->value->name . '", "lines" : { "show" : true }, "points" : { "show" : true }}' . PHP_EOL;
$first = false;
}
}
}
if ($format == "json") {
- echo ']
- }'.PHP_EOL;
-
+ echo ']
+ }' . PHP_EOL;
}
} catch (SetteeRestClientException $e) {
setteErrorHandler($e);
--- a/admin/importAPSCEmployees.php
+++ b/admin/importAPSCEmployees.php
@@ -47,13 +47,17 @@
$changed = false;
if (!isset($doc->statistics)) {
$changed = true;
- $doc->statistics = Array();
+ $doc->statistics = new stdClass();
+ }
+ if (!isset($doc->statistics->employees)) {
+ $changed = true;
+ $doc->statistics->employees = new stdClass();
}
foreach ($sum as $timePeriod => $value) {
if (!isset($doc->statistics->employees->$timePeriod->value)
|| $doc->statistics->employees->$timePeriod->value != $value) {
$changed = true;
- $doc->statistics["employees"][$timePeriod] = Array("value" => $value, "source" => "http://apsc.gov.au/stateoftheservice/");
+ $doc->statistics->employees->$timePeriod = Array("value" => $value, "source" => "http://apsc.gov.au/stateoftheservice/");
}
}
if ($changed) {
--- /dev/null
+++ b/admin/importAPSCEmployees2012.php
@@ -1,1 +1,86 @@
+<?php
+require_once '../include/common.inc.php';
+require($basePath . 'lib/phpquery/phpQuery/phpQuery.php');
+$db = $server->get_db('disclosr-agencies');
+$rows = $db->get_view("app", "byName")->rows;
+$nametoid = Array();
+$sums = Array();
+$functions = Array();
+foreach ($rows as $row) {
+ $nametoid[trim($row->key)] = $row->value;
+}
+
+
+$request = Requests::get("http://www.apsc.gov.au/publications-and-media/parliamentary/state-of-the-service/new-sosr/appendix-2-aps-agencies");
+$doc = phpQuery::newDocumentHTML($request->body);
+phpQuery::selectDocument($doc);
+foreach (pq('tr')->elements as $tr) {
+ //echo $tr->nodeValue.PHP_EOL;
+ $agency = "";
+ $employees = "";
+ $function = "";
+ $i = 0;
+ foreach ($tr->childNodes as $td) {
+ //echo $td->nodeValue." $i <br>";
+ if ($i == 0)
+ $agency = $td->nodeValue;
+ if ($i == 2) {
+ $employees = trim(str_replace(",", "", $td->nodeValue));
+ }
+ if ($i == 4) {
+ $function = $td->nodeValue;
+ }
+ $i++;
+ }
+ if ($agency != "" && $employees != "" && $function != "") {
+ $name = trim(str_replace('2','',$agency));
+        //echo "$name<br><br>" . PHP_EOL;
+ if (isset($nametoid[$name])) {
+ $id = $nametoid[$name];
+ //echo $id . "<br>" . PHP_EOL;
+ @$sums[$id]["2011-2012"] += $employees;
+ $functions[$id] = $function;
+ } else if ($agency != "Agency"){
+            echo "<br>ERROR NAME '$agency' MISSING FROM ID LIST<br><br>" . PHP_EOL;
+
+ die();
+ }
+ } else {
+ echo "skipped $agency";
+ }
+}
+//print_r($sums);
+foreach ($sums as $id => $sum) {
+ echo $id . "<br>" . PHP_EOL;
+ $doc = $db->get($id);
+ echo $doc->name . "<br>" . PHP_EOL;
+ // print_r($doc);
+ $changed = false;
+ if (!isset($doc->statistics)) {
+ $changed = true;
+ $doc->statistics = new stdClass();
+ }
+ if (!isset($doc->statistics->employees)) {
+ $changed = true;
+ $doc->statistics->employees = new stdClass();
+ }
+ foreach ($sum as $timePeriod => $value) {
+ if (!isset($doc->statistics->employees->$timePeriod->value)
+ || $doc->statistics->employees->$timePeriod->value != $value) {
+ $changed = true;
+ $doc->statistics->employees->$timePeriod = Array("value" => $value, "source" => "http://apsc.gov.au/stateoftheservice/");
+ $doc->employees = $value;
+ $doc->functionClassification = $functions[$id];
+ }
+ }
+
+ if ($changed) {
+ $db->save($doc);
+ } else {
+ echo "not changed" . "<br>" . PHP_EOL;
+ }
+}
+// employees: timeperiod, source = apsc state of service, value
+?>
+
--- /dev/null
+++ b/documents/.gitignore
@@ -1,1 +1,2 @@
+*.pyc
--- a/documents/disclogsList.php
+++ b/documents/disclogsList.php
@@ -12,6 +12,7 @@
$disclogs = 0;
$red = 0;
$green = 0;
+$yellow = 0;
$orange = 0;
try {
$rows = $agenciesdb->get_view("app", "byCanonicalName", null, true)->rows;
@@ -19,45 +20,56 @@
if ($rows) {
foreach ($rows as $row) {
+ if ((!isset($row->value->status) || $row->value->status != "suspended") && isset($row->value->foiEmail)) {
+ echo "<tr><td>";
+ if (isset($row->value->website)) echo "<a href='" . $row->value->website . "'>";
+ echo "<b>" . $row->value->name . "</b>";
+ if (isset($row->value->website)) echo "</a>";
+ if ($ENV == "DEV")
+ echo "<br>(" . $row->id . ")";
+ echo "</td>\n";
+ $agencies++;
- echo "<tr><td><b>" . $row->value->name . "</b>";
- if ($ENV == "DEV")
- echo "<br>(" . $row->id . ")";
- echo "</td>\n";
-$agencies++;
-
- echo "<td>";
- if (isset($row->value->FOIDocumentsURL)) {
- $disclogs++;
- echo '<a href="' . $row->value->FOIDocumentsURL . '">'
- . $row->value->FOIDocumentsURL . '</a>';
- if ($ENV == "DEV")
- echo '<br><small>(<a href="viewDocument.php?hash=' . md5($row->value->FOIDocumentsURL) . '">'
- . 'view local copy</a>)</small>';
- } else {
- echo "<font color='red'>✘</font>";
+ echo "<td>";
+ if (isset($row->value->FOIDocumentsURL)) {
+ $disclogs++;
+ echo '<a href="' . $row->value->FOIDocumentsURL . '">'
+ . $row->value->FOIDocumentsURL . '</a>';
+ if ($ENV == "DEV")
+ echo '<br><small>(<a href="viewDocument.php?hash=' . md5($row->value->FOIDocumentsURL) . '">'
+ . 'view local copy</a>)</small>';
+ } else {
+ echo "<font color='red'><abbr title='No'>✘</abbr></font>";
+ }
+ echo "</td>\n<td>";
+ if (isset($row->value->FOIDocumentsURL)) {
+ if (file_exists("./scrapers/" . $row->id . '.py')) {
+ echo "<font color='green'><abbr title='Yes'>✔</abbr></font>";
+ $green++;
+ } else if (file_exists("./scrapers/" . $row->id . '.txt')) {
+ if (trim(file_get_contents("./scrapers/" . $row->id . '.txt')) == "no disclog") {
+ echo "<font color='yellow'><abbr title='No log table exists at URL to scrape'><b>◎</b></abbr></font>";
+ $yellow++;
+ } else {
+ echo file_get_contents("./scrapers/" . $row->id . '.txt');
+ echo "<font color='orange'><abbr title='Work in progress'><b>▬</b></abbr></font>";
+ $orange++;
+ }
+ } else {
+ echo "<font color='red'><abbr title='No'>✘</abbr></font>";
+ $red++;
+ }
+ }
+ echo "</td></tr>\n";
}
- echo "</td>\n<td>";
- if (isset($row->value->FOIDocumentsURL)) {
- if (file_exists("./scrapers/" . $row->id . '.py')) {
- echo "<font color='green'>✔</font>";
- $green++;
- } else if (file_exists("./scrapers/" . $row->id . '.txt')) {
- echo "<font color='orange'><b>▬</b></font>";
- $orange++;
- } else {
- echo "<font color='red'>✘</font>";
- $red++;
- }
- }
- echo "</td></tr>\n";
}
}
} catch (SetteeRestClientException $e) {
setteErrorHandler($e);
}
echo "</table>";
-echo $agencies." agencies ".(($disclogs/$agencies)*100)."% with disclosure logs, ".(($green/$disclogs)*100)."% with scrapers ".(($red/$disclogs)*100)."% without scrapers ".(($orange/$disclogs)*100)."% WIP scrapers ";
+echo $agencies . " agencies, " . round(($disclogs / $agencies) * 100) . "% with disclosure logs; "
+ . round(($green / $disclogs) * 100) . "% logs with scrapers " . round(($red / $disclogs) * 100) . "% logs without scrapers " . round(($orange / $disclogs) * 100) . "% logs Work-In-Progress scrapers ";
include_footer_documents();
?>
--- a/documents/genericScrapers.py
+++ b/documents/genericScrapers.py
@@ -9,6 +9,7 @@
import dateutil
from dateutil.parser import *
from datetime import *
+import codecs
class GenericDisclogScraper(object):
__metaclass__ = abc.ABCMeta
@@ -88,8 +89,14 @@
return
def getTable(self, soup):
return soup.table
+ def getRows(self, table):
+ return table.find_all('tr')
def getDate(self, content, entry, doc):
- edate = parse(''.join(content.stripped_strings).strip(), dayfirst=True, fuzzy=True).strftime("%Y-%m-%d")
+ date = ''.join(content.stripped_strings).strip()
+ (a,b,c) = date.partition("(")
+ date = a.replace("Octber","October")
+ print date
+ edate = parse(date, dayfirst=True, fuzzy=True).strftime("%Y-%m-%d")
print edate
doc.update({'date': edate})
return
@@ -110,11 +117,11 @@
# http://www.crummy.com/software/BeautifulSoup/documentation.html
soup = BeautifulSoup(content)
table = self.getTable(soup)
- for row in table.find_all('tr'):
+ for row in self.getRows(table):
columns = row.find_all('td')
if len(columns) == self.getColumnCount():
- (id, date, description, title, notes) = self.getColumns(columns)
- print ''.join(id.stripped_strings)
+ (id, date, title, description, notes) = self.getColumns(columns)
+ print ''.join(id.stripped_strings).encode('ascii', 'ignore')
if id.string == None:
hash = scrape.mkhash(self.remove_control_chars(url+(''.join(date.stripped_strings))))
else:
--- a/documents/index.php
+++ b/documents/index.php
@@ -20,7 +20,7 @@
if ($rows) {
foreach ($rows as $row) {
-displayLogEntry($row,$idtoname);
+echo displayLogEntry($row,$idtoname);
}
}
} catch (SetteeRestClientException $e) {
--- a/documents/robots.txt
+++ b/documents/robots.txt
@@ -2,4 +2,5 @@
# http://code.google.com/web/controlcrawlindex/
User-agent: *
-
+Disallow: /admin/
+Sitemap: http://disclosurelo.gs/sitemap.xml.php
--- a/documents/rss.xml.php
+++ b/documents/rss.xml.php
@@ -3,28 +3,38 @@
// Agency X updated Y, new files, diff of plain text/link text,
// feed for just one agency or all
// This is a minimum example of using the Universal Feed Generator Class
-include("lib/FeedWriter.php");
+include("../lib/FeedWriter/FeedTypes.php");
+include_once('../include/common.inc.php');
//Creating an instance of FeedWriter class.
-$TestFeed = new FeedWriter(RSS2);
+$TestFeed = new RSS2FeedWriter();
//Setting the channel elements
//Use wrapper functions for common channelelements
$TestFeed->setTitle('Last Modified - All');
-$TestFeed->setLink('http://disclosr.lambdacomplex.org/rss.xml.php');
+$TestFeed->setLink('http://disclosurelo.gs/rss.xml.php');
$TestFeed->setDescription('This is test of creating a RSS 2.0 feed Universal Feed Writer');
+ $TestFeed->setChannelElement('language', 'en-us');
+ $TestFeed->setChannelElement('pubDate', date(DATE_RSS, time()));
//Retriving informations from database
-$rows = $db->get_view("app", "byLastModified")->rows;
+$idtoname = Array();
+$agenciesdb = $server->get_db('disclosr-agencies');
+foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) {
+ $idtoname[$row->id] = trim($row->value->name);
+}
+$foidocsdb = $server->get_db('disclosr-foidocuments');
+$rows = $foidocsdb->get_view("app", "byDate", Array('9999-99-99','0000-00-00'), true)->rows;
//print_r($rows);
foreach ($rows as $row) {
//Create an empty FeedItem
$newItem = $TestFeed->createNewItem();
//Add elements to the feed item
- $newItem->setTitle($row['name']);
- $newItem->setLink($row['id']);
- $newItem->setDate(date("c", $row['metadata']['lastModified']));
- $newItem->setDescription($row['name']);
+ $newItem->setTitle($row->value->title);
+ $newItem->setLink("view.php?id=".$row->value->docID);
+ $newItem->setDate(date("c", strtotime($row->value->date)));
+ $newItem->setDescription(displayLogEntry($row,$idtoname));
+ $newItem->addElement('guid', $row->value->_id,array('isPermaLink'=>'true'));
//Now add the feed item
$TestFeed->addItem($newItem);
}
//OK. Everything is done. Now genarate the feed.
-$TestFeed->genarateFeed();
+$TestFeed->generateFeed();
?>
--- /dev/null
+++ b/documents/scrapers/0049d35216493c545ef5f7f000e6b252.txt
@@ -1,1 +1,2 @@
+pdf
--- /dev/null
+++ b/documents/scrapers/00a294de663db69062ca09aede7c0487.txt
@@ -1,1 +1,2 @@
+multipage
--- /dev/null
+++ b/documents/scrapers/0372b19123076338d483f624c433727b.txt
@@ -1,1 +1,2 @@
+docx
--- /dev/null
+++ b/documents/scrapers/0ae822d1a748e60d90f0b79b97d5a3e5.txt
@@ -1,1 +1,2 @@
+ACMA style
--- /dev/null
+++ b/documents/scrapers/0ced9dd2de36100c3cabdb7fd8e843a9.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+ #def getTable(self,soup):
+ # return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table
+ def getColumnCount(self):
+ return 5
+ def getColumns(self,columns):
+ (id, date, title, description,notes) = columns
+ return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+ print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+ ScraperImplementation().doScrape()
+
--- a/documents/scrapers/1097fa8afdcf5db89d212d0979226667.py
+++ b/documents/scrapers/1097fa8afdcf5db89d212d0979226667.py
@@ -8,7 +8,7 @@
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
def getColumns(self,columns):
(id, date, title, description, notes) = columns
- return (id, date, description, title, notes)
+ return (id, date, title, description, notes)
if __name__ == '__main__':
print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
--- a/documents/scrapers/157cbe654bdaa0a48e6650152ae34489.py
+++ b/documents/scrapers/157cbe654bdaa0a48e6650152ae34489.py
@@ -10,7 +10,7 @@
return 5
def getColumns(self,columns):
(id, date, title, description, notes) = columns
- return (id, date, description, title, notes)
+ return (id, date, title, description, notes)
def getTable(self,soup):
return soup.find_all('table')[4]
--- /dev/null
+++ b/documents/scrapers/1803322b27286950cab0c543168b5f21.txt
@@ -1,1 +1,2 @@
+multipage log
--- /dev/null
+++ b/documents/scrapers/1d404c4934f74feacd00dcb434e7c10a.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+ #def getTable(self,soup):
+ # return soup.find(id = "cphMain_C001_Col01").table
+ def getColumnCount(self):
+ return 5
+ def getColumns(self,columns):
+ (id, date, title, description,notes) = columns
+ return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+ print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+ ScraperImplementation().doScrape()
+
--- /dev/null
+++ b/documents/scrapers/24bd71114d3975ed9a63ad29624c62c9.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+ def getTable(self,soup):
+ return soup.find(id = "inner_content")
+ def getColumnCount(self):
+ return 2
+ def getColumns(self,columns):
+ (date, title) = columns
+ return (date, date, title, title, None)
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+ print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+ ScraperImplementation().doScrape()
+
--- /dev/null
+++ b/documents/scrapers/2cac2cd1f42687db2d04fa20b5b6a538.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+ #def getTable(self,soup):
+ # return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table
+ def getColumnCount(self):
+ return 3
+ def getColumns(self,columns):
+ (id, title, date) = columns
+ return (id, date, title, title, None)
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+ print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+ ScraperImplementation().doScrape()
+
--- /dev/null
+++ b/documents/scrapers/31685505438d393f45a90f442b8fa27f.txt
@@ -1,1 +1,2 @@
+pdf
--- /dev/null
+++ b/documents/scrapers/38ca99d2790975a40dde3fae41dbdc3d.py
@@ -1,1 +1,32 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+import dateutil
+from dateutil.parser import *
+from datetime import *
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+ def getColumnCount(self):
+ return 3
+ def getColumns(self,columns):
+ (date, title, description) = columns
+ return (date, date, title, description, None)
+ def getTitle(self, content, entry, doc):
+ i = 0
+ title = ""
+ for string in content.stripped_strings:
+ if i < 2:
+ title = title + string
+ i = i+1
+ doc.update({'title': title})
+ print title
+ return
+
+if __name__ == '__main__':
+ print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+ print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+ ScraperImplementation().doScrape()
+
--- a/documents/scrapers/3cd40b1240e987cbcd3f0e67054ce259.py
+++ b/documents/scrapers/3cd40b1240e987cbcd3f0e67054ce259.py
@@ -7,7 +7,7 @@
class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
def getColumns(self,columns):
(id, date, description, title, notes) = columns
- return (id, date, description, title, notes)
+ return (id, date, title, description, notes)
if __name__ == '__main__':
print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
--- /dev/null
+++ b/documents/scrapers/3d5871a44abbbc81ef5b3a420070755d.py
@@ -1,1 +1,47 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+import dateutil
+from dateutil.parser import *
+from datetime import *
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+ def getTable(self,soup):
+ return soup.find(class_ = "inner-column").table