From: Maxious
Date: Fri, 30 Nov 2012 12:50:36 +0000
Subject: more scrapers
X-Git-Url: http://maxious.lambdacomplex.org/git/?p=disclosr.git&a=commitdiff&h=bffa93e6d665a9a84f42927ea7a61776fccc2bf9

---
more scrapers

Former-commit-id: a1b071fe60c375afcb0527b3c5a86844b8f0035b
---
--- a/admin/exportEmployees.csv.php
+++ b/admin/exportEmployees.csv.php
@@ -4,7 +4,8 @@
 $format = "csv";
 //$format = "json";
-if (isset($_REQUEST['format'])) $format = $_REQUEST['format'];
+if (isset($_REQUEST['format']))
+    $format = $_REQUEST['format'];
 setlocale(LC_CTYPE, 'C');
 if ($format == "csv") {
     $headers = Array("name");
@@ -21,7 +22,6 @@
             if (isset($row->value->statistics->employees)) {
                 $headers = array_unique(array_merge($headers, array_keys(object_to_array($row->value->statistics->employees))));
-
             }
         }
     } catch (SetteeRestClientException $e) {
@@ -40,15 +40,14 @@
         fputcsv($fp, $headers);
     } else if ($format == "json") {
         echo '{
-            "labels" : ["' . implode('","', $headers) . '"],'.PHP_EOL;
+            "labels" : ["' . implode('","', $headers) . '"],' . PHP_EOL;
     }
     try {
         $agencies = $db->get_view("app", "all", null, true)->rows;
         //print_r($agencies);
         $first = true;
         if ($format == "json") {
-            echo '"data" : ['.PHP_EOL;
-
+            echo '"data" : [' . PHP_EOL;
         }
         foreach ($agencies as $agency) {
@@ -56,25 +55,35 @@
             $row = Array();
             $agencyEmployeesArray = object_to_array($agency->value->statistics->employees);
             foreach ($headers as $i => $fieldName) {
+                if ($format == "csv") {
+                    if (isset($agencyEmployeesArray[$fieldName])) {
+                        $row[] = $agencyEmployeesArray[$fieldName]["value"] ;
+                    } else if ($i == 0) {
+                        $row[] = $agency->value->name;
+                    } else {
+                        $row[] = 0;
+                    }
+                } else if ($format == "json") {
                 if (isset($agencyEmployeesArray[$fieldName])) {
-                    $row[] = '['.$i.','.$agencyEmployeesArray[$fieldName]["value"].']';
+                    $row[] = '[' . $i . ',' . $agencyEmployeesArray[$fieldName]["value"] . ']';
                 } else {
-                    $row[] = '['.$i.',0]';
+                    $row[] = '[' . $i . ',0]';
                 }
+                }
             }
             if ($format == "csv") {
                 fputcsv($fp, array_values($row));
             } else if ($format == "json") {
-                if (!$first) echo ",";
-                echo '{"data" : [' . implode(",", array_values($row)) . '], "label": "'.$agency->value->name.'", "lines" : { "show" : true }, "points" : { "show" : true }}'.PHP_EOL;
+                if (!$first)
+                    echo ",";
+                echo '{"data" : [' . implode(",", array_values($row)) . '], "label": "' . $agency->value->name . '", "lines" : { "show" : true }, "points" : { "show" : true }}' . PHP_EOL;
                 $first = false;
             }
         }
     }
     if ($format == "json") {
-        echo ']
-    }'.PHP_EOL;
-
+        echo ']
+    }' . PHP_EOL;
     }
 } catch (SetteeRestClientException $e) {
     setteErrorHandler($e);
--- a/admin/importAPSCEmployees.php
+++ b/admin/importAPSCEmployees.php
@@ -47,13 +47,17 @@
         $changed = false;
         if (!isset($doc->statistics)) {
             $changed = true;
-            $doc->statistics = Array();
+            $doc->statistics = new stdClass();
+        }
+        if (!isset($doc->statistics->employees)) {
+            $changed = true;
+            $doc->statistics->employees = new stdClass();
         }
         foreach ($sum as $timePeriod => $value) {
             if (!isset($doc->statistics->employees->$timePeriod->value)
                     || $doc->statistics->employees->$timePeriod->value != $value) {
                 $changed = true;
-                $doc->statistics["employees"][$timePeriod] = Array("value" => $value, "source" => "http://apsc.gov.au/stateoftheservice/");
+                $doc->statistics->employees->$timePeriod = Array("value" => $value, "source" => "http://apsc.gov.au/stateoftheservice/");
             }
         }
         if ($changed) {
--- /dev/null
+++ b/admin/importAPSCEmployees2012.php
@@ -1,1 +1,86 @@
+<?php
+
+require_once '../include/common.inc.php';
+require('lib/phpquery/phpQuery/phpQuery.php');
+$db = $server->get_db('disclosr-agencies');
+$rows = $db->get_view("app", "byName")->rows;
+$nametoid = Array();
+$sums = Array();
+$functions = Array();
+foreach ($rows as $row) {
+    $nametoid[trim($row->key)] = $row->value;
+}
+
+
+$request = Requests::get("http://www.apsc.gov.au/publications-and-media/parliamentary/state-of-the-service/new-sosr/appendix-2-aps-agencies");
+$doc = phpQuery::newDocumentHTML($request->body);
+phpQuery::selectDocument($doc);
+foreach (pq('tr')->elements as $tr) {
+    //echo $tr->nodeValue.PHP_EOL;
+    $agency = "";
+    $employees = "";
+    $function = "";
+    $i = 0;
+    foreach ($tr->childNodes as $td) {
+        //echo $td->nodeValue." $i<br>";
"; + if ($i == 0) + $agency = $td->nodeValue; + if ($i == 2) { + $employees = trim(str_replace(",", "", $td->nodeValue)); + } + if ($i == 4) { + $function = $td->nodeValue; + } + $i++; + } + if ($agency != "" && $employees != "" && $function != "") { + $name = trim(str_replace('2','',$agency)); + //echo "$name

" . PHP_EOL; + if (isset($nametoid[$name])) { + $id = $nametoid[$name]; + //echo $id . "
" . PHP_EOL; + @$sums[$id]["2011-2012"] += $employees; + $functions[$id] = $function; + } else if ($agency != "Agency"){ + echo "
ERROR NAME '$agency' MISSING FROM ID LIST

" . PHP_EOL; + + die(); + } + } else { + echo "skipped $agency"; + } +} +//print_r($sums); +foreach ($sums as $id => $sum) { + echo $id . "
" . PHP_EOL; + $doc = $db->get($id); + echo $doc->name . "
" . PHP_EOL; + // print_r($doc); + $changed = false; + if (!isset($doc->statistics)) { + $changed = true; + $doc->statistics = new stdClass(); + } + if (!isset($doc->statistics->employees)) { + $changed = true; + $doc->statistics->employees = new stdClass(); + } + foreach ($sum as $timePeriod => $value) { + if (!isset($doc->statistics->employees->$timePeriod->value) + || $doc->statistics->employees->$timePeriod->value != $value) { + $changed = true; + $doc->statistics->employees->$timePeriod = Array("value" => $value, "source" => "http://apsc.gov.au/stateoftheservice/"); + $doc->employees = $value; + $doc->functionClassification = $functions[$id]; + } + } + + if ($changed) { + $db->save($doc); + } else { + echo "not changed" . "
" . PHP_EOL; + } +} +// employees: timeperiod, source = apsc state of service, value +?> + --- a/documents/disclogsList.php +++ b/documents/disclogsList.php @@ -12,6 +12,7 @@ $disclogs = 0; $red = 0; $green = 0; +$yellow = 0; $orange = 0; try { $rows = $agenciesdb->get_view("app", "byCanonicalName", null, true)->rows; @@ -19,47 +20,56 @@ if ($rows) { foreach ($rows as $row) { -if (!isset($row->value->status) || $row->value->status != "suspended") { - echo "". $row->value->name . ""; - if ($ENV == "DEV") - echo "
(" . $row->id . ")"; - echo "\n"; -$agencies++; + if ((!isset($row->value->status) || $row->value->status != "suspended") && isset($row->value->foiEmail)) { + echo ""; + if (isset($row->value->website)) echo ""; + echo "" . $row->value->name . ""; + if (isset($row->value->website)) echo ""; + if ($ENV == "DEV") + echo "
(" . $row->id . ")"; + echo "\n"; + $agencies++; - echo ""; - if (isset($row->value->FOIDocumentsURL)) { - $disclogs++; - echo '' - . $row->value->FOIDocumentsURL . ''; - if ($ENV == "DEV") - echo '
(' - . 'view local copy)'; - } else { - echo ""; - } - echo "\n"; - if (isset($row->value->FOIDocumentsURL)) { - if (file_exists("./scrapers/" . $row->id . '.py')) { - echo ""; - $green++; - } else if (file_exists("./scrapers/" . $row->id . '.txt')) { - echo ""; - $orange++; + echo ""; + if (isset($row->value->FOIDocumentsURL)) { + $disclogs++; + echo '' + . $row->value->FOIDocumentsURL . ''; + if ($ENV == "DEV") + echo '
(' + . 'view local copy)'; } else { echo ""; - $red++; } + echo "\n"; + if (isset($row->value->FOIDocumentsURL)) { + if (file_exists("./scrapers/" . $row->id . '.py')) { + echo ""; + $green++; + } else if (file_exists("./scrapers/" . $row->id . '.txt')) { + if (trim(file_get_contents("./scrapers/" . $row->id . '.txt')) == "no disclog") { + echo ""; + $yellow++; + } else { + echo file_get_contents("./scrapers/" . $row->id . '.txt'); + echo ""; + $orange++; + } + } else { + echo ""; + $red++; + } + } + echo "\n"; } - echo "\n"; } } -} } catch (SetteeRestClientException $e) { setteErrorHandler($e); } echo ""; -echo $agencies." agencies, ".round(($disclogs/$agencies)*100)."% with disclosure logs; " -.round(($green/$disclogs)*100)."% logs with scrapers ".round(($red/$disclogs)*100)."% logs without scrapers ".round(($orange/$disclogs)*100)."% logs Work-In-Progress scrapers "; +echo $agencies . " agencies, " . round(($disclogs / $agencies) * 100) . "% with disclosure logs; " + . round(($green / $disclogs) * 100) . "% logs with scrapers " . round(($red / $disclogs) * 100) . "% logs without scrapers " . round(($orange / $disclogs) * 100) . "% logs Work-In-Progress scrapers "; include_footer_documents(); ?> --- a/documents/genericScrapers.py +++ b/documents/genericScrapers.py @@ -9,6 +9,7 @@ import dateutil from dateutil.parser import * from datetime import * +import codecs class GenericDisclogScraper(object): __metaclass__ = abc.ABCMeta @@ -120,7 +121,7 @@ columns = row.find_all('td') if len(columns) == self.getColumnCount(): (id, date, title, description, notes) = self.getColumns(columns) - print ''.join(id.stripped_strings) + print ''.join(id.stripped_strings).encode('ascii', 'ignore') if id.string == None: hash = scrape.mkhash(self.remove_control_chars(url+(''.join(date.stripped_strings)))) else: --- /dev/null +++ b/documents/scrapers/1d404c4934f74feacd00dcb434e7c10a.py @@ -1,1 +1,21 @@ +import sys,os +sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) +import genericScrapers +import scrape +from bs4 import BeautifulSoup +#http://www.doughellmann.com/PyMOTW/abc/ +class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): + #def getTable(self,soup): + # return soup.find(id = "cphMain_C001_Col01").table + def getColumnCount(self): + return 5 + def getColumns(self,columns): + (id, date, title, description,notes) = columns + return (id, date, title, description, notes) + +if __name__ == '__main__': + print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper) + print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper) + ScraperImplementation().doScrape() + --- a/documents/scrapers/627f116dfe42c9f27ad6747be0aa44e2.txt +++ b/documents/scrapers/627f116dfe42c9f27ad6747be0aa44e2.txt @@ -1,2 +1,1 @@ -see parent dhs - +no disclog --- /dev/null +++ b/documents/scrapers/794ea270edc9aa4f70f2a84bbc5ecc7a.py @@ -1,1 +1,21 @@ +import sys,os +sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) +import genericScrapers +import scrape +from bs4 import BeautifulSoup +#http://www.doughellmann.com/PyMOTW/abc/ +class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper): + def getTable(self,soup): + return soup.find(id = "cphMain_C001_Col01").table + def getColumnCount(self): + return 5 + def getColumns(self,columns): + (id, date, title, description,notes) = columns + return (id, date, title, description, notes) + +if __name__ == '__main__': + print 'Subclass:', 
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+
--- a/documents/scrapers/7c6adc1d41cf029bf1a0959e5156477a.txt
+++ b/documents/scrapers/7c6adc1d41cf029bf1a0959e5156477a.txt
@@ -1,1 +1,1 @@
-
+acma style
--- /dev/null
+++ b/documents/scrapers/8e874a2fde8aa0ccdc6d14573d766540.py
@@ -1,1 +1,86 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+import codecs
+#http://www.doughellmann.com/PyMOTW/abc/
+class NewScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+    def getDescription(self,content, entry,doc):
+        link = None
+        links = []
+        description = ""
+        for atag in entry.find_all('a'):
+            if atag.has_key('href'):
+                link = scrape.fullurl(self.getURL(),atag['href'])
+                (url,mime_type,htcontent) = scrape.fetchURL(scrape.docsdb, link, "foidocuments", self.getAgencyID(), False)
+                if htcontent != None:
+                    if mime_type == "text/html" or mime_type == "application/xhtml+xml" or mime_type =="application/xml":
+                        # http://www.crummy.com/software/BeautifulSoup/documentation.html
+                        soup = BeautifulSoup(htcontent)
+                        for text in soup.find(id="divFullWidthColumn").stripped_strings:
+                            description = description + text.encode('ascii', 'ignore')
+
+                        for atag in soup.find(id="divFullWidthColumn").find_all("a"):
+                            if atag.has_key('href'):
+                                links.append(scrape.fullurl(link,atag['href']))
+        if links != []:
+            doc.update({'links': links})
+        if description != "":
+            doc.update({ 'description': description})
+
+    def getColumnCount(self):
+        return 2
+    def getTable(self,soup):
+        return soup.find(id = "TwoColumnSorting")
+    def getColumns(self,columns):
+        ( title, date) = columns
+        return (title, date, title, title, None)
+class OldScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+    def getDescription(self,content, entry,doc):
+        link = None
+        links = []
+        description = ""
+        for atag in entry.find_all('a'):
+            if atag.has_key('href'):
+                link = scrape.fullurl(self.getURL(),atag['href'])
+                (url,mime_type,htcontent) = scrape.fetchURL(scrape.docsdb, link, "foidocuments", self.getAgencyID(), False)
+                if htcontent != None:
+                    if mime_type == "text/html" or mime_type == "application/xhtml+xml" or mime_type =="application/xml":
+                        # http://www.crummy.com/software/BeautifulSoup/documentation.html
+                        soup = BeautifulSoup(htcontent)
+                        for text in soup.find(id="content-item").stripped_strings:
+                            description = description + text + " \n"
+                        for atag in soup.find(id="content-item").find_all("a"):
+                            if atag.has_key('href'):
+                                links.append(scrape.fullurl(link,atag['href']))
+        if links != []:
+            doc.update({'links': links})
+        if description != "":
+            doc.update({ 'description': description})
+
+        if links != []:
+            doc.update({'links': links})
+        if description != "":
+            doc.update({ 'description': description})
+
+    def getColumnCount(self):
+        return 2
+    def getTable(self,soup):
+        return soup.find(class_ = "doc-list")
+    def getColumns(self,columns):
+        (date, title) = columns
+        return (title, date, title, title, None)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(NewScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(NewScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    #NewScraperImplementation().doScrape()
+    print 'Subclass:', issubclass(OldScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(OldScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    osi = OldScraperImplementation()
+    osi.disclogURL = "http://archive.treasury.gov.au/content/foi_publications.asp?year=-1&abstract=0&classification=&=&titl=Disclosure+Log+-+Documents+Released+Under+FOI"
+    osi.doScrape()
+# old site too
+
--- a/documents/scrapers/8e874a2fde8aa0ccdc6d14573d766540.txt
+++ /dev/null
@@ -1,49 +1,1 @@
-import sys,os
-sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
-import genericScrapers
-import scrape
-from bs4 import BeautifulSoup
-#http://www.doughellmann.com/PyMOTW/abc/
-class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
-    def getDescription(self,content, entry,doc):
-        link = None
-        links = []
-        description = ""
-        for atag in entry.find_all('a'):
-            if atag.has_key('href'):
-                link = scrape.fullurl(self.getURL(),atag['href'])
-                (url,mime_type,htcontent) = scrape.fetchURL(scrape.docsdb, link, "foidocuments", self.getAgencyID(), False)
-                if htcontent != None:
-                    if mime_type == "text/html" or mime_type == "application/xhtml+xml" or mime_type =="application/xml":
-                        # http://www.crummy.com/software/BeautifulSoup/documentation.html
-                        soup = BeautifulSoup(htcontent)
-                        for row in soup.find(class_ = "ms-rteTable-GreyAlternating").find_all('tr'):
-                            if row != None:
-                                rowtitle = row.find('th').string
-                                description = description + "\n" + rowtitle + ": "
-                                for text in row.find('td').stripped_strings:
-                                    description = description + text
-                                for atag in row.find_all("a"):
-                                    if atag.has_key('href'):
-                                        links.append(scrape.fullurl(link,atag['href']))
-
-        if links != []:
-            doc.update({'links': links})
-        if description != "":
-            doc.update({ 'description': description})
-
-    def getColumnCount(self):
-        return 2
-    def getTable(self,soup):
-        return soup.find(class_ = "ms-rteTable-GreyAlternating")
-    def getColumns(self,columns):
-        (date, title) = columns
-        return (title, date, title, title, None)
-
-if __name__ == '__main__':
-    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
-    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
-    ScraperImplementation().doScrape()
-# old site too http://archive.treasury.gov.au/content/foi_publications.asp
-
--- /dev/null
+++ b/documents/scrapers/9f4815bfdcb918a036e4bb43a30f8d77.txt
@@ -1,1 +1,1 @@
-
+no disclog
--- a/documents/scrapers/bb96fe4065afb7e0872136dd657f9369.txt
+++ b/documents/scrapers/bb96fe4065afb7e0872136dd657f9369.txt
@@ -1,2 +1,1 @@
-# does not have any disclog entries or table
-
+no disclog
--- /dev/null
+++ b/documents/scrapers/bf16d4ba0d306ee03e5a1d32aaba3da1.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+    def getTable(self,soup):
+        return soup.find(summary="This table shows every FOI request to date.")
+    def getColumnCount(self):
+        return 5
+    def getColumns(self,columns):
+        (id, date, title, description,notes) = columns
+        return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+
--- a/documents/scrapers/bf6e587f166040b63681cd2ff76fbfdf.txt
+++ b/documents/scrapers/bf6e587f166040b63681cd2ff76fbfdf.txt
@@ -1,1 +1,1 @@
-no disclog yet
+no disclog
--- a/documents/scrapers/cb7f40e3495b682de6eee61bf09c1cfc.txt
+++ b/documents/scrapers/cb7f40e3495b682de6eee61bf09c1cfc.txt
@@ -1,2 +1,1 @@
-no log
-
+no disclog
--- a/documents/scrapers/d72744fb1e5d6e87af9a5ea16cc27fa5.txt
+++ b/documents/scrapers/d72744fb1e5d6e87af9a5ea16cc27fa5.txt
@@ -1,1 +1,1 @@
-
+acma style
--- /dev/null
+++ b/documents/scrapers/e0614dc3a9e25d375370ffd82f7165ac.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+    #def getTable(self,soup):
+    #    return soup.find(id = "ctl00_PlaceHolderMain_intro2__ControlWrapper_CerRichHtmlField").table
+    def getColumnCount(self):
+        return 6
+    def getColumns(self,columns):
+        (id, date, title, description,deldate, notes) = columns
+        return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+
--- a/documents/scrapers/e770921522a49dc77de208cc724ce134.txt
+++ b/documents/scrapers/e770921522a49dc77de208cc724ce134.txt
@@ -1,2 +1,1 @@
-c'est ne pas une table
-
+no disclog
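
Every new scraper in this commit follows the same template: subclass genericScrapers.GenericOAICDisclogScraper, override getTable() to locate the disclosure-log table on the agency page, getColumnCount() to declare how many cells a data row must have, and getColumns() to map those cells onto the (id, date, title, description, notes) tuple that the generic doScrape() hashes and stores. A minimal sketch of the next such scraper (Python 2, as in this repo; the "exampleTableId" element id is a placeholder for illustration, not a real agency page):

import sys,os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import genericScrapers

class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
    # Narrow scraping to the disclosure-log table; "exampleTableId" is a
    # hypothetical id -- replace with the real container on the target page.
    def getTable(self, soup):
        return soup.find(id="exampleTableId")
    # Rows are only scraped when they have exactly this many cells.
    def getColumnCount(self):
        return 5
    # Map the row's cells onto the (id, date, title, description, notes)
    # tuple that the generic doScrape() hashes and saves to couchdb.
    def getColumns(self, columns):
        (id, date, title, description, notes) = columns
        return (id, date, title, description, notes)

if __name__ == '__main__':
    ScraperImplementation().doScrape()

Agencies without a log instead get a one-line .txt sentinel: after this commit the literal string "no disclog" is what disclogsList.php checks for when counting the yellow category.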