add stats
add stats


Former-commit-id: 7d58ec500723843bb55f285d866ce8d0c0ae41de

--- a/documents/disclogsList.php
+++ b/documents/disclogsList.php
@@ -8,6 +8,11 @@
     <tr><th>Agency Name</th><th>Disclosure Log URL recorded?</th><th>Do we monitor this URL?</th></tr>";
 $agenciesdb = $server->get_db('disclosr-agencies');
 $docsdb = $server->get_db('disclosr-documents');
+$agencies = 0;
+$disclogs = 0;
+$red = 0;
+$green = 0;
+$orange = 0;
 try {
     $rows = $agenciesdb->get_view("app", "byCanonicalName", null, true)->rows;
 
@@ -19,10 +24,11 @@
             if ($ENV == "DEV")
                 echo "<br>(" . $row->id . ")";
             echo "</td>\n";
-
+            $agencies++; // one more agency row in the table
 
             echo "<td>";
             if (isset($row->value->FOIDocumentsURL)) {
+                $disclogs++;
                 echo '<a href="' . $row->value->FOIDocumentsURL . '">'
                 . $row->value->FOIDocumentsURL . '</a>';
                 if ($ENV == "DEV")
@@ -35,10 +41,13 @@
             if (isset($row->value->FOIDocumentsURL)) {
                 if (file_exists("./scrapers/" . $row->id . '.py')) {
                     echo "<font color='green'>✔</font>";
+                    $green++;
                 } else if (file_exists("./scrapers/" . $row->id . '.txt')) {
-                    echo "<font color='blue'><b>▬</b></font>";
+                    echo "<font color='orange'><b>▬</b></font>";
+                    $orange++;
                 } else {
                     echo "<font color='red'>✘</font>";
+                    $red++;
                 }
             }
             echo "</td></tr>\n";
@@ -48,5 +57,15 @@
     setteErrorHandler($e);
 }
 echo "</table>";
+// Summary statistics for the table above. Each ratio is guarded because the
+// counters are still zero when no rows were processed, and a zero divisor
+// raises a warning on older PHP and a DivisionByZeroError on PHP 8.
+$disclogPct = $agencies > 0 ? ($disclogs / $agencies) * 100 : 0;
+$greenPct = $disclogs > 0 ? ($green / $disclogs) * 100 : 0;
+$redPct = $disclogs > 0 ? ($red / $disclogs) * 100 : 0;
+$orangePct = $disclogs > 0 ? ($orange / $disclogs) * 100 : 0;
+echo $agencies." agencies ".$disclogPct."% with disclosure logs, ".$greenPct."% with scrapers ".$redPct."% without scrapers ".$orangePct."% WIP scrapers ";
+
 include_footer_documents();
 ?>
+

--- a/documents/scrape.py
+++ b/documents/scrape.py
@@ -204,12 +204,14 @@
 				scrapeAndStore(docsdb, agency[key],0,key,agency['_id'])
 			if key == 'website' and False:
 				scrapeAndStore(docsdb, agency[key],0,key,agency['_id'])
+				# Tab-indented like the rest of this file (was 32 spaces). This branch is
+				# disabled by `and False`, so lastScraped is not updated while it stays off.
+				agency['metadata']['lastScraped'] = time.time()
 			if key.endswith('URL') and False:
 				print key 
 				depth = 1
 				if 'scrapeDepth' in agency.keys():
 					depth = agency['scrapeDepth']
 				scrapeAndStore(docsdb, agency[key],depth,key,agency['_id'])
-		agency['metadata']['lastScraped'] = time.time()
 		agencydb.save(agency)
 

--- /dev/null
+++ b/documents/scrapers/0603dfcc930a791efaa64f31ae5fceda.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getTable(self,soup):
+                return soup.find(id="body-content").table
+        def getColumnCount(self):
+                return 5
+        def getColumns(self,columns):
+                (id, date, title, description, notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/0e46f8bd1414b1fdd4f0543d54a97500.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getTable(self,soup):
+                return soup.find(id = "maincontentcontainer").table
+        def getColumnCount(self):
+                return 5
+        def getColumns(self,columns):
+                (date, disclogdate, title, description, notes) = columns
+                return (date, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/1097fa8afdcf5db89d212d0979226667.py
@@ -1,1 +1,17 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumns(self,columns):
+                (id, date, title, description, notes) = columns
+                return (id, date, description, title, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/1ad74ca88932f90f0b92b69387171441.py
@@ -1,1 +1,17 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumns(self,columns):
+                (id,  date, title, description, notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/1fda9544d2a3fa4cd92aec4b206a6763.py
@@ -1,1 +1,24 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getTable(self,soup):
+                # BeautifulSoup's CSS-class keyword is `class_`; `_class` is taken as a
+                # literal "_class" attribute filter, so find() returns None and .table raises.
+                return soup.find(class_ = "article-content").table
+        def getColumnCount(self):
+                return 5
+        def getColumns(self,columns):
+                # the row is unpacked as title-then-date; return it as (id, date, title, description, notes)
+                (id, title, date, description, notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/269680be088f3d8e663251655f3825b4.py
@@ -1,1 +1,17 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumns(self,columns):
+                (date, id, title, description, notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/31b7c75cf484747b6b120680bddd33b0.py
@@ -1,1 +1,19 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumnCount(self):
+                return 2
+        def getColumns(self,columns):
+                (title, date) = columns
+                return (date, date, title, title, None)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/329fccdca068b78ab7edd550e2957398.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumnCount(self):
+                return 7
+        def getTable(self,soup):
+                return soup.find(class_ = "foi-disclosure")
+        def getColumns(self,columns):
+                (disclogid, id,  date, title, link, removedate, notes) = columns
+                return (id, date, title, title, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/3d426eb8c85c8f04b814eee597efd866.py
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getTable(self,soup):
+                return soup.find(id = "primary").table
+        def getColumnCount(self):
+                return 5
+        def getColumns(self,columns):
+                (id, date, title, description, notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/50601505ef69483121a6d130bb0515e4.txt
@@ -1,1 +1,1 @@
-
+apsc has ACMA style disclog

--- /dev/null
+++ b/documents/scrapers/6fa04af95fbe7de96daa2c7560e0aad3.txt
@@ -1,1 +1,19 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getTable(self,soup):
+                return soup.find(id = "content_div_50269").table
+        def getColumns(self,columns):
+                (id, date, title, description, notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/6fe3c812a99d486963133459b2768cf6.py
@@ -1,1 +1,17 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumns(self,columns):
+                (id, date, title, description, notes) = columns
+                return (id, date, description, title, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/734f12db07e844b30cd11dc98500f2ce.py
@@ -1,1 +1,19 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumnCount(self):
+                return 4
+        def getColumns(self,columns):
+                (id,  date, title, description) = columns
+                return (id, date, title, description, None)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/955dc4cb047b5439dfb65549ce2696a6.py
@@ -1,1 +1,17 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumns(self,columns):
+                (id,  date, title, description, notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/a2d871c5d28de1dde8a3b66c4957e1a5.py
@@ -1,1 +1,19 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumnCount(self):
+                return 7
+        def getColumns(self,columns):
+                (id, date, title, description,link,deldate, notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/bf6e587f166040b63681cd2ff76fbfdf.txt
@@ -1,1 +1,1 @@
-
+no disclog yet

--- /dev/null
+++ b/documents/scrapers/c1302c8d7cbbd911f0d4d8a4128f8079.txt
@@ -1,1 +1,1 @@
-
+uses RET disclog

--- /dev/null
+++ b/documents/scrapers/c25f628f9f38d889485d7a4bff873b23.txt
@@ -1,1 +1,20 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+       
+        def getColumnCount(self):
+                return 4
+        def getColumns(self,columns):
+                (id, date, title, description) = columns
+                return (id, date, title, description, None)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/c414e65ed728d05307d5b27f13e195e1.py
@@ -1,1 +1,19 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumnCount(self):
+                return 3
+        def getColumns(self,columns):
+                (date, title, description) = columns
+                return (date, date, title, description, None)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/f0caafbcf292c90e7b8ad18ddcf9afc3.txt
@@ -1,1 +1,21 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getTable(self,soup):
+                return soup.find(id = "genericContent").table.tbody
+        def getColumnCount(self):
+                return 5
+        def getColumns(self,columns):
+                (id,  date,title, description, notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- /dev/null
+++ b/documents/scrapers/fa9b3badd6c686398c1c6982a4b02475.py
@@ -1,1 +1,17 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import genericScrapers
+import scrape
+from bs4 import BeautifulSoup
 
+#http://www.doughellmann.com/PyMOTW/abc/
+class ScraperImplementation(genericScrapers.GenericOAICDisclogScraper):
+        def getColumns(self,columns):
+                (id,  date, title, description, notes) = columns
+                return (id, date, title, description, notes)
+
+if __name__ == '__main__':
+    print 'Subclass:', issubclass(ScraperImplementation, genericScrapers.GenericOAICDisclogScraper)
+    print 'Instance:', isinstance(ScraperImplementation(), genericScrapers.GenericOAICDisclogScraper)
+    ScraperImplementation().doScrape()
+

--- a/include/couchdb.inc.php
+++ b/include/couchdb.inc.php
@@ -10,6 +10,7 @@
 if (php_uname('n') == "KYUUBEY") {
 
-    $serverAddr = 'http://192.168.1.148:5984/';
+    // Previously the LAN host http://192.168.1.148:5984/; this machine now uses the local CouchDB as well.
+    $serverAddr = 'http://127.0.0.1:5984/';
 } else {
     $serverAddr = 'http://127.0.0.1:5984/';
 }