fix that darn CASA scraper
fix that darn CASA scraper


Former-commit-id: 288d5ab60033e36608bf898869eecdf64180ba59

<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<!-- Generator: Adobe Illustrator 15.0.0, SVG Export Plug-In . SVG Version: 6.00 Build 0) --> <!-- Generator: Adobe Illustrator 15.0.0, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"> <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px" width="100px" <svg version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px" width="100px"
height="100px" viewBox="0 -25.635 100 100" enable-background="new 0 -25.635 100 100" xml:space="preserve"> height="100px" viewBox="0 -25.635 100 100" enable-background="new 0 -25.635 100 100" xml:space="preserve">
<g id="docs"> <g id="docs">
<path fill="#C2A385" d="M86.108-9.909l5.229,9.952c0,0-1.832,0.083-5.297,1.95c-2.312,1.249-6.468,6.246-6.468,6.246L71.827-2.909 <path fill="#C2A385" d="M86.108-9.909l5.229,9.952c0,0-1.832,0.083-5.297,1.95c-2.312,1.249-6.468,6.246-6.468,6.246L71.827-2.909
c0,0,4.201-3.996,6.513-5.242C81.805-10.022,86.108-9.909,86.108-9.909"/> c0,0,4.201-3.996,6.513-5.242C81.805-10.022,86.108-9.909,86.108-9.909"/>
<path fill="#C2A385" d="M65.604,20.731l-3.152-9.868c0,0-5.441,3.56-8.017,4.074c-1.008,0.202-1.93,0.335-2.749,0.425 <path fill="#C2A385" d="M65.604,20.731l-3.152-9.868c0,0-5.441,3.56-8.017,4.074c-1.008,0.202-1.93,0.335-2.749,0.425
L65.604,20.731z"/> L65.604,20.731z"/>
<path fill="#C2A385" d="M72.326,23.321c0.268-0.226,0.537-0.44,0.804-0.616c3.104-2.054,6.139-3.685,6.268-3.755l1.882-1.005 <path fill="#C2A385" d="M72.326,23.321c0.268-0.226,0.537-0.44,0.804-0.616c3.104-2.054,6.139-3.685,6.268-3.755l1.882-1.005
l1.369,1.634l2.864,3.417l3.198-4.334L76.68,9.783l-8.74,11.847L72.326,23.321z"/> l1.369,1.634l2.864,3.417l3.198-4.334L76.68,9.783l-8.74,11.847L72.326,23.321z"/>
<path fill="#C2A385" d="M39.918,10.823l4.825,1.86l3.33,0.212c0.04,0.001,0.269,0.015,0.652,0.015c0.91,0,2.798-0.072,5.196-0.551 <path fill="#C2A385" d="M39.918,10.823l4.825,1.86l3.33,0.212c0.04,0.001,0.269,0.015,0.652,0.015c0.91,0,2.798-0.072,5.196-0.551
c1.427-0.284,5.007-2.332,7.092-3.695l2.889-1.888l1.05,3.285l2.496,7.812l5.889-7.985l-4.625,0.163l1.348-6.225L55.133,0.593 c1.427-0.284,5.007-2.332,7.092-3.695l2.889-1.888l1.05,3.285l2.496,7.812l5.889-7.985l-4.625,0.163l1.348-6.225L55.133,0.593
l-2.095,9.667c-0.531-2.599-1.841-5.727-1.841-5.727L37.709,6.055c0,0,0.885,2.206,1.586,4.529L39.918,10.823z"/> l-2.095,9.667c-0.531-2.599-1.841-5.727-1.841-5.727L37.709,6.055c0,0,0.885,2.206,1.586,4.529L39.918,10.823z"/>
<path fill="#C2A385" d="M91.233,45.562c-1.102-0.691-2.323-1.142-3.415-1.433l-3.779,9.804c1.932,1.246,5.197,5.738,5.197,5.738 <path fill="#C2A385" d="M91.233,45.562c-1.102-0.691-2.323-1.142-3.415-1.433l-3.779,9.804c1.932,1.246,5.197,5.738,5.197,5.738
l7.336-9.206C96.572,50.466,93.162,46.771,91.233,45.562z"/> l7.336-9.206C96.572,50.466,93.162,46.771,91.233,45.562z"/>
<path fill="#C2A385" d="M93.192,32.166l-3.656,1.224c-0.019,0.007-1.779,0.613-4.117,2.069l2.817,4.868l0.626,1.08 <path fill="#C2A385" d="M93.192,32.166l-3.656,1.224c-0.019,0.007-1.779,0.613-4.117,2.069l2.817,4.868l0.626,1.08
c3.306-0.562,7.727-1.922,7.727-1.922l-2.332-15.261c0,0-2.934,1.277-5.852,2.221l2.318,2.765L93.192,32.166z"/> c3.306-0.562,7.727-1.922,7.727-1.922l-2.332-15.261c0,0-2.934,1.277-5.852,2.221l2.318,2.765L93.192,32.166z"/>
<path fill="#C2A385" d="M79.272,25.999l0.864,0.334l0.46,0.801l3.503,6.05c2.646-1.636,4.611-2.287,4.611-2.287l-8.075-9.632 <path fill="#C2A385" d="M79.272,25.999l0.864,0.334l0.46,0.801l3.503,6.05c2.646-1.636,4.611-2.287,4.611-2.287l-8.075-9.632
c0,0-2.584,1.391-5.376,3.188L79.272,25.999z"/> c0,0-2.584,1.391-5.376,3.188L79.272,25.999z"/>
</g> </g>
<g id="trunk"> <g id="trunk">
<circle fill="#C00000" cx="66.019" cy="52.945" r="7.877"/> <circle fill="#C00000" cx="66.019" cy="52.945" r="7.877"/>
<circle fill="#C00000" cx="22.693" cy="52.945" r="7.877"/> <circle fill="#C00000" cx="22.693" cy="52.945" r="7.877"/>
<path fill="#C00000" d="M22.693,42.441c1.915,0,3.705,0.522,5.251,1.421V28.001H12.191L3,37.192v14.439h9.281 <path fill="#C00000" d="M22.693,42.441c1.915,0,3.705,0.522,5.251,1.421V28.001H12.191L3,37.192v14.439h9.281
C12.931,46.459,17.347,42.441,22.693,42.441z"/> C12.931,46.459,17.347,42.441,22.693,42.441z"/>
<path fill="#C00000" d="M55.925,50.075l-9.583-3.695H30.88c1.186,1.476,1.978,3.28,2.225,5.252h22.502 <path fill="#C00000" d="M55.925,50.075l-9.583-3.695H30.88c1.186,1.476,1.978,3.28,2.225,5.252h22.502
C55.674,51.1,55.78,50.58,55.925,50.075z"/> C55.674,51.1,55.78,50.58,55.925,50.075z"/>
<path fill="#C00000" d="M79.312,28.328L39.961,13.149l-9.384,24.335l26.381,10.174c1.824-3.115,5.198-5.218,9.062-5.218 <path fill="#C00000" d="M79.312,28.328L39.961,13.149l-9.384,24.335l26.381,10.174c1.824-3.115,5.198-5.218,9.062-5.218
c5.791,0,10.503,4.712,10.503,10.502c0,0.744-0.081,1.471-0.229,2.173l4.713,1.817L86.95,41.52L79.312,28.328z"/> c5.791,0,10.503,4.712,10.503,10.502c0,0.744-0.081,1.471-0.229,2.173l4.713,1.817L86.95,41.52L79.312,28.328z"/>
</g> </g>
</svg> </svg>
   
<?php <?php
   
require_once '../include/common.inc.php'; require_once '../include/common.inc.php';
//function createFOIDocumentsDesignDoc() { //function createFOIDocumentsDesignDoc() {
   
// Build and save the "_design/app" design document for the
// 'disclosr-foidocuments' database.
$foidb = $server->get_db('disclosr-foidocuments');
$obj = new stdClass();
$obj->_id = "_design/" . urlencode("app");
$obj->language = "javascript";
// Create the nested containers explicitly. Assigning through an undefined
// property chain ($obj->views->all->map = ...) relies on implicit object
// creation, which raises a warning on PHP 5.4-7 and an Error on PHP 8.
$obj->views = new stdClass();
foreach (array('all', 'byDate', 'byDateMonthYear', 'byAgencyID', 'fieldNames') as $viewName) {
    $obj->views->$viewName = new stdClass();
}
// Every document, keyed by its id.
$obj->views->all->map = "function(doc) { emit(doc._id, doc); };";
// Documents keyed by date, leaving out those titled "Disclosure Log Updated".
$obj->views->byDate->map = "function(doc) { if (doc.title != \"Disclosure Log Updated\") { emit(doc.date, doc); } };";
// Same key as byDate but without the title filter; reduced to a row count.
$obj->views->byDateMonthYear->map = "function(doc) { emit(doc.date, doc); };";
$obj->views->byDateMonthYear->reduce = "_count";
$obj->views->byAgencyID->map = "function(doc) { emit(doc.agencyID, doc); };";
$obj->views->byAgencyID->reduce = "_count";
// One row per property name present on each document.
$obj->views->fieldNames->map = 'function(doc) { for(var propName in doc) { emit(propName, doc._id); }}';
// Use the built-in counter: a hand-written "return values.length" reduce
// miscounts on rereduce (it counts partial results, not rows). This also
// matches the fieldNames view of the agencies design document.
$obj->views->fieldNames->reduce = '_count';
// allow safe updates (even if slightly slower due to extra: rev-detection check).
$foidb->save($obj, true);
   
   
//function createDocumentsDesignDoc() { //function createDocumentsDesignDoc() {
// Build and save the "_design/app" design document for the
// 'disclosr-documents' database.
$docdb = $server->get_db('disclosr-documents');

$obj = new stdClass();
$obj->_id = "_design/" . urlencode("app");
$obj->language = "javascript";
// Create the nested containers explicitly. Assigning through an undefined
// property chain ($obj->views->web_server->map = ...) relies on implicit
// object creation, which raises a warning on PHP 5.4-7 and an Error on PHP 8.
$obj->views = new stdClass();
foreach (array('web_server', 'byAgency', 'byURL', 'agency', 'byWebServer',
               'datasets', 'datasetGroups', 'getValidationRequired') as $viewName) {
    $obj->views->$viewName = new stdClass();
}
// Counting views: emit 1 per document and sum per key.
$obj->views->web_server->map = "function(doc) {\n emit(doc.web_server, 1);\n}";
$obj->views->web_server->reduce = "_sum";
$obj->views->byAgency->map = "function(doc) {\n emit(doc.agencyID, 1);\n}";
$obj->views->byAgency->reduce = "_sum";
// Lookup views: emit the whole document under the given key.
$obj->views->byURL->map = "function(doc) {\n emit(doc.url, doc);\n}";
$obj->views->agency->map = "function(doc) {\n emit(doc.agencyID, doc);\n}";
$obj->views->byWebServer->map = "function(doc) {\n emit(doc.web_server, doc);\n}";

// Only documents whose fieldName is "data".
$obj->views->datasets->map = "function(doc) {\nif (doc.fieldName == \"data\") {\n emit(doc._id, doc);\n}\n}";
// One row per "data.gov.au Category" metadata entry of such documents.
$obj->views->datasetGroups->map = "function(doc) {\nif (doc.fieldName == \"data\") {\n doc.metadata[\"data.gov.au Category\"] && doc.metadata[\"data.gov.au Category\"].forEach(function(tag) {\n emit(tag, doc.url); \n });\n}\n}";
// text/html documents that have no "validation" property yet.
$obj->views->getValidationRequired->map = "function(doc) {\nif (doc.mime_type == \"text/html\" \n&& typeof(doc.validation) == \"undefined\") {\n emit(doc._id, doc._attachments);\n}\n}";
$docdb->save($obj, true);
   
   
   
   
//function createAgencyDesignDoc() { //function createAgencyDesignDoc() {
// Build and save the "_design/app" design document for the
// 'disclosr-agencies' database.
$db = $server->get_db('disclosr-agencies');
$obj = new stdClass();
$obj->_id = "_design/" . urlencode("app");
$obj->language = "javascript";
// Create the nested containers explicitly. Assigning through an undefined
// property chain ($obj->views->all->map = ...) relies on implicit object
// creation, which raises a warning on PHP 5.4-7 and an Error on PHP 8.
$obj->views = new stdClass();
foreach (array('all', 'byABN', 'byCanonicalName', 'byDeptStateName', 'parentOrgs',
               'byName', 'foiEmails', 'byLastModified', 'getActive', 'getSuspended',
               'getScrapeRequired', 'showNamesABNs', 'getConflicts', 'getStatistics',
               'score', 'scoreHas', 'fieldNames') as $viewName) {
    $obj->views->$viewName = new stdClass();
}
$obj->views->all->map = "function(doc) { emit(doc._id, doc); };";
$obj->views->byABN->map = "function(doc) { emit(doc.abn, doc); };";
// Documents that have a parentOrg or are an 'FMA-DepartmentOfState', keyed by name.
$obj->views->byCanonicalName->map = "function(doc) {
if (doc.parentOrg || doc.orgType == 'FMA-DepartmentOfState') {
emit(doc.name, doc);
}
};";
$obj->views->byDeptStateName->map = "function(doc) {
if (doc.orgType == 'FMA-DepartmentOfState') {
emit(doc.name, doc._id);
}
};";
$obj->views->parentOrgs->map = "function(doc) {
if (doc.parentOrg) {
emit(doc._id, doc.parentOrg);
}
};";
// Name lookup for non-suspended documents: emits the name, the shortName and
// every non-empty entry of otherNames / foiBodies / positions that differs
// from the name, each mapped to the document id.
$obj->views->byName->map = 'function(doc) {
if (typeof(doc["status"]) == "undefined" || doc["status"] != "suspended") {
emit(doc.name, doc._id);
if (typeof(doc.shortName) != "undefined" && doc.shortName != doc.name) {
emit(doc.shortName, doc._id);
}
for (name in doc.otherNames) {
if (doc.otherNames[name] != "" && doc.otherNames[name] != doc.name) {
emit(doc.otherNames[name], doc._id);
}
}
for (name in doc.foiBodies) {
if (doc.foiBodies[name] != "" && doc.foiBodies[name] != doc.name) {
emit(doc.foiBodies[name], doc._id);
}
}
for (name in doc.positions) {
if (doc.positions[name] != "" && doc.positions[name] != doc.name) {
emit(doc.positions[name], doc._id);
}
}
}
};';

$obj->views->foiEmails->map = "function(doc) {
emit(doc._id, doc.foiEmail);
};";

$obj->views->byLastModified->map = "function(doc) { emit(doc.metadata.lastModified, doc); }";
$obj->views->getActive->map = 'function(doc) { if (doc.status == "active") { emit(doc._id, doc); } };';
$obj->views->getSuspended->map = 'function(doc) { if (doc.status == "suspended") { emit(doc._id, doc); } };';
// NOTE(review): Date.parse() returns a number, so lastScrape.getTime() looks
// like it would throw for any parseable date. The map also reads the current
// time, so its output is not a pure function of the document. Left unchanged
// because fixing it alters which rows the view returns - confirm intent.
$obj->views->getScrapeRequired->map = "function(doc) {

var lastScrape = Date.parse(doc.metadata.lastScraped);

var today = new Date();

if (!lastScrape || lastScrape.getTime() + 1000 != today.getTime()) {
emit(doc._id, doc);
}

};";
$obj->views->showNamesABNs->map = "function(doc) { emit(doc._id, {name: doc.name, abn: doc.abn}); };";
// Documents with conflicts: current revision followed by the conflicting ones.
$obj->views->getConflicts->map = "function(doc) {
if (doc._conflicts) {
emit(null, [doc._rev].concat(doc._conflicts));
}
}";
// Sum of statistic values, keyed by [statisticSet, statisticPeriod].
$obj->views->getStatistics->map =
"function(doc) {
if (doc.statistics) {
for (var statisticSet in doc.statistics) {
for (var statisticPeriod in doc.statistics[statisticSet]) {
emit([statisticSet,statisticPeriod], doc.statistics[statisticSet][statisticPeriod]['value']);
}
}
}
}";
$obj->views->getStatistics->reduce = '_sum';
// http://stackoverflow.com/questions/646628/javascript-startswith
// Score = number of defined, non-empty properties on a non-suspended document.
$obj->views->score->map = 'if(!String.prototype.startsWith){
String.prototype.startsWith = function (str) {
return !this.indexOf(str);
}
}

function(doc) {
count = 0;
if (doc["status"] != "suspended") {
for(var propName in doc) {
if(typeof(doc[propName]) != "undefined" && doc[propName] != "") {
count++;
}
}
portfolio = doc.parentOrg;
if (doc.orgType == "FMA-DepartmentOfState") {
portfolio = doc._id;
}
if (doc.orgType == "Court-Commonwealth" || doc.orgType == "FMA-DepartmentOfParliament") {
portfolio = doc.orgType;
}
emit(count+doc._id, {id:doc._id, name: doc.name, score:count, orgType: doc.orgType, portfolio:portfolio});
}
}';
// Per-property tally of "has*" / "*URL" fields across non-suspended documents,
// plus a "total" row counting those documents.
$obj->views->scoreHas->map = 'if(!String.prototype.startsWith){
String.prototype.startsWith = function (str) {
return !this.indexOf(str);
}
}
if(!String.prototype.endsWith){
String.prototype.endsWith = function(suffix) {
    return this.indexOf(suffix, this.length - suffix.length) !== -1;
};
}
function(doc) {
if (typeof(doc["status"]) == "undefined" || doc["status"] != "suspended") {
for(var propName in doc) {
if(typeof(doc[propName]) != "undefined" && (propName.startsWith("has") || propName.endsWith("URL"))) {
emit(propName, 1);
}
}
emit("total", 1);
}
}';
$obj->views->scoreHas->reduce = '_sum';
// One row per property name present on each document, reduced to a count.
$obj->views->fieldNames->map = '
function(doc) {
for(var propName in doc) {
emit(propName, doc._id);
}
}';
$obj->views->fieldNames->reduce = '_count';
// allow safe updates (even if slightly slower due to extra: rev-detection check).
$db->save($obj, true);
?> ?>
   
import sys import sys
import os import os
   
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import scrape import scrape
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from time import mktime from time import mktime
import feedparser import feedparser
import abc import abc
import unicodedata import unicodedata
import re import re
import dateutil import dateutil
from dateutil.parser import * from dateutil.parser import *
from datetime import * from datetime import *
import codecs import codecs
   
import difflib import difflib
   
from StringIO import StringIO from StringIO import StringIO
   
from pdfminer.pdfparser import PDFDocument, PDFParser from pdfminer.pdfparser import PDFDocument, PDFParser
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter, process_pdf from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter, process_pdf
from pdfminer.pdfdevice import PDFDevice, TagExtractor from pdfminer.pdfdevice import PDFDevice, TagExtractor
from pdfminer.converter import TextConverter from pdfminer.converter import TextConverter
from pdfminer.cmapdb import CMapDB from pdfminer.cmapdb import CMapDB
from pdfminer.layout import LAParams from pdfminer.layout import LAParams
   
   
class GenericDisclogScraper(object): class GenericDisclogScraper(object):
__metaclass__ = abc.ABCMeta __metaclass__ = abc.ABCMeta
agencyID = None agencyID = None
disclogURL = None disclogURL = None
   
def remove_control_chars(self, input):
    """Return ``input`` with every character outside printable ASCII dropped.

    Only characters whose code point is between 32 (space) and 126 (``~``)
    inclusive are kept; everything else, including tabs, newlines, DEL and
    non-ASCII characters, is removed.
    """
    # Chained comparison instead of ``ord(ch) in range(32, 127)``: the range
    # was re-evaluated for every character (a fresh list each time on
    # Python 2).
    return "".join(ch for ch in input if 32 <= ord(ch) < 127)
   
def getAgencyID(self):
    """Return the disclosr agency id, deriving it on first use.

    When ``self.agencyID`` is unset, it is taken from the base name of the
    running script (``sys.argv[0]``) with a trailing ``.py`` removed, and
    the result is cached on the instance.
    """
    if self.agencyID is None:
        script_name = os.path.basename(sys.argv[0])
        # Strip only a trailing ".py": str.replace would also remove ".py"
        # from the middle of a name (e.g. "my.pyramid.py" -> "myramid").
        if script_name.endswith(".py"):
            script_name = script_name[:-len(".py")]
        self.agencyID = script_name
    return self.agencyID
   
def getURL(self):
    """Return the disclosure log URL, looking it up on first use.

    A URL already stored in ``self.disclogURL`` is returned as-is.
    Otherwise the agency record is fetched from ``scrape.agencydb`` by
    agency id, its ``'FOIDocumentsURL'`` entry is cached on the instance,
    and that value is returned.
    """
    known_url = self.disclogURL
    if known_url is not None:
        return known_url
    record = scrape.agencydb.get(self.getAgencyID())
    self.disclogURL = record['FOIDocumentsURL']
    return self.disclogURL
   
@abc.abstractmethod @abc.abstractmethod
def doScrape(self): def doScrape(self):