<?php
// "About" page: emits the shared documents header, a heading and the shared footer.
include('template.inc.php');
// The original passed an empty string here. "About" matches the <h1> below.
// NOTE(review): the argument is presumably the page title - confirm against template.inc.php.
include_header_documents("About");
include_once('../include/common.inc.php');
?>
<h1>About</h1>
<?php
include_footer_documents();
?>
<?php
// Charts page preamble: emits the shared header and prepares the data used by the
// chart script further down the page:
//   $idtoname  - map of agency document id => trimmed agency name
//   $foidocsdb - handle on the 'disclosr-foidocuments' database
include('template.inc.php');
// The original passed an empty string here. "Charts" matches the page heading.
// NOTE(review): the argument is presumably the page title - confirm against template.inc.php.
include_header_documents("Charts");
include_once('../include/common.inc.php');

$agenciesdb = $server->get_db('disclosr-agencies');
$foidocsdb = $server->get_db('disclosr-foidocuments');

$idtoname = Array();
try {
    // Same error handling as the other view queries on this page, so a failed
    // request is reported through setteErrorHandler instead of escaping uncaught.
    foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) {
        $idtoname[$row->id] = trim($row->value->name);
    }
} catch (SetteeRestClientException $e) {
    setteErrorHandler($e);
}
?>
<div class="foundation-header">
    <h1><a href="about.php">Charts</a></h1>
    <h4 class="subheader">Lorem ipsum.</h4>
</div>
<div id="employees" style="width:1000px;height:900px;"></div>
<div id="byagency" style="width:1200px;height:300px;"></div>
<script id="source">
    // ---- Chart 1: number of documents per date (zoomable time series) ----
    window.onload = function() {
        $(document).ready(function() {
            var
                d1 = [],
                // Element hosting the time chart. The original referenced an undefined
                // `container` in the observe() calls below, which raised a ReferenceError.
                container = document.getElementById("employees"),
                options,
                graph,
                o;
<?php
// Emit one d1.push([timestamp in ms, count]) statement per key of the byDate view.
try {
    $rows = $foidocsdb->get_view("app", "byDate?group=true", null, true)->rows;
    // Index by the view key (the date string parsed below) so every date keeps its
    // own count. The original indexed by the count, so dates sharing a count
    // overwrote each other and ksort() ordered the points by count.
    $dataValues = Array();
    foreach ($rows as $row) {
        $dataValues[$row->key] = $row->value;
    }
    ksort($dataValues);
    foreach ($dataValues as $key => $value) {
        // '!' resets the fields not present in the format, so the point lands on
        // midnight instead of inheriting the current time of day.
        $date = date_create_from_format('!Y-m-d', $key);
        // Keys that are not Y-m-d dates yield false; skip them instead of passing
        // false to date_format().
        if ($date !== false) {
            echo "                d1.push([" . date_format($date, 'U') . "000, $value]);" . PHP_EOL;
        }
    }
} catch (SetteeRestClientException $e) {
    setteErrorHandler($e);
}
?>
            options = {
                xaxis : {
                    mode : 'time',
                    labelsAngle : 45
                },
                selection : {
                    mode : 'x'
                },
                HtmlText : false,
                title : 'Time'
            };

            // Draw graph with default options, overwriting with passed options
            function drawGraph (opts) {
                // Clone the options, so the 'options' variable always keeps intact.
                o = Flotr._.extend(Flotr._.clone(options), opts || {});
                // Return a new graph.
                return Flotr.draw(container, [ d1 ], o);
            }

            graph = drawGraph();

            Flotr.EventAdapter.observe(container, 'flotr:select', function(area){
                // Draw selected area
                graph = drawGraph({
                    xaxis : { min : area.x1, max : area.x2, mode : 'time', labelsAngle : 45 },
                    yaxis : { min : area.y1, max : area.y2 }
                });
            });

            // When graph is clicked, draw the graph with default area.
            Flotr.EventAdapter.observe(container, 'flotr:click', function () { graph = drawGraph(); });
        });
    };

    // ---- Chart 2: number of documents per agency (bar chart) ----
    var d2 = [];
    // agencylabels[i] is the display name for the bar at x = i.
    var agencylabels = [];

    // Tooltip text for a hovered bar: "<agency name> = <count>".
    function agencytrackformatter(obj) {
        return agencylabels[Math.floor(obj.x)] + " = " + obj.y;
    }

    // X axis tick label: the agency name rotated by 90 degrees, or nothing for
    // positions that have no agency.
    function agencytickformatter(val, axis) {
        if (agencylabels[Math.floor(val)]) {
            // The original closed this <p> with </b>.
            return '<p style="margin-top:8em;-webkit-transform:rotate(-90deg);">' + (agencylabels[Math.floor(val)]) + "</p>";
        } else {
            return "";
        }
    }
<?php
// Emit one bar and one label per row of the byAgencyID view.
try {
    $rows = $foidocsdb->get_view("app", "byAgencyID?group=true")->rows;
    $i = 0;
    foreach ($rows as $row) {
        // Fall back to the raw key when the id is not in the $idtoname map.
        $name = isset($idtoname[$row->key]) ? $idtoname[$row->key] : $row->key;
        // json_encode produces a correctly escaped JS string literal (quotes,
        // backslashes, newlines, "</script>"). The original only stripped "'".
        $label = json_encode($name);
        if ($label === false) {
            // Encoding failed; keep d2 and agencylabels aligned.
            $label = '""';
        }
        echo "    d2.push([" . $i . ", $row->value]);" . PHP_EOL;
        echo "    agencylabels.push(" . $label . ");" . PHP_EOL;
        $i++;
    }
} catch (SetteeRestClientException $e) {
    setteErrorHandler($e);
}
?>
    // Draw the graph
    Flotr.draw(
        document.getElementById("byagency"),
        [d2],
        {
            bars : {
                show : true,
                horizontal : false,
                shadowSize : 0,
                barWidth : 0.5
            },
            mouse : {
                track : true,
                relative : true,
                trackFormatter: agencytrackformatter
            },
            yaxis : {
                min : 0,
                autoscaleMargin : 1
            },
            xaxis: {
                minorTickFreq: 1,
                noTicks: agencylabels.length,
                showMinorLabels: true,
                tickFormatter: agencytickformatter
            },
            legend: {
                show: false
            }
        }
    );
</script>
<?php
include_footer_documents();
?>
<?php
// Disclosure-log overview page.
//
// Prints one table row per agency that is not suspended and has an foiEmail:
//   column 1 - agency name (linked to its website when one is recorded)
//   column 2 - the recorded FOIDocumentsURL, or a red cross when none is recorded
//   column 3 - scraper status for that URL, derived from files in ./scrapers/:
//                <id>.py                       -> green tick  (scraper exists)
//                <id>.txt == "no disclog"      -> yellow mark (nothing to scrape)
//                <id>.txt with other content   -> content echoed + orange mark (work in progress)
//                neither file                  -> red cross
// A summary line with percentages follows the table.
include('template.inc.php');
// The original passed an empty string here.
// NOTE(review): the argument is presumably the page title - confirm against template.inc.php.
include_header_documents("List of Disclosure Logs");
include_once('../include/common.inc.php');

echo "<table>
<tr><th>Agency Name</th><th>Disclosure Log URL recorded?</th><th>Do we monitor this URL?</th></tr>";

$agenciesdb = $server->get_db('disclosr-agencies');
// NOTE(review): $docsdb is not used anywhere in this script.
$docsdb = $server->get_db('disclosr-documents');

$agencies = 0;
$disclogs = 0;
$red = 0;
$green = 0;
$yellow = 0;
$orange = 0;

try {
    $rows = $agenciesdb->get_view("app", "byCanonicalName", null, true)->rows;
    if ($rows) {
        foreach ($rows as $row) {
            $agency = $row->value;
            $suspended = isset($agency->status) && $agency->status == "suspended";
            if ($suspended || !isset($agency->foiEmail)) {
                continue;
            }
            $agencies++;

            // Values read from the database are escaped before being placed in
            // HTML text or attributes (ENT_QUOTES because one href below is
            // single-quoted).
            $nameHtml = htmlspecialchars($agency->name, ENT_QUOTES, 'UTF-8');

            // Column 1: agency name.
            echo "<tr><td>";
            if (isset($agency->website)) {
                echo "<a href='" . htmlspecialchars($agency->website, ENT_QUOTES, 'UTF-8') . "'>";
            }
            echo "<b>" . $nameHtml . "</b>";
            if (isset($agency->website)) {
                echo "</a>";
            }
            if ($ENV == "DEV") {
                echo "<br>(" . htmlspecialchars($row->id, ENT_QUOTES, 'UTF-8') . ")";
            }
            echo "</td>\n";

            // Column 2: recorded disclosure log URL.
            $hasLog = isset($agency->FOIDocumentsURL);
            echo "<td>";
            if ($hasLog) {
                $disclogs++;
                $urlHtml = htmlspecialchars($agency->FOIDocumentsURL, ENT_QUOTES, 'UTF-8');
                echo '<a href="' . $urlHtml . '">' . $urlHtml . '</a>';
                if ($ENV == "DEV") {
                    echo '<br><small>(<a href="viewDocument.php?hash=' . md5($agency->FOIDocumentsURL) . '">'
                        . 'view local copy</a>)</small>';
                }
            } else {
                echo "<font color='red'><abbr title='No'>✘</abbr></font>";
            }

            // Column 3: scraper status (only meaningful when a URL is recorded).
            echo "</td>\n<td>";
            if ($hasLog) {
                $scraperBase = "./scrapers/" . $row->id;
                if (file_exists($scraperBase . '.py')) {
                    echo "<font color='green'><abbr title='Yes'>✔</abbr></font>";
                    $green++;
                } else if (file_exists($scraperBase . '.txt')) {
                    // Read the note once; the original read the file twice.
                    $note = file_get_contents($scraperBase . '.txt');
                    if (trim($note) === "no disclog") {
                        echo "<font color='yellow'><abbr title='No log table exists at URL to scrape'><b>◎</b></abbr></font>";
                        $yellow++;
                    } else {
                        // The note comes from a local file and is echoed as-is,
                        // exactly as the original did.
                        echo $note;
                        echo "<font color='orange'><abbr title='Work in progress'><b>▬</b></abbr></font>";
                        $orange++;
                    }
                } else {
                    echo "<font color='red'><abbr title='No'>✘</abbr></font>";
                    $red++;
                }
            }
            echo "</td></tr>\n";
        }
    }
} catch (SetteeRestClientException $e) {
    setteErrorHandler($e);
}
echo "</table>";

// Guard every ratio: when the view is empty or the query failed, $agencies and/or
// $disclogs are 0 and the original divided by zero here.
$pctWithLogs = $agencies > 0 ? round(($disclogs / $agencies) * 100) : 0;
$pctGreen = $disclogs > 0 ? round(($green / $disclogs) * 100) : 0;
$pctRed = $disclogs > 0 ? round(($red / $disclogs) * 100) : 0;
$pctOrange = $disclogs > 0 ? round(($orange / $disclogs) * 100) : 0;

echo $agencies . " agencies, " . $pctWithLogs . "% with disclosure logs; "
    . $pctGreen . "% logs with scrapers " . $pctRed . "% logs without scrapers " . $pctOrange . "% logs Work-In-Progress scrapers ";
include_footer_documents();
?>
{ | |
"venv": "", | |
"project-type": "Import from sources", | |
"name": "disclosr-documents", | |
"license": "GNU General Public License v3", | |
"description": "" | |
} |
import sys,os | import sys |
import os | |
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) | sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) |
import scrape | import scrape |
from bs4 import BeautifulSoup | from bs4 import BeautifulSoup |
from time import mktime | from time import mktime |
import feedparser | import feedparser |
import abc | import abc |
import unicodedata, re | import unicodedata |
import re | |
import dateutil | import dateutil |
from dateutil.parser import * | from dateutil.parser import * |
from datetime import * | from datetime import * |
import codecs | import codecs |
from StringIO import StringIO | |
from pdfminer.pdfparser import PDFDocument, PDFParser | |
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter, process_pdf | |
from pdfminer.pdfdevice import PDFDevice, TagExtractor | |
from pdfminer.converter import TextConverter | |
from pdfminer.cmapdb import CMapDB | |
from pdfminer.layout import LAParams | |
class GenericDisclogScraper(object): | class GenericDisclogScraper(object): |