fix charts
fix charts


Former-commit-id: d6e49522e61927665c8ba633dad5a13344f34841

<?php <?php
   
include('template.inc.php'); include('template.inc.php');
include_header_documents(""); include_header_documents("About");
include_once('../include/common.inc.php'); include_once('../include/common.inc.php');
?> ?>
<h1>About</h1> <h1>About</h1>
<?php <?php
include_footer_documents(); include_footer_documents();
?> ?>
   
<?php <?php
include('template.inc.php'); include('template.inc.php');
include_header_documents(""); include_header_documents("Charts");
include_once('../include/common.inc.php'); include_once('../include/common.inc.php');
$agenciesdb = $server->get_db('disclosr-agencies'); $agenciesdb = $server->get_db('disclosr-agencies');
   
$idtoname = Array(); $idtoname = Array();
foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) { foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) {
$idtoname[$row->id] = trim($row->value->name); $idtoname[$row->id] = trim($row->value->name);
} }
$foidocsdb = $server->get_db('disclosr-foidocuments'); $foidocsdb = $server->get_db('disclosr-foidocuments');
   
?> ?>
<div class="foundation-header"> <div class="foundation-header">
<h1><a href="about.php">Charts</a></h1> <h1><a href="about.php">Charts</a></h1>
<h4 class="subheader">Lorem ipsum.</h4> <h4 class="subheader">Lorem ipsum.</h4>
</div> </div>
<div id="employees" style="width:1000px;height:900px;"></div> <div id="bydate" style="width:1000px;height:300px;"></div>
  <div id="byagency" style="width:1200px;height:300px;"></div>
<script id="source"> <script id="source">
window.onload = function() { window.onload = function() {
$(document).ready(function() { $(document).ready(function() {
var var
d1 = [], d1 = [],
start = new Date("2009/01/01 01:00").getTime(), options1,
options, o1;
graph,  
i, x, o;  
   
<?php <?php
try { try {
$rows = $foidocsdb->get_view("app", "byDate?group=true", null, true)->rows; $rows = $foidocsdb->get_view("app", "byDateMonthYear?group=true")->rows;
   
   
$dataValues = Array(); $dataValues = Array();
foreach ($rows as $row) { foreach ($rows as $row) {
$dataValues[$row->value] = $row->key; $dataValues[$row->key] = $row->value;
} }
$i = 0; $i = 0;
ksort($dataValues); ksort($dataValues);
foreach ($dataValues as $value => $key) { foreach ($dataValues as $key => $value) {
$date = date_create_from_format('Y-m-d', $key); $date = date_create_from_format('Y-m-d', $key);
if (date_format($date, 'U') != "") { if (date_format($date, 'U') != "") {
echo " d1.push([".date_format($date, 'U')."000, $value]);" . PHP_EOL; echo " d1.push([".date_format($date, 'U')."000, $value]);" . PHP_EOL;
// echo " emplabels.push('$key');" . PHP_EOL; // echo " emplabels.push('$key');" . PHP_EOL;
$i++; $i++;
} }
} }
} catch (SetteeRestClientException $e) { } catch (SetteeRestClientException $e) {
setteErrorHandler($e); setteErrorHandler($e);
} }
?> ?>
   
   
options = { options1 = {
xaxis : { xaxis : {
mode : 'time', mode : 'time',
labelsAngle : 45 labelsAngle : 45
}, },
selection : { selection : {
mode : 'x' mode : 'x'
}, },
HtmlText : false, HtmlText : false,
title : 'Time' title : 'Time'
}; };
// Draw graph with default options, overwriting with passed options // Draw graph with default options, overwriting with passed options
function drawGraph (opts) { function drawGraph (opts) {
   
// Clone the options, so the 'options' variable always keeps intact. // Clone the options, so the 'options' variable always keeps intact.
o = Flotr._.extend(Flotr._.clone(options), opts || {}); o1 = Flotr._.extend(Flotr._.clone(options1), opts || {});
   
// Return a new graph. // Return a new graph.
return Flotr.draw( return Flotr.draw(
document.getElementById("employees"), document.getElementById("bydate"),
[ d1 ], [ d1 ],
o o1
); );
} }
   
graph = drawGraph(); graph = drawGraph();
Flotr.EventAdapter.observe(container, 'flotr:select', function(area){ Flotr.EventAdapter.observe(document.getElementById("bydate"), 'flotr:select', function(area){
// Draw selected area // Draw selected area
graph = drawGraph({ graph = drawGraph({
xaxis : { min : area.x1, max : area.x2, mode : 'time', labelsAngle : 45 }, xaxis : { min : area.x1, max : area.x2, mode : 'time', labelsAngle : 45 },
yaxis : { min : area.y1, max : area.y2 } yaxis : { min : area.y1, max : area.y2 }
}); });
}); });
// When graph is clicked, draw the graph with default area. // When graph is clicked, draw the graph with default area.
Flotr.EventAdapter.observe(container, 'flotr:click', function () { graph = drawGraph(); }); Flotr.EventAdapter.observe(document.getElementById("bydate"), 'flotr:click', function () { graph = drawGraph(); });
   
}); });
}; };
   
  var d2 = [];
  var agencylabels = [];
  function agencytrackformatter(obj) {
   
  return agencylabels[Math.floor(obj.x)] +" = "+obj.y;
   
  }
  function agencytickformatter(val, axis) {
  if (agencylabels[Math.floor(val)]) {
  return '<p style="margin-top:8em;-webkit-transform:rotate(-90deg);">'+(agencylabels[Math.floor(val)])+"</b>";
   
  } else {
  return "";
  }
  }
  <?php
  try {
  $rows = $foidocsdb->get_view("app", "byAgencyID?group=true")->rows;
   
   
  $dataValues = Array();
  $i = 0;
  foreach ($rows as $row) {
  echo " d2.push([".$i.", $row->value]);" . PHP_EOL;
  echo " agencylabels.push(['".str_replace("'","",$idtoname[$row->key])."']);" . PHP_EOL;
   
  $i++;
  }
  } catch (SetteeRestClientException $e) {
  setteErrorHandler($e);
  }
  ?>
  // Draw the graph
  Flotr.draw(
  document.getElementById("byagency"),
  [d2],
  {
  bars : {
  show : true,
  horizontal : false,
  shadowSize : 0,
  barWidth : 0.5
  },
  mouse : {
  track : true,
  relative : true,
  trackFormatter: agencytrackformatter
  },
  yaxis : {
  min : 0,
  autoscaleMargin : 1
  },
  xaxis: {
  minorTickFreq: 1,
  noTicks: agencylabels.length,
  showMinorLabels: true,
  tickFormatter: agencytickformatter
  },
  legend: {
  show: false
  }
  }
  );
</script> </script>
   
<?php <?php
include_footer_documents(); include_footer_documents();
?> ?>
   
   
<?php <?php
   
include('template.inc.php'); include('template.inc.php');
include_header_documents(""); include_header_documents("List of Disclosure Logs");
include_once('../include/common.inc.php'); include_once('../include/common.inc.php');
   
echo "<table> echo "<table>
<tr><th>Agency Name</th><th>Disclosure Log URL recorded?</th><th>Do we monitor this URL?</th></tr>"; <tr><th>Agency Name</th><th>Disclosure Log URL recorded?</th><th>Do we monitor this URL?</th></tr>";
$agenciesdb = $server->get_db('disclosr-agencies'); $agenciesdb = $server->get_db('disclosr-agencies');
$docsdb = $server->get_db('disclosr-documents'); $docsdb = $server->get_db('disclosr-documents');
$agencies = 0; $agencies = 0;
$disclogs = 0; $disclogs = 0;
$red = 0; $red = 0;
$green = 0; $green = 0;
$yellow = 0; $yellow = 0;
$orange = 0; $orange = 0;
try { try {
$rows = $agenciesdb->get_view("app", "byCanonicalName", null, true)->rows; $rows = $agenciesdb->get_view("app", "byCanonicalName", null, true)->rows;
   
   
if ($rows) { if ($rows) {
foreach ($rows as $row) { foreach ($rows as $row) {
if ((!isset($row->value->status) || $row->value->status != "suspended") && isset($row->value->foiEmail)) { if ((!isset($row->value->status) || $row->value->status != "suspended") && isset($row->value->foiEmail)) {
echo "<tr><td>"; echo "<tr><td>";
if (isset($row->value->website)) echo "<a href='" . $row->value->website . "'>"; if (isset($row->value->website)) echo "<a href='" . $row->value->website . "'>";
echo "<b>" . $row->value->name . "</b>"; echo "<b>" . $row->value->name . "</b>";
if (isset($row->value->website)) echo "</a>"; if (isset($row->value->website)) echo "</a>";
if ($ENV == "DEV") if ($ENV == "DEV")
echo "<br>(" . $row->id . ")"; echo "<br>(" . $row->id . ")";
echo "</td>\n"; echo "</td>\n";
$agencies++; $agencies++;
   
echo "<td>"; echo "<td>";
if (isset($row->value->FOIDocumentsURL)) { if (isset($row->value->FOIDocumentsURL)) {
$disclogs++; $disclogs++;
echo '<a href="' . $row->value->FOIDocumentsURL . '">' echo '<a href="' . $row->value->FOIDocumentsURL . '">'
. $row->value->FOIDocumentsURL . '</a>'; . $row->value->FOIDocumentsURL . '</a>';
if ($ENV == "DEV") if ($ENV == "DEV")
echo '<br><small>(<a href="viewDocument.php?hash=' . md5($row->value->FOIDocumentsURL) . '">' echo '<br><small>(<a href="viewDocument.php?hash=' . md5($row->value->FOIDocumentsURL) . '">'
. 'view local copy</a>)</small>'; . 'view local copy</a>)</small>';
} else { } else {
echo "<font color='red'><abbr title='No'>✘</abbr></font>"; echo "<font color='red'><abbr title='No'>✘</abbr></font>";
} }
echo "</td>\n<td>"; echo "</td>\n<td>";
if (isset($row->value->FOIDocumentsURL)) { if (isset($row->value->FOIDocumentsURL)) {
if (file_exists("./scrapers/" . $row->id . '.py')) { if (file_exists("./scrapers/" . $row->id . '.py')) {
echo "<font color='green'><abbr title='Yes'>✔</abbr></font>"; echo "<font color='green'><abbr title='Yes'>✔</abbr></font>";
$green++; $green++;
} else if (file_exists("./scrapers/" . $row->id . '.txt')) { } else if (file_exists("./scrapers/" . $row->id . '.txt')) {
if (trim(file_get_contents("./scrapers/" . $row->id . '.txt')) == "no disclog") { if (trim(file_get_contents("./scrapers/" . $row->id . '.txt')) == "no disclog") {
echo "<font color='yellow'><abbr title='No log table exists at URL to scrape'><b>◎</b></abbr></font>"; echo "<font color='yellow'><abbr title='No log table exists at URL to scrape'><b>◎</b></abbr></font>";
$yellow++; $yellow++;
} else { } else {
echo file_get_contents("./scrapers/" . $row->id . '.txt'); echo file_get_contents("./scrapers/" . $row->id . '.txt');
echo "<font color='orange'><abbr title='Work in progress'><b>▬</b></abbr></font>"; echo "<font color='orange'><abbr title='Work in progress'><b>▬</b></abbr></font>";
$orange++; $orange++;
} }
} else { } else {
echo "<font color='red'><abbr title='No'>✘</abbr></font>"; echo "<font color='red'><abbr title='No'>✘</abbr></font>";
$red++; $red++;
} }
} }
echo "</td></tr>\n"; echo "</td></tr>\n";
} }
} }
} }
} catch (SetteeRestClientException $e) { } catch (SetteeRestClientException $e) {
setteErrorHandler($e); setteErrorHandler($e);
} }
echo "</table>"; echo "</table>";
echo $agencies . " agencies, " . round(($disclogs / $agencies) * 100) . "% with disclosure logs; " echo $agencies . " agencies, " . round(($disclogs / $agencies) * 100) . "% with disclosure logs; "
. round(($green / $disclogs) * 100) . "% logs with scrapers " . round(($red / $disclogs) * 100) . "% logs without scrapers " . round(($orange / $disclogs) * 100) . "% logs Work-In-Progress scrapers "; . round(($green / $disclogs) * 100) . "% logs with scrapers " . round(($red / $disclogs) * 100) . "% logs without scrapers " . round(($orange / $disclogs) * 100) . "% logs Work-In-Progress scrapers ";
   
include_footer_documents(); include_footer_documents();
?> ?>
   
  {
  "venv": "",
  "project-type": "Import from sources",
  "name": "disclosr-documents",
  "license": "GNU General Public License v3",
  "description": ""
  }
import sys,os import sys
  import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import scrape import scrape
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from time import mktime from time import mktime
import feedparser import feedparser
import abc import abc
import unicodedata, re import unicodedata
  import re
import dateutil import dateutil
from dateutil.parser import * from dateutil.parser import *
from datetime import * from datetime import *
import codecs import codecs
   
  from StringIO import StringIO
   
  from pdfminer.pdfparser import PDFDocument, PDFParser
  from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter, process_pdf
  from pdfminer.pdfdevice import PDFDevice, TagExtractor
  from pdfminer.converter import TextConverter
  from pdfminer.cmapdb import CMapDB
  from pdfminer.layout import LAParams
   
   
class GenericDisclogScraper(object): class GenericDisclogScraper(object):