fix charts
fix charts


Former-commit-id: d6e49522e61927665c8ba633dad5a13344f34841

<?php <?php
   
include('template.inc.php'); include('template.inc.php');
include_header_documents(""); include_header_documents("About");
include_once('../include/common.inc.php'); include_once('../include/common.inc.php');
?> ?>
<h1>About</h1> <h1>About</h1>
<?php <?php
include_footer_documents(); include_footer_documents();
?> ?>
   
<?php <?php
include('template.inc.php'); include('template.inc.php');
include_header_documents(""); include_header_documents("Charts");
include_once('../include/common.inc.php'); include_once('../include/common.inc.php');
$agenciesdb = $server->get_db('disclosr-agencies'); $agenciesdb = $server->get_db('disclosr-agencies');
   
$idtoname = Array(); $idtoname = Array();
foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) { foreach ($agenciesdb->get_view("app", "byCanonicalName")->rows as $row) {
$idtoname[$row->id] = trim($row->value->name); $idtoname[$row->id] = trim($row->value->name);
} }
$foidocsdb = $server->get_db('disclosr-foidocuments'); $foidocsdb = $server->get_db('disclosr-foidocuments');
   
?> ?>
<div class="foundation-header"> <div class="foundation-header">
<h1><a href="about.php">Charts</a></h1> <h1><a href="about.php">Charts</a></h1>
<h4 class="subheader">Lorem ipsum.</h4> <h4 class="subheader">Lorem ipsum.</h4>
</div> </div>
<div id="employees" style="width:1000px;height:900px;"></div> <div id="bydate" style="width:1000px;height:300px;"></div>
  <div id="byagency" style="width:1200px;height:300px;"></div>
<script id="source"> <script id="source">
window.onload = function() { window.onload = function() {
$(document).ready(function() { $(document).ready(function() {
var var
d1 = [], d1 = [],
start = new Date("2009/01/01 01:00").getTime(), options1,
options, o1;
graph,  
i, x, o;  
   
<?php <?php
try { try {
$rows = $foidocsdb->get_view("app", "byDate?group=true", null, true)->rows; $rows = $foidocsdb->get_view("app", "byDateMonthYear?group=true")->rows;
   
   
$dataValues = Array(); $dataValues = Array();
foreach ($rows as $row) { foreach ($rows as $row) {
$dataValues[$row->value] = $row->key; $dataValues[$row->key] = $row->value;
} }
$i = 0; $i = 0;
ksort($dataValues); ksort($dataValues);
foreach ($dataValues as $value => $key) { foreach ($dataValues as $key => $value) {
$date = date_create_from_format('Y-m-d', $key); $date = date_create_from_format('Y-m-d', $key);
if (date_format($date, 'U') != "") { if (date_format($date, 'U') != "") {
echo " d1.push([".date_format($date, 'U')."000, $value]);" . PHP_EOL; echo " d1.push([".date_format($date, 'U')."000, $value]);" . PHP_EOL;
// echo " emplabels.push('$key');" . PHP_EOL; // echo " emplabels.push('$key');" . PHP_EOL;
$i++; $i++;
} }
} }
} catch (SetteeRestClientException $e) { } catch (SetteeRestClientException $e) {
setteErrorHandler($e); setteErrorHandler($e);
} }
?> ?>
   
   
options = { options1 = {
xaxis : { xaxis : {
mode : 'time', mode : 'time',
labelsAngle : 45 labelsAngle : 45
}, },
selection : { selection : {
mode : 'x' mode : 'x'
}, },
HtmlText : false, HtmlText : false,
title : 'Time' title : 'Time'
}; };
// Draw graph with default options, overwriting with passed options // Draw graph with default options, overwriting with passed options
function drawGraph (opts) { function drawGraph (opts) {
   
// Clone the options, so the 'options' variable always keeps intact. // Clone the options, so the 'options' variable always keeps intact.
o = Flotr._.extend(Flotr._.clone(options), opts || {}); o1 = Flotr._.extend(Flotr._.clone(options1), opts || {});
   
// Return a new graph. // Return a new graph.
return Flotr.draw( return Flotr.draw(
document.getElementById("employees"), document.getElementById("bydate"),
[ d1 ], [ d1 ],
o o1
); );
} }
   
graph = drawGraph(); graph = drawGraph();
Flotr.EventAdapter.observe(container, 'flotr:select', function(area){ Flotr.EventAdapter.observe(document.getElementById("bydate"), 'flotr:select', function(area){
// Draw selected area // Draw selected area
graph = drawGraph({ graph = drawGraph({
xaxis : { min : area.x1, max : area.x2, mode : 'time', labelsAngle : 45 }, xaxis : { min : area.x1, max : area.x2, mode : 'time', labelsAngle : 45 },
yaxis : { min : area.y1, max : area.y2 } yaxis : { min : area.y1, max : area.y2 }
}); });
}); });
// When graph is clicked, draw the graph with default area. // When graph is clicked, draw the graph with default area.
Flotr.EventAdapter.observe(container, 'flotr:click', function () { graph = drawGraph(); }); Flotr.EventAdapter.observe(document.getElementById("bydate"), 'flotr:click', function () { graph = drawGraph(); });
   
}); });
}; };
   
  var d2 = [];
  var agencylabels = [];
  function agencytrackformatter(obj) {
   
  return agencylabels[Math.floor(obj.x)] +" = "+obj.y;
   
  }
  function agencytickformatter(val, axis) {
  if (agencylabels[Math.floor(val)]) {
  return '<p style="margin-top:8em;-webkit-transform:rotate(-90deg);">'+(agencylabels[Math.floor(val)])+"</b>";
   
  } else {
  return "";
  }
  }
  <?php
  try {
  $rows = $foidocsdb->get_view("app", "byAgencyID?group=true")->rows;
   
   
  $dataValues = Array();
  $i = 0;
  foreach ($rows as $row) {
  echo " d2.push([".$i.", $row->value]);" . PHP_EOL;
  echo " agencylabels.push(['".str_replace("'","",$idtoname[$row->key])."']);" . PHP_EOL;
   
  $i++;
  }
  } catch (SetteeRestClientException $e) {
  setteErrorHandler($e);
  }
  ?>
  // Draw the graph
  Flotr.draw(
  document.getElementById("byagency"),
  [d2],
  {
  bars : {
  show : true,
  horizontal : false,
  shadowSize : 0,
  barWidth : 0.5
  },
  mouse : {
  track : true,
  relative : true,
  trackFormatter: agencytrackformatter
  },
  yaxis : {
  min : 0,
  autoscaleMargin : 1
  },
  xaxis: {
  minorTickFreq: 1,
  noTicks: agencylabels.length,
  showMinorLabels: true,
  tickFormatter: agencytickformatter
  },
  legend: {
  show: false
  }
  }
  );
</script> </script>
   
<?php <?php
include_footer_documents(); include_footer_documents();
?> ?>
   
   
<?php <?php
   
include('template.inc.php'); include('template.inc.php');
include_header_documents(""); include_header_documents("List of Disclosure Logs");
include_once('../include/common.inc.php'); include_once('../include/common.inc.php');
   
echo "<table> echo "<table>
<tr><th>Agency Name</th><th>Disclosure Log URL recorded?</th><th>Do we monitor this URL?</th></tr>"; <tr><th>Agency Name</th><th>Disclosure Log URL recorded?</th><th>Do we monitor this URL?</th></tr>";
$agenciesdb = $server->get_db('disclosr-agencies'); $agenciesdb = $server->get_db('disclosr-agencies');
$docsdb = $server->get_db('disclosr-documents'); $docsdb = $server->get_db('disclosr-documents');
$agencies = 0; $agencies = 0;
$disclogs = 0; $disclogs = 0;
$red = 0; $red = 0;
$green = 0; $green = 0;
$yellow = 0; $yellow = 0;
$orange = 0; $orange = 0;
try { try {
$rows = $agenciesdb->get_view("app", "byCanonicalName", null, true)->rows; $rows = $agenciesdb->get_view("app", "byCanonicalName", null, true)->rows;
   
   
if ($rows) { if ($rows) {
foreach ($rows as $row) { foreach ($rows as $row) {
if ((!isset($row->value->status) || $row->value->status != "suspended") && isset($row->value->foiEmail)) { if ((!isset($row->value->status) || $row->value->status != "suspended") && isset($row->value->foiEmail)) {
echo "<tr><td>"; echo "<tr><td>";
if (isset($row->value->website)) echo "<a href='" . $row->value->website . "'>"; if (isset($row->value->website)) echo "<a href='" . $row->value->website . "'>";
echo "<b>" . $row->value->name . "</b>"; echo "<b>" . $row->value->name . "</b>";
if (isset($row->value->website)) echo "</a>"; if (isset($row->value->website)) echo "</a>";
if ($ENV == "DEV") if ($ENV == "DEV")
echo "<br>(" . $row->id . ")"; echo "<br>(" . $row->id . ")";
echo "</td>\n"; echo "</td>\n";
$agencies++; $agencies++;
   
echo "<td>"; echo "<td>";
if (isset($row->value->FOIDocumentsURL)) { if (isset($row->value->FOIDocumentsURL)) {
$disclogs++; $disclogs++;
echo '<a href="' . $row->value->FOIDocumentsURL . '">' echo '<a href="' . $row->value->FOIDocumentsURL . '">'
. $row->value->FOIDocumentsURL . '</a>'; . $row->value->FOIDocumentsURL . '</a>';
if ($ENV == "DEV") if ($ENV == "DEV")
echo '<br><small>(<a href="viewDocument.php?hash=' . md5($row->value->FOIDocumentsURL) . '">' echo '<br><small>(<a href="viewDocument.php?hash=' . md5($row->value->FOIDocumentsURL) . '">'
. 'view local copy</a>)</small>'; . 'view local copy</a>)</small>';
} else { } else {
echo "<font color='red'><abbr title='No'>✘</abbr></font>"; echo "<font color='red'><abbr title='No'>✘</abbr></font>";
} }
echo "</td>\n<td>"; echo "</td>\n<td>";
if (isset($row->value->FOIDocumentsURL)) { if (isset($row->value->FOIDocumentsURL)) {
if (file_exists("./scrapers/" . $row->id . '.py')) { if (file_exists("./scrapers/" . $row->id . '.py')) {
echo "<font color='green'><abbr title='Yes'>✔</abbr></font>"; echo "<font color='green'><abbr title='Yes'>✔</abbr></font>";
$green++; $green++;
} else if (file_exists("./scrapers/" . $row->id . '.txt')) { } else if (file_exists("./scrapers/" . $row->id . '.txt')) {
if (trim(file_get_contents("./scrapers/" . $row->id . '.txt')) == "no disclog") { if (trim(file_get_contents("./scrapers/" . $row->id . '.txt')) == "no disclog") {
echo "<font color='yellow'><abbr title='No log table exists at URL to scrape'><b>◎</b></abbr></font>"; echo "<font color='yellow'><abbr title='No log table exists at URL to scrape'><b>◎</b></abbr></font>";
$yellow++; $yellow++;
} else { } else {
echo file_get_contents("./scrapers/" . $row->id . '.txt'); echo file_get_contents("./scrapers/" . $row->id . '.txt');
echo "<font color='orange'><abbr title='Work in progress'><b>▬</b></abbr></font>"; echo "<font color='orange'><abbr title='Work in progress'><b>▬</b></abbr></font>";
$orange++; $orange++;
} }
} else { } else {
echo "<font color='red'><abbr title='No'>✘</abbr></font>"; echo "<font color='red'><abbr title='No'>✘</abbr></font>";
$red++; $red++;
} }
} }
echo "</td></tr>\n"; echo "</td></tr>\n";
} }
} }
} }
} catch (SetteeRestClientException $e) { } catch (SetteeRestClientException $e) {
setteErrorHandler($e); setteErrorHandler($e);
} }
echo "</table>"; echo "</table>";
echo $agencies . " agencies, " . round(($disclogs / $agencies) * 100) . "% with disclosure logs; " echo $agencies . " agencies, " . round(($disclogs / $agencies) * 100) . "% with disclosure logs; "
. round(($green / $disclogs) * 100) . "% logs with scrapers " . round(($red / $disclogs) * 100) . "% logs without scrapers " . round(($orange / $disclogs) * 100) . "% logs Work-In-Progress scrapers "; . round(($green / $disclogs) * 100) . "% logs with scrapers " . round(($red / $disclogs) * 100) . "% logs without scrapers " . round(($orange / $disclogs) * 100) . "% logs Work-In-Progress scrapers ";
   
include_footer_documents(); include_footer_documents();
?> ?>
   
  {
  "venv": "",
  "project-type": "Import from sources",
  "name": "disclosr-documents",
  "license": "GNU General Public License v3",
  "description": ""
  }
import sys,os import sys
  import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../')) sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
import scrape import scrape
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from time import mktime from time import mktime
import feedparser import feedparser
import abc import abc
import unicodedata, re import unicodedata
  import re
import dateutil import dateutil
from dateutil.parser import * from dateutil.parser import *
from datetime import * from datetime import *
import codecs import codecs
   
from StringIO import StringIO from StringIO import StringIO
   
from docx import *  
from lxml import etree  
import zipfile  
   
from pdfminer.pdfparser import PDFDocument, PDFParser from pdfminer.pdfparser import PDFDocument, PDFParser
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter, process_pdf from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter, process_pdf
from pdfminer.pdfdevice import PDFDevice, TagExtractor from pdfminer.pdfdevice import PDFDevice, TagExtractor
from pdfminer.converter import TextConverter from pdfminer.converter import TextConverter
from pdfminer.cmapdb import CMapDB from pdfminer.cmapdb import CMapDB
from pdfminer.layout import LAParams from pdfminer.layout import LAParams
   
   
class GenericDisclogScraper(object): class GenericDisclogScraper(object):
__metaclass__ = abc.ABCMeta __metaclass__ = abc.ABCMeta
agencyID = None agencyID = None
disclogURL = None disclogURL = None
def remove_control_chars(self, input):  
return "".join([i for i in input if ord(i) in range(32, 127)]) def remove_control_chars(self, input):
def getAgencyID(self): return "".join([i for i in input if ord(i) in range(32, 127)])
""" disclosr agency id """  
if self.agencyID == Non