codestyle
codestyle


Former-commit-id: e85a6fcfab76f7ea0b140471a810430aa544e81d

--- a/documents/genericScrapers.py
+++ b/documents/genericScrapers.py
@@ -1,155 +1,260 @@
-import sys,os
+import sys
+import os
 sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
 import scrape
 from bs4 import BeautifulSoup
 from time import mktime
 import feedparser
 import abc
-import unicodedata, re
+import unicodedata
+import re
 import dateutil
 from dateutil.parser import *
 from datetime import *
 import codecs
 
+from StringIO import StringIO
+
+from docx import *
+from lxml import etree
+import zipfile
+
+from pdfminer.pdfparser import PDFDocument, PDFParser
+from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter, process_pdf
+from pdfminer.pdfdevice import PDFDevice, TagExtractor
+from pdfminer.converter import TextConverter
+from pdfminer.cmapdb import CMapDB
+from pdfminer.layout import LAParams
+
+
 class GenericDisclogScraper(object):
-        __metaclass__ = abc.ABCMeta
-	agencyID = None
-	disclogURL = None
-	def remove_control_chars(self, input):
-		return "".join([i for i in input if ord(i) in range(32, 127)])
-        def getAgencyID(self):
-                """ disclosr agency id """
-		if self.agencyID == None:
-			self.agencyID = os.path.basename(sys.argv[0]).replace(".py","")
-                return self.agencyID
-
-        def getURL(self):
-                """ disclog URL"""
-		if self.disclogURL == None:
-			agency = scrape.agencydb.get(self.getAgencyID())
-			self.disclogURL = agency['FOIDocumentsURL']
-                return self.disclogURL
-
-	@abc.abstractmethod
-	def doScrape(self):
-		""" do the scraping """
-		return
-
-	@abc.abstractmethod
-        def getDescription(self, content, entry, doc):
-                """ get description"""
-		return
-
+    __metaclass__ = abc.ABCMeta
+    agencyID = None
+    disclogURL = None
+
+    def remove_control_chars(self, input):
+        return "".join([i for i in input if ord(i) in range(32, 127)])
+
+    def getAgencyID(self):
+        """ disclosr agency id """
+        if self.agencyID is None:
+            self.agencyID = os.path.basename(sys.argv[0]).replace(".py", "")
+        return self.agencyID
+
+    def getURL(self):
+        """ disclog URL"""
+        if self.disclogURL is None:
+            agency = scrape.agencydb.get(self.getAgencyID())
+            self.disclogURL = agency['FOIDocumentsURL']
+        return self.disclogURL
+
+    @abc.abstractmethod
+    def doScrape(self):
+        """ do the scraping """
+        return
+
+
+class GenericPDFDisclogScraper(GenericDisclogScraper):
+
+    def doScrape(self):
+        foidocsdb = scrape.couch['disclosr-foidocuments']
+        (url, mime_type, content) = scrape.fetchURL(scrape.docsdb,
+             self.getURL(), "foidocuments", self.getAgencyID())
+        laparams = LAParams()
+        rsrcmgr = PDFResourceManager(caching=True)
+        outfp = StringIO()
+        device = TextConverter(rsrcmgr, outfp, codec='utf-8',
+             laparams=laparams)
+        # wrap the fetched bytes so pdfminer can read them like a file
+        fp = StringIO(content)
+        process_pdf(rsrcmgr, device, fp, set(), caching=True,
+             check_extractable=True)
+        fp.close()
+        device.close()
+        description = outfp.getvalue()
+        outfp.close()
+        dochash = scrape.mkhash(description)
+        doc = foidocsdb.get(dochash)
+        if doc is None:
+            print "saving " + dochash
+            edate = date.today().strftime("%Y-%m-%d")
+            doc = {'_id': dochash, 'agencyID': self.getAgencyID(),
+            'url': self.getURL(), 'docID': dochash,
+            "date": edate, "title": "Disclosure Log Updated"}
+            doc.update({'description': description})
+            foidocsdb.save(doc)
+        else:
+            print "already saved"
+
+
+class GenericDOCXDisclogScraper(GenericDisclogScraper):
+
+    def doScrape(self):
+        foidocsdb = scrape.couch['disclosr-foidocuments']
+        (url, mime_type, content) = scrape.fetchURL(scrape.docsdb
+        , self.getURL(), "foidocuments", self.getAgencyID())
+        mydoc = zipfile.ZipFile(StringIO(content))
+        xmlcontent = mydoc.read('word/document.xml')
+        document = etree.fromstring(xmlcontent)
+        ## Fetch all the text out of the document we just created
+        paratextlist = getdocumenttext(document)
+        # Make explicit unicode version
+        newparatextlist = []
+        for paratext in paratextlist:
+            newparatextlist.append(paratext.encode("utf-8"))
+        ## Join our document's text with two newlines under each paragraph
+        description = '\n\n'.join(newparatextlist)
+        dochash = scrape.mkhash(description)
+        doc = foidocsdb.get(dochash)
+
+        if doc is None:
+            print "saving " + dochash
+            edate = date.today().strftime("%Y-%m-%d")
+            doc = {'_id': dochash, 'agencyID': self.getAgencyID(),
+            'url': self.getURL(), 'docID': dochash,
+            "date": edate, "title": "Disclosure Log Updated"}
+            doc.update({'description': description})
+            foidocsdb.save(doc)
+        else:
+            print "already saved"
 
 
 class GenericRSSDisclogScraper(GenericDisclogScraper):
 
-       	def doScrape(self):
-               	foidocsdb = scrape.couch['disclosr-foidocuments']
-                (url,mime_type,content) = scrape.fetchURL(scrape.docsdb, self.getURL(), "foidocuments", self.getAgencyID())
-		feed = feedparser.parse(content)		
-		for entry in feed.entries:
-			#print entry
-			print entry.id
-			hash = scrape.mkhash(entry.id)
-			#print hash
-		  	doc = foidocsdb.get(hash)
-			#print doc
-			if doc == None:
-                        	print "saving "+ hash
-				edate = datetime.fromtimestamp(mktime( entry.published_parsed)).strftime("%Y-%m-%d")
-                                doc = {'_id': hash, 'agencyID': self.getAgencyID(), 'url': entry.link, 'docID': entry.id,
-                                "date": edate,"title": entry.title}
-				self.getDescription(entry,entry, doc)
-                                foidocsdb.save(doc)
+        def doScrape(self):
+            foidocsdb = scrape.couch['disclosr-foidocuments']
+            (url, mime_type, content) = scrape.fetchURL(scrape.docsdb,
+                 self.getURL(), "foidocuments", self.getAgencyID())
+            feed = feedparser.parse(content)
+            for entry in feed.entries:
+                #print entry
+                print entry.id
+                dochash = scrape.mkhash(entry.id)
+                doc = foidocsdb.get(dochash)
+                #print doc
+                if doc is None:
+                    print "saving " + dochash
+                    edate = datetime.fromtimestamp(
+                        mktime(entry.published_parsed)).strftime("%Y-%m-%d")
+                    doc = {'_id': dochash, 'agencyID': self.getAgencyID(),
+                        'url': entry.link, 'docID': entry.id,
+                        "date": edate, "title": entry.title}
+                    self.getDescription(entry, entry, doc)
+                    foidocsdb.save(doc)
+                else:
+                    print "already saved"
+
+        def getDescription(self, content, entry, doc):
+            """ get description from rss entry"""
+            doc.update({'description': content.summary})
+            return
+
+
+class GenericOAICDisclogScraper(GenericDisclogScraper):
+    __metaclass__ = abc.ABCMeta
+
+    @abc.abstractmethod
+    def getColumns(self, columns):
+        """ rearranges columns if required """
+        return
+
+    def getColumnCount(self):
+        return 5
+
+    def getDescription(self, content, entry, doc):
+        """ get description from rss entry"""
+        descriptiontxt = ""
+        for string in content.stripped_strings:
+                    descriptiontxt = descriptiontxt + " \n" + string
+        doc.update({'description': descriptiontxt})
+
+    def getTitle(self, content, entry, doc):
+        doc.update({'title': (''.join(content.stripped_strings))})
+
+    def getTable(self, soup):
+        return soup.table
+
+    def getRows(self, table):
+        return table.find_all('tr')
+
+    def getDate(self, content, entry, doc):
+        date = ''.join(content.stripped_strings).strip()
+        (a, b, c) = date.partition("(")
+        date = self.remove_control_chars(a.replace("Octber", "October"))
+        print date
+        edate = parse(date, dayfirst=True, fuzzy=True).strftime("%Y-%m-%d")
+        print edate
+        doc.update({'date': edate})
+        return
+
+    def getLinks(self, content, entry, doc):
+        links = []
+        for atag in entry.find_all("a"):
+            if atag.has_attr('href'):
+                links.append(scrape.fullurl(content, atag['href']))
+        if links != []:
+                    doc.update({'links': links})
+        return
+
+    def doScrape(self):
+        foidocsdb = scrape.couch['disclosr-foidocuments']
+        (url, mime_type, content) = scrape.fetchURL(scrape.docsdb,
+            self.getURL(), "foidocuments", self.getAgencyID())
+        if content is not None:
+            if mime_type == "text/html"\
+            or mime_type == "application/xhtml+xml"\
+            or mime_type == "application/xml":
+            # http://www.crummy.com/software/BeautifulSoup/documentation.html
+                soup = BeautifulSoup(content)
+                table = self.getTable(soup)
+                for row in self.getRows(table):
+                    columns = row.find_all('td')
+                    if len(columns) == self.getColumnCount():
+                        (id, date, title,
+                        description, notes) = self.getColumns(columns)
+                        print self.remove_control_chars(
+                            ''.join(id.stripped_strings))
+                        if id.string is None:
+                            dochash = scrape.mkhash(
+                                self.remove_control_chars(
+                                    url + (''.join(date.stripped_strings))))
                         else:
-                        	print "already saved"			
-        def getDescription(self, content, entry, doc):
-                """ get description from rss entry"""
-                doc.update({'description': content.summary})
-		return
-
-class GenericOAICDisclogScraper(GenericDisclogScraper):
-        __metaclass__ = abc.ABCMeta
-	@abc.abstractmethod
-	def getColumns(self,columns):
-		""" rearranges columns if required """
-		return
-        def getColumnCount(self):
-                return 5
-        def getDescription(self, content, entry, doc):
-                """ get description from rss entry"""
-		descriptiontxt = ""
-		for string in content.stripped_strings:
-                	descriptiontxt = descriptiontxt + " \n" + string
-                doc.update({'description': descriptiontxt})
-		return
-        def getTitle(self, content, entry, doc):
-                doc.update({'title': (''.join(content.stripped_strings))})
-		return
-	def getTable(self, soup):
-		return soup.table
-	def getRows(self, table):
-		return table.find_all('tr')
-	def getDate(self, content, entry, doc):
-		date = ''.join(content.stripped_strings).strip()
-		(a,b,c) = date.partition("(")
-		date = self.remove_control_chars(a.replace("Octber","October"))
-		print date
-		edate = parse(date, dayfirst=True, fuzzy=True).strftime("%Y-%m-%d")
-		print edate
-		doc.update({'date': edate})
-		return
-	def getLinks(self, content, entry, doc):
-                links = []
-                for atag in entry.find_all("a"):
-                       	if atag.has_key('href'):
-                               	links.append(scrape.fullurl(content,atag['href']))
-                if links != []:
-	                doc.update({'links': links})
-		return
-
-	def doScrape(self):
-		foidocsdb = scrape.couch['disclosr-foidocuments']
-		(url,mime_type,content) = scrape.fetchURL(scrape.docsdb, self.getURL(), "foidocuments", self.getAgencyID())
-		if content != None:
-			if mime_type == "text/html" or mime_type == "application/xhtml+xml" or mime_type =="application/xml":
-			# http://www.crummy.com/software/BeautifulSoup/documentation.html
-				soup = BeautifulSoup(content)
-				table = self.getTable(soup)
-				for row in self.getRows(table):
-					columns = row.find_all('td')
-					if len(columns) == self.getColumnCount():
-						(id, date, title, description, notes) = self.getColumns(columns)
-						print self.remove_control_chars(''.join(id.stripped_strings))
-						if id.string == None:
-							hash = scrape.mkhash(self.remove_control_chars(url+(''.join(date.stripped_strings))))
-						else:
-							hash = scrape.mkhash(self.remove_control_chars(url+(''.join(id.stripped_strings))))
-						doc = foidocsdb.get(hash)
-							
-						if doc == None:
-							print "saving " +hash
-							doc = {'_id': hash, 'agencyID': self.getAgencyID(), 'url': self.getURL(), 'docID': (''.join(id.stripped_strings))}
-							self.getLinks(self.getURL(),row,doc)
-                                			self.getTitle(title,row, doc)
-                                			self.getDate(date,row, doc)
-							self.getDescription(description,row, doc)
-							if notes != None:
-                                        			doc.update({ 'notes': (''.join(notes.stripped_strings))})
-                                                        badtitles = ['-','Summary of FOI Request','FOI request(in summary form)','Summary of FOI request received by the ASC',
-'Summary of FOI request received by agency/minister','Description of Documents Requested','FOI request','Description of FOI Request','Summary of request','Description','Summary',
+                            dochash = scrape.mkhash(
+                                self.remove_control_chars(
+                                    url + (''.join(id.stripped_strings))))
+                        doc = foidocsdb.get(dochash)
+
+                        if doc is None:
+                            print "saving " + dochash
+                            doc = {'_id': dochash,
+                            'agencyID': self.getAgencyID(),
+                            'url': self.getURL(),
+                            'docID': (''.join(id.stripped_strings))}
+                            self.getLinks(self.getURL(), row, doc)
+                            self.getTitle(title, row, doc)
+                            self.getDate(date, row, doc)
+                            self.getDescription(description, row, doc)
+                            if notes is not None:
+                                doc.update({ 'notes': (
+                                    ''.join(notes.stripped_strings))})
+                            badtitles = ['-','Summary of FOI Request'
+                            , 'FOI request(in summary form)'
+                            , 'Summary of FOI request received by the ASC',
+'Summary of FOI request received by agency/minister',
+'Description of Documents Requested','FOI request',
+'Description of FOI Request','Summary of request','Description','Summary',
 'Summary of FOIrequest received by agency/minister','Summary of FOI request received','Description of    FOI Request',"FOI request",'Results 1 to 67 of 67']
-							if doc['title'] not in badtitles and doc['description'] != '':
+                            if doc['title'] not in badtitles\
+                            and doc['description'] != '':
                                                             print "saving"
                                                             foidocsdb.save(doc)
-						else:
-							print "already saved "+hash
-					
-					elif len(row.find_all('th')) == self.getColumnCount():
-						print "header row"
-					
-					else:
-						print "ERROR number of columns incorrect"
-						print row
-
+                        else:
+                            print "already saved " + dochash
+
+                    elif len(row.find_all('th')) == self.getColumnCount():
+                        print "header row"
+
+                    else:
+                        print "ERROR number of columns incorrect"
+                        print row
+

--- a/documents/rss.xml.php
+++ b/documents/rss.xml.php
@@ -9,11 +9,12 @@
 $TestFeed = new RSS2FeedWriter();
 //Setting the channel elements
 //Use wrapper functions for common channelelements
-$TestFeed->setTitle('Last Modified - All');
+$TestFeed->setTitle('disclosurelo.gs Newest Entries - All');
 $TestFeed->setLink('http://disclosurelo.gs/rss.xml.php');
-$TestFeed->setDescription('Latest entries');
-  $TestFeed->setChannelElement('language', 'en-us');
-  $TestFeed->setChannelElement('pubDate', date(DATE_RSS, time()));
+$TestFeed->setDescription('disclosurelo.gs Newest Entries - All Agencies');
+$TestFeed->setChannelElement('language', 'en-us');
+$TestFeed->setChannelElement('pubDate', date(DATE_RSS, time()));
+
 //Retriving informations from database
 $idtoname = Array();
 $agenciesdb = $server->get_db('disclosr-agencies');
@@ -21,17 +22,18 @@
     $idtoname[$row->id] = trim($row->value->name);
 }
 $foidocsdb = $server->get_db('disclosr-foidocuments');
-$rows = $foidocsdb->get_view("app", "byDate", Array('9999-99-99','0000-00-00', 50), true)->rows;
+$rows = $foidocsdb->get_view("app", "byDate", Array('9999-99-99', '0000-00-00', 50), true)->rows;
 //print_r($rows);
 foreach ($rows as $row) {
     //Create an empty FeedItem
     $newItem = $TestFeed->createNewItem();
     //Add elements to the feed item
     $newItem->setTitle($row->value->title);
-    $newItem->setLink("view.php?id=".$row->value->_id);
-    $newItem->setDate(date("c", strtotime($row->value->date)));
-    $newItem->setDescription(displayLogEntry($row,$idtoname));
-    $newItem->addElement('guid', $row->value->_id,array('isPermaLink'=>'true'));
+    $newItem->setLink("http://disclosurelo.gs/view.php?id=" . $row->value->_id);
+    $newItem->setDate(strtotime($row->value->date));
+    $newItem->setDescription(displayLogEntry($row, $idtoname));
+    $newItem->setAuthor($idtoname[$row->value->agencyID]);
+    $newItem->addElement('guid', "http://disclosurelo.gs/view.php?id=" . $row->value->_id, array('isPermaLink' => 'true'));
     //Now add the feed item
     $TestFeed->addItem($newItem);
 }

--- a/documents/template.inc.php
+++ b/documents/template.inc.php
@@ -1,167 +1,175 @@
 <?php
 
 function include_header_documents($title) {
-?>
-<!doctype html>
-<!-- paulirish.com/2008/conditional-stylesheets-vs-css-hacks-answer-neither/ -->
-<!--[if lt IE 7]> <html class="no-js lt-ie9 lt-ie8 lt-ie7" lang="en"> <![endif]-->
-<!--[if IE 7]>    <html class="no-js lt-ie9 lt-ie8" lang="en"> <![endif]-->
-<!--[if IE 8]>    <html class="no-js lt-ie9" lang="en"> <![endif]-->
-<!-- Consider adding a manifest.appcache: h5bp.com/d/Offline -->
-<!--[if gt IE 8]><!--> <html class="no-js" lang="en"> <!--<![endif]-->
-<head>
-  <meta charset="utf-8">
+    header('X-UA-Compatible: IE=edge,chrome=1');
+    ?>
+    <!doctype html>
+    <!-- paulirish.com/2008/conditional-stylesheets-vs-css-hacks-answer-neither/ -->
+    <!--[if lt IE 7]> <html class="no-js lt-ie9 lt-ie8 lt-ie7" lang="en"> <![endif]-->
+    <!--[if IE 7]>    <html class="no-js lt-ie9 lt-ie8" lang="en"> <![endif]-->
+    <!--[if IE 8]>    <html class="no-js lt-ie9" lang="en"> <![endif]-->
+    <!-- Consider adding a manifest.appcache: h5bp.com/d/Offline -->
+    <!--[if gt IE 8]><!--> <html class="no-js" lang="en"> <!--<![endif]-->
+        <head>
+            <meta charset="utf-8">
 
-  <!-- Use the .htaccess and remove these lines to avoid edge case issues.
-       More info: h5bp.com/i/378 -->
-  <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
+            <title>Australian Disclosure Logs<?php if ($title != "") echo " - $title"; ?></title>
+            <meta name="description" content="">
 
-  <title>Australian Disclosure Logs<?php if ($title != "") echo " - $title";?></title>
-  <meta name="description" content="">
+            <!-- Mobile viewport optimized: h5bp.com/viewport -->
+            <meta name="viewport" content="width=device-width">
+            <link rel="alternate" type="application/rss+xml" title="Latest Disclosure Log Entries" href="rss.xml.php" />
+            <!-- Place favicon.ico and apple-touch-icon.png in the root directory: mathiasbynens.be/notes/touch-icons -->
+            <meta name="google-site-verification" content="jkknX5g2FCpQvrW030b1Nq2hyoa6mb3EDiA7kCoHNj8" />
 
-  <!-- Mobile viewport optimized: h5bp.com/viewport -->
-  <meta name="viewport" content="width=device-width">
-<link rel="alternate" type="application/rss+xml" title="Latest Disclosure Log Entries" href="rss.xml.php" />
-  <!-- Place favicon.ico and apple-touch-icon.png in the root directory: mathiasbynens.be/notes/touch-icons -->
-<meta name="google-site-verification" content="jkknX5g2FCpQvrW030b1Nq2hyoa6mb3EDiA7kCoHNj8" />
+            <!-- Le styles -->
+            <link href="css/bootstrap.min.css" rel="stylesheet">
+            <style type="text/css">
+                body {
+                    padding-top: 60px;
+                    padding-bottom: 40px;
+                }
+                .sidebar-nav {
+                    padding: 9px 0;
+                }
+            </style>
+            <link href="css/bootstrap-responsive.min.css" rel="stylesheet">
 
-    <!-- Le styles -->
-    <link href="css/bootstrap.min.css" rel="stylesheet">
-    <style type="text/css">
-      body {
-        padding-top: 60px;
-        padding-bottom: 40px;
-      }
-      .sidebar-nav {
-        padding: 9px 0;
-      }
-    </style>
-    <link href="css/bootstrap-responsive.min.css" rel="stylesheet">
+            <!-- HTML5 shim, for IE6-8 support of HTML5 elements -->
+            <!--[if lt IE 9]>
+              <script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
+            <![endif]-->
+            <!-- More ideas for your <head> here: h5bp.com/d/head-Tips -->
 
-    <!-- HTML5 shim, for IE6-8 support of HTML5 elements -->
-    <!--[if lt IE 9]>
-      <script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
-    <![endif]-->
-  <!-- More ideas for your <head> here: h5bp.com/d/head-Tips -->
+            <!-- All JavaScript at the bottom, except this Modernizr build.
+                 Modernizr enables HTML5 elements & feature detects for optimal performance.
+                 Create your own custom Modernizr build: www.modernizr.com/download/ 
+            <script src="js/libs/modernizr-2.5.3.min.js"></script>-->
+            <script src="js/jquery.js"></script>
+            <script type="text/javascript" src="js/flotr2.min.js"></script>
 
-  <!-- All JavaScript at the bottom, except this Modernizr build.
-       Modernizr enables HTML5 elements & feature detects for optimal performance.
-       Create your own custom Modernizr build: www.modernizr.com/download/ 
-  <script src="js/libs/modernizr-2.5.3.min.js"></script>-->
-    <script src="js/jquery.js"></script>
-    <script type="text/javascript" src="js/flotr2.min.js"></script>
-  
-</head>
-<body>
-  <div class="navbar navbar-inverse navbar-fixed-top">
-      <div class="navbar-inner">
-        <div class="container-fluid">
-          <a class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
-            <span class="icon-bar"></span>
-            <span class="icon-bar"></span>
-            <span class="icon-bar"></span>
-          </a>
-          <a class="brand" href="#">Australian Disclosure Logs</a>
-          <div class="nav-collapse collapse">
-            <p class="navbar-text pull-right">
-              Check out our subsites on: 
-<a href="http://orgs.disclosurelo.gs">Government Agencies</a>
-• <a href="http://lobbyists.disclosurelo.gs">Political Lobbyists</a>
-• <a href="http://contracts.disclosurelo.gs">Government Contracts and Spending</a>
+        </head>
+        <body>
+            <div class="navbar navbar-inverse navbar-fixed-top">
+                <div class="navbar-inner">
+                    <div class="container-fluid">
+                        <a class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
+                            <span class="icon-bar"></span>
+                            <span class="icon-bar"></span>
+                            <span class="icon-bar"></span>
+                        </a>
+                        <a class="brand" href="#">Australian Disclosure Logs</a>
+                        <div class="nav-collapse collapse">
+                            <p class="navbar-text pull-right">
+                                Check out our subsites on: 
+                                <a href="http://orgs.disclosurelo.gs">Government Agencies</a>
+                                • <a href="http://lobbyists.disclosurelo.gs">Political Lobbyists</a>
+                                • <a href="http://contracts.disclosurelo.gs">Government Contracts and Spending</a>
 
-            </p>
-            <ul class="nav">
-              <li><a href="index.php">Home</a></li>
-              <li><a href="disclogsList.php">List of Disclosure Logs</a></li>
-              <li><a href="about.php">About</a></li>
-              
-            </ul>
-          </div><!--/.nav-collapse -->
-        </div>
-      </div>
-    </div>
-   <div class="container">
-       <?php
-}
-function include_footer_documents() {
-       ?>
-           </div> <!-- /container -->
-      <hr>
+                            </p>
+                            <ul class="nav">
+                                <li><a href="index.php">Home</a></li>
+                                <li><a href="disclogsList.php">List of Disclosure Logs</a></li>
+                                <li><a href="about.php">About</a></li>
 
-      <footer>
-        <p>Not affiliated with or endorsed by any government agency.</p>
-      </footer>
-      <script type="text/javascript">
+                            </ul>
+                        </div><!--/.nav-collapse -->
+                    </div>
+                </div>
+            </div>
+            <div class="container">
+                <?php
+            }
 
-  var _gaq = _gaq || [];
-  _gaq.push(['_setAccount', 'UA-12341040-4']);
-  _gaq.push(['_setDomainName', 'disclosurelo.gs']);
-  _gaq.push(['_setAllowLinker', true]);
-  _gaq.push(['_trackPageview']);
+            function include_footer_documents() {
+                ?>
+            </div> <!-- /container -->
+            <hr>
 
-  (function() {
-    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
-    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
-    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
-  })();
+            <footer>
+                <p>Not affiliated with or endorsed by any government agency.</p>
+            </footer>
+            <script type="text/javascript">
 
-</script>
-    <!-- Le javascript
-    ================================================== -->
-    <!-- Placed at the end of the document so the pages load faster -->
-<!--
-    <script src="js/bootstrap-transition.js"></script>
-    <script src="js/bootstrap-alert.js"></script>
-    <script src="js/bootstrap-modal.js"></script>
-    <script src="js/bootstrap-dropdown.js"></script>
-    <script src="js/bootstrap-scrollspy.js"></script>
-    <script src="js/bootstrap-tab.js"></script>
-    <script src="js/bootstrap-tooltip.js"></script>
-    <script src="js/bootstrap-popover.js"></script>
-    <script src="js/bootstrap-button.js"></script>
-    <script src="js/bootstrap-collapse.js"></script>
-    <script src="js/bootstrap-carousel.js"></script>
-    <script src="js/bootstrap-typeahead.js"></script>-->
+                var _gaq = _gaq || [];
+                _gaq.push(['_setAccount', 'UA-12341040-4']);
+                _gaq.push(['_setDomainName', 'disclosurelo.gs']);
+                _gaq.push(['_setAllowLinker', true]);
+                _gaq.push(['_trackPageview']);
+
+                (function() {
+                    var ga = document.createElement('script');
+                    ga.type = 'text/javascript';
+                    ga.async = true;
+                    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+                    var s = document.getElementsByTagName('script')[0];
+                    s.parentNode.insertBefore(ga, s);
+                })();
+
+            </script>
+            <!-- Le javascript
+            ================================================== -->
+            <!-- Placed at the end of the document so the pages load faster -->
+            <!--
+                <script src="js/bootstrap-transition.js"></script>
+                <script src="js/bootstrap-alert.js"></script>
+                <script src="js/bootstrap-modal.js"></script>
+                <script src="js/bootstrap-dropdown.js"></script>
+                <script src="js/bootstrap-scrollspy.js"></script>
+                <script src="js/bootstrap-tab.js"></script>
+                <script src="js/bootstrap-tooltip.js"></script>
+                <script src="js/bootstrap-popover.js"></script>
+                <script src="js/bootstrap-button.js"></script>
+                <script src="js/bootstrap-collapse.js"></script>
+                <script src="js/bootstrap-carousel.js"></script>
+                <script src="js/bootstrap-typeahead.js"></script>-->
 
 
-  </body>
-</html>
-<?php
+        </body>
+    </html>
+    <?php
 }
-function truncate($string, $length, $stopanywhere=false) {
+
+function truncate($string, $length, $stopanywhere = false) {
     //truncates a string to a certain char length, stopping on a word if not specified otherwise.
     if (strlen($string) > $length) {
         //limit hit!
-        $string = substr($string,0,($length -3));
+        $string = substr($string, 0, ($length - 3));
         if ($stopanywhere) {
             //stop anywhere
             $string .= '...';
-        } else{
+        } else {
             //stop on a word.
-            $string = substr($string,0,strrpos($string,' ')).'...';
+            $string = substr($string, 0, strrpos($string, ' ')) . '...';
         }
     }
     return $string;
 }
+
 function displayLogEntry($row, $idtoname) {
     $result = "";
-    $result .= "<div><h2>".$row->value->date.": ".truncate($row->value->title, 80)." (".$idtoname[$row->value->agencyID].")</h2>";
-    echo "<p> Title".$row->value->title."<br/>".str_replace("\n","<br>",$row->value->description);
-if (isset($row->value->notes)) {
-$result .= " <br>Note: ".$row->value->notes;
-}
-$result .= "</p>";
+    $result .= '<div itemscope itemtype="http://schema.org/Article">';
+    $result .= '<h2> <span itemprop="datePublished">' . $row->value->date . "</span>: <span itemprop='name headline'>" . truncate($row->value->title, 120) . "</span>";
+    $result .= '(<span itemprop="author publisher creator">' . $idtoname[$row->