Merge branch 'master' of ssh://apples.lambdacomplex.org/git/disclosr
Merge branch 'master' of ssh://apples.lambdacomplex.org/git/disclosr

Conflicts:
documents/rss.xml.php
lib/FeedWriter

Former-commit-id: 9f314c20fca6d7ffb1eaa4892e7b316bc0ea3628

--- a/admin/refreshDesignDoc.php
+++ b/admin/refreshDesignDoc.php
@@ -9,7 +9,6 @@
 $obj->language = "javascript";
 $obj->views->all->map = "function(doc) {   emit(doc._id, doc); };";
 $obj->views->byDate->map = "function(doc) {   emit(doc.date, doc); };";
-$obj->views->byDate->reduce = "_count";
 $obj->views->byDateMonthYear->map = "function(doc) {   emit(doc.date, doc); };";
 $obj->views->byDateMonthYear->reduce = "_count";
 $obj->views->byAgencyID->map = "function(doc) {   emit(doc.agencyID, doc); };";

--- /dev/null
+++ b/documents/about.php
@@ -1,1 +1,11 @@
+<?php
 
+include('template.inc.php');
+include_header_documents("");
+include_once('../include/common.inc.php');
+?>
+<h1>About</h1>
+<?php
+include_footer_documents();
+?>
+

--- a/documents/genericScrapers.py
+++ b/documents/genericScrapers.py
@@ -217,7 +217,12 @@
 							self.getDescription(description,row, doc)
 							if notes != None:
                                         			doc.update({ 'notes': (''.join(notes.stripped_strings))})
-							foidocsdb.save(doc)
+                                                        badtitles = ['-','Summary of FOI Request','FOI request(in summary form)','Summary of FOI request received by the ASC',
+'Summary of FOI request received by agency/minister','Description of Documents Requested','FOI request','Description of FOI Request','Summary of request','Description','Summary',
+'Summary of FOIrequest received by agency/minister','Summary of FOI request received','Description of    FOI Request',"FOI request",'Results 1 to 67 of 67']
+							if doc['title'] not in badtitles and doc['description'] != '':
+                                                            print "saving"
+                                                            foidocsdb.save(doc)
 						else:
 							print "already saved "+hash
 					

--- a/documents/rss.xml.php
+++ b/documents/rss.xml.php
@@ -14,6 +14,7 @@
 $TestFeed->setDescription('disclosurelo.gs Newest Entries - All Agencies');
   $TestFeed->setChannelElement('language', 'en-us');
   $TestFeed->setChannelElement('pubDate', date(DATE_RSS, time()));
+  
 //Retriving informations from database
 $idtoname = Array();
 $agenciesdb = $server->get_db('disclosr-agencies');
@@ -31,6 +32,7 @@
     $newItem->setLink("http://disclosurelo.gs/view.php?id=".$row->value->_id);
     $newItem->setDate(strtotime($row->value->date));
     $newItem->setDescription(displayLogEntry($row,$idtoname));
+    $newItem->setAuthor($idtoname[$row->value->agencyID]);
     $newItem->addElement('guid', "http://disclosurelo.gs/view.php?id=".$row->value->_id,array('isPermaLink'=>'true'));
     //Now add the feed item
     $TestFeed->addItem($newItem);

--- a/documents/scrapers/227cb6eb7d2c9f8a6e846df7447d6caa.py
+++ b/documents/scrapers/227cb6eb7d2c9f8a6e846df7447d6caa.py
@@ -21,9 +21,10 @@
                                                 for row in soup.find(class_ = "ms-rteTable-GreyAlternating").find_all('tr'):
                                                         if row != None:
 								rowtitle = row.find('th').string
-                                                                description = description + "\n" + rowtitle + ": "
+                                                                if rowtitle != None:
+                                                                    description = description + "\n" + rowtitle + ": "
                                                                 for text in row.find('td').stripped_strings:
-                                                                        description = description + text
+                                                                    description = description + text
                                                      		for atag in row.find_all("a"):
                                                                 	if atag.has_key('href'):
                                                                         	links.append(scrape.fullurl(link,atag['href']))

--- a/documents/template.inc.php
+++ b/documents/template.inc.php
@@ -1,152 +1,169 @@
 <?php
 
 function include_header_documents($title) {
-?>
-<!doctype html>
-<!-- paulirish.com/2008/conditional-stylesheets-vs-css-hacks-answer-neither/ -->
-<!--[if lt IE 7]> <html class="no-js lt-ie9 lt-ie8 lt-ie7" lang="en"> <![endif]-->
-<!--[if IE 7]>    <html class="no-js lt-ie9 lt-ie8" lang="en"> <![endif]-->
-<!--[if IE 8]>    <html class="no-js lt-ie9" lang="en"> <![endif]-->
-<!-- Consider adding a manifest.appcache: h5bp.com/d/Offline -->
-<!--[if gt IE 8]><!--> <html class="no-js" lang="en"> <!--<![endif]-->
-<head>
-  <meta charset="utf-8">
+    header('X-UA-Compatible: IE=edge,chrome=1');
+    ?>
+    <!doctype html>
+    <!-- paulirish.com/2008/conditional-stylesheets-vs-css-hacks-answer-neither/ -->
+    <!--[if lt IE 7]> <html class="no-js lt-ie9 lt-ie8 lt-ie7" lang="en"> <![endif]-->
+    <!--[if IE 7]>    <html class="no-js lt-ie9 lt-ie8" lang="en"> <![endif]-->
+    <!--[if IE 8]>    <html class="no-js lt-ie9" lang="en"> <![endif]-->
+    <!-- Consider adding a manifest.appcache: h5bp.com/d/Offline -->
+    <!--[if gt IE 8]><!--> <html class="no-js" lang="en"> <!--<![endif]-->
+        <head>
+            <meta charset="utf-8">
 
-  <!-- Use the .htaccess and remove these lines to avoid edge case issues.
-       More info: h5bp.com/i/378 -->
-  <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
+            <title>Australian Disclosure Logs<?php if ($title != "") echo " - $title"; ?></title>
+            <meta name="description" content="">
 
-  <title>Australian Disclosure Logs<?php if ($title != "") echo " - $title";?></title>
-  <meta name="description" content="">
+            <!-- Mobile viewport optimized: h5bp.com/viewport -->
+            <meta name="viewport" content="width=device-width">
+            <link rel="alternate" type="application/rss+xml" title="Latest Disclosure Log Entries" href="rss.xml.php" />
+            <!-- Place favicon.ico and apple-touch-icon.png in the root directory: mathiasbynens.be/notes/touch-icons -->
+            <meta name="google-site-verification" content="jkknX5g2FCpQvrW030b1Nq2hyoa6mb3EDiA7kCoHNj8" />
 
-  <!-- Mobile viewport optimized: h5bp.com/viewport -->
-  <meta name="viewport" content="width=device-width">
-<link rel="alternate" type="application/rss+xml" title="Latest Disclosure Log Entries" href="rss.xml.php" />
-  <!-- Place favicon.ico and apple-touch-icon.png in the root directory: mathiasbynens.be/notes/touch-icons -->
-<meta name="google-site-verification" content="jkknX5g2FCpQvrW030b1Nq2hyoa6mb3EDiA7kCoHNj8" />
+            <!-- Le styles -->
+            <link href="css/bootstrap.min.css" rel="stylesheet">
+            <style type="text/css">
+                body {
+                    padding-top: 60px;
+                    padding-bottom: 40px;
+                }
+                .sidebar-nav {
+                    padding: 9px 0;
+                }
+            </style>
+            <link href="css/bootstrap-responsive.min.css" rel="stylesheet">
 
-    <!-- Le styles -->
-    <link href="css/bootstrap.min.css" rel="stylesheet">
-    <style type="text/css">
-      body {
-        padding-top: 60px;
-        padding-bottom: 40px;
-      }
-      .sidebar-nav {
-        padding: 9px 0;
-      }
-    </style>
-    <link href="css/bootstrap-responsive.min.css" rel="stylesheet">
+            <!-- HTML5 shim, for IE6-8 support of HTML5 elements -->
+            <!--[if lt IE 9]>
+              <script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
+            <![endif]-->
+            <!-- More ideas for your <head> here: h5bp.com/d/head-Tips -->
 
-    <!-- HTML5 shim, for IE6-8 support of HTML5 elements -->
-    <!--[if lt IE 9]>
-      <script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
-    <![endif]-->
-  <!-- More ideas for your <head> here: h5bp.com/d/head-Tips -->
+            <!-- All JavaScript at the bottom, except this Modernizr build.
+                 Modernizr enables HTML5 elements & feature detects for optimal performance.
+                 Create your own custom Modernizr build: www.modernizr.com/download/ 
+            <script src="js/libs/modernizr-2.5.3.min.js"></script>-->
+            <script src="js/jquery.js"></script>
+            <script type="text/javascript" src="js/flotr2.min.js"></script>
 
-  <!-- All JavaScript at the bottom, except this Modernizr build.
-       Modernizr enables HTML5 elements & feature detects for optimal performance.
-       Create your own custom Modernizr build: www.modernizr.com/download/ 
-  <script src="js/libs/modernizr-2.5.3.min.js"></script>-->
-    <script src="js/jquery.js"></script>
-    <script type="text/javascript" src="js/flotr2.min.js"></script>
-  
-</head>
-<body>
-  <div class="navbar navbar-inverse navbar-fixed-top">
-      <div class="navbar-inner">
-        <div class="container-fluid">
-          <a class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
-            <span class="icon-bar"></span>
-            <span class="icon-bar"></span>
-            <span class="icon-bar"></span>
-          </a>
-          <a class="brand" href="#">Australian Disclosure Logs</a>
-          <div class="nav-collapse collapse">
-            <p class="navbar-text pull-right">
-              Check out our subsites on: 
-<a href="http://orgs.disclosurelo.gs">Government Agencies</a>
-• <a href="http://lobbyists.disclosurelo.gs">Political Lobbyists</a>
-• <a href="http://contracts.disclosurelo.gs">Government Contracts and Spending</a>
+        </head>
+        <body>
+            <div class="navbar navbar-inverse navbar-fixed-top">
+                <div class="navbar-inner">
+                    <div class="container-fluid">
+                        <a class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
+                            <span class="icon-bar"></span>
+                            <span class="icon-bar"></span>
+                            <span class="icon-bar"></span>
+                        </a>
+                        <a class="brand" href="#">Australian Disclosure Logs</a>
+                        <div class="nav-collapse collapse">
+                            <p class="navbar-text pull-right">
+                                Check out our subsites on: 
+                                <a href="http://orgs.disclosurelo.gs">Government Agencies</a>
+                                • <a href="http://lobbyists.disclosurelo.gs">Political Lobbyists</a>
+                                • <a href="http://contracts.disclosurelo.gs">Government Contracts and Spending</a>
 
-            </p>
-            <ul class="nav">
-              <li><a href="index.php">Home</a></li>
-              <li><a href="disclogsList.php">List of Disclosure Logs</a></li>
-              <li><a href="about.php">About</a></li>
-              
-            </ul>
-          </div><!--/.nav-collapse -->
-        </div>
-      </div>
-    </div>
-   <div class="container">
-       <?php
-}
-function include_footer_documents() {
-       ?>
-           </div> <!-- /container -->
-      <hr>
+                            </p>
+                            <ul class="nav">
+                                <li><a href="index.php">Home</a></li>
+                                <li><a href="disclogsList.php">List of Disclosure Logs</a></li>
+                                <li><a href="about.php">About</a></li>
 
-      <footer>
-        <p>&copy; Company 2012</p>
-      </footer>
-      <script type="text/javascript">
+                            </ul>
+                        </div><!--/.nav-collapse -->
+                    </div>
+                </div>
+            </div>
+            <div class="container">
+                <?php
+            }
 
-  var _gaq = _gaq || [];
-  _gaq.push(['_setAccount', 'UA-12341040-4']);
-  _gaq.push(['_setDomainName', 'disclosurelo.gs']);
-  _gaq.push(['_setAllowLinker', true]);
-  _gaq.push(['_trackPageview']);
+            function include_footer_documents() {
+                ?>
+            </div> <!-- /container -->
+            <hr>
 
-  (function() {
-    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
-    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
-    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
-  })();
+            <footer>
+                <p>Not affiliated with or endorsed by any government agency.</p>
+            </footer>
+            <script type="text/javascript">
 
-</script>
-    <!-- Le javascript
-    ================================================== -->
-    <!-- Placed at the end of the document so the pages load faster -->
-<!--
-    <script src="js/bootstrap-transition.js"></script>
-    <script src="js/bootstrap-alert.js"></script>
-    <script src="js/bootstrap-modal.js"></script>
-    <script src="js/bootstrap-dropdown.js"></script>
-    <script src="js/bootstrap-scrollspy.js"></script>
-    <script src="js/bootstrap-tab.js"></script>
-    <script src="js/bootstrap-tooltip.js"></script>
-    <script src="js/bootstrap-popover.js"></script>
-    <script src="js/bootstrap-button.js"></script>
-    <script src="js/bootstrap-collapse.js"></script>
-    <script src="js/bootstrap-carousel.js"></script>
-    <script src="js/bootstrap-typeahead.js"></script>-->
+                var _gaq = _gaq || [];
+                _gaq.push(['_setAccount', 'UA-12341040-4']);
+                _gaq.push(['_setDomainName', 'disclosurelo.gs']);
+                _gaq.push(['_setAllowLinker', true]);
+                _gaq.push(['_trackPageview']);
+
+                (function() {
+                    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+                    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+                    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+                })();
+
+            </script>
+            <!-- Le javascript
+            ================================================== -->
+            <!-- Placed at the end of the document so the pages load faster -->
+            <!--
+                <script src="js/bootstrap-transition.js"></script>
+                <script src="js/bootstrap-alert.js"></script>
+                <script src="js/bootstrap-modal.js"></script>
+                <script src="js/bootstrap-dropdown.js"></script>
+                <script src="js/bootstrap-scrollspy.js"></script>
+                <script src="js/bootstrap-tab.js"></script>
+                <script src="js/bootstrap-tooltip.js"></script>
+                <script src="js/bootstrap-popover.js"></script>
+                <script src="js/bootstrap-button.js"></script>
+                <script src="js/bootstrap-collapse.js"></script>
+                <script src="js/bootstrap-carousel.js"></script>
+                <script src="js/bootstrap-typeahead.js"></script>-->
 
 
-  </body>
-</html>
-<?php
+        </body>
+    </html>
+    <?php
+}
+
+function truncate($string, $length, $stopanywhere = false) {
+    // Returns $string unchanged if it fits in $length bytes; otherwise cuts it to $length - 3 bytes,
+    // backs up to the last space unless $stopanywhere is true, and appends '...'.
+    if (strlen($string) > $length) {
+        // max() keeps the cut length non-negative: a negative substr() length trims from the end instead.
+        $string = substr($string, 0, max(0, $length - 3));
+        $lastSpace = strrpos($string, ' ');
+        if (!$stopanywhere && $lastSpace !== false) {
+            // strrpos() yields false when no space exists; then the hard cut is kept, not emptied.
+            $string = substr($string, 0, $lastSpace);
+        }
+        $string .= '...';
+    }
+    return $string;
+}
 
 function displayLogEntry($row, $idtoname) {
+    // Builds the HTML fragment (schema.org Article markup) for one row: reads $row->value date, title,
+    // agencyID, description, url, docID and optional notes/links; $idtoname is indexed by agencyID.
     $result = "";
-    $result .= "<div><h2>".$row->value->date.": ".$row->value->title." (".$idtoname[$row->value->agencyID].")</h2> <p>".str_replace("\n","<br>",$row->value->description);
-if (isset($row->value->notes)) {
-$result .= " <br>Note: ".$row->value->notes;
-}
-$result .= "</p>";
+    $result .= '<div itemscope itemtype="http://schema.org/Article">';
+    $result .= '<h2> <span itemprop="datePublished">' . $row->value->date . "</span>: <span itemprop='name headline'>" . truncate($row->value->title, 120)."</span>";
+    $result .= ' (<span itemprop="author publisher creator">' . $idtoname[$row->value->agencyID] . '</span>)</h2>';
+    $result .= "<p itemprop='description articleBody text'> Title: " . $row->value->title . "<br/>" . str_replace("\n", "<br>", $row->value->description);
+    $result .= isset($row->value->notes) ? " <br>Note: " . $row->value->notes : "";
+    $result .= "</p>";
 
-if (isset($row->value->links)){
-$result .= "<h3>Links/Documents</h3><ul>";
-foreach ($row->value->links as $link) {
-    $result .= "<li><a href='$link'>".$link."</a></li>";
+    if (isset($row->value->links)) {
+        $result .= '<h3>Links/Documents</h3><ul itemprop="associatedMedia">';
+        foreach ($row->value->links as $link) {
+            $safeLink = htmlspecialchars($link, ENT_QUOTES); // HTML-escape once; used as quoted href and as readable link text
+            $result .= '<li itemscope itemtype="http://schema.org/MediaObject"><a href="' . $safeLink . '" itemprop="url contentURL">' . $safeLink . "</a></li>";
+        }
+        $result .= "</ul>";
+    }
+    $result .= "<small><A itemprop='url' href='" . $row->value->url . "'>View original source...</a> ID: " . strip_tags($row->value->docID) . "</small>";
+    $result .= "</div>";
+    return $result;
 }
 
-        $result .= "</ul>";
-}
-        $result .= "<small><A href='".$row->value->url."'>View original source...</a> ID: ".strip_tags($row->value->docID)."</small>";
-$result .= "</div>";
-return $result;
-}
-