Add homepage
Add homepage

Former-commit-id: 6abf389463f189798b499509f3dc589f78c6eacf

--- a/.gitmodules
+++ b/.gitmodules
@@ -10,16 +10,22 @@
 [submodule "lib/Requests"]
 	path = lib/Requests
 	url =
-[submodule "javascripts/flotr2"]
-	path = javascripts/flotr2
+[submodule "js/flotr2"]
+	path = js/flotr2
 	url =
 [submodule "lib/phpquery"]
 	path = lib/phpquery
 	url =
-[submodule "javascripts/sigma"]
-	path = javascripts/sigma
+[submodule "js/sigma"]
+	path = js/sigma
 	url =
-[submodule "javascripts/bubbletree"]
-	path = javascripts/bubbletree
+[submodule "js/bubbletree"]
+	path = js/bubbletree
 	url =
+[submodule "lib/querypath"]
+	path = lib/querypath
+	url =
+[submodule "lib/amon-php"]
+	path = lib/amon-php
+	url =

file:a/about.php -> file:b/about.php
--- a/about.php
+++ b/about.php
@@ -8,10 +8,12 @@
 <h2> What is this? </h2> is a project to monitor Australian Federal Government agencies 
-compliance with their <a href="">"proactive disclosure requirements"</a>.
+compliance with their <a href="">"proactive disclosure requirements"</a>, in order to build a transparency league table as suggested by the Government 2.0 Taskforce.
 <h2> Attributions </h2>
-National Archives of Australia, Australian Governments’ Interactive Functions Thesaurus, 2nd edition, September 2005, published at
+National Archives of Australia, Australian Governments’ Interactive Functions Thesaurus, 2nd edition, September 2005, published at <br/> <br/> <br/> <br/>
 <h2> Open everything </h2>
 All documents released CC-BY 3 AU
 Open source git @

file:b/admin/agls.php (new)
--- /dev/null
+++ b/admin/agls.php
@@ -1,1 +1,35 @@
+// Admin report: one table row per agency, showing the agency name and the
+// value of its "AGLS.Function" meta tag (blank when the tag is not recorded).
+// NOTE(review): the page title below looks copied from another report - confirm.
+include_header('Webserver and Accessiblity');
+echo "<table>
+    <tr><th>name</th><th>function</th></tr>";
+$db = $server->get_db('disclosr-agencies');
+try {
+    $agencies = $db->get_view("app", "byCanonicalName", null, true)->rows;
+    if ($agencies) {
+        foreach ($agencies as $row) {
+            // metaTags is handled both as an array and as an object; normalise to an array.
+            $tags = Array();
+            if (isset($row->value->metaTags)) {
+                $tags = is_array($row->value->metaTags)
+                        ? $row->value->metaTags
+                        : object_to_array($row->value->metaTags);
+            }
+            $function = isset($tags['AGLS.Function']) ? (string) $tags['AGLS.Function'] : '';
+            // Always emit both cells so every row matches the two-column header, and
+            // escape the values: they are read from the database and are not known to be HTML-safe.
+            echo "<tr><td>" . htmlspecialchars((string) $row->value->name, ENT_QUOTES, 'UTF-8') . "</td>"
+                . "<td>" . htmlspecialchars($function, ENT_QUOTES, 'UTF-8') . "</td>"
+                . "</tr>";
+        }
+    }
+} catch (SetteeRestClientException $e) {
+    setteErrorHandler($e);

--- a/admin/conflicts.php
+++ b/admin/conflicts.php
@@ -1,7 +1,7 @@
+include_header('Fix Conflicts');
                 require_once '../lib/php-diff/lib/Diff.php';
                 require_once '../lib/php-diff/lib/Diff/Renderer/Html/SideBySide.php';

--- /dev/null
+++ b/admin/genericAgencyFixer.php
@@ -1,1 +1,44 @@
+// Admin fixer: for every agency that has a website but no stored metaTags,
+// fetch the website, collect its named <meta> tags and save them on the agency document.
+require($basePath . 'lib/phpquery/phpQuery/phpQuery.php');
+setlocale(LC_CTYPE, 'C');
+$db = $server->get_db('disclosr-agencies');
+try {
+    $agencies = $db->get_view("app", "byCanonicalName", null, true)->rows;
+    foreach ($agencies as $agency) {
+        // Author's notes on other field clean-ups (not performed by this script):
+        //   hasRestricitiveLicence" hasRestrictiveLicense -> has Restrictive Licence
+        //   "hasYoutube" -> Tube
+        //   "comment" -> "comments"
+        if (isset($agency->value->metaTags) || !isset($agency->value->website)) {
+            continue;
+        }
+        echo $agency->value->name . " " . $agency->value->website . "<br />\n";
+        try {
+            $request = Requests::get($agency->value->website);
+        } catch (Requests_Exception $e) {
+            // One unreachable website must not abort the whole run. Nothing is saved,
+            // so the agency still has no metaTags and is picked up again next time.
+            echo "fetch failed: " . htmlspecialchars($e->getMessage(), ENT_QUOTES, 'UTF-8') . "<hr>";
+            flush();
+            continue;
+        }
+        $agency->value->metaTags = Array();
+        $html = phpQuery::newDocumentHTML($request->body);
+        phpQuery::selectDocument($html);
+        foreach (pq('meta')->elements as $meta) {
+            $tagName = $meta->getAttribute('name');
+            $content = $meta->getAttribute('content');
+            if ($tagName != "") {
+                // Tag names and contents come from the remote page: escape before echoing.
+                echo htmlspecialchars("$tagName == $content", ENT_QUOTES, 'UTF-8') . " <br>\n";
+                $agency->value->metaTags[$tagName] = $content;
+            }
+        }
+        $db->save($agency->value);
+        echo "<hr>";
+        // Push progress to the browser after each agency.
+        flush();
+    }
+} catch (SetteeRestClientException $e) {
+    setteErrorHandler($e);

--- /dev/null
+++ b/admin/importRTKbodies.php
@@ -1,1 +1,56 @@
+require_once '../include/';
+$db = $server->get_db('disclosr-agencies');
+$rows = $db->get_view("app", "byName")->rows;
+$nametoid = Array();
+$accounts = Array();
+foreach ($rows as $row) {
+    $nametoid[trim($row->key)] = $row->value;
+/**
+ * Downloads a CSV file and, for every data row whose agency name is a key of the
+ * global $nametoid, stores the row's account value in the global $accounts as
+ * $accounts[<agency id>]["rtkURLs"][<agency name>].
+ *
+ * The first CSV line is treated as the header row. Progress and problems are echoed.
+ *
+ * @param string $url          location of the CSV, fetched with Requests::get()
+ * @param string $nameField    header of the column holding the agency name
+ * @param string $accountField header of the column holding the account value
+ * @param mixed  $filter       not used by this function; optional so that
+ *                             three-argument calls are valid
+ */
+function extractCSVAccounts($url, $nameField, $accountField, $filter = null) {
+    global $accounts, $nametoid;
+    $request = Requests::get($url);
+    echo $url;
+    $Data = str_getcsv($request->body, "\n"); //parse the rows
+    // Column positions are resolved once from the header row; false means "no such column".
+    $nameColumn = false;
+    $accountColumn = false;
+    foreach ($Data as $num => $line) {
+        $Row = str_getcsv($line, ",");
+        if ($num == 0) {
+            print_r($Row);
+            $nameColumn = array_search($nameField, $Row);
+            $accountColumn = array_search($accountField, $Row);
+            continue;
+        }
+        // array_search() returns false for a missing header; using that as an index
+        // would silently read column 0, so check for it explicitly.
+        if ($nameColumn === false || !isset($Row[$nameColumn])) {
+            echo "error finding any agency" . $line . PHP_EOL;
+            continue;
+        }
+        $agencyName = $Row[$nameColumn];
+        $agencyKey = trim($agencyName);
+        if (!array_key_exists($agencyKey, $nametoid)) {
+            echo "$agencyName missing" . PHP_EOL;
+            continue;
+        }
+        echo $agencyName . PHP_EOL;
+        $account = ($accountColumn !== false && isset($Row[$accountColumn])) ? $Row[$accountColumn] : '';
+        $accounts[$nametoid[$agencyKey]]["rtkURLs"][$agencyName] = '' . $account;
+    }
+extractCSVAccounts("","Agency","URL name");
+/* foreach ($accounts as $id => $accountTypes) {
+    echo $id . "<br>" . PHP_EOL;
+    $doc = object_to_array($db->get($id));
+    // print_r($doc);
+    foreach ($accountTypes as $accountType => $accounts) {
+        if (!isset($doc["has" . $accountType]) || !is_array($doc["has" . $accountType])) {
+            $doc["has" . $accountType] = Array();
+        }
+        $doc["has" . $accountType] = array_unique(array_merge($doc["has" . $accountType], $accounts));
+    }
+    $db->save($doc);

--- a/admin/naa-agift-tree.php
+++ b/admin/naa-agift-tree.php
@@ -31,9 +31,9 @@
 <div id="tree-container"></div>
  <div id="viz"></div>
-<script type="text/javascript" src="../javascripts/jquery-1.6.2.min.js"></script>
-<script type="text/javascript" src="../javascripts/d3.min.js"></script>
-<script type="text/javascript" src="../javascripts/d3.layout.min.js"></script>
+<script type="text/javascript" src="../js/jquery-1.6.2.min.js"></script>
+<script type="text/javascript" src="../js/d3.min.js"></script>
+<script type="text/javascript" src="../js/d3.layout.min.js"></script>

--- a/alaveteli/exportAgencies.csv.php
+++ b/alaveteli/exportAgencies.csv.php
@@ -1,20 +1,6 @@
-function shortName($name) {
-    $name = trim($name);
-    if (strstr($name,"Minister ") || strstr($name,"Treasurer") || strstr($name,"Parliamentary Secretary")) {
-        $badWords = Array ("Assisting the Prime Minister on","Assisting on"," the "," of "," for "," on "," and "," to ",","," ","'","`");
-        return str_replace($badWords,"",$name);
-    }
-    else {
-    $out = Array();
-    preg_match_all('/[A-Z]/', $name, $out);
-    return implode("", $out[0]);
-    }
 setlocale(LC_CTYPE, 'C');
@@ -63,38 +49,36 @@
                 $row = Array();
                 $row["#id"] = $agency->id;
                 $row["name"] = trim($agency->value->name);
-                if (isset($agency->value->foiEmail)) {
-                    $row["request_email"] = $agency->value->foiEmail;
-                } else {
-                    if ($agency->value->orgType == "FMA-DepartmentOfState") {
-                        $row["request_email"] = "foi@" . GetDomain($agency->value->website);
-                    } else {
-                        $row["request_email"] = $foiEmail[$agency->value->parentOrg];
+                $row["request_email"] = (isset($agency->value->foiEmail) ? $agency->value->foiEmail : "");
+                $row["short_name"] = (isset($agency->value->shortName) ? $agency->value->shortName : "");
+                $row["notes"] = (isset($agency->value->description) ? $agency->value->description : "");
+                $otherBodies = Array();
+                if (isset($agency->value->foiBodies)) {
+                    $otherBodies = array_merge($otherBodies, $agency->value->foiBodies);
+                }
+                if (isset($agency->value->positions)) {
+                    $positions = Array();
+                    foreach ($agency->value->positions as $position) {
+                        $positions[] = "Office of the ".$position;
+                    $otherBodies = array_merge($otherBodies, $positions);
-                if (isset($agency->value->shortName)) {
-                    $row["short_name"] = $agency->value->shortName;
-                } else {
-                    $row["short_name"] = shortName($agency->value->name);
+                sort($otherBodies);
+                if (count($otherBodies) > 0) {
+                    $row["notes"] .= "<br/> This department also responds to requests for information held by " . implode(", ", $otherBodies);
-                $row["notes"] = "";
                 $row["publication_scheme"] = (isset($agency->value->infoPublicationSchemeURL) ? $agency->value->infoPublicationSchemeURL : "");
                 $row["home_page"] = (isset($agency->value->website) ? $agency->value->website : "");
                 if ($agency->value->orgType == "FMA-DepartmentOfState") {
-                    $row["tag_string"] = $tag[$agency->value->_id] . " " . $agency->value->orgType;
+                    $row["tag_string"] = $tag[$agency->value->_id];
                 } else {
-                    $row["tag_string"] = $tag[$agency->value->parentOrg] . " " . $agency->value->orgType;
+                    $row["tag_string"] = $tag[$agency->value->parentOrg];
+                $row["tag_string"] .= " " . $agency->value->orgType;
+                $row["tag_string"] .= " federal";
                 fputcsv($fp, array_values($row));
-                if (isset($agency->value->foiBodies)) {
-                    foreach ($agency->value->foiBodies as $foiBody) {
-                        $row['name'] = iconv("UTF-8", "ASCII//TRANSLIT",$foiBody);
-                        $row["short_name"] = shortName($foiBody);
-                        fputcsv($fp, array_values($row));
-                    }
-                }
     } catch (SetteeRestClientException $e) {

--- a/bubbletree.php
+++ b/bubbletree.php
@@ -5,13 +5,13 @@
         <meta charset="UTF-8"/> 
         <title>Minimal BubbleTree Demo</title> 
         <script type="text/javascript" src=""></script> 
-        <script type="text/javascript" src="javascripts/bubbletree/lib/jquery.history.js"></script> 
-        <script type="text/javascript" src="javascripts/bubbletree/lib/raphael.js"></script> 
-        <script type="text/javascript" src="javascripts/bubbletree/lib/vis4.js"></script> 
-        <script type="text/javascript" src="javascripts/bubbletree/lib/Tween.js"></script> 
-        <script type="text/javascript" src="javascripts/bubbletree/build/bubbletree.js"></script> 
-        <link rel="stylesheet" type="text/css" href="javascripts/bubbletree/build/bubbletree.css" /> 
-        <script type="text/javascript" src="javascripts/bubbletree/styles/cofog.js"></script> 
+        <script type="text/javascript" src="js/bubbletree/lib/jquery.history.js"></script> 
+        <script type="text/javascript" src="js/bubbletree/lib/raphael.js"></script> 
+        <script type="text/javascript" src="js/bubbletree/lib/vis4.js"></script> 
+        <script type="text/javascript" src="js/bubbletree/lib/Tween.js"></script> 
+        <script type="text/javascript" src="js/bubbletree/build/bubbletree.js"></script> 
+        <link rel="stylesheet" type="text/css" href="js/bubbletree/build/bubbletree.css" /> 
+        <script type="text/javascript" src="js/bubbletree/styles/cofog.js"></script> 
         <script type="text/javascript"> 

--- /dev/null
+++ b/documents/404.html
@@ -1,1 +1,44 @@
+<!doctype html>
+<html lang="en">
+  <meta charset="utf-8">
+  <title>Page Not Found :(</title>
+  <style>
+    ::-moz-selection { background: #fe57a1; color: #fff; text-shadow: none; }
+    ::selection { background: #fe57a1; color: #fff; text-shadow: none; }
+    html { padding: 30px 10px; font-size: 20px; line-height: 1.4; color: #737373; background: #f0f0f0; -webkit-text-size-adjust: 100%; -ms-text-size-adjust: 100%; }
+    html, input { font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; }
+    body { max-width: 500px; _width: 500px; padding: 30px 20px 50px; border: 1px solid #b3b3b3; border-radius: 4px; margin: 0 auto; box-shadow: 0 1px 10px #a7a7a7, inset 0 1px 0 #fff; background: #fcfcfc; }
+    h1 { margin: 0 10px; font-size: 50px; text-align: center; }
+    h1 span { color: #bbb; }
+    h3 { margin: 1.5em 0 0.5em; }
+    p { margin: 1em 0; }
+    ul { padding: 0 0 0 40px; margin: 1em 0; }
+    .container { max-width: 380px; _width: 380px; margin: 0 auto; }
+    /* google search */
+    #goog-fixurl ul { list-style: none; padding: 0; margin: 0; }
+    #goog-fixurl form { margin: 0; }
+    #goog-wm-qt, #goog-wm-sb { border: 1px solid #bbb; font-size: 16px; line-height: normal; vertical-align: top; color: #444; border-radius: 2px; }
+    #goog-wm-qt { width: 220px; height: 20px; padding: 5px; margin: 5px 10px 0 0; box-shadow: inset 0 1px 1px #ccc; }
+    #goog-wm-sb { display: inline-block; height: 32px; padding: 0 10px; margin: 5px 0 0; white-space: nowrap; cursor: pointer; background-color: #f5f5f5; background-image: -webkit-linear-gradient(rgba(255,255,255,0), #f1f1f1); background-image: -moz-linear-gradient(rgba(255,255,255,0), #f1f1f1); background-image: -ms-linear-gradient(rgba(255,255,255,0), #f1f1f1); background-image: -o-linear-gradient(rgba(255,255,255,0), #f1f1f1); -webkit-appearance: none; -moz-appearance: none; appearance: none; *overflow: visible; *display: inline; *zoom: 1; }
+    #goog-wm-sb:hover, #goog-wm-sb:focus { border-color: #aaa; box-shadow: 0 1px 1px rgba(0, 0, 0, 0.1); background-color: #f8f8f8; }
+    #goog-wm-qt:focus, #goog-wm-sb:focus { border-color: #105cb6; outline: 0; color: #222; }
+    input::-moz-focus-inner { padding: 0; border: 0; }
+  </style>
+  <div class="container">
+    <h1>Not found <span>:(</span></h1>
+    <p>Sorry, but the page you were trying to view does not exist.</p>
+    <p>It looks like this was the result of either:</p>
+    <ul>
+      <li>a mistyped address</li>
+      <li>an out-of-date link</li>
+    </ul>
+    <script>
+      var GOOG_FIXURL_LANG = (navigator.language || '').slice(0,2),GOOG_FIXURL_SITE =;
+    </script>
+    <script src=""></script>
+  </div>

 Binary files /dev/null and b/documents/apple-touch-icon-114x114-precomposed.png differ
 Binary files /dev/null and b/documents/apple-touch-icon-57x57-precomposed.png differ
 Binary files /dev/null and b/documents/apple-touch-icon-72x72-precomposed.png differ
 Binary files /dev/null and b/documents/apple-touch-icon-precomposed.png differ
 Binary files /dev/null and b/documents/apple-touch-icon.png differ
--- /dev/null
+++ b/documents/checkScrapers.php
@@ -1,1 +1,42 @@
+// Report page: one table row per agency with its disclosure log URL (if any)
+// and whether a scraper file exists for that agency under ./scrapers/.
+include_header('Webserver and Accessiblity');
+echo "<table>
+    <tr><th>name</th><th>disclog</th><th>scraper?</th></tr>";
+$agenciesdb = $server->get_db('disclosr-agencies');
+$docsdb = $server->get_db('disclosr-documents');
+try {
+    $agencyRows = $agenciesdb->get_view("app", "byCanonicalName", null, true)->rows;
+    if ($agencyRows) {
+        foreach ($agencyRows as $agencyRow) {
+            echo "<tr><td>" . $agencyRow->value->name . " (" . $agencyRow->id . ")</td>\n";
+            if (isset($agencyRow->value->FOIDocumentsURL)) {
+                $disclogURL = $agencyRow->value->FOIDocumentsURL;
+                $disclogCell = '<a href="viewDocument.php?hash=' . md5($disclogURL) . '">' . $disclogURL . '</a>';
+                // A .py file is a real scraper; a .txt file marks the agency as "pass".
+                $scraperBase = "./scrapers/" . $agencyRow->id;
+                if (file_exists($scraperBase . '.py')) {
+                    $scraperCell = "<font color='green'>✔</font>";
+                } elseif (file_exists($scraperBase . '.txt')) {
+                    $scraperCell = "pass";
+                } else {
+                    $scraperCell = "<font color='red'>✘</font>";
+                }
+            } else {
+                // No disclosure log URL: flag it and leave the scraper cell empty.
+                $disclogCell = "<font color='red'>✘</font>";
+                $scraperCell = "";
+            }
+            echo "<td>" . $disclogCell . "</td>\n<td>" . $scraperCell . "</td></tr>\n";
+        }
+    }
+} catch (SetteeRestClientException $e) {
+    setteErrorHandler($e);

--- /dev/null
+++ b/documents/crossdomain.xml
@@ -1,1 +1,26 @@
+<?xml version="1.0"?>
+<!DOCTYPE cross-domain-policy SYSTEM "">
+<!-- Read this: -->
+<!-- Most restrictive policy: -->
+	<site-control permitted-cross-domain-policies="none"/>
+<!-- Least restrictive policy: -->
+	<site-control permitted-cross-domain-policies="all"/>
+	<allow-access-from domain="*" to-ports="*" secure="false"/>
+	<allow-http-request-headers-from domain="*" headers="*" secure="false"/>
+  If you host a crossdomain.xml file with allow-access-from domain="*"
+  and don’t understand all of the points described here, you probably
+  have a nasty security vulnerability. ~ simon willison

 Binary files /dev/null and b/documents/favicon.ico differ
--- /dev/null
+++ b/documents/
@@ -1,1 +1,61 @@
+import sys,os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__) or '.', '../'))
+import scrape
+from bs4 import BeautifulSoup
+import abc
+class GenericOAICDisclogScraper(object):
+	__metaclass__ = abc.ABCMeta
+	@abc.abstractmethod
+	def getAgencyID(self):
+		""" disclosr agency id """
+		return
+	@abc.abstractmethod
+	def getURL(self):
+		""" disclog URL"""
+		return
+	@abc.abstractmethod
+	def getColumns(self,columns):
+		""" rearranges columns if required """
+		return
+	def doScrape(self):
+		foidocsdb = scrape.couch['disclosr-foidocuments']
+		(url,mime_type,content) = scrape.fetchURL(scrape.docsdb, self.getURL(), "foidocuments", self.getAgencyID())
+		if content != None:
+			if mime_type == "text/html" or mime_type == "application/xhtml+xml" or mime_type =="application/xml":
+			#
+				soup = BeautifulSoup(content)
+				for row in soup.table.find_all('tr'):
+					columns = row.find_all('td')
+					if len(columns) == 5:
+						(id, date, description, title, notes) = self.getColumns(columns)
+						print id.string
+						hash = scrape.mkhash(url+id.string)
+						links = []
+						for atag in row.find_all("a"):
+							if atag.has_key('href'):
+								links.append(scrape.fullurl(url,atag['href']))
+						doc = foidocsdb.get(hash)
+						descriptiontxt = ""
+						for string in description.stripped_strings:
+							descriptiontxt = descriptiontxt + string
+						if doc == None:
+							print "saving"
+							doc = {'_id': hash, 'agencyID': self.getAgencyID(), 'url': self.getURL(), "links": links, 'docID': id.string,
+			 				 "date": date.string, "description": descriptiontxt,"title": title.string,"notes": notes.string}
+						else:
+							print "already saved"
+					elif len(row.find_all('th')) == 5:
+						print "header row"
+					else:
+						print "ERROR number of columns incorrect"
+						print row

--- /dev/null
+++ b/documents/google676a414ad086cefb.html
@@ -1,1 +1,2 @@
+google-site-verification: google676a414ad086cefb.html

--- /dev/null
+++ b/documents/googlebcce906c6b666bb8.html
@@ -1,1 +1,2 @@
+google-site-verification: googlebcce906c6b666bb8.html

--- /dev/null
+++ b/documents/humans.txt
@@ -1,1 +1,44 @@
+/* the humans responsible & colophon */
+/* */
+/* TEAM */
+  <your title>: <your name>
+  Site:
+  Twitter:
+  Location:
+/* THANKS */
+  Names (& URL):
+/* SITE */
+  Standards: HTML5, CSS3
+  Components: Modernizr, jQuery
+  Software:
+                               -o/-
+                               +oo//-
+                              :ooo+//:
+                             -ooooo///-
+                             /oooooo//:
+                            :ooooooo+//-
+                           -+oooooooo///-
+           -://////////////+oooooooooo++////////////::
+            :+ooooooooooooooooooooooooooooooooooooo+:::-
+              -/+ooooooooooooooooooooooooooooooo+/::////:-
+                -:+oooooooooooooooooooooooooooo/::///////:-
+                  --/+ooooooooooooooooooooo+::://////:-
+                     -:+ooooooooooooooooo+:://////:--
+                       /ooooooooooooooooo+//////:-
+                      -ooooooooooooooooooo////-
+                      /ooooooooo+oooooooooo//:
+                     :ooooooo+/::/+oooooooo+//-
+                    -oooooo/::///////+oooooo///-
+                    /ooo+::://////:---:/+oooo//:
+                   -o+/::///////:-      -:/+o+//-
+                   :-:///////:-            -:/://
+                     -////:-                 --//:
+                       --                       -:

--- /dev/null
+++ b/documents/index.php
@@ -1,1 +1,94 @@
+<!doctype html>
+<!-- -->
+<!--[if lt IE 7]> <html class="no-js lt-ie9 lt-ie8 lt-ie7" lang="en"> <![endif]-->
+<!--[if IE 7]>    <html class="no-js lt-ie9 lt-ie8" lang="en"> <![endif]-->
+<!--[if IE 8]>    <html class="no-js lt-ie9" lang="en"> <![endif]-->
+<!-- Consider adding a manifest.appcache: -->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en"> <!--<![endif]-->
+  <meta charset="utf-8">
+  <!-- Use the .htaccess and remove these lines to avoid edge case issues.
+       More info: -->
+  <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
+  <title>disclosure logs</title>
+  <meta name="description" content="">
+  <!-- Mobile viewport optimized: -->
+  <meta name="viewport" content="width=device-width">
+  <!-- Place favicon.ico and apple-touch-icon.png in the root directory: -->
+<meta name="google-site-verification" content="jkknX5g2FCpQvrW030b1Nq2hyoa6mb3EDiA7kCoHNj8" />
+  <link rel="stylesheet" href="css/style.css">
+  <!-- More ideas for your <head> here: -->
+  <!-- All JavaScript at the bottom, except this Modernizr build.
+       Modernizr enables HTML5 elements & feature detects for optimal performance.
+       Create your own custom Modernizr build: -->
+  <script src="js/libs/modernizr-2.5.3.min.js"></script>
+  <!-- Prompt IE 6 users to install Chrome Frame. Remove this if you support IE 6.
+ -->
+  <!--[if lt IE 7]><p class=chromeframe>Your browser is <em>ancient!</em> <a href="">Upgrade to a different browser</a> or <a href="">install Google Chrome Frame</a> to experience this site.</p><![endif]-->
+  <header>
+  </header>
+  <div role="main">
+  <dt>Disclosure Log</dt>
+    <dd></dd>
+<a href="http://information.<?php echo $_SERVER['HTTP_HOST'];?>"> - documents</a><br>
+<a href="http://orgs.<?php echo $_SERVER['HTTP_HOST'];?>"> - structure</a><br>
+<a href="http://lobbyists.<?php echo $_SERVER['HTTP_H