upgrade scrapers to beautifulsoup4
--- a/.gitmodules
+++ b/.gitmodules
@@ -4,16 +4,28 @@
 [submodule "couchdb/settee"]
 	path = couchdb/settee
 	url = https://github.com/inadarei/settee.git
-[submodule "lib/springy"]
-	path = lib/springy
-	url = https://github.com/dhotson/springy.git
 [submodule "lib/php-diff"]
 	path = lib/php-diff
 	url = https://github.com/chrisboulton/php-diff.git
 [submodule "lib/Requests"]
 	path = lib/Requests
 	url = https://github.com/rmccue/Requests.git
-[submodule "javascripts/flotr2"]
-	path = javascripts/flotr2
+[submodule "js/flotr2"]
+	path = js/flotr2
 	url = https://github.com/HumbleSoftware/Flotr2.git
+[submodule "lib/phpquery"]
+	path = lib/phpquery
+	url = https://github.com/TobiaszCudnik/phpquery.git
+[submodule "js/sigma"]
+	path = js/sigma
+	url = https://github.com/jacomyal/sigma.js.git
+[submodule "js/bubbletree"]
+	path = js/bubbletree
+	url = https://github.com/okfn/bubbletree.git
+[submodule "lib/querypath"]
+	path = lib/querypath
+	url = https://github.com/technosophos/querypath.git
+[submodule "lib/amon-php"]
+	path = lib/amon-php
+	url = https://github.com/martinrusev/amon-php.git

file:a/about.php -> file:b/about.php
--- a/about.php
+++ b/about.php
@@ -1,17 +1,19 @@
 <div class="foundation-header">
     <h1><a href="about.php">About/FAQ</a></h1>
     <h4 class="subheader">Lorem ipsum.</h4>
 <h2> What is this? </h2>
-Disclosr is a project to monitor Australian Federal Government agencies 
-compliance with their <a href="http://www.oaic.gov.au/publications/other_operational/foi_policy_frequently_asked_questions.html#_Toc291837571">"proactive disclosure requirements"</a>.
-OGRE (Open Government Realization Evaluation) is a ranking of compliance with these requirements.
-Prometheus is the agent which polls agency websites to assess compliance.
+Disclo.gs is a project to monitor Australian Federal Government agencies' 
+compliance with their <a href="http://www.oaic.gov.au/publications/other_operational/foi_policy_frequently_asked_questions.html#_Toc291837571">"proactive disclosure requirements"</a>, in order to make a transparency league table as suggested by the gov2 taskforce: <a href="http://gov2.net.au/blog/2009/09/19/a-league-ladder-of-psi-openness/">http://gov2.net.au/blog/2009/09/19/a-league-ladder-of-psi-openness/</a>.
+<h2> Attributions </h2>
+National Archives of Australia, Australian Governments’ Interactive Functions Thesaurus, 2nd edition, September 2005, published at http://www.naa.gov.au/recordkeeping/thesaurus/index.htm <br/>
+data.gov.au http://data.gov.au/dataset/directory-gov-au-full-data-export/ <br/>
+directory.gov.au <br/>
+australia.gov.au http://australia.gov.au/about/copyright <br/>
 <h2> Open everything </h2>
 All documents released CC-BY 3 AU
 Open source git @

file:b/admin/agls.php (new)
--- /dev/null
+++ b/admin/agls.php
@@ -1,1 +1,35 @@
+// Admin report: one table row per agency (byCanonicalName view) showing the
+// agency name and the 'AGLS.Function' entry of its stored metaTags, if any.
+// NOTE(review): the page title looks copied from another admin page - confirm.
+include_header('Webserver and Accessiblity');
+echo "<table>
+    <tr><th>name</th><th>function</th></tr>";
+$db = $server->get_db('disclosr-agencies');
+try {
+    $agencies = $db->get_view("app", "byCanonicalName", null, true)->rows;
+    if ($agencies) {
+        foreach ($agencies as $row) {
+            // Default to an empty cell so every row has both columns.
+            $function = "";
+            if (isset($row->value->metaTags)) {
+                // metaTags may arrive as an array or as a decoded object.
+                if (is_array($row->value->metaTags)) {
+                    $tags = $row->value->metaTags;
+                } else {
+                    $tags = object_to_array($row->value->metaTags);
+                }
+                if (isset($tags['AGLS.Function'])) {
+                    $function = $tags['AGLS.Function'];
+                }
+            }
+            // Both values come from stored documents (metaTags hold content
+            // taken from agency web pages), so escape them for HTML.
+            echo "<tr><td>" . htmlspecialchars($row->value->name) . "</td>";
+            echo "<td>" . htmlspecialchars($function) . "</td>";
+            echo "</tr>";
+        }
+    }
+} catch (SetteeRestClientException $e) {
+    setteErrorHandler($e);
+}

--- /dev/null
+++ b/admin/conflicts.php
@@ -1,1 +1,48 @@
+// Admin page: for every row of the "getConflicts" view, shows a side-by-side
+// diff between the current document and each conflicting revision.
+include_header('Fix Conflicts');
+                require_once '../lib/php-diff/lib/Diff.php';
+                require_once '../lib/php-diff/lib/Diff/Renderer/Html/SideBySide.php';
+$db = $server->get_db('disclosr-agencies');
+try {
+    $rows = $db->get_view("app", "getConflicts", null, true)->rows;
+    //print_r($rows);
+    foreach ($rows as $row) {
+echo "<h2>".$row->id."</h2>";
+// Fetch the current version of the document over HTTP.
+$request = Requests::get($serverAddr."disclosr-agencies/".$row->id);
+$origSort = object_to_array(json_decode($request->body));
+    // Re-encode and split on commas so the diff gets an array of small fragments.
+    $origDoc = explode(",",json_encode($origSort));
+	// $row->value is iterated as the list of conflicting revision ids.
+	foreach($row->value as $conflictRev) {
+$conflictURL = $serverAddr."disclosr-agencies/".$row->id."?rev=".$conflictRev;
+$request = Requests::get($conflictURL);
+$conflictSort = object_to_array(json_decode($request->body));
+    $conflictDoc = explode(",",json_encode($conflictSort));
+// Only prints a curl command line for the conflicting revision; nothing is deleted here.
+echo "curl -X DELETE ".$conflictURL."<br>".PHP_EOL;
+                // Options for generating the diff
+                $options = array(
+                        //'ignoreWhitespace' => true,
+                        //'ignoreCase' => true,
+                );
+                // Initialize the diff class
+                $diff = new Diff($conflictDoc, $origDoc, $options);
+                // Generate a side by side diff
+                $renderer = new Diff_Renderer_Html_SideBySide;
+                echo $diff->Render($renderer);
+    }
+} catch (SetteeRestClientException $e) {
+    setteErrorHandler($e);

--- /dev/null
+++ b/admin/directory.gexf.php
@@ -1,1 +1,59 @@
+$nodes = Array(Array("id" => "gov", "label" => "Federal Government"));
+$edges = Array();
+/**
+ * Appends an edge record to the global $edges list.
+ * The edge id is the md5 hash of the concatenated source and target ids.
+ *
+ * @param mixed $source id of the edge's source node
+ * @param mixed $target id of the edge's target node
+ */
+function addEdge($source, $target) {
+    global $edges;
+    $edges[] = Array("id" => md5($source . $target), "source" => $source, "target" => $target);
+/**
+ * Appends a node record (id, label, parent id) to the global $nodes list.
+ *
+ * @param mixed $id    node id
+ * @param mixed $label display label of the node
+ * @param mixed $pid   id of the parent node
+ */
+function addNode($id, $label, $pid) {
+    global $nodes;
+    $nodes[] = Array("id" => $id, "label" => $label , "pid" => $pid);
+/**
+ * Recursively registers the children of an XML element as graph nodes.
+ * Only "organization", "organizationalUnit" and "person" elements are used;
+ * other child elements are ignored and not descended into.
+ *
+ * @param mixed $parentID  id of the node the children hang off
+ * @param mixed $parentXML element whose children are iterated
+ */
+function addChildren($parentID, $parentXML) {
+    foreach ($parentXML as $childXML) {
+        if ($childXML->getName() == "organization" || $childXML->getName() == "organizationalUnit" || $childXML->getName() == "person") {
+            $attr = $childXML->attributes();
+            // The element's UUID attribute serves as the node id.
+            $id = $attr['UUID'];
+            // Organisations and units are labelled by <name>, people by <fullName>.
+            if ($childXML->getName() == "organization" || $childXML->getName() == "organizationalUnit") {
+                $label = $childXML->name;
+            } else if ($childXML->getName() == "person") {
+                  $label = $childXML->fullName;
+            }
+            addNode($id, $label, $parentID);
+            // The edge runs from the child (source) to its parent (target).
+            addEdge($id, $parentID);
+            addChildren($id, $childXML);
+        }
+    }
+// Build the node/edge lists from directoryexport.xml, starting at the "gov" root
+// node, then print them as a GEXF 1.2 document.
+if (file_exists('directoryexport.xml')) {
+    $xml = simplexml_load_file('directoryexport.xml');
+    addChildren("gov", $xml);
+} else {
+    exit('Failed to open directoryexport.xml');
+  // NOTE(review): no closing brace for the else branch is visible before this line - confirm against the real file.
+  header('Content-Type: application/gexf+xml');
+echo '<?xml version="1.0" encoding="UTF-8"?>
+<gexf xmlns="http://www.gexf.net/1.2draft" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd" version="1.2">
+    <graph mode="static" defaultedgetype="directed">
+        <nodes>';
+// Only the label is entity-escaped; id and pid are written as-is.
+foreach ($nodes as $node) {
+    echo '          <node id="' . $node['id'] . '" label="' . htmlentities($node['label'],ENT_XML1) . '" ' . (isset($node['pid']) ? 'pid="' . $node['pid'] . '"' : "") . ' />';
+echo '</nodes>
+        <edges>';
+// Edge attributes are written without escaping.
+foreach ($edges as $edge) {
+    echo '            <edge id="' . $edge['id'] . '" source="' . $edge['source'] . '" target="' . $edge['target'] . '" />';
+echo '</edges>
+    </graph>

--- /dev/null
+++ b/admin/directoryexport.xml

--- /dev/null
+++ b/admin/exportEmployees.csv.php
@@ -1,1 +1,86 @@
+// Exports per-agency employee statistics as CSV (default) or, with
+// ?format=json, as a labels/data JSON document.
+$format = "csv";
+//$format = "json";
+if (isset($_REQUEST['format']))  $format = $_REQUEST['format'];
+setlocale(LC_CTYPE, 'C');
+// CSV output starts its header row with a "name" column; JSON starts empty.
+if ($format == "csv") {
+    $headers = Array("name");
+} else {
+    $headers = Array();
+$db = $server->get_db('disclosr-agencies');
+// First pass: collect the union of all keys found under statistics->employees.
+try {
+    $rows = $db->get_view("app", "all", null, true)->rows;
+    $dataValues = Array();
+    foreach ($rows as $row) {
+        if (isset($row->value->statistics->employees)) {
+            $headers = array_unique(array_merge($headers, array_keys(object_to_array($row->value->statistics->employees))));
+        }
+    }
+} catch (SetteeRestClientException $e) {
+    setteErrorHandler($e);
+$fp = fopen('php://output', 'w');
+if ($fp && $db) {
+    if ($format == "csv") {
+        header('Content-Type: text/csv; charset=utf-8');
+        header('Content-Disposition: attachment; filename="export.employeestats.' . date("c") . '.csv"');
+    }
+    header('Pragma: no-cache');
+    header('Expires: 0');
+    if ($format == "csv") {
+        fputcsv($fp, $headers);
+    } else if ($format == "json") {
+        echo '{
+            "labels" : ["' . implode('","', $headers) . '"],'.PHP_EOL;
+    }
+    // Second pass: one output row/series per agency that has employee statistics.
+    try {
+        $agencies = $db->get_view("app", "all", null, true)->rows;
+        //print_r($agencies);
+        $first = true;
+        if ($format == "json") {
+        echo '"data" : ['.PHP_EOL;
+        }
+        foreach ($agencies as $agency) {
+            if (isset($agency->value->statistics->employees)) {
+                $row = Array();
+                $agencyEmployeesArray = object_to_array($agency->value->statistics->employees);
+                // Each cell is the string "[headerIndex,value]"; headers the agency lacks get value 0.
+                // NOTE(review): the same bracketed strings are written in CSV mode, and the
+                // "name" header presumably has no employees entry, giving "[0,0]" - confirm intent.
+                foreach ($headers as $i => $fieldName) {
+                    if (isset($agencyEmployeesArray[$fieldName])) {
+                        $row[] = '['.$i.','.$agencyEmployeesArray[$fieldName]["value"].']';
+                    } else {
+                        $row[] = '['.$i.',0]';
+                    }
+                }
+                if ($format == "csv") {
+                    fputcsv($fp, array_values($row));
+                } else if ($format == "json") {
+                    // Series are comma-separated; the agency name is inserted without JSON escaping.
+                    if (!$first) echo ",";
+                    echo '{"data" : [' . implode(",", array_values($row)) . '], "label": "'.$agency->value->name.'", "lines" : { "show" : true }, "points" : { "show" : true }}'.PHP_EOL;
+                    $first = false;
+                }
+            }
+        }
+        if ($format == "json") {
+        echo ']
+            }'.PHP_EOL;
+        }
+    } catch (SetteeRestClientException $e) {
+        setteErrorHandler($e);
+    }
+    die;

--- /dev/null
+++ b/admin/exportScore.csv.php
@@ -1,1 +1,73 @@
+// Exports the rows of the "score" view as CSV (default) or, with ?format=json,
+// as a JSON document of the form {"labels": [...], "data": [ {series}, ... ]}.
+// The column set is taken from the keys of the first row.
+$db = $server->get_db('disclosr-agencies');
+$format = "csv";
+//$format = "json";
+if (isset($_REQUEST['format']))  $format = $_REQUEST['format'];
+setlocale(LC_CTYPE, 'C');
+$headers = Array();
+$fp = fopen('php://output', 'w');
+if ($fp && $db) {
+    if ($format == "csv") {
+        header('Content-Type: text/csv; charset=utf-8');
+        header('Content-Disposition: attachment; filename="export.score.' . date("c") . '.csv"');
+    }
+    header('Pragma: no-cache');
+    header('Expires: 0');
+    try {
+        $agencies = $db->get_view("score", "score", null, true)->rows;
+        //print_r($agencies);
+        $first = true;
+        foreach ($agencies as $agency) {
+            $agencyArray = object_to_array($agency->value);
+            if ($first) {
+                $headers = array_keys($agencyArray);
+                if ($format == "csv") {
+                    fputcsv($fp, $headers);
+                } else if ($format == "json") {
+                    // Open the object and write the labels BEFORE the data array,
+                    // so the document is well-formed.
+                    echo '{' . PHP_EOL . '"labels" : ' . json_encode($headers) . ',' . PHP_EOL;
+                    echo '"data" : [' . PHP_EOL;
+                }
+            }
+            // One cell per header; fields missing from this row become ''.
+            $row = Array();
+            foreach ($headers as $fieldName) {
+                $row[] = isset($agencyArray[$fieldName]) ? $agencyArray[$fieldName] : '';
+            }
+            if ($format == "csv") {
+                fputcsv($fp, $row);
+            } else if ($format == "json") {
+                if (!$first) echo ",";
+                // json_encode quotes and escapes string values and the label.
+                echo json_encode(Array(
+                    "data" => $row,
+                    "label" => isset($agency->value->name) ? $agency->value->name : "",
+                    "lines" => Array("show" => true),
+                    "points" => Array("show" => true),
+                )) . PHP_EOL;
+            }
+            $first = false;
+        }
+        if ($format == "json") {
+            if ($first) {
+                // No rows at all: still emit a complete, empty document.
+                echo '{' . PHP_EOL . '"labels" : [],' . PHP_EOL . '"data" : [' . PHP_EOL;
+            }
+            echo ']' . PHP_EOL . '}' . PHP_EOL;
+        }
+    } catch (SetteeRestClientException $e) {
+        setteErrorHandler($e);
+    }
+    die;
+}

--- /dev/null
+++ b/admin/genericAgencyFixer.php
@@ -1,1 +1,44 @@
+// One-off fixer: for every agency that has a website but no metaTags yet,
+// downloads the site, records its named <meta> tags on the document and saves it.
+require($basePath . 'lib/phpquery/phpQuery/phpQuery.php');
+setlocale(LC_CTYPE, 'C');
+$db = $server->get_db('disclosr-agencies');
+try {
+    $agencies = $db->get_view("app", "byCanonicalName", null, true)->rows;
+    //print_r($rows);
+    foreach ($agencies as $agency) {
+        //echo $agency->value->name . " ".$agency->value->website."<br />\n";
+         // print_r($agency);
+        //hasRestricitiveLicence"	hasRestrictiveLicense -> has Restrictive Licence
+        // "hasYoutube" -> Tube
+        // "comment" -> "comments"
+        // Agencies that already carry metaTags are skipped.
+        if (!isset($agency->value->metaTags) && isset($agency->value->website)) {
+                echo $agency->value->name . " ".$agency->value->website."<br />\n";
+            $agency->value->metaTags = Array();
+            $request = Requests::get($agency->value->website);
+            $html = phpQuery::newDocumentHTML($request->body);
+            phpQuery::selectDocument($html);
+            // Keep every <meta> element that has a non-empty name attribute.
+            foreach (pq('meta')->elements as $meta) {
+                $tagName = $meta->getAttribute('name');;
+                $content = $meta->getAttribute('content');
+                if ($tagName != "") {
+echo "$tagName == $content <br>\n";
+                 $agency->value->metaTags[$tagName] = $content;
+                }
+            }
+            //print_r($agency->value->metaTags);
+            $db->save($agency->value);
+            echo "<hr>";
+            // Push progress output out after each agency.
+            flush();
+        }
+    }
+} catch (SetteeRestClientException $e) {
+    setteErrorHandler($e);

--- a/admin/importAPSCEmployees.php
+++ b/admin/importAPSCEmployees.php
@@ -32,23 +32,35 @@
                 @$sums[$id][$timePeriod] += $data[1];
             } else {
                 echo "<br>ERROR NAME MISSING FROM ID LIST<br><bR>" . PHP_EOL;
 foreach ($sums as $id => $sum) {
-    echo $id. "<br>" . PHP_EOL;
+    echo $id . "<br>" . PHP_EOL;
     $doc = $db->get($id);
-   // print_r($doc);
-    if (isset($doc->statistics)) $doc->statistics = Array();
+     echo $doc->name . "<br>" . PHP_EOL;
+    // print_r($doc);
+    // Write each summed employee figure into statistics->employees and save
+    // the document only if something actually changed.
+    // statistics is handled as an object throughout: the change check below
+    // reads it with ->, so writing it with [] would fail on a document whose
+    // statistics is already an object.
+    $changed = false;
+    if (!isset($doc->statistics)) {
+        $changed = true;
+        $doc->statistics = new stdClass();
+    } else if (is_array($doc->statistics)) {
+        $doc->statistics = (object) $doc->statistics;
+    }
+    if (!isset($doc->statistics->employees)) {
+        $doc->statistics->employees = new stdClass();
+    }
     foreach ($sum as $timePeriod => $value) {
-        $doc->statistics["employees"][$timePeriod] = Array("value"=>$value, "source"=>"http://apsc.gov.au/stateoftheservice/");
+        if (!isset($doc->statistics->employees->$timePeriod->value)
+                || $doc->statistics->employees->$timePeriod->value != $value) {
+            $changed = true;
+            $doc->statistics->employees->$timePeriod = (object) Array("value" => $value, "source" => "http://apsc.gov.au/stateoftheservice/");
+        }
-    $db->save($doc);
+    if ($changed) {
+        $db->save($doc);
+    } else {
+        echo "not changed" . "<br>" . PHP_EOL;
+    }
 // employees: timeperiod, source = apsc state of service, value 

--- /dev/null
+++ b/admin/importAustraliaGovAuGov2.php
@@ -1,1 +1,61 @@
+require_once '../include/common.inc.php';
+// Build $nametoid: trimmed key of each "byName" view row => that row's value.
+$db = $server->get_db('disclosr-agencies');
+$rows = $db->get_view("app", "byName")->rows;
+$nametoid = Array();
+$accounts = Array();
+foreach ($rows as $row) {
+    $nametoid[trim($row->key)] = $row->value;
+/**
+ * Downloads a CSV and prints every agency name in it that is not a key of
+ * the global $nametoid map. Nothing is stored; $accountField is not used here.
+ *
+ * @param string $url          address of the CSV
+ * @param string $nameField    header of the column holding the agency name
+ * @param string $accountField unused in this version
+ * @param mixed  $filter       when truthy, only rows whose "State" column is "NAT" are checked
+ */
+function extractCSVAccounts($url, $nameField, $accountField, $filter) {
+    global $accounts, $nametoid;
+    $request = Requests::get($url);
+    $Data = str_getcsv($request->body, "\n"); //parse the rows 
+    $headers = Array();
+    foreach ($Data as $num => $line) {
+        $Row = str_getcsv($line, ",");
+        // Row 0 is skipped and row 1 is taken as the header row.
+        if ($num == 0) {
+        } else if ($num == 1) {
+            $headers = $Row;
+            //print_r($headers);
+        } else {
+            // NOTE(review): array_search() returns false for an unknown header,
+            // which as an array index addresses column 0 - confirm this is acceptable.
+            if (isset($Row[array_search($nameField, $headers)])) {
+                $agencyName = $Row[array_search($nameField, $headers)];
+                if (!$filter || $Row[array_search("State", $headers)] == "NAT") {
+                    if (!in_array(trim($agencyName), array_keys($nametoid))) {
+                        echo "$agencyName missing" . PHP_EOL;
+                    } else {
+                        // echo $Row[array_search($nameField, $headers)] . PHP_EOL;
+                    }
+                }
+            } else {
+                //echo "error finding agency" . $line . PHP_EOL;
+            }
+        }
+    }
+// http://agimo.govspace.gov.au/page/gov2register/
+// twitter
+//extractCSVAccounts("https://docs.google.com/spreadsheet/pub?key=0Ap1exl80wB8OdHNKVmQ5RVlvQWpibDAxNHkzcU1nV2c&single=true&gid=0&output=csv", "Agency/Body/Event", "", true);
+// RSS
+// https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGJxandJREhLSGlWWUZfZ2xKOTNHZ0E&output=csv
+// facebook 
+ * http://australia.gov.au/news-and-media/media-release-rss-feeds
+ * http://australia.gov.au/news-and-media/social-media/blogs
+ * http://australia.gov.au/news-and-media/social-media/twitter
+ * http://australia.gov.au/news-and-media/social-media/facebook
+ * http://australia.gov.au/news-and-media/social-media/youtube
+ * http://australia.gov.au/news-and-media/social-media/flickr
+ * http://australia.gov.au/news-and-media/social-media/apps http://www.harmony.gov.au/get-involved/app-downloads.htm http://www.em.gov.au/Resources/Pages/Before-the-Storm-phone-game.aspx
+ * http://australia.gov.au/news-and-media/social-media/podcasts
+ */

--- a/admin/importDirectoryUUIDs.php
+++ b/admin/importDirectoryUUIDs.php
@@ -1,13 +1,37 @@
 // http://gold.gov.au/reports/department/index.xml
 require_once '../include/common.inc.php';
-try {
-    $server->create_db('disclosr-agencies');
-} catch (SetteeRestClientException $e) {
-    setteErrorHandler($e);
+// Build $nametoid: trimmed key of each "byName" view row => that row's value.
+$db = $server->get_db('disclosr-agencies');
+$rows = $db->get_view("app", "byName")->rows;
+$nametoid = Array();
+$sums = Array();
+foreach ($rows as $row) {
+    $nametoid[trim($row->key)] = $row->value;
-$db = $server->get_db('disclosr-agencies');
+// Reports every agency name / short name found in index.xml that is not a
+// key of $nametoid, together with the agency's dn.
+if (file_exists('index.xml')) {
+    $xml = simplexml_load_file('index.xml');
+    if ($xml === false) {
+        exit('Failed to parse index.xml.');
+    }
+    foreach ($xml as $agency) {
+        // "" . $element converts the XML element to a plain string.
+        $names = Array();
+        $names[] = "".$agency->name;
+        if (isset($agency->shortName)) {
+             $names[] = "".$agency->shortName;
+        }
+       foreach ($names as $name) {
+           // Direct key lookup instead of scanning array_keys() for each name.
+           if (!array_key_exists($name, $nametoid)) {
+               echo "$name missing".PHP_EOL;
+               print_r($names);
+               echo $agency->dn;
+               echo PHP_EOL;
+               echo PHP_EOL;
+           }
+       }
+    }
+} else {
+    exit('Failed to open index.xml.');
+}

--- a/admin/importGov2RegisterRSSFacebookTwitter.php
+++ b/admin/importGov2RegisterRSSFacebookTwitter.php
@@ -1,16 +1,100 @@
 require_once '../include/common.inc.php';
-try {
-    $server->create_db('disclosr-agencies');
-} catch (SetteeRestClientException $e) {
-    setteErrorHandler($e);
+require($basePath . 'lib/phpquery/phpQuery/phpQuery.php');
+// Build $nametoid: trimmed key of each "byName" view row => that row's value
+// (later used as the document id when saving the collected accounts).
+$db = $server->get_db('disclosr-agencies');
+$rows = $db->get_view("app", "byName")->rows;
+$nametoid = Array();
+$accounts = Array();
+foreach ($rows as $row) {
+    $nametoid[trim($row->key)] = $row->value;
-$db = $server->get_db('disclosr-agencies');
-// twitter https://docs.google.com/spreadsheet/fm?id=tsJVd9EYoAjbl014y3qMgWg.03918275400592898296.8568379511161083736&hl=en&fmcmd=5&gid=0
-// RSS https://docs.google.com/spreadsheet/fm?id=tbqjwIDHKHiVYF_glJ93GgA.03918275400592898296.8789688748524615194&authkey=CJDP-uQG&hl=en_GB&fmcmd=5&gid=0
-// facebook https://docs.google.com/spreadsheet/fm?id=tkcqoo9wrgzNWmoANuVhsBw.03918275400592898296.3040387705062056060&authkey=CKzl7r0I&hl=en_GB&fmcmd=5&gid=0
+/**
+ * Scrapes an HTML table for agency/account pairs and appends each account
+ * URL to the global $accounts map under [agency id][$accountType].
+ * Cells are recognised by their CSS class: s11/s10/s7 hold the agency name,
+ * s6/s9 hold the account (the href of a contained link wins over the cell text).
+ * Agencies that are not keys of the global $nametoid map are reported and skipped.
+ *
+ * @param string $url         address of the HTML page
+ * @param string $accountType key to file the accounts under, e.g. "RSS"
+ */
+function extractHTMLAccounts($url, $accountType) {
+    global $accounts, $nametoid;
+    $request = Requests::get($url);
+    $doc = phpQuery::newDocumentHTML($request->body);
+    phpQuery::selectDocument($doc);
+    foreach (pq('tr')->elements as $tr) {
+        //echo $tr->nodeValue.PHP_EOL;
+        $agency = "";
+        $accountURL = "";
+        foreach ($tr->childNodes as $td) {
+            // Text and comment nodes have no getAttribute(); skip them.
+            if (!($td instanceof DOMElement)) {
+                continue;
+            }
+            $class = $td->getAttribute("class");
+            //echo "cccc $class ".$td->nodeValue.PHP_EOL;
+            if ($class == "s11" || $class == "s10" || $class == "s7") {
+                $agency = $td->nodeValue;
+            } else if ($class == "s6" || $class == "s9") {
+                $accountURL = $td->nodeValue;
+                foreach ($td->childNodes as $a) {
+                    if ($a instanceof DOMElement && $a->getAttribute("href") != "") {
+                        $accountURL = $a->getAttribute("href");
+                    }
+                }
+            }
+        }
+        if ($agency != "" && $accountURL != "") {
+            $agencyName = trim($agency);
+            if (!array_key_exists($agencyName, $nametoid)) {
+                echo $agencyName . " missing" . PHP_EOL;
+            } else {
+                //   echo $agency." = ".$url.PHP_EOL;
+                $accounts[$nametoid[$agencyName]][$accountType][] = $accountURL;
+            }
+        }
+    }
+}
+/**
+ * Downloads a CSV and appends the account column of each matching row to the
+ * global $accounts map under [agency id][$accountType]. Agency names that are
+ * not keys of the global $nametoid map are reported and skipped.
+ *
+ * @param string $url          address of the CSV
+ * @param string $accountType  key to file the accounts under, e.g. "Twitter"
+ * @param string $nameField    header of the column holding the agency name
+ * @param string $accountField header of the column holding the account
+ * @param mixed  $filter       when truthy, only rows whose "State" column is "NAT" are used
+ */
+function extractCSVAccounts($url, $accountType, $nameField, $accountField, $filter) {
+    global $accounts, $nametoid;
+    $request = Requests::get($url);
+    $Data = str_getcsv($request->body, "\n"); //parse the rows 
+    $headers = Array();
+    foreach ($Data as $num => $line) {
+        $Row = str_getcsv($line, ",", '"');
+        // Row 0 is skipped and row 1 is taken as the header row.
+        if ($num == 0) {
+        } else if ($num == 1) {
+            $headers = $Row;
+            //print_r($headers);
+        } else {
+            // NOTE(review): array_search() returns false for an unknown header,
+            // which as an array index addresses column 0 - confirm this is acceptable
+            // (the Twitter call in this file passes "" as $accountField).
+            if (isset($Row[array_search($nameField, $headers)])) {
+                $agencyName = $Row[array_search($nameField, $headers)];
+                if (!$filter || $Row[array_search("State", $headers)] == "NAT") {
+                    if (!in_array(trim($agencyName), array_keys($nametoid))) {
+                        echo trim($agencyName) . " missing" . PHP_EOL;
+                    } else {
+                        // echo $Row[array_search($nameField, $headers)] . PHP_EOL;
+                        $accounts[$nametoid[trim($agencyName)]][$accountType][] = $Row[array_search($accountField, $headers)];
+                    }
+                }
+            } else {
+                //echo "error finding agency" . $line . PHP_EOL;
+            }
+        }
+    }
+// http://agimo.govspace.gov.au/page/gov2register/
+// twitter
+extractCSVAccounts("https://docs.google.com/spreadsheet/pub?key=0Ap1exl80wB8OdHNKVmQ5RVlvQWpibDAxNHkzcU1nV2c&single=true&gid=0&output=csv", "Twitter", "Agency/Body/Event", "", true);
+// RSS
+extractHTMLAccounts("https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGJxandJREhLSGlWWUZfZ2xKOTNHZ0E&output=html", "RSS");
+// facebook 
+extractHTMLAccounts("https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGtjcW9vOXdyZ3pOV21vQU51VmhzQnc&single=true&gid=0&output=html", "Facebook");
+// Merge the collected accounts into each agency document as has<Type> lists.
+foreach ($accounts as $id => $accountTypes) {
+    echo $id . "<br>" . PHP_EOL;
+    $doc = object_to_array($db->get($id));
+    // print_r($doc);
+    // The inner loop variable has its own name so it cannot overwrite the
+    // $accounts array that the outer loop is iterating.
+    foreach ($accountTypes as $accountType => $typeAccounts) {
+        $field = "has" . $accountType;
+        if (!isset($doc[$field]) || !is_array($doc[$field])) {
+            $doc[$field] = Array();
+        }
+        // array_unique() keeps the original keys; reindex so the result stays
+        // a plain list (presumably serialised as a JSON array on save).
+        $doc[$field] = array_values(array_unique(array_merge($doc[$field], $typeAccounts)));
+    }
+    $db->save($doc);
+}

--- /dev/null
+++ b/admin/importRTKbodies.php
@@ -1,1 +1,56 @@
+require_once '../include/common.inc.php';
+// Build $nametoid: trimmed key of each "byName" view row => that row's value.
+$db = $server->get_db('disclosr-agencies');
+$rows = $db->get_view("app", "byName")->rows;
+$nametoid = Array();
+$accounts = Array();
+foreach ($rows as $row) {
+    $nametoid[trim($row->key)] = $row->value;
+/**
+ * Downloads a CSV of public bodies and records, for each agency whose name is
+ * a key of the global $nametoid map, its righttoknow.org.au body URL in the
+ * global $accounts map under [agency id]["rtkURLs"][agency name].
+ * Unknown agency names are reported and skipped.
+ *
+ * @param string $url          address of the CSV
+ * @param string $nameField    header of the column holding the agency name
+ * @param string $accountField header of the column holding the URL name
+ * @param mixed  $filter       unused; optional so three-argument calls work
+ */
+function extractCSVAccounts($url, $nameField, $accountField, $filter = false) {
+    global $accounts, $nametoid;
+    $request = Requests::get($url);
+    echo $url;
+    $Data = str_getcsv($request->body, "\n"); //parse the rows 
+    $headers = Array();
+    $nameCol = false;
+    $accountCol = false;
+    foreach ($Data as $num => $line) {
+        $Row = str_getcsv($line, ",");
+        if ($num == 0) {
+            // The first row is the header row; resolve both columns once.
+            $headers = $Row;
+            print_r($headers);
+            $nameCol = array_search($nameField, $headers);
+            $accountCol = array_search($accountField, $headers);
+        } else if ($nameCol !== false && $accountCol !== false
+                && isset($Row[$nameCol]) && isset($Row[$accountCol])) {
+            // The !== false checks stop a missing header from silently
+            // addressing column 0.
+            $agencyName = $Row[$nameCol];
+            $key = trim($agencyName);
+            if (!array_key_exists($key, $nametoid)) {
+                echo "$agencyName missing" . PHP_EOL;
+            } else {
+                echo $agencyName . PHP_EOL;
+                $accounts[$nametoid[$key]]["rtkURLs"][$agencyName] = 'http://www.righttoknow.org.au/body/' . $Row[$accountCol];
+            }
+        } else {
+            echo "error finding any agency" . $line . PHP_EOL;
+        }
+    }
+}
+extractCSVAccounts("http://www.righttoknow.org.au/body/all-authorities.csv","Agency","URL name");
+/* foreach ($accounts as $id => $accountTypes) {
+    echo $id . "<br>" . PHP_EOL;
+    $doc =