Add validation script
Add validation script


Former-commit-id: 8f0da9f23f1e65c57b3e158ef358630a33f10141

--- a/admin/exportEmployees.csv.php
+++ b/admin/exportEmployees.csv.php
@@ -22,6 +22,7 @@
         if (isset($row->value->statistics->employees)) {
 
             $headers = array_unique(array_merge($headers, array_keys(object_to_array($row->value->statistics->employees))));
+
         }
     }
 } catch (SetteeRestClientException $e) {

--- /dev/null
+++ b/admin/exportScore.csv.php
@@ -1,1 +1,77 @@
+<?php
 
+/**
+ * Export the rows of the "score" view of the disclosr-agencies database.
+ *
+ * CSV (the default) is sent as an attachment with one header row. With
+ * format=json a single JSON object is written:
+ *   {"labels": [column names], "data": [one chart series per agency]}
+ */
+include_once("../include/common.inc.php");
+
+$db = $server->get_db('disclosr-agencies');
+$format = "csv";
+if (isset($_REQUEST['format'])) {
+    $format = $_REQUEST['format'];
+}
+
+setlocale(LC_CTYPE, 'C');
+
+$headers = Array();
+
+$fp = fopen('php://output', 'w');
+if ($fp && $db) {
+    if ($format == "csv") {
+        header('Content-Type: text/csv; charset=utf-8');
+        header('Content-Disposition: attachment; filename="export.score.' . date("c") . '.csv"');
+    } else if ($format == "json") {
+        header('Content-Type: application/json; charset=utf-8');
+    }
+    header('Pragma: no-cache');
+    header('Expires: 0');
+
+    try {
+        $agencies = $db->get_view("score", "score", null, true)->rows;
+        $first = true;
+        // JSON series are collected here and encoded in one go at the end, so
+        // quoting, escaping and comma placement are handled by json_encode().
+        $series = Array();
+        foreach ($agencies as $agency) {
+            $agencyArray = object_to_array($agency->value);
+            if ($first) {
+                // The first row fixes the column set and order for every row.
+                $headers = array_keys($agencyArray);
+                if ($format == "csv") {
+                    fputcsv($fp, $headers);
+                }
+                $first = false;
+            }
+
+            $row = Array();
+            foreach ($headers as $fieldName) {
+                $row[] = isset($agencyArray[$fieldName]) ? $agencyArray[$fieldName] : '';
+            }
+
+            if ($format == "csv") {
+                fputcsv($fp, $row);
+            } else if ($format == "json") {
+                $series[] = Array(
+                    "data" => $row,
+                    "label" => isset($agency->value->name) ? $agency->value->name : '',
+                    "lines" => Array("show" => true),
+                    "points" => Array("show" => true)
+                );
+            }
+        }
+
+        if ($format == "json") {
+            echo json_encode(Array("labels" => $headers, "data" => $series)) . PHP_EOL;
+        }
+    } catch (SetteeRestClientException $e) {
+        setteErrorHandler($e);
+    }
+
+    die;
+}
+?>
+

--- a/admin/importGov2RegisterRSSFacebookTwitter.php
+++ b/admin/importGov2RegisterRSSFacebookTwitter.php
@@ -1,7 +1,7 @@
 <?php
 
 require_once '../include/common.inc.php';
-require($basePath.'lib/phpquery/phpQuery/phpQuery.php');
+require($basePath . 'lib/phpquery/phpQuery/phpQuery.php');
 
 $db = $server->get_db('disclosr-agencies');
 $rows = $db->get_view("app", "byName")->rows;
@@ -10,41 +10,40 @@
 foreach ($rows as $row) {
     $nametoid[trim($row->key)] = $row->value;
 }
+
 function extractHTMLAccounts($url, $accountType) {
     global $accounts, $nametoid;
     $request = Requests::get($url);
     $doc = phpQuery::newDocumentHTML($request->body);
     phpQuery::selectDocument($doc);
     foreach (pq('tr')->elements as $tr) {
-       //echo $tr->nodeValue.PHP_EOL;
-       $agency = "";
-       $url = "";
-       foreach ($tr->childNodes as $td) {
-           $class = $td->getAttribute("class");
-           //echo "cccc $class ".$td->nodeValue.PHP_EOL;
-           if ($class == "s11" || $class == "s10" || $class == "s7") {
-               $agency = $td->nodeValue;
-           } else if ($class == "s6" || $class == "s9"){
-               $url = $td->nodeValue;
-               foreach($td->childNodes as $a) {
-                   $href = $a->getAttribute("href");
-                   if ($href != "") {
-                       $url = $href;
-                   }
-               }
-           }
-       }
-       if ($agency != "" && $url != "") {
-           if (!in_array(trim($agency), array_keys($nametoid))) {
-                        echo trim($agency)." missing" . PHP_EOL;
-                    } else {
-                     //   echo $agency." = ".$url.PHP_EOL;
-                        $accounts[$nametoid[trim($agency)]][$accountType][] = $url;
+        //echo $tr->nodeValue.PHP_EOL;
+        $agency = "";
+        $url = "";
+        foreach ($tr->childNodes as $td) {
+            $class = $td->getAttribute("class");
+            //echo "cccc $class ".$td->nodeValue.PHP_EOL;
+            if ($class == "s11" || $class == "s10" || $class == "s7") {
+                $agency = $td->nodeValue;
+            } else if ($class == "s6" || $class == "s9") {
+                $url = $td->nodeValue;
+                foreach ($td->childNodes as $a) {
+                    $href = $a->getAttribute("href");
+                    if ($href != "") {
+                        $url = $href;
                     }
-       
-       }
+                }
+            }
+        }
+        if ($agency != "" && $url != "") {
+            if (!in_array(trim($agency), array_keys($nametoid))) {
+                echo trim($agency) . " missing" . PHP_EOL;
+            } else {
+                //   echo $agency." = ".$url.PHP_EOL;
+                $accounts[$nametoid[trim($agency)]][$accountType][] = $url;
+            }
+        }
     }
-    
 }
 
 function extractCSVAccounts($url, $accountType, $nameField, $accountField, $filter) {
@@ -53,7 +52,7 @@
     $Data = str_getcsv($request->body, "\n"); //parse the rows 
     $headers = Array();
     foreach ($Data as $num => $line) {
-        $Row = str_getcsv($line, ",",'"');
+        $Row = str_getcsv($line, ",", '"');
         if ($num == 0) {
             
         } else if ($num == 1) {
@@ -64,7 +63,7 @@
                 $agencyName = $Row[array_search($nameField, $headers)];
                 if (!$filter || $Row[array_search("State", $headers)] == "NAT") {
                     if (!in_array(trim($agencyName), array_keys($nametoid))) {
-                        echo trim($agencyName)." missing" . PHP_EOL;
+                        echo trim($agencyName) . " missing" . PHP_EOL;
                     } else {
                         // echo $Row[array_search($nameField, $headers)] . PHP_EOL;
                         $accounts[$nametoid[trim($agencyName)]][$accountType][] = $Row[array_search($accountField, $headers)];
@@ -84,6 +83,18 @@
 extractHTMLAccounts("https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGJxandJREhLSGlWWUZfZ2xKOTNHZ0E&output=html", "RSS");
 // facebook 
 extractHTMLAccounts("https://docs.google.com/spreadsheet/pub?hl=en_GB&hl=en_GB&key=0Ah41IAK0HzSTdGtjcW9vOXdyZ3pOV21vQU51VmhzQnc&single=true&gid=0&output=html", "Facebook");
+foreach ($accounts as $id => $accountTypes) { // write harvested accounts back to each agency document
+    echo $id . "<br>" . PHP_EOL;
+    $doc = object_to_array($db->get($id));
+    // The inner loop variable must not be named $accounts: that would overwrite the array being iterated.
 
+    foreach ($accountTypes as $accountType => $foundAccounts) {
+        $field = "has" . $accountType;
+        $existing = (isset($doc[$field]) && is_array($doc[$field])) ? $doc[$field] : Array();
+        // array_unique() keeps the original keys; reindex so the field stays a plain list when encoded.
+        $doc[$field] = array_values(array_unique(array_merge($existing, $foundAccounts)));
+    }
+    $db->save($doc);
+}
 ?>
 

--- /dev/null
+++ b/admin/validation.py
@@ -1,1 +1,41 @@
+#http://packages.python.org/CouchDB/client.html
+import couchdb
+import json
+import pprint
+import re
+from tidylib import tidy_document
 
+couch = couchdb.Server('http://127.0.0.1:5984/')
+
+# select database
+docsdb = couch['disclosr-documents']
+
+# Patterns are compiled once here rather than on every call to f().
+INVALID = re.compile(r"ensure|testing|flicker|updating|longdesc|Accessibility Checks|not recognized")
+VALID = re.compile(r"line")
+
+def f(x):
+    """Return True when tidy report line x should be kept.
+
+    A line is kept if it contains "line" and matches none of the INVALID phrases.
+    """
+    return bool(VALID.search(x)) and not INVALID.search(x)
+
+for row in docsdb.view('app/getValidationRequired'):
+    print(row.id)
+    if not row.value:
+        # No attachment map was emitted for this document; nothing to validate.
+        continue
+    # validate the first attachment listed for the document
+    attachment = docsdb.get_attachment(row.id, next(iter(row.value)))
+    if attachment is None:
+        continue
+    html = attachment.read()
+    #http://www.aprompt.ca/Tidy/accessibilitychecks.html
+    document, errors = tidy_document(html,options={'accessibility-check':1,'show-warnings':0,'markup':0},keep_doc=True)
+    # keep only the report lines accepted by f()
+    errors = '\n'.join(filter(f,errors.split('\n')))
+    doc = docsdb.get(row.id)
+    doc['validation'] = errors
+    docsdb.save(doc)
+

directory:a/couchdb/settee -> directory:b/couchdb/settee
--- a/couchdb/settee
+++ b/couchdb/settee

--- a/getAgency.php
+++ b/getAgency.php
@@ -86,9 +86,14 @@
         } else if ($schemas['agency']['properties'][$defaultField]['type'] == "array") {
             if (is_array($row[$defaultField])) {
                 $row[$defaultField][] = "";
+                $row[$defaultField][] = "";
+                $row[$defaultField][] = "";
             } else {
                 $value = $row[$defaultField];
                 $row[$defaultField] = Array($value);
+                $row[$defaultField][] = "";
+                $row[$defaultField][] = "";
+                
             }
         }
     }
@@ -102,7 +107,7 @@
 // by name = startkey="Ham"&endkey="Ham\ufff0"
 // edit?
 
-    $row = $db->get($_REQUEST['id']);
+    $obj = $db->get($_REQUEST['id']);
 //print_r($row);
     if (sizeof($_POST) > 0) {
 //print_r($_POST);
@@ -126,17 +131,19 @@
             echo "Edited version was latest version, continue saving";
             $newdoc = $_POST;
             $newdoc['metadata']['lastModified'] = time();
-            $row = $db->save($newdoc);
+            $obj = $db->save($newdoc);
         } else {
             echo "ALERT doc revised by someone else while editing. Document not saved.";
         }
     }
 
     $mode = "edit";
+    $rowArray = object_to_array($obj);
+ksort($rowArray);
     if ($mode == "edit") {
-        $row = addDefaultFields(object_to_array($row));
+        $row = addDefaultFields($rowArray);
     } else {
-        $row = object_to_array($row);
+        $row = $rowArray;
     }
 
     if ($mode == "view") {
@@ -188,14 +195,14 @@
               (isset($row->value->name) && $row->value->name != "" ? $row->value->name : "NO NAME " . $row->value->abn)
               . '</a></li>';
               } */
-            $rows = $db->get_view("app", "byName")->rows;
+            $rows = $db->get_view("app", "byCanonicalName")->rows;
             //print_r($rows);
             echo '<ul>';
             foreach ($rows as $row) {
                 //   print_r($row);
-                echo '<li typeof="schema:GovernmentOrganisation foaf:Organization" about="getAgency.php?id=' . $row->value . '">
-<a href="getAgency.php?id=' . $row->value . '" rel="schema:url foaf:page" property="schema:name foaf:name">' .
-                $row->key
+                echo '<li typeof="schema:GovernmentOrganisation foaf:Organization" about="getAgency.php?id=' . $row->value->_id . '">
+<a href="getAgency.php?id=' . $row->value->_id . '" rel="schema:url foaf:page" property="schema:name foaf:name">' .
+                $row->value->name
                 . '</a></li>';
             }
             echo "</ul>";

--- a/include/couchdb.inc.php
+++ b/include/couchdb.inc.php
@@ -22,7 +22,10 @@
        },
        "byWebServer": {
            "map": "function(doc) {\n  emit(doc.web_server, doc);\n}"
-       }
+       },
+  "getValidationRequired": {
+       "map": "function(doc) {\nif (doc.mime_type == \"text/html\" \n&& typeof(doc.validation) == \"undefined\") {\n  emit(doc._id, doc._attachments);\n}\n}"
+   }
    }*/
 }
 
@@ -92,25 +95,29 @@
   }
 }";
     // http://stackoverflow.com/questions/646628/javascript-startswith
-    $obj->views->scoreHas->map = 'if(!String.prototype.startsWith){
+$obj->views->score->map =  'if(!String.prototype.startsWith){
     String.prototype.startsWith = function (str) {
         return !this.indexOf(str);
     }
 }
-if(!String.prototype.endsWith){
-	String.prototype.endsWith = function(suffix) {
-	    return this.indexOf(suffix, this.length - suffix.length) !== -1;
-	};
-}
+
 function(doc) {
-if (typeof(doc["status"]) == "undefined" || doc["status"] != "suspended") {
-for(var propName in doc) {
-      if(typeof(doc[propName]) != "undefined" && (propName.startsWith("has") || propName.endsWith("URL"))) {
-  	emit(propName, 1);
-	}
-}
-  emit("total", 1);
-  }
+    count = 0;
+    if (doc["status"] != "suspended") {
+        for(var propName in doc) {
+            if(typeof(doc[propName]) != "undefined" && doc[propName] != "") {
+                count++;
+            }
+        }
+        portfolio = doc.parentOrg;
+        if (doc.orgType == "FMA-DepartmentOfState") {
+            portfolio = doc._id;
+        }
+        if (doc.orgType == "Court-Commonwealth" || doc.orgType == "FMA-DepartmentOfParliament") {
+            portfolio = doc.orgType;
+        }
+        emit(count+doc._id, {id:doc._id, name: doc.name, score:count, orgType: doc.orgType, portfolio:portfolio});
+    }
 }';
         $obj->views->scoreHas->map = 'if(!String.prototype.startsWith){
     String.prototype.startsWith = function (str) {
@@ -163,3 +170,4 @@
 function setteErrorHandler($e) {
     echo $e->getMessage() . "<br>" . PHP_EOL;
 }
+

--- a/schemas/agency.json.php
+++ b/schemas/agency.json.php
@@ -17,6 +17,7 @@
         "parentOrg" => Array("type" => "string", "required" => true, "x-title" => "Parent Organisation", "description" => "Parent organisation, usually a department of state"),
         "website" => Array("type" => "string", "required" => true, "x-title" => "Website", "x-property" => "schema:url foaf:homepage", "description" => "Website URL"),
         "abn" => Array("type" => "string", "required" => true, "x-title" => "Australian Business Number", "description" => "ABN from business register"),
+        "employees" => Array("type" => "string", "required" => true, "x-title" => "2010-2011 employees", "description" => "2010-2011 employees"),
         "contractListURL" => Array("type" => "string", "required" => true,  "x-title" => "Contract Listing", "description" => "Departmental and agency contracts, <a href='http://www.aph.gov.au/senate/pubs/standing_orders/d05.htm'>mandated by the Senate</a>" ),
         "budgetURL" => Array("type" => "string", "required" => true,"x-title" => "Budget", "description" => "Portfolio Budget Statements and Portfolio Additional Estimates Statements"),
         "grantsReportingURL" => Array("type" => "string", "required" => true, "x-title" => "Grants Awarded",
@@ -49,6 +50,8 @@
             "items" => Array("type" => "string")),
          "hasRestrictiveLicence" => Array("type" => "array","required" => true, "x-title" => "Has Restrictive Licence", "description" => "Has any page licenced under terms more restrictive than Crown Copyright",
             "items" => Array("type" => "string")),
+         "hasPermissiveLicence" => Array("type" => "array","required" => true, "x-title" => "Has Permissive Licence", "description" => "Has any page licenced under terms more permissive than Crown Copyright but not clear CCBY",
+            "items" => Array("type" => "string")),
            "hasCrownCopyright" => Array("type" => "array", "required" => true, "x-title" => "Has Standard Crown Copyright licence", "description" => "Has any page still licenced under the former Commonwealth Copyright Administration",
             "items" => Array("type" => "string")),
     ),