Parse complete directory.gov.au export into gexf
Former-commit-id: 7be06add579fbd14042ea9084bc738168a62dcdf
--- /dev/null
+++ b/admin/directory.gexf.php
@@ -1,1 +1,59 @@
+<?php
+// Graph accumulators appended to by addNode()/addEdge(); the node list starts
+// with the synthetic "gov" root that the top-level export entries hang off.
+$edges = [];
+$nodes = [
+    ["id" => "gov", "label" => "Federal Government"],
+];
+
+/**
+ * Append an edge from $source to $target to the global $edges list.
+ *
+ * The edge id is the md5 of the two endpoint ids concatenated.
+ */
+function addEdge($source, $target) {
+    $edgeId = md5($source . $target);
+    $GLOBALS['edges'][] = [
+        "id" => $edgeId,
+        "source" => $source,
+        "target" => $target,
+    ];
+}
+
+/**
+ * Append a node (id, display label and parent id) to the global $nodes list.
+ */
+function addNode($id, $label, $pid) {
+    $entry = [
+        "id" => $id,
+        "label" => $label,
+        "pid" => $pid,
+    ];
+    $GLOBALS['nodes'][] = $entry;
+}
+
+/**
+ * Recursively register the children of an export element as graph nodes.
+ *
+ * Every organization, organizationalUnit and person child becomes a node whose
+ * pid is $parentID, plus an edge from the child to the parent; the function
+ * then descends into that child. Other element types are ignored and not
+ * descended into.
+ *
+ * @param string $parentID id of the node owning these children
+ * @param SimpleXMLElement $parentXML element whose direct children are scanned
+ */
+function addChildren($parentID, $parentXML) {
+    foreach ($parentXML as $childXML) {
+        // Resolve the element name once instead of on every comparison.
+        $type = $childXML->getName();
+        if ($type == "organization" || $type == "organizationalUnit") {
+            $label = (string) $childXML->name;
+        } else if ($type == "person") {
+            $label = (string) $childXML->fullName;
+        } else {
+            continue;
+        }
+        // Store plain strings rather than SimpleXMLElement handles so the
+        // node/edge lists do not depend on implicit string conversion later.
+        $attr = $childXML->attributes();
+        $id = (string) $attr['UUID'];
+        addNode($id, $label, $parentID);
+        addEdge($id, $parentID);
+        addChildren($id, $childXML);
+    }
+}
+
+// Load the export and build the node/edge lists before any output is sent.
+if (!file_exists('directoryexport.xml')) {
+    exit('Failed to open directoryexport.xml');
+}
+$xml = simplexml_load_file('directoryexport.xml');
+if ($xml === false) {
+    // simplexml_load_file() returns false when the file cannot be parsed;
+    // stop here instead of emitting a graph that only contains the root node.
+    exit('Failed to parse directoryexport.xml');
+}
+addChildren("gov", $xml);
+
+// Escape a value for use inside a double-quoted XML attribute. ENT_XML1 on its
+// own leaves quotes untouched, so ENT_QUOTES is needed to keep a label that
+// contains a double quote from terminating the attribute early.
+$xmlAttr = function ($value) {
+    return htmlspecialchars((string) $value, ENT_XML1 | ENT_QUOTES, 'UTF-8');
+};
+
+header('Content-Type: application/gexf+xml');
+echo '<?xml version="1.0" encoding="UTF-8"?>
+<gexf xmlns="http://www.gexf.net/1.2draft" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd" version="1.2">
+ <graph mode="static" defaultedgetype="directed">
+ <nodes>';
+foreach ($nodes as $node) {
+    // The root node has no "pid" key, so the attribute is emitted only when present.
+    $pidAttr = isset($node['pid']) ? 'pid="' . $xmlAttr($node['pid']) . '"' : "";
+    echo ' <node id="' . $xmlAttr($node['id']) . '" label="' . $xmlAttr($node['label']) . '" ' . $pidAttr . ' />';
+}
+echo '</nodes>
+ <edges>';
+foreach ($edges as $edge) {
+    // Edge ids are md5 hex digests and need no escaping; the endpoints do.
+    echo ' <edge id="' . $edge['id'] . '" source="' . $xmlAttr($edge['source']) . '" target="' . $xmlAttr($edge['target']) . '" />';
+}
+echo '</edges>
+ </graph>
+</gexf>';
+?>
+
--- /dev/null
+++ b/admin/directoryexport.xml
--- a/admin/importAPSCEmployees.php
+++ b/admin/importAPSCEmployees.php
@@ -32,23 +32,35 @@
@$sums[$id][$timePeriod] += $data[1];
} else {
echo "<br>ERROR NAME MISSING FROM ID LIST<br><bR>" . PHP_EOL;
-
+
die();
-
}
}
fclose($handle);
}
}
foreach ($sums as $id => $sum) {
- echo $id. "<br>" . PHP_EOL;
+    echo $id . "<br>" . PHP_EOL;
$doc = $db->get($id);
- // print_r($doc);
- if (isset($doc->statistics)) $doc->statistics = Array();
+    echo $doc->name . "<br>" . PHP_EOL;
+    // print_r($doc);
+    // Work on a plain-array copy of the statistics so the comparison below and
+    // the assignment use the same access style, whether the stored value is
+    // an object or an array.
+    $changed = !isset($doc->statistics);
+    $statistics = $changed ? Array() : json_decode(json_encode($doc->statistics), true);
+    if (!is_array($statistics)) {
+        $statistics = Array();
+    }
foreach ($sum as $timePeriod => $value) {
- $doc->statistics["employees"][$timePeriod] = Array("value"=>$value, "source"=>"http://apsc.gov.au/stateoftheservice/");
+        // Only touch periods whose stored value is missing or differs, so an
+        // unchanged document is not re-saved.
+        if (!isset($statistics["employees"][$timePeriod]["value"])
+                || $statistics["employees"][$timePeriod]["value"] != $value) {
+            $changed = true;
+            $statistics["employees"][$timePeriod] = Array("value" => $value, "source" => "http://apsc.gov.au/stateoftheservice/");
+        }
}
- $db->save($doc);
+    if ($changed) {
+        $doc->statistics = $statistics;
+        $db->save($doc);
+    } else {
+        echo "not changed" . "<br>" . PHP_EOL;
+    }
}
// employees: timeperiod, source = apsc state of service, value
?>
--- /dev/null
+++ b/admin/metadata.py
@@ -1,1 +1,22 @@
+#http://packages.python.org/CouchDB/client.html
+import couchdb
+from BeautifulSoup import BeautifulSoup
+
+couch = couchdb.Server('http://127.0.0.1:5984/')
+
+# select database
+docsdb = couch['disclosr-documents']
+
+# For each document returned by the view, print the names of its <meta> tags.
+for row in docsdb.view('app/getMetadataExtractRequired'):
+    print row.id
+    # Read the first attachment listed in the row value
+    # (presumably the document's _attachments mapping -- confirm against the view).
+    html = docsdb.get_attachment(row.id, row.value.iterkeys().next()).read()
+    metadata = []
+    # http://www.crummy.com/software/BeautifulSoup/documentation.html
+    soup = BeautifulSoup(html)
+    # soup.meta is only the first <meta> tag (or None); findAll returns all of them.
+    for metatag in soup.findAll('meta'):
+        # Tags without a name attribute would raise KeyError on metatag['name'].
+        name = metatag.get('name')
+        if name is not None:
+            print name
+    doc = docsdb.get(row.id)
+    # Saving is still disabled: nothing is collected into `metadata` yet.
+    #doc['metadata'] = metadata
+    #docsdb.save(doc)
+
--- a/bubbletree.php
+++ b/bubbletree.php
@@ -24,7 +24,7 @@
$color = new Lux_Color();
$portfolios = Array();
-
+$total = 0;
$db = $server->get_db('disclosr-agencies');
try {
$rows = $db->get_view("app", "byDeptStateName", null, true)->rows;
@@ -42,10 +42,15 @@
foreach ($rows as $row) {
$employees = 0;
$portfolioid = 0;
- if ($row->value->employees) $employees = $row->value->employees;
+ if (isset($row->value->employees)) $employees = $row->value->employees;
if (isset($row->value->statistics->employees)) {
$agencyEmployeesArray = object_to_array($row->value->statistics->employees);
+if (isset($agencyEmployeesArray["2010-2011"]["value"])) {
$employees = $agencyEmployeesArray["2010-2011"]["value"];
+} else {
+ // get last year that is recorded? throw error?
+ continue;
+}
}
if (!($employees > 0)) $employees =0;
if (isset($row->value->parentOrg)) $portfolioid = $row->value->parentOrg;
--- a/include/couchdb.inc.php
+++ b/include/couchdb.inc.php
@@ -5,28 +5,28 @@
require ($basePath . 'couchdb/settee/src/settee.php');
+/**
+ * Stub: the body holds only a commented-out "views" definition, so calling
+ * this function currently does nothing.
+ */
function createDocumentsDesignDoc() {
- /*"views": {
- "web_server": {
- "map": "function(doc) {\n emit(doc.web_server, 1);\n}",
- "reduce": "function (key, values, rereduce) {\n return sum(values);\n}"
- },
- "byAgency": {
- "map": "function(doc) {\n emit(doc.agencyID, 1);\n}",
- "reduce": "function (key, values, rereduce) {\n return sum(values);\n}"
- },
- "byURL": {
- "map": "function(doc) {\n emit(doc.url, doc);\n}"
- },
- "agency": {
- "map": "function(doc) {\n emit(doc.agencyID, doc);\n}"
- },
- "byWebServer": {
- "map": "function(doc) {\n emit(doc.web_server, doc);\n}"
- },
- "getValidationRequired": {
- "map": "function(doc) {\nif (doc.mime_type == \"text/html\" \n&& typeof(doc.validation) == \"undefined\") {\n emit(doc._id, doc._attachments);\n}\n}"
- }
- }*/
+ /* "views": {
+ "web_server": {
+ "map": "function(doc) {\n emit(doc.web_server, 1);\n}",
+ "reduce": "function (key, values, rereduce) {\n return sum(values);\n}"
+ },
+ "byAgency": {
+ "map": "function(doc) {\n emit(doc.agencyID, 1);\n}",
+ "reduce": "function (key, values, rereduce) {\n return sum(values);\n}"
+ },
+ "byURL": {
+ "map": "function(doc) {\n emit(doc.url, doc);\n}"
+ },
+ "agency": {
+ "map": "function(doc) {\n emit(doc.agencyID, doc);\n}"
+ },
+ "byWebServer": {
+ "map": "function(doc) {\n emit(doc.web_server, doc);\n}"
+ },
+ "getValidationRequired": {
+ "map": "function(doc) {\nif (doc.mime_type == \"text/html\" \n&& typeof(doc.validation) == \"undefined\") {\n emit(doc._id, doc._attachments);\n}\n}"
+ }
+ } */
}
function createAgencyDesignDoc() {
@@ -95,7 +95,7 @@
}
}";
// http://stackoverflow.com/questions/646628/javascript-startswith
-$obj->views->score->map = 'if(!String.prototype.startsWith){
+ $obj->views->score->map = 'if(!String.prototype.startsWith){
String.prototype.startsWith = function (str) {
return !this.indexOf(str);
}
@@ -119,7 +119,7 @@
emit(count+doc._id, {id:doc._id, name: doc.name, score:count, orgType: doc.orgType, portfolio:portfolio});
}
}';
- $obj->views->scoreHas->map = 'if(!String.prototype.startsWith){
+ $obj->views->scoreHas->map = 'if(!String.prototype.startsWith){
String.prototype.startsWith = function (str) {
return !this.indexOf(str);
}
@@ -142,7 +142,7 @@
$obj->views->scoreHas->reduce = 'function (key, values, rereduce) {
return sum(values);
}';
- $obj->views->fieldNames->map = '
+ $obj->views->fieldNames->map = '
function(doc) {
for(var propName in doc) {
emit(propName, doc._id);
@@ -157,16 +157,16 @@
}
+// Choose the address handed to SetteeServer from this machine's host name
+// (php_uname('n')). With this change the "KYUUBEY" branch and the fallback
+// both resolve to the same local address.
if (php_uname('n') == "vanille") {
-$serverAddr = 'http://192.168.178.21:5984/';
-
+ $serverAddr = 'http://192.168.178.21:5984/';
} else
if (php_uname('n') == "KYUUBEY") {
- $serverAddr = 'http://192.168.1.148:5984/';
+ $serverAddr = 'http://127.0.0.1:5984/';
} else {
$serverAddr = 'http://127.0.0.1:5984/';
}
- $server = new SetteeServer($serverAddr);
+$server = new SetteeServer($serverAddr);
+
+/**
+ * Print the message of $e followed by an HTML line break and a newline.
+ *
+ * @param object $e anything exposing getMessage(); callers presumably pass
+ *                  Settee exceptions -- confirm
+ */
function setteErrorHandler($e) {
echo $e->getMessage() . "<br>" . PHP_EOL;
}
--- a/webserver.php
+++ b/webserver.php
@@ -20,7 +20,15 @@
$website = $docsdb->get(md5($row->value->website));
$serverParts = explode(" ",$website->web_server);
echo "<td>" . $serverParts[0] . "</td>";
- echo "<td>" . $website->mime_type . "</td>";
+ // Validation column: "?" when no validation result is stored, "No error"
+ // when the stored result is empty, otherwise the stored text itself.
+ if (!isset($website->validation)) {
+     echo "<td>?</td>";
+ } else if ($website->validation == "") {
+     echo "<td>No error</td>";
+ } else {
+     // The stored text is escaped before being placed in the page; the
+     // previous str_replace("<", "<", ...) replaced "<" with itself.
+     echo "<td><pre>" . htmlspecialchars($website->validation) . "</pre></td>";
+ }
} catch (SetteeRestClientException $e) {
// setteErrorHandler($e);
}