From: Maxious Date: Thu, 29 Mar 2012 08:04:51 +0000 Subject: Parse complete directory.gov.au export into gexf X-Git-Url: https://maxious.lambdacomplex.org/git/?p=disclosr.git&a=commitdiff&h=24d280f3923b34fc5f7ef1e1fb72e3aaee4e9531 --- Parse complete directory.gov.au export into gexf Former-commit-id: 7be06add579fbd14042ea9084bc738168a62dcdf --- --- /dev/null +++ b/admin/directory.gexf.php @@ -1,1 +1,59 @@ + "gov", "label" => "Federal Government")); +$edges = Array(); + +function addEdge($source, $target) { + global $edges; + $edges[] = Array("id" => md5($source . $target), "source" => $source, "target" => $target); +} + +function addNode($id, $label, $pid) { + global $nodes; + $nodes[] = Array("id" => $id, "label" => $label , "pid" => $pid); +} + +function addChildren($parentID, $parentXML) { + foreach ($parentXML as $childXML) { + + if ($childXML->getName() == "organization" || $childXML->getName() == "organizationalUnit" || $childXML->getName() == "person") { + $attr = $childXML->attributes(); + $id = $attr['UUID']; + if ($childXML->getName() == "organization" || $childXML->getName() == "organizationalUnit") { + + $label = $childXML->name; + } else if ($childXML->getName() == "person") { + $label = $childXML->fullName; + } + addNode($id, $label, $parentID); + addEdge($id, $parentID); + addChildren($id, $childXML); + } + } +} + +if (file_exists('directoryexport.xml')) { + $xml = simplexml_load_file('directoryexport.xml'); + + addChildren("gov", $xml); +} else { + exit('Failed to open directoryexport.xml'); +} + header('Content-Type: application/gexf+xml'); +echo ' + + + '; +foreach ($nodes as $node) { + echo ' '; +} +echo ' + '; +foreach ($edges as $edge) { + echo ' '; +} +echo ' + +'; +?> + --- /dev/null +++ b/admin/directoryexport.xml --- a/admin/importAPSCEmployees.php +++ b/admin/importAPSCEmployees.php @@ -32,23 +32,35 @@ @$sums[$id][$timePeriod] += $data[1]; } else { echo "
ERROR NAME MISSING FROM ID LIST

" . PHP_EOL; - + die(); - } } fclose($handle); } } foreach ($sums as $id => $sum) { - echo $id. "
" . PHP_EOL; + echo $id . "
" . PHP_EOL; $doc = $db->get($id); - // print_r($doc); - if (isset($doc->statistics)) $doc->statistics = Array(); + echo $doc->name . "
" . PHP_EOL; + // print_r($doc); + $changed = false; + if (!isset($doc->statistics)) { + $changed = true; + $doc->statistics = Array(); + } foreach ($sum as $timePeriod => $value) { - $doc->statistics["employees"][$timePeriod] = Array("value"=>$value, "source"=>"http://apsc.gov.au/stateoftheservice/"); + if (!isset($doc->statistics->employees->$timePeriod->value) + || $doc->statistics->employees->$timePeriod->value != $value) { + $changed = true; + $doc->statistics["employees"][$timePeriod] = Array("value" => $value, "source" => "http://apsc.gov.au/stateoftheservice/"); + } } - $db->save($doc); + if ($changed) { + $db->save($doc); + } else { + echo "not changed" . "
" . PHP_EOL; + } } // employees: timeperiod, source = apsc state of service, value ?> --- /dev/null +++ b/admin/metadata.py @@ -1,1 +1,22 @@ +#http://packages.python.org/CouchDB/client.html +import couchdb +from BeautifulSoup import BeautifulSoup +couch = couchdb.Server('http://127.0.0.1:5984/') + +# select database +docsdb = couch['disclosr-documents'] + +for row in docsdb.view('app/getMetadataExtractRequired'): + print row.id + html = docsdb.get_attachment(row.id,row.value.iterkeys().next()).read() + metadata = [] + # http://www.crummy.com/software/BeautifulSoup/documentation.html + soup = BeautifulSoup(html) +metatags = soup.meta + for metatag in metatags: + print metatag['name'] + doc = docsdb.get(row.id) + //doc['metadata'] = metadata + //docsdb.save(doc) + --- a/bubbletree.php +++ b/bubbletree.php @@ -24,7 +24,7 @@ $color = new Lux_Color(); $portfolios = Array(); - +$total = 0; $db = $server->get_db('disclosr-agencies'); try { $rows = $db->get_view("app", "byDeptStateName", null, true)->rows; @@ -42,10 +42,15 @@ foreach ($rows as $row) { $employees = 0; $portfolioid = 0; - if ($row->value->employees) $employees = $row->value->employees; + if (isset($row->value->employees)) $employees = $row->value->employees; if (isset($row->value->statistics->employees)) { $agencyEmployeesArray = object_to_array($row->value->statistics->employees); +if (isset($agencyEmployeesArray["2010-2011"]["value"])) { $employees = $agencyEmployeesArray["2010-2011"]["value"]; +} else { + // get last year that is recorded? throw error? + continue; +} } if (!($employees > 0)) $employees =0; if (isset($row->value->parentOrg)) $portfolioid = $row->value->parentOrg; --- a/include/couchdb.inc.php +++ b/include/couchdb.inc.php @@ -5,28 +5,28 @@ require ($basePath . 'couchdb/settee/src/settee.php'); function createDocumentsDesignDoc() { - /*"views": { - "web_server": { - "map": "function(doc) {\n emit(doc.web_server, 1);\n}", - "reduce": "function (key, values, rereduce) {\n return sum(values);\n}" - }, - "byAgency": { - "map": "function(doc) {\n emit(doc.agencyID, 1);\n}", - "reduce": "function (key, values, rereduce) {\n return sum(values);\n}" - }, - "byURL": { - "map": "function(doc) {\n emit(doc.url, doc);\n}" - }, - "agency": { - "map": "function(doc) {\n emit(doc.agencyID, doc);\n}" - }, - "byWebServer": { - "map": "function(doc) {\n emit(doc.web_server, doc);\n}" - }, - "getValidationRequired": { - "map": "function(doc) {\nif (doc.mime_type == \"text/html\" \n&& typeof(doc.validation) == \"undefined\") {\n emit(doc._id, doc._attachments);\n}\n}" - } - }*/ + /* "views": { + "web_server": { + "map": "function(doc) {\n emit(doc.web_server, 1);\n}", + "reduce": "function (key, values, rereduce) {\n return sum(values);\n}" + }, + "byAgency": { + "map": "function(doc) {\n emit(doc.agencyID, 1);\n}", + "reduce": "function (key, values, rereduce) {\n return sum(values);\n}" + }, + "byURL": { + "map": "function(doc) {\n emit(doc.url, doc);\n}" + }, + "agency": { + "map": "function(doc) {\n emit(doc.agencyID, doc);\n}" + }, + "byWebServer": { + "map": "function(doc) {\n emit(doc.web_server, doc);\n}" + }, + "getValidationRequired": { + "map": "function(doc) {\nif (doc.mime_type == \"text/html\" \n&& typeof(doc.validation) == \"undefined\") {\n emit(doc._id, doc._attachments);\n}\n}" + } + } */ } function createAgencyDesignDoc() { @@ -95,7 +95,7 @@ } }"; // http://stackoverflow.com/questions/646628/javascript-startswith -$obj->views->score->map = 'if(!String.prototype.startsWith){ + $obj->views->score->map = 'if(!String.prototype.startsWith){ String.prototype.startsWith = function (str) { return !this.indexOf(str); } @@ -119,7 +119,7 @@ emit(count+doc._id, {id:doc._id, name: doc.name, score:count, orgType: doc.orgType, portfolio:portfolio}); } }'; - $obj->views->scoreHas->map = 'if(!String.prototype.startsWith){ + $obj->views->scoreHas->map = 'if(!String.prototype.startsWith){ String.prototype.startsWith = function (str) { return !this.indexOf(str); } @@ -142,7 +142,7 @@ $obj->views->scoreHas->reduce = 'function (key, values, rereduce) { return sum(values); }'; - $obj->views->fieldNames->map = ' + $obj->views->fieldNames->map = ' function(doc) { for(var propName in doc) { emit(propName, doc._id); @@ -157,16 +157,16 @@ } if (php_uname('n') == "vanille") { -$serverAddr = 'http://192.168.178.21:5984/'; - + $serverAddr = 'http://192.168.178.21:5984/'; } else if (php_uname('n') == "KYUUBEY") { - $serverAddr = 'http://192.168.1.148:5984/'; + $serverAddr = 'http://127.0.0.1:5984/'; } else { $serverAddr = 'http://127.0.0.1:5984/'; } - $server = new SetteeServer($serverAddr); +$server = new SetteeServer($serverAddr); + function setteErrorHandler($e) { echo $e->getMessage() . "
" . PHP_EOL; } --- a/webserver.php +++ b/webserver.php @@ -20,7 +20,15 @@ $website = $docsdb->get(md5($row->value->website)); $serverParts = explode(" ",$website->web_server); echo "" . $serverParts[0] . ""; - echo "" . $website->mime_type . ""; + if (!isset($website->validation)) { + echo "?"; + } else { + if ($website->validation == "") { + echo "No error"; + } else { + echo "
" . str_replace("<", "<", $website->validation) . "
"; + } + } } catch (SetteeRestClientException $e) { // setteErrorHandler($e); }