From: Alexander Sadleir Date: Tue, 15 Nov 2011 07:50:54 +0000 Subject: Merge branch 'master' of ssh://apples.lambdacomplex.org/git/contractdashboard X-Git-Url: https://maxious.lambdacomplex.org/git/?p=contractdashboard.git&a=commitdiff&h=e7a65556b17654f82887e0597999a58bbde3588d --- Merge branch 'master' of ssh://apples.lambdacomplex.org/git/contractdashboard --- --- a/admin/import.php +++ b/admin/import.php @@ -261,10 +261,10 @@ } // run post import data processing // cn -//include ("linkAmendments.php"); +include ("linkAmendments.php"); include ("updateUNSPSC.php"); // agency -include ("setAgencyStatus.php"); -include ("setAgencyURLABN.php"); +//include ("setAgencyStatus.php"); +//include ("setAgencyURLABN.php"); ?> --- /dev/null +++ b/admin/neo4jimporter/nbactions.xml @@ -1,1 +1,53 @@ + + + + run + + process-classes + org.codehaus.mojo:exec-maven-plugin:1.2:exec + + + runtime + -Xmx4048M -server -classpath %classpath ${packageClassName} + java + + + + debug + + process-classes + org.codehaus.mojo:exec-maven-plugin:1.2:exec + + + runtime + -Xdebug -Xrunjdwp:transport=dt_socket,server=n,address=${jpda.address} -Xmx4048M -server -classpath %classpath ${packageClassName} + true + java + + + + profile + + process-classes + org.codehaus.mojo:exec-maven-plugin:1.1.1:exec + + + ${profiler.args} -Xmx4048M -server -classpath %classpath ${packageClassName} + profile + ${profiler.java} + + + + run + + process-classes + org.codehaus.mojo:exec-maven-plugin:1.2:exec + + + runtime + -classpath %classpath Importer + java + + + --- a/admin/neo4jimporter/pom.xml +++ b/admin/neo4jimporter/pom.xml @@ -6,8 +6,8 @@ org.neo4j - neo4j-kernel - 1.4 + neo4j + 1.5 postgresql --- a/admin/neo4jimporter/src/main/java/Importer.java +++ b/admin/neo4jimporter/src/main/java/Importer.java @@ -1,3 +1,4 @@ + import java.io.ObjectInputStream.GetField; import java.math.BigInteger; import java.security.MessageDigest; @@ -12,144 +13,147 @@ import java.util.Map; import org.neo4j.graphdb.DynamicRelationshipType; +import org.neo4j.graphdb.GraphDatabaseService; +import org.neo4j.graphdb.Node; +import org.neo4j.graphdb.index.BatchInserterIndex; +import org.neo4j.graphdb.index.BatchInserterIndexProvider; +import org.neo4j.helpers.collection.MapUtil; import org.neo4j.kernel.impl.batchinsert.BatchInserter; import org.neo4j.kernel.impl.batchinsert.BatchInserterImpl; +import org.neo4j.index.impl.lucene.*; public class Importer { - public static void main(String[] argv) { - - Map props = new HashMap(); - props.put("neostore.nodestore.db.mapped_memory", "22000000"); // - props.put("neostore.relationshipstore.db.mapped_memory", "22000000"); // - // create the batch inserter - BatchInserter inserter = new - BatchInserterImpl("neo4j-db/",props - ); - + public static void main(String[] argv) { + BatchInserter inserter = new BatchInserterImpl("target/neo4jdb-batchinsert"); + BatchInserterIndexProvider indexProvider = new LuceneBatchInserterIndexProvider(inserter); + BatchInserterIndex labels = indexProvider.nodeIndex("labels", MapUtil.stringMap("type", "exact")); + labels.setCacheCapacity("Label", 100000); - - - System.out.println("-------- PostgreSQL " - + "JDBC Connection Testing ------------"); - try { + System.out.println("-------- PostgreSQL " + + "JDBC Connection Testing ------------"); - Class.forName("org.postgresql.Driver"); + try { - } catch (ClassNotFoundException e) { + Class.forName("org.postgresql.Driver"); - System.out.println("Where is your PostgreSQL JDBC Driver? " - + "Include in your library path!"); - e.printStackTrace(); + } catch (ClassNotFoundException e) { - } + System.out.println("Where is your PostgreSQL JDBC Driver? " + + "Include in your library path!"); + e.printStackTrace(); - System.out.println("PostgreSQL JDBC Driver Registered!"); + } - Connection conn = null; + System.out.println("PostgreSQL JDBC Driver Registered!"); - try { + Connection conn = null; - conn = DriverManager.getConnection( - "jdbc:postgresql://127.0.0.1:5432/contractDashboard", - "postgres", "snmc"); + try { - } catch (SQLException e) { + conn = DriverManager.getConnection( + "jdbc:postgresql://127.0.0.1:5432/contractDashboard", + "postgres", "snmc"); - System.out.println("Connection Failed! Check output console"); - e.printStackTrace(); + } catch (SQLException e) { - } + System.out.println("Connection Failed! Check output console"); + e.printStackTrace(); - if (conn != null) { - System.out.println("You made it, take control your database now!"); - } else { - System.out.println("Failed to make connection!"); - } - try { - // Print all warnings - for (SQLWarning warn = conn.getWarnings(); warn != null; warn = warn - .getNextWarning()) { - System.out.println("SQL Warning:"); - System.out.println("State : " + warn.getSQLState()); - System.out.println("Message: " + warn.getMessage()); - System.out.println("Error : " + warn.getErrorCode()); - } + } - // Get a statement from the connection - Statement stmt = conn.createStatement(); + if (conn != null) { + System.out.println("You made it, take control your database now!"); + } else { + System.out.println("Failed to make connection!"); + } + try { + // Print all warnings + for (SQLWarning warn = conn.getWarnings(); warn != null; warn = warn.getNextWarning()) { + System.out.println("SQL Warning:"); + System.out.println("State : " + warn.getSQLState()); + System.out.println("Message: " + warn.getMessage()); + System.out.println("Error : " + warn.getErrorCode()); + } - // Execute the query - ResultSet rs = stmt.executeQuery("SELECT distinct contractnotice.\"agencyName\", contractnotice.\"supplierABN\", contractnotice.\"supplierName\" FROM public.contractnotice limit 300;"); -String previousAgency = ""; + // Get a statement from the connection + Statement stmt = conn.createStatement(); - // Loop through the result set - while (rs.next()) { - long supplier,agency; - agency = doHash(rs.getString("agencyName")); - if (rs.getString("agencyName") != previousAgency) { - if (!inserter.nodeExists(agency)) { - Map properties = new HashMap(); - properties.put("Label", rs.getString("agencyName")); - inserter.createNode(agency, properties); - } - } - if (rs.getString("supplierABN") != "0") { - supplier = doHash(rs.getString("supplierABN")); - } else { - supplier = doHash(rs.getString("supplierName")); - } - // inject some data - if (!inserter.nodeExists(supplier)) { - Map properties = new HashMap(); - - properties.put("Label", rs.getString("supplierName")); - inserter.createNode(supplier, properties); - } - - inserter.createRelationship(agency, supplier, - DynamicRelationshipType.withName("KNOWS"), null); - - } - // Close the result set, statement and the connection - rs.close(); - stmt.close(); - conn.close(); - } catch (SQLException se) { - System.out.println("SQL Exception:"); - - // Loop through the SQL Exceptions - while (se != null) { - System.out.println("State : " + se.getSQLState()); - System.out.println("Message: " + se.getMessage()); - System.out.println("Error : " + se.getErrorCode()); - - se = se.getNextException(); - } - } catch (Exception e) { - System.out.println(e); - } - // shutdown, makes sure all changes are written to disk - inserter.shutdown(); - } - - static long doHash(String input) { - MessageDigest m; - try { - m = MessageDigest.getInstance("MD5"); - m.reset(); - m.update(input.getBytes()); - byte[] digest = m.digest(); - return new BigInteger(1, digest).longValue(); - } catch (NoSuchAlgorithmException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - return 0; - - } + // Execute the query + ResultSet rs = stmt.executeQuery("SELECT contractnotice.\"agencyName\", " + + " contractnotice.\"supplierABN\",contractnotice.\"supplierName\",sum(value) as sum " + + "FROM public.contractnotice where contractnotice.\"agencyName\" != 'Department of Defence'" + + " AND contractnotice.\"agencyName\" != 'Defence Materiel Organisation' GROUP BY contractnotice.\"agencyName\", " + + " contractnotice.\"supplierABN\",contractnotice.\"supplierName\""); + String previousAgency = ""; +GraphDatabaseService gds = inserter.getGraphDbService(); +HashMap supplierIDs = new HashMap(); +HashMap agencyIDs = new HashMap(); + + // Loop through the result set + while (rs.next()) { + long supplierID, agencyID; + String supplierKey; + if (agencyIDs.get(rs.getString("agencyName")) == null) { + Node myNode = gds.createNode(); + myNode.setProperty("Label", rs.getString("agencyName")); + myNode.setProperty("type", "agency"); + agencyIDs.put(rs.getString("agencyName"), myNode.getId()); + if (myNode.getId() %100 == 0) { + System.out.println("Agency "+myNode.getId()); +} + } + agencyID = agencyIDs.get(rs.getString("agencyName")); + + + if (rs.getString("supplierABN") != "0" && rs.getString("supplierABN") != "") { + supplierKey = rs.getString("supplierABN"); + } else { + supplierKey = rs.getString("supplierName"); + } + // inject some data + if (supplierIDs.get(supplierKey) == null) { + Node myNode = gds.createNode(); + myNode.setProperty("Label", rs.getString("supplierName")); + myNode.setProperty("type", "supplier"); + supplierIDs.put(supplierKey, myNode.getId()); + if (myNode.getId() %1000 == 0) { + System.out.println("Supplier "+myNode.getId()); +} + } + supplierID = supplierIDs.get(supplierKey); + long rel = inserter.createRelationship(agencyID, supplierID, + DynamicRelationshipType.withName("KNOWS"), null); + inserter.setRelationshipProperty(rel, "Weight", rs.getDouble("sum")); + + } + // Close the result set, statement and the connection + rs.close(); + stmt.close(); + conn.close(); + } catch (SQLException se) { + System.out.println("SQL Exception:"); + + // Loop through the SQL Exceptions + while (se != null) { + System.out.println("State : " + se.getSQLState()); + System.out.println("Message: " + se.getMessage()); + System.out.println("Error : " + se.getErrorCode()); + + se = se.getNextException(); + } + } +//make the changes visible for reading, use this sparsely, requires IO! + labels.flush(); + +// Make sure to shut down the index provider + indexProvider.shutdown(); + inserter.shutdown(); + } + + } --- a/admin/partialdata/import.php +++ b/admin/partialdata/import.php @@ -136,7 +136,7 @@ if ($dhandle) { // loop through all of the files while (false !== ($fname = readdir($dhandle))) { - if (($fname != '.') && ($fname != '..')) { + if (($fname != '.') && ($fname != '..') && (!isset($_REQUEST["filter"]) || strpos($fname,$_REQUEST["filter"]) != false)) { echo "$fname " . filesize($path . $fname) . " " . date("c", filemtime($path . $fname)) . "
"; processFile($path . $fname, "contractnotice"); } --- a/admin/partialdata/importamendments.php +++ b/admin/partialdata/importamendments.php @@ -1,6 +1,13 @@ Database temporarily unavailable: "; @@ -212,6 +221,10 @@ $cnFields[$fieldName] = $fieldValue; } } +if (isset($cnFields[""])) { + $cnFields["description"] .= $cnFields[""]; + unset($cnFields[""]); +} $cnFields["importFile"] = $url; $contractNoticeInsertQ = 'INSERT INTO contractnotice ("' . implode('" , "', array_keys($cnFields)) . '") VALUES ( '; for($key = 0; $key < sizeof($cnFields); $key++) { @@ -297,7 +310,7 @@ if ($dhandle) { // loop through all of the files while (false !== ($fname = readdir($dhandle))) { - if (($fname != '.') && ($fname != '..')) { + if (($fname != '.') && ($fname != '..') && (strpos($fname,".xls")>0)) { echo "$fname " . filesize($path . $fname) . " " . date("c", filemtime($path . $fname)) . "
"; processFile($path . $fname, "contractnotice"); } @@ -313,3 +326,4 @@ } ?> + --- a/admin/updateUNSPSC.php +++ b/admin/updateUNSPSC.php @@ -10,12 +10,16 @@ // some Australian spellings $isiz = str_replace("iz","is",$row['Title']); $unspsc[$isiz] = $row['UNSPSC']; + $filfill = str_replace("fill","fil",$row['Title']); + $unspsc[$filfill] = $row['UNSPSC']; $defence = str_replace("efense","efence",$row['Title']); $unspsc[$defence] = $row['UNSPSC']; $armor = str_replace("rmored","rmoured",$row['Title']); $unspsc[$armor] = $row['UNSPSC']; - $center = str_replace("enter","entre",$row['Title']); - $unspsc[$center] = $row['UNSPSC']; + $erre = str_replace("er","re",$row['Title']); + $unspsc[$erre] = $row['UNSPSC']; + $lyslyz = str_replace("lyz","lys",$row['Title']); + $unspsc[$lyslyz] = $row['UNSPSC']; // some divergence from standard $tobacco = str_replace("Food Beverage and Tobacco Products","Food and Beverage Products",$row['Title']); $unspsc[$tobacco] = $row['UNSPSC']; --- /dev/null +++ b/australian_federal_government_contract_spending.json @@ -1,1 +1,125 @@ +{ + "dataset":{ + "name":"australian_federal_government_contract_spending", + "label":"Australian Federal Government Contract Spending", + "description":"Spending by Australian Federal Government agencies on goods and services from 2007 onwards.", + "currency":"AUD", + "unique_keys":[ + "id" + ], + "temporal_granularity":"year" + }, + "mapping":{ + "category": { + "fields": [ { + "column": "category", + "datatype": "string", + "name": "label" + } ], + "type": "classifier", + "description": "", + "taxonomy": "unspsc", + "label": "Contract Goods/Services Category" + }, + "from":{ + "fields":[ + { + "column":"agencyABN", + "datatype":"string", + "name":"id" + }, + { + "column":"agencyName", + "datatype":"string", + "name":"label" + } + ], + "type":"entity", + "description":"", + "label":"Government Agency Australian Business Number" + }, + "description":{ + "column":"description", + "datatype":"string", + "type":"value", + "description":"Contract as described by agency", + "label":"Contract Description" + }, + "to":{ + "fields":[ + { + "column":"supplierName", + "datatype":"string", + "name":"label" + }, + { + "column":"supplierID", + "datatype":"string", + "name":"id" + } + ], + "type":"entity", + "description":"Government Supplier", + "label":"Supplier Name" + }, + "currency":{ + "default_value":"AUD", + "description":"All entries in AUD, foreign transactions are converted at the time of their payment", + "column":"", + "label":"", + "datatype":"currency", + "type":"value" + }, + "amount":{ + "column":"value", + "datatype":"float", + "type":"value", + "description":"The total value of the contract including all variations/amendments/extensions", + "label":"Contract Value" + }, + "time":{ + "column":"contractStart", + "datatype":"date", + "type":"value", + "description":"Start of the contract period (goods or services being received)", + "label":"Contract Start Date" + }, + "id":{ +"column": "CNID", + "datatype": "string", + "name": "label", + "type": "value", + "description": "", + "label": "Contract Notice ID" + } + }, + +"views": [ + { + "name": "default", + "entity": "dataset", + "label": "Default breakdown by Category", + "dimension": "dataset", + "breakdown": "category", + "filters" : {"name": "australian_federal_government_contract_spending" } + }, + { + "name":"default", + "entity": "classifier", + "label":"Breakdown of Spending Areas by Supplier Company", + "dimension": "category", + "breakdown": "to", + "filters": {"taxonomy": "unspsc"} + }, + { + "name":"default", + "entity": "classifier", + "label":"Breakdown of Spending Areas by Government Agency", + "dimension": "category", + "breakdown": "from", + "filters": {"taxonomy": "unspsc"} + } +] +} + --- a/exportData.csv.php +++ b/exportData.csv.php @@ -3,31 +3,45 @@ include_once("./lib/common.inc.php"); setlocale(LC_CTYPE, 'C'); // source: http://stackoverflow.com/questions/81934/easy-way-to-export-a-sql-table-without-access-to-the-server-or-phpmyadmin#81951 -$result = mysql_query(' -SELECT CNID,contractnotice.agencyName,agency.abn as agencyABN,DATE(publishDate),contractStart,contractEnd,value,description,procurementMethod,category,categoryUNSPSC -,supplierABN,supplierName,supplierCity,supplierPostcode,supplierCountry,contactPostcode, +$query = $conn->prepare(' +SELECT "CNID",contractnotice."agencyName",agency.abn as "agencyABN",EXTRACT(EPOCH FROM "publishDate") as "publishDate",EXTRACT(EPOCH FROM "contractStart") as "contractStart",EXTRACT(EPOCH FROM "contractEnd") as "contractEnd",value,description,"procurementMethod",category,"categoryUNSPSC", + "supplierABN","supplierName", ( - case when "supplierABN" != 0 THEN "supplierABN"::text ELSE "supplierName" END) as supplierID + case when "supplierABN" != 0 THEN "supplierABN"::text ELSE "supplierName" END) as supplierID, -concat("https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN",CNID) as sourceURL -FROM `contractnotice` join agency on contractnotice.agencyName=agency.agencyName where childCN = 0'); -if (!$result) die('Couldn\'t fetch records'); -$num_fields = mysql_num_fields($result); +(\'https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN\'::text || "CNID"::text) as sourceURL +FROM contractnotice join agency on contractnotice."agencyName"=agency."agencyName" where "childCN" = 0' +, array(PDO::ATTR_CURSOR => PDO::FETCH_ORI_NEXT)); +// "supplierCity","supplierPostcode","supplierCountry","contactPostcode", +// (substr( "categoryUNSPSC"::text, 0, 2 ) || \'0000000\'::text) as "categoryUNSPSClv1", "categoryUNSPSC", (substr( "categoryUNSPSC"::text, 0, 3 ) || \'000000\'::text) as "categoryUNSPSClv2" "categoryUNSPSC", (substr( "categoryUNSPSC"::text, 0, 4 ) || \'00000\'::text as "categoryUNSPSClv3") +$query->execute(); +if (!$query) { + databaseError($conn->errorInfo()); +die('Couldn\'t fetch records'); +} + +$num_fields = $query->columnCount(); $headers = array(); for ($i = 0; $i < $num_fields; $i++) { - $headers[] = mysql_field_name($result , $i); + $meta = $query->getColumnMeta($i); + $headers[] = $meta['name']; } $fp = fopen('php://output', 'w'); -if ($fp && $result) { +if ($fp && $query) { header('Content-Type: text/csv'); header('Content-Disposition: attachment; filename="export.'.date("c").'.csv"'); header('Pragma: no-cache'); header('Expires: 0'); fputcsv($fp, $headers); - while ($row = mysql_fetch_assoc($result)) { - foreach ($row as &$colvalue) { + while ($row = $query->fetch(PDO::FETCH_NUM, PDO::FETCH_ORI_NEXT)) { + foreach ($row as $key => &$colvalue) { + $colvalue = preg_replace( '/[^[:print:]]/', '', utf8_encode($colvalue)); + if ($headers[$key] == "publishDate" || $headers[$key] == "contractStart" + || $headers[$key] == "contractEnd") { + $colvalue = date("Y-m-d",$colvalue); + } } fputcsv($fp, array_values($row)); } --- a/lib/graphs.inc.php +++ b/lib/graphs.inc.php @@ -7,6 +7,7 @@ + '; $includedFlot = true; @@ -23,8 +24,9 @@ $(function () { var d1 = []; + var d2 = []; prepare($query); $query->execute(); if (!$query) { @@ -35,6 +37,18 @@ foreach ($query->fetchAll() as $delta) { echo "d1.push([ ".intval($delta['cnid']).", ".intval($delta['count'])."]); \n"; +}; +$query = 'select cnid, count(*) from (select ("CNID" - MOD("CNID",100)) as cnid from contractnotice where "CNID" < 999999 and "parentCN" is not null) as a group by cnid order by cnid'; +$query = $conn->prepare($query); +$query->execute(); +if (!$query) { + databaseError($conn->errorInfo()); + return Array(); +} + +foreach ($query->fetchAll() as $delta) { + + echo "d2.push([ ".intval($delta['cnid']).", ".intval($delta['count'])."]); \n"; }; ?> @@ -47,10 +61,20 @@ }, bars: { show: true } }, + { + data: d2, +series: { + lines: { show: true }, + points: { show: true } + }, + bars: { show: true } + }, ]; var options = { - +series: { + stack: true, + }, grid: { hoverable: true, clickable: true, labelMargin: 17 }, selection: { mode: "x" } };