Upgrade neo4j batchimporter to 1.5
--- /dev/null
+++ b/admin/neo4jimporter/nbactions.xml
@@ -1,1 +1,53 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<actions>
+ <action>
+ <actionName>run</actionName>
+ <goals>
+ <goal>process-classes</goal>
+ <goal>org.codehaus.mojo:exec-maven-plugin:1.2:exec</goal>
+ </goals>
+ <properties>
+ <exec.classpathScope>runtime</exec.classpathScope>
+ <exec.args>-Xmx4048M -server -classpath %classpath ${packageClassName}</exec.args>
+ <exec.executable>java</exec.executable>
+ </properties>
+ </action>
+ <action>
+ <actionName>debug</actionName>
+ <goals>
+ <goal>process-classes</goal>
+ <goal>org.codehaus.mojo:exec-maven-plugin:1.2:exec</goal>
+ </goals>
+ <properties>
+ <exec.classpathScope>runtime</exec.classpathScope>
+ <exec.args>-Xdebug -Xrunjdwp:transport=dt_socket,server=n,address=${jpda.address} -Xmx4048M -server -classpath %classpath ${packageClassName}</exec.args>
+ <jpda.listen>true</jpda.listen>
+ <exec.executable>java</exec.executable>
+ </properties>
+ </action>
+ <action>
+ <actionName>profile</actionName>
+ <goals>
+ <goal>process-classes</goal>
+ <goal>org.codehaus.mojo:exec-maven-plugin:1.1.1:exec</goal>
+ </goals>
+ <properties>
+ <exec.args>${profiler.args} -Xmx4048M -server -classpath %classpath ${packageClassName}</exec.args>
+ <profiler.action>profile</profiler.action>
+ <exec.executable>${profiler.java}</exec.executable>
+ </properties>
+ </action>
+ <action>
+ <actionName>run</actionName>
+ <goals>
+ <goal>process-classes</goal>
+ <goal>org.codehaus.mojo:exec-maven-plugin:1.2:exec</goal>
+ </goals>
+ <properties>
+ <exec.classpathScope>runtime</exec.classpathScope>
+ <exec.args>-classpath %classpath Importer</exec.args>
+ <exec.executable>java</exec.executable>
+ </properties>
+ </action>
+ </actions>
--- a/admin/neo4jimporter/pom.xml
+++ b/admin/neo4jimporter/pom.xml
@@ -6,8 +6,8 @@
<dependencies>
<dependency>
<groupId>org.neo4j</groupId>
- <artifactId>neo4j-kernel</artifactId>
- <version>1.4</version>
+ <artifactId>neo4j</artifactId>
+ <version>1.5</version>
</dependency>
<dependency>
<groupId>postgresql</groupId>
--- a/admin/neo4jimporter/src/main/java/Importer.java
+++ b/admin/neo4jimporter/src/main/java/Importer.java
@@ -1,3 +1,4 @@
+
import java.io.ObjectInputStream.GetField;
import java.math.BigInteger;
import java.security.MessageDigest;
@@ -12,144 +13,147 @@
import java.util.Map;
import org.neo4j.graphdb.DynamicRelationshipType;
+import org.neo4j.graphdb.GraphDatabaseService;
+import org.neo4j.graphdb.Node;
+import org.neo4j.graphdb.index.BatchInserterIndex;
+import org.neo4j.graphdb.index.BatchInserterIndexProvider;
+import org.neo4j.helpers.collection.MapUtil;
import org.neo4j.kernel.impl.batchinsert.BatchInserter;
import org.neo4j.kernel.impl.batchinsert.BatchInserterImpl;
+import org.neo4j.index.impl.lucene.*;
public class Importer {
- public static void main(String[] argv) {
-
- Map<String,String> props = new HashMap<String, String>();
- props.put("neostore.nodestore.db.mapped_memory", "22000000"); // <expected number of nodes * 9 bytes>
- props.put("neostore.relationshipstore.db.mapped_memory", "22000000"); // <expected number of relationships * 33 bytes>
- // create the batch inserter
- BatchInserter inserter = new
- BatchInserterImpl("neo4j-db/",props
- );
-
+ public static void main(String[] argv) {
+ BatchInserter inserter = new BatchInserterImpl("target/neo4jdb-batchinsert");
+ BatchInserterIndexProvider indexProvider = new LuceneBatchInserterIndexProvider(inserter);
+ BatchInserterIndex labels = indexProvider.nodeIndex("labels", MapUtil.stringMap("type", "exact"));
+ labels.setCacheCapacity("Label", 100000);
-
-
- System.out.println("-------- PostgreSQL "
- + "JDBC Connection Testing ------------");
- try {
+ System.out.println("-------- PostgreSQL "
+ + "JDBC Connection Testing ------------");
- Class.forName("org.postgresql.Driver");
+ try {
- } catch (ClassNotFoundException e) {
+ Class.forName("org.postgresql.Driver");
- System.out.println("Where is your PostgreSQL JDBC Driver? "
- + "Include in your library path!");
- e.printStackTrace();
+ } catch (ClassNotFoundException e) {
- }
+ System.out.println("Where is your PostgreSQL JDBC Driver? "
+ + "Include in your library path!");
+ e.printStackTrace();
- System.out.println("PostgreSQL JDBC Driver Registered!");
+ }
- Connection conn = null;
+ System.out.println("PostgreSQL JDBC Driver Registered!");
- try {
+ Connection conn = null;
- conn = DriverManager.getConnection(
- "jdbc:postgresql://127.0.0.1:5432/contractDashboard",
- "postgres", "snmc");
+ try {
- } catch (SQLException e) {
+ conn = DriverManager.getConnection(
+ "jdbc:postgresql://127.0.0.1:5432/contractDashboard",
+ "postgres", "snmc");
- System.out.println("Connection Failed! Check output console");
- e.printStackTrace();
+ } catch (SQLException e) {
- }
+ System.out.println("Connection Failed! Check output console");
+ e.printStackTrace();
- if (conn != null) {
- System.out.println("You made it, take control your database now!");
- } else {
- System.out.println("Failed to make connection!");
- }
- try {
- // Print all warnings
- for (SQLWarning warn = conn.getWarnings(); warn != null; warn = warn
- .getNextWarning()) {
- System.out.println("SQL Warning:");
- System.out.println("State : " + warn.getSQLState());
- System.out.println("Message: " + warn.getMessage());
- System.out.println("Error : " + warn.getErrorCode());
- }
+ }
- // Get a statement from the connection
- Statement stmt = conn.createStatement();
+ if (conn == null) {
+     System.out.println("Failed to make connection!");
+     // Without a connection there is nothing to import. Shut the index
+     // provider and the batch inserter down here so the store is closed
+     // cleanly, instead of dying on a NullPointerException at
+     // conn.getWarnings() further down with the inserter still open.
+     indexProvider.shutdown();
+     inserter.shutdown();
+     return;
+ }
+ System.out.println("You made it, take control your database now!");
+ try {
+ // Print all warnings
+ for (SQLWarning warn = conn.getWarnings(); warn != null; warn = warn.getNextWarning()) {
+ System.out.println("SQL Warning:");
+ System.out.println("State : " + warn.getSQLState());
+ System.out.println("Message: " + warn.getMessage());
+ System.out.println("Error : " + warn.getErrorCode());
+ }
- // Execute the query
- ResultSet rs = stmt.executeQuery("SELECT distinct contractnotice.\"agencyName\", contractnotice.\"supplierABN\", contractnotice.\"supplierName\" FROM public.contractnotice limit 300;");
-String previousAgency = "";
+ // Get a statement from the connection
+ Statement stmt = conn.createStatement();
- // Loop through the result set
- while (rs.next()) {
- long supplier,agency;
- agency = doHash(rs.getString("agencyName"));
- if (rs.getString("agencyName") != previousAgency) {
- if (!inserter.nodeExists(agency)) {
- Map<String, Object> properties = new HashMap<String, Object>();
- properties.put("Label", rs.getString("agencyName"));
- inserter.createNode(agency, properties);
- }
- }
- if (rs.getString("supplierABN") != "0") {
- supplier = doHash(rs.getString("supplierABN"));
- } else {
- supplier = doHash(rs.getString("supplierName"));
- }
- // inject some data
- if (!inserter.nodeExists(supplier)) {
- Map<String, Object> properties = new HashMap<String, Object>();
-
- properties.put("Label", rs.getString("supplierName"));
- inserter.createNode(supplier, properties);
- }
-
- inserter.createRelationship(agency, supplier,
- DynamicRelationshipType.withName("KNOWS"), null);
-
- }
- // Close the result set, statement and the connection
- rs.close();
- stmt.close();
- conn.close();
- } catch (SQLException se) {
- System.out.println("SQL Exception:");
-
- // Loop through the SQL Exceptions
- while (se != null) {
- System.out.println("State : " + se.getSQLState());
- System.out.println("Message: " + se.getMessage());
- System.out.println("Error : " + se.getErrorCode());
-
- se = se.getNextException();
- }
- } catch (Exception e) {
- System.out.println(e);
- }
- // shutdown, makes sure all changes are written to disk
- inserter.shutdown();
- }
-
- static long doHash(String input) {
- MessageDigest m;
- try {
- m = MessageDigest.getInstance("MD5");
- m.reset();
- m.update(input.getBytes());
- byte[] digest = m.digest();
- return new BigInteger(1, digest).longValue();
- } catch (NoSuchAlgorithmException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
- return 0;
-
- }
+ // Execute the query
+ ResultSet rs = stmt.executeQuery("SELECT contractnotice.\"agencyName\", "
+ + " contractnotice.\"supplierABN\",contractnotice.\"supplierName\",sum(value) as sum "
+ + "FROM public.contractnotice where contractnotice.\"agencyName\" != 'Department of Defence'"
+ + " AND contractnotice.\"agencyName\" != 'Defence Materiel Organisation' GROUP BY contractnotice.\"agencyName\", "
+ + " contractnotice.\"supplierABN\",contractnotice.\"supplierName\"");
+ String previousAgency = "";
+GraphDatabaseService gds = inserter.getGraphDbService();
+HashMap<String,Long> supplierIDs = new HashMap<String,Long>();
+HashMap<String,Long> agencyIDs = new HashMap<String,Long>();
+
+ // Loop through the result set
+ while (rs.next()) {
+ long supplierID, agencyID;
+ String supplierKey;
+ if (agencyIDs.get(rs.getString("agencyName")) == null) {
+ Node myNode = gds.createNode();
+ myNode.setProperty("Label", rs.getString("agencyName"));
+ myNode.setProperty("type", "agency");
+ agencyIDs.put(rs.getString("agencyName"), myNode.getId());
+ if (myNode.getId() %100 == 0) {
+ System.out.println("Agency "+myNode.getId());
+}
+ }
+ agencyID = agencyIDs.get(rs.getString("agencyName"));
+
+
+ // Key suppliers by ABN when one is present; fall back to the supplier
+ // name when the ABN is NULL, empty or the "0" placeholder. The values
+ // must be compared with equals(): != compares object references, which
+ // in practice made the ABN branch always win and collapsed every
+ // supplier without a real ABN onto a single map key (and node).
+ String supplierABN = rs.getString("supplierABN");
+ if (supplierABN != null && !supplierABN.equals("0") && !supplierABN.equals("")) {
+     supplierKey = supplierABN;
+ } else {
+     supplierKey = rs.getString("supplierName");
+ }
+ // inject some data
+ if (supplierIDs.get(supplierKey) == null) {
+ Node myNode = gds.createNode();
+ myNode.setProperty("Label", rs.getString("supplierName"));
+ myNode.setProperty("type", "supplier");
+ supplierIDs.put(supplierKey, myNode.getId());
+ if (myNode.getId() %1000 == 0) {
+ System.out.println("Supplier "+myNode.getId());
+}
+ }
+ supplierID = supplierIDs.get(supplierKey);
+ long rel = inserter.createRelationship(agencyID, supplierID,
+ DynamicRelationshipType.withName("KNOWS"), null);
+ inserter.setRelationshipProperty(rel, "Weight", rs.getDouble("sum"));
+
+ }
+ // Close the result set, statement and the connection
+ rs.close();
+ stmt.close();
+ conn.close();
+ } catch (SQLException se) {
+ System.out.println("SQL Exception:");
+
+ // Loop through the SQL Exceptions
+ while (se != null) {
+ System.out.println("State : " + se.getSQLState());
+ System.out.println("Message: " + se.getMessage());
+ System.out.println("Error : " + se.getErrorCode());
+
+ se = se.getNextException();
+ }
+ }
+// Make the changes visible for reading; use this sparingly, as it requires IO!
+ labels.flush();
+
+// Make sure to shut down the index provider
+ indexProvider.shutdown();
+ inserter.shutdown();
+ }
+
+
}
Binary files a/admin/neo4jimporter/target/classes/Importer.class and b/admin/neo4jimporter/target/classes/Importer.class differ
--- a/admin/partialdata/import.php
+++ b/admin/partialdata/import.php
@@ -136,7 +136,7 @@
if ($dhandle) {
// loop through all of the files
while (false !== ($fname = readdir($dhandle))) {
- if (($fname != '.') && ($fname != '..')) {
+ // Optional ?filter= restricts processing to file names containing that text.
+ // strpos() returns 0 for a match at the very start of the name, so the result
+ // must be compared against false strictly (!==), not loosely (!=).
+ if (($fname != '.') && ($fname != '..') && (!isset($_REQUEST["filter"]) || strpos($fname,$_REQUEST["filter"]) !== false)) {
echo "<a href=\"import.php?fname=$fname\">$fname</a> " . filesize($path . $fname) . " " . date("c", filemtime($path . $fname)) . "<br/>";
processFile($path . $fname, "contractnotice");
}
--- a/admin/partialdata/importamendments.php
+++ b/admin/partialdata/importamendments.php
@@ -1,6 +1,13 @@
<?php
include_once ("../../lib/common.inc.php");
-
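+/*
+One-off maintenance query: backfill a missing (NULL or 0) "supplierABN" from
+another contract notice that has the same "supplierName" and a known ABN.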
+update contractnotice set "supplierABN" = a."supplierABN"
+from contractnotice as cn inner join (select "supplierABN",
+"supplierName" from contractnotice where "supplierABN"
+IS NOT NULL and "supplierABN" != 0) as a on
+cn."supplierName" = a."supplierName" where
+cn."CNID"=contractnotice."CNID" and (contractnotice."supplierABN"
+IS NULL or contractnotice."supplierABN" = 0) */
// http://www.lastcraft.com/browser_documentation.php
// http://code.google.com/p/phpquery/
require('phpQuery-onefile.php');
@@ -313,3 +320,4 @@
}
?>
+
--- a/admin/updateUNSPSC.php
+++ b/admin/updateUNSPSC.php
@@ -10,12 +10,16 @@
// some Australian spellings
$isiz = str_replace("iz","is",$row['Title']);
$unspsc[$isiz] = $row['UNSPSC'];
+ $filfill = str_replace("fill","fil",$row['Title']);
+ $unspsc[$filfill] = $row['UNSPSC'];
$defence = str_replace("efense","efence",$row['Title']);
$unspsc[$defence] = $row['UNSPSC'];
$armor = str_replace("rmored","rmoured",$row['Title']);
$unspsc[$armor] = $row['UNSPSC'];
- $center = str_replace("enter","entre",$row['Title']);
- $unspsc[$center] = $row['UNSPSC'];
+ $erre = str_replace("er","re",$row['Title']);
+ $unspsc[$erre] = $row['UNSPSC'];
+ $lyslyz = str_replace("lyz","lys",$row['Title']);
+ $unspsc[$lyslyz] = $row['UNSPSC'];
// some divergence from standard
$tobacco = str_replace("Food Beverage and Tobacco Products","Food and Beverage Products",$row['Title']);
$unspsc[$tobacco] = $row['UNSPSC'];
--- /dev/null
+++ b/australian_federal_government_contract_spending.json
@@ -1,1 +1,125 @@
+{
+ "dataset":{
+ "name":"australian_federal_government_contract_spending",
+ "label":"Australian Federal Government Contract Spending",
+ "description":"Spending by Australian Federal Government agencies on goods and services from 2007 onwards.",
+ "currency":"AUD",
+ "unique_keys":[
+ "id"
+ ],
+ "temporal_granularity":"year"
+ },
+ "mapping":{
+ "category": {
+ "fields": [ {
+ "column": "category",
+ "datatype": "string",
+ "name": "label"
+ } ],
+ "type": "classifier",
+ "description": "",
+ "taxonomy": "unspsc",
+ "label": "Contract Goods/Services Category"
+ },
+ "from":{
+ "fields":[
+ {
+ "column":"agencyABN",
+ "datatype":"string",
+ "name":"id"
+ },
+ {
+ "column":"agencyName",
+ "datatype":"string",
+ "name":"label"
+ }
+ ],
+ "type":"entity",
+ "description":"",
+ "label":"Government Agency Australian Business Number"
+ },
+ "description":{
+ "column":"description",
+ "datatype":"string",
+ "type":"value",
+ "description":"Contract as described by agency",
+ "label":"Contract Description"
+ },
+ "to":{
+ "fields":[
+ {
+ "column":"supplierName",
+ "datatype":"string",
+ "name":"label"
+ },
+ {
+ "column":"supplierID",
+ "datatype":"string",
+ "name":"id"
+ }
+ ],
+ "type":"entity",
+ "description":"Government Supplier",
+ "label":"Supplier Name"
+ },
+ "currency":{
+ "default_value":"AUD",
+ "description":"All entries in AUD, foreign transactions are converted at the time of their payment",
+ "column":"",
+ "label":"",
+ "datatype":"currency",
+ "type":"value"
+ },
+ "amount":{
+ "column":"value",
+ "datatype":"float",
+ "type":"value",
+ "description":"The total value of the contract including all variations/amendments/extensions",
+ "label":"Contract Value"
+ },
+ "time":{
+ "column":"contractStart",
+ "datatype":"date",
+ "type":"value",
+ "description":"Start of the contract period (goods or services being received)",
+ "label":"Contract Start Date"
+ },
+ "id":{
+"column": "CNID",
+ "datatype": "string",
+ "name": "label",
+ "type": "value",
+ "description": "",
+ "label": "Contract Notice ID"
+ }
+ },
+
+"views": [
+ {
+ "name": "default",
+ "entity": "dataset",
+ "label": "Default breakdown by Category",
+ "dimension": "dataset",
+ "breakdown": "category",
+ "filters" : {"name": "australian_federal_government_contract_spending" }
+ },
+ {
+ "name":"default",
+ "entity": "classifier",
+ "label":"Breakdown of Spending Areas by Supplier Company",
+ "dimension": "category",
+ "breakdown": "to",
+ "filters": {"taxonomy": "unspsc"}
+ },
+ {
+ "name":"default",
+ "entity": "classifier",
+ "label":"Breakdown of Spending Areas by Government Agency",
+ "dimension": "category",
+ "breakdown": "from",
+ "filters": {"taxonomy": "unspsc"}
+ }
+]
+}
+
--- a/exportData.csv.php
+++ b/exportData.csv.php
@@ -3,31 +3,45 @@
include_once("./lib/common.inc.php");
setlocale(LC_CTYPE, 'C');
// source: http://stackoverflow.com/questions/81934/easy-way-to-export-a-sql-table-without-access-to-the-server-or-phpmyadmin#81951
-$result = mysql_query('
-SELECT CNID,contractnotice.agencyName,agency.abn as agencyABN,DATE(publishDate),contractStart,contractEnd,value,description,procurementMethod,category,categoryUNSPSC
-,supplierABN,supplierName,supplierCity,supplierPostcode,supplierCountry,contactPostcode,
+$query = $conn->prepare('
+SELECT "CNID",contractnotice."agencyName",agency.abn as "agencyABN",EXTRACT(EPOCH FROM "publishDate") as "publishDate",EXTRACT(EPOCH FROM "contractStart") as "contractStart",EXTRACT(EPOCH FROM "contractEnd") as "contractEnd",value,description,"procurementMethod",category,"categoryUNSPSC",
+ "supplierABN","supplierName",
(
- case when "supplierABN" != 0 THEN "supplierABN"::text ELSE "supplierName" END) as supplierID
+ case when "supplierABN" != 0 THEN "supplierABN"::text ELSE "supplierName" END) as supplierID,
-concat("https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN",CNID) as sourceURL
-FROM `contractnotice` join agency on contractnotice.agencyName=agency.agencyName where childCN = 0');
-if (!$result) die('Couldn\'t fetch records');
-$num_fields = mysql_num_fields($result);
+(\'https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN\'::text || "CNID"::text) as sourceURL
+FROM contractnotice join agency on contractnotice."agencyName"=agency."agencyName" where "childCN" = 0'
+, array(PDO::ATTR_CURSOR => PDO::CURSOR_FWDONLY)); // ATTR_CURSOR takes a CURSOR_* constant; FETCH_ORI_NEXT is a fetch orientation that merely shares the value 0
+// "supplierCity","supplierPostcode","supplierCountry","contactPostcode",
+// (substr( "categoryUNSPSC"::text, 0, 2 ) || \'0000000\'::text) as "categoryUNSPSClv1", "categoryUNSPSC", (substr( "categoryUNSPSC"::text, 0, 3 ) || \'000000\'::text) as "categoryUNSPSClv2" "categoryUNSPSC", (substr( "categoryUNSPSC"::text, 0, 4 ) || \'00000\'::text as "categoryUNSPSClv3")
+// prepare() can return false, and execute() reports failure through its
+// return value, so both must be checked. Testing the statement object
+// after calling execute() on it could never detect a failed query.
+if (!$query || !$query->execute()) {
+    // Prefer the statement's own error details when a statement exists.
+    databaseError($query ? $query->errorInfo() : $conn->errorInfo());
+    die('Couldn\'t fetch records');
+}
+
+$num_fields = $query->columnCount();
$headers = array();
for ($i = 0; $i < $num_fields; $i++) {
- $headers[] = mysql_field_name($result , $i);
+ $meta = $query->getColumnMeta($i);
+ $headers[] = $meta['name'];
}
$fp = fopen('php://output', 'w');
-if ($fp && $result) {
+if ($fp && $query) {
header('Content-Type: text/csv');
header('Content-Disposition: attachment; filename="export.'.date("c").'.csv"');
header('Pragma: no-cache');
header('Expires: 0');
fputcsv($fp, $headers);
- while ($row = mysql_fetch_assoc($result)) {
- foreach ($row as &$colvalue) {
+ while ($row = $query->fetch(PDO::FETCH_NUM, PDO::FETCH_ORI_NEXT)) {
+ foreach ($row as $key => &$colvalue) {
+
$colvalue = preg_replace( '/[^[:print:]]/', '',
utf8_encode($colvalue));
+ // The date columns are selected as epoch seconds and rendered as Y-m-d.
+ // A NULL date has become an empty string by this point, which date()
+ // cannot format, so only numeric values are converted.
+ $isDateColumn = $headers[$key] == "publishDate" || $headers[$key] == "contractStart"
+     || $headers[$key] == "contractEnd";
+ if ($isDateColumn && is_numeric($colvalue)) {
+     $colvalue = date("Y-m-d", (int) $colvalue);
+ }
}
fputcsv($fp, array_values($row));
}