From: Maxious Date: Sun, 13 Nov 2011 10:40:47 +0000 Subject: Upgrade neo4j batchimporter to 1.5 X-Git-Url: http://maxious.lambdacomplex.org/git/?p=contractdashboard.git&a=commitdiff&h=52f29fc1e37b7be0cde452c0b42ed12c65059ca0 --- Upgrade neo4j batchimporter to 1.5 --- --- /dev/null +++ b/admin/neo4jimporter/nbactions.xml @@ -1,1 +1,53 @@ + + + + run + + process-classes + org.codehaus.mojo:exec-maven-plugin:1.2:exec + + + runtime + -Xmx4048M -server -classpath %classpath ${packageClassName} + java + + + + debug + + process-classes + org.codehaus.mojo:exec-maven-plugin:1.2:exec + + + runtime + -Xdebug -Xrunjdwp:transport=dt_socket,server=n,address=${jpda.address} -Xmx4048M -server -classpath %classpath ${packageClassName} + true + java + + + + profile + + process-classes + org.codehaus.mojo:exec-maven-plugin:1.1.1:exec + + + ${profiler.args} -Xmx4048M -server -classpath %classpath ${packageClassName} + profile + ${profiler.java} + + + + run + + process-classes + org.codehaus.mojo:exec-maven-plugin:1.2:exec + + + runtime + -classpath %classpath Importer + java + + + --- a/admin/neo4jimporter/pom.xml +++ b/admin/neo4jimporter/pom.xml @@ -6,8 +6,8 @@ org.neo4j - neo4j-kernel - 1.4 + neo4j + 1.5 postgresql --- a/admin/neo4jimporter/src/main/java/Importer.java +++ b/admin/neo4jimporter/src/main/java/Importer.java @@ -1,3 +1,4 @@ + import java.io.ObjectInputStream.GetField; import java.math.BigInteger; import java.security.MessageDigest; @@ -12,144 +13,147 @@ import java.util.Map; import org.neo4j.graphdb.DynamicRelationshipType; +import org.neo4j.graphdb.GraphDatabaseService; +import org.neo4j.graphdb.Node; +import org.neo4j.graphdb.index.BatchInserterIndex; +import org.neo4j.graphdb.index.BatchInserterIndexProvider; +import org.neo4j.helpers.collection.MapUtil; import org.neo4j.kernel.impl.batchinsert.BatchInserter; import org.neo4j.kernel.impl.batchinsert.BatchInserterImpl; +import org.neo4j.index.impl.lucene.*; public class Importer { - public static void main(String[] argv) { - - Map props = new HashMap(); - props.put("neostore.nodestore.db.mapped_memory", "22000000"); // - props.put("neostore.relationshipstore.db.mapped_memory", "22000000"); // - // create the batch inserter - BatchInserter inserter = new - BatchInserterImpl("neo4j-db/",props - ); - + public static void main(String[] argv) { + BatchInserter inserter = new BatchInserterImpl("target/neo4jdb-batchinsert"); + BatchInserterIndexProvider indexProvider = new LuceneBatchInserterIndexProvider(inserter); + BatchInserterIndex labels = indexProvider.nodeIndex("labels", MapUtil.stringMap("type", "exact")); + labels.setCacheCapacity("Label", 100000); - - - System.out.println("-------- PostgreSQL " - + "JDBC Connection Testing ------------"); - try { + System.out.println("-------- PostgreSQL " + + "JDBC Connection Testing ------------"); - Class.forName("org.postgresql.Driver"); + try { - } catch (ClassNotFoundException e) { + Class.forName("org.postgresql.Driver"); - System.out.println("Where is your PostgreSQL JDBC Driver? " - + "Include in your library path!"); - e.printStackTrace(); + } catch (ClassNotFoundException e) { - } + System.out.println("Where is your PostgreSQL JDBC Driver? " + + "Include in your library path!"); + e.printStackTrace(); - System.out.println("PostgreSQL JDBC Driver Registered!"); + } - Connection conn = null; + System.out.println("PostgreSQL JDBC Driver Registered!"); - try { + Connection conn = null; - conn = DriverManager.getConnection( - "jdbc:postgresql://127.0.0.1:5432/contractDashboard", - "postgres", "snmc"); + try { - } catch (SQLException e) { + conn = DriverManager.getConnection( + "jdbc:postgresql://127.0.0.1:5432/contractDashboard", + "postgres", "snmc"); - System.out.println("Connection Failed! Check output console"); - e.printStackTrace(); + } catch (SQLException e) { - } + System.out.println("Connection Failed! Check output console"); + e.printStackTrace(); - if (conn != null) { - System.out.println("You made it, take control your database now!"); - } else { - System.out.println("Failed to make connection!"); - } - try { - // Print all warnings - for (SQLWarning warn = conn.getWarnings(); warn != null; warn = warn - .getNextWarning()) { - System.out.println("SQL Warning:"); - System.out.println("State : " + warn.getSQLState()); - System.out.println("Message: " + warn.getMessage()); - System.out.println("Error : " + warn.getErrorCode()); - } + } - // Get a statement from the connection - Statement stmt = conn.createStatement(); + if (conn != null) { + System.out.println("You made it, take control your database now!"); + } else { + System.out.println("Failed to make connection!"); + } + try { + // Print all warnings + for (SQLWarning warn = conn.getWarnings(); warn != null; warn = warn.getNextWarning()) { + System.out.println("SQL Warning:"); + System.out.println("State : " + warn.getSQLState()); + System.out.println("Message: " + warn.getMessage()); + System.out.println("Error : " + warn.getErrorCode()); + } - // Execute the query - ResultSet rs = stmt.executeQuery("SELECT distinct contractnotice.\"agencyName\", contractnotice.\"supplierABN\", contractnotice.\"supplierName\" FROM public.contractnotice limit 300;"); -String previousAgency = ""; + // Get a statement from the connection + Statement stmt = conn.createStatement(); - // Loop through the result set - while (rs.next()) { - long supplier,agency; - agency = doHash(rs.getString("agencyName")); - if (rs.getString("agencyName") != previousAgency) { - if (!inserter.nodeExists(agency)) { - Map properties = new HashMap(); - properties.put("Label", rs.getString("agencyName")); - inserter.createNode(agency, properties); - } - } - if (rs.getString("supplierABN") != "0") { - supplier = doHash(rs.getString("supplierABN")); - } else { - supplier = doHash(rs.getString("supplierName")); - } - // inject some data - if (!inserter.nodeExists(supplier)) { - Map properties = new HashMap(); - - properties.put("Label", rs.getString("supplierName")); - inserter.createNode(supplier, properties); - } - - inserter.createRelationship(agency, supplier, - DynamicRelationshipType.withName("KNOWS"), null); - - } - // Close the result set, statement and the connection - rs.close(); - stmt.close(); - conn.close(); - } catch (SQLException se) { - System.out.println("SQL Exception:"); - - // Loop through the SQL Exceptions - while (se != null) { - System.out.println("State : " + se.getSQLState()); - System.out.println("Message: " + se.getMessage()); - System.out.println("Error : " + se.getErrorCode()); - - se = se.getNextException(); - } - } catch (Exception e) { - System.out.println(e); - } - // shutdown, makes sure all changes are written to disk - inserter.shutdown(); - } - - static long doHash(String input) { - MessageDigest m; - try { - m = MessageDigest.getInstance("MD5"); - m.reset(); - m.update(input.getBytes()); - byte[] digest = m.digest(); - return new BigInteger(1, digest).longValue(); - } catch (NoSuchAlgorithmException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - return 0; - - } + // Execute the query + ResultSet rs = stmt.executeQuery("SELECT contractnotice.\"agencyName\", " + + " contractnotice.\"supplierABN\",contractnotice.\"supplierName\",sum(value) as sum " + + "FROM public.contractnotice where contractnotice.\"agencyName\" != 'Department of Defence'" + + " AND contractnotice.\"agencyName\" != 'Defence Materiel Organisation' GROUP BY contractnotice.\"agencyName\", " + + " contractnotice.\"supplierABN\",contractnotice.\"supplierName\""); + String previousAgency = ""; +GraphDatabaseService gds = inserter.getGraphDbService(); +HashMap supplierIDs = new HashMap(); +HashMap agencyIDs = new HashMap(); + + // Loop through the result set + while (rs.next()) { + long supplierID, agencyID; + String supplierKey; + if (agencyIDs.get(rs.getString("agencyName")) == null) { + Node myNode = gds.createNode(); + myNode.setProperty("Label", rs.getString("agencyName")); + myNode.setProperty("type", "agency"); + agencyIDs.put(rs.getString("agencyName"), myNode.getId()); + if (myNode.getId() %100 == 0) { + System.out.println("Agency "+myNode.getId()); +} + } + agencyID = agencyIDs.get(rs.getString("agencyName")); + + + if (rs.getString("supplierABN") != "0" && rs.getString("supplierABN") != "") { + supplierKey = rs.getString("supplierABN"); + } else { + supplierKey = rs.getString("supplierName"); + } + // inject some data + if (supplierIDs.get(supplierKey) == null) { + Node myNode = gds.createNode(); + myNode.setProperty("Label", rs.getString("supplierName")); + myNode.setProperty("type", "supplier"); + supplierIDs.put(supplierKey, myNode.getId()); + if (myNode.getId() %1000 == 0) { + System.out.println("Supplier "+myNode.getId()); +} + } + supplierID = supplierIDs.get(supplierKey); + long rel = inserter.createRelationship(agencyID, supplierID, + DynamicRelationshipType.withName("KNOWS"), null); + inserter.setRelationshipProperty(rel, "Weight", rs.getDouble("sum")); + + } + // Close the result set, statement and the connection + rs.close(); + stmt.close(); + conn.close(); + } catch (SQLException se) { + System.out.println("SQL Exception:"); + + // Loop through the SQL Exceptions + while (se != null) { + System.out.println("State : " + se.getSQLState()); + System.out.println("Message: " + se.getMessage()); + System.out.println("Error : " + se.getErrorCode()); + + se = se.getNextException(); + } + } +//make the changes visible for reading, use this sparsely, requires IO! + labels.flush(); + +// Make sure to shut down the index provider + indexProvider.shutdown(); + inserter.shutdown(); + } + + }