[contractdashboard.git] / admin / neo4jimporter / src / main / java /
blob:a/admin/neo4jimporter/src/main/java/ -> blob:b/admin/neo4jimporter/src/main/java/
--- a/admin/neo4jimporter/src/main/java/
+++ b/admin/neo4jimporter/src/main/java/
@@ -1,3 +1,4 @@
 import java.math.BigInteger;
@@ -12,144 +13,145 @@
 import java.util.Map;
 import org.neo4j.graphdb.DynamicRelationshipType;
+import org.neo4j.graphdb.GraphDatabaseService;
+import org.neo4j.graphdb.Node;
+import org.neo4j.graphdb.index.BatchInserterIndex;
+import org.neo4j.graphdb.index.BatchInserterIndexProvider;
+import org.neo4j.helpers.collection.MapUtil;
 import org.neo4j.kernel.impl.batchinsert.BatchInserter;
 import org.neo4j.kernel.impl.batchinsert.BatchInserterImpl;
+import org.neo4j.index.impl.lucene.*;
 public class Importer {
-	public static void main(String[] argv) {
-		 Map<String,String> props = new HashMap<String, String>();
-		 props.put("neostore.nodestore.db.mapped_memory", "22000000"); // <expected number of nodes * 9 bytes>
-		 props.put("neostore.relationshipstore.db.mapped_memory", "22000000"); // <expected number of relationships * 33 bytes>
-		  // create the batch inserter 
-		BatchInserter inserter = new
-		  BatchInserterImpl("neo4j-db/",props
-		 );
+    public static void main(String[] argv) {
+        BatchInserter inserter = new BatchInserterImpl("target/neo4jdb-batchinsert");
+        BatchInserterIndexProvider indexProvider = new LuceneBatchInserterIndexProvider(inserter);
+        BatchInserterIndex labels = indexProvider.nodeIndex("labels", MapUtil.stringMap("type", "exact"));
+        labels.setCacheCapacity("Label", 100000);
-		System.out.println("-------- PostgreSQL "
-				+ "JDBC Connection Testing ------------");
-		try {
+        System.out.println("-------- PostgreSQL "
+                + "JDBC Connection Testing ------------");
-			Class.forName("org.postgresql.Driver");
+        try {
-		} catch (ClassNotFoundException e) {
+            Class.forName("org.postgresql.Driver");
-			System.out.println("Where is your PostgreSQL JDBC Driver? "
-					+ "Include in your library path!");
-			e.printStackTrace();
+        } catch (ClassNotFoundException e) {
-		}
+            System.out.println("Where is your PostgreSQL JDBC Driver? "
+                    + "Include in your library path!");
+            e.printStackTrace();
-		System.out.println("PostgreSQL JDBC Driver Registered!");
+        }
-		Connection conn = null;
+        System.out.println("PostgreSQL JDBC Driver Registered!");
-		try {
+        Connection conn = null;
-			conn = DriverManager.getConnection(
-					"jdbc:postgresql://",
-					"postgres", "snmc");
+        try {
-		} catch (SQLException e) {
+            conn = DriverManager.getConnection(
+                    "jdbc:postgresql://",
+                    "postgres", "snmc");
-			System.out.println("Connection Failed! Check output console");
-			e.printStackTrace();
+        } catch (SQLException e) {
-		}
+            System.out.println("Connection Failed! Check output console");
+            e.printStackTrace();
-		if (conn != null) {
-			System.out.println("You made it, take control your database now!");
-		} else {
-			System.out.println("Failed to make connection!");
-		}
-		try {
-			// Print all warnings
-			for (SQLWarning warn = conn.getWarnings(); warn != null; warn = warn
-					.getNextWarning()) {
-				System.out.println("SQL Warning:");
-				System.out.println("State  : " + warn.getSQLState());
-				System.out.println("Message: " + warn.getMessage());
-				System.out.println("Error  : " + warn.getErrorCode());
-			}
+        }
-			// Get a statement from the connection
-			Statement stmt = conn.createStatement();
+        if (conn != null) {
+            System.out.println("You made it, take control your database now!");
+        } else {
+            System.out.println("Failed to make connection!");
+        }
+        try {
+            // Print all warnings
+            for (SQLWarning warn = conn.getWarnings(); warn != null; warn = warn.getNextWarning()) {
+                System.out.println("SQL Warning:");
+                System.out.println("State  : " + warn.getSQLState());
+                System.out.println("Message: " + warn.getMessage());
+                System.out.println("Error  : " + warn.getErrorCode());
+            }
-			// Execute the query
-			ResultSet rs = stmt.executeQuery("SELECT distinct contractnotice.\"agencyName\",   contractnotice.\"supplierABN\",   contractnotice.\"supplierName\" FROM  public.contractnotice limit 300;");
-String previousAgency = "";
+            // Get a statement from the connection
+            Statement stmt = conn.createStatement();
-			// Loop through the result set
-			while ( {
-				long supplier,agency;
-				agency = doHash(rs.getString("agencyName"));
-				if (rs.getString("agencyName") != previousAgency) {
-					if (!inserter.nodeExists(agency)) {
-					Map<String, Object> properties = new HashMap<String, Object>();
-					  properties.put("Label", rs.getString("agencyName"));
-					  inserter.createNode(agency, properties);
-					}
-				}
-				if (rs.getString("supplierABN") != "0") {
-					supplier = doHash(rs.getString("supplierABN"));
-				} else {
-				supplier = doHash(rs.getString("supplierName"));
-				}
-				  // inject some data 
-			if (!inserter.nodeExists(supplier)) {
-				Map<String, Object> properties = new HashMap<String, Object>();
-				  properties.put("Label", rs.getString("supplierName"));
-				  inserter.createNode(supplier, properties);
-			}
-				  inserter.createRelationship(agency, supplier,
-				  DynamicRelationshipType.withName("KNOWS"), null);
-			}
-			// Close the result set, statement and the connection
-			rs.close();
-			stmt.close();
-			conn.close();
-		} catch (SQLException se) {
-			System.out.println("SQL Exception:");
+            // Execute the query
+            ResultSet rs = stmt.executeQuery("SELECT contractnotice.\"agencyName\", "
+                    + "  contractnotice.\"supplierABN\",contractnotice.\"supplierName\",sum(value) as sum "
+                    + "FROM  public.contractnotice where contractnotice.\"agencyName\" != 'Department of Defence'"
+                    + " AND contractnotice.\"agencyName\" != 'Defence Materiel Organisation' GROUP BY contractnotice.\"agencyName\", "
+                    + "  contractnotice.\"supplierABN\",contractnotice.\"supplierName\"");
+            String previousAgency = "";
+            GraphDatabaseService gds = inserter.getGraphDbService();
+            HashMap<String, Long> supplierIDs = new HashMap<String, Long>();
+            HashMap<String, Long> agencyIDs = new HashMap<String, Long>();
-			// Loop through the SQL Exceptions
-			while (se != null) {
-				System.out.println("State  : " + se.getSQLState());
-				System.out.println("Message: " + se.getMessage());
-				System.out.println("Error  : " + se.getErrorCode());
-				se = se.getNextException();
-			}
-		} catch (Exception e) {
-			System.out.println(e);
-		}
-		  // shutdown, makes sure all changes are written to disk
-		  inserter.shutdown();
-	}
-	static long doHash(String input) {
-		MessageDigest m;
-		try {
-			m = MessageDigest.getInstance("MD5");
-			m.reset();
-			m.update(input.getBytes());
-			byte[] digest = m.digest();
-			return new BigInteger(1, digest).longValue();
-		} catch (NoSuchAlgorithmException e) {
-			// TODO Auto-generated catch block
-			e.printStackTrace();
-		}
-		return 0;
-	}
+            // Walk every row of the aggregate query. Each distinct agency and each
+            // distinct supplier becomes one node; each row becomes one KNOWS
+            // relationship from the agency to the supplier, weighted by the row's "sum".
+            while (rs.next()) {
+                // Read each column once per row rather than re-fetching it from the driver.
+                String agencyName = rs.getString("agencyName");
+                String supplierABN = rs.getString("supplierABN");
+                String supplierName = rs.getString("supplierName");
+                // Create the agency node the first time this agency name is seen.
+                Long agencyID = agencyIDs.get(agencyName);
+                if (agencyID == null) {
+                    Node agencyNode = gds.createNode();
+                    agencyNode.setProperty("Label", agencyName);
+                    agencyNode.setProperty("type", "agency");
+                    agencyID = agencyNode.getId();
+                    agencyIDs.put(agencyName, agencyID);
+                    // Progress output every 100th node id.
+                    if (agencyID % 100 == 0) {
+                        System.out.println("Agency " + agencyID);
+                    }
+                }
+                // Key suppliers by ABN when a usable one is present, otherwise by name.
+                // A null, "0" or "" ABN is presumably a placeholder for "no ABN recorded"
+                // (verify against the data). The comparison must use equals(): != on
+                // Strings tests object identity, not content.
+                String supplierKey;
+                if (supplierABN != null && !"0".equals(supplierABN) && !"".equals(supplierABN)) {
+                    supplierKey = supplierABN;
+                } else {
+                    supplierKey = supplierName;
+                }
+                // Create the supplier node the first time this key is seen.
+                Long supplierID = supplierIDs.get(supplierKey);
+                if (supplierID == null) {
+                    Node supplierNode = gds.createNode();
+                    supplierNode.setProperty("Label", supplierName);
+                    supplierNode.setProperty("type", "supplier");
+                    supplierID = supplierNode.getId();
+                    supplierIDs.put(supplierKey, supplierID);
+                    // Progress output every 1000th node id.
+                    if (supplierID % 1000 == 0) {
+                        System.out.println("Supplier " + supplierID);
+                    }
+                }
+                // Link agency -> supplier and attach the summed contract value as "Weight".
+                long rel = inserter.createRelationship(agencyID, supplierID,
+                        DynamicRelationshipType.withName("KNOWS"), null);
+                inserter.setRelationshipProperty(rel, "Weight", rs.getDouble("sum"));
+            }
+            // Close the result set, statement and the connection
+            rs.close();
+            stmt.close();
+            conn.close();
+        } catch (SQLException se) {
+            System.out.println("SQL Exception:");
+            // Loop through the SQL Exceptions
+            while (se != null) {
+                System.out.println("State  : " + se.getSQLState());
+                System.out.println("Message: " + se.getMessage());
+                System.out.println("Error  : " + se.getErrorCode());
+                se = se.getNextException();
+            }
+        }
+//make the changes visible for reading, use this sparsely, requires IO!
+        labels.flush();
+// Make sure to shut down the index provider
+        indexProvider.shutdown();
+        inserter.shutdown();
+    }