// Revision note: NAA import update

import org.neo4j.graphdb.DynamicLabel;
import org.neo4j.graphdb.DynamicRelationshipType;
import org.neo4j.graphdb.Label;
import org.neo4j.unsafe.batchinsert.BatchInserter;
import org.neo4j.unsafe.batchinsert.BatchInserters;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.util.HashMap;
import java.util.Map;

import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.events.XMLEvent;
   
public class StAXSample { public class StAXSample {
   
   
HashMap<String, Long> agencyIDs = new HashMap<String, Long>(); HashMap<String, Long> agencyIDs = new HashMap<String, Long>();
HashMap<String, Boolean> agencyFullVersion = new HashMap<String, Boolean>(); HashMap<String, Boolean> agencyFullVersion = new HashMap<String, Boolean>();
Label agencyLabel = DynamicLabel.label("Agency"); Label agencyLabel = DynamicLabel.label("Agency");
HashMap<String, Long> locationIDs = new HashMap<String, Long>(); HashMap<String, Long> locationIDs = new HashMap<String, Long>();
Label locationLabel = DynamicLabel.label("Location"); Label locationLabel = DynamicLabel.label("Location");
HashMap<String, Long> functionIDs = new HashMap<String, Long>(); HashMap<String, Long> functionIDs = new HashMap<String, Long>();
Label functionLabel = DynamicLabel.label("Function"); Label functionLabel = DynamicLabel.label("Function");
HashMap<String, Long> statusIDs = new HashMap<String, Long>(); HashMap<String, Long> statusIDs = new HashMap<String, Long>();
Label statusLabel = DynamicLabel.label("Location"); Label statusLabel = DynamicLabel.label("Location");
BatchInserter inserter; BatchInserter inserter;
   
private String filename; private String filename;
   
public StAXSample() { public StAXSample() {
} }
   
public static void main(String[] args) { public static void main(String[] args) {
/*if (args.length != 1) { /*if (args.length != 1) {
System.out.println("Usage: StAXSample file.xml"); System.out.println("Usage: StAXSample file.xml");
System.exit(-1); System.exit(-1);
} */ } */
   
StAXSample ss = new StAXSample(); StAXSample ss = new StAXSample();
//ss.setFilename(args[0]); //ss.setFilename(args[0]);
ss.setFilename("agency-sample.xml"); ss.setFilename("agency-sample.xml");
ss.run(); ss.run();
} }
   
public void run() { public void run() {
   
Map<String, String> config = new HashMap<String, String>(); Map<String, String> config = new HashMap<String, String>();
config.put("neostore.nodestore.db.mapped_memory", "90M"); config.put("neostore.nodestore.db.mapped_memory", "90M");
inserter = BatchInserters.inserter("target/batchinserter-example-config", config); inserter = BatchInserters.inserter("target/batchinserter-example-config", config);
inserter.createDeferredSchemaIndex(agencyLabel).on("agency_no"); inserter.createDeferredSchemaIndex(agencyLabel).on("agency_no");
inserter.createDeferredSchemaIndex(locationLabel).on("location_name"); inserter.createDeferredSchemaIndex(locationLabel).on("location_name");
inserter.createDeferredSchemaIndex(functionLabel).on("thesaurus_term"); inserter.createDeferredSchemaIndex(functionLabel).on("thesaurus_term");
inserter.createDeferredSchemaIndex(statusLabel).on("status_name"); inserter.createDeferredSchemaIndex(statusLabel).on("status_name");
   
try { try {
XMLInputFactory xmlif = XMLInputFactory.newInstance(); XMLInputFactory xmlif = XMLInputFactory.newInstance();
xmlif.setProperty( xmlif.setProperty(
XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES,
Boolean.TRUE); Boolean.TRUE);
xmlif.setProperty( xmlif.setProperty(
XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES,
Boolean.FALSE); Boolean.FALSE);
//set the IS_COALESCING property to true //set the IS_COALESCING property to true
//to get whole text data as one event. //to get whole text data as one event.
xmlif.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE); xmlif.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);
   
try { try {
XMLEventReader r = null; XMLEventReader r = null;
r = xmlif.createXMLEventReader( r = xmlif.createXMLEventReader(
filename, filename,
//new FileInputStream(new File(xmlFileURL.toURI()))); //new FileInputStream(new File(xmlFileURL.toURI())));
new FileInputStream(new File(filename))); new FileInputStream(new File(filename)));
   
   
//iterate as long as there are more events on the input stream //iterate as long as there are more events on the input stream
Map<String, Object> previousAgency = new HashMap<String, Object>(); Map<String, Object> previousAgency = new HashMap<String, Object>();
while (r.hasNext()) { while (r.hasNext()) {
XMLEvent e = r.nextEvent(); XMLEvent e = r.nextEvent();
   
if (e.isStartElement()) { if (e.isStartElement()) {
if (hasStartTagName(e, "AGENCIES")) { if (hasStartTagName(e, "AGENCIES")) {
System.out.println("Agencies file loaded... "); System.out.println("Agencies file loaded... ");
  } else if (hasStartTagName(e, "AGENCY_NO")) {
  previousAgency.put("agency_no", getCharacters(r));
  } else if (hasStartTagName(e, "TITLE")) {
  String title = getCharacters(r);
  previousAgency.put("name", title);
  previousAgency.put("label", title);
  } else if (hasStartTagName(e, "START_DATE")) {
  String start_date = getCharacters(r);
  if (start_date != null && !start_date.equals(" ") && !start_date.equals("(null)")) {
  previousAgency.put("start_date", Integer.parseInt(start_date));
  }
  } else if (hasStartTagName(e, "START_DATE_QUAL")) {
  previousAgency.put("start_date_qual", getCharacters(r));
  } else if (hasStartTagName(e, "ALTERNATIVE_TITLE")) {
  previousAgency.put("alternative_title", getCharacters(r));
  } else if (hasStartTagName(e, "END_DATE")) {
  String end_date = getCharacters(r);
  if (end_date != null && !end_date.equals(" ") && !end_date.equals("(null)")) {
  previousAgency.put("end_date", Integer.parseInt(end_date));
  }
  } else if (hasStartTagName(e, "END_DATE_QUAL")) {
  previousAgency.put("end_date_qual", getCharacters(r));
  // save agency
  getAgency(previousAgency);
  previousAgency = new HashMap<String, Object>();
  }
  }
  }
  r = xmlif.createXMLEventReader(
  filename,
  //new FileInputStream(new File(xmlFileURL.toURI())));
  new FileInputStream(new File(filename)));
  while (r.hasNext()) {
  XMLEvent e = r.nextEvent();
   
  if (e.isStartElement()) {
  if (hasStartTagName(e, "AGENCIES")) {
  System.out.println("Agencies file loaded again... ");
} else if (hasStartTagName(e, "AGENCY_LINK")) { } else if (hasStartTagName(e, "AGENCY_LINK")) {
processAgencyLink(r); processAgencyLink(r);
} else if (hasStartTagName(e, "AGENCY_LOCATION")) { } else if (hasStartTagName(e, "AGENCY_LOCATION")) {
processAgencyLocation(r); processAgencyLocation(r);
} else if (hasStartTagName(e, "AGENCY_FUNCTION")) { } else if (hasStartTagName(e, "AGENCY_FUNCTION")) {
processAgencyFunction(r); processAgencyFunction(r);
} else if (hasStartTagName(e, "AGENCY_STATUS")) { } else if (hasStartTagName(e, "AGENCY_STATUS")) {
processAgencyStatus(r); processAgencyStatus(r);
/* TODO /* TODO
Unhandled tag: AGENCY_NOTE content: Unhandled tag: AGENCY_NOTE content:
   
Unhandled tag: NOTE_AGENCY_NO content:CA 4886 Unhandled tag: NOTE_AGENCY_NO content:CA 4886
Unhandled tag: NOTE_TYPE content:Archivists note Unhandled tag: NOTE_TYPE content:Archivists note
Unhandled tag: NOTE content:null Unhandled tag: NOTE content:null
Unhandled tag: head content: Unhandled tag: head content:
   
Unexpected character 'C' (code 67) in start tag Expected a quote Unexpected character 'C' (code 67) in start tag Expected a quote
at [row,col,system-id]: [1093387,18,"agency-sample.xml"] at [row,col,system-id]: [1093387,18,"agency-sample.xml"]
*/ */
} else if (hasStartTagName(e, "AGENCY_NO")) {  
previousAgency.put("agency_no", getCharacters(r));  
} else if (hasStartTagName(e, "TITLE")) {  
String title = getCharacters(r);  
previousAgency.put("name", title);  
previousAgency.put("label", title);  
} else if (hasStartTagName(e, "START_DATE")) {  
previousAgency.put("start_date", getCharacters(r));  
} else if (hasStartTagName(e, "START_DATE_QUAL")) {  
previousAgency.put("start_date_qual", getCharacters(r));  
} else if (hasStartTagName(e, "ALTERNATIVE_TITLE")) {  
previousAgency.put("alternative_title", getCharacters(r));  
} else if (hasStartTagName(e, "END_DATE")) {  
previousAgency.put("end_date", getCharacters(r));  
} else if (hasStartTagName(e, "END_DATE_QUAL")) {  
previousAgency.put("end_date_qual", getCharacters(r));  
// save agency  
getAgency(previousAgency);  
previousAgency = new HashMap<String, Object>();  
} else {  
System.out.println("Unhandled tag: " + getStartTagName(e) + " content:" + getCharacters(r));  
} }
} }
} }
} catch (XMLStreamException ex) { } catch (XMLStreamException ex) {
System.out.println(ex.getMessage()); System.out.println(ex.getMessage());
   
if (ex.getNestedException() != null) { if (ex.getNestedException() != null) {
ex.getNestedException().printStackTrace(); ex.getNestedException().printStackTrace();
} }
} }
   
} catch (FileNotFoundException ex) { } catch (FileNotFoundException ex) {
System.err.println("Error. Cannot find \"" + filename + "\" in classpath."); System.err.println("Error. Cannot find \"" + filename + "\" in classpath.");
ex.printStackTrace(); ex.printStackTrace();
} catch (Exception ex) { } catch (Exception ex) {
ex.printStackTrace(); ex.printStackTrace();
} }
   
inserter.shutdown(); inserter.shutdown();
} }
   
private long getAgency(Map<String, Object> properties) { private long getAgency(Map<String, Object> properties) {
  if (properties.get("agency_no") == null || properties.get("agency_no") == "(null)" || properties.get("agency_no") == " ") {
  return 0;
  }
if (agencyIDs.get(properties.get("agency_no").toString()) == null) { if (agencyIDs.get(properties.get("agency_no").toString()) == null) {
long agencyID = inserter.createNode(properties, agencyLabel); long agencyID = inserter.createNode(properties, agencyLabel);
if (properties.values().size() > 2) { /*if (properties.values().size() > 1) {
agencyFullVersion.put(properties.get("agency_no").toString(), true); agencyFullVersion.put(properties.get("agency_no").toString(), true);
} } */
agencyIDs.put(properties.get("agency_no").toString(), agencyID); agencyIDs.put(properties.get("agency_no").toString(), agencyID);
//if (agencyID % 10 == 0) { //if (agencyID % 10 == 0) {
System.out.println("Agency #"+agencyID); System.out.println("Agency #"+agencyID);
//} //}
return agencyID; return agencyID;
} else { } else {
long agencyID = agencyIDs.get(properties.get("agency_no").toString()); long agencyID = agencyIDs.get(properties.get("agency_no").toString());
if (properties.values().size() > 2 && agencyFullVersion.get(properties.get("agency_no")) == null) { /*if (properties.values().size() > 1 && agencyFullVersion.get(properties.get("agency_no")) == null) {
inserter.setNodeProperties(agencyID, properties); inserter.setNodeProperties(agencyID, properties);
agencyFullVersion.put(properties.get("agency_no").toString(), true); agencyFullVersion.put(properties.get("agency_no").toString(), true);
} } */
return agencyID; return agencyID;
} }
} }
   
private long getLocation(String locationName) { private long getLocation(String locationName) {
if (locationIDs.get(locationName) == null) { if (locationIDs.get(locationName) == null) {
HashMap properties = new HashMap< String,Object > (); HashMap properties = new HashMap< String,Object > ();
properties.put("name", locationName); properties.put("name", locationName);
properties.put("label", locationName); properties.put("label", locationName);
long locationID = inserter.createNode(properties, locationLabel); long locationID = inserter.createNode(properties, locationLabel);
locationIDs.put(locationName, locationID); locationIDs.put(locationName, locationID);
return locationID; return locationID;
} else { } else {
return locationIDs.get(locationName); return locationIDs.get(locationName);
} }
} }
private long getFunction(String functionName) { private long getFunction(String functionName) {
if (functionIDs.get(functionName) == null) { if (functionIDs.get(functionName) == null) {
HashMap properties = new HashMap< String,Object > (); HashMap properties = new HashMap< String,Object > ();
properties.put("name", functionName); properties.put("name", functionName);
properties.put("label", functionName); properties.put("label", functionName);
long functionID = inserter.createNode(properties, functionLabel); long functionID = inserter.createNode(properties, functionLabel);
functionIDs.put(functionName, functionID); functionIDs.put(functionName, functionID);
return functionID; return functionID;
} else { } else {
return functionIDs.get(functionName); return functionIDs.get(functionName);
} }
} }
private long getStatus(String statusName) { private long getStatus(String statusName) {
if (statusIDs.get(statusName) == null) { if (statusIDs.get(statusName) == null) {
HashMap properties = new HashMap< String,Object > (); HashMap properties = new HashMap< String,Object > ();
properties.put("name", statusName); properties.put("name", statusName);
properties.put("label", statusName); properties.put("label", statusName);
long statusID = inserter.createNode(properties, statusLabel); long statusID = inserter.createNode(properties, statusLabel);
statusIDs.put(statusName, statusID); statusIDs.put(statusName, statusID);
return statusID; return statusID;
} else { } else {
return statusIDs.get(statusName); return statusIDs.get(statusName);
} }
} }
   
private void processAgencyLink(XMLEventReader rdr) throws Exception { private void processAgencyLink(XMLEventReader rdr) throws Exception {
String agency_from_no = null; String agency_from_no = null;
String agency_to_no = null; String agency_to_no = null;
String link_type = null; String link_type = null;
String start_date = null; String start_date = null;
String start_date_qual = null; String start_date_qual = null;
String end_date = null; String end_date = null;
String end_date_qual = null; String end_date_qual = null;
   
while (rdr.hasNext()) { while (rdr.hasNext()) {
XMLEvent e = rdr.nextEvent(); XMLEvent e = rdr.nextEvent();
if (e.isStartElement()) { if (e.isStartElement()) {
if (hasStartTagName(e, "LINK_AGENCY_NO")) { if (hasStartTagName(e, "LINK_AGENCY_NO")) {
agency_from_no = getCharacters(rdr); agency_from_no = getCharacters(rdr);
} else if (hasStartTagName(e, "LINK_TO_AGENCY_NO")) { } else if (hasStartTagName(e, "LINK_TO_AGENCY_NO")) {
agency_to_no = getCharacters(rdr); agency_to_no = getCharacters(rdr);
} else if (hasStartTagName(e, "LINK_TYPE")) { } else if (hasStartTagName(e, "LINK_TYPE")) {
link_type = getCharacters(rdr); link_type = getCharacters(rdr);
} else if (hasStartTagName(e, "START_DATE")) { } else if (hasStartTagName(e, "START_DATE")) {
start_date = getCharacters(rdr); start_date = getCharacters(rdr);
}else if (hasStartTagName(e, "START_DATE_QUAL")) { }else if (hasStartTagName(e, "START_DATE_QUAL")) {
start_date_qual = getCharacters(rdr); start_date_qual = getCharacters(rdr);
}else if (hasStartTagName(e, "END_DATE")) { }else if (hasStartTagName(e, "END_DATE")) {
end_date = getCharacters(rdr); end_date = getCharacters(rdr);
}else if (hasStartTagName(e, "END_DATE_QUAL")) { }else if (hasStartTagName(e, "END_DATE_QUAL")) {
end_date_qual = getCharacters(rdr); end_date_qual = getCharacters(rdr);
} }
} }
if (e.isEndElement()) { if (e.isEndElement()) {
if (hasEndTagName(e, "AGENCY_LINK")) { if (hasEndTagName(e, "AGENCY_LINK")) {
   
//System.out.println("Finished processing link: type = " + link_type+ "; from = " + agency_from_no + "; to = " + agency_to_no); //System.out.println("Finished processing link: type = " + link_type+ "; from = " + agency_from_no + "; to = " + agency_to_no);
long agencyFromID, agencyToID; if (agency_to_no != null && !agency_to_no.equals("(null)")) {
Map<String, Object> agencyFromProperties = new HashMap<String, Object>(); long agencyFromID, agencyToID;
agencyFromProperties.put("agency_no",agency_from_no); Map<String, Object> agencyFromProperties = new HashMap<String, Object>();
agencyFromID = getAgency(agencyFromProperties); agencyFromProperties.put("agency_from_no", agency_from_no);
Map<String, Object> agencyToProperties = new HashMap<String, Object>(); agencyFromID = getAgency(agencyFromProperties);
agencyToProperties.put("agency_no",agency_to_no); Map<String, Object> agencyToProperties = new HashMap<String, Object>();
agencyToID = getAgency(agencyToProperties); agencyToProperties.put("agency_to_no", agency_to_no);
Map<String, Object> relProperties = new HashMap<String, Object>(); agencyToID = getAgency(agencyToProperties);
relProperties.put("link_type", link_type); Map<String, Object> relProperties = new HashMap<String, Object>();
relProperties.put("start_date", start_date); relProperties.put("link_type", link_type);
if (start_date_qual != null && !start_date_qual.equals("(null)")) { if (start_date != null && !start_date.equals("(null)")) {
relProperties.put("start_date_qual", start_date_qual); relProperties.put("start_date", Integer.parseInt(start_date));
} }
if (end_date != null && !end_date.equals("(null)")) { if (start_date_qual != null && !start_date_qual.equals("(null)")) {
relProperties.put("end_date", end_date); relProperties.put("start_date_qual", start_date_qual);
} }
if (end_date_qual != null && !end_date_qual.equals("(null)")) { if (end_date != null && !end_date.equals("(null)")) {
relProperties.put("end_date_qual", end_date_qual); relProperties.put("end_date", Integer.parseInt(end_date));
} }
inserter.createRelationship(agencyFromID, agencyToID, if (end_date_qual != null && !end_date_qual.equals("(null)")) {
DynamicRelationshipType.withName("IS_LINKED_TO"), relProperties); relProperties.put("end_date_qual", end_date_qual);
  }
break; inserter.createRelationship(agencyFromID, agencyToID,
  DynamicRelationshipType.withName("IS_LINKED_TO"), relProperties);
  }
  break;
   
} }
} }
} }
} }
   
private void processAgencyLocation(XMLEventReader rdr) throws Exception { private void processAgencyLocation(XMLEventReader rdr) throws Exception {
String of = nul