import org.neo4j.graphdb.DynamicLabel; |
import org.neo4j.graphdb.DynamicLabel; |
import org.neo4j.graphdb.DynamicRelationshipType; |
import org.neo4j.graphdb.DynamicRelationshipType; |
import org.neo4j.graphdb.Label; |
import org.neo4j.graphdb.Label; |
import org.neo4j.unsafe.batchinsert.BatchInserter; |
import org.neo4j.unsafe.batchinsert.BatchInserter; |
import org.neo4j.unsafe.batchinsert.BatchInserters; |
import org.neo4j.unsafe.batchinsert.BatchInserters; |
|
|
import java.io.File; |
import java.io.File; |
import java.io.FileInputStream; |
import java.io.FileInputStream; |
import java.io.FileNotFoundException; |
import java.io.FileNotFoundException; |
import java.util.HashMap; |
import java.util.HashMap; |
import java.util.Map; |
import java.util.Map; |
|
|
import javax.xml.stream.XMLInputFactory; |
import javax.xml.stream.XMLInputFactory; |
import javax.xml.stream.XMLStreamException; |
import javax.xml.stream.XMLStreamException; |
import javax.xml.stream.XMLEventReader; |
import javax.xml.stream.XMLEventReader; |
import javax.xml.stream.events.XMLEvent; |
import javax.xml.stream.events.XMLEvent; |
|
|
public class StAXSample { |
public class StAXSample { |
|
|
|
|
HashMap<String, Long> agencyIDs = new HashMap<String, Long>(); |
HashMap<String, Long> agencyIDs = new HashMap<String, Long>(); |
HashMap<String, Boolean> agencyFullVersion = new HashMap<String, Boolean>(); |
HashMap<String, Boolean> agencyFullVersion = new HashMap<String, Boolean>(); |
Label agencyLabel = DynamicLabel.label("Agency"); |
Label agencyLabel = DynamicLabel.label("Agency"); |
HashMap<String, Long> locationIDs = new HashMap<String, Long>(); |
HashMap<String, Long> locationIDs = new HashMap<String, Long>(); |
Label locationLabel = DynamicLabel.label("Location"); |
Label locationLabel = DynamicLabel.label("Location"); |
HashMap<String, Long> functionIDs = new HashMap<String, Long>(); |
HashMap<String, Long> functionIDs = new HashMap<String, Long>(); |
Label functionLabel = DynamicLabel.label("Function"); |
Label functionLabel = DynamicLabel.label("Function"); |
HashMap<String, Long> statusIDs = new HashMap<String, Long>(); |
HashMap<String, Long> statusIDs = new HashMap<String, Long>(); |
Label statusLabel = DynamicLabel.label("Location"); |
Label statusLabel = DynamicLabel.label("Location"); |
BatchInserter inserter; |
BatchInserter inserter; |
|
|
private String filename; |
private String filename; |
|
|
public StAXSample() { |
public StAXSample() { |
} |
} |
|
|
public static void main(String[] args) { |
public static void main(String[] args) { |
/*if (args.length != 1) { |
/*if (args.length != 1) { |
System.out.println("Usage: StAXSample file.xml"); |
System.out.println("Usage: StAXSample file.xml"); |
System.exit(-1); |
System.exit(-1); |
} */ |
} */ |
|
|
StAXSample ss = new StAXSample(); |
StAXSample ss = new StAXSample(); |
//ss.setFilename(args[0]); |
//ss.setFilename(args[0]); |
ss.setFilename("agency-sample.xml"); |
ss.setFilename("agency-sample.xml"); |
ss.run(); |
ss.run(); |
} |
} |
|
|
public void run() { |
public void run() { |
|
|
Map<String, String> config = new HashMap<String, String>(); |
Map<String, String> config = new HashMap<String, String>(); |
config.put("neostore.nodestore.db.mapped_memory", "90M"); |
config.put("neostore.nodestore.db.mapped_memory", "90M"); |
inserter = BatchInserters.inserter("target/batchinserter-example-config", config); |
inserter = BatchInserters.inserter("target/batchinserter-example-config", config); |
inserter.createDeferredSchemaIndex(agencyLabel).on("agency_no"); |
inserter.createDeferredSchemaIndex(agencyLabel).on("agency_no"); |
inserter.createDeferredSchemaIndex(locationLabel).on("location_name"); |
inserter.createDeferredSchemaIndex(locationLabel).on("location_name"); |
inserter.createDeferredSchemaIndex(functionLabel).on("thesaurus_term"); |
inserter.createDeferredSchemaIndex(functionLabel).on("thesaurus_term"); |
inserter.createDeferredSchemaIndex(statusLabel).on("status_name"); |
inserter.createDeferredSchemaIndex(statusLabel).on("status_name"); |
|
|
try { |
try { |
XMLInputFactory xmlif = XMLInputFactory.newInstance(); |
XMLInputFactory xmlif = XMLInputFactory.newInstance(); |
xmlif.setProperty( |
xmlif.setProperty( |
XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, |
XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, |
Boolean.TRUE); |
Boolean.TRUE); |
xmlif.setProperty( |
xmlif.setProperty( |
XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, |
XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, |
Boolean.FALSE); |
Boolean.FALSE); |
//set the IS_COALESCING property to true |
//set the IS_COALESCING property to true |
//to get whole text data as one event. |
//to get whole text data as one event. |
xmlif.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE); |
xmlif.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE); |
|
|
try { |
try { |
XMLEventReader r = null; |
XMLEventReader r = null; |
r = xmlif.createXMLEventReader( |
r = xmlif.createXMLEventReader( |
filename, |
filename, |
//new FileInputStream(new File(xmlFileURL.toURI()))); |
//new FileInputStream(new File(xmlFileURL.toURI()))); |
new FileInputStream(new File(filename))); |
new FileInputStream(new File(filename))); |
|
|
|
|
//iterate as long as there are more events on the input stream |
//iterate as long as there are more events on the input stream |
Map<String, Object> previousAgency = new HashMap<String, Object>(); |
Map<String, Object> previousAgency = new HashMap<String, Object>(); |
while (r.hasNext()) { |
while (r.hasNext()) { |
XMLEvent e = r.nextEvent(); |
XMLEvent e = r.nextEvent(); |
|
|
if (e.isStartElement()) { |
if (e.isStartElement()) { |
if (hasStartTagName(e, "AGENCIES")) { |
if (hasStartTagName(e, "AGENCIES")) { |
System.out.println("Agencies file loaded... "); |
System.out.println("Agencies file loaded... "); |
|
} else if (hasStartTagName(e, "AGENCY_NO")) { |
|
previousAgency.put("agency_no", getCharacters(r)); |
|
} else if (hasStartTagName(e, "TITLE")) { |
|
String title = getCharacters(r); |
|
previousAgency.put("name", title); |
|
previousAgency.put("label", title); |
|
} else if (hasStartTagName(e, "START_DATE")) { |
|
String start_date = getCharacters(r); |
|
if (start_date != null && !start_date.equals(" ") && !start_date.equals("(null)")) { |
|
previousAgency.put("start_date", Integer.parseInt(start_date)); |
|
} |
|
} else if (hasStartTagName(e, "START_DATE_QUAL")) { |
|
previousAgency.put("start_date_qual", getCharacters(r)); |
|
} else if (hasStartTagName(e, "ALTERNATIVE_TITLE")) { |
|
previousAgency.put("alternative_title", getCharacters(r)); |
|
} else if (hasStartTagName(e, "END_DATE")) { |
|
String end_date = getCharacters(r); |
|
if (end_date != null && !end_date.equals(" ") && !end_date.equals("(null)")) { |
|
previousAgency.put("end_date", Integer.parseInt(end_date)); |
|
} |
|
} else if (hasStartTagName(e, "END_DATE_QUAL")) { |
|
previousAgency.put("end_date_qual", getCharacters(r)); |
|
// save agency |
|
getAgency(previousAgency); |
|
previousAgency = new HashMap<String, Object>(); |
|
} |
|
} |
|
} |
|
r = xmlif.createXMLEventReader( |
|
filename, |
|
//new FileInputStream(new File(xmlFileURL.toURI()))); |
|
new FileInputStream(new File(filename))); |
|
while (r.hasNext()) { |
|
XMLEvent e = r.nextEvent(); |
|
|
|
if (e.isStartElement()) { |
|
if (hasStartTagName(e, "AGENCIES")) { |
|
System.out.println("Agencies file loaded again... "); |
} else if (hasStartTagName(e, "AGENCY_LINK")) { |
} else if (hasStartTagName(e, "AGENCY_LINK")) { |
processAgencyLink(r); |
processAgencyLink(r); |
} else if (hasStartTagName(e, "AGENCY_LOCATION")) { |
} else if (hasStartTagName(e, "AGENCY_LOCATION")) { |
processAgencyLocation(r); |
processAgencyLocation(r); |
} else if (hasStartTagName(e, "AGENCY_FUNCTION")) { |
} else if (hasStartTagName(e, "AGENCY_FUNCTION")) { |
processAgencyFunction(r); |
processAgencyFunction(r); |
} else if (hasStartTagName(e, "AGENCY_STATUS")) { |
} else if (hasStartTagName(e, "AGENCY_STATUS")) { |
processAgencyStatus(r); |
processAgencyStatus(r); |
/* TODO |
/* TODO |
Unhandled tag: AGENCY_NOTE content: |
Unhandled tag: AGENCY_NOTE content: |
|
|
Unhandled tag: NOTE_AGENCY_NO content:CA 4886 |
Unhandled tag: NOTE_AGENCY_NO content:CA 4886 |
Unhandled tag: NOTE_TYPE content:Archivists note |
Unhandled tag: NOTE_TYPE content:Archivists note |
Unhandled tag: NOTE content:null |
Unhandled tag: NOTE content:null |
Unhandled tag: head content: |
Unhandled tag: head content: |
|
|
Unexpected character 'C' (code 67) in start tag Expected a quote |
Unexpected character 'C' (code 67) in start tag Expected a quote |
at [row,col,system-id]: [1093387,18,"agency-sample.xml"] |
at [row,col,system-id]: [1093387,18,"agency-sample.xml"] |
*/ |
*/ |
} else if (hasStartTagName(e, "AGENCY_NO")) { |
|
previousAgency.put("agency_no", getCharacters(r)); |
|
} else if (hasStartTagName(e, "TITLE")) { |
|
String title = getCharacters(r); |
|
previousAgency.put("name", title); |
|
previousAgency.put("label", title); |
|
} else if (hasStartTagName(e, "START_DATE")) { |
|
previousAgency.put("start_date", getCharacters(r)); |
|
} else if (hasStartTagName(e, "START_DATE_QUAL")) { |
|
previousAgency.put("start_date_qual", getCharacters(r)); |
|
} else if (hasStartTagName(e, "ALTERNATIVE_TITLE")) { |
|
previousAgency.put("alternative_title", getCharacters(r)); |
|
} else if (hasStartTagName(e, "END_DATE")) { |
|
previousAgency.put("end_date", getCharacters(r)); |
|
} else if (hasStartTagName(e, "END_DATE_QUAL")) { |
|
previousAgency.put("end_date_qual", getCharacters(r)); |
|
// save agency |
|
getAgency(previousAgency); |
|
previousAgency = new HashMap<String, Object>(); |
|
} else { |
|
System.out.println("Unhandled tag: " + getStartTagName(e) + " content:" + getCharacters(r)); |
|
} |
} |
} |
} |
} |
} |
} catch (XMLStreamException ex) { |
} catch (XMLStreamException ex) { |
System.out.println(ex.getMessage()); |
System.out.println(ex.getMessage()); |
|
|
if (ex.getNestedException() != null) { |
if (ex.getNestedException() != null) { |
ex.getNestedException().printStackTrace(); |
ex.getNestedException().printStackTrace(); |
} |
} |
} |
} |
|
|
} catch (FileNotFoundException ex) { |
} catch (FileNotFoundException ex) { |
System.err.println("Error. Cannot find \"" + filename + "\" in classpath."); |
System.err.println("Error. Cannot find \"" + filename + "\" in classpath."); |
ex.printStackTrace(); |
ex.printStackTrace(); |
} catch (Exception ex) { |
} catch (Exception ex) { |
ex.printStackTrace(); |
ex.printStackTrace(); |
} |
} |
|
|
inserter.shutdown(); |
inserter.shutdown(); |
} |
} |
|
|
private long getAgency(Map<String, Object> properties) { |
private long getAgency(Map<String, Object> properties) { |
|
if (properties.get("agency_no") == null || properties.get("agency_no") == "(null)" || properties.get("agency_no") == " ") { |
|
return 0; |
|
} |
if (agencyIDs.get(properties.get("agency_no").toString()) == null) { |
if (agencyIDs.get(properties.get("agency_no").toString()) == null) { |
long agencyID = inserter.createNode(properties, agencyLabel); |
long agencyID = inserter.createNode(properties, agencyLabel); |
if (properties.values().size() > 2) { |
/*if (properties.values().size() > 1) { |
agencyFullVersion.put(properties.get("agency_no").toString(), true); |
agencyFullVersion.put(properties.get("agency_no").toString(), true); |
} |
} */ |
agencyIDs.put(properties.get("agency_no").toString(), agencyID); |
agencyIDs.put(properties.get("agency_no").toString(), agencyID); |
//if (agencyID % 10 == 0) { |
//if (agencyID % 10 == 0) { |
System.out.println("Agency #"+agencyID); |
System.out.println("Agency #"+agencyID); |
//} |
//} |
return agencyID; |
return agencyID; |
} else { |
} else { |
long agencyID = agencyIDs.get(properties.get("agency_no").toString()); |
long agencyID = agencyIDs.get(properties.get("agency_no").toString()); |
if (properties.values().size() > 2 && agencyFullVersion.get(properties.get("agency_no")) == null) { |
/*if (properties.values().size() > 1 && agencyFullVersion.get(properties.get("agency_no")) == null) { |
inserter.setNodeProperties(agencyID, properties); |
inserter.setNodeProperties(agencyID, properties); |
agencyFullVersion.put(properties.get("agency_no").toString(), true); |
agencyFullVersion.put(properties.get("agency_no").toString(), true); |
} |
} */ |
return agencyID; |
return agencyID; |
} |
} |
} |
} |
|
|
private long getLocation(String locationName) { |
private long getLocation(String locationName) { |
if (locationIDs.get(locationName) == null) { |
if (locationIDs.get(locationName) == null) { |
HashMap properties = new HashMap< String,Object > (); |
HashMap properties = new HashMap< String,Object > (); |
properties.put("name", locationName); |
properties.put("name", locationName); |
properties.put("label", locationName); |
properties.put("label", locationName); |
long locationID = inserter.createNode(properties, locationLabel); |
long locationID = inserter.createNode(properties, locationLabel); |
locationIDs.put(locationName, locationID); |
locationIDs.put(locationName, locationID); |
return locationID; |
return locationID; |
} else { |
} else { |
return locationIDs.get(locationName); |
return locationIDs.get(locationName); |
} |
} |
} |
} |
private long getFunction(String functionName) { |
private long getFunction(String functionName) { |
if (functionIDs.get(functionName) == null) { |
if (functionIDs.get(functionName) == null) { |
HashMap properties = new HashMap< String,Object > (); |
HashMap properties = new HashMap< String,Object > (); |
properties.put("name", functionName); |
properties.put("name", functionName); |
properties.put("label", functionName); |
properties.put("label", functionName); |
long functionID = inserter.createNode(properties, functionLabel); |
long functionID = inserter.createNode(properties, functionLabel); |
functionIDs.put(functionName, functionID); |
functionIDs.put(functionName, functionID); |
return functionID; |
return functionID; |
} else { |
} else { |
return functionIDs.get(functionName); |
return functionIDs.get(functionName); |
} |
} |
} |
} |
private long getStatus(String statusName) { |
private long getStatus(String statusName) { |
if (statusIDs.get(statusName) == null) { |
if (statusIDs.get(statusName) == null) { |
HashMap properties = new HashMap< String,Object > (); |
HashMap properties = new HashMap< String,Object > (); |
properties.put("name", statusName); |
properties.put("name", statusName); |
properties.put("label", statusName); |
properties.put("label", statusName); |
long statusID = inserter.createNode(properties, statusLabel); |
long statusID = inserter.createNode(properties, statusLabel); |
statusIDs.put(statusName, statusID); |
statusIDs.put(statusName, statusID); |
return statusID; |
return statusID; |
} else { |
} else { |
return statusIDs.get(statusName); |
return statusIDs.get(statusName); |
} |
} |
} |
} |
|
|
private void processAgencyLink(XMLEventReader rdr) throws Exception { |
private void processAgencyLink(XMLEventReader rdr) throws Exception { |
String agency_from_no = null; |
String agency_from_no = null; |
String agency_to_no = null; |
String agency_to_no = null; |
String link_type = null; |
String link_type = null; |
String start_date = null; |
String start_date = null; |
String start_date_qual = null; |
String start_date_qual = null; |
String end_date = null; |
String end_date = null; |
String end_date_qual = null; |
String end_date_qual = null; |
|
|
while (rdr.hasNext()) { |
while (rdr.hasNext()) { |
XMLEvent e = rdr.nextEvent(); |
XMLEvent e = rdr.nextEvent(); |
if (e.isStartElement()) { |
if (e.isStartElement()) { |
if (hasStartTagName(e, "LINK_AGENCY_NO")) { |
if (hasStartTagName(e, "LINK_AGENCY_NO")) { |
agency_from_no = getCharacters(rdr); |
agency_from_no = getCharacters(rdr); |
} else if (hasStartTagName(e, "LINK_TO_AGENCY_NO")) { |
} else if (hasStartTagName(e, "LINK_TO_AGENCY_NO")) { |
agency_to_no = getCharacters(rdr); |
agency_to_no = getCharacters(rdr); |
} else if (hasStartTagName(e, "LINK_TYPE")) { |
} else if (hasStartTagName(e, "LINK_TYPE")) { |
link_type = getCharacters(rdr); |
link_type = getCharacters(rdr); |
} else if (hasStartTagName(e, "START_DATE")) { |
} else if (hasStartTagName(e, "START_DATE")) { |
start_date = getCharacters(rdr); |
start_date = getCharacters(rdr); |
}else if (hasStartTagName(e, "START_DATE_QUAL")) { |
}else if (hasStartTagName(e, "START_DATE_QUAL")) { |
start_date_qual = getCharacters(rdr); |
start_date_qual = getCharacters(rdr); |
}else if (hasStartTagName(e, "END_DATE")) { |
}else if (hasStartTagName(e, "END_DATE")) { |
end_date = getCharacters(rdr); |
end_date = getCharacters(rdr); |
}else if (hasStartTagName(e, "END_DATE_QUAL")) { |
}else if (hasStartTagName(e, "END_DATE_QUAL")) { |
end_date_qual = getCharacters(rdr); |
end_date_qual = getCharacters(rdr); |
} |
} |
} |
} |
if (e.isEndElement()) { |
if (e.isEndElement()) { |
if (hasEndTagName(e, "AGENCY_LINK")) { |
if (hasEndTagName(e, "AGENCY_LINK")) { |
|
|
//System.out.println("Finished processing link: type = " + link_type+ "; from = " + agency_from_no + "; to = " + agency_to_no); |
//System.out.println("Finished processing link: type = " + link_type+ "; from = " + agency_from_no + "; to = " + agency_to_no); |
long agencyFromID, agencyToID; |
if (agency_to_no != null && !agency_to_no.equals("(null)")) { |
Map<String, Object> agencyFromProperties = new HashMap<String, Object>(); |
long agencyFromID, agencyToID; |
agencyFromProperties.put("agency_no",agency_from_no); |
Map<String, Object> agencyFromProperties = new HashMap<String, Object>(); |
agencyFromID = getAgency(agencyFromProperties); |
agencyFromProperties.put("agency_from_no", agency_from_no); |
Map<String, Object> agencyToProperties = new HashMap<String, Object>(); |
agencyFromID = getAgency(agencyFromProperties); |
agencyToProperties.put("agency_no",agency_to_no); |
Map<String, Object> agencyToProperties = new HashMap<String, Object>(); |
agencyToID = getAgency(agencyToProperties); |
agencyToProperties.put("agency_to_no", agency_to_no); |
Map<String, Object> relProperties = new HashMap<String, Object>(); |
agencyToID = getAgency(agencyToProperties); |
relProperties.put("link_type", link_type); |
Map<String, Object> relProperties = new HashMap<String, Object>(); |
relProperties.put("start_date", start_date); |
relProperties.put("link_type", link_type); |
if (start_date_qual != null && !start_date_qual.equals("(null)")) { |
if (start_date != null && !start_date.equals("(null)")) { |
relProperties.put("start_date_qual", start_date_qual); |
relProperties.put("start_date", Integer.parseInt(start_date)); |
} |
} |
if (end_date != null && !end_date.equals("(null)")) { |
if (start_date_qual != null && !start_date_qual.equals("(null)")) { |
relProperties.put("end_date", end_date); |
relProperties.put("start_date_qual", start_date_qual); |
} |
} |
if (end_date_qual != null && !end_date_qual.equals("(null)")) { |
if (end_date != null && !end_date.equals("(null)")) { |
relProperties.put("end_date_qual", end_date_qual); |
relProperties.put("end_date", Integer.parseInt(end_date)); |
} |
} |
inserter.createRelationship(agencyFromID, agencyToID, |
if (end_date_qual != null && !end_date_qual.equals("(null)")) { |
DynamicRelationshipType.withName("IS_LINKED_TO"), relProperties); |
relProperties.put("end_date_qual", end_date_qual); |
|
} |
break; |
inserter.createRelationship(agencyFromID, agencyToID, |
|
DynamicRelationshipType.withName("IS_LINKED_TO"), relProperties); |
|
} |
|
break; |
|
|
} |
} |
} |
} |
} |
} |
} |
} |
|
|
private void processAgencyLocation(XMLEventReader rdr) throws Exception { |
private void processAgencyLocation(XMLEventReader rdr) throws Exception { |
String of = nul |