--- a/admin/neo4jimporter/src/main/java/StAXSample.java +++ b/admin/neo4jimporter/src/main/java/StAXSample.java @@ -1,1 +1,375 @@ - +import org.neo4j.graphdb.DynamicLabel; +import org.neo4j.graphdb.DynamicRelationshipType; +import org.neo4j.graphdb.Label; +import org.neo4j.unsafe.batchinsert.BatchInserter; +import org.neo4j.unsafe.batchinsert.BatchInserters; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.util.HashMap; +import java.util.Map; + +import javax.xml.stream.XMLInputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLEventReader; +import javax.xml.stream.events.XMLEvent; + +public class StAXSample { + + + HashMap agencyIDs = new HashMap(); + HashMap agencyFullVersion = new HashMap(); + Label agencyLabel = DynamicLabel.label("Agency"); + HashMap locationIDs = new HashMap(); + Label locationLabel = DynamicLabel.label("Location"); + HashMap functionIDs = new HashMap(); + Label functionLabel = DynamicLabel.label("Function"); + HashMap statusIDs = new HashMap(); + Label statusLabel = DynamicLabel.label("Location"); + BatchInserter inserter; + + private String filename; + + public StAXSample() { + } + + public static void main(String[] args) { + if (args.length != 1) { + System.out.println("Usage: StAXSample file.xml"); + System.exit(-1); + } + + StAXSample ss = new StAXSample(); + ss.setFilename(args[0]); + ss.run(); + } + + public void run() { + + Map config = new HashMap(); + config.put("neostore.nodestore.db.mapped_memory", "90M"); + inserter = BatchInserters.inserter("target/batchinserter-example-config", config); + inserter.createDeferredSchemaIndex(agencyLabel).on("agency_no"); + inserter.createDeferredSchemaIndex(locationLabel).on("location_name"); + inserter.createDeferredSchemaIndex(functionLabel).on("thesaurus_term"); + inserter.createDeferredSchemaIndex(statusLabel).on("status_name"); + + try { + XMLInputFactory xmlif = XMLInputFactory.newInstance(); + xmlif.setProperty( + XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, + Boolean.TRUE); + xmlif.setProperty( + XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, + Boolean.FALSE); + //set the IS_COALESCING property to true + //to get whole text data as one event. + xmlif.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE); + + try { + XMLEventReader r = null; + r = xmlif.createXMLEventReader( + filename, + //new FileInputStream(new File(xmlFileURL.toURI()))); + new FileInputStream(new File(filename))); + + + //iterate as long as there are more events on the input stream + while (r.hasNext()) { + XMLEvent e = r.nextEvent(); + Map previousAgency = new HashMap(); + if (e.isStartElement()) { + if (hasStartTagName(e, "AGENCIES")) { + System.out.println("Agencies file loaded... "); + } else if (hasStartTagName(e, "TITLE")) { + System.out.println("TITLE is: " + getCharacters(r)); + previousAgency.put("title", getCharacters(r)); + } else if (hasStartTagName(e, "END_DATE_QUAL")) { + System.out.println("END_DATE_QUAL is: " + getCharacters(r)); + previousAgency.put("end_date_qual", getCharacters(r)); + // save agency + getAgency(previousAgency); + previousAgency = new HashMap(); + } else if (hasStartTagName(e, "AGENCY_LINK")) { + processAgencyLink(r); + } else if (hasStartTagName(e, "AGENCY_LOCATION")) { + processAgencyLocation(r); + } else if (hasStartTagName(e, "AGENCY_FUNCTION")) { + processAgencyFunction(r); + } else if (hasStartTagName(e, "AGENCY_STATUS")) { + processAgencyStatus(r); + } else { + System.out.println("Unhandled tag: " + getStartTagName(e) + " content:" + getCharacters(r)); + } + } + } + } catch (XMLStreamException ex) { + System.out.println(ex.getMessage()); + + if (ex.getNestedException() != null) { + ex.getNestedException().printStackTrace(); + } + } + + } catch (FileNotFoundException ex) { + System.err.println("Error. Cannot find \"" + filename + "\" in classpath."); + ex.printStackTrace(); + } catch (Exception ex) { + ex.printStackTrace(); + } + + inserter.shutdown(); + } + + private long getAgency(Map properties) { + if (agencyIDs.get(properties.get("agency_no").toString()) == null) { + long agencyID = inserter.createNode(properties, agencyLabel); + if (properties.values().size() > 2) { + agencyFullVersion.put(properties.get("agency_no").toString(), true); + } + agencyIDs.put(properties.get("agency_no").toString(), agencyID); + return agencyID; + } else { + long agencyID = agencyIDs.get(properties.get("agency_no").toString()); + if (properties.values().size() > 2 && agencyFullVersion.get(properties.get("agency_no")) == null) { + inserter.setNodeProperties(agencyID, properties); + agencyFullVersion.put(properties.get("agency_no").toString(), true); + } + return agencyID; + } + } + + private long getLocation(String locationName) { + if (locationIDs.get(locationName) == null) { + HashMap properties = new HashMap< String,Object > (); + properties.put("location_name", locationName); + long locationID = inserter.createNode(properties, locationLabel); + locationIDs.put(locationName, locationID); + return locationID; + } else { + return locationIDs.get(locationName); + } + } + private long getFunction(String functionName) { + if (functionIDs.get(functionName) == null) { + HashMap properties = new HashMap< String,Object > (); + properties.put("function_name", functionName); + long functionID = inserter.createNode(properties, functionLabel); + functionIDs.put(functionName, functionID); + return functionID; + } else { + return functionIDs.get(functionName); + } + } + private long getStatus(String statusName) { + if (statusIDs.get(statusName) == null) { + HashMap properties = new HashMap< String,Object > (); + properties.put("status_name", statusName); + long statusID = inserter.createNode(properties, statusLabel); + statusIDs.put(statusName, statusID); + return statusID; + } else { + return statusIDs.get(statusName); + } + } + + private void processAgencyLink(XMLEventReader rdr) throws Exception { + String agency_from_no = null; + String agency_to_no = null; + String link_type = null; + String start_date = null; + String start_date_qual = null; + String end_date = null; + String end_date_qual = null; + + while (rdr.hasNext()) { + XMLEvent e = rdr.nextEvent(); + if (e.isStartElement()) { + if (hasStartTagName(e, "LINK_AGENCY_NO")) { + agency_from_no = getCharacters(rdr); + } else if (hasStartTagName(e, "LINK_TO_AGENCY_NO")) { + agency_to_no = getCharacters(rdr); + } else if (hasStartTagName(e, "LINK_TYPE")) { + link_type = getCharacters(rdr); + } else if (hasStartTagName(e, "START_DATE")) { + start_date = getCharacters(rdr); + }else if (hasStartTagName(e, "START_DATE_QUAL")) { + start_date_qual = getCharacters(rdr); + }else if (hasStartTagName(e, "END_DATE")) { + end_date = getCharacters(rdr); + }else if (hasStartTagName(e, "END_DATE_QUAL")) { + end_date_qual = getCharacters(rdr); + } + } + if (e.isEndElement()) { + if (hasEndTagName(e, "AGENCY_LINK")) { + + //System.out.println("Finished processing link: Name = " + name + "; of = " + of + "; date = " + date); + long agencyFromID, agencyToID; + Map agencyFromProperties = new HashMap(); + agencyFromProperties.put("agency_no",agency_from_no); + agencyFromID = getAgency(agencyFromProperties); + Map agencyToProperties = new HashMap(); + agencyToProperties.put("agency_no",agency_to_no); + agencyToID = getAgency(agencyToProperties); + Map relProperties = new HashMap(); + relProperties.put("link_type", link_type); + relProperties.put("start_date", start_date); + relProperties.put("start_date_qual", start_date_qual); + relProperties.put("end_date", end_date); + relProperties.put("end_date_qual", end_date_qual); + inserter.createRelationship(agencyFromID, agencyToID, + DynamicRelationshipType.withName("IS_LINKED_TO"), relProperties); + + break; + } + } + } + } + + private void processAgencyLocation(XMLEventReader rdr) throws Exception { + String of = null; + String name = null; + String date = null; + + while (rdr.hasNext()) { + XMLEvent e = rdr.nextEvent(); + if (e.isStartElement()) { + if (hasStartTagName(e, "LOCATION_AGENCY_NO")) { + of = getCharacters(rdr); + } else if (hasStartTagName(e, "LOCATION_TEXT")) { + name = getCharacters(rdr); + } else if (hasStartTagName(e, "LOCATION_DATE")) { + date = getCharacters(rdr); + } + } + if (e.isEndElement()) { + if (hasEndTagName(e, "AGENCY_LOCATION")) { + System.out.println("Finished processing location: Name = " + name + "; of = " + of + "; date = " + date); + long locationID, agencyID; + locationID = getLocation(name); + Map agencyProperties = new HashMap(); + agencyProperties.put("agency_no",of); + agencyID = getAgency(agencyProperties); + Map relProperties = new HashMap(); + relProperties.put("date", date); + inserter.createRelationship(agencyID, locationID, + DynamicRelationshipType.withName("HAS_LOCATION"), relProperties); + + break; + } + } + } + } + + private void processAgencyStatus(XMLEventReader rdr) throws Exception { + String of = null; + String status = null; + String date = null; + + while (rdr.hasNext()) { + XMLEvent e = rdr.nextEvent(); + if (e.isStartElement()) { + if (hasStartTagName(e, "STATUS_AGENCY_NO")) { + of = getCharacters(rdr); + } else if (hasStartTagName(e, "STATUS")) { + status = getCharacters(rdr); + } else if (hasStartTagName(e, "STATUS_DATE")) { + date = getCharacters(rdr); + } + } + if (e.isEndElement()) { + if (hasEndTagName(e, "AGENCY_STATUS")) { + System.out.println("Finished processing status: Status = " + status + "; of = " + of + "; date = " + date); + long statusID, agencyID; + statusID = getStatus(status); + Map agencyProperties = new HashMap(); + agencyProperties.put("agency_no",of); + agencyID = getAgency(agencyProperties); + Map relProperties = new HashMap(); + relProperties.put("date", date); + inserter.createRelationship(agencyID, statusID, + DynamicRelationshipType.withName("HAS_STATUS"), relProperties); + + break; + } + } + } + } + + private void processAgencyFunction(XMLEventReader rdr) throws Exception { + String agency = null; + String thesaurus_term = null; + String start_date = null; + String start_date_qual = null; + String end_date = null; + String end_date_qual = null; + + while (rdr.hasNext()) { + XMLEvent e = rdr.nextEvent(); + if (e.isStartElement()) { + if (hasStartTagName(e, "FUNCTION_AGENCY_NO")) { + agency = getCharacters(rdr); + } else if (hasStartTagName(e, "THESAURUS_TERM")) { + thesaurus_term = getCharacters(rdr); + } else if (hasStartTagName(e, "START_DATE")) { + start_date = getCharacters(rdr); + }else if (hasStartTagName(e, "START_DATE_QUAL")) { + start_date_qual = getCharacters(rdr); + }else if (hasStartTagName(e, "END_DATE")) { + end_date = getCharacters(rdr); + }else if (hasStartTagName(e, "END_DATE_QUAL")) { + end_date_qual = getCharacters(rdr); + } + } + if (e.isEndElement()) { + if (hasEndTagName(e, "AGENCY_FUNCTION")) { + //System.out.println("Finished processing function: Name = " + name + "; of = " + of + "; date = " + date); + long functionID, agencyID; + functionID = getFunction(thesaurus_term); + Map agencyProperties = new HashMap(); + agencyProperties.put("agency_no",agency); + agencyID = getAgency(agencyProperties); + Map relProperties = new HashMap(); + relProperties.put("start_date", start_date); + relProperties.put("start_date_qual", start_date_qual); + relProperties.put("end_date", end_date); + relProperties.put("end_date_qual", end_date_qual); + inserter.createRelationship(agencyID, functionID, + DynamicRelationshipType.withName("HAS_FUNCTION"), relProperties); + + break; + } + } + } + } + + private String getCharacters(XMLEventReader rdr) throws XMLStreamException { + XMLEvent e = rdr.nextEvent(); + if (e.isCharacters()) { + return e.asCharacters().getData(); + } else { + return null; + } + } + + private boolean hasStartTagName(XMLEvent e, String name) { + return e.asStartElement().getName().getLocalPart().equals(name); + } + + private String getStartTagName(XMLEvent e) { + return e.asStartElement().getName().getLocalPart(); + } + + private boolean hasEndTagName(XMLEvent e, String name) { + return e.asEndElement().getName().getLocalPart().equals(name); + } + + public void setFilename(String filename) { + this.filename = filename; + } + + +} +