Review notes for this patch. Text ahead of the first file header is skipped by patch tools.

Purpose of the patch, as visible in the hunks below:
  * run() now reads the XML twice: a first pass creates one node per agency
    (agency_no, name/label, dates, qualifiers), a second pass creates the
    link / location / function / status relationships.
  * Location, function and status nodes store their text under "name" and "label".
  * Dates are stored as ints; "(null)" placeholders are skipped.
  * Tag names are matched case-insensitively.

Corrections folded into the added lines during review:
  * main: use the file given on the command line, fall back to agency-sample.xml.
  * getAgency: compare placeholder strings with equals(), not ==.
  * processAgencyLink: look agencies up under "agency_no", the only key getAgency
    reads; with "agency_from_no"/"agency_to_no" every lookup returned 0.
    Links whose source agency is missing are skipped as well.
  * processAgencyFunction: guard start_date like the other optional fields.
  * fixDate: a null date yields 0 instead of a NullPointerException.
  * START_DATE / END_DATE of an agency: ignore any blank text, trim before parsing.
  * hasStartTagName / hasEndTagName: equalsIgnoreCase instead of default-locale
    toLowerCase().

NOTE(review): indentation and blank-line placement were reconstructed from a
whitespace-collapsed copy of this patch; apply with whitespace-insensitive
matching (patch -l, or git apply --ignore-whitespace) and check the result.

--- a/admin/neo4jimporter/src/main/java/StAXSample.java
+++ b/admin/neo4jimporter/src/main/java/StAXSample.java
@@ -35,13 +35,14 @@
     }
 
     public static void main(String[] args) {
-        if (args.length != 1) {
+        /*if (args.length != 1) {
             System.out.println("Usage: StAXSample file.xml");
             System.exit(-1);
-        }
+        } */
 
         StAXSample ss = new StAXSample();
-        ss.setFilename(args[0]);
+        // Use the file named on the command line; fall back to agency-sample.xml.
+        ss.setFilename(args.length > 0 ? args[0] : "agency-sample.xml");
         ss.run();
     }
 
@@ -76,21 +77,51 @@
 
 
             //iterate as long as there are more events on the input stream
+            Map<String, Object> previousAgency = new HashMap<String, Object>();
             while (r.hasNext()) {
                 XMLEvent e = r.nextEvent();
-                Map<String, Object> previousAgency = new HashMap<String, Object>();
+
                 if (e.isStartElement()) {
                     if (hasStartTagName(e, "AGENCIES")) {
                         System.out.println("Agencies file loaded... ");
+                    } else if (hasStartTagName(e, "AGENCY_NO")) {
+                        previousAgency.put("agency_no", getCharacters(r));
                     } else if (hasStartTagName(e, "TITLE")) {
-                        System.out.println("TITLE is: " + getCharacters(r));
-                        previousAgency.put("title", getCharacters(r));
+                        String title = getCharacters(r);
+                        previousAgency.put("name", title);
+                        previousAgency.put("label", title);
+                    } else if (hasStartTagName(e, "START_DATE")) {
+                        String start_date = getCharacters(r);
+                        if (start_date != null && !start_date.trim().isEmpty() && !start_date.equals("(null)")) {
+                            previousAgency.put("start_date", Integer.parseInt(start_date.trim()));
+                        }
+                    } else if (hasStartTagName(e, "START_DATE_QUAL")) {
+                        previousAgency.put("start_date_qual", getCharacters(r));
+                    } else if (hasStartTagName(e, "ALTERNATIVE_TITLE")) {
+                        previousAgency.put("alternative_title", getCharacters(r));
+                    } else if (hasStartTagName(e, "END_DATE")) {
+                        String end_date = getCharacters(r);
+                        if (end_date != null && !end_date.trim().isEmpty() && !end_date.equals("(null)")) {
+                            previousAgency.put("end_date", Integer.parseInt(end_date.trim()));
+                        }
                     } else if (hasStartTagName(e, "END_DATE_QUAL")) {
-                        System.out.println("END_DATE_QUAL is: " + getCharacters(r));
                         previousAgency.put("end_date_qual", getCharacters(r));
                         // save agency
                         getAgency(previousAgency);
                         previousAgency = new HashMap<String, Object>();
+                    }
+                }
+            }
+            r = xmlif.createXMLEventReader(
+                    filename,
+                    //new FileInputStream(new File(xmlFileURL.toURI())));
+                    new FileInputStream(new File(filename)));
+            while (r.hasNext()) {
+                XMLEvent e = r.nextEvent();
+
+                if (e.isStartElement()) {
+                    if (hasStartTagName(e, "AGENCIES")) {
+                        System.out.println("Agencies file loaded again... ");
                     } else if (hasStartTagName(e, "AGENCY_LINK")) {
                         processAgencyLink(r);
                     } else if (hasStartTagName(e, "AGENCY_LOCATION")) {
@@ -99,8 +130,17 @@
                         processAgencyFunction(r);
                     } else if (hasStartTagName(e, "AGENCY_STATUS")) {
                         processAgencyStatus(r);
-                    } else {
-                        System.out.println("Unhandled tag: " + getStartTagName(e) + " content:" + getCharacters(r));
+                        /* TODO
+                        Unhandled tag: AGENCY_NOTE content:
+
+Unhandled tag: NOTE_AGENCY_NO content:CA 4886
+Unhandled tag: NOTE_TYPE content:Archivists note
+Unhandled tag: NOTE content:null
+Unhandled tag: head content:
+
+Unexpected character 'C' (code 67) in start tag Expected a quote
+ at [row,col,system-id]: [1093387,18,"agency-sample.xml"]
+                        */
                     }
                 }
             }
@@ -123,19 +163,25 @@
     }
 
     private long getAgency(Map<String, Object> properties) {
+        if (properties.get("agency_no") == null || "(null)".equals(properties.get("agency_no")) || " ".equals(properties.get("agency_no"))) {
+            return 0;
+        }
         if (agencyIDs.get(properties.get("agency_no").toString()) == null) {
             long agencyID = inserter.createNode(properties, agencyLabel);
-            if (properties.values().size() > 2) {
+            /*if (properties.values().size() > 1) {
                 agencyFullVersion.put(properties.get("agency_no").toString(), true);
-            }
+            } */
             agencyIDs.put(properties.get("agency_no").toString(), agencyID);
+            //if (agencyID % 10 == 0) {
+                System.out.println("Agency #"+agencyID);
+            //}
             return agencyID;
         } else {
             long agencyID = agencyIDs.get(properties.get("agency_no").toString());
-            if (properties.values().size() > 2 && agencyFullVersion.get(properties.get("agency_no")) == null) {
+            /*if (properties.values().size() > 1 && agencyFullVersion.get(properties.get("agency_no")) == null) {
                 inserter.setNodeProperties(agencyID, properties);
                 agencyFullVersion.put(properties.get("agency_no").toString(), true);
-            }
+            } */
             return agencyID;
         }
     }
@@ -143,7 +189,8 @@
     private long getLocation(String locationName) {
         if (locationIDs.get(locationName) == null) {
             HashMap properties = new HashMap< String,Object > ();
-            properties.put("location_name", locationName);
+            properties.put("name", locationName);
+            properties.put("label", locationName);
             long locationID = inserter.createNode(properties, locationLabel);
             locationIDs.put(locationName, locationID);
             return locationID;
@@ -154,7 +201,8 @@
     private long getFunction(String functionName) {
         if (functionIDs.get(functionName) == null) {
             HashMap properties = new HashMap< String,Object > ();
-            properties.put("function_name", functionName);
+            properties.put("name", functionName);
+            properties.put("label", functionName);
             long functionID = inserter.createNode(properties, functionLabel);
             functionIDs.put(functionName, functionID);
             return functionID;
@@ -165,7 +213,8 @@
     private long getStatus(String statusName) {
         if (statusIDs.get(statusName) == null) {
             HashMap properties = new HashMap< String,Object > ();
-            properties.put("status_name", statusName);
+            properties.put("name", statusName);
+            properties.put("label", statusName);
             long statusID = inserter.createNode(properties, statusLabel);
             statusIDs.put(statusName, statusID);
             return statusID;
@@ -205,24 +254,34 @@
 
             if (e.isEndElement()) {
                 if (hasEndTagName(e, "AGENCY_LINK")) {
-                    //System.out.println("Finished processing link: Name = " + name + "; of = " + of + "; date = " + date);
-                    long agencyFromID, agencyToID;
-                    Map<String, Object> agencyFromProperties = new HashMap<String, Object>();
-                    agencyFromProperties.put("agency_no",agency_from_no);
-                    agencyFromID = getAgency(agencyFromProperties);
-                    Map<String, Object> agencyToProperties = new HashMap<String, Object>();
-                    agencyToProperties.put("agency_no",agency_to_no);
-                    agencyToID = getAgency(agencyToProperties);
-                    Map<String, Object> relProperties = new HashMap<String, Object>();
-                    relProperties.put("link_type", link_type);
-                    relProperties.put("start_date", start_date);
-                    relProperties.put("start_date_qual", start_date_qual);
-                    relProperties.put("end_date", end_date);
-                    relProperties.put("end_date_qual", end_date_qual);
-                    inserter.createRelationship(agencyFromID, agencyToID,
-                            DynamicRelationshipType.withName("IS_LINKED_TO"), relProperties);
-
-                    break;
+                    //System.out.println("Finished processing link: type = " + link_type+ "; from = " + agency_from_no + "; to = " + agency_to_no);
+                    if (agency_from_no != null && !agency_from_no.equals("(null)") && agency_to_no != null && !agency_to_no.equals("(null)")) {
+                        long agencyFromID, agencyToID;
+                        Map<String, Object> agencyFromProperties = new HashMap<String, Object>();
+                        agencyFromProperties.put("agency_no", agency_from_no);
+                        agencyFromID = getAgency(agencyFromProperties);
+                        Map<String, Object> agencyToProperties = new HashMap<String, Object>();
+                        agencyToProperties.put("agency_no", agency_to_no);
+                        agencyToID = getAgency(agencyToProperties);
+                        Map<String, Object> relProperties = new HashMap<String, Object>();
+                        relProperties.put("link_type", link_type);
+                        if (start_date != null && !start_date.equals("(null)")) {
+                            relProperties.put("start_date", Integer.parseInt(start_date));
+                        }
+                        if (start_date_qual != null && !start_date_qual.equals("(null)")) {
+                            relProperties.put("start_date_qual", start_date_qual);
+                        }
+                        if (end_date != null && !end_date.equals("(null)")) {
+                            relProperties.put("end_date", Integer.parseInt(end_date));
+                        }
+                        if (end_date_qual != null && !end_date_qual.equals("(null)")) {
+                            relProperties.put("end_date_qual", end_date_qual);
+                        }
+                        inserter.createRelationship(agencyFromID, agencyToID,
+                                DynamicRelationshipType.withName("IS_LINKED_TO"), relProperties);
+                    }
+                    break;
+
                 }
             }
         }
@@ -246,14 +305,14 @@
             }
             if (e.isEndElement()) {
                 if (hasEndTagName(e, "AGENCY_LOCATION")) {
-                    System.out.println("Finished processing location: Name = " + name + "; of = " + of + "; date = " + date);
+                    //System.out.println("Finished processing location: Name = " + name + "; of = " + of + "; date = " + date);
                     long locationID, agencyID;
                     locationID = getLocation(name);
                     Map<String, Object> agencyProperties = new HashMap<String, Object>();
                     agencyProperties.put("agency_no",of);
                     agencyID = getAgency(agencyProperties);
                     Map<String, Object> relProperties = new HashMap<String, Object>();
-                    relProperties.put("date", date);
+                    relProperties.put("date", fixDate(date));
                     inserter.createRelationship(agencyID, locationID,
                             DynamicRelationshipType.withName("HAS_LOCATION"), relProperties);
 
@@ -281,14 +340,14 @@
             }
             if (e.isEndElement()) {
                 if (hasEndTagName(e, "AGENCY_STATUS")) {
-                    System.out.println("Finished processing status: Status = " + status + "; of = " + of + "; date = " + date);
+                    //System.out.println("Finished processing status: Status = " + status + "; of = " + of + "; date = " + date);
                     long statusID, agencyID;
                     statusID = getStatus(status);
                     Map<String, Object> agencyProperties = new HashMap<String, Object>();
                     agencyProperties.put("agency_no",of);
                     agencyID = getAgency(agencyProperties);
                     Map<String, Object> relProperties = new HashMap<String, Object>();
-                    relProperties.put("date", date);
+                    relProperties.put("date", fixDate(date));
                     inserter.createRelationship(agencyID, statusID,
                             DynamicRelationshipType.withName("HAS_STATUS"), relProperties);
 
@@ -332,10 +391,18 @@
                     agencyProperties.put("agency_no",agency);
                     agencyID = getAgency(agencyProperties);
                     Map<String, Object> relProperties = new HashMap<String, Object>();
-                    relProperties.put("start_date", start_date);
-                    relProperties.put("start_date_qual", start_date_qual);
-                    relProperties.put("end_date", end_date);
-                    relProperties.put("end_date_qual", end_date_qual);
+                    if (start_date != null && !start_date.equals("(null)")) {
+                        relProperties.put("start_date", Integer.parseInt(start_date));
+                    }
+                    if (start_date_qual != null && !start_date_qual.equals("(null)")) {
+                        relProperties.put("start_date_qual", start_date_qual);
+                    }
+                    if (end_date != null && !end_date.equals("(null)")) {
+                        relProperties.put("end_date", Integer.parseInt(end_date));
+                    }
+                    if (end_date_qual != null && !end_date_qual.equals("(null)")) {
+                        relProperties.put("end_date_qual", end_date_qual);
+                    }
                     inserter.createRelationship(agencyID, functionID,
                             DynamicRelationshipType.withName("HAS_FUNCTION"), relProperties);
 
@@ -344,7 +411,22 @@
             }
         }
     }
-
+    /**
+     * Rebuilds a dash-separated date by reversing its three parts and
+     * parsing the result as an int, e.g. "31-12-1999" becomes 19991231.
+     *
+     * @param date raw date text; may be null
+     * @return the reordered value, or 0 when date is null or does not split
+     *         into exactly three parts
+     */
+    private int fixDate(String date) {
+        String[] parts = (date == null) ? new String[0] : date.split("-");
+        if (parts.length == 3) {
+            return Integer.parseInt(""+parts[2]+parts[1]+parts[0]);
+        } else {
+            return 0;
+        }
+    }
     private String getCharacters(XMLEventReader rdr) throws XMLStreamException {
         XMLEvent e = rdr.nextEvent();
         if (e.isCharacters()) {
@@ -355,15 +437,11 @@
     }
 
     private boolean hasStartTagName(XMLEvent e, String name) {
-        return e.asStartElement().getName().getLocalPart().equals(name);
-    }
-
-    private String getStartTagName(XMLEvent e) {
-        return e.asStartElement().getName().getLocalPart();
+        return e.asStartElement().getName().getLocalPart().equalsIgnoreCase(name);
     }
 
     private boolean hasEndTagName(XMLEvent e, String name) {
-        return e.asEndElement().getName().getLocalPart().equals(name);
+        return e.asEndElement().getName().getLocalPart().equalsIgnoreCase(name);
     }
 
     public void setFilename(String filename) {
@@ -373,3 +451,4 @@
 
 }
 
+