--- a/admin/neo4jimporter/src/main/java/StAXSample.java +++ b/admin/neo4jimporter/src/main/java/StAXSample.java @@ -35,13 +35,14 @@ } public static void main(String[] args) { - if (args.length != 1) { + /*if (args.length != 1) { System.out.println("Usage: StAXSample file.xml"); System.exit(-1); - } + } */ StAXSample ss = new StAXSample(); - ss.setFilename(args[0]); + //ss.setFilename(args[0]); + ss.setFilename("agency-sample.xml"); ss.run(); } @@ -76,21 +77,13 @@ //iterate as long as there are more events on the input stream + Map previousAgency = new HashMap(); while (r.hasNext()) { XMLEvent e = r.nextEvent(); - Map previousAgency = new HashMap(); + if (e.isStartElement()) { if (hasStartTagName(e, "AGENCIES")) { System.out.println("Agencies file loaded... "); - } else if (hasStartTagName(e, "TITLE")) { - System.out.println("TITLE is: " + getCharacters(r)); - previousAgency.put("title", getCharacters(r)); - } else if (hasStartTagName(e, "END_DATE_QUAL")) { - System.out.println("END_DATE_QUAL is: " + getCharacters(r)); - previousAgency.put("end_date_qual", getCharacters(r)); - // save agency - getAgency(previousAgency); - previousAgency = new HashMap(); } else if (hasStartTagName(e, "AGENCY_LINK")) { processAgencyLink(r); } else if (hasStartTagName(e, "AGENCY_LOCATION")) { @@ -99,6 +92,36 @@ processAgencyFunction(r); } else if (hasStartTagName(e, "AGENCY_STATUS")) { processAgencyStatus(r); + /* TODO + Unhandled tag: AGENCY_NOTE content: + +Unhandled tag: NOTE_AGENCY_NO content:CA 4886 +Unhandled tag: NOTE_TYPE content:Archivists note +Unhandled tag: NOTE content:null +Unhandled tag: head content: + +Unexpected character 'C' (code 67) in start tag Expected a quote + at [row,col,system-id]: [1093387,18,"agency-sample.xml"] + */ + } else if (hasStartTagName(e, "AGENCY_NO")) { + previousAgency.put("agency_no", getCharacters(r)); + } else if (hasStartTagName(e, "TITLE")) { + String title = getCharacters(r); + previousAgency.put("name", title); + previousAgency.put("label", title); + } else if (hasStartTagName(e, "START_DATE")) { + previousAgency.put("start_date", getCharacters(r)); + } else if (hasStartTagName(e, "START_DATE_QUAL")) { + previousAgency.put("start_date_qual", getCharacters(r)); + } else if (hasStartTagName(e, "ALTERNATIVE_TITLE")) { + previousAgency.put("alternative_title", getCharacters(r)); + } else if (hasStartTagName(e, "END_DATE")) { + previousAgency.put("end_date", getCharacters(r)); + } else if (hasStartTagName(e, "END_DATE_QUAL")) { + previousAgency.put("end_date_qual", getCharacters(r)); + // save agency + getAgency(previousAgency); + previousAgency = new HashMap(); } else { System.out.println("Unhandled tag: " + getStartTagName(e) + " content:" + getCharacters(r)); } @@ -129,6 +152,9 @@ agencyFullVersion.put(properties.get("agency_no").toString(), true); } agencyIDs.put(properties.get("agency_no").toString(), agencyID); + //if (agencyID % 10 == 0) { + System.out.println("Agency #"+agencyID); + //} return agencyID; } else { long agencyID = agencyIDs.get(properties.get("agency_no").toString()); @@ -143,7 +169,8 @@ private long getLocation(String locationName) { if (locationIDs.get(locationName) == null) { HashMap properties = new HashMap< String,Object > (); - properties.put("location_name", locationName); + properties.put("name", locationName); + properties.put("label", locationName); long locationID = inserter.createNode(properties, locationLabel); locationIDs.put(locationName, locationID); return locationID; @@ -154,7 +181,8 @@ private long getFunction(String functionName) { if (functionIDs.get(functionName) == null) { HashMap properties = new HashMap< String,Object > (); - properties.put("function_name", functionName); + properties.put("name", functionName); + properties.put("label", functionName); long functionID = inserter.createNode(properties, functionLabel); functionIDs.put(functionName, functionID); return functionID; @@ -165,7 +193,8 @@ private long getStatus(String statusName) { if (statusIDs.get(statusName) == null) { HashMap properties = new HashMap< String,Object > (); - properties.put("status_name", statusName); + properties.put("name", statusName); + properties.put("label", statusName); long statusID = inserter.createNode(properties, statusLabel); statusIDs.put(statusName, statusID); return statusID; @@ -205,7 +234,7 @@ if (e.isEndElement()) { if (hasEndTagName(e, "AGENCY_LINK")) { - //System.out.println("Finished processing link: Name = " + name + "; of = " + of + "; date = " + date); + //System.out.println("Finished processing link: type = " + link_type+ "; from = " + agency_from_no + "; to = " + agency_to_no); long agencyFromID, agencyToID; Map agencyFromProperties = new HashMap(); agencyFromProperties.put("agency_no",agency_from_no); @@ -216,9 +245,15 @@ Map relProperties = new HashMap(); relProperties.put("link_type", link_type); relProperties.put("start_date", start_date); - relProperties.put("start_date_qual", start_date_qual); - relProperties.put("end_date", end_date); - relProperties.put("end_date_qual", end_date_qual); + if (start_date_qual != null && !start_date_qual.equals("(null)")) { + relProperties.put("start_date_qual", start_date_qual); + } + if (end_date != null && !end_date.equals("(null)")) { + relProperties.put("end_date", end_date); + } + if (end_date_qual != null && !end_date_qual.equals("(null)")) { + relProperties.put("end_date_qual", end_date_qual); + } inserter.createRelationship(agencyFromID, agencyToID, DynamicRelationshipType.withName("IS_LINKED_TO"), relProperties); @@ -246,7 +281,7 @@ } if (e.isEndElement()) { if (hasEndTagName(e, "AGENCY_LOCATION")) { - System.out.println("Finished processing location: Name = " + name + "; of = " + of + "; date = " + date); + //System.out.println("Finished processing location: Name = " + name + "; of = " + of + "; date = " + date); long locationID, agencyID; locationID = getLocation(name); Map agencyProperties = new HashMap(); @@ -281,7 +316,7 @@ } if (e.isEndElement()) { if (hasEndTagName(e, "AGENCY_STATUS")) { - System.out.println("Finished processing status: Status = " + status + "; of = " + of + "; date = " + date); + //System.out.println("Finished processing status: Status = " + status + "; of = " + of + "; date = " + date); long statusID, agencyID; statusID = getStatus(status); Map agencyProperties = new HashMap(); @@ -332,10 +367,16 @@ agencyProperties.put("agency_no",agency); agencyID = getAgency(agencyProperties); Map relProperties = new HashMap(); - relProperties.put("start_date", start_date); - relProperties.put("start_date_qual", start_date_qual); - relProperties.put("end_date", end_date); - relProperties.put("end_date_qual", end_date_qual); + relProperties.put("start_date", start_date); + if (start_date_qual != null && !start_date_qual.equals("(null)")) { + relProperties.put("start_date_qual", start_date_qual); + } + if (end_date != null && !end_date.equals("(null)")) { + relProperties.put("end_date", end_date); + } + if (end_date_qual != null && !end_date_qual.equals("(null)")) { + relProperties.put("end_date_qual", end_date_qual); + } inserter.createRelationship(agencyID, functionID, DynamicRelationshipType.withName("HAS_FUNCTION"), relProperties); @@ -355,7 +396,7 @@ } private boolean hasStartTagName(XMLEvent e, String name) { - return e.asStartElement().getName().getLocalPart().equals(name); + return e.asStartElement().getName().getLocalPart().toLowerCase().equals(name.toLowerCase()); } private String getStartTagName(XMLEvent e) { @@ -363,7 +404,7 @@ } private boolean hasEndTagName(XMLEvent e, String name) { - return e.asEndElement().getName().getLocalPart().equals(name); + return e.asEndElement().getName().getLocalPart().toLowerCase().equals(name.toLowerCase()); } public void setFilename(String filename) { @@ -373,3 +414,4 @@ } +