--- a/admin/neo4jimporter/src/main/java/StAXSample.java +++ b/admin/neo4jimporter/src/main/java/StAXSample.java @@ -84,6 +84,44 @@ if (e.isStartElement()) { if (hasStartTagName(e, "AGENCIES")) { System.out.println("Agencies file loaded... "); + } else if (hasStartTagName(e, "AGENCY_NO")) { + previousAgency.put("agency_no", getCharacters(r)); + } else if (hasStartTagName(e, "TITLE")) { + String title = getCharacters(r); + previousAgency.put("name", title); + previousAgency.put("label", title); + } else if (hasStartTagName(e, "START_DATE")) { + String start_date = getCharacters(r); + if (start_date != null && !start_date.equals(" ") && !start_date.equals("(null)")) { + previousAgency.put("start_date", Integer.parseInt(start_date)); + } + } else if (hasStartTagName(e, "START_DATE_QUAL")) { + previousAgency.put("start_date_qual", getCharacters(r)); + } else if (hasStartTagName(e, "ALTERNATIVE_TITLE")) { + previousAgency.put("alternative_title", getCharacters(r)); + } else if (hasStartTagName(e, "END_DATE")) { + String end_date = getCharacters(r); + if (end_date != null && !end_date.equals(" ") && !end_date.equals("(null)")) { + previousAgency.put("end_date", Integer.parseInt(end_date)); + } + } else if (hasStartTagName(e, "END_DATE_QUAL")) { + previousAgency.put("end_date_qual", getCharacters(r)); + // save agency + getAgency(previousAgency); + previousAgency = new HashMap<String, Object>(); + } + } + } + r = xmlif.createXMLEventReader( + filename, + //new FileInputStream(new File(xmlFileURL.toURI()))); + new FileInputStream(new File(filename))); + while (r.hasNext()) { + XMLEvent e = r.nextEvent(); + + if (e.isStartElement()) { + if (hasStartTagName(e, "AGENCIES")) { + System.out.println("Agencies file loaded again... "); } else if (hasStartTagName(e, "AGENCY_LINK")) { processAgencyLink(r); } else if (hasStartTagName(e, "AGENCY_LOCATION")) { @@ -92,25 +130,17 @@ processAgencyFunction(r); } else if (hasStartTagName(e, "AGENCY_STATUS")) { processAgencyStatus(r); - } else if (hasStartTagName(e, "AGENCY_NO")) { - previousAgency.put("agency_no", getCharacters(r)); - } else if (hasStartTagName(e, "TITLE")) { - previousAgency.put("name", getCharacters(r)); - } else if (hasStartTagName(e, "START_DATE")) { - previousAgency.put("start_date", getCharacters(r)); - } else if (hasStartTagName(e, "START_DATE_QUAL")) { - previousAgency.put("start_date_qual", getCharacters(r)); - } else if (hasStartTagName(e, "ALTERNATIVE_TITLE")) { - previousAgency.put("alternative_title", getCharacters(r)); - } else if (hasStartTagName(e, "END_DATE")) { - previousAgency.put("end_date", getCharacters(r)); - } else if (hasStartTagName(e, "END_DATE_QUAL")) { - previousAgency.put("end_date_qual", getCharacters(r)); - // save agency - getAgency(previousAgency); - previousAgency = new HashMap<String, Object>(); - } else { - System.out.println("Unhandled tag: " + getStartTagName(e) + " content:" + getCharacters(r)); + /* TODO + Unhandled tag: AGENCY_NOTE content: + +Unhandled tag: NOTE_AGENCY_NO content:CA 4886 +Unhandled tag: NOTE_TYPE content:Archivists note +Unhandled tag: NOTE content:null +Unhandled tag: head content: + +Unexpected character 'C' (code 67) in start tag Expected a quote + at [row,col,system-id]: [1093387,18,"agency-sample.xml"] + */ } } } @@ -133,19 +163,25 @@ } private long getAgency(Map<String, Object> properties) { + if (properties.get("agency_no") == null || properties.get("agency_no") == "(null)" || properties.get("agency_no") == " ") { + return 0; + } if (agencyIDs.get(properties.get("agency_no").toString()) == null) { long agencyID = inserter.createNode(properties, agencyLabel); - if (properties.values().size() > 2) { + /*if (properties.values().size() > 1) { agencyFullVersion.put(properties.get("agency_no").toString(), true); - } + } */ agencyIDs.put(properties.get("agency_no").toString(), agencyID); + //if (agencyID % 10 == 0) { + System.out.println("Agency #"+agencyID); + //} return agencyID; } else { long agencyID = agencyIDs.get(properties.get("agency_no").toString()); - if (properties.values().size() > 2 && agencyFullVersion.get(properties.get("agency_no")) == null) { + /*if (properties.values().size() > 1 && agencyFullVersion.get(properties.get("agency_no")) == null) { inserter.setNodeProperties(agencyID, properties); agencyFullVersion.put(properties.get("agency_no").toString(), true); - } + } */ return agencyID; } } @@ -154,6 +190,7 @@ if (locationIDs.get(locationName) == null) { HashMap properties = new HashMap< String,Object > (); properties.put("name", locationName); + properties.put("label", locationName); long locationID = inserter.createNode(properties, locationLabel); locationIDs.put(locationName, locationID); return locationID; @@ -165,6 +202,7 @@ if (functionIDs.get(functionName) == null) { HashMap properties = new HashMap< String,Object > (); properties.put("name", functionName); + properties.put("label", functionName); long functionID = inserter.createNode(properties, functionLabel); functionIDs.put(functionName, functionID); return functionID; @@ -176,6 +214,7 @@ if (statusIDs.get(statusName) == null) { HashMap properties = new HashMap< String,Object > (); properties.put("name", statusName); + properties.put("label", statusName); long statusID = inserter.createNode(properties, statusLabel); statusIDs.put(statusName, statusID); return statusID; @@ -215,30 +254,34 @@ if (e.isEndElement()) { if (hasEndTagName(e, "AGENCY_LINK")) { - //System.out.println("Finished processing link: Name = " + name + "; of = " + of + "; date = " + date); - long agencyFromID, agencyToID; - Map<String, Object> agencyFromProperties = new HashMap<String, Object>(); - agencyFromProperties.put("agency_no",agency_from_no); - agencyFromID = getAgency(agencyFromProperties); - Map<String, Object> agencyToProperties = new HashMap<String, Object>(); - agencyToProperties.put("agency_no",agency_to_no); - agencyToID = getAgency(agencyToProperties); - Map<String, Object> relProperties = new HashMap<String, Object>(); - relProperties.put("link_type", link_type); - relProperties.put("start_date", start_date); - if (start_date_qual != null && !start_date_qual.equals("(null)")) { - relProperties.put("start_date_qual", start_date_qual); - } - if (end_date != null && !end_date.equals("(null)")) { - relProperties.put("end_date", end_date); - } - if (end_date_qual != null && !end_date_qual.equals("(null)")) { - relProperties.put("end_date_qual", end_date_qual); - } - inserter.createRelationship(agencyFromID, agencyToID, - DynamicRelationshipType.withName("IS_LINKED_TO"), relProperties); - - break; + //System.out.println("Finished processing link: type = " + link_type+ "; from = " + agency_from_no + "; to = " + agency_to_no); + if (agency_to_no != null && !agency_to_no.equals("(null)")) { + long agencyFromID, agencyToID; + Map<String, Object> agencyFromProperties = new HashMap<String, Object>(); + agencyFromProperties.put("agency_from_no", agency_from_no); + agencyFromID = getAgency(agencyFromProperties); + Map<String, Object> agencyToProperties = new HashMap<String, Object>(); + agencyToProperties.put("agency_to_no", agency_to_no); + agencyToID = getAgency(agencyToProperties); + Map<String, Object> relProperties = new HashMap<String, Object>(); + relProperties.put("link_type", link_type); + if (start_date != null && !start_date.equals("(null)")) { + relProperties.put("start_date", Integer.parseInt(start_date)); + } + if (start_date_qual != null && !start_date_qual.equals("(null)")) { + relProperties.put("start_date_qual", start_date_qual); + } + if (end_date != null && !end_date.equals("(null)")) { + relProperties.put("end_date", Integer.parseInt(end_date)); + } + if (end_date_qual != null && !end_date_qual.equals("(null)")) { + relProperties.put("end_date_qual", end_date_qual); + } + inserter.createRelationship(agencyFromID, agencyToID, + DynamicRelationshipType.withName("IS_LINKED_TO"), relProperties); + } + break; + } } } @@ -262,14 +305,14 @@ } if (e.isEndElement()) { if (hasEndTagName(e, "AGENCY_LOCATION")) { - System.out.println("Finished processing location: Name = " + name + "; of = " + of + "; date = " + date); + //System.out.println("Finished processing location: Name = " + name + "; of = " + of + "; date = " + date); long locationID, agencyID; locationID = getLocation(name); Map<String, Object> agencyProperties = new HashMap<String, Object>(); agencyProperties.put("agency_no",of); agencyID = getAgency(agencyProperties); Map<String, Object> relProperties = new HashMap<String, Object>(); - relProperties.put("date", date); + relProperties.put("date", fixDate(date)); inserter.createRelationship(agencyID, locationID, DynamicRelationshipType.withName("HAS_LOCATION"), relProperties); @@ -297,14 +340,14 @@ } if (e.isEndElement()) { if (hasEndTagName(e, "AGENCY_STATUS")) { - System.out.println("Finished processing status: Status = " + status + "; of = " + of + "; date = " + date); + //System.out.println("Finished processing status: Status = " + status + "; of = " + of + "; date = " + date); long statusID, agencyID; statusID = getStatus(status); Map<String, Object> agencyProperties = new HashMap<String, Object>(); agencyProperties.put("agency_no",of); agencyID = getAgency(agencyProperties); Map<String, Object> relProperties = new HashMap<String, Object>(); - relProperties.put("date", date); + relProperties.put("date", fixDate(date)); inserter.createRelationship(agencyID, statusID, DynamicRelationshipType.withName("HAS_STATUS"), relProperties); @@ -348,12 +391,12 @@ agencyProperties.put("agency_no",agency); agencyID = getAgency(agencyProperties); Map<String, Object> relProperties = new HashMap<String, Object>(); - relProperties.put("start_date", start_date); + relProperties.put("start_date", Integer.parseInt(start_date)); if (start_date_qual != null && !start_date_qual.equals("(null)")) { relProperties.put("start_date_qual", start_date_qual); } if (end_date != null && !end_date.equals("(null)")) { - relProperties.put("end_date", end_date); + relProperties.put("end_date", Integer.parseInt(end_date)); } if (end_date_qual != null && !end_date_qual.equals("(null)")) { relProperties.put("end_date_qual", end_date_qual); @@ -366,7 +409,14 @@ } } } - + private int fixDate(String date) { + String[] parts = date.split("-"); + if (parts.length == 3) { + return Integer.parseInt(""+parts[2]+parts[1]+parts[0]); + } else { + return 0; + } + } private String getCharacters(XMLEventReader rdr) throws XMLStreamException { XMLEvent e = rdr.nextEvent(); if (e.isCharacters()) { @@ -380,10 +430,6 @@ return e.asStartElement().getName().getLocalPart().toLowerCase().equals(name.toLowerCase()); } - private String getStartTagName(XMLEvent e) { - return e.asStartElement().getName().getLocalPart(); - } - private boolean hasEndTagName(XMLEvent e, String name) { return e.asEndElement().getName().getLocalPart().toLowerCase().equals(name.toLowerCase()); } @@ -395,3 +441,4 @@ } +