--- a/admin/neo4jimporter/src/main/java/StAXSample.java
+++ b/admin/neo4jimporter/src/main/java/StAXSample.java
@@ -84,6 +84,45 @@
                 if (e.isStartElement()) {
                     if (hasStartTagName(e, "AGENCIES")) {
                         System.out.println("Agencies file loaded... ");
+                    } else if (hasStartTagName(e, "AGENCY_NO")) {
+                        previousAgency.put("agency_no", getCharacters(r));
+                    } else if (hasStartTagName(e, "TITLE")) {
+                        String title = getCharacters(r);
+                        previousAgency.put("name", title);
+                        previousAgency.put("label", title);
+                    } else if (hasStartTagName(e, "START_DATE")) {
+                        String start_date = getCharacters(r);
+                        if (start_date != null && !start_date.equals(" ") && !start_date.equals("(null)")) {
+                            previousAgency.put("start_date", Integer.parseInt(start_date));
+                        }
+                    } else if (hasStartTagName(e, "START_DATE_QUAL")) {
+                        previousAgency.put("start_date_qual", getCharacters(r));
+                    } else if (hasStartTagName(e, "ALTERNATIVE_TITLE")) {
+                        previousAgency.put("alternative_title", getCharacters(r));
+                    } else if (hasStartTagName(e, "END_DATE")) {
+                        String end_date = getCharacters(r);
+                        if (end_date != null && !end_date.equals(" ") && !end_date.equals("(null)")) {
+                            previousAgency.put("end_date", Integer.parseInt(end_date));
+                        }
+                    } else if (hasStartTagName(e, "END_DATE_QUAL")) {
+                        previousAgency.put("end_date_qual", getCharacters(r));
+                        // save agency
+                        getAgency(previousAgency);
+                        previousAgency = new HashMap();
+                    }
+                }
+            }
+            // Second pass over the same file: the agency nodes were created above, so the remaining elements can now refer to them.
+            r = xmlif.createXMLEventReader(
+                    filename,
+                    //new FileInputStream(new File(xmlFileURL.toURI())));
+                    new FileInputStream(new File(filename)));
+            while (r.hasNext()) {
+                XMLEvent e = r.nextEvent();
+
+                if (e.isStartElement()) {
+                    if (hasStartTagName(e, "AGENCIES")) {
+                        System.out.println("Agencies file loaded again... ");
                     } else if (hasStartTagName(e, "AGENCY_LINK")) {
                         processAgencyLink(r);
                     } else if (hasStartTagName(e, "AGENCY_LOCATION")) {
@@ -103,27 +142,6 @@
                         Unexpected character 'C' (code 67) in start tag Expected a quote
                          at [row,col,system-id]: [1093387,18,"agency-sample.xml"]
                          */
-                    } else if (hasStartTagName(e, "AGENCY_NO")) {
-                        previousAgency.put("agency_no", getCharacters(r));
-                    } else if (hasStartTagName(e, "TITLE")) {
-                        String title = getCharacters(r);
-                        previousAgency.put("name", title);
-                        previousAgency.put("label", title);
-                    } else if (hasStartTagName(e, "START_DATE")) {
-                        previousAgency.put("start_date", getCharacters(r));
-                    } else if (hasStartTagName(e, "START_DATE_QUAL")) {
-                        previousAgency.put("start_date_qual", getCharacters(r));
-                    } else if (hasStartTagName(e, "ALTERNATIVE_TITLE")) {
-                        previousAgency.put("alternative_title", getCharacters(r));
-                    } else if (hasStartTagName(e, "END_DATE")) {
-                        previousAgency.put("end_date", getCharacters(r));
-                    } else if (hasStartTagName(e, "END_DATE_QUAL")) {
-                        previousAgency.put("end_date_qual", getCharacters(r));
-                        // save agency
-                        getAgency(previousAgency);
-                        previousAgency = new HashMap();
-                    } else {
-                        System.out.println("Unhandled tag: " + getStartTagName(e) + " content:" + getCharacters(r));
                     }
                 }
             }
@@ -146,11 +164,22 @@
     }
 
+    /**
+     * Returns the node id for the agency described by {@code properties}, creating the node the first time an agency number is seen.
+     *
+     * @param properties agency properties; the agency is identified by the "agency_no" entry
+     * @return the node id, or 0 when "agency_no" is missing, "(null)" or " "
+     */
     private long getAgency(Map properties) {
+        Object agencyNo = properties.get("agency_no");
+        // Compare by value: == on strings only tests reference identity and never matches parsed text.
+        if (agencyNo == null || "(null)".equals(agencyNo) || " ".equals(agencyNo)) {
+            return 0;
+        }
         if (agencyIDs.get(properties.get("agency_no").toString()) == null) {
             long agencyID = inserter.createNode(properties, agencyLabel);
-            if (properties.values().size() > 2) {
+            /*if (properties.values().size() > 1) {
                 agencyFullVersion.put(properties.get("agency_no").toString(), true);
-            }
+            } */
             agencyIDs.put(properties.get("agency_no").toString(), agencyID);
             //if (agencyID % 10 == 0) {
                 System.out.println("Agency #"+agencyID);
@@ -158,10 +187,10 @@
             return agencyID;
         } else {
             long agencyID = agencyIDs.get(properties.get("agency_no").toString());
-            if (properties.values().size() > 2 && agencyFullVersion.get(properties.get("agency_no")) == null) {
+            /*if (properties.values().size() > 1 && agencyFullVersion.get(properties.get("agency_no")) == null) {
                 inserter.setNodeProperties(agencyID, properties);
                 agencyFullVersion.put(properties.get("agency_no").toString(), true);
-            }
+            } */
             return agencyID;
         }
     }
@@ -235,29 +264,35 @@
 
                 if (hasEndTagName(e, "AGENCY_LINK")) {
                     //System.out.println("Finished processing link: type = " + link_type+ "; from = " + agency_from_no + "; to = " + agency_to_no);
-                    long agencyFromID, agencyToID;
-                    Map agencyFromProperties = new HashMap();
-                    agencyFromProperties.put("agency_no",agency_from_no);
-                    agencyFromID = getAgency(agencyFromProperties);
-                    Map agencyToProperties = new HashMap();
-                    agencyToProperties.put("agency_no",agency_to_no);
-                    agencyToID = getAgency(agencyToProperties);
-                    Map relProperties = new HashMap();
-                    relProperties.put("link_type", link_type);
-                    relProperties.put("start_date", start_date);
-                    if (start_date_qual != null && !start_date_qual.equals("(null)")) {
-                        relProperties.put("start_date_qual", start_date_qual);
-                    }
-                    if (end_date != null && !end_date.equals("(null)")) {
-                        relProperties.put("end_date", end_date);
-                    }
-                    if (end_date_qual != null && !end_date_qual.equals("(null)")) {
-                        relProperties.put("end_date_qual", end_date_qual);
-                    }
-                    inserter.createRelationship(agencyFromID, agencyToID,
-                            DynamicRelationshipType.withName("IS_LINKED_TO"), relProperties);
-
-                    break;
+                    // Skip links with a missing endpoint; getAgency() would answer 0 for them.
+                    if (agency_from_no != null && !agency_from_no.equals("(null)")
+                            && agency_to_no != null && !agency_to_no.equals("(null)")) {
+                        long agencyFromID, agencyToID;
+                        Map agencyFromProperties = new HashMap();
+                        // getAgency() identifies an agency by the "agency_no" key, for both endpoints.
+                        agencyFromProperties.put("agency_no", agency_from_no);
+                        agencyFromID = getAgency(agencyFromProperties);
+                        Map agencyToProperties = new HashMap();
+                        agencyToProperties.put("agency_no", agency_to_no);
+                        agencyToID = getAgency(agencyToProperties);
+                        Map relProperties = new HashMap();
+                        relProperties.put("link_type", link_type);
+                        if (start_date != null && !start_date.equals("(null)")) {
+                            relProperties.put("start_date", Integer.parseInt(start_date));
+                        }
+                        if (start_date_qual != null && !start_date_qual.equals("(null)")) {
+                            relProperties.put("start_date_qual", start_date_qual);
+                        }
+                        if (end_date != null && !end_date.equals("(null)")) {
+                            relProperties.put("end_date", Integer.parseInt(end_date));
+                        }
+                        if (end_date_qual != null && !end_date_qual.equals("(null)")) {
+                            relProperties.put("end_date_qual", end_date_qual);
+                        }
+                        inserter.createRelationship(agencyFromID, agencyToID,
+                                DynamicRelationshipType.withName("IS_LINKED_TO"), relProperties);
+                    }
+                    break;
                 }
             }
 
@@ -288,7 +323,7 @@
                     agencyProperties.put("agency_no",of);
                     agencyID = getAgency(agencyProperties);
                     Map relProperties = new HashMap();
-                    relProperties.put("date", date);
+                    relProperties.put("date", fixDate(date));
                     inserter.createRelationship(agencyID, locationID,
                             DynamicRelationshipType.withName("HAS_LOCATION"), relProperties);
 
@@ -323,7 +358,7 @@
                     agencyProperties.put("agency_no",of);
                     agencyID = getAgency(agencyProperties);
                     Map relProperties = new HashMap();
-                    relProperties.put("date", date);
+                    relProperties.put("date", fixDate(date));
                     inserter.createRelationship(agencyID, statusID,
                             DynamicRelationshipType.withName("HAS_STATUS"), relProperties);
 
@@ -367,12 +402,14 @@
                     agencyProperties.put("agency_no",agency);
                     agencyID = getAgency(agencyProperties);
                     Map relProperties = new HashMap();
-                    relProperties.put("start_date", start_date);
+                    if (start_date != null && !start_date.equals("(null)")) {
+                        relProperties.put("start_date", Integer.parseInt(start_date));
+                    }
                     if (start_date_qual != null && !start_date_qual.equals("(null)")) {
                         relProperties.put("start_date_qual", start_date_qual);
                     }
                     if (end_date != null && !end_date.equals("(null)")) {
-                        relProperties.put("end_date", end_date);
+                        relProperties.put("end_date", Integer.parseInt(end_date));
                     }
                     if (end_date_qual != null && !end_date_qual.equals("(null)")) {
                         relProperties.put("end_date_qual", end_date_qual);
@@ -385,7 +422,27 @@
             }
         }
     }
 
+    /**
+     * Turns a three-part, "-"-separated date into one integer by concatenating
+     * the parts last-to-first (presumably dd-mm-yyyy in, yyyymmdd out).
+     *
+     * @param date raw date text; may be null
+     * @return the reordered date as an int, or 0 when the text is null or does
+     *         not split into exactly three parts
+     * @throws NumberFormatException if the concatenated parts are not a valid int
+     */
+    private int fixDate(String date) {
+        if (date == null) {
+            return 0;
+        }
+        String[] parts = date.split("-");
+        if (parts.length != 3) {
+            return 0;
+        }
+        return Integer.parseInt(parts[2] + parts[1] + parts[0]);
+    }
+
     private String getCharacters(XMLEventReader rdr) throws XMLStreamException {
         XMLEvent e = rdr.nextEvent();
         if (e.isCharacters()) {
@@ -399,10 +456,6 @@
         return e.asStartElement().getName().getLocalPart().toLowerCase().equals(name.toLowerCase());
     }
 
-    private String getStartTagName(XMLEvent e) {
-        return e.asStartElement().getName().getLocalPart();
-    }
-
     private boolean hasEndTagName(XMLEvent e, String name) {
         return e.asEndElement().getName().getLocalPart().toLowerCase().equals(name.toLowerCase());
     }