naa import update
[disclosr.git] / admin / neo4jimporter / src / main / java / StAXSample.java
blob:a/admin/neo4jimporter/src/main/java/StAXSample.java -> blob:b/admin/neo4jimporter/src/main/java/StAXSample.java
--- a/admin/neo4jimporter/src/main/java/StAXSample.java
+++ b/admin/neo4jimporter/src/main/java/StAXSample.java
@@ -84,6 +84,44 @@
                     if (e.isStartElement()) {
                         if (hasStartTagName(e, "AGENCIES")) {
                             System.out.println("Agencies file loaded... ");
+                        } else if (hasStartTagName(e, "AGENCY_NO")) {
+                            previousAgency.put("agency_no", getCharacters(r));
+                        } else if (hasStartTagName(e, "TITLE")) {
+                            String title = getCharacters(r);
+                            previousAgency.put("name", title);
+                            previousAgency.put("label", title);
+                        } else if (hasStartTagName(e, "START_DATE")) {
+                            String start_date = getCharacters(r);
+                            if (start_date != null && !start_date.equals(" ") && !start_date.equals("(null)")) {
+                                previousAgency.put("start_date", Integer.parseInt(start_date));
+                            }
+                        } else if (hasStartTagName(e, "START_DATE_QUAL")) {
+                            previousAgency.put("start_date_qual", getCharacters(r));
+                        } else if (hasStartTagName(e, "ALTERNATIVE_TITLE")) {
+                            previousAgency.put("alternative_title", getCharacters(r));
+                        } else if (hasStartTagName(e, "END_DATE")) {
+                            String end_date = getCharacters(r);
+                            if (end_date != null && !end_date.equals(" ") && !end_date.equals("(null)")) {
+                                previousAgency.put("end_date", Integer.parseInt(end_date));
+                            }
+                        } else if (hasStartTagName(e, "END_DATE_QUAL")) {
+                            previousAgency.put("end_date_qual", getCharacters(r));
+                            // save agency
+                            getAgency(previousAgency);
+                            previousAgency = new HashMap<String, Object>();
+                        }
+                    }
+                }
+                r = xmlif.createXMLEventReader(
+                        filename,
+                        //new FileInputStream(new File(xmlFileURL.toURI())));
+                        new FileInputStream(new File(filename)));
+                while (r.hasNext()) {
+                    XMLEvent e = r.nextEvent();
+
+                    if (e.isStartElement()) {
+                        if (hasStartTagName(e, "AGENCIES")) {
+                            System.out.println("Agencies file loaded again... ");
                         } else if (hasStartTagName(e, "AGENCY_LINK")) {
                             processAgencyLink(r);
                         } else if (hasStartTagName(e, "AGENCY_LOCATION")) {
@@ -103,27 +141,6 @@
 Unexpected character 'C' (code 67) in start tag Expected a quote
  at [row,col,system-id]: [1093387,18,"agency-sample.xml"]
                              */
-                        } else if (hasStartTagName(e, "AGENCY_NO")) {
-                            previousAgency.put("agency_no", getCharacters(r));
-                        } else if (hasStartTagName(e, "TITLE")) {
-                            String title = getCharacters(r);
-                            previousAgency.put("name", title);
-                            previousAgency.put("label", title);
-                        } else if (hasStartTagName(e, "START_DATE")) {
-                            previousAgency.put("start_date", getCharacters(r));
-                        } else if (hasStartTagName(e, "START_DATE_QUAL")) {
-                            previousAgency.put("start_date_qual", getCharacters(r));
-                        } else if (hasStartTagName(e, "ALTERNATIVE_TITLE")) {
-                            previousAgency.put("alternative_title", getCharacters(r));
-                        } else if (hasStartTagName(e, "END_DATE")) {
-                            previousAgency.put("end_date", getCharacters(r));
-                        } else if (hasStartTagName(e, "END_DATE_QUAL")) {
-                            previousAgency.put("end_date_qual", getCharacters(r));
-                            // save agency
-                            getAgency(previousAgency);
-                            previousAgency = new HashMap<String, Object>();
-                        } else {
-                            System.out.println("Unhandled tag: " + getStartTagName(e) + " content:" + getCharacters(r));
                         }
                     }
                 }
@@ -146,11 +163,20 @@
     }
 
     private long getAgency(Map<String, Object> properties) {
+        // Reject a missing or placeholder agency number before it is used as a lookup key.
+        // The values are compared with equals(): == on objects tests identity only, so a
+        // string read at runtime typically does not match the "(null)" or " " literals.
+        // NOTE(review): 0 doubles as the "no agency" result — confirm callers never
+        // treat it as a real node id.
+        Object agencyNo = properties.get("agency_no");
+        if (agencyNo == null || "(null)".equals(agencyNo) || " ".equals(agencyNo)) {
+            return 0;
+        }
         if (agencyIDs.get(properties.get("agency_no").toString()) == null) {
             long agencyID = inserter.createNode(properties, agencyLabel);
-            if (properties.values().size() > 2) {
+            /*if (properties.values().size() > 1) {
                 agencyFullVersion.put(properties.get("agency_no").toString(), true);
-            }
+            } */
             agencyIDs.put(properties.get("agency_no").toString(), agencyID);
             //if (agencyID % 10 == 0) {
                 System.out.println("Agency #"+agencyID);
@@ -158,10 +178,10 @@
             return agencyID;
         } else {
             long agencyID = agencyIDs.get(properties.get("agency_no").toString());
-            if (properties.values().size() > 2 && agencyFullVersion.get(properties.get("agency_no")) == null) {
+            /*if (properties.values().size() > 1 && agencyFullVersion.get(properties.get("agency_no")) == null) {
                 inserter.setNodeProperties(agencyID, properties);
                 agencyFullVersion.put(properties.get("agency_no").toString(), true);
-            }
+            } */
             return agencyID;
         }
     }
@@ -235,29 +255,36 @@
                 if (hasEndTagName(e, "AGENCY_LINK")) {
 
                     //System.out.println("Finished processing link:  type = " + link_type+ "; from = " + agency_from_no + "; to = " + agency_to_no);
-                    long agencyFromID, agencyToID;
-                    Map<String, Object> agencyFromProperties = new HashMap<String, Object>();
-                    agencyFromProperties.put("agency_no",agency_from_no);
-                    agencyFromID = getAgency(agencyFromProperties);
-                    Map<String, Object> agencyToProperties = new HashMap<String, Object>();
-                    agencyToProperties.put("agency_no",agency_to_no);
-                    agencyToID = getAgency(agencyToProperties);
-                    Map<String, Object> relProperties = new HashMap<String, Object>();
-                    relProperties.put("link_type", link_type);
-                    relProperties.put("start_date", start_date);
-                    if (start_date_qual != null && !start_date_qual.equals("(null)")) {
-                        relProperties.put("start_date_qual", start_date_qual);
-                    }
-                    if (end_date != null && !end_date.equals("(null)")) {
-                        relProperties.put("end_date", end_date);
-                    }
-                    if (end_date_qual != null && !end_date_qual.equals("(null)")) {
-                        relProperties.put("end_date_qual", end_date_qual);
-                    }
-                    inserter.createRelationship(agencyFromID, agencyToID,
-                            DynamicRelationshipType.withName("IS_LINKED_TO"), relProperties);
-
-                    break;
+                    // Skip links whose endpoints are missing or the "(null)" placeholder.
+                    if (agency_from_no != null && !agency_from_no.equals("(null)")
+                            && agency_to_no != null && !agency_to_no.equals("(null)")) {
+                        long agencyFromID, agencyToID;
+                        // getAgency() looks nodes up by the "agency_no" key, so both endpoint
+                        // maps must use that key.
+                        Map<String, Object> agencyFromProperties = new HashMap<String, Object>();
+                        agencyFromProperties.put("agency_no", agency_from_no);
+                        agencyFromID = getAgency(agencyFromProperties);
+                        Map<String, Object> agencyToProperties = new HashMap<String, Object>();
+                        agencyToProperties.put("agency_no", agency_to_no);
+                        agencyToID = getAgency(agencyToProperties);
+                        Map<String, Object> relProperties = new HashMap<String, Object>();
+                        relProperties.put("link_type", link_type);
+                        if (start_date != null && !start_date.equals("(null)")) {
+                            relProperties.put("start_date", Integer.parseInt(start_date));
+                        }
+                        if (start_date_qual != null && !start_date_qual.equals("(null)")) {
+                            relProperties.put("start_date_qual", start_date_qual);
+                        }
+                        if (end_date != null && !end_date.equals("(null)")) {
+                            relProperties.put("end_date", Integer.parseInt(end_date));
+                        }
+                        if (end_date_qual != null && !end_date_qual.equals("(null)")) {
+                            relProperties.put("end_date_qual", end_date_qual);
+                        }
+                        inserter.createRelationship(agencyFromID, agencyToID,
+                                DynamicRelationshipType.withName("IS_LINKED_TO"), relProperties);
+                    }
+                    break;
                 }
             }
         }
@@ -288,7 +312,7 @@
                     agencyProperties.put("agency_no",of);
                     agencyID = getAgency(agencyProperties);
                     Map<String, Object> relProperties = new HashMap<String, Object>();
-                    relProperties.put("date", date);
+                    relProperties.put("date", fixDate(date));
                     inserter.createRelationship(agencyID, locationID,
                             DynamicRelationshipType.withName("HAS_LOCATION"), relProperties);
 
@@ -323,7 +347,7 @@
                     agencyProperties.put("agency_no",of);
                     agencyID = getAgency(agencyProperties);
                     Map<String, Object> relProperties = new HashMap<String, Object>();
-                    relProperties.put("date", date);
+                    relProperties.put("date", fixDate(date));
                     inserter.createRelationship(agencyID, statusID,
                             DynamicRelationshipType.withName("HAS_STATUS"), relProperties);
 
@@ -367,12 +391,16 @@
                     agencyProperties.put("agency_no",agency);
                     agencyID = getAgency(agencyProperties);
                     Map<String, Object> relProperties = new HashMap<String, Object>();
-                    relProperties.put("start_date", start_date);
+                    // Only parse a real value: Integer.parseInt throws NumberFormatException
+                    // on null and on the "(null)" placeholder.
+                    if (start_date != null && !start_date.equals("(null)")) {
+                        relProperties.put("start_date", Integer.parseInt(start_date));
+                    }
                     if (start_date_qual != null && !start_date_qual.equals("(null)")) {
                         relProperties.put("start_date_qual", start_date_qual);
                     }
                     if (end_date != null && !end_date.equals("(null)")) {
-                        relProperties.put("end_date", end_date);
+                        relProperties.put("end_date", Integer.parseInt(end_date));
                     }
                     if (end_date_qual != null && !end_date_qual.equals("(null)")) {
                         relProperties.put("end_date_qual", end_date_qual);
@@ -385,7 +409,25 @@
             }
         }
     }
-
+    /**
+     * Converts a dash-separated date into an int by joining its three parts in
+     * reverse order, e.g. {@code "31-12-2013"} becomes {@code 20131231}.
+     *
+     * @param date text of the form {@code a-b-c}; surrounding whitespace is ignored; may be {@code null}
+     * @return the int parsed from {@code c + b + a}, or {@code 0} when the input
+     *         is {@code null} or does not split into exactly three parts
+     * @throws NumberFormatException if the joined parts are not a valid int
+     */
+    private int fixDate(String date) {
+        if (date == null) {
+            return 0;
+        }
+        String[] parts = date.trim().split("-");
+        if (parts.length != 3) {
+            return 0;
+        }
+        return Integer.parseInt(parts[2] + parts[1] + parts[0]);
+    }
     private String getCharacters(XMLEventReader rdr) throws XMLStreamException {
         XMLEvent e = rdr.nextEvent();
         if (e.isCharacters()) {
@@ -399,10 +430,6 @@
         return e.asStartElement().getName().getLocalPart().toLowerCase().equals(name.toLowerCase());
     }
 
-    private String getStartTagName(XMLEvent e) {
-        return e.asStartElement().getName().getLocalPart();
-    }
-
     private boolean hasEndTagName(XMLEvent e, String name) {
         return e.asEndElement().getName().getLocalPart().toLowerCase().equals(name.toLowerCase());
     }