beginnings of NAA data import
[disclosr.git] / admin / neo4jimporter / src / main / java / StAXSample.java
blob:a/admin/neo4jimporter/src/main/java/StAXSample.java -> blob:b/admin/neo4jimporter/src/main/java/StAXSample.java
--- a/admin/neo4jimporter/src/main/java/StAXSample.java
+++ b/admin/neo4jimporter/src/main/java/StAXSample.java
@@ -35,13 +35,14 @@
     }
 
     public static void main(String[] args) {
-        if (args.length != 1) {
+        if (args.length > 1) {
-            System.out.println("Usage: StAXSample file.xml");
+            System.out.println("Usage: StAXSample [file.xml]");
             System.exit(-1);
         }
 
         StAXSample ss = new StAXSample();
-        ss.setFilename(args[0]);
+        // The file argument is optional: with no argument, import the bundled sample file.
+        ss.setFilename(args.length == 1 ? args[0] : "agency-sample.xml");
         ss.run();
     }
 
@@ -76,21 +77,13 @@
 
 
                 //iterate as long as there are more events on the input stream
+                Map<String, Object> previousAgency = new HashMap<String, Object>();
                 while (r.hasNext()) {
                     XMLEvent e = r.nextEvent();
-                    Map<String, Object> previousAgency = new HashMap<String, Object>();
+
                     if (e.isStartElement()) {
                         if (hasStartTagName(e, "AGENCIES")) {
                             System.out.println("Agencies file loaded... ");
-                        } else if (hasStartTagName(e, "TITLE")) {
-                            System.out.println("TITLE is: " + getCharacters(r));
-                            previousAgency.put("title", getCharacters(r));
-                        } else if (hasStartTagName(e, "END_DATE_QUAL")) {
-                            System.out.println("END_DATE_QUAL is: " + getCharacters(r));
-                            previousAgency.put("end_date_qual", getCharacters(r));
-                            // save agency
-                            getAgency(previousAgency);
-                            previousAgency = new HashMap<String, Object>();
                         } else if (hasStartTagName(e, "AGENCY_LINK")) {
                             processAgencyLink(r);
                         } else if (hasStartTagName(e, "AGENCY_LOCATION")) {
@@ -99,6 +92,36 @@
                             processAgencyFunction(r);
                         } else if (hasStartTagName(e, "AGENCY_STATUS")) {
                             processAgencyStatus(r);
+                            /* TODO
+                            Unhandled tag: AGENCY_NOTE content:
+
+Unhandled tag: NOTE_AGENCY_NO content:CA 4886
+Unhandled tag: NOTE_TYPE content:Archivists note
+Unhandled tag: NOTE content:null
+Unhandled tag: head content:
+
+Unexpected character 'C' (code 67) in start tag Expected a quote
+ at [row,col,system-id]: [1093387,18,"agency-sample.xml"]
+                             */
+                        } else if (hasStartTagName(e, "AGENCY_NO")) {
+                            previousAgency.put("agency_no", getCharacters(r));
+                        } else if (hasStartTagName(e, "TITLE")) {
+                            String title = getCharacters(r);
+                            previousAgency.put("name", title);
+                            previousAgency.put("label", title);
+                        } else if (hasStartTagName(e, "START_DATE")) {
+                            previousAgency.put("start_date", getCharacters(r));
+                        } else if (hasStartTagName(e, "START_DATE_QUAL")) {
+                            previousAgency.put("start_date_qual", getCharacters(r));
+                        } else if (hasStartTagName(e, "ALTERNATIVE_TITLE")) {
+                            previousAgency.put("alternative_title", getCharacters(r));
+                        } else if (hasStartTagName(e, "END_DATE")) {
+                            previousAgency.put("end_date", getCharacters(r));
+                        } else if (hasStartTagName(e, "END_DATE_QUAL")) {
+                            previousAgency.put("end_date_qual", getCharacters(r));
+                            // save agency
+                            getAgency(previousAgency);
+                            previousAgency = new HashMap<String, Object>();
                         } else {
                             System.out.println("Unhandled tag: " + getStartTagName(e) + " content:" + getCharacters(r));
                         }
@@ -129,6 +152,9 @@
                 agencyFullVersion.put(properties.get("agency_no").toString(), true);
             }
             agencyIDs.put(properties.get("agency_no").toString(), agencyID);
+            //if (agencyID % 10 == 0) {
+                System.out.println("Agency #"+agencyID);
+            //}
             return agencyID;
         } else {
             long agencyID = agencyIDs.get(properties.get("agency_no").toString());
@@ -143,7 +169,8 @@
     private long getLocation(String locationName) {
         if (locationIDs.get(locationName) == null) {
             HashMap properties = new HashMap< String,Object > ();
-            properties.put("location_name", locationName);
+            properties.put("name", locationName);
+            properties.put("label", locationName);
             long locationID = inserter.createNode(properties, locationLabel);
             locationIDs.put(locationName, locationID);
             return locationID;
@@ -154,7 +181,8 @@
     private long getFunction(String functionName) {
         if (functionIDs.get(functionName) == null) {
             HashMap properties = new HashMap< String,Object > ();
-            properties.put("function_name", functionName);
+            properties.put("name", functionName);
+            properties.put("label", functionName);
             long functionID = inserter.createNode(properties, functionLabel);
             functionIDs.put(functionName, functionID);
             return functionID;
@@ -165,7 +193,8 @@
     private long getStatus(String statusName) {
         if (statusIDs.get(statusName) == null) {
             HashMap properties = new HashMap< String,Object > ();
-            properties.put("status_name", statusName);
+            properties.put("name", statusName);
+            properties.put("label", statusName);
             long statusID = inserter.createNode(properties, statusLabel);
             statusIDs.put(statusName, statusID);
             return statusID;
@@ -205,7 +234,7 @@
             if (e.isEndElement()) {
                 if (hasEndTagName(e, "AGENCY_LINK")) {
 
-                    //System.out.println("Finished processing link:  Name = " + name + "; of = " + of + "; date = " + date);
+                    //System.out.println("Finished processing link:  type = " + link_type+ "; from = " + agency_from_no + "; to = " + agency_to_no);
                     long agencyFromID, agencyToID;
                     Map<String, Object> agencyFromProperties = new HashMap<String, Object>();
                     agencyFromProperties.put("agency_no",agency_from_no);
@@ -216,9 +245,15 @@
                     Map<String, Object> relProperties = new HashMap<String, Object>();
                     relProperties.put("link_type", link_type);
                     relProperties.put("start_date", start_date);
-                    relProperties.put("start_date_qual", start_date_qual);
-                    relProperties.put("end_date", end_date);
-                    relProperties.put("end_date_qual", end_date_qual);
+                    if (start_date_qual != null && !start_date_qual.equals("(null)")) {
+                        relProperties.put("start_date_qual", start_date_qual);
+                    }
+                    if (end_date != null && !end_date.equals("(null)")) {
+                        relProperties.put("end_date", end_date);
+                    }
+                    if (end_date_qual != null && !end_date_qual.equals("(null)")) {
+                        relProperties.put("end_date_qual", end_date_qual);
+                    }
                     inserter.createRelationship(agencyFromID, agencyToID,
                             DynamicRelationshipType.withName("IS_LINKED_TO"), relProperties);
 
@@ -246,7 +281,7 @@
             }
             if (e.isEndElement()) {
                 if (hasEndTagName(e, "AGENCY_LOCATION")) {
-                    System.out.println("Finished processing location:  Name = " + name + "; of = " + of + "; date = " + date);
+                    //System.out.println("Finished processing location:  Name = " + name + "; of = " + of + "; date = " + date);
                     long locationID, agencyID;
                     locationID = getLocation(name);
                     Map<String, Object> agencyProperties = new HashMap<String, Object>();
@@ -281,7 +316,7 @@
             }
             if (e.isEndElement()) {
                 if (hasEndTagName(e, "AGENCY_STATUS")) {
-                    System.out.println("Finished processing status:  Status = " + status + "; of = " + of + "; date = " + date);
+                    //System.out.println("Finished processing status:  Status = " + status + "; of = " + of + "; date = " + date);
                     long statusID, agencyID;
                             statusID = getStatus(status);
                     Map<String, Object> agencyProperties = new HashMap<String, Object>();
@@ -332,10 +367,16 @@
                     agencyProperties.put("agency_no",agency);
                     agencyID = getAgency(agencyProperties);
                     Map<String, Object> relProperties = new HashMap<String, Object>();
-                                        relProperties.put("start_date", start_date);
-                    relProperties.put("start_date_qual", start_date_qual);
-                    relProperties.put("end_date", end_date);
-                    relProperties.put("end_date_qual", end_date_qual);
+                    relProperties.put("start_date", start_date);
+                    if (start_date_qual != null && !start_date_qual.equals("(null)")) {
+                        relProperties.put("start_date_qual", start_date_qual);
+                    }
+                    if (end_date != null && !end_date.equals("(null)")) {
+                        relProperties.put("end_date", end_date);
+                    }
+                    if (end_date_qual != null && !end_date_qual.equals("(null)")) {
+                        relProperties.put("end_date_qual", end_date_qual);
+                    }
                     inserter.createRelationship(agencyID, functionID,
                             DynamicRelationshipType.withName("HAS_FUNCTION"), relProperties);
 
@@ -355,7 +396,7 @@
     }
 
     private boolean hasStartTagName(XMLEvent e, String name) {
-        return e.asStartElement().getName().getLocalPart().equals(name);
+        return e.asStartElement().getName().getLocalPart().equalsIgnoreCase(name); // case-insensitive match that does not depend on the default locale
     }
 
     private String getStartTagName(XMLEvent e) {
@@ -363,7 +404,7 @@
     }
 
     private boolean hasEndTagName(XMLEvent e, String name) {
-        return e.asEndElement().getName().getLocalPart().equals(name);
+        return e.asEndElement().getName().getLocalPart().equalsIgnoreCase(name); // case-insensitive match that does not depend on the default locale
     }
 
     public void setFilename(String filename) {
@@ -373,3 +414,4 @@
 
 }
 
+