More neo4j fixes
[disclosr.git] / admin / neo4jimporter / src / main / java / StAXSample.java
blob:a/admin/neo4jimporter/src/main/java/StAXSample.java -> blob:b/admin/neo4jimporter/src/main/java/StAXSample.java
--- a/admin/neo4jimporter/src/main/java/StAXSample.java
+++ b/admin/neo4jimporter/src/main/java/StAXSample.java
@@ -84,6 +84,44 @@
                     if (e.isStartElement()) {
                         if (hasStartTagName(e, "AGENCIES")) {
                             System.out.println("Agencies file loaded... ");
+                        } else if (hasStartTagName(e, "AGENCY_NO")) {
+                            previousAgency.put("agency_no", getCharacters(r));
+                        } else if (hasStartTagName(e, "TITLE")) {
+                            String title = getCharacters(r);
+                            previousAgency.put("name", title);
+                            previousAgency.put("label", title);
+                        } else if (hasStartTagName(e, "START_DATE")) {
+                            String start_date = getCharacters(r);
+                            if (start_date != null && !start_date.equals(" ") && !start_date.equals("(null)")) {
+                                previousAgency.put("start_date", Integer.parseInt(start_date));
+                            }
+                        } else if (hasStartTagName(e, "START_DATE_QUAL")) {
+                            previousAgency.put("start_date_qual", getCharacters(r));
+                        } else if (hasStartTagName(e, "ALTERNATIVE_TITLE")) {
+                            previousAgency.put("alternative_title", getCharacters(r));
+                        } else if (hasStartTagName(e, "END_DATE")) {
+                            String end_date = getCharacters(r);
+                            if (end_date != null && !end_date.equals(" ") && !end_date.equals("(null)")) {
+                                previousAgency.put("end_date", Integer.parseInt(end_date));
+                            }
+                        } else if (hasStartTagName(e, "END_DATE_QUAL")) {
+                            previousAgency.put("end_date_qual", getCharacters(r));
+                            // save agency
+                            getAgency(previousAgency);
+                            previousAgency = new HashMap<String, Object>();
+                        }
+                    }
+                }
+                r = xmlif.createXMLEventReader(
+                        filename,
+                        //new FileInputStream(new File(xmlFileURL.toURI())));
+                        new FileInputStream(new File(filename)));
+                while (r.hasNext()) {
+                    XMLEvent e = r.nextEvent();
+
+                    if (e.isStartElement()) {
+                        if (hasStartTagName(e, "AGENCIES")) {
+                            System.out.println("Agencies file loaded again... ");
                         } else if (hasStartTagName(e, "AGENCY_LINK")) {
                             processAgencyLink(r);
                         } else if (hasStartTagName(e, "AGENCY_LOCATION")) {
@@ -92,25 +130,17 @@
                             processAgencyFunction(r);
                         } else if (hasStartTagName(e, "AGENCY_STATUS")) {
                             processAgencyStatus(r);
-                        } else if (hasStartTagName(e, "AGENCY_NO")) {
-                            previousAgency.put("agency_no", getCharacters(r));
-                        } else if (hasStartTagName(e, "TITLE")) {
-                            previousAgency.put("name", getCharacters(r));
-                        } else if (hasStartTagName(e, "START_DATE")) {
-                            previousAgency.put("start_date", getCharacters(r));
-                        } else if (hasStartTagName(e, "START_DATE_QUAL")) {
-                            previousAgency.put("start_date_qual", getCharacters(r));
-                        } else if (hasStartTagName(e, "ALTERNATIVE_TITLE")) {
-                            previousAgency.put("alternative_title", getCharacters(r));
-                        } else if (hasStartTagName(e, "END_DATE")) {
-                            previousAgency.put("end_date", getCharacters(r));
-                        } else if (hasStartTagName(e, "END_DATE_QUAL")) {
-                            previousAgency.put("end_date_qual", getCharacters(r));
-                            // save agency
-                            getAgency(previousAgency);
-                            previousAgency = new HashMap<String, Object>();
-                        } else {
-                            System.out.println("Unhandled tag: " + getStartTagName(e) + " content:" + getCharacters(r));
+                            /* TODO
+                            Unhandled tag: AGENCY_NOTE content:
+
+Unhandled tag: NOTE_AGENCY_NO content:CA 4886
+Unhandled tag: NOTE_TYPE content:Archivists note
+Unhandled tag: NOTE content:null
+Unhandled tag: head content:
+
+Unexpected character 'C' (code 67) in start tag Expected a quote
+ at [row,col,system-id]: [1093387,18,"agency-sample.xml"]
+                             */
                         }
                     }
                 }
@@ -133,19 +163,25 @@
     }
 
     private long getAgency(Map<String, Object> properties) {
+        if (properties.get("agency_no") == null || "(null)".equals(properties.get("agency_no")) || " ".equals(properties.get("agency_no"))) { // equals() compares content; == on these values would only compare object references
+            return 0; // no usable agency_no: nothing is created or looked up. NOTE(review): confirm callers treat 0 as "no agency" rather than as a node id
+        }
         if (agencyIDs.get(properties.get("agency_no").toString()) == null) {
             long agencyID = inserter.createNode(properties, agencyLabel);
-            if (properties.values().size() > 2) {
+            /*if (properties.values().size() > 1) {
                 agencyFullVersion.put(properties.get("agency_no").toString(), true);
-            }
+            } */
             agencyIDs.put(properties.get("agency_no").toString(), agencyID);
+            //if (agencyID % 10 == 0) {
+                System.out.println("Agency #"+agencyID);
+            //}
             return agencyID;
         } else {
             long agencyID = agencyIDs.get(properties.get("agency_no").toString());
-            if (properties.values().size() > 2 && agencyFullVersion.get(properties.get("agency_no")) == null) {
+            /*if (properties.values().size() > 1 && agencyFullVersion.get(properties.get("agency_no")) == null) {
                 inserter.setNodeProperties(agencyID, properties);
                 agencyFullVersion.put(properties.get("agency_no").toString(), true);
-            }
+            } */
             return agencyID;
         }
     }
@@ -154,6 +190,7 @@
         if (locationIDs.get(locationName) == null) {
             HashMap properties = new HashMap< String,Object > ();
             properties.put("name", locationName);
+            properties.put("label", locationName);
             long locationID = inserter.createNode(properties, locationLabel);
             locationIDs.put(locationName, locationID);
             return locationID;
@@ -165,6 +202,7 @@
         if (functionIDs.get(functionName) == null) {
             HashMap properties = new HashMap< String,Object > ();
             properties.put("name", functionName);
+            properties.put("label", functionName);
             long functionID = inserter.createNode(properties, functionLabel);
             functionIDs.put(functionName, functionID);
             return functionID;
@@ -176,6 +214,7 @@
         if (statusIDs.get(statusName) == null) {
             HashMap properties = new HashMap< String,Object > ();
             properties.put("name", statusName);
+            properties.put("label", statusName);
             long statusID = inserter.createNode(properties, statusLabel);
             statusIDs.put(statusName, statusID);
             return statusID;
@@ -215,30 +254,34 @@
             if (e.isEndElement()) {
                 if (hasEndTagName(e, "AGENCY_LINK")) {
 
-                    //System.out.println("Finished processing link:  Name = " + name + "; of = " + of + "; date = " + date);
-                    long agencyFromID, agencyToID;
-                    Map<String, Object> agencyFromProperties = new HashMap<String, Object>();
-                    agencyFromProperties.put("agency_no",agency_from_no);
-                    agencyFromID = getAgency(agencyFromProperties);
-                    Map<String, Object> agencyToProperties = new HashMap<String, Object>();
-                    agencyToProperties.put("agency_no",agency_to_no);
-                    agencyToID = getAgency(agencyToProperties);
-                    Map<String, Object> relProperties = new HashMap<String, Object>();
-                    relProperties.put("link_type", link_type);
-                    relProperties.put("start_date", start_date);
-                    if (start_date_qual != null && !start_date_qual.equals("(null)")) {
-                        relProperties.put("start_date_qual", start_date_qual);
-                    }
-                    if (end_date != null && !end_date.equals("(null)")) {
-                        relProperties.put("end_date", end_date);
-                    }
-                    if (end_date_qual != null && !end_date_qual.equals("(null)")) {
-                        relProperties.put("end_date_qual", end_date_qual);
-                    }
-                    inserter.createRelationship(agencyFromID, agencyToID,
-                            DynamicRelationshipType.withName("IS_LINKED_TO"), relProperties);
-
-                    break;
+                    //System.out.println("Finished processing link:  type = " + link_type+ "; from = " + agency_from_no + "; to = " + agency_to_no);
+                    if (agency_to_no != null && !agency_to_no.equals("(null)")) {
+                        long agencyFromID, agencyToID;
+                        Map<String, Object> agencyFromProperties = new HashMap<String, Object>();
+                        agencyFromProperties.put("agency_from_no", agency_from_no);
+                        agencyFromID = getAgency(agencyFromProperties);
+                        Map<String, Object> agencyToProperties = new HashMap<String, Object>();
+                        agencyToProperties.put("agency_to_no", agency_to_no);
+                        agencyToID = getAgency(agencyToProperties);
+                        Map<String, Object> relProperties = new HashMap<String, Object>();
+                        relProperties.put("link_type", link_type);
+                        if (start_date != null && !start_date.equals("(null)")) {
+                            relProperties.put("start_date", Integer.parseInt(start_date));
+                        }
+                        if (start_date_qual != null && !start_date_qual.equals("(null)")) {
+                            relProperties.put("start_date_qual", start_date_qual);
+                        }
+                        if (end_date != null && !end_date.equals("(null)")) {
+                            relProperties.put("end_date", Integer.parseInt(end_date));
+                        }
+                        if (end_date_qual != null && !end_date_qual.equals("(null)")) {
+                            relProperties.put("end_date_qual", end_date_qual);
+                        }
+                        inserter.createRelationship(agencyFromID, agencyToID,
+                                DynamicRelationshipType.withName("IS_LINKED_TO"), relProperties);
+                    }
+                        break;
+
                 }
             }
         }
@@ -262,14 +305,14 @@
             }
             if (e.isEndElement()) {
                 if (hasEndTagName(e, "AGENCY_LOCATION")) {
-                    System.out.println("Finished processing location:  Name = " + name + "; of = " + of + "; date = " + date);
+                    //System.out.println("Finished processing location:  Name = " + name + "; of = " + of + "; date = " + date);
                     long locationID, agencyID;
                     locationID = getLocation(name);
                     Map<String, Object> agencyProperties = new HashMap<String, Object>();
                     agencyProperties.put("agency_no",of);
                     agencyID = getAgency(agencyProperties);
                     Map<String, Object> relProperties = new HashMap<String, Object>();
-                    relProperties.put("date", date);
+                    relProperties.put("date", fixDate(date));
                     inserter.createRelationship(agencyID, locationID,
                             DynamicRelationshipType.withName("HAS_LOCATION"), relProperties);
 
@@ -297,14 +340,14 @@
             }
             if (e.isEndElement()) {
                 if (hasEndTagName(e, "AGENCY_STATUS")) {
-                    System.out.println("Finished processing status:  Status = " + status + "; of = " + of + "; date = " + date);
+                    //System.out.println("Finished processing status:  Status = " + status + "; of = " + of + "; date = " + date);
                     long statusID, agencyID;
                             statusID = getStatus(status);
                     Map<String, Object> agencyProperties = new HashMap<String, Object>();
                     agencyProperties.put("agency_no",of);
                     agencyID = getAgency(agencyProperties);
                     Map<String, Object> relProperties = new HashMap<String, Object>();
-                    relProperties.put("date", date);
+                    relProperties.put("date", fixDate(date));
                     inserter.createRelationship(agencyID, statusID,
                             DynamicRelationshipType.withName("HAS_STATUS"), relProperties);
 
@@ -348,12 +391,12 @@
                     agencyProperties.put("agency_no",agency);
                     agencyID = getAgency(agencyProperties);
                     Map<String, Object> relProperties = new HashMap<String, Object>();
-                    relProperties.put("start_date", start_date);
+                    relProperties.put("start_date", Integer.parseInt(start_date));
                     if (start_date_qual != null && !start_date_qual.equals("(null)")) {
                         relProperties.put("start_date_qual", start_date_qual);
                     }
                     if (end_date != null && !end_date.equals("(null)")) {
-                        relProperties.put("end_date", end_date);
+                        relProperties.put("end_date", Integer.parseInt(end_date));
                     }
                     if (end_date_qual != null && !end_date_qual.equals("(null)")) {
                         relProperties.put("end_date_qual", end_date_qual);
@@ -366,7 +409,14 @@
             }
         }
     }
-
+    /** Reorders a three-field, dash-separated date "a-b-c" into the int "cba" (presumably dd-mm-yyyy to yyyymmdd -- confirm); returns 0 for null or any other shape. */
+    private int fixDate(String date) {
+        String[] parts = (date == null) ? new String[0] : date.trim().split("-"); // null is treated as an empty, unparseable date
+        if (parts.length != 3) {
+            return 0; // missing or malformed date
+        }
+        return Integer.parseInt(parts[2] + parts[1] + parts[0]); // throws NumberFormatException if the joined fields are not a valid int
+    }
     private String getCharacters(XMLEventReader rdr) throws XMLStreamException {
         XMLEvent e = rdr.nextEvent();
         if (e.isCharacters()) {
@@ -380,10 +430,6 @@
         return e.asStartElement().getName().getLocalPart().toLowerCase().equals(name.toLowerCase());
     }
 
-    private String getStartTagName(XMLEvent e) {
-        return e.asStartElement().getName().getLocalPart();
-    }
-
     private boolean hasEndTagName(XMLEvent e, String name) {
         return e.asEndElement().getName().getLocalPart().toLowerCase().equals(name.toLowerCase());
     }
@@ -395,3 +441,4 @@
 
 }
 
+