--- a/admin/neo4jimporter/src/main/java/StAXSample.java
+++ b/admin/neo4jimporter/src/main/java/StAXSample.java
@@ -84,6 +84,44 @@
if (e.isStartElement()) {
if (hasStartTagName(e, "AGENCIES")) {
System.out.println("Agencies file loaded... ");
+ } else if (hasStartTagName(e, "AGENCY_NO")) {
+ previousAgency.put("agency_no", getCharacters(r));
+ } else if (hasStartTagName(e, "TITLE")) {
+ String title = getCharacters(r);
+ previousAgency.put("name", title);
+ previousAgency.put("label", title);
+ } else if (hasStartTagName(e, "START_DATE")) {
+ String start_date = getCharacters(r);
+ if (start_date != null && !start_date.equals(" ") && !start_date.equals("(null)")) {
+ previousAgency.put("start_date", Integer.parseInt(start_date));
+ }
+ } else if (hasStartTagName(e, "START_DATE_QUAL")) {
+ previousAgency.put("start_date_qual", getCharacters(r));
+ } else if (hasStartTagName(e, "ALTERNATIVE_TITLE")) {
+ previousAgency.put("alternative_title", getCharacters(r));
+ } else if (hasStartTagName(e, "END_DATE")) {
+ String end_date = getCharacters(r);
+ if (end_date != null && !end_date.equals(" ") && !end_date.equals("(null)")) {
+ previousAgency.put("end_date", Integer.parseInt(end_date));
+ }
+ } else if (hasStartTagName(e, "END_DATE_QUAL")) {
+ previousAgency.put("end_date_qual", getCharacters(r));
+ // save agency
+ getAgency(previousAgency);
+ previousAgency = new HashMap<String, Object>();
+ }
+ }
+ }
+ r = xmlif.createXMLEventReader(
+ filename,
+ //new FileInputStream(new File(xmlFileURL.toURI())));
+ new FileInputStream(new File(filename)));
+ while (r.hasNext()) {
+ XMLEvent e = r.nextEvent();
+
+ if (e.isStartElement()) {
+ if (hasStartTagName(e, "AGENCIES")) {
+ System.out.println("Agencies file loaded again... ");
} else if (hasStartTagName(e, "AGENCY_LINK")) {
processAgencyLink(r);
} else if (hasStartTagName(e, "AGENCY_LOCATION")) {
@@ -92,25 +130,17 @@
processAgencyFunction(r);
} else if (hasStartTagName(e, "AGENCY_STATUS")) {
processAgencyStatus(r);
- } else if (hasStartTagName(e, "AGENCY_NO")) {
- previousAgency.put("agency_no", getCharacters(r));
- } else if (hasStartTagName(e, "TITLE")) {
- previousAgency.put("name", getCharacters(r));
- } else if (hasStartTagName(e, "START_DATE")) {
- previousAgency.put("start_date", getCharacters(r));
- } else if (hasStartTagName(e, "START_DATE_QUAL")) {
- previousAgency.put("start_date_qual", getCharacters(r));
- } else if (hasStartTagName(e, "ALTERNATIVE_TITLE")) {
- previousAgency.put("alternative_title", getCharacters(r));
- } else if (hasStartTagName(e, "END_DATE")) {
- previousAgency.put("end_date", getCharacters(r));
- } else if (hasStartTagName(e, "END_DATE_QUAL")) {
- previousAgency.put("end_date_qual", getCharacters(r));
- // save agency
- getAgency(previousAgency);
- previousAgency = new HashMap<String, Object>();
- } else {
- System.out.println("Unhandled tag: " + getStartTagName(e) + " content:" + getCharacters(r));
+ /* TODO
+ Unhandled tag: AGENCY_NOTE content:
+
+Unhandled tag: NOTE_AGENCY_NO content:CA 4886
+Unhandled tag: NOTE_TYPE content:Archivists note
+Unhandled tag: NOTE content:null
+Unhandled tag: head content:
+
+Unexpected character 'C' (code 67) in start tag Expected a quote
+ at [row,col,system-id]: [1093387,18,"agency-sample.xml"]
+ */
}
}
}
@@ -133,19 +163,25 @@
}
+ /**
+  * Looks up the node id for the agency identified by {@code properties.get("agency_no")},
+  * creating the node through {@code inserter} and caching its id in {@code agencyIDs}
+  * the first time an agency number is seen.
+  *
+  * @param properties node properties; only "agency_no" is read here, the whole map is
+  *                   handed to {@code inserter.createNode} when a node is created
+  * @return the cached or newly created node id, or 0 when "agency_no" is missing,
+  *         "(null)" or a single space
+  */
private long getAgency(Map<String, Object> properties) {
+ // NOTE(review): 0 doubles as the "no agency" result; confirm it cannot collide with a real id from inserter.createNode.
+ // Compare by value: == on strings only tests object identity, so equal text read at runtime would not match.
+ Object agencyNo = properties.get("agency_no");
+ if (agencyNo == null || "(null)".equals(agencyNo) || " ".equals(agencyNo)) {
+ return 0;
+ }
if (agencyIDs.get(properties.get("agency_no").toString()) == null) {
long agencyID = inserter.createNode(properties, agencyLabel);
- if (properties.values().size() > 2) {
+ /*if (properties.values().size() > 1) {
agencyFullVersion.put(properties.get("agency_no").toString(), true);
- }
+ } */
agencyIDs.put(properties.get("agency_no").toString(), agencyID);
+ //if (agencyID % 10 == 0) {
+ System.out.println("Agency #"+agencyID);
+ //}
return agencyID;
} else {
long agencyID = agencyIDs.get(properties.get("agency_no").toString());
- if (properties.values().size() > 2 && agencyFullVersion.get(properties.get("agency_no")) == null) {
+ /*if (properties.values().size() > 1 && agencyFullVersion.get(properties.get("agency_no")) == null) {
inserter.setNodeProperties(agencyID, properties);
agencyFullVersion.put(properties.get("agency_no").toString(), true);
- }
+ } */
return agencyID;
}
}
@@ -154,6 +190,7 @@
if (locationIDs.get(locationName) == null) {
HashMap properties = new HashMap< String,Object > ();
properties.put("name", locationName);
+ properties.put("label", locationName);
long locationID = inserter.createNode(properties, locationLabel);
locationIDs.put(locationName, locationID);
return locationID;
@@ -165,6 +202,7 @@
if (functionIDs.get(functionName) == null) {
HashMap properties = new HashMap< String,Object > ();
properties.put("name", functionName);
+ properties.put("label", functionName);
long functionID = inserter.createNode(properties, functionLabel);
functionIDs.put(functionName, functionID);
return functionID;
@@ -176,6 +214,7 @@
if (statusIDs.get(statusName) == null) {
HashMap properties = new HashMap< String,Object > ();
properties.put("name", statusName);
+ properties.put("label", statusName);
long statusID = inserter.createNode(properties, statusLabel);
statusIDs.put(statusName, statusID);
return statusID;
@@ -215,30 +254,34 @@
if (e.isEndElement()) {
if (hasEndTagName(e, "AGENCY_LINK")) {
- //System.out.println("Finished processing link: Name = " + name + "; of = " + of + "; date = " + date);
- long agencyFromID, agencyToID;
- Map<String, Object> agencyFromProperties = new HashMap<String, Object>();
- agencyFromProperties.put("agency_no",agency_from_no);
- agencyFromID = getAgency(agencyFromProperties);
- Map<String, Object> agencyToProperties = new HashMap<String, Object>();
- agencyToProperties.put("agency_no",agency_to_no);
- agencyToID = getAgency(agencyToProperties);
- Map<String, Object> relProperties = new HashMap<String, Object>();
- relProperties.put("link_type", link_type);
- relProperties.put("start_date", start_date);
- if (start_date_qual != null && !start_date_qual.equals("(null)")) {
- relProperties.put("start_date_qual", start_date_qual);
- }
- if (end_date != null && !end_date.equals("(null)")) {
- relProperties.put("end_date", end_date);
- }
- if (end_date_qual != null && !end_date_qual.equals("(null)")) {
- relProperties.put("end_date_qual", end_date_qual);
- }
- inserter.createRelationship(agencyFromID, agencyToID,
- DynamicRelationshipType.withName("IS_LINKED_TO"), relProperties);
-
- break;
+ //System.out.println("Finished processing link: type = " + link_type+ "; from = " + agency_from_no + "; to = " + agency_to_no);
+ // Only create the relationship when the link names a target agency.
+ if (agency_to_no != null && !agency_to_no.equals("(null)")) {
+     long agencyFromID, agencyToID;
+     // getAgency() resolves nodes through the "agency_no" property. Storing the number under
+     // any other key makes it return 0 for both ends, so every link would join id 0 to id 0.
+     Map<String, Object> agencyFromProperties = new HashMap<String, Object>();
+     agencyFromProperties.put("agency_no", agency_from_no);
+     agencyFromID = getAgency(agencyFromProperties);
+     Map<String, Object> agencyToProperties = new HashMap<String, Object>();
+     agencyToProperties.put("agency_no", agency_to_no);
+     agencyToID = getAgency(agencyToProperties);
+     Map<String, Object> relProperties = new HashMap<String, Object>();
+     relProperties.put("link_type", link_type);
+     // Optional values are skipped when absent or exported as the "(null)" placeholder.
+     if (start_date != null && !start_date.equals("(null)")) {
+         relProperties.put("start_date", Integer.parseInt(start_date));
+     }
+     if (start_date_qual != null && !start_date_qual.equals("(null)")) {
+         relProperties.put("start_date_qual", start_date_qual);
+     }
+     if (end_date != null && !end_date.equals("(null)")) {
+         relProperties.put("end_date", Integer.parseInt(end_date));
+     }
+     if (end_date_qual != null && !end_date_qual.equals("(null)")) {
+         relProperties.put("end_date_qual", end_date_qual);
+     }
+     inserter.createRelationship(agencyFromID, agencyToID,
+             DynamicRelationshipType.withName("IS_LINKED_TO"), relProperties);
+ }
+ break;
+
}
}
}
@@ -262,14 +305,14 @@
}
if (e.isEndElement()) {
if (hasEndTagName(e, "AGENCY_LOCATION")) {
- System.out.println("Finished processing location: Name = " + name + "; of = " + of + "; date = " + date);
+ //System.out.println("Finished processing location: Name = " + name + "; of = " + of + "; date = " + date);
long locationID, agencyID;
locationID = getLocation(name);
Map<String, Object> agencyProperties = new HashMap<String, Object>();
agencyProperties.put("agency_no",of);
agencyID = getAgency(agencyProperties);
Map<String, Object> relProperties = new HashMap<String, Object>();
- relProperties.put("date", date);
+ relProperties.put("date", fixDate(date));
inserter.createRelationship(agencyID, locationID,
DynamicRelationshipType.withName("HAS_LOCATION"), relProperties);
@@ -297,14 +340,14 @@
}
if (e.isEndElement()) {
if (hasEndTagName(e, "AGENCY_STATUS")) {
- System.out.println("Finished processing status: Status = " + status + "; of = " + of + "; date = " + date);
+ //System.out.println("Finished processing status: Status = " + status + "; of = " + of + "; date = " + date);
long statusID, agencyID;
statusID = getStatus(status);
Map<String, Object> agencyProperties = new HashMap<String, Object>();
agencyProperties.put("agency_no",of);
agencyID = getAgency(agencyProperties);
Map<String, Object> relProperties = new HashMap<String, Object>();
- relProperties.put("date", date);
+ relProperties.put("date", fixDate(date));
inserter.createRelationship(agencyID, statusID,
DynamicRelationshipType.withName("HAS_STATUS"), relProperties);
@@ -348,12 +391,12 @@
agencyProperties.put("agency_no",agency);
agencyID = getAgency(agencyProperties);
Map<String, Object> relProperties = new HashMap<String, Object>();
- relProperties.put("start_date", start_date);
+ // Integer.parseInt throws on null and on the "(null)" placeholder; guard it the same way
+ // end_date is guarded just below so a missing start date does not abort the import.
+ if (start_date != null && !start_date.equals("(null)")) {
+     relProperties.put("start_date", Integer.parseInt(start_date));
+ }
if (start_date_qual != null && !start_date_qual.equals("(null)")) {
relProperties.put("start_date_qual", start_date_qual);
}
if (end_date != null && !end_date.equals("(null)")) {
- relProperties.put("end_date", end_date);
+ relProperties.put("end_date", Integer.parseInt(end_date));
}
if (end_date_qual != null && !end_date_qual.equals("(null)")) {
relProperties.put("end_date_qual", end_date_qual);
@@ -366,7 +409,14 @@
}
}
}
-
+ /**
+  * Turns a date made of three dash-separated parts into one integer by concatenating the
+  * parts in reverse order (third, second, first) — presumably dd-mm-yyyy to yyyymmdd;
+  * confirm against the source data.
+  *
+  * @param date the raw date text; may be null or a placeholder
+  * @return the concatenated integer, or 0 when {@code date} is null, does not consist of
+  *         exactly three dash-separated parts, or the result is not a valid int
+  */
+ private int fixDate(String date) {
+     if (date == null) {
+         return 0;
+     }
+     // Surrounding whitespace would otherwise end up inside the number and break parsing.
+     String[] parts = date.trim().split("-");
+     if (parts.length != 3) {
+         return 0;
+     }
+     try {
+         return Integer.parseInt(parts[2] + parts[1] + parts[0]);
+     } catch (NumberFormatException ex) {
+         // Non-numeric part or too many digits for an int: treat like any other malformed date.
+         return 0;
+     }
+ }
private String getCharacters(XMLEventReader rdr) throws XMLStreamException {
XMLEvent e = rdr.nextEvent();
if (e.isCharacters()) {
@@ -380,10 +430,6 @@
return e.asStartElement().getName().getLocalPart().toLowerCase().equals(name.toLowerCase());
}
- private String getStartTagName(XMLEvent e) {
- return e.asStartElement().getName().getLocalPart();
- }
-
private boolean hasEndTagName(XMLEvent e, String name) {
return e.asEndElement().getName().getLocalPart().toLowerCase().equals(name.toLowerCase());
}
@@ -395,3 +441,4 @@
}
+
--- /dev/null
+++ b/admin/time.php
@@ -1,1 +1,164 @@
+<?php
+
+// https://github.com/jadell/neo4jphp
+// Autoloader: turn a namespaced class name into a file path under the bundled
+// neo4jphp library directory and load it when such a file exists.
+spl_autoload_register(function ($className) {
+    $relativeFile = str_replace('\\',DIRECTORY_SEPARATOR,$className).'.php';
+    $candidate = '../lib/neo4jphp/lib/'.$relativeFile;
+    if (!file_exists($candidate)) {
+        // Not found here; return without loading anything.
+        return;
+    }
+    require($candidate);
+});
+
+// Output buffers shared through `global` by the helper functions below.
+// $nodes / $edges accumulate the <node> / <edge> XML fragments, $description is written
+// into the GEXF <description> element and the download filename, and $nodeList records
+// the ids of nodes already emitted so each node appears once.
+$nodes = "";
+$edges = "";
+$description = "";
+$nodeList = Array();
+
+/**
+ * Appends a GEXF <node> element for $node to the global $nodes buffer, unless a node
+ * with the same id has already been emitted (tracked in the global $nodeList).
+ *
+ * The optional start attribute is built from the "start_date" and "date" properties
+ * concatenated; the end attribute comes from "end_date". Each node gets a random colour.
+ *
+ * @param object $node object exposing getId() and getProperty($name)
+ */
+function add_node($node) {
+    global $nodes,$nodeList;
+    if (in_array($node->getId(),$nodeList)) {
+        return;
+    }
+    $start = $node->getProperty("start_date") . $node->getProperty("date");
+    $end = $node->getProperty("end_date");
+    // htmlentities() produces named HTML entities (&eacute;, &nbsp;, ...) that are not
+    // defined in XML and make the document ill-formed. htmlspecialchars() only emits
+    // &amp; &lt; &gt; &quot; and &#039;, all of which are valid in an XML attribute.
+    $label = htmlspecialchars((string) $node->getProperty("name"), ENT_QUOTES, 'UTF-8');
+    $nodes.= "<node id='".urlencode($node->getId())."'". ' label="'.$label.'" '
+        . ($start != ""? 'start="'.$start.'" ' : ""). ($end != ""? 'end="'.$end.'" ' : "").'>'
+        ."<viz:color b='".rand(0,255)."' g='".rand(0,255)."' r='".rand(0,255)."'/>"
+        ."</node>". PHP_EOL;
+    $nodeList[] = $node->getId();
+}
+
+/**
+ * Appends a GEXF <edge> element for $rel to the global $edges buffer. The source and
+ * target are the ids of the relationship's start and end nodes; start/end attributes
+ * are written only when the "start_date"/"end_date" properties are non-empty.
+ *
+ * @param object $rel object exposing getId(), getProperty(), getStartNode(), getEndNode()
+ */
+function add_edge($rel) {
+    global $edges;
+    $from = $rel->getProperty("start_date");
+    $until = $rel->getProperty("end_date");
+    $edgeXml = "<edge id='".urlencode($rel->getId())."'";
+    $edgeXml .= " source='".urlencode($rel->getStartNode()->getId())."'";
+    $edgeXml .= " target='".urlencode($rel->getEndNode()->getId())."' ";
+    if ($from != "") {
+        $edgeXml .= 'start="'.$from.'" ';
+    }
+    if ($until != "") {
+        $edgeXml .= 'end="'.$until.'" ';
+    }
+    $edges .= $edgeXml."/>".PHP_EOL;
+}
+
+/**
+ * Emits $node itself, then every relationship attached to it together with both
+ * endpoint nodes of each relationship (add_node() skips nodes already emitted).
+ *
+ * @param object $node object exposing getProperties() and getRelationships()
+ */
+function expandNode($node) {
+    add_node($node);
+
+    // The property list is fetched and walked, but nothing is done with the entries.
+    foreach ($node->getProperties() as $propName => $propValue) {
+    }
+
+    foreach ($node->getRelationships() as $link) {
+        add_edge($link);
+        add_node($link->getStartNode());
+        add_node($link->getEndNode());
+    }
+}
+
+//$ids = (isset($_REQUEST['ids']) ? $_REQUEST['ids'] : "");
+
+// Connecting to the default port 7474 on localhost
+// (the no-argument constructor is used; the commented line shows an explicit host instead).
+$client = new Everyman\Neo4j\Client();
+//$client = new Everyman\Neo4j\Client('192.168.1.127');
+//print_r($client->getServerInfo());
+
+//https://github.com/jadell/neo4jphp/wiki/Caching
+// Install the library's "Variable" cache plugin on the client's entity cache
+// (presumably an in-process cache — see the wiki page above for its exact lifetime).
+$plugin = new Everyman\Neo4j\Cache\Variable();
+$client->getEntityCache()->setCache($plugin);
+
+/*$memcached = new Memcached();
+$memcached->addServer('localhost', 11211);
+
+$plugin = new Everyman\Neo4j\Cache\Memcached($memcached);
+$client->getEntityCache()->setCache($plugin);*/
+
+/*$requests = explode(";",$ids);
+
+
+foreach ($requests as $request) {
+
+ // Array("type" => "path", from=>"1234", to=>"4321","options" => Array())
+
+ $parts = explode("-",$request);
+
+ $requestType = $parts[0];
+ $requestId = $parts[1];
+ if ($requestType == 'node') {
+ expandNode($client->getNode($requestId));
+ } else {
+ findNode($requestType,$requestId);
+ }
+}
+
+function findNode($type,$id) {
+ global $client;
+ $typeMapping = Array (
+ "agency" => Array("label" => "Agency", "id" => "agencyID")
+ );
+
+ $queryString =
+ "MATCH (n:".$typeMapping[$type]["label"].")".
+ "WHERE n.".$typeMapping[$type]["id"]." = {nodeId}".
+ "RETURN n";
+
+ $query = new Everyman\Neo4j\Cypher\Query($client, $queryString, array('nodeId' => $id));
+ $result = $query->getResultSet();
+
+ foreach ($result as $row) {
+ expandNode( $row[0]);
+
+ }
+
+} */
+
+// https://github.com/jadell/neo4jphp/wiki/Paths
+
+/* https://github.com/jadell/neo4jphp/wiki/Traversals
+ * $traversal = new Everyman\Neo4j\Traversal($client);
+$traversal->addRelationship('KNOWS', Relationship::DirectionOut)
+ ->setPruneEvaluator(Traversal::PruneNone)
+ ->setReturnFilter(Traversal::ReturnAll)
+ ->setMaxDepth(4);
+
+$nodes = $traversal->getResults($startNode, Traversal::ReturnTypeNode);
+
+ */
+
+/*https://github.com/jadell/neo4jphp/wiki/Cypher-and-gremlin-queries
+$queryString = "START n=node({nodeId}) ".
+ "MATCH (n)<-[:KNOWS]-(x)".
+ "WHERE x.name = {name}".
+ "RETURN x";
+$query = new Everyman\Neo4j\Cypher\Query($client, $queryString, array('nodeId' => 1, 'name' => 'Bob'));
+$result = $query->getResultSet();
+foreach ($result as $row) {
+ echo $row['x']->getProperty('name') . "\n";
+}*/
+
+// Select every node that has a start_date property below 18870000, then expand each
+// match (the node, its relationships and their endpoint nodes) into $nodes / $edges.
+// $description is later used for the GEXF <description> and the download filename.
+// NOTE(review): the threshold presumably assumes yyyymmdd-style integers — confirm
+// against the values the importer actually stores in start_date.
+$queryString =
+ "MATCH (n) WHERE has(n.start_date) and n.start_date < 18870000 RETURN n";
+ $description = '1887';
+$query = new Everyman\Neo4j\Cypher\Query($client, $queryString);
+$result = $query->getResultSet();
+
+// Each result row carries the matched node in column 0.
+foreach ($result as $row) {
+ expandNode( $row[0]);
+
+}
+
+// Unless a "debug" request parameter is present, send the document as a GEXF download
+// whose filename is derived from $description (lower-cased, spaces to underscores, URL-encoded).
+if (!isset($_REQUEST['debug'])) {
+ header('Content-Type: application/gexf+xml');
+ header('Content-Disposition: attachment; filename="'.urlencode(str_replace(" ","_",strtolower($description))).'.gexf"');
+}
+// Emit the GEXF envelope around the accumulated $nodes and $edges fragments.
+// $description is inserted as-is, so it must not contain XML special characters.
+// NOTE(review): the graph declares timeformat="date" while start/end attributes are
+// copied verbatim from node/relationship properties — confirm those values are in a
+// form GEXF consumers accept as dates.
+echo '<?xml version="1.0" encoding="UTF-8"?>
+<gexf xmlns="http://www.gexf.net/1.2draft" xmlns:viz="http://www.gexf.net/1.2draft/viz" version="1.2">
+ <meta lastmodifieddate="2009-03-20">
+ <creator>lobbyist.disclosurelo.gs</creator>
+ <description>'. $description. '</description>
+ </meta>
+ <graph mode="dynamic" defaultedgetype="directed" timeformat="date">
+
+ <nodes>'. $nodes. '</nodes>
+ <edges>'. $edges.' </edges>
+ </graph>
+</gexf>'. PHP_EOL;
+
+?>