From: Alex Sadleir Date: Wed, 02 Apr 2014 02:28:56 +0000 Subject: naa import update X-Git-Url: http://maxious.lambdacomplex.org/git/?p=disclosr.git&a=commitdiff&h=c1289bb746febc08b38df8942d0ac0f452cfdd00 --- naa import update --- --- a/admin/neo4jimporter/src/main/java/StAXSample.java +++ b/admin/neo4jimporter/src/main/java/StAXSample.java @@ -84,6 +84,44 @@ if (e.isStartElement()) { if (hasStartTagName(e, "AGENCIES")) { System.out.println("Agencies file loaded... "); + } else if (hasStartTagName(e, "AGENCY_NO")) { + previousAgency.put("agency_no", getCharacters(r)); + } else if (hasStartTagName(e, "TITLE")) { + String title = getCharacters(r); + previousAgency.put("name", title); + previousAgency.put("label", title); + } else if (hasStartTagName(e, "START_DATE")) { + String start_date = getCharacters(r); + if (start_date != null && !start_date.equals(" ") && !start_date.equals("(null)")) { + previousAgency.put("start_date", Integer.parseInt(start_date)); + } + } else if (hasStartTagName(e, "START_DATE_QUAL")) { + previousAgency.put("start_date_qual", getCharacters(r)); + } else if (hasStartTagName(e, "ALTERNATIVE_TITLE")) { + previousAgency.put("alternative_title", getCharacters(r)); + } else if (hasStartTagName(e, "END_DATE")) { + String end_date = getCharacters(r); + if (end_date != null && !end_date.equals(" ") && !end_date.equals("(null)")) { + previousAgency.put("end_date", Integer.parseInt(end_date)); + } + } else if (hasStartTagName(e, "END_DATE_QUAL")) { + previousAgency.put("end_date_qual", getCharacters(r)); + // save agency + getAgency(previousAgency); + previousAgency = new HashMap(); + } + } + } + r = xmlif.createXMLEventReader( + filename, + //new FileInputStream(new File(xmlFileURL.toURI()))); + new FileInputStream(new File(filename))); + while (r.hasNext()) { + XMLEvent e = r.nextEvent(); + + if (e.isStartElement()) { + if (hasStartTagName(e, "AGENCIES")) { + System.out.println("Agencies file loaded again... "); } else if (hasStartTagName(e, "AGENCY_LINK")) { processAgencyLink(r); } else if (hasStartTagName(e, "AGENCY_LOCATION")) { @@ -103,27 +141,6 @@ Unexpected character 'C' (code 67) in start tag Expected a quote at [row,col,system-id]: [1093387,18,"agency-sample.xml"] */ - } else if (hasStartTagName(e, "AGENCY_NO")) { - previousAgency.put("agency_no", getCharacters(r)); - } else if (hasStartTagName(e, "TITLE")) { - String title = getCharacters(r); - previousAgency.put("name", title); - previousAgency.put("label", title); - } else if (hasStartTagName(e, "START_DATE")) { - previousAgency.put("start_date", getCharacters(r)); - } else if (hasStartTagName(e, "START_DATE_QUAL")) { - previousAgency.put("start_date_qual", getCharacters(r)); - } else if (hasStartTagName(e, "ALTERNATIVE_TITLE")) { - previousAgency.put("alternative_title", getCharacters(r)); - } else if (hasStartTagName(e, "END_DATE")) { - previousAgency.put("end_date", getCharacters(r)); - } else if (hasStartTagName(e, "END_DATE_QUAL")) { - previousAgency.put("end_date_qual", getCharacters(r)); - // save agency - getAgency(previousAgency); - previousAgency = new HashMap(); - } else { - System.out.println("Unhandled tag: " + getStartTagName(e) + " content:" + getCharacters(r)); } } } @@ -146,11 +163,14 @@ } private long getAgency(Map properties) { + if (properties.get("agency_no") == null || properties.get("agency_no") == "(null)" || properties.get("agency_no") == " ") { + return 0; + } if (agencyIDs.get(properties.get("agency_no").toString()) == null) { long agencyID = inserter.createNode(properties, agencyLabel); - if (properties.values().size() > 2) { + /*if (properties.values().size() > 1) { agencyFullVersion.put(properties.get("agency_no").toString(), true); - } + } */ agencyIDs.put(properties.get("agency_no").toString(), agencyID); //if (agencyID % 10 == 0) { System.out.println("Agency #"+agencyID); @@ -158,10 +178,10 @@ return agencyID; } else { long agencyID = agencyIDs.get(properties.get("agency_no").toString()); - if (properties.values().size() > 2 && agencyFullVersion.get(properties.get("agency_no")) == null) { + /*if (properties.values().size() > 1 && agencyFullVersion.get(properties.get("agency_no")) == null) { inserter.setNodeProperties(agencyID, properties); agencyFullVersion.put(properties.get("agency_no").toString(), true); - } + } */ return agencyID; } } @@ -235,29 +255,33 @@ if (hasEndTagName(e, "AGENCY_LINK")) { //System.out.println("Finished processing link: type = " + link_type+ "; from = " + agency_from_no + "; to = " + agency_to_no); - long agencyFromID, agencyToID; - Map agencyFromProperties = new HashMap(); - agencyFromProperties.put("agency_no",agency_from_no); - agencyFromID = getAgency(agencyFromProperties); - Map agencyToProperties = new HashMap(); - agencyToProperties.put("agency_no",agency_to_no); - agencyToID = getAgency(agencyToProperties); - Map relProperties = new HashMap(); - relProperties.put("link_type", link_type); - relProperties.put("start_date", start_date); - if (start_date_qual != null && !start_date_qual.equals("(null)")) { - relProperties.put("start_date_qual", start_date_qual); - } - if (end_date != null && !end_date.equals("(null)")) { - relProperties.put("end_date", end_date); - } - if (end_date_qual != null && !end_date_qual.equals("(null)")) { - relProperties.put("end_date_qual", end_date_qual); - } - inserter.createRelationship(agencyFromID, agencyToID, - DynamicRelationshipType.withName("IS_LINKED_TO"), relProperties); - - break; + if (agency_to_no != null && !agency_to_no.equals("(null)")) { + long agencyFromID, agencyToID; + Map agencyFromProperties = new HashMap(); + agencyFromProperties.put("agency_from_no", agency_from_no); + agencyFromID = getAgency(agencyFromProperties); + Map agencyToProperties = new HashMap(); + agencyToProperties.put("agency_to_no", agency_to_no); + agencyToID = getAgency(agencyToProperties); + Map relProperties = new HashMap(); + relProperties.put("link_type", link_type); + if (start_date != null && !start_date.equals("(null)")) { + relProperties.put("start_date", Integer.parseInt(start_date)); + } + if (start_date_qual != null && !start_date_qual.equals("(null)")) { + relProperties.put("start_date_qual", start_date_qual); + } + if (end_date != null && !end_date.equals("(null)")) { + relProperties.put("end_date", Integer.parseInt(end_date)); + } + if (end_date_qual != null && !end_date_qual.equals("(null)")) { + relProperties.put("end_date_qual", end_date_qual); + } + inserter.createRelationship(agencyFromID, agencyToID, + DynamicRelationshipType.withName("IS_LINKED_TO"), relProperties); + } + break; + } } } @@ -288,7 +312,7 @@ agencyProperties.put("agency_no",of); agencyID = getAgency(agencyProperties); Map relProperties = new HashMap(); - relProperties.put("date", date); + relProperties.put("date", fixDate(date)); inserter.createRelationship(agencyID, locationID, DynamicRelationshipType.withName("HAS_LOCATION"), relProperties); @@ -323,7 +347,7 @@ agencyProperties.put("agency_no",of); agencyID = getAgency(agencyProperties); Map relProperties = new HashMap(); - relProperties.put("date", date); + relProperties.put("date", fixDate(date)); inserter.createRelationship(agencyID, statusID, DynamicRelationshipType.withName("HAS_STATUS"), relProperties); @@ -367,12 +391,12 @@ agencyProperties.put("agency_no",agency); agencyID = getAgency(agencyProperties); Map relProperties = new HashMap(); - relProperties.put("start_date", start_date); + relProperties.put("start_date", Integer.parseInt(start_date)); if (start_date_qual != null && !start_date_qual.equals("(null)")) { relProperties.put("start_date_qual", start_date_qual); } if (end_date != null && !end_date.equals("(null)")) { - relProperties.put("end_date", end_date); + relProperties.put("end_date", Integer.parseInt(end_date)); } if (end_date_qual != null && !end_date_qual.equals("(null)")) { relProperties.put("end_date_qual", end_date_qual); @@ -385,7 +409,14 @@ } } } - + private int fixDate(String date) { + String[] parts = date.split("-"); + if (parts.length == 3) { + return Integer.parseInt(""+parts[2]+parts[1]+parts[0]); + } else { + return 0; + } + } private String getCharacters(XMLEventReader rdr) throws XMLStreamException { XMLEvent e = rdr.nextEvent(); if (e.isCharacters()) { @@ -399,10 +430,6 @@ return e.asStartElement().getName().getLocalPart().toLowerCase().equals(name.toLowerCase()); } - private String getStartTagName(XMLEvent e) { - return e.asStartElement().getName().getLocalPart(); - } - private boolean hasEndTagName(XMLEvent e, String name) { return e.asEndElement().getName().getLocalPart().toLowerCase().equals(name.toLowerCase()); } --- /dev/null +++ b/admin/time.php @@ -1,1 +1,164 @@ +getId(),$nodeList)) { + $start = $node->getProperty("start_date") . $node->getProperty("date"); + $end = $node->getProperty("end_date"); + $nodes.= "' + ."" + ."". PHP_EOL; + $nodeList[] = $node->getId(); + } +} + +function add_edge($rel) { + global $edges; + $start = $rel->getProperty("start_date"); + $end = $rel->getProperty("end_date"); + $edges.= "". PHP_EOL; + +} + +function expandNode($node) { + //global $description; + //$description .= ($description == ""? "" : " and ").$node->getProperty("name"); + add_node($node); + + foreach ($node->getProperties() as $key => $value) { + // echo "$key: $value\n"; + } + foreach ($node->getRelationships() as $rel) { + //echo($rel->getStartNode()->getId()." -> ".$rel->getEndNode()->getId()."
"); + add_edge($rel); + add_node($rel->getStartNode()); + add_node($rel->getEndNode()); + } +} + +//$ids = (isset($_REQUEST['ids']) ? $_REQUEST['ids'] : ""); + +// Connecting to the default port 7474 on localhost +$client = new Everyman\Neo4j\Client(); +//$client = new Everyman\Neo4j\Client('192.168.1.127'); +//print_r($client->getServerInfo()); + +//https://github.com/jadell/neo4jphp/wiki/Caching +$plugin = new Everyman\Neo4j\Cache\Variable(); +$client->getEntityCache()->setCache($plugin); + +/*$memcached = new Memcached(); +$memcached->addServer('localhost', 11211); + +$plugin = new Everyman\Neo4j\Cache\Memcached($memcached); +$client->getEntityCache()->setCache($plugin);*/ + +/*$requests = explode(";",$ids); + + +foreach ($requests as $request) { + + // Array("type" => "path", from=>"1234", to=>"4321","options" => Array()) + + $parts = explode("-",$request); + + $requestType = $parts[0]; + $requestId = $parts[1]; + if ($requestType == 'node') { + expandNode($client->getNode($requestId)); + } else { + findNode($requestType,$requestId); + } +} + +function findNode($type,$id) { + global $client; + $typeMapping = Array ( + "agency" => Array("label" => "Agency", "id" => "agencyID") + ); + + $queryString = + "MATCH (n:".$typeMapping[$type]["label"].")". + "WHERE n.".$typeMapping[$type]["id"]." = {nodeId}". + "RETURN n"; + + $query = new Everyman\Neo4j\Cypher\Query($client, $queryString, array('nodeId' => $id)); + $result = $query->getResultSet(); + + foreach ($result as $row) { + expandNode( $row[0]); + + } + +} */ + +// https://github.com/jadell/neo4jphp/wiki/Paths + +/* https://github.com/jadell/neo4jphp/wiki/Traversals + * $traversal = new Everyman\Neo4j\Traversal($client); +$traversal->addRelationship('KNOWS', Relationship::DirectionOut) + ->setPruneEvaluator(Traversal::PruneNone) + ->setReturnFilter(Traversal::ReturnAll) + ->setMaxDepth(4); + +$nodes = $traversal->getResults($startNode, Traversal::ReturnTypeNode); + + */ + +/*https://github.com/jadell/neo4jphp/wiki/Cypher-and-gremlin-queries +$queryString = "START n=node({nodeId}) ". + "MATCH (n)<-[:KNOWS]-(x)". + "WHERE x.name = {name}". + "RETURN x"; +$query = new Everyman\Neo4j\Cypher\Query($client, $queryString, array('nodeId' => 1, 'name' => 'Bob')); +$result = $query->getResultSet(); +foreach ($result as $row) { + echo $row['x']->getProperty('name') . "\n"; +}*/ + +$queryString = + "MATCH (n) WHERE has(n.start_date) and n.start_date < 18870000 RETURN n"; + $description = '1887'; +$query = new Everyman\Neo4j\Cypher\Query($client, $queryString); +$result = $query->getResultSet(); + +foreach ($result as $row) { + expandNode( $row[0]); + +} + +if (!isset($_REQUEST['debug'])) { + header('Content-Type: application/gexf+xml'); + header('Content-Disposition: attachment; filename="'.urlencode(str_replace(" ","_",strtolower($description))).'.gexf"'); +} +echo ' + + + lobbyist.disclosurelo.gs + '. $description. ' + + + + '. $nodes. ' + '. $edges.' + +'. PHP_EOL; + +?> --- /dev/null +++ b/admin/timeline.php @@ -1,1 +1,149 @@ +(child:Agency) WITH n, count(child) AS children WHERE children = 0 AND n:Agency RETURN n, children; +// no children + +// MATCH (n) WHERE has(n.start_date) and n.start_date < 18870000 RETURN n +// time slice +if (isset($_REQUEST['json'])) { + + /* echo '{ + "timeline": + { + "headline":"The Main Timeline Headline Goes here", + "type":"default", + "text":"

Intro body text goes here, some HTML is ok

", + "asset": { + "media":"http://yourdomain_or_socialmedialink_goes_here.jpg", + "credit":"Credit Name Goes Here", + "caption":"Caption text goes here" + }, + "date": [ + { + "startDate":"2011,12,10", + "endDate":"2011,12,11", + "headline":"Headline Goes Here", + "text":"

Body text goes here, some HTML is OK

", + "tag":"This is Optional", + "classname":"optionaluniqueclassnamecanbeaddedhere", + "asset": { + "media":"http://twitter.com/ArjunaSoriano/status/164181156147900416", + "thumbnail":"optional-32x32px.jpg", + "credit":"Credit Name Goes Here", + "caption":"Caption text goes here" + } + } + ], + "era": [ + { + "startDate":"2011,12,10", + "endDate":"2011,12,11", + "headline":"Headline Goes Here", + "text":"

Body text goes here, some HTML is OK

", + "tag":"This is Optional" + } + + ] + } +}'; */ + + // https://github.com/jadell/neo4jphp + spl_autoload_register(function ($className) { + $libPath = '../lib/neo4jphp/lib/'; + $classFile = str_replace('\\',DIRECTORY_SEPARATOR,$className).'.php'; + $classPath = $libPath.$classFile; + if (file_exists($classPath)) { + require($classPath); + } + }); + + +// Connecting to the default port 7474 on localhost + $client = new Everyman\Neo4j\Client(); +//$client = new Everyman\Neo4j\Client('192.168.1.127'); +//print_r($client->getServerInfo()); + + + + + //https://github.com/jadell/neo4jphp/wiki/Cypher-and-gremlin-queries + $queryString = "MATCH (n) where has(n.agency_no) RETURN n LIMIT 1000"; + $query = new Everyman\Neo4j\Cypher\Query($client, $queryString); + $dates = Array(); + $result = $query->getResultSet(); + foreach ($result as $row) { + //$dates[] = Array("startDate" =>"2011,12,10", "endDate"=>"2011,12,11", "headline"=>"Headline Goes Here", "text"=>"

Body text goes here, some HTML is OK

"); + $name = $row['x']->getProperty('name'); + $startDate = $row['x']->getProperty('start_date'); + $startDate = substr_replace($startDate, ",", 6, 0); + $startDate = substr_replace($startDate, ",", 4, 0); + $endDate = $row['x']->getProperty('end_date'); + if ($endDate == " ") $endDate = "20140101"; + $endDate = substr_replace($endDate, ",", 6, 0); + $endDate = substr_replace($endDate, ",", 4, 0); + + $dates[] = Array("startDate" =>$startDate, "endDate"=>$endDate, "headline"=>$name, "text"=>"

Body text goes here, some HTML is OK

"); + + } + + + + $timeline = Array( "headline"=>"The Main Timeline Headline Goes here", + "type"=>"default", + "text"=>"

Intro body text goes here, some HTML is ok

","date"=>$dates); + echo json_encode(Array("timeline" => $timeline)); + die(); +} +?> + + + + + Revolutionary User Interfaces + + + + + + + + + + + +
+ + + + + +