update overviewer and neo4j exports
update overviewer and neo4j exports

--- a/admin/import.php
+++ b/admin/import.php
@@ -86,8 +86,7 @@
         "Agency Postcode" => "contactPostcode",
         "" => ""
     );
-    
-    $headers;
+
 
     while (($data = fgetcsv($handle, 1000, "\t")) !== false) {
         $num = count($data);
@@ -172,7 +171,7 @@
     }
     ksort($files);
     foreach ($files as $date => $fname) {
-        echo "<a href=\"import.php?fname=$fname\">$fname</a>&nbsp;" . filesize($path . $fname) . "&nbsp;" . $date . "<br/>";
+        echo "<a href=\"importdatagov.php?fname=$fname\">$fname</a>&nbsp;" . filesize($path . $fname) . "&nbsp;" . $date . "<br/>";
     }
 } else {
     $success = 0;

--- /dev/null
+++ b/admin/importdatagov.php
@@ -1,1 +1,216 @@
-
+<?php
+
+include_once ("../lib/common.inc.php");
+auth();
+/* todo gaps
+Contract ID,Department,Portfolio,Division,Branch,Agency Ref,Contract Date,End Date,Value,Office PCode,Description,Procurement Desc,Confidentiality Reason,Consultancy,ANZSCC Code,ANSCC Desc,Supplier,Supplier Postal Addr,Supplier Suburb,Supplier State,Supplier Country,Supplier PCode,Supplier ABN,Supplier DUNS,Supplier ACN,S/O Reference Num
+1694570,Department of Defence,Defence,NEW SOUTH WALES,RAN HMAS WATSON,1906458734  ,30-Jun-2007,30-Jun-2007,16083.96,2612,AIRLINE TICKETS,direct source,,No,731,Passenger transportation by air,QANTAS AIRWAYS LTD,PO Box PB 747,MASCOT,NSW,Australia,2020,16009661901,750512642,009661901  ,
+
+*/
+// Destination columns of the contractnotice INSERT, in placeholder order ("importFile" is supplied by processFile).
+$contractNoticeFields = array(
+    "importFile",
+    "agencyName",
+    "parentCN",
+    "CNID",
+    "publishDate",
+    "amendDate",
+    "contractStart",
+    "contractEnd",
+    "value",
+    "description",
+    "agencyID",
+    "categoryUNSPSC",
+    "category",
+    "procurementMethod",
+    "atmID",
+    "SONID",
+    "confidentialityContract",
+    "confidentialityContractReason",
+    "confidentialityOutputs",
+    "confidentialityOutputsReason",
+    "consultancy",
+    "consultancyReason",
+    "amendmentReason",
+    "supplierName",
+    "supplierAddress",
+    "supplierCity",
+    "supplierPostcode",
+    "supplierCountry",
+    "supplierABNExempt",
+    "supplierABN",
+    "contactBranch",
+    "contactDivision",
+    "contactPostcode"
+);
+// One "?" placeholder per column; the variable ends up holding the prepared statement, not the SQL text.
+$contractNoticeInsertQ = $conn->prepare(
+    'INSERT INTO contractnotice ("' . implode('" , "', $contractNoticeFields) . '") VALUES ( '
+    . implode(", ", array_fill(0, count($contractNoticeFields), "?"))
+    . ");"
+);
+
+/**
+ * Import one contract-notice CSV export into the contractnotice table via the prepared global statement.
+ * Row 1 is the header row; later rows are mapped through $datamapping0712. NOTE(review): values are bound
+ * in CSV column order, so the header order is assumed to match $contractNoticeFields -- confirm.
+ * @param string $fpath path of the CSV file; also bound as the first value ("importFile")
+ * @return int number of rows inserted without error (duplicate-key rows are skipped silently)
+ */
+function processFile($fpath) {
+    global $conn, $contractNoticeFields, $contractNoticeInsertQ;
+    $row = 1;
+    $success = 0;
+    ini_set('auto_detect_line_endings',TRUE);
+    $handle = fopen($fpath, "r");
+    if ($handle === false) {
+        echo "<font color=red>Unable to open $fpath</font><br>";
+        return 0;
+    }
+    // CSV header => contractnotice column; "" marks a CSV column that is not imported.
+    $datamapping0712 = array(
+        "Agency Name" => "agencyName",
+        "Parent Contract ID" => "parentCN",
+        "Contract ID" => "CNID",
+        "Publish Date" => "publishDate",
+        "Amendment Date" => "amendDate",
+        "Start Date" => "contractStart",
+        "End Date" => "contractEnd",
+        "Value" => "value",
+        "Description" => "description",
+        "Agency Ref ID" => "agencyID",
+        "UNSPSC Code" => "categoryUNSPSC",
+        "Title" => "category",
+        "Procurement Method" => "procurementMethod",
+        "ATM ID" => "atmID",
+        "SON ID" => "SONID",
+        "Confidentiality Contract Flag" => "confidentialityContract",
+        "Confidentiality Contract Reason" => "confidentialityContractReason",
+        "Confidentiality Outputs Flag" => "confidentialityOutputs",
+        "Confidentiality Outputs Reason" => "confidentialityOutputsReason",
+        "Consultancy Flag" => "consultancy",
+        "Consultancy Reason" => "consultancyReason",
+        "Amendment Reason" => "amendmentReason",
+        "Supplier Name" => "supplierName",
+        "Supplier Address" => "supplierAddress",
+        "Supplier Suburb" => "supplierCity",
+        "Supplier Postcode" => "supplierPostcode",
+        "Supplier Country" => "supplierCountry",
+        "Supplier ABN Exempt" => "supplierABNExempt",
+        "ABN" => "supplierABN",
+        "Contact Name" => "",
+        "Contact Phone" => "",
+        "Branch" => "contactBranch",
+        "Division" => "contactDivision",
+        "Office Postcode" => "contactPostcode",
+    );
+    while (($data = fgetcsv($handle, 10000)) !== false) {
+        if ($data === array(null)) { // fgetcsv() reports a blank line as array(null): nothing to import
+            $row++;
+            continue;
+        }
+        $num = count($data);
+        if ($row == 1) {
+            // Drop a UTF-8 byte-order mark so the first header still matches the mapping.
+            $data[0] = preg_replace('/^\xEF\xBB\xBF/', '', $data[0]);
+            $headers = $data;
+        } else {
+            if ($num > count($datamapping0712)) {
+                // print_r(..., true) returns the dump instead of echoing it and appending "1".
+                die("<font color=red>Error in data import; data mapping fields out of bounds or changed $num > ".count($datamapping0712)."</font><br>" . $fpath . print_r($data, true));
+            }
+            $contractNoticeInsert = array($fpath);
+            for ($c = 0; $c < $num; $c++) {
+                $data[$c] = trim(trim($data[$c], "="), "\"");
+                // Unknown or missing headers resolve to "" and are skipped instead of raising notices.
+                $field = (isset($headers[$c]) && isset($datamapping0712[$headers[$c]])) ? $datamapping0712[$headers[$c]] : "";
+                if (!in_array($field, $contractNoticeFields)) {
+                    continue;
+                }
+                if ($field == "parentCN" || $field == "CNID" || $field == "supplierABN") {
+                    $data[$c] = ($field == "supplierABN") ? $data[$c] : substr($data[$c], 2); // take off the "CN" prefix
+                    $contractNoticeInsert[] = ($data[$c] > 0) ? $data[$c] : null;
+                } elseif (in_array($field, array("amendDate", "publishDate", "contractStart", "contractEnd"))) {
+                    // Empty or unparseable dates become NULL; date() of a false timestamp would store 1970-01-01.
+                    $timestamp = strtotime($data[$c]);
+                    $contractNoticeInsert[] = ($timestamp === false) ? null : date('Y-m-d H:i:s', $timestamp);
+                } else {
+                    // NOTE(review): presumably guards against leftover ="..." wrapping; the old strstr() call got a boolean needle -- confirm intent.
+                    if (strpos($data[$c], "\" =") !== false) {
+                        die("Invalid Description field" . print_r($contractNoticeInsert, true));
+                    }
+                    $contractNoticeInsert[] = preg_replace('/[^[:print:]]/', '', utf8_encode($data[$c]));
+                }
+            }
+            flush();
+            $contractNoticeInsertQ->execute($contractNoticeInsert);
+            // Statement failures are reported by the statement's own errorInfo(), not the connection's.
+            $errors = $contractNoticeInsertQ->errorInfo();
+            if ($errors[1] == 7 && strpos($errors[2], "duplicate key") !== false) {
+                // duplicate of an already imported row: skip silently
+            } elseif ($errors[1] == 0) {
+                $success++;
+            } else {
+                foreach ($contractNoticeFields as $key => $cnf) {
+                    echo var_dump($contractNoticeInsert[$key]) . $cnf . "<br>";
+                }
+                echo $data[2] . " failed CN insert.<br>" . print_r($errors, true) . "<br> row $row <br><br>\n";
+            }
+            flush();
+        }
+        $row++;
+    }
+    fclose($handle);
+    $contractNoticeInsertQ->closeCursor();
+
+    return $success;
+}
+
+$path = 'datagovdata/';
+// File to import: a CLI argument wins over the request parameter; basename() keeps the name inside $path.
+$fname = isset($_REQUEST["fname"]) ? $_REQUEST["fname"] : "";
+if (isset($argv[1]) && $argv[1] != "") {
+    $fname = $argv[1];
+}
+$fname = basename($fname);
+if ($fname == "") {
+    echo "Get files from: https://www.tenders.gov.au/?event=public.reports.list<br>";
+    // Key by ISO mtime plus a name hash so ksort() orders by date without same-second files colliding.
+    $files = array();
+    $dhandle = opendir($path);
+    if ($dhandle) {
+        while (false !== ($entry = readdir($dhandle))) {
+            if (($entry != '.') && ($entry != '..')) {
+                $files[date("c", filemtime($path . $entry)) . md5($entry)] = $entry;
+            }
+        }
+        closedir($dhandle);
+    }
+    ksort($files);
+    foreach ($files as $date => $entry) {
+        // File names come from disk, so escape them for the URL and for HTML.
+        echo "<a href=\"importdatagov.php?fname=" . urlencode($entry) . "\">" . htmlspecialchars($entry) . "</a>&nbsp;" . filesize($path . $entry) . "&nbsp;" . $date . "<br/>";
+    }
+} elseif (!is_file($path . $fname)) {
+    echo "<font color=red>No such import file: " . htmlspecialchars($fname) . "</font><br>";
+} else {
+    echo " ============== " . htmlspecialchars($fname) . "  ============== <br>";
+    flush();
+    // processFile() takes only the path; the old three calls passed an ignored 2nd argument and re-read the same file.
+    $success = processFile($path . $fname);
+    echo "<br> $success records successfully created";
+    flush();
+    // run post import data processing
+    if ($success > 0) {
+        $conn->exec("update datasets set \"lastUpdated\" = NOW() where title = 'Contract Notices'");
+        echo "link amend<br>";
+        include ("linkAmendments.php");
+        echo "update UNSPSC<br>";
+        include ("updateUNSPSC.php");
+    }
+    // agency post-processing is currently disabled:
+    //include ("setAgencyStatus.php");
+    //include ("setAgencyURLABN.php");
+}
+?>
+

--- a/admin/neo4jimporter/pom.xml
+++ b/admin/neo4jimporter/pom.xml
@@ -7,12 +7,13 @@
   	<dependency>
   		<groupId>org.neo4j</groupId>
   		<artifactId>neo4j</artifactId>
-  		<version>1.8.RC1</version>
+  		<version>2.0.0-M03</version>
   	</dependency>
   	<dependency>
     <groupId>postgresql</groupId>
     <artifactId>postgresql</artifactId>
-    <version>9.0-801.jdbc4</version>
+    <version>9.1-901.jdbc4</version>
 </dependency>
   </dependencies>
 </project>
+

--- a/admin/neo4jimporter/src/main/java/Importer.java
+++ b/admin/neo4jimporter/src/main/java/Importer.java
@@ -5,25 +5,34 @@
 import java.sql.SQLException;

 import java.sql.SQLWarning;

 import java.sql.Statement;

+

+import java.io.File;

+import java.io.IOException;

+import java.io.InputStream;

+import java.io.Writer;

 import java.util.HashMap;

+import java.util.Map;

 

+import org.neo4j.graphdb.Direction;

+import org.neo4j.graphdb.DynamicLabel;

 import org.neo4j.graphdb.DynamicRelationshipType;

 import org.neo4j.graphdb.GraphDatabaseService;

+import org.neo4j.graphdb.Label;

 import org.neo4j.graphdb.Node;

-import org.neo4j.graphdb.index.BatchInserterIndex;

-import org.neo4j.graphdb.index.BatchInserterIndexProvider;

+import org.neo4j.graphdb.RelationshipType;

 import org.neo4j.helpers.collection.MapUtil;

-import org.neo4j.index.impl.lucene.LuceneBatchInserterIndexProvider;

-import org.neo4j.kernel.impl.batchinsert.BatchInserter;

-import org.neo4j.kernel.impl.batchinsert.BatchInserterImpl;

+import org.neo4j.unsafe.batchinsert.BatchInserter;

+import org.neo4j.unsafe.batchinsert.BatchInserters;

 

 public class Importer {

 

     public static void main(String[] argv) {

-        BatchInserter inserter = new BatchInserterImpl("target/neo4jdb-batchinsert");

-        BatchInserterIndexProvider indexProvider = new LuceneBatchInserterIndexProvider(inserter);

-        BatchInserterIndex labels = indexProvider.nodeIndex("labels", MapUtil.stringMap("type", "exact"));

-        labels.setCacheCapacity("Label", 100000);

+Map<String, String> config = new HashMap<String, String>();

+config.put( "neostore.nodestore.db.mapped_memory", "90M" );

+BatchInserter inserter = BatchInserters.inserter("target/batchinserter-example-config", config );

+        //BatchInserterIndexProvider indexProvider = new LuceneBatchInserterIndexProvider(inserter);

+        //BatchInserterIndex names = indexProvider.nodeIndex("names", MapUtil.stringMap("type", "exact"));

+        //names.setCacheCapacity("name", 100000);

 

 

 

@@ -78,52 +87,56 @@
 

             // Execute the query

             ResultSet rs = stmt.executeQuery("SELECT contractnotice.\"agencyName\", "

-                    + "  contractnotice.\"supplierABN\",contractnotice.\"supplierName\",sum(value) as sum "

+                    + " (case when \"supplierABN\" != 0 THEN \"supplierABN\"::text ELSE \"supplierName\" END) as supplierID , max(contractnotice.\"supplierName\") as \"supplierName\",sum(value) as sum "

                     + "FROM  public.contractnotice  GROUP BY contractnotice.\"agencyName\", "

-                    + "  contractnotice.\"supplierABN\",contractnotice.\"supplierName\"");

-            String previousAgency = "";

-            GraphDatabaseService gds = inserter.getGraphDbService();

+                    + " (case when \"supplierABN\" != 0 THEN \"supplierABN\"::text ELSE \"supplierName\" END)");

             HashMap<String, Long> supplierIDs = new HashMap<String, Long>();

             HashMap<String, Long> agencyIDs = new HashMap<String, Long>();

+

+Label agencyLabel = DynamicLabel.label( "Agency" );

+inserter.createDeferredSchemaIndex( agencyLabel ).on( "name" );

+Label supplierLabel = DynamicLabel.label( "Supplier" );

+inserter.createDeferredSchemaIndex( supplierLabel ).on( "name" ); // index Supplier.name; agencyLabel was indexed twice

 

             // Loop through the result set

             while (rs.next()) {

                 long supplierID, agencyID;

                 String supplierKey;

                 if (agencyIDs.get(rs.getString("agencyName")) == null) {

-                    Node myNode = gds.createNode();

-                    myNode.setProperty("Label", rs.getString("agencyName"));

-                    myNode.setProperty("type", "agency");

-                    agencyIDs.put(rs.getString("agencyName"), myNode.getId());

-                    if (myNode.getId() % 100 == 0) {

-                        System.out.println("Agency " + myNode.getId());

+		    Map<String, Object> properties = new HashMap<String, Object>();

+                    properties.put("name", rs.getString("agencyName"));

+                    properties.put("type", "agency"); // literal node type; the query selects no "agency" column

+		    agencyID = inserter.createNode(properties, agencyLabel);

+                    agencyIDs.put(rs.getString("agencyName"), agencyID);

+                    if (agencyID % 10 == 0) {

+                        System.out.println("Agency " + agencyID);

                     }

                 }

                 agencyID = agencyIDs.get(rs.getString("agencyName"));

 

 

-                if (rs.getString("supplierABN") != "0" && rs.getString("supplierABN") != "") {

-                    supplierKey = rs.getString("supplierABN");

-                } else {

-                    supplierKey = rs.getString("supplierName");

-                }

                 // inject some data 

-                if (supplierIDs.get(supplierKey) == null) {

-                    Node myNode = gds.createNode();

-                    myNode.setProperty("Label", rs.getString("supplierName"));

-                    myNode.setProperty("type", "supplier");

-                    supplierIDs.put(supplierKey, myNode.getId());

-                    if (myNode.getId() % 1000 == 0) {

-                        System.out.println("Supplier " + myNode.getId());

+                if (supplierIDs.get(rs.getString("supplierID")) == null) {

+		    Map<String, Object> properties = new HashMap<String, Object>();

+                    properties.put("name", rs.getString("supplierName"));

+                    properties.put("type", "supplier"); // literal node type; the query selects no "supplier" column

+		    supplierID = inserter.createNode(properties, supplierLabel);

+                    supplierIDs.put(rs.getString("supplierID"), supplierID);

+                    if (supplierID % 1000 == 0) {

+                        System.out.println("Supplier " + supplierID);

                     }

                 }

-                supplierID = supplierIDs.get(supplierKey);

+                supplierID = supplierIDs.get(rs.getString("supplierID"));

 

 

-                long rel = inserter.createRelationship(agencyID, supplierID,

-                        DynamicRelationshipType.withName("KNOWS"), null);

-                inserter.setRelationshipProperty(rel, "Weight", rs.getDouble("sum"));

-

+// To set properties on the relationship, use a properties map

+// instead of null as the last parameter.

+Map<String, Object> properties = new HashMap<String, Object>();

+properties.put( "value", rs.getDouble("sum"));

+                inserter.createRelationship(agencyID, supplierID,

+                        DynamicRelationshipType.withName("PAYS"), properties);

+                inserter.createRelationship(supplierID, agencyID,

+                        DynamicRelationshipType.withName("PAID_BY"), properties);

             }

             // Close the result set, statement and the connection

             rs.close();

@@ -142,10 +155,11 @@
             }

         }

 //make the changes visible for reading, use this sparsely, requires IO!

-        labels.flush();

+//        names.flush();

 

 // Make sure to shut down the index provider

-        indexProvider.shutdown();

+//        indexProvider.shutdown();

         inserter.shutdown();

     }

-}
+}

+

 Binary files a/admin/neo4jimporter/target/classes/Importer.class and b/admin/neo4jimporter/target/classes/Importer.class differ
--- a/admin/partialdata/import.php
+++ b/admin/partialdata/import.php
@@ -12,6 +12,7 @@
 	$handle = fopen($fpath, "r");
 	//"t" mode string translates windows line breaks to unix
 	$datamapping0507 = array(
+
 		"Agency" => "agencyName",
 		"CN ID" => "CNID",
 		"Publish Date" => "publishDate",
@@ -137,7 +138,7 @@
 		// loop through all of the files
 		while (false !== ($fname = readdir($dhandle))) {
 			if (($fname != '.') && ($fname != '..') && (!isset($_REQUEST["filter"]) || strpos($fname,$_REQUEST["filter"]) != false)) {
-				echo "<a href=\"import.php?fname=$fname\">$fname</a>&nbsp;" . filesize($path . $fname) . "&nbsp;" . date("c", filemtime($path . $fname)) . "<br/>";
+				echo "<a href=\"importdatagov.php?fname=$fname\">$fname</a>&nbsp;" . filesize($path . $fname) . "&nbsp;" . date("c", filemtime($path . $fname)) . "<br/>";
 				processFile($path . $fname, "contractnotice");
 			}
 		}

--- a/admin/partialdata/importamendments.php
+++ b/admin/partialdata/importamendments.php
@@ -310,7 +310,7 @@
 		// loop through all of the files
 		while (false !== ($fname = readdir($dhandle))) {
 			if (($fname != '.') && ($fname != '..') && (strpos($fname,".xls")>0)) {
-				echo "<a href=\"import.php?fname=$fname\">$fname</a>&nbsp;" . filesize($path . $fname) . "&nbsp;" . date("c", filemtime($path . $fname)) . "<br/>";
+				echo "<a href=\"importdatagov.php?fname=$fname\">$fname</a>&nbsp;" . filesize($path . $fname) . "&nbsp;" . date("c", filemtime($path . $fname)) . "<br/>";
 				processFile($path . $fname, "contractnotice");
 			}
 		}

--- a/exportOverview.csv.php
+++ b/exportOverview.csv.php
@@ -11,10 +11,11 @@
     $unspsc[$row['UNSPSC']] = $row['Title'];
 }
 
+//(\'https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN\'::text || "CNID"::text) as url 
 $query = $conn->prepare('
-SELECT "CNID" as uid, description as text,
-(\'https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN\'::text || "CNID"::text) as url from "contractnotice"
-where "childCN" is null'
+SELECT "CNID" as id, description as text
+from "contractnotice"
+where "childCN" is null limit 10000'
         , array(PDO::ATTR_CURSOR => PDO::FETCH_ORI_NEXT));
 $query->execute();
 $errors = $conn->errorInfo();

--- a/lib/common.inc.php
+++ b/lib/common.inc.php
@@ -314,6 +314,3 @@
             include ("graphs.inc.php");
 
 
-
-            
-