update overviewer and neo4j exports
--- a/admin/import.php
+++ b/admin/import.php
@@ -86,8 +86,7 @@
"Agency Postcode" => "contactPostcode",
"" => ""
);
-
- $headers;
+
while (($data = fgetcsv($handle, 1000, "\t")) !== false) {
$num = count($data);
@@ -172,7 +171,7 @@
}
ksort($files);
foreach ($files as $date => $fname) {
- echo "<a href=\"import.php?fname=$fname\">$fname</a> " . filesize($path . $fname) . " " . $date . "<br/>";
+ echo "<a href=\"importdatagov.php?fname=$fname\">$fname</a> " . filesize($path . $fname) . " " . $date . "<br/>";
}
} else {
$success = 0;
--- /dev/null
+++ b/admin/importdatagov.php
@@ -1,1 +1,258 @@
-
+<?php
+
+/**
+ * Admin import of contract notice CSV files into the "contractnotice" table.
+ *
+ * Called without a file name it lists the files found in datagovdata/; called
+ * with ?fname=<file> (web) or with a file name as first command line argument
+ * it imports that file and then runs the post-import scripts.
+ */
+
+include_once ("../lib/common.inc.php");
+auth();
+/* todo gaps: presumably an older export layout (header and sample row below)
+   whose columns are not all covered by the mapping in processFile() -- confirm.
+Contract ID,Department,Portfolio,Division,Branch,Agency Ref,Contract Date,End Date,Value,Office PCode,Description,Procurement Desc,Confidentiality Reason,Consultancy,ANZSCC Code,ANSCC Desc,Supplier,Supplier Postal Addr,Supplier Suburb,Supplier State,Supplier Country,Supplier PCode,Supplier ABN,Supplier DUNS,Supplier ACN,S/O Reference Num
+1694570,Department of Defence,Defence,NEW SOUTH WALES,RAN HMAS WATSON,1906458734 ,30-Jun-2007,30-Jun-2007,16083.96,2612,AIRLINE TICKETS,direct source,,No,731,Passenger transportation by air,QANTAS AIRWAYS LTD,PO Box PB 747,MASCOT,NSW,Australia,2020,16009661901,750512642,009661901 ,
+
+*/
+// Columns of the prepared INSERT, in bind order.
+$contractNoticeFields = array(
+    "importFile",
+    "agencyName",
+    "parentCN",
+    "CNID",
+    "publishDate",
+    "amendDate",
+    "contractStart",
+    "contractEnd",
+    "value",
+    "description",
+    "agencyID",
+    "categoryUNSPSC",
+    "category",
+    "procurementMethod",
+    "atmID",
+    "SONID",
+    "confidentialityContract",
+    "confidentialityContractReason",
+    "confidentialityOutputs",
+    "confidentialityOutputsReason",
+    "consultancy",
+    "consultancyReason",
+    "amendmentReason",
+    "supplierName",
+    "supplierAddress",
+    "supplierCity",
+    "supplierPostcode",
+    "supplierCountry",
+    "supplierABNExempt",
+    "supplierABN",
+    "contactBranch",
+    "contactDivision",
+    "contactPostcode"
+);
+
+// One positional "?" placeholder per column.
+$contractNoticeInsertSQL = 'INSERT INTO contractnotice ("' . implode('" , "', $contractNoticeFields) . '") VALUES ( '
+    . implode(', ', array_fill(0, count($contractNoticeFields), '?')) . ');';
+$contractNoticeInsertQ = $conn->prepare($contractNoticeInsertSQL);
+
+/**
+ * Import one contract notice CSV file into the contractnotice table.
+ *
+ * The first row is taken as the header. Every later row is mapped by header
+ * name onto the contractnotice columns and inserted through the prepared
+ * statement in $contractNoticeInsertQ. Rows rejected with a "duplicate key"
+ * error are skipped silently; any other failure is echoed with the row data.
+ * The script dies when the file layout does not fit the column mapping.
+ *
+ * @param string $fpath Path of the CSV file; also stored in the importFile column.
+ * @return int Number of rows inserted successfully.
+ */
+function processFile($fpath) {
+    global $contractNoticeFields, $contractNoticeInsertQ;
+    // Line-ending detection comes from this ini setting; the "r" mode translates nothing.
+    ini_set('auto_detect_line_endings', TRUE);
+    $handle = fopen($fpath, "r");
+    if ($handle === false) {
+        // fgetcsv() does not return false for an invalid handle, so the read
+        // loop below must never be entered without an open file.
+        echo "<font color=red>Unable to open import file</font><br>" . htmlspecialchars($fpath) . "<br>\n";
+        return 0;
+    }
+    // CSV header => contractnotice column; "" marks a column that is not imported.
+    $datamapping0712 = array(
+        "Agency Name" => "agencyName",
+        "Parent Contract ID" => "parentCN",
+        "Contract ID" => "CNID",
+        "Publish Date" => "publishDate",
+        "Amendment Date" => "amendDate",
+        "Start Date" => "contractStart",
+        "End Date" => "contractEnd",
+        "Value" => "value",
+        "Description" => "description",
+        "Agency Ref ID" => "agencyID",
+        "UNSPSC Code" => "categoryUNSPSC",
+        "Title" => "category",
+        "Procurement Method" => "procurementMethod",
+        "ATM ID" => "atmID",
+        "SON ID" => "SONID",
+        "Confidentiality Contract Flag" => "confidentialityContract",
+        "Confidentiality Contract Reason" => "confidentialityContractReason",
+        "Confidentiality Outputs Flag" => "confidentialityOutputs",
+        "Confidentiality Outputs Reason" => "confidentialityOutputsReason",
+        "Consultancy Flag" => "consultancy",
+        "Consultancy Reason" => "consultancyReason",
+        "Amendment Reason" => "amendmentReason",
+        "Supplier Name" => "supplierName",
+        "Supplier Address" => "supplierAddress",
+        "Supplier Suburb" => "supplierCity",
+        "Supplier Postcode" => "supplierPostcode",
+        "Supplier Country" => "supplierCountry",
+        "Supplier ABN Exempt" => "supplierABNExempt",
+        "ABN" => "supplierABN",
+        "Contact Name" => "",
+        "Contact Phone" => "",
+        "Branch" => "contactBranch",
+        "Division" => "contactDivision",
+        "Office Postcode" => "contactPostcode",
+    );
+    $dateFields = array("publishDate", "amendDate", "contractStart", "contractEnd");
+
+    $row = 0;
+    $success = 0;
+    $headers = array();
+    // Length 0 lets fgetcsv() read rows of any size instead of splitting long ones.
+    while (($data = fgetcsv($handle, 0)) !== false) {
+        $row++;
+        if ($row == 1) {
+            $headers = $data;
+            // Refuse a file that lacks an imported column rather than load rows
+            // with silently empty fields.
+            $supplied = array();
+            foreach ($headers as $header) {
+                if (isset($datamapping0712[$header])) {
+                    $supplied[] = $datamapping0712[$header];
+                }
+            }
+            $missing = array_diff(array_filter($datamapping0712), $supplied);
+            if ($missing) {
+                die("<font color=red>Error in data import; columns missing from file: " . htmlspecialchars(implode(", ", array_keys($missing))) . "</font><br>" . $fpath);
+            }
+            continue;
+        }
+        if ($data === array(null)) {
+            continue; // fgetcsv() reports a blank line as array(null)
+        }
+        $num = count($data);
+        if ($num > count($datamapping0712)) {
+            die("<font color=red>Error in data import; data mapping fields out of bounds or changed $num > " . count($datamapping0712) . "</font><br>" . $fpath . print_r($data, true));
+        }
+        // Values are collected by column name so that the bind order never depends
+        // on the column order of the file.
+        $values = array_fill_keys($contractNoticeFields, null);
+        $values["importFile"] = $fpath;
+        for ($c = 0; $c < $num; $c++) {
+            if (!isset($headers[$c]) || !isset($datamapping0712[$headers[$c]])) {
+                continue; // no known header for this column
+            }
+            $field = $datamapping0712[$headers[$c]];
+            if (!in_array($field, $contractNoticeFields, true)) {
+                continue; // mapped to "": not imported
+            }
+            $value = trim(trim($data[$c], "="), "\"");
+            if ($field == "parentCN" || $field == "CNID") {
+                $value = substr($value, 2); // take off the "CN" prefix
+            }
+            if ($field == "parentCN" || $field == "CNID" || $field == "supplierABN") {
+                // Only positive numbers are stored; the cast keeps the former loose
+                // "$value > 0" test behaving the same on every PHP version.
+                $values[$field] = ((float) $value > 0) ? $value : null;
+            } elseif (in_array($field, $dateFields, true)) {
+                // NOTE(review): an empty or unparseable date makes strtotime() return
+                // false, which date() renders as the Unix epoch -- confirm intended.
+                $values[$field] = date('Y-m-d H:i:s', strtotime($value));
+            } else {
+                $values[$field] = preg_replace('/[^[:print:]]/', '', utf8_encode($value));
+            }
+        }
+        flush();
+        try {
+            $inserted = $contractNoticeInsertQ->execute(array_values($values));
+        } catch (PDOException $e) {
+            $inserted = false; // in case the connection is configured to throw
+        }
+        // Statement failures are reported on the statement handle only;
+        // PDO::errorInfo() on the connection does not reflect them.
+        $errors = $contractNoticeInsertQ->errorInfo();
+        if ($inserted) {
+            $success++;
+        } elseif ($errors[1] == 7 && strpos((string) $errors[2], "duplicate key") !== false) {
+            // already imported: skip quietly
+        } else {
+            foreach ($contractNoticeFields as $cnf) {
+                var_dump($values[$cnf]);
+                echo $cnf . "<br>";
+            }
+            echo $values["CNID"] . " failed CN insert.<br>" . print_r($errors, true) . "<br> row $row <br><br>\n";
+        }
+        flush();
+    }
+    fclose($handle);
+    $contractNoticeInsertQ->closeCursor();
+
+    return $success;
+}
+
+$path = 'datagovdata/';
+// The file is named by the first command line argument (CLI) or by the "fname"
+// request parameter (web); basename() keeps a web request inside $path.
+$fname = "";
+if (PHP_SAPI == 'cli') {
+    if (isset($argv[1])) {
+        $fname = $argv[1];
+    }
+} elseif (isset($_REQUEST["fname"]) && is_string($_REQUEST["fname"])) {
+    $fname = basename($_REQUEST["fname"]);
+}
+if ($fname == "") {
+    echo "Get files from: https://www.tenders.gov.au/?event=public.reports.list<br>";
+    // Keyed by modification time plus an md5 of the name, so that ksort() lists
+    // the files by time and equal times cannot collide.
+    $files = array();
+    $dhandle = opendir($path);
+    if ($dhandle) {
+        // loop through all of the files
+        while (false !== ($entry = readdir($dhandle))) {
+            if (($entry != '.') && ($entry != '..')) {
+                $files[date("c", filemtime($path . $entry)) . md5($entry)] = $entry;
+            }
+        }
+        closedir($dhandle);
+    }
+    ksort($files);
+    foreach ($files as $date => $entry) {
+        echo "<a href=\"importdatagov.php?fname=" . urlencode($entry) . "\">" . htmlspecialchars($entry) . "</a> " . filesize($path . $entry) . " " . $date . "<br/>";
+    }
+} else {
+    echo " ============== " . htmlspecialchars($fname) . " ============== <br>";
+    flush();
+    // processFile() takes only the path and imports contract notices, so a
+    // single call covers the whole file.
+    $success = processFile($path . $fname);
+    echo "<br> $success records successfully created";
+
+    flush();
+    // run post import data processing
+    if ($success > 0) {
+        $conn->exec("update datasets set \"lastUpdated\" = NOW() where title = 'Contract Notices'");
+        echo "link amend<br>";
+        include ("linkAmendments.php");
+        echo "update UNSPSC<br>";
+        include ("updateUNSPSC.php");
+    }
+    // agency post-processing is currently switched off:
+    //include ("setAgencyStatus.php");
+    //include ("setAgencyURLABN.php");
+}
--- a/admin/neo4jimporter/pom.xml
+++ b/admin/neo4jimporter/pom.xml
@@ -7,12 +7,13 @@
<dependency>
<groupId>org.neo4j</groupId>
<artifactId>neo4j</artifactId>
- <version>1.8.RC1</version>
+ <version>2.0.0-M03</version>
</dependency>
<dependency>
<groupId>postgresql</groupId>
<artifactId>postgresql</artifactId>
- <version>9.0-801.jdbc4</version>
+ <version>9.1-901.jdbc4</version>
</dependency>
</dependencies>
</project>
+
--- a/admin/neo4jimporter/src/main/java/Importer.java
+++ b/admin/neo4jimporter/src/main/java/Importer.java
@@ -5,25 +5,34 @@
import java.sql.SQLException;
import java.sql.SQLWarning;
import java.sql.Statement;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Writer;
import java.util.HashMap;
+import java.util.Map;
+import org.neo4j.graphdb.Direction;
+import org.neo4j.graphdb.DynamicLabel;
import org.neo4j.graphdb.DynamicRelationshipType;
import org.neo4j.graphdb.GraphDatabaseService;
+import org.neo4j.graphdb.Label;
import org.neo4j.graphdb.Node;
-import org.neo4j.graphdb.index.BatchInserterIndex;
-import org.neo4j.graphdb.index.BatchInserterIndexProvider;
+import org.neo4j.graphdb.RelationshipType;
import org.neo4j.helpers.collection.MapUtil;
-import org.neo4j.index.impl.lucene.LuceneBatchInserterIndexProvider;
-import org.neo4j.kernel.impl.batchinsert.BatchInserter;
-import org.neo4j.kernel.impl.batchinsert.BatchInserterImpl;
+import org.neo4j.unsafe.batchinsert.BatchInserter;
+import org.neo4j.unsafe.batchinsert.BatchInserters;
public class Importer {
public static void main(String[] argv) {
- BatchInserter inserter = new BatchInserterImpl("target/neo4jdb-batchinsert");
- BatchInserterIndexProvider indexProvider = new LuceneBatchInserterIndexProvider(inserter);
- BatchInserterIndex labels = indexProvider.nodeIndex("labels", MapUtil.stringMap("type", "exact"));
- labels.setCacheCapacity("Label", 100000);
+Map<String, String> config = new HashMap<String, String>();
+config.put( "neostore.nodestore.db.mapped_memory", "90M" );
+BatchInserter inserter = BatchInserters.inserter("target/batchinserter-example-config", config );
+ //BatchInserterIndexProvider indexProvider = new LuceneBatchInserterIndexProvider(inserter);
+ //BatchInserterIndex names = indexProvider.nodeIndex("names", MapUtil.stringMap("type", "exact"));
+ //names.setCacheCapacity("name", 100000);
@@ -78,52 +87,56 @@
// Execute the query
ResultSet rs = stmt.executeQuery("SELECT contractnotice.\"agencyName\", "
- + " contractnotice.\"supplierABN\",contractnotice.\"supplierName\",sum(value) as sum "
+ + " (case when \"supplierABN\" != 0 THEN \"supplierABN\"::text ELSE \"supplierName\" END) as supplierID , max(contractnotice.\"supplierName\") as \"supplierName\",sum(value) as sum "
+ "FROM public.contractnotice GROUP BY contractnotice.\"agencyName\", "
- + " contractnotice.\"supplierABN\",contractnotice.\"supplierName\"");
- String previousAgency = "";
- GraphDatabaseService gds = inserter.getGraphDbService();
+ + " (case when \"supplierABN\" != 0 THEN \"supplierABN\"::text ELSE \"supplierName\" END)");
HashMap<String, Long> supplierIDs = new HashMap<String, Long>();
HashMap<String, Long> agencyIDs = new HashMap<String, Long>();
+
+Label agencyLabel = DynamicLabel.label( "Agency" ); // label put on every agency node
+inserter.createDeferredSchemaIndex( agencyLabel ).on( "name" );
+Label supplierLabel = DynamicLabel.label( "Supplier" ); // label put on every supplier node
+inserter.createDeferredSchemaIndex( supplierLabel ).on( "name" ); // each label gets its own "name" index
// Loop through the result set
while (rs.next()) {
long supplierID, agencyID;
String supplierKey;
if (agencyIDs.get(rs.getString("agencyName")) == null) {
- Node myNode = gds.createNode();
- myNode.setProperty("Label", rs.getString("agencyName"));
- myNode.setProperty("type", "agency");
- agencyIDs.put(rs.getString("agencyName"), myNode.getId());
- if (myNode.getId() % 100 == 0) {
- System.out.println("Agency " + myNode.getId());
+ Map<String, Object> properties = new HashMap<String, Object>();
+ properties.put("name", rs.getString("agencyName"));
+ properties.put("type", "agency"); // constant tag: the query returns no "agency" column to read
+ agencyID = inserter.createNode(properties, agencyLabel);
+ agencyIDs.put(rs.getString("agencyName"), agencyID);
+ if (agencyID % 10 == 0) {
+ System.out.println("Agency " + agencyID);
}
}
agencyID = agencyIDs.get(rs.getString("agencyName"));
- if (rs.getString("supplierABN") != "0" && rs.getString("supplierABN") != "") {
- supplierKey = rs.getString("supplierABN");
- } else {
- supplierKey = rs.getString("supplierName");
- }
// inject some data
- if (supplierIDs.get(supplierKey) == null) {
- Node myNode = gds.createNode();
- myNode.setProperty("Label", rs.getString("supplierName"));
- myNode.setProperty("type", "supplier");
- supplierIDs.put(supplierKey, myNode.getId());
- if (myNode.getId() % 1000 == 0) {
- System.out.println("Supplier " + myNode.getId());
+ if (supplierIDs.get(rs.getString("supplierID")) == null) {
+ Map<String, Object> properties = new HashMap<String, Object>();
+ properties.put("name", rs.getString("supplierName"));
+ properties.put("type", "supplier"); // constant tag: the query returns no "supplier" column to read
+ supplierID = inserter.createNode(properties, supplierLabel);
+ supplierIDs.put(rs.getString("supplierID"), supplierID);
+ if (supplierID % 1000 == 0) {
+ System.out.println("Supplier " + supplierID);
}
}
- supplierID = supplierIDs.get(supplierKey);
+ supplierID = supplierIDs.get(rs.getString("supplierID"));
- long rel = inserter.createRelationship(agencyID, supplierID,
- DynamicRelationshipType.withName("KNOWS"), null);
- inserter.setRelationshipProperty(rel, "Weight", rs.getDouble("sum"));
-
+// To set properties on the relationship, use a properties map
+// instead of null as the last parameter.
+Map<String, Object> properties = new HashMap<String, Object>();
+properties.put( "value", rs.getDouble("sum"));
+ inserter.createRelationship(agencyID, supplierID,
+ DynamicRelationshipType.withName("PAYS"), properties);
+ inserter.createRelationship(supplierID, agencyID,
+ DynamicRelationshipType.withName("PAID_BY"), properties);
}
// Close the result set, statement and the connection
rs.close();
@@ -142,10 +155,11 @@
}
}
//make the changes visible for reading, use this sparsely, requires IO!
- labels.flush();
+// names.flush();
// Make sure to shut down the index provider
- indexProvider.shutdown();
+// indexProvider.shutdown();
inserter.shutdown();
}
-}
+}
+
Binary files a/admin/neo4jimporter/target/classes/Importer.class and b/admin/neo4jimporter/target/classes/Importer.class differ
--- a/admin/partialdata/import.php
+++ b/admin/partialdata/import.php
@@ -12,6 +12,7 @@
$handle = fopen($fpath, "r");
//"t" mode string translates windows line breaks to unix
$datamapping0507 = array(
+
"Agency" => "agencyName",
"CN ID" => "CNID",
"Publish Date" => "publishDate",
@@ -137,7 +138,7 @@
// loop through all of the files
while (false !== ($fname = readdir($dhandle))) {
if (($fname != '.') && ($fname != '..') && (!isset($_REQUEST["filter"]) || strpos($fname,$_REQUEST["filter"]) != false)) {
- echo "<a href=\"import.php?fname=$fname\">$fname</a> " . filesize($path . $fname) . " " . date("c", filemtime($path . $fname)) . "<br/>";
+ echo "<a href=\"importdatagov.php?fname=$fname\">$fname</a> " . filesize($path . $fname) . " " . date("c", filemtime($path . $fname)) . "<br/>";
processFile($path . $fname, "contractnotice");
}
}
--- a/admin/partialdata/importamendments.php
+++ b/admin/partialdata/importamendments.php
@@ -310,7 +310,7 @@
// loop through all of the files
while (false !== ($fname = readdir($dhandle))) {
if (($fname != '.') && ($fname != '..') && (strpos($fname,".xls")>0)) {
- echo "<a href=\"import.php?fname=$fname\">$fname</a> " . filesize($path . $fname) . " " . date("c", filemtime($path . $fname)) . "<br/>";
+ echo "<a href=\"importdatagov.php?fname=$fname\">$fname</a> " . filesize($path . $fname) . " " . date("c", filemtime($path . $fname)) . "<br/>";
processFile($path . $fname, "contractnotice");
}
}
--- a/exportOverview.csv.php
+++ b/exportOverview.csv.php
@@ -11,10 +11,11 @@
$unspsc[$row['UNSPSC']] = $row['Title'];
}
+//(\'https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN\'::text || "CNID"::text) as url
$query = $conn->prepare('
-SELECT "CNID" as uid, description as text,
-(\'https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN\'::text || "CNID"::text) as url from "contractnotice"
-where "childCN" is null'
+SELECT "CNID" as id, description as text
+from "contractnotice"
+where "childCN" is null limit 10000'
, array(PDO::ATTR_CURSOR => PDO::FETCH_ORI_NEXT));
$query->execute();
$errors = $conn->errorInfo();
--- a/lib/common.inc.php
+++ b/lib/common.inc.php
@@ -314,6 +314,3 @@
include ("graphs.inc.php");
-
-
-