better amon db error logging
better amon db error logging

[submodule "lib/bubbletree"] [submodule "lib/bubbletree"]
path = lib/bubbletree path = lib/bubbletree
url = https://github.com/okfn/bubbletree.git url = https://github.com/okfn/bubbletree.git
  [submodule "lib/amon-php"]
  path = lib/amon-php
  url = https://github.com/martinrusev/amon-php.git
   
<?php <?php
if (php_sapi_name() != "cli") { if (php_sapi_name() != "cli") {
include_once ("../lib/common.inc.php"); include_once ("../lib/common.inc.php");
auth(); auth();
$contractNoticeFields = array( $contractNoticeFields = array(
"importFile", "importFile",
"agencyName", "agencyName",
"parentCN", "parentCN",
"CNID", "CNID",
"publishDate", "publishDate",
"amendDate", "amendDate",
"contractStart", "contractStart",
"contractEnd", "contractEnd",
"value", "value",
"description", "description",
"agencyID", "agencyID",
"category", "category",
"procurementMethod", "procurementMethod",
"atmID", "atmID",
"SONID", "SONID",
"confidentialityContract", "confidentialityContract",
"confidentialityContractReason", "confidentialityContractReason",
"confidentialityOutputs", "confidentialityOutputs",
"confidentialityOutputsReason", "confidentialityOutputsReason",
"consultancy", "consultancy",
"consultancyReason", "consultancyReason",
"amendmentReason", "amendmentReason",
"supplierName", "supplierName",
"supplierAddress", "supplierAddress",
"supplierCity", "supplierCity",
"supplierPostcode", "supplierPostcode",
"supplierCountry", "supplierCountry",
"supplierABNExempt", "supplierABNExempt",
"supplierABN", "supplierABN",
"contactBranch", "contactBranch",
"contactDivision", "contactDivision",
"contactPostcode" "contactPostcode"
); );
   
$contractNoticeInsertQ = 'INSERT INTO contractnotice ("' . implode('" , "', $contractNoticeFields) . '") VALUES ( '; $contractNoticeInsertQ = 'INSERT INTO contractnotice ("' . implode('" , "', $contractNoticeFields) . '") VALUES ( ';
foreach ($contractNoticeFields as $key => $f) { foreach ($contractNoticeFields as $key => $f) {
$contractNoticeInsertQ.= ($key == 0 ? "" : ", ") . "?"; $contractNoticeInsertQ.= ($key == 0 ? "" : ", ") . "?";
} }
$contractNoticeInsertQ.= ");"; $contractNoticeInsertQ.= ");";
$contractNoticeInsertQ = $conn->prepare($contractNoticeInsertQ); $contractNoticeInsertQ = $conn->prepare($contractNoticeInsertQ);
   
function processFile($fpath) { function processFile($fpath) {
global $conn, $contractNoticeFields, $contractNoticeInsertQ; global $conn, $contractNoticeFields, $contractNoticeInsertQ;
$row = 1; $row = 1;
$handle = fopen($fpath, "r"); $handle = fopen($fpath, "r");
//"t" mode string translates windows line breaks to unix //"t" mode string translates windows line breaks to unix
$datamapping0712 = array( $datamapping0712 = array(
"Agency" => "agencyName", "Agency" => "agencyName",
"Parent CN ID" => "parentCN", "Parent CN ID" => "parentCN",
"CN ID" => "CNID", "CN ID" => "CNID",
"Publish Date" => "publishDate", "Publish Date" => "publishDate",
"Amendment Date" => "amendDate", "Amendment Date" => "amendDate",
"Status" => "", "Status" => "",
"StartDate" => "contractStart", "StartDate" => "contractStart",
"EndDate" => "contractEnd", "EndDate" => "contractEnd",
"Value" => "value", "Value" => "value",
"Description" => "description", "Description" => "description",
"Agency Ref Id" => "agencyID", "Agency Ref Id" => "agencyID",
"Agency Ref. ID" => "agencyID", "Agency Ref. ID" => "agencyID",
"Category" => "category", "Category" => "category",
"Procurement Method" => "procurementMethod", "Procurement Method" => "procurementMethod",
"ATM ID" => "atmID", "ATM ID" => "atmID",
"SON ID" => "SONID", "SON ID" => "SONID",
"Confidentiality - Contract" => "confidentialityContract", "Confidentiality - Contract" => "confidentialityContract",
"Confidentiality - Contract Reason(s)" => "confidentialityContractReason", "Confidentiality - Contract Reason(s)" => "confidentialityContractReason",
"Confidentiality - Outputs" => "confidentialityOutputs", "Confidentiality - Outputs" => "confidentialityOutputs",
"Confidentiality - Outputs Reason(s)" => "confidentialityOutputsReason", "Confidentiality - Outputs Reason(s)" => "confidentialityOutputsReason",
"Consultancy" => "consultancy", "Consultancy" => "consultancy",
"Consultancy Reason(s)" => "consultancyReason", "Consultancy Reason(s)" => "consultancyReason",
"Amendment Reason" => "amendmentReason", "Amendment Reason" => "amendmentReason",
"Supplier Name" => "supplierName", "Supplier Name" => "supplierName",
"Supplier Address" => "supplierAddress", "Supplier Address" => "supplierAddress",
"Supplier City" => "supplierCity", "Supplier City" => "supplierCity",
"Supplier Postcode" => "supplierPostcode", "Supplier Postcode" => "supplierPostcode",
"Supplier Country" => "supplierCountry", "Supplier Country" => "supplierCountry",
"Supplier ABNExempt" => "supplierABNExempt", "Supplier ABNExempt" => "supplierABNExempt",
"Supplier ABN" => "supplierABN", "Supplier ABN" => "supplierABN",
"Agency Branch" => "contactBranch", "Agency Branch" => "contactBranch",
"Agency Divison" => "contactDivision", "Agency Divison" => "contactDivision",
"Agency Postcode" => "contactPostcode", "Agency Postcode" => "contactPostcode",
"" => "" "" => ""
); );
$headers; $headers;
   
while (($data = fgetcsv($handle, 1000, "\t")) !== false) { while (($data = fgetcsv($handle, 1000, "\t")) !== false) {
$num = count($data); $num = count($data);
if ($row == 3) { if ($row == 3) {
$headers = $data; $headers = $data;
} elseif ($row > 3) { } elseif ($row > 3) {
if ($num > count($datamapping0712)) { if ($num > count($datamapping0712)) {
die("<font color=red>Error in data import; data mapping fields out of bounds or changed</font><br>" . $fname . print_r($data)); die("<font color=red>Error in data import; data mapping fields out of bounds or changed</font><br>" . $fname . print_r($data));
} }
$contractNoticeInsert = Array(); $contractNoticeInsert = Array();
$supplierInsert = Array(); $supplierInsert = Array();
$agencyInsert = Array(); $agencyInsert = Array();
$contractNoticeInsert[] = $fpath; $contractNoticeInsert[] = $fpath;
$keys = array_keys($datamapping0712); $keys = array_keys($datamapping0712);
for ($c = 0; $c < $num; $c++) { for ($c = 0; $c < $num; $c++) {
$data[$c] = trim($data[$c], "="); $data[$c] = trim($data[$c], "=");
$data[$c] = trim($data[$c], "\""); $data[$c] = trim($data[$c], "\"");
if (in_array(($datamapping0712[$headers[$c]]), $contractNoticeFields)) { if (in_array(($datamapping0712[$headers[$c]]), $contractNoticeFields)) {
if (($datamapping0712[$headers[$c]]) == "parentCN" || ($datamapping0712[$headers[$c]]) == "CNID") { if (($datamapping0712[$headers[$c]]) == "parentCN" || ($datamapping0712[$headers[$c]]) == "CNID") {
$data[$c] = substr($data[$c], 2); // take off the "CN" prefix $data[$c] = substr($data[$c], 2); // take off the "CN" prefix
if ($data[$c] > 0 && $data[$c] != '0') { if ($data[$c] > 0 && $data[$c] != '0') {
$contractNoticeInsert[] = $data[$c]; $contractNoticeInsert[] = $data[$c];
} else { } else {
$contractNoticeInsert[] = null; $contractNoticeInsert[] = null;
} }
} elseif (($datamapping0712[$headers[$c]]) == "supplierABN") { } elseif (($datamapping0712[$headers[$c]]) == "supplierABN") {
if ($data[$c] > 0 && $data[$c] != '0') { if ($data[$c] > 0 && $data[$c] != '0') {
$contractNoticeInsert[] = $data[$c]; $contractNoticeInsert[] = $data[$c];
} else { } else {
$contractNoticeInsert[] = null; $contractNoticeInsert[] = null;
} }
} elseif (($datamapping0712[$headers[$c]]) == "amendDate" || ($datamapping0712[$headers[$c]]) == "publishDate" || ($datamapping0712[$headers[$c]]) == "contractStart" || ($datamapping0712[$headers[$c]]) == "contractEnd") { } elseif (($datamapping0712[$headers[$c]]) == "amendDate" || ($datamapping0712[$headers[$c]]) == "publishDate" || ($datamapping0712[$headers[$c]]) == "contractStart" || ($datamapping0712[$headers[$c]]) == "contractEnd") {
$contractNoticeInsert[] = date('Y-m-d H:i:s', strtotime($data[$c])); $contractNoticeInsert[] = date('Y-m-d H:i:s', strtotime($data[$c]));
} else { } else {
if (strstr("\" =", $data[$c] > 0)) { if (strstr("\" =", $data[$c] > 0)) {
die("Invalid Description field" . $contractNoticeInsert); die("Invalid Description field" . $contractNoticeInsert);
} }
$colvalue = preg_replace('/[^[:print:]]/', '', utf8_encode($data[$c])); $colvalue = preg_replace('/[^[:print:]]/', '', utf8_encode($data[$c]));
   
$contractNoticeInsert[] = $colvalue; $contractNoticeInsert[] = $colvalue;
} }
} }
} }
flush(); flush();
$contractNoticeInsertQ->execute($contractNoticeInsert); $contractNoticeInsertQ->execute($contractNoticeInsert);
$errors = $conn->errorInfo(); $errors = $conn->errorInfo();
if ($errors[1] == 7 && strpos($errors[2], "duplicate key")) { if ($errors[1] == 7 && strpos($errors[2], "duplicate key")) {
} elseif ($errors[1] == 0) { } elseif ($errors[1] == 0) {
$success++; $success++;
} else { } else {
foreach ($contractNoticeFields as $key => $cnf) { foreach ($contractNoticeFields as $key => $cnf) {
echo var_dump($contractNoticeInsert[$key]) . $cnf . "<br>"; echo var_dump($contractNoticeInsert[$key]) . $cnf . "<br>";
} }
echo $data[2] . " failed CN insert.<br>" . print_r($errors, true) . "<br> row $row <br><br>\n"; echo $data[2] . " failed CN insert.<br>" . print_r($errors, true) . "<br> row $row <br><br>\n";
} }
   
flush(); flush();
//echo "<hr>\n"; //echo "<hr>\n";
} }
$row++; $row++;
} }
fclose($handle); fclose($handle);
$contractNoticeInsertQ->closeCursor(); $contractNoticeInsertQ->closeCursor();
   
return $success; return $success;
} }
   
$path = 'data/'; $path = 'data/';
if ($_REQUEST["fname"] == "") { if ($_REQUEST["fname"] == "") {
echo "Get files from: https://www.tenders.gov.au/?event=public.reports.list<br>"; echo "Get files from: https://www.tenders.gov.au/?event=public.reports.list<br>";
$dhandle = opendir($path); $dhandle = opendir($path);
// define an array to hold the files // define an array to hold the files
$files = array(); $files = array();
if ($dhandle) { if ($dhandle) {
// loop through all of the files // loop through all of the files
while (false !== ($fname = readdir($dhandle))) { while (false !== ($fname = readdir($dhandle))) {
if (($fname != '.') && ($fname != '..')) { if (($fname != '.') && ($fname != '..')) {
$files[date("c", filemtime($path . $fname)) . md5($fname)] = $fname; $files[date("c", filemtime($path . $fname)) . md5($fname)] = $fname;
} }
} }
} }
ksort($files); ksort($files);
foreach ($files as $date => $fname) { foreach ($files as $date => $fname) {
echo "<a href=\"import.php?fname=$fname\">$fname</a>&nbsp;" . filesize($path . $fname) . "&nbsp;" . $date . "<br/>"; echo "<a href=\"import.php?fname=$fname\">$fname</a>&nbsp;" . filesize($path . $fname) . "&nbsp;" . $date . "<br/>";
} }
} else { } else {
$success = 0; $success = 0;
$fname = $_REQUEST["fname"]; $fname = $_REQUEST["fname"];
echo " ============== $fname ============== <br>"; echo " ============== $fname ============== <br>";
flush(); flush();
$success+= processFile($path . $fname, "contractnotice"); $success+= processFile($path . $fname, "contractnotice");
$success+= processFile($path . $fname, "agency"); $success+= processFile($path . $fname, "agency");
$success+= processFile($path . $fname, "supplier"); $success+= processFile($path . $fname, "supplier");
echo "<br> $success records successfully created"; echo "<br> $success records successfully created";
   
flush(); flush();
// run post import data processing // run post import data processing
  //
  $dbConn->exec("update datasets set \"lastUpdated\" = NOW() where title = 'Contract Notices'");
// cn // cn
echo "link amend<br>"; echo "link amend<br>";
include ("linkAmendments.php"); include ("linkAmendments.php");
echo "update UNSPSC<br>"; echo "update UNSPSC<br>";
include ("updateUNSPSC.php"); include ("updateUNSPSC.php");
// agency // agency
//include ("setAgencyStatus.php"); //include ("setAgencyStatus.php");
//include ("setAgencyURLABN.php"); //include ("setAgencyURLABN.php");
} }
} }
?> ?>
   
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<groupId>org.lambdacomplex.contractdashboard</groupId> <groupId>org.lambdacomplex.contractdashboard</groupId>
<artifactId>neo4jimporter</artifactId> <artifactId>neo4jimporter</artifactId>
<version>0.0.1-SNAPSHOT</version> <version>0.0.1-SNAPSHOT</version>
<dependencies> <dependencies>
<dependency> <dependency>
<groupId>org.neo4j</groupId> <groupId>org.neo4j</groupId>
<artifactId>neo4j</artifactId> <artifactId>neo4j</artifactId>
<version>1.6.1</version> <version>1.8.RC1</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>postgresql</groupId> <groupId>postgresql</groupId>
<artifactId>postgresql</artifactId> <artifactId>postgresql</artifactId>
<version>9.0-801.jdbc4</version> <version>9.0-801.jdbc4</version>
</dependency> </dependency>
</dependencies> </dependencies>
</project> </project>
   
import java.io.ObjectInputStream.GetField;  
import java.math.BigInteger;  
import java.security.MessageDigest;  
import java.security.NoSuchAlgorithmException;  
import java.sql.Connection; import java.sql.Connection;
import java.sql.DriverManager; import java.sql.DriverManager;
import java.sql.ResultSet; import java.sql.ResultSet;
import java.sql.SQLException; import java.sql.SQLException;
import java.sql.SQLWarning; import java.sql.SQLWarning;
import java.sql.Statement; import java.sql.Statement;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map;  
   
import org.neo4j.graphdb.DynamicRelationshipType; import org.neo4j.graphdb.DynamicRelationshipType;
import org.neo4j.graphdb.GraphDatabaseService; import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.graphdb.Node; import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.index.BatchInserterIndex; import org.neo4j.graphdb.index.BatchInserterIndex;
import org.neo4j.graphdb.index.BatchInserterIndexProvider; import org.neo4j.graphdb.index.BatchInserterIndexProvider;
import org.neo4j.helpers.collection.MapUtil; import org.neo4j.helpers.collection.MapUtil;
  import org.neo4j.index.impl.lucene.LuceneBatchInserterIndexProvider;
import org.neo4j.kernel.impl.batchinsert.BatchInserter; import org.neo4j.kernel.impl.batchinsert.BatchInserter;
import org.neo4j.kernel.impl.batchinsert.BatchInserterImpl; import org.neo4j.kernel.impl.batchinsert.BatchInserterImpl;
import org.neo4j.index.impl.lucene.*;  
   
public class Importer { public class Importer {
   
public static void main(String[] argv) { public static void main(String[] argv) {
BatchInserter inserter = new BatchInserterImpl("target/neo4jdb-batchinsert"); BatchInserter inserter = new BatchInserterImpl("target/neo4jdb-batchinsert");
BatchInserterIndexProvider indexProvider = new LuceneBatchInserterIndexProvider(inserter); BatchInserterIndexProvider indexProvider = new LuceneBatchInserterIndexProvider(inserter);
BatchInserterIndex labels = indexProvider.nodeIndex("labels", MapUtil.stringMap("type", "exact")); BatchInserterIndex labels = indexProvider.nodeIndex("labels", MapUtil.stringMap("type", "exact"));
labels.setCacheCapacity("Label", 100000); labels.setCacheCapacity("Label", 100000);
   
   
   
System.out.println("-------- PostgreSQL " System.out.println("-------- PostgreSQL "
+ "JDBC Connection Testing ------------"); + "JDBC Connection Testing ------------");
   
try { try {
   
Class.forName("org.postgresql.Driver"); Class.forName("org.postgresql.Driver");
   
} catch (ClassNotFoundException e) { } catch (ClassNotFoundException e) {
   
System.out.println("Where is your PostgreSQL JDBC Driver? " System.out.println("Where is your PostgreSQL JDBC Driver? "