add datagov importer
add datagov importer

<?php <?php
if (php_sapi_name() != "cli") { if (php_sapi_name() != "cli") {
include_once ("../lib/common.inc.php"); include_once ("../lib/common.inc.php");
auth(); auth();
$contractNoticeFields = array( $contractNoticeFields = array(
"importFile", "importFile",
"agencyName", "agencyName",
"parentCN", "parentCN",
"CNID", "CNID",
"publishDate", "publishDate",
"amendDate", "amendDate",
"contractStart", "contractStart",
"contractEnd", "contractEnd",
"value", "value",
"description", "description",
"agencyID", "agencyID",
"category", "category",
"procurementMethod", "procurementMethod",
"atmID", "atmID",
"SONID", "SONID",
"confidentialityContract", "confidentialityContract",
"confidentialityContractReason", "confidentialityContractReason",
"confidentialityOutputs", "confidentialityOutputs",
"confidentialityOutputsReason", "confidentialityOutputsReason",
"consultancy", "consultancy",
"consultancyReason", "consultancyReason",
"amendmentReason", "amendmentReason",
"supplierName", "supplierName",
"supplierAddress", "supplierAddress",
"supplierCity", "supplierCity",
"supplierPostcode", "supplierPostcode",
"supplierCountry", "supplierCountry",
"supplierABNExempt", "supplierABNExempt",
"supplierABN", "supplierABN",
"contactBranch", "contactBranch",
"contactDivision", "contactDivision",
"contactPostcode" "contactPostcode"
); );
   
$contractNoticeInsertQ = 'INSERT INTO contractnotice ("' . implode('" , "', $contractNoticeFields) . '") VALUES ( '; $contractNoticeInsertQ = 'INSERT INTO contractnotice ("' . implode('" , "', $contractNoticeFields) . '") VALUES ( ';
foreach ($contractNoticeFields as $key => $f) { foreach ($contractNoticeFields as $key => $f) {
$contractNoticeInsertQ.= ($key == 0 ? "" : ", ") . "?"; $contractNoticeInsertQ.= ($key == 0 ? "" : ", ") . "?";
} }
$contractNoticeInsertQ.= ");"; $contractNoticeInsertQ.= ");";
$contractNoticeInsertQ = $conn->prepare($contractNoticeInsertQ); $contractNoticeInsertQ = $conn->prepare($contractNoticeInsertQ);
   
function processFile($fpath) { function processFile($fpath) {
global $conn, $contractNoticeFields, $contractNoticeInsertQ; global $conn, $contractNoticeFields, $contractNoticeInsertQ;
$row = 1; $row = 1;
$handle = fopen($fpath, "r"); $handle = fopen($fpath, "r");
//"t" mode string translates windows line breaks to unix //"t" mode string translates windows line breaks to unix
$datamapping0712 = array( $datamapping0712 = array(
"Agency" => "agencyName", "Agency" => "agencyName",
"Parent CN ID" => "parentCN", "Parent CN ID" => "parentCN",
"CN ID" => "CNID", "CN ID" => "CNID",
"Publish Date" => "publishDate", "Publish Date" => "publishDate",
"Amendment Date" => "amendDate", "Amendment Date" => "amendDate",
"Status" => "", "Status" => "",
"StartDate" => "contractStart", "StartDate" => "contractStart",
"EndDate" => "contractEnd", "EndDate" => "contractEnd",
"Value" => "value", "Value" => "value",
"Description" => "description", "Description" => "description",
"Agency Ref Id" => "agencyID", "Agency Ref Id" => "agencyID",
"Agency Ref. ID" => "agencyID", "Agency Ref. ID" => "agencyID",
"Category" => "category", "Category" => "category",
"Procurement Method" => "procurementMethod", "Procurement Method" => "procurementMethod",
"ATM ID" => "atmID", "ATM ID" => "atmID",
"SON ID" => "SONID", "SON ID" => "SONID",
"Confidentiality - Contract" => "confidentialityContract", "Confidentiality - Contract" => "confidentialityContract",
"Confidentiality - Contract Reason(s)" => "confidentialityContractReason", "Confidentiality - Contract Reason(s)" => "confidentialityContractReason",
"Confidentiality - Outputs" => "confidentialityOutputs", "Confidentiality - Outputs" => "confidentialityOutputs",
"Confidentiality - Outputs Reason(s)" => "confidentialityOutputsReason", "Confidentiality - Outputs Reason(s)" => "confidentialityOutputsReason",
"Consultancy" => "consultancy", "Consultancy" => "consultancy",
"Consultancy Reason(s)" => "consultancyReason", "Consultancy Reason(s)" => "consultancyReason",
"Amendment Reason" => "amendmentReason", "Amendment Reason" => "amendmentReason",
"Supplier Name" => "supplierName", "Supplier Name" => "supplierName",
"Supplier Address" => "supplierAddress", "Supplier Address" => "supplierAddress",
"Supplier City" => "supplierCity", "Supplier City" => "supplierCity",
"Supplier Postcode" => "supplierPostcode", "Supplier Postcode" => "supplierPostcode",
"Supplier Country" => "supplierCountry", "Supplier Country" => "supplierCountry",
"Supplier ABNExempt" => "supplierABNExempt", "Supplier ABNExempt" => "supplierABNExempt",
"Supplier ABN" => "supplierABN", "Supplier ABN" => "supplierABN",
"Agency Branch" => "contactBranch", "Agency Branch" => "contactBranch",
"Agency Divison" => "contactDivision", "Agency Divison" => "contactDivision",
"Agency Postcode" => "contactPostcode", "Agency Postcode" => "contactPostcode",
"" => "" "" => ""
); );
   
$headers;  
   
while (($data = fgetcsv($handle, 1000, "\t")) !== false) { while (($data = fgetcsv($handle, 1000, "\t")) !== false) {
$num = count($data); $num = count($data);
if ($row == 3) { if ($row == 3) {
$headers = $data; $headers = $data;
} elseif ($row > 3) { } elseif ($row > 3) {
if ($num > count($datamapping0712)) { if ($num > count($datamapping0712)) {
die("<font color=red>Error in data import; data mapping fields out of bounds or changed</font><br>" . $fname . print_r($data)); die("<font color=red>Error in data import; data mapping fields out of bounds or changed</font><br>" . $fname . print_r($data));
} }
$contractNoticeInsert = Array(); $contractNoticeInsert = Array();
$supplierInsert = Array(); $supplierInsert = Array();
$agencyInsert = Array(); $agencyInsert = Array();
$contractNoticeInsert[] = $fpath; $contractNoticeInsert[] = $fpath;
$keys = array_keys($datamapping0712); $keys = array_keys($datamapping0712);
for ($c = 0; $c < $num; $c++) { for ($c = 0; $c < $num; $c++) {
$data[$c] = trim($data[$c], "="); $data[$c] = trim($data[$c], "=");
$data[$c] = trim($data[$c], "\""); $data[$c] = trim($data[$c], "\"");
if (in_array(($datamapping0712[$headers[$c]]), $contractNoticeFields)) { if (in_array(($datamapping0712[$headers[$c]]), $contractNoticeFields)) {
if (($datamapping0712[$headers[$c]]) == "parentCN" || ($datamapping0712[$headers[$c]]) == "CNID") { if (($datamapping0712[$headers[$c]]) == "parentCN" || ($datamapping0712[$headers[$c]]) == "CNID") {
$data[$c] = substr($data[$c], 2); // take off the "CN" prefix $data[$c] = substr($data[$c], 2); // take off the "CN" prefix
if ($data[$c] > 0 && $data[$c] != '0') { if ($data[$c] > 0 && $data[$c] != '0') {
$contractNoticeInsert[] = $data[$c]; $contractNoticeInsert[] = $data[$c];
} else { } else {
$contractNoticeInsert[] = null; $contractNoticeInsert[] = null;
} }
} elseif (($datamapping0712[$headers[$c]]) == "supplierABN") { } elseif (($datamapping0712[$headers[$c]]) == "supplierABN") {
if ($data[$c] > 0 && $data[$c] != '0') { if ($data[$c] > 0 && $data[$c] != '0') {
$contractNoticeInsert[] = $data[$c]; $contractNoticeInsert[] = $data[$c];
} else { } else {
$contractNoticeInsert[] = null; $contractNoticeInsert[] = null;
} }
} elseif (($datamapping0712[$headers[$c]]) == "amendDate" || ($datamapping0712[$headers[$c]]) == "publishDate" || ($datamapping0712[$headers[$c]]) == "contractStart" || ($datamapping0712[$headers[$c]]) == "contractEnd") { } elseif (($datamapping0712[$headers[$c]]) == "amendDate" || ($datamapping0712[$headers[$c]]) == "publishDate" || ($datamapping0712[$headers[$c]]) == "contractStart" || ($datamapping0712[$headers[$c]]) == "contractEnd") {
$contractNoticeInsert[] = date('Y-m-d H:i:s', strtotime($data[$c])); $contractNoticeInsert[] = date('Y-m-d H:i:s', strtotime($data[$c]));
} else { } else {
if (strstr("\" =", $data[$c] > 0)) { if (strstr("\" =", $data[$c] > 0)) {
die("Invalid Description field" . $contractNoticeInsert); die("Invalid Description field" . $contractNoticeInsert);
} }
$colvalue = preg_replace('/[^[:print:]]/', '', utf8_encode($data[$c])); $colvalue = preg_replace('/[^[:print:]]/', '', utf8_encode($data[$c]));
   
$contractNoticeInsert[] = $colvalue; $contractNoticeInsert[] = $colvalue;
} }
} }
} }
flush(); flush();
$contractNoticeInsertQ->execute($contractNoticeInsert); $contractNoticeInsertQ->execute($contractNoticeInsert);
$errors = $conn->errorInfo(); $errors = $conn->errorInfo();
if ($errors[1] == 7 && strpos($errors[2], "duplicate key")) { if ($errors[1] == 7 && strpos($errors[2], "duplicate key")) {
} elseif ($errors[1] == 0) { } elseif ($errors[1] == 0) {
$success++; $success++;
} else { } else {
foreach ($contractNoticeFields as $key => $cnf) { foreach ($contractNoticeFields as $key => $cnf) {
echo var_dump($contractNoticeInsert[$key]) . $cnf . "<br>"; echo var_dump($contractNoticeInsert[$key]) . $cnf . "<br>";
} }
echo $data[2] . " failed CN insert.<br>" . print_r($errors, true) . "<br> row $row <br><br>\n"; echo $data[2] . " failed CN insert.<br>" . print_r($errors, true) . "<br> row $row <br><br>\n";
} }
   
flush(); flush();
//echo "<hr>\n"; //echo "<hr>\n";
} }
$row++; $row++;
} }
fclose($handle); fclose($handle);
$contractNoticeInsertQ->closeCursor(); $contractNoticeInsertQ->closeCursor();
   
return $success; return $success;
} }
   
$path = 'data/'; $path = 'data/';
if ($_REQUEST["fname"] == "") { if ($_REQUEST["fname"] == "") {
echo "Get files from: https://www.tenders.gov.au/?event=public.reports.list<br>"; echo "Get files from: https://www.tenders.gov.au/?event=public.reports.list<br>";
$dhandle = opendir($path); $dhandle = opendir($path);
// define an array to hold the files // define an array to hold the files
$files = array(); $files = array();
if ($dhandle) { if ($dhandle) {
// loop through all of the files // loop through all of the files
while (false !== ($fname = readdir($dhandle))) { while (false !== ($fname = readdir($dhandle))) {
if (($fname != '.') && ($fname != '..')) { if (($fname != '.') && ($fname != '..')) {
$files[date("c", filemtime($path . $fname)) . md5($fname)] = $fname; $files[date("c", filemtime($path . $fname)) . md5($fname)] = $fname;
} }
} }
} }
ksort($files); ksort($files);
foreach ($files as $date => $fname) { foreach ($files as $date => $fname) {
echo "<a href=\"import.php?fname=$fname\">$fname</a>&nbsp;" . filesize($path . $fname) . "&nbsp;" . $date . "<br/>"; echo "<a href=\"importdatagov.php?fname=$fname\">$fname</a>&nbsp;" . filesize($path . $fname) . "&nbsp;" . $date . "<br/>";
} }
} else { } else {
$success = 0; $success = 0;
$fname = $_REQUEST["fname"]; $fname = $_REQUEST["fname"];
echo " ============== $fname ============== <br>"; echo " ============== $fname ============== <br>";
flush(); flush();
$success+= processFile($path . $fname, "contractnotice"); $success+= processFile($path . $fname, "contractnotice");
$success+= processFile($path . $fname, "agency"); $success+= processFile($path . $fname, "agency");
$success+= processFile($path . $fname, "supplier"); $success+= processFile($path . $fname, "supplier");
echo "<br> $success records successfully created"; echo "<br> $success records successfully created";
   
flush(); flush();
// run post import data processing // run post import data processing
// //
if ($success > 0) { if ($success > 0) {
$conn->exec("update datasets set \"lastUpdated\" = NOW() where title = 'Contract Notices'"); $conn->exec("update datasets set \"lastUpdated\" = NOW() where title = 'Contract Notices'");
echo "link amend<br>"; echo "link amend<br>";
include ("linkAmendments.php"); include ("linkAmendments.php");
echo "update UNSPSC<br>"; echo "update UNSPSC<br>";
include ("updateUNSPSC.php"); include ("updateUNSPSC.php");
} }
// cn // cn
   
// agency // agency
//include ("setAgencyStatus.php"); //include ("setAgencyStatus.php");
//include ("setAgencyURLABN.php"); //include ("setAgencyURLABN.php");
} }
} }
?> ?>
   
  <?php
 
  include_once ("../lib/common.inc.php");
  auth();
  /* todo gaps
  Contract ID,Department,Portfolio,Division,Branch,Agency Ref,Contract Date,End Date,Value,Office PCode,Description,Procurement Desc,Confidentiality Reason,Consultancy,ANZSCC Code,ANSCC Desc,Supplier,Supplier Postal Addr,Supplier Suburb,Supplier State,Supplier Country,Supplier PCode,Supplier ABN,Supplier DUNS,Supplier ACN,S/O Reference Num
  1694570,Department of Defence,Defence,NEW SOUTH WALES,RAN HMAS WATSON,1906458734 ,30-Jun-2007,30-Jun-2007,16083.96,2612,AIRLINE TICKETS,direct source,,No,731,Passenger transportation by air,QANTAS AIRWAYS LTD,PO Box PB 747,MASCOT,NSW,Australia,2020,16009661901,750512642,009661901 ,
 
  */
  $contractNoticeFields = array(
  "importFile",
  "agencyName",
  "parentCN",
  "CNID",
  "publishDate",
  "amendDate",
  "contractStart",
  "contractEnd",
  "value",
  "description",
  "agencyID",
  "categoryUNSPSC",
  "category",
  "procurementMethod",
  "atmID",
  "SONID",
  "confidentialityContract",
  "confidentialityContractReason",
  "confidentialityOutputs",
  "confidentialityOutputsReason",
  "consultancy",
  "consultancyReason",
  "amendmentReason",
  "supplierName",
  "supplierAddress",
  "supplierCity",
  "supplierPostcode",
  "supplierCountry",
  "supplierABNExempt",
  "supplierABN",
  "contactBranch",
  "contactDivision",
  "contactPostcode"
  );
 
  $contractNoticeInsertQ = 'INSERT INTO contractnotice ("' . implode('" , "', $contractNoticeFields) . '") VALUES ( ';
  foreach ($contractNoticeFields as $key => $f) {
  $contractNoticeInsertQ.= ($key == 0 ? "" : ", ") . "?";
  }
  $contractNoticeInsertQ.= ");";
  $contractNoticeInsertQ = $conn->prepare($contractNoticeInsertQ);
 
  function processFile($fpath) {
  global $conn, $contractNoticeFields, $contractNoticeInsertQ;
  $row = 1;
  $success = 0;
  ini_set('auto_detect_line_endings',TRUE);
  $handle = fopen($fpath, "r");
  //"t" mode string translates windows line breaks to unix
  $datamapping0712 = array(
 
  "Agency Name" => "agencyName",
  "Parent Contract ID" => "parentCN",
  "Contract ID" => "CNID",
  "Publish Date" => "publishDate",
  "Amendment Date" => "amendDate",
  "Start Date" => "contractStart",
  "End Date" => "contractEnd",
  "Value" => "value",
  "Description" => "description",
  "Agency Ref ID" => "agencyID",
  "UNSPSC Code" => "categoryUNSPSC",
  "Title" => "category",
  "Procurement Method" => "procurementMethod",
  "ATM ID" => "atmID",
  "SON ID" => "SONID",
  "Confidentiality Contract Flag" => "confidentialityContract",
  "Confidentiality Contract Reason" => "confidentialityContractReason",
  "Confidentiality Outputs Flag" => "confidentialityOutputs",
  "Confidentiality Outputs Reason" => "confidentialityOutputsReason",
  "Consultancy Flag" => "consultancy",
  "Consultancy Reason" => "consultancyReason",
  "Amendment Reason" => "amendmentReason",
  "Supplier Name" => "supplierName",
  "Supplier Address" => "supplierAddress",
  "Supplier Suburb" => "supplierCity",
  "Supplier Postcode" => "supplierPostcode",
  "Supplier Country" => "supplierCountry",
  "Supplier ABN Exempt" => "supplierABNExempt",
  "ABN" => "supplierABN",
  "Contact Name" => "",
  "Contact Phone" => "",
  "Branch" => "contactBranch",
  "Division" => "contactDivision",
  "Office Postcode" => "contactPostcode",
 
  );
 
 
  while (($data = fgetcsv($handle, 10000)) !== false) {
  //print_r($data);
  $num = count($data);
  if ($row == 1) {
  $headers = $data;
  } elseif ($row > 1) {
  if ($num > count($datamapping0712)) {
  die("<font color=red>Error in data import; data mapping fields out of bounds or changed $num > ".count($datamapping0712)."</font><br>" . $fpath . print_r($data));
  }
  $contractNoticeInsert = Array();
  $supplierInsert = Array();
  $agencyInsert = Array();
  $contractNoticeInsert[] = $fpath;
  $keys = array_keys($datamapping0712);
  for ($c = 0; $c < $num; $c++) {
  $data[$c] = trim($data[$c], "=");
  $data[$c] = trim($data[$c], "\"");
  if (in_array(($datamapping0712[$headers[$c]]), $contractNoticeFields)) {
  if (($datamapping0712[$headers[$c]]) == "parentCN" || ($datamapping0712[$headers[$c]]) == "CNID") {
  $data[$c] = substr($data[$c], 2); // take off the "CN" prefix
  if ($data[$c] > 0 && $data[$c] != '0') {
  $contractNoticeInsert[] = $data[$c];
  } else {
  $contractNoticeInsert[] = null;
  }
  } elseif (($datamapping0712[$headers[$c]]) == "supplierABN") {
  if ($data[$c] > 0 && $data[$c] != '0') {
  $contractNoticeInsert[] = $data[$c];
  } else {
  $contractNoticeInsert[] = null;
  }
  } elseif (($datamapping0712[$headers[$c]]) == "amendDate" || ($datamapping0712[$headers[$c]]) == "publishDate" || ($datamapping0712[$headers[$c]]) == "contractStart" || ($datamapping0712[$headers[$c]]) == "contractEnd") {
  $contractNoticeInsert[] =