add datagov importer
[contractdashboard.git] / admin / importdatagov.php
blob:a/admin/importdatagov.php -> blob:b/admin/importdatagov.php
  <?php
   
  // Load the shared project library. NOTE(review): presumably this is what
  // defines $conn and auth() used below — confirm against lib/common.inc.php.
  include_once ("../lib/common.inc.php");
  // Called before any import work; auth() is defined outside this file
  // (presumably it enforces admin authentication — TODO confirm).
  auth();
  /* todo gaps
  Contract ID,Department,Portfolio,Division,Branch,Agency Ref,Contract Date,End Date,Value,Office PCode,Description,Procurement Desc,Confidentiality Reason,Consultancy,ANZSCC Code,ANSCC Desc,Supplier,Supplier Postal Addr,Supplier Suburb,Supplier State,Supplier Country,Supplier PCode,Supplier ABN,Supplier DUNS,Supplier ACN,S/O Reference Num
  1694570,Department of Defence,Defence,NEW SOUTH WALES,RAN HMAS WATSON,1906458734 ,30-Jun-2007,30-Jun-2007,16083.96,2612,AIRLINE TICKETS,direct source,,No,731,Passenger transportation by air,QANTAS AIRWAYS LTD,PO Box PB 747,MASCOT,NSW,Australia,2020,16009661901,750512642,009661901 ,
   
  */
  $contractNoticeFields = array(
  "importFile",
  "agencyName",
  "parentCN",
  "CNID",
  "publishDate",
  "amendDate",
  "contractStart",
  "contractEnd",
  "value",
  "description",
  "agencyID",
  "categoryUNSPSC",
  "category",
  "procurementMethod",
  "atmID",
  "SONID",
  "confidentialityContract",
  "confidentialityContractReason",
  "confidentialityOutputs",
  "confidentialityOutputsReason",
  "consultancy",
  "consultancyReason",
  "amendmentReason",
  "supplierName",
  "supplierAddress",
  "supplierCity",
  "supplierPostcode",
  "supplierCountry",
  "supplierABNExempt",
  "supplierABN",
  "contactBranch",
  "contactDivision",
  "contactPostcode"
  );
   
  $contractNoticeInsertQ = 'INSERT INTO contractnotice ("' . implode('" , "', $contractNoticeFields) . '") VALUES ( ';
  foreach ($contractNoticeFields as $key => $f) {
  $contractNoticeInsertQ.= ($key == 0 ? "" : ", ") . "?";
  }
  $contractNoticeInsertQ.= ");";
  $contractNoticeInsertQ = $conn->prepare($contractNoticeInsertQ);
   
  function processFile($fpath) {
  global $conn, $contractNoticeFields, $contractNoticeInsertQ;
  $row = 1;
  $success = 0;
  ini_set('auto_detect_line_endings',TRUE);
  $handle = fopen($fpath, "r");
  //"t" mode string translates windows line breaks to unix
  $datamapping0712 = array(
   
  "Agency Name" => "agencyName",
  "Parent Contract ID" => "parentCN",
  "Contract ID" => "CNID",
  "Publish Date" => "publishDate",
  "Amendment Date" => "amendDate",
  "Start Date" => "contractStart",
  "End Date" => "contractEnd",
  "Value" => "value",
  "Description" => "description",
  "Agency Ref ID" => "agencyID",
  "UNSPSC Code" => "categoryUNSPSC",
  "Title" => "category",
  "Procurement Method" => "procurementMethod",
  "ATM ID" => "atmID",
  "SON ID" => "SONID",
  "Confidentiality Contract Flag" => "confidentialityContract",
  "Confidentiality Contract Reason" => "confidentialityContractReason",
  "Confidentiality Outputs Flag" => "confidentialityOutputs",
  "Confidentiality Outputs Reason" => "confidentialityOutputsReason",
  "Consultancy Flag" => "consultancy",
  "Consultancy Reason" => "consultancyReason",
  "Amendment Reason" => "amendmentReason",
  "Supplier Name" => "supplierName",
  "Supplier Address" => "supplierAddress",
  "Supplier Suburb" => "supplierCity",
  "Supplier Postcode" => "supplierPostcode",
  "Supplier Country" => "supplierCountry",
  "Supplier ABN Exempt" => "supplierABNExempt",
  "ABN" => "supplierABN",
  "Contact Name" => "",
  "Contact Phone" => "",
  "Branch" => "contactBranch",
  "Division" => "contactDivision",
  "Office Postcode" => "contactPostcode",
   
  );
   
   
  while (($data = fgetcsv($handle, 10000)) !== false) {
  //print_r($data);
  $num = count($data);
  if ($row == 1) {
  $headers = $data;
  } elseif ($row > 1) {
  if ($num > count($datamapping0712)) {
  die("<font color=red>Error in data import; data mapping fields out of bounds or changed $num > ".count($datamapping0712)."</font><br>" . $fpath . print_r($data));
  }
  $contractNoticeInsert = Array();
  $supplierInsert = Array();
  $agencyInsert = Array();
  $contractNoticeInsert[] = $fpath;
  $keys = array_keys($datamapping0712);
  for ($c = 0; $c < $num; $c++) {
  $data[$c] = trim($data[$c], "=");
  $data[$c] = trim($data[$c], "\"");
  if (in_array(($datamapping0712[$headers[$c]]), $contractNoticeFields)) {
  if (($datamapping0712[$headers[$c]]) == "parentCN" || ($datamapping0712[$headers[$c]]) == "CNID") {
  $data[$c] = substr($data[$c], 2); // take off the "CN" prefix
  if ($data[$c] > 0 && $data[$c] != '0') {
  $contractNoticeInsert[] = $data[$c];
  } else {
  $contractNoticeInsert[] = null;
  }
  } elseif (($datamapping0712[$headers[$c]]) == "supplierABN") {
  if ($data[$c] > 0 && $data[$c] != '0') {
  $contractNoticeInsert[] = $data[$c];
  } else {
  $contractNoticeInsert[] = null;
  }
  } elseif (($datamapping0712[$headers[$c]]) == "amendDate" || ($datamapping0712[$headers[$c]]) == "publishDate" || ($datamapping0712[$headers[$c]]) == "contractStart" || ($datamapping0712[$headers[$c]]) == "contractEnd") {
  $contractNoticeInsert[] = date('Y-m-d H:i:s', strtotime($data[$c]));
  } else {
  if (strstr("\" =", $data[$c] > 0)) {
  die("Invalid Description field" . $contractNoticeInsert);
  }
  $colvalue = preg_replace('/[^[:print:]]/', '', utf8_encode($data[$c]));
   
  $contractNoticeInsert[] = $colvalue;
  }
  }
  }
  flush();
  //print_r($contractNoticeInsert);
  $contractNoticeInsertQ->execute($contractNoticeInsert);
  $errors = $conn->errorInfo();
  if ($errors[1] == 7 && strpos($errors[2], "duplicate key")) {
  // echo "dupe <br>";
  } elseif ($errors[1] == 0) {
  $success++;
  } else {
  foreach ($contractNoticeFields as $key => $cnf) {
  echo var_dump($contractNoticeInsert[$key]) . $cnf . "<br>";
  }
  echo $data[2] . " failed CN insert.<br>" . print_r($errors, true) . "<br> row $row <br><br>\n";
  }
   
  flush();
  //echo "<hr>\n";
  }
  $row++;
  }
  fclose($handle);
  $contractNoticeInsertQ->closeCursor();
   
  return $success;
  }
   
  $path = 'datagovdata/';
  if ($_REQUEST["fname"] == "" && $argv[1] == "") {
  echo "Get files from: https://www.tenders.gov.au/?event=public.reports.list<br>";
  $dhandle = opendir($path);
  // define an array to hold the files
  $files = array();
  if ($dhandle) {
  // loop through all of the files
  while (false !== ($fname = readdir($dhandle))) {
  if (($fname != '.') && ($fname != '..')) {
  $files[date("c", filemtime($path . $fname)) . md5($fname)] = $fname;
  }
  }
  }
  ksort($files);
  foreach ($files as $date => $fname) {
  echo "<a href=\"importdatagov.php?fname=$fname\">$fname</a>&nbsp;" . filesize($path . $fname) . "&nbsp;" . $date . "<br/>";
  }
  } else {
  $success = 0;
  $fname = $_REQUEST["fname"];
  if ($argv[1] != "") $fname = $argv[1];
  echo " ============== $fname ============== <br>";
  flush();
  $success+= processFile($path . $fname, "contractnotice");
  $success+= processFile($path . $fname, "agency");
  $success+= processFile($path . $fname, "supplier");
  echo "<br> $success records successfully created";
   
  flush();
  // run post import data processing
  //
  if ($success > 0) {
  $conn->exec("update datasets set \"lastUpdated\" = NOW() where title = 'Contract Notices'");
  echo "link amend<br>";
  include ("linkAmendments.php");
  echo "update UNSPSC<br>";
  include ("updateUNSPSC.php");
  }
  // cn
   
  // agency
  //include ("setAgencyStatus.php");
  //include ("setAgencyURLABN.php");
   
  }
  ?>