add datagov importer
add datagov importer

--- a/admin/import.php
+++ b/admin/import.php
@@ -86,8 +86,7 @@
         "Agency Postcode" => "contactPostcode",
         "" => ""
     );
-    
-    $headers;
+
 
     while (($data = fgetcsv($handle, 1000, "\t")) !== false) {
         $num = count($data);
@@ -172,7 +171,7 @@
     }
     ksort($files);
     foreach ($files as $date => $fname) {
-        echo "<a href=\"import.php?fname=$fname\">$fname</a>&nbsp;" . filesize($path . $fname) . "&nbsp;" . $date . "<br/>";
+        echo "<a href=\"importdatagov.php?fname=$fname\">$fname</a>&nbsp;" . filesize($path . $fname) . "&nbsp;" . $date . "<br/>";
     }
 } else {
     $success = 0;

--- /dev/null
+++ b/admin/importdatagov.php
@@ -1,1 +1,216 @@
-
+<?php
+
+include_once ("../lib/common.inc.php");
+auth();
+/* todo gaps
+Contract ID,Department,Portfolio,Division,Branch,Agency Ref,Contract Date,End Date,Value,Office PCode,Description,Procurement Desc,Confidentiality Reason,Consultancy,ANZSCC Code,ANSCC Desc,Supplier,Supplier Postal Addr,Supplier Suburb,Supplier State,Supplier Country,Supplier PCode,Supplier ABN,Supplier DUNS,Supplier ACN,S/O Reference Num
+1694570,Department of Defence,Defence,NEW SOUTH WALES,RAN HMAS WATSON,1906458734  ,30-Jun-2007,30-Jun-2007,16083.96,2612,AIRLINE TICKETS,direct source,,No,731,Passenger transportation by air,QANTAS AIRWAYS LTD,PO Box PB 747,MASCOT,NSW,Australia,2020,16009661901,750512642,009661901  ,
+
+*/
+// Columns of the contractnotice table, in the order their values are bound.
+$contractNoticeFields = array(
+    "importFile",
+    "agencyName",
+    "parentCN",
+    "CNID",
+    "publishDate",
+    "amendDate",
+    "contractStart",
+    "contractEnd",
+    "value",
+    "description",
+    "agencyID",
+    "categoryUNSPSC",
+    "category",
+    "procurementMethod",
+    "atmID",
+    "SONID",
+    "confidentialityContract",
+    "confidentialityContractReason",
+    "confidentialityOutputs",
+    "confidentialityOutputsReason",
+    "consultancy",
+    "consultancyReason",
+    "amendmentReason",
+    "supplierName",
+    "supplierAddress",
+    "supplierCity",
+    "supplierPostcode",
+    "supplierCountry",
+    "supplierABNExempt",
+    "supplierABN",
+    "contactBranch",
+    "contactDivision",
+    "contactPostcode"
+);
+
+// One "?" placeholder per column; the statement is prepared once and reused for every row.
+$contractNoticeInsertQ = $conn->prepare(
+    'INSERT INTO contractnotice ("' . implode('" , "', $contractNoticeFields) . '") VALUES ( '
+    . implode(", ", array_fill(0, count($contractNoticeFields), "?")) . ');'
+);
+
+/**
+ * Import one contract notice CSV export into the contractnotice table.
+ *
+ * The first row is taken as the header row. Each later row is mapped to
+ * contractnotice columns by header name and inserted through the prepared
+ * statement in $contractNoticeInsertQ. Duplicate-key rejections are skipped
+ * silently; any other insert failure is printed together with the row values.
+ *
+ * @param string $fpath path of the CSV file to import
+ * @return int number of rows inserted successfully
+ */
+function processFile($fpath) {
+    global $conn, $contractNoticeFields, $contractNoticeInsertQ;
+    $row = 1;
+    $success = 0;
+    ini_set('auto_detect_line_endings',TRUE);
+    $handle = fopen($fpath, "r");
+    if ($handle === false) {
+        // fgetcsv() cannot signal end-of-file on an invalid handle, so stop here
+        echo "<font color=red>Unable to open import file</font><br>" . $fpath;
+        return $success;
+    }
+    // CSV header => contractnotice column; "" marks a CSV column that is not imported
+    $datamapping0712 = array(
+        "Agency Name" => "agencyName",
+        "Parent Contract ID" => "parentCN",
+        "Contract ID" => "CNID",
+        "Publish Date" => "publishDate",
+        "Amendment Date" => "amendDate",
+        "Start Date" => "contractStart",
+        "End Date" => "contractEnd",
+        "Value" => "value",
+        "Description" => "description",
+        "Agency Ref ID" => "agencyID",
+        "UNSPSC Code" => "categoryUNSPSC",
+        "Title" => "category",
+        "Procurement Method" => "procurementMethod",
+        "ATM ID" => "atmID",
+        "SON ID" => "SONID",
+        "Confidentiality Contract Flag" => "confidentialityContract",
+        "Confidentiality Contract Reason" => "confidentialityContractReason",
+        "Confidentiality Outputs Flag" => "confidentialityOutputs",
+        "Confidentiality Outputs Reason" => "confidentialityOutputsReason",
+        "Consultancy Flag" => "consultancy",
+        "Consultancy Reason" => "consultancyReason",
+        "Amendment Reason" => "amendmentReason",
+        "Supplier Name" => "supplierName",
+        "Supplier Address" => "supplierAddress",
+        "Supplier Suburb" => "supplierCity",
+        "Supplier Postcode" => "supplierPostcode",
+        "Supplier Country" => "supplierCountry",
+        "Supplier ABN Exempt" => "supplierABNExempt",
+        "ABN" => "supplierABN",
+        "Contact Name" => "",
+        "Contact Phone" => "",
+        "Branch" => "contactBranch",
+        "Division" => "contactDivision",
+        "Office Postcode" => "contactPostcode",
+    );
+    while (($data = fgetcsv($handle, 10000)) !== false) {
+        $num = count($data);
+        if ($row == 1) {
+            $headers = $data;
+        } elseif ($row > 1) {
+            if ($num > count($datamapping0712)) {
+                die("<font color=red>Error in data import; data mapping fields out of bounds or changed $num > ".count($datamapping0712)."</font><br>" . $fpath . print_r($data, true));
+            }
+            // keyed by column name so the bind order never depends on the CSV column order
+            $values = array("importFile" => $fpath);
+            for ($c = 0; $c < $num; $c++) {
+                $data[$c] = trim(trim($data[$c], "="), "\"");
+                $header = isset($headers[$c]) ? $headers[$c] : "";
+                $field = isset($datamapping0712[$header]) ? $datamapping0712[$header] : "";
+                if (!in_array($field, $contractNoticeFields)) {
+                    continue; // unknown header or a column that is deliberately not imported
+                }
+                if ($field == "parentCN" || $field == "CNID") {
+                    $data[$c] = substr($data[$c], 2); // take off the "CN" prefix
+                    $values[$field] = ($data[$c] > 0 && $data[$c] != '0') ? $data[$c] : null;
+                } elseif ($field == "supplierABN") {
+                    $values[$field] = ($data[$c] > 0 && $data[$c] != '0') ? $data[$c] : null;
+                } elseif ($field == "amendDate" || $field == "publishDate" || $field == "contractStart" || $field == "contractEnd") {
+                    // strtotime() returns false for blank/unparseable text; store NULL, not 1970-01-01
+                    $timestamp = strtotime($data[$c]);
+                    $values[$field] = ($timestamp === false) ? null : date('Y-m-d H:i:s', $timestamp);
+                } else {
+                    $values[$field] = preg_replace('/[^[:print:]]/', '', utf8_encode($data[$c]));
+                }
+            }
+            $contractNoticeInsert = array();
+            foreach ($contractNoticeFields as $cnf) {
+                $contractNoticeInsert[] = isset($values[$cnf]) ? $values[$cnf] : null;
+            }
+            flush();
+            $contractNoticeInsertQ->execute($contractNoticeInsert);
+            // statement failures are reported on the statement handle, not on $conn
+            $errors = $contractNoticeInsertQ->errorInfo();
+            if ($errors[1] == 7 && strpos($errors[2], "duplicate key") !== false) {
+                // row is already in the table: skip it without counting or reporting
+            } elseif ($errors[1] == 0) {
+                $success++;
+            } else {
+                foreach ($contractNoticeFields as $key => $cnf) {
+                    echo var_dump($contractNoticeInsert[$key]) . $cnf . "<br>";
+                }
+                echo $data[2] . " failed CN insert.<br>" . print_r($errors, true) . "<br> row $row <br><br>\n";
+            }
+            flush();
+        }
+        $row++;
+    }
+    fclose($handle);
+    $contractNoticeInsertQ->closeCursor();
+    return $success;
+}
+
+// Entry point: with no file name, list the files available for import;
+// otherwise import the named file and run the post-import processing.
+$path = 'datagovdata/';
+// keep only the final path component so a request cannot reach outside $path
+$fname = isset($_REQUEST["fname"]) ? basename($_REQUEST["fname"]) : "";
+if (isset($argv[1]) && $argv[1] != "") {
+    $fname = $argv[1]; // a command line argument takes precedence over the request
+}
+if ($fname == "") {
+    echo "Get files from: https://www.tenders.gov.au/?event=public.reports.list<br>";
+    $dhandle = opendir($path);
+    $files = array();
+    if ($dhandle) {
+        // loop through all of the files
+        while (false !== ($entry = readdir($dhandle))) {
+            if (($entry != '.') && ($entry != '..')) {
+                // the md5 suffix gives files sharing a modification time distinct keys
+                $files[date("c", filemtime($path . $entry)) . md5($entry)] = $entry;
+            }
+        }
+        closedir($dhandle);
+    }
+    ksort($files);
+    foreach ($files as $date => $entry) {
+        $link = "importdatagov.php?fname=" . urlencode($entry);
+        echo "<a href=\"" . $link . "\">" . htmlspecialchars($entry) . "</a>&nbsp;" . filesize($path . $entry) . "&nbsp;" . $date . "<br/>";
+    }
+} else {
+    echo " ============== " . htmlspecialchars($fname) . "  ============== <br>";
+    flush();
+    // processFile() takes only the path and imports contract notices; one pass is enough
+    $success = processFile($path . $fname);
+    echo "<br> $success records successfully created";
+    flush();
+    // run post import data processing
+    if ($success > 0) {
+        $conn->exec("update datasets set \"lastUpdated\" = NOW() where title = 'Contract Notices'");
+        echo "link amend<br>";
+        include ("linkAmendments.php");
+        echo "update UNSPSC<br>";
+        include ("updateUNSPSC.php");
+    }
+    // agency
+    //include ("setAgencyStatus.php");
+    //include ("setAgencyURLABN.php");
+}
+?>
+

--- a/admin/partialdata/import.php
+++ b/admin/partialdata/import.php
@@ -12,6 +12,7 @@
 	$handle = fopen($fpath, "r");
 	//"t" mode string translates windows line breaks to unix
 	$datamapping0507 = array(
+
 		"Agency" => "agencyName",
 		"CN ID" => "CNID",
 		"Publish Date" => "publishDate",
@@ -137,7 +138,7 @@
 		// loop through all of the files
 		while (false !== ($fname = readdir($dhandle))) {
 			if (($fname != '.') && ($fname != '..') && (!isset($_REQUEST["filter"]) || strpos($fname,$_REQUEST["filter"]) != false)) {
-				echo "<a href=\"import.php?fname=$fname\">$fname</a>&nbsp;" . filesize($path . $fname) . "&nbsp;" . date("c", filemtime($path . $fname)) . "<br/>";
+				echo "<a href=\"importdatagov.php?fname=$fname\">$fname</a>&nbsp;" . filesize($path . $fname) . "&nbsp;" . date("c", filemtime($path . $fname)) . "<br/>";
 				processFile($path . $fname, "contractnotice");
 			}
 		}

--- a/admin/partialdata/importamendments.php
+++ b/admin/partialdata/importamendments.php
@@ -310,7 +310,7 @@
 		// loop through all of the files
 		while (false !== ($fname = readdir($dhandle))) {
 			if (($fname != '.') && ($fname != '..') && (strpos($fname,".xls")>0)) {
-				echo "<a href=\"import.php?fname=$fname\">$fname</a>&nbsp;" . filesize($path . $fname) . "&nbsp;" . date("c", filemtime($path . $fname)) . "<br/>";
+				echo "<a href=\"importdatagov.php?fname=$fname\">$fname</a>&nbsp;" . filesize($path . $fname) . "&nbsp;" . date("c", filemtime($path . $fname)) . "<br/>";
 				processFile($path . $fname, "contractnotice");
 			}
 		}

--- a/lib/common.inc.php
+++ b/lib/common.inc.php
@@ -14,11 +14,14 @@
 if (strstr($_SERVER['PHP_SELF'], "labs/") || strstr($_SERVER['PHP_SELF'], "admin/") || strstr($_SERVER['PHP_SELF'], "heuristics/")) {

     $basePath = "../";

 }

+    if (php_sapi_name() != "cli") {

+

 require $basePath."lib/amon-php/amon.php";

 Amon::config(array('address'=> 'http://127.0.0.1:2464', 

 		'protocol' => 'http', 

 		'secret_key' => "JBcSUdFOi5lK0vCjLjbHDpQamcBnRA4iV7QLaTADeDQ"));

 Amon::setup_exception_handler();

+    }

 

 require ROOT . DIRECTORY_SEPARATOR . '..' . DIRECTORY_SEPARATOR . 'lib' . DIRECTORY_SEPARATOR . 'openid.php';

 $openid = new LightOpenID($_SERVER['HTTP_HOST']);