Amendment partial record importer
[contractdashboard.git] / admin / partialdata / importamendments.php
blob:a/admin/partialdata/importamendments.php -> blob:b/admin/partialdata/importamendments.php
--- a/admin/partialdata/importamendments.php
+++ b/admin/partialdata/importamendments.php
@@ -1,1 +1,315 @@
-
+<?php
+include_once ("../../lib/common.inc.php");
+
+// http://www.lastcraft.com/browser_documentation.php
+// http://code.google.com/p/phpquery/
+require('phpQuery-onefile.php');
+function getURL($url) {
+    //return file_get_contents($url);
+    $ch = curl_init($url);
+	curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
+	curl_setopt($ch, CURLOPT_HEADER, 0);
+	curl_setopt($ch, CURLOPT_TIMEOUT, 45);
+	$page = curl_exec($ch);
+	if (curl_errno($ch)) {
+		echo "<font color=red> Database temporarily unavailable: ";
+		echo curl_errno($ch) . " " . curl_error($ch);
+		
+			echo $url;
+		echo "</font><br>";
+	}
+	curl_close($ch);
+	return $page;
+}
+
+
+function getTextFromTHNode($Node, $Text = "") { 
+    if ($Node->tagName == null) 
+        return $Text.$Node->textContent;
+    
+if ($Node->tagName != "td") {
+    $Node = $Node->firstChild; 
+    if ($Node != null) 
+        $Text = getTextFromTHNode($Node, $Text); 
+
+    while($Node->nextSibling != null) { 
+        $Text = getTextFromTHNode($Node->nextSibling, $Text); 
+        $Node = $Node->nextSibling; 
+    }
+}
+    return $Text; 
+} 
+
+function getTextFromNode($Node, $Text = "") { 
+    if ($Node->tagName == null) 
+        return $Text.$Node->textContent; 
+if ($Node->tagName != "th" && $Node->tagName != "span") {
+    $Node = $Node->firstChild; 
+    if ($Node != null) 
+        $Text = getTextFromNode($Node, $Text); 
+
+    while($Node->nextSibling != null) { 
+        $Text = getTextFromNode($Node->nextSibling, $Text); 
+        $Node = $Node->nextSibling; 
+    }
+}
+    return $Text; 
+} 
+function dom_to_array($root) 
+{ 
+    $result = array(); 
+
+    if ($root->hasAttributes()) 
+    { 
+        $attrs = $root->attributes; 
+
+        foreach ($attrs as $i => $attr) 
+            $result[$attr->name] = $attr->value; 
+    } 
+
+    $children = $root->childNodes; 
+if ($root->childNodes) {
+    if ($children->length == 1) 
+    { 
+        $child = $children->item(0); 
+
+        if ($child->nodeType == XML_TEXT_NODE) 
+        { 
+            $result['_value'] = $child->nodeValue; 
+
+            if (count($result) == 1) 
+                return $result['_value']; 
+            else 
+                return $result; 
+        } 
+    } 
+
+    $group = array(); 
+
+    for($i = 0; $i < $children->length; $i++) 
+    { 
+        $child = $children->item($i); 
+
+        if (!isset($result[$child->nodeName])) 
+            $result[$child->nodeName] = dom_to_array($child); 
+        else 
+        { 
+            if (!isset($group[$child->nodeName])) 
+            { 
+                $tmp = $result[$child->nodeName]; 
+                $result[$child->nodeName] = array($tmp); 
+                $group[$child->nodeName] = 1; 
+            } 
+
+            $result[$child->nodeName][] = dom_to_array($child); 
+        } 
+    } 
+}
+
+    return $result; 
+} 
+
+function importCN($cnid) {
+    global $conn;
+    $CN = str_replace("-A", "00",$cnid);
+    // check if already complete
+    $query = 'Select "parentCN" from contractnotice
+	where "CNID" = :CNID';
+	$query = $conn->prepare($query);
+	$query->bindParam(":CNID", $CN);
+	$query->execute();
+	$r = $query->fetch(PDO::FETCH_ASSOC);
+    if ($r['parentCN'] == NULL) {
+$site = "https://www.tenders.gov.au/";
+$searchResult = phpQuery::newDocument(getURL("https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN".$cnid));
+//echo "https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN".$cnid;
+$url = "";
+foreach(pq('a') as $a) {
+	if (strpos($a->getAttribute("href"),"public.cn.view") >0 ) {
+	    //echo  $a->getAttribute("href");
+	    $url = $a->getAttribute("href");
+	break;
+	}
+}
+$cn = phpQuery::newDocument(getURL($site.$url));
+	$datamapping0711 = array(
+		"Agency" => "agencyName",
+		"Parent CN" => "parentCN",
+		"CN ID" => "CNID",
+		"Publish Date" => "publishDate",
+		"Amendment Date" => "amendDate",
+		"Status" => "",
+		"StartDate" => "contractStart",
+		"EndDate" => "contractEnd",
+		"Contract Value (AUD)" => "value",
+		"Description" => "description",
+		"Agency Reference ID" => "agencyID",
+		"Category" => "category",
+		"Procurement Method" => "procurementMethod",
+		"ATM ID" => "atmID",
+		"SON ID" => "SONID",
+		"Confidentiality - Contract" => "confidentialityContract",
+		"Confidentiality Reason(s) - Contract" => "confidentialityContractReason",
+		"Confidentiality - Outputs" => "confidentialityOutputs",
+		"Confidentiality Reason(s) - Outputs" => "confidentialityOutputsReason",
+		"Consultancy" => "consultancy",
+		"Consultancy Reason(s)" => "consultancyReason",
+		"Amendment Reason" => "amendmentReason",
+		"Name" => "supplierName",
+		"Postal Address" => "supplierAddress",
+		"Town/City" => "supplierCity",
+		"Postcode" => "supplierPostcode",
+		"Country" => "supplierCountry",
+		"ABN Exempt" => "supplierABNExempt",
+		"ABN" => "supplierABN",
+		"Branch" => "contactBranch",
+		"Division" => "contactDivision",
+		"Office Postcode" => "contactPostcode"
+	);
+$cnFields = Array();
+foreach(pq('tr') as $tr) {
+	$tra = dom_to_array($tr);
+	if (is_array($tra['th'])) {
+	    $fieldName = trim(getTextFromTHNode($tr));
+	} else {
+	$fieldName = trim(str_replace("/th>","",$tra['th']));
+	}
+	$fieldValue = trim(print_r($tra['td'],true));
+	if ($fieldName == "State/Territory" || $fieldName == "Contact Name"
+	    || $fieldName == "Contact Phone" || $fieldName == "Contact Email"
+	    ||$fieldName == "Amendments") {
+	    // do nothing
+	} else if ($fieldName == "Contract Period") {
+	    $contractPeriod = explode("to",$fieldValue);
+	    $cnFields["contractStart"] =  trim($contractPeriod[0]);
+		$cnFields["contractEnd"] =  trim($contractPeriod[1]);
+	} else {
+	    $fieldName = $datamapping0711[$fieldName];
+
+	if ($fieldName == "parentCN" || $fieldName == "CNID") {
+	    	    if (is_array($tra['td'])) {
+							    $fieldValue = trim(getTextFromNode($tr));
+							    }
+							$fieldValue = substr($fieldValue, 2); // take off the "CN" prefix
+							$fieldValue = str_replace("-A", "00", $fieldValue); // make amendments really big numbers
+	} elseif ($fieldName == "description") {
+	   
+							if (is_array($tra['td'])) $fieldValue = print_r($tra['td']['p'],true);
+							
+	} elseif ($fieldName == "value" || $fieldName == "supplierABN") {
+							if (is_array($tra['td'])) {
+							    $fieldValue = trim(getTextFromNode($tr));
+							    }
+							$fieldValue = str_replace(Array("$",","," "), "", $fieldValue);
+							//if (!is_numeric($fieldValue)) $fieldValue = 0;
+							if ($fieldValue == "Exempt") $fieldValue = NULL;
+	} elseif ($fieldName == "amendDate" || $fieldName == "publishDate" || $fieldName == "contractStart" || $fieldName == "contractEnd") {
+							$fieldValue = date('Y-m-d H:i:s', strtotime($fieldValue));
+						} elseif (is_array($tra['td'])) {
+							    $fieldValue = trim(getTextFromNode($tr));
+							    }
+	    echo $fieldName. " = " .$fieldValue."<br>\n";
+	    $cnFields[$fieldName] = $fieldValue;
+	}
+}
+$cnFields["importFile"] = $url;
+$contractNoticeInsertQ = 'INSERT INTO contractnotice ("' . implode('" , "', array_keys($cnFields)) . '") VALUES ( ';
+		for($key = 0; $key < sizeof($cnFields); $key++) {
+			$contractNoticeInsertQ.= ($key == 0 ? "" : ", ") . "?";
+		}
+		$contractNoticeInsertQ.= ");";
+		//echo $contractNoticeInsertQ;
+		$contractNoticeInsertQ = $conn->prepare($contractNoticeInsertQ);
+		$contractNoticeInsertQ->execute(array_values($cnFields));
+				$errors = $conn->errorInfo();
+				if ($errors[1] == 7 && strpos($errors[2], "duplicate key")) {
+					echo "Dupe {$cnFields['CNID']}<br>";
+				}
+				elseif ($errors[1] == 0) {
+					echo "Success insert {$cnFields['CNID']} <br>";
+				}
+				else {
+					foreach ($cnFields as $key => $cnf) {
+						echo var_dump($key) . $cnf . "<br>";
+					}
+					echo $cnFields['CNID'] . " failed CN insert.<br>" . print_r($errors, true) . "<br> row $row <br><br>\n";
+				}
+$contractNoticeUpdateQ = 'UPDATE contractnotice SET ';
+$count = 0;
+		foreach ($cnFields as $key => $f) {
+		    
+			$count++;
+			$contractNoticeUpdateQ.= '"'.$key.'"=? '.($count >= sizeof($cnFields) ? "" : ", ");
+		}
+		$contractNoticeUpdateQ.= ' WHERE "CNID"=?;';
+		$cnFields[] = $cnFields["CNID"];
+		//echo $contractNoticeUpdateQ;
+		$contractNoticeUpdateQ = $conn->prepare($contractNoticeUpdateQ);
+		$contractNoticeUpdateQ->execute(array_values($cnFields));
+				$errors = $conn->errorInfo();
+				if ($errors[1] == 7 && strpos($errors[2], "duplicate key")) {
+				    print_r($errors);
+					echo "Dupe update {$cnFields['CNID']}<br>";
+				}
+				elseif ($errors[1] == 0) {
+					echo "Success update {$cnFields['CNID']} <br>";
+				}
+				else {
+					foreach ($cnFields as $key => $cnf) {
+						echo var_dump($key) . $cnf . "<br>";
+					}
+					echo $cnFields['CNID'] . " failed CN update.<br>" . print_r($errors, true) . "<br> row $row <br><br>\n";
+				}
+		}
+}
+function processFile($fpath, $tablename)
+{
+	global $conn;
+	echo " ============== $fpath  ============== <br>";
+	
+	$handle = fopen($fpath, "r");
+	flush();
+	$row = 1;
+	
+	while (($data = fgetcsv($handle, 1000, "\t")) !== false) {
+		if ($row > 3) {
+				$data[0] = trim($data[0], "=");
+				$data[0] = trim($data[0], "\"");
+				if (strpos($data[0], "-A") > 0) {
+				    echo "Loading {$data[0]} ... <br>\n";
+				    importCN(str_replace("CN","",$data[0]));
+				}
+			}
+			flush();
+			//echo "<hr>\n";
+			
+	
+		$row++;
+	}
+	fclose($handle);
+}
+$path = './';
+if ($_REQUEST["fname"] == "") {
+	echo "Get files from: https://www.tenders.gov.au/?event=public.reports.list<br>";
+	$dhandle = opendir($path);
+	// define an array to hold the files
+	$files = array();
+	if ($dhandle) {
+		// loop through all of the files
+		while (false !== ($fname = readdir($dhandle))) {
+			if (($fname != '.') && ($fname != '..')) {
+				echo "<a href=\"import.php?fname=$fname\">$fname</a>&nbsp;" . filesize($path . $fname) . "&nbsp;" . date("c", filemtime($path . $fname)) . "<br/>";
+				processFile($path . $fname, "contractnotice");
+			}
+		}
+	}
+}
+else {
+	$success = 0;
+	$fname = $_REQUEST["fname"];
+	
+	$success+= processFile($path . $fname, "contractnotice");
+
+}
+
+?>