add datagov importer
[contractdashboard.git] / admin / partialdata / importamendments.php
blob:a/admin/partialdata/importamendments.php -> blob:b/admin/partialdata/importamendments.php
<?php <?php
if (php_sapi_name() == "cli") { if (php_sapi_name() == "cli") {
include_once ("../../lib/common.inc.php"); include_once ("../../lib/common.inc.php");
/* /*
update contractnotice set "supplierABN" = a."supplierABN" update contractnotice set "supplierABN" = a."supplierABN"
from contractnotice as cn inner join (select "supplierABN", from contractnotice as cn inner join (select "supplierABN",
"supplierName" from contractnotice where "supplierABN" "supplierName" from contractnotice where "supplierABN"
IS NOT NULL and "supplierABN" != 0) as a on IS NOT NULL and "supplierABN" != 0) as a on
cn."supplierName" = a."supplierName" where cn."supplierName" = a."supplierName" where
cn."CNID"=contractnotice."CNID" and (contractnotice."supplierABN" cn."CNID"=contractnotice."CNID" and (contractnotice."supplierABN"
IS NULL or contractnotice."supplierABN" = 0) */ IS NULL or contractnotice."supplierABN" = 0) */
// http://www.lastcraft.com/browser_documentation.php // http://www.lastcraft.com/browser_documentation.php
// http://code.google.com/p/phpquery/ // http://code.google.com/p/phpquery/
require('phpQuery-onefile.php'); require('phpQuery-onefile.php');
function getURL($url) { function getURL($url) {
//return file_get_contents($url); //return file_get_contents($url);
$ch = curl_init($url); $ch = curl_init($url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_HEADER, 0); curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_TIMEOUT, 45); curl_setopt($ch, CURLOPT_TIMEOUT, 45);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0); curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0); curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
$page = curl_exec($ch); $page = curl_exec($ch);
if (curl_errno($ch)) { if (curl_errno($ch)) {
echo "<font color=red> Database temporarily unavailable: "; echo "<font color=red> Database temporarily unavailable: ";
echo curl_errno($ch) . " " . curl_error($ch); echo curl_errno($ch) . " " . curl_error($ch);
echo $url; echo $url;
echo "</font><br>"; echo "</font><br>";
} }
curl_close($ch); curl_close($ch);
return $page; return $page;
} }
   
   
function getTextFromTHNode($Node, $Text = "") { function getTextFromTHNode($Node, $Text = "") {
if ($Node->tagName == null) if ($Node->tagName == null)
return $Text.$Node->textContent; return $Text.$Node->textContent;
if ($Node->tagName != "td") { if ($Node->tagName != "td") {
$Node = $Node->firstChild; $Node = $Node->firstChild;
if ($Node != null) if ($Node != null)
$Text = getTextFromTHNode($Node, $Text); $Text = getTextFromTHNode($Node, $Text);
   
while($Node->nextSibling != null) { while($Node->nextSibling != null) {
$Text = getTextFromTHNode($Node->nextSibling, $Text); $Text = getTextFromTHNode($Node->nextSibling, $Text);
$Node = $Node->nextSibling; $Node = $Node->nextSibling;
} }
} }
return $Text; return $Text;
} }
   
function getTextFromNode($Node, $Text = "") { function getTextFromNode($Node, $Text = "") {
if ($Node->tagName == null) if ($Node->tagName == null)
return $Text.$Node->textContent; return $Text.$Node->textContent;
if ($Node->tagName != "th" && $Node->tagName != "span") { if ($Node->tagName != "th" && $Node->tagName != "span") {
$Node = $Node->firstChild; $Node = $Node->firstChild;
if ($Node != null) if ($Node != null)
$Text = getTextFromNode($Node, $Text); $Text = getTextFromNode($Node, $Text);
   
while($Node->nextSibling != null) { while($Node->nextSibling != null) {
$Text = getTextFromNode($Node->nextSibling, $Text); $Text = getTextFromNode($Node->nextSibling, $Text);
$Node = $Node->nextSibling; $Node = $Node->nextSibling;
} }
} }
return $Text; return $Text;
} }
function dom_to_array($root) function dom_to_array($root)
{ {
$result = array(); $result = array();
   
if ($root->hasAttributes()) if ($root->hasAttributes())
{ {
$attrs = $root->attributes; $attrs = $root->attributes;
   
foreach ($attrs as $i => $attr) foreach ($attrs as $i => $attr)
$result[$attr->name] = $attr->value; $result[$attr->name] = $attr->value;
} }
   
$children = $root->childNodes; $children = $root->childNodes;
if ($root->childNodes) { if ($root->childNodes) {
if ($children->length == 1) if ($children->length == 1)
{ {
$child = $children->item(0); $child = $children->item(0);
   
if ($child->nodeType == XML_TEXT_NODE) if ($child->nodeType == XML_TEXT_NODE)
{ {
$result['_value'] = $child->nodeValue; $result['_value'] = $child->nodeValue;
   
if (count($result) == 1) if (count($result) == 1)
return $result['_value']; return $result['_value'];
else else
return $result; return $result;
} }
} }
   
$group = array(); $group = array();
   
for($i = 0; $i < $children->length; $i++) for($i = 0; $i < $children->length; $i++)
{ {
$child = $children->item($i); $child = $children->item($i);
   
if (!isset($result[$child->nodeName])) if (!isset($result[$child->nodeName]))
$result[$child->nodeName] = dom_to_array($child); $result[$child->nodeName] = dom_to_array($child);
else else
{ {
if (!isset($group[$child->nodeName])) if (!isset($group[$child->nodeName]))
{ {
$tmp = $result[$child->nodeName]; $tmp = $result[$child->nodeName];
$result[$child->nodeName] = array($tmp); $result[$child->nodeName] = array($tmp);
$group[$child->nodeName] = 1; $group[$child->nodeName] = 1;
} }
   
$result[$child->nodeName][] = dom_to_array($child); $result[$child->nodeName][] = dom_to_array($child);
} }
} }
} }
   
return $result; return $result;
} }
   
function importCN($cnid) { function importCN($cnid) {
global $conn; global $conn;
// check if already complete // check if already complete
$query = 'Select "parentCN" from contractnotice $query = 'Select "parentCN" from contractnotice
where "CNID" = :CNID'; where "CNID" = :CNID';
$query = $conn->prepare($query); $query = $conn->prepare($query);
$query->bindParam(":CNID", $CN); $query->bindParam(":CNID", $CN);
$query->execute(); $query->execute();
$r = $query->fetch(PDO::FETCH_ASSOC); $r = $query->fetch(PDO::FETCH_ASSOC);
if ($r['parentCN'] == NULL) { if ($r['parentCN'] == NULL) {
$site = "https://www.tenders.gov.au/"; $site = "https://www.tenders.gov.au/";
$searchResult = phpQuery::newDocument(getURL("https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN".$cnid)); $searchResult = phpQuery::newDocument(getURL("https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN".$cnid));
//echo "https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN".$cnid; //echo "https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN".$cnid;
$url = ""; $url = "";
foreach(pq('a') as $a) { foreach(pq('a') as $a) {
if (strpos($a->getAttribute("href"),"public.cn.view") >0 ) { if (strpos($a->getAttribute("href"),"public.cn.view") >0 ) {
//echo $a->getAttribute("href"); //echo $a->getAttribute("href");
$url = $a->getAttribute("href"); $url = $a->getAttribute("href");
break; break;
} }
} }
$cn = phpQuery::newDocument(getURL($site.$url)); $cn = phpQuery::newDocument(getURL($site.$url));
$datamapping0711 = array( $datamapping0711 = array(
"Agency" => "agencyName", "Agency" => "agencyName",
"Parent CN" => "parentCN", "Parent CN" => "parentCN",
"CN ID" => "CNID", "CN ID" => "CNID",
"Publish Date" => "publishDate", "Publish Date" => "publishDate",
"Amendment Date" => "amendDate", "Amendment Date" => "amendDate",
"Status" => "", "Status" => "",
"StartDate" => "contractStart", "StartDate" => "contractStart",
"EndDate" => "contractEnd", "EndDate" => "contractEnd",
"Contract Value (AUD)" => "value", "Contract Value (AUD)" => "value",
"Description" => "description", "Description" => "description",
"Agency Reference ID" => "agencyID", "Agency Reference ID" => "agencyID",
"Category" => "category", "Category" => "category",
"Procurement Method" => "procurementMethod", "Procurement Method" => "procurementMethod",
"ATM ID" => "atmID", "ATM ID" => "atmID",
"SON ID" => "SONID", "SON ID" => "SONID",
"Confidentiality - Contract" => "confidentialityContract", "Confidentiality - Contract" => "confidentialityContract",
"Confidentiality Reason(s) - Contract" => "confidentialityContractReason", "Confidentiality Reason(s) - Contract" => "confidentialityContractReason",
"Confidentiality - Outputs" => "confidentialityOutputs", "Confidentiality - Outputs" => "confidentialityOutputs",
"Confidentiality Reason(s) - Outputs" => "confidentialityOutputsReason", "Confidentiality Reason(s) - Outputs" => "confidentialityOutputsReason",
"Consultancy" => "consultancy", "Consultancy" => "consultancy",
"Consultancy Reason(s)" => "consultancyReason", "Consultancy Reason(s)" => "consultancyReason",
"Amendment Reason" => "amendmentReason", "Amendment Reason" => "amendmentReason",
"Name" => "supplierName", "Name" => "supplierName",
"Postal Address" => "supplierAddress", "Postal Address" => "supplierAddress",
"Town/City" => "supplierCity", "Town/City" => "supplierCity",
"Postcode" => "supplierPostcode", "Postcode" => "supplierPostcode",
"Country" => "supplierCountry", "Country" => "supplierCountry",
"ABN Exempt" => "supplierABNExempt", "ABN Exempt" => "supplierABNExempt",
"ABN" => "supplierABN", "ABN" => "supplierABN",
"Branch" => "contactBranch", "Branch" => "contactBranch",
"Division" => "contactDivision", "Division" => "contactDivision",
"Office Postcode" => "contactPostcode" "Office Postcode" => "contactPostcode"
); );
$cnFields = Array(); $cnFields = Array();
foreach(pq('tr') as $tr) { foreach(pq('tr') as $tr) {
$tra = dom_to_array($tr); $tra = dom_to_array($tr);
if (is_array($tra['th'])) { if (is_array($tra['th'])) {
$fieldName = trim(getTextFromTHNode($tr)); $fieldName = trim(getTextFromTHNode($tr));
} else { } else {
$fieldName = trim(str_replace("/th>","",$tra['th'])); $fieldName = trim(str_replace("/th>","",$tra['th']));
} }
$fieldValue = trim(print_r($tra['td'],true)); $fieldValue = trim(print_r($tra['td'],true));
if ($fieldName == "State/Territory" || $fieldName == "Contact Name" if ($fieldName == "State/Territory" || $fieldName == "Contact Name"
|| $fieldName == "Contact Phone" || $fieldName == "Contact Email" || $fieldName == "Contact Phone" || $fieldName == "Contact Email"
||$fieldName == "Amendments") { ||$fieldName == "Amendments") {
// do nothing // do nothing
} else if ($fieldName == "Contract Period") { } else if ($fieldName == "Contract Period") {
$contractPeriod = explode("to",$fieldValue); $contractPeriod = explode("to",$fieldValue);
$cnFields["contractStart"] = trim($contractPeriod[0]); $cnFields["contractStart"] = trim($contractPeriod[0]);
$cnFields["contractEnd"] = trim($contractPeriod[1]); $cnFields["contractEnd"] = trim($contractPeriod[1]);
} else { } else {
$fieldName = $datamapping0711[$fieldName]; $fieldName = $datamapping0711[$fieldName];
   
if ($fieldName == "parentCN" || $fieldName == "CNID") { if ($fieldName == "parentCN" || $fieldName == "CNID") {
if (is_array($tra['td'])) { if (is_array($tra['td'])) {
$fieldValue = trim(getTextFromNode($tr)); $fieldValue = trim(getTextFromNode($tr));
} }
$fieldValue = substr($fieldValue, 2); // take off the "CN" prefix $fieldValue = substr($fieldValue, 2); // take off the "CN" prefix
} elseif ($fieldName == "description") { } elseif ($fieldName == "description") {
if (is_array($tra['td'])) $fieldValue = print_r($tra['td']['p'],true); if (is_array($tra['td'])) $fieldValue = print_r($tra['td']['p'],true);
} elseif ($fieldName == "value" || $fieldName == "supplierABN") { } elseif ($fieldName == "value" || $fieldName == "supplierABN") {
if (is_array($tra['td'])) { if (is_array($tra['td'])) {
$fieldValue = trim(getTextFromNode($tr)); $fieldValue = trim(getTextFromNode($tr));
} }
$fieldValue = str_replace(Array("$",","," "), "", $fieldValue); $fieldValue = str_replace(Array("$",","," "), "", $fieldValue);
//if (!is_numeric($fieldValue)) $fieldValue = 0; //if (!is_numeric($fieldValue)) $fieldValue = 0;
if ($fieldValue == "Exempt") $fieldValue = NULL; if ($fieldValue == "Exempt") $fieldValue = NULL;
} elseif ($fieldName == "amendDate" || $fieldName == "publishDate" || $fieldName == "contractStart" || $fieldName == "contractEnd") { } elseif ($fieldName == "amendDate" || $fieldName == "publishDate" || $fieldName == "contractStart" || $fieldName == "contractEnd") {
$fieldValue = date('Y-m-d H:i:s', strtotime($fieldValue)); $fieldValue = date('Y-m-d H:i:s', strtotime($fieldValue));
} elseif (is_array($tra['td'])) { } elseif (is_array($tra['td'])) {
$fieldValue = trim(getTextFromNode($tr)); $fieldValue = trim(getTextFromNode($tr));
} }
echo $fieldName. " = " .$fieldValue."<br>\n"; echo $fieldName. " = " .$fieldValue."<br>\n";
$cnFields[$fieldName] = $fieldValue; $cnFields[$fieldName] = $fieldValue;
} }
} }
if (isset($cnFields[""])) { if (isset($cnFields[""])) {
$cnFields["description"] .= $cnFields[""]; $cnFields["description"] .= $cnFields[""];
unset($cnFields[""]); unset($cnFields[""]);
} }
$cnFields["importFile"] = $url; $cnFields["importFile"] = $url;
$contractNoticeInsertQ = 'INSERT INTO contractnotice ("' . implode('" , "', array_keys($cnFields)) . '") VALUES ( '; $contractNoticeInsertQ = 'INSERT INTO contractnotice ("' . implode('" , "', array_keys($cnFields)) . '") VALUES ( ';
for($key = 0; $key < sizeof($cnFields); $key++) { for($key = 0; $key < sizeof($cnFields); $key++) {
$contractNoticeInsertQ.= ($key == 0 ? "" : ", ") . "?"; $contractNoticeInsertQ.= ($key == 0 ? "" : ", ") . "?";
} }
$contractNoticeInsertQ.= ");"; $contractNoticeInsertQ.= ");";
//echo $contractNoticeInsertQ; //echo $contractNoticeInsertQ;
$contractNoticeInsertQ = $conn->prepare($contractNoticeInsertQ); $contractNoticeInsertQ = $conn->prepare($contractNoticeInsertQ);
$contractNoticeInsertQ->execute(array_values($cnFields)); $contractNoticeInsertQ->execute(array_values($cnFields));
$errors = $conn->errorInfo(); $errors = $conn->errorInfo();
if ($errors[1] == 7 && strpos($errors[2], "duplicate key")) { if ($errors[1] == 7 && strpos($errors[2], "duplicate key")) {
echo "Dupe {$cnFields['CNID']}<br>"; echo "Dupe {$cnFields['CNID']}<br>";
} }
elseif ($errors[1] == 0) { elseif ($errors[1] == 0) {
echo "Success insert {$cnFields['CNID']} <br>"; echo "Success insert {$cnFields['CNID']} <br>";
} }
else { else {
foreach ($cnFields as $key => $cnf) { foreach ($cnFields as $key => $cnf) {
echo var_dump($key) . $cnf . "<br>"; echo var_dump($key) . $cnf . "<br>";
} }
echo $cnFields['CNID'] . " failed CN insert.<br>" . print_r($errors, true) . "<br> row $row <br><br>\n"; echo $cnFields['CNID'] . " failed CN insert.<br>" . print_r($errors, true) . "<br> row $row <br><br>\n";
} }
$contractNoticeUpdateQ = 'UPDATE contractnotice SET '; $contractNoticeUpdateQ = 'UPDATE contractnotice SET ';
$count = 0; $count = 0;
foreach ($cnFields as $key => $f) { foreach ($cnFields as $key => $f) {
$count++; $count++;
$contractNoticeUpdateQ.= '"'.$key.'"=? '.($count >= sizeof($cnFields) ? "" : ", "); $contractNoticeUpdateQ.= '"'.$key.'"=? '.($count >= sizeof($cnFields) ? "" : ", ");
} }
$contractNoticeUpdateQ.= ' WHERE "CNID"=?;'; $contractNoticeUpdateQ.= ' WHERE "CNID"=?;';
$cnFields[] = $cnFields["CNID"]; $cnFields[] = $cnFields["CNID"];
//echo $contractNoticeUpdateQ; //echo $contractNoticeUpdateQ;
$contractNoticeUpdateQ = $conn->prepare($contractNoticeUpdateQ); $contractNoticeUpdateQ = $conn->prepare($contractNoticeUpdateQ);
$contractNoticeUpdateQ->execute(array_values($cnFields)); $contractNoticeUpdateQ->execute(array_values($cnFields));
$errors = $conn->errorInfo(); $errors = $conn->errorInfo();
if ($errors[1] == 7 && strpos($errors[2], "duplicate key")) { if ($errors[1] == 7 && strpos($errors[2], "duplicate key")) {
print_r($errors); print_r($errors);
echo "Dupe update {$cnFields['CNID']}<br>"; echo "Dupe update {$cnFields['CNID']}<br>";
} }
elseif ($errors[1] == 0) { elseif ($errors[1] == 0) {
echo "Success update {$cnFields['CNID']} <br>"; echo "Success update {$cnFields['CNID']} <br>";
} }
else { else {
foreach ($cnFields as $key => $cnf) { foreach ($cnFields as $key => $cnf) {
echo var_dump($key) . $cnf . "<br>"; echo var_dump($key) . $cnf . "<br>";
} }
echo $cnFields['CNID'] . " failed CN update.<br>" . print_r($errors, true) . "<br> row $row <br><br>\n"; echo $cnFields['CNID'] . " failed CN update.<br>" . print_r($errors, true) . "<br> row $row <br><br>\n";
} }
} }
} }
function processFile($fpath, $tablename) function processFile($fpath, $tablename)
{ {
global $conn; global $conn;
echo " ============== $fpath ============== <br>"; echo " ============== $fpath ============== <br>";
$handle = fopen($fpath, "r"); $handle = fopen($fpath, "r");
flush(); flush();
$row = 1; $row = 1;
while (($data = fgetcsv($handle, 1000, "\t")) !== false) { while (($data = fgetcsv($handle, 1000, "\t")) !== false) {
if ($row > 3) { if ($row > 3) {
$data[0] = trim($data[0], "="); $data[0] = trim($data[0], "=");
$data[0] = trim($data[0], "\""); $data[0] = trim($data[0], "\"");
if (strpos($data[0], "-A") > 0) { if (strpos($data[0], "-A") > 0) {
echo "Loading {$data[0]} ... <br>\n"; echo "Loading {$data[0]} ... <br>\n";
importCN(str_replace("CN","",$data[0])); importCN(str_replace("CN","",$data[0]));
} }
} }
flush(); flush();
//echo "<hr>\n"; //echo "<hr>\n";
$row++; $row++;
} }
fclose($handle); fclose($handle);
} }
$path = './'; $path = './';
if ($_REQUEST["fname"] == "") { if ($_REQUEST["fname"] == "") {
echo "Get files from: https://www.tenders.gov.au/?event=public.reports.list<br>"; echo "Get files from: https://www.tenders.gov.au/?event=public.reports.list<br>";
$dhandle = opendir($path); $dhandle = opendir($path);
// define an array to hold the files // define an array to hold the files
$files = array(); $files = array();
if ($dhandle) { if ($dhandle) {
// loop through all of the files // loop through all of the files
while (false !== ($fname = readdir($dhandle))) { while (false !== ($fname = readdir($dhandle))) {
if (($fname != '.') && ($fname != '..') && (strpos($fname,".xls")>0)) { if (($fname != '.') && ($fname != '..') && (strpos($fname,".xls")>0)) {
echo "<a href=\"import.php?fname=$fname\">$fname</a>&nbsp;" . filesize($path . $fname) . "&nbsp;" . date("c", filemtime($path . $fname)) . "<br/>"; echo "<a href=\"importdatagov.php?fname=$fname\">$fname</a>&nbsp;" . filesize($path . $fname) . "&nbsp;" . date("c", filemtime($path . $fname)) . "<br/>";
processFile($path . $fname, "contractnotice"); processFile($path . $fname, "contractnotice");
} }
} }
} }
} }
else { else {
$success = 0; $success = 0;
$fname = $_REQUEST["fname"]; $fname = $_REQUEST["fname"];
$success+= processFile($path . $fname, "contractnotice"); $success+= processFile($path . $fname, "contractnotice");
   
} }
} }
?> ?>