<?php |
<?php |
include_once ("../../lib/common.inc.php"); |
include_once ("../../lib/common.inc.php"); |
/* |
/* |
update contractnotice set "supplierABN" = a."supplierABN" |
update contractnotice set "supplierABN" = a."supplierABN" |
from contractnotice as cn inner join (select "supplierABN", |
from contractnotice as cn inner join (select "supplierABN", |
"supplierName" from contractnotice where "supplierABN" |
"supplierName" from contractnotice where "supplierABN" |
IS NOT NULL and "supplierABN" != 0) as a on |
IS NOT NULL and "supplierABN" != 0) as a on |
cn."supplierName" = a."supplierName" where |
cn."supplierName" = a."supplierName" where |
cn."CNID"=contractnotice."CNID" and (contractnotice."supplierABN" |
cn."CNID"=contractnotice."CNID" and (contractnotice."supplierABN" |
IS NULL or contractnotice."supplierABN" = 0) */ |
IS NULL or contractnotice."supplierABN" = 0) */ |
// http://www.lastcraft.com/browser_documentation.php |
// http://www.lastcraft.com/browser_documentation.php |
// http://code.google.com/p/phpquery/ |
// http://code.google.com/p/phpquery/ |
require('phpQuery-onefile.php'); |
require('phpQuery-onefile.php'); |
function getURL($url) { |
function getURL($url) { |
//return file_get_contents($url); |
//return file_get_contents($url); |
$ch = curl_init($url); |
$ch = curl_init($url); |
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); |
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); |
curl_setopt($ch, CURLOPT_HEADER, 0); |
curl_setopt($ch, CURLOPT_HEADER, 0); |
curl_setopt($ch, CURLOPT_TIMEOUT, 45); |
curl_setopt($ch, CURLOPT_TIMEOUT, 45); |
|
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0); |
|
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0); |
$page = curl_exec($ch); |
$page = curl_exec($ch); |
if (curl_errno($ch)) { |
if (curl_errno($ch)) { |
echo "<font color=red> Database temporarily unavailable: "; |
echo "<font color=red> Database temporarily unavailable: "; |
echo curl_errno($ch) . " " . curl_error($ch); |
echo curl_errno($ch) . " " . curl_error($ch); |
|
|
echo $url; |
echo $url; |
echo "</font><br>"; |
echo "</font><br>"; |
} |
} |
curl_close($ch); |
curl_close($ch); |
return $page; |
return $page; |
} |
} |
|
|
|
|
function getTextFromTHNode($Node, $Text = "") { |
function getTextFromTHNode($Node, $Text = "") { |
if ($Node->tagName == null) |
if ($Node->tagName == null) |
return $Text.$Node->textContent; |
return $Text.$Node->textContent; |
|
|
if ($Node->tagName != "td") { |
if ($Node->tagName != "td") { |
$Node = $Node->firstChild; |
$Node = $Node->firstChild; |
if ($Node != null) |
if ($Node != null) |
$Text = getTextFromTHNode($Node, $Text); |
$Text = getTextFromTHNode($Node, $Text); |
|
|
while($Node->nextSibling != null) { |
while($Node->nextSibling != null) { |
$Text = getTextFromTHNode($Node->nextSibling, $Text); |
$Text = getTextFromTHNode($Node->nextSibling, $Text); |
$Node = $Node->nextSibling; |
$Node = $Node->nextSibling; |
} |
} |
} |
} |
return $Text; |
return $Text; |
} |
} |
|
|
function getTextFromNode($Node, $Text = "") { |
function getTextFromNode($Node, $Text = "") { |
if ($Node->tagName == null) |
if ($Node->tagName == null) |
return $Text.$Node->textContent; |
return $Text.$Node->textContent; |
if ($Node->tagName != "th" && $Node->tagName != "span") { |
if ($Node->tagName != "th" && $Node->tagName != "span") { |
$Node = $Node->firstChild; |
$Node = $Node->firstChild; |
if ($Node != null) |
if ($Node != null) |
$Text = getTextFromNode($Node, $Text); |
$Text = getTextFromNode($Node, $Text); |
|
|
while($Node->nextSibling != null) { |
while($Node->nextSibling != null) { |
$Text = getTextFromNode($Node->nextSibling, $Text); |
$Text = getTextFromNode($Node->nextSibling, $Text); |
$Node = $Node->nextSibling; |
$Node = $Node->nextSibling; |
} |
} |
} |
} |
return $Text; |
return $Text; |
} |
} |
function dom_to_array($root) |
function dom_to_array($root) |
{ |
{ |
$result = array(); |
$result = array(); |
|
|
if ($root->hasAttributes()) |
if ($root->hasAttributes()) |
{ |
{ |
$attrs = $root->attributes; |
$attrs = $root->attributes; |
|
|
foreach ($attrs as $i => $attr) |
foreach ($attrs as $i => $attr) |
$result[$attr->name] = $attr->value; |
$result[$attr->name] = $attr->value; |
} |
} |
|
|
$children = $root->childNodes; |
$children = $root->childNodes; |
if ($root->childNodes) { |
if ($root->childNodes) { |
if ($children->length == 1) |
if ($children->length == 1) |
{ |
{ |
$child = $children->item(0); |
$child = $children->item(0); |
|
|
if ($child->nodeType == XML_TEXT_NODE) |
if ($child->nodeType == XML_TEXT_NODE) |
{ |
{ |
$result['_value'] = $child->nodeValue; |
$result['_value'] = $child->nodeValue; |
|
|
if (count($result) == 1) |
if (count($result) == 1) |
return $result['_value']; |
return $result['_value']; |
else |
else |
return $result; |
return $result; |
} |
} |
} |
} |
|
|
$group = array(); |
$group = array(); |
|
|
for($i = 0; $i < $children->length; $i++) |
for($i = 0; $i < $children->length; $i++) |
{ |
{ |
$child = $children->item($i); |
$child = $children->item($i); |
|
|
if (!isset($result[$child->nodeName])) |
if (!isset($result[$child->nodeName])) |
$result[$child->nodeName] = dom_to_array($child); |
$result[$child->nodeName] = dom_to_array($child); |
else |
else |
{ |
{ |
if (!isset($group[$child->nodeName])) |
if (!isset($group[$child->nodeName])) |
{ |
{ |
$tmp = $result[$child->nodeName]; |
$tmp = $result[$child->nodeName]; |
$result[$child->nodeName] = array($tmp); |
$result[$child->nodeName] = array($tmp); |
$group[$child->nodeName] = 1; |
$group[$child->nodeName] = 1; |
} |
} |
|
|
$result[$child->nodeName][] = dom_to_array($child); |
$result[$child->nodeName][] = dom_to_array($child); |
} |
} |
} |
} |
} |
} |
|
|
return $result; |
return $result; |
} |
} |
|
|
function importCN($cnid) { |
function importCN($cnid) { |
global $conn; |
global $conn; |
$CN = str_replace("-A", "00",$cnid); |
$CN = str_replace("-A", "00",$cnid); |
// check if already complete |
// check if already complete |
$query = 'Select "parentCN" from contractnotice |
$query = 'Select "parentCN" from contractnotice |
where "CNID" = :CNID'; |
where "CNID" = :CNID'; |
$query = $conn->prepare($query); |
$query = $conn->prepare($query); |
$query->bindParam(":CNID", $CN); |
$query->bindParam(":CNID", $CN); |
$query->execute(); |
$query->execute(); |
$r = $query->fetch(PDO::FETCH_ASSOC); |
$r = $query->fetch(PDO::FETCH_ASSOC); |
if ($r['parentCN'] == NULL) { |
if ($r['parentCN'] == NULL) { |
$site = "https://www.tenders.gov.au/"; |
$site = "https://www.tenders.gov.au/"; |
$searchResult = phpQuery::newDocument(getURL("https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN".$cnid)); |
$searchResult = phpQuery::newDocument(getURL("https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN".$cnid)); |
//echo "https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN".$cnid; |
//echo "https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN".$cnid; |
$url = ""; |
$url = ""; |
foreach(pq('a') as $a) { |
foreach(pq('a') as $a) { |
if (strpos($a->getAttribute("href"),"public.cn.view") >0 ) { |
if (strpos($a->getAttribute("href"),"public.cn.view") >0 ) { |
//echo $a->getAttribute("href"); |
//echo $a->getAttribute("href"); |
$url = $a->getAttribute("href"); |
$url = $a->getAttribute("href"); |
break; |
break; |
} |
} |
} |
} |
$cn = phpQuery::newDocument(getURL($site.$url)); |
$cn = phpQuery::newDocument(getURL($site.$url)); |
$datamapping0711 = array( |
$datamapping0711 = array( |
"Agency" => "agencyName", |
"Agency" => "agencyName", |
"Parent CN" => "parentCN", |
"Parent CN" => "parentCN", |
"CN ID" => "CNID", |
"CN ID" => "CNID", |
"Publish Date" => "publishDate", |
"Publish Date" => "publishDate", |
"Amendment Date" => "amendDate", |
"Amendment Date" => "amendDate", |
"Status" => "", |
"Status" => "", |
"StartDate" => "contractStart", |
"StartDate" => "contractStart", |
"EndDate" => "contractEnd", |
"EndDate" => "contractEnd", |
"Contract Value (AUD)" => "value", |
"Contract Value (AUD)" => "value", |
"Description" => "description", |
"Description" => "description", |
"Agency Reference ID" => "agencyID", |
"Agency Reference ID" => "agencyID", |
"Category" => "category", |
"Category" => "category", |
"Procurement Method" => "procurementMethod", |
"Procurement Method" => "procurementMethod", |
"ATM ID" => "atmID", |
"ATM ID" => "atmID", |
"SON ID" => "SONID", |
"SON ID" => "SONID", |
"Confidentiality - Contract" => "confidentialityContract", |
"Confidentiality - Contract" => "confidentialityContract", |
"Confidentiality Reason(s) - Contract" => "confidentialityContractReason", |
"Confidentiality Reason(s) - Contract" => "confidentialityContractReason", |
"Confidentiality - Outputs" => "confidentialityOutputs", |
"Confidentiality - Outputs" => "confidentialityOutputs", |
"Confidentiality Reason(s) - Outputs" => "confidentialityOutputsReason", |
"Confidentiality Reason(s) - Outputs" => "confidentialityOutputsReason", |
"Consultancy" => "consultancy", |
"Consultancy" => "consultancy", |
"Consultancy Reason(s)" => "consultancyReason", |
"Consultancy Reason(s)" => "consultancyReason", |
"Amendment Reason" => "amendmentReason", |
"Amendment Reason" => "amendmentReason", |
"Name" => "supplierName", |
"Name" => "supplierName", |
"Postal Address" => "supplierAddress", |
"Postal Address" => "supplierAddress", |
"Town/City" => "supplierCity", |
"Town/City" => "supplierCity", |
"Postcode" => "supplierPostcode", |
"Postcode" => "supplierPostcode", |
"Country" => "supplierCountry", |
"Country" => "supplierCountry", |
"ABN Exempt" => "supplierABNExempt", |
"ABN Exempt" => "supplierABNExempt", |
"ABN" => "supplierABN", |
"ABN" => "supplierABN", |
"Branch" => "contactBranch", |
"Branch" => "contactBranch", |
"Division" => "contactDivision", |
"Division" => "contactDivision", |
"Office Postcode" => "contactPostcode" |
"Office Postcode" => "contactPostcode" |
); |
); |
$cnFields = Array(); |
$cnFields = Array(); |
foreach(pq('tr') as $tr) { |
foreach(pq('tr') as $tr) { |
$tra = dom_to_array($tr); |
$tra = dom_to_array($tr); |
if (is_array($tra['th'])) { |
if (is_array($tra['th'])) { |
$fieldName = trim(getTextFromTHNode($tr)); |
$fieldName = trim(getTextFromTHNode($tr)); |
} else { |
} else { |
$fieldName = trim(str_replace("/th>","",$tra['th'])); |
$fieldName = trim(str_replace("/th>","",$tra['th'])); |
} |
} |
$fieldValue = trim(print_r($tra['td'],true)); |
$fieldValue = trim(print_r($tra['td'],true)); |
if ($fieldName == "State/Territory" || $fieldName == "Contact Name" |
if ($fieldName == "State/Territory" || $fieldName == "Contact Name" |
|| $fieldName == "Contact Phone" || $fieldName == "Contact Email" |
|| $fieldName == "Contact Phone" || $fieldName == "Contact Email" |
||$fieldName == "Amendments") { |
||$fieldName == "Amendments") { |
// do nothing |
// do nothing |
} else if ($fieldName == "Contract Period") { |
} else if ($fieldName == "Contract Period") { |
$contractPeriod = explode("to",$fieldValue); |
$contractPeriod = explode("to",$fieldValue); |
$cnFields["contractStart"] = trim($contractPeriod[0]); |
$cnFields["contractStart"] = trim($contractPeriod[0]); |
$cnFields["contractEnd"] = trim($contractPeriod[1]); |
$cnFields["contractEnd"] = trim($contractPeriod[1]); |
} else { |
} else { |
$fieldName = $datamapping0711[$fieldName]; |
$fieldName = $datamapping0711[$fieldName]; |
|
|
if ($fieldName == "parentCN" || $fieldName == "CNID") { |
if ($fieldName == "parentCN" || $fieldName == "CNID") { |
if (is_array($tra['td'])) { |
if (is_array($tra['td'])) { |
$fieldValue = trim(getTextFromNode($tr)); |
$fieldValue = trim(getTextFromNode($tr)); |
} |
} |
$fieldValue = substr($fieldValue, 2); // take off the "CN" prefix |
$fieldValue = substr($fieldValue, 2); // take off the "CN" prefix |
$fieldValue = str_replace("-A", "00", $fieldValue); // make amendments really big numbers |
$fieldValue = str_replace("-A", "00", $fieldValue); // make amendments really big numbers |
} elseif ($fieldName == "description") { |
} elseif ($fieldName == "description") { |
|
|
if (is_array($tra['td'])) $fieldValue = print_r($tra['td']['p'],true); |
if (is_array($tra['td'])) $fieldValue = print_r($tra['td']['p'],true); |
|
|
} elseif ($fieldName == "value" || $fieldName == "supplierABN") { |
} elseif ($fieldName == "value" || $fieldName == "supplierABN") { |
if (is_array($tra['td'])) { |
if (is_array($tra['td'])) { |
$fieldValue = trim(getTextFromNode($tr)); |
$fieldValue = trim(getTextFromNode($tr)); |
} |
} |
$fieldValue = str_replace(Array("$",","," "), "", $fieldValue); |
$fieldValue = str_replace(Array("$",","," "), "", $fieldValue); |
//if (!is_numeric($fieldValue)) $fieldValue = 0; |
//if (!is_numeric($fieldValue)) $fieldValue = 0; |
if ($fieldValue == "Exempt") $fieldValue = NULL; |
if ($fieldValue == "Exempt") $fieldValue = NULL; |
} elseif ($fieldName == "amendDate" || $fieldName == "publishDate" || $fieldName == "contractStart" || $fieldName == "contractEnd") { |
} elseif ($fieldName == "amendDate" || $fieldName == "publishDate" || $fieldName == "contractStart" || $fieldName == "contractEnd") { |
$fieldValue = date('Y-m-d H:i:s', strtotime($fieldValue)); |
$fieldValue = date('Y-m-d H:i:s', strtotime($fieldValue)); |
} elseif (is_array($tra['td'])) { |
} elseif (is_array($tra['td'])) { |
$fieldValue = trim(getTextFromNode($tr)); |
$fieldValue = trim(getTextFromNode($tr)); |
} |
} |
echo $fieldName. " = " .$fieldValue."<br>\n"; |
echo $fieldName. " = " .$fieldValue."<br>\n"; |
$cnFields[$fieldName] = $fieldValue; |
$cnFields[$fieldName] = $fieldValue; |
} |
} |
} |
} |
|
if (isset($cnFields[""])) { |
|
$cnFields["description"] .= $cnFields[""]; |
|
unset($cnFields[""]); |
|
} |
$cnFields["importFile"] = $url; |
$cnFields["importFile"] = $url; |
$contractNoticeInsertQ = 'INSERT INTO contractnotice ("' . implode('" , "', array_keys($cnFields)) . '") VALUES ( '; |
$contractNoticeInsertQ = 'INSERT INTO contractnotice ("' . implode('" , "', array_keys($cnFields)) . '") VALUES ( '; |
for($key = 0; $key < sizeof($cnFields); $key++) { |
for($key = 0; $key < sizeof($cnFields); $key++) { |
$contractNoticeInsertQ.= ($key == 0 ? "" : ", ") . "?"; |
$contractNoticeInsertQ.= ($key == 0 ? "" : ", ") . "?"; |
} |
} |
$contractNoticeInsertQ.= ");"; |
$contractNoticeInsertQ.= ");"; |
//echo $contractNoticeInsertQ; |
//echo $contractNoticeInsertQ; |
$contractNoticeInsertQ = $conn->prepare($contractNoticeInsertQ); |
$contractNoticeInsertQ = $conn->prepare($contractNoticeInsertQ); |
$contractNoticeInsertQ->execute(array_values($cnFields)); |
$contractNoticeInsertQ->execute(array_values($cnFields)); |
$errors = $conn->errorInfo(); |
$errors = $conn->errorInfo(); |
if ($errors[1] == 7 && strpos($errors[2], "duplicate key")) { |
if ($errors[1] == 7 && strpos($errors[2], "duplicate key")) { |
echo "Dupe {$cnFields['CNID']}<br>"; |
echo "Dupe {$cnFields['CNID']}<br>"; |
} |
} |
elseif ($errors[1] == 0) { |
elseif ($errors[1] == 0) { |
echo "Success insert {$cnFields['CNID']} <br>"; |
echo "Success insert {$cnFields['CNID']} <br>"; |
} |
} |
else { |
else { |
foreach ($cnFields as $key => $cnf) { |
foreach ($cnFields as $key => $cnf) { |
echo var_dump($key) . $cnf . "<br>"; |
echo var_dump($key) . $cnf . "<br>"; |
} |
} |
echo $cnFields['CNID'] . " failed CN insert.<br>" . print_r($errors, true) . "<br> row $row <br><br>\n"; |
echo $cnFields['CNID'] . " failed CN insert.<br>" . print_r($errors, true) . "<br> row $row <br><br>\n"; |
} |
} |
$contractNoticeUpdateQ = 'UPDATE contractnotice SET '; |
$contractNoticeUpdateQ = 'UPDATE contractnotice SET '; |
$count = 0; |
$count = 0; |
foreach ($cnFields as $key => $f) { |
foreach ($cnFields as $key => $f) { |
|
|
$count++; |
$count++; |
$contractNoticeUpdateQ.= '"'.$key.'"=? '.($count >= sizeof($cnFields) ? "" : ", "); |
$contractNoticeUpdateQ.= '"'.$key.'"=? '.($count >= sizeof($cnFields) ? "" : ", "); |
} |
} |
$contractNoticeUpdateQ.= ' WHERE "CNID"=?;'; |
$contractNoticeUpdateQ.= ' WHERE "CNID"=?;'; |
$cnFields[] = $cnFields["CNID"]; |
$cnFields[] = $cnFields["CNID"]; |
//echo $contractNoticeUpdateQ; |
//echo $contractNoticeUpdateQ; |
$contractNoticeUpdateQ = $conn->prepare($contractNoticeUpdateQ); |
$contractNoticeUpdateQ = $conn->prepare($contractNoticeUpdateQ); |
$contractNoticeUpdateQ->execute(array_values($cnFields)); |
$contractNoticeUpdateQ->execute(array_values($cnFields)); |
$errors = $conn->errorInfo(); |
$errors = $conn->errorInfo(); |
if ($errors[1] == 7 && strpos($errors[2], "duplicate key")) { |
if ($errors[1] == 7 && strpos($errors[2], "duplicate key")) { |
print_r($errors); |
print_r($errors); |
echo "Dupe update {$cnFields['CNID']}<br>"; |
echo "Dupe update {$cnFields['CNID']}<br>"; |
} |
} |
elseif ($errors[1] == 0) { |
elseif ($errors[1] == 0) { |
echo "Success update {$cnFields['CNID']} <br>"; |
echo "Success update {$cnFields['CNID']} <br>"; |
} |
} |
else { |
else { |
foreach ($cnFields as $key => $cnf) { |
foreach ($cnFields as $key => $cnf) { |
echo var_dump($key) . $cnf . "<br>"; |
echo var_dump($key) . $cnf . "<br>"; |
} |
} |
echo $cnFields['CNID'] . " failed CN update.<br>" . print_r($errors, true) . "<br> row $row <br><br>\n"; |
echo $cnFields['CNID'] . " failed CN update.<br>" . print_r($errors, true) . "<br> row $row <br><br>\n"; |
} |
} |
} |
} |
} |
} |
function processFile($fpath, $tablename) |
function processFile($fpath, $tablename) |
{ |
{ |
global $conn; |
global $conn; |
echo " ============== $fpath ============== <br>"; |
echo " ============== $fpath ============== <br>"; |
|
|
$handle = fopen($fpath, "r"); |
$handle = fopen($fpath, "r"); |
flush(); |
flush(); |
$row = 1; |
$row = 1; |
|
|
while (($data = fgetcsv($handle, 1000, "\t")) !== false) { |
while (($data = fgetcsv($handle, 1000, "\t")) !== false) { |
if ($row > 3) { |
if ($row > 3) { |
$data[0] = trim($data[0], "="); |
$data[0] = trim($data[0], "="); |
$data[0] = trim($data[0], "\""); |
$data[0] = trim($data[0], "\""); |
if (strpos($data[0], "-A") > 0) { |
if (strpos($data[0], "-A") > 0) { |
echo "Loading {$data[0]} ... <br>\n"; |
echo "Loading {$data[0]} ... <br>\n"; |
importCN(str_replace("CN","",$data[0])); |
importCN(str_replace("CN","",$data[0])); |
} |
} |
} |
} |
flush(); |
flush(); |
//echo "<hr>\n"; |
//echo "<hr>\n"; |
|
|
|
|
$row++; |
$row++; |
} |
} |
fclose($handle); |
fclose($handle); |
} |
} |
$path = './'; |
$path = './'; |
if ($_REQUEST["fname"] == "") { |
if ($_REQUEST["fname"] == "") { |
echo "Get files from: https://www.tenders.gov.au/?event=public.reports.list<br>"; |
echo "Get files from: https://www.tenders.gov.au/?event=public.reports.list<br>"; |
$dhandle = opendir($path); |
$dhandle = opendir($path); |
// define an array to hold the files |
// define an array to hold the files |
$files = array(); |
$files = array(); |
if ($dhandle) { |
if ($dhandle) { |
// loop through all of the files |
// loop through all of the files |
while (false !== ($fname = readdir($dhandle))) { |
while (false !== ($fname = readdir($dhandle))) { |
if (($fname != '.') && ($fname != '..')) { |
if (($fname != '.') && ($fname != '..') && (strpos($fname,".xls")>0)) { |
echo "<a href=\"import.php?fname=$fname\">$fname</a> " . filesize($path . $fname) . " " . date("c", filemtime($path . $fname)) . "<br/>"; |
echo "<a href=\"import.php?fname=$fname\">$fname</a> " . filesize($path . $fname) . " " . date("c", filemtime($path . $fname)) . "<br/>"; |
processFile($path . $fname, "contractnotice"); |
processFile($path . $fname, "contractnotice"); |
} |
} |
} |
} |
} |
} |
} |
} |
else { |
else { |
$success = 0; |
$success = 0; |
$fname = $_REQUEST["fname"]; |
$fname = $_REQUEST["fname"]; |
|
|
$success+= processFile($path . $fname, "contractnotice"); |
$success+= processFile($path . $fname, "contractnotice"); |
|
|
} |
} |
|
|
?> |
?> |
|
|