<?php |
<?php |
include_once ("../../lib/common.inc.php"); |
include_once ("../../lib/common.inc.php"); |
|
/*
 * Maintenance query: back-fill a missing or zero "supplierABN" on a contract
 * notice from another notice that has the same "supplierName" and a known ABN.
 *
 * update contractnotice set "supplierABN" = a."supplierABN"
 * from contractnotice as cn inner join (select "supplierABN",
 * "supplierName" from contractnotice where "supplierABN"
 * IS NOT NULL and "supplierABN" != 0) as a on
 * cn."supplierName" = a."supplierName" where
 * cn."CNID"=contractnotice."CNID" and (contractnotice."supplierABN"
 * IS NULL or contractnotice."supplierABN" = 0)
 */
// http://www.lastcraft.com/browser_documentation.php
// http://code.google.com/p/phpquery/
require('phpQuery-onefile.php'); |
require('phpQuery-onefile.php'); |
/**
 * Fetch a URL with cURL and return the response body.
 *
 * The transfer is made with a 45 second timeout and without response headers.
 * When cURL reports an error, an HTML-formatted message (error number, error
 * text and the requested URL) is echoed; the function then still returns
 * whatever curl_exec() produced.
 *
 * @param string $url Address to request.
 * @return string|false Response body, or false when the transfer failed.
 */
function getURL($url) {
    //return file_get_contents($url);
    $ch = curl_init($url);
    if ($ch === false) {
        // No handle means none of the curl_* calls below are valid; report
        // the failure in the same format as a transfer error and give up.
        echo "<font color=red> Database temporarily unavailable: ";
        echo $url;
        echo "</font><br>";
        return false;
    }

    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // return the body instead of printing it
    curl_setopt($ch, CURLOPT_HEADER, 0);         // body only, no response headers
    curl_setopt($ch, CURLOPT_TIMEOUT, 45);

    $page = curl_exec($ch);
    if (curl_errno($ch)) {
        echo "<font color=red> Database temporarily unavailable: ";
        echo curl_errno($ch) . " " . curl_error($ch);
        echo $url;
        echo "</font><br>";
    }
    curl_close($ch);

    return $page;
}
|
|
|
|
/**
 * Collect the text below a DOM node while skipping every <td> subtree.
 *
 * Nodes that are not elements (text nodes, for example) contribute their
 * textContent. Elements other than <td> are walked child by child, in
 * document order. A <td> element contributes nothing.
 *
 * @param DOMNode|null $Node Node to read from.
 * @param string       $Text Text gathered so far; new text is appended to it.
 * @return string The accumulated text.
 */
function getTextFromTHNode($Node, $Text = "") {
    if ($Node === null) {
        return $Text;
    }

    // Only DOMElement has a tagName; testing the class avoids reading an
    // undefined property on text and comment nodes.
    if (!($Node instanceof DOMElement)) {
        return $Text . $Node->textContent;
    }

    if ($Node->tagName != "td") {
        // Iterating from firstChild handles childless elements safely: the
        // loop simply does not run when firstChild is null.
        for ($child = $Node->firstChild; $child !== null; $child = $child->nextSibling) {
            $Text = getTextFromTHNode($child, $Text);
        }
    }

    return $Text;
}
|
|
/**
 * Collect the text below a DOM node while skipping <th> and <span> subtrees.
 *
 * Nodes that are not elements (text nodes, for example) contribute their
 * textContent. Elements other than <th> and <span> are walked child by
 * child, in document order. A <th> or <span> element contributes nothing.
 *
 * @param DOMNode|null $Node Node to read from.
 * @param string       $Text Text gathered so far; new text is appended to it.
 * @return string The accumulated text.
 */
function getTextFromNode($Node, $Text = "") {
    if ($Node === null) {
        return $Text;
    }

    // Only DOMElement has a tagName; testing the class avoids reading an
    // undefined property on text and comment nodes.
    if (!($Node instanceof DOMElement)) {
        return $Text . $Node->textContent;
    }

    if ($Node->tagName != "th" && $Node->tagName != "span") {
        // Iterating from firstChild handles childless elements safely: the
        // loop simply does not run when firstChild is null.
        for ($child = $Node->firstChild; $child !== null; $child = $child->nextSibling) {
            $Text = getTextFromNode($child, $Text);
        }
    }

    return $Text;
}
/**
 * Convert a DOM node and its descendants into nested PHP arrays.
 *
 * - Each attribute becomes an entry keyed by the attribute name.
 * - A node whose only child is a text node stores that text under '_value';
 *   when the node has no attributes the bare string is returned instead of
 *   an array.
 * - Otherwise every child is converted recursively and stored under its
 *   nodeName. When a nodeName occurs more than once, the entry is turned
 *   into a list holding each occurrence in document order.
 *
 * @param DOMNode $root Node to convert.
 * @return array|string Nested array, or a plain string for a text-only node
 *                      without attributes.
 */
function dom_to_array($root)
{
    $result = array();

    if ($root->hasAttributes()) {
        foreach ($root->attributes as $attr) {
            $result[$attr->name] = $attr->value;
        }
    }

    $children = $root->childNodes;
    if ($children) {
        if ($children->length == 1) {
            $child = $children->item(0);

            if ($child->nodeType == XML_TEXT_NODE) {
                $result['_value'] = $child->nodeValue;

                // With no attributes the text is the whole result.
                if (count($result) == 1) {
                    return $result['_value'];
                }
                return $result;
            }
        }

        // Node names that have already been converted from a single entry
        // into a list of entries.
        $group = array();

        for ($i = 0; $i < $children->length; $i++) {
            $child = $children->item($i);
            $name = $child->nodeName;

            if (!isset($result[$name])) {
                $result[$name] = dom_to_array($child);
                continue;
            }

            if (!isset($group[$name])) {
                // Second occurrence: wrap the first value so the entry
                // becomes a list.
                $result[$name] = array($result[$name]);
                $group[$name] = 1;
            }

            $result[$name][] = dom_to_array($child);
        }
    }

    return $result;
}
|
|
function importCN($cnid) { |
function importCN($cnid) { |
global $conn; |
global $conn; |
$CN = str_replace("-A", "00",$cnid); |
$CN = str_replace("-A", "00",$cnid); |
// check if already complete |
// check if already complete |
$query = 'Select "parentCN" from contractnotice |
$query = 'Select "parentCN" from contractnotice |
where "CNID" = :CNID'; |
where "CNID" = :CNID'; |
$query = $conn->prepare($query); |
$query = $conn->prepare($query); |
$query->bindParam(":CNID", $CN); |
$query->bindParam(":CNID", $CN); |
$query->execute(); |
$query->execute(); |
$r = $query->fetch(PDO::FETCH_ASSOC); |
$r = $query->fetch(PDO::FETCH_ASSOC); |
if ($r['parentCN'] == NULL) { |
if ($r['parentCN'] == NULL) { |
$site = "https://www.tenders.gov.au/"; |
$site = "https://www.tenders.gov.au/"; |
$searchResult = phpQuery::newDocument(getURL("https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN".$cnid)); |
$searchResult = phpQuery::newDocument(getURL("https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN".$cnid)); |
//echo "https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN".$cnid; |
//echo "https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN".$cnid; |
$url = ""; |
$url = ""; |
foreach(pq('a') as $a) { |
foreach(pq('a') as $a) { |
if (strpos($a->getAttribute("href"),"public.cn.view") >0 ) { |
if (strpos($a->getAttribute("href"),"public.cn.view") >0 ) { |
//echo $a->getAttribute("href"); |
//echo $a->getAttribute("href"); |
$url = $a->getAttribute("href"); |
$url = $a->getAttribute("href"); |
break; |
break; |
} |
} |
} |
} |
$cn = phpQuery::newDocument(getURL($site.$url)); |
$cn = phpQuery::newDocument(getURL($site.$url)); |
$datamapping0711 = array( |
$datamapping0711 = array( |
"Agency" => "agencyName", |
"Agency" => "agencyName", |
"Parent CN" => "parentCN", |
"Parent CN" => "parentCN", |
"CN ID" => "CNID", |
"CN ID" => "CNID", |
"Publish Date" => "publishDate", |
"Publish Date" => "publishDate", |
"Amendment Date" => "amendDate", |
"Amendment Date" => "amendDate", |
"Status" => "", |
"Status" => "", |
"StartDate" => "contractStart", |
"StartDate" => "contractStart", |
"EndDate" => "contractEnd", |
"EndDate" => "contractEnd", |
"Contract Value (AUD)" => "value", |
"Contract Value (AUD)" => "value", |
"Description" => "description", |
"Description" => "description", |
"Agency Reference ID" => "agencyID", |
"Agency Reference ID" => "agencyID", |
"Category" => "category", |
"Category" => "category", |
"Procurement Method" => "procurementMethod", |
"Procurement Method" => "procurementMethod", |
"ATM ID" => "atmID", |
"ATM ID" => "atmID", |
"SON ID" => "SONID", |
"SON ID" => "SONID", |
"Confidentiality - Contract" => "confidentialityContract", |
"Confidentiality - Contract" => "confidentialityContract", |
"Confidentiality Reason(s) - Contract" => "confidentialityContractReason", |
"Confidentiality Reason(s) - Contract" => "confidentialityContractReason", |
"Confidentiality - Outputs" => "confidentialityOutputs", |
"Confidentiality - Outputs" => "confidentialityOutputs", |
"Confidentiality Reason(s) - Outputs" => "confidentialityOutputsReason", |
"Confidentiality Reason(s) - Outputs" => "confidentialityOutputsReason", |
"Consultancy" => "consultancy", |
"Consultancy" => "consultancy", |
"Consultancy Reason(s)" => "consultancyReason", |
"Consultancy Reason(s)" => "consultancyReason", |
"Amendment Reason" => "amendmentReason", |
"Amendment Reason" => "amendmentReason", |
"Name" => "supplierName", |
"Name" => "supplierName", |
"Postal Address" => "supplierAddress", |
"Postal Address" => "supplierAddress", |
"Town/City" => "supplierCity", |
"Town/City" => "supplierCity", |
"Postcode" => "supplierPostcode", |
"Postcode" => "supplierPostcode", |
"Country" => "supplierCountry", |
"Country" => "supplierCountry", |
"ABN Exempt" => "supplierABNExempt", |
"ABN Exempt" => "supplierABNExempt", |
"ABN" => "supplierABN", |
"ABN" => "supplierABN", |
"Branch" => "contactBranch", |
"Branch" => "contactBranch", |
"Division" => "contactDivision", |
"Division" => "contactDivision", |
"Office Postcode" => "contactPostcode" |
"Office Postcode" => "contactPostcode" |
); |
); |
$cnFields = Array(); |
$cnFields = Array(); |
foreach(pq('tr') as $tr) { |
foreach(pq('tr') as $tr) { |
$tra = dom_to_array($tr); |
$tra = dom_to_array($tr); |
if (is_array($tra['th'])) { |
if (is_array($tra['th'])) { |
$fieldName = trim(getTextFromTHNode($tr)); |
$fieldName = trim(getTextFromTHNode($tr)); |
} else { |
} else { |
$fieldName = trim(str_replace("/th>","",$tra['th'])); |
$fieldName = trim(str_replace("/th>","",$tra['th'])); |
} |
} |
$fieldValue = trim(print_r($tra['td'],true)); |
$fieldValue = trim(print_r($tra['td'],true)); |
if ($fieldName == "State/Territory" || $fieldName == "Contact Name" |
if ($fieldName == "State/Territory" || $fieldName == "Contact Name" |
|| $fieldName == "Contact Phone" || $fieldName == "Contact Email" |
|| $fieldName == "Contact Phone" || $fieldName == "Contact Email" |