|
<?php |
|
include_once ("../../lib/common.inc.php"); |
|
|
|
// http://www.lastcraft.com/browser_documentation.php |
|
// http://code.google.com/p/phpquery/ |
|
require('phpQuery-onefile.php'); |
|
function getURL($url) { |
|
//return file_get_contents($url); |
|
$ch = curl_init($url); |
|
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); |
|
curl_setopt($ch, CURLOPT_HEADER, 0); |
|
curl_setopt($ch, CURLOPT_TIMEOUT, 45); |
|
$page = curl_exec($ch); |
|
if (curl_errno($ch)) { |
|
echo "<font color=red> Database temporarily unavailable: "; |
|
echo curl_errno($ch) . " " . curl_error($ch); |
|
|
|
echo $url; |
|
echo "</font><br>"; |
|
} |
|
curl_close($ch); |
|
return $page; |
|
} |
|
|
|
|
|
function getTextFromTHNode($Node, $Text = "") { |
|
if ($Node->tagName == null) |
|
return $Text.$Node->textContent; |
|
|
|
if ($Node->tagName != "td") { |
|
$Node = $Node->firstChild; |
|
if ($Node != null) |
|
$Text = getTextFromTHNode($Node, $Text); |
|
|
|
while($Node->nextSibling != null) { |
|
$Text = getTextFromTHNode($Node->nextSibling, $Text); |
|
$Node = $Node->nextSibling; |
|
} |
|
} |
|
return $Text; |
|
} |
|
|
|
function getTextFromNode($Node, $Text = "") { |
|
if ($Node->tagName == null) |
|
return $Text.$Node->textContent; |
|
if ($Node->tagName != "th" && $Node->tagName != "span") { |
|
$Node = $Node->firstChild; |
|
if ($Node != null) |
|
$Text = getTextFromNode($Node, $Text); |
|
|
|
while($Node->nextSibling != null) { |
|
$Text = getTextFromNode($Node->nextSibling, $Text); |
|
$Node = $Node->nextSibling; |
|
} |
|
} |
|
return $Text; |
|
} |
|
function dom_to_array($root) |
|
{ |
|
$result = array(); |
|
|
|
if ($root->hasAttributes()) |
|
{ |
|
$attrs = $root->attributes; |
|
|
|
foreach ($attrs as $i => $attr) |
|
$result[$attr->name] = $attr->value; |
|
} |
|
|
|
$children = $root->childNodes; |
|
if ($root->childNodes) { |
|
if ($children->length == 1) |
|
{ |
|
$child = $children->item(0); |
|
|
|
if ($child->nodeType == XML_TEXT_NODE) |
|
{ |
|
$result['_value'] = $child->nodeValue; |
|
|
|
if (count($result) == 1) |
|
return $result['_value']; |
|
else |
|
return $result; |
|
} |
|
} |
|
|
|
$group = array(); |
|
|
|
for($i = 0; $i < $children->length; $i++) |
|
{ |
|
$child = $children->item($i); |
|
|
|
if (!isset($result[$child->nodeName])) |
|
$result[$child->nodeName] = dom_to_array($child); |
|
else |
|
{ |
|
if (!isset($group[$child->nodeName])) |
|
{ |
|
$tmp = $result[$child->nodeName]; |
|
$result[$child->nodeName] = array($tmp); |
|
$group[$child->nodeName] = 1; |
|
} |
|
|
|
$result[$child->nodeName][] = dom_to_array($child); |
|
} |
|
} |
|
} |
|
|
|
return $result; |
|
} |
|
|
|
function importCN($cnid) { |
|
global $conn; |
|
$CN = str_replace("-A", "00",$cnid); |
|
// check if already complete |
|
$query = 'Select "parentCN" from contractnotice |
|
where "CNID" = :CNID'; |
|
$query = $conn->prepare($query); |
|
$query->bindParam(":CNID", $CN); |
|
$query->execute(); |
|
$r = $query->fetch(PDO::FETCH_ASSOC); |
|
if ($r['parentCN'] == NULL) { |
|
$site = "https://www.tenders.gov.au/"; |
|
$searchResult = phpQuery::newDocument(getURL("https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN".$cnid)); |
|
//echo "https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN".$cnid; |
|
$url = ""; |
|
foreach(pq('a') as $a) { |
|
if (strpos($a->getAttribute("href"),"public.cn.view") >0 ) { |
|
//echo $a->getAttribute("href"); |
|
$url = $a->getAttribute("href"); |
|
break; |
|
} |
|
} |
|
$cn = phpQuery::newDocument(getURL($site.$url)); |
|
$datamapping0711 = array( |
|
"Agency" => "agencyName", |
|
"Parent CN" => "parentCN", |
|
"CN ID" => "CNID", |
|
"Publish Date" => "publishDate", |
|
"Amendment Date" => "amendDate", |
|
"Status" => "", |
|
"StartDate" => "contractStart", |
|
"EndDate" => "contractEnd", |
|
"Contract Value (AUD)" => "value", |
|
"Description" => "description", |
|
"Agency Reference ID" => "agencyID", |
|
"Category" => "category", |
|
"Procurement Method" => "procurementMethod", |
|
"ATM ID" => "atmID", |
|
"SON ID" => "SONID", |
|
"Confidentiality - Contract" => "confidentialityContract", |
|
"Confidentiality Reason(s) - Contract" => "confidentialityContractReason", |
|
"Confidentiality - Outputs" => "confidentialityOutputs", |
|
"Confidentiality Reason(s) - Outputs" => "confidentialityOutputsReason", |
|
"Consultancy" => "consultancy", |
|
"Consultancy Reason(s)" => "consultancyReason", |
|
"Amendment Reason" => "amendmentReason", |
|
"Name" => "supplierName", |
|
"Postal Address" => "supplierAddress", |
|
"Town/City" => "supplierCity", |
|
"Postcode" => "supplierPostcode", |
|
"Country" => "supplierCountry", |
|
"ABN Exempt" => "supplierABNExempt", |
|
"ABN" => "supplierABN", |
|
"Branch" => "contactBranch", |
|
"Division" => "contactDivision", |
|
"Office Postcode" => "contactPostcode" |
|
); |
|
$cnFields = Array(); |
|
foreach(pq('tr') as $tr) { |
|
$tra = dom_to_array($tr); |
|
if (is_array($tra['th'])) { |
|
$fieldName = trim(getTextFromTHNode($tr)); |
|
} else { |
|
$fieldName = trim(str_replace("/th>","",$tra['th'])); |
|
} |
|
$fieldValue = trim(print_r($tra['td'],true)); |
|
if ($fieldName == "State/Territory" || $fieldName == "Contact Name" |
|
|| $fieldName == "Contact Phone" || $fieldName == "Contact Email" |
|
||$fieldName == "Amendments") { |
|
// do nothing |
|
} else if ($fieldName == "Contract Period") { |
|
$contractPeriod = explode("to",$fieldValue); |
|
$cnFields["contractStart"] = trim($contractPeriod[0]); |
|
$cnFields["contractEnd"] = trim($contractPeriod[1]); |
|
} else { |
|
$fieldName = $datamapping0711[$fieldName]; |
|
|
|
if ($fieldName == "parentCN" || $fieldName == "CNID") { |
|
if (is_array($tra['td'])) { |
|
$fieldValue = trim(getTextFromNode($tr)); |
|
} |
|
$fieldValue = substr($fieldValue, 2); // take off the "CN" prefix |
|
$fieldValue = str_replace("-A", "00", $fieldValue); // make amendments really big numbers |
|
} elseif ($fieldName == "description") { |
|
|
|
if (is_array($tra['td'])) $fieldValue = print_r($tra['td']['p'],true); |
|
|
|
} elseif ($fieldName == "value" || $fieldName == "supplierABN") { |
|
if (is_array($tra['td'])) { |
|
$fieldValue = trim(getTextFromNode($tr)); |
|
} |
|
$fieldValue = str_replace(Array("$",","," "), "", $fieldValue); |
|
//if (!is_numeric($fieldValue)) $fieldValue = 0; |
|
if ($fieldValue == "Exempt") $fieldValue = NULL; |
|
} elseif ($fieldName == "amendDate" || $fieldName == "publishDate" || $fieldName == "contractStart" || $fieldName == "contractEnd") { |
|
$fieldValue = date('Y-m-d H:i:s', strtotime($fieldValue)); |
|
} elseif (is_array($tra['td'])) { |
|
$fieldValue = trim(getTextFromNode($tr)); |
|
} |
|
echo $fieldName. " = " .$fieldValue."<br>\n"; |
|
$cnFields[$fieldName] = $fieldValue; |
|
} |
|
} |
|
$cnFields["importFile"] = $url; |
|
$contractNoticeInsertQ = 'INSERT INTO contractnotice ("' . implode('" , "', array_keys($cnFields)) . '") VALUES ( '; |
|
for($key = 0; $key < sizeof($cnFields); $key++) { |
|
$contractNoticeInsertQ.= ($key == 0 ? "" : ", ") . "?"; |
|
} |
|
$contractNoticeInsertQ.= ");"; |
|
//echo $contractNoticeInsertQ; |
|
$contractNoticeInsertQ = $conn->prepare($contractNoticeInsertQ); |
|
$contractNoticeInsertQ->execute(array_values($cnFields)); |
|
$errors = $conn->errorInfo(); |
|
if ($errors[1] == 7 && strpos($errors[2], "duplicate key")) { |
|
echo "Dupe {$cnFields['CNID']}<br>"; |
|
} |
|
elseif ($errors[1] == 0) { |
|
echo "Success insert {$cnFields['CNID']} <br>"; |
|
} |
|
else { |
|
foreach ($cnFields as $key => $cnf) { |
|
echo var_dump($key) . $cnf . "<br>"; |
|
} |
|
echo $cnFields['CNID'] . " failed CN insert.<br>" . print_r($errors, true) . "<br> row $row <br><br>\n"; |
|
} |
|
$contractNoticeUpdateQ = 'UPDATE contractnotice SET '; |
|
$count = 0; |
|
foreach ($cnFields as $key => $f) { |
|
|
|
$count++; |
|
$contractNoticeUpdateQ.= '"'.$key.'"=? '.($count >= sizeof($cnFields) ? "" : ", "); |
|
} |
|
$contractNoticeUpdateQ.= ' WHERE "CNID"=?;'; |
|
$cnFields[] = $cnFields["CNID"]; |
|
//echo $contractNoticeUpdateQ; |
|
$contractNoticeUpdateQ = $conn->prepare($contractNoticeUpdateQ); |
|
$contractNoticeUpdateQ->execute(array_values($cnFields)); |
|
$errors = $conn->errorInfo(); |
|
if ($errors[1] == 7 && strpos($errors[2], "duplicate key")) { |
|
print_r($errors); |
|
echo "Dupe update {$cnFields['CNID']}<br>"; |
|
} |
|
elseif ($errors[1] == 0) { |
|
echo "Success update {$cnFields['CNID']} <br>"; |
|
} |
|
else { |
|
foreach ($cnFields as $key => $cnf) { |
|
echo var_dump($key) . $cnf . "<br>"; |
|
} |
|
echo $cnFields['CNID'] . " failed CN update.<br>" . print_r($errors, true) . "<br> row $row <br><br>\n"; |
|
} |
|
} |
|
} |
|
function processFile($fpath, $tablename) |
|
{ |
|
global $conn; |
|
echo " ============== $fpath ============== <br>"; |
|
|
|
$handle = fopen($fpath, "r"); |
|
flush(); |
|
$row = 1; |
|
|
|
while (($data = fgetcsv($handle, 1000, "\t")) !== false) { |
|
if ($row > 3) { |
|
$data[0] = trim($data[0], "="); |
|
$data[0] = trim($data[0], "\""); |
|
if (strpos($data[0], "-A") > 0) { |
|
echo "Loading {$data[0]} ... <br>\n"; |
|
importCN(str_replace("CN","",$data[0])); |
|
} |
|
} |
|
flush(); |
|
//echo "<hr>\n"; |
|
|
|
|
|
$row++; |
|
} |
|
fclose($handle); |
|
} |
|
$path = './'; |
|
if ($_REQUEST["fname"] == "") { |
|
echo "Get files from: https://www.tenders.gov.au/?event=public.reports.list<br>"; |
|
$dhandle = opendir($path); |
|
// define an array to hold the files |
|
$files = array(); |
|
if ($dhandle) { |
|
// loop through all of the files |
|
while (false !== ($fname = readdir($dhandle))) { |
|
if (($fname != '.') && ($fname != '..')) { |
|
echo "<a href=\"import.php?fname=$fname\">$fname</a> " . filesize($path . $fname) . " " . date("c", filemtime($path . $fname)) . "<br/>"; |
|
processFile($path . $fname, "contractnotice"); |
|
} |
|
} |
|
} |
|
} |
|
else { |
|
$success = 0; |
|
$fname = $_REQUEST["fname"]; |
|
|
|
$success+= processFile($path . $fname, "contractnotice"); |
|
|
|
} |
|
|
|
?> |