Merge branch 'master' of ssh://apples.lambdacomplex.org/git/contractdashboard
Merge branch 'master' of ssh://apples.lambdacomplex.org/git/contractdashboard

<?php <?php
include_once ("../../lib/common.inc.php"); include_once ("../../lib/common.inc.php");
/**
 * Import one tab-separated contract notice export file.
 *
 * Row 3 of the file is used as the header row and every later row is treated
 * as data. Each data cell is stripped of surrounding "=" and '"' characters,
 * mapped from its header to a database column name, normalised (CN ids,
 * dates, printable text) and, when $tablename is "contractnotice", inserted
 * through a prepared statement on the global $conn.
 *
 * Progress and error details are echoed as HTML. The function calls die()
 * when a row has more cells than the mapping knows about, when a CN id is
 * not numeric, or when a text cell still contains the sequence `" =`.
 *
 * @param string $fpath     Path of the export file to read.
 * @param string $tablename Target table; rows are only inserted when this is "contractnotice".
 * @return int Number of rows inserted without error (0 if the file cannot be opened).
 */
function processFile($fpath, $tablename)
{
    global $conn;
    $safePath = htmlspecialchars($fpath, ENT_QUOTES);
    echo " ============== $safePath ============== <br>";
    flush();
    $row = 1;
    $success = 0;
    $dupes = 0;

    // Refuse directories and unreadable paths up front instead of looping on a bad handle.
    $handle = (is_file($fpath) && is_readable($fpath)) ? fopen($fpath, "r") : false;
    if ($handle === false) {
        echo "<font color=red>Unable to open $safePath for reading</font><br>";
        flush();
        return 0;
    }

    // Export header text => contractnotice column name.
    $datamapping0507 = array(
        "Agency" => "agencyName",
        "CN ID" => "CNID",
        "Publish Date" => "publishDate",
        "Contract Start Date" => "contractStart",
        "Contract End Date" => "contractEnd",
        "Value (AUD)" => "value",
        "Title" => "description",
        "Category" => "category",
        "ATM ID" => "atmID",
        "Supplier Name" => "supplierName",
        "LastUpdated" => "amendDate",
        "" => ""
    );
    $headers = array();
    $contractNoticeFields = array(
        "importFile",
        "CNID",
        "description",
        "agencyName",
        "publishDate",
        "category",
        "contractStart",
        "contractEnd",
        "value",
        "atmID",
        "supplierName",
        "amendDate"
    );
    $dateFields = array("amendDate", "publishDate", "contractStart", "contractEnd");
    $isContractNotice = ($tablename == "contractnotice");
    $contractNoticeInsertQ = null;

    if ($isContractNotice) {
        // One positional placeholder per column.
        $placeholders = implode(", ", array_fill(0, count($contractNoticeFields), "?"));
        $sql = 'INSERT INTO contractnotice ("' . implode('" , "', $contractNoticeFields) . '") VALUES ( ' . $placeholders . ");";
        $contractNoticeInsertQ = $conn->prepare($sql);
    }

    // NOTE(review): fgetcsv() is given a 1000 byte line limit; longer lines would be
    // split into several rows. Confirm that no export line exceeds it.
    while (($data = fgetcsv($handle, 1000, "\t")) !== false) {
        $num = count($data);
        // fgetcsv() reports a blank line as an array holding a single null.
        $isBlankLine = ($num == 1 && $data[0] === null);

        if ($row == 3) {
            $headers = $data;
        } elseif ($row > 3 && !$isBlankLine) {
            if ($num > count($datamapping0507)) {
                die("<font color=red>Error in data import; data mapping fields out of bounds or changed</font><br>" . $fpath . "data:" . $num . print_r($data, true) . "mapping:" . count($datamapping0507) . print_r($datamapping0507, true));
            }

            // NOTE(review): values are appended in file column order while the INSERT lists
            // columns in $contractNoticeFields order; presumably the export columns arrive
            // in that same order - confirm against a real export.
            $contractNoticeInsert = array($fpath);

            for ($c = 0; $c < $num; $c++) {
                // Cells look like ="value"; strip the wrapper characters.
                $data[$c] = trim(trim((string) $data[$c], "="), "\"");
                if (!$isContractNotice) {
                    continue;
                }

                $header = isset($headers[$c]) ? $headers[$c] : "";
                $field = isset($datamapping0507[$header]) ? $datamapping0507[$header] : "";
                if (!in_array($field, $contractNoticeFields, true)) {
                    continue; // unmapped or unused column
                }

                if ($field == "parentCN" || $field == "CNID") {
                    $data[$c] = substr($data[$c], 2); // take off the "CN" prefix
                    $data[$c] = str_replace("-A", "00", $data[$c]); // make amendments really big numbers
                    if (!is_numeric($data[$c]) && $data[$c] != "") {
                        die($data[$c] . " is not numeric");
                    }
                    $contractNoticeInsert[] = ($data[$c] > 0) ? $data[$c] : 0;
                } elseif ($field == "supplierABN") {
                    $contractNoticeInsert[] = ($data[$c] > 0) ? $data[$c] : null;
                } elseif (in_array($field, $dateFields, true)) {
                    // NOTE(review): an empty or unparseable date becomes the Unix epoch here.
                    $contractNoticeInsert[] = date('Y-m-d H:i:s', strtotime($data[$c]));
                } else {
                    // A leftover `" =` inside a cell means two ="..." cells were read as one.
                    if (strpos($data[$c], "\" =") !== false) {
                        die("Invalid Description field" . print_r($contractNoticeInsert, true));
                    }
                    // Drop non-printable characters after converting to UTF-8.
                    $contractNoticeInsert[] = preg_replace('/[^[:print:]]/', '', utf8_encode($data[$c]));
                }
            }
            flush();

            if ($isContractNotice) {
                $contractNoticeInsertQ->execute($contractNoticeInsert);
                // Errors raised by execute() are reported on the statement handle,
                // not on the connection handle.
                $errors = $contractNoticeInsertQ->errorInfo();
                $driverCode = isset($errors[1]) ? $errors[1] : 0;
                $driverMessage = isset($errors[2]) ? (string) $errors[2] : "";

                if ($driverCode == 7 && strpos($driverMessage, "duplicate key") !== false) {
                    $dupes++;
                } elseif ($driverCode == 0) {
                    $success++;
                } else {
                    // Dump every bound value next to its column name to help diagnose the failure.
                    foreach ($contractNoticeFields as $key => $cnf) {
                        var_dump(isset($contractNoticeInsert[$key]) ? $contractNoticeInsert[$key] : null);
                        echo $cnf . "<br>";
                    }
                    echo (isset($data[2]) ? $data[2] : "") . " failed CN insert.<br>" . print_r($errors, true) . "<br> row $row <br><br>\n";
                }
            }
            flush();
        }
        $row++;
    }
    fclose($handle);
    echo " $dupes duplicate records<br>";
    echo " $success records successfully created<br>";
    flush();
    return $success;
}
// Entry point. Without an "fname" request parameter every regular file in
// $path (optionally only those whose name contains the "filter" parameter)
// is listed and imported; with "fname" only that file is imported.
$path = './';
$requestedFile = (isset($_REQUEST["fname"]) && is_string($_REQUEST["fname"])) ? $_REQUEST["fname"] : "";
$nameFilter = (isset($_REQUEST["filter"]) && is_string($_REQUEST["filter"])) ? $_REQUEST["filter"] : "";
if ($requestedFile == "") {
    echo "Get files from: https://www.tenders.gov.au/?event=public.reports.list<br>";
    $dhandle = opendir($path);
    // define an array to hold the files
    $files = array();
    if ($dhandle) {
        // loop through all of the files
        while (false !== ($fname = readdir($dhandle))) {
            // Skip "." / ".." and any other directory entry that is not a regular file.
            if (!is_file($path . $fname)) {
                continue;
            }
            // strpos() returns 0 for a match at the start of the name, so the
            // result must be compared against false explicitly.
            if ($nameFilter !== "" && strpos($fname, $nameFilter) === false) {
                continue;
            }
            echo "<a href=\"import.php?fname=" . rawurlencode($fname) . "\">" . htmlspecialchars($fname, ENT_QUOTES) . "</a>&nbsp;" . filesize($path . $fname) . "&nbsp;" . date("c", filemtime($path . $fname)) . "<br/>";
            processFile($path . $fname, "contractnotice");
        }
        closedir($dhandle);
    }
}
else {
    $success = 0;
    // Only the bare file name is honoured so a request cannot reach outside $path.
    $fname = basename($requestedFile);
    $success += processFile($path . $fname, "contractnotice");
}
?> ?>
   
<?php <?php
include_once ("../../lib/common.inc.php"); include_once ("../../lib/common.inc.php");
  /*
  update contractnotice set "supplierABN" = a."supplierABN"
  from contractnotice as cn inner join (select "supplierABN",
  "supplierName" from contractnotice where "supplierABN"
  IS NOT NULL and "supplierABN" != 0) as a on
  cn."supplierName" = a."supplierName" where
  cn."CNID"=contractnotice."CNID" and (contractnotice."supplierABN"
  IS NULL or contractnotice."supplierABN" = 0) */
// http://www.lastcraft.com/browser_documentation.php // http://www.lastcraft.com/browser_documentation.php
// http://code.google.com/p/phpquery/ // http://code.google.com/p/phpquery/
require('phpQuery-onefile.php'); require('phpQuery-onefile.php');
/**
 * Fetch a URL with cURL and return the response body.
 *
 * The transfer is limited to 45 seconds. When cURL reports an error, a red
 * HTML message with the error number, error text and URL is echoed.
 *
 * @param string $url Address to fetch.
 * @return string|bool Whatever curl_exec() returned: the body on success, false on failure.
 */
function getURL($url) {
    $ch = curl_init($url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // hand the body back instead of printing it
    curl_setopt($ch, CURLOPT_HEADER, 0);         // body only, no response headers
    curl_setopt($ch, CURLOPT_TIMEOUT, 45);
    $page = curl_exec($ch);
    if (curl_errno($ch)) {
        // The URL and error text are escaped because they end up inside HTML output.
        echo "<font color=red> Database temporarily unavailable: ";
        echo curl_errno($ch) . " " . htmlspecialchars(curl_error($ch), ENT_QUOTES);
        echo htmlspecialchars($url, ENT_QUOTES);
        echo "</font><br>";
    }
    curl_close($ch);
    return $page;
}
   
   
/**
 * Collect the text found under a DOM node, ignoring everything inside <td> elements.
 *
 * Non-element nodes (such as text nodes) contribute their textContent.
 * Elements are walked depth-first in document order; a <td> element and
 * its whole subtree contribute nothing.
 *
 * @param DOMNode $Node Node to read.
 * @param string  $Text Text gathered so far; the result is appended to it.
 * @return string $Text followed by the collected text.
 */
function getTextFromTHNode($Node, $Text = "") {
    // Only elements carry a tagName; anything else is treated as a leaf.
    if (!($Node instanceof DOMElement)) {
        return $Text . $Node->textContent;
    }
    if ($Node->tagName != "td") {
        // Walking from firstChild handles childless elements without touching null.
        for ($child = $Node->firstChild; $child !== null; $child = $child->nextSibling) {
            $Text = getTextFromTHNode($child, $Text);
        }
    }
    return $Text;
}
   
/**
 * Collect the text found under a DOM node, ignoring everything inside <th> and <span> elements.
 *
 * Non-element nodes (such as text nodes) contribute their textContent.
 * Elements are walked depth-first in document order; <th> and <span>
 * elements and their subtrees contribute nothing.
 *
 * @param DOMNode $Node Node to read.
 * @param string  $Text Text gathered so far; the result is appended to it.
 * @return string $Text followed by the collected text.
 */
function getTextFromNode($Node, $Text = "") {
    // Only elements carry a tagName; anything else is treated as a leaf.
    if (!($Node instanceof DOMElement)) {
        return $Text . $Node->textContent;
    }
    if ($Node->tagName != "th" && $Node->tagName != "span") {
        // Walking from firstChild handles childless elements without touching null.
        for ($child = $Node->firstChild; $child !== null; $child = $child->nextSibling) {
            $Text = getTextFromNode($child, $Text);
        }
    }
    return $Text;
}
/**
 * Convert a DOM node and its descendants into nested PHP arrays.
 *
 * Attributes become name => value entries. A node whose only child is a
 * text node yields that text directly, or under the "_value" key when the
 * node also has attributes. Otherwise each child is converted recursively
 * and stored under its node name; children sharing a node name are
 * gathered into a list in document order.
 *
 * @param DOMNode $root Node to convert.
 * @return array|string Text for a plain text-only node, otherwise the array form.
 */
function dom_to_array($root)
{
    $result = array();

    if ($root->hasAttributes()) {
        foreach ($root->attributes as $attr) {
            $result[$attr->name] = $attr->value;
        }
    }

    $children = $root->childNodes;
    if (!$children) {
        return $result;
    }

    if ($children->length == 1) {
        $only = $children->item(0);
        if ($only->nodeType == XML_TEXT_NODE) {
            $result['_value'] = $only->nodeValue;
            // Without attributes the text itself is the whole value.
            return (count($result) == 1) ? $result['_value'] : $result;
        }
    }

    // Node names whose entry has already been turned into a list.
    $group = array();

    for ($i = 0; $i < $children->length; $i++) {
        $child = $children->item($i);
        $name = $child->nodeName;

        if (!isset($result[$name])) {
            $result[$name] = dom_to_array($child);
            continue;
        }

        if (!isset($group[$name])) {
            // Second occurrence: wrap the first value so both fit in a list.
            $result[$name] = array($result[$name]);
            $group[$name] = 1;
        }
        $result[$name][] = dom_to_array($child);
    }

    return $result;
}
   
function importCN($cnid) { function importCN($cnid) {
global $conn; global $conn;
$CN = str_replace("-A", "00",$cnid); $CN = str_replace("-A", "00",$cnid);
// check if already complete // check if already complete
$query = 'Select "parentCN" from contractnotice $query = 'Select "parentCN" from contractnotice
where "CNID" = :CNID'; where "CNID" = :CNID';
$query = $conn->prepare($query); $query = $conn->prepare($query);
$query->bindParam(":CNID", $CN); $query->bindParam(":CNID", $CN);
$query->execute(); $query->execute();
$r = $query->fetch(PDO::FETCH_ASSOC); $r = $query->fetch(PDO::FETCH_ASSOC);
if ($r['parentCN'] == NULL) { if ($r['parentCN'] == NULL) {
$site = "https://www.tenders.gov.au/"; $site = "https://www.tenders.gov.au/";
$searchResult = phpQuery::newDocument(getURL("https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN".$cnid)); $searchResult = phpQuery::newDocument(getURL("https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN".$cnid));
//echo "https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN".$cnid; //echo "https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN".$cnid;
$url = ""; $url = "";
foreach(pq('a') as $a) { foreach(pq('a') as $a) {
if (strpos($a->getAttribute("href"),"public.cn.view") >0 ) { if (strpos($a->getAttribute("href"),"public.cn.view") >0 ) {
//echo $a->getAttribute("href"); //echo $a->getAttribute("href");
$url = $a->getAttribute("href"); $url = $a->getAttribute("href");
break; break;
} }
} }
$cn = phpQuery::newDocument(getURL($site.$url)); $cn = phpQuery::newDocument(getURL($site.$url));
$datamapping0711 = array( $datamapping0711 = array(
"Agency" => "agencyName", "Agency" => "agencyName",
"Parent CN" => "parentCN", "Parent CN" => "parentCN",
"CN ID" => "CNID", "CN ID" => "CNID",
"Publish Date" => "publishDate", "Publish Date" => "publishDate",
"Amendment Date" => "amendDate", "Amendment Date" => "amendDate",
"Status" => "", "Status" => "",
"StartDate" => "contractStart", "StartDate" => "contractStart",
"EndDate" => "contractEnd", "EndDate" => "contractEnd",
"Contract Value (AUD)" => "value", "Contract Value (AUD)" => "value",
"Description" => "description", "Description" => "description",
"Agency Reference ID" => "agencyID", "Agency Reference ID" => "agencyID",
"Category" => "category", "Category" => "category",
"Procurement Method" => "procurementMethod", "Procurement Method" => "procurementMethod",
"ATM ID" => "atmID", "ATM ID" => "atmID",
"SON ID" => "SONID", "SON ID" => "SONID",
"Confidentiality - Contract" => "confidentialityContract", "Confidentiality - Contract" => "confidentialityContract",
"Confidentiality Reason(s) - Contract" => "confidentialityContractReason", "Confidentiality Reason(s) - Contract" => "confidentialityContractReason",
"Confidentiality - Outputs" => "confidentialityOutputs", "Confidentiality - Outputs" => "confidentialityOutputs",
"Confidentiality Reason(s) - Outputs" => "confidentialityOutputsReason", "Confidentiality Reason(s) - Outputs" => "confidentialityOutputsReason",
"Consultancy" => "consultancy", "Consultancy" => "consultancy",
"Consultancy Reason(s)" => "consultancyReason", "Consultancy Reason(s)" => "consultancyReason",
"Amendment Reason" => "amendmentReason", "Amendment Reason" => "amendmentReason",
"Name" => "supplierName", "Name" => "supplierName",
"Postal Address" => "supplierAddress", "Postal Address" => "supplierAddress",
"Town/City" => "supplierCity", "Town/City" => "supplierCity",
"Postcode" => "supplierPostcode", "Postcode" => "supplierPostcode",
"Country" => "supplierCountry", "Country" => "supplierCountry",
"ABN Exempt" => "supplierABNExempt", "ABN Exempt" => "supplierABNExempt",
"ABN" => "supplierABN", "ABN" => "supplierABN",
"Branch" => "contactBranch", "Branch" => "contactBranch",
"Division" => "contactDivision", "Division" => "contactDivision",
"Office Postcode" => "contactPostcode" "Office Postcode" => "contactPostcode"
); );
$cnFields = Array(); $cnFields = Array();
foreach(pq('tr') as $tr) { foreach(pq('tr') as $tr) {
$tra = dom_to_array($tr); $tra = dom_to_array($tr);
if (is_array($tra['th'])) { if (is_array($tra['th'])) {
$fieldName = trim(getTextFromTHNode($tr)); $fieldName = trim(getTextFromTHNode($tr));
} else { } else {
$fieldName = trim(str_replace("/th>","",$tra['th'])); $fieldName = trim(str_replace("/th>","",$tra['th']));
} }
$fieldValue = trim(print_r($tra['td'],true)); $fieldValue = trim(print_r($tra['td'],true));
if ($fieldName == "State/Territory" || $fieldName == "Contact Name" if ($fieldName == "State/Territory" || $fieldName == "Contact Name"
|| $fieldName == "Contact Phone" || $fieldName == "Contact Email" || $fieldName == "Contact Phone" || $fieldName == "