Use cursor to export data to use less memory
Use cursor to export data to use less memory

  <?php
  include_once ("../../lib/common.inc.php");
  /*
  update contractnotice set "supplierABN" = a."supplierABN"
  from contractnotice as cn inner join (select "supplierABN",
  "supplierName" from contractnotice where "supplierABN"
  IS NOT NULL and "supplierABN" != 0) as a on
  cn."supplierName" = a."supplierName" where
  cn."CNID"=contractnotice."CNID" and (contractnotice."supplierABN"
  IS NULL or contractnotice."supplierABN" = 0) */
  // http://www.lastcraft.com/browser_documentation.php
  // http://code.google.com/p/phpquery/
  require('phpQuery-onefile.php');
  function getURL($url) {
  //return file_get_contents($url);
  $ch = curl_init($url);
  curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
  curl_setopt($ch, CURLOPT_HEADER, 0);
  curl_setopt($ch, CURLOPT_TIMEOUT, 45);
  $page = curl_exec($ch);
  if (curl_errno($ch)) {
  echo "<font color=red> Database temporarily unavailable: ";
  echo curl_errno($ch) . " " . curl_error($ch);
 
  echo $url;
  echo "</font><br>";
  }
  curl_close($ch);
  return $page;
  }
 
 
  function getTextFromTHNode($Node, $Text = "") {
  if ($Node->tagName == null)
  return $Text.$Node->textContent;
 
  if ($Node->tagName != "td") {
  $Node = $Node->firstChild;
  if ($Node != null)
  $Text = getTextFromTHNode($Node, $Text);
 
  while($Node->nextSibling != null) {
  $Text = getTextFromTHNode($Node->nextSibling, $Text);
  $Node = $Node->nextSibling;
  }
  }
  return $Text;
  }
 
  function getTextFromNode($Node, $Text = "") {
  if ($Node->tagName == null)
  return $Text.$Node->textContent;
  if ($Node->tagName != "th" && $Node->tagName != "span") {
  $Node = $Node->firstChild;
  if ($Node != null)
  $Text = getTextFromNode($Node, $Text);
 
  while($Node->nextSibling != null) {
  $Text = getTextFromNode($Node->nextSibling, $Text);
  $Node = $Node->nextSibling;
  }
  }
  return $Text;
  }
  function dom_to_array($root)
  {
  $result = array();
 
  if ($root->hasAttributes())
  {
  $attrs = $root->attributes;
 
  foreach ($attrs as $i => $attr)
  $result[$attr->name] = $attr->value;
  }
 
  $children = $root->childNodes;
  if ($root->childNodes) {
  if ($children->length == 1)
  {
  $child = $children->item(0);
 
  if ($child->nodeType == XML_TEXT_NODE)
  {
  $result['_value'] = $child->nodeValue;
 
  if (count($result) == 1)
  return $result['_value'];
  else
  return $result;
  }
  }
 
  $group = array();
 
  for($i = 0; $i < $children->length; $i++)
  {
  $child = $children->item($i);
 
  if (!isset($result[$child->nodeName]))
  $result[$child->nodeName] = dom_to_array($child);
  else
  {
  if (!isset($group[$child->nodeName]))
  {
  $tmp = $result[$child->nodeName];
  $result[$child->nodeName] = array($tmp);
  $group[$child->nodeName] = 1;
  }
 
  $result[$child->nodeName][] = dom_to_array($child);
  }
  }
  }
 
  return $result;
  }
 
  function importCN($cnid) {
  global $conn;
  $CN = str_replace("-A", "00",$cnid);
  // check if already complete
  $query = 'Select "parentCN" from contractnotice
  where "CNID" = :CNID';
  $query = $conn->prepare($query);
  $query->bindParam(":CNID", $CN);
  $query->execute();
  $r = $query->fetch(PDO::FETCH_ASSOC);
  if ($r['parentCN'] == NULL) {
  $site = "https://www.tenders.gov.au/";
  $searchResult = phpQuery::newDocument(getURL("https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN".$cnid));
  //echo "https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN".$cnid;
  $url = "";
  foreach(pq('a') as $a) {
  if (strpos($a->getAttribute("href"),"public.cn.view") >0 ) {
  //echo $a->getAttribute("href");
  $url = $a->getAttribute("href");
  break;
  }
  }
  $cn = phpQuery::newDocument(getURL($site.$url));
  $datamapping0711 = array(
  "Agency" => "agencyName",
  "Parent CN" => "parentCN",
  "CN ID" => "CNID",
  "Publish Date" => "publishDate",
  "Amendment Date" => "amendDate",
  "Status" => "",
  "StartDate" => "contractStart",
  "EndDate" => "contractEnd",
  "Contract Value (AUD)" => "value",
  "Description" => "description",
  "Agency Reference ID" => "agencyID",
  "Category" => "category",
  "Procurement Method" => "procurementMethod",
  "ATM ID" => "atmID",
  "SON ID" => "SONID",
  "Confidentiality - Contract" => "confidentialityContract",
  "Confidentiality Reason(s) - Contract" => "confidentialityContractReason",
  "Confidentiality - Outputs" => "confidentialityOutputs",
  "Confidentiality Reason(s) - Outputs" => "confidentialityOutputsReason",
  "Consultancy" => "consultancy",
  "Consultancy Reason(s)" => "consultancyReason",
  "Amendment Reason" => "amendmentReason",
  "Name" => "supplierName",
  "Postal Address" => "supplierAddress",
  "Town/City" => "supplierCity",
  "Postcode" => "supplierPostcode",
  "Country" => "supplierCountry",
  "ABN Exempt" => "supplierABNExempt",
  "ABN" => "supplierABN",
  "Branch" => "contactBranch",
  "Division" => "contactDivision",
  "Office Postcode" => "contactPostcode"
  );
  $cnFields = Array();
  foreach(pq('tr') as $tr) {
  $tra = dom_to_array($tr);
  if (is_array($tra['th'])) {
  $fieldName = trim(getTextFromTHNode($tr));
  } else {
  $fieldName = trim(str_replace("/th>","",$tra['th']));
  }
  $fieldValue = trim(print_r($tra['td'],true));
  if ($fieldName == "State/Territory" || $fieldName == "Contact Name"
  || $fieldName == "Contact Phone" || $fieldName == "Contact Email"
  ||$fieldName == "Amendments") {
  // do nothing
  } else if ($fieldName == "Contract Period") {
  $contractPeriod = explode("to",$fieldValue);
  $cnFields["contractStart"] = trim($contractPeriod[0]);
  $cnFields["contractEnd"] = trim($contractPeriod[1]);
  } else {
  $fieldName = $datamapping0711[$fieldName];
 
  if ($fieldName == "parentCN" || $fieldName == "CNID") {
  if (is_array($tra['td'])) {
  $fieldValue = trim(getTextFromNode($tr));
  }
  $fieldValue = substr($fieldValue, 2); // take off the "CN" prefix
  $fieldValue = str_replace("-A", "00", $fieldValue); // make amendments really big numbers
  } elseif ($fieldName == "description") {
 
  if (is_array($tra['td'])) $fieldValue = print_r($tra['td']['p'],true);
 
  } elseif ($fieldName == "value" || $fieldName == "supplierABN") {
  if (is_array($tra['td'])) {
  $fieldValue = trim(getTextFromNode($tr));
  }
  $fieldValue = str_replace(Array("$",","," "), "", $fieldValue);
  //if (!is_numeric($fieldValue)) $fieldValue = 0;
  if ($fieldValue == "Exempt") $fieldValue = NULL;
  } elseif ($fieldName == "amendDate" || $fieldName == "publishDate" || $fieldName == "contractStart" || $fieldName == "contractEnd") {
  $fieldValue = date('Y-m-d H:i:s', strtotime($fieldValue));
  } elseif (is_array($tra['td'])) {
  $fieldValue = trim(getTextFromNode($tr));
  }
  echo $fieldName. " = " .$fieldValue."<br>\n";
  $cnFields[$fieldName] = $fieldValue;
  }
  }
  $cnFields["importFile"] = $url;
  $contractNoticeInsertQ = 'INSERT INTO contractnotice ("' . implode('" , "', array_keys($cnFields)) . '") VALUES ( ';
  for($key = 0; $key < sizeof($cnFields); $key++) {
  $contractNoticeInsertQ.= ($key == 0 ? "" : ", ") . "?";
  }
  $contractNoticeInsertQ.= ");";
  //echo $contractNoticeInsertQ;
  $contractNoticeInsertQ = $conn->prepare($contractNoticeInsertQ);
  $contractNoticeInsertQ->execute(array_values($cnFields));
  $errors = $conn->errorInfo();
  if ($errors[1] == 7 && strpos($errors[2], "duplicate key")) {
  echo "Dupe {$cnFields['CNID']}<br>";
  }
  elseif ($errors[1] == 0) {
  echo "Success insert {$cnFields['CNID']} <br>";
  }
  else {
  foreach ($cnFields as $key => $cnf) {
  echo var_dump($key) . $cnf . "<br>";
  }
  echo $cnFields['CNID'] . " failed CN insert.<br>" . print_r($errors, true) . "<br> row $row <br><br>\n";
  }
  $contractNoticeUpdateQ = 'UPDATE contractnotice SET ';
  $count = 0;
  foreach ($cnFields as $key => $f) {
 
  $count++;
  $contractNoticeUpdateQ.= '"'.$key.'"=? '.($count >= sizeof($cnFields) ? "" : ", ");
  }
  $contractNoticeUpdateQ.= ' WHERE "CNID"=?;';
  $cnFields[] = $cnFields["CNID"];
  //echo $contractNoticeUpdateQ;
  $contractNoticeUpdateQ = $conn->prepare($contractNoticeUpdateQ);
  $contractNoticeUpdateQ->execute(array_values($cnFields));
  $errors = $conn->errorInfo();
  if ($errors[1] == 7 && strpos($errors[2], "duplicate key")) {
  print_r($errors);
  echo "Dupe update {$cnFields['CNID']}<br>";
  }
  elseif ($errors[1] == 0) {
  echo "Success update {$cnFields['CNID']} <br>";
  }
  else {
  foreach ($cnFields as $key => $cnf) {
  echo var_dump($key) . $cnf . "<br>";
  }
  echo $cnFields['CNID'] . " failed CN update.<br>" . print_r($errors, true) . "<br> row $row <br><br>\n";
  }
  }
  }
  function processFile($fpath, $tablename)
  {
  global $conn;
  echo " ============== $fpath ============== <br>";
 
  $handle = fopen($fpath, "r");
  flush();
  $row = 1;
 
  while (($data = fgetcsv($handle, 1000, "\t")) !== false) {
  if ($row > 3) {
  $data[0] = trim($data[0], "=");
  $data[0] = trim($data[0], "\"");
  if (strpos($data[0], "-A") > 0) {
  echo "Loading {$data[0]} ... <br>\n";
  importCN(str_replace("CN","",$data[0]));
  }
  }
  flush();
  //echo "<hr>\n";
 
 
  $row++;
  }
  fclose($handle);
  }
  $path = './';
  if ($_REQUEST["fname"] == "") {
  echo "Get files from: https://www.tenders.gov.au/?event=public.reports.list<br>";
  $dhandle = opendir($path);
  // define an array to hold the files
  $files = array();
  if ($dhandle) {
  // loop through all of the files
  while (false !== ($fname = readdir($dhandle))) {
  if (($fname != '.') && ($fname != '..')) {
  echo "<a href=\"import.php?fname=$fname\">$fname</a>&nbsp;" . filesize($path . $fname) . "&nbsp;" . date("c", filemtime($path . $fname)) . "<br/>";
  processFile($path . $fname, "contractnotice");
  }
  }
  }
  }
  else {
  $success = 0;
  $fname = $_REQUEST["fname"];
 
  $success+= processFile($path . $fname, "contractnotice");
 
  }
 
  ?>
 
<?php  
include_once ("../../lib/common.inc.php");  
 
$cnid = 1234;  
// http://www.lastcraft.com/browser_documentation.php  
// http://code.google.com/p/phpquery/  
require('phpQuery-onefile.php');  
function dom_to_array($root)  
{  
$result = array();  
 
if ($root->hasAttributes())  
{  
$attrs = $root->attributes;  
 
foreach ($attrs as $i => $attr)  
$result[$attr->name] = $attr->value;  
}  
 
$children = $root->childNodes;  
if ($root->childNodes) {  
if ($children->length == 1)  
{  
$child = $children->item(0);  
 
if ($child->nodeType == XML_TEXT_NODE)  
{  
$result['_value'] = $child->nodeValue;  
 
if (count($result) == 1)  
return $result['_value'];  
else  
return $result;  
}  
}  
 
$group = array();  
 
for($i = 0; $i < $children->length; $i++)  
{  
$child = $children->item($i);  
 
if (!isset($result[$child->nodeName]))  
$result[$child->nodeName] = dom_to_array($child);  
else  
{  
if (!isset($group[$child->nodeName]))  
{  
$tmp = $result[$child->nodeName];  
$result[$child->nodeName] = array($tmp);  
$group[$child->nodeName] = 1;  
}  
 
$result[$child->nodeName][] = dom_to_array($child);  
}  
}  
}  
 
return $result;  
}  
 
$site = "https://www.tenders.gov.au/";  
//$cn = phpQuery::newDocument(file_get_contents("https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN".$cnid));  
$searchResult = phpQuery::newDocument('<table class="four-col">  
<tbody><tr>  
<th>CN ID</th>  
 
<td><a href="/?event=public.cn.view&amp;CNUUID=BB3B13EA-A700-39A5-F003088ACF798AF9">CN1234</a></td>  
 
<th>  
</th><td>  
 
</td></tr>  
<tr>  
<th>Agency</th>  
<td colspan="3">Department of Veterans Affairs</td>  
</tr>  
<tr>  
<th>Publish Date</th>  
<td colspan="3"> 18-Aug-2006 </td>  
</tr>  
 
<tr>  
<th>Category</th>  
<td colspan="3">Computer services</td>  
</tr>  
 
<tr>  
<th>Contract Period</th>  
<td colspan="3"> 1-Aug-2006 to 31-Jul-2007 </td>  
</tr>  
<tr>  
<th>Contract Value (AUD)</th>  
<td colspan="3"> $156,200.00 </td>  
</tr>  
 
<tr>  
<th>Supplier Name</th>  
<td colspan="3">WIZARD INFORMATION SERVICES PTY LTD</td>  
</tr>  
<tr>  
<th></th>  
<td colspan="3">