--- /dev/null +++ b/admin/partialdata/importamendments.php @@ -1,1 +1,315 @@ - +<?php +include_once ("../../lib/common.inc.php"); + +// http://www.lastcraft.com/browser_documentation.php +// http://code.google.com/p/phpquery/ +require('phpQuery-onefile.php'); +function getURL($url) { + //return file_get_contents($url); + $ch = curl_init($url); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); + curl_setopt($ch, CURLOPT_HEADER, 0); + curl_setopt($ch, CURLOPT_TIMEOUT, 45); + $page = curl_exec($ch); + if (curl_errno($ch)) { + echo "<font color=red> Database temporarily unavailable: "; + echo curl_errno($ch) . " " . curl_error($ch); + + echo $url; + echo "</font><br>"; + } + curl_close($ch); + return $page; +} + + +function getTextFromTHNode($Node, $Text = "") { + if ($Node->tagName == null) + return $Text.$Node->textContent; + +if ($Node->tagName != "td") { + $Node = $Node->firstChild; + if ($Node != null) + $Text = getTextFromTHNode($Node, $Text); + + while($Node->nextSibling != null) { + $Text = getTextFromTHNode($Node->nextSibling, $Text); + $Node = $Node->nextSibling; + } +} + return $Text; +} + +function getTextFromNode($Node, $Text = "") { + if ($Node->tagName == null) + return $Text.$Node->textContent; +if ($Node->tagName != "th" && $Node->tagName != "span") { + $Node = $Node->firstChild; + if ($Node != null) + $Text = getTextFromNode($Node, $Text); + + while($Node->nextSibling != null) { + $Text = getTextFromNode($Node->nextSibling, $Text); + $Node = $Node->nextSibling; + } +} + return $Text; +} +function dom_to_array($root) +{ + $result = array(); + + if ($root->hasAttributes()) + { + $attrs = $root->attributes; + + foreach ($attrs as $i => $attr) + $result[$attr->name] = $attr->value; + } + + $children = $root->childNodes; +if ($root->childNodes) { + if ($children->length == 1) + { + $child = $children->item(0); + + if ($child->nodeType == XML_TEXT_NODE) + { + $result['_value'] = $child->nodeValue; + + if (count($result) == 1) + return $result['_value']; + else + return $result; + } + } + + $group = array(); + + for($i = 0; $i < $children->length; $i++) + { + $child = $children->item($i); + + if (!isset($result[$child->nodeName])) + $result[$child->nodeName] = dom_to_array($child); + else + { + if (!isset($group[$child->nodeName])) + { + $tmp = $result[$child->nodeName]; + $result[$child->nodeName] = array($tmp); + $group[$child->nodeName] = 1; + } + + $result[$child->nodeName][] = dom_to_array($child); + } + } +} + + return $result; +} + +function importCN($cnid) { + global $conn; + $CN = str_replace("-A", "00",$cnid); + // check if already complete + $query = 'Select "parentCN" from contractnotice + where "CNID" = :CNID'; + $query = $conn->prepare($query); + $query->bindParam(":CNID", $CN); + $query->execute(); + $r = $query->fetch(PDO::FETCH_ASSOC); + if ($r['parentCN'] == NULL) { +$site = "https://www.tenders.gov.au/"; +$searchResult = phpQuery::newDocument(getURL("https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN".$cnid)); +//echo "https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN".$cnid; +$url = ""; +foreach(pq('a') as $a) { + if (strpos($a->getAttribute("href"),"public.cn.view") >0 ) { + //echo $a->getAttribute("href"); + $url = $a->getAttribute("href"); + break; + } +} +$cn = phpQuery::newDocument(getURL($site.$url)); + $datamapping0711 = array( + "Agency" => "agencyName", + "Parent CN" => "parentCN", + "CN ID" => "CNID", + "Publish Date" => "publishDate", + "Amendment Date" => "amendDate", + "Status" => "", + "StartDate" => "contractStart", + "EndDate" => "contractEnd", + "Contract Value (AUD)" => "value", + "Description" => "description", + "Agency Reference ID" => "agencyID", + "Category" => "category", + "Procurement Method" => "procurementMethod", + "ATM ID" => "atmID", + "SON ID" => "SONID", + "Confidentiality - Contract" => "confidentialityContract", + "Confidentiality Reason(s) - Contract" => "confidentialityContractReason", + "Confidentiality - Outputs" => "confidentialityOutputs", + "Confidentiality Reason(s) - Outputs" => "confidentialityOutputsReason", + "Consultancy" => "consultancy", + "Consultancy Reason(s)" => "consultancyReason", + "Amendment Reason" => "amendmentReason", + "Name" => "supplierName", + "Postal Address" => "supplierAddress", + "Town/City" => "supplierCity", + "Postcode" => "supplierPostcode", + "Country" => "supplierCountry", + "ABN Exempt" => "supplierABNExempt", + "ABN" => "supplierABN", + "Branch" => "contactBranch", + "Division" => "contactDivision", + "Office Postcode" => "contactPostcode" + ); +$cnFields = Array(); +foreach(pq('tr') as $tr) { + $tra = dom_to_array($tr); + if (is_array($tra['th'])) { + $fieldName = trim(getTextFromTHNode($tr)); + } else { + $fieldName = trim(str_replace("/th>","",$tra['th'])); + } + $fieldValue = trim(print_r($tra['td'],true)); + if ($fieldName == "State/Territory" || $fieldName == "Contact Name" + || $fieldName == "Contact Phone" || $fieldName == "Contact Email" + ||$fieldName == "Amendments") { + // do nothing + } else if ($fieldName == "Contract Period") { + $contractPeriod = explode("to",$fieldValue); + $cnFields["contractStart"] = trim($contractPeriod[0]); + $cnFields["contractEnd"] = trim($contractPeriod[1]); + } else { + $fieldName = $datamapping0711[$fieldName]; + + if ($fieldName == "parentCN" || $fieldName == "CNID") { + if (is_array($tra['td'])) { + $fieldValue = trim(getTextFromNode($tr)); + } + $fieldValue = substr($fieldValue, 2); // take off the "CN" prefix + $fieldValue = str_replace("-A", "00", $fieldValue); // make amendments really big numbers + } elseif ($fieldName == "description") { + + if (is_array($tra['td'])) $fieldValue = print_r($tra['td']['p'],true); + + } elseif ($fieldName == "value" || $fieldName == "supplierABN") { + if (is_array($tra['td'])) { + $fieldValue = trim(getTextFromNode($tr)); + } + $fieldValue = str_replace(Array("$",","," "), "", $fieldValue); + //if (!is_numeric($fieldValue)) $fieldValue = 0; + if ($fieldValue == "Exempt") $fieldValue = NULL; + } elseif ($fieldName == "amendDate" || $fieldName == "publishDate" || $fieldName == "contractStart" || $fieldName == "contractEnd") { + $fieldValue = date('Y-m-d H:i:s', strtotime($fieldValue)); + } elseif (is_array($tra['td'])) { + $fieldValue = trim(getTextFromNode($tr)); + } + echo $fieldName. " = " .$fieldValue."<br>\n"; + $cnFields[$fieldName] = $fieldValue; + } +} +$cnFields["importFile"] = $url; +$contractNoticeInsertQ = 'INSERT INTO contractnotice ("' . implode('" , "', array_keys($cnFields)) . '") VALUES ( '; + for($key = 0; $key < sizeof($cnFields); $key++) { + $contractNoticeInsertQ.= ($key == 0 ? "" : ", ") . "?"; + } + $contractNoticeInsertQ.= ");"; + //echo $contractNoticeInsertQ; + $contractNoticeInsertQ = $conn->prepare($contractNoticeInsertQ); + $contractNoticeInsertQ->execute(array_values($cnFields)); + $errors = $conn->errorInfo(); + if ($errors[1] == 7 && strpos($errors[2], "duplicate key")) { + echo "Dupe {$cnFields['CNID']}<br>"; + } + elseif ($errors[1] == 0) { + echo "Success insert {$cnFields['CNID']} <br>"; + } + else { + foreach ($cnFields as $key => $cnf) { + echo var_dump($key) . $cnf . "<br>"; + } + echo $cnFields['CNID'] . " failed CN insert.<br>" . print_r($errors, true) . "<br> row $row <br><br>\n"; + } +$contractNoticeUpdateQ = 'UPDATE contractnotice SET '; +$count = 0; + foreach ($cnFields as $key => $f) { + + $count++; + $contractNoticeUpdateQ.= '"'.$key.'"=? '.($count >= sizeof($cnFields) ? "" : ", "); + } + $contractNoticeUpdateQ.= ' WHERE "CNID"=?;'; + $cnFields[] = $cnFields["CNID"]; + //echo $contractNoticeUpdateQ; + $contractNoticeUpdateQ = $conn->prepare($contractNoticeUpdateQ); + $contractNoticeUpdateQ->execute(array_values($cnFields)); + $errors = $conn->errorInfo(); + if ($errors[1] == 7 && strpos($errors[2], "duplicate key")) { + print_r($errors); + echo "Dupe update {$cnFields['CNID']}<br>"; + } + elseif ($errors[1] == 0) { + echo "Success update {$cnFields['CNID']} <br>"; + } + else { + foreach ($cnFields as $key => $cnf) { + echo var_dump($key) . $cnf . "<br>"; + } + echo $cnFields['CNID'] . " failed CN update.<br>" . print_r($errors, true) . "<br> row $row <br><br>\n"; + } + } +} +function processFile($fpath, $tablename) +{ + global $conn; + echo " ============== $fpath ============== <br>"; + + $handle = fopen($fpath, "r"); + flush(); + $row = 1; + + while (($data = fgetcsv($handle, 1000, "\t")) !== false) { + if ($row > 3) { + $data[0] = trim($data[0], "="); + $data[0] = trim($data[0], "\""); + if (strpos($data[0], "-A") > 0) { + echo "Loading {$data[0]} ... <br>\n"; + importCN(str_replace("CN","",$data[0])); + } + } + flush(); + //echo "<hr>\n"; + + + $row++; + } + fclose($handle); +} +$path = './'; +if ($_REQUEST["fname"] == "") { + echo "Get files from: https://www.tenders.gov.au/?event=public.reports.list<br>"; + $dhandle = opendir($path); + // define an array to hold the files + $files = array(); + if ($dhandle) { + // loop through all of the files + while (false !== ($fname = readdir($dhandle))) { + if (($fname != '.') && ($fname != '..')) { + echo "<a href=\"import.php?fname=$fname\">$fname</a> " . filesize($path . $fname) . " " . date("c", filemtime($path . $fname)) . "<br/>"; + processFile($path . $fname, "contractnotice"); + } + } + } +} +else { + $success = 0; + $fname = $_REQUEST["fname"]; + + $success+= processFile($path . $fname, "contractnotice"); + +} + +?>
--- a/admin/partialdata/scrapesingle.php +++ /dev/null @@ -1,881 +1,1 @@ -<?php -include_once ("../../lib/common.inc.php"); -$cnid = 1234; -// http://www.lastcraft.com/browser_documentation.php -// http://code.google.com/p/phpquery/ -require('phpQuery-onefile.php'); -function dom_to_array($root) -{ - $result = array(); - - if ($root->hasAttributes()) - { - $attrs = $root->attributes; - - foreach ($attrs as $i => $attr) - $result[$attr->name] = $attr->value; - } - - $children = $root->childNodes; -if ($root->childNodes) { - if ($children->length == 1) - { - $child = $children->item(0); - - if ($child->nodeType == XML_TEXT_NODE) - { - $result['_value'] = $child->nodeValue; - - if (count($result) == 1) - return $result['_value']; - else - return $result; - } - } - - $group = array(); - - for($i = 0; $i < $children->length; $i++) - { - $child = $children->item($i); - - if (!isset($result[$child->nodeName])) - $result[$child->nodeName] = dom_to_array($child); - else - { - if (!isset($group[$child->nodeName])) - { - $tmp = $result[$child->nodeName]; - $result[$child->nodeName] = array($tmp); - $group[$child->nodeName] = 1; - } - - $result[$child->nodeName][] = dom_to_array($child); - } - } -} - - return $result; -} - -$site = "https://www.tenders.gov.au/"; -//$cn = phpQuery::newDocument(file_get_contents("https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN".$cnid)); -$searchResult = phpQuery::newDocument('<table class="four-col"> - <tbody><tr> - <th>CN ID</th> - - <td><a href="/?event=public.cn.view&CNUUID=BB3B13EA-A700-39A5-F003088ACF798AF9">CN1234</a></td> - - <th> - </th><td> - - </td></tr> - <tr> - <th>Agency</th> - <td colspan="3">Department of Veterans Affairs</td> - </tr> - <tr> - <th>Publish Date</th> - <td colspan="3"> 18-Aug-2006 </td> - </tr> - - <tr> - <th>Category</th> - <td colspan="3">Computer services</td> - </tr> - - <tr> - <th>Contract Period</th> - <td colspan="3"> 1-Aug-2006 to 31-Jul-2007 </td> - </tr> - <tr> - <th>Contract Value (AUD)</th> - <td colspan="3"> $156,200.00 </td> - </tr> - - <tr> - <th>Supplier Name</th> - <td colspan="3">WIZARD INFORMATION SERVICES PTY LTD</td> - </tr> - <tr> - <th></th> - <td colspan="3"> - <p> - <span class="last-updated"><strong>Last Updated:</strong> 18-Aug-2006 4:51 pm (ACT Local time)</span> - - <strong><a href="/?event=public.cn.view&CNUUID=BB3B13EA-A700-39A5-F003088ACF798AF9">Full Details</a></strong> - - </p> - </td> - </tr> - </tbody></table>'); -foreach(pq('a') as $a) { - if (strpos($a->getAttribute("href"),"public.cn.view") >0 ) echo $a->getAttribute("href"); - break; -} -$cn = phpQuery::newDocument(' - - -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> - -<html lang="en-AU"> - -<head> - - <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> - - <title>AusTender: Contract Notice View - CN1234</title> - - - - <meta name="language" content="en-AU"> - - <meta name="description" content="AusTender provides centralised publication of Australian Government business opportunities, annual procurement plans, multi-use lists and contracts awarded."> - - <meta name="keywords" content="tenders, australian government tenders, austender, austenders, australian tenders, federal government tenders"> - - - - <link rel="stylesheet" type="text/css" href="/styles/styles_frontend_main.css" media="all"> - - <link rel="stylesheet" type="text/css" href="/styles/styles_frontend_print.css" media="print"> - - <link rel="stylesheet" type="text/css" href="/styles/styles_reports.css" media="all"> - - <link rel="stylesheet" href="/styles/calendar-atii.css" type="text/css" media="all"> - - <link rel="shortcut icon" href="/favicon.ico"> - - - - - -</head> - - - -<body> - - - -<div id="header"> - - <a href="/"><img src="/images/header_logo.gif" alt="AusTender - The Australian Government Tender System"></a> - -</div> - - - -<form action="./" method="get" id="header-tools" onSubmit="return (this.keyword.value.length != 0)"> - - - - <input - - name="event" - - id="form-event" - - type="hidden" - - value="public.advancedsearch.keyword" - - > - - - - <a href="/?event=public.home" id="home-link">Home</a> - - <label class="hidden" for="search-text">Search</label> - - <input type="text" value="" name="keyword" id="search-text"> - - <input type="submit" value="Search" id="search-submit"> - - <a href="?event=public.advancedsearch.home">Advanced Search</a> - -</form> - - - -<div id="left-col-wrapper"> - -<div id="left-col"><div class="pad"> - - - -<form action="?event=public.login" method="post" enctype="multipart/form-data" id="login-form"> - - <label for="login-username">Username</label> (email) - - <br> - - <input type="text" name="pub-auth-username" id="login-username" value=""> - - - - <label for="login-password">Password</label> - - <br> - - <input type="password" name="pub-auth-password" id="login-password" value=""> - - <input type="Submit" value="Login" id="login-submit"> - - - - <p> - - » <a href="?event=public.registereduser.forgotPassFormStep1">Forgotten password?</a> - - <br> - - » <a href="?event=public.registereduser.new">New user registration</a> - - </p> - - - -</form> - - <h2>View</h2> - - - - <ul> - - - - <li> - - - - <a href="?event=public.atmproposed.list" title="Information about potential procurements prior to their release to the market">Pre-Release Notices</a> - - </li> - - - - <li> - - - - <a href="?event=public.ATM.list" title="Business opportunities that are currently out to the market">Current ATMs</a> - - </li> - - - - <li> - - - - <a href="?event=public.ATM.closed" title="Business opportunities that have closed in the last 30 days">Closed ATMs</a> - - </li> - - - - <li> - - - - <a href="?event=public.CN.search" title="Notices of contracts awarded to suppliers">Contract Notices</a> - - </li> - - - - <li> - - - - <a href="?event=public.SON.search" title="Notices of standing arrangements with suppliers, including panels and period contracts">Standing Offer Notices</a> - - </li> - - - - <li> - - - - <a href="?event=public.MUL.list" title="Multi-Use Lists (MULs) currently open for applications for inclusion">Current Multi-Use Lists</a> - - </li> - - - - <li> - - - - <a href="?event=public.MUL.closed" title="Multi-Use Lists (MULs) that have closed in the last 30 days">Closed Multi-Use Lists</a> - - </li> - - - - <li> - - - - <a href="?event=public.APP.list" title="Agencies Planned Procurements for the latest financial year">Procurement Plans</a> - - </li> - - - - <li> - - - - <a href="?event=public.reports.list" title="A list of standard reports on various types of procurement information">Reports</a> - - </li> - - - - </ul> - - - - <h2>Info & Links</h2> - - - - <ul> - - - - <li> - - - - <a href="?event=public.agency.list" title="A Link to Australian Government agency and department corporate addresses">Agency Addresses</a> - - </li> - - - - <li> - - - - <a href="?event=public.relatedlink.list" title="Links to other procurement related websites, national and international">Related Links</a> - - </li> - - - - <li> - - - - <a href="?event=public.help.list" title="A list of frequently asked questions and a link to download the AusTender Public User Guide">Help</a> - - </li> - - - - <li> - - - - <a href="?event=public.contactus.show" title="Contact information for the AusTender Help Desk">Contact Us</a> - - </li> - - - - </ul> - - - - <h2>Policies</h2> - - - - <ul> - - - - <li> - - - - <a href="?event=public.policydocs.list" title="Links to Australian Government procurement policy information">Policy Documents</a> - - </li> - - - - <li> - - - - <a href="?event=public.document.list" title="Australian Government standard form contracts, templates and procurement related guidance">Procurement Document Library</a> - - </li> - - - - <li> - - - - <a href="?event=public.termsOfUse" title="Conditions of use for both the AusTender website and ATM document distribution">Terms of Use</a> - - </li> - - - - <li> - - - - <a href="?event=public.privacyStatement" title="The terms under which we manage information relating to and provided by AusTender Registered Users">Privacy Statement</a> - - </li> - - - - </ul> - - - -</div></div> - - - -</div> - - - - - - - - - -<div id="main-content"><div class="pad"> - - - -<ol id="bread-crumbs"> - - - - <li class="first-item"> - - <a href="./?event=public.home">Home</a> - - </li> - - - - <li> Contract Notice View - CN1234</li> - -</ol> - - - - - - <h1> Contract Notice View - CN1234</h1> - - - - - - <div id="container"> - - - - - -<div class="content"> - - - - <div id="intro"> - - <span><p><span>AusTender holds Contract and Standing Offer Notices for the 07/08 financial year forward. For information related to previous years, please contact the AusTender Help Desk.</span></p><span><p><strong><span>Subcontractors: For Commonwealth contracts that started on or after 1 December 2008, agencies are required to provide the names of any associated subcontractors on request. Information on subcontractors can be sought </span><span>directly from the relevant agency through the Agency Contact listed in each Contract Notice.</span></strong></p></span></span> - - </div> - - - -</div> - - - - </div> - - <h2 class="highlight">IMU Contract Programmer: IMU-ICT040 (GAPS ID: 1611946)</h2> - - - -<table> - - <tr> - - <th>CN ID</th> - - - - <td>CN1234</td> - - - - </tr> - - <tr> - - <th>Agency</th> - - <td>Department of Veterans Affairs</td> - - </tr> - - <tr> - - <th>Publish Date</th> - - <td> 18-Aug-2006 </td> - - </tr> - - - - <tr> - - <th>Category</th> - - <td>Computer services</td> - - </tr> - - <tr> - - <th>Contract Period</th> - - <td> - - 1-Aug-2006 to 31-Jul-2007 - - </td> - - </tr> - - <tr> - - <th>Contract Value (AUD)</th> - - <td> - - $156,200.00 - - </td> - - </tr> - - - - <tr> - - <th>Description</th> - - <td>IMU Contract Programmer: IMU-ICT040 (GAPS ID: 1611946)</td> - - </tr> - - - - <tr> - - <th>Procurement Method</th> - - <td>Open</td> - - </tr> - - - - - - <tr> - - <th>Confidentiality - Contract</th> - - <td> - - - - </td> - - </tr> - - - - <tr> - - <th>Confidentiality - Outputs</th> - - <td> - - No - - </td> - - </tr> - - - - <tr> - - <th>Consultancy</th> - - <td>No</td> - - </tr> - - - - - -</table> - - - -<h2>Supplier Details</h2> - -<table> - - <tr> - - <th>Name</th> - - <td> - - WIZARD INFORMATION SERVICES PTY LTD - - </td> - - </tr> - - - - <tr> - - <th>Postal Address</th> - - <td>GPO Box 2700</td> - - </tr> - - <tr> - - <th>Town/City</th> - - <td>CANBERRA CITY</td> - - </tr> - - <tr> - - <th>Postcode</th> - - <td>2601</td> - - </tr> - - <tr> - - <th>State/Territory</th> - - <td>ACT</td> -