--- /dev/null
+++ b/admin/partialdata/importamendments.php
@@ -1,1 +1,323 @@
-
+<?php
+include_once ("../../lib/common.inc.php");
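+/* Reference SQL (inside this comment, so never executed by this script): fills in "supplierABN" where it is NULL or 0 from notices that have the same "supplierName".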
+update contractnotice set "supplierABN" = a."supplierABN"
+from contractnotice as cn inner join (select "supplierABN",
+"supplierName" from contractnotice where "supplierABN"
+IS NOT NULL and "supplierABN" != 0) as a on
+cn."supplierName" = a."supplierName" where
+cn."CNID"=contractnotice."CNID" and (contractnotice."supplierABN"
+IS NULL or contractnotice."supplierABN" = 0) */
+// http://www.lastcraft.com/browser_documentation.php
+// http://code.google.com/p/phpquery/
+require('phpQuery-onefile.php');
+/**
+ * Download $url with cURL (45 s timeout); on a cURL error echo an HTML-escaped notice.
+ * @param string $url address to fetch
+ * @return string|false response body, or false when the transfer failed
+ */
+function getURL($url) {
+    $ch = curl_init($url);
+    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // hand the body back instead of printing it
+    curl_setopt($ch, CURLOPT_HEADER, 0);
+    curl_setopt($ch, CURLOPT_TIMEOUT, 45);
+    $page = curl_exec($ch);
+    if (curl_errno($ch)) { // escape: the URL holds '&' and caller-supplied text
+        echo "<font color=red> Database temporarily unavailable: " . htmlspecialchars(curl_errno($ch) . " " . curl_error($ch) . $url) . "</font><br>";
+    }
+    curl_close($ch);
+    return $page;
+}
+
+
+/**
+ * Append to $Text the text found under $Node, ignoring every <td> subtree, so
+ * that passing a <tr> yields only its header label.
+ * @param DOMNode $Node node to read
+ * @param string $Text text gathered so far
+ * @return string $Text followed by the collected text
+ */
+function getTextFromTHNode($Node, $Text = "") {
+    if (!isset($Node->tagName)) return $Text . $Node->textContent; // text, comment, ... nodes
+    if ($Node->tagName == "td") return $Text;
+    // Start from firstChild so a childless element (e.g. <br>) never dereferences null.
+    for ($child = $Node->firstChild; $child !== null; $child = $child->nextSibling) {
+        $Text = getTextFromTHNode($child, $Text);
+    }
+    return $Text;
+}
+
+/**
+ * Append to $Text the text found under $Node, ignoring <th> and <span>
+ * subtrees, so that passing a <tr> yields only its cell value.
+ * @param DOMNode $Node node to read
+ * @param string $Text text gathered so far
+ * @return string $Text followed by the collected text
+ */
+function getTextFromNode($Node, $Text = "") {
+    if (!isset($Node->tagName)) return $Text . $Node->textContent; // text, comment, ... nodes
+    if ($Node->tagName == "th" || $Node->tagName == "span") return $Text;
+    for ($child = $Node->firstChild; $child !== null; $child = $child->nextSibling) { // null-safe walk
+        $Text = getTextFromNode($child, $Text);
+    }
+    return $Text;
+}
+/**
+ * Recursively convert a DOM node into a plain PHP value.
+ *
+ * Attributes become name => value entries. A node whose only child is a text
+ * node collapses to that text, or keeps it under '_value' when attributes
+ * were collected. Otherwise each child is converted under its nodeName, and
+ * children sharing a nodeName are gathered into a list.
+ *
+ * @param DOMNode $root node to convert
+ * @return array|string converted structure, or the bare text of a text-only node
+ */
+function dom_to_array($root)
+{
+    $converted = array();
+
+    if ($root->hasAttributes()) {
+        foreach ($root->attributes as $attribute) {
+            $converted[$attribute->name] = $attribute->value;
+        }
+    }
+
+    $kids = $root->childNodes;
+    if (!$root->childNodes) {
+        return $converted;
+    }
+
+    // Single text child: collapse to its text.
+    if ($kids->length == 1) {
+        $only = $kids->item(0);
+        if ($only->nodeType == XML_TEXT_NODE) {
+            $converted['_value'] = $only->nodeValue;
+            // Nothing but the text was collected: hand back the bare string.
+            return count($converted) == 1 ? $converted['_value'] : $converted;
+        }
+    }
+
+    $listed = array(); // nodeNames already promoted to a list
+    foreach ($kids as $kid) {
+        $name = $kid->nodeName;
+        if (!isset($converted[$name])) {
+            $converted[$name] = dom_to_array($kid);
+            continue;
+        }
+        if (!isset($listed[$name])) {
+            // Repeated name: wrap the existing value so further ones can be appended.
+            $converted[$name] = array($converted[$name]);
+            $listed[$name] = 1;
+        }
+        $converted[$name][] = dom_to_array($kid);
+    }
+
+    return $converted;
+}
+
+/**
+ * Scrape one contract notice amendment from tenders.gov.au and store it.
+ *
+ * Finds the notice through the site's keyword search, follows the first
+ * "public.cn.view" link, maps the labelled rows of that page onto
+ * contractnotice columns, then runs an INSERT followed by an UPDATE keyed on
+ * "CNID". Nothing is fetched when the stored row already has a "parentCN".
+ * Progress and errors are echoed as HTML.
+ *
+ * @param string $cnid notice id without its "CN" prefix (callers pass ids containing "-A")
+ * @return void
+ */
+function importCN($cnid) {
+    global $conn;
+    $CN = str_replace("-A", "00", $cnid); // same rewrite as applied to scraped ids below
+    // check if already complete
+    $query = $conn->prepare('Select "parentCN" from contractnotice where "CNID" = :CNID');
+    $query->bindParam(":CNID", $CN);
+    $query->execute();
+    $r = $query->fetch(PDO::FETCH_ASSOC);
+    if ($r !== false && $r['parentCN'] != NULL) {
+        return;
+    }
+
+    $site = "https://www.tenders.gov.au/";
+    // NOTE(review): pq() is called without a document below, so newDocument() presumably selects it.
+    $searchResult = phpQuery::newDocument(getURL($site . "?event=public.advancedsearch.keyword&keyword=CN" . urlencode($cnid)));
+    $url = "";
+    foreach (pq('a') as $a) {
+        if (strpos($a->getAttribute("href"), "public.cn.view") > 0) {
+            $url = $a->getAttribute("href");
+            break;
+        }
+    }
+    if ($url == "") { // no detail link: do not scrape the site's front page instead
+        echo "No contract notice page found for CN" . htmlspecialchars($cnid) . "<br>\n";
+        return;
+    }
+    $cn = phpQuery::newDocument(getURL($site . $url));
+    // Row label on the notice page => contractnotice column ("" = no column).
+    $datamapping0711 = array(
+        "Agency" => "agencyName",
+        "Parent CN" => "parentCN",
+        "CN ID" => "CNID",
+        "Publish Date" => "publishDate",
+        "Amendment Date" => "amendDate",
+        "Status" => "",
+        "StartDate" => "contractStart",
+        "EndDate" => "contractEnd",
+        "Contract Value (AUD)" => "value",
+        "Description" => "description",
+        "Agency Reference ID" => "agencyID",
+        "Category" => "category",
+        "Procurement Method" => "procurementMethod",
+        "ATM ID" => "atmID",
+        "SON ID" => "SONID",
+        "Confidentiality - Contract" => "confidentialityContract",
+        "Confidentiality Reason(s) - Contract" => "confidentialityContractReason",
+        "Confidentiality - Outputs" => "confidentialityOutputs",
+        "Confidentiality Reason(s) - Outputs" => "confidentialityOutputsReason",
+        "Consultancy" => "consultancy",
+        "Consultancy Reason(s)" => "consultancyReason",
+        "Amendment Reason" => "amendmentReason",
+        "Name" => "supplierName",
+        "Postal Address" => "supplierAddress",
+        "Town/City" => "supplierCity",
+        "Postcode" => "supplierPostcode",
+        "Country" => "supplierCountry",
+        "ABN Exempt" => "supplierABNExempt",
+        "ABN" => "supplierABN",
+        "Branch" => "contactBranch",
+        "Division" => "contactDivision",
+        "Office Postcode" => "contactPostcode"
+    );
+    $ignoredLabels = array("State/Territory", "Contact Name", "Contact Phone", "Contact Email", "Amendments");
+    $dateColumns = array("amendDate", "publishDate", "contractStart", "contractEnd");
+    $cnFields = Array();
+    foreach (pq('tr') as $tr) {
+        $tra = dom_to_array($tr);
+        if (!is_array($tra) || !isset($tra['th'])) continue; // no header cell, nothing to map
+        $td = isset($tra['td']) ? $tra['td'] : "";
+        if (is_array($tra['th'])) {
+            $fieldName = trim(getTextFromTHNode($tr));
+        } else {
+            $fieldName = trim(str_replace("/th>", "", $tra['th']));
+        }
+        $fieldValue = trim(print_r($td, true));
+        if (in_array($fieldName, $ignoredLabels, true)) continue;
+        if ($fieldName == "Contract Period") {
+            $contractPeriod = explode("to", $fieldValue);
+            $cnFields["contractStart"] = trim($contractPeriod[0]);
+            $cnFields["contractEnd"] = isset($contractPeriod[1]) ? trim($contractPeriod[1]) : NULL;
+            continue;
+        }
+        // Unknown labels and labels without a column would otherwise become a "" column in the SQL.
+        if (!isset($datamapping0711[$fieldName]) || $datamapping0711[$fieldName] == "") continue;
+        $fieldName = $datamapping0711[$fieldName];
+        if (is_array($td) && $fieldName != "description") {
+            $fieldValue = trim(getTextFromNode($tr)); // cell carries markup or attributes: take its text
+        }
+        if ($fieldName == "parentCN" || $fieldName == "CNID") {
+            $fieldValue = substr($fieldValue, 2); // take off the "CN" prefix
+            $fieldValue = str_replace("-A", "00", $fieldValue); // make amendments really big numbers
+        } elseif ($fieldName == "description") {
+            if (is_array($td)) {
+                $fieldValue = isset($td['p']) ? print_r($td['p'], true) : trim(getTextFromNode($tr));
+            }
+        } elseif ($fieldName == "value" || $fieldName == "supplierABN") {
+            $fieldValue = str_replace(Array("$", ",", " "), "", $fieldValue);
+            if ($fieldValue == "Exempt") $fieldValue = NULL;
+        } elseif (in_array($fieldName, $dateColumns, true)) {
+            $fieldValue = date('Y-m-d H:i:s', strtotime($fieldValue));
+        }
+        echo $fieldName . " = " . htmlspecialchars((string) $fieldValue) . "<br>\n";
+        $cnFields[$fieldName] = $fieldValue;
+    }
+    if (!isset($cnFields["CNID"]) || $cnFields["CNID"] == "") {
+        echo "No CN ID found on the page for CN" . htmlspecialchars($cnid) . "; nothing stored.<br>\n";
+        return;
+    }
+    $cnFields["importFile"] = $url;
+
+    $columns = array_keys($cnFields);
+    $contractNoticeInsertQ = $conn->prepare('INSERT INTO contractnotice ("' . implode('" , "', $columns) . '") VALUES ( ' . implode(", ", array_fill(0, count($columns), "?")) . ");");
+    $contractNoticeInsertQ->execute(array_values($cnFields));
+    // PDO::errorInfo() on $conn does not reflect statement errors; ask the statement itself.
+    $errors = $contractNoticeInsertQ->errorInfo();
+    if ($errors[1] == 7 && strpos($errors[2], "duplicate key")) {
+        echo "Dupe {$cnFields['CNID']}<br>";
+    } elseif ($errors[1] == 0) {
+        echo "Success insert {$cnFields['CNID']} <br>";
+    } else {
+        echo $cnFields['CNID'] . " failed CN insert.<br>" . htmlspecialchars(print_r($cnFields, true)) . "<br>" . htmlspecialchars(print_r($errors, true)) . "<br><br>\n";
+    }
+
+    $assignments = array();
+    foreach ($columns as $column) {
+        $assignments[] = '"' . $column . '"=? ';
+    }
+    $contractNoticeUpdateQ = $conn->prepare('UPDATE contractnotice SET ' . implode(", ", $assignments) . ' WHERE "CNID"=?;');
+    $updateValues = array_values($cnFields);
+    $updateValues[] = $cnFields["CNID"]; // value for the WHERE placeholder
+    $contractNoticeUpdateQ->execute($updateValues);
+    $errors = $contractNoticeUpdateQ->errorInfo();
+    if ($errors[1] == 7 && strpos($errors[2], "duplicate key")) {
+        print_r($errors);
+        echo "Dupe update {$cnFields['CNID']}<br>";
+    } elseif ($errors[1] == 0) {
+        echo "Success update {$cnFields['CNID']} <br>";
+    } else {
+        echo $cnFields['CNID'] . " failed CN update.<br>" . htmlspecialchars(print_r($cnFields, true)) . "<br>" . htmlspecialchars(print_r($errors, true)) . "<br><br>\n";
+    }
+}
+/**
+ * Call importCN() for each amendment row of a tab-separated report: rows after the
+ * third whose first column (= and " trimmed off) contains "-A" past its first character.
+ * @param string $fpath report file path
+ * @param string $tablename not used by this function
+ * @return int rows passed to importCN(); 0 when $fpath is not a readable file
+ */
+function processFile($fpath, $tablename)
+{
+    echo " ============== " . htmlspecialchars($fpath) . " ============== <br>";
+    $handle = is_file($fpath) ? fopen($fpath, "r") : false;
+    flush();
+    if ($handle === false) return 0; // fgetcsv(false) never yields false: endless loop / TypeError
+    $imported = 0;
+    for ($row = 1; ($data = fgetcsv($handle, 0, "\t")) !== false; $row++) { // 0 = no line-length cap
+        $id = isset($data[0]) ? trim(trim($data[0], "="), "\"") : ""; // blank line => array(null)
+        if ($row > 3 && strpos($id, "-A") > 0) {
+            echo "Loading " . htmlspecialchars($id) . " ... <br>\n";
+            importCN(str_replace("CN", "", $id));
+            $imported++;
+        }
+        flush();
+    }
+    fclose($handle);
+    return $imported;
+}
+// Entry point: with no "fname" request value, list and process every data file in
+// $path; otherwise process only the named file from that directory.
+$path = './';
+$requested = (isset($_REQUEST["fname"]) && is_string($_REQUEST["fname"])) ? basename($_REQUEST["fname"]) : ""; // basename(): stay inside $path
+if ($requested == "") {
+    echo "Get files from: https://www.tenders.gov.au/?event=public.reports.list<br>";
+    $dhandle = opendir($path);
+    if ($dhandle) {
+        // NOTE(review): the links below target import.php, not this script - confirm that is intended.
+        while (false !== ($fname = readdir($dhandle))) {
+            // Skip directories and PHP scripts (assumes report files are never .php - TODO confirm).
+            if (!is_file($path . $fname) || strtolower(substr($fname, -4)) == ".php") continue;
+            echo "<a href=\"import.php?fname=" . urlencode($fname) . "\">" . htmlspecialchars($fname) . "</a> "
+                . filesize($path . $fname) . " " . date("c", filemtime($path . $fname)) . "<br/>";
+            processFile($path . $fname, "contractnotice");
+        }
+        closedir($dhandle);
+    }
+} else {
+    $success = 0;
+    $fname = $requested;
+    $success += processFile($path . $fname, "contractnotice");
+}
+
+?>
+
--- a/admin/partialdata/scrapesingle.php
+++ /dev/null
@@ -1,881 +1,1 @@
-<?php
-include_once ("../../lib/common.inc.php");
-$cnid = 1234;
-// http://www.lastcraft.com/browser_documentation.php
-// http://code.google.com/p/phpquery/
-require('phpQuery-onefile.php');
-function dom_to_array($root)
-{
- $result = array();
-
- if ($root->hasAttributes())
- {
- $attrs = $root->attributes;
-
- foreach ($attrs as $i => $attr)
- $result[$attr->name] = $attr->value;
- }
-
- $children = $root->childNodes;
-if ($root->childNodes) {
- if ($children->length == 1)
- {
- $child = $children->item(0);
-
- if ($child->nodeType == XML_TEXT_NODE)
- {
- $result['_value'] = $child->nodeValue;
-
- if (count($result) == 1)
- return $result['_value'];
- else
- return $result;
- }
- }
-
- $group = array();
-
- for($i = 0; $i < $children->length; $i++)
- {
- $child = $children->item($i);
-
- if (!isset($result[$child->nodeName]))
- $result[$child->nodeName] = dom_to_array($child);
- else
- {
- if (!isset($group[$child->nodeName]))
- {
- $tmp = $result[$child->nodeName];
- $result[$child->nodeName] = array($tmp);
- $group[$child->nodeName] = 1;
- }
-
- $result[$child->nodeName][] = dom_to_array($child);
- }
- }
-}
-
- return $result;
-}
-
-$site = "https://www.tenders.gov.au/";
-//$cn = phpQuery::newDocument(file_get_contents("https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN".$cnid));
-$searchResult = phpQuery::newDocument('<table class="four-col">
- <tbody><tr>
- <th>CN ID</th>
-
- <td><a href="/?event=public.cn.view&CNUUID=BB3B13EA-A700-39A5-F003088ACF798AF9">CN1234</a></td>
-
- <th>
- </th><td>
-
- </td></tr>
- <tr>
- <th>Agency</th>
- <td colspan="3">Department of Veterans Affairs</td>
- </tr>
- <tr>
- <th>Publish Date</th>
- <td colspan="3"> 18-Aug-2006 </td>
- </tr>
-
- <tr>
- <th>Category</th>
- <td colspan="3">Computer services</td>
- </tr>
-
- <tr>
- <th>Contract Period</th>
- <td colspan="3"> 1-Aug-2006 to 31-Jul-2007 </td>
- </tr>
- <tr>
- <th>Contract Value (AUD)</th>
- <td colspan="3"> $156,200.00 </td>
- </tr>
-
- <tr>
- <th>Supplier Name</th>
- <td colspan="3">WIZARD INFORMATION SERVICES PTY LTD</td>
- </tr>
- <tr>
- <th></th>
- <td colspan="3">
- <p>
- <span class="last-updated"><strong>Last Updated:</strong> 18-Aug-2006 4:51 pm (ACT Local time)</span>
-
- <strong><a href="/?event=public.cn.view&CNUUID=BB3B13EA-A700-39A5-F003088ACF798AF9">Full Details</a></strong>
-
- </p>
- </td>
- </tr>
- </tbody></table>');
-foreach(pq('a') as $a) {
- if (strpos($a->getAttribute("href"),"public.cn.view") >0 ) echo $a->getAttribute("href");
- break;
-}
-$cn = phpQuery::newDocument('
-
-
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-
-<html lang="en-AU">
-
-<head>
-
- <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-
- <title>AusTender: Contract Notice View - CN1234</title>
-
-
-
- <meta name="language" content="en-AU">
-
- <meta name="description" content="AusTender provides centralised publication of Australian Government business opportunities, annual procurement plans, multi-use lists and contracts awarded.">
-
- <meta name="keywords" content="tenders, australian government tenders, austender, austenders, australian tenders, federal government tenders">
-
-
-
- <link rel="stylesheet" type="text/css" href="/styles/styles_frontend_main.css" media="all">
-
- <link rel="stylesheet" type="text/css" href="/styles/styles_frontend_print.css" media="print">
-
- <link rel="stylesheet" type="text/css" href="/styles/styles_reports.css" media="all">
-
- <link rel="stylesheet" href="/styles/calendar-atii.css" type="text/css" media="all">
-
- <link rel="shortcut icon" href="/favicon.ico">
-
-
-
-
-
-</head>
-
-
-
-<body>
-
-
-
-<div id="header">
-
- <a href="/"><img src="/images/header_logo.gif" alt="AusTender - The Australian Government Tender System"></a>
-
-</div>
-
-
-
-<form action="./" method="get" id="header-tools" onSubmit="return (this.keyword.value.length != 0)">
-
-
-
- <input
-
- name="event"
-
- id="form-event"
-
- type="hidden"
-
- value="public.advancedsearch.keyword"
-
- >
-
-
-
- <a href="/?event=public.home" id="home-link">Home</a>
-
- <label class="hidden" for="search-text">Search</label>
-
- <input type="text" value="" name="keyword" id="search-text">
-
- <input type="submit" value="Search" id="search-submit">
-
- <a href="?event=public.advancedsearch.home">Advanced Search</a>
-
-</form>
-
-
-
-<div id="left-col-wrapper">
-
-<div id="left-col"><div class="pad">
-
-
-
-<form action="?event=public.login" method="post" enctype="multipart/form-data" id="login-form">
-
- <label for="login-username">Username</label> (email)
-
- <br>
-
- <input type="text" name="pub-auth-username" id="login-username" value="">
-
-
-
- <label for="login-password">Password</label>
-
- <br>
-
- <input type="password" name="pub-auth-password" id="login-password" value="">
-
- <input type="Submit" value="Login" id="login-submit">
-
-
-
- <p>
-
- » <a href="?event=public.registereduser.forgotPassFormStep1">Forgotten password?</a>
-
- <br>
-
- » <a href="?event=public.registereduser.new">New user registration</a>
-
- </p>
-
-
-
-</form>
-
- <h2>View</h2>
-
-
-
- <ul>
-
-
-
- <li>
-
-
-
- <a href="?event=public.atmproposed.list" title="Information about potential procurements prior to their release to the market">Pre-Release Notices</a>
-
- </li>
-
-
-
- <li>
-
-
-
- <a href="?event=public.ATM.list" title="Business opportunities that are currently out to the market">Current ATMs</a>
-
- </li>
-
-
-
- <li>
-
-
-
- <a href="?event=public.ATM.closed" title="Business opportunities that have closed in the last 30 days">Closed ATMs</a>
-
- </li>
-
-
-
- <li>
-
-
-
- <a href="?event=public.CN.search" title="Notices of contracts awarded to suppliers">Contract Notices</a>
-
- </li>
-
-
-
- <li>
-
-
-
- <a href="?event=public.SON.search" title="Notices of standing arrangements with suppliers, including panels and period contracts">Standing Offer Notices</a>
-
- </li>
-
-
-
- <li>
-
-
-
- <a href="?event=public.MUL.list" title="Multi-Use Lists (MULs) currently open for applications for inclusion">Current Multi-Use Lists</a>
-
- </li>
-
-
-
- <li>
-
-
-
- <a href="?event=public.MUL.closed" title="Multi-Use Lists (MULs) that have closed in the last 30 days">Closed Multi-Use Lists</a>
-
- </li>
-
-
-
- <li>
-
-
-
- <a href="?event=public.APP.list" title="Agencies Planned Procurements for the latest financial year">Procurement Plans</a>
-
- </li>
-
-
-
- <li>
-
-
-
- <a href="?event=public.reports.list" title="A list of standard reports on various types of procurement information">Reports</a>
-
- </li>
-
-
-
- </ul>
-
-
-
- <h2>Info & Links</h2>
-
-
-
- <ul>
-
-
-
- <li>
-
-
-
- <a href="?event=public.agency.list" title="A Link to Australian Government agency and department corporate addresses">Agency Addresses</a>
-
- </li>
-
-
-
- <li>
-
-
-
- <a href="?event=public.relatedlink.list" title="Links to other procurement related websites, national and international">Related Links</a>
-
- </li>
-
-
-
- <li>
-
-
-
- <a href="?event=public.help.list" title="A list of frequently asked questions and a link to download the AusTender Public User Guide">Help</a>
-
- </li>
-
-
-
- <li>
-
-
-
- <a href="?event=public.contactus.show" title="Contact information for the AusTender Help Desk">Contact Us</a>
-
- </li>
-
-
-
- </ul>
-
-
-
- <h2>Policies</h2>
-
-
-
- <ul>
-
-
-
- <li>
-
-
-
- <a href="?event=public.policydocs.list" title="Links to Australian Government procurement policy information">Policy Documents</a>
-
- </li>
-
-
-
- <li>
-
-
-
- <a href="?event=public.document.list" title="Australian Government standard form contracts, templates and procurement related guidance">Procurement Document Library</a>
-
- </li>
-
-
-
- <li>
-
-
-
- <a href="?event=public.termsOfUse" title="Conditions of use for both the AusTender website and ATM document distribution">Terms of Use</a>
-
- </li>
-
-
-
- <li>
-
-
-
- <a href="?event=public.privacyStatement" title="The terms under which we manage information relating to and provided by AusTender Registered Users">Privacy Statement</a>
-
- </li>
-
-
-
- </ul>
-
-
-
-</div></div>
-
-
-
-</div>
-
-
-
-
-
-
-
-
-
-<div id="main-content"><div class="pad">
-
-
-
-<ol id="bread-crumbs">
-
-
-
- <li class="first-item">
-
- <a href="./?event=public.home">Home</a>
-
- </li>
-
-
-
- <li> Contract Notice View - CN1234</li>
-
-</ol>
-
-
-
-
-
- <h1> Contract Notice View - CN1234</h1>
-
-
-
-
-
- <div id="container">
-
-
-
-
-
-<div class="content">
-
-
-
- <div id="intro">
-
- <span><p><span>AusTender holds Contract and Standing Offer Notices for the 07/08 financial year forward. For information related to previous years, please contact the AusTender Help Desk.</span></p><span><p><strong><span>Subcontractors: For Commonwealth contracts that started on or after 1 December 2008, agencies are required to provide the names of any associated subcontractors on request. Information on subcontractors can be sought </span><span>directly from the relevant agency through the Agency Contact listed in each Contract Notice.</span></strong></p></span></span>
-
- </div>
-
-
-
-</div>
-
-
-
- </div>
-
- <h2 class="highlight">IMU Contract Programmer: IMU-ICT040 (GAPS ID: 1611946)</h2>
-
-
-
-<table>
-
- <tr>
-
- <th>CN ID</th>
-
-
-
- <td>CN1234</td>
-
-
-
- </tr>
-
- <tr>
-
- <th>Agency</th>
-
- <td>Department of Veterans Affairs</td>
-
- </tr>
-
- <tr>
-
- <th>Publish Date</th>
-
- <td> 18-Aug-2006 </td>
-
- </tr>
-
-
-
- <tr>
-
- <th>Category</th>
-
- <td>Computer services</td>
-
- </tr>
-
- <tr>
-
- <th>Contract Period</th>
-
- <td>
-
- 1-Aug-2006 to 31-Jul-2007
-
- </td>
-
- </tr>
-
- <tr>
-
- <th>Contract Value (AUD)</th>
-
- <td>
-
- $156,200.00
-
- </td>
-
- </tr>
-
-
-
- <tr>
-
- <th>Description</th>
-
- <td>IMU Contract Programmer: IMU-ICT040 (GAPS ID: 1611946)</td>
-
- </tr>
-
-
-
- <tr>
-
- <th>Procurement Method</th>
-
- <td>Open</td>
-
- </tr>
-
-
-
-
-
- <tr>
-
- <th>Confidentiality - Contract</th>
-
- <td>
-
-
-
- </td>
-
- </tr>
-
-
-
- <tr>
-
- <th>Confidentiality - Outputs</th>
-
- <td>
-
- No
-
- </td>
-
- </tr>
-
-
-
- <tr>