From: Maxious Date: Mon, 08 Aug 2011 10:55:08 +0000 Subject: Amendment partial record importer X-Git-Url: https://maxious.lambdacomplex.org/git/?p=contractdashboard.git&a=commitdiff&h=0faa2cf200c634ea12f76f9d1b2ae437d46d2aee --- Amendment partial record importer --- --- /dev/null +++ b/admin/partialdata/importamendments.php @@ -1,1 +1,315 @@ - + Database temporarily unavailable: "; + echo curl_errno($ch) . " " . curl_error($ch); + + echo $url; + echo "
"; + } + curl_close($ch); + return $page; +} + + +function getTextFromTHNode($Node, $Text = "") { + if ($Node->tagName == null) + return $Text.$Node->textContent; + +if ($Node->tagName != "td") { + $Node = $Node->firstChild; + if ($Node != null) + $Text = getTextFromTHNode($Node, $Text); + + while($Node->nextSibling != null) { + $Text = getTextFromTHNode($Node->nextSibling, $Text); + $Node = $Node->nextSibling; + } +} + return $Text; +} + +function getTextFromNode($Node, $Text = "") { + if ($Node->tagName == null) + return $Text.$Node->textContent; +if ($Node->tagName != "th" && $Node->tagName != "span") { + $Node = $Node->firstChild; + if ($Node != null) + $Text = getTextFromNode($Node, $Text); + + while($Node->nextSibling != null) { + $Text = getTextFromNode($Node->nextSibling, $Text); + $Node = $Node->nextSibling; + } +} + return $Text; +} +function dom_to_array($root) +{ + $result = array(); + + if ($root->hasAttributes()) + { + $attrs = $root->attributes; + + foreach ($attrs as $i => $attr) + $result[$attr->name] = $attr->value; + } + + $children = $root->childNodes; +if ($root->childNodes) { + if ($children->length == 1) + { + $child = $children->item(0); + + if ($child->nodeType == XML_TEXT_NODE) + { + $result['_value'] = $child->nodeValue; + + if (count($result) == 1) + return $result['_value']; + else + return $result; + } + } + + $group = array(); + + for($i = 0; $i < $children->length; $i++) + { + $child = $children->item($i); + + if (!isset($result[$child->nodeName])) + $result[$child->nodeName] = dom_to_array($child); + else + { + if (!isset($group[$child->nodeName])) + { + $tmp = $result[$child->nodeName]; + $result[$child->nodeName] = array($tmp); + $group[$child->nodeName] = 1; + } + + $result[$child->nodeName][] = dom_to_array($child); + } + } +} + + return $result; +} + +function importCN($cnid) { + global $conn; + $CN = str_replace("-A", "00",$cnid); + // check if already complete + $query = 'Select "parentCN" from contractnotice + where "CNID" = :CNID'; + $query = $conn->prepare($query); + $query->bindParam(":CNID", $CN); + $query->execute(); + $r = $query->fetch(PDO::FETCH_ASSOC); + if ($r['parentCN'] == NULL) { +$site = "https://www.tenders.gov.au/"; +$searchResult = phpQuery::newDocument(getURL("https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN".$cnid)); +//echo "https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN".$cnid; +$url = ""; +foreach(pq('a') as $a) { + if (strpos($a->getAttribute("href"),"public.cn.view") >0 ) { + //echo $a->getAttribute("href"); + $url = $a->getAttribute("href"); + break; + } +} +$cn = phpQuery::newDocument(getURL($site.$url)); + $datamapping0711 = array( + "Agency" => "agencyName", + "Parent CN" => "parentCN", + "CN ID" => "CNID", + "Publish Date" => "publishDate", + "Amendment Date" => "amendDate", + "Status" => "", + "StartDate" => "contractStart", + "EndDate" => "contractEnd", + "Contract Value (AUD)" => "value", + "Description" => "description", + "Agency Reference ID" => "agencyID", + "Category" => "category", + "Procurement Method" => "procurementMethod", + "ATM ID" => "atmID", + "SON ID" => "SONID", + "Confidentiality - Contract" => "confidentialityContract", + "Confidentiality Reason(s) - Contract" => "confidentialityContractReason", + "Confidentiality - Outputs" => "confidentialityOutputs", + "Confidentiality Reason(s) - Outputs" => "confidentialityOutputsReason", + "Consultancy" => "consultancy", + "Consultancy Reason(s)" => "consultancyReason", + "Amendment Reason" => "amendmentReason", + "Name" => "supplierName", + "Postal Address" => "supplierAddress", + "Town/City" => "supplierCity", + "Postcode" => "supplierPostcode", + "Country" => "supplierCountry", + "ABN Exempt" => "supplierABNExempt", + "ABN" => "supplierABN", + "Branch" => "contactBranch", + "Division" => "contactDivision", + "Office Postcode" => "contactPostcode" + ); +$cnFields = Array(); +foreach(pq('tr') as $tr) { + $tra = dom_to_array($tr); + if (is_array($tra['th'])) { + $fieldName = trim(getTextFromTHNode($tr)); + } else { + $fieldName = trim(str_replace("/th>","",$tra['th'])); + } + $fieldValue = trim(print_r($tra['td'],true)); + if ($fieldName == "State/Territory" || $fieldName == "Contact Name" + || $fieldName == "Contact Phone" || $fieldName == "Contact Email" + ||$fieldName == "Amendments") { + // do nothing + } else if ($fieldName == "Contract Period") { + $contractPeriod = explode("to",$fieldValue); + $cnFields["contractStart"] = trim($contractPeriod[0]); + $cnFields["contractEnd"] = trim($contractPeriod[1]); + } else { + $fieldName = $datamapping0711[$fieldName]; + + if ($fieldName == "parentCN" || $fieldName == "CNID") { + if (is_array($tra['td'])) { + $fieldValue = trim(getTextFromNode($tr)); + } + $fieldValue = substr($fieldValue, 2); // take off the "CN" prefix + $fieldValue = str_replace("-A", "00", $fieldValue); // make amendments really big numbers + } elseif ($fieldName == "description") { + + if (is_array($tra['td'])) $fieldValue = print_r($tra['td']['p'],true); + + } elseif ($fieldName == "value" || $fieldName == "supplierABN") { + if (is_array($tra['td'])) { + $fieldValue = trim(getTextFromNode($tr)); + } + $fieldValue = str_replace(Array("$",","," "), "", $fieldValue); + //if (!is_numeric($fieldValue)) $fieldValue = 0; + if ($fieldValue == "Exempt") $fieldValue = NULL; + } elseif ($fieldName == "amendDate" || $fieldName == "publishDate" || $fieldName == "contractStart" || $fieldName == "contractEnd") { + $fieldValue = date('Y-m-d H:i:s', strtotime($fieldValue)); + } elseif (is_array($tra['td'])) { + $fieldValue = trim(getTextFromNode($tr)); + } + echo $fieldName. " = " .$fieldValue."
\n"; + $cnFields[$fieldName] = $fieldValue; + } +} +$cnFields["importFile"] = $url; +$contractNoticeInsertQ = 'INSERT INTO contractnotice ("' . implode('" , "', array_keys($cnFields)) . '") VALUES ( '; + for($key = 0; $key < sizeof($cnFields); $key++) { + $contractNoticeInsertQ.= ($key == 0 ? "" : ", ") . "?"; + } + $contractNoticeInsertQ.= ");"; + //echo $contractNoticeInsertQ; + $contractNoticeInsertQ = $conn->prepare($contractNoticeInsertQ); + $contractNoticeInsertQ->execute(array_values($cnFields)); + $errors = $conn->errorInfo(); + if ($errors[1] == 7 && strpos($errors[2], "duplicate key")) { + echo "Dupe {$cnFields['CNID']}
"; + } + elseif ($errors[1] == 0) { + echo "Success insert {$cnFields['CNID']}
"; + } + else { + foreach ($cnFields as $key => $cnf) { + echo var_dump($key) . $cnf . "
"; + } + echo $cnFields['CNID'] . " failed CN insert.
" . print_r($errors, true) . "
row $row

\n"; + } +$contractNoticeUpdateQ = 'UPDATE contractnotice SET '; +$count = 0; + foreach ($cnFields as $key => $f) { + + $count++; + $contractNoticeUpdateQ.= '"'.$key.'"=? '.($count >= sizeof($cnFields) ? "" : ", "); + } + $contractNoticeUpdateQ.= ' WHERE "CNID"=?;'; + $cnFields[] = $cnFields["CNID"]; + //echo $contractNoticeUpdateQ; + $contractNoticeUpdateQ = $conn->prepare($contractNoticeUpdateQ); + $contractNoticeUpdateQ->execute(array_values($cnFields)); + $errors = $conn->errorInfo(); + if ($errors[1] == 7 && strpos($errors[2], "duplicate key")) { + print_r($errors); + echo "Dupe update {$cnFields['CNID']}
"; + } + elseif ($errors[1] == 0) { + echo "Success update {$cnFields['CNID']}
"; + } + else { + foreach ($cnFields as $key => $cnf) { + echo var_dump($key) . $cnf . "
"; + } + echo $cnFields['CNID'] . " failed CN update.
" . print_r($errors, true) . "
row $row

\n"; + } + } +} +function processFile($fpath, $tablename) +{ + global $conn; + echo " ============== $fpath ==============
"; + + $handle = fopen($fpath, "r"); + flush(); + $row = 1; + + while (($data = fgetcsv($handle, 1000, "\t")) !== false) { + if ($row > 3) { + $data[0] = trim($data[0], "="); + $data[0] = trim($data[0], "\""); + if (strpos($data[0], "-A") > 0) { + echo "Loading {$data[0]} ...
\n"; + importCN(str_replace("CN","",$data[0])); + } + } + flush(); + //echo "
\n"; + + + $row++; + } + fclose($handle); +} +$path = './'; +if ($_REQUEST["fname"] == "") { + echo "Get files from: https://www.tenders.gov.au/?event=public.reports.list
"; + $dhandle = opendir($path); + // define an array to hold the files + $files = array(); + if ($dhandle) { + // loop through all of the files + while (false !== ($fname = readdir($dhandle))) { + if (($fname != '.') && ($fname != '..')) { + echo "$fname " . filesize($path . $fname) . " " . date("c", filemtime($path . $fname)) . "
"; + processFile($path . $fname, "contractnotice"); + } + } + } +} +else { + $success = 0; + $fname = $_REQUEST["fname"]; + + $success+= processFile($path . $fname, "contractnotice"); + +} + +?> --- a/admin/partialdata/scrapesingle.php +++ /dev/null @@ -1,881 +1,1 @@ -hasAttributes()) - { - $attrs = $root->attributes; - - foreach ($attrs as $i => $attr) - $result[$attr->name] = $attr->value; - } - - $children = $root->childNodes; -if ($root->childNodes) { - if ($children->length == 1) - { - $child = $children->item(0); - - if ($child->nodeType == XML_TEXT_NODE) - { - $result['_value'] = $child->nodeValue; - - if (count($result) == 1) - return $result['_value']; - else - return $result; - } - } - - $group = array(); - - for($i = 0; $i < $children->length; $i++) - { - $child = $children->item($i); - - if (!isset($result[$child->nodeName])) - $result[$child->nodeName] = dom_to_array($child); - else - { - if (!isset($group[$child->nodeName])) - { - $tmp = $result[$child->nodeName]; - $result[$child->nodeName] = array($tmp); - $group[$child->nodeName] = 1; - } - - $result[$child->nodeName][] = dom_to_array($child); - } - } -} - - return $result; -} - -$site = "https://www.tenders.gov.au/"; -//$cn = phpQuery::newDocument(file_get_contents("https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN".$cnid)); -$searchResult = phpQuery::newDocument(' - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CN IDCN1234 - - -
AgencyDepartment of Veterans Affairs
Publish Date 18-Aug-2006
CategoryComputer services
Contract Period 1-Aug-2006 to 31-Jul-2007
Contract Value (AUD) $156,200.00
Supplier NameWIZARD INFORMATION SERVICES PTY LTD
-

- Last Updated: 18-Aug-2006 4:51 pm (ACT Local time) - - Full Details - -

-
'); -foreach(pq('a') as $a) { - if (strpos($a->getAttribute("href"),"public.cn.view") >0 ) echo $a->getAttribute("href"); - break; -} -$cn = phpQuery::newDocument(' - - - - - - - - - - - AusTender: Contract Notice View - CN1234 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - Home - - - - - - - - Advanced Search - -
- - - -
- -
- - - -
- - (email) - -
- - - - - - - -
- - - - - - - -

- - » Forgotten password? - -
- - » New user registration - -

- - - -
- -

View

- - - - - - - -

Info & Links

- - - - - - - -

Policies

- - - - - - - -
- - - -
- - - - - - - - - -
- - - - - - - - - -

Contract Notice View - CN1234

- - - - - -
- - - - - -
- - - -
- -

AusTender holds Contract and Standing Offer Notices for the 07/08 financial year forward. For information related to previous years, please contact the AusTender Help Desk.

Subcontractors:  For Commonwealth contracts that started on or after 1 December 2008, agencies are required to provide the names of any associated subcontractors on request.  Information on subcontractors can be sought directly from the relevant agency through the Agency Contact listed in each Contract Notice.

- -
- - - -
- - - -
- -

IMU Contract Programmer: IMU-ICT040 (GAPS ID: 1611946)

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CN IDCN1234
AgencyDepartment of Veterans Affairs
Publish Date 18-Aug-2006
CategoryComputer services
Contract Period - - 1-Aug-2006 to 31-Jul-2007 - -
Contract Value (AUD) - - $156,200.00 - -
DescriptionIMU Contract Programmer: IMU-ICT040 (GAPS ID: 1611946)
Procurement MethodOpen
Confidentiality - Contract - - - -
Confidentiality - Outputs - - No - -
ConsultancyNo
- - - -

Supplier Details

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Name - - WIZARD INFORMATION SERVICES PTY LTD - -
Postal AddressGPO Box 2700
Town/CityCANBERRA CITY
Postcode2601
State/TerritoryACT
CountryAustralia
ABN - - 47 008 617 561 - -
- - - - - - - -

Agency Details

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Contact Name/th> - - Monico, Raymond J
Contact Phone(02) 6289 6016
BranchInformation Management Unit
DivisionNational Office Compensation Division
Office Postcode2606
Agency Reference ID107460
- - - - - - - - - - '); - $datamapping0711 = array( - "Agency" => "agencyName", - "Parent CN ID" => "parentCN", - "CN ID" => "CNID", - "Publish Date" => "publishDate", - "Amendment Date" => "amendDate", - "Status" => "", - "StartDate" => "contractStart", - "EndDate" => "contractEnd", - "Contract Value (AUD)" => "value", - "Description" => "description", - "Agency Reference ID" => "agencyID", - "Category" => "category", - "Procurement Method" => "procurementMethod", - "ATM ID" => "atmID", - "SON ID" => "SONID", - "Confidentiality - Contract" => "confidentialityContract", - "Confidentiality - Contract Reason(s)" => "confidentialityContractReason", - "Confidentiality - Outputs" => "confidentialityOutputs", - "Confidentiality - Outputs Reason(s)" => "confidentialityOutputsReason", - "Consultancy" => "consultancy", - "Consultancy Reason(s)" => "consultancyReason", - "Amendment Reason" => "amendmentReason", - "Name" => "supplierName", - "Postal Address" => "supplierAddress", - "Town/City" => "supplierCity", - "Postcode" => "supplierPostcode", - "Country" => "supplierCountry", - "ABN Exempt" => "supplierABNExempt", - "ABN" => "supplierABN", - "Branch" => "contactBranch", - "Division" => "contactDivision", - "Office Postcode" => "contactPostcode" - ); -$cnFields = Array(); -foreach(pq('tr') as $tr) { - $tra = dom_to_array($tr); - $fieldName = trim(str_replace("/th>","",$tra['th'])); - $fieldValue = trim(print_r($tra['td'],true)); - if ($fieldName == "State/Territory" || $fieldName == "Contact Name" || $fieldName == "Contact Phone") { - // do nothing - } else if ($fieldName == "Contract Period") { - $contractPeriod = explode("to",$fieldValue); - $cnFields["contractStart"] = trim($contractPeriod[0]); - $cnFields["contractEnd"] = trim($contractPeriod[1]); - } else { - $fieldName = $datamapping0711[$fieldName]; - if ($fieldName == "parentCN" || $fieldName == "CNID") { - $fieldValue = substr($fieldValue, 2); // take off the "CN" prefix - $fieldValue = str_replace("-A", "00", $fieldValue); // make amendments really big numbers - } elseif ($fieldName == "amendDate" || $fieldName == "publishDate" || $fieldName == "contractStart" || $fieldName == "contractEnd") { - $contractNoticeInsert[] = date('Y-m-d H:i:s', strtotime($fieldValue)); - } -echo $fieldName. " = " .$fieldValue."
\n"; -$cnFields[$fieldName] = $fieldValue; - } -} -$contractNoticeInsertQ = 'INSERT INTO contractnotice ("' . implode('" , "', array_keys($cnFields)) . '") VALUES ( '; - for($key = 0; $key < sizeof($cnFields); $key++) { - $contractNoticeInsertQ.= ($key == 0 ? "" : ", ") . "?"; - } - $contractNoticeInsertQ.= ");"; - echo $contractNoticeInsertQ; - //$contractNoticeInsertQ = $conn->prepare($contractNoticeInsertQ); - -$contractNoticeUpdateQ = 'UPDATE contractnotice SET '; -$count = 0; - foreach ($cnFields as $key => $f) { - - $count++; - $contractNoticeUpdateQ.= $key."=? ".($count >= sizeof($cnFields) ? "" : ", "); - } - $contractNoticeUpdateQ.= " WHERE CNID=?;"; - $cnFields[] = $cnFields["CNID"]; - echo $contractNoticeUpdateQ; - $contractNoticeUpdateQ = $conn->prepare($contractNoticeUpdateQ); -?>