single record scraper
[contractdashboard.git] / admin / partialdata / scrapesingle.php
blob:a/admin/partialdata/scrapesingle.php -> blob:b/admin/partialdata/scrapesingle.php
--- a/admin/partialdata/scrapesingle.php
+++ b/admin/partialdata/scrapesingle.php
@@ -1,1 +1,811 @@
-
+<?php
+$cnid = 1234; // numeric part of the contract notice ID (the fixtures below are for CN1234); only used by the commented-out live search URL
+// http://www.lastcraft.com/browser_documentation.php
+// phpQuery project page (the library loaded on the next line): http://code.google.com/p/phpquery/
+require('phpQuery-onefile.php');
+/**
+ * Recursively convert a DOM node into a nested PHP array.
+ *
+ * - Every attribute of the node becomes a key => value entry.
+ * - A node whose only child is a text node yields that text: as a bare
+ *   string when the node has no attributes, otherwise under the '_value' key.
+ * - Any other children are converted recursively and stored under their
+ *   nodeName (text nodes included, e.g. '#text'); when a name occurs more
+ *   than once the entries are collected into a list.
+ *
+ * @param DOMNode $root node to convert
+ * @return array|string nested array, or a plain string for a text-only node without attributes
+ */
+function dom_to_array($root)
+{
+    $out = array();
+
+    // Attributes first, so child nodes sharing a name are merged with them below.
+    if ($root->hasAttributes()) {
+        foreach ($root->attributes as $attribute) {
+            $out[$attribute->name] = $attribute->value;
+        }
+    }
+
+    $kids = $root->childNodes;
+    if (!$kids) {
+        return $out;
+    }
+
+    // Special case: exactly one child and it is plain text.
+    if ($kids->length == 1 && $kids->item(0)->nodeType == XML_TEXT_NODE) {
+        $out['_value'] = $kids->item(0)->nodeValue;
+
+        return count($out) == 1 ? $out['_value'] : $out;
+    }
+
+    // Names whose entry has already been turned into a list of values.
+    $listed = array();
+
+    foreach ($kids as $kid) {
+        $tag = $kid->nodeName;
+
+        if (!isset($out[$tag])) {
+            $out[$tag] = dom_to_array($kid);
+            continue;
+        }
+
+        // Second occurrence of this name: wrap the existing value in a list once.
+        if (!isset($listed[$tag])) {
+            $out[$tag] = array($out[$tag]);
+            $listed[$tag] = 1;
+        }
+
+        $out[$tag][] = dom_to_array($kid);
+    }
+
+    return $out;
+}
+
+$site = "https://www.tenders.gov.au/"; // AusTender base URL; not read by any other statement visible in this file
+// Disabled live lookup, replaced by the inline fixture below: $cn = phpQuery::newDocument(file_get_contents("https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN".$cnid));
+$searchResult = phpQuery::newDocument('<table class="four-col">
+		<tbody><tr>
+			<th>CN ID</th>
+			
+			<td><a href="/?event=public.cn.view&amp;CNUUID=BB3B13EA-A700-39A5-F003088ACF798AF9">CN1234</a></td>
+			
+				<th>
+				</th><td>
+			
+		</td></tr>
+		<tr>
+			<th>Agency</th>
+			<td colspan="3">Department of Veterans Affairs</td>
+		</tr>
+		<tr>
+			<th>Publish Date</th>
+			<td colspan="3"> 18-Aug-2006 </td>
+		</tr>
+	
+		<tr>
+			<th>Category</th>
+			<td colspan="3">Computer services</td>
+		</tr>
+		
+		<tr>
+			<th>Contract Period</th>
+			<td colspan="3"> 1-Aug-2006  to 31-Jul-2007 </td>
+		</tr>
+		<tr>
+			<th>Contract Value (AUD)</th>
+			<td colspan="3"> $156,200.00 </td>
+		</tr>
+		
+		<tr>
+			<th>Supplier Name</th>
+			<td colspan="3">WIZARD INFORMATION SERVICES PTY LTD</td>
+		</tr>
+		<tr>
+			<th></th>
+			<td colspan="3">
+				<p>
+					<span class="last-updated"><strong>Last Updated:</strong> 18-Aug-2006 4:51 pm  (ACT Local time)</span>
+					
+					<strong><a href="/?event=public.cn.view&amp;CNUUID=BB3B13EA-A700-39A5-F003088ACF798AF9">Full Details</a></strong>
+					
+				</p>
+			</td>
+		</tr>
+	</tbody></table>'); // inline fixture: search-result table for CN1234 (presumably a saved copy of the keyword-search response; confirm), parsed into a phpQuery document
+// Print the href of the first anchor that points at a contract notice
+// detail page ("public.cn.view"), then stop scanning.
+foreach (pq('a') as $a) {
+	$href = $a->getAttribute("href");
+	// strpos() returns false when the needle is absent and 0 when it matches
+	// at the very start, so test against false instead of "> 0".
+	if (strpos($href, "public.cn.view") !== false) {
+		echo $href;
+		// Break only after a match. The break used to sit outside the if, so
+		// the loop gave up after the first anchor even when it did not match.
+		break;
+	}
+}
+$cn = phpQuery::newDocument('
+
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+
+<html lang="en-AU">
+
+<head>
+
+	<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+
+	<title>AusTender: Contract Notice View - CN1234</title>
+
+	
+
+	<meta name="language" content="en-AU">
+
+	<meta name="description" content="AusTender provides centralised publication of Australian Government business opportunities, annual procurement plans, multi-use lists and contracts awarded.">
+
+	<meta name="keywords" content="tenders, australian government tenders, austender, austenders, australian tenders, federal government tenders">
+
+
+
+	<link rel="stylesheet" type="text/css" href="/styles/styles_frontend_main.css" media="all">
+
+	<link rel="stylesheet" type="text/css" href="/styles/styles_frontend_print.css" media="print">
+
+	<link rel="stylesheet" type="text/css" href="/styles/styles_reports.css" media="all">
+
+	<link rel="stylesheet" href="/styles/calendar-atii.css" type="text/css" media="all">
+
+	<link rel="shortcut icon" href="/favicon.ico">
+
+	
+
+	
+
+</head>
+
+
+
+<body>
+
+
+
+<div id="header">
+
+	<a href="/"><img src="/images/header_logo.gif" alt="AusTender - The Australian Government Tender System"></a>
+
+</div>
+
+
+
+<form action="./" method="get" id="header-tools" onSubmit="return (this.keyword.value.length != 0)">
+
+	
+
+	<input
+
+		name="event"
+
+		id="form-event"
+
+		type="hidden"
+
+		value="public.advancedsearch.keyword"
+
+		>
+
+
+
+	<a href="/?event=public.home" id="home-link">Home</a>
+
+	<label class="hidden" for="search-text">Search</label>
+
+	<input type="text" value="" name="keyword" id="search-text">
+
+	<input type="submit" value="Search" id="search-submit">
+
+	<a href="?event=public.advancedsearch.home">Advanced Search</a>
+
+</form>
+
+
+
+<div id="left-col-wrapper">
+
+<div id="left-col"><div class="pad">
+
+	
+
+<form action="?event=public.login" method="post" enctype="multipart/form-data" id="login-form">
+
+	<label for="login-username">Username</label> (email)
+
+	<br>
+
+	<input type="text" name="pub-auth-username" id="login-username" value="">
+
+
+
+	<label for="login-password">Password</label>
+
+	<br>
+
+	<input type="password" name="pub-auth-password" id="login-password" value="">
+
+	<input type="Submit" value="Login" id="login-submit">
+
+
+
+	<p>
+
+		&raquo; <a href="?event=public.registereduser.forgotPassFormStep1">Forgotten password?</a>
+
+		<br>
+
+		&raquo; <a href="?event=public.registereduser.new">New user registration</a>
+
+	</p>
+
+
+
+</form> 
+
+			<h2>View</h2>
+
+		
+
+			<ul>
+
+			
+
+					<li>
+
+				
+
+					<a href="?event=public.atmproposed.list" title="Information about potential procurements prior to their release to the market">Pre-Release Notices</a>
+
+				</li>
+
+			
+
+					<li>
+
+				
+
+					<a href="?event=public.ATM.list" title="Business opportunities that are currently out to the market">Current ATMs</a>
+
+				</li>
+
+			
+
+					<li>
+
+				
+
+					<a href="?event=public.ATM.closed" title="Business opportunities that have closed in the last 30 days">Closed ATMs</a>
+
+				</li>
+
+			
+
+					<li>
+
+				
+
+					<a href="?event=public.CN.search" title="Notices of contracts awarded to suppliers">Contract Notices</a>
+
+				</li>
+
+			
+
+					<li>
+
+				
+
+					<a href="?event=public.SON.search" title="Notices of standing arrangements with suppliers, including panels and period contracts">Standing Offer Notices</a>
+
+				</li>
+
+			
+
+					<li>
+
+				
+
+					<a href="?event=public.MUL.list" title="Multi-Use Lists (MULs) currently open for applications for inclusion">Current Multi-Use Lists</a>
+
+				</li>
+
+			
+
+					<li>
+
+				
+
+					<a href="?event=public.MUL.closed" title="Multi-Use Lists (MULs) that have closed in the last 30 days">Closed Multi-Use Lists</a>
+
+				</li>
+
+			
+
+					<li>
+
+				
+
+					<a href="?event=public.APP.list" title="Agencies Planned Procurements for the latest financial year">Procurement Plans</a>
+
+				</li>
+
+			
+
+					<li>
+
+				
+
+					<a href="?event=public.reports.list" title="A list of standard reports on various types of procurement information">Reports</a>
+
+				</li>
+
+			
+
+			</ul>
+
+		
+
+			<h2>Info &amp; Links</h2>
+
+		
+
+			<ul>
+
+			
+
+					<li>
+
+				
+
+					<a href="?event=public.agency.list" title="A Link to Australian Government agency and department corporate addresses">Agency Addresses</a>
+
+				</li>
+
+			
+
+					<li>
+
+				
+
+					<a href="?event=public.relatedlink.list" title="Links to other procurement related websites, national and international">Related Links</a>
+
+				</li>
+
+			
+
+					<li>
+
+				
+
+					<a href="?event=public.help.list" title="A list of frequently asked questions and a link to download the AusTender Public User Guide">Help</a>
+
+				</li>
+
+			
+
+					<li>
+
+				
+
+					<a href="?event=public.contactus.show" title="Contact information for the AusTender Help Desk">Contact Us</a>
+
+				</li>
+
+			
+
+			</ul>
+
+		
+
+			<h2>Policies</h2>
+
+		
+
+			<ul>
+
+			
+
+					<li>
+
+				
+
+					<a href="?event=public.policydocs.list" title="Links to Australian Government procurement policy information">Policy Documents</a>
+
+				</li>
+
+			
+
+					<li>
+
+				
+
+					<a href="?event=public.document.list" title="Australian Government standard form contracts, templates and procurement related guidance">Procurement Document Library</a>
+
+				</li>
+
+			
+
+					<li>
+
+				
+
+					<a href="?event=public.termsOfUse" title="Conditions of use for both the AusTender website and ATM document distribution">Terms of Use</a>
+
+				</li>
+
+			
+
+					<li>
+
+				
+
+					<a href="?event=public.privacyStatement" title="The terms under which we manage information relating to and provided by AusTender Registered Users">Privacy Statement</a>
+
+				</li>
+
+			
+
+			</ul>
+
+		
+
+</div></div>
+
+
+
+</div>
+
+
+
+
+
+
+
+
+
+<div id="main-content"><div class="pad">
+
+	
+
+<ol id="bread-crumbs">
+
+	
+
+			<li class="first-item">
+
+				<a href="./?event=public.home">Home</a>
+
+			</li>
+
+		
+
+	<li> Contract Notice View - CN1234</li>
+
+</ol>
+
+
+
+
+
+	<h1> Contract Notice View - CN1234</h1>
+
+
+
+	
+
+		<div id="container">
+
+        
+
+
+
+<div class="content">
+
+
+
+	<div id="intro">
+
+		<span><p><span>AusTender holds Contract and Standing Offer Notices for the 07/08 financial year forward. For information related to previous years, please contact the AusTender Help Desk.</span></p><span><p><strong><span>Subcontractors:&nbsp; For Commonwealth contracts that started on or after 1 December 2008, agencies are required to provide the names of any associated subcontractors on request.&nbsp; Information&nbsp;on subcontractors&nbsp;can be&nbsp;sought&nbsp;</span><span>directly from the relevant agency through the&nbsp;Agency Contact listed in each Contract Notice.</span></strong></p></span></span> 
+
+	</div>
+
+
+
+</div>
+
+
+
+		</div>
+
+	<h2 class="highlight">IMU Contract Programmer: IMU-ICT040 (GAPS ID: 1611946)</h2>
+
+
+
+<table>
+
+	<tr>
+
+		<th>CN ID</th>
+
+		
+
+		<td>CN1234</td>
+
+		
+
+	</tr>
+
+	<tr>
+
+		<th>Agency</th>
+
+		<td>Department of Veterans Affairs</td>
+
+	</tr>
+
+	<tr>
+
+		<th>Publish Date</th>
+
+		<td> 18-Aug-2006 </td>
+
+	</tr>
+
+	
+
+	<tr>
+
+		<th>Category</th>
+
+		<td>Computer services</td>
+
+	</tr>
+
+	<tr>
+
+		<th>Contract Period</th>
+
+		<td>
+
+			1-Aug-2006  to 31-Jul-2007 
+
+		</td>
+
+	</tr>	
+
+		<tr>
+
+			<th>Contract Value (AUD)</th>
+
+		<td>
+
+			$156,200.00 
+
+		</td>
+
+		</tr>
+
+	
+
+	<tr>
+
+		<th>Description</th>
+
+		<td>IMU Contract Programmer: IMU-ICT040 (GAPS ID: 1611946)</td>
+
+	</tr>
+
+	
+
+	<tr>
+
+		<th>Procurement Method</th>
+
+		<td>Open</td>
+
+	</tr>
+
+	
+
+
+
+	<tr>
+
+		<th>Confidentiality - Contract</th>
+
+		<td>
+
+			
+
+		</td>
+
+	</tr>
+
+	
+
+	<tr>
+
+		<th>Confidentiality - Outputs</th>
+
+		<td>
+
+			No 
+
+		</td>
+
+	</tr>
+
+	
+
+			<tr>
+
+				<th>Consultancy</th>
+
+				<td>No</td>
+
+			</tr>
+
+		
+
+
+
+</table>
+
+
+
+<h2>Supplier Details</h2>
+
+<table>
+
+	<tr>
+
+		<th>Name</th>
+
+		<td>
+
+			WIZARD INFORMATION SERVICES PTY LTD 
+
+		</td>
+
+	</tr>
+
+	
+
+	<tr>
+
+		<th>Postal Address</th>
+
+		<td>GPO Box 2700</td>
+
+	</tr>
+
+	<tr>
+
+		<th>Town/City</th>
+
+		<td>CANBERRA CITY</td>
+
+	</tr>
+
+	<tr>
+
+		<th>Postcode</th>
+
+		<td>2601</td>
+
+	</tr>
+
+	<tr>
+
+		<th>State/Territory</th>
+
+		<td>ACT</td>
+
+	</tr>
+
+	<tr>
+
+		<th>Country</th>
+
+		<td>Australia</td>
+
+	</tr>
+
+	<tr>
+
+		<th>ABN</th>		
+
+		<td>
+
+			47 008 617 561 
+
+		</td>
+
+		
+
+	</tr>
+
+</table>
+
+
+
+
+
+
+
+<h2>Agency Details</h2>
+
+
+
+<table class="medium-th">
+
+	<tr>
+
+		<th>Contact Name/th>
+
+		<td>Monico, Raymond J</td>
+
+	</tr>
+
+	
+
+	<tr>
+
+		<th>Contact Phone</th>
+
+		<td>(02) 6289 6016</td>
+
+	</tr>
+
+	
+
+	<tr>
+
+		<th>Branch</th>
+
+		<td>Information Management Unit</td>
+
+	</tr>
+
+	
+
+	<tr>
+
+		<th>Division</th>
+
+		<td>National Office Compensation Division</td>
+
+	</tr>
+
+	
+
+	<tr>
+
+		<th>Office Postcode</th>
+
+		<td>2606</td>
+
+	</tr>
+
+	<tr>
+
+		<th>Agency Reference ID</th>
+
+		<td>107460</td>
+
+	</tr>
+
+</table> </div></div>
+
+
+
+
+
+
+
+</body>
+
+</html> '); // inline fixture: full "Contract Notice View - CN1234" page; the "Contact Name/th>" header cell is malformed in the fixture itself and is compensated for by the str_replace() in the row loop that follows
+
+// Dump every table row of the current phpQuery document as "header = value",
+// one row per output line.
+foreach (pq('tr') as $row) {
+	$cells = dom_to_array($row);
+	// One header cell in the fixture is malformed ("Contact Name/th>"), so
+	// strip the stray closing-tag remnant before trimming whitespace.
+	$label = trim(str_replace("/th>", "", $cells['th']));
+	// print_r() renders nested cell content (arrays) as readable text.
+	$value = trim(print_r($cells['td'], true));
+	echo $label . " = " . $value . "<br>\n";
+}
+
+?>