single record scraper
[contractdashboard.git] / admin / partialdata / scrapesingle.php
blob:a/admin/partialdata/scrapesingle.php -> blob:b/admin/partialdata/scrapesingle.php
--- a/admin/partialdata/scrapesingle.php
+++ b/admin/partialdata/scrapesingle.php
@@ -1,1 +1,811 @@
+$cnid = 1234; // presumably the numeric part of the contract notice id (the fixtures in this file use CN1234); only referenced by the commented-out fetch in this view
+function dom_to_array($root) // Recursively converts a DOM node into a nested array (or a plain string for a text-only node without attributes).
+    $result = array(); // filled with attributes first, then with child nodes keyed by node name
+    if ($root->hasAttributes()) 
+    { 
+        $attrs = $root->attributes; 
+        foreach ($attrs as $i => $attr) 
+            $result[$attr->name] = $attr->value; // each attribute is stored as name => value
+    } 
+    $children = $root->childNodes; 
+if ($root->childNodes) { // child handling is skipped when childNodes is falsy; NOTE(review): the brace closing this guard is not visible in this view
+    if ($children->length == 1) // shortcut for a node with exactly one child
+    { 
+        $child = $children->item(0); 
+        if ($child->nodeType == XML_TEXT_NODE) // the only child is text: keep it under '_value'
+        { 
+            $result['_value'] = $child->nodeValue; 
+            if (count($result) == 1) // '_value' is the only entry, i.e. no attributes were collected
+                return $result['_value']; // return the bare text instead of an array
+            else 
+                return $result; // attributes plus '_value'
+        } 
+    } 
+    $group = array(); // node names whose entry in $result has already been wrapped into a list
+    for($i = 0; $i < $children->length; $i++) 
+    { 
+        $child = $children->item($i); 
+        if (!isset($result[$child->nodeName])) // nothing stored under this name yet: store the converted child directly
+            $result[$child->nodeName] = dom_to_array($child); 
+        else 
+        { 
+            if (!isset($group[$child->nodeName])) // first repeat of this name: wrap the existing entry in a list
+            { 
+                $tmp = $result[$child->nodeName]; 
+                $result[$child->nodeName] = array($tmp); 
+                $group[$child->nodeName] = 1; 
+            } 
+            $result[$child->nodeName][] = dom_to_array($child); // append this repeated child to the list
+        } 
+    } 
+    return $result; // array of attributes and children collected above
+$site = ""; // empty string here and not read anywhere in this view; presumably the scrape target's base URL — TODO confirm
+//$cn = phpQuery::newDocument(file_get_contents("".$cnid)); // disabled live fetch keyed by $cnid; $cn is assigned from an inline HTML fixture further down instead
+$searchResult = phpQuery::newDocument('<table class="four-col">
+		<tbody><tr>
+			<th>CN ID</th>
+			<td><a href="/?;CNUUID=BB3B13EA-A700-39A5-F003088ACF798AF9">CN1234</a></td>
+				<th>
+				</th><td>
+		</td></tr>
+		<tr>
+			<th>Agency</th>
+			<td colspan="3">Department of Veterans Affairs</td>
+		</tr>
+		<tr>
+			<th>Publish Date</th>
+			<td colspan="3"> 18-Aug-2006 </td>
+		</tr>
+		<tr>
+			<th>Category</th>
+			<td colspan="3">Computer services</td>
+		</tr>
+		<tr>
+			<th>Contract Period</th>
+			<td colspan="3"> 1-Aug-2006  to 31-Jul-2007 </td>
+		</tr>
+		<tr>
+			<th>Contract Value (AUD)</th>
+			<td colspan="3"> $156,200.00 </td>
+		</tr>
+		<tr>
+			<th>Supplier Name</th>
+		</tr>
+		<tr>
+			<th></th>
+			<td colspan="3">
+				<p>
+					<span class="last-updated"><strong>Last Updated:</strong> 18-Aug-2006 4:51 pm  (ACT Local time)</span>
+					<strong><a href="/?;CNUUID=BB3B13EA-A700-39A5-F003088ACF798AF9">Full Details</a></strong>
+				</p>
+			</td>
+		</tr>
+	</tbody></table>'); // inline sample of a search-result table for CN1234, parsed with phpQuery instead of being fetched
+foreach(pq('a') as $a) { // walk the anchors matched by pq('a'); NOTE(review): the loop's closing brace is not visible in this view
+	if (strpos($a->getAttribute("href"),"") >0 ) echo  $a->getAttribute("href"); // NOTE(review): the needle is an empty string, so this test cannot pass as written (strpos yields 0 on PHP 8, false with a warning earlier) — confirm the intended substring
+	break; // unconditional: only the first matched anchor is ever examined
+$cn = phpQuery::newDocument('
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "">
+<html lang="en-AU">
+	<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+	<title>AusTender: Contract Notice View - CN1234</title>
+	<meta name="language" content="en-AU">
+	<meta name="description" content="AusTender provides centralised publication of Australian Government business opportunities, annual procurement plans, multi-use lists and contracts awarded.">
+	<meta name="keywords" content="tenders, australian government tenders, austender, austenders, australian tenders, federal government tenders">
+	<link rel="stylesheet" type="text/css" href="/styles/styles_frontend_main.css" media="all">
+	<link rel="stylesheet" type="text/css" href="/styles/styles_frontend_print.css" media="print">
+	<link rel="stylesheet" type="text/css" href="/styles/styles_reports.css" media="all">
+	<link rel="stylesheet" href="/styles/calendar-atii.css" type="text/css" media="all">
+	<link rel="shortcut icon" href="/favicon.ico">
+<div id="header">
+	<a href="/"><img src="/images/header_logo.gif" alt="AusTender - The Australian Government Tender System"></a>
+<form action="./" method="get" id="header-tools" onSubmit="return (this.keyword.value.length != 0)">
+	<input
+		name="event"
+		id="form-event"
+		type="hidden"
+		value="public.advancedsearch.keyword"
+		>
+	<a href="/?event=public.home" id="home-link">Home</a>
+	<label class="hidden" for="search-text">Search</label>
+	<input type="text" value="" name="keyword" id="search-text">
+	<input type="submit" value="Search" id="search-submit">
+	<a href="?event=public.advancedsearch.home">Advanced Search</a>
+<div id="left-col-wrapper">
+<div id="left-col"><div class="pad">
+<form action="?event=public.login" method="post" enctype="multipart/form-data" id="login-form">
+	<label for="login-username">Username</label> (email)
+	<br>
+	<input type="text" name="pub-auth-username" id="login-username" value="">
+	<label for="login-password">Password</label>
+	<br>
+	<input type="password" name="pub-auth-password" id="login-password" value="">
+	<input type="Submit" value="Login" id="login-submit">
+	<p>
+		&raquo; <a href="?event=public.registereduser.forgotPassFormStep1">Forgotten password?</a>
+		<br>
+		&raquo; <a href="?">New user registration</a>
+	</p>
+			<h2>View</h2>
+			<ul>
+					<li>
+					<a href="?event=public.atmproposed.list" title="Information about potential procurements prior to their release to the market">Pre-Release Notices</a>
+				</li>
+					<li>
+					<a href="?event=public.ATM.list" title="Business opportunities that are currently out to the market">Current ATMs</a>
+				</li>
+					<li>
+					<a href="?event=public.ATM.closed" title="Business opportunities that have closed in the last 30 days">Closed ATMs</a>
+				</li>
+					<li>
+					<a href="?" title="Notices of contracts awarded to suppliers">Contract Notices</a>
+				</li>
+					<li>
+					<a href="?" title="Notices of standing arrangements with suppliers, including panels and period contracts">Standing Offer Notices</a>
+				</li>
+					<li>
+					<a href="?event=public.MUL.list" title="Multi-Use Lists (MULs) currently open for applications for inclusion">Current Multi-Use Lists</a>
+				</li>
+					<li>
+					<a href="?event=public.MUL.closed" title="Multi-Use Lists (MULs) that have closed in the last 30 days">Closed Multi-Use Lists</a>
+				</li>
+					<li>
+					<a href="?event=public.APP.list" title="Agencies Planned Procurements for the latest financial year">Procurement Plans</a>
+				</li>
+					<li>
+					<a href="?event=public.reports.list" title="A list of standard reports on various types of procurement information">Reports</a>
+				</li>
+			</ul>
+			<h2>Info &amp; Links</h2>
+			<ul>
+					<li>
+					<a href="?" title="A Link to Australian Government agency and department corporate addresses">Agency Addresses</a>
+				</li>
+					<li>
+					<a href="?event=public.relatedlink.list" title="Links to other procurement related websites, national and international">Related Links</a>
+				</li>
+					<li>
+					<a href="?" title="A list of frequently asked questions and a link to download the AusTender Public User Guide">Help</a>
+				</li>
+					<li>
+					<a href="?" title="Contact information for the AusTender Help Desk">Contact Us</a>
+				</li>
+			</ul>
+			<h2>Policies</h2>
+			<ul>
+					<li>
+					<a href="?event=public.policydocs.list" title="Links to Australian Government procurement policy information">Policy Documents</a>
+				</li>
+					<li>
+					<a href="?event=public.document.list" title="Australian Government standard form contracts, templates and procurement related guidance">Procurement Document Library</a>
+				</li>
+					<li>
+					<a href="?event=public.termsOfUse" title="Conditions of use for both the AusTender website and ATM document distribution">Terms of Use</a>
+				</li>
+					<li>
+					<a href="?event=public.privacyStatement" title="The terms under which we manage information relating to and provided by AusTender Registered Users">Privacy Statement</a>
+				</li>
+			</ul>
+<div id="main-content"><div class="pad">
+<ol id="bread-crumbs">
+			<li class="first-item">
+				<a href="./?event=public.home">Home</a>
+			</li>
+	<li> Contract Notice View - CN1234</li>
+	<h1> Contract Notice View - CN1234</h1>
+		<div id="container">
+<div class="content">
+	<div id="intro">
+		<span><p><span>AusTender holds Contract and Standing Offer Notices for the 07/08 financial year forward. For information related to previous years, please contact the AusTender Help Desk.</span></p><span><p><strong><span>Subcontractors:&nbsp; For Commonwealth contracts that started on or after 1 December 2008, agencies are required to provide the names of any associated subcontractors on request.&nbsp; Information&nbsp;on subcontractors&nbsp;can be&nbsp;sought&nbsp;</span><span>directly from the relevant agency through the&nbsp;Agency Contact listed in each Contract Notice.</span></strong></p></span></span> 
+	</div>
+		</div>
+	<h2 class="highlight">IMU Contract Programmer: IMU-ICT040 (GAPS ID: 1611946)</h2>
+	<tr>
+		<th>CN ID</th>
+		<td>CN1234</td>
+	</tr>
+	<tr>
+		<th>Agency</th>
+		<td>Department of Veterans Affairs</td>
+	</tr>
+	<tr>
+		<th>Publish Date</th>
+		<td> 18-Aug-2006 </td>
+	</tr>
+	<tr>
+		<th>Category</th>
+		<td>Computer services</td>
+	</tr>
+	<tr>
+		<th>Contract Period</th>
+		<td>
+			1-Aug-2006  to 31-Jul-2007 
+		</td>
+	</tr>	
+		<tr>
+			<th>Contract Value (AUD)</th>
+		<td>
+			$156,200.00 
+		</td>
+		</tr>
+	<tr>
+		<th>Description</th>
+		<td>IMU Contract Programmer: IMU-ICT040 (GAPS ID: 1611946)</td>
+	</tr>
+	<tr>
+		<th>Procurement Method</th>
+		<td>Open</td>
+	</tr>
+	<tr>
+		<th>Confidentiality - Contract</th>
+		<td>
+		</td>
+	</tr>
+	<tr>
+		<th>Confidentiality - Outputs</th>
+		<td>
+			No 
+		</td>
+	</tr>
+			<tr>
+				<th>Consultancy</th>
+				<td>No</td>
+			</tr>
+<h2>Supplier Details</h2>
+	<tr>
+		<th>Name</th>
+		<td>
+		</td>
+	</tr>
+	<tr>
+		<th>Postal Address</th>
+		<td>GPO Box 2700</td>
+	</tr>
+	<tr>
+		<th>Town/City</th>
+		<td>CANBERRA CITY</td>
+	</tr>
+	<tr>
+		<th>Postcode</th>
+		<td>2601</td>
+	</tr>
+	<tr>
+		<th>State/Territory</th>
+		<td>ACT</td>
+	</tr>
+	<tr>
+		<th>Country</th>
+		<td>Australia</td>
+	</tr>
+	<tr>
+		<th>ABN</th>		
+		<td>
+			47 008 617 561 
+		</td>
+	</tr>
+<h2>Agency Details</h2>
+<table class="medium-th">
+	<tr>
+		<th>Contact Name/th>
+		<td>Monico, Raymond J</td>
+	</tr>
+	<tr>
+		<th>Contact Phone</th>
+		<td>(02) 6289 6016</td>
+	</tr>
+	<tr>
+		<th>Branch</th>
+		<td>Information Management Unit</td>
+	</tr>
+	<tr>
+		<th>Division</th>
+		<td>National Office Compensation Division</td>
+	</tr>
+	<tr>
+		<th>Office Postcode</th>
+		<td>2606</td>
+	</tr>
+	<tr>
+		<th>Agency Reference ID</th>
+		<td>107460</td>
+	</tr>
+</table> </div></div>
+</html> '); // inline copy of the "Contract Notice View - CN1234" page used as the parse fixture; its markup (including the malformed "Contact Name/th>" cell) is input data and must not be tidied
+foreach(pq('tr') as $tr) { // walk the table rows matched by pq('tr'); NOTE(review): the loop's closing brace is not visible in this view
+	$tra = dom_to_array($tr); // row converted to an array; the lines below read its 'th' and 'td' entries
+	$tra['th'] = trim(str_replace("/th>","",$tra['th'])); // drop a literal "/th>" from the label, presumably because of the malformed "<th>Contact Name/th>" cell in the fixture, then trim whitespace
+echo $tra['th']. " = " .trim(print_r($tra['td'],true))."<br>\n"; // emit one "label = value" line per row; print_r(..., true) returns the dump as a string instead of printing it