single record scraper
[contractdashboard.git] / admin / partialdata / scrapesingle.php
blob:a/admin/partialdata/scrapesingle.php -> blob:b/admin/partialdata/scrapesingle.php
  <?php
  // Contract notice (CN) number to look up. In the visible part of this script
  // it is only referenced by the commented-out live search request further down.
  $cnid = 1234;
  // http://www.lastcraft.com/browser_documentation.php
  // http://code.google.com/p/phpquery/
  require('phpQuery-onefile.php');
  function dom_to_array($root)
  {
  $result = array();
   
  if ($root->hasAttributes())
  {
  $attrs = $root->attributes;
   
  foreach ($attrs as $i => $attr)
  $result[$attr->name] = $attr->value;
  }
   
  $children = $root->childNodes;
  if ($root->childNodes) {
  if ($children->length == 1)
  {
  $child = $children->item(0);
   
  if ($child->nodeType == XML_TEXT_NODE)
  {
  $result['_value'] = $child->nodeValue;
   
  if (count($result) == 1)
  return $result['_value'];
  else
  return $result;
  }
  }
   
  $group = array();
   
  for($i = 0; $i < $children->length; $i++)
  {
  $child = $children->item($i);
   
  if (!isset($result[$child->nodeName]))
  $result[$child->nodeName] = dom_to_array($child);
  else
  {
  if (!isset($group[$child->nodeName]))
  {
  $tmp = $result[$child->nodeName];
  $result[$child->nodeName] = array($tmp);
  $group[$child->nodeName] = 1;
  }
   
  $result[$child->nodeName][] = dom_to_array($child);
  }
  }
  }
   
  return $result;
  }
   
  // Base URL of the AusTender site. Not referenced anywhere in the visible part
  // of this script.
  $site = "https://www.tenders.gov.au/";
  // Disabled live request: would fetch the keyword search results for
  // "CN" . $cnid. Note that it assigns to $cn, whereas the hard-coded
  // search-result markup that follows is stored in $searchResult.
  //$cn = phpQuery::newDocument(file_get_contents("https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN".$cnid));
  // Hard-coded search-result markup for CN1234, handed straight to
  // phpQuery::newDocument(). Both anchors in it point at the contract notice
  // detail view (event=public.cn.view).
  $searchResult = phpQuery::newDocument('<table class="four-col">
  <tbody><tr>
  <th>CN ID</th>
   
  <td><a href="/?event=public.cn.view&amp;CNUUID=BB3B13EA-A700-39A5-F003088ACF798AF9">CN1234</a></td>
   
  <th>
  </th><td>
   
  </td></tr>
  <tr>
  <th>Agency</th>
  <td colspan="3">Department of Veterans Affairs</td>
  </tr>
  <tr>
  <th>Publish Date</th>
  <td colspan="3"> 18-Aug-2006 </td>
  </tr>
   
  <tr>
  <th>Category</th>
  <td colspan="3">Computer services</td>
  </tr>
   
  <tr>
  <th>Contract Period</th>
  <td colspan="3"> 1-Aug-2006 to 31-Jul-2007 </td>
  </tr>
  <tr>
  <th>Contract Value (AUD)</th>
  <td colspan="3"> $156,200.00 </td>
  </tr>
   
  <tr>
  <th>Supplier Name</th>
  <td colspan="3">WIZARD INFORMATION SERVICES PTY LTD</td>
  </tr>
  <tr>
  <th></th>
  <td colspan="3">
  <p>
  <span class="last-updated"><strong>Last Updated:</strong> 18-Aug-2006 4:51 pm (ACT Local time)</span>
   
  <strong><a href="/?event=public.cn.view&amp;CNUUID=BB3B13EA-A700-39A5-F003088ACF798AF9">Full Details</a></strong>
   
  </p>
  </td>
  </tr>
  </tbody></table>');
  foreach(pq('a') as $a) {
  if (strpos($a->getAttribute("href"),"public.cn.view") >0 ) echo $a->getAttribute("href");
  break;
  }
  $cn = phpQuery::newDocument('
   
   
  <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
   
  <html lang="en-AU">
   
  <head>
   
  <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
   
  <title>AusTender: Contract Notice View - CN1234</title>
   
   
   
  <meta name="language" content="en-AU">
   
  <meta name="description" content="AusTender provides centralised publication of Australian Government business opportunities, annual procurement plans, multi-use lists and contracts awarded.">
   
  <meta name="keywords" content="tenders, australian government tenders, austender, austenders, australian tenders, federal government tenders">
   
   
   
  <link rel="stylesheet" type="text/css" href="/styles/styles_frontend_main.css" media="all">
   
  <link rel="stylesheet" type="text/css" href="/styles/styles_frontend_print.css" media="print">
   
  <link rel="stylesheet" type="text/css" href="/styles/styles_reports.css" media="all">
   
  <link rel="stylesheet" href="/styles/calendar-atii.css" type="text/css" media="all">
   
  <link rel="shortcut icon" href="/favicon.ico">
   
   
   
   
   
  </head>
   
   
   
  <body>
   
   
   
  <div id="header">
   
  <a href="/"><img src="/images/header_logo.gif" alt="AusTender - The Australian Government Tender System"></a>
   
  </div>
   
   
   
  <form action="./" method="get" id="header-tools" onSubmit="return (this.keyword.value.length != 0)">
   
   
   
  <input
   
  name="event"
   
  id="form-event"
   
  type="hidden"
   
  value="public.advancedsearch.keyword"
   
  >
   
   
   
  <a href="/?event=public.home" id="home-link">Home</a>
   
  <label class="hidden" for="search-text">Search</label>
   
  <input type="text" value="" name="keyword" id="search-text">
   
  <input type="submit" value="Search" id="search-submit">
   
  <a href="?event=public.advancedsearch.home">Advanced Search</a>
   
  </form>
   
   
   
  <div id="left-col-wrapper">
   
  <div id="left-col"><div class="pad">
   
   
   
  <form action="?event=public.login" method="post" enctype="multipart/form-data" id="login-form">
   
  <label for="login-username">Username</label> (email)
   
  <br>
   
  <input type="text" name="pub-auth-username" id="login-username" value="">
   
   
   
  <label for="login-password">Password</label>
   
  <br>
   
  <input type="password" name="pub-auth-password" id="login-password" value="">
   
  <input type="Submit" value="Login" id="login-submit">
   
   
   
  <p>
   
  &raquo; <a href="?event=public.registereduser.forgotPassFormStep1">Forgotten password?</a>
   
  <br>
   
  &raquo; <a href="?event=public.registereduser.new">New user registration</a>
   
  </p>
   
   
   
  </form>
   
  <h2>View</h2>
   
   
   
  <ul>
   
   
   
  <li>
   
   
   
  <a href="?event=public.atmproposed.list" title="Information about potential procurements prior to their release to the market">Pre-Release Notices</a>
   
  </li>
   
   
   
  <li>
   
   
   
  <a href="?event=public.ATM.list" title="Business opportunities that are currently out to the market">Current ATMs</a>
   
  </li>
   
   
   
  <li>
   
   
   
  <a href="?event=public.ATM.closed" title="Business opportunities that have closed in the last 30 days">Closed ATMs</a>
   
  </li>
   
   
   
  <li>
   
   
   
  <a href="?event=public.CN.search" title="Notices of contracts awarded to suppliers">Contract Notices</a>
   
  </li>
   
   
   
  <li>
   
   
   
  <a href="?event=public.SON.search" title="Notices of standing arrangements with suppliers, including panels and period contracts">Standing Offer Notices</a>
   
  </li>
   
   
   
  <li>
   
   
   
  <a href="?event=public.MUL.list" title="Multi-Use Lists (MULs) currently open for applications for inclusion">Current Multi-Use Lists</a>
   
  </li>
   
   
   
  <li>
   
   
   
  <a href="?event=public.MUL.closed" title="Multi-Use Lists (MULs) that have closed in the last 30 days">Closed Multi-Use Lists</a>
   
  </li>
   
   
   
  <li>
   
   
   
  <a href="?event=public.APP.list" title="Agencies Planned Procurements for the latest financial year">Procurement Plans</a>
   
  </li>
   
   
   
  <li>
   
   
   
  <a href="?event=public.reports.list" title="A list of standard reports on various types of procurement information">Reports</a>
   
  </li>
   
   
   
  </ul>
   
   
   
  <h2>Info &amp; Links</h2>
   
   
   
  <ul>
   
   
   
  <li>
   
   
   
  <a href="?event=public.agency.list" title="A Link to Australian Government agency and department corporate addresses">Agency Addresses</a>
   
  </li>
   
   
   
  <li>
   
   
   
  <a href="?event=public.relatedlink.list" title="Links to other procurement related websites, national and international">Related Links</a>
   
  </li>
   
   
   
  <li>
   
   
   
  <a href="?event=public.help.list" title="A list of frequently asked questions and a link to download the AusTender Public User Guide">Help</a>
   
  </li>
   
   
   
  <li>
   
   
   
  <a href="?event=public.contactus.show" title="Contact information for the AusTender Help Desk">Contact Us</a>
   
  </li>
   
   
   
  </ul>
   
   
   
  <h2>Policies</h2>