Link amendments made in new numbering scheme
[contractdashboard.git] / admin / partialdata / scraper.php
blob:a/admin/partialdata/scraper.php -> blob:b/admin/partialdata/scraper.php
  <?php
  // Default timezone used by every date()/strtotime() call in this script.
  date_default_timezone_set('Australia/Melbourne');
   
  function format_bytes($size) {
  $units = array(' B', ' KB', ' MB', ' GB', ' TB');
  for ($i = 0; $size >= 1024 && $i < 4; $i++) $size /= 1024;
  return round($size, 2).$units[$i];
  }
   
  $days = 4;
  if (isset($_REQUEST['days'])) $days = $_REQUEST['days'];
  $startDate = strtotime("18-Nov-2007");
  if (isset($_REQUEST['startDate'])) $startDate = $_REQUEST['startDate'];
  function getFile($startDate, $days, $minVal, $maxVal) {
  $endDate = strtotime(date("Y-m-d", $startDate)." +".$days." days");
  $file = date("dMY",$startDate).'to'.date("dMY",$endDate).'val'.$minVal.'to'.$maxVal.'.xls';
  echo "Fetching $file ($days days) ($minVal < value < $maxVal )... ";
  $url = "https://www.tenders.gov.au/?event=public.advancedsearch.CNSONRedirect&type=cnEvent&atmType=archived%2Cclosed%2Cpublished%2Cproposed&agencyUUID=&agencyStatus=-1&portfolioUUID=&keyword=&KeywordTypeSearch=AllWord&CNID=&dateType=Publish+Date&dateStart=".date("d-M-Y",$startDate)."&dateEnd=".date("d-M-Y",$endDate)."&supplierName=&supplierABN=&valueFrom=".$minVal."&valueTo=".$maxVal."&ATMID=&AgencyRefId=&consultancy=&download=Download+results";
  echo "<!-- $url -->";
  $current = file_get_contents($url);
  if (strpos($current,"Your search returned more than 1000 results.") === false) {
  file_put_contents($file, $current);
  echo "$file saved<br>";
  echo format_bytes(filesize($file))."<br>";
  echo '<a href="?startDate='.$endDate.'&days='.$days.'">Load next '.($days).' days </a><br>';
  echo '<a href="?startDate='.$endDate.'&days='.($days*2).'">Load next '.($days*2).' days </a><br>';
  echo '<a href="?startDate='.$endDate.'&days='.$days.'&split=yes">Load next '.($days).' days with split</a><br>';
  flush();
  return true;
  } else {
  echo "<font color=red>Too many records!</font><br>";
  echo '<a href="?startDate='.$startDate.'&days='.floor($days/2).'">Load '.($days/2).' days instead?</a><br>';
  echo '<a href="?startDate='.$startDate.'&days='.$days.'&split=yes">Split instead?</a><br>';
  flush();
  return false;
  }
  }
  if (isset($_REQUEST['split'])) {
  getFile($startDate, $days, 0, 16000);
  getFile($startDate, $days, 16000, 20000);
  getFile($startDate, $days, 20000, 30000);
  getFile($startDate, $days, 30000, 40000);
  getFile($startDate, $days, 40000, 80000);
  getFile($startDate, $days, 80000, 300000);
  getFile($startDate, $days, 300000, 999999999);
  } else {
  getFile($startDate, $days, "" , "");
  }
  ?>