Patch for admin/partialdata/scraper.php: automatic continuation and value-band splitting.

  * getFile() advances to the following window after a save and calls doSplit()
    after a "more than 1000 results" response, unless a split was requested or
    is already running.
  * doSplit() fetches one window as ten value bands and is shared by the
    automatic path and the ?split= request path.
  * Request parameters are forced to integers, a failed download is reported
    instead of being saved, and auto-advance stops once the next window would
    start in the future.

NOTE(review): leading indentation of the context and removed lines could not be
recovered from the copy this patch was restored from. Apply with
whitespace-insensitive matching, or by hand.

--- a/admin/partialdata/scraper.php
+++ b/admin/partialdata/scraper.php
@@ -1,6 +1,7 @@
 <?php
 date_default_timezone_set('Australia/Melbourne');
-
+// Set to true by doSplit(); while true, getFile() neither auto-advances nor auto-splits.
+$split = false;
 function format_bytes($size) {
 $units = array(' B', ' KB', ' MB', ' GB', ' TB');
 for ($i = 0; $size >= 1024 && $i < 4; $i++) $size /= 1024;
@@ -9,15 +10,38 @@
 
 $days = 4;
-if (isset($_REQUEST['days'])) $days = $_REQUEST['days'];
-$startDate = strtotime("18-Nov-2007");
-if (isset($_REQUEST['startDate'])) $startDate = $_REQUEST['startDate'];
+// Request values are echoed into the page and passed to date(), so force them to integers.
+if (isset($_REQUEST['days'])) $days = max(1, (int) $_REQUEST['days']);
+$startDate = strtotime("05-Jun-2008");
+if (isset($_REQUEST['startDate'])) $startDate = (int) $_REQUEST['startDate'];
+
+/**
+ * Download one search export for the window of $days days starting at $startDate,
+ * sending $minVal/$maxVal as the valueFrom/valueTo filters ("" sends them empty).
+ *
+ * The response is saved to a relative .xls path unless the site reports more than
+ * 1000 results. Outside split mode the function then carries on by itself: after a
+ * save it fetches the following window, after an overflow it calls doSplit().
+ *
+ * @return bool true once the file was saved, false on download failure or overflow
+ */
 function getFile($startDate, $days, $minVal, $maxVal) {
+global $split;
 $endDate = strtotime(date("Y-m-d", $startDate)." +".$days." days");
 $file = date("dMY",$startDate).'to'.date("dMY",$endDate).'val'.$minVal.'to'.$maxVal.'.xls';
 echo "Fetching $file ($days days) ($minVal < value < $maxVal )... ";
 $url = "https://www.tenders.gov.au/?event=public.advancedsearch.CNSONRedirect&type=cnEvent&atmType=archived%2Cclosed%2Cpublished%2Cproposed&agencyUUID=&agencyStatus=-1&portfolioUUID=&keyword=&KeywordTypeSearch=AllWord&CNID=&dateType=Publish+Date&dateStart=".date("d-M-Y",$startDate)."&dateEnd=".date("d-M-Y",$endDate)."&supplierName=&supplierABN=&valueFrom=".$minVal."&valueTo=".$maxVal."&ATMID=&AgencyRefId=&consultancy=&download=Download+results";
 echo "<!-- $url -->";
 $current = file_get_contents($url);
+// file_get_contents() returns false on failure; without this check an empty file
+// would be saved and the window silently skipped.
+if ($current === false) {
+    echo "<font color=red>Download failed!</font><br>";
+    flush();
+    return false;
+}
+if (strpos($current, "There are no results that match your selection.") !== false) {
+    echo "<font color=red>Empty file!</font><br>";
+}
 if (strpos($current,"Your search returned more than 1000 results.") === false) {
 file_put_contents($file, $current);
 echo "$file saved<br>";
@@ -26,24 +50,45 @@
 echo '<a href="?startDate='.$endDate.'&days='.($days*2).'">Load next '.($days*2).' days </a><br>';
 echo '<a href="?startDate='.$endDate.'&days='.$days.'&split=yes">Load next '.($days).' days with split</a><br>';
 flush();
+// Only advance while the next window starts in the past; without this bound the
+// chain keeps requesting future windows indefinitely.
+if (!isset($_REQUEST['split']) && !$split && $endDate < time()) {
+    echo "Success so fetching next $days... <br>";
+    getFile($endDate, $days, "" , "");
+}
 return true;
 } else {
 echo "<font color=red>Too many records!</font><br>";
 echo '<a href="?startDate='.$startDate.'&days='.floor($days/2).'">Load '.($days/2).' days instead?</a><br>';
 echo '<a href="?startDate='.$startDate.'&days='.$days.'&split=yes">Split instead?</a><br>';
 flush();
+if (!isset($_REQUEST['split']) && !$split) {
+    echo "Failure so splitting ... <br>";
+    doSplit($startDate, $days);
+}
 return false;
 }
 }
+/**
+ * Fetch the window once per value band instead of in a single request.
+ *
+ * Sets the global $split flag first, so the getFile() calls made here only report
+ * their result and do not auto-advance or split again.
+ */
+function doSplit($startDate, $days) {
+    global $split;
+    $split = true;
+    set_time_limit(20);
+    // Band boundaries; each adjacent pair becomes one getFile() request.
+    $bounds = array(0, 12000, 16000, 20000, 30000, 40000, 60000, 80000, 150000, 300000, 999999999);
+    for ($i = 0; $i + 1 < count($bounds); $i++) {
+        getFile($startDate, $days, $bounds[$i], $bounds[$i + 1]);
+    }
+}
 if (isset($_REQUEST['split'])) {
- getFile($startDate, $days, 0, 16000);
- getFile($startDate, $days, 16000, 20000);
-getFile($startDate, $days, 20000, 30000);
-getFile($startDate, $days, 30000, 40000);
- getFile($startDate, $days, 40000, 80000);
- getFile($startDate, $days, 80000, 300000);
- getFile($startDate, $days, 300000, 999999999);
+    doSplit($startDate, $days);
 } else {
 getFile($startDate, $days, "" , "");
 }
 ?>
+