--- a/admin/partialdata/scraper.php
+++ b/admin/partialdata/scraper.php
@@ -1,6 +1,6 @@
= 1024 && $i < 4; $i++) $size /= 1024;
@@ -9,15 +9,20 @@
$days = 4;
if (isset($_REQUEST['days'])) $days = $_REQUEST['days'];
-$startDate = strtotime("18-Nov-2007");
+$startDate = strtotime("05-Jun-2008"); // default start of the fetch window; the next line replaces it when the request carries 'startDate'
if (isset($_REQUEST['startDate'])) $startDate = $_REQUEST['startDate'];
+
function getFile($startDate, $days, $minVal, $maxVal) {
+global $split;
$endDate = strtotime(date("Y-m-d", $startDate)." +".$days." days");
$file = date("dMY",$startDate).'to'.date("dMY",$endDate).'val'.$minVal.'to'.$maxVal.'.xls';
echo "Fetching $file ($days days) ($minVal < value < $maxVal )... ";
$url = "https://www.tenders.gov.au/?event=public.advancedsearch.CNSONRedirect&type=cnEvent&atmType=archived%2Cclosed%2Cpublished%2Cproposed&agencyUUID=&agencyStatus=-1&portfolioUUID=&keyword=&KeywordTypeSearch=AllWord&CNID=&dateType=Publish+Date&dateStart=".date("d-M-Y",$startDate)."&dateEnd=".date("d-M-Y",$endDate)."&supplierName=&supplierABN=&valueFrom=".$minVal."&valueTo=".$maxVal."&ATMID=&AgencyRefId=&consultancy=&download=Download+results";
echo "";
$current = file_get_contents($url);
+if (strpos($current,"There are no results that match your selection.")> 0 ) {
+ echo "Empty file!
";
+}
if (strpos($current,"Your search returned more than 1000 results.") === false) {
file_put_contents($file, $current);
echo "$file saved
";
@@ -26,24 +31,44 @@
echo 'Load next '.($days*2).' days
';
echo 'Load next '.($days).' days with split
';
flush();
+if (!isset($_REQUEST['split']) && !$split) {
+echo "Success so fetching next $days...
";
+getFile($endDate, $days, "" , "");
+}
return true;
} else {
echo "Too many records!
";
echo 'Load '.($days/2).' days instead?
';
echo 'Split instead?
';
flush();
+if (!isset($_REQUEST['split']) && !$split) {
+echo "Failure so splitting ...
";
+ doSplit($startDate, $days);
+}
return false;
}
}
+function doSplit($startDate, $days) { // Fetch one date window ($startDate plus $days) as a series of value bands, one getFile() call per band.
+global $split; // shared with getFile(), which checks this flag before chaining to the next window or splitting again
+$split = true; // mark that a split is in progress so the getFile() calls below do not trigger further automatic fetches
+set_time_limit(20); // set PHP's execution time limit to 20 seconds, counted from this call
+getFile($startDate, $days, 0, 12000); // arguments 3 and 4 are the minimum and maximum contract value sent as valueFrom / valueTo
+getFile($startDate, $days, 12000, 16000);
+ getFile($startDate, $days, 16000, 20000);
+ getFile($startDate, $days, 20000, 30000);
+ getFile($startDate, $days, 30000, 40000);
+// getFile($startDate, $days, 40000, 80000); -- disabled: this band is covered by the two narrower bands that follow
+ getFile($startDate, $days, 40000, 60000);
+ getFile($startDate, $days, 60000, 80000);
+// getFile($startDate, $days, 80000, 300000); -- disabled: this band is covered by the two narrower bands that follow
+ getFile($startDate, $days, 80000, 150000);
+ getFile($startDate, $days, 150000, 300000);
+ getFile($startDate, $days, 300000, 999999999); // top band; NOTE(review): 999999999 presumably stands in for "no upper limit" -- confirm
+}
if (isset($_REQUEST['split'])) {
- getFile($startDate, $days, 0, 16000);
- getFile($startDate, $days, 16000, 20000);
-getFile($startDate, $days, 20000, 30000);
-getFile($startDate, $days, 30000, 40000);
- getFile($startDate, $days, 40000, 80000);
- getFile($startDate, $days, 80000, 300000);
- getFile($startDate, $days, 300000, 999999999);
+ doSplit($startDate, $days); // 'split' present in the request: fetch the window band by band; the band list lives in doSplit()
} else {
getFile($startDate, $days, "" , "");
}
?>
+