From: Alexander Sadleir Date: Sat, 03 Dec 2011 04:53:24 +0000 Subject: Remove troublesome scraper script X-Git-Url: http://maxious.lambdacomplex.org/git/?p=contractdashboard.git&a=commitdiff&h=dcbd87085b9d28dd871e847df93b1a3e5c7f4802 --- Remove troublesome scraper script --- --- a/admin/fixoldamend.php +++ b/admin/fixoldamend.php @@ -20,7 +20,7 @@ echo "$oldCN => $newCN (from parent CN $parentCN)
\n"; } else { - echo "parent CN unexpected - $oldCN doesn't look like child of {$record['parentCN']}
\n"; +// echo "parent CN unexpected - $oldCN doesn't look like child of {$record['parentCN']}
\n"; } } --- a/admin/partialdata/scraper.php +++ /dev/null @@ -1,74 +1,1 @@ -= 1024 && $i < 4; $i++) $size /= 1024; - return round($size, 2).$units[$i]; -} -$days = 4; -if (isset($_REQUEST['days'])) $days = $_REQUEST['days']; -$startDate = strtotime("05-Jun-2008"); -if (isset($_REQUEST['startDate'])) $startDate = $_REQUEST['startDate']; - -function getFile($startDate, $days, $minVal, $maxVal) { -global $split; - $endDate = strtotime(date("Y-m-d", $startDate)." +".$days." days"); -$file = date("dMY",$startDate).'to'.date("dMY",$endDate).'val'.$minVal.'to'.$maxVal.'.xls'; -echo "Fetching $file ($days days) ($minVal < value < $maxVal )... "; -$url = "https://www.tenders.gov.au/?event=public.advancedsearch.CNSONRedirect&type=cnEvent&atmType=archived%2Cclosed%2Cpublished%2Cproposed&agencyUUID=&agencyStatus=-1&portfolioUUID=&keyword=&KeywordTypeSearch=AllWord&CNID=&dateType=Publish+Date&dateStart=".date("d-M-Y",$startDate)."&dateEnd=".date("d-M-Y",$endDate)."&supplierName=&supplierABN=&valueFrom=".$minVal."&valueTo=".$maxVal."&ATMID=&AgencyRefId=&consultancy=&download=Download+results"; -echo ""; -$current = file_get_contents($url); -if (strpos($current,"There are no results that match your selection.")> 0 ) { - echo "Empty file!
"; -} -if (strpos($current,"Your search returned more than 1000 results.") === false) { - file_put_contents($file, $current); - echo "$file saved
"; - echo format_bytes(filesize($file))."
"; - echo 'Load next '.($days).' days
'; - echo 'Load next '.($days*2).' days
'; - echo 'Load next '.($days).' days with split
'; - flush(); -if (!isset($_REQUEST['split']) && !$split) { -echo "Success so fetching next $days...
"; -getFile($endDate, $days, "" , ""); -} - return true; -} else { - echo "Too many records!
"; - echo 'Load '.($days/2).' days instead?
'; - echo 'Split instead?
'; - flush(); -if (!isset($_REQUEST['split']) && !$split) { -echo "Failure so splitting ...
"; - doSplit($startDate, $days); -} - return false; -} -} -function doSplit($startDate, $days) { -global $split; -$split = true; -set_time_limit(20); -getFile($startDate, $days, 0, 12000); -getFile($startDate, $days, 12000, 16000); - getFile($startDate, $days, 16000, 20000); - getFile($startDate, $days, 20000, 30000); - getFile($startDate, $days, 30000, 40000); -// getFile($startDate, $days, 40000, 80000); - getFile($startDate, $days, 40000, 60000); - getFile($startDate, $days, 60000, 80000); -// getFile($startDate, $days, 80000, 300000); - getFile($startDate, $days, 80000, 150000); - getFile($startDate, $days, 150000, 300000); - getFile($startDate, $days, 300000, 999999999); -} -if (isset($_REQUEST['split'])) { - doSplit($startDate, $days); -} else { - getFile($startDate, $days, "" , ""); -} -?> - --- a/admin/updateUNSPSC.php +++ b/admin/updateUNSPSC.php @@ -19,20 +19,29 @@ $unspsc[$armor] = $row['UNSPSC']; $erre = str_replace("er", "re", $row['Title']); $unspsc[$erre] = $row['UNSPSC']; + $center = str_replace("center", "centre", $row['Title']); + $unspsc[$center] = $row['UNSPSC']; + $accessory = str_replace("accesor", "accessor", $row['Title']); + $unspsc[$accessory] = $row['UNSPSC']; + $lyslyz = str_replace("lyz", "lys", $row['Title']); + $unspsc[$lyslyz] = $row['UNSPSC']; $tire = str_replace("ire", "yre", $row['Title']); $unspsc[$tire] = $row['UNSPSC']; - $aeroplane = str_replace("airplane","aeroplane", $row['Title']); - $unspsc[$aeroplane] = $row['UNSPSC']; - $lyslyz = str_replace("lyz", "lys", $row['Title']); - $unspsc[$lyslyz] = $row['UNSPSC']; + + $pe = str_replace("pe", "pae", $row['Title']); + $unspsc[$pe] = $row['UNSPSC']; + $ane = str_replace("ane", "anae", $row['Title']); + $unspsc[$ane] = $row['UNSPSC']; + $airo = str_replace("airplane", "aeroplane", $row['Title']); + $unspsc[$airo] = $row['UNSPSC']; // some divergence from standard + $forensicit = str_replace("Information technology consultation services", "Forensic IT Services", $row['Title']); + $unspsc[$forensicit] = $row['UNSPSC']; + $powercable = str_replace( "Power cable", "Power cable installation and supply", $row['Title']); + $unspsc[$powercable] = $row['UNSPSC']; $tobacco = str_replace("Food Beverage and Tobacco Products", "Food and Beverage Products", $row['Title']); $unspsc[$tobacco] = $row['UNSPSC']; $architect = str_replace("Building and Construction and Maintenance Services", "Architectural services", $row['Title']); - $unspsc[$architect] = $row['UNSPSC']; - $powercable = str_replace("Power cable", "Power cable installation and supply", $row['Title']); - $unspsc[$powercable] = $row['UNSPSC']; - $forensicIT = str_replace("Building and Construction and Maintenance Services", "Architectural services", $row['Title']); $unspsc[$architect] = $row['UNSPSC']; // some just plain wrong $noOilRigs = str_replace("Building and Construction and Maintenance Services", "Management and provision of all facilities engineering modification and maintenance services for a site or platform", $row['Title']); --- a/exportData.csv.php +++ b/exportData.csv.php @@ -3,33 +3,17 @@ include_once("./lib/common.inc.php"); setlocale(LC_CTYPE, 'C'); // source: http://stackoverflow.com/questions/81934/easy-way-to-export-a-sql-table-without-access-to-the-server-or-phpmyadmin#81951 - -$unspsc = Array(); -$unspscresult = $conn->prepare('select * from "UNSPSCcategories" where "UNSPSC"::text like \'%00000\';'); -$unspscresult->execute(); -foreach ($unspscresult->fetchAll() as $row) { - $unspsc[$row['UNSPSC']] = $row['Title']; -} - $query = $conn->prepare(' -SELECT "CNID",contractnotice."agencyName",agency.abn as "agencyABN", -EXTRACT(EPOCH FROM "publishDate") as "publishDate", -EXTRACT(EPOCH FROM "contractStart") as "contractStart", -EXTRACT(EPOCH FROM "contractEnd") as "contractEnd", -value,description,"procurementMethod",category,"categoryUNSPSC", -(substr( "categoryUNSPSC"::text, 0, 2 ) || \'0000000\'::text) as "cat1", - (substr( "categoryUNSPSC"::text, 0, 3 ) || \'000000\'::text) as "cat2", - (substr( "categoryUNSPSC"::text, 0, 4 ) || \'00000\'::text) as "cat3", +SELECT "CNID",contractnotice."agencyName",agency.abn as "agencyABN",EXTRACT(EPOCH FROM "publishDate") as "publishDate",EXTRACT(EPOCH FROM "contractStart") as "contractStart",EXTRACT(EPOCH FROM "contractEnd") as "contractEnd",value,description,"procurementMethod",category,"categoryUNSPSC", "supplierABN","supplierName", ( case when "supplierABN" != 0 THEN "supplierABN"::text ELSE "supplierName" END) as supplierID, (\'https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN\'::text || "CNID"::text) as sourceURL -FROM contractnotice join agency on contractnotice."agencyName"=agency."agencyName" -where "childCN" is null' +FROM contractnotice join agency on contractnotice."agencyName"=agency."agencyName" where "childCN" is null' , array(PDO::ATTR_CURSOR => PDO::FETCH_ORI_NEXT)); // "supplierCity","supplierPostcode","supplierCountry","contactPostcode", -// +// (substr( "categoryUNSPSC"::text, 0, 2 ) || \'0000000\'::text) as "categoryUNSPSClv1", "categoryUNSPSC", (substr( "categoryUNSPSC"::text, 0, 3 ) || \'000000\'::text) as "categoryUNSPSClv2" "categoryUNSPSC", (substr( "categoryUNSPSC"::text, 0, 4 ) || \'00000\'::text as "categoryUNSPSClv3") $query->execute(); $errors = $conn->errorInfo(); if ($errors[2] != "") { @@ -37,8 +21,8 @@ } $num_fields = $query->columnCount(); -$headers = Array(); -for ($i = 0; $i < $num_fields; $i++) { // for each column in query, make a CSV header +$headers = array(); +for ($i = 0; $i < $num_fields; $i++) { $meta = $query->getColumnMeta($i); $headers[] = $meta['name']; } @@ -57,10 +41,6 @@ || $headers[$key] == "contractEnd") { $colvalue = date("Y-m-d", $colvalue); } - if ($headers[$key] == "cat1" || $headers[$key] == "cat2" - || $headers[$key] == "cat3") { - $colvalue = $unspsc[$colvalue]; - } } fputcsv($fp, array_values($row)); }