single record scraper
[contractdashboard.git] / admin / updateUNSPSC.php
blob:a/admin/updateUNSPSC.php -> blob:b/admin/updateUNSPSC.php
--- a/admin/updateUNSPSC.php
+++ b/admin/updateUNSPSC.php
@@ -1,30 +1,56 @@
 <?php
+
 error_reporting(E_ALL);
 
 include_once("../lib/common.inc.php");
 
-$unspscresult= $conn->prepare('select * from "UNSPSCcategories";');
+// Spelling and wording rewrites applied to each UNSPSC category title, so that
+// the category text stored on a contract notice can be looked up even when it
+// is not spelled exactly like the standard title. Each entry is
+// array(search, replace). Every rewrite is run against the original title on
+// its own (rewrites are never chained) with the case-sensitive str_replace().
+// NOTE(review): short patterns such as "er", "pe", "ane" and "ire" match anywhere
+// in a title, not only the intended words, so they also add keys that are
+// probably never looked up; harmless for lookups, but worth tightening.
+$titleVariants = array(
+    // some Australian spellings
+    array("iz", "is"),
+    array("fill", "fil"),
+    array("efense", "efence"),
+    array("rmored", "rmoured"),
+    array("er", "re"),
+    array("center", "centre"),
+    array("accesor", "accessor"),
+    array("lyz", "lys"),
+    array("ire", "yre"),
+    array("pe", "pae"),
+    array("ane", "anae"),
+    array("airplane", "aeroplane"),
+
+    // some divergence from standard
+    array("Information technology consultation services", "Forensic IT Services"),
+    array("Power cable", "Power cable installation and supply"),
+    array("Food Beverage and Tobacco Products", "Food and Beverage Products"),
+    array("Building and Construction and Maintenance Services", "Architectural services"),
+    array("Unemployment services", "Employment services"),
+
+    // some just plain wrong
+    array("Building and Construction and Maintenance Services", "Management and provision of all facilities engineering modification and maintenance services for a site or platform"),
+);
+
+// Lookup table: category title, plus every rewritten form of it => UNSPSC code.
+$unspsc = array();
+$unspscresult = $conn->prepare('select * from "UNSPSCcategories";');
 $unspscresult->execute();
 foreach ($unspscresult->fetchAll() as $row) {
-	$unspsc[$row['Title']] = $row['UNSPSC'];
-	// some Australian spellings
-	$isiz = str_replace("iz","is",$row['Title']);
-	$unspsc[$isiz] = $row['UNSPSC'];
-	$defence = str_replace("efense","efence",$row['Title']);
-	$unspsc[$defence] = $row['UNSPSC'];
-		$armor = str_replace("rmored","rmoured",$row['Title']);
-	$unspsc[$armor] = $row['UNSPSC'];
-	$center = str_replace("enter","entre",$row['Title']);
-	$unspsc[$center] = $row['UNSPSC'];
-	// some divergence from standard
-	$tobacco = str_replace("Food Beverage and Tobacco Products","Food and Beverage Products",$row['Title']);
-	$unspsc[$tobacco] = $row['UNSPSC'];
-	$architect = str_replace("Building and Construction and Maintenance Services","Architectural services",$row['Title']);
-	$unspsc[$architect] = $row['UNSPSC'];
-	// some just plain wrong
-	$noOilRigs = str_replace("Building and Construction and Maintenance Services","Management and provision of all facilities engineering modification and maintenance services for a site or platform",$row['Title']);
-	$unspsc[$noOilRigs] = $row['UNSPSC'];
-	
+    $unspsc[$row['Title']] = $row['UNSPSC'];
+    // Every variant of a title maps to that title's code, so the order in which
+    // the rewrites are applied within a row does not affect the result.
+    foreach ($titleVariants as $variant) {
+        $search = $variant[0];
+        $replace = $variant[1];
+        $unspsc[str_replace($search, $replace, $row['Title'])] = $row['UNSPSC'];
+    }
 }
 $query = 'SELECT "CNID","category","value"
 FROM contractnotice

@@ -32,20 +58,34 @@
 $emptycatresult = $conn->prepare($query);
 $missing = Array();
 $emptycatresult->execute();
-	foreach ($emptycatresult->fetchAll() as $record) {
-		
-	if ($unspsc[$record['category']] == "") {
-		$missing[$record['category']]= $missing[$record['category']]+ $record['value'];
+// Fetch everything up front so the UPDATE statement below never competes with
+// an open result set on the same connection.
+$records = $emptycatresult->fetchAll();
+// Prepared once and re-executed per record; bound parameters replace the
+// string-built SQL, so quoting of the values is handled by PDO.
+$update = $conn->prepare('UPDATE contractnotice SET "categoryUNSPSC" = :unspsc WHERE "CNID" = :cnid');
+foreach ($records as $record) {
+    $category = $record['category'];
+    if (!isset($unspsc[$category]) || $unspsc[$category] == "") {
+        // No UNSPSC code known for this category: total up the record values
+        // per category so the largest totals show up last in the sorted report.
+        $missing[$category] = (isset($missing[$category]) ? $missing[$category] : 0) + $record['value'];
 //		echo "<br>\n Category not found for: \n";
 //		print_r($record);
-	} else {
-	$result = $conn->exec('UPDATE contractnotice SET "categoryUNSPSC" = 
-\''.$unspsc[$record['category']].'\' where "CNID" = '.$record['CNID'].';');
-	if ($result) echo $record['CNID']. " set to ". ($unspsc[$record['category']]) . " <br>\n";
-	else echo "error".$conn->errorInfo();
-	}
-	} 
-asort($missing,SORT_NUMERIC);
+    } else {
+        // execute() returns false only on failure; an UPDATE that happens to
+        // touch zero rows is not reported as an error.
+        $ok = $update->execute(array(':unspsc' => $unspsc[$category], ':cnid' => $record['CNID']));
+        if ($ok) {
+            echo $record['CNID'] . " set to " . $unspsc[$category] . " <br>\n";
+        } else {
+            echo "error<br>";
+            print_r($update->errorInfo());
+        }
+    }
+}
+// Ascending by accumulated value, with the category keys preserved.
+asort($missing, SORT_NUMERIC);
 print_r($missing);
 ?>