Add possible export for ap.org Overview document categoriser
Add possible export for ap.org Overview document categoriser

--- a/admin/import.php
+++ b/admin/import.php
@@ -1,271 +1,258 @@
 <?php
+
 include_once ("../lib/common.inc.php");
-function processFile($fpath, $tablename)
-{
-	global $conn;
-	$row = 1;
-	$handle = fopen($fpath, "r");
-	//"t" mode string translates windows line breaks to unix
-	$datamapping0711 = array(
-		"Agency" => "agencyName",
-		"Parent CN ID" => "parentCN",
-		"CN ID" => "CNID",
-		"Publish Date" => "publishDate",
-		"Amendment Date" => "amendDate",
-		"Status" => "",
-		"StartDate" => "contractStart",
-		"EndDate" => "contractEnd",
-		"Value" => "value",
-		"Description" => "description",
-		"Agency Ref Id" => "agencyID",
-		"Category" => "category",
-		"Procurement Method" => "procurementMethod",
-		"ATM ID" => "atmID",
-		"SON ID" => "SONID",
-		"Confidentiality - Contract" => "confidentialityContract",
-		"Confidentiality - Contract Reason(s)" => "confidentialityContractReason",
-		"Confidentiality - Outputs" => "confidentialityOutputs",
-		"Confidentiality - Outputs Reason(s)" => "confidentialityOutputsReason",
-		"Consultancy" => "consultancy",
-		"Consultancy Reason(s)" => "consultancyReason",
-		"Amendment Reason" => "amendmentReason",
-		"Supplier Name" => "supplierName",
-		"Supplier Address" => "supplierAddress",
-		"Supplier City" => "supplierCity",
-		"Supplier Postcode" => "supplierPostcode",
-		"Supplier Country" => "supplierCountry",
-		"Supplier ABNExempt" => "supplierABNExempt",
-		"Supplier ABN" => "supplierABN",
-		"Agency Branch" => "contactBranch",
-		"Agency Divison" => "contactDivision",
-		"Agency Postcode" => "contactPostcode",
-		"" => ""
-	);
-	$headers;
-	$contractNoticeFields = array(
-		"importFile",
-		"agencyName",
-		"parentCN",
-		"CNID",
-		"publishDate",
-		"amendDate",
-		"contractStart",
-		"contractEnd",
-		"value",
-		"description",
-		"agencyID",
-		"category",
-		"procurementMethod",
-		"atmID",
-		"SONID",
-		"confidentialityContract",
-		"confidentialityContractReason",
-		"confidentialityOutputs",
-		"confidentialityOutputsReason",
-		"consultancy",
-		"consultancyReason",
-		"amendmentReason",
-		"supplierName",
-		"supplierAddress",
-		"supplierCity",
-		"supplierPostcode",
-		"supplierCountry",
-		"supplierABNExempt",
-		"supplierABN",
-		"contactBranch",
-		"contactDivision",
-		"contactPostcode"
-	);
-	$agencyFields = array(
-		"agencyName"
-	);
-	$supplierFields = array(
-		"supplierName",
-		"supplierAddress",
-		"supplierCity",
-		"supplierPostcode",
-		"supplierCountry",
-		"supplierABNExempt",
-		"supplierABN"
-	);
-	if ($tablename == "contractnotice") {
-		$contractNoticeInsertQ = 'INSERT INTO contractnotice ("' . implode('" , "', $contractNoticeFields) . '") VALUES ( ';
-		foreach ($contractNoticeFields as $key => $f) {
-			$contractNoticeInsertQ.= ($key == 0 ? "" : ", ") . "?";
-		}
-		$contractNoticeInsertQ.= ");";
-		$contractNoticeInsertQ = $conn->prepare($contractNoticeInsertQ);
-	}
-	else if ($tablename == "supplierdetails") {
-		$supplierInsertQ = 'INSERT INTO supplierdetails ("' . implode('" , "', $supplierFields) . '") VALUES ( ';
-		foreach ($supplierFields as $key => $f) {
-			$supplierInsertQ.= ($key == 0 ? "" : ", ") . "?";
-		}
-		$supplierInsertQ.= ");";
-		$supplierInsertQ = $conn->prepare($supplierInsertQ);
-	}
-	else if ($tablename == "agency") {
-		$agencyInsertQ = 'INSERT INTO agency ("' . implode('" , "', $agencyFields) . '") VALUES ( ';
-		foreach ($agencyFields as $key => $f) {
-			$agencyInsertQ.= ($key == 0 ? "" : ", ") . "?";
-		}
-		$agencyInsertQ.= ");";
-		$agencyInsertQ = $conn->prepare($agencyInsertQ);
-	}
-	while (($data = fgetcsv($handle, 1000, "\t")) !== false) {
-		$num = count($data);
-		if ($row == 3) {
-			$headers = $data;
-		}
-		elseif ($row > 3) {
-			if ($num > count($datamapping0711)) {
-				die("<font color=red>Error in data import; data mapping fields out of bounds or changed</font><br>" . $fname . print_r($data));
-			}
-			$contractNoticeInsert = Array();
-			$supplierInsert = Array();
-			$agencyInsert = Array();
-			$contractNoticeInsert[] = $fpath;
-			$keys = array_keys($datamapping0711);
-			for ($c = 0; $c < $num; $c++) {
-				$data[$c] = trim($data[$c], "=");
-				$data[$c] = trim($data[$c], "\"");
-				if ($tablename == "contractnotice") {
-					if (in_array(($datamapping0711[$headers[$c]]) , $contractNoticeFields)) {
-						if (($datamapping0711[$headers[$c]]) == "parentCN" || ($datamapping0711[$headers[$c]]) == "CNID") {
-							$data[$c] = substr($data[$c], 2); // take off the "CN" prefix
-							if ($data[$c] > 0) {
-								$contractNoticeInsert[] = $data[$c];
-							}
-							else {
-								$contractNoticeInsert[] = 0;
-							}
-						}
-						elseif (($datamapping0711[$headers[$c]]) == "supplierABN") {
-							if ($data[$c] > 0) {
-								$contractNoticeInsert[] = $data[$c];
-							}
-							else {
-								$contractNoticeInsert[] = null;
-							}
-						}
-						elseif (($datamapping0711[$headers[$c]]) == "amendDate" || ($datamapping0711[$headers[$c]]) == "publishDate" || ($datamapping0711[$headers[$c]]) == "contractStart" || ($datamapping0711[$headers[$c]]) == "contractEnd") {
-							$contractNoticeInsert[] = date('Y-m-d H:i:s', strtotime($data[$c]));
-						}
-						else {
-							if (strstr("\" =", $data[$c] > 0)) {
-								die("Invalid Description field" . $contractNoticeInsert);
-							}
-$colvalue =  preg_replace( '/[^[:print:]]/', '',utf8_encode( $data[$c]));
-
-							$contractNoticeInsert[] = $colvalue;
-						}
-					}
-				}
-				else if ($tablename == "supplierdetails") {
-					if (in_array(($datamapping0711[$headers[$c]]) , $supplierFields)) {
-						if (($datamapping0711[$headers[$c]]) == "supplierABN") {
-							if ($data[$c] > 0) {
-								$contractNoticeInsert[] = $data[$c];
-							}
-							else {
-								$contractNoticeInsert[] = 0;
-							}
-						}
-						else {
-							$supplierInsert[] = $data[$c];
-						}
-					}
-				}
-				else if ($tablename == "agency") {
-					if (in_array(($datamapping0711[$headers[$c]]) , $agencyFields)) {
-						$agencyInsert[] = $data[$c];
-					}
-				}
-			}
-			flush();
-			if ($tablename == "contractnotice") {
-				$contractNoticeInsertQ->execute($contractNoticeInsert);
-				$errors = $conn->errorInfo();
-				if ($errors[1] == 7 && strpos($errors[2], "duplicate key")) {
-				}
-				elseif ($errors[1] == 0) {
-					$success++;
-				}
-				else {
-					foreach ($contractNoticeFields as $key => $cnf) {
-						echo var_dump($contractNoticeInsert[$key]) . $cnf . "<br>";
-					}
-					echo $data[2] . " failed CN insert.<br>" . print_r($errors, true) . "<br> row $row <br><br>\n";
-				}
-			}
-			else if ($tablename == "supplierdetails") {
-				$supplierInsertQ->execute($supplierInsert);
-				$errors = $conn->errorInfo();
-				if ($errors[1] == 7 && strpos($errors[2], "duplicate key")) {
-				}
-				elseif ($errors[1] == 0) {
-					$success++;
-				}
-				else {
-					echo $data[2] . " failed supplier insert.<br>" . print_r($errors, true) . " <br> " . print_r($supplierInsert, true) . "<br> $row <br><br>\n";
-				}
-			}
-			else if ($tablename == "agency") {
-				$agencyInsertQ->execute($agencyInsert);
-				$errors = $conn->errorInfo();
-				if ($errors[1] == 7 && strpos($errors[2], "duplicate key")) {
-				}
-				elseif ($errors[1] == 0) {
-					$success++;
-				}
-				else {
-					echo $data[2] . " failed agency insert.<br>" . print_r($errors, true) . " <br> " . print_r($agencyInsert, true) . "<br> $row <br><br>\n";
-				}
-			}
-			flush();
-			//echo "<hr>\n";
-			
-		}
-		$row++;
-	}
-	fclose($handle);
-        // run post import data processing
+
+// Columns written to contractnotice, in the order their values are bound.
+$contractNoticeFields = array(
+    "importFile",
+    "agencyName",
+    "parentCN",
+    "CNID",
+    "publishDate",
+    "amendDate",
+    "contractStart",
+    "contractEnd",
+    "value",
+    "description",
+    "agencyID",
+    "category",
+    "procurementMethod",
+    "atmID",
+    "SONID",
+    "confidentialityContract",
+    "confidentialityContractReason",
+    "confidentialityOutputs",
+    "confidentialityOutputsReason",
+    "consultancy",
+    "consultancyReason",
+    "amendmentReason",
+    "supplierName",
+    "supplierAddress",
+    "supplierCity",
+    "supplierPostcode",
+    "supplierCountry",
+    "supplierABNExempt",
+    "supplierABN",
+    "contactBranch",
+    "contactDivision",
+    "contactPostcode"
+);
+// Columns written for agencies and for suppliers respectively.
+$agencyFields = array("agencyName");
+$supplierFields = array(
+    "supplierName",
+    "supplierAddress",
+    "supplierCity",
+    "supplierPostcode",
+    "supplierCountry",
+    "supplierABNExempt",
+    "supplierABN"
+);
+// Each insert statement below lists its columns explicitly and takes one
+// positional "?" placeholder per column, so execute() must be given the
+// values in the same order as the matching field list above.
+// contractnotice
+$contractNoticeInsertQ = 'INSERT INTO contractnotice ("' . implode('" , "', $contractNoticeFields) . '") VALUES ( ';
+$contractNoticeInsertQ.= implode(", ", array_fill(0, count($contractNoticeFields), "?"));
+$contractNoticeInsertQ.= ");";
+$contractNoticeInsertQ = $conn->prepare($contractNoticeInsertQ);
+
+// supplierdetails
+$supplierInsertQ = 'INSERT INTO supplierdetails ("' . implode('" , "', $supplierFields) . '") VALUES ( ';
+$supplierInsertQ.= implode(", ", array_fill(0, count($supplierFields), "?"));
+$supplierInsertQ.= ");";
+$supplierInsertQ = $conn->prepare($supplierInsertQ);
+
+// agency_nametoabn
+$agencyInsertQ = 'INSERT INTO agency_nametoabn ("' . implode('" , "', $agencyFields) . '") VALUES ( ';
+$agencyInsertQ.= implode(", ", array_fill(0, count($agencyFields), "?"));
+$agencyInsertQ.= ");";
+$agencyInsertQ = $conn->prepare($agencyInsertQ);
+
+/**
+ * Import the rows of one tab-separated AusTender export into one table.
+ *
+ * Row 3 of the file is the header row; each later row is mapped through
+ * $datamapping0711 and inserted with the statement matching $tablename.
+ *
+ * @param string $fpath     path of the export file to read
+ * @param string $tablename "contractnotice", "supplierdetails" or "agency"
+ * @return int number of rows inserted without a database error
+ */
+function processFile($fpath, $tablename) {
+    // Prepared statements and column lists are built once at file scope.
+    global $conn, $contractNoticeInsertQ, $supplierInsertQ, $agencyInsertQ, $contractNoticeFields, $supplierFields, $agencyFields;
+    $success = 0;
+    $row = 1;
+    $handle = fopen($fpath, "r");
+    if ($handle === false) {
+        echo "<font color=red>Unable to open " . htmlspecialchars($fpath) . "</font><br>";
+        return $success;
+    }
+    $datamapping0711 = array(
+        "Agency" => "agencyName",
+        "Parent CN ID" => "parentCN",
+        "CN ID" => "CNID",
+        "Publish Date" => "publishDate",
+        "Amendment Date" => "amendDate",
+        "Status" => "",
+        "StartDate" => "contractStart",
+        "EndDate" => "contractEnd",
+        "Value" => "value",
+        "Description" => "description",
+        "Agency Ref Id" => "agencyID",
+        "Category" => "category",
+        "Procurement Method" => "procurementMethod",
+        "ATM ID" => "atmID",
+        "SON ID" => "SONID",
+        "Confidentiality - Contract" => "confidentialityContract",
+        "Confidentiality - Contract Reason(s)" => "confidentialityContractReason",
+        "Confidentiality - Outputs" => "confidentialityOutputs",
+        "Confidentiality - Outputs Reason(s)" => "confidentialityOutputsReason",
+        "Consultancy" => "consultancy",
+        "Consultancy Reason(s)" => "consultancyReason",
+        "Amendment Reason" => "amendmentReason",
+        "Supplier Name" => "supplierName",
+        "Supplier Address" => "supplierAddress",
+        "Supplier City" => "supplierCity",
+        "Supplier Postcode" => "supplierPostcode",
+        "Supplier Country" => "supplierCountry",
+        "Supplier ABNExempt" => "supplierABNExempt",
+        "Supplier ABN" => "supplierABN",
+        "Agency Branch" => "contactBranch",
+        "Agency Divison" => "contactDivision",
+        "Agency Postcode" => "contactPostcode",
+        "" => ""
+    );
+    $headers = array();
+
+    // Length 0 lifts the per-line cap so long rows are not split mid-record.
+    while (($data = fgetcsv($handle, 0, "\t")) !== false) {
+        $num = count($data);
+        if ($row == 3) {
+            $headers = $data;
+        } elseif ($row > 3) {
+            if ($num > count($datamapping0711)) {
+                die("<font color=red>Error in data import; data mapping fields out of bounds or changed</font><br>" . $fpath . print_r($data, true));
+            }
+            $contractNoticeInsert = array($fpath); // first column is importFile
+            $supplierInsert = array();
+            $agencyInsert = array();
+            for ($c = 0; $c < $num; $c++) {
+                $data[$c] = trim(trim($data[$c], "="), "\"");
+                // Columns with an unknown or missing header map to "" and are skipped.
+                $field = (isset($headers[$c]) && isset($datamapping0711[$headers[$c]])) ? $datamapping0711[$headers[$c]] : "";
+                if ($tablename == "contractnotice") {
+                    if (in_array($field, $contractNoticeFields)) {
+                        if ($field == "parentCN" || $field == "CNID") {
+                            $data[$c] = substr($data[$c], 2); // take off the "CN" prefix
+                            $contractNoticeInsert[] = ($data[$c] > 0) ? $data[$c] : 0;
+                        } elseif ($field == "supplierABN") {
+                            $contractNoticeInsert[] = ($data[$c] > 0) ? $data[$c] : null;
+                        } elseif ($field == "amendDate" || $field == "publishDate" || $field == "contractStart" || $field == "contractEnd") {
+                            $contractNoticeInsert[] = date('Y-m-d H:i:s', strtotime($data[$c]));
+                        } else {
+                            // Re-encode to UTF-8 and keep printable characters only.
+                            $contractNoticeInsert[] = preg_replace('/[^[:print:]]/', '', utf8_encode($data[$c]));
+                        }
+                    }
+                } else if ($tablename == "supplierdetails") {
+                    if (in_array($field, $supplierFields)) {
+                        if ($field == "supplierABN") {
+                            $supplierInsert[] = ($data[$c] > 0) ? $data[$c] : 0;
+                        } else {
+                            $supplierInsert[] = $data[$c];
+                        }
+                    }
+                } else if ($tablename == "agency") {
+                    if (in_array($field, $agencyFields)) {
+                        $agencyInsert[] = $data[$c];
+                    }
+                }
+            }
+            flush();
+            if ($tablename == "contractnotice") {
+                $contractNoticeInsertQ->execute($contractNoticeInsert);
+                // execute() failures are reported on the statement handle, not on $conn.
+                $errors = $contractNoticeInsertQ->errorInfo();
+                if ($errors[1] == 7 && strpos($errors[2], "duplicate key") !== false) {
+                    // duplicate key: neither counted nor reported
+                } elseif ($errors[1] == 0) {
+                    $success++;
+                } else {
+                    foreach ($contractNoticeFields as $key => $cnf) {
+                        var_dump(isset($contractNoticeInsert[$key]) ? $contractNoticeInsert[$key] : null);
+                        echo $cnf . "<br>";
+                    }
+                    echo $data[2] . " failed CN insert.<br>" . print_r($errors, true) . "<br> row $row <br><br>\n";
+                }
+            } else if ($tablename == "supplierdetails") {
+                $supplierInsertQ->execute($supplierInsert);
+                $errors = $supplierInsertQ->errorInfo();
+                if ($errors[1] == 7 && strpos($errors[2], "duplicate key") !== false) {
+                    // duplicate key: neither counted nor reported
+                } elseif ($errors[1] == 0) {
+                    $success++;
+                } else {
+                    echo $data[2] . " failed supplier insert.<br>" . print_r($errors, true) . " <br> " . print_r($supplierInsert, true) . "<br> $row <br><br>\n";
+                }
+            } else if ($tablename == "agency") {
+                $agencyInsertQ->execute($agencyInsert);
+                $errors = $agencyInsertQ->errorInfo();
+                if ($errors[1] == 7 && strpos($errors[2], "duplicate key") !== false) {
+                    // duplicate key: neither counted nor reported
+                } elseif ($errors[1] == 0) {
+                    $success++;
+                } else {
+                    echo $data[2] . " failed agency insert.<br>" . print_r($errors, true) . " <br> " . print_r($agencyInsert, true) . "<br> $row <br><br>\n";
+                }
+            }
+            flush();
+        }
+        $row++;
+    }
+    fclose($handle);
+    $contractNoticeInsertQ->closeCursor();
+    $supplierInsertQ->closeCursor();
+    $agencyInsertQ->closeCursor();
+
+    return $success;
+}
+
+$path = 'data/';
+if ($_REQUEST["fname"] == "") {
+    echo "Get files from: https://www.tenders.gov.au/?event=public.reports.list<br>";
+    $dhandle = opendir($path);
+    // define an array to hold the files
+    $files = array();
+    if ($dhandle) {
+        // loop through all of the files
+        while (false !== ($fname = readdir($dhandle))) {
+            if (($fname != '.') && ($fname != '..')) {
+                echo "<a href=\"import.php?fname=$fname\">$fname</a>&nbsp;" . filesize($path . $fname) . "&nbsp;" . date("c", filemtime($path . $fname)) . "<br/>";
+            }
+        }
+    }
+} else {
+    $success = 0;
+    $fname = $_REQUEST["fname"];
+    echo " ============== $fname  ============== <br>";
+    flush();
+    $success+= processFile($path . $fname, "contractnotice");
+    $success+= processFile($path . $fname, "agency");
+    $success+= processFile($path . $fname, "supplier");
+    echo "<br> $success records successfully created";
+    
+    flush();
+       // run post import data processing
 // cn
-echo "link amend<br>";
-include ("linkAmendments.php");
-echo "update UNSPSC<br>";
-include ("updateUNSPSC.php");
+    echo "link amend<br>";
+    include ("linkAmendments.php");
+    echo "update UNSPSC<br>";
+    include ("updateUNSPSC.php");
 // agency
 //include ("setAgencyStatus.php");
 //include ("setAgencyURLABN.php");
-	return $success;
-}
-$path = 'data/';
-if ($_REQUEST["fname"] == "") {
-	echo "Get files from: https://www.tenders.gov.au/?event=public.reports.list<br>";
-	$dhandle = opendir($path);
-	// define an array to hold the files
-	$files = array();
-	if ($dhandle) {
-		// loop through all of the files
-		while (false !== ($fname = readdir($dhandle))) {
-			if (($fname != '.') && ($fname != '..')) {
-				echo "<a href=\"import.php?fname=$fname\">$fname</a>&nbsp;" . filesize($path . $fname) . "&nbsp;" . date("c", filemtime($path . $fname)) . "<br/>";
-			}
-		}
-	}
-}
-else {
-	$success = 0;
-	$fname = $_REQUEST["fname"];
-	echo " ============== $fname  ============== <br>";
-	flush();
-	$success+= processFile($path . $fname, "contractnotice");
-	$success+= processFile($path . $fname, "agency");
-	$success+= processFile($path . $fname, "supplier");
-	echo "<br> $success records successfully created";
-	flush();
-}
-
+}
 ?>
 

--- a/admin/linkAmendments.php
+++ b/admin/linkAmendments.php
@@ -2,7 +2,7 @@
 include_once ("../lib/common.inc.php");
 $query = 'SELECT c."CNID",c."parentCN",p."childCN" FROM contractnotice as c  LEFT OUTER JOIN contractnotice as p on c."parentCN" = p."CNID"
 WHERE
-c."parentCN" > 0 AND p."childCN" IS NULL ';
+c."parentCN" IS NOT NULL AND p."childCN" IS NULL ';
 $query = $conn->prepare($query);
 	$query->execute();
 		databaseError($conn->errorInfo());
@@ -28,9 +28,9 @@
       FROM contractnotice
       GROUP BY "parentCN" 
       HAVING COUNT(*) > 1 
-      AND "parentCN" != 0
+      AND "parentCN" IS NOT NULL
 )
-AND "childCN" = 0
+AND "childCN" IS NULL
 GROUP BY "parentCN" having count(*) > 1';
 $query = $conn->prepare($query);
 	$query->execute();

--- a/admin/partialdata/scraper.php
+++ /dev/null
@@ -1,74 +1,1 @@
-<?php
-date_default_timezone_set('Australia/Melbourne');
-$split = false;
-function format_bytes($size) {
-    $units = array(' B', ' KB', ' MB', ' GB', ' TB');
-    for ($i = 0; $size >= 1024 && $i < 4; $i++) $size /= 1024;
-    return round($size, 2).$units[$i];
-}
 
-$days = 4;
-if (isset($_REQUEST['days'])) $days = $_REQUEST['days'];
-$startDate = strtotime("05-Jun-2008");
-if (isset($_REQUEST['startDate'])) $startDate = $_REQUEST['startDate'];
-
-function getFile($startDate, $days, $minVal, $maxVal) {
-global $split;
-	$endDate = strtotime(date("Y-m-d", $startDate)." +".$days." days");
-$file = date("dMY",$startDate).'to'.date("dMY",$endDate).'val'.$minVal.'to'.$maxVal.'.xls';
-echo "Fetching $file ($days days) ($minVal < value < $maxVal )... ";
-$url = "https://www.tenders.gov.au/?event=public.advancedsearch.CNSONRedirect&type=cnEvent&atmType=archived%2Cclosed%2Cpublished%2Cproposed&agencyUUID=&agencyStatus=-1&portfolioUUID=&keyword=&KeywordTypeSearch=AllWord&CNID=&dateType=Publish+Date&dateStart=".date("d-M-Y",$startDate)."&dateEnd=".date("d-M-Y",$endDate)."&supplierName=&supplierABN=&valueFrom=".$minVal."&valueTo=".$maxVal."&ATMID=&AgencyRefId=&consultancy=&download=Download+results";
-echo "<!-- $url -->";
-$current = file_get_contents($url);
-if (strpos($current,"There are no results that match your selection.")> 0 ) { 
- echo "<font color=red>Empty file!</font><br>";
-}
-if (strpos($current,"Your search returned more than 1000 results.") === false) {
-	file_put_contents($file, $current);
-	echo "$file saved<br>";
-	echo format_bytes(filesize($file))."<br>";
-	echo '<a href="?startDate='.$endDate.'&days='.$days.'">Load next '.($days).' days </a><br>';
-		echo '<a href="?startDate='.$endDate.'&days='.($days*2).'">Load next '.($days*2).' days </a><br>';
-	echo '<a href="?startDate='.$endDate.'&days='.$days.'&split=yes">Load next '.($days).' days with split</a><br>';
-	flush();
-if (!isset($_REQUEST['split']) && !$split) {
-echo "Success so fetching next $days... <br>";
-getFile($endDate, $days, "" , "");
-}
-	return true;
-} else  {
-	echo "<font color=red>Too many records!</font><br>";
-	echo '<a href="?startDate='.$startDate.'&days='.floor($days/2).'">Load '.($days/2).' days instead?</a><br>';
-		echo '<a href="?startDate='.$startDate.'&days='.$days.'&split=yes">Split instead?</a><br>';
-	flush();
-if (!isset($_REQUEST['split']) && !$split) {
-echo "Failure so splitting ... <br>";
- doSplit($startDate, $days);
-}
-	return false;
-}
-}
-function doSplit($startDate, $days) {
-global $split;
-$split = true;
-set_time_limit(20);
-getFile($startDate, $days, 0, 12000);
-getFile($startDate, $days, 12000, 16000);
- getFile($startDate, $days, 16000, 20000);
- getFile($startDate, $days, 20000, 30000);
- getFile($startDate, $days, 30000, 40000);
-// getFile($startDate, $days, 40000, 80000);
- getFile($startDate, $days, 40000, 60000);
- getFile($startDate, $days, 60000, 80000);
-// getFile($startDate, $days, 80000, 300000);
- getFile($startDate, $days, 80000, 150000);
- getFile($startDate, $days, 150000, 300000);
- getFile($startDate, $days, 300000, 999999999);
-}
-if (isset($_REQUEST['split'])) {
-	doSplit($startDate, $days);
-} else {
-	getFile($startDate, $days, "" , "");
-}
-?>
-

--- a/admin/updateUNSPSC.php
+++ b/admin/updateUNSPSC.php
@@ -43,6 +43,10 @@
     $unspsc[$tobacco] = $row['UNSPSC'];

     $architect = str_replace("Building and Construction and Maintenance Services", "Architectural services", $row['Title']);

     $unspsc[$architect] = $row['UNSPSC'];

+        $powercable = str_replace("Power cable", "Power cable installation and supply", $row['Title']);

+    $unspsc[$powercable] = $row['UNSPSC'];

+        $forensicIT = str_replace("Building and Construction and Maintenance Services", "Architectural services", $row['Title']);

+    $unspsc[$architect] = $row['UNSPSC'];

     // some just plain wrong

     $noOilRigs = str_replace("Building and Construction and Maintenance Services", "Management and provision of all facilities engineering modification and maintenance services for a site or platform", $row['Title']);

     $unspsc[$noOilRigs] = $row['UNSPSC'];


--- a/australian_federal_government_contract_spending.json
+++ b/australian_federal_government_contract_spending.json
@@ -1,125 +1,164 @@
 {
-   "dataset":{
-      "name":"australian_federal_government_contract_spending",
-      "label":"Australian Federal Government Contract Spending",
-      "description":"Spending by Australian Federal Government agencies on goods and services from 2007 onwards.",
-      "currency":"AUD",
-      "unique_keys":[
-         "id"
-      ],
-      "temporal_granularity":"year"
-   },
-   "mapping":{
-  "category": {
-      "fields": [ {
-         "column": "category", 
-        "datatype": "string", 
-          "name": "label"
-          } ],
-
-    "type": "classifier", 
-    "description": "", 
-    "taxonomy": "unspsc",
-    "label": "Contract Goods/Services Category"
+  "dataset": {
+    "languages": [
+      "en"
+    ], 
+    "currency": "AUD", 
+    "name": "australian_federal_government_contract_spending", 
+    "territories": [], 
+    "default_time": null, 
+    "description": "Spending by Australian Federal Government agencies on goods and services from 2007 onwards.", 
+    "schema_version": "2011-12-07", 
+    "label": "Australian Federal Government Contract Spending"
   }, 
-      "from":{
-         "fields":[
-            {
-               "column":"agencyABN",
-               "datatype":"string",
-               "name":"id"
-            },
-            {
-               "column":"agencyName",
-               "datatype":"string",
-               "name":"label"
-            }
-         ],
-         "type":"entity",
-         "description":"",
-         "label":"Government Agency Australian Business Number"
-      },
-      "description":{
-         "column":"description",
-         "datatype":"string",
-         "type":"value",
-         "description":"Contract as described by agency",
-         "label":"Contract Description"
-      },
-      "to":{
-         "fields":[
-            {
-               "column":"supplierName",
-               "datatype":"string",
-               "name":"label"
-            },
-            {
-               "column":"supplierID",
-               "datatype":"string",
-               "name":"id"
-            }
-         ],
-         "type":"entity",
-         "description":"Government Supplier",
-         "label":"Supplier Name"
-      },
-      "currency":{
-         "default_value":"AUD",
-         "description":"All entries in AUD, foreign transactions are converted at the time of their payment",
-         "column":"",
-         "label":"",
-         "datatype":"currency",
-         "type":"value"
-      },
-      "amount":{
-         "column":"value",
-         "datatype":"float",
-         "type":"value",
-         "description":"The total value of the contract including all variations/amendments/extensions",
-         "label":"Contract Value"
-      },
-      "time":{
-         "column":"contractStart",
-         "datatype":"date",
-         "type":"value",
-         "description":"Start of the contract period (goods or services being received)",
-         "label":"Contract Start Date"
-      },
-      "id":{        
-"column": "CNID", 
-        "datatype": "string", 
-        "name": "label",
-    "type": "value", 
-    "description": "", 
-    "label": "Contract Notice ID"
-      }
-   },
-
-"views": [
+  "mapping": {
+    "category": {
+      "description": "Contract Goods/Services Category", 
+      "datatype": "string", 
+      "label": "category", 
+      "column": "cat1", 
+      "type": "attribute", 
+      "dimension": "category"
+    }, 
+    "sourceurl": {
+      "description": "Source Document URL", 
+      "datatype": "string", 
+      "label": "sourceurl", 
+      "column": "sourceurl", 
+      "type": "attribute", 
+      "dimension": "sourceurl"
+    }, 
+    "from": {
+      "attributes": {
+        "name": {
+          "column": "agencyABN", 
+          "datatype": "id", 
+          "type": "id", 
+          "description": null
+        }, 
+        "label": {
+          "column": "agencyName", 
+          "datatype": "string", 
+          "type": "attribute", 
+          "description": null
+        }
+      }, 
+      "label": "Government Agency", 
+      "type": "compound", 
+      "dimension": "from", 
+      "description": "Government Agency"
+    }, 
+    "description": {
+      "description": null, 
+      "datatype": "string", 
+      "label": "description", 
+      "column": "description", 
+      "type": "attribute", 
+      "dimension": "description"
+    }, 
+    "publishdate": {
+      "description": null, 
+      "format": null, 
+      "datatype": "date", 
+      "label": "publishDate", 
+      "column": "publishDate", 
+      "type": "date", 
+      "dimension": "publishdate"
+    }, 
+    "to": {
+      "attributes": {
+        "name": {
+          "column": "supplierid", 
+          "datatype": "id", 
+          "type": "id"
+        }, 
+        "label": {
+          "column": "supplierName", 
+          "datatype": "string", 
+          "type": "attribute"
+        }
+      }, 
+      "label": "Supplier Name", 
+      "type": "compound", 
+      "dimension": "to", 
+      "description": "Government Supplier"
+    }, 
+    "amount": {
+      "description": "The total value of the contract including all variations/amendments/extensions", 
+      "datatype": "float", 
+      "label": "value", 
+      "column": "value", 
+      "type": "measure", 
+      "dimension": "amount"
+    }, 
+    "cnid": {
+      "description": "Contract Notice ID", 
+      "datatype": "string", 
+      "label": "CNID", 
+      "column": "CNID", 
+      "key": true, 
+      "type": "attribute", 
+      "dimension": "cnid"
+    }, 
+    "contractend": {
+      "description": "End of the contract period (goods or services being received)", 
+      "format": null, 
+      "datatype": "date", 
+      "label": "contractEnd", 
+      "column": "contractEnd", 
+      "type": "date", 
+      "dimension": "contractend"
+    }, 
+    "time": {
+      "description": "Start of the contract period (goods or services being received)", 
+      "format": null, 
+      "datatype": "date", 
+      "label": "contractStart", 
+      "column": "contractStart", 
+      "type": "date", 
+      "dimension": "time"
+    }
+  }, 
+  "views": [
     {
-        "name": "default",
-        "entity": "dataset",
-        "label": "Default breakdown by Category",
-        "dimension": "dataset",
-        "breakdown": "category",
-	"filters" : {"name": "australian_federal_government_contract_spending" }
-    },
-  {
-        "name":"default",
-        "entity": "classifier",
-        "label":"Breakdown of Spending Areas by Supplier Company",
-        "dimension": "category",
-        "breakdown": "to",
-        "filters": {"taxonomy": "unspsc"}
-    },
-  {
-        "name":"default",
-        "entity": "classifier",
-        "label":"Breakdown of Spending Areas by Government Agency",
-        "dimension": "category",
-        "breakdown": "from",
-        "filters": {"taxonomy": "unspsc"}
-    }  
-]
+      "name": "default", 
+      "label": "Breakdown by Category", 
+      "cuts": {}, 
+      "drilldown": "category", 
+      "entity": "dataset", 
+      "dimension": "dataset"
+    }, 
+    {
+      "name": "by_supplier", 
+      "label": "Breakdown by Supplier", 
+      "cuts": {}, 
+      "drilldown": "to", 
+      "entity": "dataset", 
+      "dimension": "dataset"
+    }, 
+    {
+      "name": "by_agency", 
+      "label": "Breakdown by Agency", 
+      "cuts": {}, 
+      "drilldown": "from", 
+      "entity": "dataset", 
+      "dimension": "dataset"
+    }, 
+    {
+      "name": "default", 
+      "label": "Agency spending breakdown by supplier", 
+      "cuts": {}, 
+      "drilldown": "to", 
+      "entity": "dimension", 
+      "dimension": "from"
+    }, 
+    {
+      "name": "default", 
+      "label": "Supplier contracts breakdown by procuring agency", 
+      "cuts": {}, 
+      "drilldown": "from", 
+      "entity": "dimension", 
+      "dimension": "to"
+    }
+  ]
 }
-

--- a/displayBubbletree.php
+++ b/displayBubbletree.php
@@ -32,48 +32,48 @@
 }
 $total = 0;
 
-$catsresult = $conn->prepare('SELECT distinct substr( "categoryUNSPSC"::text, 0, 2 ) as cat 
+$catsresult = $conn->prepare('SELECT distinct substr( "categoryUNSPSC"::text, 0, 3 ) as cat 
 FROM contractnotice WHERE "categoryUNSPSC" IS NOT NULL group by "categoryUNSPSC";;');
 $catsresult->execute();
 foreach ($catsresult->fetchAll() as $row) {
 	$cats[] = $row['cat'];
 }
-$cattwosresult = $conn->prepare('SELECT distinct substr( "categoryUNSPSC"::text, 0, 3 ) as cat
+$cattwosresult = $conn->prepare('SELECT distinct substr( "categoryUNSPSC"::text, 0, 5 ) as cat
 FROM contractnotice
 WHERE "categoryUNSPSC" IS NOT NULL
 GROUP BY  "categoryUNSPSC"  order by cat ;');
 $cattwosresult->execute();
 		foreach ($cattwosresult->fetchAll() as $tworow) { 
-			$cattwos[substr($tworow['cat'],0,1)][] = $tworow['cat'];
+			$cattwos[substr($tworow['cat'],0,2)][] = $tworow['cat'];
 		}
 				
 	
 
-$catthreesresult = $conn->prepare('SELECT substr( "categoryUNSPSC"::text, 0, 4 ) as cat , SUM( "value" ) as value
+$catthreesresult = $conn->prepare('SELECT substr( "categoryUNSPSC"::text, 0, 7 ) as cat , SUM( "value" ) as value
 FROM contractnotice
 WHERE "childCN" is null and "categoryUNSPSC" IS NOT NULL
 GROUP BY cat order by cat ;');
 $catthreesresult->execute();
 		foreach ($catthreesresult->fetchAll() as $threerow) { 
-			$catthrees[substr($threerow['cat'],0,2)][] = $threerow;
+			$catthrees[substr($threerow['cat'],0,4)][] = $threerow;
 		}
 $nodes = Array();
 foreach ($cats as $catNum) {
 $catColor = $color->hsv2hex(Array($catNum/10, .7, abs(($catNum*(1/10))-.5) + .5));
-	$catName = $unspsc[$catNum . "0000000"] . $catNum;
+	$catName = substr($unspsc[$catNum . "000000"],0,18) . $catNum;
 	$subnodes = Array();
 	$catValue = 0;
 	foreach ($cattwos[$catNum] as $twoCatNum) {
-		$subcatName = $unspsc[$twoCatNum. "000000"] .$twoCatNum;
+		$subcatName = $unspsc[$twoCatNum. "0000"] .$twoCatNum;
 		$subsubnodes = Array();
-	$subCatValue = 0;
+	$subCatValue = 1;
 		$subCatColor = $color->hsv2hex(Array($catNum/10, rand(1,10)/10, abs(($catNum*(1/10))-.5) + .5));
 		foreach ($catthrees[$twoCatNum] as $threerow) {
-			$subsubcatName = $unspsc[$threerow['cat'] . "00000"] . $threerow['cat'];
+			$subsubcatName = $unspsc[$threerow['cat'] . "00"] . $threerow['cat'];
 			$subsubnodes[] = Array(
 				"label" => $subsubcatName,
 				"amount" => $threerow['value'],
-				//"color" => "#".$subCatColor
+				"color" => "#".$subCatColor
 			);
 			$subCatValue += $threerow['value'];
 		}

--- a/exportData.csv.php
+++ b/exportData.csv.php
@@ -3,17 +3,25 @@
 include_once("./lib/common.inc.php");
 setlocale(LC_CTYPE, 'C');
 // source: http://stackoverflow.com/questions/81934/easy-way-to-export-a-sql-table-without-access-to-the-server-or-phpmyadmin#81951
+
+$unspsc = Array();
+$unspscresult = $conn->prepare('select * from "UNSPSCcategories" where "UNSPSC"::text like \'%00000\';');
+$unspscresult->execute();
+foreach ($unspscresult->fetchAll() as $row) {
+    $unspsc[$row['UNSPSC']] = $row['Title'];
+}
+
 $query = $conn->prepare('
-SELECT "CNID",contractnotice."agencyName",agency.abn as "agencyABN",EXTRACT(EPOCH FROM "publishDate") as "publishDate",EXTRACT(EPOCH FROM "contractStart") as "contractStart",EXTRACT(EPOCH FROM "contractEnd") as "contractEnd",value,description,"procurementMethod",category,"categoryUNSPSC", 
- "supplierABN","supplierName",
-(
- case when "supplierABN" != 0 THEN "supplierABN"::text ELSE "supplierName" END) as supplierID,
- 
+SELECT "CNID",contractnotice."agencyName",agency_nametoabn.abn as "agencyABN",
+EXTRACT(EPOCH FROM "publishDate") as "publishDate",
+EXTRACT(EPOCH FROM "contractStart") as "contractStart",
+EXTRACT(EPOCH FROM "contractEnd") as "contractEnd",
+value,description,category,
+"supplierName",(case when "supplierABN" != 0 THEN "supplierABN"::text ELSE "supplierName" END) as supplierID,
 (\'https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN\'::text || "CNID"::text) as sourceURL 
-FROM contractnotice join agency on contractnotice."agencyName"=agency."agencyName" where "childCN" is null'
+FROM contractnotice join agency_nametoabn on contractnotice."agencyName"=agency_nametoabn."agencyName"  
+where "childCN" is null'
         , array(PDO::ATTR_CURSOR => PDO::FETCH_ORI_NEXT));
-// "supplierCity","supplierPostcode","supplierCountry","contactPostcode",
-// (substr( "categoryUNSPSC"::text, 0, 2 ) || \'0000000\'::text) as "categoryUNSPSClv1", "categoryUNSPSC", (substr( "categoryUNSPSC"::text, 0, 3 ) || \'000000\'::text) as "categoryUNSPSClv2" "categoryUNSPSC", (substr( "categoryUNSPSC"::text, 0, 4 ) || \'00000\'::text as "categoryUNSPSClv3")
 $query->execute();
 $errors = $conn->errorInfo();
 if ($errors[2] != "") {
@@ -21,8 +29,8 @@
 }
 
 $num_fields = $query->columnCount();
-$headers = array();
-for ($i = 0; $i < $num_fields; $i++) {
+$headers = Array();
+for ($i = 0; $i < $num_fields; $i++) { // for each column in query, make a CSV header
     $meta = $query->getColumnMeta($i);
     $headers[] = $meta['name'];
 }
@@ -41,6 +49,13 @@
                     || $headers[$key] == "contractEnd") {
                 $colvalue = date("Y-m-d", $colvalue);
             }
+           /* if ($headers[$key] == "CNID") {
+                $colvalue = str_replace("A","", $colvalue);
+}*/
+            // Swap a UNSPSC code for its title; keep the code itself when no title was loaded for it.
+            if (in_array($headers[$key], array("cat1", "cat2", "cat3"), true)) {
+                $colvalue = isset($unspsc[$colvalue]) ? $unspsc[$colvalue] : $colvalue;
+            }
         }
         fputcsv($fp, array_values($row));
     }

--- /dev/null
+++ b/exportOverview.csv.php
@@ -1,1 +1,48 @@
+<?php
 
+/**
+ * CSV export of contract notices for the Overview document categoriser.
+ *
+ * Sends one row per contract notice that has no child notice, with the
+ * columns uid (the CNID), text (the description) and url (a tenders.gov.au
+ * keyword search link built from the CNID), as a CSV attachment.
+ */
+include_once("./lib/common.inc.php");
+setlocale(LC_CTYPE, 'C');
+// source: http://stackoverflow.com/questions/81934/easy-way-to-export-a-sql-table-without-access-to-the-server-or-phpmyadmin#81951
+
+$query = $conn->prepare('
+SELECT "CNID" as uid, description as text,
+(\'https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN\'::text || "CNID"::text) as url from "contractnotice"
+where "childCN" is null'
+        , array(PDO::ATTR_CURSOR => PDO::CURSOR_FWDONLY));
+$query->execute();
+// Errors raised by execute() are recorded on the statement handle; the
+// connection's errorInfo() only covers calls made directly on $conn.
+$errors = $query->errorInfo();
+if ($errors[2] != "") {
+    die("Export terminated, db error" . print_r($errors, true));
+}
+
+$num_fields = $query->columnCount();
+$headers = Array();
+for ($i = 0; $i < $num_fields; $i++) { // for each column in query, make a CSV header
+    $meta = $query->getColumnMeta($i);
+    $headers[] = $meta['name'];
+}
+$fp = fopen('php://output', 'w');
+if ($fp && $query) {
+    header('Content-Type: text/csv');
+    header('Content-Disposition: attachment; filename="export.' . date("c") . '.csv"');
+    header('Pragma: no-cache');
+    header('Expires: 0');
+    fputcsv($fp, $headers);
+    while ($row = $query->fetch(PDO::FETCH_NUM, PDO::FETCH_ORI_NEXT)) {
+        foreach ($row as $key => $colvalue) {
+            // Re-encode the value, then strip every character outside the [:print:] class.
+            $row[$key] = preg_replace('/[^[:print:]]/', '', utf8_encode($colvalue));
+        }
+        fputcsv($fp, array_values($row));
+    }
+    die;
+}

file:b/robots.txt (new)
--- /dev/null
+++ b/robots.txt
@@ -1,1 +1,3 @@
+User-agent: *
+Disallow: /admin