From: Maxious Date: Fri, 01 Mar 2013 00:06:29 +0000 Subject: add datagov importer X-Git-Url: http://maxious.lambdacomplex.org/git/?p=contractdashboard.git&a=commitdiff&h=7afe3ef2df14aaa6f3eabdd6d7c5bf136c5029bc --- add datagov importer --- --- a/admin/import.php +++ b/admin/import.php @@ -86,8 +86,7 @@ "Agency Postcode" => "contactPostcode", "" => "" ); - - $headers; + while (($data = fgetcsv($handle, 1000, "\t")) !== false) { $num = count($data); @@ -172,7 +171,7 @@ } ksort($files); foreach ($files as $date => $fname) { - echo "$fname " . filesize($path . $fname) . " " . $date . "
"; + echo "$fname " . filesize($path . $fname) . " " . $date . "
"; } } else { $success = 0; --- /dev/null +++ b/admin/importdatagov.php @@ -1,1 +1,216 @@ - + $f) { + $contractNoticeInsertQ.= ($key == 0 ? "" : ", ") . "?"; +} +$contractNoticeInsertQ.= ");"; +$contractNoticeInsertQ = $conn->prepare($contractNoticeInsertQ); + +function processFile($fpath) { + global $conn, $contractNoticeFields, $contractNoticeInsertQ; + $row = 1; + $success = 0; + ini_set('auto_detect_line_endings',TRUE); + $handle = fopen($fpath, "r"); + //"t" mode string translates windows line breaks to unix + $datamapping0712 = array( + + "Agency Name" => "agencyName", + "Parent Contract ID" => "parentCN", + "Contract ID" => "CNID", + "Publish Date" => "publishDate", + "Amendment Date" => "amendDate", + "Start Date" => "contractStart", + "End Date" => "contractEnd", + "Value" => "value", + "Description" => "description", + "Agency Ref ID" => "agencyID", + "UNSPSC Code" => "categoryUNSPSC", + "Title" => "category", + "Procurement Method" => "procurementMethod", + "ATM ID" => "atmID", + "SON ID" => "SONID", + "Confidentiality Contract Flag" => "confidentialityContract", + "Confidentiality Contract Reason" => "confidentialityContractReason", + "Confidentiality Outputs Flag" => "confidentialityOutputs", + "Confidentiality Outputs Reason" => "confidentialityOutputsReason", + "Consultancy Flag" => "consultancy", + "Consultancy Reason" => "consultancyReason", + "Amendment Reason" => "amendmentReason", + "Supplier Name" => "supplierName", + "Supplier Address" => "supplierAddress", + "Supplier Suburb" => "supplierCity", + "Supplier Postcode" => "supplierPostcode", + "Supplier Country" => "supplierCountry", + "Supplier ABN Exempt" => "supplierABNExempt", + "ABN" => "supplierABN", + "Contact Name" => "", + "Contact Phone" => "", + "Branch" => "contactBranch", + "Division" => "contactDivision", + "Office Postcode" => "contactPostcode", + + ); + + + while (($data = fgetcsv($handle, 10000)) !== false) { + //print_r($data); + $num = count($data); + if ($row == 1) { + $headers = $data; + } elseif ($row > 1) { + if ($num > count($datamapping0712)) { + die("Error in data import; data mapping fields out of bounds or changed $num > ".count($datamapping0712)."
" . $fpath . print_r($data)); + } + $contractNoticeInsert = Array(); + $supplierInsert = Array(); + $agencyInsert = Array(); + $contractNoticeInsert[] = $fpath; + $keys = array_keys($datamapping0712); + for ($c = 0; $c < $num; $c++) { + $data[$c] = trim($data[$c], "="); + $data[$c] = trim($data[$c], "\""); + if (in_array(($datamapping0712[$headers[$c]]), $contractNoticeFields)) { + if (($datamapping0712[$headers[$c]]) == "parentCN" || ($datamapping0712[$headers[$c]]) == "CNID") { + $data[$c] = substr($data[$c], 2); // take off the "CN" prefix + if ($data[$c] > 0 && $data[$c] != '0') { + $contractNoticeInsert[] = $data[$c]; + } else { + $contractNoticeInsert[] = null; + } + } elseif (($datamapping0712[$headers[$c]]) == "supplierABN") { + if ($data[$c] > 0 && $data[$c] != '0') { + $contractNoticeInsert[] = $data[$c]; + } else { + $contractNoticeInsert[] = null; + } + } elseif (($datamapping0712[$headers[$c]]) == "amendDate" || ($datamapping0712[$headers[$c]]) == "publishDate" || ($datamapping0712[$headers[$c]]) == "contractStart" || ($datamapping0712[$headers[$c]]) == "contractEnd") { + $contractNoticeInsert[] = date('Y-m-d H:i:s', strtotime($data[$c])); + } else { + if (strstr("\" =", $data[$c] > 0)) { + die("Invalid Description field" . $contractNoticeInsert); + } + $colvalue = preg_replace('/[^[:print:]]/', '', utf8_encode($data[$c])); + + $contractNoticeInsert[] = $colvalue; + } + } + } + flush(); + //print_r($contractNoticeInsert); + $contractNoticeInsertQ->execute($contractNoticeInsert); + $errors = $conn->errorInfo(); + if ($errors[1] == 7 && strpos($errors[2], "duplicate key")) { + // echo "dupe
"; + } elseif ($errors[1] == 0) { + $success++; + } else { + foreach ($contractNoticeFields as $key => $cnf) { + echo var_dump($contractNoticeInsert[$key]) . $cnf . "
"; + } + echo $data[2] . " failed CN insert.
" . print_r($errors, true) . "
row $row

\n"; + } + + flush(); + //echo "
\n"; + } + $row++; + } + fclose($handle); + $contractNoticeInsertQ->closeCursor(); + + return $success; +} + +$path = 'datagovdata/'; +if ($_REQUEST["fname"] == "" && $argv[1] == "") { + echo "Get files from: https://www.tenders.gov.au/?event=public.reports.list
"; + $dhandle = opendir($path); + // define an array to hold the files + $files = array(); + if ($dhandle) { + // loop through all of the files + while (false !== ($fname = readdir($dhandle))) { + if (($fname != '.') && ($fname != '..')) { + $files[date("c", filemtime($path . $fname)) . md5($fname)] = $fname; + } + } + } + ksort($files); + foreach ($files as $date => $fname) { + echo "$fname " . filesize($path . $fname) . " " . $date . "
"; + } +} else { + $success = 0; + $fname = $_REQUEST["fname"]; + if ($argv[1] != "") $fname = $argv[1]; + echo " ============== $fname ==============
"; + flush(); + $success+= processFile($path . $fname, "contractnotice"); + $success+= processFile($path . $fname, "agency"); + $success+= processFile($path . $fname, "supplier"); + echo "
$success records successfully created"; + + flush(); + // run post import data processing + // + if ($success > 0) { +$conn->exec("update datasets set \"lastUpdated\" = NOW() where title = 'Contract Notices'"); + echo "link amend
"; + include ("linkAmendments.php"); + echo "update UNSPSC
"; + include ("updateUNSPSC.php"); + } +// cn + +// agency +//include ("setAgencyStatus.php"); +//include ("setAgencyURLABN.php"); + +} +?> + --- a/admin/partialdata/import.php +++ b/admin/partialdata/import.php @@ -12,6 +12,7 @@ $handle = fopen($fpath, "r"); //"t" mode string translates windows line breaks to unix $datamapping0507 = array( + "Agency" => "agencyName", "CN ID" => "CNID", "Publish Date" => "publishDate", @@ -137,7 +138,7 @@ // loop through all of the files while (false !== ($fname = readdir($dhandle))) { if (($fname != '.') && ($fname != '..') && (!isset($_REQUEST["filter"]) || strpos($fname,$_REQUEST["filter"]) != false)) { - echo "$fname " . filesize($path . $fname) . " " . date("c", filemtime($path . $fname)) . "
"; + echo "$fname " . filesize($path . $fname) . " " . date("c", filemtime($path . $fname)) . "
"; processFile($path . $fname, "contractnotice"); } } --- a/admin/partialdata/importamendments.php +++ b/admin/partialdata/importamendments.php @@ -310,7 +310,7 @@ // loop through all of the files while (false !== ($fname = readdir($dhandle))) { if (($fname != '.') && ($fname != '..') && (strpos($fname,".xls")>0)) { - echo "$fname " . filesize($path . $fname) . " " . date("c", filemtime($path . $fname)) . "
"; + echo "$fname " . filesize($path . $fname) . " " . date("c", filemtime($path . $fname)) . "
"; processFile($path . $fname, "contractnotice"); } } --- a/lib/common.inc.php +++ b/lib/common.inc.php @@ -14,11 +14,14 @@ if (strstr($_SERVER['PHP_SELF'], "labs/") || strstr($_SERVER['PHP_SELF'], "admin/") || strstr($_SERVER['PHP_SELF'], "heuristics/")) { $basePath = "../"; } + if (php_sapi_name() != "cli") { + require $basePath."lib/amon-php/amon.php"; Amon::config(array('address'=> 'http://127.0.0.1:2464', 'protocol' => 'http', 'secret_key' => "JBcSUdFOi5lK0vCjLjbHDpQamcBnRA4iV7QLaTADeDQ")); Amon::setup_exception_handler(); + } require ROOT . DIRECTORY_SEPARATOR . '..' . DIRECTORY_SEPARATOR . 'lib' . DIRECTORY_SEPARATOR . 'openid.php'; $openid = new LightOpenID($_SERVER['HTTP_HOST']);