From: Maxious Date: Tue, 06 Dec 2011 06:08:15 +0000 Subject: Use PDO prepared statements for massive import speedup X-Git-Url: https://maxious.lambdacomplex.org/git/?p=busui.git&a=commitdiff&h=79ddd6110cb0542a9f4f0db25e09e9b7bbc1447c --- Use PDO prepared statements for massive import speedup --- --- a/updatedb.php +++ b/updatedb.php @@ -17,7 +17,6 @@ */ if (php_sapi_name() == "cli") { include ('include/common.inc.php'); - $conn = pg_connect("dbname=transitdata user=postgres password=snmc host=localhost") or die('connection failed'); $pdconn = new PDO("pgsql:dbname=transitdata;user=postgres;password=snmc;host=localhost"); /* @@ -34,7 +33,7 @@ // Unzip cbrfeed.zip, import all csv files to database $unzip = false; $zip = zip_open(dirname(__FILE__) . "/cbrfeed.zip"); - $tmpdir = "c:/tmp/"; + $tmpdir = "c:/tmp/cbrfeed/"; mkdir($tmpdir); if ($unzip) { if (is_resource($zip)) { @@ -53,6 +52,7 @@ } foreach (scandir($tmpdir) as $file) { + $headers = Array(); if (!strpos($file, ".txt") === false) { $fieldseparator = ","; $lineseparator = "\n"; @@ -60,33 +60,50 @@ echo "Opening $file \n"; $line = 0; $handle = fopen($tmpdir . $file, "r"); - if ($tablename == "stop_times") { - $stmt = $pdconn->prepare("insert into stop_times (trip_id,stop_id,stop_sequence,arrival_time,departure_time) values(:trip_id, :stop_id, :stop_sequence,:arrival_time,:departure_time);"); - $stmt->bindParam(':trip_id', $trip_id); - $stmt->bindParam(':stop_id', $stop_id); - $stmt->bindParam(':stop_sequence', $stop_sequence); - $stmt->bindParam(':arrival_time', $time); - $stmt->bindParam(':departure_time', $time); - } $distance = 0; $lastshape = 0; $lastlat = 0; $lastlon = 0; + $stmt = null; while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) { if ($line == 0) { - - } else { - $query = "insert into $tablename values("; + $headers = array_values($data); + if ($tablename == "stops") { + $headers[] = "position"; + } + if ($tablename == "shapes") { + $headers[] = "shape_pt"; + } + $query = "insert into $tablename ("; + $valueCount = 0; + foreach ($headers as $value) { + $query.=($valueCount > 0 ? "," : "") . pg_escape_string($value); + $valueCount++; + } + $query.= ") values( "; $valueCount = 0; foreach ($data as $value) { - $query.=($valueCount > 0 ? "','" : "'") . pg_escape_string($value); + $query.=($valueCount > 0 ? "," : "") . '?'; $valueCount++; } + if ($tablename == "stops") { + $query.= ", ST_GeographyFromText(?));"; + } else if ($tablename == "shapes") { + $query.= ", ST_GeographyFromText(?));"; + } else { + $query.= ");"; + } + echo $query; + $stmt = $pdconn->prepare($query); + } else { + $values = array_values($data); if ($tablename == "stops") { - $query.= "', ST_GeographyFromText('SRID=4326;POINT({$data[2]} {$data[0]})'));"; - } else if ($tablename == "shapes") { + // Coordinate values are out of range [-180 -90, 180 90] + $values[] = 'SRID=4326;POINT('.$values[5].' '.$values[4].')'; + } + if ($tablename == "shapes") { if ($data[0] != $lastshape) { $distance = 0; $lastshape = $data[0]; @@ -95,28 +112,26 @@ } $lastlat = $data[1]; $lastlon = $data[2]; - $query.= "', $distance, ST_GeographyFromText('SRID=4326;POINT({$data[2]} {$data[1]})'));"; - } else { - $query.= "');"; + + $values[4] = $distance; + $values[] = 'SRID=4326;POINT('.$values[2].' '.$values[1].')'; } - if ($tablename == "stop_times") { - // $query = "insert into $tablename (trip_id,stop_id,stop_sequence) values('{$data[0]}','{$data[3]}','{$data[4]}');"; - $trip_id = $data[0]; - $stop_id = $data[3]; - $stop_sequence = $data[4]; - $time = ($data[1] == "" ? null : $data[1]); +if (substr($values[1],0,2) == '24') $values[1] = "23:59:59"; +if (substr($values[2],0,2) == '24') $values[2] = "23:59:59"; + $stmt->execute($values); + $err = $pdconn->errorInfo(); + if ($err[2] != "" && strpos($err[2], "duplicate key") === false) { + print_r($values); + print_r($err); + die("terminated import due to db error above"); } - } - if ($tablename == "stop_times") { - $stmt->execute(); - } else { - $result = pg_query($conn, $query); } $line++; if ($line % 10000 == 0) echo "$line records... " . date('c') . "\n"; } fclose($handle); + $stmt->closeCursor(); echo "Found a total of $line records in $file.\n"; } }