Use PDO prepared statements for massive import speedup
Use PDO prepared statements for massive import speedup

--- a/updatedb.php
+++ b/updatedb.php
@@ -17,7 +17,6 @@
  */
 if (php_sapi_name() == "cli") {
     include ('include/common.inc.php');
-    $conn = pg_connect("dbname=transitdata user=postgres password=snmc host=localhost") or die('connection failed');
     $pdconn = new PDO("pgsql:dbname=transitdata;user=postgres;password=snmc;host=localhost");
 
     /*
@@ -34,7 +33,7 @@
 // Unzip cbrfeed.zip, import all csv files to database
     $unzip = false;
     $zip = zip_open(dirname(__FILE__) . "/cbrfeed.zip");
-    $tmpdir = "c:/tmp/";
+    $tmpdir = "c:/tmp/cbrfeed/";
     mkdir($tmpdir);
     if ($unzip) {
         if (is_resource($zip)) {
@@ -53,6 +52,7 @@
     }
 
     foreach (scandir($tmpdir) as $file) {
+        $headers = Array();
         if (!strpos($file, ".txt") === false) {
             $fieldseparator = ",";
             $lineseparator = "\n";
@@ -60,33 +60,50 @@
             echo "Opening $file \n";
             $line = 0;
             $handle = fopen($tmpdir . $file, "r");
-            if ($tablename == "stop_times") {
-                $stmt = $pdconn->prepare("insert into stop_times (trip_id,stop_id,stop_sequence,arrival_time,departure_time) values(:trip_id, :stop_id, :stop_sequence,:arrival_time,:departure_time);");
-                $stmt->bindParam(':trip_id', $trip_id);
-                $stmt->bindParam(':stop_id', $stop_id);
-                $stmt->bindParam(':stop_sequence', $stop_sequence);
-                $stmt->bindParam(':arrival_time', $time);
-                $stmt->bindParam(':departure_time', $time);
-            }
 
             $distance = 0;
             $lastshape = 0;
             $lastlat = 0;
             $lastlon = 0;
+            $stmt = null;
             while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) {
                 if ($line == 0) {
-                    
-                } else {
-                    $query = "insert into $tablename values(";
+                    $headers = array_values($data);
+                    if ($tablename == "stops") {
+                        $headers[] = "position";
+                    }
+                    if ($tablename == "shapes") {
+                        $headers[] = "shape_pt";
+                    }
+                    $query = "insert into $tablename (";
+                    $valueCount = 0;
+                    foreach ($headers as $value) {
+                        $query.=($valueCount > 0 ? "," : "") . pg_escape_string($value);
+                        $valueCount++;
+                    }
+                    $query.= ") values( ";
                     $valueCount = 0;
                     foreach ($data as $value) {
-                        $query.=($valueCount > 0 ? "','" : "'") . pg_escape_string($value);
+                        $query.=($valueCount > 0 ? "," : "") . '?';
                         $valueCount++;
                     }
+                    if ($tablename == "stops") {
+                        $query.= ", ST_GeographyFromText(?));";
+                    } else if ($tablename == "shapes") {
+                        $query.= ", ST_GeographyFromText(?));";
+                    } else {
+                        $query.= ");";
+                    }
 
+                    echo $query;
+                    $stmt = $pdconn->prepare($query);
+                } else {
+                    $values = array_values($data);
                     if ($tablename == "stops") {
-                        $query.= "', ST_GeographyFromText('SRID=4326;POINT({$data[2]} {$data[0]})'));";
-                    } else if ($tablename == "shapes") {
+                        // Coordinate values are out of range [-180 -90, 180 90]
+                        $values[] = 'SRID=4326;POINT('.$values[5].' '.$values[4].')';
+                    }
+                    if ($tablename == "shapes") {
                         if ($data[0] != $lastshape) {
                             $distance = 0;
                             $lastshape = $data[0];
@@ -95,28 +112,26 @@
                         }
                         $lastlat = $data[1];
                         $lastlon = $data[2];
-                        $query.= "', $distance,  ST_GeographyFromText('SRID=4326;POINT({$data[2]} {$data[1]})'));";
-                    } else {
-                        $query.= "');";
+
+                        $values[4] = $distance;
+                        $values[] = 'SRID=4326;POINT('.$values[2].' '.$values[1].')';
                     }
-                    if ($tablename == "stop_times") {
-                        //                  $query = "insert into $tablename (trip_id,stop_id,stop_sequence) values('{$data[0]}','{$data[3]}','{$data[4]}');";
-                        $trip_id = $data[0];
-                        $stop_id = $data[3];
-                        $stop_sequence = $data[4];
-                        $time = ($data[1] == "" ? null : $data[1]);
+if (substr($values[1],0,2) == '24') $values[1] = "23:59:59";
+if (substr($values[2],0,2) == '24') $values[2] = "23:59:59";
+                    $stmt->execute($values);
+                    $err = $pdconn->errorInfo();
+                    if ($err[2] != "" && strpos($err[2], "duplicate key") === false) {
+                        print_r($values);
+                        print_r($err);
+                        die("terminated import due to db error above");
                     }
-                }
-                if ($tablename == "stop_times") {
-                    $stmt->execute();
-                } else {
-                    $result = pg_query($conn, $query);
                 }
                 $line++;
                 if ($line % 10000 == 0)
                     echo "$line records... " . date('c') . "\n";
             }
             fclose($handle);
+            $stmt->closeCursor();
             echo "Found a total of $line records in $file.\n";
         }
     }