Use PDO prepared statements for massive import speedup
Use PDO prepared statements for massive import speedup

<?php <?php
   
/* /*
* Copyright 2010,2011 Alexander Sadleir * Copyright 2010,2011 Alexander Sadleir
   
Licensed under the Apache License, Version 2.0 (the "License"); Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
You may obtain a copy of the License at You may obtain a copy of the License at
   
http://www.apache.org/licenses/LICENSE-2.0 http://www.apache.org/licenses/LICENSE-2.0
   
Unless required by applicable law or agreed to in writing, software Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. limitations under the License.
*/ */
if (php_sapi_name() == "cli") { if (php_sapi_name() == "cli") {
include ('include/common.inc.php'); include ('include/common.inc.php');
$conn = pg_connect("dbname=transitdata user=postgres password=snmc host=localhost") or die('connection failed');  
$pdconn = new PDO("pgsql:dbname=transitdata;user=postgres;password=snmc;host=localhost"); $pdconn = new PDO("pgsql:dbname=transitdata;user=postgres;password=snmc;host=localhost");
   
/* /*
delete from agency; delete from agency;
delete from calendar; delete from calendar;
delete from calendar_dates; delete from calendar_dates;
delete from routes; delete from routes;
delete from shapes; delete from shapes;
delete from stop_times; delete from stop_times;
delete from stops; delete from stops;
delete from trips; delete from trips;
*/ */
   
// Unzip cbrfeed.zip, import all csv files to database // Unzip cbrfeed.zip, import all csv files to database
$unzip = false; $unzip = false;
$zip = zip_open(dirname(__FILE__) . "/cbrfeed.zip"); $zip = zip_open(dirname(__FILE__) . "/cbrfeed.zip");
$tmpdir = "c:/tmp/"; $tmpdir = "c:/tmp/cbrfeed/";
mkdir($tmpdir); mkdir($tmpdir);
if ($unzip) { if ($unzip) {
if (is_resource($zip)) { if (is_resource($zip)) {
while ($zip_entry = zip_read($zip)) { while ($zip_entry = zip_read($zip)) {
$fp = fopen($tmpdir . zip_entry_name($zip_entry), "w"); $fp = fopen($tmpdir . zip_entry_name($zip_entry), "w");
if (zip_entry_open($zip, $zip_entry, "r")) { if (zip_entry_open($zip, $zip_entry, "r")) {
echo "Extracting " . zip_entry_name($zip_entry) . "\n"; echo "Extracting " . zip_entry_name($zip_entry) . "\n";
$buf = zip_entry_read($zip_entry, zip_entry_filesize($zip_entry)); $buf = zip_entry_read($zip_entry, zip_entry_filesize($zip_entry));
fwrite($fp, "$buf"); fwrite($fp, "$buf");
zip_entry_close($zip_entry); zip_entry_close($zip_entry);
fclose($fp); fclose($fp);
} }
} }
zip_close($zip); zip_close($zip);
} }
} }
   
foreach (scandir($tmpdir) as $file) { foreach (scandir($tmpdir) as $file) {
  $headers = Array();
if (!strpos($file, ".txt") === false) { if (!strpos($file, ".txt") === false) {
$fieldseparator = ","; $fieldseparator = ",";
$lineseparator = "\n"; $lineseparator = "\n";
$tablename = str_replace(".txt", "", $file); $tablename = str_replace(".txt", "", $file);
echo "Opening $file \n"; echo "Opening $file \n";
$line = 0; $line = 0;
$handle = fopen($tmpdir . $file, "r"); $handle = fopen($tmpdir . $file, "r");
if ($tablename == "stop_times") {  
$stmt = $pdconn->prepare("insert into stop_times (trip_id,stop_id,stop_sequence,arrival_time,departure_time) values(:trip_id, :stop_id, :stop_sequence,:arrival_time,:departure_time);");  
$stmt->bindParam(':trip_id', $trip_id);  
$stmt->bindParam(':stop_id', $stop_id);  
$stmt->bindParam(':stop_sequence', $stop_sequence);  
$stmt->bindParam(':arrival_time', $time);  
$stmt->bindParam(':departure_time', $time);  
}  
   
$distance = 0; $distance = 0;
$lastshape = 0; $lastshape = 0;
$lastlat = 0; $lastlat = 0;
$lastlon = 0; $lastlon = 0;
  $stmt = null;
while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) { while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) {
if ($line == 0) { if ($line == 0) {
  $headers = array_values($data);
} else { if ($tablename == "stops") {
$query = "insert into $tablename values("; $headers[] = "position";
  }
  if ($tablename == "shapes") {
  $headers[] = "shape_pt";
  }
  $query = "insert into $tablename (";
  $valueCount = 0;
  foreach ($headers as $value) {
  $query.=($valueCount > 0 ? "," : "") . pg_escape_string($value);
  $valueCount++;
  }
  $query.= ") values( ";
$valueCount = 0; $valueCount = 0;
foreach ($data as $value) { foreach ($data as $value) {
$query.=($valueCount > 0 ? "','" : "'") . pg_escape_string($value); $query.=($valueCount > 0 ? "," : "") . '?';
$valueCount++; $valueCount++;
} }
  if ($tablename == "stops") {
  $query.= ", ST_GeographyFromText(?));";
  } else if ($tablename == "shapes") {
  $query.= ", ST_GeographyFromText(?));";
  } else {
  $query.= ");";
  }
   
  echo $query;
  $stmt = $pdconn->prepare($query);
  } else {
  $values = array_values($data);
if ($tablename == "stops") { if ($tablename == "stops") {
$query.= "', ST_GeographyFromText('SRID=4326;POINT({$data[2]} {$data[0]})'));"; // Coordinate values are out of range [-180 -90, 180 90]
} else if ($tablename == "shapes") { $values[] = 'SRID=4326;POINT('.$values[5].' '.$values[4].')';
  }
  if ($tablename == "shapes") {
if ($data[0] != $lastshape) { if ($data[0] != $lastshape) {
$distance = 0; $distance = 0;
$lastshape = $data[0]; $lastshape = $data[0];
} else { } else {
$distance += distance($lastlat, $lastlon, $data[1], $data[2]); $distance += distance($lastlat, $lastlon, $data[1], $data[2]);
} }
$lastlat = $data[1]; $lastlat = $data[1];
$lastlon = $data[2]; $lastlon = $data[2];
$query.= "', $distance, ST_GeographyFromText('SRID=4326;POINT({$data[2]} {$data[1]})'));";  
} else { $values[4] = $distance;
$query.= "');"; $values[] = 'SRID=4326;POINT('.$values[2].' '.$values[1].')';
} }
if ($tablename == "stop_times") { if (substr($values[1],0,2) == '24') $values[1] = "23:59:59";
// $query = "insert into $tablename (trip_id,stop_id,stop_sequence) values('{$data[0]}','{$data[3]}','{$data[4]}');"; if (substr($values[2],0,2) == '24') $values[2] = "23:59:59";
$trip_id = $data[0]; $stmt->execute($values);
$stop_id = $data[3]; $err = $pdconn->errorInfo();
$stop_sequence = $data[4]; if ($err[2] != "" && strpos($err[2], "duplicate key") === false) {
$time = ($data[1] == "" ? null : $data[1]); print_r($values);
  print_r($err);
  die("terminated import due to db error above");
} }
}  
if ($tablename == "stop_times") {  
$stmt->execute();  
} else {  
$result = pg_query($conn, $query);  
} }
$line++; $line++;
if ($line % 10000 == 0) if ($line % 10000 == 0)
echo "$line records... " . date('c') . "\n"; echo "$line records... " . date('c') . "\n";
} }
fclose($handle); fclose($handle);
  $stmt->closeCursor();
echo "Found a total of $line records in $file.\n"; echo "Found a total of $line records in $file.\n";
} }
} }
} }
?> ?>