Use PDO prepared statements for massive import speedup
--- a/updatedb.php
+++ b/updatedb.php
@@ -17,7 +17,6 @@
*/
if (php_sapi_name() == "cli") {
include ('include/common.inc.php');
- $conn = pg_connect("dbname=transitdata user=postgres password=snmc host=localhost") or die('connection failed');
$pdconn = new PDO("pgsql:dbname=transitdata;user=postgres;password=snmc;host=localhost");
/*
@@ -34,7 +33,7 @@
// Unzip cbrfeed.zip, import all csv files to database
$unzip = false;
$zip = zip_open(dirname(__FILE__) . "/cbrfeed.zip");
- $tmpdir = "c:/tmp/";
+ $tmpdir = "c:/tmp/cbrfeed/";
mkdir($tmpdir);
if ($unzip) {
if (is_resource($zip)) {
@@ -53,6 +52,7 @@
}
foreach (scandir($tmpdir) as $file) {
+ $headers = Array();
if (!strpos($file, ".txt") === false) {
$fieldseparator = ",";
$lineseparator = "\n";
@@ -60,33 +60,50 @@
echo "Opening $file \n";
$line = 0;
$handle = fopen($tmpdir . $file, "r");
- if ($tablename == "stop_times") {
- $stmt = $pdconn->prepare("insert into stop_times (trip_id,stop_id,stop_sequence,arrival_time,departure_time) values(:trip_id, :stop_id, :stop_sequence,:arrival_time,:departure_time);");
- $stmt->bindParam(':trip_id', $trip_id);
- $stmt->bindParam(':stop_id', $stop_id);
- $stmt->bindParam(':stop_sequence', $stop_sequence);
- $stmt->bindParam(':arrival_time', $time);
- $stmt->bindParam(':departure_time', $time);
- }
$distance = 0;
$lastshape = 0;
$lastlat = 0;
$lastlon = 0;
+ $stmt = null;
while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) {
if ($line == 0) {
-
- } else {
- $query = "insert into $tablename values(";
+ $headers = array_values($data);
+ if ($tablename == "stops") {
+ $headers[] = "position";
+ }
+ if ($tablename == "shapes") {
+ $headers[] = "shape_pt";
+ }
+ $query = "insert into $tablename (";
+ $valueCount = 0;
+ foreach ($headers as $value) {
+ $query.=($valueCount > 0 ? "," : "") . pg_escape_string($value);
+ $valueCount++;
+ }
+ $query.= ") values( ";
$valueCount = 0;
foreach ($data as $value) {
- $query.=($valueCount > 0 ? "','" : "'") . pg_escape_string($value);
+ $query.=($valueCount > 0 ? "," : "") . '?';
$valueCount++;
}
+ if ($tablename == "stops") {
+ $query.= ", ST_GeographyFromText(?));";
+ } else if ($tablename == "shapes") {
+ $query.= ", ST_GeographyFromText(?));";
+ } else {
+ $query.= ");";
+ }
+ echo $query;
+ $stmt = $pdconn->prepare($query);
+ } else {
+ $values = array_values($data);
if ($tablename == "stops") {
- $query.= "', ST_GeographyFromText('SRID=4326;POINT({$data[2]} {$data[0]})'));";
- } else if ($tablename == "shapes") {
+ // Coordinate values are out of range [-180 -90, 180 90]
+ $values[] = 'SRID=4326;POINT('.$values[5].' '.$values[4].')';
+ }
+ if ($tablename == "shapes") {
if ($data[0] != $lastshape) {
$distance = 0;
$lastshape = $data[0];
@@ -95,28 +112,26 @@
}
$lastlat = $data[1];
$lastlon = $data[2];
- $query.= "', $distance, ST_GeographyFromText('SRID=4326;POINT({$data[2]} {$data[1]})'));";
- } else {
- $query.= "');";
+
+ $values[4] = $distance;
+ $values[] = 'SRID=4326;POINT('.$values[2].' '.$values[1].')';
}
- if ($tablename == "stop_times") {
- // $query = "insert into $tablename (trip_id,stop_id,stop_sequence) values('{$data[0]}','{$data[3]}','{$data[4]}');";
- $trip_id = $data[0];
- $stop_id = $data[3];
- $stop_sequence = $data[4];
- $time = ($data[1] == "" ? null : $data[1]);
+if (substr($values[1],0,2) == '24') $values[1] = "23:59:59";
+if (substr($values[2],0,2) == '24') $values[2] = "23:59:59";
+ $stmt->execute($values);
+ $err = $pdconn->errorInfo();
+ if ($err[2] != "" && strpos($err[2], "duplicate key") === false) {
+ print_r($values);
+ print_r($err);
+ die("terminated import due to db error above");
}
- }
- if ($tablename == "stop_times") {
- $stmt->execute();
- } else {
- $result = pg_query($conn, $query);
}
$line++;
if ($line % 10000 == 0)
echo "$line records... " . date('c') . "\n";
}
fclose($handle);
+ $stmt->closeCursor();
echo "Found a total of $line records in $file.\n";
}
}