Use PDO prepared statements for massive import speedup
Use PDO prepared statements for massive import speedup

--- a/include/common-template.inc.php
+++ b/include/common-template.inc.php
@@ -212,9 +212,50 @@
     }
     echo "\n</div></div></body></html>";
 }
-
+/**
+ * Echo the collapsible "Change Time" settings form.
+ *
+ * The form submits via GET to the current script and passes the current
+ * suburb, stopid and stopcode request values along as hidden fields.
+ * Everything taken from the request is HTML-escaped before output, because
+ * it is caller-controlled and lands inside markup and attribute values.
+ */
+function timeSettings() {
+    global $service_periods;
+    $formAction = htmlspecialchars(basename($_SERVER['PHP_SELF']), ENT_QUOTES, 'UTF-8');
+    $timeParam = isset($_REQUEST['time']) ? htmlspecialchars($_REQUEST['time'], ENT_QUOTES, 'UTF-8') : null;
+    $suburbParam = isset($_REQUEST['suburb']) ? htmlspecialchars($_REQUEST['suburb'], ENT_QUOTES, 'UTF-8') : "";
+    $stopidParam = isset($_REQUEST['stopid']) ? htmlspecialchars($_REQUEST['stopid'], ENT_QUOTES, 'UTF-8') : "";
+    $stopcodeParam = isset($_REQUEST['stopcode']) ? htmlspecialchars($_REQUEST['stopcode'], ENT_QUOTES, 'UTF-8') : "";
+echo '<div id="settings" data-role="collapsible" data-collapsed="true">
+<h3>Change Time (' . ($timeParam !== null ? $timeParam : "Current Time,") . ' ' . ucwords(service_period()) . ')...</h3>
+        <form action="' . $formAction . '" method="GET">
+               <input type="hidden" name="suburb" id="suburb" value="' . $suburbParam . '"/>
+       
+            <input type="hidden" name="stopid" id="stopid" value="' . $stopidParam . '"/>
+                 <input type="hidden" name="stopcode" id="stopcode" value="' . $stopcodeParam . '"/>
+        <div class="ui-body"> 
+    		<div data-role="fieldcontain">
+		        <label for="time"> Time: </label>
+		    	<input type="time" name="time" id="time" value="' . ($timeParam !== null ? $timeParam : date("H:i")) . '"/>
+			<a href="#" name="currentTime" id="currentTime" onClick="var d = new Date();' . "$('#time').val(d.getHours() +':'+ (d.getMinutes().toString().length == 1 ? '0'+ d.getMinutes():  d.getMinutes()));" . '">Current Time?</a>
+	        </div>
+		<div data-role="fieldcontain">
+		    <label for="service_period"> Service Period:  </label>
+			<select name="service_period" id="service_period">';
+foreach ($service_periods as $service_period) {
+    echo "<option value=\"$service_period\"" . (service_period() === $service_period ? " SELECTED" : "") . '>' . ucwords($service_period) . '</option>';
+}
+echo '</select>
+			<a href="#" style="display:none" name="currentPeriod" id="currentPeriod">Current Period?</a>
+		</div>
+		
+		<input type="submit" value="Update"/>
+                </div></form>
+            </div>';
+}
 function placeSettings() {
-    global $service_periods;
+    
     $geoerror = false;
     $geoerror = !isset($_SESSION['lat']) || !isset($_SESSION['lat']) || $_SESSION['lat'] == "" || $_SESSION['lon'] == "";
 

--- a/include/db/route-dao.inc.php
+++ b/include/db/route-dao.inc.php
@@ -46,11 +46,11 @@
 
 function getRouteHeadsigns($routeID) {
     global $conn;
-    $query = "select stops.stop_name, direction_id,max(service_id) as service_id, count(*)
+    $query = "select stops.stop_name, trip_headsign, direction_id,max(service_id) as service_id, count(*)
         from routes join trips on trips.route_id = routes.route_id
 join stop_times on stop_times.trip_id = trips.trip_id join stops on 
 stop_times.stop_id = stops.stop_id where trips.route_id = :routeID 
-and stop_times.stop_sequence = 1 group by stops.stop_name, direction_id having count(*) > 2";
+and stop_times.stop_sequence = 1 group by stops.stop_name, trip_headsign, direction_id having count(*) > 2";
     debug($query, "database");
     $query = $conn->prepare($query);
     $query->bindParam(":routeID", $routeID);
@@ -260,25 +260,26 @@
     $service_ids = service_ids($service_period);
     $sidA = $service_ids[0];
     $sidB = $service_ids[1];
+   
     global $conn;
     $query = "SELECT DISTINCT service_id,trips.route_id,route_short_name,route_long_name
 FROM stop_times join trips on trips.trip_id = stop_times.trip_id
 join routes on trips.route_id = routes.route_id
 join stops on stops.stop_id = stop_times.stop_id
-WHERE zone_id LIKE ':suburb AND (service_id=:service_periodA OR service_id=:service_periodB)
+WHERE stop_desc LIKE :suburb AND (service_id=:service_periodA OR service_id=:service_periodB)
  ORDER BY route_short_name";
     debug($query, "database");
     $query = $conn->prepare($query);
     $query->bindParam(":service_periodA", $sidA);
     $query->bindParam(":service_periodB", $sidB);
-    $query->bindParam(":service_period", $service_period);
-    $suburb = "%" . $suburb . ";%";
+    $suburb = "%Suburb: %" . $suburb . "%";
     $query->bindParam(":suburb", $suburb);
-    $query->execute();
-    if (!$query) {
-        databaseError($conn->errorInfo());
+    // execute() returns false on failure; report an error only in that case.
+    // The failure details live on the statement handle, not on the connection.
+    if (!$query->execute()) {
+        databaseError($query->errorInfo());
         return Array();
     }
     return $query->fetchAll();
 }
 

--- a/routeList.php
+++ b/routeList.php
@@ -18,6 +18,7 @@
 include ('include/common.inc.php');
 
 function navbar() {
+
     echo '
 		<div data-role="navbar"> 
 			<ul> 
@@ -28,6 +29,57 @@
 			</ul>
                 </div>
 	';
+}
+
+/**
+ * Echo a filterable list view of routes, merged by route number and direction.
+ *
+ * Routes that share a short name and direction id collapse into one entry
+ * whose link carries all of their route ids to trip.php.
+ *
+ * @param array $routes rows with at least route_id and route_short_name;
+ *                      distance and stop_id are also read when $nearby is set
+ */
+function displayRoutes($routes) {
+    global $nearby;
+    echo '  <ul data-role="listview" data-filter="true" data-inset="true" >';
+    $filteredRoutes = Array();
+    foreach ($routes as $route) {
+        foreach (getRouteHeadsigns($route['route_id']) as $headsign) {
+            $start = $headsign['stop_name'];
+            $serviceday = service_period_day($headsign['service_id']);
+            $key = $route['route_short_name'] . "." . $headsign['direction_id'];
+            if (isset($filteredRoutes[$key])) {
+                $filteredRoutes[$key]['route_ids'][] = $route['route_id'];
+                $filteredRoutes[$key]['route_ids'] = array_unique($filteredRoutes[$key]['route_ids']);
+            } else {
+                // Seed route_ids with the first route for this key; otherwise the
+                // link below misses it and implode() has nothing to work on.
+                $filteredRoutes[$key]['route_ids'] = Array($route['route_id']);
+                $filteredRoutes[$key]['route_short_name'] = $route['route_short_name'];
+                $filteredRoutes[$key]['route_long_name'] = "starting at " . $start;
+                $filteredRoutes[$key]['service_id'] = $serviceday;
+                $filteredRoutes[$key]['trip_headsign'] = $headsign['trip_headsign'].(strstr($headsign['trip_headsign'], "bound") ===false ?"bound":"");
+                $filteredRoutes[$key]['direction_id'] = $headsign['direction_id'];
+                if (isset($nearby)) {
+                    // Kept so the next-arrival lookup in the output loop can find them.
+                    $filteredRoutes[$key]['route_id'] = $route['route_id'];
+                    $filteredRoutes[$key]['stop_id'] = $route['stop_id'];
+                    $filteredRoutes[$key]['distance'] = $route['distance'];
+                }
+            }
+        }
+    }
+    foreach ($filteredRoutes as $key => $route) {
+        echo '<li> <a href="trip.php?routeids=' . implode(",", $route['route_ids']) . '&directionid=' . $route['direction_id'] . '"><h3>' . $route['route_short_name'] . "</h3>
+                   
+                <p>" . $route['trip_headsign'].", ".  $route['route_long_name'] . " (" . ucwords($route['service_id']) . ")</p>";
+        if (isset($nearby)) {
+            $time = getRouteAtStop($route['route_id'], $route['stop_id']);
+            echo '<span class="ui-li-count">' . ($time['arrival_time'] ? $time['arrival_time'] : "No more trips today") . "<br>" . floor($route['distance']) . 'm away</span>';
+        }
+        echo"       </a></li>\n";
+    }
 }
 
 if (isset($bysuburbs)) {
@@ -46,37 +83,32 @@
         }
     }
     echo '</ul>';
-} else if (isset($nearby) || isset($suburb)) {
-    $routes = Array();
+} else if (isset($suburb)) {
+
     if ($suburb) {
         include_header($suburb . " - " . ucwords(service_period()), "routeList");
         navbar();
-        timePlaceSettings();
+        timeSettings();
         trackEvent("Route Lists", "Routes By Suburb", $suburb);
-        $routes = getRoutesBySuburb($suburb);
+        displayRoutes(getRoutesBySuburb($suburb));
     }
-    if (isset($nearby)) {
-        include_header("Routes Nearby", "routeList", true, true);
-        trackEvent("Route Lists", "Routes Nearby", $_SESSION['lat'] . "," . $_SESSION['lon']);
-        navbar();
-        placeSettings();
-        if (!isset($_SESSION['lat']) || !isset($_SESSION['lat']) || $_SESSION['lat'] == "" || $_SESSION['lon'] == "") {
-            include_footer();
-            die();
-        }
-        $routes = getRoutesNearby($_SESSION['lat'], $_SESSION['lon']);
+} else if (isset($nearby)) {
+    $routes = Array();
+    include_header("Routes Nearby", "routeList", true, true);
+    trackEvent("Route Lists", "Routes Nearby", $_SESSION['lat'] . "," . $_SESSION['lon']);
+    navbar();
+    placeSettings();
+    if (!isset($_SESSION['lat']) || !isset($_SESSION['lat']) || $_SESSION['lat'] == "" || $_SESSION['lon'] == "") {
+        include_footer();
+        die();
     }
-    echo '  <ul data-role="listview" data-filter="true" data-inset="true" >';
-    if ($routes) {
-        foreach ($routes as $route) {
-            echo '<li><a href="trip.php?routeid=' . $route['route_id'] . '"><h3>' . $route['route_short_name'] . "</h3><p>" . $route['route_long_name'] . " (" . ucwords($route['service_id']) . ")</p>";
-            if (isset($nearby)) {
-                $time = getRouteAtStop($route['route_id'], $route['stop_id']);
-                echo '<span class="ui-li-count">' . ($time['arrival_time'] ? $time['arrival_time'] : "No more trips today") . "<br>" . floor($route['distance']) . 'm away</span>';
-            }
-            echo "</a></li>\n";
-        }
+    $routes = getRoutesNearby($_SESSION['lat'], $_SESSION['lon']);
+
+
+    if (sizeof($routes) > 0) {
+        displayRoutes($routes);
     } else {
+        echo '  <ul data-role="listview" data-filter="true" data-inset="true" >';
         echo "<li style='text-align: center;'> No routes nearby.</li>";
     }
 } else if (isset($bynumber) || isset($numberSeries)) {
@@ -118,39 +150,14 @@
         }
     }
     else if ($numberSeries) {
-        $routes = getRoutesByNumberSeries($numberSeries);
-        $filteredRoutes = Array();
-        foreach ($routes as $route) {
-            foreach (getRouteHeadsigns($route['route_id']) as $headsign) {
-                $start = $headsign['stop_name'];
-            $serviceday = service_period_day ( $headsign['service_id']);
-            $key = $route['route_short_name'].".".$headsign['direction_id'];
-            if (isset($filteredRoutes[$key])) {
-                $filteredRoutes[$key]['route_ids'][] = $route['route_id'];
-                $filteredRoutes[$key]['route_ids'] = array_unique($filteredRoutes[$key]['route_ids']);
-            } else {
-                $filteredRoutes[$key]['route_short_name'] = $route['route_short_name'];
-                $filteredRoutes[$key]['route_long_name'] = "Starting at ".$start;
-                $filteredRoutes[$key]['service_id'] = $serviceday;
-                $filteredRoutes[$key]['direction_id'] = $headsign['direction_id'];
-            }
-            }
-        }
-        foreach ($filteredRoutes as $key => $route) {
-               echo '<li> <a href="trip.php?routeids=' . implode(",",$route['route_ids']) . '&directionid='.$route['direction_id'].'"><h3>' . $route['route_short_name'] . "</h3>
-                   
-                <p>" . $route['route_long_name'] . " (" . ucwords($route['service_id']) . ")</p>
-                    </a></li>\n";
-        }
+        displayRoutes(getRoutesByNumberSeries($numberSeries));
     }
 } else {
     include_header("Routes by Destination", "routeList");
     navbar();
     echo ' <ul data-role="listview"  data-inset="true">';
     if (isset($routeDestination)) {
-        foreach (getRoutesByDestination($routeDestination) as $route) {
-            echo '<li><a href="trip.php?routeid=' . $route["route_id"] . '"><h3>' . $route["route_short_name"] . '</h3><p>' . $route["route_long_name"] . " (" . ucwords($route['service_id']) . ")</p></a></li>\n";
-        }
+        displayRoutes(getRoutesByDestination($routeDestination));
     } else {
         foreach (getRoutesByDestination() as $destination) {
             echo '<li><a href="' . curPageURL() . '/routeList.php?routeDestination=' . urlencode($destination['route_long_name']) . '">' . $destination['route_long_name'] . "... </a></li>\n";

--- a/servicealerts/importer.py
+++ b/servicealerts/importer.py
@@ -1,5 +1,3 @@
-#dependencies http://code.google.com/p/python-twitter/
-
 # info
 # http://stackoverflow.com/questions/4206882/named-entity-recognition-with-preset-list-of-names-for-python-php/4207128#4207128
 # http://alias-i.com/lingpipe/demos/tutorial/ne/read-me.html approximate dist
@@ -12,11 +10,17 @@
 # http://esa.act.gov.au/feeds/currentincidents.xml
 
 # source: https://gist.github.com/322906/90dea659c04570757cccf0ce1e6d26c9d06f9283
+# to install python -m nltk.downloader punkt
 import nltk
-import twitter
+import tweepy
 import psycopg2
+import pickle
+
+from iniparse import INIConfig
+
 def insert_service_alert_sitewide(heading, message, url):
-        
+        print "NaN"
+
 def insert_service_alert_for_street(streets, heading, message, url):
     	conn_string = "host='localhost' dbname='energymapper' user='postgres' password='snmc'"
 	# print the connection string we will use to connect
@@ -29,30 +33,22 @@
 		cursor = conn.cursor()
 
 		# execute our Query
-		cursor.execute("select max(value), extract(dow from max(time)) as dow, \
-extract(year from max(time))::text || lpad(extract(month from max(time))::text,2,'0') \
-|| lpad(extract(month from max(time))::text,2,'0') as yearmonthweek, to_char(max(time),'J') \
-from environmentdata_values where \"dataSourceID\"='NSWAEMODemand' \
-group by extract(dow from time), extract(year from time),  extract(week from time) \
-order by  extract(year from time),  extract(week from time), extract(dow from time)")
+		cursor.execute("")
 
 		# retrieve the records from the database
 		records = cursor.fetchall()
 
   	  	for record in records:
 			ys.append(record[0])
-# >>> cur.execute("INSERT INTO test (num, data) VALUES (%s, %s)", (42, 'bar'))
-#>>> cur.statusmessage
-#'INSERT 0 1'
+                # >>> cur.execute("INSERT INTO test (num, data) VALUES (%s, %s)", (42, 'bar'))
+                #>>> cur.statusmessage
+                #'INSERT 0 1'
 	except:
 		# Get the most recent exception
 		exceptionType, exceptionValue, exceptionTraceback = sys.exc_info()
 		# Exit the script and print an error telling what happened.
 		sys.exit("Database connection failed!\n ->%s" % (exceptionValue))
 		
-def get_tweets(user):
-    tapi = twitter.Api()
-    return tapi.GetUserTimeline(user)
 
 def extract_entity_names(t):
     entity_names = []
@@ -86,3 +82,23 @@
     # Print unique entity names
     print set(entity_names)
 
+cfg = INIConfig(open('/tmp/aws.ini'))
+
+auth = tweepy.OAuthHandler(cfg.api_keys.twitter_consumer_key, cfg.api_keys.twitter_consumer_secret)
+auth.set_access_token(cfg.api_keys.twitter_access_token, cfg.api_keys.twitter_access_token_secret)
+
+#api = tweepy.API(auth)
+api = tweepy.API()
+# If the authentication was successful, you should
+# see the name of the account print out
+#print api.me().name
+# https://github.com/tweepy/tweepy/blob/master/tweepy/api.py
+for status in api.user_timeline(screen_name="ACTPol_Traffic",exclude_replies='true'):
+            print status.text
+            print status.created_at
+            print extract_names(status.text)
+# print api.update_status(status="test")
+
+last_tweet_ids = { "lion": "111", "kitty": "2222" } 
+pickle.dump( last_tweet_ids, open( "save.p", "wb" ) )
+last_tweet_ids = pickle.load( open( "save.p", "rb" ) )

--- /dev/null
+++ b/servicealerts/punkt.zip

file:a/stop.php -> file:b/stop.php
--- a/stop.php
+++ b/stop.php
@@ -96,31 +96,7 @@
     ));
 }
 
-// time settings
-echo '<div id="settings" data-role="collapsible" data-collapsed="true">
-<h3>Change Time (' . (isset($_REQUEST['time']) ? $_REQUEST['time'] : "Current Time,") . ' ' . ucwords(service_period()) . ')...</h3>
-        <form action="' . basename($_SERVER['PHP_SELF']) . '" method="GET">
-            <input type="hidden" name="stopid" id="stopid" value="' . (isset($_REQUEST['stopid']) ? $_REQUEST['stopid'] : "") . '"/>
-                 <input type="hidden" name="stopcode" id="stopcode" value="' . (isset($_REQUEST['stopcode']) ? $_REQUEST['stopcode'] : "") . '"/>
-        <div class="ui-body"> 
-    		<div data-role="fieldcontain">
-		        <label for="time"> Time: </label>
-		    	<input type="time" name="time" id="time" value="' . (isset($_REQUEST['time']) ? $_REQUEST['time'] : date("H:i")) . '"/>
-			<a href="#" name="currentTime" id="currentTime" onClick="var d = new Date();' . "$('#time').val(d.getHours() +':'+ (d.getMinutes().toString().length == 1 ? '0'+ d.getMinutes():  d.getMinutes()));" . '">Current Time?</a>
-	        </div>
-		<div data-role="fieldcontain">
-		    <label for="service_period"> Service Period:  </label>
-			<select name="service_period" id="service_period">';
-foreach ($service_periods as $service_period) {
-    echo "<option value=\"$service_period\"" . (service_period() === $service_period ? " SELECTED" : "") . '>' . ucwords($service_period) . '</option>';
-}
-echo '</select>
-			<a href="#" style="display:none" name="currentPeriod" id="currentPeriod">Current Period?</a>
-		</div>
-		
-		<input type="submit" value="Update"/>
-                </div></form>
-            </div>';
+timeSettings();
 
 echo '</span><span class="content-primary">';
 echo '  <ul data-role="listview"  data-inset="true">';

file:a/trip.php -> file:b/trip.php
--- a/trip.php
+++ b/trip.php
@@ -83,7 +83,7 @@
     }
 }
 foreach ($filteredRoutes as $key => $row) {
-    echo '<a href="trip.php?routeids=' . implode(",",$row['route_ids']) . '&directionid='.$row['direction_id'].'">' . $row['route_long_name'] . ' (' . ucwords($row['service_id']) . ')</a> ';
+    echo '<a href="trip.php?routeids=' . implode(",",$row['route_ids']) . '&directionid='.$row['direction_id'].'&service_period='.$row['service_id'].'">' . $row['route_long_name'] . ' (' . ucwords($row['service_id']) . ')</a> ';
     $otherDir++;
 }
 

--- a/updatedb.php
+++ b/updatedb.php
@@ -17,7 +17,6 @@
  */
 if (php_sapi_name() == "cli") {
     include ('include/common.inc.php');
-    $conn = pg_connect("dbname=transitdata user=postgres password=snmc host=localhost") or die('connection failed');
     $pdconn = new PDO("pgsql:dbname=transitdata;user=postgres;password=snmc;host=localhost");
 
     /*
@@ -34,7 +33,7 @@
 // Unzip cbrfeed.zip, import all csv files to database
     $unzip = false;
     $zip = zip_open(dirname(__FILE__) . "/cbrfeed.zip");
-    $tmpdir = "c:/tmp/";
+    $tmpdir = "c:/tmp/cbrfeed/";
     mkdir($tmpdir);
     if ($unzip) {
         if (is_resource($zip)) {
@@ -53,6 +52,7 @@
     }
 
     foreach (scandir($tmpdir) as $file) {
+        $headers = Array();
         if (!strpos($file, ".txt") === false) {
             $fieldseparator = ",";
             $lineseparator = "\n";
@@ -60,33 +60,50 @@
             echo "Opening $file \n";
             $line = 0;
             $handle = fopen($tmpdir . $file, "r");
-            if ($tablename == "stop_times") {
-                $stmt = $pdconn->prepare("insert into stop_times (trip_id,stop_id,stop_sequence,arrival_time,departure_time) values(:trip_id, :stop_id, :stop_sequence,:arrival_time,:departure_time);");
-                $stmt->bindParam(':trip_id', $trip_id);
-                $stmt->bindParam(':stop_id', $stop_id);
-                $stmt->bindParam(':stop_sequence', $stop_sequence);
-                $stmt->bindParam(':arrival_time', $time);
-                $stmt->bindParam(':departure_time', $time);
-            }
 
             $distance = 0;
             $lastshape = 0;
             $lastlat = 0;
             $lastlon = 0;
+            $stmt = null;
             while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) {
                 if ($line == 0) {
-                    
-                } else {
-                    $query = "insert into $tablename values(";
+                    $headers = array_values($data);
+                    if ($tablename == "stops") {
+                        $headers[] = "position";
+                    }
+                    if ($tablename == "shapes") {
+                        $headers[] = "shape_pt";
+                    }
+                    $query = "insert into $tablename (";
+                    $valueCount = 0;
+                    foreach ($headers as $value) {
+                        $query.=($valueCount > 0 ? "," : "") . pg_escape_string($value);
+                        $valueCount++;
+                    }
+                    $query.= ") values( ";
                     $valueCount = 0;
                     foreach ($data as $value) {
-                        $query.=($valueCount > 0 ? "','" : "'") . pg_escape_string($value);
+                        $query.=($valueCount > 0 ? "," : "") . '?';
                         $valueCount++;
                     }
+                    if ($tablename == "stops") {
+                        $query.= ", ST_GeographyFromText(?));";
+                    } else if ($tablename == "shapes") {
+                        $query.= ", ST_GeographyFromText(?));";
+                    } else {
+                        $query.= ");";
+                    }
 
+                    echo $query;
+                    $stmt = $pdconn->prepare($query);
+                } else {
+                    $values = array_values($data);
                     if ($tablename == "stops") {
-                        $query.= "', ST_GeographyFromText('SRID=4326;POINT({$data[2]} {$data[0]})'));";
-                    } else if ($tablename == "shapes") {
+                        // Coordinate values are out of range [-180 -90, 180 90]
+                        $values[] = 'SRID=4326;POINT('.$values[5].' '.$values[4].')';
+                    }
+                    if ($tablename == "shapes") {
                         if ($data[0] != $lastshape) {
                             $distance = 0;
                             $lastshape = $data[0];
@@ -95,28 +112,36 @@
                         }
                         $lastlat = $data[1];
                         $lastlon = $data[2];
-                        $query.= "', $distance,  ST_GeographyFromText('SRID=4326;POINT({$data[2]} {$data[1]})'));";
-                    } else {
-                        $query.= "');";
+
+                        $values[4] = $distance;
+                        $values[] = 'SRID=4326;POINT('.$values[2].' '.$values[1].')';
                     }
-                    if ($tablename == "stop_times") {
-                        //                  $query = "insert into $tablename (trip_id,stop_id,stop_sequence) values('{$data[0]}','{$data[3]}','{$data[4]}');";
-                        $trip_id = $data[0];
-                        $stop_id = $data[3];
-                        $stop_sequence = $data[4];
-                        $time = ($data[1] == "" ? null : $data[1]);
+                    // Clamp 24:xx times only when the value really is a time; a bare
+                    // prefix test would also rewrite ids or names that start with "24".
+                    foreach (Array(1, 2) as $timeColumn) {
+                        if (isset($values[$timeColumn]) && preg_match('/^24:\d{2}(:\d{2})?$/', $values[$timeColumn])) {
+                            $values[$timeColumn] = "23:59:59";
+                        }
+                    }
+                    $stmt->execute($values);
+                    // Statement-level failures are reported on the statement handle,
+                    // not on the PDO connection.
+                    $err = $stmt->errorInfo();
+                    if ($err[2] != "" && strpos($err[2], "duplicate key") === false) {
+                        print_r($values);
+                        print_r($err);
+                        die("terminated import due to db error above");
                     }
-                }
-                if ($tablename == "stop_times") {
-                    $stmt->execute();
-                } else {
-                    $result = pg_query($conn, $query);
                 }
                 $line++;
                 if ($line % 10000 == 0)
                     echo "$line records... " . date('c') . "\n";
             }
             fclose($handle);
+            // $stmt is still null when the file had no header row to prepare from.
+            if ($stmt !== null) {
+                $stmt->closeCursor();
+            }
             echo "Found a total of $line records in $file.\n";
         }
     }