# Patch metadata (kept as comments so the extracted scripts stay valid Ruby):
#   From: maxious
#   Date: Tue, 20 Apr 2010 14:31:30 +0000
#   Subject: Suggest timepoints using OSM database of POIs
#   X-Git-Url: https://maxious.lambdacomplex.org/git/?p=bus.git&a=commitdiff&h=214e0a01c61e552e480689683da0e4afcbab23a8
#
# Suggest timepoints using OSM database of POIs

# ==== new file: maxious-canberra-transit-feed/01-extracttimes.rb ====
# Reads the route timetable pages matching source-html/Route*.htm* and writes
# one YAML file per extracted timetable into the output/ directory that sits
# next to this script.
require 'rubygems'
require 'nokogiri'
require 'open-uri'
require 'pp'
require 'yaml' # Hash#to_yaml (used in makeTimetable) needs the YAML library

# Builds a timetable hash from one HTML timetable table and saves it as YAML.
#
# table      - Nokogiri node of the table; the <th> cells of its first row
#              name the timing points, and every row that has <td> cells is
#              one trip.
# period     - label used as the key of "stop_times" and in the file name
#              (this script passes "weekday", "saturday" or "sunday").
# short_name - route number(s) taken from the page title.
#
# Returns the timetable hash with the keys "short_name", "long_name",
# "time_points", "stop_times" and "between_stops".
# Raises RuntimeError when the table yields no timing points or no trips.
def makeTimetable(table, period, short_name)
  timetable = {"stop_times" => [], "between_stops" => [], "short_name" => short_name}

  headings = table.xpath('tr[1]//th').map do |tp|
    heading = tp.content
    # Skip spacer cells: a lone non-breaking space, an empty cell, or a lone
    # line break.
    # NOTE(review): the third literal spans a line break in the extracted
    # patch; it is assumed to be a single newline - confirm against the repo.
    next if ["\302\240", "", "\n"].include?(heading)
    # NOTE(review): as extracted, gsub(" - ", " - ") is a no-op; the original
    # spacing inside the literals may have been lost - confirm against the repo.
    heading.squeeze(" ").gsub("\r\n Platform", " - Platform").gsub(" - ", " - ").gsub("\n", "").gsub("\r", "").gsub("\\", " / ").strip
  end
  time_points = headings.compact
  # Without this guard the long_name concatenation below fails with a
  # TypeError that does not say which route/period is affected.
  if time_points.empty?
    raise "No timing points for route " + short_name + " in period " + period
  end
  timetable["time_points"] = time_points
  timetable["long_name"] = "To " + time_points.last

  periodtimes = []
  table.css('tr').each do |row|
    #TODO convert to GTFS time ie. replace " AM" with a
    times = row.css('td').map { |cell| cell.content.squeeze(" ").strip }
    next if times.empty?
    # The first cell of a trip row is the route number, not a time.
    #TODO: account for shifting route numbers eg. intertown/redex 62/162
    times.shift
    periodtimes << times
  end
  if periodtimes.empty?
    raise "No times for route " + short_name + " in period " + period
  end
  timetable["stop_times"] = { period => periodtimes }
  # pp timetable

  filename = timetable["short_name"] + "-" + timetable["long_name"].downcase.gsub(" ", "-").gsub("/", "") + "." + period + ".yml"
  puts "Saving " + filename
  output_dir = "#{File.dirname(__FILE__)}/output"
  # Create the output directory on first use instead of failing in File.open.
  Dir.mkdir(output_dir) unless File.directory?(output_dir)
  File.open("#{output_dir}/" + filename, "w") do |f|
    f.write timetable.to_yaml
  end
  timetable
end

#TODO fix route 934
Dir.glob("source-html/Route*.htm*") { |file|
  puts "Opened " + file
  # The block form closes the file handle as soon as the page is parsed.
  doc = File.open(file) { |html| Nokogiri::HTML(html) }
  timetables = []

  # The route number(s) come from the page title.
  short_name = ""
  doc.xpath('//title').each do |title|
    short_name = title.content.gsub("Route_", "").gsub("Route ", "").squeeze(" ").strip
  end
  if short_name == ""
    raise "Route number(s) not found in tag"
  end

  # Each entry pairs an XPath selecting timetable tables with the service
  # period those tables are saved under; entries are processed in this order.
  table_selectors = [
    ['//table[preceding::text()="Weekdays"]', "weekday"],
    #weekends
    ['//table[preceding::text()="Saturdays" and following::a]', "saturday"],
    ['//table[preceding::text()="Sundays"]', "sunday"],
    #930/934 special cases
    ['//table[preceding::text()="Saturday" and following::h2]', "saturday"],
    ['//table[preceding::text()="Sunday"]', "sunday"],
    #route 81 = Weekdays - School Holidays Only
    #TODO set active date range to only be holidays
    ['//table[preceding::text()="Weekdays - School Holidays Only "]', "weekday"]
  ]
  table_selectors.each do |selector, period|
    doc.xpath(selector).each do |table|
      timetables << makeTimetable(table, period, short_name)
    end
  end

  if timetables.size > 2
    puts "WARNING: " + file + " more than 2 timetables (weekend split?):" + timetables.size.to_s
  end
  if timetables.size < 2
    puts "WARNING: " + file + " less than 2 timetables (weekday loop service?):" + timetables.size.to_s
  else
    # Timing points of the first timetable that the second one lacks
    # (Array#- ignores order, so the second list needs no reversing).
    unmatched = timetables[0]["time_points"] - timetables[1]["time_points"]
    unless unmatched.empty?
      puts "WARNING: first pair of timetable timing points are not complementary for " + file
      pp(unmatched)
    end
  end
  if timetables.size < 1
    raise "No timetables extracted from " + file
  end
}

# ==== new file: maxious-canberra-transit-feed/02-tidytimepoints.rb ====
# Lists every timing point name found in output/*.yml, rewrites known
# misspellings and variants to one canonical name, then lists the names again.
require 'rubygems'
require 'pp'
require 'yaml'
Dir.chdir("output")

# Rebuilds two globals from the *.yml files in the current directory:
#   $time_points         - array of the distinct timing point names
#   $time_points_sources - hash mapping each name to the files containing it
def getTimePoints()
  $time_points = []
  # The shared default array is never mutated: every entry is replaced by a
  # new array built with Array#|, so Hash.new([]) is safe here.
  $time_points_sources = Hash.new([])
  Dir.glob("*.yml") { |file|
    timetable = YAML::load_file(file)
    $time_points = $time_points | timetable["time_points"]
    timetable["time_points"].each do |timepoint|
      $time_points_sources[timepoint] = $time_points_sources[timepoint] | [ file ]
    end
  }
end

getTimePoints()
pp $time_points.sort!
#pp $time_points_sources.sort

# Maps a wrong or variant timing point name to its canonical name.
time_point_corrections = {
  "North Lynehamham" => "North Lyneham",
  "Lathlain St Platform 2" => "Lathlain St Bus Station - Platform 2",
  "Lathlain St Sation - Platform 5" => "Lathlain St Bus Station - Platform 5",
  "Lathlain Steet Station" => "Lathlain St Bus Station",
  "Lathlain St - Platform 3" => "Lathlain St Bus Station - Platform 3",
  "Lathlain Steet Station - Platform 3" => "Lathlain St Bus Station - Platform 3",
  "Lathlain St Station" => "Lathlain St Bus Station",
  "Lathlain St Station - Platform 1" => "Lathlain St Bus Station - Platform 1",
  "Lathlain St Station - Platform 2" => "Lathlain St Bus Station - Platform 2",
  "Lathlain St Station - Platform 3" => "Lathlain St Bus Station - Platform 3",
  "Lathlain St Station - Platform 4" => "Lathlain St Bus Station - Platform 4",
  "Lathlain St Station - Platform 5" => "Lathlain St Bus Station - Platform 5",
  "Lathlain St Station - Platform 6" => "Lathlain St Bus Station - Platform 6",
  "Manuka Captain Cook" => "Manuka, Captain Cook",
  "Flemington Rd, Sandford St" => "Flemington Rd/Sandford St",
  "Erindale Centre / - Sternberg Crescent" => "Erindale Drive/Sternberg",
  "Canberra Hospita" => "Canberra Hospital",
  "Cohen Str Station - Platform 1" => "Cohen St Bus Station - Platform 1",
  "Cohen Street Station" => "Cohen St Bus Station",
  "Cohen Street Station - Platform 2" => "Cohen St Bus Station - Platform 2",
  "Cohn St Station - Platform 3" => "Cohen St Bus Station - Platform 3",
  "Cohen St Station" => "Cohen St Bus Station",
  "Cohen St Station - Platform 1" => "Cohen St Bus Station - Platform 1",
  "Cohen St Station - Platform 2" => "Cohen St Bus Station - Platform 2",
  "Cohen St Station - Platform 3" => "Cohen St Bus Station - Platform 3",
  "Cohen St Station - Platform 4" => "Cohen St Bus Station - Platform 4",
  "Cohen St Station - Platform 5" => "Cohen St Bus Station - Platform 5",
  "Cohen St Station - Platform 6" => "Cohen St Bus Station - Platform 6",
  "City - Platform 7" => "City Interchange - Platform 7",
  "Cameron Avenue Station" => "Cameron Ave Bus Station",
  "Cameron Avenue Station - Platform 1" => "Cameron Ave Bus Station - Platform 1",
  "Cameron Avenue Station - Platform 2" => "Cameron Ave Bus Station - Platform 2",
  "Cameron Avenue Station - Platform 3" => "Cameron Ave Bus Station - Platform 3",
  "Cameron Avenue Station - Platform 4" => "Cameron Ave Bus Station - Platform 4",
  "Cameron Avenue Station - Platform 5" => "Cameron Ave Bus Station - Platform 5",
  "Cameron Ave Station" => "Cameron Ave Bus Station",
  "Cameron Ave Station - Platform 1" => "Cameron Ave Bus Station - Platform 1",
  "Cameron Ave Station - Platform 2" => "Cameron Ave Bus Station - Platform 2",
  "Cameron Ave Station - Platform 3" => "Cameron Ave Bus Station - Platform 3",
  "Cameron Ave Station - Platform 4" => "Cameron Ave Bus Station - Platform 4",
  "Cameron Ave Station - Platform 5" => "Cameron Ave Bus Station - Platform 5",
  "Burton & Garranan Hall, Daley Road ANU" => "Burton & Garran Hall, Daley Road ANU",
  "Burton & Garranan Hall,Daley Road ANU" => "Burton & Garran Hall, Daley Road ANU",
  "Newcastle Street after Isa St" => "Newcastle / Isa Street Fyshwick",
  "National Circ/Canberra Ave" => "National Circuit / Canberra Ave"
}

time_point_corrections.each do |wrong, right|
  $time_points_sources[wrong].each do |wrongfile|
    # Reload from disk: an earlier correction may already have rewritten this file.
    badtimetable = YAML::load_file(wrongfile)
    # Replace every occurrence, not only the first one found; this also avoids
    # indexing the array with nil when the name is no longer present.
    badtimetable["time_points"] = badtimetable["time_points"].map do |name|
      name == wrong ? right : name
    end
    puts "Corrected '" + wrong + "' to '" + right + "' in " + wrongfile
    File.open(wrongfile, "w") do |f|
      f.write badtimetable.to_yaml
    end
  end
end

getTimePoints()
pp $time_points.sort!
# ==== new file: maxious-canberra-transit-feed/03-locatetimepoints.rb ====
#!/usr/bin/ruby
# Interactive helper: optionally inserts the timing point names found in
# output/*.yml into the "bus" database, then suggests coordinates for timing
# points without a location from nodes in the "openstreetmap" database.
require 'postgres'
require 'highline.rb'
include HighLine

require 'yaml'
require 'pp'
Dir.chdir("output")

# Rebuilds two globals from the *.yml files in the current directory:
#   $time_points         - array of the distinct timing point names
#   $time_points_sources - hash mapping each name to the files containing it
def getTimePoints()
  $time_points = []
  # The shared default array is never mutated: every entry is replaced by a
  # new array built with Array#|, so Hash.new([]) is safe here.
  $time_points_sources = Hash.new([])
  Dir.glob("*.yml") { |file|
    timetable = YAML::load_file(file)
    $time_points = $time_points | timetable["time_points"]
    timetable["time_points"].each do |timepoint|
      $time_points_sources[timepoint] = $time_points_sources[timepoint] | [ file ]
    end
  }
end

# Escapes a value for use inside a single-quoted SQL string literal by
# doubling backslashes and single quotes.
def sql_escape(value)
  value.gsub(/\\/, '\&\&').gsub(/'/, "''")
end

# Selects the names of the timing points that have no latitude or longitude.
# Returns the query result, or an empty array when the query fails, so the
# caller can always iterate over the return value.
def null_timing_points(conn)
  conn.exec('SELECT name FROM timing_point WHERE lat IS null OR lng IS null;')
rescue PGError => e
  puts "Error selecting null points from DB #{e}"
  []
end

getTimePoints()
$time_points.sort!

# NOTE(review): connection settings, including the password, are hard-coded.
connbus = PGconn.connect("localhost", 5432, '', '', "bus", "postgres", "snmc")
connosm = PGconn.connect("localhost", 5432, '', '', "openstreetmap", "postgres", "snmc")

if ask_if("Insert Timing Point names to database?")
  $time_points.each do |time_point|
    begin
      time_point = sql_escape(time_point)
      connbus.exec("INSERT INTO timing_point (name) VALUES ('#{time_point}')")
      puts "Put '#{time_point}' into DB"
    rescue PGError => e
      puts "Error inserting '#{time_point}' to DB #{e}"
      #conn.close() if conn
    end
  end
end

if ask_if("Fill null Timing Points from OSM bus_stop database?")
  null_timing_points(connbus).each do |null_point_row|
    # Result rows are read positionally (as in the node loop below); this
    # query selects a single column, the name. Array#to_s only produced the
    # bare name on Ruby 1.8, so the column is taken explicitly.
    null_point_name = null_point_row[0].to_s
    name = sql_escape(null_point_name)
    pp name
    begin
      matching_nodes = connosm.exec("Select * FROM (SELECT * from current_node_tags,
        (Select id as ctagid FROM current_node_tags WHERE v LIKE '%#{name}%') as a
        where a.ctagid = current_node_tags.id) as ctags INNER JOIN current_nodes ON
        ctags.id=current_nodes.id")
    rescue PGError => e
      puts "Error selecting matching bus stops from DB #{e}"
      #conn.close() if conn
      next # nothing to suggest for this timing point
    end

    # Collect one hash per node id holding its coordinates and all its tags.
    suggested_nodes = Hash.new()
    matching_nodes.each do |matching_node_row|
      #pp matching_node_row
      # 0 = id
      # 1 = k
      # 2 = v
      # 3,4 = redundant ids
      # 5 = lat*10000000
      # 6 = lng*10000000
      node_id = matching_node_row[0]
      suggested_node = suggested_nodes.fetch(node_id, {'lat' => Float(matching_node_row[5])/10000000,
                                                      'lng' => Float(matching_node_row[6])/10000000})
      # The OSM "ref" tag is stored under the "loc_ref" key.
      tag_key = matching_node_row[1] == "ref" ? "loc_ref" : matching_node_row[1]
      suggested_node[tag_key] = matching_node_row[2]
      suggested_nodes[node_id] = suggested_node
    end
    pp suggested_nodes

    nodeID = ask("Enter selected node ID:", :string)
    if suggested_nodes.has_key?(nodeID)
      node = suggested_nodes.fetch(nodeID)
      guess = ask_if("Is this a guess?")
      puts "Location #{node["lat"]},#{node["lng"]} for #{null_point_name}"
      begin
        # Round after scaling back so float error cannot leak into the SQL.
        lat = (node["lat"] * 10000000).round
        lng = (node["lng"] * 10000000).round
        # The ref comes from OSM data, so pass it as an escaped, quoted literal.
        loc_ref_sql = node.has_key?("loc_ref") ? ",loc_ref = '#{sql_escape(node["loc_ref"].to_s)}'" : ""
        connbus.exec("UPDATE timing_point SET lat = #{lat}, lng = #{lng},osm_node = #{nodeID}" +
                     loc_ref_sql + ",guess = #{guess} WHERE name = '#{name}'")
        puts "Put '#{null_point_name}' into DB"
      rescue PGError => e
        puts "Error inserting '#{null_point_name}' to DB #{e}"
        # Honour the answer: stop processing further timing points on "no".
        break unless ask_if("Continue?")
        #conn.close() if conn
      end
    else
      puts "Uhh, there was no suggestion ID like that. Try again next time!"
    end
  end
end

if ask_if("Fill null Timing Points from geocoder?")
  # Only lists the result rows for now; no geocoding is performed.
  null_timing_points(connbus).each do |null_point_name|
    pp null_point_name
  end
end

# ==== new file: maxious-canberra-transit-feed/highline.rb ====
module HighLine
  # Prompts on standard output and reads an answer from standard input,
  # asking again until the answer converts and validates.
  #
  # prompt = text to display
  # type can be one of :string, :integer, :float, :bool or a proc
  # if it's a proc then it is called with the entered string. If the input
  # cannot be converted then it should throw an exception
  # if type == :bool then y,yes are converted to true. n,no are converted to
  # false. All other values are rejected.
  #
  # options should be a hash of validation options
  # :validate => regular expression or proc
  #   if validate is a regular expression then the input is matched against it
  #   if it's a proc then the proc is called and the input is accepted if it
  #   returns true
  # :within => range (also accepted under the name :between)
  #   the input is checked if it lies within the range
  # :above => value
  #   the input is checked if it is above the value
  # :below => value
  #   the input is checked if it is less than the value
  # :default => string
  #   if the user doesn't enter a value then the default value is returned
  # :base => [b, o, d, x]
  #   when asking for integers this will take a number in binary, octal,
  #   decimal or hexadecimal
  #
  # Returns the converted input.
  # Raises EOFError when standard input ends before a valid answer is given.
  def ask(prompt, type, options=nil)
    loop do
      valid = true
      default = option(options, :default)
      defaultstr = default ? " |#{default}|" : ""
      base = option(options, :base)

      print prompt, "#{defaultstr} "
      $stdout.flush
      # Read from $stdin explicitly: Kernel#gets would read from files named
      # in ARGV, and it returns nil at end of input.
      line = $stdin.gets
      raise EOFError, "end of input while waiting for an answer" if line.nil?
      input = line.chomp
      input = default if default && input == ""

      #convert the input to the correct type
      begin
        input = case type
                when :string then input
                when :integer then convert(input, base)
                when :float then Float(input)
                when :bool
                  valid = !!(input =~ /\A(y|n|yes|no)\z/)
                  input[0, 1] == "y"
                when Proc then type.call(input)
                end
      rescue StandardError
        # A failed conversion only makes this answer invalid; ask again.
        valid = false
      end

      #validate the input
      valid &&= validate(options, :validate) { |test|
        case test
        when Regexp then input =~ test
        when Proc then test.call(input)
        end
      }
      valid &&= validate(options, :within) { |range| range === input }
      valid &&= validate(options, :between) { |range| range === input }
      valid &&= validate(options, :above) { |value| input > value }
      valid &&= validate(options, :below) { |value| input < value }

      return input if valid
      puts "Not a valid value"
    end
  end

  #asks a yes/no question
  def ask_if(prompt)
    ask(prompt, :bool)
  end

  private

  #extracts a key from the options hash; nil when options or the key is absent
  def option(options, key)
    (options && options.key?(key)) ? options[key] : nil
  end

  #helper function for validation: yields the option value when the key is
  #present and returns the block result, otherwise returns true
  def validate(options, key)
    return true unless options && options.key?(key)
    yield options[key]
  end

  #converts a string to an integer
  #input = the value to convert
  #base = the numeric base of the value b,o,d,x
  def convert(input, base)
    # Integer() understands the 0b/0o/0d/0x prefixes.
    input = "0#{base}#{input}" if ["b", "o", "d", "x"].include?(base)
    Integer(input)
  end
end

if __FILE__ == $0
  include HighLine
  #string input using a regexp to validate, returns test as the default value
  p ask("enter a string, (all lower case)", :string, :validate => /^[a-z]*$/, :default => "test")
  #string input using a proc to validate
  p ask("enter a string, (between 3 and 6 characters)", :string, :validate => proc { |input| (3..6) === input.length})

  #integer input using :within
  p ask("enter an integer, (0-10)", :integer, :within => 0..10)
  #float input using :above
  p ask("enter a float, (> 6)", :float, :above => 6)

  #getting a binary value
  p ask("enter a binary number", :integer, :base => "b")

  #using a proc to convert a comma separated list into an array
  p ask("enter a comma seperated list", proc { |x| x.split(/,/)})

  p ask_if("do you want to continue?")
end