# Suggest timepoints using OSM database of POIs

  require 'rubygems'
  require 'nokogiri'
  require 'open-uri'
  require 'pp'
  require 'yaml'
 
  def makeTimetable(table, period, short_name)
  timetable = {"stop_times" => [], "between_stops" => [], "short_name" => short_name}
  time_points = table.xpath('tr[1]//th').map do |tp|
  if tp.content != "\302\240" && tp.content != "" && tp.content != "<br/>"
  timing_point = tp.content.squeeze(" ").gsub("\r\n Platform"," - Platform").gsub(" - "," - ").gsub("\n","").gsub("\r","").gsub("\\"," / ").strip
  end
  end
  time_points.delete(nil)
  timetable["time_points"] = time_points
  timetable["long_name"] = "To " + time_points.last
  periodtimes = []
  table.css('tr').each do |row|
  times = row.css('td').map do |cell|
  #TODO convert to GTFS time ie. replace " AM" with a
  time = cell.content.squeeze(" ").strip
  end
  if not times.empty?
  if not (route = times.shift)
  raise("TODO: account for shifting route numbers eg. intertown/redex 62/162")
  end
  periodtimes << times
  end
  end
  if periodtimes.size < 1
  raise "No times for route " + short_name + " in period " + period
  end
  timetable["stop_times"] = { period => periodtimes }
  # pp timetable
  filename = timetable["short_name"] + "-" + timetable["long_name"].downcase.gsub(" ","-").gsub("/","") + "." + period + ".yml"
  puts "Saving " + filename
  File.open("#{File.dirname(__FILE__)}/output/"+filename, "w") do |f|
  f.write timetable.to_yaml
  end
  timetable
  end
 
  #TODO fix route 934
  Dir.glob("source-html/Route*.htm*") { |file|
  puts "Opened " + file
  doc = Nokogiri::HTML(open(file))
  # Search for nodes by css
  timetables = []
  short_name = "";
  doc.xpath('//title').each do |title|
  short_name = title.content.gsub("Route_","").gsub("Route ","").squeeze(" ").strip
  end
  if short_name == ""
  raise "Route number(s) not found in <title> tag"
  end
 
  doc.xpath('//table[preceding::text()="Weekdays"]').each do |table|
  timetables << makeTimetable(table, "weekday", short_name)
  end
 
  #weekends
  doc.xpath('//table[preceding::text()="Saturdays" and following::a]').each do |table|
  timetables << makeTimetable(table, "saturday", short_name)
  end
  doc.xpath('//table[preceding::text()="Sundays"]').each do |table|
  timetables << makeTimetable(table, "sunday", short_name)
  end
  #930/934 special cases
  doc.xpath('//table[preceding::text()="Saturday" and following::h2]').each do |table|
  timetables << makeTimetable(table, "saturday", short_name)
  end
  doc.xpath('//table[preceding::text()="Sunday"]').each do |table|
  timetables << makeTimetable(table, "sunday", short_name)
  end
  #route 81 = Weekdays - School Holidays Only
  doc.xpath('//table[preceding::text()="Weekdays - School Holidays Only "]').each do |table|
  timetable = makeTimetable(table, "weekday", short_name)
  #TODO set active date range to only be holidays
  timetables << timetable;
  end
 
 
  if timetables.size > 2
  puts "WARNING: " + file + " more than 2 timetables (weekend split?):" + timetables.size.to_s
  end
  if timetables.size < 2
  puts "WARNING: " + file + " less than 2 timetables (weekday loop service?):" + timetables.size.to_s
  elsif not (timetables[0]["time_points"] - timetables[1]["time_points"].reverse).empty?
  puts "WARNING: first pair of timetable timing points are not complementary for "+ file
  pp(timetables[0]["time_points"] - timetables[1]["time_points"].reverse)
  end
  if timetables.size < 1
  raise "No timetables extracted from " + file
  end
  }
 
  require 'rubygems'
  require 'pp'
  require 'yaml'
  Dir.chdir("output")
 
  def getTimePoints()
  $time_points = []
  $time_points_sources = Hash.new([])
  Dir.glob("*.yml") { |file|
  timetable = YAML::load_file(file)
  $time_points = $time_points | timetable["time_points"]
  timetable["time_points"].each do |timepoint|
  $time_points_sources[timepoint] = $time_points_sources[timepoint] | [ file ]
  end
  }
  end
 
  getTimePoints()
  pp $time_points.sort!
  #pp $time_points_sources.sort
 
  time_point_corrections = {"North Lynehamham" => "North Lyneham",
  "Lathlain St Platform 2" => "Lathlain St Bus Station - Platform 2",
  "Lathlain St Sation - Platform 5" => "Lathlain St Bus Station - Platform 5",
  "Lathlain Steet Station" => "Lathlain St Bus Station",
  "Lathlain St - Platform 3" => "Lathlain St Bus Station - Platform 3",
  "Lathlain Steet Station - Platform 3" => "Lathlain St Bus Station - Platform 3",
  "Lathlain St Station" => "Lathlain St Bus Station",
  "Lathlain St Station - Platform 1" => "Lathlain St Bus Station - Platform 1",
  "Lathlain St Station - Platform 2" => "Lathlain St Bus Station - Platform 2",
  "Lathlain St Station - Platform 3" => "Lathlain St Bus Station - Platform 3",
  "Lathlain St Station - Platform 4" => "Lathlain St Bus Station - Platform 4",
  "Lathlain St Station - Platform 5" => "Lathlain St Bus Station - Platform 5",
  "Lathlain St Station - Platform 6" => "Lathlain St Bus Station - Platform 6",
  "Manuka Captain Cook" => "Manuka, Captain Cook",
  "Flemington Rd, Sandford St" => "Flemington Rd/Sandford St",
  "Erindale Centre / - Sternberg Crescent" => "Erindale Drive/Sternberg",
  "Canberra Hospita" => "Canberra Hospital",
  "Cohen Str Station - Platform 1" => "Cohen St Bus Station - Platform 1",
  "Cohen Street Station" => "Cohen St Bus Station",
  "Cohen Street Station - Platform 2" => "Cohen St Bus Station - Platform 2",
  "Cohn St Station - Platform 3" => "Cohen St Bus Station - Platform 3",
  "Cohen St Station" => "Cohen St Bus Station",
  "Cohen St Station - Platform 1" => "Cohen St Bus Station - Platform 1",
  "Cohen St Station - Platform 2" => "Cohen St Bus Station - Platform 2",
  "Cohen St Station - Platform 3" => "Cohen St Bus Station - Platform 3",
  "Cohen St Station - Platform 4" => "Cohen St Bus Station - Platform 4",
  "Cohen St Station - Platform 5" => "Cohen St Bus Station - Platform 5",
  "Cohen St Station - Platform 6" => "Cohen St Bus Station - Platform 6",
  "City - Platform 7" => "City Interchange - Platform 7",
  "Cameron Avenue Station" => "Cameron Ave Bus Station",
  "Cameron Avenue Station - Platform 1" => "Cameron Ave Bus Station - Platform 1",
  "Cameron Avenue Station - Platform 2" => "Cameron Ave Bus Station - Platform 2",
  "Cameron Avenue Station - Platform 3" => "Cameron Ave Bus Station - Platform 3",
  "Cameron Avenue Station - Platform 4" => "Cameron Ave Bus Station - Platform 4",
  "Cameron Avenue Station - Platform 5" => "Cameron Ave Bus Station - Platform 5",
  "Cameron Ave Station" => "Cameron Ave Bus Station",
  "Cameron Ave Station - Platform 1" => "Cameron Ave Bus Station - Platform 1",
  "Cameron Ave Station - Platform 2" => "Cameron Ave Bus Station - Platform 2",
  "Cameron Ave Station - Platform 3" => "Cameron Ave Bus Station - Platform 3",
  "Cameron Ave Station - Platform 4" => "Cameron Ave Bus Station - Platform 4",
  "Cameron Ave Station - Platform 5" => "Cameron Ave Bus Station - Platform 5",
  "Burton & Garranan Hall, Daley Road ANU" => "Burton & Garran Hall, Daley Road ANU",
  "Burton & Garranan Hall,Daley Road ANU" => "Burton & Garran Hall, Daley Road ANU",
  "Newcastle Street after Isa St" => "Newcastle / Isa Street Fyshwick",
  "National Circ/Canberra Ave" => "National Circuit / Canberra Ave",
  }
  time_point_corrections.each do |wrong, right|
  $time_points_sources[wrong].each do |wrongfile|
  badtimetable = YAML::load_file(wrongfile)
  badentrynumber = badtimetable["time_points"].index wrong
  badtimetable["time_points"][badentrynumber] = right
  puts "Corrected '" + wrong + "' to '" + right + "' in " + wrongfile
  File.open(wrongfile, "w") do |f|
  f.write badtimetable.to_yaml
  end
  end
  end
 
  getTimePoints()
  pp $time_points.sort!
 
  #!/usr/bin/ruby
  require 'postgres'
 
  require 'highline.rb'
  include HighLine
 
  require 'yaml'
  require 'pp'
  Dir.chdir("output")
 
  def getTimePoints()
  $time_points = []
  $time_points_sources = Hash.new([])
  Dir.glob("*.yml") { |file|
  timetable = YAML::load_file(file)
  $time_points = $time_points | timetable["time_points"]
  timetable["time_points"].each do |timepoint|
  $time_points_sources[timepoint] = $time_points_sources[timepoint] | [ file ]
  end
  }
  end
 
  getTimePoints()
  $time_points.sort!
 
  connbus = PGconn.connect("localhost", 5432, '', '', "bus", "postgres",
  "snmc")
  connosm = PGconn.connect("localhost", 5432, '', '', "openstreetmap",
  "postgres", "snmc")
 
  if ask_if("Insert Timing Point names to database?")
  $time_points.each do |time_point|
  begin
  time_point = time_point.gsub(/\\/, '\&\&').gsub(/'/, "''")
  res = connbus.exec("INSERT INTO timing_point (name) VALUES ('#{time_point}')")
  puts "Put '#{time_point}' into DB"
  rescue PGError => e
  puts "Error inserting '#{time_point}' to DB #{e}"
  #conn.close() if conn
  end
  end
  end
 
 
  if ask_if("Fill null Timing Points from OSM bus_stop database?")
  begin
  null_points = connbus.exec('SELECT name FROM timing_point WHERE lat IS null OR lng IS null;')
  rescue PGError => e
  puts "Error selecting null points from DB #{e}"
  #conn.close() if conn
  end
 
  null_points.each do |null_point_name|
  begin
  name = null_point_name.to_s.gsub(/\\/, '\&\&').gsub(/'/, "''")
  pp name
  matching_nodes = connosm.exec("Select * FROM (SELECT * from current_node_tags,
  (Select id as ctagid FROM current_node_tags WHERE v LIKE '%#{name}%') as a
  where a.ctagid = current_node_tags.id) as ctags INNER JOIN current_nodes ON
  ctags.id=current_nodes.id")
  rescue PGError => e
  puts "Error selecting matching bus stops from DB #{e}"
  #conn.close() if conn
  end
  suggested_nodes = Hash.new()
 
  matching_nodes.each do |matching_node_row|
  #pp matching_node_row
  # 0 = id
  # 1 = k
  # 2 = v
  # 3,4 = redundant ids
  # 5 = lat*100000
  # 6 = lng*100000
  suggested_node = suggested_nodes.fetch(matching_node_row[0], {'lat' => Float(matching_node_row[5])/10000000,
  'lng' => Float(matching_node_row[6])/10000000})
  if matching_node_row[1] == "ref"
  matching_node_row[1] = "loc_ref"
  end
  suggested_node[matching_node_row[1]] = matching_node_row[2]
  suggested_nodes[matching_node_row[0]] = suggested_node
  end
  pp suggested_nodes
  nodeID = ask("Enter selected node ID:", :string)
  if suggested_nodes.has_key?(nodeID)
  node = suggested_nodes.fetch(nodeID)
  guess = ask_if("Is this a guess?")
  puts "Location #{node["lat"]},#{node["lng"]} for #{null_point_name}"
  begin
  res = connbus.exec("UPDATE timing_point SET lat = #{node["lat"]*10000000}, lng =
  #{node["lng"]*10000000},osm_node = #{nodeID}" + (node.has_key?("loc_ref") ? ",loc_ref = #{node["loc_ref"]}" : "") + ",guess = #{guess} WHERE name
  = '#{name}'")
  puts "Put '#{null_point_name}' into DB"
  rescue PGError => e
  puts "Error inserting '#{null_point_name}' to DB #{e}"
  ask_if("Continue?")
  #conn.close() if conn
  end
  else
  puts "Uhh, there was no suggestion ID like that. Try again next time!"
  end
  end
  end
  if ask_if("Fill null Timing Points from geocoder?")
  begin
  null_points = connbus.exec('SELECT name FROM timing_point WHERE lat IS null OR lng IS null;')
  rescue PGError => e
  puts "Error selecting null points from DB #{e}"
  #conn.close() if conn
  end
 
  null_points.each do |null_point_name|
  pp null_point_name
  end
  end
 
 
 
  module HighLine
  # prompt = text to display
  # type can be one of :string, :integer, :float, :bool or a proc
  # if it's a proc then it is called with the entered string. If the input
  # cannot be converted then it should throw an exception
  # if type == :bool then y,yes are converted to true. n,no are converted to
  # false. All other values are rejected.
  #
  # options should be a hash of validation options
  # :validate => regular expresion or proc
  # if validate is a regular expression then the input is matched against it
  # if it's a proc then the proc is called and the input is accepted if it
  # returns true
  # :between => range
  # the input is checked if it lies within the range
  # :above => value
  # the input is checked if it is above the value
  # :below => value
  # the input is checked if it is less than the value
  # :default => string
  # if the user doesn't enter a value then the default value is returned
  # :base => [b, o, d, x]
  # when asking for integers this will take a number in binary, octal,
  # decimal or hexadecimal
  def ask(prompt, type, options=nil)
  begin
  valid = true
 
  default = option(options, :default)
  if default
  defaultstr = " |#{default}|"
  else
  defaultstr = ""
  end
 
  base = option(options, :base)
 
  print prompt, "#{defaultstr} "
  $stdout.flush
  input = gets.chomp
 
  if default && input == ""
  input = default
  end
 
  #comvert the input to the correct type
  input = case type
  when :string: input
  when :integer: convert(input, base) rescue valid = false
  when :float: Float(input) rescue valid = false
  when :bool
  valid = input =~ /^(y|n|yes|no)$/
  input[0] == ?y
  when Proc: input = type.call(input) rescue valid = false
  end
 
  #validate the input
  valid &&= validate(options, :validate) do |test|
  case test
  when Regexp: input =~ test
  when Proc: test.call(input)
  end
  end
  valid &&= validate(options, :within) { |range| range === input}
  valid &&= validate(options, :above) { |value| input > value}
  valid &&= validate(options, :below) { |value| input < value}
 
  puts "Not a valid value" unless valid
  end until valid
 
  return input
  end
 
  #asks a yes/no question
  def ask_if(prompt)
  ask(prompt, :bool)
  end
 
  private
 
  #extracts a key from the options hash
  def option(options, key)
  result = nil
  if options && options.key?(key)
  result = options[key]
  end
  result
  end
 
  #helper function for validation
  def validate(options, key)
  result = true
  if options && options.key?(key)
  result = yield options[key]
  end
  result
  end
 
  #converts a string to an integer
  #input = the value to convert
  #base = the numeric base of the value b,o,d,x
  def convert(input, base)
  if base
  if ["b", "o", "d", "x"].include?(base)
  input = "0#{base}#{input}"