Suggest timepoints using OSM database of POIs
[bus.git] / maxious-canberra-transit-feed / 01-extracttimes.rb
blob:a/maxious-canberra-transit-feed/01-extracttimes.rb -> blob:b/maxious-canberra-transit-feed/01-extracttimes.rb
  require 'rubygems'
  require 'nokogiri'
  require 'open-uri'
  require 'pp'
  require 'yaml'
   
  def makeTimetable(table, period, short_name)
  timetable = {"stop_times" => [], "between_stops" => [], "short_name" => short_name}
  time_points = table.xpath('tr[1]//th').map do |tp|
  if tp.content != "\302\240" && tp.content != "" && tp.content != "<br/>"
  timing_point = tp.content.squeeze(" ").gsub("\r\n Platform"," - Platform").gsub(" - "," - ").gsub("\n","").gsub("\r","").gsub("\\"," / ").strip
  end
  end
  time_points.delete(nil)
  timetable["time_points"] = time_points
  timetable["long_name"] = "To " + time_points.last
  periodtimes = []
  table.css('tr').each do |row|
  times = row.css('td').map do |cell|
  #TODO convert to GTFS time ie. replace " AM" with a
  time = cell.content.squeeze(" ").strip
  end
  if not times.empty?
  if not (route = times.shift)
  raise("TODO: account for shifting route numbers eg. intertown/redex 62/162")
  end
  periodtimes << times
  end
  end
  if periodtimes.size < 1
  raise "No times for route " + short_name + " in period " + period
  end
  timetable["stop_times"] = { period => periodtimes }
  # pp timetable
  filename = timetable["short_name"] + "-" + timetable["long_name"].downcase.gsub(" ","-").gsub("/","") + "." + period + ".yml"
  puts "Saving " + filename
  File.open("#{File.dirname(__FILE__)}/output/"+filename, "w") do |f|
  f.write timetable.to_yaml
  end
  timetable
  end
   
  #TODO fix route 934
  Dir.glob("source-html/Route*.htm*") { |file|
  puts "Opened " + file
  doc = Nokogiri::HTML(open(file))
  # Search for nodes by css
  timetables = []
  short_name = "";
  doc.xpath('//title').each do |title|
  short_name = title.content.gsub("Route_","").gsub("Route ","").squeeze(" ").strip
  end
  if short_name == ""
  raise "Route number(s) not found in <title> tag"
  end
   
  doc.xpath('//table[preceding::text()="Weekdays"]').each do |table|
  timetables << makeTimetable(table, "weekday", short_name)
  end
   
  #weekends
  doc.xpath('//table[preceding::text()="Saturdays" and following::a]').each do |table|
  timetables << makeTimetable(table, "saturday", short_name)
  end
  doc.xpath('//table[preceding::text()="Sundays"]').each do |table|
  timetables << makeTimetable(table, "sunday", short_name)
  end
  #930/934 special cases
  doc.xpath('//table[preceding::text()="Saturday" and following::h2]').each do |table|
  timetables << makeTimetable(table, "saturday", short_name)
  end
  doc.xpath('//table[preceding::text()="Sunday"]').each do |table|
  timetables << makeTimetable(table, "sunday", short_name)
  end
  #route 81 = Weekdays - School Holidays Only
  doc.xpath('//table[preceding::text()="Weekdays - School Holidays Only "]').each do |table|
  timetable = makeTimetable(table, "weekday", short_name)
  #TODO set active date range to only be holidays
  timetables << timetable;
  end
   
   
  if timetables.size > 2
  puts "WARNING: " + file + " more than 2 timetables (weekend split?):" + timetables.size.to_s
  end
  if timetables.size < 2
  puts "WARNING: " + file + " less than 2 timetables (weekday loop service?):" + timetables.size.to_s
  elsif not (timetables[0]["time_points"] - timetables[1]["time_points"].reverse).empty?
  puts "WARNING: first pair of timetable timing points are not complementary for "+ file
  pp(timetables[0]["time_points"] - timetables[1]["time_points"].reverse)
  end
  if timetables.size < 1
  raise "No timetables extracted from " + file
  end
  }