# Begin to locate time points
# Begin to locate time points

require 'rubygems'
require 'nokogiri'
require 'open-uri'
require 'pp'
require 'yaml'
 
# Builds the timetable held in one <table> of a route page and saves it as YAML
# under the "output" directory next to this script.
#
# table      - node answering xpath/css the way a Nokogiri <table> element does
# period     - service period label (callers pass "weekday", "saturday" or
#              "sunday"); used as the key under "stop_times" and in the file name
# short_name - route number(s) as a String; used in the file name and messages
#
# Returns the timetable Hash with the keys "stop_times", "between_stops",
# "short_name", "time_points" and "long_name".
# Raises RuntimeError when the table has no timing points or no rows of times.
def makeTimetable(table, period, short_name)
  timetable = {"stop_times" => [], "between_stops" => [], "short_name" => short_name}

  # The <th> cells of the first row name the timing points. Cells holding only a
  # non-breaking space, nothing at all, or a literal "<br/>" map to nil.
  header_names = table.xpath('tr[1]//th').map do |tp|
    if tp.content != "\302\240" && tp.content != "" && tp.content != "<br/>"
      # NOTE(review): the third substitution looks like a no-op as written; kept
      # unchanged in case the pattern differs in whitespace/dash characters.
      tp.content.squeeze(" ").gsub("\r\n Platform"," - Platform").gsub(" - "," - ").gsub("\n","").strip
    end
  end
  time_points = header_names.compact

  # Without this guard the long_name concatenation below fails with an opaque
  # "no implicit conversion of nil into String".
  if time_points.empty?
    raise "No timing points found for route " + short_name + " in period " + period
  end
  timetable["time_points"] = time_points
  timetable["long_name"] = "To " + time_points.last

  periodtimes = []
  table.css('tr').each do |row|
    #TODO convert to GTFS time ie. replace " AM" with a
    times = row.css('td').map { |cell| cell.content.squeeze(" ").strip }
    # Rows without <td> cells (such as the header row) carry no times.
    next if times.empty?
    # Drop the leading cell (presumably the route number column).
    # TODO: account for shifting route numbers eg. intertown/redex 62/162
    times.shift
    periodtimes << times
  end
  if periodtimes.empty?
    raise "No times for route " + short_name + " in period " + period
  end
  timetable["stop_times"] = { period => periodtimes }

  filename = timetable["short_name"] + "-" + timetable["long_name"].downcase.gsub(" ","-").gsub("/","") + "." + period + ".yml"
  puts "Saving " + filename
  # Create the output directory on first use instead of failing with ENOENT
  # after all the parsing work is done.
  output_dir = File.join(File.dirname(__FILE__), "output")
  Dir.mkdir(output_dir) unless File.directory?(output_dir)
  File.open(File.join(output_dir, filename), "w") do |f|
    f.write timetable.to_yaml
  end
  timetable
end
 
#TODO fix route 934  
# Extract the timetables from every saved route page matching
# source-html/Route*.htm*. Each matching <table> is handed to makeTimetable;
# this loop then warns about pages whose set of timetables looks unusual and
# aborts on a page that yields none.
Dir.glob("source-html/Route*.htm*").each do |file|
  puts "Opened " + file
  # Parse inside a File.open block so the handle is closed as soon as parsing
  # finishes; File.open also never treats the name as a command to run.
  doc = File.open(file) { |html| Nokogiri::HTML(html) }

  # Route number(s) are the <title> text with "Route_"/"Route " removed.
  short_name = ""
  doc.xpath('//title').each do |title|
    short_name = title.content.gsub("Route_","").gsub("Route ","").squeeze(" ").strip
  end
  if short_name == ""
    raise "Route number(s) not found in <title> tag"
  end

  # XPath query and service period for each kind of table, in extraction order.
  table_queries = [
    ['//table[preceding::text()="Weekdays"]', "weekday"],
    # weekends
    ['//table[preceding::text()="Saturdays" and following::a]', "saturday"],
    ['//table[preceding::text()="Sundays"]', "sunday"],
    # 930/934 special cases
    ['//table[preceding::text()="Saturday" and following::h2]', "saturday"],
    ['//table[preceding::text()="Sunday"]', "sunday"],
    # route 81 = Weekdays - School Holidays Only
    # TODO set active date range to only be holidays
    ['//table[preceding::text()="Weekdays - School Holidays Only "]', "weekday"],
  ]

  timetables = []
  table_queries.each do |query, period|
    doc.xpath(query).each do |table|
      timetables << makeTimetable(table, period, short_name)
    end
  end

  if timetables.size > 2
    puts "WARNING: " + file + " more than 2 timetables (weekend split?):" + timetables.size.to_s
  end
  if timetables.size < 2
    puts "WARNING: " + file + " less than 2 timetables (weekday loop service?):" + timetables.size.to_s
  else
    # NOTE(review): Array#- ignores element order, so this only reports timing
    # points of the first timetable that are absent from the second.
    unmatched = timetables[0]["time_points"] - timetables[1]["time_points"].reverse
    unless unmatched.empty?
      puts "WARNING: first pair of timetable timing points are not complementary for "+ file
      pp(unmatched)
    end
  end
  if timetables.size < 1
    raise "No timetables extracted from " + file
  end
end
 
require 'rubygems'  
require 'pp'  
require 'yaml'  
 
# Gather every distinct timing-point name used by the YAML timetables in the
# "output" directory, remembering which files mention each name.
time_points = []
# The shared [] default is only ever read or replaced with a new array (never
# appended to in place), so unknown names simply look up as an empty list.
time_points_sources = Hash.new([])
Dir.chdir("output")
Dir.glob("*.yml").each do |yaml_file|
  names = YAML.load_file(yaml_file)["time_points"]
  time_points |= names
  names.each do |name|
    time_points_sources[name] |= [ yaml_file ]
  end
end
time_points.sort!
pp time_points
#pp time_points_sources.sort
 
# Misspelt or inconsistent timing-point names (keys) mapped to the name that
# should replace them (values).
time_point_corrections = {
  "North Lynehamham" => "North Lyneham",
  "Lathlain St Platform 2" => "Lathlain St Bus Station - Platform 2",
  "Lathlain St Sation - Platform 5" => "Lathlain St Bus Station - Platform 5",
  "Lathlain Steet Station" => "Lathlain St Bus Station",
  "Lathlain St - Platform 3" => "Lathlain St Bus Station - Platform 3",
  "Lathlain Steet Station - Platform 3" => "Lathlain St Bus Station - Platform 3",
  "Lathlain St Station" => "Lathlain St Bus Station",
  "Lathlain St Station - Platform 1" => "Lathlain St Bus Station - Platform 1",
  "Lathlain St Station - Platform 2" => "Lathlain St Bus Station - Platform 2",
  "Lathlain St Station - Platform 3" => "Lathlain St Bus Station - Platform 3",
  "Lathlain St Station - Platform 4" => "Lathlain St Bus Station - Platform 4",
  "Lathlain St Station - Platform 5" => "Lathlain St Bus Station - Platform 5",
  "Lathlain St Station - Platform 6" => "Lathlain St Bus Station - Platform 6",
  "Manuka Captain Cook" => "Manuka, Captain Cook",
  "Flemington Rd, Sandford St" => "Flemington Rd/Sandford St",
  "Erindale Centre / - Sternberg Crescent" => "Erindale Drive/Sternberg",
  "Canberra Hospita" => "Canberra Hospital",
  "Cohen Str Station - Platform 1" => "Cohen St Bus Station - Platform 1",
  "Cohen Street Station" => "Cohen St Bus Station",
  "Cohen Street Station - Platform 2" => "Cohen St Bus Station - Platform 2",
  "Cohn St Station - Platform 3" => "Cohen St Bus Station - Platform 3",
  "Cohen St Station" => "Cohen St Bus Station",
  "Cohen St Station - Platform 1" => "Cohen St Bus Station - Platform 1",
  "Cohen St Station - Platform 2" => "Cohen St Bus Station - Platform 2",
  "Cohen St Station - Platform 3" => "Cohen St Bus Station - Platform 3",
  "Cohen St Station - Platform 4" => "Cohen St Bus Station - Platform 4",
  "Cohen St Station - Platform 5" => "Cohen St Bus Station - Platform 5",
  "City - Platform 7" => "City Interchange - Platform 7",
  "Cameron Avenue Station" => "Cameron Ave Bus Station",
  "Cameron Avenue Station - Platform 2" => "Cameron Ave Bus Station - Platform 2",
  "Cameron Avenue Station - Platform 3" => "Cameron Ave Bus Station - Platform 3",
  "Cameron Ave Station" => "Cameron Ave Bus Station",
  "Cameron Ave Station - Platform 1" => "Cameron Ave Bus Station - Platform 1",
  "Cameron Ave Station - Platform 2" => "Cameron Ave Bus Station - Platform 2",
  "Cameron Ave Station - Platform 3" => "Cameron Ave Bus Station - Platform 3",
  "Cameron Ave Station - Platform 4" => "Cameron Ave Bus Station - Platform 4",
  "Cameron Ave Station - Platform 5" => "Cameron Ave Bus Station - Platform 5",
  "Burton & Garranan Hall, Daley Road ANU" => "Burton & Garran Hall, Daley Road ANU",
  # The comma ending the next entry was missing, which made the literal a syntax error.
  "Burton & Garranan Hall,Daley Road ANU" => "Burton & Garran Hall, Daley Road ANU",
  "Newcastle Street after Isa St" => "Newcastle / Isa Street Fyshwick",
  "National Circ/Canberra Ave" => "National Circuit / Canberra Ave",
}
# Rewrite every timetable file that uses a known-bad timing-point name, taking
# the list of affected files from time_points_sources.
time_point_corrections.each do |wrong, right|
  time_points_sources[wrong].each do |wrongfile|
    badtimetable = YAML::load_file(wrongfile)
    # Nothing to do if the file no longer contains the bad name.
    next unless badtimetable["time_points"].include?(wrong)
    # Replace every occurrence, not just the first, since a timetable may list
    # the same timing point more than once. Each replacement is a separate
    # String object so the YAML emitter does not write anchors/aliases for it.
    badtimetable["time_points"] = badtimetable["time_points"].map do |name|
      name == wrong ? right.dup : name
    end
    puts "Corrected '" + wrong + "' to '" + right + "' in " + wrongfile
    File.open(wrongfile, "w") do |f|
      f.write badtimetable.to_yaml
    end
  end
end