# 01-extracttimes.rb
#
# Reads every saved route page matching source-html/Route*.htm* (relative to
# the current working directory), extracts the timetable tables found under
# the known day-type headings, and writes each one as a YAML file into the
# output/ directory that sits next to this script.
require 'rubygems'
require 'nokogiri'
require 'open-uri'
require 'pp'
require 'yaml'

# UTF-8 bytes of a non-breaking space, used by the source pages as the content
# of empty header cells.
NBSP = "\302\240"

# Ordered [xpath, period] pairs. The order matters: the driver below compares
# the first two extracted timetables, so selectors are applied in sequence.
TIMETABLE_SELECTORS = [
  ['//table[preceding::text()="Weekdays"]', "weekday"],
  # weekends
  ['//table[preceding::text()="Saturdays" and following::a]', "saturday"],
  ['//table[preceding::text()="Sundays"]', "sunday"],
  # 930/934 special cases
  ['//table[preceding::text()="Saturday" and following::h2]', "saturday"],
  ['//table[preceding::text()="Sunday"]', "sunday"],
  # route 81 = Weekdays - School Holidays Only
  # TODO set active date range to only be holidays
  ['//table[preceding::text()="Weekdays - School Holidays Only "]', "weekday"]
].freeze

# Normalises the text of one header cell into a timing point name.
#
# @param content [String] raw text content of a <th> cell
# @return [String, nil] the cleaned name, or nil when the cell is a
#   placeholder (empty, NBSP-only or whitespace/line-break-only)
def clean_timing_point(content)
  # Treat any cell that is blank once NBSPs and whitespace are removed as a
  # placeholder, rather than matching a few exact placeholder strings.
  return nil if content.gsub(NBSP, " ").strip.empty?

  content.squeeze(" ").
    gsub("\r\n Platform", " - Platform").
    # NOTE(review): pattern and replacement look identical here; the original
    # whitespace may have been lost when the file was collapsed - confirm.
    gsub(" - ", " - ").
    gsub("\n", "").
    gsub("\r", "").
    gsub("\\", " / ").
    strip
end

# Builds a timetable hash from one HTML table and saves it as YAML.
#
# @param table [#xpath, #css] a parsed table node (Nokogiri element)
# @param period [String] day-type key for the times, e.g. "weekday"
# @param short_name [String] route identifier taken from the page title
# @return [Hash] with keys "short_name", "between_stops", "time_points",
#   "long_name" and "stop_times" (a hash of period => rows of time strings)
# @raise [RuntimeError] when the table has no timing points or no time rows
def makeTimetable(table, period, short_name)
  timetable = {"stop_times" => [], "between_stops" => [], "short_name" => short_name}

  time_points = table.xpath('tr[1]//th').map { |tp| clean_timing_point(tp.content) }.compact
  if time_points.empty?
    # Without this guard "To " + nil fails with an unhelpful TypeError.
    raise "No timing points for route " + short_name + " in period " + period
  end
  timetable["time_points"] = time_points
  timetable["long_name"] = "To " + time_points.last

  periodtimes = []
  table.css('tr').each do |row|
    # TODO convert to GTFS time ie. replace " AM" with a
    times = row.css('td').map { |cell| cell.content.squeeze(" ").strip }
    next if times.empty?
    # Drop the first cell (the original code names it `route`).
    # TODO: account for shifting route numbers eg. intertown/redex 62/162
    times.shift
    periodtimes << times
  end
  if periodtimes.size < 1
    raise "No times for route " + short_name + " in period " + period
  end
  timetable["stop_times"] = { period => periodtimes }

  filename = timetable["short_name"] + "-" +
             timetable["long_name"].downcase.gsub(" ", "-").gsub("/", "") +
             "." + period + ".yml"
  puts "Saving " + filename
  output_dir = File.join(File.dirname(__FILE__), "output")
  # Create the output directory on first use instead of failing with ENOENT.
  Dir.mkdir(output_dir) unless File.directory?(output_dir)
  File.open(File.join(output_dir, filename), "w") do |f|
    f.write timetable.to_yaml
  end
  timetable
end

# Extracts and saves every timetable found in one route HTML file, printing
# warnings when the set of timetables looks unusual.
#
# @param file [String] path of the HTML file to process
# @return [Array<Hash>] the timetables extracted, in selector order
# @raise [RuntimeError] when no route name or no timetable is found
def extract_route_file(file)
  puts "Opened " + file
  # Block form closes the handle; File.open never interprets the path as a
  # command the way Kernel#open can.
  doc = File.open(file) { |f| Nokogiri::HTML(f) }

  short_name = ""
  doc.xpath('//title').each do |title|
    short_name = title.content.gsub("Route_", "").gsub("Route ", "").squeeze(" ").strip
  end
  if short_name == ""
    raise "Route number(s) not found in <title> tag"
  end

  timetables = []
  TIMETABLE_SELECTORS.each do |xpath, period|
    doc.xpath(xpath).each do |table|
      timetables << makeTimetable(table, period, short_name)
    end
  end

  if timetables.size > 2
    puts "WARNING: " + file + " more than 2 timetables (weekend split?):" + timetables.size.to_s
  end
  if timetables.size < 2
    puts "WARNING: " + file + " less than 2 timetables (weekday loop service?):" + timetables.size.to_s
  else
    # The first two timetables are expected to list the same timing points in
    # opposite order; report any that appear only in the first.
    unmatched = timetables[0]["time_points"] - timetables[1]["time_points"].reverse
    unless unmatched.empty?
      puts "WARNING: first pair of timetable timing points are not complementary for " + file
      pp(unmatched)
    end
  end
  if timetables.size < 1
    raise "No timetables extracted from " + file
  end
  timetables
end

# TODO fix route 934
# Sorted so files are processed in the same order on every platform.
Dir.glob("source-html/Route*.htm*").sort.each do |file|
  extract_route_file(file)
end