require 'rubygems'
require 'nokogiri'
require 'open-uri'
require 'pp'
require 'yaml'

# Builds the timetable for one table of a route page, saves it as YAML in the
# "output" directory next to this script, and returns it.
#
# table      - object responding to #xpath and #css (the script passes Nokogiri
#              <table> nodes). Timing-point names are read from the header
#              cells matched by 'tr[1]//th'; times are read from the 'td' cells
#              of every 'tr'.
# period     - String used as the key under "stop_times" and as part of the
#              output filename (the script passes "weekday", "saturday" or
#              "sunday").
# short_name - String route number(s), stored under "short_name" and used as
#              the filename prefix.
#
# Returns a Hash with the keys "stop_times", "between_stops", "short_name",
# "time_points" and "long_name".
# Raises RuntimeError when the table yields no timing points or no rows of
# times.
def makeTimetable(table, period, short_name)
  # Header cells that carry no timing point: a lone non-breaking space
  # ("\302\240" is its UTF-8 byte sequence), an empty cell, or a literal "<br/>".
  filler = ["\302\240", "", "<br/>"]

  header_texts = table.xpath('tr[1]//th').map { |th| th.content }
  time_points = header_texts.reject { |text| filler.include?(text) }.map do |text|
    # Collapse runs of spaces, turn a "Platform" continuation line into a
    # " - Platform" suffix, and drop any remaining newlines.
    text.squeeze(" ").gsub("\r\n Platform", " - Platform").gsub("\n", "").strip
  end

  # Without this guard the long_name concatenation below would fail with an
  # unhelpful TypeError on nil.
  if time_points.empty?
    raise "No timing points for route #{short_name} in period #{period}"
  end

  period_times = []
  table.css('tr').each do |row|
    # TODO convert to GTFS time ie. replace " AM" with a
    times = row.css('td').map { |cell| cell.content.squeeze(" ").strip }
    next if times.empty? # rows without <td> cells (e.g. the header row)

    # The first cell is discarded (presumably the route-number column; the
    # original code bound it to a local named `route`).
    # TODO: account for shifting route numbers eg. intertown/redex 62/162
    times.shift
    period_times << times
  end

  if period_times.empty?
    raise "No times for route #{short_name} in period #{period}"
  end

  timetable = {
    "stop_times" => { period => period_times },
    "between_stops" => [],
    "short_name" => short_name,
    "time_points" => time_points,
    "long_name" => "To " + time_points.last
  }

  slug = timetable["long_name"].downcase.gsub(" ", "-").gsub("/", "")
  filename = "#{short_name}-#{slug}.#{period}.yml"

  # Create the output directory on first use instead of failing with ENOENT.
  output_dir = File.join(File.dirname(__FILE__), "output")
  Dir.mkdir(output_dir) unless File.directory?(output_dir)

  puts "Saving " + filename
  File.open(File.join(output_dir, filename), "w") do |f|
    f.write(timetable.to_yaml)
  end

  timetable
end

# TODO fix route 934

# XPath selectors for the timetable tables of a route page, paired with the
# period each one is saved under. Tables are picked by the heading text that
# precedes them. The order matters: the first two extracted timetables are
# compared against each other further down.
timetable_sources = [
  ['//table[preceding::text()="Weekdays"]', "weekday"],
  # weekends
  ['//table[preceding::text()="Saturdays" and following::a]', "saturday"],
  ['//table[preceding::text()="Sundays"]', "sunday"],
  # 930/934 special cases
  ['//table[preceding::text()="Saturday" and following::h2]', "saturday"],
  ['//table[preceding::text()="Sunday"]', "sunday"],
  # route 81 = Weekdays - School Holidays Only
  # TODO set active date range to only be holidays
  ['//table[preceding::text()="Weekdays - School Holidays Only "]', "weekday"]
]

# Process every saved route page: read the route number(s) from <title>,
# convert each matching table with makeTimetable, then print sanity warnings
# about the number and shape of the timetables found.
Dir.glob("source-html/Route*.htm*") do |file|
  puts "Opened " + file
  # Block form closes the file handle as soon as the document is parsed.
  doc = File.open(file) { |html| Nokogiri::HTML(html) }

  # When several <title> elements exist, the last one wins.
  short_name = ""
  doc.xpath('//title').each do |title|
    short_name = title.content.gsub("Route_", "").gsub("Route ", "").squeeze(" ").strip
  end
  raise "Route number(s) not found in <title> tag" if short_name.empty?

  timetables = []
  timetable_sources.each do |selector, period|
    doc.xpath(selector).each do |table|
      timetables << makeTimetable(table, period, short_name)
    end
  end

  if timetables.size > 2
    puts "WARNING: " + file + " more than 2 timetables (weekend split?):" + timetables.size.to_s
  end

  if timetables.size < 2
    puts "WARNING: " + file + " less than 2 timetables (weekday loop service?):" + timetables.size.to_s
  else
    # Array#- ignores element order, so this reports timing points of the
    # first timetable that do not appear anywhere in the second.
    unmatched = timetables[0]["time_points"] - timetables[1]["time_points"]
    unless unmatched.empty?
      puts "WARNING: first pair of timetable timing points are not complementary for " + file
      pp(unmatched)
    end
  end

  raise "No timetables extracted from " + file if timetables.empty?
end