require 'rubygems'
require 'nokogiri'
require 'open-uri'
require 'pp'
require 'yaml'
require 'fileutils'
# Core extension: every Array reports :inline as its YAML style.
# NOTE(review): presumably this is the hook the old Syck YAML engine consulted
# to emit arrays in flow form ([a, b, c]); confirm whether the YAML engine in
# use still honours it.
class Array
  # Returns the Symbol :inline for every array.
  def to_yaml_style
    :inline
  end
end
|
|
|
|
# Tidy the text of one header cell into a timing point name.
# A " Platform" suffix is opened with "(" and closed with ")", and a backslash
# becomes " / ".
def clean_timing_point(raw)
  name = raw.squeeze(" ")
            .gsub(" Platform", " (Platform")
            # NOTE(review): as written this replaces the pattern with itself; the
            # original spacing may have been lost - confirm against upstream.
            .gsub(" - ", " - ")
            .gsub("\n", "").gsub("\r", "")
            .gsub("\\", " / ")
            .squeeze(" ").strip
  name.concat(")") if raw.include?("Platform")
  name
end

# Convert the text of one time cell to the compact form written to the YAML
# file: AM/PM markers become "a"/"p", ":" and the "." after 1..12 are dropped,
# and any remaining run of dots becomes "-".
def normalize_time(raw)
  time = raw.squeeze(" ").strip
  time = time.gsub(/ *A\S?M/, "a").gsub(/ ?P\S?M/, "p")
  # NOTE(review): "AM" has already been rewritten on the line above, so this
  # literal can no longer match - confirm what the "1208x" case was meant to do.
  time = time.gsub("12:08 AM", "1208x")
  time = time.gsub(":", "")
  # Same order as the original chain: "1." first, "12." last.
  (1..12).each { |n| time = time.gsub("#{n}.", n.to_s) }
  time.gsub(/\.+/, "-")
end

# Build a timetable hash from one timetable table, write it as YAML into the
# "output" directory next to this script, and return it.
#
# table      - node responding to #xpath and #css (a Nokogiri table element
#              when called from this script).
# period     - String key under which the rows of times are stored; it is also
#              part of the output filename.
# short_name - String route number(s).
#
# Returns a Hash with the keys "between_stops", "short_name", "time_points",
# "long_name" and the value of +period+.
# Raises RuntimeError when the table yields no timing points or no rows of times.
def makeTimetable(table, period, short_name)
  timetable = { "between_stops" => [], "short_name" => short_name }

  # Header cells that are empty, a lone non-breaking space or "<br/>" are not
  # timing points.
  blank_headers = ["\302\240", "", "<br/>"]
  time_points = table.xpath('tr[1]//th')
                     .map { |th| th.content }
                     .reject { |text| blank_headers.include?(text) }
                     .map { |text| clean_timing_point(text) }
  if time_points.empty?
    raise "No timing points for route " + short_name + " in period " + period
  end
  timetable["time_points"] = time_points
  timetable["long_name"] = "To " + time_points.last

  periodtimes = []
  table.css('tr').each do |row|
    times = row.css('td').map { |cell| normalize_time(cell.content) }
    next if times.empty?
    # The first cell is dropped - presumably the route-number column.
    # TODO: account for shifting route numbers eg. intertown/redex 62/162
    times.shift
    periodtimes << times
  end
  if periodtimes.empty?
    raise "No times for route " + short_name + " in period " + period
  end
  timetable[period] = periodtimes
  # pp timetable

  filename = timetable["short_name"] + "-" + timetable["long_name"] + "." + period + ".yml"
  filename = filename.downcase.gsub(" ", "-").gsub("/", "-").gsub("(", "").gsub(")", "")
  puts "Saving " + filename

  output_dir = File.join(File.dirname(__FILE__), "output")
  FileUtils.mkdir_p(output_dir) # a missing directory must not abort the run
  File.open(File.join(output_dir, filename), "w") do |f|
    f.write timetable.to_yaml
  end
  timetable
end
|
|
#TODO fix route 934

# [XPath query, period key] pairs, applied to every document in this order.
# Each table matched by a query is passed to makeTimetable with that key.
period_queries = [
  ['//table[preceding::text()="Weekdays"]', "stop_times"],
  # weekends
  ['//table[preceding::text()="Saturdays" and following::a]', "stop_times_saturday"],
  ['//table[preceding::text()="Sundays"]', "stop_times_sunday"],
  # 930/934 special cases
  ['//table[preceding::text()="Saturday" and following::h2]', "stop_times_saturday"],
  ['//table[preceding::text()="Sunday"]', "stop_times_sunday"],
  # route 81 = Weekdays - School Holidays Only
  # TODO set active date range to only be holidays
  ['//table[preceding::text()="Weekdays - School Holidays Only "]', "stop_times"]
]

# Convert every saved route page under source-html/ into YAML timetables.
Dir.glob("source-html/Route*.htm*") do |file|
  puts "Opened " + file
  # Block form so the file handle is closed as soon as parsing is done.
  doc = File.open(file) { |io| Nokogiri::HTML(io) }

  # The route number(s) come from the page <title>; the last <title> wins.
  short_name = ""
  doc.xpath('//title').each do |title|
    short_name = title.content.gsub("Route_", "").gsub("Route ", "").gsub(", ", "/").squeeze(" ").strip
  end
  if short_name == ""
    raise "Route number(s) not found in <title> tag"
  end

  timetables = []
  period_queries.each do |query, period|
    doc.xpath(query).each do |table|
      timetables << makeTimetable(table, period, short_name)
    end
  end

  if timetables.size > 2
    puts "WARNING: " + file + " more than 2 timetables (weekend split?):" + timetables.size.to_s
  end
  if timetables.size < 2
    puts "WARNING: " + file + " less than 2 timetables (weekday loop service?):" + timetables.size.to_s
  else
    # Timing points present in the first timetable but absent from the second.
    unmatched = timetables[0]["time_points"] - timetables[1]["time_points"].reverse
    unless unmatched.empty?
      puts "WARNING: first pair of timetable timing points are not complementary for " + file
      pp(unmatched)
    end
  end
  if timetables.size < 1
    raise "No timetables extracted from " + file
  end
end
|
|