--- a/maxious-canberra-transit-feed/01-extracttimes.rb
+++ b/maxious-canberra-transit-feed/01-extracttimes.rb
@@ -14,7 +14,14 @@
timetable = {"between_stops" => [], "short_name" => short_name}
time_points = table.xpath('tr[1]//th').map do |tp|
if tp.content != "\302\240" && tp.content != "" && tp.content != "
"
- timing_point = tp.content.squeeze(" ").gsub("\r\n Platform"," - Platform").gsub(" - "," - ").gsub("\n","").gsub("\r","").gsub("\\"," / ").strip
+ timing_point = tp.content.squeeze(" ").gsub("Shops"," ").gsub("Bus Station"," Bus Station ").gsub(" Platform"," (Platform").gsub(" - "," - ").gsub("\n"," ").gsub("\r"," ").gsub("\t"," ").gsub("\\"," / ").gsub("/"," / ").gsub(","," ").gsub("\302\240","").squeeze(" ").strip
+ if (tp.content.match('Platform'))
+ timing_point.concat(")")
+ end;
+ if tp.to_s.match(/[0-9][0-9][0-9]/) or tp.to_s.include? "Wheelchair"
+ timing_point = nil
+ end
+ timing_point
end
end
time_points.delete(nil)
@@ -24,12 +31,17 @@
table.css('tr').each do |row|
times = row.css('td').map do |cell|
time = cell.content.squeeze(" ").strip
- time = time.gsub(" AM","a").gsub(" PM","p").gsub("12:08 AM","1208x").gsub(":","").gsub("1.","1").gsub("2.","2")
- time = time.gsub("3.","3").gsub("4.","4")
+ time = time.gsub(/ *A\S?M/,"a").gsub(/ ?P\S?M/,"p").gsub(/ *a\S?m/,"a").gsub(/ ?p\S?m/,"p")
+ time = time.gsub("12:08 AM","1208x").gsub(":","").gsub("1.","1").gsub("2.","2").gsub("3.","3").gsub("4.","4")
time = time.gsub("5.","5").gsub("6.","6").gsub("7.","7").gsub("8.","8").gsub("9.","9").gsub("10.","10")
- time = time.gsub("11.","11").gsub("12.","12").gsub(/\.+/,"-")
+ time = time.gsub("11.","11").gsub("12.","12").gsub(/\.+/,"-").gsub("\302\240","")
+ if time == "" or time.include? "chool" or time.include? "On Race Days" or time.include? "Bus"
+ time = nil # This hacky way is faster than using a position()>1 xpath filter on the table elements!
+ end
+ time
end
- if not times.empty?
+ times.delete(nil)
+ if not times.empty?
if not (route = times.shift)
raise("TODO: account for shifting route numbers eg. intertown/redex 62/162")
end
@@ -41,7 +53,8 @@
end
timetable[period] = periodtimes.to_a
# pp timetable
- filename = timetable["short_name"] + "-" + timetable["long_name"].downcase.gsub(" ","-").gsub("/","") + "." + period + ".yml"
+ filename = timetable["short_name"] + "-" + timetable["long_name"]+ "." + period + ".yml"
+ filename = filename.downcase.gsub(" ","-").gsub("/","-").gsub("(","").gsub(")","")
puts "Saving " + filename
File.open("#{File.dirname(__FILE__)}/output/"+filename, "w") do |f|
f.write timetable.to_yaml
@@ -49,15 +62,14 @@
timetable
end
-#TODO fix route 934
-Dir.glob("source-html/Route*.htm*") { |file|
+Dir.glob("source-html/*oute*.htm*") { |file|
puts "Opened " + file
doc = Nokogiri::HTML(open(file))
# Search for nodes by css
timetables = []
short_name = "";
doc.xpath('//title').each do |title|
- short_name = title.content.gsub("Route_","").gsub("Route ","").squeeze(" ").strip
+ short_name = title.content.gsub("Route_","").gsub("Route ","").gsub("route ","").gsub(", ","/").gsub("ACTION Buses Timetable for ","").squeeze(" ").strip
end
if short_name == ""
raise "Route number(s) not found in tag"
@@ -66,7 +78,17 @@
doc.xpath('//table[preceding::text()="Weekdays"]').each do |table|
timetables << makeTimetable(table, "stop_times", short_name)
end
-
+ doc.xpath('//table[preceding::text()="This timetable is effective from Monday 15th November 2010."]').each do |table|
+ if short_name[0].chr != "9" or short_name.size == 1
+ timetables << makeTimetable(table, "stop_times", short_name)
+ end
+ end
+ # all tables are weekdays on some really badly formatted timetables
+ if short_name == "170"
+ doc.xpath('//table').each do |table|
+ timetables << makeTimetable(table, "stop_times", short_name)
+ end
+ end
#weekends
doc.xpath('//table[preceding::text()="Saturdays" and following::a]').each do |table|
timetables << makeTimetable(table, "stop_times_saturday", short_name)
|