Fix up time display format
[bus.git] / maxious-canberra-transit-feed / 01-extracttimes.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
require 'rubygems'
require 'nokogiri'
require 'open-uri'
require 'pp'
require 'yaml'
# Reopen Array so that every array reports :inline as its YAML style.
# NOTE(review): presumably this is a hook read by the YAML emitter so that
# arrays (timing points, rows of times) are written on one line as [a, b, c];
# confirm that the YAML engine in use actually consults to_yaml_style.
class Array
  # Always answers :inline, whatever the array contains.
  define_method(:to_yaml_style) { :inline }
end
 
 
# Collects the timing point names from the header cells of the first row of
# +table+, skipping placeholder cells and tidying up the whitespace of the rest.
def extract_time_points(table)
  # Header cells that carry no name: a lone non-breaking space (written here
  # as its UTF-8 bytes), an empty cell, or a literal "<br/>".
  blank_headers = ["\302\240", "", "<br/>"]
  named_headers = table.xpath('tr[1]//th').reject { |th| blank_headers.include?(th.content) }
  named_headers.map do |th|
    # Collapse runs of spaces, turn a line break before "Platform" into
    # " - Platform", drop any remaining line breaks and swap backslashes for " / ".
    th.content.squeeze(" ").gsub("\r\n Platform"," - Platform").gsub("  - "," - ").gsub("\n","").gsub("\r","").gsub("\\"," / ").strip
  end
end

# Collects one array of time strings per row of +table+ that has <td> cells.
# The first cell of each such row is dropped.
def extract_stop_times(table)
  rows = table.css('tr').map do |row|
    #TODO convert to GTFS time ie. replace " AM" with a
    row.css('td').map { |cell| cell.content.squeeze(" ").strip }
  end
  # Rows without any <td> (e.g. the header row) contribute nothing.
  # TODO: account for shifting route numbers eg. intertown/redex 62/162
  # (the first column is currently discarded without being inspected).
  rows.reject { |times| times.empty? }.map { |times| times[1..-1] }
end

# Builds the timetable hash for one HTML timetable table and saves it as YAML.
#
# table      - node answering xpath/css (a Nokogiri <table> element in this script)
# period     - String naming the service period; used as the key under
#              "stop_times" and as part of the output file name
# short_name - String route identifier; stored in the hash and used as the
#              file name prefix
#
# Side effects: prints "Saving <filename>" and writes
# <directory of this script>/output/<filename>, creating the output directory
# when it does not exist yet.
#
# Returns the Hash with keys "stop_times", "between_stops", "short_name",
# "time_points" and "long_name".
# Raises RuntimeError when the table yields no timing points or no rows of times.
def makeTimetable(table, period, short_name)
  timetable = {"stop_times" => [], "between_stops" => [], "short_name" => short_name}

  time_points = extract_time_points(table)
  # Without this guard the long_name line below would fail with an opaque
  # TypeError (nil cannot be appended to a String).
  if time_points.empty?
    raise "No timing points for route " + short_name + " in period " + period
  end
  timetable["time_points"] = time_points
  timetable["long_name"] = "To " + time_points.last

  periodtimes = extract_stop_times(table)
  if periodtimes.empty?
    raise "No times for route " + short_name + " in period " + period
  end
  timetable["stop_times"] = { period => periodtimes }
  # pp timetable

  filename = timetable["short_name"] + "-" + timetable["long_name"].downcase.gsub(" ","-").gsub("/","") + "." + period + ".yml"
  puts "Saving " + filename
  output_dir = "#{File.dirname(__FILE__)}/output"
  # A fresh checkout may not have the output directory yet.
  Dir.mkdir(output_dir) unless File.directory?(output_dir)
  File.open(output_dir + "/" + filename, "w") do |f|
    f.write timetable.to_yaml
  end
  timetable
end
 
#TODO fix route 934
# Driver: for every saved route page under source-html/, extract each
# timetable table (makeTimetable also writes it out as YAML) and print
# warnings when the set of timetables for the page looks unusual.
Dir.glob("source-html/Route*.htm*") { |file|
  puts "Opened " + file
  # Parse inside a File.open block so the handle is closed as soon as the
  # document has been built instead of staying open until garbage collection.
  doc = File.open(file) { |html| Nokogiri::HTML(html) }

  # The route number comes from the page <title>; if there are several
  # <title> elements the last one wins.
  short_name = ""
  doc.xpath('//title').each do |title|
    short_name = title.content.gsub("Route_","").gsub("Route ","").squeeze(" ").strip
  end
  if short_name == ""
    raise "Route number(s) not found in <title> tag"
  end

  # [XPath query, service period] pairs, tried in this order. Each query picks
  # tables that appear after a text node equal to the quoted heading. The order
  # matters because the first two timetables found are compared further down.
  sections = [
    ['//table[preceding::text()="Weekdays"]', "weekday"],
    # weekends
    ['//table[preceding::text()="Saturdays" and following::a]', "saturday"],
    ['//table[preceding::text()="Sundays"]', "sunday"],
    # 930/934 special cases
    ['//table[preceding::text()="Saturday" and following::h2]', "saturday"],
    ['//table[preceding::text()="Sunday"]', "sunday"],
    # route 81 = Weekdays - School Holidays Only
    #TODO set active date range to only be holidays
    ['//table[preceding::text()="Weekdays - School Holidays Only "]', "weekday"]
  ]

  timetables = []
  sections.each do |query, period|
    doc.xpath(query).each do |table|
      timetables << makeTimetable(table, period, short_name)
    end
  end

  if timetables.size > 2
    puts "WARNING: " + file + " more than 2 timetables (weekend split?):" + timetables.size.to_s
  end
  if timetables.size < 2
    puts "WARNING: " + file + " less than 2 timetables (weekday loop service?):" + timetables.size.to_s
  else
    # The first two timetables are expected to list the same timing points in
    # opposite order; report any from the first that are missing in the second.
    unmatched = timetables[0]["time_points"] - timetables[1]["time_points"].reverse
    unless unmatched.empty?
      puts "WARNING: first pair of timetable timing points are not complementary for "+ file
      pp(unmatched)
    end
  end
  if timetables.size < 1
    raise "No timetables extracted from " + file
  end
}