Suggest timepoints using OSM database of POIs
[bus.git] / maxious-canberra-transit-feed / 01-extracttimes.rb
blob:a/maxious-canberra-transit-feed/01-extracttimes.rb -> blob:b/maxious-canberra-transit-feed/01-extracttimes.rb
--- a/maxious-canberra-transit-feed/01-extracttimes.rb
+++ b/maxious-canberra-transit-feed/01-extracttimes.rb
@@ -1,1 +1,95 @@
+require 'rubygems'
+require 'nokogiri'
+require 'open-uri'
+require 'pp'
+require 'yaml'
 
+def makeTimetable(table, period, short_name)
+	timetable = {"stop_times" => [], "between_stops" => [], "short_name" => short_name}
+	time_points = table.xpath('tr[1]//th').map do |tp|
+		if tp.content != "\302\240" && tp.content != "" && tp.content != "<br/>"
+			timing_point = tp.content.squeeze(" ").gsub("\r\n Platform"," - Platform").gsub("  - "," - ").gsub("\n","").gsub("\r","").gsub("\\"," / ").strip
+		end
+	end
+	time_points.delete(nil)
+	timetable["time_points"] = time_points
+	timetable["long_name"] = "To " + time_points.last
+	periodtimes = []
+	table.css('tr').each do |row|
+		times = row.css('td').map do |cell|
+			#TODO convert to GTFS time ie. replace " AM" with a
+			time = cell.content.squeeze(" ").strip
+		end
+		if not times.empty?
+			if not (route = times.shift)
+				raise("TODO: account for shifting route numbers eg. intertown/redex 62/162")
+			end
+			periodtimes << times
+		end
+	end
+	if periodtimes.size < 1
+		raise "No times for route " + short_name + " in period " + period
+	end
+	timetable["stop_times"] = { period => periodtimes }
+	# pp timetable
+	filename = timetable["short_name"] + "-" + timetable["long_name"].downcase.gsub(" ","-").gsub("/","") + "." + period + ".yml"
+	puts "Saving " + filename
+	File.open("#{File.dirname(__FILE__)}/output/"+filename, "w") do |f|
+	  	f.write timetable.to_yaml
+	end
+	timetable
+end
+
+#TODO fix route 934
+Dir.glob("source-html/Route*.htm*") { |file|
+	puts "Opened " + file
+	doc = Nokogiri::HTML(open(file))
+	# Search for nodes by css
+	timetables = []
+	short_name = "";
+	doc.xpath('//title').each do |title|
+		short_name = title.content.gsub("Route_","").gsub("Route ","").squeeze(" ").strip
+	end
+	if short_name == ""
+		raise "Route number(s) not found in <title> tag"
+	end
+
+	doc.xpath('//table[preceding::text()="Weekdays"]').each do |table|
+		timetables << makeTimetable(table, "weekday", short_name)
+	end
+
+	#weekends
+	doc.xpath('//table[preceding::text()="Saturdays" and following::a]').each do |table|
+		timetables << makeTimetable(table, "saturday", short_name)
+	end
+	doc.xpath('//table[preceding::text()="Sundays"]').each do |table|
+		timetables << makeTimetable(table, "sunday",  short_name)
+	end
+	#930/934 special cases
+	doc.xpath('//table[preceding::text()="Saturday" and following::h2]').each do |table|
+		timetables << makeTimetable(table, "saturday", short_name)
+	end
+	doc.xpath('//table[preceding::text()="Sunday"]').each do |table|
+		timetables << makeTimetable(table, "sunday",  short_name)
+	end
+	#route 81 = Weekdays - School Holidays Only 
+	doc.xpath('//table[preceding::text()="Weekdays - School Holidays Only "]').each do |table|
+		timetable = makeTimetable(table, "weekday", short_name)
+		#TODO set active date range to only be holidays
+		timetables << timetable;
+	end
+
+	
+	if timetables.size > 2
+		puts "WARNING: " + file + " more than 2 timetables (weekend split?):" + timetables.size.to_s
+	end
+	if timetables.size < 2
+		puts "WARNING: " + file + " less than 2 timetables (weekday loop service?):" + timetables.size.to_s 
+	elsif not (timetables[0]["time_points"] - timetables[1]["time_points"].reverse).empty?
+		puts "WARNING: first pair of timetable timing points are not complementary for "+ file 
+		pp(timetables[0]["time_points"] - timetables[1]["time_points"].reverse)
+	end
+	if timetables.size < 1
+		raise "No timetables extracted from " + file
+	end
+}
+