Add weekend Network 10
[bus.git] / maxious-canberra-transit-feed / 01-extracttimes.rb
blob:a/maxious-canberra-transit-feed/01-extracttimes.rb -> blob:b/maxious-canberra-transit-feed/01-extracttimes.rb
--- a/maxious-canberra-transit-feed/01-extracttimes.rb
+++ b/maxious-canberra-transit-feed/01-extracttimes.rb
@@ -14,31 +14,34 @@
 	timetable = {"between_stops" => [], "short_name" => short_name}
 	time_points = table.xpath('tr[1]//th').map do |tp|
 		if tp.content != "\302\240" && tp.content != "" && tp.content != "<br/>"
-			timing_point = tp.content.squeeze(" ").gsub("Bus Station"," Bus Station ").gsub(" Platform"," (Platform").gsub("  - "," - ").gsub("\n"," ").gsub("\r"," ").gsub("\t"," ").gsub("\\"," / ").gsub("/"," / ").gsub(",",", ").gsub("\302\240","").squeeze(" ").strip
+			timing_point = tp.content.squeeze(" ").gsub("Bus Station"," Bus Station ").gsub(" Platform"," (Platform").gsub("  - "," - ").gsub("\n"," ").gsub("\r"," ").gsub("\t"," ").gsub("\\"," / ").gsub("/"," / ").gsub(","," ").gsub("\302\240","").squeeze(" ").strip
 			if (tp.content.match('Platform'))
 			  timing_point.concat(")")
 			end;
+			if tp.to_s.match(/[0-9][0-9][0-9]/) or tp.to_s.include? "Wheelchair"
+			  timing_point = nil
+			end
 			timing_point
 		end
 	end
 	time_points.delete(nil)
-	time_points.delete("WheelchairAccessible")
-	time_points.delete("Wheelchair Accessible")
 	timetable["time_points"] = time_points.to_a
 	timetable["long_name"] = "To " + time_points.last
 	periodtimes = []
 	table.css('tr').each do |row|
 		times = row.css('td').map do |cell|
 			time = cell.content.squeeze(" ").strip
-			time = time.gsub(/ *A\S?M/,"a").gsub(/ ?P\S?M/,"p").gsub("12:08 AM","1208x").gsub(":","").gsub("1.","1").gsub("2.","2")
-			time = time.gsub("3.","3").gsub("4.","4")
+			time = time.gsub(/ *A\S?M/,"a").gsub(/ ?P\S?M/,"p").gsub(/ *a\S?m/,"a").gsub(/ ?p\S?m/,"p")
+			time = time.gsub("12:08 AM","1208x").gsub(":","").gsub("1.","1").gsub("2.","2").gsub("3.","3").gsub("4.","4")
 			time = time.gsub("5.","5").gsub("6.","6").gsub("7.","7").gsub("8.","8").gsub("9.","9").gsub("10.","10")
 			time = time.gsub("11.","11").gsub("12.","12").gsub(/\.+/,"-").gsub("\302\240","")
-			if time == "" then time = nil end 
+			if time == "" or time.include? "chool" or time.include? "On Race Days" or time.include? "Bus"
+				time = nil # This hacky way is faster than using position()>1 xpath on <TD>s!
+			end 
 			time
 		end
 		times.delete(nil)
-		if not times.empty?
+		if not times.empty? 
 			if not (route = times.shift)
 				raise("TODO: account for shifting route numbers eg. intertown/redex 62/162")
 			end
@@ -59,14 +62,14 @@
 	timetable
 end
 
-Dir.glob("source-html/Route*.htm*") { |file|
+Dir.glob("source-html/*oute*.htm*") { |file|
 	puts "Opened " + file
 	doc = Nokogiri::HTML(open(file))
 	# Search for nodes by css
 	timetables = []
 	short_name = "";
 	doc.xpath('//title').each do |title|
-		short_name = title.content.gsub("Route_","").gsub("Route ","").gsub(", ","/").gsub("ACTION Buses Timetable for ","").squeeze(" ").strip
+		short_name = title.content.gsub("Route_","").gsub("Route ","").gsub("route ","").gsub(", ","/").gsub("ACTION Buses Timetable for ","").squeeze(" ").strip
 	end
 	if short_name == ""
 		raise "Route number(s) not found in <title> tag"