require 'rubygems' | |
require 'nokogiri' | |
require 'open-uri' | |
require 'pp' | |
# Builds the timetable for one service period of a route from an HTML table
# node and saves it as YAML under the "output" directory next to this script.
#
# table      - node for the timetable <table>; it must respond to
#              xpath/css the way a Nokogiri node does.
# period     - String naming the service period (e.g. "weekday"); used as the
#              key in "stop_times" and as part of the output filename.
# short_name - String route number/name taken from the page title.
#
# Returns the timetable Hash with the keys "short_name", "long_name",
# "time_points", "stop_times" and "between_stops".
# Raises RuntimeError when the table has no usable timing point headers or no
# rows of times.
def makeTimetable(table, period, short_name)
  timetable = {"stop_times" => [], "between_stops" => [], "short_name" => short_name}

  # Header cells holding only a non-breaking space, nothing, or a stray <br/>
  # are layout padding, not timing points.
  blank_headers = ["\302\240", "", "<br/>"]
  time_points = table.xpath('tr[1]//th').map do |tp|
    next if blank_headers.include?(tp.content)
    tp.content.squeeze(" ").gsub("\r\n Platform"," - Platform").gsub(" - "," - ").gsub("\n","").gsub("\r","").gsub("\\"," / ").strip
  end.compact

  # Without this guard `"To " + nil` below fails with an opaque TypeError.
  if time_points.empty?
    raise "No timing points for route " + short_name + " in period " + period
  end
  timetable["time_points"] = time_points
  timetable["long_name"] = "To " + time_points.last

  periodtimes = []
  table.css('tr').each do |row|
    #TODO convert to GTFS time ie. replace " AM" with a
    times = row.css('td').map { |cell| cell.content.squeeze(" ").strip }
    next if times.empty?
    # The first cell of each row is the route number, not a time.
    #TODO: account for shifting route numbers eg. intertown/redex 62/162
    times.shift
    periodtimes << times
  end
  if periodtimes.empty?
    raise "No times for route " + short_name + " in period " + period
  end
  timetable["stop_times"] = { period => periodtimes }
  # pp timetable

  filename = timetable["short_name"] + "-" + timetable["long_name"].downcase.gsub(" ","-").gsub("/","") + "." + period + ".yml"
  puts "Saving " + filename
  File.open("#{File.dirname(__FILE__)}/output/"+filename, "w") do |f|
    f.write timetable.to_yaml
  end
  timetable
end
#TODO fix route 934
# Pairs of [XPath selecting timetable tables, service period], tried in this
# order for every route page. The order matters: the first two timetables
# found are compared against each other further down.
period_tables = [
  ['//table[preceding::text()="Weekdays"]', "weekday"],
  #weekends
  ['//table[preceding::text()="Saturdays" and following::a]', "saturday"],
  ['//table[preceding::text()="Sundays"]', "sunday"],
  #930/934 special cases
  ['//table[preceding::text()="Saturday" and following::h2]', "saturday"],
  ['//table[preceding::text()="Sunday"]', "sunday"],
  #route 81 = Weekdays - School Holidays Only
  #TODO set active date range to only be holidays
  ['//table[preceding::text()="Weekdays - School Holidays Only "]', "weekday"]
]

# Parse every saved route page and write one YAML timetable per table found.
Dir.glob("source-html/Route*.htm*") { |file|
  puts "Opened " + file
  # File.open with a block closes the handle once parsing is done, and avoids
  # Kernel#open, which treats some path strings specially.
  doc = File.open(file) { |html| Nokogiri::HTML(html) }

  # The route number(s) come from the <title>; if there are several <title>
  # nodes the last one wins.
  short_name = ""
  doc.xpath('//title').each do |title|
    short_name = title.content.gsub("Route_","").gsub("Route ","").squeeze(" ").strip
  end
  if short_name == ""
    raise "Route number(s) not found in <title> tag"
  end

  timetables = []
  period_tables.each do |xpath, period|
    doc.xpath(xpath).each do |table|
      timetables << makeTimetable(table, period, short_name)
    end
  end

  if timetables.size > 2
    puts "WARNING: " + file + " more than 2 timetables (weekend split?):" + timetables.size.to_s
  end
  if timetables.size < 2
    puts "WARNING: " + file + " less than 2 timetables (weekday loop service?):" + timetables.size.to_s
  else
    # An outbound/inbound pair is expected to list the same timing points in
    # opposite order; report whatever the first timetable has extra.
    unmatched = timetables[0]["time_points"] - timetables[1]["time_points"].reverse
    unless unmatched.empty?
      puts "WARNING: first pair of timetable timing points are not complementary for "+ file
      pp(unmatched)
    end
  end
  if timetables.size < 1
    raise "No timetables extracted from " + file
  end
}
require 'rubygems' | |
require 'pp' | |
require 'yaml' | |
# Work inside the "output" directory that sits next to this script, so the
# YAML files are found regardless of the directory the script is started from.
Dir.chdir(File.join(File.dirname(__FILE__), "output"))
# Rebuilds the global timing point index from every *.yml timetable in the
# current directory.
#
# Sets two globals:
#   $time_points         - Array of every distinct timing point name found.
#   $time_points_sources - Hash of timing point name => Array of the files
#                          that list it; an unknown name yields an empty Array.
#
# Files that are empty or have no "time_points" entry are reported on stderr
# and skipped instead of aborting the scan.
# Returns nil.
def getTimePoints()
  $time_points = []
  # The default array is shared by every missing key, so it is frozen to make
  # an accidental in-place append fail loudly instead of leaking between keys.
  $time_points_sources = Hash.new([].freeze)
  # Sorted so the result does not depend on directory listing order.
  Dir.glob("*.yml").sort.each do |file|
    timetable = YAML::load_file(file)
    points = timetable.is_a?(Hash) ? Array(timetable["time_points"]) : []
    if points.empty?
      warn "WARNING: no time_points in " + file
      next
    end
    $time_points |= points
    points.each do |timepoint|
      # `|=` builds a new array, so the frozen default itself is never touched.
      $time_points_sources[timepoint] |= [ file ]
    end
  end
  nil
end
# Index the timing points as scraped and show them before correcting.
getTimePoints()
pp $time_points.sort!
#pp $time_points_sources.sort

# Misspelt or inconsistent timing point names as they appear in the scraped
# timetables => the canonical name to store instead.
time_point_corrections = {"North Lynehamham" => "North Lyneham",
  "Lathlain St Platform 2" => "Lathlain St Bus Station - Platform 2",
  "Lathlain St Sation - Platform 5" => "Lathlain St Bus Station - Platform 5",
  "Lathlain Steet Station" => "Lathlain St Bus Station",
  "Lathlain St - Platform 3" => "Lathlain St Bus Station - Platform 3",
  "Lathlain Steet Station - Platform 3" => "Lathlain St Bus Station - Platform 3",
  "Lathlain St Station" => "Lathlain St Bus Station",
  "Lathlain St Station - Platform 1" => "Lathlain St Bus Station - Platform 1",
  "Lathlain St Station - Platform 2" => "Lathlain St Bus Station - Platform 2",
  "Lathlain St Station - Platform 3" => "Lathlain St Bus Station - Platform 3",
  "Lathlain St Station - Platform 4" => "Lathlain St Bus Station - Platform 4",
  "Lathlain St Station - Platform 5" => "Lathlain St Bus Station - Platform 5",
  "Lathlain St Station - Platform 6" => "Lathlain St Bus Station - Platform 6",
  "Manuka Captain Cook" => "Manuka, Captain Cook",
  "Flemington Rd, Sandford St" => "Flemington Rd/Sandford St",
  "Erindale Centre / - Sternberg Crescent" => "Erindale Drive/Sternberg",
  "Canberra Hospita" => "Canberra Hospital",
  "Cohen Str Station - Platform 1" => "Cohen St Bus Station - Platform 1",
  "Cohen Street Station" => "Cohen St Bus Station",
  "Cohen Street Station - Platform 2" => "Cohen St Bus Station - Platform 2",
  "Cohn St Station - Platform 3" => "Cohen St Bus Station - Platform 3",
  "Cohen St Station" => "Cohen St Bus Station",
  "Cohen St Station - Platform 1" => "Cohen St Bus Station - Platform 1",
  "Cohen St Station - Platform 2" => "Cohen St Bus Station - Platform 2",
  "Cohen St Station - Platform 3" => "Cohen St Bus Station - Platform 3",
  "Cohen St Station - Platform 4" => "Cohen St Bus Station - Platform 4",
  "Cohen St Station - Platform 5" => "Cohen St Bus Station - Platform 5",
  "Cohen St Station - Platform 6" => "Cohen St Bus Station - Platform 6",
  "City - Platform 7" => "City Interchange - Platform 7",
  "Cameron Avenue Station" => "Cameron Ave Bus Station",
  "Cameron Avenue Station - Platform 1" => "Cameron Ave Bus Station - Platform 1",
  "Cameron Avenue Station - Platform 2" => "Cameron Ave Bus Station - Platform 2",
  "Cameron Avenue Station - Platform 3" => "Cameron Ave Bus Station - Platform 3",
  "Cameron Avenue Station - Platform 4" => "Cameron Ave Bus Station - Platform 4",
  "Cameron Avenue Station - Platform 5" => "Cameron Ave Bus Station - Platform 5",
  "Cameron Ave Station" => "Cameron Ave Bus Station",
  "Cameron Ave Station - Platform 1" => "Cameron Ave Bus Station - Platform 1",
  "Cameron Ave Station - Platform 2" => "Cameron Ave Bus Station - Platform 2",
  "Cameron Ave Station - Platform 3" => "Cameron Ave Bus Station - Platform 3",
  "Cameron Ave Station - Platform 4" => "Cameron Ave Bus Station - Platform 4",
  "Cameron Ave Station - Platform 5" => "Cameron Ave Bus Station - Platform 5",
  "Burton & Garranan Hall, Daley Road ANU" => "Burton & Garran Hall, Daley Road ANU",
  "Burton & Garranan Hall,Daley Road ANU" => "Burton & Garran Hall, Daley Road ANU",
  "Newcastle Street after Isa St" => "Newcastle / Isa Street Fyshwick",
  "National Circ/Canberra Ave" => "National Circuit / Canberra Ave"
}

# Rewrite every timetable file that mentions a wrong name.
time_point_corrections.each do |wrong, right|
  $time_points_sources[wrong].each do |wrongfile|
    # Reload from disk each time: an earlier correction may already have
    # rewritten this file.
    badtimetable = YAML::load_file(wrongfile)
    # The index can be stale; skip rather than fail on a nil array index.
    next unless badtimetable["time_points"].include?(wrong)
    # Replace every occurrence, not just the first: a timetable can list the
    # same timing point more than once.
    badtimetable["time_points"] = badtimetable["time_points"].map do |time_point|
      time_point == wrong ? right : time_point
    end
    puts "Corrected '" + wrong + "' to '" + right + "' in " + wrongfile
    File.open(wrongfile, "w") do |f|
      f.write badtimetable.to_yaml
    end
  end
end

# Re-index and show the corrected list.
getTimePoints()
pp $time_points.sort!
#!/usr/bin/ruby | |
require 'postgres' | |
require 'highline.rb' | |
include HighLine | |
require 'yaml' | |
require 'pp' | |
# Work inside the "output" directory that sits next to this script, so the
# YAML files are found regardless of the directory the script is started from.
Dir.chdir(File.join(File.dirname(__FILE__), "output"))
# Rebuilds the global timing point index from every *.yml timetable in the
# current directory.
#
# Sets two globals:
#   $time_points         - Array of every distinct timing point name found.
#   $time_points_sources - Hash of timing point name => Array of the files
#                          that list it; an unknown name yields an empty Array.
#
# Files that are empty or have no "time_points" entry are reported on stderr
# and skipped instead of aborting the scan.
# Returns nil.
def getTimePoints()
  $time_points = []
  # The default array is shared by every missing key, so it is frozen to make
  # an accidental in-place append fail loudly instead of leaking between keys.
  $time_points_sources = Hash.new([].freeze)
  # Sorted so the result does not depend on directory listing order.
  Dir.glob("*.yml").sort.each do |file|
    timetable = YAML::load_file(file)
    points = timetable.is_a?(Hash) ? Array(timetable["time_points"]) : []
    if points.empty?
      warn "WARNING: no time_points in " + file
      next
    end
    $time_points |= points
    points.each do |timepoint|
      # `|=` builds a new array, so the frozen default itself is never touched.
      $time_points_sources[timepoint] |= [ file ]
    end
  end
  nil
end
getTimePoints()
$time_points.sort!

# NOTE(review): host, user and password are hard-coded below.
connbus = PGconn.connect("localhost", 5432, '', '', "bus", "postgres",
"snmc")
connosm = PGconn.connect("localhost", 5432, '', '', "openstreetmap",
"postgres", "snmc")

# Escapes a value for use inside a single-quoted SQL literal: backslashes and
# single quotes are doubled.
sql_quote = lambda { |text| text.gsub(/\\/, '\&\&').gsub(/'/, "''") }

# Query for timing points that still lack a position.
null_points_sql = 'SELECT name FROM timing_point WHERE lat IS null OR lng IS null;'

if ask_if("Insert Timing Point names to database?")
  $time_points.each do |time_point|
    begin
      time_point = sql_quote.call(time_point)
      connbus.exec("INSERT INTO timing_point (name) VALUES ('#{time_point}')")
      puts "Put '#{time_point}' into DB"
    rescue PGError => e
      puts "Error inserting '#{time_point}' to DB #{e}"
      #conn.close() if conn
    end
  end
end

if ask_if("Fill null Timing Points from OSM bus_stop database?")
  null_points = nil
  begin
    null_points = connbus.exec(null_points_sql)
  rescue PGError => e
    puts "Error selecting null points from DB #{e}"
    #conn.close() if conn
  end
  # A failed SELECT leaves nothing to iterate; fall back to an empty list
  # instead of calling each on nil.
  (null_points || []).each do |null_point_row|
    # Rows are indexed positionally throughout this script; the name is the
    # only selected column. (Interpolating the whole row would print
    # ["name"] rather than name on Ruby >= 1.9.)
    null_point_name = null_point_row[0].to_s
    name = sql_quote.call(null_point_name)
    pp name
    begin
      matching_nodes = connosm.exec("Select * FROM (SELECT * from current_node_tags,
(Select id as ctagid FROM current_node_tags WHERE v LIKE '%#{name}%') as a
where a.ctagid = current_node_tags.id) as ctags INNER JOIN current_nodes ON
ctags.id=current_nodes.id")
    rescue PGError => e
      puts "Error selecting matching bus stops from DB #{e}"
      #conn.close() if conn
      # No candidates to offer for this timing point; move on to the next.
      next
    end

    # Collect the tags of each candidate node into one hash per node id.
    suggested_nodes = Hash.new()
    matching_nodes.each do |matching_node_row|
      #pp matching_node_row
      # 0 = id
      # 1 = k
      # 2 = v
      # 3,4 = redundant ids
      # 5 = lat*10000000
      # 6 = lng*10000000
      suggested_node = suggested_nodes.fetch(matching_node_row[0], {'lat' => Float(matching_node_row[5])/10000000,
        'lng' => Float(matching_node_row[6])/10000000})
      # An OSM "ref" tag is stored as loc_ref.
      if matching_node_row[1] == "ref"
        matching_node_row[1] = "loc_ref"
      end
      suggested_node[matching_node_row[1]] = matching_node_row[2]
      suggested_nodes[matching_node_row[0]] = suggested_node
    end
    pp suggested_nodes

    nodeID = ask("Enter selected node ID:", :string)
    if suggested_nodes.has_key?(nodeID)
      node = suggested_nodes.fetch(nodeID)
      guess = ask_if("Is this a guess?")
      puts "Location #{node["lat"]},#{node["lng"]} for #{null_point_name}"
      begin
        # NOTE(review): loc_ref is interpolated unquoted, so a non-numeric
        # ref tag will make this statement fail - confirm the column type.
        connbus.exec("UPDATE timing_point SET lat = #{node["lat"]*10000000}, lng =
#{node["lng"]*10000000},osm_node = #{nodeID}" + (node.has_key?("loc_ref") ? ",loc_ref = #{node["loc_ref"]}" : "") + ",guess = #{guess} WHERE name
= '#{name}'")
        puts "Put '#{null_point_name}' into DB"
      rescue PGError => e
        puts "Error inserting '#{null_point_name}' to DB #{e}"
        # Honour the answer: stop processing further timing points on "no".
        break unless ask_if("Continue?")
        #conn.close() if conn
      end
    else
      puts "Uhh, there was no suggestion ID like that. Try again next time!"
    end
  end
end

if ask_if("Fill null Timing Points from geocoder?")
  null_points = nil
  begin
    null_points = connbus.exec(null_points_sql)
  rescue PGError => e
    puts "Error selecting null points from DB #{e}"
    #conn.close() if conn
  end
  # Geocoding is not implemented yet; the pending rows are only listed.
  (null_points || []).each do |null_point_name|
    pp null_point_name
  end
end
module HighLine | |
# Prompts on stdout and reads one answer from $stdin, repeating until the
# answer converts and validates.
#
# prompt = text to display
# type can be one of :string, :integer, :float, :bool or a proc
#   if it's a proc then it is called with the entered string. If the input
#   cannot be converted then it should throw an exception
#   if type == :bool then y,yes are converted to true. n,no are converted to
#   false. All other values are rejected.
#
# options should be a hash of validation options
#   :validate => regular expression or proc
#     if validate is a regular expression then the input is matched against it
#     if it's a proc then the proc is called and the input is accepted if it
#     returns true
#   :between => range (:within is accepted as an alias)
#     the input is checked if it lies within the range
#   :above => value
#     the input is checked if it is above the value
#   :below => value
#     the input is checked if it is less than the value
#   :default => string
#     if the user doesn't enter a value then the default value is returned
#   :base => [b, o, d, x]
#     when asking for integers this will take a number in binary, octal,
#     decimal or hexadecimal
#
# Returns the converted answer.
# Raises EOFError if $stdin is exhausted before a valid answer is read.
def ask(prompt, type, options=nil)
  default = option(options, :default)
  defaultstr = default ? " |#{default}|" : ""
  base = option(options, :base)

  loop do
    valid = true
    print prompt, "#{defaultstr} "
    $stdout.flush

    # Read from $stdin explicitly: Kernel#gets would read from files named in
    # ARGV. A nil line means end of input, where retrying can never succeed.
    line = $stdin.gets
    raise EOFError, "end of input while waiting for an answer" if line.nil?
    input = line.chomp
    input = default if default && input == ""

    #convert the input to the correct type
    begin
      input = case type
              when :string then input
              when :integer then convert(input, base)
              when :float then Float(input)
              when :bool
                valid = !!(input =~ /\A(y|n|yes|no)\z/)
                input[0, 1] == "y"
              when Proc then type.call(input)
              end
    rescue StandardError
      # A failed conversion invalidates this answer and prompts again.
      input = false
      valid = false
    end

    #validate the input
    valid &&= validate(options, :validate) do |test|
      case test
      when Regexp then input =~ test
      when Proc then test.call(input)
      end
    end
    # :between is the documented key; :within is what this method used to
    # read, so both are honoured.
    valid &&= validate(options, :between) { |range| range === input }
    valid &&= validate(options, :within) { |range| range === input }
    valid &&= validate(options, :above) { |value| input > value }
    valid &&= validate(options, :below) { |value| input < value }

    return input if valid
    puts "Not a valid value"
  end
end
# Asks a yes/no question.
# Delegates to ask with the :bool type and returns whatever ask returns.
def ask_if(prompt)
  ask(prompt, :bool)
end
private | |
# Looks up key in the options hash.
# Returns the stored value, or nil when options is nil/false or the key is
# not present.
def option(options, key)
  (options && options.key?(key)) ? options[key] : nil
end
# Validation helper: when options contains key, yields the stored value and
# returns the block's result; otherwise nothing is checked and true is
# returned.
def validate(options, key)
  return true unless options && options.key?(key)
  yield options[key]
end
#converts a string to an integer | |
#input = the value to convert | |
#base = the numeric base of the value b,o,d,x | |
def convert(input, base) | |
if base | |
if ["b", "o", "d" |