require 'rubygems' | require 'rubygems' |
require 'nokogiri' | require 'nokogiri' |
require 'open-uri' | require 'open-uri' |
require 'pp' | require 'pp' |
require 'yaml' | |
# Reopens Array so that every array reports :inline as its YAML style.
# NOTE(review): `to_yaml_style` looks like the hook consulted by the older
# Syck YAML engine to emit `[a, b, c]` on one line - confirm the YAML library
# in use still honours it.
class Array
  # @return [Symbol] always :inline
  def to_yaml_style
    :inline
  end
end
# Builds a timetable hash from one HTML timetable table and saves it as YAML.
#
# The <th> cells of the first row supply the timing point names. Every row
# that has <td> cells supplies one trip: its first cell (the route number) is
# discarded and the remaining cells are kept as the times.
#
# @param table [#xpath, #css] table node (a Nokogiri element in this script)
# @param period [String] service period label, e.g. "weekday"
# @param short_name [String] route number(s) for this timetable
# @return [Hash] the timetable; it is also written to
#   output/<short_name>-<long_name>.<period>.yml next to this script
# @raise [RuntimeError] if the table yields no timing points or no time rows
def makeTimetable(table, period, short_name)
  timetable = {"stop_times" => [], "between_stops" => [], "short_name" => short_name}

  # Header cells holding only a non-breaking space, nothing, or a stray <br/>
  # are layout padding, not timing points.
  blank_headers = ["\302\240", "", "<br/>"]
  header_texts = table.xpath('tr[1]//th').map { |header| header.content }
  time_points = header_texts.reject { |text| blank_headers.include?(text) }.map do |text|
    text.squeeze(" ").gsub("\r\n Platform"," - Platform").gsub(" - "," - ").gsub("\n","").gsub("\r","").gsub("\\"," / ").strip
  end
  if time_points.empty?
    # Previously this fell through to `"To " + nil` and died with a TypeError.
    raise "No timing points for route " + short_name + " in period " + period
  end
  timetable["time_points"] = time_points
  timetable["long_name"] = "To " + time_points.last

  periodtimes = []
  table.css('tr').each do |row|
    #TODO convert to GTFS time ie. replace " AM" with a
    times = row.css('td').map { |cell| cell.content.squeeze(" ").strip }
    next if times.empty?
    # The first cell is the route number.
    # TODO: account for shifting route numbers eg. intertown/redex 62/162
    times.shift
    periodtimes << times
  end
  if periodtimes.empty?
    raise "No times for route " + short_name + " in period " + period
  end
  timetable["stop_times"] = { period => periodtimes }

  # pp timetable
  slug = timetable["long_name"].downcase.gsub(" ","-").gsub("/","")
  filename = "#{timetable["short_name"]}-#{slug}.#{period}.yml"
  puts "Saving " + filename
  File.open(File.join(File.dirname(__FILE__), "output", filename), "w") do |f|
    f.write timetable.to_yaml
  end
  timetable
end
#TODO fix route 934
# Parses every saved route page under source-html/, extracts one timetable per
# matching table via makeTimetable, and prints warnings when the set of
# timetables found for a page looks unusual.
Dir.glob("source-html/Route*.htm*") do |file|
  puts "Opened " + file
  # Block form closes the file handle as soon as the page has been parsed.
  doc = File.open(file) { |html| Nokogiri::HTML(html) }

  # The route number(s) come from the page title; the last <title> wins.
  short_name = ""
  doc.xpath('//title').each do |title|
    short_name = title.content.gsub("Route_","").gsub("Route ","").squeeze(" ").strip
  end
  if short_name == ""
    raise "Route number(s) not found in <title> tag"
  end

  # [xpath selecting the tables, period label], evaluated in this order.
  period_queries = [
    ['//table[preceding::text()="Weekdays"]', "weekday"],
    #weekends
    ['//table[preceding::text()="Saturdays" and following::a]', "saturday"],
    ['//table[preceding::text()="Sundays"]', "sunday"],
    #930/934 special cases
    ['//table[preceding::text()="Saturday" and following::h2]', "saturday"],
    ['//table[preceding::text()="Sunday"]', "sunday"],
    #route 81 = Weekdays - School Holidays Only
    #TODO set active date range to only be holidays
    ['//table[preceding::text()="Weekdays - School Holidays Only "]', "weekday"],
  ]

  timetables = []
  period_queries.each do |query, period|
    doc.xpath(query).each do |table|
      timetables << makeTimetable(table, period, short_name)
    end
  end

  if timetables.size > 2
    puts "WARNING: " + file + " more than 2 timetables (weekend split?):" + timetables.size.to_s
  end
  if timetables.size < 2
    puts "WARNING: " + file + " less than 2 timetables (weekday loop service?):" + timetables.size.to_s
  else
    # Timing points of the first timetable that the second one does not list.
    unmatched = timetables[0]["time_points"] - timetables[1]["time_points"].reverse
    unless unmatched.empty?
      puts "WARNING: first pair of timetable timing points are not complementary for "+ file
      pp(unmatched)
    end
  end
  if timetables.size < 1
    raise "No timetables extracted from " + file
  end
end
require 'rubygems' | require 'rubygems' |
require 'pp' | require 'pp' |
require 'yaml' | require 'yaml' |
Dir.chdir("output") | Dir.chdir("output") |
# Rebuilds the global timing point index from every *.yml file in the current
# directory.
#
# Sets two globals:
#   $time_points         - array of the distinct timing point names found
#   $time_points_sources - hash of name => array of the files that list it;
#                          an unknown name reads as an empty array and is not
#                          stored in the hash
def getTimePoints()
  $time_points = []
  # The default array is shared by every missing-key lookup, so it is frozen
  # to make an accidental in-place append fail loudly instead of leaking
  # entries into all unknown names.
  $time_points_sources = Hash.new([].freeze)
  Dir.glob("*.yml") do |file|
    timetable = YAML::load_file(file)
    # An empty file or one without "time_points" contributes nothing rather
    # than aborting the whole scan.
    names = timetable.is_a?(Hash) ? Array(timetable["time_points"]) : []
    $time_points = $time_points | names
    names.each do |timepoint|
      $time_points_sources[timepoint] = $time_points_sources[timepoint] | [ file ]
    end
  end
end
getTimePoints()
pp $time_points.sort!
#pp $time_points_sources.sort

# Misspelt or inconsistent timing point name => the name to use instead.
time_point_corrections = {"North Lynehamham" => "North Lyneham",
  "Lathlain St Platform 2" => "Lathlain St Bus Station - Platform 2",
  "Lathlain St Sation - Platform 5" => "Lathlain St Bus Station - Platform 5",
  "Lathlain Steet Station" => "Lathlain St Bus Station",
  "Lathlain St - Platform 3" => "Lathlain St Bus Station - Platform 3",
  "Lathlain Steet Station - Platform 3" => "Lathlain St Bus Station - Platform 3",
  "Lathlain St Station" => "Lathlain St Bus Station",
  "Lathlain St Station - Platform 1" => "Lathlain St Bus Station - Platform 1",
  "Lathlain St Station - Platform 2" => "Lathlain St Bus Station - Platform 2",
  "Lathlain St Station - Platform 3" => "Lathlain St Bus Station - Platform 3",
  "Lathlain St Station - Platform 4" => "Lathlain St Bus Station - Platform 4",
  "Lathlain St Station - Platform 5" => "Lathlain St Bus Station - Platform 5",
  "Lathlain St Station - Platform 6" => "Lathlain St Bus Station - Platform 6",
  "Manuka Captain Cook" => "Manuka, Captain Cook",
  "Flemington Rd, Sandford St" => "Flemington Rd/Sandford St",
  "Erindale Centre / - Sternberg Crescent" => "Erindale Drive/Sternberg",
  "Canberra Hospita" => "Canberra Hospital",
  "Cohen Str Station - Platform 1" => "Cohen St Bus Station - Platform 1",
  "Cohen Street Station" => "Cohen St Bus Station",
  "Cohen Street Station - Platform 2" => "Cohen St Bus Station - Platform 2",
  "Cohn St Station - Platform 3" => "Cohen St Bus Station - Platform 3",
  "Cohen St Station" => "Cohen St Bus Station",
  "Cohen St Station - Platform 1" => "Cohen St Bus Station - Platform 1",
  "Cohen St Station - Platform 2" => "Cohen St Bus Station - Platform 2",
  "Cohen St Station - Platform 3" => "Cohen St Bus Station - Platform 3",
  "Cohen St Station - Platform 4" => "Cohen St Bus Station - Platform 4",
  "Cohen St Station - Platform 5" => "Cohen St Bus Station - Platform 5",
  "Cohen St Station - Platform 6" => "Cohen St Bus Station - Platform 6",
  "City - Platform 7" => "City Interchange - Platform 7",
  "Cameron Avenue Station" => "Cameron Ave Bus Station",
  "Cameron Avenue Station - Platform 1" => "Cameron Ave Bus Station - Platform 1",
  "Cameron Avenue Station - Platform 2" => "Cameron Ave Bus Station - Platform 2",
  "Cameron Avenue Station - Platform 3" => "Cameron Ave Bus Station - Platform 3",
  "Cameron Avenue Station - Platform 4" => "Cameron Ave Bus Station - Platform 4",
  "Cameron Avenue Station - Platform 5" => "Cameron Ave Bus Station - Platform 5",
  "Cameron Ave Station" => "Cameron Ave Bus Station",
  "Cameron Ave Station - Platform 1" => "Cameron Ave Bus Station - Platform 1",
  "Cameron Ave Station - Platform 2" => "Cameron Ave Bus Station - Platform 2",
  "Cameron Ave Station - Platform 3" => "Cameron Ave Bus Station - Platform 3",
  "Cameron Ave Station - Platform 4" => "Cameron Ave Bus Station - Platform 4",
  "Cameron Ave Station - Platform 5" => "Cameron Ave Bus Station - Platform 5",
  "Burton & Garranan Hall, Daley Road ANU" => "Burton & Garran Hall, Daley Road ANU",
  "Burton & Garranan Hall,Daley Road ANU" => "Burton & Garran Hall, Daley Road ANU",
  "Garran/Daley Rd" => "Burton & Garran Hall, Daley Road ANU",
  "Kingstons Ave/National Crt" => "Kings Ave/National Crt",
  "Newcastle Street after Isa St" => "Newcastle / Isa Street Fyshwick",
  "National Circ/Canberra Ave" => "National Circuit / Canberra Ave",
  "St Clare of Conder" => "St Clare of Assisi Primary",
  "McKillop College Isabella Campus" => "MacKillop College Isabella Campus",
  "Outrim / Duggan" => "Outtrim / Duggan",
}

# Rewrite each timetable file that lists a wrong name.
time_point_corrections.each do |wrong, right|
  $time_points_sources[wrong].each do |wrongfile|
    badtimetable = YAML::load_file(wrongfile)
    # Replace every occurrence, not just the first: a file may list the same
    # timing point more than once (e.g. a route that starts and ends at the
    # same place). Mapping also avoids indexing with nil if the name is no
    # longer present in the file.
    badtimetable["time_points"] = badtimetable["time_points"].map do |name|
      name == wrong ? right : name
    end
    puts "Corrected '" + wrong + "' to '" + right + "' in " + wrongfile
    File.open(wrongfile, "w") do |f|
      f.write badtimetable.to_yaml
    end
  end
end

# Re-scan so the printed list reflects the corrected files.
getTimePoints()
pp $time_points.sort!
#!/usr/bin/ruby | #!/usr/bin/ruby |
require 'postgres' | require 'postgres' |
require 'highline.rb' | require 'highline.rb' |
include HighLine | include HighLine |
require 'rubygems' | require 'rubygems' |
require 'json' | require 'json' |
require 'net/http' | require 'net/http' |
# Looks a free-text place name up with the CloudMade geocoding web service,
# restricted to the bounding box hard-coded in the request URL.
#
# NOTE(review): the service key is embedded in the URL below - consider moving
# it to configuration.
#
# @param query [String] place name to search for
# @return [Hash] the parsed JSON response
# @raise [RuntimeError] if the HTTP request does not succeed or the response
#   contains an 'Error' key
# @raise [JSON::ParserError] if the response body is not valid JSON
def cbr_geocode(query)
  base_url = "http://geocoding.cloudmade.com/daa03470bb8740298d4b10e3f03d63e6/geocoding/v2/find.js?query="
  # URI.encode was removed in Ruby 3.0 and never escaped '&' or '#', which
  # broke names such as "Burton & Garran Hall". Form-component encoding
  # escapes them; '+' is turned back into %20 so spaces stay percent-encoded
  # as before.
  escaped_query = URI.encode_www_form_component(query).gsub("+", "%20")
  url = "#{base_url}#{escaped_query}&bbox=-35.47,148.83,-35.16,149.25&return_location=true"
  resp = Net::HTTP.get_response(URI.parse(url))
  # Fail clearly on an HTTP error instead of trying to parse an error page.
  unless resp.is_a?(Net::HTTPSuccess)
    raise "web service error (HTTP #{resp.code})"
  end
  # we convert the returned JSON data to native Ruby
  # data structure - a hash
  result = JSON.parse(resp.body)
  # if the hash has 'Error' as a key, we raise an error
  if result.has_key? 'Error'
    raise "web service error"
  end
  result
end
class Array
  # Lists the elements that occur more than once in this array.
  #
  # @return [Array] each duplicated element once, in order of first appearance
  def find_dups
    occurrences = Hash.new(0)
    each { |element| occurrences[element] += 1 }
    occurrences.select { |_element, count| count > 1 }.map { |element, _count| element }
  end
end
require 'yaml' | require 'yaml' |
require 'pp' | require 'pp' |
Dir.chdir("output") | Dir.chdir("output") |
# Rebuilds the global timing point index from every *.yml file in the current
# directory.
#
# Sets two globals:
#   $time_points         - array of the distinct timing point names found
#   $time_points_sources - hash of name => array of the files that list it;
#                          an unknown name reads as an empty array and is not
#                          stored in the hash
def getTimePoints()
  $time_points = []
  # The default array is shared by every missing-key lookup, so it is frozen
  # to make an accidental in-place append fail loudly instead of leaking
  # entries into all unknown names.
  $time_points_sources = Hash.new([].freeze)
  Dir.glob("*.yml") do |file|
    timetable = YAML::load_file(file)
    # An empty file or one without "time_points" contributes nothing rather
    # than aborting the whole scan.
    names = timetable.is_a?(Hash) ? Array(timetable["time_points"]) : []
    $time_points = $time_points | names
    names.each do |timepoint|
      $time_points_sources[timepoint] = $time_points_sources[timepoint] | [ file ]
    end
  end
end
# Load the timing point names, connect to both databases, and optionally
# insert every name into the timing_point table of the bus database.
getTimePoints()
$time_points.sort!
# NOTE(review): database credentials are hard-coded in these two calls.
connbus = PGconn.connect("localhost", 5432, '', '', "bus", "postgres", "snmc")
connosm = PGconn.connect("localhost", 5432, '', '', "openstreetmap", "postgres", "snmc")
if ask_if("Insert Timing Point names to database?")
  $time_points.each do |raw_name|
    # Double each backslash and single quote so the name can sit inside a
    # quoted SQL literal.
    # NOTE(review): every "St" is also replaced with "%" before the insert -
    # presumably so later LIKE searches treat it as a wildcard; confirm this
    # is intended, since it changes the stored name.
    sql_name = raw_name.gsub(/\\/, '\&\&').gsub(/'/, "''").gsub("St", "%")
    begin
      connbus.exec("INSERT INTO timing_point (name) VALUES ('#{sql_name}')")
      puts "Put '#{sql_name}' into DB"
    rescue PGError => e
      # Report the failure and carry on with the next name.
      puts "Error inserting '#{sql_name}' to DB #{e}"
      #conn.close() if conn
    end
  end
end
if ask_if("Fill null Timing Points from OSM bus_stop database?") | if ask_if("Fill null Timing Points from OSM bus_stop database?") |
# TODO Where there's a "Cnr" or a \/ or a &, Look for 2 ways or nodes and average the closest two! | |
begin | begin |
null_points = connbus.exec('SELECT name FROM timing_point WHERE lat IS null OR lng IS null;') | null_points = connbus.exec('SELECT name FROM timing_point WHERE lat IS null OR lng IS null;') |
rescue PGError => e | rescue PGError => e |
puts "Error selecting null points from DB #{e}" | puts "Error selecting null points from DB #{e}" |
#conn.close() if conn | #conn.close() if conn |
end | end |
null_points.each do |null_point_name| | null_points.each do |null_point_name| |
begin | begin |
name = null_point_name.to_s.gsub(/\\/, '\&\&').gsub(/'/, "''") | name = null_point_name.to_s.gsub(/\\/, '\&\&').gsub(/'/, "''") |
pp name | pp name |
search_name = ask("Hmm, if we're still looking, the name is probably wrong. What's the right name?", :string, :default => name) | |
matching_nodes = connosm.exec("Select * FROM (SELECT * from current_node_tags, | matching_nodes = connosm.exec("Select * FROM (SELECT * from current_node_tags, |
(Select id as ctagid FROM current_node_tags WHERE v LIKE '%#{name}%') as a | (Select id as ctagid FROM current_node_tags WHERE v LIKE '%#{search_name}%') as a |
where a.ctagid = current_node_tags.id) as ctags INNER JOIN current_nodes ON | where a.ctagid = current_node_tags.id) as ctags INNER JOIN current_nodes ON |
ctags.id=current_nodes.id") | ctags.id=current_nodes.id") |
rescue PGError => e | rescue PGError => e |
puts "Error selecting matching bus stops from DB #{e}" | puts "Error selecting matching bus stops from DB #{e}" |
#conn.close() if conn | #conn.close() if conn |
end | end |
suggested_nodes = Hash.new() | suggested_nodes = Hash.new() |
matching_nodes.each do |matching_node_row| | matching_nodes.each do |matching_node_row| |
#pp matching_node_row | #pp matching_node_row |
# 0 = id | # 0 = id |
# 1 = k | # 1 = k |
# 2 = v | # 2 = v |
# 3,4 = redundant ids | # 3,4 = redundant ids |
# 5 = lat*100000 | # 5 = lat*100000 |
# 6 = lng*100000 | # 6 = lng*100000 |
suggested_node = suggested_nodes.fetch(matching_node_row[0], {'lat' => Float(matching_node_row[5])/10000000, | suggested_node = suggested_nodes.fetch(matching_node_row[0], {'lat' => Float(matching_node_row[5])/10000000, |
'lng' => Float(matching_node_row[6])/10000000}) | 'lng' => Float(matching_node_row[6])/10000000}) |