|
#!/usr/bin/python2.5 |
|
# |
|
# Copyright 2007 Google Inc. All Rights Reserved. |
|
# |
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
|
# you may not use this file except in compliance with the License. |
|
# You may obtain a copy of the License at |
|
# |
|
# http://www.apache.org/licenses/LICENSE-2.0 |
|
# |
|
# Unless required by applicable law or agreed to in writing, software |
|
# distributed under the License is distributed on an "AS IS" BASIS, |
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
# See the License for the specific language governing permissions and |
|
# limitations under the License. |
|
|
|
"""A tool for merging two Google Transit feeds. |
|
|
|
Given two Google Transit feeds intending to cover two disjoint calendar |
|
intervals, this tool will attempt to produce a single feed by merging as much |
|
of the two feeds together as possible. |
|
|
|
For example, most stops remain the same throughout the year. Therefore, many |
|
of the stops given in stops.txt for the first feed represent the same stops |
|
given in the second feed. This tool will try to merge these stops so they |
|
only appear once in the resultant feed. |
|
|
|
A note on terminology: The first schedule is referred to as the "old" schedule; |
|
the second as the "new" schedule. The resultant schedule is referred to as |
|
the "merged" schedule. Names of things in the old schedule are variations of |
|
the letter "a" while names of things from the new schedule are variations of |
|
"b". The objects that represents routes, agencies and so on are called |
|
"entities". |
|
|
|
usage: merge.py [options] old_feed_path new_feed_path merged_feed_path |
|
|
|
Run merge.py --help for a list of the possible options. |
|
""" |
|
|
|
|
|
__author__ = 'timothy.stranex@gmail.com (Timothy Stranex)' |
|
|
|
|
|
import datetime |
|
import optparse |
|
import os |
|
import re |
|
import sys |
|
import time |
|
import transitfeed |
|
from transitfeed import util |
|
import webbrowser |
|
|
|
|
|
# TODO: |
|
# 1. write unit tests that use actual data |
|
# 2. write a proper trip and stop_times merger |
|
# 3. add a serialised access method for stop_times and shapes to transitfeed |
|
# 4. add support for merging schedules which have some service period overlap |
|
|
|
|
|
def ApproximateDistanceBetweenPoints(pa, pb):
  """Computes the approximate distance between two points on the Earth.

  The Earth is treated as a sphere, so the result is only approximate.
  Each point is specified by its latitude and longitude in degrees.

  Args:
    pa: the first (lat, lon) point tuple
    pb: the second (lat, lon) point tuple

  Returns:
    The distance as a float in metres.
  """
  lat_a, lon_a = pa
  lat_b, lon_b = pb
  # Delegate to transitfeed by wrapping each point in a throwaway Stop.
  stop_a = transitfeed.Stop(lat=lat_a, lng=lon_a)
  stop_b = transitfeed.Stop(lat=lat_b, lng=lon_b)
  return transitfeed.ApproximateDistanceBetweenStops(stop_a, stop_b)
|
|
|
|
|
class Error(Exception):
  """Root of the exception hierarchy for the merge module."""
|
|
|
|
|
class MergeError(Error):
  """Raised when two entities cannot be merged."""
|
|
|
|
|
class MergeProblemWithContext(transitfeed.ExceptionWithContext):
  """Base exception class for problems reported by the merge module.

  Attributes:
    dataset_merger: The DataSetMerger that generated this problem.
    entity_type_name: The entity type of the dataset_merger; a copy of
      dataset_merger.ENTITY_TYPE_NAME.
    ERROR_TEXT: The text used for generating the problem message.
  """

  def __init__(self, dataset_merger, problem_type=transitfeed.TYPE_WARNING,
               **kwargs):
    """Initialise the exception object.

    Args:
      dataset_merger: The DataSetMerger instance that generated this problem.
      problem_type: The problem severity; one of the corresponding constants
        in transitfeed.
      kwargs: Keyword arguments to be saved as instance attributes.
    """
    # Severity and entity type ride along with the caller's kwargs so the
    # base class stores them as instance attributes too.
    kwargs.update(type=problem_type,
                  entity_type_name=dataset_merger.ENTITY_TYPE_NAME)
    transitfeed.ExceptionWithContext.__init__(self, None, None, **kwargs)
    self.dataset_merger = dataset_merger

  def FormatContext(self):
    """Returns a description of the file this problem relates to."""
    return "In files '%s'" % self.dataset_merger.FILE_NAME
|
|
|
|
|
class SameIdButNotMerged(MergeProblemWithContext):
  """Reported when entities sharing an id in both feeds fail to merge."""

  ERROR_TEXT = ("There is a %(entity_type_name)s in the old feed with "
                "id '%(id)s' and one from the new feed with the same id "
                "but they could not be merged:")
|
|
|
|
|
class CalendarsNotDisjoint(MergeProblemWithContext):
  """Reported when the two feeds' service periods overlap."""

  ERROR_TEXT = ("The service periods could not be merged "
                "since they are not disjoint.")
|
|
|
|
|
class MergeNotImplemented(MergeProblemWithContext):
  """Reported when a file's entries are duplicated instead of merged."""

  ERROR_TEXT = ("The feed merger does not currently support merging in "
                "this file. The entries have been duplicated instead.")
|
|
|
|
|
class FareRulesBroken(MergeProblemWithContext):
  """Reported when fare rules could not be handled correctly."""

  ERROR_TEXT = ("The feed merger is currently unable to handle "
                "fare rules properly.")
|
|
|
|
|
class MergeProblemReporter(transitfeed.ProblemReporter):
  """The base problem reporter class for the merge module."""

  def __init__(self, accumulator):
    transitfeed.ProblemReporter.__init__(self, accumulator)

  def SameIdButNotMerged(self, dataset, entity_id, reason):
    """Reports that entities with the same id could not be merged."""
    problem = SameIdButNotMerged(dataset, id=entity_id, reason=reason)
    self.AddToAccumulator(problem)

  def CalendarsNotDisjoint(self, dataset):
    """Reports overlapping service periods; always raised as an error."""
    problem = CalendarsNotDisjoint(dataset,
                                   problem_type=transitfeed.TYPE_ERROR)
    self.AddToAccumulator(problem)

  def MergeNotImplemented(self, dataset):
    """Reports that merging is unsupported for this dataset's file."""
    self.AddToAccumulator(MergeNotImplemented(dataset))

  def FareRulesBroken(self, dataset):
    """Reports that fare rules could not be handled properly."""
    self.AddToAccumulator(FareRulesBroken(dataset))
|
|
|
|
|
class HTMLProblemAccumulator(transitfeed.ProblemAccumulatorInterface):
  """A problem accumulator which renders its report as HTML."""

  def __init__(self):
    """Initialise with empty problem maps and zeroed counters."""
    # Both maps are keyed by DataSetMerger; each value is a list of
    # HTML <li> fragments, one per reported problem.
    self._dataset_warnings = {}
    self._dataset_errors = {}
    self._warning_count = 0
    self._error_count = 0

  def _Report(self, merge_problem):
    """Files a problem under warnings or errors and bumps its counter."""
    if merge_problem.IsWarning():
      bucket = self._dataset_warnings
      self._warning_count += 1
    else:
      bucket = self._dataset_errors
      self._error_count += 1

    formatted = merge_problem.FormatProblem().replace('\n', '<br>')
    bucket.setdefault(merge_problem.dataset_merger, []).append(
        '<li>%s</li>' % formatted)

  def _GenerateStatsTable(self, feed_merger):
    """Generate an HTML table of merge statistics.

    Args:
      feed_merger: The FeedMerger instance.

    Returns:
      The generated HTML as a string.
    """
    rows = ['<tr><th class="header"/><th class="header">Merged</th>'
            '<th class="header">Copied from old feed</th>'
            '<th class="header">Copied from new feed</th></tr>']
    for merger in feed_merger.GetMergerList():
      stats = merger.GetMergeStats()
      if stats is None:
        # This merger does not track statistics; leave it out of the table.
        continue
      merged, not_merged_a, not_merged_b = stats
      rows.append('<tr><th class="header">%s</th>'
                  '<td class="header">%d</td>'
                  '<td class="header">%d</td>'
                  '<td class="header">%d</td></tr>' %
                  (merger.DATASET_NAME, merged, not_merged_a, not_merged_b))
    return '<table>%s</table>' % '\n'.join(rows)

  def _GenerateSection(self, problem_type):
    """Generate a listing of the given type of problems.

    Args:
      problem_type: The type of problem. This is one of the problem type
        constants from transitfeed.

    Returns:
      The generated HTML as a string; empty if there were no problems.
    """
    if problem_type == transitfeed.TYPE_WARNING:
      dataset_problems = self._dataset_warnings
      heading = 'Warnings'
    else:
      dataset_problems = self._dataset_errors
      heading = 'Errors'

    if not dataset_problems:
      return ''

    dataset_sections = ['<h3>%s</h3><ol>%s</ol>' %
                        (dataset_merger.FILE_NAME, '\n'.join(problems))
                        for dataset_merger, problems
                        in dataset_problems.items()]
    prefix = '<h2 class="issueHeader">%s:</h2>' % heading
    return prefix + '\n'.join(dataset_sections)

  def _GenerateSummary(self):
    """Generate a short summary of the warning and error counts.

    Returns:
      The generated HTML as a string.
    """
    items = []
    if self._dataset_errors:
      items.append('errors: %d' % self._error_count)
    if self._dataset_warnings:
      items.append('warnings: %d' % self._warning_count)

    if not items:
      return '<p><span class="pass">feeds merged successfully</span></p>'
    return '<p><span class="fail">%s</span></p>' % '<br>'.join(items)

  def WriteOutput(self, output_file, feed_merger,
                  old_feed_path, new_feed_path, merged_feed_path):
    """Write the HTML output to a file.

    Args:
      output_file: The file object that the HTML output will be written to.
      feed_merger: The FeedMerger instance.
      old_feed_path: The path to the old feed file as a string.
      new_feed_path: The path to the new feed file as a string.
      merged_feed_path: The path to the merged feed file as a string. This
        may be None if no merged feed was written.
    """
    if merged_feed_path is None:
      html_merged_feed_path = ''
    else:
      html_merged_feed_path = '<p>Merged feed created: <code>%s</code></p>' % (
          merged_feed_path)

    html_header = """<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
<title>Feed Merger Results</title>
<style>
  body {font-family: Georgia, serif; background-color: white}
  .path {color: gray}
  div.problem {max-width: 500px}
  td,th {background-color: khaki; padding: 2px; font-family:monospace}
  td.problem,th.problem {background-color: dc143c; color: white; padding: 2px;
  font-family:monospace}
  table {border-spacing: 5px 0px; margin-top: 3px}
  h3.issueHeader {padding-left: 1em}
  span.pass {background-color: lightgreen}
  span.fail {background-color: yellow}
  .pass, .fail {font-size: 16pt; padding: 3px}
  ol,.unused {padding-left: 40pt}
  .header {background-color: white; font-family: Georgia, serif; padding: 0px}
  th.header {text-align: right; font-weight: normal; color: gray}
  .footer {font-size: 10pt}
</style>
</head>
<body>
<h1>Feed merger results</h1>
<p>Old feed: <code>%(old_feed_path)s</code></p>
<p>New feed: <code>%(new_feed_path)s</code></p>
%(html_merged_feed_path)s""" % {
        'old_feed_path': old_feed_path,
        'new_feed_path': new_feed_path,
        'html_merged_feed_path': html_merged_feed_path}

    html_footer = """
<div class="footer">
Generated using transitfeed version %s on %s.
</div>
</body>
</html>""" % (transitfeed.__version__,
              time.strftime('%B %d, %Y at %I:%M %p %Z'))

    # Emit the page sections in display order.
    for fragment in (html_header,
                     self._GenerateStatsTable(feed_merger),
                     self._GenerateSummary(),
                     self._GenerateSection(transitfeed.TYPE_ERROR),
                     self._GenerateSection(transitfeed.TYPE_WARNING),
                     html_footer):
      output_file.write(transitfeed.EncodeUnicode(fragment))
|
|
|
|
|
def LoadWithoutErrors(path, memory_db): |
|
""""Return a Schedule object loaded from path; sys.exit for any error.""" |
|
accumulator = transitfeed.ExceptionProblemAccumulator() |
|
loading_problem_handler = MergeProblemReporter(accumulator) |
|
try: |
|
schedule = transitfeed.Loader(path, |
|
memory_db=memory_db, |
|
problems=loading_problem_handler).Load() |
|
except transitfeed.ExceptionWithContext, e: |
|
print >>sys.stderr, ( |
|
"\n\nFeeds to merge must load without any errors.\n" |
|
"While loading %s the following error was found:\n%s\n%s\n" % |
|
(path, e.FormatContext(), transitfeed.EncodeUnicode(e.FormatProblem()))) |
|
sys.exit(1) |
|
return schedule |
|
|
|
|
|
class DataSetMerger(object): |
|
"""A DataSetMerger is in charge of merging a set of entities. |
|
|
|
This is an abstract class and should be subclassed for each different entity |
|
type. |
|
|
|
Attributes: |
|
ENTITY_TYPE_NAME: The name of the entity type like 'agency' or 'stop'. |
|
FILE_NAME: The name of the file containing this data set like 'agency.txt'. |
|
DATASET_NAME: A name for the dataset like 'Agencies' or 'Stops'. |
|
""" |
|
|
|
def __init__(self, feed_merger): |
|
"""Initialise. |
|
|
|
Args: |
|
feed_merger: The FeedMerger. |
|
""" |
|
self.feed_merger = feed_merger |
|
self._num_merged = 0 |
|
self._num_not_merged_a = 0 |
|
self._num_not_merged_b = 0 |
|
|
|
def _MergeIdentical(self, a, b): |
|
"""Tries to merge two values. The values are required to be identical. |
|
|
|
Args: |
|
a: The first value. |