--- a/origin-src/transitfeed-1.2.6/merge.py +++ b/origin-src/transitfeed-1.2.6/merge.py @@ -1,1 +1,1830 @@ - +#!/usr/bin/python2.5 +# +# Copyright 2007 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A tool for merging two Google Transit feeds. + +Given two Google Transit feeds intending to cover two disjoint calendar +intervals, this tool will attempt to produce a single feed by merging as much +of the two feeds together as possible. + +For example, most stops remain the same throughout the year. Therefore, many +of the stops given in stops.txt for the first feed represent the same stops +given in the second feed. This tool will try to merge these stops so they +only appear once in the resultant feed. + +A note on terminology: The first schedule is referred to as the "old" schedule; +the second as the "new" schedule. The resultant schedule is referred to as +the "merged" schedule. Names of things in the old schedule are variations of +the letter "a" while names of things from the new schedule are variations of +"b". The objects that represents routes, agencies and so on are called +"entities". + +usage: merge.py [options] old_feed_path new_feed_path merged_feed_path + +Run merge.py --help for a list of the possible options. 
+""" + + +__author__ = 'timothy.stranex@gmail.com (Timothy Stranex)' + + +import datetime +import optparse +import os +import re +import sys +import time +import transitfeed +from transitfeed import util +import webbrowser + + +# TODO: +# 1. write unit tests that use actual data +# 2. write a proper trip and stop_times merger +# 3. add a serialised access method for stop_times and shapes to transitfeed +# 4. add support for merging schedules which have some service period overlap + + +def ApproximateDistanceBetweenPoints(pa, pb): + """Finds the distance between two points on the Earth's surface. + + This is an approximate distance based on assuming that the Earth is a sphere. + The points are specified by their lattitude and longitude. + + Args: + pa: the first (lat, lon) point tuple + pb: the second (lat, lon) point tuple + + Returns: + The distance as a float in metres. + """ + alat, alon = pa + blat, blon = pb + sa = transitfeed.Stop(lat=alat, lng=alon) + sb = transitfeed.Stop(lat=blat, lng=blon) + return transitfeed.ApproximateDistanceBetweenStops(sa, sb) + + +class Error(Exception): + """The base exception class for this module.""" + + +class MergeError(Error): + """An error produced when two entities could not be merged.""" + + +class MergeProblemWithContext(transitfeed.ExceptionWithContext): + """The base exception class for problem reporting in the merge module. + + Attributes: + dataset_merger: The DataSetMerger that generated this problem. + entity_type_name: The entity type of the dataset_merger. This is just + dataset_merger.ENTITY_TYPE_NAME. + ERROR_TEXT: The text used for generating the problem message. + """ + + def __init__(self, dataset_merger, problem_type=transitfeed.TYPE_WARNING, + **kwargs): + """Initialise the exception object. + + Args: + dataset_merger: The DataSetMerger instance that generated this problem. + problem_type: The problem severity. This should be set to one of the + corresponding constants in transitfeed. 
+      kwargs: Keyword arguments to be saved as instance attributes.
+    """
+    kwargs['type'] = problem_type
+    kwargs['entity_type_name'] = dataset_merger.ENTITY_TYPE_NAME
+    transitfeed.ExceptionWithContext.__init__(self, None, None, **kwargs)
+    self.dataset_merger = dataset_merger
+
+  def FormatContext(self):
+    # The context of a merge problem is the pair of files being merged
+    # rather than a single feed row, so only the file name is reported.
+    return "In files '%s'" % self.dataset_merger.FILE_NAME
+
+
+# Each subclass below represents one kind of merge problem; ERROR_TEXT is
+# the text used for generating the problem message (see
+# MergeProblemWithContext).
+class SameIdButNotMerged(MergeProblemWithContext):
+  ERROR_TEXT = ("There is a %(entity_type_name)s in the old feed with id "
+                "'%(id)s' and one from the new feed with the same id but "
+                "they could not be merged:")
+
+
+class CalendarsNotDisjoint(MergeProblemWithContext):
+  ERROR_TEXT = ("The service periods could not be merged since they are not "
+                "disjoint.")
+
+
+class MergeNotImplemented(MergeProblemWithContext):
+  ERROR_TEXT = ("The feed merger does not currently support merging in this "
+                "file. The entries have been duplicated instead.")
+
+
+class FareRulesBroken(MergeProblemWithContext):
+  ERROR_TEXT = ("The feed merger is currently unable to handle fare rules "
+                "properly.")
+
+
+class MergeProblemReporter(transitfeed.ProblemReporter):
+  """The base problem reporter class for the merge module."""
+
+  def __init__(self, accumulator):
+    transitfeed.ProblemReporter.__init__(self, accumulator)
+
+  def SameIdButNotMerged(self, dataset, entity_id, reason):
+    # Reported with the default problem_type (a warning).
+    self.AddToAccumulator(
+        SameIdButNotMerged(dataset, id=entity_id, reason=reason))
+
+  def CalendarsNotDisjoint(self, dataset):
+    # Reported as an error rather than a warning.
+    self.AddToAccumulator(
+        CalendarsNotDisjoint(dataset, problem_type=transitfeed.TYPE_ERROR))
+
+  def MergeNotImplemented(self, dataset):
+    self.AddToAccumulator(MergeNotImplemented(dataset))
+
+  def FareRulesBroken(self, dataset):
+    self.AddToAccumulator(FareRulesBroken(dataset))
+
+
+class HTMLProblemAccumulator(transitfeed.ProblemAccumulatorInterface):
+  """A problem reporter which generates HTML output."""
+
+  def __init__(self):
+    """Initialise."""
+    self._dataset_warnings = {} # a map from DataSetMergers to
their warnings + self._dataset_errors = {} + self._warning_count = 0 + self._error_count = 0 + + def _Report(self, merge_problem): + if merge_problem.IsWarning(): + dataset_problems = self._dataset_warnings + self._warning_count += 1 + else: + dataset_problems = self._dataset_errors + self._error_count += 1 + + problem_html = '<li>%s</li>' % ( + merge_problem.FormatProblem().replace('\n', '<br>')) + dataset_problems.setdefault(merge_problem.dataset_merger, []).append( + problem_html) + + def _GenerateStatsTable(self, feed_merger): + """Generate an HTML table of merge statistics. + + Args: + feed_merger: The FeedMerger instance. + + Returns: + The generated HTML as a string. + """ + rows = [] + rows.append('<tr><th class="header"/><th class="header">Merged</th>' + '<th class="header">Copied from old feed</th>' + '<th class="header">Copied from new feed</th></tr>') + for merger in feed_merger.GetMergerList(): + stats = merger.GetMergeStats() + if stats is None: + continue + merged, not_merged_a, not_merged_b = stats + rows.append('<tr><th class="header">%s</th>' + '<td class="header">%d</td>' + '<td class="header">%d</td>' + '<td class="header">%d</td></tr>' % + (merger.DATASET_NAME, merged, not_merged_a, not_merged_b)) + return '<table>%s</table>' % '\n'.join(rows) + + def _GenerateSection(self, problem_type): + """Generate a listing of the given type of problems. + + Args: + problem_type: The type of problem. This is one of the problem type + constants from transitfeed. + + Returns: + The generated HTML as a string. 
+ """ + if problem_type == transitfeed.TYPE_WARNING: + dataset_problems = self._dataset_warnings + heading = 'Warnings' + else: + dataset_problems = self._dataset_errors + heading = 'Errors' + + if not dataset_problems: + return '' + + prefix = '<h2 class="issueHeader">%s:</h2>' % heading + dataset_sections = [] + for dataset_merger, problems in dataset_problems.items(): + dataset_sections.append('<h3>%s</h3><ol>%s</ol>' % ( + dataset_merger.FILE_NAME, '\n'.join(problems))) + body = '\n'.join(dataset_sections) + return prefix + body + + def _GenerateSummary(self): + """Generate a summary of the warnings and errors. + + Returns: + The generated HTML as a string. + """ + items = [] + if self._dataset_errors: + items.append('errors: %d' % self._error_count) + if self._dataset_warnings: + items.append('warnings: %d' % self._warning_count) + + if items: + return '<p><span class="fail">%s</span></p>' % '<br>'.join(items) + else: + return '<p><span class="pass">feeds merged successfully</span></p>' + + def WriteOutput(self, output_file, feed_merger, + old_feed_path, new_feed_path, merged_feed_path): + """Write the HTML output to a file. + + Args: + output_file: The file object that the HTML output will be written to. + feed_merger: The FeedMerger instance. + old_feed_path: The path to the old feed file as a string. + new_feed_path: The path to the new feed file as a string + merged_feed_path: The path to the merged feed file as a string. This + may be None if no merged feed was written. 
+ """ + if merged_feed_path is None: + html_merged_feed_path = '' + else: + html_merged_feed_path = '<p>Merged feed created: <code>%s</code></p>' % ( + merged_feed_path) + + html_header = """<html> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/> +<title>Feed Merger Results</title> +<style> + body {font-family: Georgia, serif; background-color: white} + .path {color: gray} + div.problem {max-width: 500px} + td,th {background-color: khaki; padding: 2px; font-family:monospace} + td.problem,th.problem {background-color: dc143c; color: white; padding: 2px; + font-family:monospace} + table {border-spacing: 5px 0px; margin-top: 3px} + h3.issueHeader {padding-left: 1em} + span.pass {background-color: lightgreen} + span.fail {background-color: yellow} + .pass, .fail {font-size: 16pt; padding: 3px} + ol,.unused {padding-left: 40pt} + .header {background-color: white; font-family: Georgia, serif; padding: 0px} + th.header {text-align: right; font-weight: normal; color: gray} + .footer {font-size: 10pt} +</style> +</head> +<body> +<h1>Feed merger results</h1> +<p>Old feed: <code>%(old_feed_path)s</code></p> +<p>New feed: <code>%(new_feed_path)s</code></p> +%(html_merged_feed_path)s""" % locals() + + html_stats = self._GenerateStatsTable(feed_merger) + html_summary = self._GenerateSummary() + html_errors = self._GenerateSection(transitfeed.TYPE_ERROR) + html_warnings = self._GenerateSection(transitfeed.TYPE_WARNING) + + html_footer = """ +<div class="footer"> +Generated using transitfeed version %s on %s. 
+</div>
+</body>
+</html>""" % (transitfeed.__version__,
+              time.strftime('%B %d, %Y at %I:%M %p %Z'))
+
+    # Each section is passed through EncodeUnicode before writing
+    # (presumably encoding unicode for the output file — see
+    # transitfeed.EncodeUnicode).
+    output_file.write(transitfeed.EncodeUnicode(html_header))
+    output_file.write(transitfeed.EncodeUnicode(html_stats))
+    output_file.write(transitfeed.EncodeUnicode(html_summary))
+    output_file.write(transitfeed.EncodeUnicode(html_errors))
+    output_file.write(transitfeed.EncodeUnicode(html_warnings))
+    output_file.write(transitfeed.EncodeUnicode(html_footer))
+
+
+def LoadWithoutErrors(path, memory_db):
+  """Return a Schedule object loaded from path; sys.exit for any error."""
+  accumulator = transitfeed.ExceptionProblemAccumulator()
+  loading_problem_handler = MergeProblemReporter(accumulator)
+  try:
+    schedule = transitfeed.Loader(path,
+                                  memory_db=memory_db,
+                                  problems=loading_problem_handler).Load()
+  # NOTE: Python 2 except syntax; this file targets Python 2.5.
+  except transitfeed.ExceptionWithContext, e:
+    print >>sys.stderr, (
+        "\n\nFeeds to merge must load without any errors.\n"
+        "While loading %s the following error was found:\n%s\n%s\n" %
+        (path, e.FormatContext(), transitfeed.EncodeUnicode(e.FormatProblem())))
+    sys.exit(1)
+  return schedule
+
+
+class DataSetMerger(object):
+  """A DataSetMerger is in charge of merging a set of entities.
+
+  This is an abstract class and should be subclassed for each different entity
+  type.
+
+  Attributes:
+    ENTITY_TYPE_NAME: The name of the entity type like 'agency' or 'stop'.
+    FILE_NAME: The name of the file containing this data set like 'agency.txt'.
+    DATASET_NAME: A name for the dataset like 'Agencies' or 'Stops'.
+  """
+
+  def __init__(self, feed_merger):
+    """Initialise.
+
+    Args:
+      feed_merger: The FeedMerger.
+    """
+    self.feed_merger = feed_merger
+    # Counters returned by GetMergeStats().
+    self._num_merged = 0
+    self._num_not_merged_a = 0
+    self._num_not_merged_b = 0
+
+  def _MergeIdentical(self, a, b):
+    """Tries to merge two values. The values are required to be identical.
+
+    Args:
+      a: The first value.
+      b: The second value.
+
+    Returns:
+      The trivially merged value.
+
+    Raises:
+      MergeError: The values were not identical.
+    """
+    if a != b:
+      raise MergeError("values must be identical ('%s' vs '%s')" %
+                       (transitfeed.EncodeUnicode(a),
+                        transitfeed.EncodeUnicode(b)))
+    return b
+
+  def _MergeIdenticalCaseInsensitive(self, a, b):
+    """Tries to merge two strings.
+
+    The strings are required to be the same ignoring case. The second string
+    is always used as the merged value.
+
+    Args:
+      a: The first string.
+      b: The second string.
+
+    Returns:
+      The merged string. This is equal to the second string.
+
+    Raises:
+      MergeError: The strings were not the same ignoring case.
+    """
+    if a.lower() != b.lower():
+      raise MergeError("values must be the same (case insensitive) "
+                       "('%s' vs '%s')" % (transitfeed.EncodeUnicode(a),
+                                           transitfeed.EncodeUnicode(b)))
+    return b
+
+  def _MergeOptional(self, a, b):
+    """Tries to merge two values which may be None.
+
+    If both values are not None, they are required to be the same and the
+    merge is trivial. If one of the values is None and the other is not None,
+    the merge results in the one which is not None. If both are None, the merge
+    results in None.
+
+    Args:
+      a: The first value.
+      b: The second value.
+
+    Returns:
+      The merged value.
+
+    Raises:
+      MergeError: If both values are not None and are not the same.
+    """
+    # NOTE(review): this is a truthiness test, so '' is treated like None.
+    if a and b:
+      if a != b:
+        raise MergeError("values must be identical if both specified "
+                         "('%s' vs '%s')" % (transitfeed.EncodeUnicode(a),
+                                             transitfeed.EncodeUnicode(b)))
+    return a or b
+
+  def _MergeSameAgency(self, a_agency_id, b_agency_id):
+    """Merge agency ids to the corresponding agency id in the merged schedule.
+
+    Args:
+      a_agency_id: an agency id from the old schedule
+      b_agency_id: an agency id from the new schedule
+
+    Returns:
+      The agency id of the corresponding merged agency.
+
+    Raises:
+      MergeError: If a_agency_id and b_agency_id do not correspond to the same
+        merged agency.
+      KeyError: Either a_agency_id or b_agency_id is not a valid agency id.
+ """ + a_agency_id = (a_agency_id or + self.feed_merger.a_schedule.GetDefaultAgency().agency_id) + b_agency_id = (b_agency_id or + self.feed_merger.b_schedule.GetDefaultAgency().agency_id) + a_agency = self.feed_merger.a_schedule.GetAgency( + a_agency_id)._migrated_entity + b_agency = self.feed_merger.b_schedule.GetAgency( + b_agency_id)._migrated_entity + if a_agency != b_agency: + raise MergeError('agency must be the same') + return a_agency.agency_id + + def _SchemedMerge(self, scheme, a, b): + """Tries to merge two entities according to a merge scheme. + + A scheme is specified by a map where the keys are entity attributes and the + values are merge functions like Merger._MergeIdentical or + Merger._MergeOptional. The entity is first migrated to the merged schedule. + Then the attributes are individually merged as specified by the scheme. + + Args: + scheme: The merge scheme, a map from entity attributes to merge + functions. + a: The entity from the old schedule. + b: The entity from the new schedule. + + Returns: + The migrated and merged entity. + + Raises: + MergeError: One of the attributes was not able to be merged. + """ + migrated = self._Migrate(b, self.feed_merger.b_schedule, False) + for attr, merger in scheme.items(): + a_attr = getattr(a, attr, None) + b_attr = getattr(b, attr, None) + try: + merged_attr = merger(a_attr, b_attr) + except MergeError, merge_error: + raise MergeError("Attribute '%s' could not be merged: %s." % ( + attr, merge_error)) + setattr(migrated, attr, merged_attr) + return migrated + + def _MergeSameId(self): + """Tries to merge entities based on their ids. + + This tries to merge only the entities from the old and new schedules which + have the same id. These are added into the merged schedule. Entities which + do not merge or do not have the same id as another entity in the other + schedule are simply migrated into the merged schedule. 
+
+    This method is less flexible than _MergeDifferentId since it only tries
+    to merge entities which have the same id while _MergeDifferentId tries to
+    merge everything. However, it is faster and so should be used whenever
+    possible.
+
+    This method makes use of various methods like _MergeEntities and _Migrate
+    which are not implemented in the abstract DataSetMerger class. These
+    methods should be overwritten in a subclass to allow _MergeSameId to work
+    with different entity types.
+
+    Returns:
+      The number of merged entities.
+    """
+    a_not_merged = []
+    b_not_merged = []
+
+    for a in self._GetIter(self.feed_merger.a_schedule):
+      try:
+        b = self._GetById(self.feed_merger.b_schedule, self._GetId(a))
+      except KeyError:
+        # there was no entity in B with the same id as a
+        a_not_merged.append(a)
+        continue
+      try:
+        self._Add(a, b, self._MergeEntities(a, b))
+        self._num_merged += 1
+      except MergeError, merge_error:
+        a_not_merged.append(a)
+        b_not_merged.append(b)
+        self._ReportSameIdButNotMerged(self._GetId(a), merge_error)
+
+    for b in self._GetIter(self.feed_merger.b_schedule):
+      try:
+        a = self._GetById(self.feed_merger.a_schedule, self._GetId(b))
+      except KeyError:
+        # there was no entity in A with the same id as b
+        b_not_merged.append(b)
+      # if the lookup succeeds, a merge was already attempted (and either
+      # succeeded or recorded b in b_not_merged) in the loop above
+
+    # migrate the remaining entities; if the other schedule also uses this
+    # id, newid is True so _Migrate generates a fresh unique id
+    for a in a_not_merged:
+      newid = self._HasId(self.feed_merger.b_schedule, self._GetId(a))
+      self._Add(a, None, self._Migrate(a, self.feed_merger.a_schedule, newid))
+    for b in b_not_merged:
+      newid = self._HasId(self.feed_merger.a_schedule, self._GetId(b))
+      self._Add(None, b, self._Migrate(b, self.feed_merger.b_schedule, newid))
+
+    self._num_not_merged_a = len(a_not_merged)
+    self._num_not_merged_b = len(b_not_merged)
+    return self._num_merged
+
+  def _MergeByIdKeepNew(self):
+    """Migrate all entities, discarding duplicates from the old/a schedule.
+
+    This method migrates all entities from the new/b schedule.
It then migrates + entities in the old schedule where there isn't already an entity with the + same ID. + + Unlike _MergeSameId this method migrates entities to the merged schedule + before comparing their IDs. This allows transfers to be compared when they + refer to stops that had their ID updated by migration. + + This method makes use of various methods like _Migrate and _Add which + are not implemented in the abstract DataSetMerger class. These methods + should be overwritten in a subclass to allow _MergeByIdKeepNew to work with + different entity types. + + Returns: + The number of merged entities. + """ + # Maps from migrated ID to tuple(original object, migrated object) + a_orig_migrated = {} + b_orig_migrated = {} + + for orig in self._GetIter(self.feed_merger.a_schedule): + migrated = self._Migrate(orig, self.feed_merger.a_schedule) + a_orig_migrated[self._GetId(migrated)] = (orig, migrated) + + for orig in self._GetIter(self.feed_merger.b_schedule): + migrated = self._Migrate(orig, self.feed_merger.b_schedule) + b_orig_migrated[self._GetId(migrated)] = (orig, migrated) + + for migrated_id, (orig, migrated) in b_orig_migrated.items(): + self._Add(None, orig, migrated) + self._num_not_merged_b += 1 + + for migrated_id, (orig, migrated) in a_orig_migrated.items(): + if migrated_id not in b_orig_migrated: + self._Add(orig, None, migrated) + self._num_not_merged_a += 1 + return self._num_merged + + def _MergeDifferentId(self): + """Tries to merge all possible combinations of entities. + + This tries to merge every entity in the old schedule with every entity in + the new schedule. Unlike _MergeSameId, the ids do not need to match. + However, _MergeDifferentId is much slower than _MergeSameId. + + This method makes use of various methods like _Merge and _Migrate which + are not implemented in the abstract DataSetMerger class. These method + should be overwritten in a subclass to allow _MergeSameId to work with + different entity types. 
+ + Returns: + The number of merged entities. + """ + # TODO: The same entity from A could merge with multiple from B. + # This should either generate an error or should be prevented from + # happening. + for a in self._GetIter(self.feed_merger.a_schedule): + for b in self._GetIter(self.feed_merger.b_schedule): + try: + self._Add(a, b, self._MergeEntities(a, b)) + self._num_merged += 1 + except MergeError: + continue + + for a in self._GetIter(self.feed_merger.a_schedule): + if a not in self.feed_merger.a_merge_map: + self._num_not_merged_a += 1 + newid = self._HasId(self.feed_merger.b_schedule, self._GetId(a)) + self._Add(a, None, + self._Migrate(a, self.feed_merger.a_schedule, newid)) + for b in self._GetIter(self.feed_merger.b_schedule): + if b not in self.feed_merger.b_merge_map: + self._num_not_merged_b += 1 + newid = self._HasId(self.feed_merger.a_schedule, self._GetId(b)) + self._Add(None, b, + self._Migrate(b, self.feed_merger.b_schedule, newid)) + + return self._num_merged + + def _ReportSameIdButNotMerged(self, entity_id, reason): + """Report that two entities have the same id but could not be merged. + + Args: + entity_id: The id of the entities. + reason: A string giving a reason why they could not be merged. + """ + self.feed_merger.problem_reporter.SameIdButNotMerged(self, + entity_id, + reason) + + def _GetIter(self, schedule): + """Returns an iterator of entities for this data set in the given schedule. + + This method usually corresponds to one of the methods from + transitfeed.Schedule like GetAgencyList() or GetRouteList(). + + Note: This method must be overwritten in a subclass if _MergeSameId or + _MergeDifferentId are to be used. + + Args: + schedule: Either the old or new schedule from the FeedMerger. + + Returns: + An iterator of entities. + """ + raise NotImplementedError() + + def _GetById(self, schedule, entity_id): + """Returns an entity given its id. 
+ + This method usually corresponds to one of the methods from + transitfeed.Schedule like GetAgency() or GetRoute(). + + Note: This method must be overwritten in a subclass if _MergeSameId or + _MergeDifferentId are to be used. + + Args: + schedule: Either the old or new schedule from the FeedMerger. + entity_id: The id string of the entity. + + Returns: + The entity with the given id. + + Raises: + KeyError: There is not entity with the given id. + """ + raise NotImplementedError() + + def _HasId(self, schedule, entity_id): + """Check if the schedule has an entity with the given id. + + Args: + schedule: The transitfeed.Schedule instance to look in. + entity_id: The id of the entity. + + Returns: + True if the schedule has an entity with the id or False if not. + """ + try: + self._GetById(schedule, entity_id) + has = True + except KeyError: + has = False + return has + + def _MergeEntities(self, a, b): + """Tries to merge the two entities. + + Note: This method must be overwritten in a subclass if _MergeSameId or + _MergeDifferentId are to be used. + + Args: + a: The entity from the old schedule. + b: The entity from the new schedule. + + Returns: + The merged migrated entity. + + Raises: + MergeError: The entities were not able to be merged. + """ + raise NotImplementedError() + + def _Migrate(self, entity, schedule, newid): + """Migrates the entity to the merge schedule. + + This involves copying the entity and updating any ids to point to the + corresponding entities in the merged schedule. If newid is True then + a unique id is generated for the migrated entity using the original id + as a prefix. + + Note: This method must be overwritten in a subclass if _MergeSameId or + _MergeDifferentId are to be used. + + Args: + entity: The entity to migrate. + schedule: The schedule from the FeedMerger that contains ent. + newid: Whether to generate a new id (True) or keep the original (False). + + Returns: + The migrated entity. 
+ """ + raise NotImplementedError() + + def _Add(self, a, b, migrated): + """Adds the migrated entity to the merged schedule. + + If a and b are both not None, it means that a and b were merged to create + migrated. If one of a or b is None, it means that the other was not merged + but has been migrated. This mapping is registered with the FeedMerger. + + Note: This method must be overwritten in a subclass if _MergeSameId or + _MergeDifferentId are to be used. + + Args: + a: The original entity from the old schedule. + b: The original entity from the new schedule. + migrated: The migrated entity for the merged schedule. + """ + raise NotImplementedError() + + def _GetId(self, entity): + """Returns the id of the given entity. + + Note: This method must be overwritten in a subclass if _MergeSameId or + _MergeDifferentId are to be used. + + Args: + entity: The entity. + + Returns: + The id of the entity as a string or None. + """ + raise NotImplementedError() + + def MergeDataSets(self): + """Merge the data sets. + + This method is called in FeedMerger.MergeSchedule(). + + Note: This method must be overwritten in a subclass. + + Returns: + A boolean which is False if the dataset was unable to be merged and + as a result the entire merge should be aborted. In this case, the problem + will have been reported using the FeedMerger's problem reporter. + """ + raise NotImplementedError() + + def GetMergeStats(self): + """Returns some merge statistics. + + These are given as a tuple (merged, not_merged_a, not_merged_b) where + "merged" is the number of merged entities, "not_merged_a" is the number of + entities from the old schedule that were not merged and "not_merged_b" is + the number of entities from the new schedule that were not merged. + + The return value can also be None. This means that there are no statistics + for this entity type. + + The statistics are only available after MergeDataSets() has been called. + + Returns: + Either the statistics tuple or None. 
+ """ + return (self._num_merged, self._num_not_merged_a, self._num_not_merged_b) + + +class AgencyMerger(DataSetMerger): + """A DataSetMerger for agencies.""" + + ENTITY_TYPE_NAME = 'agency' + FILE_NAME = 'agency.txt' + DATASET_NAME = 'Agencies' + + def _GetIter(self, schedule): + return schedule.GetAgencyList() + + def _GetById(self, schedule, agency_id): + return schedule.GetAgency(agency_id) + + def _MergeEntities(self, a, b): + """Merges two agencies. + + To be merged, they are required to have the same id, name, url and + timezone. The remaining language attribute is taken from the new agency. + + Args: + a: The first agency. + b: The second agency. + + Returns: + The merged agency. + + Raises: + MergeError: The agencies could not be merged. + """ + + def _MergeAgencyId(a_agency_id, b_agency_id): + """Merge two agency ids. + + The only difference between this and _MergeIdentical() is that the values + None and '' are regarded as being the same. + + Args: + a_agency_id: The first agency id. + b_agency_id: The second agency id. + + Returns: + The merged agency id. + + Raises: + MergeError: The agency ids could not be merged. + """ + a_agency_id = a_agency_id or None + b_agency_id = b_agency_id or None + return self._MergeIdentical(a_agency_id, b_agency_id) + + scheme = {'agency_id': _MergeAgencyId, + 'agency_name': self._MergeIdentical, + 'agency_url': self._MergeIdentical, + 'agency_timezone': self._MergeIdentical} + return self._SchemedMerge(scheme, a, b) + + def _Migrate(self, entity, schedule, newid): + a = transitfeed.Agency(field_dict=entity) + if newid: + a.agency_id = self.feed_merger.GenerateId(entity.agency_id)