|
#!/usr/bin/python2.5 |
|
|
|
# Copyright (C) 2007 Google Inc. |
|
# |
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
|
# you may not use this file except in compliance with the License. |
|
# You may obtain a copy of the License at |
|
# |
|
# http://www.apache.org/licenses/LICENSE-2.0 |
|
# |
|
# Unless required by applicable law or agreed to in writing, software |
|
# distributed under the License is distributed on an "AS IS" BASIS, |
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
# See the License for the specific language governing permissions and |
|
# limitations under the License. |
|
|
|
|
|
"""Validates a GTFS file. |
|
|
|
For usage information run feedvalidator.py --help |
|
""" |
|
|
|
import bisect |
|
import codecs |
|
import datetime |
|
from transitfeed.util import defaultdict |
|
import optparse |
|
import os |
|
import os.path |
|
import re |
|
import socket |
|
import sys |
|
import time |
|
import transitfeed |
|
from transitfeed import TYPE_ERROR, TYPE_WARNING |
|
from urllib2 import Request, urlopen, HTTPError, URLError |
|
from transitfeed import util |
|
import webbrowser |
|
|
|
SVN_TAG_URL = 'http://googletransitdatafeed.googlecode.com/svn/tags/' |
|
|
|
|
|
def MaybePluralizeWord(count, word):
  """Return word as-is for a count of exactly 1, otherwise word + 's'.

  Args:
    count: number of items being described
    word: singular noun, e.g. 'error'

  Returns:
    word when count == 1, else word with 's' appended.
  """
  singular = (count == 1)
  if not singular:
    return word + 's'
  return word
|
|
|
|
|
def PrettyNumberWord(count, word):
  """Return count followed by word, pluralized to agree with count.

  Args:
    count: integer number of items (formatted with %d)
    word: singular noun, e.g. 'warning'

  Returns:
    A string such as '1 warning' or '3 warnings'.
  """
  noun = MaybePluralizeWord(count, word)
  return '%d %s' % (count, noun)
|
|
|
|
|
def UnCamelCase(camel):
  """Insert a space at every lowercase-to-uppercase letter boundary.

  Only ASCII a-z followed by A-Z counts as a boundary, so runs of capitals
  are left together.

  Args:
    camel: string such as 'InvalidValue'

  Returns:
    The string with spaces inserted, e.g. 'Invalid Value'.
  """
  word_boundary = re.compile(r'([a-z])([A-Z])')
  return word_boundary.sub(r'\1 \2', camel)
|
|
|
|
|
def ProblemCountText(error_count, warning_count):
  """Describe the error and warning totals in words.

  Zero counts are left out, so the result may be '' (both zero),
  a single phrase, or two phrases joined by ' and '.

  Args:
    error_count: number of errors
    warning_count: number of warnings

  Returns:
    A string such as '2 errors and 1 warning'.
  """
  labelled_counts = ((error_count, 'error'), (warning_count, 'warning'))
  phrases = [PrettyNumberWord(count, label)
             for count, label in labelled_counts if count]
  return ' and '.join(phrases)
|
|
|
|
|
def CalendarSummary(schedule):
  """Summarise trips per day over at most the next 60 days of the feed.

  Args:
    schedule: object providing GetDateRange() and
      GenerateDateTripsDeparturesList(start, end), as called below.

  Returns:
    An empty dict when the feed's date range is missing or cannot be parsed,
    or when no days are generated for the summary window. Otherwise a dict
    with the keys 'mean_trips', 'max_trips', 'max_trips_dates', 'min_trips',
    'min_trips_dates', 'date_trips_departures' and 'date_summary_range'.
  """
  today = datetime.date.today()
  summary_end_date = today + datetime.timedelta(days=60)
  start_date, end_date = schedule.GetDateRange()

  if not (start_date and end_date):
    return {}

  try:
    start_date_object = transitfeed.DateStringToDateObject(start_date)
    end_date_object = transitfeed.DateStringToDateObject(end_date)
  except ValueError:
    return {}

  # Only look at the period in which the feed is active: the feed may start
  # in the future and/or end in less than 60 days.
  window_start = max(today, start_date_object)
  window_end = min(summary_end_date, end_date_object)
  date_trips_departures = schedule.GenerateDateTripsDeparturesList(
      window_start, window_end)

  if not date_trips_departures:
    return {}

  first_day = date_trips_departures[0][0]
  last_day = date_trips_departures[-1][0]

  # Sanity check that the dates shown in the summary lie inside the feed's
  # own date range. A failure here indicates a bug that should be fixed; an
  # assertion is unfriendly to users but makes it likely to get reported.
  assert start_date <= first_day.strftime("%Y%m%d")
  assert end_date >= last_day.strftime("%Y%m%d")

  # Map from number of trips in a day to the list of date objects having that
  # many trips. Dates are appended in the order they were generated.
  dates_by_trip_count = defaultdict(lambda: [])
  total_trips = 0
  for day, trips_that_day, _departures_that_day in date_trips_departures:
    total_trips += trips_that_day
    dates_by_trip_count[trips_that_day].append(day)

  busiest = max(dates_by_trip_count.keys())
  quietest = min(dates_by_trip_count.keys())

  return {
      'mean_trips': total_trips / len(date_trips_departures),
      'max_trips': busiest,
      'max_trips_dates': FormatDateList(dates_by_trip_count[busiest]),
      'min_trips': quietest,
      'min_trips_dates': FormatDateList(dates_by_trip_count[quietest]),
      'date_trips_departures': date_trips_departures,
      'date_summary_range': "%s to %s" % (first_day.strftime("%a %b %d"),
                                          last_day.strftime("%a %b %d")),
  }
|
|
|
|
|
def FormatDateList(dates):
  """Describe a list of dates as a count plus up to three sample dates.

  Args:
    dates: sequence of objects with strftime(), e.g. datetime.date

  Returns:
    "0 service dates" for an empty sequence; otherwise a string such as
    "5 service dates (Mon Jan 01, Tue Jan 02, Wed Jan 03, ...)", where the
    trailing "..." appears only when more than three dates were given.
  """
  if not dates:
    return "0 service dates"

  total = len(dates)
  samples = [day.strftime("%a %b %d") for day in dates[:3]]
  if total > 3:
    samples.append("...")
  count_text = PrettyNumberWord(total, "service date")
  return "%s (%s)" % (count_text, ", ".join(samples))
|
|
|
|
|
def MaxVersion(versions):
  """Return the highest dotted version string in versions.

  Versions are compared numerically component by component, so '1.10' is
  greater than '1.9'. Empty strings and None entries are ignored. When
  several entries compare equal the first one encountered is returned.

  Args:
    versions: iterable of version strings such as '1.2.0'; may contain
      falsy entries, which are skipped.

  Returns:
    The greatest version string, or None when there is no non-empty entry.

  Raises:
    ValueError: if two or more candidates are compared and one of them has
      a component that is not an integer.
  """
  # Build a real list instead of relying on filter() returning one; this
  # also accepts tuples and other iterables.
  candidates = [version for version in versions if version]
  if not candidates:
    return None
  if len(candidates) == 1:
    # Nothing to compare against, so return it without parsing. This keeps
    # the historical behaviour of not rejecting a lone odd-looking version.
    return candidates[0]

  def NumericKey(version):
    return [int(component) for component in version.split('.')]

  # max() is a single pass and returns the first of equal maxima, matching
  # the result of the previous stable descending sort.
  return max(candidates, key=NumericKey)
|
|
|
|
|
class CountingConsoleProblemAccumulator(transitfeed.SimpleProblemAccumulator):
  """SimpleProblemAccumulator that also tallies errors and warnings."""

  def __init__(self):
    self._warning_count = 0
    self._error_count = 0

  def _Report(self, e):
    """Let the base class report e, then bump the matching counter.

    Anything for which e.IsError() is false is counted as a warning.
    """
    transitfeed.SimpleProblemAccumulator._Report(self, e)
    if not e.IsError():
      self._warning_count += 1
    else:
      self._error_count += 1

  def ErrorCount(self):
    """Return the number of errors reported so far."""
    return self._error_count

  def WarningCount(self):
    """Return the number of non-error problems reported so far."""
    return self._warning_count

  def FormatCount(self):
    """Return the totals in words, e.g. '2 errors and 1 warning'."""
    errors = self.ErrorCount()
    warnings = self.WarningCount()
    return ProblemCountText(errors, warnings)

  def HasIssues(self):
    """Return a truthy count if anything was reported, else 0."""
    errors = self.ErrorCount()
    if errors:
      return errors
    return self.WarningCount()
|
|
|
|
|
class BoundedProblemList(object):
  """A list of one type of ExceptionWithContext objects with bounded size.

  Every added object is counted, but only up to size_bound of them are kept.
  Comparable objects are kept in sorted order and the greatest one is dropped
  on overflow; objects that cannot be compared are kept first come, first
  served.
  """

  def __init__(self, size_bound):
    self._size_bound = size_bound
    self._exceptions = []
    self._count = 0

  def Add(self, e):
    """Count e and keep it if it fits within the size bound."""
    self._count += 1
    over_bound = self._count > self._size_bound
    try:
      bisect.insort(self._exceptions, e)
    except TypeError:
      # The base class ExceptionWithContext raises TypeError from __cmp__ to
      # signal that an object is not comparable. Without an ordering there is
      # no "most significant" problem, so keep the first ones reported.
      if not over_bound:
        self._exceptions.append(e)
      return
    # The list is in sorted order; when it has grown past the bound, drop the
    # last (least significant) entry.
    if over_bound:
      self._exceptions.pop()

  def _GetDroppedCount(self):
    kept = len(self._exceptions)
    return self._count - kept

  def __repr__(self):
    return "<BoundedProblemList %r>" % (self._exceptions,)

  @property
  def count(self):
    # Total number of objects passed to Add(), kept or not.
    return self._count

  dropped_count = property(_GetDroppedCount)

  @property
  def problems(self):
    # The retained objects (the live internal list, not a copy).
    return self._exceptions
|
|
|
|
|
class LimitPerTypeProblemAccumulator(transitfeed.ProblemAccumulatorInterface):
  """Accumulator that groups problems by type and by exception class name.

  Each (type, class name) group is held in its own BoundedProblemList built
  with the limit_per_type given to the constructor.
  """

  def __init__(self, limit_per_type):
    def NewProblemList():
      return BoundedProblemList(limit_per_type)

    # {TYPE_WARNING: {"ClassName": BoundedProblemList()}}
    self._type_to_name_to_problist = {
        TYPE_ERROR: defaultdict(NewProblemList),
        TYPE_WARNING: defaultdict(NewProblemList),
    }

  def HasIssues(self):
    """Return a non-empty class-name map if any exists, else an empty one."""
    errors_by_name = self._type_to_name_to_problist[TYPE_ERROR]
    if errors_by_name:
      return errors_by_name
    return self._type_to_name_to_problist[TYPE_WARNING]

  def _Report(self, e):
    """File e under its GetType() and its class name."""
    by_name = self._type_to_name_to_problist[e.GetType()]
    by_name[e.__class__.__name__].Add(e)

  def ErrorCount(self):
    """Return the total number of errors reported, including dropped ones."""
    error_lists = self._type_to_name_to_problist[TYPE_ERROR].values()
    return sum([problist.count for problist in error_lists])

  def WarningCount(self):
    """Return the total number of warnings reported, including dropped ones."""
    warning_lists = self._type_to_name_to_problist[TYPE_WARNING].values()
    return sum([problist.count for problist in warning_lists])

  def ProblemList(self, problem_type, class_name):
    """Return the BoundedProblemList object for given type and class."""
    # NOTE(review): the inner map is a defaultdict, so looking up an unseen
    # class_name presumably creates an empty entry -- confirm against
    # transitfeed.util.defaultdict.
    by_name = self._type_to_name_to_problist[problem_type]
    return by_name[class_name]

  def ProblemListMap(self, problem_type):
    """Return the map from class name to BoundedProblemList object."""
    return self._type_to_name_to_problist[problem_type]
|
|
|
|
|
class HTMLCountingProblemAccumulator(LimitPerTypeProblemAccumulator): |
|
def FormatType(self, f, level_name, class_problist):
  """Write the HTML dumping all problems of one type.

  Note that class_problist is sorted in place.

  Args:
    f: file object open for writing
    level_name: string such as "Error" or "Warning"
    class_problist: list of tuples (class name, BoundedProblemList object)
  """
  class_problist.sort()
  chunks = []
  for classname, problist in class_problist:
    heading = '<h4 class="issueHeader"><a name="%s%s">%s</a></h4><ul>\n' % (
        level_name, classname, UnCamelCase(classname))
    chunks.append(heading)
    for problem in problist.problems:
      self.FormatException(problem, chunks)
    dropped = problist.dropped_count
    if dropped:
      # Some problems of this class were counted but not kept.
      chunks.append('<li>and %d more of this type.' % dropped)
    chunks.append('</ul>\n')
  f.write(''.join(chunks))
|
|
|
def FormatTypeSummaryTable(self, level_name, name_to_problist):
  """Return an HTML table listing the number of problems by class name.

  Rows are ordered by class name and each links to the anchor
  "#<level_name><class name>".

  Args:
    level_name: string such as "Error" or "Warning"
    name_to_problist: dict mapping class name to a BoundedProblemList object

  Returns:
    HTML in a string
  """
  row_template = '<tr><td>%d</td><td><a href="#%s%s">%s</a></td></tr>\n'
  pieces = ['<table>']
  for classname in sorted(name_to_problist):
    total = name_to_problist[classname].count
    label = MaybePluralizeWord(total, UnCamelCase(classname))
    pieces.append(row_template % (total, level_name, classname, label))
  pieces.append('</table>\n')
  return ''.join(pieces)
|
|
|
def FormatException(self, e, output):
  """Append HTML version of e to list output.

  Always emits an <li> holding the formatted problem text. When e also
  carries file_name, row, headers and column_name attributes, the location
  and a one-row table dump of the offending row are added, with the problem
  column marked class="problem".

  Args:
    e: problem object providing GetDictToFormat() and FormatProblem()
    output: list of strings; HTML fragments are appended to it
  """
  # Local import so this method does not depend on a file-level change.
  from xml.sax.saxutils import escape

  d = e.GetDictToFormat()
  for k in ('file_name', 'feedname', 'column_name'):
    if k in d:
      d[k] = '<code>%s</code>' % d[k]
  # NOTE(review): problem_text intentionally contains the <code> markup added
  # above, so it is not escaped here; other values from d reach the page
  # as-is.
  problem_text = e.FormatProblem(d).replace('\n', '<br>')
  output.append('<li>')
  output.append('<div class="problem">%s</div>' %
                transitfeed.EncodeUnicode(problem_text))
  try:
    if hasattr(e, 'row_num'):
      line_str = 'line %d of ' % e.row_num
    else:
      line_str = ''
    output.append('in %s<code>%s</code><br>\n' %
                  (line_str, e.file_name))
    row = e.row
    headers = e.headers
    column_name = e.column_name
    header_cells = []  # HTML
    data_cells = []  # HTML
    for header, value in zip(headers, row):
      attributes = ''
      if header == column_name:
        attributes = ' class="problem"'
      # Header names and cell values come straight from the feed, so escape
      # &, < and > to stop them being interpreted as markup.
      header_cells.append('<th%s>%s</th>' %
                          (attributes, escape('%s' % header)))
      data_cells.append('<td%s>%s</td>' %
                        (attributes, escape('%s' % value)))
    # Make sure output is encoded into UTF-8
    output.append('<table class="dump"><tr>%s</tr>\n' %
                  transitfeed.EncodeUnicode(''.join(header_cells)))
    output.append('<tr>%s</tr></table>\n' %
                  transitfeed.EncodeUnicode(''.join(data_cells)))
  except AttributeError:
    # Best effort: problems without file/row context simply get no location
    # or table. This also hides an AttributeError raised by anything else
    # inside the try block.
    pass
  output.append('<br></li>\n')
|
|
|
def FormatCount(self):
  """Return the error and warning totals in words, e.g. '1 error'."""
  errors = self.ErrorCount()
  warnings = self.WarningCount()
  return ProblemCountText(errors, warnings)
|
|
|
def CountTable(self):
  """Return an HTML table summarising errors and warnings side by side.

  The first row holds the total for each problem type that has entries; the
  second row holds the per-class summary table for each of those types.

  Returns:
    HTML in a string
  """
  # (class-name map, anchor prefix, noun for the total, total getter)
  levels = (
      (self.ProblemListMap(TYPE_ERROR), "Error", "error", self.ErrorCount),
      (self.ProblemListMap(TYPE_WARNING), "Warning", "warning",
       self.WarningCount),
  )

  pieces = ['<table class="count_outside">\n', '<tr>']
  for problist_map, _level_name, noun, get_total in levels:
    if problist_map:
      pieces.append('<td><span class="fail">%s</span></td>' %
                    PrettyNumberWord(get_total(), noun))
  pieces.append('</tr>\n<tr>')
  for problist_map, level_name, _noun, _get_total in levels:
    if problist_map:
      pieces.append('<td>\n')
      pieces.append(self.FormatTypeSummaryTable(level_name, problist_map))
      pieces.append('</td>\n')
  pieces.append('</table>')
  return ''.join(pieces)
|
|
|
def WriteOutput(self, feed_location, f, schedule, other_problems): |
|
"""Write the html output to f.""" |
|
if self.HasIssues(): |
|
if self.ErrorCount() + self.WarningCount() == 1: |
|
summary = ('<span class="fail">Found this problem:</span>\n%s' % |
|
self.CountTable()) |
|
else: |
|
summary = ('<span class="fail">Found these problems:</span>\n%s' % |
|
self.CountTable()) |
|
else: |
|
summary = '<span class="pass">feed validated successfully</span>' |
|
if other_problems is not None: |
|
summary = ('<span class="fail">\n%s</span><br><br>' % |
|
other_problems) + summary |
|
|
|
basename = os.path.basename(feed_location) |
|
feed_path = (feed_location[:feed_location.rfind(basename)], basename) |
|
|
|
agencies = ', '.join(['<a href="%s">%s</a>' % (a.agency_url, a.agency_name) |
|
for a in schedule.GetAgencyList()]) |
|
if not agencies: |
|
agencies = '?' |
|
|
|
dates = "No valid service dates found" |
|
(start, end) = schedule.GetDateRange() |
|
if start and end: |
|