Upgrade origin-src to google transit feed 1.2.6
[bus.git] / origin-src / transitfeed-1.2.6 / feedvalidator.py
blob:a/origin-src/transitfeed-1.2.6/feedvalidator.py -> blob:b/origin-src/transitfeed-1.2.6/feedvalidator.py
  #!/usr/bin/python2.5
   
  # Copyright (C) 2007 Google Inc.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
  # You may obtain a copy of the License at
  #
  # http://www.apache.org/licenses/LICENSE-2.0
  #
  # Unless required by applicable law or agreed to in writing, software
  # distributed under the License is distributed on an "AS IS" BASIS,
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
   
   
  """Validates a GTFS file.
   
  For usage information run feedvalidator.py --help
  """
   
  import bisect
  import codecs
  import datetime
  from transitfeed.util import defaultdict
  import optparse
  import os
  import os.path
  import re
  import socket
  import sys
  import time
  import transitfeed
  from transitfeed import TYPE_ERROR, TYPE_WARNING
  from urllib2 import Request, urlopen, HTTPError, URLError
  from transitfeed import util
  import webbrowser
   
  # URL of the svn/tags/ directory of the googletransitdatafeed project.
  # NOTE(review): the code that reads this constant is outside this excerpt;
  # presumably it is used to discover released versions -- confirm.
  SVN_TAG_URL = 'http://googletransitdatafeed.googlecode.com/svn/tags/'
   
   
  def MaybePluralizeWord(count, word):
  if count == 1:
  return word
  else:
  return word + 's'
   
   
  def PrettyNumberWord(count, word):
  return '%d %s' % (count, MaybePluralizeWord(count, word))
   
   
  def UnCamelCase(camel):
  return re.sub(r'([a-z])([A-Z])', r'\1 \2', camel)
   
   
  def ProblemCountText(error_count, warning_count):
  results = []
  if error_count:
  results.append(PrettyNumberWord(error_count, 'error'))
  if warning_count:
  results.append(PrettyNumberWord(warning_count, 'warning'))
   
  return ' and '.join(results)
   
   
  def CalendarSummary(schedule):
  today = datetime.date.today()
  summary_end_date = today + datetime.timedelta(days=60)
  start_date, end_date = schedule.GetDateRange()
   
  if not start_date or not end_date:
  return {}
   
  try:
  start_date_object = transitfeed.DateStringToDateObject(start_date)
  end_date_object = transitfeed.DateStringToDateObject(end_date)
  except ValueError:
  return {}
   
  # Get the list of trips only during the period the feed is active.
  # As such we have to check if it starts in the future and/or if
  # if it ends in less than 60 days.
  date_trips_departures = schedule.GenerateDateTripsDeparturesList(
  max(today, start_date_object),
  min(summary_end_date, end_date_object))
   
  if not date_trips_departures:
  return {}
   
  # Check that the dates which will be shown in summary agree with these
  # calculations. Failure implies a bug which should be fixed. It isn't good
  # for users to discover assertion failures but means it will likely be fixed.
  assert start_date <= date_trips_departures[0][0].strftime("%Y%m%d")
  assert end_date >= date_trips_departures[-1][0].strftime("%Y%m%d")
   
  # Generate a map from int number of trips in a day to a list of date objects
  # with that many trips. The list of dates is sorted.
  trips_dates = defaultdict(lambda: [])
  trips = 0
  for date, day_trips, day_departures in date_trips_departures:
  trips += day_trips
  trips_dates[day_trips].append(date)
  mean_trips = trips / len(date_trips_departures)
  max_trips = max(trips_dates.keys())
  min_trips = min(trips_dates.keys())
   
  calendar_summary = {}
  calendar_summary['mean_trips'] = mean_trips
  calendar_summary['max_trips'] = max_trips
  calendar_summary['max_trips_dates'] = FormatDateList(trips_dates[max_trips])
  calendar_summary['min_trips'] = min_trips
  calendar_summary['min_trips_dates'] = FormatDateList(trips_dates[min_trips])
  calendar_summary['date_trips_departures'] = date_trips_departures
  calendar_summary['date_summary_range'] = "%s to %s" % (
  date_trips_departures[0][0].strftime("%a %b %d"),
  date_trips_departures[-1][0].strftime("%a %b %d"))
   
  return calendar_summary
   
   
  def FormatDateList(dates):
  if not dates:
  return "0 service dates"
   
  formatted = [d.strftime("%a %b %d") for d in dates[0:3]]
  if len(dates) > 3:
  formatted.append("...")
  return "%s (%s)" % (PrettyNumberWord(len(dates), "service date"),
  ", ".join(formatted))
   
   
  def MaxVersion(versions):
  versions = filter(None, versions)
  versions.sort(lambda x,y: -cmp([int(item) for item in x.split('.')],
  [int(item) for item in y.split('.')]))
  if len(versions) > 0:
  return versions[0]
   
   
  class CountingConsoleProblemAccumulator(transitfeed.SimpleProblemAccumulator):
  def __init__(self):
  self._error_count = 0
  self._warning_count = 0
   
  def _Report(self, e):
  transitfeed.SimpleProblemAccumulator._Report(self, e)
  if e.IsError():
  self._error_count += 1
  else:
  self._warning_count += 1
   
  def ErrorCount(self):
  return self._error_count
   
  def WarningCount(self):
  return self._warning_count
   
  def FormatCount(self):
  return ProblemCountText(self.ErrorCount(), self.WarningCount())
   
  def HasIssues(self):
  return self.ErrorCount() or self.WarningCount()
   
   
  class BoundedProblemList(object):
  """A list of one type of ExceptionWithContext objects with bounded size."""
  def __init__(self, size_bound):
  self._count = 0
  self._exceptions = []
  self._size_bound = size_bound
   
  def Add(self, e):
  self._count += 1
  try:
  bisect.insort(self._exceptions, e)
  except TypeError:
  # The base class ExceptionWithContext raises this exception in __cmp__
  # to signal that an object is not comparable. Instead of keeping the most
  # significant issue keep the first reported.
  if self._count <= self._size_bound:
  self._exceptions.append(e)
  else:
  # self._exceptions is in order. Drop the least significant if the list is
  # now too long.
  if self._count > self._size_bound:
  del self._exceptions[-1]
   
  def _GetDroppedCount(self):
  return self._count - len(self._exceptions)
   
  def __repr__(self):
  return "<BoundedProblemList %s>" % repr(self._exceptions)
   
  count = property(lambda s: s._count)
  dropped_count = property(_GetDroppedCount)
  problems = property(lambda s: s._exceptions)
   
   
  class LimitPerTypeProblemAccumulator(transitfeed.ProblemAccumulatorInterface):
  def __init__(self, limit_per_type):
  # {TYPE_WARNING: {"ClassName": BoundedProblemList()}}
  self._type_to_name_to_problist = {
  TYPE_WARNING: defaultdict(lambda: BoundedProblemList(limit_per_type)),
  TYPE_ERROR: defaultdict(lambda: BoundedProblemList(limit_per_type))
  }
   
  def HasIssues(self):
  return (self._type_to_name_to_problist[TYPE_ERROR] or
  self._type_to_name_to_problist[TYPE_WARNING])
   
  def _Report(self, e):
  self._type_to_name_to_problist[e.GetType()][e.__class__.__name__].Add(e)
   
  def ErrorCount(self):
  error_sets = self._type_to_name_to_problist[TYPE_ERROR].values()
  return sum(map(lambda v: v.count, error_sets))
   
  def WarningCount(self):
  warning_sets = self._type_to_name_to_problist[TYPE_WARNING].values()
  return sum(map(lambda v: v.count, warning_sets))
   
  def ProblemList(self, problem_type, class_name):
  """Return the BoundedProblemList object for given type and class."""
  return self._type_to_name_to_problist[problem_type][class_name]
   
  def ProblemListMap(self, problem_type):
  """Return the map from class name to BoundedProblemList object."""
  return self._type_to_name_to_problist[problem_type]
   
   
  class HTMLCountingProblemAccumulator(LimitPerTypeProblemAccumulator):
  def FormatType(self, f, level_name, class_problist):
  """Write the HTML dumping all problems of one type.
   
  Args:
  f: file object open for writing
  level_name: string such as "Error" or "Warning"
  class_problist: sequence of tuples (class name,
  BoundedProblemList object)
  """
  class_problist.sort()
  output = []
  for classname, problist in class_problist:
  output.append('<h4 class="issueHeader"><a name="%s%s">%s</a></h4><ul>\n' %
  (level_name, classname, UnCamelCase(classname)))
  for e in problist.problems:
  self.FormatException(e, output)
  if problist.dropped_count:
  output.append('<li>and %d more of this type.' %
  (problist.dropped_count))
  output.append('</ul>\n')
  f.write(''.join(output))
   
  def FormatTypeSummaryTable(self, level_name, name_to_problist):
  """Return an HTML table listing the number of problems by class name.
   
  Args:
  level_name: string such as "Error" or "Warning"
  name_to_problist: dict mapping class name to an BoundedProblemList object
   
  Returns:
  HTML in a string
  """
  output = []
  output.append('<table>')
  for classname in sorted(name_to_problist.keys()):
  problist = name_to_problist[classname]
  human_name = MaybePluralizeWord(problist.count, UnCamelCase(classname))
  output.append('<tr><td>%d</td><td><a href="#%s%s">%s</a></td></tr>\n' %
  (problist.count, level_name, classname, human_name))
  output.append('</table>\n')
  return ''.join(output)
   
  def FormatException(self, e, output):
  """Append HTML version of e to list output."""
  d = e.GetDictToFormat()
  for k in ('file_name', 'feedname', 'column_name'):
  if k in d.keys():
  d[k] = '<code>%s</code>' % d[k]
  problem_text = e.FormatProblem(d).replace('\n', '<br>')
  output.append('<li>')
  output.append('<div class="problem">%s</div>' %
  transitfeed.EncodeUnicode(problem_text))
  try:
  if hasattr(e, 'row_num'):
  line_str = 'line %d of ' % e.row_num
  else:
  line_str = ''
  output.append('in %s<code>%s</code><br>\n' %
  (line_str, e.file_name))
  row = e.row
  headers = e.headers
  column_name = e.column_name
  table_header = '' # HTML
  table_data = '' # HTML
  for header, value in zip(headers, row):
  attributes = ''
  if header == column_name:
  attributes = ' class="problem"'
  table_header += '<th%s>%s</th>' % (attributes, header)
  table_data += '<td%s>%s</td>' % (attributes, value)
  # Make sure output is encoded into UTF-8
  output.append('<table class="dump"><tr>%s</tr>\n' %
  transitfeed.EncodeUnicode(table_header))
  output.append('<tr>%s</tr></table>\n' %
  transitfeed.EncodeUnicode(table_data))
  except AttributeError, e:
  pass # Hope this was getting an attribute from e ;-)
  output.append('<br></li>\n')
   
  def FormatCount(self):
  return ProblemCountText(self.ErrorCount(), self.WarningCount())
   
  def CountTable(self):
  output = []
  output.append('<table class="count_outside">\n')
  output.append('<tr>')
  if self.ProblemListMap(TYPE_ERROR):
  output.append('<td><span class="fail">%s</span></td>' %
  PrettyNumberWord(self.ErrorCount(), "error"))
  if self.ProblemListMap(TYPE_WARNING):
  output.append('<td><span class="fail">%s</span></td>' %
  PrettyNumberWord(self.WarningCount(), "warning"))
  output.append('</tr>\n<tr>')
  if self.ProblemListMap(TYPE_ERROR):
  output.append('<td>\n')
  output.append(self.FormatTypeSummaryTable("Error",
  self.ProblemListMap(TYPE_ERROR)))
  output.append('</td>\n')
  if self.ProblemListMap(TYPE_WARNING):
  output.append('<td>\n')
  output.append(self.FormatTypeSummaryTable("Warning",
  self.ProblemListMap(TYPE_WARNING)))
  output.append('</td>\n')
  output.append('</table>')
  return ''.join(output)
   
  def WriteOutput(self, feed_location, f, schedule, other_problems):
  """Write the html output to f."""
  if self.HasIssues():
  if self.ErrorCount() + self.WarningCount() == 1:
  summary = ('<span class="fail">Found this problem:</span>\n%s' %
  self.CountTable())
  else:
  summary = ('<span class="fail">Found these problems:</span>\n%s' %
  self.CountTable())
  else:
  summary = '<span class="pass">feed validated successfully</span>'
  if other_problems is not None:
  summary = ('<span class="fail">\n%s</span><br><br>' %
  other_problems) + summary
   
  basename = os.path.basename(feed_location)
  feed_path = (feed_location[:feed_location.rfind(basename)], basename)
   
  agencies = ', '.join(['<a href="%s">%s</a>' % (a.agency_url, a.agency_name)
  for a in schedule.GetAgencyList()])
  if not agencies:
  agencies = '?'
   
  dates = "No valid service dates found"
  (start, end) = schedule.GetDateRange()
  if start and end: