[bus.git] / origin-src / transitfeed-1.2.6 / transitfeed / util.py
--- a/origin-src/transitfeed-1.2.6/transitfeed/util.py
+++ b/origin-src/transitfeed-1.2.6/transitfeed/util.py
@@ -1,1 +1,435 @@
-
+#!/usr/bin/python2.5
+
+# Copyright (C) 2009 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import codecs
+import csv
+import datetime
+import math
+import optparse
+import random
+import re
+import sys
+
+import problems
+from trip import Trip
+
+class OptionParserLongError(optparse.OptionParser):
+  """OptionParser variant that shows the full option help before an error."""
+
+  def error(self, msg):
+    """Print the help text and then the error to stderr; exit with status 2.
+
+    Args:
+      msg: description of the command line problem.
+    """
+    help_text = self.format_help()
+    error_text = '\n\n%s: error: %s\n\n' % (self.get_prog_name(), msg)
+    for text in (help_text, error_text):
+      print >>sys.stderr, text
+    sys.exit(2)
+
+
+def RunWithCrashHandler(f):
+  """Run f() and exit with its result, reporting any unexpected exception.
+
+  SystemExit and KeyboardInterrupt raised by f are propagated untouched. For
+  any other exception a report (traceback with source context and the local
+  variables of each frame) is saved to transitfeedcrash.txt if possible,
+  printed to stdout, and the process exits with status 127 after waiting for
+  the user to press enter.
+
+  Args:
+    f: a callable taking no arguments. Its return value is passed to
+       sys.exit().
+  """
+  try:
+    exit_code = f()
+    sys.exit(exit_code)
+  except (SystemExit, KeyboardInterrupt):
+    raise
+  except:
+    import inspect
+    import traceback
+
+    # Save trace and exception now. These calls look at the most recently
+    # raised exception. The code that makes the report might trigger other
+    # exceptions.
+    original_trace = inspect.trace(3)[1:]
+    formatted_exception = traceback.format_exception_only(*(sys.exc_info()[:2]))
+
+    apology = """Yikes, the program threw an unexpected exception!
+
+Hopefully a complete report has been saved to transitfeedcrash.txt,
+though if you are seeing this message we've already disappointed you once
+today. Please include the report in a new issue at
+http://code.google.com/p/googletransitdatafeed/issues/entry
+or an email to the public group googletransitdatafeed@googlegroups.com. Sorry!
+
+"""
+    dashes = '%s\n' % ('-' * 60)
+    dump = []
+    dump.append(apology)
+    dump.append(dashes)
+    try:
+      import transitfeed
+      dump.append("transitfeed version %s\n\n" % transitfeed.__version__)
+    except (ImportError, AttributeError, NameError):
+      # A failed import raises ImportError and a missing __version__ raises
+      # AttributeError. Oh well, guess we won't put the version in the report
+      pass
+
+    for (frame_obj, filename, line_num, fun_name, context_lines,
+         context_index) in original_trace:
+      dump.append('File "%s", line %d, in %s\n' % (filename, line_num,
+                                                   fun_name))
+      if context_lines:
+        for (i, line) in enumerate(context_lines):
+          if i == context_index:
+            dump.append(' --> %s' % line)
+          else:
+            dump.append('     %s' % line)
+      for local_name, local_val in frame_obj.f_locals.items():
+        try:
+          truncated_val = str(local_val)[0:500]
+        except Exception, e:
+          # End with a newline so the next entry starts on its own line
+          dump.append('    Exception in str(%s): %s\n' % (local_name, e))
+        else:
+          if len(truncated_val) >= 500:
+            truncated_val = '%s...' % truncated_val[0:499]
+          dump.append('    %s = %s\n' % (local_name, truncated_val))
+      dump.append('\n')
+
+    dump.append(''.join(formatted_exception))
+    report = ''.join(dump)
+
+    # Saving the report is best effort. A failure here (for example a read-only
+    # directory) must not stop the report from being printed below.
+    try:
+      crash_file = open('transitfeedcrash.txt', 'w')
+      try:
+        crash_file.write(report)
+      finally:
+        crash_file.close()
+    except (IOError, OSError), e:
+      print >>sys.stderr, 'Could not save transitfeedcrash.txt: %s' % e
+
+    print report
+    print
+    print dashes
+    print apology
+
+    try:
+      raw_input('Press enter to continue...')
+    except EOFError:
+      # Ignore stdin being closed. This happens during some tests.
+      pass
+    sys.exit(127)
+
+
+# Pick one of two defaultdict implementations. A native version was added to
+# the collections library in python 2.5. If that is not available use Jason's
+# pure python recipe. He gave us permission to distribute it.
+
+# On Mon, Nov 30, 2009 at 07:27, jason kirtland <jek at discorporate.us> wrote:
+# >
+# > Hi Tom, sure thing!  It's not easy to find on the cookbook site, but the
+# > recipe is under the Python license.
+# >
+# > Cheers,
+# > Jason
+# >
+# > On Thu, Nov 26, 2009 at 3:03 PM, Tom Brown <tom.brown.code@gmail.com> wrote:
+# >
+# >> I would like to include http://code.activestate.com/recipes/523034/ in
+# >> http://code.google.com/p/googletransitdatafeed/wiki/TransitFeedDistribution
+# >> which is distributed under the Apache License, Version 2.0 with Copyright
+# >> Google. May we include your code with a comment in the source pointing at
+# >> the original URL?  Thanks, Tom Brown
+
+try:
+  # Try the native implementation first
+  from collections import defaultdict
+except ImportError:
+  # Fallback for python2.4, which didn't include collections.defaultdict
+  class defaultdict(dict):
+    """Pure python dict that creates missing values with default_factory.
+
+    Taken from http://code.activestate.com/recipes/523034/
+    """
+    def __init__(self, default_factory=None, *a, **kw):
+      if (default_factory is not None and
+          not hasattr(default_factory, '__call__')):
+        raise TypeError('first argument must be callable')
+      dict.__init__(self, *a, **kw)
+      self.default_factory = default_factory
+    def __getitem__(self, key):
+      try:
+        return dict.__getitem__(self, key)
+      except KeyError:
+        return self.__missing__(key)
+    def __missing__(self, key):
+      # Without a factory a missing key behaves like in a plain dict
+      if self.default_factory is None:
+        raise KeyError(key)
+      self[key] = value = self.default_factory()
+      return value
+    def __reduce__(self):
+      # Pickle support: rebuild from the factory, then replay the items
+      if self.default_factory is None:
+        args = tuple()
+      else:
+        args = self.default_factory,
+      return type(self), args, None, None, self.items()
+    def copy(self):
+      return self.__copy__()
+    def __copy__(self):
+      return type(self)(self.default_factory, self)
+    def __deepcopy__(self, memo):
+      import copy
+      # Pass memo along so objects shared with the rest of the structure being
+      # copied are only copied once
+      return type(self)(self.default_factory,
+                        copy.deepcopy(self.items(), memo))
+    def __repr__(self):
+      return 'defaultdict(%s, %s)' % (self.default_factory,
+                                      dict.__repr__(self))
+
+
+
+OUTPUT_ENCODING = 'utf-8'
+
+def EncodeUnicode(text):
+  """Return text encoded with OUTPUT_ENCODING if it is a unicode string.
+
+  Any other value is returned unchanged. The result should be safe to print.
+
+  Args:
+    text: any object; only unicode strings (including instances of unicode
+          subclasses) are encoded.
+  """
+  # isinstance also catches unicode subclasses, matching CsvUnicodeWriter
+  if isinstance(text, unicode):
+    return text.encode(OUTPUT_ENCODING)
+  return text
+
+def IsValidURL(url):
+  """Return True if url starts with the http:// or https:// scheme prefix."""
+  # TODO: Add more thorough checking of URL
+  for scheme_prefix in (u'http://', u'https://'):
+    if url.startswith(scheme_prefix):
+      return True
+  return False
+
+
+def IsValidColor(color):
+  """Return True if color consists of exactly six hexadecimal digits.
+
+  Args:
+    color: string to check, for example "00FF7f".
+  """
+  # \Z only matches at the very end of the string. '$' would also match just
+  # before a trailing newline and so accept values such as "00FF7f\n".
+  return re.match(r'[0-9a-fA-F]{6}\Z', color) is not None
+
+
+def ColorLuminance(color):
+  """Return the brightness of an sRGB color, using the weighting given at
+  http://www.w3.org/TR/2000/WD-AERT-20000426#color-contrast.
+
+  Args:
+    color: a string of six hex digits in the format verified by IsValidColor().
+
+  Returns:
+    A floating-point number between 0.0 (black) and 255.0 (white). """
+  # Two hex digits per channel, in red, green, blue order
+  red, green, blue = [int(color[start:start + 2], 16) for start in (0, 2, 4)]
+  weighted_sum = 299*red + 587*green + 114*blue
+  return weighted_sum / 1000.0
+
+
+def IsEmpty(value):
+  """Return True if value is None or a string that is empty after stripping
+  whitespace. Any other value is not considered empty."""
+  if value is None:
+    return True
+  if not isinstance(value, basestring):
+    return False
+  return not value.strip()
+
+
+def FindUniqueId(dic):
+  """Return a string not used as a key in the dictionary dic.
+
+  The first candidate is the number of entries in dic; if that is taken,
+  random numbers are tried until an unused one is found."""
+  candidate = str(len(dic))
+  while True:
+    if candidate not in dic:
+      return candidate
+    # Use bigger numbers so it is obvious when an id is picked randomly.
+    candidate = str(random.randint(1000000, 999999999))
+
+
+def TimeToSecondsSinceMidnight(time_string):
+  """Convert HHH:MM:SS into seconds since midnight.
+
+  For example "01:02:03" returns 3723. The leading zero of the hours may be
+  omitted. HH may be more than 23 if the time is on the following day.
+
+  Raises:
+    problems.Error: if time_string does not look like HHH:MM:SS."""
+  # ignored: matching for leap seconds
+  parsed = re.match(r'(\d{1,3}):([0-5]\d):([0-5]\d)$', time_string)
+  if parsed is None:
+    raise problems.Error('Bad HH:MM:SS "%s"' % time_string)
+  hours, minutes, seconds = [int(field) for field in parsed.groups()]
+  return (hours * 60 + minutes) * 60 + seconds
+
+
+def FormatSecondsSinceMidnight(s):
+  """Return the "HH:MM:SS" string for s, an int number of seconds past
+  midnight. Each field is zero padded to at least two digits."""
+  hours = s / 3600
+  minutes = (s / 60) % 60
+  seconds = s % 60
+  return "%02d:%02d:%02d" % (hours, minutes, seconds)
+
+
+def DateStringToDateObject(date_string):
+  """Return the datetime.date described by a "YYYYMMDD" string."""
+  # If this becomes a bottleneck date objects could be cached
+  year = int(date_string[0:4])
+  month = int(date_string[4:6])
+  day = int(date_string[6:8])
+  return datetime.date(year, month, day)
+
+
+def FloatStringToFloat(float_string, problems=None):
+  """Convert a float as a string to a float or raise an exception.
+
+  Args:
+    float_string: the text to parse.
+    problems: optional reporter. Its InvalidFloatValue(float_string) method is
+              called when Python can parse the text but it is not written as
+              an optional sign, digits and an optional ".digits" part.
+
+  Raises:
+    TypeError: float_string is not a string.
+    ValueError: float_string can not be parsed or contains "x".
+  """
+  # Will raise TypeError unless a string
+  strict_format = re.match(r"^[+-]?\d+(\.\d+)?$", float_string)
+  # Will raise ValueError if the string can't be parsed
+  value = float(float_string)
+
+  # This is needed because Python 2.4 does not complain about float("0x20").
+  # But it does complain about float("0b10"), so this should be enough.
+  if "x" in float_string:
+    raise ValueError()
+
+  # Does not match the regex, but it's a float according to Python
+  if problems is not None and not strict_format:
+    problems.InvalidFloatValue(float_string)
+  return value
+
+def NonNegIntStringToInt(int_string, problems=None):
+  """Convert a non-negative integer string to an int or raise an exception.
+
+  Args:
+    int_string: the text to parse.
+    problems: optional reporter. Its InvalidNonNegativeIntegerValue(int_string)
+              method is called when Python can parse the text as a
+              non-negative int but it is not written as "0" or as digits
+              without a leading zero.
+
+  Raises:
+    TypeError: int_string is not a string.
+    ValueError: int_string can not be parsed or is negative.
+  """
+  # Will raise TypeError unless a string
+  canonical_format = re.match(r"^(?:0|[1-9]\d*)$", int_string)
+  # Will raise ValueError if the string can't be parsed
+  value = int(int_string)
+
+  if value < 0:
+    raise ValueError()
+
+  # Does not match the regex, but it's an int according to Python
+  if problems is not None and not canonical_format:
+    problems.InvalidNonNegativeIntegerValue(int_string)
+  return value
+
+EARTH_RADIUS = 6378135          # in meters
+def ApproximateDistance(degree_lat1, degree_lng1, degree_lat2, degree_lng2):
+  """Return the approximate distance in meters between two points given as
+  latitude and longitude in degrees. Assumes the Earth is a sphere."""
+  # TODO: change to ellipsoid approximation, such as
+  # http://www.codeguru.com/Cpp/Cpp/algorithms/article.php/c5115/
+  lat1, lng1, lat2, lng2 = [
+      math.radians(degrees)
+      for degrees in (degree_lat1, degree_lng1, degree_lat2, degree_lng2)]
+  dlat = math.sin(0.5 * (lat2 - lat1))
+  dlng = math.sin(0.5 * (lng2 - lng1))
+  x = dlat * dlat + dlng * dlng * math.cos(lat1) * math.cos(lat2)
+  # max() guards against 1.0 - x being slightly negative before the sqrt
+  central_angle = 2 * math.atan2(math.sqrt(x),
+                                 math.sqrt(max(0.0, 1.0 - x)))
+  return EARTH_RADIUS * central_angle
+
+
+def ApproximateDistanceBetweenStops(stop1, stop2):
+  """Return the approximate distance in meters between two stops, read from
+  their stop_lat and stop_lon attributes. Assumes the Earth is a sphere."""
+  lat_a, lon_a = stop1.stop_lat, stop1.stop_lon
+  lat_b, lon_b = stop2.stop_lat, stop2.stop_lon
+  return ApproximateDistance(lat_a, lon_a, lat_b, lon_b)
+
+class CsvUnicodeWriter:
+  """
+  Create a wrapper around a csv writer object which can safely write unicode
+  values. Passes all arguments to csv.writer. Attributes not defined here are
+  looked up on the wrapped writer.
+  """
+  def __init__(self, *args, **kwargs):
+    self.writer = csv.writer(*args, **kwargs)
+
+  def writerow(self, row):
+    """Write row to the csv file. Any unicode strings in row are encoded as
+    utf-8.
+
+    If the wrapped writer fails, the row is printed to stdout and the original
+    exception is re-raised."""
+    encoded_row = []
+    for s in row:
+      if isinstance(s, unicode):
+        encoded_row.append(s.encode("utf-8"))
+      else:
+        encoded_row.append(s)
+    try:
+      self.writer.writerow(encoded_row)
+    except Exception:
+      print 'error writing %s as %s' % (row, encoded_row)
+      # A bare raise keeps the original traceback; "raise e" would replace it
+      # with one that starts here.
+      raise
+
+  def writerows(self, rows):
+    """Write rows to the csv file. Any unicode strings in rows are encoded as
+    utf-8."""
+    for row in rows:
+      self.writerow(row)
+
+  def __getattr__(self, name):
+    # Only called for attributes not found on this object
+    return getattr(self.writer, name)
+
+# Map from literal string that should never be found in the csv data to a human
+# readable description
+INVALID_LINE_SEPARATOR_UTF8 = {
+    "\x0c": "ASCII Form Feed 0x0C",
+    # May be part of end of line, but not found elsewhere
+    "\x0d": "ASCII Carriage Return 0x0D, \\r",
+    "\xe2\x80\xa8": "Unicode LINE SEPARATOR U+2028",
+    "\xe2\x80\xa9": "Unicode PARAGRAPH SEPARATOR U+2029",
+    "\xc2\x85": "Unicode NEXT LINE SEPARATOR U+0085",
+}
+
+class EndOfLineChecker:
+  """Iterator over the lines of a file-like object that reports odd line ends.
+
+  Lines are returned with their end of line marker removed. The check for
+  consistent end of lines (all CR LF or all LF) only happens if next() is
+  called until it raises StopIteration.
+  """
+  def __init__(self, f, name, problems):
+    """Create new object.
+
+    Args:
+      f: file-like object to wrap
+      name: name to use for f. StringIO objects don't have a name attribute.
+      problems: a ProblemReporterBase object
+    """
+    self._f = f
+    self._name = name
+    self._problems = problems
+    self._line_number = 0  # first line will be number 1
+    # Number of lines seen with each line end, and the line numbers of the
+    # first five of each kind
+    self._crlf = 0
+    self._crlf_examples = []
+    self._lf = 0
+    self._lf_examples = []
+
+  def __iter__(self):
+    return self
+
+  def next(self):
+    """Return next line without end of line marker or raise StopIteration."""
+    try:
+      raw_line = self._f.next()
+    except StopIteration:
+      self._FinalCheck()
+      raise
+
+    self._line_number += 1
+    eol_match = re.search(r"[\x0a\x0d]*$", raw_line)
+    line_end = eol_match.group()
+    if line_end == "\x0d\x0a":
+      self._crlf += 1
+      if self._crlf <= 5:
+        self._crlf_examples.append(self._line_number)
+    elif line_end == "\x0a":
+      self._lf += 1
+      if self._lf <= 5:
+        self._lf_examples.append(self._line_number)
+    elif line_end == "":
+      # Should only happen at the end of the file
+      try:
+        self._f.next()
+      except StopIteration:
+        # Will be raised again when EndOfLineChecker.next() is next called
+        pass
+      else:
+        raise RuntimeError("Unexpected row without new line sequence")
+    else:
+      self._problems.InvalidLineEnd(
+        codecs.getencoder('string_escape')(line_end)[0],
+        (self._name, self._line_number))
+
+    contents = raw_line[0:eol_match.start()]
+    for seq, description in INVALID_LINE_SEPARATOR_UTF8.items():
+      if contents.find(seq) != -1:
+        self._problems.OtherProblem(
+          "Line contains %s" % description,
+          context=(self._name, self._line_number))
+    return contents
+
+  def _FinalCheck(self):
+    """Report a problem if both CR LF and LF line ends were counted."""
+    if not (self._crlf > 0 and self._lf > 0):
+      return
+
+    # Collect count, plural suffix (used twice in the message) and example
+    # line numbers, first for CR LF and then for LF
+    message_args = []
+    for count, examples in ((self._crlf, self._crlf_examples),
+                            (self._lf, self._lf_examples)):
+      plural = count > 1 and "s" or ""
+      lines = ", ".join(["%s" % e for e in examples])
+      if count > len(examples):
+        lines += ", ..."
+      message_args.extend([count, plural, plural, lines])
+
+    self._problems.OtherProblem(
+        "Found %d CR LF \"\\r\\n\" line end%s (line%s %s) and "
+        "%d LF \"\\n\" line end%s (line%s %s). A file must use a "
+        "consistent line end." % tuple(message_args),
+        (self._name,))
+    # Prevent _FinalCheck() from reporting the problem twice, in the unlikely
+    # case that it is run twice
+    self._crlf = 0
+    self._lf = 0
+
+def SortListOfTripByTime(trips):
+  """Sort trips in place, ordered by the value Trip.GetStartTime returns for
+  each element."""
+  start_time_of = Trip.GetStartTime
+  trips.sort(key=start_time_of)
+