gitphp 0.2.9.1 :: bus.git/blame

2010-11-28 11:34:48 maxious
#!/usr/bin/python2.5
 
# Copyright (C) 2009 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
 
import codecs
import csv
import datetime
import math
import optparse
import random
import re
import sys
 
import problems
from trip import Trip
 
class OptionParserLongError(optparse.OptionParser):
  """OptionParser subclass that includes list of options above error message."""

  def error(self, msg):
    """Write the full help text and then the error to stderr, then exit.

    Args:
      msg: description of what is wrong with the command line.

    Raises:
      SystemExit: always, with exit code 2.
    """
    # sys.stderr.write() emits exactly what the `print >>sys.stderr`
    # statements did (print appends one newline) but is valid syntax on every
    # Python version.
    sys.stderr.write('%s\n' % self.format_help())
    sys.stderr.write('\n\n%s: error: %s\n\n\n' % (self.get_prog_name(), msg))
    sys.exit(2)
 
 
def RunWithCrashHandler(f):
  """Call f() and exit with its result, reporting any unexpected exception.

  SystemExit and KeyboardInterrupt pass through untouched. For any other
  exception a report is built that lists every traceback frame below this
  function with its source context and local variables, followed by the
  exception itself. The report is saved to transitfeedcrash.txt in the current
  directory and printed to stdout, the user is asked to press enter and the
  process exits with code 127.

  Args:
    f: callable taking no arguments. Its return value is passed to sys.exit().

  Raises:
    SystemExit: always, unless f raises KeyboardInterrupt.
  """
  try:
    exit_code = f()
    sys.exit(exit_code)
  except (SystemExit, KeyboardInterrupt):
    raise
  except:
    # Deliberately bare: nothing f raises should escape without a report.
    import inspect
    import traceback

    # Save trace and exception now. These calls look at the most recently
    # raised exception. The code that makes the report might trigger other
    # exceptions.
    original_trace = inspect.trace(3)[1:]
    formatted_exception = traceback.format_exception_only(*(sys.exc_info()[:2]))

    apology = """Yikes, the program threw an unexpected exception!

Hopefully a complete report has been saved to transitfeedcrash.txt,
though if you are seeing this message we've already disappointed you once
today. Please include the report in a new issue at
http://code.google.com/p/googletransitdatafeed/issues/entry
or an email to the public group googletransitdatafeed@googlegroups.com. Sorry!

"""
    dashes = '%s\n' % ('-' * 60)
    dump = [apology, dashes]
    try:
      import transitfeed
      dump.append("transitfeed version %s\n\n" % transitfeed.__version__)
    except (ImportError, AttributeError, NameError):
      # Oh well, guess we won't put the version in the report. A failed import
      # raises ImportError, which must not escape from the crash handler.
      pass

    for (frame_obj, filename, line_num, fun_name, context_lines,
         context_index) in original_trace:
      dump.append('File "%s", line %d, in %s\n' % (filename, line_num,
                                                   fun_name))
      if context_lines:
        for (i, line) in enumerate(context_lines):
          if i == context_index:
            dump.append(' --> %s' % line)
          else:
            dump.append('     %s' % line)
      for local_name, local_val in frame_obj.f_locals.items():
        try:
          truncated_val = str(local_val)[0:500]
        except Exception:
          # sys.exc_info() instead of "except Exception, e" / "as e" so that
          # the same source is valid from Python 2.4 onwards.
          str_error = sys.exc_info()[1]
          dump.append('    Exception in str(%s): %s\n' % (local_name,
                                                          str_error))
        else:
          if len(truncated_val) >= 500:
            truncated_val = '%s...' % truncated_val[0:499]
          dump.append('    %s = %s\n' % (local_name, truncated_val))
      dump.append('\n')

    dump.append(''.join(formatted_exception))
    report = ''.join(dump)

    # Saving the report is best effort: if the file can't be written the
    # report must still reach the user on stdout.
    try:
      crash_file = open('transitfeedcrash.txt', 'w')
      try:
        crash_file.write(report)
      finally:
        crash_file.close()
    except EnvironmentError:
      sys.stdout.write('Unable to save transitfeedcrash.txt: %s\n' %
                       sys.exc_info()[1])

    # Same output as "print report; print; print dashes; print apology".
    sys.stdout.write('%s\n\n%s\n%s\n' % (report, dashes, apology))

    try:
      read_line = raw_input
    except NameError:
      # Python 3 renamed raw_input() to input().
      read_line = input
    try:
      read_line('Press enter to continue...')
    except EOFError:
      # Ignore stdin being closed. This happens during some tests.
      pass
    sys.exit(127)
 
 
# Pick one of two defaultdict implementations. A native version was added to
# the collections library in python 2.5. If that is not available use Jason's
# pure python recipe. He gave us permission to distribute it.
 
# On Mon, Nov 30, 2009 at 07:27, jason kirtland <jek at discorporate.us> wrote:
# >
# > Hi Tom, sure thing!  It's not easy to find on the cookbook site, but the
# > recipe is under the Python license.
# >
# > Cheers,
# > Jason
# >
# > On Thu, Nov 26, 2009 at 3:03 PM, Tom Brown <tom.brown.code@gmail.com> wrote:
# >
# >> I would like to include http://code.activestate.com/recipes/523034/ in
# >> http://code.google.com/p/googletransitdatafeed/wiki/TransitFeedDistribution
# >> which is distributed under the Apache License, Version 2.0 with Copyright
# >> Google. May we include your code with a comment in the source pointing at
# >> the original URL?  Thanks, Tom Brown
 
try:
  # Try the native implementation first
  from collections import defaultdict
except ImportError:
  # Fallback for python2.4, which didn't include collections.defaultdict
  class defaultdict(dict):
    """Pure python dict that creates missing values with default_factory.

    From http://code.activestate.com/recipes/523034/
    """

    def __init__(self, default_factory=None, *a, **kw):
      """Create the dict.

      Args:
        default_factory: None or a callable taking no arguments that returns
          the value to store for a missing key.
        *a, **kw: passed to dict.__init__.

      Raises:
        TypeError: if default_factory is neither None nor callable.
      """
      if (default_factory is not None and
        not hasattr(default_factory, '__call__')):
        raise TypeError('first argument must be callable')
      dict.__init__(self, *a, **kw)
      self.default_factory = default_factory

    def __getitem__(self, key):
      """Return self[key], creating it with __missing__ if absent."""
      try:
        return dict.__getitem__(self, key)
      except KeyError:
        return self.__missing__(key)

    def __missing__(self, key):
      """Store and return default_factory() for key.

      Raises:
        KeyError: if default_factory is None.
      """
      if self.default_factory is None:
        raise KeyError(key)
      self[key] = value = self.default_factory()
      return value

    def __reduce__(self):
      """Support pickle and copy: rebuild from the factory plus the items."""
      if self.default_factory is None:
        args = tuple()
      else:
        args = self.default_factory,
      # The fifth element must be an iterator, not a sequence; the pure python
      # pickler calls .next() on it.
      return type(self), args, None, None, iter(self.items())

    def copy(self):
      """Return a shallow copy that keeps default_factory."""
      return self.__copy__()

    def __copy__(self):
      return type(self)(self.default_factory, self)

    def __deepcopy__(self, memo):
      import copy
      # Pass memo on so objects shared with the rest of the structure being
      # copied are only copied once.
      return type(self)(self.default_factory,
                        copy.deepcopy(list(self.items()), memo))

    def __repr__(self):
      return 'defaultdict(%s, %s)' % (self.default_factory,
                                      dict.__repr__(self))
 
 
 
# Encoding applied by EncodeUnicode() to unicode text.
OUTPUT_ENCODING = 'utf-8'

def EncodeUnicode(text):
  """
  Optionally encode text and return it. The result should be safe to print.

  Args:
    text: any object.

  Returns:
    text encoded with OUTPUT_ENCODING if it is a unicode string (or an
    instance of a unicode subclass), otherwise text itself, unchanged.
  """
  # isinstance() rather than comparing type() so that subclasses of unicode
  # are encoded too.
  if isinstance(text, type(u'')):
    return text.encode(OUTPUT_ENCODING)
  return text
 
def IsValidURL(url):
  """Return True if url begins with the http:// or https:// scheme."""
  # TODO: Add more thorough checking of URL
  for scheme in (u'http://', u'https://'):
    if url.startswith(scheme):
      return True
  return False
 
 
def IsValidColor(color):
  """Checks the validity of a hex color value.

  Args:
    color: a string.

  Returns:
    True if color is exactly six hexadecimal digits, otherwise False.
  """
  # \Z instead of $ because $ also matches just before a trailing newline,
  # which let 'ffffff\n' through.
  return re.match(r'[0-9a-fA-F]{6}\Z', color) is not None
 
 
def ColorLuminance(color):
  """Return the perceived brightness of an sRGB color.

  Uses the weighted sum from
  http://www.w3.org/TR/2000/WD-AERT-20000426#color-contrast.

  Args:
    color: a string of six hex digits in the format verified by IsValidColor().

  Returns:
    A floating-point number between 0.0 (black) and 255.0 (white).
  """
  # Each pair of hex digits is one channel: red, green, then blue.
  red, green, blue = [int(color[start:start + 2], 16) for start in (0, 2, 4)]
  weighted_sum = 299*red + 587*green + 114*blue
  return weighted_sum / 1000.0
 
 
def IsEmpty(value):
  """Return True if value is None or a string holding only whitespace."""
  if value is None:
    return True
  if not isinstance(value, basestring):
    # Only None and strings can count as empty.
    return False
  return not value.strip()
 
 
def FindUniqueId(dic):
  """Return a string that is not currently a key of the dictionary dic.

  The number of entries in dic is tried first; if that is taken random numbers
  are tried until an unused one is found.
  """
  candidate = str(len(dic))
  while True:
    if candidate not in dic:
      return candidate
    # Use bigger numbers so it is obvious when an id is picked randomly.
    candidate = str(random.randint(1000000, 999999999))
 
 
def TimeToSecondsSinceMidnight(time_string):
  """Convert HHH:MM:SS into seconds since midnight.

  For example "01:02:03" returns 3723. The leading zero of the hours may be
  omitted. HH may be more than 23 if the time is on the following day.

  Args:
    time_string: one to three hour digits, then two digit minutes and seconds
      (each 00-59), separated by colons.

  Returns:
    The number of seconds as an int.

  Raises:
    problems.Error: if time_string does not have that form.
  """
  m = re.match(r'(\d{1,3}):([0-5]\d):([0-5]\d)$', time_string)
  # ignored: matching for leap seconds
  if not m:
    # Call form of raise; the "raise Class, value" statement is deprecated and
    # only valid on Python 2.
    raise problems.Error('Bad HH:MM:SS "%s"' % time_string)
  hours, minutes, seconds = [int(field) for field in m.groups()]
  return hours * 3600 + minutes * 60 + seconds
 
 
def FormatSecondsSinceMidnight(s):
  """Formats an int number of seconds past midnight into a string
  as "HH:MM:SS".

  Args:
    s: int number of seconds. Values of 86400 or more give an hour of 24 or
      more.

  Returns:
    A string with each field zero padded to at least two digits.
  """
  # Floor division states the intended integer arithmetic explicitly instead
  # of relying on "/" truncating ints, which it only does on Python 2.
  return "%02d:%02d:%02d" % (s // 3600, (s // 60) % 60, s % 60)
 
 
def DateStringToDateObject(date_string):
  """Build a datetime.date from the first eight characters of "YYYYMMDD"."""
  # If this becomes a bottleneck date objects could be cached
  year, month, day = date_string[0:4], date_string[4:6], date_string[6:8]
  return datetime.date(int(year), int(month), int(day))
 
 
def FloatStringToFloat(float_string, problems=None):
  """Parse a string holding a decimal number and return it as a float.

  Args:
    float_string: the text to parse.
    problems: optional reporter. Its InvalidFloatValue() method is called with
      float_string when the text is accepted by float() but is not written as
      an optionally signed run of digits with an optional fractional part.

  Raises:
    TypeError: if float_string is not a string.
    ValueError: if float_string can't be parsed or contains an "x".
  """
  # re.match() raises TypeError unless given a string.
  is_plain_decimal = re.match(r"^[+-]?\d+(\.\d+)?$", float_string) is not None
  # float() raises ValueError if the string can't be parsed.
  result = float(float_string)

  # This is needed because Python 2.4 does not complain about float("0x20").
  # But it does complain about float("0b10"), so this should be enough.
  if "x" in float_string:
    raise ValueError()

  if problems is not None and not is_plain_decimal:
    # A float according to Python, but not in the expected notation.
    problems.InvalidFloatValue(float_string)
  return result
 
def NonNegIntStringToInt(int_string, problems=None):
  """Parse a string holding a non-negative integer and return it as an int.

  Args:
    int_string: the text to parse.
    problems: optional reporter. Its InvalidNonNegativeIntegerValue() method is
      called with int_string when the text is accepted by int() but is not
      written as plain digits without a sign or leading zeros.

  Raises:
    TypeError: if int_string is not a string.
    ValueError: if int_string can't be parsed or is negative.
  """
  # re.match() raises TypeError unless given a string.
  is_plain_digits = re.match(r"^(?:0|[1-9]\d*)$", int_string) is not None
  # int() raises ValueError if the string can't be parsed.
  result = int(int_string)

  if result < 0:
    raise ValueError()

  if problems is not None and not is_plain_digits:
    # An int according to Python, but not in the expected notation.
    problems.InvalidNonNegativeIntegerValue(int_string)
  return result
 
EARTH_RADIUS = 6378135          # in meters
def ApproximateDistance(degree_lat1, degree_lng1, degree_lat2, degree_lng2):
  """Return the approximate distance in meters between two points.

  The points are given as latitude and longitude in degrees. The Earth is
  treated as a sphere of radius EARTH_RADIUS."""
  # TODO: change to ellipsoid approximation, such as
  # http://www.codeguru.com/Cpp/Cpp/algorithms/article.php/c5115/
  lat1, lng1, lat2, lng2 = [
      math.radians(degrees)
      for degrees in (degree_lat1, degree_lng1, degree_lat2, degree_lng2)]
  # Sines of half the latitude and longitude differences.
  sin_half_dlat = math.sin(0.5 * (lat2 - lat1))
  sin_half_dlng = math.sin(0.5 * (lng2 - lng1))
  x = (sin_half_dlat * sin_half_dlat +
       sin_half_dlng * sin_half_dlng * math.cos(lat1) * math.cos(lat2))
  # max() guards against rounding pushing 1.0 - x slightly below zero.
  angle = 2 * math.atan2(math.sqrt(x), math.sqrt(max(0.0, 1.0 - x)))
  return EARTH_RADIUS * angle
 
 
def ApproximateDistanceBetweenStops(stop1, stop2):
  """Return the approximate distance in meters between two stops.

  Each stop supplies its position through its stop_lat and stop_lon
  attributes. Assumes the Earth is a sphere."""
  from_lat, from_lon = stop1.stop_lat, stop1.stop_lon
  to_lat, to_lon = stop2.stop_lat, stop2.stop_lon
  return ApproximateDistance(from_lat, from_lon, to_lat, to_lon)
 
class CsvUnicodeWriter:
  """
  Create a wrapper around a csv writer object which can safely write unicode
  values. Passes all arguments to csv.writer.
  """
  def __init__(self, *args, **kwargs):
    """Create the wrapped writer with csv.writer(*args, **kwargs)."""
    self.writer = csv.writer(*args, **kwargs)

  def writerow(self, row):
    """Write row to the csv file. Any unicode strings in row are encoded as
    utf-8.

    If the wrapped writer fails, the row is printed to stdout both as given
    and as encoded, then the exception is re-raised."""
    encoded_row = []
    for s in row:
      if isinstance(s, unicode):
        encoded_row.append(s.encode("utf-8"))
      else:
        encoded_row.append(s)
    try:
      self.writer.writerow(encoded_row)
    except Exception:
      sys.stdout.write('error writing %s as %s\n' % (row, encoded_row))
      # A bare raise keeps the original traceback; "raise e" would restart it
      # from this line on Python 2.
      raise

  def writerows(self, rows):
    """Write rows to the csv file. Any unicode strings in rows are encoded as
    utf-8."""
    for row in rows:
      self.writerow(row)

  def __getattr__(self, name):
    """Look up any other attribute on the wrapped csv writer."""
    return getattr(self.writer, name)
 
# Character sequences (utf-8 encoded) that should never appear inside the csv
# data, each mapped to a human readable description of the sequence.
INVALID_LINE_SEPARATOR_UTF8 = {
    '\x0c': 'ASCII Form Feed 0x0C',
    # A carriage return is legitimate as part of an end of line, but nowhere
    # else.
    '\x0d': 'ASCII Carriage Return 0x0D, \\r',
    '\xe2\x80\xa8': 'Unicode LINE SEPARATOR U+2028',
    '\xe2\x80\xa9': 'Unicode PARAGRAPH SEPARATOR U+2029',
    '\xc2\x85': 'Unicode NEXT LINE SEPARATOR U+0085',
}
 
class EndOfLineChecker:
  """Wrapper for a file-like object that checks for consistent line ends.
 
  The check for consistent end of lines (all CR LF or all LF) only happens if
  next() is called until it raises StopIteration.
  """
  def __init__(self, f, name, problems):
    """Create new object.
 
    Args:
      f: file-like object to wrap
      name: name to use for f. StringIO objects don't have a name attribute.
      problems: a ProblemReporterBase object
    """
    self._f = f
    self._name = name
    self._crlf = 0
    self._crlf_examples = []
    self._lf = 0
    self._lf_examples = []
    self._line_number = 0  # first line will be number 1
    self._problems = problems
 
  def __iter__(self):
    """Return self; this object is its own iterator, see next()."""
    return self
 
  def next(self):
    """Return next line without end of line marker or raise StopIteration."""
    try:
      next_line = self._f.next()
    except StopIteration:
      self._FinalCheck()
      raise
 
    self._line_number += 1
    m_eol = re.search(r"[\x0a\x0d]*$", next_line)
    if m_eol.group() == "\x0d\x0a":
      self._crlf += 1
      if self._crlf <= 5:
        self._crlf_examples.append(self._line_number)
    elif m_eol.group() == "\x0a":
      self._lf += 1
      if self._lf <= 5:
        self._lf_examples.append(self._line_number)
    elif m_eol.group() == "":
      # Should only happen at the end of the file
      try:
        self._f.next()
        raise RuntimeError("Unexpected row without new line sequence")
      except StopIteration:
        # Will be raised again when EndOfLineChecker.next() is next called
        pass
    else:
      self._problems.InvalidLineEnd(
        codecs.getencoder('string_escape')(m_eol.group())[0],
        (self._name, self._line_number))
    next_line_contents = next_line[0:m_eol.start()]
    for seq, name in INVALID_LINE_SEPARATOR_UTF8.items():
      if next_line_contents.find(seq) != -1:
        self._problems.OtherProblem