gitphp 0.2.9.1 :: bus.git/blobdiff

#!/usr/bin/python2.5

#

# Licensed under the Apache License, Version 2.0 (the "License");

# you may not use this file except in compliance with the License.

# You may obtain a copy of the License at

#

# http://www.apache.org/licenses/LICENSE-2.0

#

# Unless required by applicable law or agreed to in writing, software

# distributed under the License is distributed on an "AS IS" BASIS,

# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

# See the License for the specific language governing permissions and

# limitations under the License.

"""Validates a GTFS file.

For usage information run feedvalidator.py --help

"""

import bisect

import codecs

import datetime

from transitfeed.util import defaultdict

import optparse

import os

import os.path

import re

import socket

import sys

import time

import transitfeed

from transitfeed import TYPE_ERROR, TYPE_WARNING

from urllib2 import Request, urlopen, HTTPError, URLError

from transitfeed import util

import webbrowser

# Base URL of the project's Subversion tags directory.
# NOTE(review): how this URL is consumed is not visible in this chunk.
SVN_TAG_URL = 'http://googletransitdatafeed.googlecode.com/svn/tags/'

def MaybePluralizeWord(count, word):
  """Return word, with 's' appended unless count is exactly 1.

  Args:
    count: number of items the word describes
    word: singular form of the noun

  Returns:
    word when count == 1, otherwise word + 's' (so 0 is treated as plural)
  """
  if count == 1:
    return word
  return word + 's'

def PrettyNumberWord(count, word):
  """Return count and word joined by a space, e.g. '1 error' or '3 errors'.

  Args:
    count: integer number of items
    word: singular form of the noun; pluralized by MaybePluralizeWord

  Returns:
    string of the form '<count> <word or word + s>'
  """
  return '%d %s' % (count, MaybePluralizeWord(count, word))

def UnCamelCase(camel):
  """Return camel with a space inserted at every lower-to-upper case boundary.

  Only an ASCII lowercase letter directly followed by an ASCII uppercase
  letter is split, so 'InvalidValue' becomes 'Invalid Value' while a run of
  capitals such as 'HTTPError' is left unchanged.
  """
  return re.sub(r'([a-z])([A-Z])', r'\1 \2', camel)

def ProblemCountText(error_count, warning_count):
  """Return a phrase such as '2 errors and 1 warning'.

  A count of zero is left out entirely, so the result may name only errors,
  only warnings, or be the empty string when both counts are zero.

  Args:
    error_count: integer number of errors
    warning_count: integer number of warnings
  """
  results = []
  if error_count:
    results.append(PrettyNumberWord(error_count, 'error'))
  if warning_count:
    results.append(PrettyNumberWord(warning_count, 'warning'))
  return ' and '.join(results)

def CalendarSummary(schedule):
  """Summarize how many trips run on each date over the next 60 days.

  The summary covers the overlap of the feed's date range, as returned by
  schedule.GetDateRange(), and the window from today to 60 days from today.

  Args:
    schedule: object providing GetDateRange() and
        GenerateDateTripsDeparturesList(start, end)

  Returns:
    {} when the feed has no date range, when either end of the range cannot
    be parsed, or when no per-date data is produced. Otherwise a dict with
    keys 'mean_trips', 'max_trips', 'max_trips_dates', 'min_trips',
    'min_trips_dates', 'date_trips_departures' and 'date_summary_range'.
  """
  today = datetime.date.today()
  summary_end_date = today + datetime.timedelta(days=60)
  start_date, end_date = schedule.GetDateRange()

  if not start_date or not end_date:
    return {}

  try:
    start_date_object = transitfeed.DateStringToDateObject(start_date)
    end_date_object = transitfeed.DateStringToDateObject(end_date)
  except ValueError:
    return {}

  # Get the list of trips only during the period the feed is active.
  # As such we have to check if it starts in the future and/or if it ends
  # in less than 60 days.
  date_trips_departures = schedule.GenerateDateTripsDeparturesList(
      max(today, start_date_object),
      min(summary_end_date, end_date_object))

  if not date_trips_departures:
    return {}

  first_date = date_trips_departures[0][0]
  last_date = date_trips_departures[-1][0]

  # Check that the dates which will be shown in summary agree with these
  # calculations. Failure implies a bug which should be fixed. It isn't good
  # for users to discover assertion failures but means it will likely be fixed.
  assert start_date <= first_date.strftime("%Y%m%d")
  assert end_date >= last_date.strftime("%Y%m%d")

  # Generate a map from int number of trips in a day to a list of date objects
  # with that many trips. Dates are appended in the order they are listed.
  trips_dates = defaultdict(list)
  trips = 0
  for date, day_trips, _ in date_trips_departures:
    trips += day_trips
    trips_dates[day_trips].append(date)

  # With int operands this is floor division under the Python 2 interpreter
  # named in the file's shebang.
  mean_trips = trips / len(date_trips_departures)
  max_trips = max(trips_dates.keys())
  min_trips = min(trips_dates.keys())

  calendar_summary = {}
  calendar_summary['mean_trips'] = mean_trips
  calendar_summary['max_trips'] = max_trips
  calendar_summary['max_trips_dates'] = FormatDateList(trips_dates[max_trips])
  calendar_summary['min_trips'] = min_trips
  calendar_summary['min_trips_dates'] = FormatDateList(trips_dates[min_trips])
  calendar_summary['date_trips_departures'] = date_trips_departures
  calendar_summary['date_summary_range'] = "%s to %s" % (
      first_date.strftime("%a %b %d"),
      last_date.strftime("%a %b %d"))

  return calendar_summary

def FormatDateList(dates):
  """Return text such as '5 service dates (Mon Jan 01, Tue Jan 02, ...)'.

  Args:
    dates: sequence of objects with a strftime method

  Returns:
    "0 service dates" when dates is empty. Otherwise the number of dates
    followed, in parentheses, by the first three formatted as "%a %b %d",
    with "..." appended when there are more than three.
  """
  if not dates:
    return "0 service dates"

  formatted = [d.strftime("%a %b %d") for d in dates[0:3]]
  if len(dates) > 3:
    formatted.append("...")
  return "%s (%s)" % (PrettyNumberWord(len(dates), "service date"),
                      ", ".join(formatted))

def MaxVersion(versions):
  """Return the highest dotted version string in versions, or None.

  Args:
    versions: iterable of strings such as '1.2.3'; falsy entries (None, '')
        are ignored

  Returns:
    The entry whose dot-separated integer components compare greatest; when
    several compare equal the earliest one is returned. None when there is
    no non-empty entry.

  Raises:
    ValueError: if two or more entries are compared and a component is not
        an integer
  """
  candidates = [v for v in versions if v]
  if not candidates:
    return None
  if len(candidates) == 1:
    # A lone entry is returned without being parsed, as the previous
    # comparison-based sort never inspected a single-element list.
    return candidates[0]
  # key-based max replaces the cmp-function sort, which no longer exists in
  # Python 3; max() returns the first of several equal maxima.
  return max(candidates,
             key=lambda v: [int(item) for item in v.split('.')])

class CountingConsoleProblemAccumulator(transitfeed.SimpleProblemAccumulator):
  """Problem accumulator that also tallies the errors and warnings it sees.

  Every reported problem is first handed to
  transitfeed.SimpleProblemAccumulator._Report and then counted.
  """

  def __init__(self):
    self._error_count = 0
    self._warning_count = 0

  def _Report(self, e):
    """Delegate e to the base class, then count it as error or warning."""
    transitfeed.SimpleProblemAccumulator._Report(self, e)
    if e.IsError():
      self._error_count += 1
    else:
      self._warning_count += 1

  def ErrorCount(self):
    """Return the number of problems reported for which IsError() held."""
    return self._error_count

  def WarningCount(self):
    """Return the number of reported problems that were not errors."""
    return self._warning_count

  def FormatCount(self):
    """Return the counts as text, e.g. '2 errors and 1 warning'."""
    return ProblemCountText(self.ErrorCount(), self.WarningCount())

  def HasIssues(self):
    """Return a truthy value if any error or warning has been reported."""
    return self.ErrorCount() or self.WarningCount()

class BoundedProblemList(object):
  """A list of one type of ExceptionWithContext objects with bounded size.

  Every added object is counted, but at most size_bound of them are kept.
  Comparable objects are kept in sorted order and the largest is dropped on
  overflow; objects that cannot be compared are kept first-come first-served.
  """

  def __init__(self, size_bound):
    """Create an empty list that keeps at most size_bound objects."""
    self._count = 0
    self._exceptions = []
    self._size_bound = size_bound

  def Add(self, e):
    """Count e and keep it if it ranks within the size bound."""
    self._count += 1
    try:
      bisect.insort(self._exceptions, e)
    except TypeError:
      # The base class ExceptionWithContext raises this exception in __cmp__
      # to signal that an object is not comparable. Instead of keeping the
      # most significant issue keep the first reported.
      if self._count <= self._size_bound:
        self._exceptions.append(e)
    else:
      # self._exceptions is in order. Drop the least significant if the list
      # is now too long.
      if self._count > self._size_bound:
        del self._exceptions[-1]

  def _GetDroppedCount(self):
    """Return how many added objects are not in the kept list."""
    return self._count - len(self._exceptions)

  def __repr__(self):
    return "<BoundedProblemList %s>" % repr(self._exceptions)

  # Total number of objects passed to Add, kept or not.
  count = property(lambda s: s._count)
  # Number of objects passed to Add that were not kept.
  dropped_count = property(_GetDroppedCount)
  # The kept objects; this is the internal list itself, not a copy.
  problems = property(lambda s: s._exceptions)

class LimitPerTypeProblemAccumulator(transitfeed.ProblemAccumulatorInterface):
  """Accumulator that groups problems by type and by exception class name.

  Each (type, class name) pair gets its own BoundedProblemList limited to
  limit_per_type kept problems.
  """

  def __init__(self, limit_per_type):
    # {TYPE_WARNING: {"ClassName": BoundedProblemList()}}
    self._type_to_name_to_problist = {
        TYPE_WARNING: defaultdict(lambda: BoundedProblemList(limit_per_type)),
        TYPE_ERROR: defaultdict(lambda: BoundedProblemList(limit_per_type))
    }

  def HasIssues(self):
    """Return a truthy value if any error or warning class has an entry."""
    return (self._type_to_name_to_problist[TYPE_ERROR] or
            self._type_to_name_to_problist[TYPE_WARNING])

  def _Report(self, e):
    """File e under its GetType() value and its class name."""
    self._type_to_name_to_problist[e.GetType()][e.__class__.__name__].Add(e)

  def ErrorCount(self):
    """Return the total number of errors reported, kept or dropped."""
    error_sets = self._type_to_name_to_problist[TYPE_ERROR].values()
    return sum(problist.count for problist in error_sets)

  def WarningCount(self):
    """Return the total number of warnings reported, kept or dropped."""
    warning_sets = self._type_to_name_to_problist[TYPE_WARNING].values()
    return sum(problist.count for problist in warning_sets)

  def ProblemList(self, problem_type, class_name):
    """Return the BoundedProblemList object for given type and class."""
    # NOTE(review): the maps are defaultdicts, so presumably looking up a
    # class name that was never reported inserts an empty list - confirm.
    return self._type_to_name_to_problist[problem_type][class_name]

  def ProblemListMap(self, problem_type):
    """Return the map from class name to BoundedProblemList object."""
    return self._type_to_name_to_problist[problem_type]

class HTMLCountingProblemAccumulator(LimitPerTypeProblemAccumulator):

def FormatType(self, f, level_name, class_problist):
  """Write the HTML dumping all problems of one type.

  One header and one <ul> is written per class name, in class name order.

  Args:
    f: file object open for writing
    level_name: string such as "Error" or "Warning"
    class_problist: iterable of tuples (class name,
        BoundedProblemList object); it is not modified
  """
  output = []
  # sorted() rather than list.sort(): accepts any iterable and leaves the
  # caller's sequence untouched. Ordering is by class name only.
  for classname, problist in sorted(class_problist, key=lambda pair: pair[0]):
    output.append('<h4 class="issueHeader"><a name="%s%s">%s</a></h4><ul>\n' %
                  (level_name, classname, UnCamelCase(classname)))
    for e in problist.problems:
      self.FormatException(e, output)
    if problist.dropped_count:
      output.append('<li>and %d more of this type.' %
                    (problist.dropped_count))
    output.append('</ul>\n')
  f.write(''.join(output))

def FormatTypeSummaryTable(self, level_name, name_to_problist):
  """Return an HTML table listing the number of problems by class name.

  Each row links to the anchor '#<level_name><class name>'.

  Args:
    level_name: string such as "Error" or "Warning"
    name_to_problist: dict mapping class name to a BoundedProblemList object

  Returns:
    HTML in a string
  """
  output = ['<table>']
  for classname in sorted(name_to_problist):
    problist = name_to_problist[classname]
    human_name = MaybePluralizeWord(problist.count, UnCamelCase(classname))
    output.append('<tr><td>%d</td><td><a href="#%s%s">%s</a></td></tr>\n' %
                  (problist.count, level_name, classname, human_name))
  output.append('</table>\n')
  return ''.join(output)

def FormatException(self, e, output):
  """Append HTML version of e to list output.

  The problem text is always written. The file location and a one-row table
  dumping the offending row are written only as far as e has the attributes
  needed (file_name, row, headers, column_name).

  Args:
    e: problem object providing GetDictToFormat() and FormatProblem(d)
    output: list of strings; HTML fragments are appended to it
  """
  d = e.GetDictToFormat()
  for k in ('file_name', 'feedname', 'column_name'):
    if k in d:
      d[k] = '<code>%s</code>' % d[k]
  problem_text = e.FormatProblem(d).replace('\n', '<br>')
  output.append('<li>')
  output.append('<div class="problem">%s</div>' %
                transitfeed.EncodeUnicode(problem_text))
  try:
    if hasattr(e, 'row_num'):
      line_str = 'line %d of ' % e.row_num
    else:
      line_str = ''
    output.append('in %s<code>%s</code><br>\n' %
                  (line_str, e.file_name))
    row = e.row
    headers = e.headers
    column_name = e.column_name
    header_cells = []  # HTML
    data_cells = []  # HTML
    # NOTE(review): header and value are interpolated without HTML escaping;
    # confirm whether raw feed content can reach this point.
    for header, value in zip(headers, row):
      if header == column_name:
        attributes = ' class="problem"'
      else:
        attributes = ''
      header_cells.append('<th%s>%s</th>' % (attributes, header))
      data_cells.append('<td%s>%s</td>' % (attributes, value))
    # Make sure output is encoded into UTF-8
    output.append('<table class="dump"><tr>%s</tr>\n' %
                  transitfeed.EncodeUnicode(''.join(header_cells)))
    output.append('<tr>%s</tr></table>\n' %
                  transitfeed.EncodeUnicode(''.join(data_cells)))
  except AttributeError:
    # Deliberately best-effort: e lacks one of the optional attributes read
    # above. The exception is not bound to a name so that the parameter e is
    # not overwritten.
    pass
  output.append('<br></li>\n')

def FormatCount(self):
  """Return the error and warning totals as text via ProblemCountText."""
  return ProblemCountText(self.ErrorCount(), self.WarningCount())

def CountTable(self):
  """Return an HTML table summarizing error and warning counts.

  The first row holds the total number of errors and of warnings; the second
  row holds, for each, the per-class table built by FormatTypeSummaryTable.
  A column appears only when ProblemListMap for that type is non-empty.
  """
  error_map = self.ProblemListMap(TYPE_ERROR)
  warning_map = self.ProblemListMap(TYPE_WARNING)

  output = []
  output.append('<table class="count_outside">\n')
  output.append('<tr>')
  if error_map:
    output.append('<td><span class="fail">%s</span></td>' %
                  PrettyNumberWord(self.ErrorCount(), "error"))
  if warning_map:
    output.append('<td><span class="fail">%s</span></td>' %
                  PrettyNumberWord(self.WarningCount(), "warning"))
  # The second row's <tr> is not explicitly closed before </table>; the
  # emitted markup is kept exactly as it was.
  output.append('</tr>\n<tr>')
  if error_map:
    output.append('<td>\n')
    output.append(self.FormatTypeSummaryTable("Error", error_map))
    output.append('</td>\n')
  if warning_map:
    output.append('<td>\n')
    output.append(self.FormatTypeSummaryTable("Warning", warning_map))
    output.append('</td>\n')
  output.append('</table>')
  return ''.join(output)

def WriteOutput(self, feed_location, f, schedule, other_problems):

"""Write the html output to f."""

if self.HasIssues():

if self.ErrorCount() + self.WarningCount() == 1:

summary = ('<span class="fail">Found this problem:</span>\n%s' %

self.CountTable())

else:

summary = ('<span class="fail">Found these problems:</span>\n%s' %

self.CountTable())

else:

summary = '<span class="pass">feed validated successfully</span>'

if other_problems is not None:

summary = ('<span class="fail">\n%s</span><br><br>' %

other_problems) + summary

basename = os.path.basename(feed_location)

feed_path = (feed_location[:feed_location.rfind(basename)], basename)

agencies = ', '.join(['<a href="%s">%s</a>' % (a.agency_url, a.agency_name)

for a in schedule.GetAgencyList()])

if not agencies:

agencies = '?'

dates = "No valid service dates found"

(start, end) = schedule.GetDateRange()

if start and end:

	#!/usr/bin/python2.5

	# Copyright (C) 2007 Google Inc.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.


	"""Validates a GTFS file.

	For usage information run feedvalidator.py --help
	"""

	import bisect
	import codecs
	import datetime
	from transitfeed.util import defaultdict
	import optparse
	import os
	import os.path
	import re
	import socket
	import sys
	import time
	import transitfeed
	from transitfeed import TYPE_ERROR, TYPE_WARNING
	from urllib2 import Request, urlopen, HTTPError, URLError
	from transitfeed import util
	import webbrowser

	# Base URL of the project's Subversion tags directory.
	# NOTE(review): how this URL is consumed is not visible in this chunk.
	SVN_TAG_URL = 'http://googletransitdatafeed.googlecode.com/svn/tags/'


	def MaybePluralizeWord(count, word):
	  """Return word, with 's' appended unless count is exactly 1.

	  Args:
	    count: number of items the word describes
	    word: singular form of the noun

	  Returns:
	    word when count == 1, otherwise word + 's' (so 0 is treated as plural)
	  """
	  if count == 1:
	    return word
	  return word + 's'


	def PrettyNumberWord(count, word):
	  """Return count and word joined by a space, e.g. '1 error' or '3 errors'.

	  Args:
	    count: integer number of items
	    word: singular form of the noun; pluralized by MaybePluralizeWord

	  Returns:
	    string of the form '<count> <word or word + s>'
	  """
	  return '%d %s' % (count, MaybePluralizeWord(count, word))


	def UnCamelCase(camel):
	  """Return camel with a space inserted at every lower-to-upper case boundary.

	  Only an ASCII lowercase letter directly followed by an ASCII uppercase
	  letter is split, so 'InvalidValue' becomes 'Invalid Value' while a run of
	  capitals such as 'HTTPError' is left unchanged.
	  """
	  return re.sub(r'([a-z])([A-Z])', r'\1 \2', camel)


	def ProblemCountText(error_count, warning_count):
	  """Return a phrase such as '2 errors and 1 warning'.

	  A count of zero is left out entirely, so the result may name only errors,
	  only warnings, or be the empty string when both counts are zero.

	  Args:
	    error_count: integer number of errors
	    warning_count: integer number of warnings
	  """
	  results = []
	  if error_count:
	    results.append(PrettyNumberWord(error_count, 'error'))
	  if warning_count:
	    results.append(PrettyNumberWord(warning_count, 'warning'))
	  return ' and '.join(results)


	def CalendarSummary(schedule):
	  """Summarize how many trips run on each date over the next 60 days.

	  The summary covers the overlap of the feed's date range, as returned by
	  schedule.GetDateRange(), and the window from today to 60 days from today.

	  Args:
	    schedule: object providing GetDateRange() and
	        GenerateDateTripsDeparturesList(start, end)

	  Returns:
	    {} when the feed has no date range, when either end of the range cannot
	    be parsed, or when no per-date data is produced. Otherwise a dict with
	    keys 'mean_trips', 'max_trips', 'max_trips_dates', 'min_trips',
	    'min_trips_dates', 'date_trips_departures' and 'date_summary_range'.
	  """
	  today = datetime.date.today()
	  summary_end_date = today + datetime.timedelta(days=60)
	  start_date, end_date = schedule.GetDateRange()

	  if not start_date or not end_date:
	    return {}

	  try:
	    start_date_object = transitfeed.DateStringToDateObject(start_date)
	    end_date_object = transitfeed.DateStringToDateObject(end_date)
	  except ValueError:
	    return {}

	  # Get the list of trips only during the period the feed is active.
	  # As such we have to check if it starts in the future and/or if it ends
	  # in less than 60 days.
	  date_trips_departures = schedule.GenerateDateTripsDeparturesList(
	      max(today, start_date_object),
	      min(summary_end_date, end_date_object))

	  if not date_trips_departures:
	    return {}

	  first_date = date_trips_departures[0][0]
	  last_date = date_trips_departures[-1][0]

	  # Check that the dates which will be shown in summary agree with these
	  # calculations. Failure implies a bug which should be fixed. It isn't good
	  # for users to discover assertion failures but means it will likely be fixed.
	  assert start_date <= first_date.strftime("%Y%m%d")
	  assert end_date >= last_date.strftime("%Y%m%d")

	  # Generate a map from int number of trips in a day to a list of date objects
	  # with that many trips. Dates are appended in the order they are listed.
	  trips_dates = defaultdict(list)
	  trips = 0
	  for date, day_trips, _ in date_trips_departures:
	    trips += day_trips
	    trips_dates[day_trips].append(date)

	  # With int operands this is floor division under the Python 2 interpreter
	  # named in the file's shebang.
	  mean_trips = trips / len(date_trips_departures)
	  max_trips = max(trips_dates.keys())
	  min_trips = min(trips_dates.keys())

	  calendar_summary = {}
	  calendar_summary['mean_trips'] = mean_trips
	  calendar_summary['max_trips'] = max_trips
	  calendar_summary['max_trips_dates'] = FormatDateList(trips_dates[max_trips])
	  calendar_summary['min_trips'] = min_trips
	  calendar_summary['min_trips_dates'] = FormatDateList(trips_dates[min_trips])
	  calendar_summary['date_trips_departures'] = date_trips_departures
	  calendar_summary['date_summary_range'] = "%s to %s" % (
	      first_date.strftime("%a %b %d"),
	      last_date.strftime("%a %b %d"))

	  return calendar_summary


	def FormatDateList(dates):
	  """Return text such as '5 service dates (Mon Jan 01, Tue Jan 02, ...)'.

	  Args:
	    dates: sequence of objects with a strftime method

	  Returns:
	    "0 service dates" when dates is empty. Otherwise the number of dates
	    followed, in parentheses, by the first three formatted as "%a %b %d",
	    with "..." appended when there are more than three.
	  """
	  if not dates:
	    return "0 service dates"

	  formatted = [d.strftime("%a %b %d") for d in dates[0:3]]
	  if len(dates) > 3:
	    formatted.append("...")
	  return "%s (%s)" % (PrettyNumberWord(len(dates), "service date"),
	                      ", ".join(formatted))


	def MaxVersion(versions):
	  """Return the highest dotted version string in versions, or None.

	  Args:
	    versions: iterable of strings such as '1.2.3'; falsy entries (None, '')
	        are ignored

	  Returns:
	    The entry whose dot-separated integer components compare greatest; when
	    several compare equal the earliest one is returned. None when there is
	    no non-empty entry.

	  Raises:
	    ValueError: if two or more entries are compared and a component is not
	        an integer
	  """
	  candidates = [v for v in versions if v]
	  if not candidates:
	    return None
	  if len(candidates) == 1:
	    # A lone entry is returned without being parsed, as the previous
	    # comparison-based sort never inspected a single-element list.
	    return candidates[0]
	  # key-based max replaces the cmp-function sort, which no longer exists in
	  # Python 3; max() returns the first of several equal maxima.
	  return max(candidates,
	             key=lambda v: [int(item) for item in v.split('.')])


	class CountingConsoleProblemAccumulator(transitfeed.SimpleProblemAccumulator):
	  """Problem accumulator that also tallies the errors and warnings it sees.

	  Every reported problem is first handed to
	  transitfeed.SimpleProblemAccumulator._Report and then counted.
	  """

	  def __init__(self):
	    self._error_count = 0
	    self._warning_count = 0

	  def _Report(self, e):
	    """Delegate e to the base class, then count it as error or warning."""
	    transitfeed.SimpleProblemAccumulator._Report(self, e)
	    if e.IsError():
	      self._error_count += 1
	    else:
	      self._warning_count += 1

	  def ErrorCount(self):
	    """Return the number of problems reported for which IsError() held."""
	    return self._error_count

	  def WarningCount(self):
	    """Return the number of reported problems that were not errors."""
	    return self._warning_count

	  def FormatCount(self):
	    """Return the counts as text, e.g. '2 errors and 1 warning'."""
	    return ProblemCountText(self.ErrorCount(), self.WarningCount())

	  def HasIssues(self):
	    """Return a truthy value if any error or warning has been reported."""
	    return self.ErrorCount() or self.WarningCount()


	class BoundedProblemList(object):
	  """A list of one type of ExceptionWithContext objects with bounded size.

	  Every added object is counted, but at most size_bound of them are kept.
	  Comparable objects are kept in sorted order and the largest is dropped on
	  overflow; objects that cannot be compared are kept first-come first-served.
	  """

	  def __init__(self, size_bound):
	    """Create an empty list that keeps at most size_bound objects."""
	    self._count = 0
	    self._exceptions = []
	    self._size_bound = size_bound

	  def Add(self, e):
	    """Count e and keep it if it ranks within the size bound."""
	    self._count += 1
	    try:
	      bisect.insort(self._exceptions, e)
	    except TypeError:
	      # The base class ExceptionWithContext raises this exception in __cmp__
	      # to signal that an object is not comparable. Instead of keeping the
	      # most significant issue keep the first reported.
	      if self._count <= self._size_bound:
	        self._exceptions.append(e)
	    else:
	      # self._exceptions is in order. Drop the least significant if the list
	      # is now too long.
	      if self._count > self._size_bound:
	        del self._exceptions[-1]

	  def _GetDroppedCount(self):
	    """Return how many added objects are not in the kept list."""
	    return self._count - len(self._exceptions)

	  def __repr__(self):
	    return "<BoundedProblemList %s>" % repr(self._exceptions)

	  # Total number of objects passed to Add, kept or not.
	  count = property(lambda s: s._count)
	  # Number of objects passed to Add that were not kept.
	  dropped_count = property(_GetDroppedCount)
	  # The kept objects; this is the internal list itself, not a copy.
	  problems = property(lambda s: s._exceptions)


	class LimitPerTypeProblemAccumulator(transitfeed.ProblemAccumulatorInterface):
	  """Accumulator that groups problems by type and by exception class name.

	  Each (type, class name) pair gets its own BoundedProblemList limited to
	  limit_per_type kept problems.
	  """

	  def __init__(self, limit_per_type):
	    # {TYPE_WARNING: {"ClassName": BoundedProblemList()}}
	    self._type_to_name_to_problist = {
	        TYPE_WARNING: defaultdict(lambda: BoundedProblemList(limit_per_type)),
	        TYPE_ERROR: defaultdict(lambda: BoundedProblemList(limit_per_type))
	    }

	  def HasIssues(self):
	    """Return a truthy value if any error or warning class has an entry."""
	    return (self._type_to_name_to_problist[TYPE_ERROR] or
	            self._type_to_name_to_problist[TYPE_WARNING])

	  def _Report(self, e):
	    """File e under its GetType() value and its class name."""
	    self._type_to_name_to_problist[e.GetType()][e.__class__.__name__].Add(e)

	  def ErrorCount(self):
	    """Return the total number of errors reported, kept or dropped."""
	    error_sets = self._type_to_name_to_problist[TYPE_ERROR].values()
	    return sum(problist.count for problist in error_sets)

	  def WarningCount(self):
	    """Return the total number of warnings reported, kept or dropped."""
	    warning_sets = self._type_to_name_to_problist[TYPE_WARNING].values()
	    return sum(problist.count for problist in warning_sets)

	  def ProblemList(self, problem_type, class_name):
	    """Return the BoundedProblemList object for given type and class."""
	    # NOTE(review): the maps are defaultdicts, so presumably looking up a
	    # class name that was never reported inserts an empty list - confirm.
	    return self._type_to_name_to_problist[problem_type][class_name]

	  def ProblemListMap(self, problem_type):
	    """Return the map from class name to BoundedProblemList object."""
	    return self._type_to_name_to_problist[problem_type]


	class HTMLCountingProblemAccumulator(LimitPerTypeProblemAccumulator):
	def FormatType(self, f, level_name, class_problist):
	  """Write the HTML dumping all problems of one type.

	  One header and one <ul> is written per class name, in class name order.

	  Args:
	    f: file object open for writing
	    level_name: string such as "Error" or "Warning"
	    class_problist: iterable of tuples (class name,
	        BoundedProblemList object); it is not modified
	  """
	  output = []
	  # sorted() rather than list.sort(): accepts any iterable and leaves the
	  # caller's sequence untouched. Ordering is by class name only.
	  for classname, problist in sorted(class_problist, key=lambda pair: pair[0]):
	    output.append('<h4 class="issueHeader"><a name="%s%s">%s</a></h4><ul>\n' %
	                  (level_name, classname, UnCamelCase(classname)))
	    for e in problist.problems:
	      self.FormatException(e, output)
	    if problist.dropped_count:
	      output.append('<li>and %d more of this type.' %
	                    (problist.dropped_count))
	    output.append('</ul>\n')
	  f.write(''.join(output))

	def FormatTypeSummaryTable(self, level_name, name_to_problist):
	  """Return an HTML table listing the number of problems by class name.

	  Each row links to the anchor '#<level_name><class name>'.

	  Args:
	    level_name: string such as "Error" or "Warning"
	    name_to_problist: dict mapping class name to a BoundedProblemList object

	  Returns:
	    HTML in a string
	  """
	  output = ['<table>']
	  for classname in sorted(name_to_problist):
	    problist = name_to_problist[classname]
	    human_name = MaybePluralizeWord(problist.count, UnCamelCase(classname))
	    output.append('<tr><td>%d</td><td><a href="#%s%s">%s</a></td></tr>\n' %
	                  (problist.count, level_name, classname, human_name))
	  output.append('</table>\n')
	  return ''.join(output)

	def FormatException(self, e, output):
	  """Append HTML version of e to list output.

	  The problem text is always written. The file location and a one-row table
	  dumping the offending row are written only as far as e has the attributes
	  needed (file_name, row, headers, column_name).

	  Args:
	    e: problem object providing GetDictToFormat() and FormatProblem(d)
	    output: list of strings; HTML fragments are appended to it
	  """
	  d = e.GetDictToFormat()
	  for k in ('file_name', 'feedname', 'column_name'):
	    if k in d:
	      d[k] = '<code>%s</code>' % d[k]
	  problem_text = e.FormatProblem(d).replace('\n', '<br>')
	  output.append('<li>')
	  output.append('<div class="problem">%s</div>' %
	                transitfeed.EncodeUnicode(problem_text))
	  try:
	    if hasattr(e, 'row_num'):
	      line_str = 'line %d of ' % e.row_num
	    else:
	      line_str = ''
	    output.append('in %s<code>%s</code><br>\n' %
	                  (line_str, e.file_name))
	    row = e.row
	    headers = e.headers
	    column_name = e.column_name
	    header_cells = []  # HTML
	    data_cells = []  # HTML
	    # NOTE(review): header and value are interpolated without HTML escaping;
	    # confirm whether raw feed content can reach this point.
	    for header, value in zip(headers, row):
	      if header == column_name:
	        attributes = ' class="problem"'
	      else:
	        attributes = ''
	      header_cells.append('<th%s>%s</th>' % (attributes, header))
	      data_cells.append('<td%s>%s</td>' % (attributes, value))
	    # Make sure output is encoded into UTF-8
	    output.append('<table class="dump"><tr>%s</tr>\n' %
	                  transitfeed.EncodeUnicode(''.join(header_cells)))
	    output.append('<tr>%s</tr></table>\n' %
	                  transitfeed.EncodeUnicode(''.join(data_cells)))
	  except AttributeError:
	    # Deliberately best-effort: e lacks one of the optional attributes read
	    # above. The exception is not bound to a name so that the parameter e is
	    # not overwritten.
	    pass
	  output.append('<br></li>\n')

	def FormatCount(self):
	  """Return the error and warning totals as text via ProblemCountText."""
	  return ProblemCountText(self.ErrorCount(), self.WarningCount())

	def CountTable(self):
	  """Return an HTML table summarizing error and warning counts.

	  The first row holds the total number of errors and of warnings; the second
	  row holds, for each, the per-class table built by FormatTypeSummaryTable.
	  A column appears only when ProblemListMap for that type is non-empty.
	  """
	  error_map = self.ProblemListMap(TYPE_ERROR)
	  warning_map = self.ProblemListMap(TYPE_WARNING)

	  output = []
	  output.append('<table class="count_outside">\n')
	  output.append('<tr>')
	  if error_map:
	    output.append('<td><span class="fail">%s</span></td>' %
	                  PrettyNumberWord(self.ErrorCount(), "error"))
	  if warning_map:
	    output.append('<td><span class="fail">%s</span></td>' %
	                  PrettyNumberWord(self.WarningCount(), "warning"))
	  # The second row's <tr> is not explicitly closed before </table>; the
	  # emitted markup is kept exactly as it was.
	  output.append('</tr>\n<tr>')
	  if error_map:
	    output.append('<td>\n')
	    output.append(self.FormatTypeSummaryTable("Error", error_map))
	    output.append('</td>\n')
	  if warning_map:
	    output.append('<td>\n')
	    output.append(self.FormatTypeSummaryTable("Warning", warning_map))
	    output.append('</td>\n')
	  output.append('</table>')
	  return ''.join(output)

	def WriteOutput(self, feed_location, f, schedule, other_problems):
	"""Write the html output to f."""
	if self.HasIssues():
	if self.ErrorCount() + self.WarningCount() == 1:
	summary = ('<span class="fail">Found this problem:</span>\n%s' %
	self.CountTable())
	else:
	summary = ('<span class="fail">Found these problems:</span>\n%s' %
	self.CountTable())
	else:
	summary = '<span class="pass">feed validated successfully</span>'
	if other_problems is not None:
	summary = ('<span class="fail">\n%s</span><br><br>' %
	other_problems) + summary

	basename = os.path.basename(feed_location)
	feed_path = (feed_location[:feed_location.rfind(basename)], basename)

	agencies = ', '.join(['<a href="%s">%s</a>' % (a.agency_url, a.agency_name)
	for a in schedule.GetAgencyList()])
	if not agencies:
	agencies = '?'

	dates = "No valid service dates found"
	(start, end) = schedule.GetDateRange()
	if start and end: