From: Alex Sadleir Date: Fri, 28 Feb 2014 05:34:19 +0000 Subject: Fixes to make this work on not-data.gov.uk X-Git-Url: https://maxious.lambdacomplex.org/git/?p=ckanext-ga-report.git&a=commitdiff&h=e56e17d15791ab06ab5fc62d10fedcf302cf1141 --- Fixes to make this work on not-data.gov.uk --- --- a/ckanext/ga_report/command.py +++ b/ckanext/ga_report/command.py @@ -23,7 +23,7 @@ import ckan.model as model model.Session.remove() model.Session.configure(bind=model.meta.engine) - log = logging.getLogger('ckanext.ga-report') + log = logging.getLogger('ckanext.ga_report') import ga_model ga_model.init_tables() @@ -52,9 +52,37 @@ assuming it is correct. """ from ga_auth import init_service - init_service('token.dat', - self.args[0] if self.args - else 'credentials.json') + init_service('token.dat', 'credentials.json') + +class FixTimePeriods(CkanCommand): + """ + Fixes the 'All' records for GA_Urls + + It is possible that older urls that haven't recently been visited + do not have All records. This command will traverse through those + records and generate valid All records for them. + """ + summary = __doc__.split('\n')[0] + usage = __doc__ + max_args = 0 + min_args = 0 + + def __init__(self, name): + super(FixTimePeriods, self).__init__(name) + + def command(self): + import ckan.model as model + from ga_model import post_update_url_stats + self._load_config() + model.Session.remove() + model.Session.configure(bind=model.meta.engine) + + log = logging.getLogger('ckanext.ga_report') + + log.info("Updating 'All' records for old URLs") + post_update_url_stats() + log.info("Processing complete") + class LoadAnalytics(CkanCommand): @@ -85,6 +113,7 @@ default=False, dest='skip_url_stats', help='Skip the download of URL data - just do site-wide stats') + self.token = "" def command(self): self._load_config() @@ -99,14 +128,14 @@ return try: - svc = init_service(ga_token_filepath, None) + self.token, svc = init_service(ga_token_filepath, None) except TypeError: print ('Have you correctly run the getauthtoken task and ' 'specified the correct token file in the CKAN config under ' '"googleanalytics.token.filepath"?') return - downloader = DownloadAnalytics(svc, profile_id=get_profile_id(svc), + downloader = DownloadAnalytics(svc, self.token, profile_id=get_profile_id(svc), delete_first=self.options.delete_first, skip_url_stats=self.options.skip_url_stats) --- a/ckanext/ga_report/controller.py +++ b/ckanext/ga_report/controller.py @@ -1,6 +1,7 @@ import re import csv import sys +import json import logging import operator import collections @@ -13,6 +14,7 @@ log = logging.getLogger('ckanext.ga-report') +DOWNLOADS_AVAILABLE_FROM = '2012-12' def _get_month_name(strdate): import calendar @@ -20,8 +22,12 @@ d = strptime(strdate, '%Y-%m') return '%s %s' % (calendar.month_name[d.tm_mon], d.tm_year) - -def _month_details(cls): +def _get_unix_epoch(strdate): + from time import strptime,mktime + d = strptime(strdate, '%Y-%m') + return int(mktime(d)) + +def _month_details(cls, stat_key=None): ''' Returns a list of all the periods for which we have data, unfortunately knows too much about the type of the cls being passed as GA_Url has a @@ -32,9 +38,13 @@ months = [] day = None - vals = model.Session.query(cls.period_name,cls.period_complete_day)\ - .filter(cls.period_name!='All').distinct(cls.period_name)\ - .order_by("period_name desc").all() + q = model.Session.query(cls.period_name,cls.period_complete_day)\ + .filter(cls.period_name!='All').distinct(cls.period_name) + if stat_key: + q= q.filter(cls.stat_name==stat_key) + + vals = q.order_by("period_name desc").all() + if vals and vals[0][1]: day = int(vals[0][1]) ordinal = 'th' if 11 <= day <= 13 \ @@ -66,25 +76,6 @@ for entry in entries: writer.writerow([entry.period_name.encode('utf-8'), entry.stat_name.encode('utf-8'), - entry.key.encode('utf-8'), - entry.value.encode('utf-8')]) - - def csv_downloads(self, month): - import csv - - q = model.Session.query(GA_Stat).filter(GA_Stat.stat_name=='Downloads') - if month != 'all': - q = q.filter(GA_Stat.period_name==month) - entries = q.order_by('GA_Stat.period_name, GA_Stat.key').all() - - response.headers['Content-Type'] = "text/csv; charset=utf-8" - response.headers['Content-Disposition'] = str('attachment; filename=downloads_%s.csv' % (month,)) - - writer = csv.writer(response) - writer.writerow(["Period", "Resource URL", "Count"]) - - for entry in entries: - writer.writerow([entry.period_name.encode('utf-8'), entry.key.encode('utf-8'), entry.value.encode('utf-8')]) @@ -121,11 +112,26 @@ return key, val + # Query historic values for sparkline rendering + sparkline_query = model.Session.query(GA_Stat)\ + .filter(GA_Stat.stat_name=='Totals')\ + .order_by(GA_Stat.period_name) + sparkline_data = {} + for x in sparkline_query: + sparkline_data[x.key] = sparkline_data.get(x.key,[]) + key, val = clean_key(x.key,float(x.value)) + tooltip = '%s: %s' % (_get_month_name(x.period_name), val) + sparkline_data[x.key].append( (tooltip,x.value) ) + # Trim the latest month, as it looks like a huge dropoff + for key in sparkline_data: + sparkline_data[key] = sparkline_data[key][:-1] + c.global_totals = [] if c.month: for e in entries: key, val = clean_key(e.key, e.value) - c.global_totals.append((key, val)) + sparkline = sparkline_data[e.key] + c.global_totals.append((key, val, sparkline)) else: d = collections.defaultdict(list) for e in entries: @@ -135,10 +141,18 @@ v = sum(v) else: v = float(sum(v))/float(len(v)) + sparkline = sparkline_data[k] key, val = clean_key(k,v) - c.global_totals.append((key, val)) - c.global_totals = sorted(c.global_totals, key=operator.itemgetter(0)) + c.global_totals.append((key, val, sparkline)) + # Sort the global totals into a more pleasant order + def sort_func(x): + key = x[0] + total_order = ['Total page views','Total visits','Pages per visit'] + if key in total_order: + return total_order.index(key) + return 999 + c.global_totals = sorted(c.global_totals, key=sort_func) keys = { 'Browser versions': 'browser_versions', @@ -175,12 +189,13 @@ for k, v in keys.iteritems(): q = model.Session.query(GA_Stat).\ - filter(GA_Stat.stat_name==k) + filter(GA_Stat.stat_name==k).\ + order_by(GA_Stat.period_name) + # Buffer the tabular data if c.month: entries = [] q = q.filter(GA_Stat.period_name==c.month).\ order_by('ga_stat.value::int desc') - d = collections.defaultdict(int) for e in q.all(): d[e.key] += int(e.value) @@ -189,46 +204,32 @@ entries.append((key,val,)) entries = sorted(entries, key=operator.itemgetter(1), reverse=True) + # Run a query on all months to gather graph data + graph_query = model.Session.query(GA_Stat).\ + filter(GA_Stat.stat_name==k).\ + order_by(GA_Stat.period_name) + graph_dict = {} + for stat in graph_query: + graph_dict[ stat.key ] = graph_dict.get(stat.key,{ + 'name':stat.key, + 'raw': {} + }) + graph_dict[ stat.key ]['raw'][stat.period_name] = float(stat.value) + stats_in_table = [x[0] for x in entries] + stats_not_in_table = set(graph_dict.keys()) - set(stats_in_table) + stats = stats_in_table + sorted(list(stats_not_in_table)) + graph = [graph_dict[x] for x in stats] + setattr(c, v+'_graph', json.dumps( _to_rickshaw(graph,percentageMode=True) )) + # Get the total for each set of values and then set the value as # a percentage of the total if k == 'Social sources': - total = sum([x for n,x in c.global_totals if n == 'Total visits']) + total = sum([x for n,x,graph in c.global_totals if n == 'Total visits']) else: total = sum([num for _,num in entries]) setattr(c, v, [(k,_percent(v,total)) for k,v in entries ]) return render('ga_report/site/index.html') - - def downloads(self): - - # Get the month details by fetching distinct values and determining the - # month names from the values. - c.months, c.day = _month_details(GA_Stat) - - # Work out which month to show, based on query params of the first item - c.month_desc = 'all months' - c.month = request.params.get('month', '') - if c.month: - c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month]) - - c.downloads = [] - q = model.Session.query(GA_Stat).filter(GA_Stat.stat_name=='Downloads') - q = q.filter(GA_Stat.period_name==c.month) if c.month else q - q = q.order_by("ga_stat.value::int desc") - - data = collections.defaultdict(int) - for entry in q.all(): - r = model.Session.query(model.Resource).filter(model.Resource.url==entry.key).first() - if not r: - continue - data[r] += int(entry.value) - - for k,v in data.iteritems(): - c.downloads.append((k,v)) - - c.downloads = sorted(c.downloads, key=operator.itemgetter(1), reverse=True) - - return render('ga_report/site/downloads.html') class GaDatasetReport(BaseController): @@ -248,7 +249,9 @@ writer = csv.writer(response) writer.writerow(["Publisher Title", "Publisher Name", "Views", "Visits", "Period Name"]) - for publisher,view,visit in _get_top_publishers(None): + top_publishers = _get_top_publishers(limit=None) + + for publisher,view,visit in top_publishers: writer.writerow([publisher.title.encode('utf-8'), publisher.name.encode('utf-8'), view, @@ -268,19 +271,20 @@ if not c.publisher: abort(404, 'A publisher with that name could not be found') - packages = self._get_packages(c.publisher) + packages = self._get_packages(publisher=c.publisher, month=c.month) response.headers['Content-Type'] = "text/csv; charset=utf-8" response.headers['Content-Disposition'] = \ str('attachment; filename=datasets_%s_%s.csv' % (c.publisher_name, month,)) writer = csv.writer(response) - writer.writerow(["Dataset Title", "Dataset Name", "Views", "Visits", "Period Name"]) - - for package,view,visit in packages: + writer.writerow(["Dataset Title", "Dataset Name", "Views", "Visits", "Resource downloads", "Period Name"]) + + for package,view,visit,downloads in packages: writer.writerow([package.title.encode('utf-8'), package.name.encode('utf-8'), view, visit, + downloads, month]) def publishers(self): @@ -297,14 +301,19 @@ c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month]) c.top_publishers = _get_top_publishers() - return render('ga_report/publisher/index.html') - - def _get_packages(self, publisher=None, count=-1): + graph_data = _get_top_publishers_graph() + c.top_publishers_graph = json.dumps( _to_rickshaw(graph_data) ) + + x = render('ga_report/publisher/index.html') + + return x + + def _get_packages(self, publisher=None, month='', count=-1): '''Returns the datasets in order of views''' - if count == -1: - count = sys.maxint - - month = c.month or 'All' + have_download_data = True + month = month or 'All' + if month != 'All': + have_download_data = month >= DOWNLOADS_AVAILABLE_FROM q = model.Session.query(GA_Url,model.Package)\ .filter(model.Package.name==GA_Url.package_id)\ @@ -314,9 +323,26 @@ q = q.filter(GA_Url.period_name==month) q = q.order_by('ga_url.pageviews::int desc') top_packages = [] - for entry,package in q.limit(count): + if count == -1: + entries = q.all() + else: + entries = q.limit(count) + + for entry,package in entries: if package: - top_packages.append((package, entry.pageviews, entry.visits)) + # Downloads .... + if have_download_data: + dls = model.Session.query(GA_Stat).\ + filter(GA_Stat.stat_name=='Downloads').\ + filter(GA_Stat.key==package.name) + if month != 'All': # Fetch everything unless the month is specific + dls = dls.filter(GA_Stat.period_name==month) + downloads = 0 + for x in dls: + downloads += int(x.value) + else: + downloads = 'No data' + top_packages.append((package, entry.pageviews, entry.visits, downloads)) else: log.warning('Could not find package associated package') @@ -362,9 +388,73 @@ entry = q.filter(GA_Url.period_name==c.month).first() c.publisher_page_views = entry.pageviews if entry else 0 - c.top_packages = self._get_packages(c.publisher, 20) + c.top_packages = self._get_packages(publisher=c.publisher, count=20, month=c.month) + + # Graph query + top_packages_all_time = self._get_packages(publisher=c.publisher, count=20, month='All') + top_package_names = [ x[0].name for x in top_packages_all_time ] + graph_query = model.Session.query(GA_Url,model.Package)\ + .filter(model.Package.name==GA_Url.package_id)\ + .filter(GA_Url.url.like('/dataset/%'))\ + .filter(GA_Url.package_id.in_(top_package_names)) + all_series = {} + for entry,package in graph_query: + if not package: continue + if entry.period_name=='All': continue + all_series[package.name] = all_series.get(package.name,{ + 'name':package.title, + 'raw': {} + }) + all_series[package.name]['raw'][entry.period_name] = int(entry.pageviews) + graph = [ all_series[series_name] for series_name in top_package_names ] + c.graph_data = json.dumps( _to_rickshaw(graph) ) return render('ga_report/publisher/read.html') + +def _to_rickshaw(data, percentageMode=False): + if data==[]: + return data + # x-axis is every month in c.months. Note that data might not exist + # for entire history, eg. for recently-added datasets + x_axis = [x[0] for x in c.months] + x_axis.reverse() # Ascending order + x_axis = x_axis[:-1] # Remove latest month + totals = {} + for series in data: + series['data'] = [] + for x_string in x_axis: + x = _get_unix_epoch( x_string ) + y = series['raw'].get(x_string,0) + series['data'].append({'x':x,'y':y}) + totals[x] = totals.get(x,0)+y + if not percentageMode: + return data + # Turn all data into percentages + # Roll insignificant series into a catch-all + THRESHOLD = 1 + raw_data = data + data = [] + for series in raw_data: + for point in series['data']: + percentage = (100*float(point['y'])) / totals[point['x']] + if not (series in data) and percentage>THRESHOLD: + data.append(series) + point['y'] = percentage + others = [ x for x in raw_data if not (x in data) ] + if len(others): + data_other = [] + for i in range(len(x_axis)): + x = _get_unix_epoch(x_axis[i]) + y = 0 + for series in others: + y += series['data'][i]['y'] + data_other.append({'x':x,'y':y}) + data.append({ + 'name':'Other', + 'data': data_other + }) + return data + def _get_top_publishers(limit=20): ''' @@ -394,6 +484,46 @@ return top_publishers +def _get_top_publishers_graph(limit=20): + ''' + Returns a list of the top 20 publishers by dataset visits. + (The number to show can be varied with 'limit') + ''' + connection = model.Session.connection() + q = """ + select department_id, sum(pageviews::int) views + from ga_url + where department_id <> '' + and package_id <> '' + and url like '/dataset/%%' + and period_name='All' + group by department_id order by views desc + """ + if limit: + q = q + " limit %s;" % (limit) + + res = connection.execute(q) + department_ids = [ row[0] for row in res ] + + # Query for a history graph of these department ids + q = model.Session.query( + GA_Url.department_id, + GA_Url.period_name, + func.sum(cast(GA_Url.pageviews,sqlalchemy.types.INT)))\ + .filter( GA_Url.department_id.in_(department_ids) )\ + .filter( GA_Url.url.like('/dataset/%') )\ + .filter( GA_Url.package_id!='' )\ + .group_by( GA_Url.department_id, GA_Url.period_name ) + graph_dict = {} + for dept_id,period_name,views in q: + graph_dict[dept_id] = graph_dict.get( dept_id, { + 'name' : model.Group.get(dept_id).title, + 'raw' : {} + }) + graph_dict[dept_id]['raw'][period_name] = views + return [ graph_dict[id] for id in department_ids ] + + def _get_publishers(): ''' Returns a list of all publishers. Each item is a tuple: @@ -401,7 +531,7 @@ ''' publishers = [] for pub in model.Session.query(model.Group).\ - filter(model.Group.type=='publisher').\ + filter(model.Group.type=='organization').\ filter(model.Group.state=='active').\ order_by(model.Group.name): publishers.append((pub.name, pub.title)) --- a/ckanext/ga_report/download_analytics.py +++ b/ckanext/ga_report/download_analytics.py @@ -1,7 +1,10 @@ import os import logging import datetime +import httplib import collections +import requests +import json from pylons import config from ga_model import _normalize_url import ga_model @@ -18,13 +21,14 @@ class DownloadAnalytics(object): '''Downloads and stores analytics info''' - def __init__(self, service=None, profile_id=None, delete_first=False, + def __init__(self, service=None, token=None, profile_id=None, delete_first=False, skip_url_stats=False): self.period = config['ga-report.period'] self.service = service self.profile_id = profile_id self.delete_first = delete_first self.skip_url_stats = skip_url_stats + self.token = token def specific_month(self, date): import calendar @@ -32,6 +36,11 @@ first_of_this_month = datetime.datetime(date.year, date.month, 1) _, last_day_of_month = calendar.monthrange(int(date.year), int(date.month)) last_of_this_month = datetime.datetime(date.year, date.month, last_day_of_month) + # if this is the latest month, note that it is only up until today + now = datetime.datetime.now() + if now.year == date.year and now.month == date.month: + last_day_of_month = now.day + last_of_this_month = now periods = ((date.strftime(FORMAT_MONTH), last_day_of_month, first_of_this_month, last_of_this_month),) @@ -112,19 +121,23 @@ accountName = config.get('googleanalytics.account') log.info('Downloading analytics for dataset views') - data = self.download(start_date, end_date, '~/%s/dataset/[a-z0-9-_]+' % accountName) + data = self.download(start_date, end_date, '~^/dataset/[a-z0-9-_]+') log.info('Storing dataset views (%i rows)', len(data.get('url'))) self.store(period_name, period_complete_day, data, ) log.info('Downloading analytics for publisher views') - data = self.download(start_date, end_date, '~/%s/publisher/[a-z0-9-_]+' % accountName) + data = self.download(start_date, end_date, '~^/organization/[a-z0-9-_]+') log.info('Storing publisher views (%i rows)', len(data.get('url'))) self.store(period_name, period_complete_day, data,) - log.info('Aggregating datasets by publisher') + # Make sure the All records are correct. + ga_model.post_update_url_stats() + + log.info('Associating datasets with their publisher') ga_model.update_publisher_stats(period_name) # about 30 seconds. + log.info('Downloading and storing analytics for site-wide stats') self.sitewide_stats( period_name, period_complete_day ) @@ -140,21 +153,31 @@ metrics = 'ga:entrances' sort = '-ga:entrances' - # Supported query params at - # https://developers.google.com/analytics/devguides/reporting/core/v3/reference - results = self.service.data().ga().get( - ids='ga:' + self.profile_id, - filters=query, - start_date=start_date, - metrics=metrics, - sort=sort, - dimensions="ga:landingPagePath,ga:socialNetwork", - max_results=10000, - end_date=end_date).execute() + try: + # Because of issues of invalid responses, we are going to make these requests + # ourselves. + headers = {'authorization': 'Bearer ' + self.token} + + args = dict(ids='ga:' + self.profile_id, + filters=query, + metrics=metrics, + sort=sort, + dimensions="ga:landingPagePath,ga:socialNetwork", + max_results=10000) + + args['start-date'] = start_date + args['end-date'] = end_date + + results = self._get_json(args) + except Exception, e: + log.exception(e) + results = dict(url=[]) + + data = collections.defaultdict(list) rows = results.get('rows',[]) for row in rows: - url = _normalize_url('http:/' + row[0]) + url = row[0] data[url].append( (row[1], int(row[2]),) ) ga_model.update_social(period_name, data) @@ -169,22 +192,34 @@ # Supported query params at # https://developers.google.com/analytics/devguides/reporting/core/v3/reference - results = self.service.data().ga().get( - ids='ga:' + self.profile_id, - filters=query, - start_date=start_date, - metrics=metrics, - sort=sort, - dimensions="ga:pagePath", - max_results=10000, - end_date=end_date).execute() + # https://ga-dev-tools.appspot.com/explorer/ + try: + args = {} + args["sort"] = "-ga:pageviews" + args["max-results"] = 100000 + args["dimensions"] = "ga:pagePath" + args["start-date"] = start_date + args["end-date"] = end_date + args["metrics"] = metrics + args["ids"] = "ga:" + self.profile_id + args["filters"] = query + args["alt"] = "json" + print args + results = self._get_json(args) + + except Exception, e: + log.exception(e) + return dict(url=[]) packages = [] - for entry in results.get('rows'): + log.info("There are %d results" % results['totalResults']) + if results['totalResults'] > 0: + for entry in results.get('rows'): (loc,pageviews,visits) = entry - url = _normalize_url('http:/' + loc) # strips off domain e.g. www.data.gov.uk or data.gov.uk - - if not url.startswith('/dataset/') and not url.startswith('/publisher/'): + #url = _normalize_url('http:/' + loc) # strips off domain e.g. www.data.gov.uk or data.gov.uk + url = loc + #print url + if not url.startswith('/dataset/') and not url.startswith('/organization/'): # filter out strays like: # /data/user/login?came_from=http://data.gov.uk/dataset/os-code-point-open # /403.html?page=/about&from=http://data.gov.uk/publisher/planning-inspectorate @@ -216,25 +251,78 @@ data[key] = data.get(key,0) + result[1] return data + def _get_json(self, params, prev_fail=False): + ga_token_filepath = os.path.expanduser(config.get('googleanalytics.token.filepath', '')) + if not ga_token_filepath: + print 'ERROR: In the CKAN config you need to specify the filepath of the ' \ + 'Google Analytics token file under key: googleanalytics.token.filepath' + return + + log.info("Trying to refresh our OAuth token") + try: + from ga_auth import init_service + self.token, svc = init_service(ga_token_filepath, None) + log.info("OAuth token refreshed") + except Exception, auth_exception: + log.error("Oauth refresh failed") + log.exception(auth_exception) + return + + try: + headers = {'authorization': 'Bearer ' + self.token} + r = requests.get("https://www.googleapis.com/analytics/v3/data/ga", params=params, headers=headers) + if r.status_code != 200: + log.info("STATUS: %s" % (r.status_code,)) + log.info("CONTENT: %s" % (r.content,)) + raise Exception("Request with params: %s failed" % params) + + return json.loads(r.content) + except Exception, e: + log.exception(e) + + return dict(url=[]) + def _totals_stats(self, start_date, end_date, period_name, period_complete_day): """ Fetches distinct totals, total pageviews etc """ - results = self.service.data().ga().get( - ids='ga:' + self.profile_id, - start_date=start_date, - metrics='ga:pageviews', - sort='-ga:pageviews', - max_results=10000, - end_date=end_date).execute() + try: + args = {} + args["max-results"] = 100000 + args["start-date"] = start_date + args["end-date"] = end_date + args["ids"] = "ga:" + self.profile_id + + args["metrics"] = "ga:pageviews" + args["sort"] = "-ga:pageviews" + args["alt"] = "json" + + results = self._get_json(args) + except Exception, e: + log.exception(e) + results = dict(url=[]) + result_data = results.get('rows') ga_model.update_sitewide_stats(period_name, "Totals", {'Total page views': result_data[0][0]}, period_complete_day) - results = self.service.data().ga().get( - ids='ga:' + self.profile_id, - start_date=start_date, - metrics='ga:pageviewsPerVisit,ga:avgTimeOnSite,ga:percentNewVisits,ga:visits', - max_results=10000, - end_date=end_date).execute() + try: + # Because of issues of invalid responses, we are going to make these requests + # ourselves. + headers = {'authorization': 'Bearer ' + self.token} + + args = {} + args["max-results"] = 100000 + args["start-date"] = start_date + args["end-date"] = end_date + args["ids"] = "ga:" + self.profile_id + + args["metrics"] = "ga:pageviewsPerVisit,ga:avgTimeOnSite,ga:percentNewVisits,ga:visits" + args["alt"] = "json" + + results = self._get_json(args) + except Exception, e: + log.exception(e) + results = dict(url=[]) + result_data = results.get('rows') data = { 'Pages per visit': result_data[0][0], @@ -245,16 +333,29 @@ ga_model.update_sitewide_stats(period_name, "Totals", data, period_complete_day) # Bounces from / or another configurable page. - path = '/%s%s' % (config.get('googleanalytics.account'), - config.get('ga-report.bounce_url', '/')) - results = self.service.data().ga().get( - ids='ga:' + self.profile_id, - filters='ga:pagePath==%s' % (path,), - start_date=start_date, - metrics='ga:visitBounceRate', - dimensions='ga:pagePath', - max_results=10000, - end_date=end_date).execute() + path = '/' #% (config.get('googleanalytics.account'), config.get('ga-report.bounce_url', '/')) + + try: + # Because of issues of invalid responses, we are going to make these requests + # ourselves. + headers = {'authorization': 'Bearer ' + self.token} + + args = {} + args["max-results"] = 100000 + args["start-date"] = start_date + args["end-date"] = end_date + args["ids"] = "ga:" + self.profile_id + + args["filters"] = 'ga:pagePath==%s' % (path,) + args["dimensions"] = 'ga:pagePath' + args["metrics"] = "ga:visitBounceRate" + args["alt"] = "json" + + results = self._get_json(args) + except Exception, e: + log.exception(e) + results = dict(url=[]) + result_data = results.get('rows') if not result_data or len(result_data) != 1: log.error('Could not pinpoint the bounces for path: %s. Got results: %r', @@ -270,14 +371,28 @@ def _locale_stats(self, start_date, end_date, period_name, period_complete_day): """ Fetches stats about language and country """ - results = self.service.data().ga().get( - ids='ga:' + self.profile_id, - start_date=start_date, - metrics='ga:pageviews', - sort='-ga:pageviews', - dimensions="ga:language,ga:country", - max_results=10000, - end_date=end_date).execute() + + try: + # Because of issues of invalid responses, we are going to make these requests + # ourselves. + headers = {'authorization': 'Bearer ' + self.token} + + args = {} + args["max-results"] = 100000 + args["start-date"] = start_date + args["end-date"] = end_date + args["ids"] = "ga:" + self.profile_id + + args["dimensions"] = "ga:language,ga:country" + args["metrics"] = "ga:pageviews" + args["sort"] = "-ga:pageviews" + args["alt"] = "json" + + results = self._get_json(args) + except Exception, e: + log.exception(e) + results = dict(url=[]) + result_data = results.get('rows') data = {} for result in result_data: @@ -293,34 +408,116 @@ def _download_stats(self, start_date, end_date, period_name, period_complete_day): - """ Fetches stats about language and country """ - results = self.service.data().ga().get( - ids='ga:' + self.profile_id, - start_date=start_date, - filters='ga:eventAction==download', - metrics='ga:totalEvents', - sort='-ga:totalEvents', - dimensions="ga:eventLabel", - max_results=10000, - end_date=end_date).execute() - result_data = results.get('rows') - # [[url, count], [url],count] - data = {} - for result in result_data: - data[result[0]] = data.get(result[0], 0) + int(result[1]) + """ Fetches stats about data downloads """ + import ckan.model as model + + data = {} + + try: + # Because of issues of invalid responses, we are going to make these requests + # ourselves. + headers = {'authorization': 'Bearer ' + self.token} + + args = {} + args["max-results"] = 100000 + args["start-date"] = start_date + args["end-date"] = end_date + args["ids"] = "ga:" + self.profile_id + + args["filters"] = 'ga:eventAction==download' + args["dimensions"] = "ga:eventLabel" + args["metrics"] = "ga:totalEvents" + args["alt"] = "json" + + results = self._get_json(args) + except Exception, e: + log.exception(e) + results = dict(url=[]) + + result_data = results.get('rows') + if not result_data: + # We may not have data for this time period, so we need to bail + # early. + log.info("There is no download data for this time period") + return + + def process_result_data(result_data, cached=False): + progress_total = len(result_data) + progress_count = 0 + resources_not_matched = [] + for result in result_data: + progress_count += 1 + if progress_count % 100 == 0: + log.debug('.. %d/%d done so far', progress_count, progress_total) + + url = result[0].strip() + + # Get package id associated with the resource that has this URL. + q = model.Session.query(model.Resource) + if cached: + r = q.filter(model.Resource.cache_url.like("%s%%" % url)).first() + else: + r = q.filter(model.Resource.url.like("%s%%" % url)).first() + + package_name = r.resource_group.package.name if r else "" + if package_name: + data[package_name] = data.get(package_name, 0) + int(result[1]) + else: + resources_not_matched.append(url) + continue + if resources_not_matched: + log.debug('Could not match %i or %i resource URLs to datasets. e.g. %r', + len(resources_not_matched), progress_total, resources_not_matched[:3]) + + log.info('Associating downloads of resource URLs with their respective datasets') + process_result_data(results.get('rows')) + + try: + # Because of issues of invalid responses, we are going to make these requests + # ourselves. + headers = {'authorization': 'Bearer ' + self.token} + + args = dict( ids='ga:' + self.profile_id, + filters='ga:eventAction==download-cache', + metrics='ga:totalEvents', + sort='-ga:totalEvents', + dimensions="ga:eventLabel", + max_results=10000) + args['start-date'] = start_date + args['end-date'] = end_date + + results = self._get_json(args) + except Exception, e: + log.exception(e) + results = dict(url=[]) + + log.info('Associating downloads of cache resource URLs with their respective datasets') + process_result_data(results.get('rows'), cached=False) + self._filter_out_long_tail(data, MIN_DOWNLOADS) ga_model.update_sitewide_stats(period_name, "Downloads", data, period_complete_day) def _social_stats(self, start_date, end_date, period_name, period_complete_day): """ Finds out which social sites people are referred from """ - results = self.service.data().ga().get( - ids='ga:' + self.profile_id, - start_date=start_date, - metrics='ga:pageviews', - sort='-ga:pageviews', - dimensions="ga:socialNetwork,ga:referralPath", - max_results=10000, - end_date=end_date).execute() + + try: + # Because of issues of invalid responses, we are going to make these requests + # ourselves. + headers = {'authorization': 'Bearer ' + self.token} + + args = dict( ids='ga:' + self.profile_id, + metrics='ga:pageviews', + sort='-ga:pageviews', + dimensions="ga:socialNetwork,ga:referralPath", + max_results=10000) + args['start-date'] = start_date + args['end-date'] = end_date + + results = self._get_json(args) + except Exception, e: + log.exception(e) + results = dict(url=[]) + result_data = results.get('rows') data = {} for result in result_data: @@ -332,14 +529,24 @@ def _os_stats(self, start_date, end_date, period_name, period_complete_day): """ Operating system stats """ - results = self.service.data().ga().get( - ids='ga:' + self.profile_id, - start_date=start_date, - metrics='ga:pageviews', - sort='-ga:pageviews', - dimensions="ga:operatingSystem,ga:operatingSystemVersion", - max_results=10000, - end_date=end_date).execute() + try: + # Because of issues of invalid responses, we are going to make these requests + # ourselves. + headers = {'authorization': 'Bearer ' + self.token} + + args = dict( ids='ga:' + self.profile_id, + metrics='ga:pageviews', + sort='-ga:pageviews', + dimensions="ga:operatingSystem,ga:operatingSystemVersion", + max_results=10000) + args['start-date'] = start_date + args['end-date'] = end_date + + results = self._get_json(args) + except Exception, e: + log.exception(e) + results = dict(url=[]) + result_data = results.get('rows') data = {} for result in result_data: @@ -357,14 +564,27 @@ def _browser_stats(self, start_date, end_date, period_name, period_complete_day): """ Information about browsers and browser versions """ - results = self.service.data().ga().get( - ids='ga:' + self.profile_id, - start_date=start_date, - metrics='ga:pageviews', - sort='-ga:pageviews', - dimensions="ga:browser,ga:browserVersion", - max_results=10000, - end_date=end_date).execute() + + try: + # Because of issues of invalid responses, we are going to make these requests + # ourselves. + headers = {'authorization': 'Bearer ' + self.token} + + args = dict( ids='ga:' + self.profile_id, + metrics='ga:pageviews', + sort='-ga:pageviews', + dimensions="ga:browser,ga:browserVersion", + max_results=10000) + + args['start-date'] = start_date + args['end-date'] = end_date + + results = self._get_json(args) + except Exception, e: + log.exception(e) + results = dict(url=[]) + + result_data = results.get('rows') # e.g. [u'Firefox', u'19.0', u'20'] @@ -406,14 +626,24 @@ def _mobile_stats(self, start_date, end_date, period_name, period_complete_day): """ Info about mobile devices """ - results = self.service.data().ga().get( - ids='ga:' + self.profile_id, - start_date=start_date, - metrics='ga:pageviews', - sort='-ga:pageviews', - dimensions="ga:mobileDeviceBranding, ga:mobileDeviceInfo", - max_results=10000, - end_date=end_date).execute() + try: + # Because of issues of invalid responses, we are going to make these requests + # ourselves. + headers = {'authorization': 'Bearer ' + self.token} + + args = dict( ids='ga:' + self.profile_id, + metrics='ga:pageviews', + sort='-ga:pageviews', + dimensions="ga:mobileDeviceBranding, ga:mobileDeviceInfo", + max_results=10000) + args['start-date'] = start_date + args['end-date'] = end_date + + results = self._get_json(args) + except Exception, e: + log.exception(e) + results = dict(url=[]) + result_data = results.get('rows') data = {} --- a/ckanext/ga_report/ga_auth.py +++ b/ckanext/ga_report/ga_auth.py @@ -36,7 +36,7 @@ credentials = _prepare_credentials(token_file, credentials_file) http = credentials.authorize(http) # authorize the http object - return build('analytics', 'v3', http=http) + return credentials.access_token, build('analytics', 'v3', http=http) def get_profile_id(service): --- a/ckanext/ga_report/ga_model.py +++ b/ckanext/ga_report/ga_model.py @@ -114,7 +114,7 @@ >>> normalize_url('http://data.gov.uk/dataset/weekly_fuel_prices') '/dataset/weekly_fuel_prices' ''' - return '/' + '/'.join(url.split('/')[3:]) + return url #'/' + '/'.join(url.split('/')[3:]) def _get_package_and_publisher(url): @@ -125,12 +125,12 @@ dataset_ref = dataset_match.groups()[0] dataset = model.Package.get(dataset_ref) if dataset: - publisher_groups = dataset.get_groups('publisher') + publisher_groups = dataset.get_groups('organization') if publisher_groups: return dataset_ref,publisher_groups[0].name return dataset_ref, None else: - publisher_match = re.match('/publisher/([^/]+)(/.*)?', url) + publisher_match = re.match('/organization/([^/]+)(/.*)?', url) if publisher_match: return None, publisher_match.groups()[0] return None, None @@ -161,20 +161,64 @@ def pre_update_url_stats(period_name): - log.debug("Deleting '%s' records" % period_name) - model.Session.query(GA_Url).\ - filter(GA_Url.period_name==period_name).delete() - - count = model.Session.query(GA_Url).\ - filter(GA_Url.period_name == 'All').count() - log.debug("Deleting %d 'All' records" % count) - count = model.Session.query(GA_Url).\ - filter(GA_Url.period_name == 'All').delete() - log.debug("Deleted %d 'All' records" % count) + q = model.Session.query(GA_Url).\ + filter(GA_Url.period_name==period_name) + log.debug("Deleting %d '%s' records" % (q.count(), period_name)) + q.delete() + + q = model.Session.query(GA_Url).\ + filter(GA_Url.period_name == 'All') + log.debug("Deleting %d 'All' records..." % q.count()) + q.delete() model.Session.flush() model.Session.commit() model.repo.commit_and_remove() + log.debug('...done') + +def post_update_url_stats(): + + """ Check the distinct url field in ga_url and make sure + it has an All record. If not then create one. + + After running this then every URL should have an All + record regardless of whether the URL has an entry for + the month being currently processed. + """ + log.debug('Post-processing "All" records...') + query = """select url, pageviews::int, visits::int + from ga_url + where url not in (select url from ga_url where period_name ='All')""" + connection = model.Session.connection() + res = connection.execute(query) + + views, visits = {}, {} + # url, views, visits + for row in res: + views[row[0]] = views.get(row[0], 0) + row[1] + visits[row[0]] = visits.get(row[0], 0) + row[2] + + progress_total = len(views.keys()) + progress_count = 0 + for key in views.keys(): + progress_count += 1 + if progress_count % 100 == 0: + log.debug('.. %d/%d done so far', progress_count, progress_total) + + package, publisher = _get_package_and_publisher(key) + + values = {'id': make_uuid(), + 'period_name': "All", + 'period_complete_day': 0, + 'url': key, + 'pageviews': views[key], + 'visits': visits[key], + 'department_id': publisher, + 'package_id': package + } + model.Session.add(GA_Url(**values)) + model.Session.commit() + log.debug('..done') def update_url_stats(period_name, period_complete_day, url_data): @@ -183,9 +227,14 @@ stores them in GA_Url under the period and recalculates the totals for the 'All' period. ''' + progress_total = len(url_data) + progress_count = 0 for url, views, visits in url_data: + progress_count += 1 + if progress_count % 100 == 0: + log.debug('.. %d/%d done so far', progress_count, progress_total) + package, publisher = _get_package_and_publisher(url) - item = model.Session.query(GA_Url).\ filter(GA_Url.period_name==period_name).\ @@ -274,11 +323,11 @@ """ toplevel = get_top_level() publishers = model.Session.query(model.Group).\ - filter(model.Group.type=='publisher').\ + filter(model.Group.type=='organization').\ filter(model.Group.state=='active').all() for publisher in publishers: views, visits, subpub = update_publisher(period_name, publisher, publisher.name) - parent, parents = '', publisher.get_groups('publisher') + parent, parents = '', publisher.get_parent_groups(type='organization') if parents: parent = parents[0].name item = model.Session.query(GA_Publisher).\ @@ -328,15 +377,12 @@ model.Member.table_name == 'group' and \ model.Member.state == 'active').\ filter(model.Member.id==None).\ - filter(model.Group.type=='publisher').\ + filter(model.Group.type=='organization').\ order_by(model.Group.name).all() def get_children(publisher): - '''Finds child publishers for the given publisher (object). (Not recursive)''' - from ckan.model.group import HIERARCHY_CTE - return model.Session.query(model.Group).\ - from_statement(HIERARCHY_CTE).params(id=publisher.id, type='publisher').\ - all() + '''Finds child publishers for the given publisher (object). (Not recursive i.e. returns one level)''' + return publisher.get_children_groups(type='organization') def go_down_tree(publisher): '''Provided with a publisher object, it walks down the hierarchy and yields each publisher, --- a/ckanext/ga_report/helpers.py +++ b/ckanext/ga_report/helpers.py @@ -71,7 +71,7 @@ def single_popular_dataset_html(top=20): dataset_dict = single_popular_dataset(top) groups = package.get('groups', []) - publishers = [ g for g in groups if g.get('type') == 'publisher' ] + publishers = [ g for g in groups if g.get('type') == 'organization' ] publisher = publishers[0] if publishers else {'name':'', 'title': ''} context = { 'dataset': dataset_dict, @@ -80,7 +80,7 @@ return base.render_snippet('ga_report/ga_popular_single.html', **context) -def most_popular_datasets(publisher, count=20): +def most_popular_datasets(publisher, count=20, preview_image=None): if not publisher: _log.error("No valid publisher passed to 'most_popular_datasets'") @@ -92,7 +92,8 @@ 'dataset_count': len(results), 'datasets': results, - 'publisher': publisher + 'publisher': publisher, + 'preview_image': preview_image } return base.render_snippet('ga_report/publisher/popular.html', **ctx) @@ -106,8 +107,18 @@ for entry in entries: if len(datasets) < count: p = model.Package.get(entry.url[len('/dataset/'):]) + + if not p: + _log.warning("Could not find Package for {url}".format(url=entry.url)) + continue + + if not p.state == 'active': + _log.warning("Package {0} is not active, it is {1}".format(p.name, p.state)) + continue + if not p in datasets: datasets[p] = {'views':0, 'visits': 0} + datasets[p]['views'] = datasets[p]['views'] + int(entry.pageviews) datasets[p]['visits'] = datasets[p]['visits'] + int(entry.visits) @@ -117,3 +128,17 @@ return sorted(results, key=operator.itemgetter(1), reverse=True) +def month_option_title(month_iso, months, day): + month_isos = [ iso_code for (iso_code,name) in months ] + try: + index = month_isos.index(month_iso) + except ValueError: + _log.error('Month "%s" not found in list of months.' % month_iso) + return month_iso + month_name = months[index][1] + if index==0: + return month_name + (' (up to %s)'%day) + return month_name + + + --- a/ckanext/ga_report/plugin.py +++ b/ckanext/ga_report/plugin.py @@ -5,7 +5,8 @@ from ckanext.ga_report.helpers import (most_popular_datasets, popular_datasets, - single_popular_dataset) + single_popular_dataset, + month_option_title) log = logging.getLogger('ckanext.ga-report') @@ -27,55 +28,56 @@ 'ga_report_installed': lambda: True, 'popular_datasets': popular_datasets, 'most_popular_datasets': most_popular_datasets, - 'single_popular_dataset': single_popular_dataset + 'single_popular_dataset': single_popular_dataset, + 'month_option_title': month_option_title } def after_map(self, map): # GaReport map.connect( - '/data/site-usage', + '/site-usage', controller='ckanext.ga_report.controller:GaReport', action='index' ) map.connect( - '/data/site-usage/data_{month}.csv', + '/site-usage_{month}.csv', controller='ckanext.ga_report.controller:GaReport', action='csv' ) map.connect( - '/data/site-usage/downloads', + '/site-usage/downloads', controller='ckanext.ga_report.controller:GaReport', action='downloads' ) map.connect( - '/data/site-usage/downloads_{month}.csv', + '/site-usage/downloads_{month}.csv', controller='ckanext.ga_report.controller:GaReport', action='csv_downloads' ) # GaDatasetReport map.connect( - '/data/site-usage/publisher', + '/site-usage/publisher', controller='ckanext.ga_report.controller:GaDatasetReport', action='publishers' ) map.connect( - '/data/site-usage/publishers_{month}.csv', + '/site-usage/publishers_{month}.csv', controller='ckanext.ga_report.controller:GaDatasetReport', action='publisher_csv' ) map.connect( - '/data/site-usage/dataset/datasets_{id}_{month}.csv', + '/site-usagesetsets_{id}_{month}.csv', controller='ckanext.ga_report.controller:GaDatasetReport', action='dataset_csv' ) map.connect( - '/data/site-usage/dataset', + '/site-usageset', controller='ckanext.ga_report.controller:GaDatasetReport', action='read' ) map.connect( - '/data/site-usage/dataset/{id}', + '/site-usageset/{id}', controller='ckanext.ga_report.controller:GaDatasetReport', action='read_publisher' ) --- /dev/null +++ b/ckanext/ga_report/public/css/ga_report.css @@ -1,1 +1,72 @@ +.table-condensed td.sparkline-cell { + padding: 1px 0 0 0; + width: 108px; + text-align: center; + /* Hack to hide the momentary flash of text + * before sparklines are fully rendered */ + font-size: 1px; + color: transparent; + overflow: hidden; +} +.rickshaw_chart_container { + position: relative; + height: 350px; + margin: 0 auto 20px auto; +} +.rickshaw_chart { + position: absolute; + left: 40px; + width: 500px; + top: 0; + bottom: 0; +} +.rickshaw_legend { + background: transparent; + width: 100%; + padding-top: 4px; +} +.rickshaw_y_axis { + position: absolute; + top: 0; + bottom: 0; + width: 40px; +} +.rickshaw_legend .label { + background: transparent !important; + color: #000000 !important; + font-weight: normal !important; +} +.rickshaw_legend .instructions { + color: #000; + margin-bottom: 6px; +} +.rickshaw_legend .line .action { + display: none; +} +.rickshaw_legend .line .swatch { + display: block; + float: left; +} +.rickshaw_legend .line .label { + display: block; + white-space: normal; + float: left; + width: 200px; +} +.rickshaw_legend .line .label:hover { + text-decoration: underline; +} + +.ga-reports-table .td-numeric { + text-align: center; +} +.ga-reports-heading { + padding-right: 10px; + margin-top: 4px; + float: left; +} +.tab-content { + padding-top: 12px; +} + --- /dev/null +++ b/ckanext/ga_report/public/scripts/ckanext_ga_reports.js @@ -1,1 +1,131 @@ +var CKAN = CKAN || {}; +CKAN.GA_Reports = {}; +CKAN.GA_Reports.render_rickshaw = function( css_name, data, mode, colorscheme ) { + var graphLegends = $('#graph-legend-container'); + + function renderError(alertClass,alertText,legendText) { + $("#chart_"+css_name) + .html( '
'+alertText+'
') + .closest('.rickshaw_chart_container').css('height',50); + var myLegend = $('
') + .html(legendText) + .appendTo(graphLegends); + } + + if (!Modernizr.svg) { + renderError('','Your browser does not support vector graphics. No graphs can be rendered.','(Graph cannot be rendered)'); + return; + } + if (data.length==0) { + renderError('alert-info','There is not enough data to render a graph.','(No graph available)'); + return + } + var myLegend = $('
').appendTo(graphLegends); + + var palette = new Rickshaw.Color.Palette( { scheme: colorscheme } ); + $.each(data, function(i, object) { + object['color'] = palette.color(); + }); + // Rickshaw renders the legend in reverse order... + data.reverse(); + + var graphElement = document.querySelector("#chart_"+css_name); + + var graph = new Rickshaw.Graph( { + element: document.querySelector("#chart_"+css_name), + renderer: mode, + series: data , + height: 328 + }); + var x_axis = new Rickshaw.Graph.Axis.Time( { + graph: graph + } ); + var y_axis = new Rickshaw.Graph.Axis.Y( { + graph: graph, + orientation: 'left', + tickFormat: Rickshaw.Fixtures.Number.formatKMBT, + element: document.getElementById('y_axis_'+css_name) + } ); + var legend = new Rickshaw.Graph.Legend( { + element: document.querySelector('#legend_'+css_name), + graph: graph + } ); + var shelving = new Rickshaw.Graph.Behavior.Series.Toggle( { + graph: graph, + legend: legend + } ); + myLegend.prepend('
Click on a series below to isolate its graph:
'); + graph.render(); +}; + +CKAN.GA_Reports.bind_sparklines = function() { + /* + * Bind to the 'totals' tab being on screen, when the + * Sparkline graphs should be drawn. + * Note that they cannot be drawn sooner. + */ + var created = false; + $('a[href="#totals"]').on( + 'shown', + function() { + if (!created) { + var sparkOptions = { + enableTagOptions: true, + type: 'line', + width: 100, + height: 26, + chartRangeMin: 0, + spotColor: '', + maxSpotColor: '', + minSpotColor: '', + highlightSpotColor: '#000000', + lineColor: '#3F8E6D', + fillColor: '#B7E66B' + }; + $('.sparkline').sparkline('html',sparkOptions); + created = true; + } + $.sparkline_display_visible(); + } + ); +}; + +CKAN.GA_Reports.bind_sidebar = function() { + /* + * Bind to changes in the tab behaviour: + * Show the correct rickshaw graph in the sidebar. + * Not to be called before all graphs load. + */ + $('a[data-toggle="tab"]').on( + 'shown', + function(e) { + var href = $(e.target).attr('href'); + var pane = $(href); + if (!pane.length) { console.err('bad href',href); return; } + var legend_name = "none"; + var graph = pane.find('.rickshaw_chart'); + if (graph.length) { + legend_name = graph.attr('id').replace('chart_',''); + } + legend_name = '#legend_'+legend_name; + $('#graph-legend-container > *').hide(); + $('#graph-legend-container .instructions').show(); + $(legend_name).show(); + } + ); + /* The first tab might already have been shown */ + $('li.active > a[data-toggle="tab"]').trigger('shown'); +}; + +CKAN.GA_Reports.bind_month_selector = function() { + var handler = function(e) { + var target = $(e.delegateTarget); + var form = target.closest('form'); + var url = form.attr('action')+'?month='+target.val()+window.location.hash; + window.location = url; + }; + var selectors = $('select[name="month"]'); + selectors.bind('change', handler); +}; + --- /dev/null +++ b/ckanext/ga_report/public/scripts/modernizr-2.6.2.custom.js @@ -1,1 +1,815 @@ - +/* Modernizr 2.6.2 (Custom Build) | MIT & BSD + * Build: http://modernizr.com/download/#-fontface-backgroundsize-borderimage-borderradius-boxshadow-flexbox-hsla-multiplebgs-opacity-rgba-textshadow-cssanimations-csscolumns-generatedcontent-cssgradients-cssreflections-csstransforms-csstransforms3d-csstransitions-applicationcache-canvas-canvastext-draganddrop-hashchange-history-audio-video-indexeddb-input-inputtypes-localstorage-postmessage-sessionstorage-websockets-websqldatabase-webworkers-geolocation-inlinesvg-smil-svg-svgclippaths-touch-webgl-shiv-cssclasses-addtest-prefixed-teststyles-testprop-testallprops-hasevent-prefixes-domprefixes-load + */ +; + + + +window.Modernizr = (function( window, document, undefined ) { + + var version = '2.6.2', + + Modernizr = {}, + + enableClasses = true, + + docElement = document.documentElement, + + mod = 'modernizr', + modElem = document.createElement(mod), + mStyle = modElem.style, + + inputElem = document.createElement('input') , + + smile = ':)', + + toString = {}.toString, + + prefixes = ' -webkit- -moz- -o- -ms- '.split(' '), + + + + omPrefixes = 'Webkit Moz O ms', + + cssomPrefixes = omPrefixes.split(' '), + + domPrefixes = omPrefixes.toLowerCase().split(' '), + + ns = {'svg': 'http://www.w3.org/2000/svg'}, + + tests = {}, + inputs = {}, + attrs = {}, + + classes = [], + + slice = classes.slice, + + featureName, + + + injectElementWithStyles = function( rule, callback, nodes, testnames ) { + + var style, ret, node, docOverflow, + div = document.createElement('div'), + body = document.body, + fakeBody = body || document.createElement('body'); + + if ( parseInt(nodes, 10) ) { + while ( nodes-- ) { + node = document.createElement('div'); + node.id = testnames ? testnames[nodes] : mod + (nodes + 1); + div.appendChild(node); + } + } + + style = ['­',''].join(''); + div.id = mod; + (body ? div : fakeBody).innerHTML += style; + fakeBody.appendChild(div); + if ( !body ) { + fakeBody.style.background = ''; + fakeBody.style.overflow = 'hidden'; + docOverflow = docElement.style.overflow; + docElement.style.overflow = 'hidden'; + docElement.appendChild(fakeBody); + } + + ret = callback(div, rule); + if ( !body ) { + fakeBody.parentNode.removeChild(fakeBody); + docElement.style.overflow = docOverflow; + } else { + div.parentNode.removeChild(div); + } + + return !!ret; + + }, + + + + isEventSupported = (function() { + + var TAGNAMES = { + 'select': 'input', 'change': 'input', + 'submit': 'form', 'reset': 'form', + 'error': 'img', 'load': 'img', 'abort': 'img' + }; + + function isEventSupported( eventName, element ) { + + element = element || document.createElement(TAGNAMES[eventName] || 'div'); + eventName = 'on' + eventName; + + var isSupported = eventName in element; + + if ( !isSupported ) { + if ( !element.setAttribute ) { + element = document.createElement('div'); + } + if ( element.setAttribute && element.removeAttribute ) { + element.setAttribute(eventName, ''); + isSupported = is(element[eventName], 'function'); + + if ( !is(element[eventName], 'undefined') ) { + element[eventName] = undefined; + } + element.removeAttribute(eventName); + } + } + + element = null; + return isSupported; + } + return isEventSupported; + })(), + + + _hasOwnProperty = ({}).hasOwnProperty, hasOwnProp; + + if ( !is(_hasOwnProperty, 'undefined') && !is(_hasOwnProperty.call, 'undefined') ) { + hasOwnProp = function (object, property) { + return _hasOwnProperty.call(object, property); + }; + } + else