From: Ross Jones Date: Mon, 07 Oct 2013 08:56:24 +0000 Subject: Force debugging logging for external libs and disables publisher/dataset X-Git-Url: http://maxious.lambdacomplex.org/git/?p=ckanext-ga-report.git&a=commitdiff&h=70a63b7bc385fd1d89aa261bd28fd1064989b2c5 --- Force debugging logging for external libs and disables publisher/dataset Because of the issue in #854, we've temporarily disabled the publisher and datasets views of the site-usage data until we can resolve the issue. --- --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ *.py[co] *.py~ .gitignore +ckan.log # Packages *.egg --- a/ckanext/ga_report/command.py +++ b/ckanext/ga_report/command.py @@ -23,7 +23,7 @@ import ckan.model as model model.Session.remove() model.Session.configure(bind=model.meta.engine) - log = logging.getLogger('ckanext.ga-report') + log = logging.getLogger('ckanext.ga_report') import ga_model ga_model.init_tables() @@ -55,6 +55,36 @@ init_service('token.dat', self.args[0] if self.args else 'credentials.json') + +class FixTimePeriods(CkanCommand): + """ + Fixes the 'All' records for GA_Urls + + It is possible that older urls that haven't recently been visited + do not have All records. This command will traverse through those + records and generate valid All records for them. + """ + summary = __doc__.split('\n')[0] + usage = __doc__ + max_args = 0 + min_args = 0 + + def __init__(self, name): + super(FixTimePeriods, self).__init__(name) + + def command(self): + import ckan.model as model + from ga_model import post_update_url_stats + self._load_config() + model.Session.remove() + model.Session.configure(bind=model.meta.engine) + + log = logging.getLogger('ckanext.ga_report') + + log.info("Updating 'All' records for old URLs") + post_update_url_stats() + log.info("Processing complete") + class LoadAnalytics(CkanCommand): --- a/ckanext/ga_report/controller.py +++ b/ckanext/ga_report/controller.py @@ -1,6 +1,7 @@ import re import csv import sys +import json import logging import operator import collections @@ -13,6 +14,7 @@ log = logging.getLogger('ckanext.ga-report') +DOWNLOADS_AVAILABLE_FROM = '2012-12' def _get_month_name(strdate): import calendar @@ -20,8 +22,12 @@ d = strptime(strdate, '%Y-%m') return '%s %s' % (calendar.month_name[d.tm_mon], d.tm_year) - -def _month_details(cls): +def _get_unix_epoch(strdate): + from time import strptime,mktime + d = strptime(strdate, '%Y-%m') + return int(mktime(d)) + +def _month_details(cls, stat_key=None): ''' Returns a list of all the periods for which we have data, unfortunately knows too much about the type of the cls being passed as GA_Url has a @@ -32,9 +38,13 @@ months = [] day = None - vals = model.Session.query(cls.period_name,cls.period_complete_day)\ - .filter(cls.period_name!='All').distinct(cls.period_name)\ - .order_by("period_name desc").all() + q = model.Session.query(cls.period_name,cls.period_complete_day)\ + .filter(cls.period_name!='All').distinct(cls.period_name) + if stat_key: + q= q.filter(cls.stat_name==stat_key) + + vals = q.order_by("period_name desc").all() + if vals and vals[0][1]: day = int(vals[0][1]) ordinal = 'th' if 11 <= day <= 13 \ @@ -52,7 +62,7 @@ def csv(self, month): import csv - q = model.Session.query(GA_Stat) + q = model.Session.query(GA_Stat).filter(GA_Stat.stat_name!='Downloads') if month != 'all': q = q.filter(GA_Stat.period_name==month) entries = q.order_by('GA_Stat.period_name, GA_Stat.stat_name, GA_Stat.key').all() @@ -68,6 +78,7 @@ entry.stat_name.encode('utf-8'), entry.key.encode('utf-8'), entry.value.encode('utf-8')]) + def index(self): @@ -101,11 +112,26 @@ return key, val + # Query historic values for sparkline rendering + sparkline_query = model.Session.query(GA_Stat)\ + .filter(GA_Stat.stat_name=='Totals')\ + .order_by(GA_Stat.period_name) + sparkline_data = {} + for x in sparkline_query: + sparkline_data[x.key] = sparkline_data.get(x.key,[]) + key, val = clean_key(x.key,float(x.value)) + tooltip = '%s: %s' % (_get_month_name(x.period_name), val) + sparkline_data[x.key].append( (tooltip,x.value) ) + # Trim the latest month, as it looks like a huge dropoff + for key in sparkline_data: + sparkline_data[key] = sparkline_data[key][:-1] + c.global_totals = [] if c.month: for e in entries: key, val = clean_key(e.key, e.value) - c.global_totals.append((key, val)) + sparkline = sparkline_data[e.key] + c.global_totals.append((key, val, sparkline)) else: d = collections.defaultdict(list) for e in entries: @@ -114,11 +140,19 @@ if k in ['Total page views', 'Total visits']: v = sum(v) else: - v = float(sum(v))/len(v) + v = float(sum(v))/float(len(v)) + sparkline = sparkline_data[k] key, val = clean_key(k,v) - c.global_totals.append((key, val)) - c.global_totals = sorted(c.global_totals, key=operator.itemgetter(0)) + c.global_totals.append((key, val, sparkline)) + # Sort the global totals into a more pleasant order + def sort_func(x): + key = x[0] + total_order = ['Total page views','Total visits','Pages per visit'] + if key in total_order: + return total_order.index(key) + return 999 + c.global_totals = sorted(c.global_totals, key=sort_func) keys = { 'Browser versions': 'browser_versions', @@ -155,12 +189,13 @@ for k, v in keys.iteritems(): q = model.Session.query(GA_Stat).\ - filter(GA_Stat.stat_name==k) + filter(GA_Stat.stat_name==k).\ + order_by(GA_Stat.period_name) + # Buffer the tabular data if c.month: entries = [] q = q.filter(GA_Stat.period_name==c.month).\ order_by('ga_stat.value::int desc') - d = collections.defaultdict(int) for e in q.all(): d[e.key] += int(e.value) @@ -169,10 +204,27 @@ entries.append((key,val,)) entries = sorted(entries, key=operator.itemgetter(1), reverse=True) + # Run a query on all months to gather graph data + graph_query = model.Session.query(GA_Stat).\ + filter(GA_Stat.stat_name==k).\ + order_by(GA_Stat.period_name) + graph_dict = {} + for stat in graph_query: + graph_dict[ stat.key ] = graph_dict.get(stat.key,{ + 'name':stat.key, + 'raw': {} + }) + graph_dict[ stat.key ]['raw'][stat.period_name] = float(stat.value) + stats_in_table = [x[0] for x in entries] + stats_not_in_table = set(graph_dict.keys()) - set(stats_in_table) + stats = stats_in_table + sorted(list(stats_not_in_table)) + graph = [graph_dict[x] for x in stats] + setattr(c, v+'_graph', json.dumps( _to_rickshaw(graph,percentageMode=True) )) + # Get the total for each set of values and then set the value as # a percentage of the total if k == 'Social sources': - total = sum([x for n,x in c.global_totals if n == 'Total visits']) + total = sum([x for n,x,graph in c.global_totals if n == 'Total visits']) else: total = sum([num for _,num in entries]) setattr(c, v, [(k,_percent(v,total)) for k,v in entries ]) @@ -197,7 +249,9 @@ writer = csv.writer(response) writer.writerow(["Publisher Title", "Publisher Name", "Views", "Visits", "Period Name"]) - for publisher,view,visit in _get_top_publishers(None): + top_publishers = _get_top_publishers(limit=None) + + for publisher,view,visit in top_publishers: writer.writerow([publisher.title.encode('utf-8'), publisher.name.encode('utf-8'), view, @@ -217,19 +271,20 @@ if not c.publisher: abort(404, 'A publisher with that name could not be found') - packages = self._get_packages(c.publisher) + packages = self._get_packages(publisher=c.publisher, month=c.month) response.headers['Content-Type'] = "text/csv; charset=utf-8" response.headers['Content-Disposition'] = \ str('attachment; filename=datasets_%s_%s.csv' % (c.publisher_name, month,)) writer = csv.writer(response) - writer.writerow(["Dataset Title", "Dataset Name", "Views", "Visits", "Period Name"]) - - for package,view,visit in packages: + writer.writerow(["Dataset Title", "Dataset Name", "Views", "Visits", "Resource downloads", "Period Name"]) + + for package,view,visit,downloads in packages: writer.writerow([package.title.encode('utf-8'), package.name.encode('utf-8'), view, visit, + downloads, month]) def publishers(self): @@ -246,14 +301,17 @@ c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month]) c.top_publishers = _get_top_publishers() + graph_data = _get_top_publishers_graph() + c.top_publishers_graph = json.dumps( _to_rickshaw(graph_data) ) + return render('ga_report/publisher/index.html') - def _get_packages(self, publisher=None, count=-1): + def _get_packages(self, publisher=None, month='', count=-1): '''Returns the datasets in order of views''' - if count == -1: - count = sys.maxint - - month = c.month or 'All' + have_download_data = True + month = month or 'All' + if month != 'All': + have_download_data = month >= DOWNLOADS_AVAILABLE_FROM q = model.Session.query(GA_Url,model.Package)\ .filter(model.Package.name==GA_Url.package_id)\ @@ -263,9 +321,26 @@ q = q.filter(GA_Url.period_name==month) q = q.order_by('ga_url.pageviews::int desc') top_packages = [] - for entry,package in q.limit(count): + if count == -1: + entries = q.all() + else: + entries = q.limit(count) + + for entry,package in entries: if package: - top_packages.append((package, entry.pageviews, entry.visits)) + # Downloads .... + if have_download_data: + dls = model.Session.query(GA_Stat).\ + filter(GA_Stat.stat_name=='Downloads').\ + filter(GA_Stat.key==package.name) + if month != 'All': # Fetch everything unless the month is specific + dls = dls.filter(GA_Stat.period_name==month) + downloads = 0 + for x in dls: + downloads += int(x.value) + else: + downloads = 'No data' + top_packages.append((package, entry.pageviews, entry.visits, downloads)) else: log.warning('Could not find package associated package') @@ -311,9 +386,73 @@ entry = q.filter(GA_Url.period_name==c.month).first() c.publisher_page_views = entry.pageviews if entry else 0 - c.top_packages = self._get_packages(c.publisher, 20) + c.top_packages = self._get_packages(publisher=c.publisher, count=20, month=c.month) + + # Graph query + top_packages_all_time = self._get_packages(publisher=c.publisher, count=20, month='All') + top_package_names = [ x[0].name for x in top_packages_all_time ] + graph_query = model.Session.query(GA_Url,model.Package)\ + .filter(model.Package.name==GA_Url.package_id)\ + .filter(GA_Url.url.like('/dataset/%'))\ + .filter(GA_Url.package_id.in_(top_package_names)) + all_series = {} + for entry,package in graph_query: + if not package: continue + if entry.period_name=='All': continue + all_series[package.name] = all_series.get(package.name,{ + 'name':package.title, + 'raw': {} + }) + all_series[package.name]['raw'][entry.period_name] = int(entry.pageviews) + graph = [ all_series[series_name] for series_name in top_package_names ] + c.graph_data = json.dumps( _to_rickshaw(graph) ) return render('ga_report/publisher/read.html') + +def _to_rickshaw(data, percentageMode=False): + if data==[]: + return data + # x-axis is every month in c.months. Note that data might not exist + # for entire history, eg. for recently-added datasets + x_axis = [x[0] for x in c.months] + x_axis.reverse() # Ascending order + x_axis = x_axis[:-1] # Remove latest month + totals = {} + for series in data: + series['data'] = [] + for x_string in x_axis: + x = _get_unix_epoch( x_string ) + y = series['raw'].get(x_string,0) + series['data'].append({'x':x,'y':y}) + totals[x] = totals.get(x,0)+y + if not percentageMode: + return data + # Turn all data into percentages + # Roll insignificant series into a catch-all + THRESHOLD = 1 + raw_data = data + data = [] + for series in raw_data: + for point in series['data']: + percentage = (100*float(point['y'])) / totals[point['x']] + if not (series in data) and percentage>THRESHOLD: + data.append(series) + point['y'] = percentage + others = [ x for x in raw_data if not (x in data) ] + if len(others): + data_other = [] + for i in range(len(x_axis)): + x = _get_unix_epoch(x_axis[i]) + y = 0 + for series in others: + y += series['data'][i]['y'] + data_other.append({'x':x,'y':y}) + data.append({ + 'name':'Other', + 'data': data_other + }) + return data + def _get_top_publishers(limit=20): ''' @@ -343,6 +482,46 @@ return top_publishers +def _get_top_publishers_graph(limit=20): + ''' + Returns a list of the top 20 publishers by dataset visits. + (The number to show can be varied with 'limit') + ''' + connection = model.Session.connection() + q = """ + select department_id, sum(pageviews::int) views + from ga_url + where department_id <> '' + and package_id <> '' + and url like '/dataset/%%' + and period_name='All' + group by department_id order by views desc + """ + if limit: + q = q + " limit %s;" % (limit) + + res = connection.execute(q) + department_ids = [ row[0] for row in res ] + + # Query for a history graph of these department ids + q = model.Session.query( + GA_Url.department_id, + GA_Url.period_name, + func.sum(cast(GA_Url.pageviews,sqlalchemy.types.INT)))\ + .filter( GA_Url.department_id.in_(department_ids) )\ + .filter( GA_Url.url.like('/dataset/%') )\ + .filter( GA_Url.package_id!='' )\ + .group_by( GA_Url.department_id, GA_Url.period_name ) + graph_dict = {} + for dept_id,period_name,views in q: + graph_dict[dept_id] = graph_dict.get( dept_id, { + 'name' : model.Group.get(dept_id).title, + 'raw' : {} + }) + graph_dict[dept_id]['raw'][period_name] = views + return [ graph_dict[id] for id in department_ids ] + + def _get_publishers(): ''' Returns a list of all publishers. Each item is a tuple: --- a/ckanext/ga_report/download_analytics.py +++ b/ckanext/ga_report/download_analytics.py @@ -1,6 +1,7 @@ import os import logging import datetime +import httplib import collections from pylons import config from ga_model import _normalize_url @@ -8,11 +9,16 @@ #from ga_client import GA +import logging +logger.setLevel(logging.DEBUG) + + log = logging.getLogger('ckanext.ga-report') FORMAT_MONTH = '%Y-%m' MIN_VIEWS = 50 MIN_VISITS = 20 +MIN_DOWNLOADS = 10 class DownloadAnalytics(object): '''Downloads and stores analytics info''' @@ -31,6 +37,11 @@ first_of_this_month = datetime.datetime(date.year, date.month, 1) _, last_day_of_month = calendar.monthrange(int(date.year), int(date.month)) last_of_this_month = datetime.datetime(date.year, date.month, last_day_of_month) + # if this is the latest month, note that it is only up until today + now = datetime.datetime.now() + if now.year == date.year and now.month == date.month: + last_day_of_month = now.day + last_of_this_month = now periods = ((date.strftime(FORMAT_MONTH), last_day_of_month, first_of_this_month, last_of_this_month),) @@ -122,8 +133,12 @@ log.info('Storing publisher views (%i rows)', len(data.get('url'))) self.store(period_name, period_complete_day, data,) - log.info('Aggregating datasets by publisher') + # Make sure the All records are correct. + ga_model.post_update_url_stats() + + log.info('Associating datasets with their publisher') ga_model.update_publisher_stats(period_name) # about 30 seconds. + log.info('Downloading and storing analytics for site-wide stats') self.sitewide_stats( period_name, period_complete_day ) @@ -153,7 +168,8 @@ data = collections.defaultdict(list) rows = results.get('rows',[]) for row in rows: - data[_normalize_url(row[0])].append( (row[1], int(row[2]),) ) + url = _normalize_url('http:/' + row[0]) + data[url].append( (row[1], int(row[2]),) ) ga_model.update_social(period_name, data) @@ -167,17 +183,23 @@ # Supported query params at # https://developers.google.com/analytics/devguides/reporting/core/v3/reference - results = self.service.data().ga().get( - ids='ga:' + self.profile_id, - filters=query, - start_date=start_date, - metrics=metrics, - sort=sort, - dimensions="ga:pagePath", - max_results=10000, - end_date=end_date).execute() + try: + results = self.service.data().ga().get( + ids='ga:' + self.profile_id, + filters=query, + start_date=start_date, + metrics=metrics, + sort=sort, + dimensions="ga:pagePath", + max_results=10000, + end_date=end_date).execute() + except httplib.BadStatusLine: + log.error(u"Failed to download data=> ids: ga:{0}, filters: {1}, start_date: {2}, end_date: {3}, metrics: {4}, sort: {5}, dimensions: ga:pagePath".format( + self.profile_id, query, start_date, end_date, metrics, sort )) + return dict(url=[]) packages = [] + log.info("There are %d results" % results['totalResults']) for entry in results.get('rows'): (loc,pageviews,visits) = entry url = _normalize_url('http:/' + loc) # strips off domain e.g. www.data.gov.uk or data.gov.uk @@ -202,7 +224,7 @@ start_date = '%s-01' % period_name end_date = '%s-%s' % (period_name, last_day_of_month) funcs = ['_totals_stats', '_social_stats', '_os_stats', - '_locale_stats', '_browser_stats', '_mobile_stats'] + '_locale_stats', '_browser_stats', '_mobile_stats', '_download_stats'] for f in funcs: log.info('Downloading analytics for %s' % f.split('_')[1]) getattr(self, f)(start_date, end_date, period_name, period_complete_day) @@ -249,7 +271,7 @@ ids='ga:' + self.profile_id, filters='ga:pagePath==%s' % (path,), start_date=start_date, - metrics='ga:bounces,ga:pageviews', + metrics='ga:visitBounceRate', dimensions='ga:pagePath', max_results=10000, end_date=end_date).execute() @@ -259,10 +281,10 @@ path, result_data) return results = result_data[0] - bounces, total = [float(x) for x in result_data[0][1:]] - pct = 100 * bounces/total - log.info('%d bounces from %d total == %s', bounces, total, pct) - ga_model.update_sitewide_stats(period_name, "Totals", {'Bounce rate (home page)': pct}, + bounces = float(results[1]) + # visitBounceRate is already a % + log.info('Google reports visitBounceRate as %s', bounces) + ga_model.update_sitewide_stats(period_name, "Totals", {'Bounce rate (home page)': float(bounces)}, period_complete_day) @@ -289,6 +311,74 @@ self._filter_out_long_tail(data, MIN_VIEWS) ga_model.update_sitewide_stats(period_name, "Country", data, period_complete_day) + + def _download_stats(self, start_date, end_date, period_name, period_complete_day): + """ Fetches stats about data downloads """ + import ckan.model as model + + data = {} + + results = self.service.data().ga().get( + ids='ga:' + self.profile_id, + start_date=start_date, + filters='ga:eventAction==download', + metrics='ga:totalEvents', + sort='-ga:totalEvents', + dimensions="ga:eventLabel", + max_results=10000, + end_date=end_date).execute() + result_data = results.get('rows') + if not result_data: + # We may not have data for this time period, so we need to bail + # early. + log.info("There is no download data for this time period") + return + + def process_result_data(result_data, cached=False): + progress_total = len(result_data) + progress_count = 0 + resources_not_matched = [] + for result in result_data: + progress_count += 1 + if progress_count % 100 == 0: + log.debug('.. %d/%d done so far', progress_count, progress_total) + + url = result[0].strip() + + # Get package id associated with the resource that has this URL. + q = model.Session.query(model.Resource) + if cached: + r = q.filter(model.Resource.cache_url.like("%s%%" % url)).first() + else: + r = q.filter(model.Resource.url.like("%s%%" % url)).first() + + package_name = r.resource_group.package.name if r else "" + if package_name: + data[package_name] = data.get(package_name, 0) + int(result[1]) + else: + resources_not_matched.append(url) + continue + if resources_not_matched: + log.debug('Could not match %i or %i resource URLs to datasets. e.g. %r', + len(resources_not_matched), progress_total, resources_not_matched[:3]) + + log.info('Associating downloads of resource URLs with their respective datasets') + process_result_data(results.get('rows')) + + results = self.service.data().ga().get( + ids='ga:' + self.profile_id, + start_date=start_date, + filters='ga:eventAction==download-cache', + metrics='ga:totalEvents', + sort='-ga:totalEvents', + dimensions="ga:eventLabel", + max_results=10000, + end_date=end_date).execute() + log.info('Associating downloads of cache resource URLs with their respective datasets') + process_result_data(results.get('rows'), cached=False) + + self._filter_out_long_tail(data, MIN_DOWNLOADS) + ga_model.update_sitewide_stats(period_name, "Downloads", data, period_complete_day) def _social_stats(self, start_date, end_date, period_name, period_complete_day): """ Finds out which social sites people are referred from """ --- a/ckanext/ga_report/ga_model.py +++ b/ckanext/ga_report/ga_model.py @@ -161,10 +161,64 @@ def pre_update_url_stats(period_name): - model.Session.query(GA_Url).\ - filter(GA_Url.period_name==period_name).delete() - model.Session.query(GA_Url).\ - filter(GA_Url.period_name=='All').delete() + q = model.Session.query(GA_Url).\ + filter(GA_Url.period_name==period_name) + log.debug("Deleting %d '%s' records" % (q.count(), period_name)) + q.delete() + + q = model.Session.query(GA_Url).\ + filter(GA_Url.period_name == 'All') + log.debug("Deleting %d 'All' records..." % q.count()) + q.delete() + + model.Session.flush() + model.Session.commit() + model.repo.commit_and_remove() + log.debug('...done') + +def post_update_url_stats(): + + """ Check the distinct url field in ga_url and make sure + it has an All record. If not then create one. + + After running this then every URL should have an All + record regardless of whether the URL has an entry for + the month being currently processed. + """ + log.debug('Post-processing "All" records...') + query = """select url, pageviews::int, visits::int + from ga_url + where url not in (select url from ga_url where period_name ='All')""" + connection = model.Session.connection() + res = connection.execute(query) + + views, visits = {}, {} + # url, views, visits + for row in res: + views[row[0]] = views.get(row[0], 0) + row[1] + visits[row[0]] = visits.get(row[0], 0) + row[2] + + progress_total = len(views.keys()) + progress_count = 0 + for key in views.keys(): + progress_count += 1 + if progress_count % 100 == 0: + log.debug('.. %d/%d done so far', progress_count, progress_total) + + package, publisher = _get_package_and_publisher(key) + + values = {'id': make_uuid(), + 'period_name': "All", + 'period_complete_day': 0, + 'url': key, + 'pageviews': views[key], + 'visits': visits[key], + 'department_id': publisher, + 'package_id': package + } + model.Session.add(GA_Url(**values)) + model.Session.commit() + log.debug('..done') def update_url_stats(period_name, period_complete_day, url_data): @@ -173,9 +227,14 @@ stores them in GA_Url under the period and recalculates the totals for the 'All' period. ''' + progress_total = len(url_data) + progress_count = 0 for url, views, visits in url_data: + progress_count += 1 + if progress_count % 100 == 0: + log.debug('.. %d/%d done so far', progress_count, progress_total) + package, publisher = _get_package_and_publisher(url) - item = model.Session.query(GA_Url).\ filter(GA_Url.period_name==period_name).\ @@ -216,8 +275,8 @@ 'period_name': 'All', 'period_complete_day': 0, 'url': url, - 'pageviews': sum([int(e.pageviews) for e in entries]) + old_pageviews, - 'visits': sum([int(e.visits or 0) for e in entries]) + old_visits, + 'pageviews': sum([int(e.pageviews) for e in entries]) + int(old_pageviews), + 'visits': sum([int(e.visits or 0) for e in entries]) + int(old_visits), 'department_id': publisher, 'package_id': package } @@ -322,11 +381,8 @@ order_by(model.Group.name).all() def get_children(publisher): - '''Finds child publishers for the given publisher (object). (Not recursive)''' - from ckan.model.group import HIERARCHY_CTE - return model.Session.query(model.Group).\ - from_statement(HIERARCHY_CTE).params(id=publisher.id, type='publisher').\ - all() + '''Finds child publishers for the given publisher (object). (Not recursive i.e. returns one level)''' + return publisher.get_children_groups(type='organization') def go_down_tree(publisher): '''Provided with a publisher object, it walks down the hierarchy and yields each publisher, @@ -343,10 +399,10 @@ ''' for object_type in (GA_Url, GA_Stat, GA_Publisher, GA_ReferralStat): q = model.Session.query(object_type) - if period_name != 'all': + if period_name != 'All': q = q.filter_by(period_name=period_name) q.delete() - model.Session.commit() + model.repo.commit_and_remove() def get_score_for_dataset(dataset_name): ''' --- a/ckanext/ga_report/helpers.py +++ b/ckanext/ga_report/helpers.py @@ -50,9 +50,12 @@ dataset = model.Package.get(ga_url.url[len('/dataset/'):]) if dataset and not dataset.state == 'active': dataset = None - count += 1 - if count > 10: - break + # When testing, it is possible that top datasets are not available + # so only go round this loop a few times before falling back on + # a random dataset. + count += 1 + if count > 10: + break if not dataset: # fallback dataset = model.Session.query(model.Package)\ @@ -77,7 +80,7 @@ return base.render_snippet('ga_report/ga_popular_single.html', **context) -def most_popular_datasets(publisher, count=20): +def most_popular_datasets(publisher, count=20, preview_image=None): if not publisher: _log.error("No valid publisher passed to 'most_popular_datasets'") @@ -89,7 +92,8 @@ 'dataset_count': len(results), 'datasets': results, - 'publisher': publisher + 'publisher': publisher, + 'preview_image': preview_image } return base.render_snippet('ga_report/publisher/popular.html', **ctx) @@ -103,8 +107,18 @@ for entry in entries: if len(datasets) < count: p = model.Package.get(entry.url[len('/dataset/'):]) + + if not p: + _log.warning("Could not find Package for {url}".format(url=entry.url)) + continue + + if not p.state == 'active': + _log.warning("Package {0} is not active, it is {1}".format(p.name, p.state)) + continue + if not p in datasets: datasets[p] = {'views':0, 'visits': 0} + datasets[p]['views'] = datasets[p]['views'] + int(entry.pageviews) datasets[p]['visits'] = datasets[p]['visits'] + int(entry.visits) @@ -114,3 +128,17 @@ return sorted(results, key=operator.itemgetter(1), reverse=True) +def month_option_title(month_iso, months, day): + month_isos = [ iso_code for (iso_code,name) in months ] + try: + index = month_isos.index(month_iso) + except ValueError: + _log.error('Month "%s" not found in list of months.' % month_iso) + return month_iso + month_name = months[index][1] + if index==0: + return month_name + (' (up to %s)'%day) + return month_name + + + --- a/ckanext/ga_report/plugin.py +++ b/ckanext/ga_report/plugin.py @@ -5,7 +5,8 @@ from ckanext.ga_report.helpers import (most_popular_datasets, popular_datasets, - single_popular_dataset) + single_popular_dataset, + month_option_title) log = logging.getLogger('ckanext.ga-report') @@ -27,7 +28,8 @@ 'ga_report_installed': lambda: True, 'popular_datasets': popular_datasets, 'most_popular_datasets': most_popular_datasets, - 'single_popular_dataset': single_popular_dataset + 'single_popular_dataset': single_popular_dataset, + 'month_option_title': month_option_title } def after_map(self, map): @@ -41,6 +43,16 @@ '/data/site-usage/data_{month}.csv', controller='ckanext.ga_report.controller:GaReport', action='csv' + ) + map.connect( + '/data/site-usage/downloads', + controller='ckanext.ga_report.controller:GaReport', + action='downloads' + ) + map.connect( + '/data/site-usage/downloads_{month}.csv', + controller='ckanext.ga_report.controller:GaReport', + action='csv_downloads' ) # GaDatasetReport --- /dev/null +++ b/ckanext/ga_report/public/css/ga_report.css @@ -1,1 +1,69 @@ +.table-condensed td.sparkline-cell { + padding: 1px 0 0 0; + width: 108px; + text-align: center; + /* Hack to hide the momentary flash of text + * before sparklines are fully rendered */ + font-size: 1px; + color: transparent; + overflow: hidden; +} +.rickshaw_chart_container { + position: relative; + height: 350px; + margin: 0 auto 20px auto; +} +.rickshaw_chart { + position: absolute; + left: 40px; + width: 500px; + top: 0; + bottom: 0; +} +.rickshaw_legend { + background: transparent; + width: 100%; + padding-top: 4px; +} +.rickshaw_y_axis { + position: absolute; + top: 0; + bottom: 0; + width: 40px; +} +.rickshaw_legend .label { + background: transparent !important; + color: #000000 !important; + font-weight: normal !important; +} +.rickshaw_legend .instructions { + color: #000; + margin-bottom: 6px; +} +.rickshaw_legend .line .action { + display: none; +} +.rickshaw_legend .line .swatch { + display: block; + float: left; +} +.rickshaw_legend .line .label { + display: block; + white-space: normal; + float: left; + width: 200px; +} +.rickshaw_legend .line .label:hover { + text-decoration: underline; +} + +.ga-reports-table .td-numeric { + text-align: center; +} +.ga-reports-heading { + padding-right: 10px; + margin-top: 4px; + float: left; +} + --- /dev/null +++ b/ckanext/ga_report/public/scripts/ckanext_ga_reports.js @@ -1,1 +1,132 @@ +var CKAN = CKAN || {}; +CKAN.GA_Reports = {}; +CKAN.GA_Reports.render_rickshaw = function( css_name, data, mode, colorscheme ) { + var graphLegends = $('#graph-legend-container'); + + function renderError(alertClass,alertText,legendText) { + $("#chart_"+css_name) + .html( '
'+alertText+'
') + .closest('.rickshaw_chart_container').css('height',50); + var myLegend = $('
') + .html(legendText) + .appendTo(graphLegends); + } + + if (!Modernizr.svg) { + renderError('','Your browser does not support vector graphics. No graphs can be rendered.','(Graph cannot be rendered)'); + return; + } + if (data.length==0) { + renderError('alert-info','There is not enough data to render a graph.','(No graph available)'); + return + } + var myLegend = $('
').appendTo(graphLegends); + + var palette = new Rickshaw.Color.Palette( { scheme: colorscheme } ); + $.each(data, function(i, object) { + object['color'] = palette.color(); + }); + // Rickshaw renders the legend in reverse order... + data.reverse(); + + var graphElement = document.querySelector("#chart_"+css_name); + + var graph = new Rickshaw.Graph( { + element: document.querySelector("#chart_"+css_name), + renderer: mode, + series: data , + height: 328 + }); + var x_axis = new Rickshaw.Graph.Axis.Time( { + graph: graph + } ); + var y_axis = new Rickshaw.Graph.Axis.Y( { + graph: graph, + orientation: 'left', + tickFormat: Rickshaw.Fixtures.Number.formatKMBT, + element: document.getElementById('y_axis_'+css_name) + } ); + var legend = new Rickshaw.Graph.Legend( { + element: document.querySelector('#legend_'+css_name), + graph: graph + } ); + var shelving = new Rickshaw.Graph.Behavior.Series.Toggle( { + graph: graph, + legend: legend + } ); + myLegend.prepend('
Click on a series below to isolate its graph:
'); + graph.render(); +}; + +CKAN.GA_Reports.bind_sparklines = function() { + /* + * Bind to the 'totals' tab being on screen, when the + * Sparkline graphs should be drawn. + * Note that they cannot be drawn sooner. + */ + var created = false; + $('a[href="#totals"]').on( + 'shown', + function() { + if (!created) { + var sparkOptions = { + enableTagOptions: true, + type: 'line', + width: 100, + height: 26, + chartRangeMin: 0, + spotColor: '', + maxSpotColor: '', + minSpotColor: '', + highlightSpotColor: '#000000', + lineColor: '#3F8E6D', + fillColor: '#B7E66B' + }; + $('.sparkline').sparkline('html',sparkOptions); + created = true; + } + $.sparkline_display_visible(); + } + ); +}; + +CKAN.GA_Reports.bind_sidebar = function() { + /* + * Bind to changes in the tab behaviour: + * Show the correct rickshaw graph in the sidebar. + * Not to be called before all graphs load. + */ + $('a[data-toggle="hashtab"]').on( + 'shown', + function(e) { + var href = $(e.target).attr('href'); + var pane = $(href); + if (!pane.length) { console.err('bad href',href); return; } + var legend_name = "none"; + var graph = pane.find('.rickshaw_chart'); + if (graph.length) { + legend_name = graph.attr('id').replace('chart_',''); + } + legend_name = '#legend_'+legend_name; + $('#graph-legend-container > *').hide(); + $('#graph-legend-container .instructions').show(); + $(legend_name).show(); + } + ); + /* The first tab might already have been shown */ + $('li.active > a[data-toggle="hashtab"]').trigger('shown'); +}; + +CKAN.GA_Reports.bind_month_selector = function() { + var handler = function(e) { + var target = $(e.delegateTarget); + var form = target.closest('form'); + var url = form.attr('action')+'?month='+target.val()+window.location.hash; + window.location = url; + }; + var selectors = $('select[name="month"]'); + assert(selectors.length>0); + selectors.bind('change', handler); +}; + --- /dev/null +++ b/ckanext/ga_report/public/scripts/modernizr-2.6.2.custom.js @@ -1,1 +1,815 @@ - +/* Modernizr 2.6.2 (Custom Build) | MIT & BSD + * Build: http://modernizr.com/download/#-fontface-backgroundsize-borderimage-borderradius-boxshadow-flexbox-hsla-multiplebgs-opacity-rgba-textshadow-cssanimations-csscolumns-generatedcontent-cssgradients-cssreflections-csstransforms-csstransforms3d-csstransitions-applicationcache-canvas-canvastext-draganddrop-hashchange-history-audio-video-indexeddb-input-inputtypes-localstorage-postmessage-sessionstorage-websockets-websqldatabase-webworkers-geolocation-inlinesvg-smil-svg-svgclippaths-touch-webgl-shiv-cssclasses-addtest-prefixed-teststyles-testprop-testallprops-hasevent-prefixes-domprefixes-load + */ +; + + + +window.Modernizr = (function( window, document, undefined ) { + + var version = '2.6.2', + + Modernizr = {}, + + enableClasses = true, + + docElement = document.documentElement, + + mod = 'modernizr', + modElem = document.createElement(mod), + mStyle = modElem.style, + + inputElem = document.createElement('input') , + + smile = ':)', + + toString = {}.toString, + + prefixes = ' -webkit- -moz- -o- -ms- '.split(' '), + + + + omPrefixes = 'Webkit Moz O ms', + + cssomPrefixes = omPrefixes.split(' '), + + domPrefixes = omPrefixes.toLowerCase().split(' '), + + ns = {'svg': 'http://www.w3.org/2000/svg'}, + + tests = {}, + inputs = {}, + attrs = {}, + + classes = [], + + slice = classes.slice, + + featureName, + + + injectElementWithStyles = function( rule, callback, nodes, testnames ) { + + var style, ret, node, docOverflow, + div = document.createElement('div'), + body = document.body, + fakeBody = body || document.createElement('body'); + + if ( parseInt(nodes, 10) ) { + while ( nodes-- ) { + node = document.createElement('div'); + node.id = testnames ? testnames[nodes] : mod + (nodes + 1); + div.appendChild(node); + } + } + + style = ['­',''].join(''); + div.id = mod; + (body ? div : fakeBody).innerHTML += style; + fakeBody.appendChild(div); + if ( !body ) { + fakeBody.style.background = ''; + fakeBody.style.overflow = 'hidden'; + docOverflow = docElement.style.overflow; + docElement.style.overflow = 'hidden'; + docElement.appendChild(fakeBody); + } + + ret = callback(div, rule); + if ( !body ) { + fakeBody.parentNode.removeChild(fakeBody); + docElement.style.overflow = docOverflow; + } else { + div.parentNode.removeChild(div); + } + + return !!ret; + + }, + + + + isEventSupported = (function() { + + var TAGNAMES = { + 'select': 'input', 'change': 'input', + 'submit': 'form', 'reset': 'form', + 'error': 'img', 'load': 'img', 'abort': 'img' + }; + + function isEventSupported( eventName, element ) { + + element = element || document.createElement(TAGNAMES[eventName] || 'div'); + eventName = 'on' + eventName; + + var isSupported = eventName in element; + + if ( !isSupported ) { + if ( !element.setAttribute ) { + element = document.createElement('div'); + } + if ( element.setAttribute && element.removeAttribute ) { + element.setAttribute(eventName, ''); + isSupported = is(element[eventName], 'function'); + + if ( !is(element[eventName], 'undefined') ) { + element[eventName] = undefined; + } + element.removeAttribute(eventName); + } + } + + element = null; + return isSupported; + } + return isEventSupported; + })(), + + + _hasOwnProperty = ({}).hasOwnProperty, hasOwnProp; + + if ( !is(_hasOwnProperty, 'undefined') && !is(_hasOwnProperty.call, 'undefined') ) { + hasOwnProp = function (object, property) { + return _hasOwnProperty.call(object, property); + }; + } + else { + hasOwnProp = function (object, property) { + return ((property in object) && is(object.constructor.prototype[property], 'undefined')); + }; + } + + + if (!Function.prototype.bind) { + Function.prototype.bind = function bind(that) { + + var target = this; + + if (typeof target != "function") { + throw new TypeError(); + } + + var args = slice.call(arguments, 1), + bound = function () { + + if (this instanceof bound) { + + var F = function(){}; + F.prototype = target.prototype; + var self = new F(); + + var result = target.apply( + self, + args.concat(slice.call(arguments)) + ); + if (Object(result) === result) { + return result; + } + return self; + + } else { + + return target.apply( + that, + args.concat(slice.call(arguments)) + ); + + } + + }; + + return bound; + }; + } + + function setCss( str ) { + mStyle.cssText = str; + } + + function setCssAll( str1, str2 ) { + return setCss(prefixes.join(str1 + ';') + ( str2 || '' )); + } + + function is( obj, type ) { + return typeof obj === type; + } + + function contains( str, substr ) { + return !!~('' + str).indexOf(substr); + } + + function testProps( props, prefixed ) { + for ( var i in props ) { + var prop = props[i]; + if ( !contains(prop, "-") && mStyle[prop] !== undefined ) { + return prefixed == 'pfx' ? prop : true; + } + } + return false; + } + + function testDOMProps( props, obj, elem ) { + for ( var i in props ) { + var item = obj[props[i]]; + if ( item !== undefined) { + + if (elem === false) return props[i]; + + if (is(item, 'function')){ + return item.bind(elem || obj); + } + + return item; + } + } + return false; + } + + function testPropsAll( prop, prefixed, elem ) { + + var ucProp = prop.charAt(0).toUpperCase() + prop.slice(1), + props = (prop + ' ' + cssomPrefixes.join(ucProp + ' ') + ucProp).split(' '); + + if(is(prefixed, "string") || is(prefixed, "undefined")) { + return testProps(props, prefixed); + + } else { + props = (prop + ' ' + (domPrefixes).join(ucProp + ' ') + ucProp).split(' '); + return testDOMProps(props, prefixed, elem); + } + } tests['flexbox'] = function() { + return testPropsAll('flexWrap'); + }; tests['canvas'] = function() { + var elem = document.createElement('canvas'); + return !!(elem.getContext && elem.getContext('2d')); + }; + + tests['canvastext'] = function() { + return !!(Modernizr['canvas'] && is(document.createElement('canvas').getContext('2d').fillText, 'function')); + }; + + + + tests['webgl'] = function() { + return !!window.WebGLRenderingContext; + }; + + + tests['touch'] = function() { + var bool; + + if(('ontouchstart' in window) || window.DocumentTouch && document instanceof DocumentTouch) { + bool = true; + } else { + injectElementWithStyles(['@media (',prefixes.join('touch-enabled),('),mod,')','{#modernizr{top:9px;position:absolute}}'].join(''), function( node ) { + bool = node.offsetTop === 9; + }); + } + + return bool; + }; + + + + tests['geolocation'] = function() { + return 'geolocation' in navigator; + }; + + + tests['postmessage'] = function() { + return !!window.postMessage; + }; + + + tests['websqldatabase'] = function() { + return !!window.openDatabase; + }; + + tests['indexedDB'] = function() { + return !!testPropsAll("indexedDB", window); + }; + + tests['hashchange'] = function() { + return isEventSupported('hashchange', window) && (document.documentMode === undefined || document.documentMode > 7); + }; + + tests['history'] = function() { + return !!(window.history && history.pushState); + }; + + tests['draganddrop'] = function() { + var div = document.createElement('div'); + return ('draggable' in div) || ('ondragstart' in div && 'ondrop' in div); + }; + + tests['websockets'] = function() { + return 'WebSocket' in window || 'MozWebSocket' in window; + }; + + + tests['rgba'] = function() { + setCss('background-color:rgba(150,255,150,.5)'); + + return contains(mStyle.backgroundColor, 'rgba'); + }; + + tests['hsla'] = function() { + setCss('background-color:hsla(120,40%,100%,.5)'); + + return contains(mStyle.backgroundColor, 'rgba') || contains(mStyle.backgroundColor, 'hsla'); + }; + + tests['multiplebgs'] = function() { + setCss('background:url(https://),url(https://),red url(https://)'); + + return (/(url\s*\(.*?){3}/).test(mStyle.background); + }; tests['backgroundsize'] = function() { + return testPropsAll('backgroundSize'); + }; + + tests['borderimage'] = function() { + return testPropsAll('borderImage'); + }; + + + + tests['borderradius'] = function() { + return testPropsAll('borderRadius'); + }; + + tests['boxshadow'] = function() { + return testPropsAll('boxShadow'); + }; + + tests['textshadow'] = function() { + return document.createElement('div').style.textShadow === ''; + }; + + + tests['opacity'] = function() { + setCssAll('opacity:.55'); + + return (/^0.55$/).test(mStyle.opacity); + }; + + + tests['cssanimations'] = function() { + return testPropsAll('animationName'); + }; + + + tests['csscolumns'] = function() { + return testPropsAll('columnCount'); + }; + + + tests['cssgradients'] = function() { + var str1 = 'background-image:', + str2 = 'gradient(linear,left top,right bottom,from(#9f9),to(white));', + str3 = 'linear-gradient(left top,#9f9, white);'; + + setCss( + (str1 + '-webkit- '.split(' ').join(str2 + str1) + + prefixes.join(str3 + str1)).slice(0, -str1.length) + ); + + return contains(mStyle.backgroundImage, 'gradient'); + }; + + + tests['cssreflections'] = function() { + return testPropsAll('boxReflect'); + }; + + + tests['csstransforms'] = function() { + return !!testPropsAll('transform'); + }; + + + tests['csstransforms3d'] = function() { + + var ret = !!testPropsAll('perspective'); + + if ( ret && 'webkitPerspective' in docElement.style ) { + + injectElementWithStyles('@media (transform-3d),(-webkit-transform-3d){#modernizr{left:9px;position:absolute;height:3px;}}', function( node, rule ) { + ret = node.offsetLeft === 9 && node.offsetHeight === 3; + }); + } + return ret; + }; + + + tests['csstransitions'] = function() { + return testPropsAll('transition'); + }; + + + + tests['fontface'] = function() { + var bool; + + injectElementWithStyles('@font-face {font-family:"font";src:url("https://")}', function( node, rule ) { + var style = document.getElementById('smodernizr'), + sheet = style.sheet || style.styleSheet, + cssText = sheet ? (sheet.cssRules && sheet.cssRules[0] ? sheet.cssRules[0].cssText : sheet.cssText || '') : ''; + + bool = /src/i.test(cssText) && cssText.indexOf(rule.split(' ')[0]) === 0; + }); + + return bool; + }; + + tests['generatedcontent'] = function() { + var bool; + + injectElementWithStyles(['#',mod,'{font:0/0 a}#',mod,':after{content:"',smile,'";visibility:hidden;font:3px/1 a}'].join(''), function( node ) { + bool = node.offsetHeight >= 3; + }); + + return bool; + }; + tests['video'] = function() { + var elem = document.createElement('video'), + bool = false; + + try { + if ( bool = !!elem.canPlayType ) { + bool = new Boolean(bool); + bool.ogg = elem.canPlayType('video/ogg; codecs="theora"') .replace(/^no$/,''); + + bool.h264 = elem.canPlayType('video/mp4; codecs="avc1.42E01E"') .replace(/^no$/,''); + + bool.webm = elem.canPlayType('video/webm; codecs="vp8, vorbis"').replace(/^no$/,''); + } + + } catch(e) { } + + return bool; + }; + + tests['audio'] = function() { + var elem = document.createElement('audio'), + bool = false; + + try { + if ( bool = !!elem.canPlayType ) { + bool = new Boolean(bool); + bool.ogg = elem.canPlayType('audio/ogg; codecs="vorbis"').replace(/^no$/,''); + bool.mp3 = elem.canPlayType('audio/mpeg;') .replace(/^no$/,''); + + bool.wav = elem.canPlayType('audio/wav; codecs="1"') .replace(/^no$/,''); + bool.m4a = ( elem.canPlayType('audio/x-m4a;') || + elem.canPlayType('audio/aac;')) .replace(/^no$/,''); + } + } catch(e) { } + + return bool; + }; + + + tests['localstorage'] = function() { + try { + localStorage.setItem(mod, mod); + localStorage.removeItem(mod); + return true; + } catch(e) { + return false; + } + }; + + tests['sessionstorage'] = function() { + try { + sessionStorage.setItem(mod, mod); + sessionStorage.removeItem(mod); + return true; + } catch(e) { + return false; + } + }; + + + tests['webworkers'] = function() { + return !!window.Worker; + }; + + + tests['applicationcache'] = function() { + return !!window.applicationCache; + }; + + + tests['svg'] = function() { + return !!document.createElementNS && !!document.createElementNS(ns.svg, 'svg').createSVGRect; + }; + + tests['inlinesvg'] = function() { + var div = document.createElement('div'); + div.innerHTML = ''; + return (div.firstChild && div.firstChild.namespaceURI) == ns.svg; + }; + + tests['smil'] = function() { + return !!document.createElementNS && /SVGAnimate/.test(toString.call(document.createElementNS(ns.svg, 'animate'))); + }; + + + tests['svgclippaths'] = function() { + return !!document.createElementNS && /SVGClipPath/.test(toString.call(document.createElementNS(ns.svg, 'clipPath'))); + }; + + function webforms() { + Modernizr['input'] = (function( props ) { + for ( var i = 0, len = props.length; i < len; i++ ) { + attrs[ props[i] ] = !!(props[i] in inputElem); + } + if (attrs.list){ + attrs.list = !!(document.createElement('datalist') && window.HTMLDataListElement); + } + return attrs; + })('autocomplete autofocus list placeholder max min multiple pattern required step'.split(' ')); + Modernizr['inputtypes'] = (function(props) { + + for ( var i = 0, bool, inputElemType, defaultView, len = props.length; i < len; i++ ) { + + inputElem.setAttribute('type', inputElemType = props[i]); + bool = inputElem.type !== 'text'; + + if ( bool ) { + + inputElem.value = smile; + inputElem.style.cssText = 'position:absolute;visibility:hidden;'; + + if ( /^range$/.test(inputElemType) && inputElem.style.WebkitAppearance !== undefined ) { + + docElement.appendChild(inputElem); + defaultView = document.defaultView; + + bool = defaultView.getComputedStyle && + defaultView.getComputedStyle(inputElem, null).WebkitAppearance !== 'textfield' && + (inputElem.offsetHeight !== 0); + + docElement.removeChild(inputElem); + + } else if ( /^(search|tel)$/.test(inputElemType) ){ + } else if ( /^(url|email)$/.test(inputElemType) ) { + bool = inputElem.checkValidity && inputElem.checkValidity() === false; + + } else { + bool = inputElem.value != smile; + } + } + + inputs[ props[i] ] = !!bool; + } + return inputs; + })('search tel url email datetime date month week time datetime-local number range color'.split(' ')); + } + for ( var feature in tests ) { + if ( hasOwnProp(tests, feature) ) { + featureName = feature.toLowerCase(); + Modernizr[featureName] = tests[feature](); + + classes.push((Modernizr[featureName] ? '' : 'no-') + featureName); + } + } + + Modernizr.input || webforms(); + + + Modernizr.addTest = function ( feature, test ) { + if ( typeof feature == 'object' ) { + for ( var key in feature ) { + if ( hasOwnProp( feature, key ) ) { + Modernizr.addTest( key, feature[ key ] ); + } + } + } else { + + feature = feature.toLowerCase(); + + if ( Modernizr[feature] !== undefined ) { + return Modernizr; + } + + test = typeof test == 'function' ? test() : test; + + if (typeof enableClasses !== "undefined" && enableClasses) { + docElement.className += ' ' + (test ? '' : 'no-') + feature; + } + Modernizr[feature] = test; + + } + + return Modernizr; + }; + + + setCss(''); + modElem = inputElem = null; + + ;(function(window, document) { + var options = window.html5 || {}; + + var reSkip = /^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i; + + var saveClones = /^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i; + + var supportsHtml5Styles; + + var expando = '_html5shiv'; + + var expanID = 0; + + var expandoData = {}; + + var supportsUnknownElements; + + (function() { + try { + var a = document.createElement('a'); + a.innerHTML = ''; + supportsHtml5Styles = ('hidden' in a); + + supportsUnknownElements = a.childNodes.length == 1 || (function() { + (document.createElement)('a'); + var frag = document.createDocumentFragment(); + return ( + typeof frag.cloneNode == 'undefined' || + typeof frag.createDocumentFragment == 'undefined' || + typeof frag.createElement == 'undefined' + ); + }()); + } catch(e) { + supportsHtml5Styles = true; + supportsUnknownElements = true; + } + + }()); function addStyleSheet(ownerDocument, cssText) { + var p = ownerDocument.createElement('p'), + parent = ownerDocument.getElementsByTagName('head')[0] || ownerDocument.documentElement; + + p.innerHTML = 'x'; + return parent.insertBefore(p.lastChild, parent.firstChild); + } + + function getElements() { + var elements = html5.elements; + return typeof elements == 'string' ? elements.split(' ') : elements; + } + + function getExpandoData(ownerDocument) { + var data = expandoData[ownerDocument[expando]]; + if (!data) { + data = {}; + expanID++; + ownerDocument[expando] = expanID; + expandoData[expanID] = data; + } + return data; + } + + function createElement(nodeName, ownerDocument, data){ + if (!ownerDocument) { + ownerDocument = document; + } + if(supportsUnknownElements){ + return ownerDocument.createElement(nodeName); + } + if (!data) { + data = getExpandoData(ownerDocument); + } + var node; + + if (data.cache[nodeName]) { + node = data.cache[nodeName].cloneNode(); + } else if (saveClones.test(nodeName)) { + node = (data.cache[nodeName] = data.createElem(nodeName)).cloneNode(); + } else { + node = data.createElem(nodeName); + } + + return node.canHaveChildren && !reSkip.test(nodeName) ? data.frag.appendChild(node) : node; + } + + function createDocumentFragment(ownerDocument, data){ + if (!ownerDocument) { + ownerDocument = document; + } + if(supportsUnknownElements){ + return ownerDocument.createDocumentFragment(); + } + data = data || getExpandoData(ownerDocument); + var clone = data.frag.cloneNode(), + i = 0, + elems = getElements(), + l = elems.length; + for(;i