Add time-series graph data to the GA report controller.

Summary of the change:
  * import json; add _get_unix_epoch() to turn a 'YYYY-MM' period name into
    an integer Unix timestamp.
  * rename graph_query/graph_data to sparkline_query/sparkline_data and sort
    the global totals in a fixed order instead of alphabetically.
  * for every stat in `keys`, build a per-key series over all periods
    (latest period excluded) and expose it as JSON on c.<name>_graph.
  * _get_packages() now returns (top_packages, graph_data); both callers are
    updated to unpack the pair.

Review notes:
  * The download loop iterates `for x in dls` and must read x.period_name /
    x.value; the earlier draft still referenced the removed name `d`.
  * The series sort key tolerates an empty data list, which happens when only
    one period exists and the x-axis is therefore trimmed to nothing.
  * NOTE(review): each top_packages tuple now carries a fifth element (the
    shared graph_data list). Confirm that every consumer unpacking these
    tuples expects five values.
  * NOTE(review): time.mktime() interprets the date in the server's local
    timezone. Confirm that this matches what the charting code expects.

--- a/ckanext/ga_report/controller.py
+++ b/ckanext/ga_report/controller.py
@@ -1,6 +1,7 @@
 import re
 import csv
 import sys
+import json
 import logging
 import operator
 import collections
@@ -21,6 +22,10 @@
     d = strptime(strdate, '%Y-%m')
     return '%s %s' % (calendar.month_name[d.tm_mon], d.tm_year)
 
+def _get_unix_epoch(strdate):
+    from time import strptime,mktime
+    d = strptime(strdate, '%Y-%m')
+    return int(mktime(d))
 
 def _month_details(cls, stat_key=None):
     '''
@@ -108,24 +113,24 @@
             return key, val
 
         # Query historic values for sparkline rendering
-        graph_query = model.Session.query(GA_Stat)\
+        sparkline_query = model.Session.query(GA_Stat)\
                 .filter(GA_Stat.stat_name=='Totals')\
                 .order_by(GA_Stat.period_name)
-        graph_data = {}
-        for x in graph_query:
-            graph_data[x.key] = graph_data.get(x.key,[])
+        sparkline_data = {}
+        for x in sparkline_query:
+            sparkline_data[x.key] = sparkline_data.get(x.key,[])
             key, val = clean_key(x.key,float(x.value))
             tooltip = '%s: %s' % (_get_month_name(x.period_name), val)
-            graph_data[x.key].append( (tooltip,x.value) )
+            sparkline_data[x.key].append( (tooltip,x.value) )
         # Trim the latest month, as it looks like a huge dropoff
-        for key in graph_data:
-            graph_data[key] = graph_data[key][:-1]
+        for key in sparkline_data:
+            sparkline_data[key] = sparkline_data[key][:-1]
 
         c.global_totals = []
         if c.month:
             for e in entries:
                 key, val = clean_key(e.key, e.value)
-                sparkline = graph_data[e.key]
+                sparkline = sparkline_data[e.key]
                 c.global_totals.append((key, val, sparkline))
         else:
             d = collections.defaultdict(list)
@@ -136,11 +141,18 @@
                     v = sum(v)
                 else:
                     v = float(sum(v))/float(len(v))
-                sparkline = graph_data[k]
+                sparkline = sparkline_data[k]
                 key, val = clean_key(k,v)
 
                 c.global_totals.append((key, val, sparkline))
-        c.global_totals = sorted(c.global_totals, key=operator.itemgetter(0))
+        # Sort the global totals into a more pleasant order
+        def sort_func(x):
+            key = x[0]
+            total_order = ['Total page views','Total visits','Pages per visit']
+            if key in total_order:
+                return total_order.index(key)
+            return 999
+        c.global_totals = sorted(c.global_totals, key=sort_func)
 
         keys = {
             'Browser versions': 'browser_versions',
@@ -177,7 +189,29 @@
 
         for k, v in keys.iteritems():
             q = model.Session.query(GA_Stat).\
-                filter(GA_Stat.stat_name==k)
+                filter(GA_Stat.stat_name==k).\
+                order_by(GA_Stat.period_name)
+            # Run the query on all months to gather graph data
+            series = {}
+            x_axis = set()
+            for stat in q:
+                x_val = _get_unix_epoch(stat.period_name)
+                series[ stat.key ] = series.get(stat.key,{})
+                series[ stat.key ][x_val] = float(stat.value)
+                x_axis.add(x_val)
+            # Common x-axis for all series. Exclude this month (incomplete data)
+            x_axis = sorted(list(x_axis))[:-1]
+            # Buffer a rickshaw dataset from the series
+            def create_graph(series_name, series_data):
+                return {
+                    'name':series_name,
+                    'data':[ {'x':x,'y':series_data.get(x,0)} for x in x_axis ]
+                }
+            rickshaw = [ create_graph(name,data) for name, data in series.items() ]
+            rickshaw = sorted(rickshaw,key=lambda x:x['data'][-1]['y'] if x['data'] else 0)
+            setattr(c, v+'_graph', json.dumps(rickshaw))
+
+            # Buffer the tabular data
             if c.month:
                 entries = []
                 q = q.filter(GA_Stat.period_name==c.month).\
@@ -239,7 +273,7 @@
         if not c.publisher:
             abort(404, 'A publisher with that name could not be found')
 
-        packages = self._get_packages(c.publisher)
+        packages, graph_data = self._get_packages(c.publisher)
         response.headers['Content-Type'] = "text/csv; charset=utf-8"
         response.headers['Content-Disposition'] = \
             str('attachment; filename=datasets_%s_%s.csv' % (c.publisher_name, month,))
@@ -285,6 +319,7 @@
         q = q.filter(GA_Url.department_id==publisher.name)
         q = q.filter(GA_Url.period_name==month)
         q = q.order_by('ga_url.pageviews::int desc')
+        graph_data = []
         top_packages = []
         if count == -1:
             entries = q.all()
@@ -293,6 +328,7 @@
 
         for entry,package in entries:
             if package:
+                graph = []
                 # Downloads ....
                 if have_download_data:
                     dls = model.Session.query(GA_Stat).\
@@ -300,15 +336,18 @@
                         filter(GA_Stat.key==package.name)
                     if month != 'All':  # Fetch everything unless the month is specific
                         dls = dls.filter(GA_Stat.period_name==month)
-
-                    downloads = sum(int(d.value) for d in dls.all())
+                    downloads = 0
+                    for x in dls:
+                        graph.append({ 'x': _get_unix_epoch(x.period_name), 'y': int(x.value)})
+                        downloads += int(x.value)
                 else:
                     downloads = 'No data'
-                top_packages.append((package, entry.pageviews, entry.visits, downloads))
+                top_packages.append((package, entry.pageviews, entry.visits, downloads, graph_data))
+                graph_data.append({'name':package.title, 'data':graph})
             else:
                 log.warning('Could not find package associated package')
 
-        return top_packages
+        return top_packages,graph_data
 
     def read(self):
         '''
@@ -350,7 +389,8 @@
             entry = q.filter(GA_Url.period_name==c.month).first()
         c.publisher_page_views = entry.pageviews if entry else 0
 
-        c.top_packages = self._get_packages(c.publisher, 20)
+        c.top_packages, graph_data = self._get_packages(c.publisher, 20)
+        c.graph_data = json.dumps(graph_data)
 
         return render('ga_report/publisher/read.html')
 