From: Tom Rees Date: Thu, 17 Jan 2013 12:30:37 +0000 Subject: Stacked graphs with sane server-side data wrangling working. Percentage mode half-implemented. X-Git-Url: http://maxious.lambdacomplex.org/git/?p=ckanext-ga-report.git&a=commitdiff&h=1e315a4d6b9a9077dce680a0cc432dc4cb38d21e --- Stacked graphs with sane server-side data wrangling working. Percentage mode half-implemented. --- --- a/ckanext/ga_report/controller.py +++ b/ckanext/ga_report/controller.py @@ -113,24 +113,24 @@ return key, val # Query historic values for sparkline rendering - graph_query = model.Session.query(GA_Stat)\ + sparkline_query = model.Session.query(GA_Stat)\ .filter(GA_Stat.stat_name=='Totals')\ .order_by(GA_Stat.period_name) - graph_data = {} - for x in graph_query: - graph_data[x.key] = graph_data.get(x.key,[]) + sparkline_data = {} + for x in sparkline_query: + sparkline_data[x.key] = sparkline_data.get(x.key,[]) key, val = clean_key(x.key,float(x.value)) tooltip = '%s: %s' % (_get_month_name(x.period_name), val) - graph_data[x.key].append( (tooltip,x.value) ) + sparkline_data[x.key].append( (tooltip,x.value) ) # Trim the latest month, as it looks like a huge dropoff - for key in graph_data: - graph_data[key] = graph_data[key][:-1] + for key in sparkline_data: + sparkline_data[key] = sparkline_data[key][:-1] c.global_totals = [] if c.month: for e in entries: key, val = clean_key(e.key, e.value) - sparkline = graph_data[e.key] + sparkline = sparkline_data[e.key] c.global_totals.append((key, val, sparkline)) else: d = collections.defaultdict(list) @@ -141,11 +141,18 @@ v = sum(v) else: v = float(sum(v))/float(len(v)) - sparkline = graph_data[k] + sparkline = sparkline_data[k] key, val = clean_key(k,v) c.global_totals.append((key, val, sparkline)) - c.global_totals = sorted(c.global_totals, key=operator.itemgetter(0)) + # Sort the global totals into a more pleasant order + def sort_func(x): + key = x[0] + total_order = ['Total page views','Total visits','Pages per visit'] + if key in 
total_order: + return total_order.index(key) + return 999 + c.global_totals = sorted(c.global_totals, key=sort_func) keys = { 'Browser versions': 'browser_versions', @@ -185,24 +192,17 @@ filter(GA_Stat.stat_name==k).\ order_by(GA_Stat.period_name) # Run the query on all months to gather graph data - series = {} - x_axis = set() + graph = {} for stat in q: - x_val = _get_unix_epoch(stat.period_name) - series[ stat.key ] = series.get(stat.key,{}) - series[ stat.key ][x_val] = float(stat.value) - x_axis.add(x_val) - # Common x-axis for all series. Exclude this month (incomplete data) - x_axis = sorted(list(x_axis))[:-1] - # Buffer a rickshaw dataset from the series - def create_graph(series_name, series_data): - return { - 'name':series_name, - 'data':[ {'x':x,'y':series_data.get(x,0)} for x in x_axis ] - } - rickshaw = [ create_graph(name,data) for name, data in series.items() ] - rickshaw = sorted(rickshaw,key=lambda x:x['data'][-1]['y']) - setattr(c, v+'_graph', json.dumps(rickshaw)) + graph[ stat.key ] = graph.get(stat.key,{ + 'name':stat.key, + 'data': [] + }) + graph[ stat.key ]['data'].append({ + 'x':_get_unix_epoch(stat.period_name), + 'y':float(stat.value) + }) + setattr(c, v+'_graph', json.dumps( _to_rickshaw(graph.values(),percentageMode=True) )) # Buffer the tabular data if c.month: @@ -246,7 +246,9 @@ writer = csv.writer(response) writer.writerow(["Publisher Title", "Publisher Name", "Views", "Visits", "Period Name"]) - for publisher,view,visit in _get_top_publishers(None): + top_publishers, top_publishers_graph = _get_top_publishers(None) + + for publisher,view,visit in top_publishers: writer.writerow([publisher.title.encode('utf-8'), publisher.name.encode('utf-8'), view, @@ -295,7 +297,9 @@ if c.month: c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month]) - c.top_publishers = _get_top_publishers() + c.top_publishers, graph_data = _get_top_publishers() + c.top_publishers_graph = json.dumps( _to_rickshaw(graph_data.values()) ) + return 
render('ga_report/publisher/index.html') def _get_packages(self, publisher=None, count=-1): @@ -327,8 +331,9 @@ filter(GA_Stat.key==package.name) if month != 'All': # Fetch everything unless the month is specific dls = dls.filter(GA_Stat.period_name==month) - - downloads = sum(int(d.value) for d in dls.all()) + downloads = 0 + for x in dls: + downloads += int(x.value) else: downloads = 'No data' top_packages.append((package, entry.pageviews, entry.visits, downloads)) @@ -379,7 +384,72 @@ c.top_packages = self._get_packages(c.publisher, 20) + # Graph query + top_package_names = [ x[0].name for x in c.top_packages ] + graph_query = model.Session.query(GA_Url,model.Package)\ + .filter(model.Package.name==GA_Url.package_id)\ + .filter(GA_Url.url.like('/dataset/%'))\ + .filter(GA_Url.package_id.in_(top_package_names)) + graph_data = {} + for entry,package in graph_query: + if not package: continue + if entry.period_name=='All': continue + graph_data[package.id] = graph_data.get(package.id,{ + 'name':package.title, + 'data':[] + }) + graph_data[package.id]['data'].append({ + 'x':_get_unix_epoch(entry.period_name), + 'y':int(entry.pageviews), + }) + + c.graph_data = json.dumps( _to_rickshaw(graph_data.values()) ) + return render('ga_report/publisher/read.html') + +def _to_rickshaw(data, percentageMode=False): + if data==[]: + return data + # Create a consistent x-axis + num_points = [ len(package['data']) for package in data ] + ideal_index = num_points.index( max(num_points) ) + x_axis = [ point['x'] for point in data[ideal_index]['data'] ] + for package in data: + xs = [ point['x'] for point in package['data'] ] + assert set(xs).issubset( set(x_axis) ), (xs, x_axis) + # Zero pad any missing values + for x in set(x_axis).difference(set(xs)): + package['data'].append( {'x':x, 'y':0} ) + assert len(package['data'])==len(x_axis), (len(package['data']),len(x_axis),package['data'],x_axis,set(x_axis).difference(set(xs))) + if percentageMode: + # Transform data into percentage 
stacks + totals = {} + for x in x_axis: + for package in data: + for point in package['data']: + totals[ point['x'] ] = totals.get(point['x'],0) + point['y'] + # Roll insignificant series into a catch-all + THRESHOLD = 0.01 + significant_series = [] + for package in data: + for point in package['data']: + fraction = float(point['y']) / totals[point['x']] + if fraction>THRESHOLD and not (package in significant_series): + significant_series.append(package) + temp = {} + for package in data: + if package in significant_series: continue + for point in package['data']: + temp[point['x']] = temp.get(point['x'],0) + point['y'] + catch_all = { 'name':'Other','data': [ {'x':x,'y':y} for x,y in temp.items() ] } + # Roll insignificant series into one + data = significant_series + data.append(catch_all) + # Sort the points + for package in data: + package['data'] = sorted( package['data'], key=lambda x:x['x'] ) + return data + def _get_top_publishers(limit=20): ''' @@ -402,11 +472,35 @@ top_publishers = [] res = connection.execute(q, month) + department_ids = [] for row in res: g = model.Group.get(row[0]) if g: + department_ids.append(row[0]) top_publishers.append((g, row[1], row[2])) - return top_publishers + + graph = {} + if limit is not None: + # Query for a history graph of these publishers + q = model.Session.query( + GA_Url.department_id, + GA_Url.period_name, + func.sum(cast(GA_Url.pageviews,sqlalchemy.types.INT)))\ + .filter( GA_Url.department_id.in_(department_ids) )\ + .filter( GA_Url.period_name!='All' )\ + .filter( GA_Url.url.like('/dataset/%') )\ + .filter( GA_Url.package_id!='' )\ + .group_by( GA_Url.department_id, GA_Url.period_name ) + for dept_id,period_name,views in q: + graph[dept_id] = graph.get( dept_id, { + 'name' : model.Group.get(dept_id).title, + 'data' : [] + }) + graph[dept_id]['data'].append({ + 'x': _get_unix_epoch(period_name), + 'y': views + }) + return top_publishers, graph def _get_publishers(): --- /dev/null +++ 
b/ckanext/ga_report/public/css/ga_report.css @@ -1,1 +1,41 @@ +.table-condensed td.sparkline-cell { + padding: 1px 0 0 0; + width: 108px; + text-align: center; +} +.rickshaw_chart_container { + position: relative; + height: 350px; + margin: 0 auto 20px auto; +} +.rickshaw_chart { + position: absolute; + left: 40px; + width: 500px; + top: 0; + bottom: 0; +} +.rickshaw_legend { + position: absolute; + right: 0; + top: 0; + margin-left: 15px; + padding: 0 5px; + background: transparent; + max-width: 150px; + overflow: hidden; + background: rgba(0,0,0,0.05); + border-radius:5px; +} +.rickshaw_y_axis { + position: absolute; + top: 0; + bottom: 0; + width: 40px; +} +.rickshaw_legend .label { + background: transparent !important; + color: #000000 !important; + font-weight: normal !important; +} --- a/ckanext/ga_report/templates/ga_report/ga_util.html +++ b/ckanext/ga_report/templates/ga_report/ga_util.html @@ -30,22 +30,23 @@ -
-
-
-
-
+
+
+
+
+
+ + + +
@@ -34,6 +42,7 @@
+ ${rickshaw_graph(c.top_publishers_graph,'publishers')} --- a/ckanext/ga_report/templates/ga_report/publisher/read.html +++ b/ckanext/ga_report/templates/ga_report/publisher/read.html @@ -6,6 +6,15 @@ Usage by Dataset + + + + + + + + +
  • @@ -41,21 +50,26 @@

    ${c.publisher.title}

    No page views in this period

    -
  • Publisher
    - - - - - - - - - - - + + + ${rickshaw_graph(c.graph_data,'dataset-downloads',debug=True)} +
    DatasetViewsDownloads
    ${h.link_to(package.title or package.name, h.url_for(controller='package', action='read', id=package.name))} - ${views}${downloads}
    + + + + + + + + + + + -
    DatasetViewsDownloads
    + ${h.link_to(package.title or package.name, h.url_for(controller='package', action='read', id=package.name))} + ${views}${downloads}
    + +
    --- a/ckanext/ga_report/templates/ga_report/site/index.html +++ b/ckanext/ga_report/templates/ga_report/site/index.html @@ -9,44 +9,11 @@ + - @@ -127,20 +94,20 @@
    - ${rickshaw_graph(c.browser_versions_graph,'browser-versions')} + ${rickshaw_graph(c.browser_versions_graph,'browser-versions',mode='stack')}

    Note: Where a browser has a large number of versions, these have been grouped together.

    ${stat_table(c.browser_versions)}
    - ${rickshaw_graph(c.browsers_graph,'browsers')} + ${rickshaw_graph(c.browsers_graph,'browsers',mode='stack')} ${stat_table(c.browsers)}
    - ${rickshaw_graph(c.os_graph,'os')} + ${rickshaw_graph(c.os_graph,'os',mode='stack')} ${stat_table(c.os)}
    - ${rickshaw_graph(c.os_versions_graph,'os_versions')} + ${rickshaw_graph(c.os_versions_graph,'os_versions',mode='stack')} ${stat_table(c.os_versions)}
    @@ -148,25 +115,20 @@ ${social_table(c.social_referrer_totals)}
    - ${rickshaw_graph(c.social_networks_graph, 'social_networks')} + ${rickshaw_graph(c.social_networks_graph, 'social_networks',mode='stack')}

    Percentage of visits that were referred from these social networks

    ${stat_table(c.social_networks, 'Visits')}
    - ${rickshaw_graph(c.languages_graph,'languages')} + ${rickshaw_graph(c.languages_graph,'languages',mode='stack')} ${stat_table(c.languages)}
    - ${rickshaw_graph(c.country_graph,'country')} + ${rickshaw_graph(c.country_graph,'country',mode='stack')} ${stat_table(c.country)}
    - -
    - - -