From: Tom Rees
Date: Fri, 25 Jan 2013 12:06:59 +0000
Subject: #167 Refactoring controller's logic to create better Percentage graphs
X-Git-Url: https://maxious.lambdacomplex.org/git/?p=ckanext-ga-report.git&a=commitdiff&h=66388ec20aa1030e4cd7333f4ee6f4c75876b1c3
---
#167 Refactoring controller's logic to create better Percentage graphs
---
--- a/ckanext/ga_report/command.py
+++ b/ckanext/ga_report/command.py
@@ -23,7 +23,7 @@
import ckan.model as model
model.Session.remove()
model.Session.configure(bind=model.meta.engine)
- log = logging.getLogger('ckanext.ga-report')
+ log = logging.getLogger('ckanext.ga_report')
import ga_model
ga_model.init_tables()
--- a/ckanext/ga_report/controller.py
+++ b/ckanext/ga_report/controller.py
@@ -411,45 +411,52 @@
if data==[]:
return data
# Create a consistent x-axis
- num_points = [ len(package['data']) for package in data ]
+ num_points = [ len(series['data']) for series in data ]
ideal_index = num_points.index( max(num_points) )
x_axis = [ point['x'] for point in data[ideal_index]['data'] ]
- for package in data:
- xs = [ point['x'] for point in package['data'] ]
+ for series in data:
+ xs = [ point['x'] for point in series['data'] ]
assert set(xs).issubset( set(x_axis) ), (xs, x_axis)
# Zero pad any missing values
for x in set(x_axis).difference(set(xs)):
- package['data'].append( {'x':x, 'y':0} )
- assert len(package['data'])==len(x_axis), (len(package['data']),len(x_axis),package['data'],x_axis,set(x_axis).difference(set(xs)))
+ series['data'].append( {'x':x, 'y':0} )
+ assert len(series['data'])==len(x_axis), (len(series['data']),len(x_axis),series['data'],x_axis,set(x_axis).difference(set(xs)))
if percentageMode:
+ def get_totals(series_list):
+ totals = {}
+ for series in series_list:
+ for point in series['data']:
+ totals[point['x']] = totals.get(point['x'],0) + point['y']
+ lengths = [ len(series['data']) for series in series_list ]
+ assert len(set(lengths))==1
+ assert lengths[0] == len(totals)
+ return totals
# Transform data into percentage stacks
- totals = {}
- for x in x_axis:
- for package in data:
- for point in package['data']:
- totals[ point['x'] ] = totals.get(point['x'],0) + point['y']
+ totals = get_totals(data)
# Roll insignificant series into a catch-all
THRESHOLD = 0.01
- significant_series = []
- for package in data:
- for point in package['data']:
+ raw_data = data
+ data = []
+ for series in raw_data:
+ for point in series['data']:
fraction = float(point['y']) / totals[point['x']]
- if fraction>THRESHOLD and not (package in significant_series):
- significant_series.append(package)
- temp = {}
- for package in data:
- if package in significant_series: continue
- for point in package['data']:
- temp[point['x']] = temp.get(point['x'],0) + point['y']
- catch_all = { 'name':'Other','data': [ {'x':x,'y':y} for x,y in temp.items() ] }
- # Roll insignificant series into one
- data = significant_series
- data.append(catch_all)
+ if not (series in data) and fraction>THRESHOLD:
+ data.append(series)
+ # Overwrite data with a set of interesting series
+ others = [ x for x in raw_data if not (x in data) ]
+ data.append({
+ 'name':'Other',
+ 'data': [ {'x':x,'y':y} for x,y in get_totals(others).items() ]
+ })
+ # Turn each point into a percentage
+ for series in data:
+ for point in series['data']:
+ point['y'] = (point['y']*100) / totals[point['x']]
# Sort the points
- for package in data:
- package['data'] = sorted( package['data'], key=lambda x:x['x'] )
+ for series in data:
+ series['data'] = sorted( series['data'], key=lambda x:x['x'] )
# Strip the latest month's incomplete analytics
- package['data'] = package['data'][:-1]
+ series['data'] = series['data'][:-1]
return data
--- a/ckanext/ga_report/download_analytics.py
+++ b/ckanext/ga_report/download_analytics.py
@@ -32,6 +32,11 @@
first_of_this_month = datetime.datetime(date.year, date.month, 1)
_, last_day_of_month = calendar.monthrange(int(date.year), int(date.month))
last_of_this_month = datetime.datetime(date.year, date.month, last_day_of_month)
+ # if this is the latest month, note that it is only up until today
+ now = datetime.datetime.now()
+ if now.year == date.year and now.month == date.month:
+ last_day_of_month = now.day
+ last_of_this_month = now
periods = ((date.strftime(FORMAT_MONTH),
last_day_of_month,
first_of_this_month, last_of_this_month),)
@@ -126,7 +131,7 @@
# Make sure the All records are correct.
ga_model.post_update_url_stats()
- log.info('Aggregating datasets by publisher')
+ log.info('Associating datasets with their publisher')
ga_model.update_publisher_stats(period_name) # about 30 seconds.
@@ -298,7 +303,7 @@
def _download_stats(self, start_date, end_date, period_name, period_complete_day):
- """ Fetches stats about language and country """
+ """ Fetches stats about data downloads """
import ckan.model as model
data = {}
@@ -320,7 +325,14 @@
return
def process_result_data(result_data, cached=False):
+ progress_total = len(result_data)
+ progress_count = 0
+ resources_not_matched = []
for result in result_data:
+ progress_count += 1
+ if progress_count % 100 == 0:
+ log.debug('.. %d/%d done so far', progress_count, progress_total)
+
url = result[0].strip()
# Get package id associated with the resource that has this URL.
@@ -334,9 +346,13 @@
if package_name:
data[package_name] = data.get(package_name, 0) + int(result[1])
else:
- log.warning(u"Could not find resource for URL: {url}".format(url=url))
+ resources_not_matched.append(url)
continue
-
+ if resources_not_matched:
+ log.debug('Could not match %i or %i resource URLs to datasets. e.g. %r',
+ len(resources_not_matched), progress_total, resources_not_matched[:3])
+
+ log.info('Associating downloads of resource URLs with their respective datasets')
process_result_data(results.get('rows'))
results = self.service.data().ga().get(
@@ -348,6 +364,7 @@
dimensions="ga:eventLabel",
max_results=10000,
end_date=end_date).execute()
+ log.info('Associating downloads of cache resource URLs with their respective datasets')
process_result_data(results.get('rows'), cached=False)
self._filter_out_long_tail(data, MIN_DOWNLOADS)
--- a/ckanext/ga_report/ga_model.py
+++ b/ckanext/ga_report/ga_model.py
@@ -161,20 +161,20 @@
def pre_update_url_stats(period_name):
- log.debug("Deleting '%s' records" % period_name)
- model.Session.query(GA_Url).\
- filter(GA_Url.period_name==period_name).delete()
-
- count = model.Session.query(GA_Url).\
- filter(GA_Url.period_name == 'All').count()
- log.debug("Deleting %d 'All' records" % count)
- count = model.Session.query(GA_Url).\
- filter(GA_Url.period_name == 'All').delete()
- log.debug("Deleted %d 'All' records" % count)
+ q = model.Session.query(GA_Url).\
+ filter(GA_Url.period_name==period_name)
+ log.debug("Deleting %d '%s' records" % (q.count(), period_name))
+ q.delete()
+
+ q = model.Session.query(GA_Url).\
+ filter(GA_Url.period_name == 'All')
+ log.debug("Deleting %d 'All' records..." % q.count())
+ q.delete()
model.Session.flush()
model.Session.commit()
model.repo.commit_and_remove()
+ log.debug('...done')
def post_update_url_stats():
@@ -185,6 +185,7 @@
record regardless of whether the URL has an entry for
the month being currently processed.
"""
+ log.debug('Post-processing "All" records...')
query = """select url, pageviews::int, visits::int
from ga_url
where url not in (select url from ga_url where period_name ='All')"""
@@ -197,7 +198,13 @@
views[row[0]] = views.get(row[0], 0) + row[1]
visits[row[0]] = visits.get(row[0], 0) + row[2]
+ progress_total = len(views.keys())
+ progress_count = 0
for key in views.keys():
+ progress_count += 1
+ if progress_count % 100 == 0:
+ log.debug('.. %d/%d done so far', progress_count, progress_total)
+
package, publisher = _get_package_and_publisher(key)
values = {'id': make_uuid(),
@@ -207,10 +214,11 @@
'pageviews': views[key],
'visits': visits[key],
'department_id': publisher,
- 'package_id': publisher
+ 'package_id': package
}
model.Session.add(GA_Url(**values))
model.Session.commit()
+ log.debug('..done')
def update_url_stats(period_name, period_complete_day, url_data):
@@ -219,9 +227,14 @@
stores them in GA_Url under the period and recalculates the totals for
the 'All' period.
'''
+ progress_total = len(url_data)
+ progress_count = 0
for url, views, visits in url_data:
+ progress_count += 1
+ if progress_count % 100 == 0:
+ log.debug('.. %d/%d done so far', progress_count, progress_total)
+
package, publisher = _get_package_and_publisher(url)
-
item = model.Session.query(GA_Url).\
filter(GA_Url.period_name==period_name).\
--- a/ckanext/ga_report/helpers.py
+++ b/ckanext/ga_report/helpers.py
@@ -106,6 +106,10 @@
for entry in entries:
if len(datasets) < count:
p = model.Package.get(entry.url[len('/dataset/'):])
+ if not p:
+ _log.warning("Could not find Package for {url}".format(url=entry.url))
+ continue
+
if not p in datasets:
datasets[p] = {'views':0, 'visits': 0}
datasets[p]['views'] = datasets[p]['views'] + int(entry.pageviews)
--- a/ckanext/ga_report/public/css/ga_report.css
+++ b/ckanext/ga_report/public/css/ga_report.css
@@ -16,16 +16,8 @@
bottom: 0;
}
.rickshaw_legend {
- position: absolute;
- right: 0;
- top: 0;
- margin-left: 15px;
- padding: 0 5px;
background: transparent;
- max-width: 150px;
- overflow: hidden;
- background: rgba(0,0,0,0.05);
- border-radius:5px;
+ width: 100%;
}
.rickshaw_y_axis {
position: absolute;
--- a/ckanext/ga_report/public/scripts/ckanext_ga_reports.js
+++ b/ckanext/ga_report/public/scripts/ckanext_ga_reports.js
@@ -1,8 +1,20 @@
-
var CKAN = CKAN || {};
CKAN.GA_Reports = {};
CKAN.GA_Reports.render_rickshaw = function( css_name, data, mode, colorscheme ) {
+ var graphLegends = $('#graph-legend-container');
+
+ if (!Modernizr.svg) {
+ $("#chart_"+css_name)
+ .html( 'Your browser does not support vector graphics. No graphs can be rendered.
')
+ .closest('.rickshaw_chart_container').css('height',50);
+ var myLegend = $('')
+ .html('(Graph cannot be rendered)')
+ .appendTo(graphLegends);
+ return;
+ }
+ var myLegend = $('').appendTo(graphLegends);
+
var palette = new Rickshaw.Color.Palette( { scheme: colorscheme } );
$.each(data, function(i, object) {
object['color'] = palette.color();
@@ -21,22 +33,86 @@
graph: graph,
orientation: 'left',
tickFormat: Rickshaw.Fixtures.Number.formatKMBT,
- element: document.getElementById('y_axis_'+css_name),
+ element: document.getElementById('y_axis_'+css_name)
} );
var legend = new Rickshaw.Graph.Legend( {
element: document.querySelector('#legend_'+css_name),
graph: graph
} );
- var hoverDetail = new Rickshaw.Graph.HoverDetail( {
+ var shelving = new Rickshaw.Graph.Behavior.Series.Toggle( {
graph: graph,
- formatter: function(series, x, y) {
- var date = '' + new Date(x * 1000).toUTCString() + '';
- var swatch = '';
- var content = swatch + series.name + ": " + parseInt(y) + '
' + date;
- return content;
- }
+ legend: legend
} );
graph.render();
};
+CKAN.GA_Reports.bind_sparklines = function() {
+ /*
+ * Bind to the 'totals' tab being on screen, when the
+ * Sparkline graphs should be drawn.
+ * Note that they cannot be drawn sooner.
+ */
+ $('a[href="#totals"]').on(
+ 'shown',
+ function() {
+ var sparkOptions = {
+ enableTagOptions: true,
+ type: 'line',
+ width: 100,
+ height: 26,
+ chartRangeMin: 0,
+ spotColor: '',
+ maxSpotColor: '',
+ minSpotColor: '',
+ highlightSpotColor: '000000',
+ lineColor: '3F8E6D',
+ fillColor: 'B7E66B'
+ };
+ $('.sparkline').sparkline('html',sparkOptions);
+ }
+ );
+};
+CKAN.GA_Reports.bind_sidebar = function() {
+ /*
+ * Bind to changes in the tab behaviour:
+ * Show the correct rickshaw graph in the sidebar.
+ * Not to be called before all graphs load.
+ */
+ $('a[data-toggle="hashchange"]').on(
+ 'shown',
+ function(e) {
+ var href = $(e.target).attr('href');
+ var pane = $(href);
+ if (!pane.length) { console.err('bad href',href); return; }
+ var legend_name = "none";
+ var graph = pane.find('.rickshaw_chart');
+ if (graph.length) {
+ legend_name = graph.attr('id').replace('chart_','');
+ }
+ legend_name = '#legend_'+legend_name;
+ $('#graph-legend-container > *').hide();
+ $(legend_name).show();
+ }
+ );
+};
+
+/*
+ * Custom bootstrap plugin for handling data-toggle="hashchange".
+ * Behaves like data-toggle="tab", but responds to the window's hashchange event.
+ * Page state is memoized in the URL this way. Why doesn't Bootstrap do this?
+ */
+$(function() {
+ var mapping = {};
+ $('a[data-toggle="hashchange"]').each(
+ function(i,link) {
+ link = $(link);
+ mapping[link.attr('href')] = link;
+ }
+ );
+ $(window).hashchange(function() {
+ var link = mapping[window.location.hash];
+ if (link) { link.tab('show'); }
+ });
+});
+
--- /dev/null
+++ b/ckanext/ga_report/public/scripts/rickshaw_ie7_shim.js
@@ -1,1 +1,109 @@
+/*
+ * Collection of shims to allow d3 and Rickshaw to load, error-free
+ * (but ultimately unusable) on Internet Explorer 7. The browser's
+ * API lacks several crucial functions which these libraries depend
+ * upon to load; we try to hide these errors from the user.
+ *
+ * With thanks to Array functions from:
+ * http://stackoverflow.com/questions/2790001/fixing-javascript-array-functions-in-internet-explorer-indexof-foreach-etc
+ *
+ * Use (Modernizr.svg==true) to detect whether it's okay to draw a graph.
+ */
+'use strict';
+window.Element = window.Element || {'prototype': {}};
+window.CSSStyleDeclaration = window.CSSStyleDeclaration || {'prototype':{}};
+
+// Add ECMA262-5 method binding if not supported natively
+//
+if (!('bind' in Function.prototype)) {
+ Function.prototype.bind= function(owner) {
+ var that= this;
+ if (arguments.length<=1) {
+ return function() {
+ return that.apply(owner, arguments);
+ };
+ } else {
+ var args= Array.prototype.slice.call(arguments, 1);
+ return function() {
+ return that.apply(owner, arguments.length===0? args : args.concat(Array.prototype.slice.call(arguments)));
+ };
+ }
+ };
+}
+
+// Add ECMA262-5 string trim if not supported natively
+//
+if (!('trim' in String.prototype)) {
+ String.prototype.trim= function() {
+ return this.replace(/^\s+/, '').replace(/\s+$/, '');
+ };
+}
+
+// Add ECMA262-5 Array methods if not supported natively
+//
+if (!('indexOf' in Array.prototype)) {
+ Array.prototype.indexOf= function(find, i /*opt*/) {
+ if (i===undefined) i= 0;
+ if (i<0) i+= this.length;
+ if (i<0) i= 0;
+ for (var n= this.length; ithis.length-1) i= this.length-1;
+ for (i++; i-->0;) /* i++ because from-argument is sadly inclusive */
+ if (i in this && this[i]===find)
+ return i;
+ return -1;
+ };
+}
+if (!('forEach' in Array.prototype)) {
+ Array.prototype.forEach= function(action, that /*opt*/) {
+ for (var i= 0, n= this.length; i
-
+
@@ -23,6 +24,12 @@
Download as CSV
+
+
+ Graph Legend
+
@@ -43,38 +50,36 @@
-
+
Name |
@@ -136,20 +141,14 @@