From: Tom Rees Date: Thu, 16 May 2013 11:35:37 +0000 Subject: [423] Handle many more graphing edge-cases. X-Git-Url: http://maxious.lambdacomplex.org/git/?p=ckanext-ga-report.git&a=commitdiff&h=d63c4e97154c2a03d8042836dd203dd63e777e39 --- [423] Handle many more graphing edge-cases. Prepare graphs for an expected time period (since July 2012) rather than for the time period found in the DB, which can be reduced to absurdity with certain queries. Graphs always have a consistent X-axis, ugly logic to combine disparate data series can be removed. On 'Publisher' and 'Dataset' tabs, always graph the *top 20* series regardless of the month currently rendered in the table. This makes more sense from a useability POV. Finally, some client side error checking was improved. --- --- a/ckanext/ga_report/controller.py +++ b/ckanext/ga_report/controller.py @@ -212,13 +212,13 @@ for stat in graph_query: graph_dict[ stat.key ] = graph_dict.get(stat.key,{ 'name':stat.key, - 'data': [] + 'raw': {} }) - graph_dict[ stat.key ]['data'].append({ - 'x':_get_unix_epoch(stat.period_name), - 'y':float(stat.value) - }) - graph = [ graph_dict[x[0]] for x in entries ] + graph_dict[ stat.key ]['raw'][stat.period_name] = float(stat.value) + stats_in_table = [x[0] for x in entries] + stats_not_in_table = set(graph_dict.keys()) - set(stats_in_table) + stats = stats_in_table + sorted(list(stats_not_in_table)) + graph = [graph_dict[x] for x in stats] setattr(c, v+'_graph', json.dumps( _to_rickshaw(graph,percentageMode=True) )) # Get the total for each set of values and then set the value as @@ -249,7 +249,7 @@ writer = csv.writer(response) writer.writerow(["Publisher Title", "Publisher Name", "Views", "Visits", "Period Name"]) - top_publishers, top_publishers_graph = _get_top_publishers(None) + top_publishers = _get_top_publishers(limit=None) for publisher,view,visit in top_publishers: writer.writerow([publisher.title.encode('utf-8'), @@ -271,7 +271,7 @@ if not c.publisher: abort(404, 'A publisher with that name could not be found') - packages = self._get_packages(c.publisher) + packages = self._get_packages(publisher=c.publisher, month=c.month) response.headers['Content-Type'] = "text/csv; charset=utf-8" response.headers['Content-Disposition'] = \ str('attachment; filename=datasets_%s_%s.csv' % (c.publisher_name, month,)) @@ -300,15 +300,16 @@ if c.month: c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month]) - c.top_publishers, graph_data = _get_top_publishers() + c.top_publishers = _get_top_publishers() + graph_data = _get_top_publishers_graph() c.top_publishers_graph = json.dumps( _to_rickshaw(graph_data) ) return render('ga_report/publisher/index.html') - def _get_packages(self, publisher=None, count=-1): + def _get_packages(self, publisher=None, month='', count=-1): '''Returns the datasets in order of views''' have_download_data = True - month = c.month or 'All' + month = month or 'All' if month != 'All': have_download_data = month >= DOWNLOADS_AVAILABLE_FROM @@ -385,28 +386,25 @@ entry = q.filter(GA_Url.period_name==c.month).first() c.publisher_page_views = entry.pageviews if entry else 0 - c.top_packages = self._get_packages(c.publisher, 20) + c.top_packages = self._get_packages(publisher=c.publisher, count=20, month=c.month) # Graph query - top_package_names = [ x[0].name for x in c.top_packages ] + top_packages_all_time = self._get_packages(publisher=c.publisher, count=20, month='All') + top_package_names = [ x[0].name for x in top_packages_all_time ] graph_query = model.Session.query(GA_Url,model.Package)\ .filter(model.Package.name==GA_Url.package_id)\ .filter(GA_Url.url.like('/dataset/%'))\ .filter(GA_Url.package_id.in_(top_package_names)) - graph_dict = {} + all_series = {} for entry,package in graph_query: if not package: continue if entry.period_name=='All': continue - graph_dict[package.name] = graph_dict.get(package.name,{ + all_series[package.name] = all_series.get(package.name,{ 'name':package.title, - 'data':[] + 'raw': {} }) - graph_dict[package.name]['data'].append({ - 'x':_get_unix_epoch(entry.period_name), - 'y':int(entry.pageviews), - }) - graph = [ graph_dict[x] for x in top_package_names ] - + all_series[package.name]['raw'][entry.period_name] = int(entry.pageviews) + graph = [ all_series[series_name] for series_name in top_package_names ] c.graph_data = json.dumps( _to_rickshaw(graph) ) return render('ga_report/publisher/read.html') @@ -414,52 +412,45 @@ def _to_rickshaw(data, percentageMode=False): if data==[]: return data - # Create a consistent x-axis between all series - num_points = [ len(series['data']) for series in data ] - ideal_index = num_points.index( max(num_points) ) - x_axis = [ point['x'] for point in data[ideal_index]['data'] ] + # x-axis is every month in c.months. Note that data might not exist + # for entire history, eg. for recently-added datasets + x_axis = [x[0] for x in c.months] + x_axis.reverse() # Ascending order + x_axis = x_axis[:-1] # Remove latest month + totals = {} for series in data: - xs = [ point['x'] for point in series['data'] ] - assert set(xs).issubset( set(x_axis) ), (xs, x_axis) - # Zero pad any missing values - for x in set(x_axis).difference(set(xs)): - series['data'].append( {'x':x, 'y':0} ) - if percentageMode: - def get_totals(series_list): - totals = {} - for series in series_list: - for point in series['data']: - totals[point['x']] = totals.get(point['x'],0) + point['y'] - lengths = [ len(series['data']) for series in series_list ] - assert len(set(lengths))==1 - assert lengths[0] == len(totals) - return totals - # Transform data into percentage stacks - totals = get_totals(data) - # Roll insignificant series into a catch-all - THRESHOLD = 0.01 - raw_data = data - data = [] - for series in raw_data: - for point in series['data']: - fraction = float(point['y']) / totals[point['x']] - if not (series in data) and fraction>THRESHOLD: - data.append(series) - # Overwrite data with a set of intereting series - others = [ x for x in raw_data if not (x in data) ] + series['data'] = [] + for x_string in x_axis: + x = _get_unix_epoch( x_string ) + y = series['raw'].get(x_string,0) + series['data'].append({'x':x,'y':y}) + totals[x] = totals.get(x,0)+y + if not percentageMode: + return data + # Turn all data into percentages + # Roll insignificant series into a catch-all + THRESHOLD = 1 + raw_data = data + data = [] + for series in raw_data: + for point in series['data']: + percentage = (100*float(point['y'])) / totals[point['x']] + if not (series in data) and percentage>THRESHOLD: + data.append(series) + point['y'] = percentage + others = [ x for x in raw_data if not (x in data) ] + if len(others): + data_other = [] + for i in range(len(x_axis)): + x = _get_unix_epoch(x_axis[i]) + y = 0 + for series in others: + y += series['data'][i]['y'] + data_other.append({'x':x,'y':y}) data.append({ 'name':'Other', - 'data': [ {'x':x,'y':y} for x,y in get_totals(others).items() ] + 'data': data_other }) - # Turn each point into a percentage - for series in data: - for point in series['data']: - point['y'] = (point['y']*100) / totals[point['x']] - # Sort the points - for series in data: - series['data'] = sorted( series['data'], key=lambda x:x['x'] ) - # Strip the latest month's incomplete analytics - series['data'] = series['data'][:-1] return data @@ -484,39 +475,51 @@ top_publishers = [] res = connection.execute(q, month) - department_ids = [] for row in res: g = model.Group.get(row[0]) if g: - department_ids.append(row[0]) top_publishers.append((g, row[1], row[2])) - - graph = [] - if limit is not None: - # Query for a history graph of these publishers - q = model.Session.query( - GA_Url.department_id, - GA_Url.period_name, - func.sum(cast(GA_Url.pageviews,sqlalchemy.types.INT)))\ - .filter( GA_Url.department_id.in_(department_ids) )\ - .filter( GA_Url.period_name!='All' )\ - .filter( GA_Url.url.like('/dataset/%') )\ - .filter( GA_Url.package_id!='' )\ - .group_by( GA_Url.department_id, GA_Url.period_name ) - graph_dict = {} - for dept_id,period_name,views in q: - graph_dict[dept_id] = graph_dict.get( dept_id, { - 'name' : model.Group.get(dept_id).title, - 'data' : [] - }) - graph_dict[dept_id]['data'].append({ - 'x': _get_unix_epoch(period_name), - 'y': views - }) - # Sort dict into ordered list - for id in department_ids: - graph.append( graph_dict[id] ) - return top_publishers, graph + return top_publishers + + +def _get_top_publishers_graph(limit=20): + ''' + Returns a list of the top 20 publishers by dataset visits. + (The number to show can be varied with 'limit') + ''' + connection = model.Session.connection() + q = """ + select department_id, sum(pageviews::int) views + from ga_url + where department_id <> '' + and package_id <> '' + and url like '/dataset/%%' + and period_name='All' + group by department_id order by views desc + """ + if limit: + q = q + " limit %s;" % (limit) + + res = connection.execute(q) + department_ids = [ row[0] for row in res ] + + # Query for a history graph of these department ids + q = model.Session.query( + GA_Url.department_id, + GA_Url.period_name, + func.sum(cast(GA_Url.pageviews,sqlalchemy.types.INT)))\ + .filter( GA_Url.department_id.in_(department_ids) )\ + .filter( GA_Url.url.like('/dataset/%') )\ + .filter( GA_Url.package_id!='' )\ + .group_by( GA_Url.department_id, GA_Url.period_name ) + graph_dict = {} + for dept_id,period_name,views in q: + graph_dict[dept_id] = graph_dict.get( dept_id, { + 'name' : model.Group.get(dept_id).title, + 'raw' : {} + }) + graph_dict[dept_id]['raw'][period_name] = views + return [ graph_dict[id] for id in department_ids ] def _get_publishers(): --- a/ckanext/ga_report/helpers.py +++ b/ckanext/ga_report/helpers.py @@ -80,7 +80,7 @@ return base.render_snippet('ga_report/ga_popular_single.html', **context) -def most_popular_datasets(publisher, count=20): +def most_popular_datasets(publisher, count=20, preview_image=None): if not publisher: _log.error("No valid publisher passed to 'most_popular_datasets'") @@ -92,7 +92,8 @@ 'dataset_count': len(results), 'datasets': results, - 'publisher': publisher + 'publisher': publisher, + 'preview_image': preview_image } return base.render_snippet('ga_report/publisher/popular.html', **ctx) @@ -121,3 +122,17 @@ return sorted(results, key=operator.itemgetter(1), reverse=True) +def month_option_title(month_iso, months, day): + month_isos = [ iso_code for (iso_code,name) in months ] + try: + index = month_isos.index(month_iso) + except ValueError: + _log.error('Month "%s" not found in list of months.' % month_iso) + return month_iso + month_name = months[index][1] + if index==0: + return month_name + (' (up to %s)'%day) + return month_name + + + --- a/ckanext/ga_report/plugin.py +++ b/ckanext/ga_report/plugin.py @@ -5,7 +5,8 @@ from ckanext.ga_report.helpers import (most_popular_datasets, popular_datasets, - single_popular_dataset) + single_popular_dataset, + month_option_title) log = logging.getLogger('ckanext.ga-report') @@ -27,7 +28,8 @@ 'ga_report_installed': lambda: True, 'popular_datasets': popular_datasets, 'most_popular_datasets': most_popular_datasets, - 'single_popular_dataset': single_popular_dataset + 'single_popular_dataset': single_popular_dataset, + 'month_option_title': month_option_title } def after_map(self, map): --- a/ckanext/ga_report/public/css/ga_report.css +++ b/ckanext/ga_report/public/css/ga_report.css @@ -2,6 +2,11 @@ padding: 1px 0 0 0; width: 108px; text-align: center; + /* Hack to hide the momentary flash of text + * before sparklines are fully rendered */ + font-size: 1px; + color: transparent; + overflow: hidden; } .rickshaw_chart_container { position: relative; @@ -18,6 +23,7 @@ .rickshaw_legend { background: transparent; width: 100%; + padding-top: 4px; } .rickshaw_y_axis { position: absolute; @@ -29,6 +35,10 @@ background: transparent !important; color: #000000 !important; font-weight: normal !important; +} +.rickshaw_legend .instructions { + color: #000; + margin-bottom: 6px; } .rickshaw_legend .line .action { @@ -44,8 +54,16 @@ float: left; width: 200px; } +.rickshaw_legend .line .label:hover { + text-decoration: underline; +} .ga-reports-table .td-numeric { text-align: center; } +.ga-reports-heading { + padding-right: 10px; + margin-top: 4px; + float: left; +} --- a/ckanext/ga_report/public/scripts/ckanext_ga_reports.js +++ b/ckanext/ga_report/public/scripts/ckanext_ga_reports.js @@ -4,14 +4,22 @@ CKAN.GA_Reports.render_rickshaw = function( css_name, data, mode, colorscheme ) { var graphLegends = $('#graph-legend-container'); - if (!Modernizr.svg) { + function renderError(alertClass,alertText,legendText) { $("#chart_"+css_name) - .html( '
Your browser does not support vector graphics. No graphs can be rendered.
') + .html( '
'+alertText+'
') .closest('.rickshaw_chart_container').css('height',50); var myLegend = $('
') - .html('(Graph cannot be rendered)') + .html(legendText) .appendTo(graphLegends); + } + + if (!Modernizr.svg) { + renderError('','Your browser does not support vector graphics. No graphs can be rendered.','(Graph cannot be rendered)'); return; + } + if (data.length==0) { + renderError('alert-info','There is not enough data to render a graph.','(No graph available)'); + return } var myLegend = $('
').appendTo(graphLegends); @@ -30,7 +38,9 @@ series: data , height: 328 }); - var x_axis = new Rickshaw.Graph.Axis.Time( { graph: graph } ); + var x_axis = new Rickshaw.Graph.Axis.Time( { + graph: graph + } ); var y_axis = new Rickshaw.Graph.Axis.Y( { graph: graph, orientation: 'left', @@ -45,6 +55,7 @@ graph: graph, legend: legend } ); + myLegend.prepend('
Click on a series below to isolate its graph:
'); graph.render(); }; @@ -54,24 +65,29 @@ * Sparkline graphs should be drawn. * Note that they cannot be drawn sooner. */ + var created = false; $('a[href="#totals"]').on( 'shown', - function() { - var sparkOptions = { - enableTagOptions: true, - type: 'line', - width: 100, - height: 26, - chartRangeMin: 0, - spotColor: '', - maxSpotColor: '', - minSpotColor: '', - highlightSpotColor: '000000', - lineColor: '3F8E6D', - fillColor: 'B7E66B' - }; - $('.sparkline').sparkline('html',sparkOptions); - } + function() { + if (!created) { + var sparkOptions = { + enableTagOptions: true, + type: 'line', + width: 100, + height: 26, + chartRangeMin: 0, + spotColor: '', + maxSpotColor: '', + minSpotColor: '', + highlightSpotColor: '#000000', + lineColor: '#3F8E6D', + fillColor: '#B7E66B' + }; + $('.sparkline').sparkline('html',sparkOptions); + created = true; + } + $.sparkline_display_visible(); + } ); }; @@ -81,7 +97,7 @@ * Show the correct rickshaw graph in the sidebar. * Not to be called before all graphs load. */ - $('a[data-toggle="hashchange"]').on( + $('a[data-toggle="hashtab"]').on( 'shown', function(e) { var href = $(e.target).attr('href'); @@ -94,9 +110,12 @@ } legend_name = '#legend_'+legend_name; $('#graph-legend-container > *').hide(); + $('#graph-legend-container .instructions').show(); $(legend_name).show(); } ); + /* The first tab might already have been shown */ + $('li.active > a[data-toggle="hashtab"]').trigger('shown'); }; CKAN.GA_Reports.bind_month_selector = function() { @@ -111,22 +130,3 @@ selectors.bind('change', handler); }; -/* - * Custom bootstrap plugin for handling data-toggle="hashchange". - * Behaves like data-toggle="tab" but I respond to the hashchange. - * Page state is memo-ized in the URL this way. Why doesn't Bootstrap do this? - */ -$(function() { - var mapping = {}; - $('a[data-toggle="hashchange"]').each( - function(i,link) { - link = $(link); - mapping[link.attr('href')] = link; - } - ); - $(window).hashchange(function() { - var link = mapping[window.location.hash]; - if (link) { link.tab('show'); } - }); -}); - --- a/ckanext/ga_report/templates/ga_report/ga_util.html +++ b/ckanext/ga_report/templates/ga_report/ga_util.html @@ -8,8 +8,8 @@ @@ -37,7 +37,6 @@ @@ -59,21 +58,41 @@ -
-
-
-
--- a/ckanext/ga_report/templates/ga_report/notes.html +++ /dev/null @@ -1,16 +1,1 @@ - -
  • -

    Notes

    -
      -
    • "Views" is the number of times a page was loaded in users' browsers.
    • -
    • "Downloads" is the number of times a user has clicked to download either an original or cached resource for a particular dataset. Download information is only available from 2nd December 2012; 'No data' is shown for records before that date.
    • -
    • These usage statistics are confined to users with javascript enabled, which excludes web crawlers and API calls.
    • -
    • The results are not shown when the number of views/visits is tiny. Where these relate to site pages, results are available in full in the CSV download. Where these relate to users' web browser information, results are not disclosed, for privacy reasons.
    • -
    -
  • - - --- a/ckanext/ga_report/templates/ga_report/publisher/index.html +++ b/ckanext/ga_report/templates/ga_report/publisher/index.html @@ -7,19 +7,9 @@ Usage by Publisher + -
  • -

    Download

    -

    - Download as CSV
    -

    -
  • -
  • -

    Graph Legend

    -
    -
    -
  • - + ${ga_sidebar(download_link=h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport',action='publisher_csv',month=c.month or 'all'))}
    @@ -35,36 +25,36 @@
    -

    Site Usage

    +

    Site Usage ${usage_nav('Publishers')}

    - ${usage_nav('Publishers')} - +
    ${rickshaw_graph(c.top_publishers_graph,'publishers')} -
    + +
    +
    - +

    Statistics for

    ${month_selector(c.month, c.months, c.day)}
    - - - - + + + + - - - - + + + +
    PublisherDataset Views
    PublisherDataset Views
    - ${h.link_to(publisher.title, h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport', action='read_publisher', id=publisher.name) + (("?month=" + c.month) if c.month else ''))} - ${views}
    + ${h.link_to(publisher.title, h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport', action='read_publisher', id=publisher.name) + (("?month=" + c.month) if c.month else ''))} + ${views}
    - - -
    +
    +