From: Tom Rees Date: Fri, 17 May 2013 14:19:01 +0000 Subject: [385] Use page_heading helper function consistently X-Git-Url: http://maxious.lambdacomplex.org/git/?p=ckanext-ga-report.git&a=commitdiff&h=98bba88c9027d3198c72d70fc354c3031060e85a --- [385] Use page_heading helper function consistently --- --- a/ckanext/ga_report/controller.py +++ b/ckanext/ga_report/controller.py @@ -212,12 +212,9 @@ for stat in graph_query: graph_dict[ stat.key ] = graph_dict.get(stat.key,{ 'name':stat.key, - 'data': [] + 'raw': {} }) - graph_dict[ stat.key ]['data'].append({ - 'x':_get_unix_epoch(stat.period_name), - 'y':float(stat.value) - }) + graph_dict[ stat.key ]['raw'][stat.period_name] = float(stat.value) stats_in_table = [x[0] for x in entries] stats_not_in_table = set(graph_dict.keys()) - set(stats_in_table) stats = stats_in_table + sorted(list(stats_not_in_table)) @@ -252,7 +249,7 @@ writer = csv.writer(response) writer.writerow(["Publisher Title", "Publisher Name", "Views", "Visits", "Period Name"]) - top_publishers, top_publishers_graph = _get_top_publishers(None) + top_publishers = _get_top_publishers(limit=None) for publisher,view,visit in top_publishers: writer.writerow([publisher.title.encode('utf-8'), @@ -274,7 +271,7 @@ if not c.publisher: abort(404, 'A publisher with that name could not be found') - packages = self._get_packages(c.publisher) + packages = self._get_packages(publisher=c.publisher, month=c.month) response.headers['Content-Type'] = "text/csv; charset=utf-8" response.headers['Content-Disposition'] = \ str('attachment; filename=datasets_%s_%s.csv' % (c.publisher_name, month,)) @@ -303,15 +300,16 @@ if c.month: c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month]) - c.top_publishers, graph_data = _get_top_publishers() + c.top_publishers = _get_top_publishers() + graph_data = _get_top_publishers_graph() c.top_publishers_graph = json.dumps( _to_rickshaw(graph_data) ) return render('ga_report/publisher/index.html') - def _get_packages(self, publisher=None, count=-1): + def _get_packages(self, publisher=None, month='', count=-1): '''Returns the datasets in order of views''' have_download_data = True - month = c.month or 'All' + month = month or 'All' if month != 'All': have_download_data = month >= DOWNLOADS_AVAILABLE_FROM @@ -388,28 +386,25 @@ entry = q.filter(GA_Url.period_name==c.month).first() c.publisher_page_views = entry.pageviews if entry else 0 - c.top_packages = self._get_packages(c.publisher, 20) + c.top_packages = self._get_packages(publisher=c.publisher, count=20, month=c.month) # Graph query - top_package_names = [ x[0].name for x in c.top_packages ] + top_packages_all_time = self._get_packages(publisher=c.publisher, count=20, month='All') + top_package_names = [ x[0].name for x in top_packages_all_time ] graph_query = model.Session.query(GA_Url,model.Package)\ .filter(model.Package.name==GA_Url.package_id)\ .filter(GA_Url.url.like('/dataset/%'))\ .filter(GA_Url.package_id.in_(top_package_names)) - graph_dict = {} + all_series = {} for entry,package in graph_query: if not package: continue if entry.period_name=='All': continue - graph_dict[package.name] = graph_dict.get(package.name,{ + all_series[package.name] = all_series.get(package.name,{ 'name':package.title, - 'data':[] + 'raw': {} }) - graph_dict[package.name]['data'].append({ - 'x':_get_unix_epoch(entry.period_name), - 'y':int(entry.pageviews), - }) - graph = [ graph_dict[x] for x in top_package_names ] - + all_series[package.name]['raw'][entry.period_name] = int(entry.pageviews) + graph = [ all_series[series_name] for series_name in top_package_names ] c.graph_data = json.dumps( _to_rickshaw(graph) ) return render('ga_report/publisher/read.html') @@ -417,53 +412,45 @@ def _to_rickshaw(data, percentageMode=False): if data==[]: return data - # Create a consistent x-axis between all series - num_points = [ len(series['data']) for series in data ] - ideal_index = num_points.index( max(num_points) ) - x_axis = [] + # x-axis is every month in c.months. Note that data might not exist + # for entire history, eg. for recently-added datasets + x_axis = [x[0] for x in c.months] + x_axis.reverse() # Ascending order + x_axis = x_axis[:-1] # Remove latest month + totals = {} for series in data: + series['data'] = [] + for x_string in x_axis: + x = _get_unix_epoch( x_string ) + y = series['raw'].get(x_string,0) + series['data'].append({'x':x,'y':y}) + totals[x] = totals.get(x,0)+y + if not percentageMode: + return data + # Turn all data into percentages + # Roll insignificant series into a catch-all + THRESHOLD = 1 + raw_data = data + data = [] + for series in raw_data: for point in series['data']: - x_axis.append(point['x']) - x_axis = sorted( list( set(x_axis) ) ) - # Zero pad any missing values - for series in data: - xs = [ point['x'] for point in series['data'] ] - for x in set(x_axis).difference(set(xs)): - series['data'].append( {'x':x, 'y':0} ) - if percentageMode: - def get_totals(series_list): - totals = {} - for series in series_list: - for point in series['data']: - totals[point['x']] = totals.get(point['x'],0) + point['y'] - return totals - # Transform data into percentage stacks - totals = get_totals(data) - # Roll insignificant series into a catch-all - THRESHOLD = 0.01 - raw_data = data - data = [] - for series in raw_data: - for point in series['data']: - fraction = float(point['y']) / totals[point['x']] - if not (series in data) and fraction>THRESHOLD: - data.append(series) - # Overwrite data with a set of interesting series - others = [ x for x in raw_data if not (x in data) ] - if len(others): - data.append({ - 'name':'Other', - 'data': [ {'x':x,'y':y} for x,y in get_totals(others).items() ] - }) - # Turn each point into a percentage - for series in data: - for point in series['data']: - point['y'] = (point['y']*100) / totals[point['x']] - # Sort the points - for series in data: - series['data'] = sorted( series['data'], key=lambda x:x['x'] ) - # Strip the latest month's incomplete analytics - series['data'] = series['data'][:-1] + percentage = (100*float(point['y'])) / totals[point['x']] + if not (series in data) and percentage>THRESHOLD: + data.append(series) + point['y'] = percentage + others = [ x for x in raw_data if not (x in data) ] + if len(others): + data_other = [] + for i in range(len(x_axis)): + x = _get_unix_epoch(x_axis[i]) + y = 0 + for series in others: + y += series['data'][i]['y'] + data_other.append({'x':x,'y':y}) + data.append({ + 'name':'Other', + 'data': data_other + }) return data @@ -488,39 +475,51 @@ top_publishers = [] res = connection.execute(q, month) - department_ids = [] for row in res: g = model.Group.get(row[0]) if g: - department_ids.append(row[0]) top_publishers.append((g, row[1], row[2])) - - graph = [] - if limit is not None: - # Query for a history graph of these publishers - q = model.Session.query( - GA_Url.department_id, - GA_Url.period_name, - func.sum(cast(GA_Url.pageviews,sqlalchemy.types.INT)))\ - .filter( GA_Url.department_id.in_(department_ids) )\ - .filter( GA_Url.period_name!='All' )\ - .filter( GA_Url.url.like('/dataset/%') )\ - .filter( GA_Url.package_id!='' )\ - .group_by( GA_Url.department_id, GA_Url.period_name ) - graph_dict = {} - for dept_id,period_name,views in q: - graph_dict[dept_id] = graph_dict.get( dept_id, { - 'name' : model.Group.get(dept_id).title, - 'data' : [] - }) - graph_dict[dept_id]['data'].append({ - 'x': _get_unix_epoch(period_name), - 'y': views - }) - # Sort dict into ordered list - for id in department_ids: - graph.append( graph_dict[id] ) - return top_publishers, graph + return top_publishers + + +def _get_top_publishers_graph(limit=20): + ''' + Returns a list of the top 20 publishers by dataset visits. + (The number to show can be varied with 'limit') + ''' + connection = model.Session.connection() + q = """ + select department_id, sum(pageviews::int) views + from ga_url + where department_id <> '' + and package_id <> '' + and url like '/dataset/%%' + and period_name='All' + group by department_id order by views desc + """ + if limit: + q = q + " limit %s;" % (limit) + + res = connection.execute(q) + department_ids = [ row[0] for row in res ] + + # Query for a history graph of these department ids + q = model.Session.query( + GA_Url.department_id, + GA_Url.period_name, + func.sum(cast(GA_Url.pageviews,sqlalchemy.types.INT)))\ + .filter( GA_Url.department_id.in_(department_ids) )\ + .filter( GA_Url.url.like('/dataset/%') )\ + .filter( GA_Url.package_id!='' )\ + .group_by( GA_Url.department_id, GA_Url.period_name ) + graph_dict = {} + for dept_id,period_name,views in q: + graph_dict[dept_id] = graph_dict.get( dept_id, { + 'name' : model.Group.get(dept_id).title, + 'raw' : {} + }) + graph_dict[dept_id]['raw'][period_name] = views + return [ graph_dict[id] for id in department_ids ] def _get_publishers(): --- a/ckanext/ga_report/helpers.py +++ b/ckanext/ga_report/helpers.py @@ -80,7 +80,7 @@ return base.render_snippet('ga_report/ga_popular_single.html', **context) -def most_popular_datasets(publisher, count=20): +def most_popular_datasets(publisher, count=20, preview_image=None): if not publisher: _log.error("No valid publisher passed to 'most_popular_datasets'") @@ -92,7 +92,8 @@ 'dataset_count': len(results), 'datasets': results, - 'publisher': publisher + 'publisher': publisher, + 'preview_image': preview_image } return base.render_snippet('ga_report/publisher/popular.html', **ctx) @@ -121,3 +122,17 @@ return sorted(results, key=operator.itemgetter(1), reverse=True) +def month_option_title(month_iso, months, day): + month_isos = [ iso_code for (iso_code,name) in months ] + try: + index = month_isos.index(month_iso) + except ValueError: + _log.error('Month "%s" not found in list of months.' % month_iso) + return month_iso + month_name = months[index][1] + if index==0: + return month_name + (' (up to %s)'%day) + return month_name + + + --- a/ckanext/ga_report/plugin.py +++ b/ckanext/ga_report/plugin.py @@ -5,7 +5,8 @@ from ckanext.ga_report.helpers import (most_popular_datasets, popular_datasets, - single_popular_dataset) + single_popular_dataset, + month_option_title) log = logging.getLogger('ckanext.ga-report') @@ -27,7 +28,8 @@ 'ga_report_installed': lambda: True, 'popular_datasets': popular_datasets, 'most_popular_datasets': most_popular_datasets, - 'single_popular_dataset': single_popular_dataset + 'single_popular_dataset': single_popular_dataset, + 'month_option_title': month_option_title } def after_map(self, map): --- a/ckanext/ga_report/public/css/ga_report.css +++ b/ckanext/ga_report/public/css/ga_report.css @@ -61,4 +61,9 @@ .ga-reports-table .td-numeric { text-align: center; } +.ga-reports-heading { + padding-right: 10px; + margin-top: 4px; + float: left; +} --- a/ckanext/ga_report/public/scripts/ckanext_ga_reports.js +++ b/ckanext/ga_report/public/scripts/ckanext_ga_reports.js @@ -4,14 +4,22 @@ CKAN.GA_Reports.render_rickshaw = function( css_name, data, mode, colorscheme ) { var graphLegends = $('#graph-legend-container'); - if (!Modernizr.svg) { + function renderError(alertClass,alertText,legendText) { $("#chart_"+css_name) - .html( '
Your browser does not support vector graphics. No graphs can be rendered.
') + .html( '
'+alertText+'
') .closest('.rickshaw_chart_container').css('height',50); var myLegend = $('
') - .html('(Graph cannot be rendered)') + .html(legendText) .appendTo(graphLegends); + } + + if (!Modernizr.svg) { + renderError('','Your browser does not support vector graphics. No graphs can be rendered.','(Graph cannot be rendered)'); return; + } + if (data.length==0) { + renderError('alert-info','There is not enough data to render a graph.','(No graph available)'); + return } var myLegend = $('
').appendTo(graphLegends); @@ -30,7 +38,9 @@ series: data , height: 328 }); - var x_axis = new Rickshaw.Graph.Axis.Time( { graph: graph } ); + var x_axis = new Rickshaw.Graph.Axis.Time( { + graph: graph + } ); var y_axis = new Rickshaw.Graph.Axis.Y( { graph: graph, orientation: 'left', --- a/ckanext/ga_report/templates/ga_report/ga_util.html +++ b/ckanext/ga_report/templates/ga_report/ga_util.html @@ -8,8 +8,8 @@ @@ -37,7 +37,6 @@ @@ -59,21 +58,41 @@ -
-
-
-
--- a/ckanext/ga_report/templates/ga_report/notes.html +++ /dev/null @@ -1,16 +1,1 @@ - -
  • -

    Notes

    -
      -
    • "Views" is the number of times a page was loaded in users' browsers.
    • -
    • "Downloads" is the number of times a user has clicked to download either an original or cached resource for a particular dataset. Download information is only available from 2nd December 2012; 'No data' is shown for records before that date.
    • -
    • These usage statistics are confined to users with javascript enabled, which excludes web crawlers and API calls.
    • -
    • The results are not shown when the number of views/visits is tiny. Where these relate to site pages, results are available in full in the CSV download. Where these relate to users' web browser information, results are not disclosed, for privacy reasons.
    • -
    -
  • - - --- a/ckanext/ga_report/templates/ga_report/publisher/index.html +++ b/ckanext/ga_report/templates/ga_report/publisher/index.html @@ -7,19 +7,9 @@ Usage by Publisher + -
  • -

    Download

    -

    - Download as CSV
    -

    -
  • -
  • -

    Graph Legend

    -
    -
    -
  • - + ${ga_sidebar(download_link=h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport',action='publisher_csv',month=c.month or 'all'))}
    @@ -32,39 +22,39 @@ + Site Usage ${usage_nav('Publishers')}
    -

    Site Usage

    - ${usage_nav('Publishers')} - +
    ${rickshaw_graph(c.top_publishers_graph,'publishers')} -
    + +
    +
    - +

    Statistics for

    ${month_selector(c.month, c.months, c.day)}
    - - - - + + + + - - - - + + + +
    PublisherDataset Views
    PublisherDataset Views
    - ${h.link_to(publisher.title, h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport', action='read_publisher', id=publisher.name) + (("?month=" + c.month) if c.month else ''))} - ${views}
    + ${h.link_to(publisher.title, h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport', action='read_publisher', id=publisher.name) + (("?month=" + c.month) if c.month else ''))} + ${views}
    - - -
    +
    +