From: Ross Jones Date: Thu, 13 Jun 2013 09:02:37 +0000 Subject: [601] Make sure only active datasets are shown in popular datasets on publisher homepage X-Git-Url: https://maxious.lambdacomplex.org/git/?p=ckanext-ga-report.git&a=commitdiff&h=e1cf709a3d29cfdac40e8dcee1d67e703c4e70fa --- [601] Make sure only active datasets are shown in popular datasets on publisher homepage --- --- a/ckanext/ga_report/controller.py +++ b/ckanext/ga_report/controller.py @@ -212,13 +212,13 @@ for stat in graph_query: graph_dict[ stat.key ] = graph_dict.get(stat.key,{ 'name':stat.key, - 'data': [] + 'raw': {} }) - graph_dict[ stat.key ]['data'].append({ - 'x':_get_unix_epoch(stat.period_name), - 'y':float(stat.value) - }) - graph = [ graph_dict[x[0]] for x in entries ] + graph_dict[ stat.key ]['raw'][stat.period_name] = float(stat.value) + stats_in_table = [x[0] for x in entries] + stats_not_in_table = set(graph_dict.keys()) - set(stats_in_table) + stats = stats_in_table + sorted(list(stats_not_in_table)) + graph = [graph_dict[x] for x in stats] setattr(c, v+'_graph', json.dumps( _to_rickshaw(graph,percentageMode=True) )) # Get the total for each set of values and then set the value as @@ -249,7 +249,7 @@ writer = csv.writer(response) writer.writerow(["Publisher Title", "Publisher Name", "Views", "Visits", "Period Name"]) - top_publishers, top_publishers_graph = _get_top_publishers(None) + top_publishers = _get_top_publishers(limit=None) for publisher,view,visit in top_publishers: writer.writerow([publisher.title.encode('utf-8'), @@ -271,7 +271,7 @@ if not c.publisher: abort(404, 'A publisher with that name could not be found') - packages = self._get_packages(c.publisher) + packages = self._get_packages(publisher=c.publisher, month=c.month) response.headers['Content-Type'] = "text/csv; charset=utf-8" response.headers['Content-Disposition'] = \ str('attachment; filename=datasets_%s_%s.csv' % (c.publisher_name, month,)) @@ -300,15 +300,16 @@ if c.month: c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month]) - c.top_publishers, graph_data = _get_top_publishers() + c.top_publishers = _get_top_publishers() + graph_data = _get_top_publishers_graph() c.top_publishers_graph = json.dumps( _to_rickshaw(graph_data) ) return render('ga_report/publisher/index.html') - def _get_packages(self, publisher=None, count=-1): + def _get_packages(self, publisher=None, month='', count=-1): '''Returns the datasets in order of views''' have_download_data = True - month = c.month or 'All' + month = month or 'All' if month != 'All': have_download_data = month >= DOWNLOADS_AVAILABLE_FROM @@ -385,28 +386,25 @@ entry = q.filter(GA_Url.period_name==c.month).first() c.publisher_page_views = entry.pageviews if entry else 0 - c.top_packages = self._get_packages(c.publisher, 20) + c.top_packages = self._get_packages(publisher=c.publisher, count=20, month=c.month) # Graph query - top_package_names = [ x[0].name for x in c.top_packages ] + top_packages_all_time = self._get_packages(publisher=c.publisher, count=20, month='All') + top_package_names = [ x[0].name for x in top_packages_all_time ] graph_query = model.Session.query(GA_Url,model.Package)\ .filter(model.Package.name==GA_Url.package_id)\ .filter(GA_Url.url.like('/dataset/%'))\ .filter(GA_Url.package_id.in_(top_package_names)) - graph_dict = {} + all_series = {} for entry,package in graph_query: if not package: continue if entry.period_name=='All': continue - graph_dict[package.name] = graph_dict.get(package.name,{ + all_series[package.name] = all_series.get(package.name,{ 'name':package.title, - 'data':[] + 'raw': {} }) - graph_dict[package.name]['data'].append({ - 'x':_get_unix_epoch(entry.period_name), - 'y':int(entry.pageviews), - }) - graph = [ graph_dict[x] for x in top_package_names ] - + all_series[package.name]['raw'][entry.period_name] = int(entry.pageviews) + graph = [ all_series[series_name] for series_name in top_package_names ] c.graph_data = json.dumps( _to_rickshaw(graph) ) return render('ga_report/publisher/read.html') @@ -414,52 +412,45 @@ def _to_rickshaw(data, percentageMode=False): if data==[]: return data - # Create a consistent x-axis between all series - num_points = [ len(series['data']) for series in data ] - ideal_index = num_points.index( max(num_points) ) - x_axis = [] + # x-axis is every month in c.months. Note that data might not exist + # for entire history, eg. for recently-added datasets + x_axis = [x[0] for x in c.months] + x_axis.reverse() # Ascending order + x_axis = x_axis[:-1] # Remove latest month + totals = {} for series in data: + series['data'] = [] + for x_string in x_axis: + x = _get_unix_epoch( x_string ) + y = series['raw'].get(x_string,0) + series['data'].append({'x':x,'y':y}) + totals[x] = totals.get(x,0)+y + if not percentageMode: + return data + # Turn all data into percentages + # Roll insignificant series into a catch-all + THRESHOLD = 1 + raw_data = data + data = [] + for series in raw_data: for point in series['data']: - x_axis.append(point['x']) - x_axis = sorted( list( set(x_axis) ) ) - # Zero pad any missing values - for series in data: - xs = [ point['x'] for point in series['data'] ] - for x in set(x_axis).difference(set(xs)): - series['data'].append( {'x':x, 'y':0} ) - if percentageMode: - def get_totals(series_list): - totals = {} - for series in series_list: - for point in series['data']: - totals[point['x']] = totals.get(point['x'],0) + point['y'] - return totals - # Transform data into percentage stacks - totals = get_totals(data) - # Roll insignificant series into a catch-all - THRESHOLD = 0.01 - raw_data = data - data = [] - for series in raw_data: - for point in series['data']: - fraction = float(point['y']) / totals[point['x']] - if not (series in data) and fraction>THRESHOLD: - data.append(series) - # Overwrite data with a set of intereting series - others = [ x for x in raw_data if not (x in data) ] + percentage = (100*float(point['y'])) / totals[point['x']] + if not (series in data) and percentage>THRESHOLD: + data.append(series) + point['y'] = percentage + others = [ x for x in raw_data if not (x in data) ] + if len(others): + data_other = [] + for i in range(len(x_axis)): + x = _get_unix_epoch(x_axis[i]) + y = 0 + for series in others: + y += series['data'][i]['y'] + data_other.append({'x':x,'y':y}) data.append({ 'name':'Other', - 'data': [ {'x':x,'y':y} for x,y in get_totals(others).items() ] + 'data': data_other }) - # Turn each point into a percentage - for series in data: - for point in series['data']: - point['y'] = (point['y']*100) / totals[point['x']] - # Sort the points - for series in data: - series['data'] = sorted( series['data'], key=lambda x:x['x'] ) - # Strip the latest month's incomplete analytics - series['data'] = series['data'][:-1] return data @@ -484,39 +475,51 @@ top_publishers = [] res = connection.execute(q, month) - department_ids = [] for row in res: g = model.Group.get(row[0]) if g: - department_ids.append(row[0]) top_publishers.append((g, row[1], row[2])) - - graph = [] - if limit is not None: - # Query for a history graph of these publishers - q = model.Session.query( - GA_Url.department_id, - GA_Url.period_name, - func.sum(cast(GA_Url.pageviews,sqlalchemy.types.INT)))\ - .filter( GA_Url.department_id.in_(department_ids) )\ - .filter( GA_Url.period_name!='All' )\ - .filter( GA_Url.url.like('/dataset/%') )\ - .filter( GA_Url.package_id!='' )\ - .group_by( GA_Url.department_id, GA_Url.period_name ) - graph_dict = {} - for dept_id,period_name,views in q: - graph_dict[dept_id] = graph_dict.get( dept_id, { - 'name' : model.Group.get(dept_id).title, - 'data' : [] - }) - graph_dict[dept_id]['data'].append({ - 'x': _get_unix_epoch(period_name), - 'y': views - }) - # Sort dict into ordered list - for id in department_ids: - graph.append( graph_dict[id] ) - return top_publishers, graph + return top_publishers + + +def _get_top_publishers_graph(limit=20): + ''' + Returns a list of the top 20 publishers by dataset visits. + (The number to show can be varied with 'limit') + ''' + connection = model.Session.connection() + q = """ + select department_id, sum(pageviews::int) views + from ga_url + where department_id <> '' + and package_id <> '' + and url like '/dataset/%%' + and period_name='All' + group by department_id order by views desc + """ + if limit: + q = q + " limit %s;" % (limit) + + res = connection.execute(q) + department_ids = [ row[0] for row in res ] + + # Query for a history graph of these department ids + q = model.Session.query( + GA_Url.department_id, + GA_Url.period_name, + func.sum(cast(GA_Url.pageviews,sqlalchemy.types.INT)))\ + .filter( GA_Url.department_id.in_(department_ids) )\ + .filter( GA_Url.url.like('/dataset/%') )\ + .filter( GA_Url.package_id!='' )\ + .group_by( GA_Url.department_id, GA_Url.period_name ) + graph_dict = {} + for dept_id,period_name,views in q: + graph_dict[dept_id] = graph_dict.get( dept_id, { + 'name' : model.Group.get(dept_id).title, + 'raw' : {} + }) + graph_dict[dept_id]['raw'][period_name] = views + return [ graph_dict[id] for id in department_ids ] def _get_publishers(): --- a/ckanext/ga_report/helpers.py +++ b/ckanext/ga_report/helpers.py @@ -80,7 +80,7 @@ return base.render_snippet('ga_report/ga_popular_single.html', **context) -def most_popular_datasets(publisher, count=20): +def most_popular_datasets(publisher, count=20, preview_image=None): if not publisher: _log.error("No valid publisher passed to 'most_popular_datasets'") @@ -92,7 +92,8 @@ 'dataset_count': len(results), 'datasets': results, - 'publisher': publisher + 'publisher': publisher, + 'preview_image': preview_image } return base.render_snippet('ga_report/publisher/popular.html', **ctx) @@ -106,12 +107,18 @@ for entry in entries: if len(datasets) < count: p = model.Package.get(entry.url[len('/dataset/'):]) + if not p: _log.warning("Could not find Package for {url}".format(url=entry.url)) continue + if not p.state == 'active': + _log.warning("Package {0} is not active, it is {1}".format(p.name, p.state)) + continue + if not p in datasets: datasets[p] = {'views':0, 'visits': 0} + datasets[p]['views'] = datasets[p]['views'] + int(entry.pageviews) datasets[p]['visits'] = datasets[p]['visits'] + int(entry.visits) @@ -121,3 +128,17 @@ return sorted(results, key=operator.itemgetter(1), reverse=True) +def month_option_title(month_iso, months, day): + month_isos = [ iso_code for (iso_code,name) in months ] + try: + index = month_isos.index(month_iso) + except ValueError: + _log.error('Month "%s" not found in list of months.' % month_iso) + return month_iso + month_name = months[index][1] + if index==0: + return month_name + (' (up to %s)'%day) + return month_name + + + --- a/ckanext/ga_report/plugin.py +++ b/ckanext/ga_report/plugin.py @@ -5,7 +5,8 @@ from ckanext.ga_report.helpers import (most_popular_datasets, popular_datasets, - single_popular_dataset) + single_popular_dataset, + month_option_title) log = logging.getLogger('ckanext.ga-report') @@ -27,7 +28,8 @@ 'ga_report_installed': lambda: True, 'popular_datasets': popular_datasets, 'most_popular_datasets': most_popular_datasets, - 'single_popular_dataset': single_popular_dataset + 'single_popular_dataset': single_popular_dataset, + 'month_option_title': month_option_title } def after_map(self, map): --- a/ckanext/ga_report/public/css/ga_report.css +++ b/ckanext/ga_report/public/css/ga_report.css @@ -61,4 +61,9 @@ .ga-reports-table .td-numeric { text-align: center; } +.ga-reports-heading { + padding-right: 10px; + margin-top: 4px; + float: left; +} --- a/ckanext/ga_report/public/scripts/ckanext_ga_reports.js +++ b/ckanext/ga_report/public/scripts/ckanext_ga_reports.js @@ -4,14 +4,22 @@ CKAN.GA_Reports.render_rickshaw = function( css_name, data, mode, colorscheme ) { var graphLegends = $('#graph-legend-container'); - if (!Modernizr.svg) { + function renderError(alertClass,alertText,legendText) { $("#chart_"+css_name) - .html( '
Your browser does not support vector graphics. No graphs can be rendered.
') + .html( '
'+alertText+'
') .closest('.rickshaw_chart_container').css('height',50); var myLegend = $('
') - .html('(Graph cannot be rendered)') + .html(legendText) .appendTo(graphLegends); + } + + if (!Modernizr.svg) { + renderError('','Your browser does not support vector graphics. No graphs can be rendered.','(Graph cannot be rendered)'); return; + } + if (data.length==0) { + renderError('alert-info','There is not enough data to render a graph.','(No graph available)'); + return } var myLegend = $('
').appendTo(graphLegends); @@ -30,7 +38,9 @@ series: data , height: 328 }); - var x_axis = new Rickshaw.Graph.Axis.Time( { graph: graph } ); + var x_axis = new Rickshaw.Graph.Axis.Time( { + graph: graph + } ); var y_axis = new Rickshaw.Graph.Axis.Y( { graph: graph, orientation: 'left', --- a/ckanext/ga_report/templates/ga_report/ga_util.html +++ b/ckanext/ga_report/templates/ga_report/ga_util.html @@ -8,8 +8,8 @@ @@ -37,7 +37,6 @@ @@ -70,6 +69,32 @@
+
+
+
+

Download

+

+ Download as CSV
+

+
+
+

Graph Legend

+
+ +
+
+
+

Notes

+
    +
  • "Views" is the number of times a page was loaded in users' browsers.
  • +
  • "Downloads" is the number of times a user has clicked to download either an original or cached resource for a particular dataset. Download information is only available from 2nd December 2012; 'No data' is shown for records before that date.
  • +
  • These usage statistics are confined to users with javascript enabled, which excludes web crawlers and API calls.
  • +
  • The results are not shown when the number of views/visits is tiny. Where these relate to site pages, results are available in full in the CSV download. Where these relate to users' web browser information, results are not disclosed, for privacy reasons.
  • +
+
+
+
+ --- a/ckanext/ga_report/templates/ga_report/notes.html +++ /dev/null @@ -1,16 +1,1 @@ - -
  • -

    Notes

    -
      -
    • "Views" is the number of times a page was loaded in users' browsers.
    • -
    • "Downloads" is the number of times a user has clicked to download either an original or cached resource for a particular dataset. Download information is only available from 2nd December 2012; 'No data' is shown for records before that date.
    • -
    • These usage statistics are confined to users with javascript enabled, which excludes web crawlers and API calls.
    • -
    • The results are not shown when the number of views/visits is tiny. Where these relate to site pages, results are available in full in the CSV download. Where these relate to users' web browser information, results are not disclosed, for privacy reasons.
    • -
    -
  • - - --- a/ckanext/ga_report/templates/ga_report/publisher/index.html +++ b/ckanext/ga_report/templates/ga_report/publisher/index.html @@ -7,21 +7,9 @@ Usage by Publisher - -
  • -

    Download

    -

    - Download as CSV
    -

    -
  • -
  • -

    Graph Legend

    -
    -
    -
  • - + ${ga_sidebar(download_link=h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport',action='publisher_csv',month=c.month or 'all'))}
    @@ -34,17 +22,19 @@ + Site Usage ${usage_nav('Publishers')}
    -

    Site Usage ${usage_nav('Publishers')}

    ${rickshaw_graph(c.top_publishers_graph,'publishers')} + +
    - +

    Statistics for

    ${month_selector(c.month, c.months, c.day)}
    --- a/ckanext/ga_report/templates/ga_report/publisher/popular.html +++ b/ckanext/ga_report/templates/ga_report/publisher/popular.html @@ -15,7 +15,12 @@ -

    ${h.link_to("More usage data for " + publisher.title, h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport',action='read_publisher',id=publisher.name))}

    +

    + +
    + More usage data for ${publisher.title} +
    +

    --- a/ckanext/ga_report/templates/ga_report/publisher/read.html +++ b/ckanext/ga_report/templates/ga_report/publisher/read.html @@ -6,8 +6,6 @@ Usage by Dataset - - @@ -21,30 +19,16 @@ -
  • -

    Download

    -

    - Download as CSV
    -

    -
  • -
  • -

    Graph Legend

    -
    -
    -
  • - + ${ga_sidebar(download_link=h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport',action='dataset_csv',id=c.publisher_name or 'all',month=c.month or 'all'))}
    - + Site Usage ${usage_nav('Datasets')}
    -

    Site Usage ${usage_nav('Datasets')}

    ${c.publisher.title}

    -

    No page views in this period

    - - + ${rickshaw_graph(c.graph_data,'dataset-downloads',debug=True)}
    @@ -61,6 +45,13 @@
    + +

    Statistics for ${h.month_option_title(c.month,c.months,c.day)}:

    +
    + +

    Statistics for all months:

    +
    +
    No page views in this period.
    --- a/ckanext/ga_report/templates/ga_report/site/downloads.html +++ b/ckanext/ga_report/templates/ga_report/site/downloads.html @@ -7,21 +7,12 @@ Downloads - - -
  • -

    Download

    -

    - Download as CSV
    -

    -
  • - - + ${ga_sidebar(download_link=h.url_for(controller='ckanext.ga_report.controller:GaReport',action='csv_downloads',month=c.month or 'all'))}
    + Downloads ${usage_nav('Downloads')}
    -

    Downloads ${usage_nav('Downloads')}

    --- a/ckanext/ga_report/templates/ga_report/site/index.html +++ b/ckanext/ga_report/templates/ga_report/site/index.html @@ -6,8 +6,6 @@ Site usage - - @@ -21,24 +19,12 @@ -
  • -

    Download

    -

    - Download as CSV
    -

    -
  • -
  • -

    Graph Legend

    -
    -
    (No graph loaded)
    -
    -
  • - - + ${ga_sidebar(download_link=h.url_for(controller='ckanext.ga_report.controller:GaReport',action='csv',month=c.month or 'all'))}
    + Site Usage ${usage_nav('Site-wide')} +
    -

    Site Usage ${usage_nav('Site-wide')}

    @@ -74,7 +60,7 @@
    - +

    Show stats table for:

    ${month_selector(c.month, c.months, c.day)}
    @@ -98,33 +84,37 @@
    ${rickshaw_graph(c.browser_versions_graph,'browser-versions',mode='stack')} +

    Note: Where a browser has a large number of versions, these have been grouped together.

    - +

    Show stats table for:

    ${month_selector(c.month, c.months, c.day)} ${stat_table(c.browser_versions)}
    ${rickshaw_graph(c.browsers_graph,'browsers',mode='stack')} +
    - +

    Show stats table for:

    ${month_selector(c.month, c.months, c.day)} ${stat_table(c.browsers)}
    ${rickshaw_graph(c.os_graph,'os',mode='stack')} +
    - +

    Show stats table for:

    ${month_selector(c.month, c.months, c.day)} ${stat_table(c.os)}
    ${rickshaw_graph(c.os_versions_graph,'os_versions',mode='stack')} +
    - +

    Show stats table for:

    ${month_selector(c.month, c.months, c.day)} ${stat_table(c.os_versions)} @@ -132,32 +122,35 @@

    Number of visits that were referred from social networks

    - +

    Show stats table for:

    ${month_selector(c.month, c.months, c.day)} ${social_table(c.social_referrer_totals)}
    ${rickshaw_graph(c.social_networks_graph, 'social_networks',mode='stack')} +

    Percentage of visits that were referred from these social networks

    - +

    Show stats table for:

    ${month_selector(c.month, c.months, c.day)} ${stat_table(c.social_networks, 'Visits')}
    ${rickshaw_graph(c.languages_graph,'languages',mode='stack')} +
    - +

    Show stats table for:

    ${month_selector(c.month, c.months, c.day)} ${stat_table(c.languages)}
    ${rickshaw_graph(c.country_graph,'country',mode='stack')} +
    - +

    Show stats table for:

    ${month_selector(c.month, c.months, c.day)} ${stat_table(c.country)}