From: Ross Jones Date: Thu, 13 Jun 2013 09:02:37 +0000 Subject: [601] Make sure only active datasets are shown in popular datasets on publisher homepage X-Git-Url: http://maxious.lambdacomplex.org/git/?p=ckanext-ga-report.git&a=commitdiff&h=e1cf709a3d29cfdac40e8dcee1d67e703c4e70fa --- [601] Make sure only active datasets are shown in popular datasets on publisher homepage --- --- a/ckanext/ga_report/controller.py +++ b/ckanext/ga_report/controller.py @@ -212,12 +212,9 @@ for stat in graph_query: graph_dict[ stat.key ] = graph_dict.get(stat.key,{ 'name':stat.key, - 'data': [] + 'raw': {} }) - graph_dict[ stat.key ]['data'].append({ - 'x':_get_unix_epoch(stat.period_name), - 'y':float(stat.value) - }) + graph_dict[ stat.key ]['raw'][stat.period_name] = float(stat.value) stats_in_table = [x[0] for x in entries] stats_not_in_table = set(graph_dict.keys()) - set(stats_in_table) stats = stats_in_table + sorted(list(stats_not_in_table)) @@ -252,7 +249,7 @@ writer = csv.writer(response) writer.writerow(["Publisher Title", "Publisher Name", "Views", "Visits", "Period Name"]) - top_publishers, top_publishers_graph = _get_top_publishers(None) + top_publishers = _get_top_publishers(limit=None) for publisher,view,visit in top_publishers: writer.writerow([publisher.title.encode('utf-8'), @@ -274,7 +271,7 @@ if not c.publisher: abort(404, 'A publisher with that name could not be found') - packages = self._get_packages(c.publisher) + packages = self._get_packages(publisher=c.publisher, month=c.month) response.headers['Content-Type'] = "text/csv; charset=utf-8" response.headers['Content-Disposition'] = \ str('attachment; filename=datasets_%s_%s.csv' % (c.publisher_name, month,)) @@ -303,15 +300,16 @@ if c.month: c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month]) - c.top_publishers, graph_data = _get_top_publishers() + c.top_publishers = _get_top_publishers() + graph_data = _get_top_publishers_graph() c.top_publishers_graph = json.dumps( _to_rickshaw(graph_data) ) return render('ga_report/publisher/index.html') - def _get_packages(self, publisher=None, count=-1): + def _get_packages(self, publisher=None, month='', count=-1): '''Returns the datasets in order of views''' have_download_data = True - month = c.month or 'All' + month = month or 'All' if month != 'All': have_download_data = month >= DOWNLOADS_AVAILABLE_FROM @@ -388,28 +386,25 @@ entry = q.filter(GA_Url.period_name==c.month).first() c.publisher_page_views = entry.pageviews if entry else 0 - c.top_packages = self._get_packages(c.publisher, 20) + c.top_packages = self._get_packages(publisher=c.publisher, count=20, month=c.month) # Graph query - top_package_names = [ x[0].name for x in c.top_packages ] + top_packages_all_time = self._get_packages(publisher=c.publisher, count=20, month='All') + top_package_names = [ x[0].name for x in top_packages_all_time ] graph_query = model.Session.query(GA_Url,model.Package)\ .filter(model.Package.name==GA_Url.package_id)\ .filter(GA_Url.url.like('/dataset/%'))\ .filter(GA_Url.package_id.in_(top_package_names)) - graph_dict = {} + all_series = {} for entry,package in graph_query: if not package: continue if entry.period_name=='All': continue - graph_dict[package.name] = graph_dict.get(package.name,{ + all_series[package.name] = all_series.get(package.name,{ 'name':package.title, - 'data':[] + 'raw': {} }) - graph_dict[package.name]['data'].append({ - 'x':_get_unix_epoch(entry.period_name), - 'y':int(entry.pageviews), - }) - graph = [ graph_dict[x] for x in top_package_names ] - + all_series[package.name]['raw'][entry.period_name] = int(entry.pageviews) + graph = [ all_series[series_name] for series_name in top_package_names ] c.graph_data = json.dumps( _to_rickshaw(graph) ) return render('ga_report/publisher/read.html') @@ -417,52 +412,45 @@ def _to_rickshaw(data, percentageMode=False): if data==[]: return data - # Create a consistent x-axis between all series - num_points = [ len(series['data']) for series in data ] - ideal_index = num_points.index( max(num_points) ) - x_axis = [] + # x-axis is every month in c.months. Note that data might not exist + # for entire history, eg. for recently-added datasets + x_axis = [x[0] for x in c.months] + x_axis.reverse() # Ascending order + x_axis = x_axis[:-1] # Remove latest month + totals = {} for series in data: + series['data'] = [] + for x_string in x_axis: + x = _get_unix_epoch( x_string ) + y = series['raw'].get(x_string,0) + series['data'].append({'x':x,'y':y}) + totals[x] = totals.get(x,0)+y + if not percentageMode: + return data + # Turn all data into percentages + # Roll insignificant series into a catch-all + THRESHOLD = 1 + raw_data = data + data = [] + for series in raw_data: for point in series['data']: - x_axis.append(point['x']) - x_axis = sorted( list( set(x_axis) ) ) - # Zero pad any missing values - for series in data: - xs = [ point['x'] for point in series['data'] ] - for x in set(x_axis).difference(set(xs)): - series['data'].append( {'x':x, 'y':0} ) - if percentageMode: - def get_totals(series_list): - totals = {} - for series in series_list: - for point in series['data']: - totals[point['x']] = totals.get(point['x'],0) + point['y'] - return totals - # Transform data into percentage stacks - totals = get_totals(data) - # Roll insignificant series into a catch-all - THRESHOLD = 0.01 - raw_data = data - data = [] - for series in raw_data: - for point in series['data']: - fraction = float(point['y']) / totals[point['x']] - if not (series in data) and fraction>THRESHOLD: - data.append(series) - # Overwrite data with a set of interesting series - others = [ x for x in raw_data if not (x in data) ] + percentage = (100*float(point['y'])) / totals[point['x']] + if not (series in data) and percentage>THRESHOLD: + data.append(series) + point['y'] = percentage + others = [ x for x in raw_data if not (x in data) ] + if len(others): + data_other = [] + for i in range(len(x_axis)): + x = _get_unix_epoch(x_axis[i]) + y = 0 + for series in others: + y += series['data'][i]['y'] + data_other.append({'x':x,'y':y}) data.append({ 'name':'Other', - 'data': [ {'x':x,'y':y} for x,y in get_totals(others).items() ] + 'data': data_other }) - # Turn each point into a percentage - for series in data: - for point in series['data']: - point['y'] = (point['y']*100) / totals[point['x']] - # Sort the points - for series in data: - series['data'] = sorted( series['data'], key=lambda x:x['x'] ) - # Strip the latest month's incomplete analytics - series['data'] = series['data'][:-1] return data @@ -487,39 +475,51 @@ top_publishers = [] res = connection.execute(q, month) - department_ids = [] for row in res: g = model.Group.get(row[0]) if g: - department_ids.append(row[0]) top_publishers.append((g, row[1], row[2])) - - graph = [] - if limit is not None: - # Query for a history graph of these publishers - q = model.Session.query( - GA_Url.department_id, - GA_Url.period_name, - func.sum(cast(GA_Url.pageviews,sqlalchemy.types.INT)))\ - .filter( GA_Url.department_id.in_(department_ids) )\ - .filter( GA_Url.period_name!='All' )\ - .filter( GA_Url.url.like('/dataset/%') )\ - .filter( GA_Url.package_id!='' )\ - .group_by( GA_Url.department_id, GA_Url.period_name ) - graph_dict = {} - for dept_id,period_name,views in q: - graph_dict[dept_id] = graph_dict.get( dept_id, { - 'name' : model.Group.get(dept_id).title, - 'data' : [] - }) - graph_dict[dept_id]['data'].append({ - 'x': _get_unix_epoch(period_name), - 'y': views - }) - # Sort dict into ordered list - for id in department_ids: - graph.append( graph_dict[id] ) - return top_publishers, graph + return top_publishers + + +def _get_top_publishers_graph(limit=20): + ''' + Returns a list of the top 20 publishers by dataset visits. + (The number to show can be varied with 'limit') + ''' + connection = model.Session.connection() + q = """ + select department_id, sum(pageviews::int) views + from ga_url + where department_id <> '' + and package_id <> '' + and url like '/dataset/%%' + and period_name='All' + group by department_id order by views desc + """ + if limit: + q = q + " limit %s;" % (limit) + + res = connection.execute(q) + department_ids = [ row[0] for row in res ] + + # Query for a history graph of these department ids + q = model.Session.query( + GA_Url.department_id, + GA_Url.period_name, + func.sum(cast(GA_Url.pageviews,sqlalchemy.types.INT)))\ + .filter( GA_Url.department_id.in_(department_ids) )\ + .filter( GA_Url.url.like('/dataset/%') )\ + .filter( GA_Url.package_id!='' )\ + .group_by( GA_Url.department_id, GA_Url.period_name ) + graph_dict = {} + for dept_id,period_name,views in q: + graph_dict[dept_id] = graph_dict.get( dept_id, { + 'name' : model.Group.get(dept_id).title, + 'raw' : {} + }) + graph_dict[dept_id]['raw'][period_name] = views + return [ graph_dict[id] for id in department_ids ] def _get_publishers(): --- a/ckanext/ga_report/helpers.py +++ b/ckanext/ga_report/helpers.py @@ -107,12 +107,18 @@ for entry in entries: if len(datasets) < count: p = model.Package.get(entry.url[len('/dataset/'):]) + if not p: _log.warning("Could not find Package for {url}".format(url=entry.url)) continue + if not p.state == 'active': + _log.warning("Package {0} is not active, it is {1}".format(p.name, p.state)) + continue + if not p in datasets: datasets[p] = {'views':0, 'visits': 0} + datasets[p]['views'] = datasets[p]['views'] + int(entry.pageviews) datasets[p]['visits'] = datasets[p]['visits'] + int(entry.visits) @@ -122,3 +128,17 @@ return sorted(results, key=operator.itemgetter(1), reverse=True) +def month_option_title(month_iso, months, day): + month_isos = [ iso_code for (iso_code,name) in months ] + try: + index = month_isos.index(month_iso) + except ValueError: + _log.error('Month "%s" not found in list of months.' % month_iso) + return month_iso + month_name = months[index][1] + if index==0: + return month_name + (' (up to %s)'%day) + return month_name + + + --- a/ckanext/ga_report/plugin.py +++ b/ckanext/ga_report/plugin.py @@ -5,7 +5,8 @@ from ckanext.ga_report.helpers import (most_popular_datasets, popular_datasets, - single_popular_dataset) + single_popular_dataset, + month_option_title) log = logging.getLogger('ckanext.ga-report') @@ -27,7 +28,8 @@ 'ga_report_installed': lambda: True, 'popular_datasets': popular_datasets, 'most_popular_datasets': most_popular_datasets, - 'single_popular_dataset': single_popular_dataset + 'single_popular_dataset': single_popular_dataset, + 'month_option_title': month_option_title } def after_map(self, map): --- a/ckanext/ga_report/public/css/ga_report.css +++ b/ckanext/ga_report/public/css/ga_report.css @@ -61,4 +61,9 @@ .ga-reports-table .td-numeric { text-align: center; } +.ga-reports-heading { + padding-right: 10px; + margin-top: 4px; + float: left; +} --- a/ckanext/ga_report/public/scripts/ckanext_ga_reports.js +++ b/ckanext/ga_report/public/scripts/ckanext_ga_reports.js @@ -4,14 +4,22 @@ CKAN.GA_Reports.render_rickshaw = function( css_name, data, mode, colorscheme ) { var graphLegends = $('#graph-legend-container'); - if (!Modernizr.svg) { + function renderError(alertClass,alertText,legendText) { $("#chart_"+css_name) - .html( '
Your browser does not support vector graphics. No graphs can be rendered.
') + .html( '
'+alertText+'
') .closest('.rickshaw_chart_container').css('height',50); var myLegend = $('
') - .html('(Graph cannot be rendered)') + .html(legendText) .appendTo(graphLegends); + } + + if (!Modernizr.svg) { + renderError('','Your browser does not support vector graphics. No graphs can be rendered.','(Graph cannot be rendered)'); return; + } + if (data.length==0) { + renderError('alert-info','There is not enough data to render a graph.','(No graph available)'); + return } var myLegend = $('
').appendTo(graphLegends); @@ -30,7 +38,9 @@ series: data , height: 328 }); - var x_axis = new Rickshaw.Graph.Axis.Time( { graph: graph } ); + var x_axis = new Rickshaw.Graph.Axis.Time( { + graph: graph + } ); var y_axis = new Rickshaw.Graph.Axis.Y( { graph: graph, orientation: 'left', --- a/ckanext/ga_report/templates/ga_report/ga_util.html +++ b/ckanext/ga_report/templates/ga_report/ga_util.html @@ -8,8 +8,8 @@ @@ -37,7 +37,6 @@ --- a/ckanext/ga_report/templates/ga_report/publisher/index.html +++ b/ckanext/ga_report/templates/ga_report/publisher/index.html @@ -22,17 +22,19 @@ + Site Usage ${usage_nav('Publishers')}
-

Site Usage ${usage_nav('Publishers')}

${rickshaw_graph(c.top_publishers_graph,'publishers')} + +
- +

Statistics for

${month_selector(c.month, c.months, c.day)}
--- a/ckanext/ga_report/templates/ga_report/publisher/read.html +++ b/ckanext/ga_report/templates/ga_report/publisher/read.html @@ -21,17 +21,14 @@ ${ga_sidebar(download_link=h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport',action='dataset_csv',id=c.publisher_name or 'all',month=c.month or 'all'))} - + Site Usage ${usage_nav('Datasets')}
-

Site Usage ${usage_nav('Datasets')}

${c.publisher.title}

-

No page views in this period

- - + ${rickshaw_graph(c.graph_data,'dataset-downloads',debug=True)}
@@ -48,6 +45,13 @@
+ +

Statistics for ${h.month_option_title(c.month,c.months,c.day)}:

+
+ +

Statistics for all months:

+
+
No page views in this period.
--- a/ckanext/ga_report/templates/ga_report/site/downloads.html +++ b/ckanext/ga_report/templates/ga_report/site/downloads.html @@ -11,8 +11,8 @@ ${ga_sidebar(download_link=h.url_for(controller='ckanext.ga_report.controller:GaReport',action='csv_downloads',month=c.month or 'all'))} + Downloads ${usage_nav('Downloads')}
-

Downloads ${usage_nav('Downloads')}

--- a/ckanext/ga_report/templates/ga_report/site/index.html +++ b/ckanext/ga_report/templates/ga_report/site/index.html @@ -22,8 +22,9 @@ ${ga_sidebar(download_link=h.url_for(controller='ckanext.ga_report.controller:GaReport',action='csv',month=c.month or 'all'))} + Site Usage ${usage_nav('Site-wide')} +
-

Site Usage ${usage_nav('Site-wide')}

@@ -59,7 +60,7 @@
- +

Show stats table for:

${month_selector(c.month, c.months, c.day)}
@@ -83,33 +84,37 @@
${rickshaw_graph(c.browser_versions_graph,'browser-versions',mode='stack')} +

Note: Where a browser has a large number of versions, these have been grouped together.

- +

Show stats table for:

${month_selector(c.month, c.months, c.day)} ${stat_table(c.browser_versions)}
${rickshaw_graph(c.browsers_graph,'browsers',mode='stack')} +
- +

Show stats table for:

${month_selector(c.month, c.months, c.day)} ${stat_table(c.browsers)}
${rickshaw_graph(c.os_graph,'os',mode='stack')} +
- +

Show stats table for:

${month_selector(c.month, c.months, c.day)} ${stat_table(c.os)}
${rickshaw_graph(c.os_versions_graph,'os_versions',mode='stack')} +
- +

Show stats table for:

${month_selector(c.month, c.months, c.day)} ${stat_table(c.os_versions)} @@ -117,32 +122,35 @@

Number of visits that were referred from social networks

- +

Show stats table for:

${month_selector(c.month, c.months, c.day)} ${social_table(c.social_referrer_totals)}
${rickshaw_graph(c.social_networks_graph, 'social_networks',mode='stack')} +

Percentage of visits that were referred from these social networks

- +

Show stats table for:

${month_selector(c.month, c.months, c.day)} ${stat_table(c.social_networks, 'Visits')}
${rickshaw_graph(c.languages_graph,'languages',mode='stack')} +
- +

Show stats table for:

${month_selector(c.month, c.months, c.day)} ${stat_table(c.languages)}
${rickshaw_graph(c.country_graph,'country',mode='stack')} +
- +

Show stats table for:

${month_selector(c.month, c.months, c.day)} ${stat_table(c.country)}