From: Tom Rees
Date: Wed, 04 Sep 2013 16:22:36 +0000
Subject: [#469] Moved Modernizr.js in here. Only used in this repo.
X-Git-Url: http://maxious.lambdacomplex.org/git/?p=ckanext-ga-report.git&a=commitdiff&h=1c0cf426edcaaf8dcb2c4a5d1f8cc0eda2637506
---
[#469] Moved Modernizr.js in here. Only used in this repo.
---
--- a/ckanext/ga_report/controller.py
+++ b/ckanext/ga_report/controller.py
@@ -212,13 +212,13 @@
for stat in graph_query:
graph_dict[ stat.key ] = graph_dict.get(stat.key,{
'name':stat.key,
- 'data': []
+ 'raw': {}
})
- graph_dict[ stat.key ]['data'].append({
- 'x':_get_unix_epoch(stat.period_name),
- 'y':float(stat.value)
- })
- graph = [ graph_dict[x[0]] for x in entries ]
+ graph_dict[ stat.key ]['raw'][stat.period_name] = float(stat.value)
+ stats_in_table = [x[0] for x in entries]
+ stats_not_in_table = set(graph_dict.keys()) - set(stats_in_table)
+ stats = stats_in_table + sorted(list(stats_not_in_table))
+ graph = [graph_dict[x] for x in stats]
setattr(c, v+'_graph', json.dumps( _to_rickshaw(graph,percentageMode=True) ))
# Get the total for each set of values and then set the value as
@@ -249,7 +249,7 @@
writer = csv.writer(response)
writer.writerow(["Publisher Title", "Publisher Name", "Views", "Visits", "Period Name"])
- top_publishers, top_publishers_graph = _get_top_publishers(None)
+ top_publishers = _get_top_publishers(limit=None)
for publisher,view,visit in top_publishers:
writer.writerow([publisher.title.encode('utf-8'),
@@ -271,7 +271,7 @@
if not c.publisher:
abort(404, 'A publisher with that name could not be found')
- packages = self._get_packages(c.publisher)
+ packages = self._get_packages(publisher=c.publisher, month=c.month)
response.headers['Content-Type'] = "text/csv; charset=utf-8"
response.headers['Content-Disposition'] = \
str('attachment; filename=datasets_%s_%s.csv' % (c.publisher_name, month,))
@@ -300,15 +300,16 @@
if c.month:
c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month])
- c.top_publishers, graph_data = _get_top_publishers()
+ c.top_publishers = _get_top_publishers()
+ graph_data = _get_top_publishers_graph()
c.top_publishers_graph = json.dumps( _to_rickshaw(graph_data) )
return render('ga_report/publisher/index.html')
- def _get_packages(self, publisher=None, count=-1):
+ def _get_packages(self, publisher=None, month='', count=-1):
'''Returns the datasets in order of views'''
have_download_data = True
- month = c.month or 'All'
+ month = month or 'All'
if month != 'All':
have_download_data = month >= DOWNLOADS_AVAILABLE_FROM
@@ -385,28 +386,25 @@
entry = q.filter(GA_Url.period_name==c.month).first()
c.publisher_page_views = entry.pageviews if entry else 0
- c.top_packages = self._get_packages(c.publisher, 20)
+ c.top_packages = self._get_packages(publisher=c.publisher, count=20, month=c.month)
# Graph query
- top_package_names = [ x[0].name for x in c.top_packages ]
+ top_packages_all_time = self._get_packages(publisher=c.publisher, count=20, month='All')
+ top_package_names = [ x[0].name for x in top_packages_all_time ]
graph_query = model.Session.query(GA_Url,model.Package)\
.filter(model.Package.name==GA_Url.package_id)\
.filter(GA_Url.url.like('/dataset/%'))\
.filter(GA_Url.package_id.in_(top_package_names))
- graph_dict = {}
+ all_series = {}
for entry,package in graph_query:
if not package: continue
if entry.period_name=='All': continue
- graph_dict[package.name] = graph_dict.get(package.name,{
+ all_series[package.name] = all_series.get(package.name,{
'name':package.title,
- 'data':[]
+ 'raw': {}
})
- graph_dict[package.name]['data'].append({
- 'x':_get_unix_epoch(entry.period_name),
- 'y':int(entry.pageviews),
- })
- graph = [ graph_dict[x] for x in top_package_names ]
-
+ all_series[package.name]['raw'][entry.period_name] = int(entry.pageviews)
+ graph = [ all_series[series_name] for series_name in top_package_names ]
c.graph_data = json.dumps( _to_rickshaw(graph) )
return render('ga_report/publisher/read.html')
@@ -414,52 +412,45 @@
def _to_rickshaw(data, percentageMode=False):
if data==[]:
return data
- # Create a consistent x-axis between all series
- num_points = [ len(series['data']) for series in data ]
- ideal_index = num_points.index( max(num_points) )
- x_axis = [ point['x'] for point in data[ideal_index]['data'] ]
+ # x-axis is every month in c.months. Note that data might not exist
+ # for entire history, eg. for recently-added datasets
+ x_axis = [x[0] for x in c.months]
+ x_axis.reverse() # Ascending order
+ x_axis = x_axis[:-1] # Remove latest month
+ totals = {}
for series in data:
- xs = [ point['x'] for point in series['data'] ]
- assert set(xs).issubset( set(x_axis) ), (xs, x_axis)
- # Zero pad any missing values
- for x in set(x_axis).difference(set(xs)):
- series['data'].append( {'x':x, 'y':0} )
- if percentageMode:
- def get_totals(series_list):
- totals = {}
- for series in series_list:
- for point in series['data']:
- totals[point['x']] = totals.get(point['x'],0) + point['y']
- lengths = [ len(series['data']) for series in series_list ]
- assert len(set(lengths))==1
- assert lengths[0] == len(totals)
- return totals
- # Transform data into percentage stacks
- totals = get_totals(data)
- # Roll insignificant series into a catch-all
- THRESHOLD = 0.01
- raw_data = data
- data = []
- for series in raw_data:
- for point in series['data']:
- fraction = float(point['y']) / totals[point['x']]
- if not (series in data) and fraction>THRESHOLD:
- data.append(series)
- # Overwrite data with a set of intereting series
- others = [ x for x in raw_data if not (x in data) ]
+ series['data'] = []
+ for x_string in x_axis:
+ x = _get_unix_epoch( x_string )
+ y = series['raw'].get(x_string,0)
+ series['data'].append({'x':x,'y':y})
+ totals[x] = totals.get(x,0)+y
+ if not percentageMode:
+ return data
+ # Turn all data into percentages
+ # Roll insignificant series into a catch-all
+ THRESHOLD = 1
+ raw_data = data
+ data = []
+ for series in raw_data:
+ for point in series['data']:
+ percentage = (100*float(point['y'])) / totals[point['x']]
+ if not (series in data) and percentage>THRESHOLD:
+ data.append(series)
+ point['y'] = percentage
+ others = [ x for x in raw_data if not (x in data) ]
+ if len(others):
+ data_other = []
+ for i in range(len(x_axis)):
+ x = _get_unix_epoch(x_axis[i])
+ y = 0
+ for series in others:
+ y += series['data'][i]['y']
+ data_other.append({'x':x,'y':y})
data.append({
'name':'Other',
- 'data': [ {'x':x,'y':y} for x,y in get_totals(others).items() ]
+ 'data': data_other
})
- # Turn each point into a percentage
- for series in data:
- for point in series['data']:
- point['y'] = (point['y']*100) / totals[point['x']]
- # Sort the points
- for series in data:
- series['data'] = sorted( series['data'], key=lambda x:x['x'] )
- # Strip the latest month's incomplete analytics
- series['data'] = series['data'][:-1]
return data
@@ -484,39 +475,51 @@
top_publishers = []
res = connection.execute(q, month)
- department_ids = []
for row in res:
g = model.Group.get(row[0])
if g:
- department_ids.append(row[0])
top_publishers.append((g, row[1], row[2]))
-
- graph = []
- if limit is not None:
- # Query for a history graph of these publishers
- q = model.Session.query(
- GA_Url.department_id,
- GA_Url.period_name,
- func.sum(cast(GA_Url.pageviews,sqlalchemy.types.INT)))\
- .filter( GA_Url.department_id.in_(department_ids) )\
- .filter( GA_Url.period_name!='All' )\
- .filter( GA_Url.url.like('/dataset/%') )\
- .filter( GA_Url.package_id!='' )\
- .group_by( GA_Url.department_id, GA_Url.period_name )
- graph_dict = {}
- for dept_id,period_name,views in q:
- graph_dict[dept_id] = graph_dict.get( dept_id, {
- 'name' : model.Group.get(dept_id).title,
- 'data' : []
- })
- graph_dict[dept_id]['data'].append({
- 'x': _get_unix_epoch(period_name),
- 'y': views
- })
- # Sort dict into ordered list
- for id in department_ids:
- graph.append( graph_dict[id] )
- return top_publishers, graph
+ return top_publishers
+
+
+def _get_top_publishers_graph(limit=20):
+ '''
+ Returns a list of the top 20 publishers by dataset visits.
+ (The number to show can be varied with 'limit')
+ '''
+ connection = model.Session.connection()
+ q = """
+ select department_id, sum(pageviews::int) views
+ from ga_url
+ where department_id <> ''
+ and package_id <> ''
+ and url like '/dataset/%%'
+ and period_name='All'
+ group by department_id order by views desc
+ """
+ if limit:
+ q = q + " limit %s;" % (limit)
+
+ res = connection.execute(q)
+ department_ids = [ row[0] for row in res ]
+
+ # Query for a history graph of these department ids
+ q = model.Session.query(
+ GA_Url.department_id,
+ GA_Url.period_name,
+ func.sum(cast(GA_Url.pageviews,sqlalchemy.types.INT)))\
+ .filter( GA_Url.department_id.in_(department_ids) )\
+ .filter( GA_Url.url.like('/dataset/%') )\
+ .filter( GA_Url.package_id!='' )\
+ .group_by( GA_Url.department_id, GA_Url.period_name )
+ graph_dict = {}
+ for dept_id,period_name,views in q:
+ graph_dict[dept_id] = graph_dict.get( dept_id, {
+ 'name' : model.Group.get(dept_id).title,
+ 'raw' : {}
+ })
+ graph_dict[dept_id]['raw'][period_name] = views
+ return [ graph_dict[id] for id in department_ids ]
def _get_publishers():
--- a/ckanext/ga_report/helpers.py
+++ b/ckanext/ga_report/helpers.py
@@ -80,7 +80,7 @@
return base.render_snippet('ga_report/ga_popular_single.html', **context)
-def most_popular_datasets(publisher, count=20):
+def most_popular_datasets(publisher, count=20, preview_image=None):
if not publisher:
_log.error("No valid publisher passed to 'most_popular_datasets'")
@@ -92,7 +92,8 @@
'dataset_count': len(results),
'datasets': results,
- 'publisher': publisher
+ 'publisher': publisher,
+ 'preview_image': preview_image
}
return base.render_snippet('ga_report/publisher/popular.html', **ctx)
@@ -106,12 +107,18 @@
for entry in entries:
if len(datasets) < count:
p = model.Package.get(entry.url[len('/dataset/'):])
+
if not p:
_log.warning("Could not find Package for {url}".format(url=entry.url))
continue
+ if not p.state == 'active':
+ _log.warning("Package {0} is not active, it is {1}".format(p.name, p.state))
+ continue
+
if not p in datasets:
datasets[p] = {'views':0, 'visits': 0}
+
datasets[p]['views'] = datasets[p]['views'] + int(entry.pageviews)
datasets[p]['visits'] = datasets[p]['visits'] + int(entry.visits)
@@ -121,3 +128,17 @@
return sorted(results, key=operator.itemgetter(1), reverse=True)
+def month_option_title(month_iso, months, day):
+ month_isos = [ iso_code for (iso_code,name) in months ]
+ try:
+ index = month_isos.index(month_iso)
+ except ValueError:
+ _log.error('Month "%s" not found in list of months.' % month_iso)
+ return month_iso
+ month_name = months[index][1]
+ if index==0:
+ return month_name + (' (up to %s)'%day)
+ return month_name
+
+
+
--- a/ckanext/ga_report/plugin.py
+++ b/ckanext/ga_report/plugin.py
@@ -5,7 +5,8 @@
from ckanext.ga_report.helpers import (most_popular_datasets,
popular_datasets,
- single_popular_dataset)
+ single_popular_dataset,
+ month_option_title)
log = logging.getLogger('ckanext.ga-report')
@@ -27,7 +28,8 @@
'ga_report_installed': lambda: True,
'popular_datasets': popular_datasets,
'most_popular_datasets': most_popular_datasets,
- 'single_popular_dataset': single_popular_dataset
+ 'single_popular_dataset': single_popular_dataset,
+ 'month_option_title': month_option_title
}
def after_map(self, map):
--- a/ckanext/ga_report/public/css/ga_report.css
+++ b/ckanext/ga_report/public/css/ga_report.css
@@ -2,6 +2,11 @@
padding: 1px 0 0 0;
width: 108px;
text-align: center;
+ /* Hack to hide the momentary flash of text
+ * before sparklines are fully rendered */
+ font-size: 1px;
+ color: transparent;
+ overflow: hidden;
}
.rickshaw_chart_container {
position: relative;
@@ -18,6 +23,7 @@
.rickshaw_legend {
background: transparent;
width: 100%;
+ padding-top: 4px;
}
.rickshaw_y_axis {
position: absolute;
@@ -29,6 +35,10 @@
background: transparent !important;
color: #000000 !important;
font-weight: normal !important;
+}
+.rickshaw_legend .instructions {
+ color: #000;
+ margin-bottom: 6px;
}
.rickshaw_legend .line .action {
@@ -44,8 +54,16 @@
float: left;
width: 200px;
}
+.rickshaw_legend .line .label:hover {
+ text-decoration: underline;
+}
.ga-reports-table .td-numeric {
text-align: center;
}
+.ga-reports-heading {
+ padding-right: 10px;
+ margin-top: 4px;
+ float: left;
+}
--- a/ckanext/ga_report/public/scripts/ckanext_ga_reports.js
+++ b/ckanext/ga_report/public/scripts/ckanext_ga_reports.js
@@ -4,14 +4,22 @@
CKAN.GA_Reports.render_rickshaw = function( css_name, data, mode, colorscheme ) {
var graphLegends = $('#graph-legend-container');
- if (!Modernizr.svg) {
+ function renderError(alertClass,alertText,legendText) {
$("#chart_"+css_name)
- .html( '
Your browser does not support vector graphics. No graphs can be rendered.
')
+ .html( '
'+alertText+'
')
.closest('.rickshaw_chart_container').css('height',50);
var myLegend = $('')
- .html('(Graph cannot be rendered)')
+ .html(legendText)
.appendTo(graphLegends);
+ }
+
+ if (!Modernizr.svg) {
+ renderError('','Your browser does not support vector graphics. No graphs can be rendered.','(Graph cannot be rendered)');
return;
+ }
+ if (data.length==0) {
+ renderError('alert-info','There is not enough data to render a graph.','(No graph available)');
+ return
}
var myLegend = $('').appendTo(graphLegends);
@@ -30,7 +38,9 @@
series: data ,
height: 328
});
- var x_axis = new Rickshaw.Graph.Axis.Time( { graph: graph } );
+ var x_axis = new Rickshaw.Graph.Axis.Time( {
+ graph: graph
+ } );
var y_axis = new Rickshaw.Graph.Axis.Y( {
graph: graph,
orientation: 'left',
@@ -45,6 +55,7 @@
graph: graph,
legend: legend
} );
+ myLegend.prepend('
"Views" is the number of times a page was loaded in users' browsers.
+
"Downloads" is the number of times a user has clicked to download either an original or cached resource for a particular dataset. Download information is only available from 2nd December 2012; 'No data' is shown for records before that date.
+
These usage statistics are confined to users with javascript enabled, which excludes web crawlers and API calls.
+
The results are not shown when the number of views/visits is tiny. Where these relate to site pages, results are available in full in the CSV download. Where these relate to users' web browser information, results are not disclosed, for privacy reasons.
"Views" is the number of times a page was loaded in users' browsers.
-
"Downloads" is the number of times a user has clicked to download either an original or cached resource for a particular dataset. Download information is only available from 2nd December 2012; 'No data' is shown for records before that date.
-
These usage statistics are confined to users with javascript enabled, which excludes web crawlers and API calls.
-
The results are not shown when the number of views/visits is tiny. Where these relate to site pages, results are available in full in the CSV download. Where these relate to users' web browser information, results are not disclosed, for privacy reasons.
-
+ ${ga_sidebar(download_link=h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport',action='dataset_csv',id=c.publisher_name or 'all',month=c.month or 'all'))}
-
+ Site Usage ${usage_nav('Datasets')}