[601] Make sure only active datasets are shown in popular datasets on publisher homepage
--- a/ckanext/ga_report/controller.py
+++ b/ckanext/ga_report/controller.py
@@ -1,6 +1,7 @@
import re
import csv
import sys
+import json
import logging
import operator
import collections
@@ -21,6 +22,10 @@
d = strptime(strdate, '%Y-%m')
return '%s %s' % (calendar.month_name[d.tm_mon], d.tm_year)
+def _get_unix_epoch(strdate):
+    '''Return the UTC Unix timestamp of the start of a 'YYYY-MM' month.'''
+    import calendar, time  # timegm, unlike mktime, ignores the server timezone
+    return calendar.timegm(time.strptime(strdate, '%Y-%m'))
def _month_details(cls, stat_key=None):
'''
@@ -107,11 +112,26 @@
return key, val
+ # Query historic values for sparkline rendering
+ sparkline_query = model.Session.query(GA_Stat)\
+ .filter(GA_Stat.stat_name=='Totals')\
+ .order_by(GA_Stat.period_name)
+ sparkline_data = {}
+ for x in sparkline_query:
+ sparkline_data[x.key] = sparkline_data.get(x.key,[])
+ key, val = clean_key(x.key,float(x.value))
+ tooltip = '%s: %s' % (_get_month_name(x.period_name), val)
+ sparkline_data[x.key].append( (tooltip,x.value) )
+ # Trim the latest month, as it looks like a huge dropoff
+ for key in sparkline_data:
+ sparkline_data[key] = sparkline_data[key][:-1]
+
c.global_totals = []
if c.month:
for e in entries:
key, val = clean_key(e.key, e.value)
- c.global_totals.append((key, val))
+ sparkline = sparkline_data[e.key]
+ c.global_totals.append((key, val, sparkline))
else:
d = collections.defaultdict(list)
for e in entries:
@@ -121,10 +141,18 @@
v = sum(v)
else:
v = float(sum(v))/float(len(v))
+ sparkline = sparkline_data[k]
key, val = clean_key(k,v)
- c.global_totals.append((key, val))
- c.global_totals = sorted(c.global_totals, key=operator.itemgetter(0))
+ c.global_totals.append((key, val, sparkline))
+ # Sort the global totals into a more pleasant order
+ def sort_func(x):
+ key = x[0]
+ total_order = ['Total page views','Total visits','Pages per visit']
+ if key in total_order:
+ return total_order.index(key)
+ return 999
+ c.global_totals = sorted(c.global_totals, key=sort_func)
keys = {
'Browser versions': 'browser_versions',
@@ -161,12 +189,13 @@
for k, v in keys.iteritems():
q = model.Session.query(GA_Stat).\
- filter(GA_Stat.stat_name==k)
+ filter(GA_Stat.stat_name==k).\
+ order_by(GA_Stat.period_name)
+ # Buffer the tabular data
if c.month:
entries = []
q = q.filter(GA_Stat.period_name==c.month).\
order_by('ga_stat.value::int desc')
-
d = collections.defaultdict(int)
for e in q.all():
d[e.key] += int(e.value)
@@ -175,10 +204,27 @@
entries.append((key,val,))
entries = sorted(entries, key=operator.itemgetter(1), reverse=True)
+ # Run a query on all months to gather graph data
+ graph_query = model.Session.query(GA_Stat).\
+ filter(GA_Stat.stat_name==k).\
+ order_by(GA_Stat.period_name)
+ graph_dict = {}
+ for stat in graph_query:
+ graph_dict[ stat.key ] = graph_dict.get(stat.key,{
+ 'name':stat.key,
+ 'raw': {}
+ })
+ graph_dict[ stat.key ]['raw'][stat.period_name] = float(stat.value)
+ stats_in_table = [x[0] for x in entries]
+ stats_not_in_table = set(graph_dict.keys()) - set(stats_in_table)
+ stats = stats_in_table + sorted(list(stats_not_in_table))
+ graph = [graph_dict[x] for x in stats]
+ setattr(c, v+'_graph', json.dumps( _to_rickshaw(graph,percentageMode=True) ))
+
# Get the total for each set of values and then set the value as
# a percentage of the total
if k == 'Social sources':
- total = sum([x for n,x in c.global_totals if n == 'Total visits'])
+ total = sum([x for n,x,graph in c.global_totals if n == 'Total visits'])
else:
total = sum([num for _,num in entries])
setattr(c, v, [(k,_percent(v,total)) for k,v in entries ])
@@ -203,7 +249,9 @@
writer = csv.writer(response)
writer.writerow(["Publisher Title", "Publisher Name", "Views", "Visits", "Period Name"])
- for publisher,view,visit in _get_top_publishers(None):
+ top_publishers = _get_top_publishers(limit=None)
+
+ for publisher,view,visit in top_publishers:
writer.writerow([publisher.title.encode('utf-8'),
publisher.name.encode('utf-8'),
view,
@@ -223,7 +271,7 @@
if not c.publisher:
abort(404, 'A publisher with that name could not be found')
- packages = self._get_packages(c.publisher)
+ packages = self._get_packages(publisher=c.publisher, month=c.month)
response.headers['Content-Type'] = "text/csv; charset=utf-8"
response.headers['Content-Disposition'] = \
str('attachment; filename=datasets_%s_%s.csv' % (c.publisher_name, month,))
@@ -253,12 +301,15 @@
c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month])
c.top_publishers = _get_top_publishers()
+ graph_data = _get_top_publishers_graph()
+ c.top_publishers_graph = json.dumps( _to_rickshaw(graph_data) )
+
return render('ga_report/publisher/index.html')
- def _get_packages(self, publisher=None, count=-1):
+ def _get_packages(self, publisher=None, month='', count=-1):
'''Returns the datasets in order of views'''
have_download_data = True
- month = c.month or 'All'
+ month = month or 'All'
if month != 'All':
have_download_data = month >= DOWNLOADS_AVAILABLE_FROM
@@ -284,8 +335,9 @@
filter(GA_Stat.key==package.name)
if month != 'All': # Fetch everything unless the month is specific
dls = dls.filter(GA_Stat.period_name==month)
-
- downloads = sum(int(d.value) for d in dls.all())
+ downloads = 0
+ for x in dls:
+ downloads += int(x.value)
else:
downloads = 'No data'
top_packages.append((package, entry.pageviews, entry.visits, downloads))
@@ -334,9 +386,73 @@
entry = q.filter(GA_Url.period_name==c.month).first()
c.publisher_page_views = entry.pageviews if entry else 0
- c.top_packages = self._get_packages(c.publisher, 20)
+ c.top_packages = self._get_packages(publisher=c.publisher, count=20, month=c.month)
+
+ # Graph query
+ top_packages_all_time = self._get_packages(publisher=c.publisher, count=20, month='All')
+ top_package_names = [ x[0].name for x in top_packages_all_time ]
+ graph_query = model.Session.query(GA_Url,model.Package)\
+ .filter(model.Package.name==GA_Url.package_id)\
+ .filter(GA_Url.url.like('/dataset/%'))\
+ .filter(GA_Url.package_id.in_(top_package_names))
+ all_series = {}
+ for entry,package in graph_query:
+ if not package: continue
+ if entry.period_name=='All': continue
+ all_series[package.name] = all_series.get(package.name,{
+ 'name':package.title,
+ 'raw': {}
+ })
+ all_series[package.name]['raw'][entry.period_name] = int(entry.pageviews)
+ graph = [ all_series[series_name] for series_name in top_package_names ]
+ c.graph_data = json.dumps( _to_rickshaw(graph) )
return render('ga_report/publisher/read.html')
+
+def _to_rickshaw(data, percentageMode=False):
+    '''
+    Converts a list of series ({'name':.., 'raw': {'YYYY-MM': value}}) into
+    x/y point lists for graphing: each series gains (in place) a 'data' list
+    of {'x': epoch, 'y': value} points, one per month in c.months except the
+    latest. With percentageMode each y becomes a percentage of that month's
+    total and series never above THRESHOLD percent are merged into 'Other'.
+    '''
+    if data==[]:
+        return data
+    # x-axis is every month in c.months. Note that data might not exist
+    # for entire history, eg. for recently-added datasets
+    x_axis = [x[0] for x in c.months]
+    x_axis.reverse() # Ascending order
+    x_axis = x_axis[:-1] # Remove latest month
+    totals = {}
+    for series in data:
+        series['data'] = []
+        for x_string in x_axis:
+            x = _get_unix_epoch( x_string )
+            y = series['raw'].get(x_string,0)
+            series['data'].append({'x':x,'y':y})
+            totals[x] = totals.get(x,0)+y
+    if not percentageMode:
+        return data
+    # Turn all data into percentages
+    # Roll insignificant series into a catch-all
+    THRESHOLD = 1
+    significant, others = [], []
+    for series in data:
+        is_significant = False
+        for point in series['data']:
+            total = totals[point['x']]
+            # A month with no data at all totals zero: report 0%, don't divide
+            point['y'] = (100*float(point['y'])) / total if total else 0
+            is_significant = is_significant or point['y']>THRESHOLD
+        (significant if is_significant else others).append(series)
+    if others:
+        data_other = [ {'x': _get_unix_epoch(x_string),
+                        'y': sum(series['data'][i]['y'] for series in others)}
+                       for i,x_string in enumerate(x_axis) ]
+        significant.append({'name':'Other', 'data': data_other})
+    return significant
+
def _get_top_publishers(limit=20):
'''
@@ -366,6 +482,46 @@
return top_publishers
+def _get_top_publishers_graph(limit=20):
+    '''
+    Returns graph series for the top publishers by dataset page views, most
+    viewed first; each is {'name': title, 'raw': {period_name: views}}.
+    (The number to show can be varied with 'limit')
+    '''
+    connection = model.Session.connection()
+    q = """
+        select department_id, sum(pageviews::int) views
+        from ga_url
+        where department_id <> ''
+          and package_id <> ''
+          and url like '/dataset/%%'
+          and period_name='All'
+        group by department_id order by views desc
+        """
+    if limit:
+        q = q + " limit %d;" % int(limit)
+    department_ids = [ row[0] for row in connection.execute(q) ]
+
+    # Query for a history graph of these department ids
+    q = model.Session.query(
+            GA_Url.department_id,
+            GA_Url.period_name,
+            func.sum(cast(GA_Url.pageviews,sqlalchemy.types.INT)))\
+        .filter( GA_Url.department_id.in_(department_ids) )\
+        .filter( GA_Url.url.like('/dataset/%') )\
+        .filter( GA_Url.package_id!='' )\
+        .group_by( GA_Url.department_id, GA_Url.period_name )
+    graph_dict = {}
+    for dept_id,period_name,views in q:
+        if dept_id not in graph_dict:
+            # One lookup per publisher; fall back to the id if the group is gone
+            group = model.Group.get(dept_id)
+            name = group.title if group else dept_id
+            graph_dict[dept_id] = {'name': name, 'raw': {}}
+        graph_dict[dept_id]['raw'][period_name] = views
+    return [ graph_dict[dept_id] for dept_id in department_ids ]
+
+
def _get_publishers():
'''
Returns a list of all publishers. Each item is a tuple:
--- /dev/null
+++ b/ckanext/ga_report/public/scripts/vendor/d3.layout.min.js
@@ -1,1 +1,1 @@
-
+(function(){function a(a){var b=a.source,d=a.target,e=c(b,d),f=[b];while(b!==e)b=b.parent,f.push(b);var g=f.length;while(d!==e)f.splice(g,0,d),d=d.parent;return f}function b(a){var b=[],c=a.parent;while(c!=null)b.push(a),a=c,c=c.parent;return b.push(a),b}function c(a,c){if(a===c)return a;var d=b(a),e=b(c),f=d.pop(),g=e.pop(),h=null;while(f===g)h=f,f=d.pop(),g=e.pop();return h}function g(a){a.fixed|=2}function h(a){a!==f&&(a.fixed&=1)}function i(){j(),f.fixed&=1,e=f=null}function j(){f.px+=d3.event.dx,f.py+=d3.event.dy,e.resume()}function k(a,b,c){var d=0,e=0;a.charge=0;if(!a.leaf){var f=a.nodes,g=f.length,h=-1,i;while(++h<g){i=f[h];if(i==null)continue;k(i,b,c),a.charge+=i.charge,d+=i.charge*i.cx,e+=i.charge*i.cy}}if(a.point){a.leaf||(a.point.x+=Math.random()-.5,a.point.y+=Math.random()-.5);var j=b*c[a.point.index];a.charge+=a.pointCharge=j,d+=j*a.point.x,e+=j*a.point.y}a.cx=d/a.charge,a.cy=e/a.charge}function l(a){return 20}function m(a){return 1}function o(a){return a.x}function p(a){return a.y}function q(a,b,c){a.y0=b,a.y=c}function t(a){var b=1,c=0,d=a[0][1],e,f=a.length;for(;b<f;++b)(e=a[b][1])>d&&(c=b,d=e);return c}function u(a){return a.reduce(v,0)}function v(a,b){return a+b[1]}function w(a,b){return x(a,Math.ceil(Math.log(b.length)/Math.LN2+1))}function x(a,b){var c=-1,d=+a[0],e=(a[1]-d)/b,f=[];while(++c<=b)f[c]=e*c+d;return f}function y(a){return[d3.min(a),d3.max(a)]}function z(a,b){return a.sort=d3.rebind(a,b.sort),a.children=d3.rebind(a,b.children),a.links=D,a.value=d3.rebind(a,b.value),a.nodes=function(b){return E=!0,(a.nodes=a)(b)},a}function A(a){return a.children}function B(a){return a.value}function C(a,b){return b.value-a.value}function D(a){return d3.merge(a.map(function(a){return(a.children||[]).map(function(b){return{source:a,target:b}})}))}function F(a,b){return a.value-b.value}function G(a,b){var c=a._pack_next;a._pack_next=b,b._pack_prev=a,b._pack_next=c,c._pack_prev=b}function H(a,b){a._pack_next=b,b._pack_prev=a}function I(a,b){var 
c=b.x-a.x,d=b.y-a.y,e=a.r+b.r;return e*e-c*c-d*d>.001}function J(a){function l(a){b=Math.min(a.x-a.r,b),c=Math.max(a.x+a.r,c),d=Math.min(a.y-a.r,d),e=Math.max(a.y+a.r,e)}var b=Infinity,c=-Infinity,d=Infinity,e=-Infinity,f=a.length,g,h,i,j,k;a.forEach(K),g=a[0],g.x=-g.r,g.y=0,l(g);if(f>1){h=a[1],h.x=h.r,h.y=0,l(h);if(f>2){i=a[2],O(g,h,i),l(i),G(g,i),g._pack_prev=i,G(i,h),h=g._pack_next;for(var m=3;m<f;m++){O(g,h,i=a[m]);var n=0,o=1,p=1;for(j=h._pack_next;j!==h;j=j._pack_next,o++)if(I(j,i)){n=1;break}if(n==1)for(k=g._pack_prev;k!==j._pack_prev;k=k._pack_prev,p++)if(I(k,i)){p<o&&(n=-1,j=k);break}n==0?(G(g,i),h=i,l(i)):n>0?(H(g,j),h=j,m--):(H(j,h),g=j,m--)}}}var q=(b+c)/2,r=(d+e)/2,s=0;for(var m=0;m<f;m++){var t=a[m];t.x-=q,t.y-=r,s=Math.max(s,t.r+Math.sqrt(t.x*t.x+t.y*t.y))}return a.forEach(L),s}function K(a){a._pack_next=a._pack_prev=a}function L(a){delete a._pack_next,delete a._pack_prev}function M(a){var b=a.children;b&&b.length?(b.forEach(M),a.r=J(b)):a.r=Math.sqrt(a.value)}function N(a,b,c,d){var e=a.children;a.x=b+=d*a.x,a.y=c+=d*a.y,a.r*=d;if(e){var f=-1,g=e.length;while(++f<g)N(e[f],b,c,d)}}function O(a,b,c){var d=a.r+c.r,e=b.x-a.x,f=b.y-a.y;if(d&&(e||f)){var g=b.r+c.r,h=Math.sqrt(e*e+f*f),i=Math.max(-1,Math.min(1,(d*d+h*h-g*g)/(2*d*h))),j=Math.acos(i),k=i*(d/=h),l=Math.sin(j)*d;c.x=a.x+k*e+l*f,c.y=a.y+k*f-l*e}else c.x=a.x+d,c.y=a.y}function P(a){return 1+d3.max(a,function(a){return a.y})}function Q(a){return a.reduce(function(a,b){return a+b.x},0)/a.length}function R(a){var b=a.children;return b&&b.length?R(b[0]):a}function S(a){var b=a.children,c;return b&&(c=b.length)?S(b[c-1]):a}function T(a,b){return a.parent==b.parent?1:2}function U(a){var b=a.children;return b&&b.length?b[0]:a._tree.thread}function V(a){var b=a.children,c;return b&&(c=b.length)?b[c-1]:a._tree.thread}function W(a,b){var c=a.children;if(c&&(e=c.length)){var d,e,f=-1;while(++f<e)b(d=W(c[f],b),a)>0&&(a=d)}return a}function X(a,b){return a.x-b.x}function Y(a,b){return b.x-a.x}function 
Z(a,b){return a.depth-b.depth}function $(a,b){function c(a,d){var e=a.children;if(e&&(i=e.length)){var f,g=null,h=-1,i;while(++h<i)f=e[h],c(f,g),g=f}b(a,d)}c(a,null)}function _(a){var b=0,c=0,d=a.children,e=d.length,f;while(--e>=0)f=d[e]._tree,f.prelim+=b,f.mod+=b,b+=f.shift+(c+=f.change)}function ba(a,b,c){a=a._tree,b=b._tree;var d=c/(b.number-a.number);a.change+=d,b.change-=d,b.shift+=c,b.prelim+=c,b.mod+=c}function bb(a,b,c){return a._tree.ancestor.parent==b.parent?a._tree.ancestor:c}function bc(a){return{x:a.x,y:a.y,dx:a.dx,dy:a.dy}}function bd(a,b){var c=a.x+b[3],d=a.y+b[0],e=a.dx-b[1]-b[3],f=a.dy-b[0]-b[2];return e<0&&(c+=e/2,e=0),f<0&&(d+=f/2,f=0),{x:c,y:d,dx:e,dy:f}}d3.layout={},d3.layout.bundle=function(){return function(b){var c=[],d=-1,e=b.length;while(++d<e)c.push(a(b[d]));return c}},d3.layout.chord=function(){function j(){var a={},j=[],l=d3.range(e),m=[],n,o,p,q,r;b=[],c=[],n=0,q=-1;while(++q<e){o=0,r=-1;while(++r<e)o+=d[q][r];j.push(o),m.push(d3.range(e)),n+=o}g&&l.sort(function(a,b){return g(j[a],j[b])}),h&&m.forEach(function(a,b){a.sort(function(a,c){return h(d[b][a],d[b][c])})}),n=(2*Math.PI-f*e)/n,o=0,q=-1;while(++q<e){p=o,r=-1;while(++r<e){var s=l[q],t=m[s][r],u=d[s][t],v=o,w=o+=u*n;a[s+"-"+t]={index:s,subindex:t,startAngle:v,endAngle:w,value:u}}c.push({index:s,startAngle:p,endAngle:o,value:(o-p)/n}),o+=f}q=-1;while(++q<e){r=q-1;while(++r<e){var x=a[q+"-"+r],y=a[r+"-"+q];(x.value||y.value)&&b.push(x.value<y.value?{source:y,target:x}:{source:x,target:y})}}i&&k()}function k(){b.sort(function(a,b){return i((a.source.value+a.target.value)/2,(b.source.value+b.target.value)/2)})}var a={},b,c,d,e,f=0,g,h,i;return a.matrix=function(f){return arguments.length?(e=(d=f)&&d.length,b=c=null,a):d},a.padding=function(d){return arguments.length?(f=d,b=c=null,a):f},a.sortGroups=function(d){return arguments.length?(g=d,b=c=null,a):g},a.sortSubgroups=function(c){return arguments.length?(h=c,b=null,a):h},a.sortChords=function(c){return 
arguments.length?(i=c,b&&k(),a):i},a.chords=function(){return b||j(),b},a.groups=function(){return c||j(),c},a},d3.layout.force=function(){function A(a){return function(b,c,d,e,f){if(b.point!==a){var g=b.cx-a.x,h=b.cy-a.y,i=1/Math.sqrt(g*g+h*h);if((e-c)*i<t){var j=b.charge*i*i;return a.px-=g*j,a.py-=h*j,!0}if(b.point&&isFinite(i)){var j=b.pointCharge*i*i;a.px-=g*j,a.py-=h*j}}return!b.charge}}function B(){var a=v.length,d=w.length,e,f,g,h,i,j,l,m,p;for(f=0;f<d;++f){g=w[f],h=g.source,i=g.target,m=i.x-h.x,p=i.y-h.y;if(j=m*m+p*p)j=n*y[f]*((j=Math.sqrt(j))-x[f])/j,m*=j,p*=j,i.x-=m*(l=h.weight/(i.weight+h.weight)),i.y-=p*l,h.x+=m*(l=1-l),h.y+=p*l}if(l=n*s){m=c[0]/2,p=c[1]/2,f=-1;if(l)while(++f<a)g=v[f],g.x+=(m-g.x)*l,g.y+=(p-g.y)*l}if(r){k(e=d3.geom.quadtree(v),n,z),f=-1;while(++f<a)(g=v[f]).fixed||e.visit(A(g))}f=-1;while(++f<a)g=v[f],g.fixed?(g.x=g.px,g.y=g.py):(g.x-=(g.px-(g.px=g.x))*o,g.y-=(g.py-(g.py=g.y))*o);return b.tick({type:"tick",alpha:n}),(n*=.99)<.005}function C(b){g(f=b),e=a}var a={},b=d3.dispatch("tick"),c=[1,1],d,n,o=.9,p=l,q=m,r=-30,s=.1,t=.8,u,v=[],w=[],x,y,z;return a.on=function(c,d){return b.on(c,d),a},a.nodes=function(b){return arguments.length?(v=b,a):v},a.links=function(b){return arguments.length?(w=b,a):w},a.size=function(b){return arguments.length?(c=b,a):c},a.linkDistance=function(b){return arguments.length?(p=d3.functor(b),a):p},a.distance=a.linkDistance,a.linkStrength=function(b){return arguments.length?(q=d3.functor(b),a):q},a.friction=function(b){return arguments.length?(o=b,a):o},a.charge=function(b){return arguments.length?(r=typeof b=="function"?b:+b,a):r},a.gravity=function(b){return arguments.length?(s=b,a):s},a.theta=function(b){return arguments.length?(t=b,a):t},a.start=function(){function k(a,c){var d=l(b),e=-1,f=d.length,g;while(++e<f)if(!isNaN(g=d[e][a]))return g;return Math.random()*c}function l(){if(!i){i=[];for(d=0;d<e;++d)i[d]=[];for(d=0;d<f;++d){var 
a=w[d];i[a.source.index].push(a.target),i[a.target.index].push(a.source)}}return i[b]}var b,d,e=v.length,f=w.length,g=c[0],h=c[1],i,j;for(b=0;b<e;++b)(j=v[b]).index=b,j.weight=0;x=[],y=[];for(b=0;b<f;++b)j=w[b],typeof j.source=="number"&&(j.source=v[j.source]),typeof j.target=="number"&&(j.target=v[j.target]),x[b]=p.call(this,j,b),y[b]=q.call(this,j,b),++j.source.weight,++j.target.weight;for(b=0;b<e;++b)j=v[b],isNaN(j.x)&&(j.x=k("x",g)),isNaN(j.y)&&(j.y=k("y",h)),isNaN(j.px)&&(j.px=j.x),isNaN(j.py)&&(j.py=j.y);z=[];if(typeof r=="function")for(b=0;b<e;++b)z[b]=+r.call(this,v[b],b);else for(b=0;b<e;++b)z[b]=r;return a.resume()},a.resume=function(){return n=.1,d3.timer(B)