--- a/ckanext/ga_report/controller.py +++ b/ckanext/ga_report/controller.py @@ -191,32 +191,11 @@ q = model.Session.query(GA_Stat).\ filter(GA_Stat.stat_name==k).\ order_by(GA_Stat.period_name) - # Run the query on all months to gather graph data - series = {} - x_axis = set() - for stat in q: - x_val = _get_unix_epoch(stat.period_name) - series[ stat.key ] = series.get(stat.key,{}) - series[ stat.key ][x_val] = float(stat.value) - x_axis.add(x_val) - # Common x-axis for all series. Exclude this month (incomplete data) - x_axis = sorted(list(x_axis))[:-1] - # Buffer a rickshaw dataset from the series - def create_graph(series_name, series_data): - return { - 'name':series_name, - 'data':[ {'x':x,'y':series_data.get(x,0)} for x in x_axis ] - } - rickshaw = [ create_graph(name,data) for name, data in series.items() ] - rickshaw = sorted(rickshaw,key=lambda x:x['data'][-1]['y']) - setattr(c, v+'_graph', json.dumps(rickshaw)) - # Buffer the tabular data if c.month: entries = [] q = q.filter(GA_Stat.period_name==c.month).\ order_by('ga_stat.value::int desc') - d = collections.defaultdict(int) for e in q.all(): d[e.key] += int(e.value) @@ -225,6 +204,23 @@ entries.append((key,val,)) entries = sorted(entries, key=operator.itemgetter(1), reverse=True) + # Run a query on all months to gather graph data + graph_query = model.Session.query(GA_Stat).\ + filter(GA_Stat.stat_name==k).\ + order_by(GA_Stat.period_name) + graph_dict = {} + for stat in graph_query: + graph_dict[ stat.key ] = graph_dict.get(stat.key,{ + 'name':stat.key, + 'raw': {} + }) + graph_dict[ stat.key ]['raw'][stat.period_name] = float(stat.value) + stats_in_table = [x[0] for x in entries] + stats_not_in_table = set(graph_dict.keys()) - set(stats_in_table) + stats = stats_in_table + sorted(list(stats_not_in_table)) + graph = [graph_dict[x] for x in stats] + setattr(c, v+'_graph', json.dumps( _to_rickshaw(graph,percentageMode=True) )) + # Get the total for each set of values and then set the value as # a percentage of the total if k == 'Social sources': @@ -253,7 +249,7 @@ writer = csv.writer(response) writer.writerow(["Publisher Title", "Publisher Name", "Views", "Visits", "Period Name"]) - top_publishers, top_publishers_graph = _get_top_publishers(None) + top_publishers = _get_top_publishers(limit=None) for publisher,view,visit in top_publishers: writer.writerow([publisher.title.encode('utf-8'), @@ -275,7 +271,7 @@ if not c.publisher: abort(404, 'A publisher with that name could not be found') - packages = self._get_packages(c.publisher) + packages = self._get_packages(publisher=c.publisher, month=c.month) response.headers['Content-Type'] = "text/csv; charset=utf-8" response.headers['Content-Disposition'] = \ str('attachment; filename=datasets_%s_%s.csv' % (c.publisher_name, month,)) @@ -304,15 +300,18 @@ if c.month: c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month]) - c.top_publishers, graph_data = _get_top_publishers() - c.top_publishers_graph = json.dumps( _to_rickshaw(graph_data.values()) ) - - return render('ga_report/publisher/index.html') - - def _get_packages(self, publisher=None, count=-1): + c.top_publishers = _get_top_publishers() + graph_data = _get_top_publishers_graph() + c.top_publishers_graph = json.dumps( _to_rickshaw(graph_data) ) + + x = render('ga_report/publisher/index.html') + + return x + + def _get_packages(self, publisher=None, month='', count=-1): '''Returns the datasets in order of views''' have_download_data = True - month = c.month or 'All' + month = month or 'All' if month != 'All': have_download_data = month >= DOWNLOADS_AVAILABLE_FROM @@ -343,7 +342,8 @@ downloads += int(x.value) else: downloads = 'No data' - top_packages.append((package, entry.pageviews, entry.visits, downloads)) + if package.private == False: + top_packages.append((package, entry.pageviews, entry.visits, downloads)) else: log.warning('Could not find package associated package') @@ -389,42 +389,73 @@ entry = q.filter(GA_Url.period_name==c.month).first() c.publisher_page_views = entry.pageviews if entry else 0 - c.top_packages = self._get_packages(c.publisher, 20) + c.top_packages = self._get_packages(publisher=c.publisher, count=20, month=c.month) # Graph query - top_package_names = [ x[0].name for x in c.top_packages ] + top_packages_all_time = self._get_packages(publisher=c.publisher, count=20, month='All') + top_package_names = [ x[0].name for x in top_packages_all_time ] graph_query = model.Session.query(GA_Url,model.Package)\ .filter(model.Package.name==GA_Url.package_id)\ .filter(GA_Url.url.like('/dataset/%'))\ .filter(GA_Url.package_id.in_(top_package_names)) - graph_data = {} + all_series = {} for entry,package in graph_query: if not package: continue if entry.period_name=='All': continue - graph_data[package.id] = graph_data.get(package.id,{ + all_series[package.name] = all_series.get(package.name,{ 'name':package.title, - 'data':[] + 'raw': {} }) - graph_data[package.id]['data'].append({ - 'x':_get_unix_epoch(entry.period_name), - 'y':int(entry.pageviews), - }) - - c.graph_data = json.dumps( _to_rickshaw(graph_data.values()) ) + all_series[package.name]['raw'][entry.period_name] = int(entry.pageviews) + graph = [ all_series[series_name] for series_name in top_package_names ] + c.graph_data = json.dumps( _to_rickshaw(graph) ) return render('ga_report/publisher/read.html') -def _to_rickshaw(data): - num_points = [] - for package in data: - package['data'] = sorted( package['data'], key=lambda x:x['x'] ) - num_points.append( len(package['data']) ) - if len(set(num_points))>1: - example = num_points[ num_points.index(max(num_points)) ] - for package in data: - while len(package['data'])THRESHOLD: + data.append(series) + point['y'] = percentage + others = [ x for x in raw_data if not (x in data) ] + if len(others): + data_other = [] + for i in range(len(x_axis)): + x = _get_unix_epoch(x_axis[i]) + y = 0 + for series in others: + y += series['data'][i]['y'] + data_other.append({'x':x,'y':y}) + data.append({ + 'name':'Other', + 'data': data_other + }) return data + def _get_top_publishers(limit=20): ''' @@ -447,35 +478,51 @@ top_publishers = [] res = connection.execute(q, month) - department_ids = [] for row in res: g = model.Group.get(row[0]) if g: - department_ids.append(row[0]) top_publishers.append((g, row[1], row[2])) - - graph = {} - if limit is not None: - # Query for a history graph of these publishers - q = model.Session.query( - GA_Url.department_id, - GA_Url.period_name, - func.sum(cast(GA_Url.pageviews,sqlalchemy.types.INT)))\ - .filter( GA_Url.department_id.in_(department_ids) )\ - .filter( GA_Url.period_name!='All' )\ - .filter( GA_Url.url.like('/dataset/%') )\ - .filter( GA_Url.package_id!='' )\ - .group_by( GA_Url.department_id, GA_Url.period_name ) - for dept_id,period_name,views in q: - graph[dept_id] = graph.get( dept_id, { - 'name' : model.Group.get(dept_id).title, - 'data' : [] - }) - graph[dept_id]['data'].append({ - 'x': _get_unix_epoch(period_name), - 'y': views - }) - return top_publishers, graph + return top_publishers + + +def _get_top_publishers_graph(limit=20): + ''' + Returns a list of the top 20 publishers by dataset visits. + (The number to show can be varied with 'limit') + ''' + connection = model.Session.connection() + q = """ + select department_id, sum(pageviews::int) views + from ga_url + where department_id <> '' + and package_id <> '' + and url like '/dataset/%%' + and period_name='All' + group by department_id order by views desc + """ + if limit: + q = q + " limit %s;" % (limit) + + res = connection.execute(q) + department_ids = [ row[0] for row in res ] + + # Query for a history graph of these department ids + q = model.Session.query( + GA_Url.department_id, + GA_Url.period_name, + func.sum(cast(GA_Url.pageviews,sqlalchemy.types.INT)))\ + .filter( GA_Url.department_id.in_(department_ids) )\ + .filter( GA_Url.url.like('/dataset/%') )\ + .filter( GA_Url.package_id!='' )\ + .group_by( GA_Url.department_id, GA_Url.period_name ) + graph_dict = {} + for dept_id,period_name,views in q: + graph_dict[dept_id] = graph_dict.get( dept_id, { + 'name' : model.Group.get(dept_id).title, + 'raw' : {} + }) + graph_dict[dept_id]['raw'][period_name] = views + return [ graph_dict[id] for id in department_ids ] def _get_publishers(): @@ -485,7 +532,7 @@ ''' publishers = [] for pub in model.Session.query(model.Group).\ - filter(model.Group.type=='publisher').\ + filter(model.Group.type=='organization').\ filter(model.Group.state=='active').\ order_by(model.Group.name): publishers.append((pub.name, pub.title))