Show top datasets cross-publisher. Drop-down for the publisher. Browser version numbers filtered on download, so you get this version in the CSV too - for privacy. single_popular_dataset now copes when not much data, and can return the figures so DGU can reskin it in its own repo. Notes about usage stats centralised to notes.html.
[ckanext-ga-report.git] / ckanext / ga_report / helpers.py
blob:a/ckanext/ga_report/helpers.py -> blob:b/ckanext/ga_report/helpers.py
import logging import logging
import operator import operator
import ckan.lib.base as base import ckan.lib.base as base
import ckan.model as model import ckan.model as model
   
from ckanext.ga_report.ga_model import GA_Url, GA_Publisher from ckanext.ga_report.ga_model import GA_Url, GA_Publisher
from ckanext.ga_report.controller import _get_publishers from ckanext.ga_report.controller import _get_publishers
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)
   
def popular_datasets(count=10): def popular_datasets(count=10):
import random import random
   
publisher = None publisher = None
publishers = _get_publishers(30) publishers = _get_publishers(30)
total = len(publishers) total = len(publishers)
while not publisher or not datasets: while not publisher or not datasets:
rand = random.randrange(0, total) rand = random.randrange(0, total)
publisher = publishers[rand][0] publisher = publishers[rand][0]
if not publisher.state == 'active': if not publisher.state == 'active':
publisher = None publisher = None
continue continue
datasets = _datasets_for_publisher(publisher, 10)[:count] datasets = _datasets_for_publisher(publisher, 10)[:count]
   
ctx = { ctx = {
'datasets': datasets, 'datasets': datasets,
'publisher': publisher 'publisher': publisher
} }
return base.render_snippet('ga_report/ga_popular_datasets.html', **ctx) return base.render_snippet('ga_report/ga_popular_datasets.html', **ctx)
   
def single_popular_dataset(top=20): def single_popular_dataset(top=20):
  '''Returns a random dataset from the most popular ones.
   
  :param top: the number of top datasets to select from
  '''
import random import random
   
datasets = {} top_datasets = model.Session.query(GA_Url).\
rand = random.randrange(0, top) filter(GA_Url.url.like('/dataset/%')).\
entry = model.Session.query(GA_Url).\ order_by('ga_url.pageviews::int desc')
filter(GA_Url.url.like('/dataset/%')).\ num_top_datasets = top_datasets.count()
order_by('ga_url.pageviews::int desc')[rand]  
   
  if num_top_datasets:
dataset = None dataset = None
while not dataset: while not dataset:
dataset = model.Package.get(entry.url[len('/dataset/'):]) rand = random.randrange(0, min(top, num_top_datasets))
if dataset and not dataset.state == 'active': ga_url = top_datasets[rand]
dataset = None dataset = model.Package.get(ga_url.url[len('/dataset/'):])
else: if dataset and not dataset.state == 'active':
publisher = model.Group.get(entry.department_id) dataset = None
  else:
ctx = { dataset = model.Session.query(model.Package)\
  .filter_by(state='active').first()
  publisher = dataset.get_groups('publisher')[0]
  return {
'dataset': dataset, 'dataset': dataset,
'publisher': publisher 'publisher': publisher
} }
return base.render_snippet('ga_report/ga_popular_single.html', **ctx)  
  def single_popular_dataset_html(top=20):
  context = single_popular_dataset(top)
  return base.render_snippet('ga_report/ga_popular_single.html', **context)
   
   
def most_popular_datasets(publisher, count=20): def most_popular_datasets(publisher, count=20):
   
if not publisher: if not publisher:
_log.error("No valid publisher passed to 'most_popular_datasets'") _log.error("No valid publisher passed to 'most_popular_datasets'")
return "" return ""
   
results = _datasets_for_publisher(publisher, count) results = _datasets_for_publisher(publisher, count)
   
ctx = { ctx = {
'dataset_count': len(datasets), 'dataset_count': len(results),
'datasets': results, 'datasets': results,
   
'publisher': publisher 'publisher': publisher
} }
   
return base.render_snippet('ga_report/publisher/popular.html', **ctx) return base.render_snippet('ga_report/publisher/popular.html', **ctx)
   
def _datasets_for_publisher(publisher, count): def _datasets_for_publisher(publisher, count):
datasets = {} datasets = {}
entries = model.Session.query(GA_Url).\ entries = model.Session.query(GA_Url).\
filter(GA_Url.department_id==publisher.name).\ filter(GA_Url.department_id==publisher.name).\
filter(GA_Url.url.like('/dataset/%')).\ filter(GA_Url.url.like('/dataset/%')).\
order_by('ga_url.pageviews::int desc').all() order_by('ga_url.pageviews::int desc').all()
for entry in entries: for entry in entries:
if len(datasets) < count: if len(datasets) < count:
p = model.Package.get(entry.url[len('/dataset/'):]) p = model.Package.get(entry.url[len('/dataset/'):])
if not p in datasets: if not p in datasets:
datasets[p] = {'views':0, 'visits': 0} datasets[p] = {'views':0, 'visits': 0}
datasets[p]['views'] = datasets[p]['views'] + int(entry.pageviews) datasets[p]['views'] = datasets[p]['views'] + int(entry.pageviews)
datasets[p]['visits'] = datasets[p]['visits'] + int(entry.visitors) datasets[p]['visits'] = datasets[p]['visits'] + int(entry.visitors)
   
results = [] results = []
for k, v in datasets.iteritems(): for k, v in datasets.iteritems():
results.append((k,v['views'],v['visits'])) results.append((k,v['views'],v['visits']))
   
return sorted(results, key=operator.itemgetter(1), reverse=True) return sorted(results, key=operator.itemgetter(1), reverse=True)