import logging
import operator
import random

import ckan.lib.base as base
import ckan.model as model
from ckan.logic import get_action
from ckanext.ga_report.ga_model import GA_Url, GA_Publisher
from ckanext.ga_report.controller import _get_publishers

_log = logging.getLogger(__name__)

# GA_Url rows for datasets store the path as '/dataset/<name-or-id>'; the
# part after this prefix is what gets passed to model.Package.get().
_DATASET_URL_PREFIX = '/dataset/'


def popular_datasets(count=10):
    '''Render the popular datasets of one randomly chosen active publisher.

    Candidate publishers come from ``_get_publishers(30)``; the first
    element of each returned row is used as the publisher object.
    Candidates are tried in random order until one is found that is
    active and has at least one dataset.

    :param count: maximum number of datasets to pass to the template
    :returns: the rendered 'ga_report/ga_popular_datasets.html' snippet,
        or an empty string when no suitable publisher exists
    '''
    candidates = [row[0] for row in _get_publishers(30)]
    # Shuffle-and-iterate instead of "pick at random until it works": each
    # candidate is tried at most once, so the search always terminates, even
    # when there are no publishers or none of them has any datasets.
    random.shuffle(candidates)

    for publisher in candidates:
        if not publisher or publisher.state != 'active':
            continue
        # Always aggregate at least 10 datasets (the historical fixed size)
        # but honour larger requests rather than silently capping them at 10.
        datasets = _datasets_for_publisher(publisher, max(count, 10))[:count]
        if datasets:
            break
    else:
        # Nothing to show.
        return ""

    ctx = {
        'datasets': datasets,
        'publisher': publisher
    }
    return base.render_snippet('ga_report/ga_popular_datasets.html', **ctx)


def single_popular_dataset(top=20):
    '''Returns a random dataset from the most popular ones.

    The ``top`` dataset URLs with the most pageviews are fetched and tried
    in random order; the first one that resolves to an active package is
    used.  If none does, the first active package found in the database is
    used as a fallback.

    :param top: the number of top datasets to select from
    :returns: the ``package_show`` dict for the chosen dataset, or None if
        there is no active dataset at all
    '''
    dataset = None
    if top > 0:
        # One LIMITed query rather than a count() plus repeated OFFSET lookups.
        candidates = model.Session.query(GA_Url).\
            filter(GA_Url.url.like('/dataset/%')).\
            order_by('ga_url.pageviews::int desc').\
            limit(top).all()
        # Sampling without replacement: an unusable row is never retried.
        random.shuffle(candidates)
        for ga_url in candidates:
            package = model.Package.get(ga_url.url[len(_DATASET_URL_PREFIX):])
            if package and package.state == 'active':
                dataset = package
                break

    if not dataset:
        # fallback
        dataset = model.Session.query(model.Package)\
            .filter_by(state='active').first()
    if not dataset:
        return None

    dataset_dict = get_action('package_show')({'model': model,
                                               'session': model.Session},
                                              {'id': dataset.id})
    return dataset_dict


def single_popular_dataset_html(top=20):
    '''Render a snippet showing one random popular dataset.

    :param top: the number of top datasets to select from (passed through
        to :func:`single_popular_dataset`)
    :returns: the rendered 'ga_report/ga_popular_single.html' snippet, or
        an empty string when no dataset is available
    '''
    dataset_dict = single_popular_dataset(top)
    if not dataset_dict:
        return ""

    groups = dataset_dict.get('groups', [])
    publishers = [g for g in groups if g.get('type') == 'publisher']
    # Datasets without a publisher group still get a (blank) publisher dict.
    publisher = publishers[0] if publishers else {'name': '', 'title': ''}
    context = {
        'dataset': dataset_dict,
        'publisher': publisher
    }
    return base.render_snippet('ga_report/ga_popular_single.html', **context)


def most_popular_datasets(publisher, count=20):
    '''Render the most viewed datasets of the given publisher.

    :param publisher: the publisher whose datasets are listed; its ``name``
        is matched against ``GA_Url.department_id``
    :param count: maximum number of distinct datasets to include
    :returns: the rendered 'ga_report/publisher/popular.html' snippet, or
        an empty string (after logging an error) if no publisher is given
    '''
    if not publisher:
        _log.error("No valid publisher passed to 'most_popular_datasets'")
        return ""

    results = _datasets_for_publisher(publisher, count)

    ctx = {
        'dataset_count': len(results),
        'datasets': results,

        'publisher': publisher
    }

    return base.render_snippet('ga_report/publisher/popular.html', **ctx)


def _datasets_for_publisher(publisher, count):
    '''Collect view/visit totals for up to ``count`` datasets of a publisher.

    GA_Url rows for the publisher are read in descending pageview order and
    summed per package until ``count`` distinct packages have been seen.

    :param publisher: object whose ``name`` matches ``GA_Url.department_id``
    :param count: maximum number of distinct packages to return
    :returns: list of ``(package, views, visits)`` tuples sorted by views,
        highest first
    '''
    totals = {}
    entries = model.Session.query(GA_Url).\
        filter(GA_Url.department_id==publisher.name).\
        filter(GA_Url.url.like('/dataset/%')).\
        order_by('ga_url.pageviews::int desc').all()
    for entry in entries:
        if len(totals) >= count:
            # Rows past this point were never aggregated; stop early instead
            # of walking the rest of the result set for nothing.
            break
        package = model.Package.get(entry.url[len(_DATASET_URL_PREFIX):])
        if package is None:
            # The URL no longer maps to a package; do not let None become a
            # "dataset" in the results.
            _log.warning("Could not find Package for %s", entry.url)
            continue
        stats = totals.setdefault(package, {'views': 0, 'visits': 0})
        stats['views'] += int(entry.pageviews)
        stats['visits'] += int(entry.visits)

    results = [(package, stats['views'], stats['visits'])
               for package, stats in totals.items()]
    return sorted(results, key=operator.itemgetter(1), reverse=True)