import logging
import operator
import random

import ckan.lib.base as base
import ckan.model as model
from ckan.logic import get_action

from ckanext.ga_report.ga_model import GA_Url, GA_Publisher
from ckanext.ga_report.controller import _get_publishers

_log = logging.getLogger(__name__)

# Prefix of the GA_Url.url values that refer to dataset pages.
_DATASET_URL_PREFIX = '/dataset/'

# Upper bound on random picks in single_popular_dataset() before it falls
# back to the first active package.
_MAX_RANDOM_PICKS = 11

# Minimum number of datasets collected per publisher by popular_datasets()
# before the result is trimmed to ``count``.
_MIN_PUBLISHER_POOL = 10


def popular_datasets(count=10):
    '''Renders the popular datasets of one randomly chosen publisher.

    The publishers returned by ``_get_publishers(30)`` are tried in random
    order until one is found that is active and has at least one dataset.

    :param count: the maximum number of datasets to show
    :returns: the rendered ``ga_report/ga_popular_datasets.html`` snippet,
        or an empty string when no publisher qualifies
    '''
    candidates = [row[0] for row in _get_publishers(30)]
    # Visiting a shuffled list gives the same uniform choice as repeated
    # random picks, but is guaranteed to terminate and copes with an empty
    # publisher list.
    random.shuffle(candidates)

    for publisher in candidates:
        if not publisher or publisher.state != 'active':
            continue
        # Collect at least _MIN_PUBLISHER_POOL datasets so results for
        # count <= 10 are unchanged, while larger counts are no longer
        # silently capped at 10.
        pool_size = max(count, _MIN_PUBLISHER_POOL)
        datasets = _datasets_for_publisher(publisher, pool_size)[:count]
        if datasets:
            ctx = {
                'datasets': datasets,
                'publisher': publisher,
            }
            return base.render_snippet(
                'ga_report/ga_popular_datasets.html', **ctx)

    _log.warning("No active publisher with datasets found for "
                 "'popular_datasets'")
    return ""


def single_popular_dataset(top=20):
    '''Returns a random dataset from the most popular ones.

    Falls back to the first active package when no active dataset could be
    picked from the analytics data.

    :param top: the number of top datasets to select from
    :returns: the ``package_show`` dict of the chosen dataset, or None when
        there is no active package at all
    '''
    top_datasets = model.Session.query(GA_Url).\
        filter(GA_Url.url.like(_DATASET_URL_PREFIX + '%')).\
        order_by('ga_url.pageviews::int desc')
    pool_size = min(top, top_datasets.count())

    dataset = None
    # pool_size <= 0 (no analytics rows, or top <= 0) would make
    # random.randrange raise ValueError, so go straight to the fallback.
    if pool_size > 0:
        for _ in range(_MAX_RANDOM_PICKS):
            ga_url = top_datasets[random.randrange(0, pool_size)]
            candidate = model.Package.get(
                ga_url.url[len(_DATASET_URL_PREFIX):])
            if candidate and candidate.state == 'active':
                dataset = candidate
                break

    if not dataset:
        # fallback
        dataset = model.Session.query(model.Package)\
            .filter_by(state='active').first()
    if not dataset:
        return None

    context = {'model': model, 'session': model.Session}
    return get_action('package_show')(context, {'id': dataset.id})


def single_popular_dataset_html(top=20):
    '''Renders a random dataset from the most popular ones.

    :param top: the number of top datasets to select from
    :returns: the rendered ``ga_report/ga_popular_single.html`` snippet, or
        an empty string when no dataset is available
    '''
    dataset_dict = single_popular_dataset(top)
    if not dataset_dict:
        _log.warning("No dataset available for "
                     "'single_popular_dataset_html'")
        return ""

    groups = dataset_dict.get('groups', [])
    publishers = [g for g in groups if g.get('type') == 'publisher']
    # Placeholder publisher when the dataset belongs to none.
    publisher = publishers[0] if publishers else {'name': '', 'title': ''}
    # NOTE(review): the context key 'publisher' is kept as written; confirm
    # that the template does not expect 'publisher_dict' instead.
    context = {
        'dataset': dataset_dict,
        'publisher': publisher,
    }
    return base.render_snippet('ga_report/ga_popular_single.html', **context)


def most_popular_datasets(publisher, count=20):
    '''Renders the most popular datasets of the given publisher.

    :param publisher: the publisher whose datasets are listed
    :param count: the maximum number of datasets to show
    :returns: the rendered ``ga_report/publisher/popular.html`` snippet, or
        an empty string when no publisher is given
    '''
    if not publisher:
        _log.error("No valid publisher passed to 'most_popular_datasets'")
        return ""

    results = _datasets_for_publisher(publisher, count)

    ctx = {
        'dataset_count': len(results),
        'datasets': results,
        'publisher': publisher,
    }
    return base.render_snippet('ga_report/publisher/popular.html', **ctx)


def _datasets_for_publisher(publisher, count):
    '''Returns view statistics for the top datasets of a publisher.

    GA_Url rows whose ``department_id`` equals ``publisher.name`` are read
    in descending pageview order and summed per package until ``count``
    distinct packages have been collected.

    :param publisher: the publisher, matched on its ``name``
    :param count: the maximum number of distinct datasets to return
    :returns: a list of ``(package, views, visits)`` tuples sorted by views,
        highest first
    '''
    datasets = {}
    entries = model.Session.query(GA_Url).\
        filter(GA_Url.department_id == publisher.name).\
        filter(GA_Url.url.like(_DATASET_URL_PREFIX + '%')).\
        order_by('ga_url.pageviews::int desc').all()

    for entry in entries:
        # Once the limit is reached no further entry is processed, so the
        # scan can stop here.
        if len(datasets) >= count:
            break
        package = model.Package.get(entry.url[len(_DATASET_URL_PREFIX):])
        if not package:
            # The URL has no matching package; without this check None
            # would be returned to callers as if it were a dataset.
            _log.warning("Could not find Package for %s", entry.url)
            continue
        totals = datasets.setdefault(package, {'views': 0, 'visits': 0})
        totals['views'] += int(entry.pageviews)
        totals['visits'] += int(entry.visitors)

    results = [(package, totals['views'], totals['visits'])
               for package, totals in datasets.items()]
    return sorted(results, key=operator.itemgetter(1), reverse=True)