| import logging | import logging |
| import operator | import operator |
| import ckan.lib.base as base | import ckan.lib.base as base |
| import ckan.model as model | import ckan.model as model |
| from ckan.logic import get_action | from ckan.logic import get_action |
| from ckanext.ga_report.ga_model import GA_Url, GA_Publisher | from ckanext.ga_report.ga_model import GA_Url, GA_Publisher |
| from ckanext.ga_report.controller import _get_publishers | from ckanext.ga_report.controller import _get_publishers |
| _log = logging.getLogger(__name__) | _log = logging.getLogger(__name__) |
def popular_datasets(count=10):
    '''Renders the popular datasets of one randomly chosen publisher.

    The publishers returned by ``_get_publishers(30)`` are tried in random
    order until one is found that is active and for which
    ``_datasets_for_publisher`` returns at least one dataset.

    :param count: the maximum number of datasets passed to the template
    :returns: the rendered ``ga_report/ga_popular_datasets.html`` snippet,
        or an empty string when no publisher qualifies
    '''
    import random

    # The first element of each row is the publisher object.
    candidates = [row[0] for row in _get_publishers(30)]

    # Visiting every candidate at most once, in random order, keeps the
    # choice random while guaranteeing termination: an empty publisher
    # list, or a list in which no publisher qualifies, can no longer raise
    # from randrange(0, 0) or loop forever.
    random.shuffle(candidates)

    for publisher in candidates:
        if not publisher or publisher.state != 'active':
            continue
        datasets = _datasets_for_publisher(publisher, 10)[:count]
        if datasets:
            ctx = {
                'datasets': datasets,
                'publisher': publisher,
            }
            return base.render_snippet('ga_report/ga_popular_datasets.html',
                                       **ctx)

    _log.warning("No active publisher with popular datasets was found")
    return ""
def single_popular_dataset(top=20):
    '''Returns a random dataset from the most popular ones.

    :param top: the number of top datasets to select from
    :returns: the ``package_show`` dict of the chosen dataset, or None when
        there is no active dataset at all
    '''
    import random

    top_datasets = model.Session.query(GA_Url).\
        filter(GA_Url.url.like('/dataset/%')).\
        order_by('ga_url.pageviews::int desc')

    # Number of leading rows that may be picked from. A non-positive value
    # (no rows, or top <= 0) skips the random pick and goes straight to the
    # fallback instead of letting randrange() raise ValueError.
    pool_size = min(top, top_datasets.count())

    dataset = None
    if pool_size > 0:
        # When testing, it is possible that top datasets are not available
        # so only go round this loop a few times before falling back on
        # a random dataset.
        for _attempt in range(11):
            ga_url = top_datasets[random.randrange(0, pool_size)]
            candidate = model.Package.get(ga_url.url[len('/dataset/'):])
            if candidate and candidate.state == 'active':
                dataset = candidate
                break

    if not dataset:
        # fallback
        dataset = model.Session.query(model.Package)\
            .filter_by(state='active').first()
    if not dataset:
        return None

    context = {'model': model,
               'session': model.Session,
               'validate': False}
    dataset_dict = get_action('package_show')(context, {'id': dataset.id})
    return dataset_dict
def single_popular_dataset_html(top=20):
    '''Renders a snippet for a random dataset from the most popular ones.

    :param top: the number of top datasets to select from
    :returns: the rendered ``ga_report/ga_popular_single.html`` snippet, or
        an empty string when no dataset could be found
    '''
    dataset_dict = single_popular_dataset(top)
    if not dataset_dict:
        # single_popular_dataset() returns None when there is nothing to show
        return ""

    # Previously this read from the undefined names ``package`` and
    # ``publisher_dict``, which raised NameError on every call.
    groups = dataset_dict.get('groups', [])
    publishers = [g for g in groups if g.get('type') == 'publisher']
    publisher = publishers[0] if publishers else {'name': '', 'title': ''}

    context = {
        'dataset': dataset_dict,
        'publisher': publisher,
    }
    return base.render_snippet('ga_report/ga_popular_single.html', **context)
def most_popular_datasets(publisher, count=20):
    '''Renders the most popular datasets of the given publisher.

    :param publisher: the publisher whose datasets are listed
    :param count: passed to ``_datasets_for_publisher`` as the dataset limit
    :returns: the rendered ``ga_report/publisher/popular.html`` snippet, or
        an empty string (after logging an error) if ``publisher`` is falsy
    '''
    if publisher:
        ranked = _datasets_for_publisher(publisher, count)
        return base.render_snippet(
            'ga_report/publisher/popular.html',
            dataset_count=len(ranked),
            datasets=ranked,
            publisher=publisher)

    _log.error("No valid publisher passed to 'most_popular_datasets'")
    return ""
def _datasets_for_publisher(publisher, count):
    '''Returns view and visit totals for a publisher's datasets.

    Reads the ``GA_Url`` rows whose ``department_id`` equals the publisher's
    name and whose url starts with ``/dataset/``, ordered by page views
    (descending), and sums ``pageviews`` and ``visits`` per package.

    :param publisher: object whose ``name`` is matched against
        ``GA_Url.department_id``
    :param count: the maximum number of distinct datasets to collect
    :returns: a list of ``(package, views, visits)`` tuples sorted by views,
        highest first
    '''
    entries = model.Session.query(GA_Url).\
        filter(GA_Url.department_id == publisher.name).\
        filter(GA_Url.url.like('/dataset/%')).\
        order_by('ga_url.pageviews::int desc').all()

    datasets = {}
    for entry in entries:
        # Once the limit is reached no further row is ever counted (not even
        # for a package that is already present), so stop scanning.
        if len(datasets) >= count:
            break
        p = model.Package.get(entry.url[len('/dataset/'):])
        if not p:
            _log.warning("Could not find Package for %s", entry.url)
            continue
        totals = datasets.setdefault(p, {'views': 0, 'visits': 0})
        totals['views'] += int(entry.pageviews)
        totals['visits'] += int(entry.visits)

    # items() rather than iteritems() so this also runs on Python 3.
    results = [(pkg, totals['views'], totals['visits'])
               for pkg, totals in datasets.items()]
    return sorted(results, key=operator.itemgetter(1), reverse=True)