import logging |
import logging |
import operator |
import operator |
|
|
import ckan.lib.base as base |
import ckan.lib.base as base |
import ckan.model as model |
import ckan.model as model |
|
from ckan.logic import get_action |
|
|
|
from ckanext.ga_report.ga_model import GA_Url, GA_Publisher |
|
from ckanext.ga_report.controller import _get_publishers |
_log = logging.getLogger(__name__) |
_log = logging.getLogger(__name__) |
|
|
|
def popular_datasets(count=10):
    '''Renders the popular datasets of one randomly chosen publisher.

    The publishers returned by ``_get_publishers(30)`` are tried in random
    order until one is found that is active and has at least one popular
    dataset.

    :param count: the maximum number of datasets to show
    :returns: the rendered ``ga_report/ga_popular_datasets.html`` snippet, or
        an empty string when no active publisher with datasets exists
    '''
    import random

    # Try every candidate at most once, in random order. Unlike repeatedly
    # drawing a random index, this terminates when nothing is eligible and
    # copes with an empty publisher list.
    candidates = [row[0] for row in _get_publishers(30)]
    random.shuffle(candidates)

    for publisher in candidates:
        if publisher.state != 'active':
            continue
        # Fetch at least 10 (the original fixed fetch size) so results for
        # small counts are unchanged, while a count above 10 is honoured.
        fetch_size = max(count, 10)
        datasets = _datasets_for_publisher(publisher, fetch_size)[:count]
        if datasets:
            ctx = {
                'datasets': datasets,
                'publisher': publisher,
            }
            return base.render_snippet(
                'ga_report/ga_popular_datasets.html', **ctx)

    _log.warning("No active publisher with popular datasets was found")
    return ""
|
|
|
def single_popular_dataset(top=20):
    '''Picks one dataset at random from the most viewed ones.

    :param top: the number of top datasets to select from
    :returns: the ``package_show`` dict of the chosen dataset, or None when
        neither the popular list nor the fallback query yields a dataset
    '''
    import random

    dataset_urls = model.Session.query(GA_Url)
    dataset_urls = dataset_urls.filter(GA_Url.url.like('/dataset/%'))
    dataset_urls = dataset_urls.order_by('ga_url.pageviews::int desc')
    total = dataset_urls.count()

    prefix_length = len('/dataset/')
    chosen = None
    if total:
        # Give up after 11 random draws and use the fallback below.
        for _attempt in range(11):
            index = random.randrange(0, min(top, total))
            tracked_url = dataset_urls[index]
            candidate = model.Package.get(tracked_url.url[prefix_length:])
            if candidate and candidate.state == 'active':
                chosen = candidate
                break

    if not chosen:
        # fallback: any active package at all
        active_packages = model.Session.query(model.Package)
        chosen = active_packages.filter_by(state='active').first()
        if not chosen:
            return None

    package_show = get_action('package_show')
    context = {
        'model': model,
        'session': model.Session,
        'validate': False,
    }
    return package_show(context, {'id': chosen.id})
|
|
|
def single_popular_dataset_html(top=20):
    '''Renders a snippet for one random dataset from the most popular ones.

    :param top: the number of top datasets to select from
    :returns: the rendered ``ga_report/ga_popular_single.html`` snippet, or
        an empty string when no dataset is available
    '''
    dataset_dict = single_popular_dataset(top)
    if not dataset_dict:
        # single_popular_dataset returns None when there is no active dataset
        _log.warning("No dataset available for 'single_popular_dataset_html'")
        return ""

    # The groups come from the dataset dict itself; the first group of type
    # 'publisher' is shown, with an empty placeholder when there is none.
    groups = dataset_dict.get('groups', [])
    publishers = [g for g in groups if g.get('type') == 'publisher']
    publisher = publishers[0] if publishers else {'name': '', 'title': ''}

    context = {
        'dataset': dataset_dict,
        'publisher': publisher,
    }
    return base.render_snippet('ga_report/ga_popular_single.html', **context)
|
|
|
|
def most_popular_datasets(publisher, count=20):
    '''Renders the most popular datasets of the given publisher.

    :param publisher: the publisher whose datasets are listed
    :param count: passed to ``_datasets_for_publisher`` as the dataset limit
    :returns: the rendered ``ga_report/publisher/popular.html`` snippet, or
        an empty string when ``publisher`` is falsy
    '''
    # Kept from the original block; the name is not used directly here.
    from ckanext.ga_report.ga_model import GA_Url

    if publisher:
        ranked = _datasets_for_publisher(publisher, count)
        return base.render_snippet(
            'ga_report/publisher/popular.html',
            dataset_count=len(ranked),
            datasets=ranked,
            publisher=publisher)

    _log.error("No valid publisher passed to 'most_popular_datasets'")
    return ""
|
|
|
def _datasets_for_publisher(publisher, count):
    '''Returns the most viewed datasets recorded for a publisher.

    GA_Url rows whose ``department_id`` equals ``publisher.name`` and whose
    url starts with ``/dataset/`` are read in descending pageview order and
    their pageviews and visitors are summed per package.

    :param publisher: object whose ``name`` is matched against
        ``GA_Url.department_id``
    :param count: the maximum number of distinct datasets to collect
    :returns: a list of ``(package, views, visits)`` tuples sorted by views,
        highest first
    '''
    entries = model.Session.query(GA_Url).\
        filter(GA_Url.department_id == publisher.name).\
        filter(GA_Url.url.like('/dataset/%')).\
        order_by('ga_url.pageviews::int desc').all()

    prefix_length = len('/dataset/')
    datasets = {}
    for entry in entries:
        # Once the limit is reached nothing more is ever added, so stop.
        if len(datasets) >= count:
            break
        package = model.Package.get(entry.url[prefix_length:])
        if not package:
            # The url does not resolve to a package; skipping it keeps None
            # out of the results.
            _log.warning("Could not find a package for GA url %s", entry.url)
            continue
        totals = datasets.setdefault(package, {'views': 0, 'visits': 0})
        totals['views'] += int(entry.pageviews)
        totals['visits'] += int(entry.visitors)

    results = [(package, totals['views'], totals['visits'])
               for package, totals in datasets.items()]
    return sorted(results, key=operator.itemgetter(1), reverse=True)
|
|
|