import logging |
import logging |
import operator |
import operator |
|
|
import ckan.lib.base as base |
import ckan.lib.base as base |
import ckan.model as model |
import ckan.model as model |
from ckan.logic import get_action |
from ckan.logic import get_action |
|
|
from ckanext.ga_report.ga_model import GA_Url, GA_Publisher |
from ckanext.ga_report.ga_model import GA_Url, GA_Publisher |
from ckanext.ga_report.controller import _get_publishers |
from ckanext.ga_report.controller import _get_publishers |
_log = logging.getLogger(__name__) |
_log = logging.getLogger(__name__) |
|
|
def popular_datasets(count=10):
    '''Render the popular datasets of a randomly chosen publisher.

    The publishers returned by ``_get_publishers(30)`` are tried in random
    order until one is found that is active and has at least one dataset
    reported by ``_datasets_for_publisher``.  Every candidate is tried at
    most once, so the search always terminates.

    :param count: maximum number of datasets handed to the template
    :returns: the rendered ``ga_report/ga_popular_datasets.html`` snippet,
        or an empty string when no suitable publisher could be found
    '''
    import random

    # Each entry is indexed with [0] to obtain the publisher object.
    candidates = [entry[0] for entry in _get_publishers(30)]
    random.shuffle(candidates)

    for publisher in candidates:
        if not publisher or publisher.state != 'active':
            continue
        # Look at no fewer than 10 datasets (the historical fetch size) so
        # that results for count <= 10 are unchanged, while a larger count
        # is no longer silently capped at 10.
        datasets = _datasets_for_publisher(publisher, max(count, 10))[:count]
        if datasets:
            break
    else:
        # Either there are no publishers at all or none of them is active
        # with datasets; nothing sensible can be rendered.
        _log.warning("No active publisher with popular datasets was found")
        return ""

    ctx = {
        'datasets': datasets,
        'publisher': publisher,
    }
    return base.render_snippet('ga_report/ga_popular_datasets.html', **ctx)
|
|
def single_popular_dataset(top=20):
    '''Pick one of the most viewed datasets at random.

    :param top: how many of the highest ranked dataset URLs to choose from
    :returns: the ``package_show`` dict of the chosen dataset, or None when
        not even the fallback query finds an active dataset
    '''
    import random

    ranked_urls = model.Session.query(GA_Url)
    ranked_urls = ranked_urls.filter(GA_Url.url.like('/dataset/%'))
    ranked_urls = ranked_urls.order_by('ga_url.pageviews::int desc')
    available = ranked_urls.count()

    prefix_length = len('/dataset/')
    dataset = None
    if available:
        # The top datasets may not exist locally (e.g. when testing), so
        # only make a limited number of attempts (11) before giving up and
        # using the fallback below.
        for _attempt in range(11):
            pick = random.randrange(0, min(top, available))
            candidate = model.Package.get(ranked_urls[pick].url[prefix_length:])
            if candidate and candidate.state == 'active':
                dataset = candidate
                break

    if not dataset:
        # fallback: simply take the first active dataset
        active_packages = model.Session.query(model.Package)
        dataset = active_packages.filter_by(state='active').first()
    if not dataset:
        return None

    context = {
        'model': model,
        'session': model.Session,
        'validate': False,
    }
    package_show = get_action('package_show')
    return package_show(context, {'id': dataset.id})
|
|
def single_popular_dataset_html(top=20):
    '''Render a snippet for one randomly chosen popular dataset.

    :param top: the number of top datasets to select from; passed straight
        through to ``single_popular_dataset``
    :returns: the rendered ``ga_report/ga_popular_single.html`` snippet, or
        an empty string when no dataset is available
    '''
    dataset_dict = single_popular_dataset(top)
    if not dataset_dict:
        # single_popular_dataset returns None when there is no active
        # dataset at all, so there is nothing to render.
        _log.warning("No popular dataset available to render")
        return ""

    # Previously this read from an undefined name ``package``.
    groups = dataset_dict.get('groups', [])
    # Accept both group types: 'publisher' and 'organization'.
    publishers = [g for g in groups
                  if g.get('type') in ('publisher', 'organization')]
    # Fall back to a blank placeholder so the template always gets the
    # 'name' and 'title' keys.
    publisher = publishers[0] if publishers else {'name': '', 'title': ''}

    context = {
        'dataset': dataset_dict,
        # Previously this passed an undefined name ``publisher_dict``.
        'publisher': publisher,
    }
    return base.render_snippet('ga_report/ga_popular_single.html', **context)
|
|
|
|
def most_popular_datasets(publisher, count=20, preview_image=None):
    '''Render the most viewed datasets of the given publisher.

    :param publisher: the publisher whose datasets are listed
    :param count: limit passed on to ``_datasets_for_publisher``
    :param preview_image: handed to the template unchanged
    :returns: the rendered ``ga_report/publisher/popular.html`` snippet, or
        an empty string (after logging an error) if ``publisher`` is falsy
    '''
    if publisher:
        ranked = _datasets_for_publisher(publisher, count)
        return base.render_snippet(
            'ga_report/publisher/popular.html',
            dataset_count=len(ranked),
            datasets=ranked,
            publisher=publisher,
            preview_image=preview_image)

    _log.error("No valid publisher passed to 'most_popular_datasets'")
    return ""
|
|
def _datasets_for_publisher(publisher, count):
    '''Collect view statistics for a publisher's most viewed datasets.

    GA_Url rows whose ``department_id`` equals ``publisher.name`` and whose
    url starts with ``/dataset/`` are read in descending pageview order.
    Rows are folded into per-package totals until ``count`` distinct active
    packages have been collected; rows after that point are ignored.

    :param publisher: object whose ``name`` is matched against
        ``GA_Url.department_id``
    :param count: maximum number of distinct packages to collect
    :returns: list of ``(package, views, visits)`` tuples sorted by views,
        highest first
    '''
    entries = model.Session.query(GA_Url).\
        filter(GA_Url.department_id == publisher.name).\
        filter(GA_Url.url.like('/dataset/%')).\
        order_by('ga_url.pageviews::int desc').all()

    datasets = {}
    for entry in entries:
        # Once the limit is reached no later row can contribute, so stop
        # instead of looping over the remaining rows doing nothing.
        if len(datasets) >= count:
            break

        # The part of the url after '/dataset/' identifies the package.
        p = model.Package.get(entry.url[len('/dataset/'):])

        if not p:
            _log.warning("Could not find Package for {url}".format(url=entry.url))
            continue

        if not p.state == 'active':
            _log.warning("Package {0} is not active, it is {1}".format(p.name, p.state))
            continue

        totals = datasets.setdefault(p, {'views': 0, 'visits': 0})
        totals['views'] += int(entry.pageviews)
        totals['visits'] += int(entry.visits)

    # items() rather than iteritems() so this runs on Python 2 and 3.
    results = [(package, totals['views'], totals['visits'])
               for package, totals in datasets.items()]

    return sorted(results, key=operator.itemgetter(1), reverse=True)
|
|
def month_option_title(month_iso, months, day):
    '''Return the display title for a month option.

    :param month_iso: the code of the month to look up
    :param months: sequence of ``(iso_code, name)`` pairs
    :param day: value shown in the "(up to ...)" suffix
    :returns: the name of the matching month, with " (up to <day>)"
        appended when it is the first entry of ``months``; if the month is
        not listed an error is logged and ``month_iso`` is returned as is
    '''
    known_codes = [code for (code, _title) in months]
    if month_iso not in known_codes:
        _log.error('Month "%s" not found in list of months.' % month_iso)
        return month_iso

    position = known_codes.index(month_iso)
    title = months[position][1]
    if position != 0:
        return title
    # Only the first entry of the list gets the "up to" suffix.
    return title + (' (up to %s)' % day)
|
|
|
|
|
|