Adjust popularity score to take account of number of days in the month.
--- a/ckanext/ga_report/ga_model.py
+++ b/ckanext/ga_report/ga_model.py
@@ -9,6 +9,8 @@
import ckan.model as model
from ckan.lib.base import *
+
+log = __import__('logging').getLogger(__name__)
def make_uuid():
return unicode(uuid.uuid4())
@@ -212,7 +214,7 @@
'period_complete_day': 0,
'url': url,
'pageviews': sum([int(e.pageviews) for e in entries]) + old_pageviews,
- 'visits': sum([int(e.visits) for e in entries]) + old_visits,
+ 'visits': sum([int(e.visits or 0) for e in entries]) + old_visits,
'department_id': publisher,
'package_id': package
}
@@ -343,3 +345,34 @@
q.delete()
model.Session.commit()
+def get_score_for_dataset(dataset_name):
+    '''
+    Returns a "current popularity" score for a dataset,
+    based on how many views it has had recently.
+
+    The score is 100 x (this month's pageviews per day + half of the
+    previous calendar month's), truncated to an int. `dataset_name` is
+    matched against GA_Url.package_id.
+    '''
+    import datetime
+    now = datetime.datetime.now()
+    # Step back from the 1st: "now - 30 days" can miss the previous month.
+    last_month = now.replace(day=1) - datetime.timedelta(days=1)
+    period_names = ['%s-%02d' % (d.year, d.month) for d in (last_month, now)]
+
+    score = 0.0
+    for period_name in period_names:
+        score /= 2 # previous periods are discounted by 50%
+        entry = model.Session.query(GA_Url)\
+            .filter(GA_Url.period_name==period_name)\
+            .filter(GA_Url.package_id==dataset_name).first()
+        if entry:
+            views = float(entry.pageviews)
+            # period_complete_day is 0/unset: fall back to 15 days as a guess
+            days = entry.period_complete_day or 15
+            score += views / days
+
+    score = int(score * 100)
+    log.debug('Popularity %s: %s', score, dataset_name)
+    return score
+