From: David Read
Date: Fri, 09 Nov 2012 16:15:35 +0000
Subject: Adjust popularity score to take account of number of days in the month.
X-Git-Url: https://maxious.lambdacomplex.org/git/?p=ckanext-ga-report.git&a=commitdiff&h=20b6eca0a538a77122ce85cf588045784fa9b67e

---
Adjust popularity score to take account of number of days in the month.
---

--- a/ckanext/ga_report/ga_model.py
+++ b/ckanext/ga_report/ga_model.py
@@ -9,6 +9,8 @@
 import ckan.model as model
 from ckan.lib.base import *
+
+log = __import__('logging').getLogger(__name__)
 
 def make_uuid():
     return unicode(uuid.uuid4())
 
@@ -212,7 +214,7 @@
                       'period_complete_day': 0,
                       'url': url,
                       'pageviews': sum([int(e.pageviews) for e in entries]) + old_pageviews,
-                      'visits': sum([int(e.visits) for e in entries]) + old_visits,
+                      'visits': sum([int(e.visits or 0) for e in entries]) + old_visits,
                       'department_id': publisher,
                       'package_id': package
                      }
@@ -344,20 +346,33 @@
     model.Session.commit()
 
 def get_score_for_dataset(dataset_name):
+    '''
+    Returns a "current popularity" score for a dataset,
+    based on how many views it has had recently.
+    '''
     import datetime
     now = datetime.datetime.now()
-    period_names = ['%s-%02d' % (now.year, now.month),
-                    '%s-%02d' % (now.year, now.month-1)]
-
-    entry = model.Session.query(GA_Url)\
-        .filter(GA_Url.period_name==period_names[0])\
-        .filter(GA_Url.package_id==dataset_name).first()
-    score = int(entry.pageviews) if entry else 0
-
-    entry = model.Session.query(GA_Url)\
-        .filter(GA_Url.period_name==period_names[1])\
-        .filter(GA_Url.package_id==dataset_name).first()
-    val = int(entry.pageviews) if entry else 0
-    score += val/2 if val else 0
-
+    last_month = now - datetime.timedelta(days=30)
+    period_names = ['%s-%02d' % (last_month.year, last_month.month),
+                    '%s-%02d' % (now.year, now.month),
+                    ]
+
+    score = 0
+    for period_name in period_names:
+        score /= 2 # previous periods are discounted by 50%
+        entry = model.Session.query(GA_Url)\
+            .filter(GA_Url.period_name==period_name)\
+            .filter(GA_Url.package_id==dataset_name).first()
+        # score
+        if entry:
+            views = float(entry.pageviews)
+            if entry.period_complete_day:
+                views_per_day = views / entry.period_complete_day
+            else:
+                views_per_day = views / 15 # guess
+            score += views_per_day
+
+    score = int(score * 100)
+    log.debug('Popularity %s: %s', score, dataset_name)
     return score
+