--- a/ckanext/ga_report/controller.py +++ b/ckanext/ga_report/controller.py @@ -9,7 +9,7 @@ import sqlalchemy from sqlalchemy import func, cast, Integer import ckan.model as model -from ga_model import GA_Url, GA_Stat, GA_ReferralStat +from ga_model import GA_Url, GA_Stat, GA_ReferralStat, GA_Publisher log = logging.getLogger('ckanext.ga-report') @@ -22,12 +22,29 @@ def _month_details(cls): - '''Returns a list of all the month names''' + ''' + Returns a list of all the periods for which we have data, unfortunately + knows too much about the type of the cls being passed as GA_Url has a + more complex query + + This may need extending if we add a period_name to the stats + ''' months = [] - vals = model.Session.query(cls.period_name).filter(cls.period_name!='All').distinct().all() + day = None + + vals = model.Session.query(cls.period_name,cls.period_complete_day)\ + .filter(cls.period_name!='All').distinct(cls.period_name)\ + .order_by("period_name desc").all() + if vals and vals[0][1]: + day = int(vals[0][1]) + ordinal = 'th' if 11 <= day <= 13 \ + else {1:'st',2:'nd',3:'rd'}.get(day % 10, 'th') + day = "{day}{ordinal}".format(day=day, ordinal=ordinal) + for m in vals: months.append( (m[0], _get_month_name(m[0]))) - return sorted(months, key=operator.itemgetter(0), reverse=True) + + return months, day class GaReport(BaseController): @@ -56,7 +73,7 @@ # Get the month details by fetching distinct values and determining the # month names from the values. - c.months = _month_details(GA_Stat) + c.months, c.day = _month_details(GA_Stat) # Work out which month to show, based on query params of the first item c.month_desc = 'all months' @@ -71,13 +88,13 @@ entries = q.order_by('ga_stat.key').all() def clean_key(key, val): - if key in ['Average time on site', 'Pages per visit', 'New visits', 'Bounces']: + if key in ['Average time on site', 'Pages per visit', 'New visits', 'Bounce rate (home page)']: val = "%.2f" % round(float(val), 2) if key == 'Average time on site': mins, secs = divmod(float(val), 60) hours, mins = divmod(mins, 60) val = '%02d:%02d:%02d (%s seconds) ' % (hours, mins, secs, val) - if key in ['New visits','Bounces']: + if key in ['New visits','Bounce rate (home page)']: val = "%s%%" % val if key in ['Total page views', 'Total visits']: val = int(val) @@ -220,7 +237,7 @@ # Get the month details by fetching distinct values and determining the # month names from the values. - c.months = _month_details(GA_Url) + c.months, c.day = _month_details(GA_Url) # Work out which month to show, based on query params of the first item c.month = request.params.get('month', '') @@ -229,11 +246,10 @@ c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month]) c.top_publishers = _get_top_publishers() - return render('ga_report/publisher/index.html') def _get_packages(self, publisher=None, count=-1): - '''Returns the datasets in order of visits''' + '''Returns the datasets in order of views''' if count == -1: count = sys.maxint @@ -245,12 +261,11 @@ if publisher: q = q.filter(GA_Url.department_id==publisher.name) q = q.filter(GA_Url.period_name==month) - q = q.order_by('ga_url.visitors::int desc') + q = q.order_by('ga_url.pageviews::int desc') top_packages = [] - for entry,package in q.limit(count): if package: - top_packages.append((package, entry.pageviews, entry.visitors)) + top_packages.append((package, entry.pageviews, entry.visits)) else: log.warning('Could not find package associated package') @@ -280,7 +295,7 @@ # Get the month details by fetching distinct values and determining the # month names from the values. - c.months = _month_details(GA_Url) + c.months, c.day = _month_details(GA_Url) # Work out which month to show, based on query params of the first item c.month = request.params.get('month', '') @@ -289,7 +304,7 @@ else: c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month]) - month = c.mnth or 'All' + month = c.month or 'All' c.publisher_page_views = 0 q = model.Session.query(GA_Url).\ filter(GA_Url.url=='/publisher/%s' % c.publisher_name) @@ -305,38 +320,33 @@ Returns a list of the top 20 publishers by dataset visits. (The number to show can be varied with 'limit') ''' + month = c.month or 'All' connection = model.Session.connection() q = """ - select department_id, sum(pageviews::int) views, sum(visitors::int) visits + select department_id, sum(pageviews::int) views, sum(visits::int) visits from ga_url - where department_id <> ''""" - if c.month: - q = q + """ - and period_name=%s - """ - q = q + """ - group by department_id order by visits desc + where department_id <> '' + and package_id <> '' + and url like '/dataset/%%' + and period_name=%s + group by department_id order by views desc """ if limit: q = q + " limit %s;" % (limit) - # Add this back (before and period_name =%s) if you want to ignore publisher - # homepage views - # and not url like '/publisher/%%' - top_publishers = [] - res = connection.execute(q, c.month) - + res = connection.execute(q, month) for row in res: g = model.Group.get(row[0]) if g: top_publishers.append((g, row[1], row[2])) return top_publishers + def _get_publishers(): ''' Returns a list of all publishers. Each item is a tuple: - (names, title) + (name, title) ''' publishers = [] for pub in model.Session.query(model.Group).\