From: Ross Jones Date: Tue, 06 Nov 2012 10:33:54 +0000 Subject: Changes to support % of bounces from / X-Git-Url: http://maxious.lambdacomplex.org/git/?p=ckanext-ga-report.git&a=commitdiff&h=9511fbc86291da3c92fa73edfb80d05066d3d212 --- Changes to support % of bounces from / --- --- a/README.rst +++ b/README.rst @@ -33,6 +33,10 @@ googleanalytics.id = UA-1010101-1 googleanalytics.account = Account name (i.e. data.gov.uk, see top level item at https://www.google.com/analytics) ga-report.period = monthly + ga-report.bounce_url = /data + + The ga-report.bounce_url specifies the path to use when calculating bounces. For DGU this is /data + but you may want to set this to /. Note that your credentials will be readable by system administrators on your server. Rather than use sensitive account details, it is suggested you give access to the GA account to a new Google account that you create just for this purpose. --- a/ckanext/ga_report/controller.py +++ b/ckanext/ga_report/controller.py @@ -70,15 +70,15 @@ entries = q.order_by('ga_stat.key').all() def clean_key(key, val): - if key in ['Average time on site', 'Pages per visit', 'New visits']: + if key in ['Average time on site', 'Pages per visit', 'New visits', 'Bounces']: val = "%.2f" % round(float(val), 2) if key == 'Average time on site': mins, secs = divmod(float(val), 60) hours, mins = divmod(mins, 60) val = '%02d:%02d:%02d (%s seconds) ' % (hours, mins, secs, val) - if key == 'New visits': + if key in ['New visits','Bounces']: val = "%s%%" % val - if key in ['Bounces', 'Total page views', 'Total visits']: + if key in ['Total page views', 'Total visits']: val = int(val) return key, val @@ -93,11 +93,12 @@ for e in entries: d[e.key].append(float(e.value)) for k, v in d.iteritems(): - if k in ['Bounces', 'Total page views', 'Total visits']: + if k in ['Total page views', 'Total visits']: v = sum(v) else: v = float(sum(v))/len(v) key, val = clean_key(k,v) + c.global_totals.append((key, val)) c.global_totals = sorted(c.global_totals, key=operator.itemgetter(0)) @@ -172,17 +173,13 @@ entries.append((key,val,)) entries = sorted(entries, key=operator.itemgetter(1), reverse=True) - def percent(num, total): - p = 100 * float(num)/float(total) - return "%.2f%%" % round(p, 2) - # Get the total for each set of values and then set the value as # a percentage of the total if k == 'Social sources': total = sum([x for n,x in c.global_totals if n == 'Total visits']) else: total = sum([num for _,num in entries]) - setattr(c, v, [(k,percent(v,total)) for k,v in entries ]) + setattr(c, v, [(k,_percent(v,total)) for k,v in entries ]) return render('ga_report/site/index.html') @@ -201,7 +198,7 @@ writer = csv.writer(response) writer.writerow(["Publisher", "Views", "Visits", "Period Name"]) - for publisher,view,visit in self._get_publishers(None): + for publisher,view,visit in _get_publishers(None): writer.writerow([publisher.title.encode('utf-8'), view, visit, @@ -244,38 +241,10 @@ if c.month: c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month]) - c.top_publishers = self._get_publishers() + c.top_publishers = _get_publishers() return render('ga_report/publisher/index.html') - def _get_publishers(self, limit=20): - connection = model.Session.connection() - q = """ - select department_id, sum(pageviews::int) views, sum(visitors::int) visits - from ga_url - where department_id <> ''""" - if c.month: - q = q + """ - and period_name=%s - """ - q = q + """ - group by department_id order by views desc - """ - if limit: - q = q + " limit %s;" % (limit) - - # Add this back (before and period_name =%s) if you want to ignore publisher - # homepage views - # and not url like '/publisher/%%' - - top_publishers = [] - res = connection.execute(q, c.month) - - for row in res: - g = model.Group.get(row[0]) - if g: - top_publishers.append((g, row[1], row[2])) - return top_publishers def _get_packages(self, publisher, count=-1): if count == -1: @@ -345,3 +314,37 @@ return render('ga_report/publisher/read.html') +def _get_publishers(limit=20): + connection = model.Session.connection() + q = """ + select department_id, sum(pageviews::int) views, sum(visitors::int) visits + from ga_url + where department_id <> ''""" + if c.month: + q = q + """ + and period_name=%s + """ + q = q + """ + group by department_id order by views desc + """ + if limit: + q = q + " limit %s;" % (limit) + + # Add this back (before and period_name =%s) if you want to ignore publisher + # homepage views + # and not url like '/publisher/%%' + + top_publishers = [] + res = connection.execute(q, c.month) + + for row in res: + g = model.Group.get(row[0]) + if g: + top_publishers.append((g, row[1], row[2])) + return top_publishers + + +def _percent(num, total): + p = 100 * float(num)/float(total) + return "%.2f%%" % round(p, 2) + --- a/ckanext/ga_report/download_analytics.py +++ b/ckanext/ga_report/download_analytics.py @@ -94,6 +94,7 @@ self.get_full_period_name(period_name, period_complete_day), start_date.strftime('%Y %m %d'), end_date.strftime('%Y %m %d')) + data = self.download(start_date, end_date, '~/dataset/[a-z0-9-_]+') log.info('Storing Dataset Analytics for period "%s"', self.get_full_period_name(period_name, period_complete_day)) @@ -155,11 +156,6 @@ max_results=10000, end_date=end_date).execute() - if os.getenv('DEBUG'): - import pprint - pprint.pprint(results) - print 'Total results: %s' % results.get('totalResults') - packages = [] for entry in results.get('rows'): (loc,pageviews,visits) = entry @@ -207,18 +203,37 @@ results = self.service.data().ga().get( ids='ga:' + self.profile_id, start_date=start_date, - metrics='ga:pageviewsPerVisit,ga:bounces,ga:avgTimeOnSite,ga:percentNewVisits,ga:visitors', + metrics='ga:pageviewsPerVisit,ga:avgTimeOnSite,ga:percentNewVisits,ga:visitors', max_results=10000, end_date=end_date).execute() result_data = results.get('rows') data = { 'Pages per visit': result_data[0][0], - 'Bounces': result_data[0][1], - 'Average time on site': result_data[0][2], - 'New visits': result_data[0][3], - 'Total visits': result_data[0][4], + 'Average time on site': result_data[0][1], + 'New visits': result_data[0][2], + 'Total visits': result_data[0][3], } ga_model.update_sitewide_stats(period_name, "Totals", data) + + # Bounces from /data. This url is specified in configuration because + # for DGU we don't want /. + path = config.get('ga-report.bounce_url','/') + print path + results = self.service.data().ga().get( + ids='ga:' + self.profile_id, + filters='ga:pagePath=~%s$' % (path,), + start_date=start_date, + metrics='ga:bounces,ga:uniquePageviews', + dimensions='ga:pagePath', + max_results=10000, + end_date=end_date).execute() + result_data = results.get('rows') + for results in result_data: + if results[0] == path: + bounce, total = [float(x) for x in results[1:]] + pct = 100 * bounce/total + print "%d bounces from %d total == %s" % (bounce, total, pct) + ga_model.update_sitewide_stats(period_name, "Totals", {'Bounces': pct}) def _locale_stats(self, start_date, end_date, period_name): --- a/ckanext/ga_report/helpers.py +++ b/ckanext/ga_report/helpers.py @@ -3,15 +3,73 @@ import ckan.lib.base as base import ckan.model as model +from ckanext.ga_report.ga_model import GA_Url, GA_Publisher +from ckanext.ga_report.controller import _get_publishers _log = logging.getLogger(__name__) +def popular_datasets(count=10): + import random + + publisher = None + publishers = _get_publishers(30) + total = len(publishers) + while not publisher or not datasets: + rand = random.randrange(0, total) + publisher = publishers[rand][0] + if not publisher.state == 'active': + publisher = None + continue + datasets = _datasets_for_publisher(publisher, 10)[:count] + + ctx = { + 'datasets': datasets, + 'publisher': publisher + } + return base.render_snippet('ga_report/ga_popular_datasets.html', **ctx) + +def single_popular_dataset(top=20): + import random + + datasets = {} + rand = random.randrange(0, top) + entry = model.Session.query(GA_Url).\ + filter(GA_Url.url.like('/dataset/%')).\ + order_by('ga_url.pageviews::int desc')[rand] + + + dataset = None + while not dataset: + dataset = model.Package.get(entry.url[len('/dataset/'):]) + if dataset and not dataset.state == 'active': + dataset = None + else: + publisher = model.Group.get(entry.department_id) + + ctx = { + 'dataset': dataset, + 'publisher': publisher + } + return base.render_snippet('ga_report/ga_popular_single.html', **ctx) + + def most_popular_datasets(publisher, count=20): - from ckanext.ga_report.ga_model import GA_Url if not publisher: _log.error("No valid publisher passed to 'most_popular_datasets'") return "" + results = _datasets_for_publisher(publisher, count) + + ctx = { + 'dataset_count': len(datasets), + 'datasets': results, + + 'publisher': publisher + } + + return base.render_snippet('ga_report/publisher/popular.html', **ctx) + +def _datasets_for_publisher(publisher, count): datasets = {} entries = model.Session.query(GA_Url).\ filter(GA_Url.department_id==publisher.name).\ @@ -29,14 +87,5 @@ for k, v in datasets.iteritems(): results.append((k,v['views'],v['visits'])) - results = sorted(results, key=operator.itemgetter(1), reverse=True) + return sorted(results, key=operator.itemgetter(1), reverse=True) - ctx = { - 'dataset_count': len(datasets), - 'datasets': results, - - 'publisher': publisher - } - - return base.render_snippet('ga_report/publisher/popular.html', **ctx) - --- a/ckanext/ga_report/plugin.py +++ b/ckanext/ga_report/plugin.py @@ -2,6 +2,10 @@ import ckan.lib.helpers as h import ckan.plugins as p from ckan.plugins import implements, toolkit + +from ckanext.ga_report.helpers import (most_popular_datasets, + popular_datasets, + single_popular_dataset) log = logging.getLogger('ckanext.ga-report') @@ -19,10 +23,11 @@ A dictionary of extra helpers that will be available to provide ga report info to templates. """ - from ckanext.ga_report.helpers import most_popular_datasets return { 'ga_report_installed': lambda: True, + 'popular_datasets': popular_datasets, 'most_popular_datasets': most_popular_datasets, + 'single_popular_dataset': single_popular_dataset } def after_map(self, map): --- a/ckanext/ga_report/report_model.py +++ /dev/null --- /dev/null +++ b/ckanext/ga_report/templates/ga_report/ga_popular_datasets.html @@ -1,1 +1,27 @@ + + + + + + + + --- /dev/null +++ b/ckanext/ga_report/templates/ga_report/ga_popular_single.html @@ -1,1 +1,23 @@ + + + + + + + + + + --- a/ckanext/ga_report/templates/ga_report/ga_util.html +++ b/ckanext/ga_report/templates/ga_report/ga_util.html @@ -5,15 +5,6 @@ xmlns:xi="http://www.w3.org/2001/XInclude" py:strip="" > - - - - - - - -
${title}
- @@ -49,12 +40,12 @@
--- a/ckanext/ga_report/templates/ga_report/publisher/index.html +++ b/ckanext/ga_report/templates/ga_report/publisher/index.html @@ -27,6 +27,7 @@
+

Site Usage

${usage_nav('Publishers', None)}