merge
--- a/README.rst
+++ b/README.rst
@@ -33,6 +33,10 @@
googleanalytics.id = UA-1010101-1
googleanalytics.account = Account name (e.g. data.gov.uk, see top level item at https://www.google.com/analytics)
ga-report.period = monthly
+ ga-report.bounce_url = /data
+
+ The ``ga-report.bounce_url`` option sets the page path used when calculating the bounce rate. For DGU this is /data,
+ but you may want to set it to / (which is also the default when the option is not set).
3. Set up this extension's database tables using a paster command. (Ensure your CKAN pyenv is still activated, run the command from ``src/ckanext-ga-report``, alter the ``--config`` option to point to your site config file)::
--- a/ckanext/ga_report/controller.py
+++ b/ckanext/ga_report/controller.py
@@ -70,15 +70,15 @@
entries = q.order_by('ga_stat.key').all()
def clean_key(key, val):
- if key in ['Average time on site', 'Pages per visit', 'New visits']:
+ if key in ['Average time on site', 'Pages per visit', 'New visits', 'Bounces']:
val = "%.2f" % round(float(val), 2)
if key == 'Average time on site':
mins, secs = divmod(float(val), 60)
hours, mins = divmod(mins, 60)
val = '%02d:%02d:%02d (%s seconds) ' % (hours, mins, secs, val)
- if key == 'New visits':
+ if key in ['New visits','Bounces']:
val = "%s%%" % val
- if key in ['Bounces', 'Total page views', 'Total visits']:
+ if key in ['Total page views', 'Total visits']:
val = int(val)
return key, val
@@ -93,11 +93,12 @@
for e in entries:
d[e.key].append(float(e.value))
for k, v in d.iteritems():
- if k in ['Bounces', 'Total page views', 'Total visits']:
+ if k in ['Total page views', 'Total visits']:
v = sum(v)
else:
v = float(sum(v))/len(v)
key, val = clean_key(k,v)
+
c.global_totals.append((key, val))
c.global_totals = sorted(c.global_totals, key=operator.itemgetter(0))
@@ -134,29 +135,7 @@
c.social_referrer_totals.append((shorten_name(entry[0]), fill_out_url(entry[0]),'',
entry[1]))
-
- browser_version_re = re.compile("(.*)\((.*)\)")
for k, v in keys.iteritems():
-
- def clean_field(key):
- if k != 'Browser versions':
- return key
- m = browser_version_re.match(key)
- browser = m.groups()[0].strip()
- ver = m.groups()[1]
- parts = ver.split('.')
- if len(parts) > 1:
- if parts[1][0] == '0':
- ver = parts[0]
- else:
- ver = "%s.%s" % (parts[0],parts[1])
- if browser in ['Safari','Android Browser']: # Special case complex version nums
- ver = parts[0]
- if len(ver) > 2:
- ver = "%s%sX" % (ver[0], ver[1])
-
- return "%s (%s)" % (browser, ver,)
-
q = model.Session.query(GA_Stat).\
filter(GA_Stat.stat_name==k)
if c.month:
@@ -172,17 +151,13 @@
entries.append((key,val,))
entries = sorted(entries, key=operator.itemgetter(1), reverse=True)
- def percent(num, total):
- p = 100 * float(num)/float(total)
- return "%.2f%%" % round(p, 2)
-
# Get the total for each set of values and then set the value as
# a percentage of the total
if k == 'Social sources':
total = sum([x for n,x in c.global_totals if n == 'Total visits'])
else:
total = sum([num for _,num in entries])
- setattr(c, v, [(k,percent(v,total)) for k,v in entries ])
+ setattr(c, v, [(k,_percent(v,total)) for k,v in entries ])
return render('ga_report/site/index.html')
@@ -393,3 +368,7 @@
publishers.append((pub.name, pub.title))
return publishers
+def _percent(num, total):
+ p = 100 * float(num)/float(total)
+ return "%.2f%%" % round(p, 2)
+
--- a/ckanext/ga_report/download_analytics.py
+++ b/ckanext/ga_report/download_analytics.py
@@ -100,6 +100,7 @@
self.get_full_period_name(period_name, period_complete_day),
start_date.strftime('%Y %m %d'),
end_date.strftime('%Y %m %d'))
+
data = self.download(start_date, end_date, '~/dataset/[a-z0-9-_]+')
log.info('Storing Dataset Analytics for period "%s"',
self.get_full_period_name(period_name, period_complete_day))
@@ -161,11 +162,6 @@
max_results=10000,
end_date=end_date).execute()
- if os.getenv('DEBUG'):
- import pprint
- pprint.pprint(results)
- print 'Total results: %s' % results.get('totalResults')
-
packages = []
for entry in results.get('rows'):
(loc,pageviews,visits) = entry
@@ -213,18 +209,37 @@
results = self.service.data().ga().get(
ids='ga:' + self.profile_id,
start_date=start_date,
- metrics='ga:pageviewsPerVisit,ga:bounces,ga:avgTimeOnSite,ga:percentNewVisits,ga:visitors',
+ metrics='ga:pageviewsPerVisit,ga:avgTimeOnSite,ga:percentNewVisits,ga:visitors',
max_results=10000,
end_date=end_date).execute()
result_data = results.get('rows')
data = {
'Pages per visit': result_data[0][0],
- 'Bounces': result_data[0][1],
- 'Average time on site': result_data[0][2],
- 'New visits': result_data[0][3],
- 'Total visits': result_data[0][4],
+ 'Average time on site': result_data[0][1],
+ 'New visits': result_data[0][2],
+ 'Total visits': result_data[0][3],
}
ga_model.update_sitewide_stats(period_name, "Totals", data)
+
+ # Bounce percentage for a single page path. The path is read from configuration
+ # (ga-report.bounce_url, default /) because for DGU we want /data rather than /.
+ path = config.get('ga-report.bounce_url','/')
+ print path
+ results = self.service.data().ga().get(
+ ids='ga:' + self.profile_id,
+ filters='ga:pagePath=~%s$' % (path,),
+ start_date=start_date,
+ metrics='ga:bounces,ga:uniquePageviews',
+ dimensions='ga:pagePath',
+ max_results=10000,
+ end_date=end_date).execute()
+ result_data = results.get('rows')
+ for results in result_data:
+ if results[0] == path:
+ bounce, total = [float(x) for x in results[1:]]
+ pct = 100 * bounce/total
+ print "%d bounces from %d total == %s" % (bounce, total, pct)
+ ga_model.update_sitewide_stats(period_name, "Totals", {'Bounces': pct})
def _locale_stats(self, start_date, end_date, period_name):