Changes to support % of bounces from /
--- a/README.rst
+++ b/README.rst
@@ -33,6 +33,10 @@
googleanalytics.id = UA-1010101-1
googleanalytics.account = Account name (i.e. data.gov.uk, see top level item at https://www.google.com/analytics)
ga-report.period = monthly
+ ga-report.bounce_url = /data
+
+ The ga-report.bounce_url setting specifies the page path used when calculating the bounce percentage. For DGU this is /data,
+ but you may want to set it to / (which is also the value used when the setting is absent).
Note that your credentials will be readable by system administrators on your server. Rather than use sensitive account details, it is suggested you give access to the GA account to a new Google account that you create just for this purpose.
--- a/ckanext/ga_report/controller.py
+++ b/ckanext/ga_report/controller.py
@@ -70,15 +70,15 @@
entries = q.order_by('ga_stat.key').all()
def clean_key(key, val):
- if key in ['Average time on site', 'Pages per visit', 'New visits']:
+ if key in ['Average time on site', 'Pages per visit', 'New visits', 'Bounces']:
val = "%.2f" % round(float(val), 2)
if key == 'Average time on site':
mins, secs = divmod(float(val), 60)
hours, mins = divmod(mins, 60)
val = '%02d:%02d:%02d (%s seconds) ' % (hours, mins, secs, val)
- if key == 'New visits':
+ if key in ['New visits','Bounces']:
val = "%s%%" % val
- if key in ['Bounces', 'Total page views', 'Total visits']:
+ if key in ['Total page views', 'Total visits']:
val = int(val)
return key, val
@@ -93,11 +93,12 @@
for e in entries:
d[e.key].append(float(e.value))
for k, v in d.iteritems():
- if k in ['Bounces', 'Total page views', 'Total visits']:
+ if k in ['Total page views', 'Total visits']:
v = sum(v)
else:
v = float(sum(v))/len(v)
key, val = clean_key(k,v)
+
c.global_totals.append((key, val))
c.global_totals = sorted(c.global_totals, key=operator.itemgetter(0))
@@ -172,17 +173,13 @@
entries.append((key,val,))
entries = sorted(entries, key=operator.itemgetter(1), reverse=True)
- def percent(num, total):
- p = 100 * float(num)/float(total)
- return "%.2f%%" % round(p, 2)
-
# Get the total for each set of values and then set the value as
# a percentage of the total
if k == 'Social sources':
total = sum([x for n,x in c.global_totals if n == 'Total visits'])
else:
total = sum([num for _,num in entries])
- setattr(c, v, [(k,percent(v,total)) for k,v in entries ])
+ setattr(c, v, [(k,_percent(v,total)) for k,v in entries ])
return render('ga_report/site/index.html')
@@ -201,7 +198,7 @@
writer = csv.writer(response)
writer.writerow(["Publisher", "Views", "Visits", "Period Name"])
- for publisher,view,visit in self._get_publishers(None):
+ for publisher,view,visit in _get_publishers(None):
writer.writerow([publisher.title.encode('utf-8'),
view,
visit,
@@ -244,38 +241,10 @@
if c.month:
c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month])
- c.top_publishers = self._get_publishers()
+ c.top_publishers = _get_publishers()
return render('ga_report/publisher/index.html')
- def _get_publishers(self, limit=20):
- connection = model.Session.connection()
- q = """
- select department_id, sum(pageviews::int) views, sum(visitors::int) visits
- from ga_url
- where department_id <> ''"""
- if c.month:
- q = q + """
- and period_name=%s
- """
- q = q + """
- group by department_id order by views desc
- """
- if limit:
- q = q + " limit %s;" % (limit)
-
- # Add this back (before and period_name =%s) if you want to ignore publisher
- # homepage views
- # and not url like '/publisher/%%'
-
- top_publishers = []
- res = connection.execute(q, c.month)
-
- for row in res:
- g = model.Group.get(row[0])
- if g:
- top_publishers.append((g, row[1], row[2]))
- return top_publishers
def _get_packages(self, publisher, count=-1):
if count == -1:
@@ -345,3 +314,37 @@
return render('ga_report/publisher/read.html')
+def _get_publishers(limit=20):
+    """Return the most-viewed publishers as ``(group, views, visits)`` tuples.
+
+    Aggregates the ``ga_url`` rows by ``department_id`` and orders them by
+    summed page views, highest first.  When ``c.month`` is set, only rows
+    whose ``period_name`` equals it are counted.  Departments for which
+    ``model.Group.get`` finds no group are skipped, so the result may hold
+    fewer than ``limit`` entries.
+
+    :param limit: maximum number of departments to query; a false value
+                  (``None`` or ``0``) removes the limit.
+    :returns: list of ``(Group, views, visits)`` tuples.
+    """
+    connection = model.Session.connection()
+    q = """
+        select department_id, sum(pageviews::int) views, sum(visitors::int) visits
+        from ga_url
+        where department_id <> ''"""
+    # Only bind values for placeholders that are really in the statement.
+    params = []
+    if c.month:
+        q = q + """
+            and period_name=%s
+        """
+        params.append(c.month)
+    q = q + """
+        group by department_id order by views desc
+        """
+    if limit:
+        # int() guarantees that only a number is interpolated into the SQL.
+        q = q + " limit %d;" % int(limit)
+
+    # Add this back (before and period_name =%s) if you want to ignore publisher
+    # homepage views
+    # and not url like '/publisher/%%'
+
+    top_publishers = []
+    res = connection.execute(q, *params)
+
+    for row in res:
+        g = model.Group.get(row[0])
+        if g:
+            top_publishers.append((g, row[1], row[2]))
+    return top_publishers
+
+
+def _percent(num, total):
+    """Format ``num`` as a percentage of ``total`` with two decimal places.
+
+    Both arguments are coerced with ``float()``.  A zero ``total`` yields
+    ``"0.00%"`` instead of raising ``ZeroDivisionError``, so a report with
+    no recorded visits can still be rendered.
+
+    :returns: a string such as ``"12.50%"``.
+    """
+    total = float(total)
+    if not total:
+        return "0.00%"
+    p = 100 * float(num) / total
+    return "%.2f%%" % round(p, 2)
+
--- a/ckanext/ga_report/download_analytics.py
+++ b/ckanext/ga_report/download_analytics.py
@@ -94,6 +94,7 @@
self.get_full_period_name(period_name, period_complete_day),
start_date.strftime('%Y %m %d'),
end_date.strftime('%Y %m %d'))
+
data = self.download(start_date, end_date, '~/dataset/[a-z0-9-_]+')
log.info('Storing Dataset Analytics for period "%s"',
self.get_full_period_name(period_name, period_complete_day))
@@ -155,11 +156,6 @@
max_results=10000,
end_date=end_date).execute()
- if os.getenv('DEBUG'):
- import pprint
- pprint.pprint(results)
- print 'Total results: %s' % results.get('totalResults')
-
packages = []
for entry in results.get('rows'):
(loc,pageviews,visits) = entry
@@ -207,18 +203,37 @@
results = self.service.data().ga().get(
ids='ga:' + self.profile_id,
start_date=start_date,
- metrics='ga:pageviewsPerVisit,ga:bounces,ga:avgTimeOnSite,ga:percentNewVisits,ga:visitors',
+ metrics='ga:pageviewsPerVisit,ga:avgTimeOnSite,ga:percentNewVisits,ga:visitors',
max_results=10000,
end_date=end_date).execute()
result_data = results.get('rows')
data = {
'Pages per visit': result_data[0][0],
- 'Bounces': result_data[0][1],
- 'Average time on site': result_data[0][2],
- 'New visits': result_data[0][3],
- 'Total visits': result_data[0][4],
+ 'Average time on site': result_data[0][1],
+ 'New visits': result_data[0][2],
+ 'Total visits': result_data[0][3],
}
ga_model.update_sitewide_stats(period_name, "Totals", data)
+
+        # Bounce percentage for a single landing page.  The page is taken
+        # from configuration (ga-report.bounce_url, default "/") because
+        # for DGU we want /data rather than /.
+        path = config.get('ga-report.bounce_url', '/')
+        bounce_results = self.service.data().ga().get(
+            ids='ga:' + self.profile_id,
+            filters='ga:pagePath=~%s$' % (path,),
+            start_date=start_date,
+            metrics='ga:bounces,ga:uniquePageviews',
+            dimensions='ga:pagePath',
+            max_results=10000,
+            end_date=end_date).execute()
+        # Guard against a response that carries no 'rows' entry.
+        for row in bounce_results.get('rows') or []:
+            # The filter is a regex anchored only at the end, so other
+            # paths ending in the same text can match; keep the exact one.
+            if row[0] != path:
+                continue
+            bounce, total = [float(x) for x in row[1:]]
+            if not total:
+                # No page views recorded: a percentage cannot be computed.
+                continue
+            pct = 100 * bounce / total
+            log.info('%d bounces from %d total == %s', bounce, total, pct)
+            ga_model.update_sitewide_stats(period_name, "Totals", {'Bounces': pct})
def _locale_stats(self, start_date, end_date, period_name):
--- a/ckanext/ga_report/helpers.py
+++ b/ckanext/ga_report/helpers.py
@@ -3,15 +3,73 @@
import ckan.lib.base as base
import ckan.model as model
+from ckanext.ga_report.ga_model import GA_Url, GA_Publisher
+from ckanext.ga_report.controller import _get_publishers
_log = logging.getLogger(__name__)
+def popular_datasets(count=10):
+    """Render the popular datasets of one randomly chosen top publisher.
+
+    Chooses at random, among the publishers returned by
+    ``_get_publishers(30)``, one whose state is ``'active'`` and for which
+    ``_datasets_for_publisher`` returns at least one dataset, then renders
+    up to ``count`` of those datasets with the
+    ``ga_report/ga_popular_datasets.html`` snippet.
+
+    :returns: the rendered snippet, or ``""`` when no publisher qualifies.
+    """
+    import random
+
+    # Try every candidate at most once, in random order.  Re-drawing random
+    # picks until one succeeds never terminates when no publisher is active
+    # or has datasets, and fails outright on an empty publisher list.
+    candidates = list(_get_publishers(30))
+    random.shuffle(candidates)
+
+    for candidate in candidates:
+        publisher = candidate[0]
+        if publisher.state != 'active':
+            continue
+        datasets = _datasets_for_publisher(publisher, 10)[:count]
+        if not datasets:
+            continue
+
+        ctx = {
+            'datasets': datasets,
+            'publisher': publisher
+        }
+        return base.render_snippet('ga_report/ga_popular_datasets.html', **ctx)
+
+    _log.warning("No active publisher with datasets found for 'popular_datasets'")
+    return ""
+
+def single_popular_dataset(top=20):
+    """Render one randomly chosen dataset from the most-viewed dataset URLs.
+
+    Loads up to ``top`` ``GA_Url`` rows whose url starts with ``/dataset/``,
+    ordered by page views (highest first), and walks them in random order
+    until one resolves to a package whose state is ``'active'``.  That
+    package and the group named by the row's ``department_id`` are rendered
+    with the ``ga_report/ga_popular_single.html`` snippet.
+
+    :returns: the rendered snippet, or ``""`` when no row resolves to an
+              active dataset.
+    """
+    import random
+
+    # Fetch the whole candidate window once.  Indexing a single random row
+    # fails when fewer than ``top`` rows exist, and retrying the same row
+    # never terminates if its package is missing or not active.
+    entries = model.Session.query(GA_Url).\
+        filter(GA_Url.url.like('/dataset/%')).\
+        order_by('ga_url.pageviews::int desc').\
+        limit(top).all()
+    random.shuffle(entries)
+
+    for entry in entries:
+        dataset = model.Package.get(entry.url[len('/dataset/'):])
+        if not dataset or dataset.state != 'active':
+            continue
+
+        ctx = {
+            'dataset': dataset,
+            'publisher': model.Group.get(entry.department_id)
+        }
+        return base.render_snippet('ga_report/ga_popular_single.html', **ctx)
+
+    _log.warning("No active dataset found for 'single_popular_dataset'")
+    return ""
+
+
def most_popular_datasets(publisher, count=20):
- from ckanext.ga_report.ga_model import GA_Url
+    """Render the popular-datasets snippet for ``publisher``.
+
+    Logs an error and returns ``""`` when no publisher is given; otherwise
+    renders ``ga_report/publisher/popular.html`` with the tuples returned by
+    ``_datasets_for_publisher(publisher, count)``.
+    """
if not publisher:
_log.error("No valid publisher passed to 'most_popular_datasets'")
return ""
+    results = _datasets_for_publisher(publisher, count)
+
+    ctx = {
+        # The per-dataset dict is local to _datasets_for_publisher, so
+        # count the returned tuples (one per dataset) instead.
+        'dataset_count': len(results),
+        'datasets': results,
+
+        'publisher': publisher
+    }
+
+    return base.render_snippet('ga_report/publisher/popular.html', **ctx)
+
+def _datasets_for_publisher(publisher, count):
datasets = {}
entries = model.Session.query(GA_Url).\
filter(GA_Url.department_id==publisher.name).\
@@ -29,14 +87,5 @@
for k, v in datasets.iteritems():
results.append((k,v['views'],v['visits']))
- results = sorted(results, key=operator.itemgetter(1), reverse=True)
+ return sorted(results, key=operator.itemgetter(1), reverse=True)
- ctx = {
- 'dataset_count': len(datasets),
- 'datasets': results,
-
- 'publisher': publisher
- }
-
- return base.render_snippet('ga_report/publisher/popular.html', **ctx)
-
--- a/ckanext/ga_report/plugin.py
+++ b/ckanext/ga_report/plugin.py
@@ -2,6 +2,10 @@
import ckan.lib.helpers as h
import ckan.plugins as p
from ckan.plugins import implements, toolkit
+
+from ckanext.ga_report.helpers import (most_popular_datasets,
+ popular_datasets,
+ single_popular_dataset)
log = logging.getLogger('ckanext.ga-report')
@@ -19,10 +23,11 @@
A dictionary of extra helpers that will be available to provide
ga report info to templates.
"""
- from ckanext.ga_report.helpers import most_popular_datasets
return {
'ga_report_installed': lambda: True,
+ 'popular_datasets': popular_datasets,
'most_popular_datasets': most_popular_datasets,
+ 'single_popular_dataset': single_popular_dataset
}
def after_map(self, map):
--- a/ckanext/ga_report/report_model.py
+++ /dev/null
--- /dev/null
+++ b/ckanext/ga_report/templates/ga_report/ga_popular_datasets.html
@@ -1,1 +1,27 @@
+<html xmlns:py="http://genshi.edgewall.org/"
+ xmlns:i18n="http://genshi.edgewall.org/i18n"
+ xmlns:xi="http://www.w3.org/2001/XInclude"
+ py:strip="">
+<div class="popular_datasets">
+ <div class="pull-right">
+ <a href="${h.url_for(controller='ckanext.ga_report.controller:GaPublisherReport',action='index')}" class="btn btn-primary">More popular datasets »</a>
+ </div>
+ <h2>Popular datasets</h2>
+ <h4>${publisher.title}</h4>
+ <ul>
+ <py:for each="dataset, _, _ in datasets">
+ <li>
+ <span>${h.link_to(dataset.title, h.url_for(controller='package', action='read', id=dataset.name))}</span>
+ <div>${h.truncate(dataset.notes, length=80, whole_word=True)}</div>
+ </li>
+ </py:for>
+ </ul>
+
+ </div>
+
+</html>
+
+
+
+
--- /dev/null
+++ b/ckanext/ga_report/templates/ga_report/ga_popular_single.html
@@ -1,1 +1,23 @@
+<html xmlns:py="http://genshi.edgewall.org/"
+ xmlns:i18n="http://genshi.edgewall.org/i18n"
+ xmlns:xi="http://www.w3.org/2001/XInclude"
+ py:strip="">
+
+<div class="popular_datasets">
+ <h2>Featured dataset</h2>
+
+ <h3>${h.link_to(dataset.title, h.url_for(controller='package', action='read', id=dataset.name))}</h3> <div>${h.truncate(dataset.notes, length=200, whole_word=True)}</div>
+ <p></p>
+ <div>
+<a href="${h.url_for(controller='ckanext.ga_report.controller:GaPublisherReport',action='index')}" class="btn btn-primary">More popular datasets</a>
+<a href="${h.url_for(controller='ckanext.ga_report.controller:GaReport',action='index')}" class="btn btn-primary">All usage data</a>
+ </div>
+ </div>
+
+
+</html>
+
+
+
+
--- a/ckanext/ga_report/templates/ga_report/ga_util.html
+++ b/ckanext/ga_report/templates/ga_report/ga_util.html
@@ -5,15 +5,6 @@
xmlns:xi="http://www.w3.org/2001/XInclude"
py:strip=""
>
-
- <table py:def="publisher_list(groups)" class="groups">
- <py:for each="group,title in groups">
- <tr>
- <td><a href="/publisher/${group.name}">${title}</a></td>
- </tr>
- </py:for>
- </table>
-
<table py:def="social_table(items, with_source=False)" class="table table-condensed table-bordered table-striped">
<tr>
@@ -49,12 +40,12 @@
<div id="minornavigation-bg-left">
<div id="minornavigation-bg-right">
<ul class="nav nav-pills">
- <li py:attrs="{'class': 'active' if active_name=='Site-wide' else None}"><a py:attrs="{'class': 'active' if active_name=='Site-wide' else None}" href="${h.url_for(controller='ckanext.ga_report.controller:GaReport',action='index')}"><img src="/images/icons/page_white_gear.png" height="16px" width="16px" alt="None" class="inline-icon "/> Site-wide</a></li>
+ <li py:attrs="{'class': 'active' if active_name=='Site-wide' else None}"><a py:attrs="{'class': 'active' if active_name=='Site-wide' else None}" href="${h.url_for(controller='ckanext.ga_report.controller:GaReport',action='index')}"><img src="/images/icons/page_white.png" height="16px" width="16px" alt="None" class="inline-icon "/> Site-wide</a></li>
<li py:attrs="{'class': 'active' if active_name=='Publishers' else None}">
- <a py:attrs="{'class': 'active' if active_name=='Publishers' else None}" href="${h.url_for(controller='ckanext.ga_report.controller:GaPublisherReport',action='index')}"><img src="/images/icons/page_white_gear.png" height="16px" width="16px" alt="None" class="inline-icon "/> Publishers</a>
+ <a py:attrs="{'class': 'active' if active_name=='Publishers' else None}" href="${h.url_for(controller='ckanext.ga_report.controller:GaPublisherReport',action='index')}"><img src="/images/icons/page_white.png" height="16px" width="16px" alt="None" class="inline-icon "/> Publishers</a>
</li>
<li py:if="publisher" class="active">
- <a class="active" href="${h.url_for(controller='ckanext.ga_report.controller:GaPublisherReport',action='read', id=publisher.name)}"><img src="/images/icons/page_white_gear.png" height="16px" width="16px" alt="None" class="inline-icon "/>${publisher.title}</a>
+ <a class="active" href="${h.url_for(controller='ckanext.ga_report.controller:GaPublisherReport',action='read', id=publisher.name)}"><img src="/images/icons/page_white.png" height="16px" width="16px" alt="None" class="inline-icon "/>${publisher.title}</a>
</li>
</ul>
--- a/ckanext/ga_report/templates/ga_report/publisher/index.html
+++ b/ckanext/ga_report/templates/ga_report/publisher/index.html
@@ -27,6 +27,7 @@
<div py:match="content">
+
<h1>Site Usage</h1>
${usage_nav('Publishers', None)}
--- a/ckanext/ga_report/templates/ga_report/publisher/read.html
+++ b/ckanext/ga_report/templates/ga_report/publisher/read.html
@@ -11,7 +11,7 @@
<li class="widget-container boxed widget_text">
<h4>Publishers</h4>
<p>
- Dataset views records the number of times a specific dataset page has been viewed. Visits records the number of unique site visits.
+ Dataset views records the number of times a specific dataset page has been viewed for <a href="${h.url_for(controller='ckanext.dgu.controllers.publisher:PublisherController',action='read',id=c.publisher.name)}">${c.publisher.title}</a>. Visits records the number of unique site visits.
</p>
<p>
Note: this data does not include API calls.