Update to handle missing downloads data
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
*.py[co]
*.py~
.gitignore
+ckan.log
# Packages
*.egg
--- a/ckanext/ga_report/controller.py
+++ b/ckanext/ga_report/controller.py
@@ -52,7 +52,7 @@
def csv(self, month):
import csv
- q = model.Session.query(GA_Stat)
+ q = model.Session.query(GA_Stat).filter(GA_Stat.stat_name!='Downloads')
if month != 'all':
q = q.filter(GA_Stat.period_name==month)
entries = q.order_by('GA_Stat.period_name, GA_Stat.stat_name, GA_Stat.key').all()
@@ -68,6 +68,26 @@
entry.stat_name.encode('utf-8'),
entry.key.encode('utf-8'),
entry.value.encode('utf-8')])
+
+ def csv_downloads(self, month):
+ import csv
+
+ q = model.Session.query(GA_Stat).filter(GA_Stat.stat_name=='Downloads')
+ if month != 'all':
+ q = q.filter(GA_Stat.period_name==month)
+ entries = q.order_by('GA_Stat.period_name, GA_Stat.key').all()
+
+ response.headers['Content-Type'] = "text/csv; charset=utf-8"
+ response.headers['Content-Disposition'] = str('attachment; filename=downloads_%s.csv' % (month,))
+
+ writer = csv.writer(response)
+ writer.writerow(["Period", "Resource URL", "Count"])
+
+ for entry in entries:
+ writer.writerow([entry.period_name.encode('utf-8'),
+ entry.key.encode('utf-8'),
+ entry.value.encode('utf-8')])
+
def index(self):
@@ -114,7 +134,7 @@
if k in ['Total page views', 'Total visits']:
v = sum(v)
else:
- v = float(sum(v))/len(v)
+ v = float(sum(v))/float(len(v))
key, val = clean_key(k,v)
c.global_totals.append((key, val))
@@ -178,6 +198,37 @@
setattr(c, v, [(k,_percent(v,total)) for k,v in entries ])
return render('ga_report/site/index.html')
+
+ def downloads(self):
+
+ # Get the month details by fetching distinct values and determining the
+ # month names from the values.
+ c.months, c.day = _month_details(GA_Stat)
+
+ # Work out which month to show, based on the 'month' query parameter
+ c.month_desc = 'all months'
+ c.month = request.params.get('month', '')
+ if c.month:
+ c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month])
+
+ c.downloads = []
+ q = model.Session.query(GA_Stat).filter(GA_Stat.stat_name=='Downloads')
+ q = q.filter(GA_Stat.period_name==c.month) if c.month else q
+ q = q.order_by("ga_stat.value::int desc")
+
+ data = collections.defaultdict(int)
+ for entry in q.all():
+ r = model.Session.query(model.Resource).filter(model.Resource.url==entry.key).first()
+ if not r:
+ continue
+ data[r] += int(entry.value)
+
+ for k,v in data.iteritems():
+ c.downloads.append((k,v))
+
+ c.downloads = sorted(c.downloads, key=operator.itemgetter(1), reverse=True)
+
+ return render('ga_report/site/downloads.html')
class GaDatasetReport(BaseController):
--- a/ckanext/ga_report/download_analytics.py
+++ b/ckanext/ga_report/download_analytics.py
@@ -13,6 +13,7 @@
FORMAT_MONTH = '%Y-%m'
MIN_VIEWS = 50
MIN_VISITS = 20
+MIN_DOWNLOADS = 10
class DownloadAnalytics(object):
'''Downloads and stores analytics info'''
@@ -203,7 +204,7 @@
start_date = '%s-01' % period_name
end_date = '%s-%s' % (period_name, last_day_of_month)
funcs = ['_totals_stats', '_social_stats', '_os_stats',
- '_locale_stats', '_browser_stats', '_mobile_stats']
+ '_locale_stats', '_browser_stats', '_mobile_stats', '_download_stats']
for f in funcs:
log.info('Downloading analytics for %s' % f.split('_')[1])
getattr(self, f)(start_date, end_date, period_name, period_complete_day)
@@ -250,7 +251,7 @@
ids='ga:' + self.profile_id,
filters='ga:pagePath==%s' % (path,),
start_date=start_date,
- metrics='ga:bounces,ga:pageviews',
+ metrics='ga:visitBounceRate',
dimensions='ga:pagePath',
max_results=10000,
end_date=end_date).execute()
@@ -260,10 +261,10 @@
path, result_data)
return
results = result_data[0]
- bounces, total = [float(x) for x in result_data[0][1:]]
- pct = 100 * bounces/total
- log.info('%d bounces from %d total == %s', bounces, total, pct)
- ga_model.update_sitewide_stats(period_name, "Totals", {'Bounce rate (home page)': pct},
+ bounces = float(results[1])
+ # visitBounceRate is already a %
+ log.info('Google reports visitBounceRate as %s', bounces)
+ ga_model.update_sitewide_stats(period_name, "Totals", {'Bounce rate (home page)': float(bounces)},
period_complete_day)
@@ -290,6 +291,31 @@
self._filter_out_long_tail(data, MIN_VIEWS)
ga_model.update_sitewide_stats(period_name, "Country", data, period_complete_day)
+
+ def _download_stats(self, start_date, end_date, period_name, period_complete_day):
+ """ Fetches the number of download events per event label (resource URL) """
+ results = self.service.data().ga().get(
+ ids='ga:' + self.profile_id,
+ start_date=start_date,
+ filters='ga:eventAction==download',
+ metrics='ga:totalEvents',
+ sort='-ga:totalEvents',
+ dimensions="ga:eventLabel",
+ max_results=10000,
+ end_date=end_date).execute()
+ result_data = results.get('rows')
+ if not result_data:
+ # We may not have data for this time period, so we need to bail
+ # early.
+ log.info("There is no downloads data for this time period")
+ return
+
+ # result_data is a list of rows: [[url, count], [url, count], ...]
+ data = {}
+ for result in result_data:
+ data[result[0]] = data.get(result[0], 0) + int(result[1])
+ self._filter_out_long_tail(data, MIN_DOWNLOADS)
+ ga_model.update_sitewide_stats(period_name, "Downloads", data, period_complete_day)
def _social_stats(self, start_date, end_date, period_name, period_complete_day):
""" Finds out which social sites people are referred from """
--- a/ckanext/ga_report/ga_model.py
+++ b/ckanext/ga_report/ga_model.py
@@ -168,8 +168,12 @@
count = model.Session.query(GA_Url).\
filter(GA_Url.period_name == 'All').count()
log.debug("Deleting %d 'All' records" % count)
- model.Session.query(GA_Url).\
+ count = model.Session.query(GA_Url).\
filter(GA_Url.period_name == 'All').delete()
+ log.debug("Deleted %d 'All' records" % count)
+
+ model.Session.flush()
+ model.Session.commit()
model.repo.commit_and_remove()
--- a/ckanext/ga_report/plugin.py
+++ b/ckanext/ga_report/plugin.py
@@ -42,6 +42,16 @@
controller='ckanext.ga_report.controller:GaReport',
action='csv'
)
+ map.connect(
+ '/data/site-usage/downloads',
+ controller='ckanext.ga_report.controller:GaReport',
+ action='downloads'
+ )
+ map.connect(
+ '/data/site-usage/downloads_{month}.csv',
+ controller='ckanext.ga_report.controller:GaReport',
+ action='csv_downloads'
+ )
# GaDatasetReport
map.connect(
--- a/ckanext/ga_report/templates/ga_report/ga_util.html
+++ b/ckanext/ga_report/templates/ga_report/ga_util.html
@@ -44,6 +44,23 @@
</table>
+<table py:def="downloads_table(items)" class="table table-condensed table-bordered table-striped">
+ <tr>
+ <th>Name</th>
+ <th>Downloads</th>
+ </tr>
+ <py:for each="resource, value in items">
+ <tr>
+ <td>
+ <strong>${resource.resource_group.package.title}</strong><br/>
+ ${h.link_to((resource.name or resource.description).strip() or "No name", h.url_for(controller='package', action='resource_read', id=resource.resource_group.package.name, resource_id=resource.id))}<br/>
+ </td>
+ <td>${value}</td>
+ </tr>
+ </py:for>
+ </table>
+
+
<div py:def="usage_nav(active_name)" id="minornavigation">
<div id="minornavigation-bg-left">
<div id="minornavigation-bg-right">
@@ -55,6 +72,9 @@
<li py:attrs="{'class': 'active' if active_name=='Datasets' else None}">
<a py:attrs="{'class': 'active' if active_name=='Datasets' else None}" href="${h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport',action='read')}"><img src="/images/icons/page_white.png" height="16px" width="16px" alt="None" class="inline-icon "/> Datasets</a>
</li>
+ <li py:attrs="{'class': 'active' if active_name=='Downloads' else None}">
+ <a py:attrs="{'class': 'active' if active_name=='Downloads' else None}" href="${h.url_for(controller='ckanext.ga_report.controller:GaReport',action='downloads')}"><img src="/images/icons/page_white.png" height="16px" width="16px" alt="None" class="inline-icon "/> Downloads</a>
+ </li>
</ul>
</div>
</div>
--- /dev/null
+++ b/ckanext/ga_report/templates/ga_report/site/downloads.html
@@ -1,1 +1,59 @@
+<html xmlns:py="http://genshi.edgewall.org/"
+ xmlns:i18n="http://genshi.edgewall.org/i18n"
+ xmlns:xi="http://www.w3.org/2001/XInclude"
+ py:strip="">
+ <xi:include href="../ga_util.html" />
+
+ <py:def function="page_title">Downloads</py:def>
+
+ <py:match path="primarysidebar">
+ <li py:if="c.downloads" class="widget-container boxed widget_text">
+ <h4>Download</h4>
+ <p><center>
+ <a class="btn button btn-primary" href="${h.url_for(controller='ckanext.ga_report.controller:GaReport',action='csv_downloads',month=c.month or 'all')}">Download as CSV</a></center>
+ </p>
+ </li>
+ <xi:include href="../notes.html" />
+
+ </py:match>
+
+ <div py:match="content">
+ <h1>Downloads</h1>
+ ${usage_nav('Downloads')}
+
+ <form class="form-inline" action="${h.url_for(controller='ckanext.ga_report.controller:GaReport',action='downloads')}" method="get">
+ <div class="controls">
+
+ ${month_selector(c.month, c.months, c.day)}
+
+ <input class="btn button btn-primary" type='submit' value="Update"/>
+ </div>
+ </form>
+
+ <py:if test="c.downloads">
+ ${downloads_table(c.downloads)}
+ </py:if>
+ <py:if test="not c.downloads">
+ <h4>No data</h4>
+ <p>There is no download data available for this month</p>
+ </py:if>
+ </div>
+
+ <xi:include href="../../layout.html" />
+
+ <py:def function="optional_footer">
+ <script type='text/javascript'>
+ $('.dropdown-toggle').dropdown();
+ $('.nav-tabs li a').click(function (e) {
+ e.preventDefault();
+ $(this).tab('show');
+ })
+ alert(window.location.hash);
+ </script>
+ </py:def>
+</html>
+
+
+
+