Only show the months for Downloads that we have
Only show the months for Downloads that we have

file:a/.gitignore -> file:b/.gitignore
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
 *.py[co]
 *.py~
 .gitignore
+ckan.log
 
 # Packages
 *.egg

file:a/README.rst -> file:b/README.rst
--- a/README.rst
+++ b/README.rst
@@ -32,6 +32,7 @@
 
       googleanalytics.id = UA-1010101-1
       googleanalytics.account = Account name (e.g. data.gov.uk, see top level item at https://www.google.com/analytics)
+      googleanalytics.token.filepath = ~/pyenv/token.dat
       ga-report.period = monthly
       ga-report.bounce_url = /
 
@@ -82,13 +83,17 @@
 
     $ paster getauthtoken --config=../ckan/development.ini
 
+Now ensure you reference the correct path to your token.dat in your CKAN config file (e.g. development.ini)::
+
+    googleanalytics.token.filepath = ~/pyenv/token.dat
+
 
 Tutorial
 --------
 
 Download some GA data and store it in CKAN's database. (Ensure your CKAN pyenv is still activated, run the command from ``src/ckanext-ga-report``, alter the ``--config`` option to point to your site config file) and specifying the name of your auth file (token.dat by default) from the previous step::
 
-    $ paster loadanalytics token.dat latest --config=../ckan/development.ini
+    $ paster loadanalytics latest --config=../ckan/development.ini
 
 The value after the token file is how much data you want to retrieve, this can be
 

--- a/ckanext/ga_report/command.py
+++ b/ckanext/ga_report/command.py
@@ -1,5 +1,8 @@
 import logging
 import datetime
+import os
+
+from pylons import config
 
 from ckan.lib.cli import CkanCommand
 # No other CKAN imports allowed until _load_config is run,
@@ -58,20 +61,17 @@
     """Get data from Google Analytics API and save it
     in the ga_model
 
-    Usage: paster loadanalytics <tokenfile> <time-period>
+    Usage: paster loadanalytics <time-period>
 
-    Where <tokenfile> is the name of the auth token file from
-    the getauthtoken step.
-
-    And where <time-period> is:
+    Where <time-period> is:
         all         - data for all time
         latest      - (default) just the 'latest' data
         YYYY-MM     - just data for the specific month
     """
     summary = __doc__.split('\n')[0]
     usage = __doc__
-    max_args = 2
-    min_args = 1
+    max_args = 1
+    min_args = 0
 
     def __init__(self, name):
         super(LoadAnalytics, self).__init__(name)
@@ -92,19 +92,25 @@
         from download_analytics import DownloadAnalytics
         from ga_auth import (init_service, get_profile_id)
 
+        ga_token_filepath = os.path.expanduser(config.get('googleanalytics.token.filepath', ''))
+        if not ga_token_filepath:
+            print 'ERROR: In the CKAN config you need to specify the filepath of the ' \
+                  'Google Analytics token file under key: googleanalytics.token.filepath'
+            return
+
         try:
-            svc = init_service(self.args[0], None)
+            svc = init_service(ga_token_filepath, None)
         except TypeError:
             print ('Have you correctly run the getauthtoken task and '
-                   'specified the correct token file?')
+                   'specified the correct token file in the CKAN config under '
+                   '"googleanalytics.token.filepath"?')
             return
 
         downloader = DownloadAnalytics(svc, profile_id=get_profile_id(svc),
                                        delete_first=self.options.delete_first,
                                        skip_url_stats=self.options.skip_url_stats)
 
-        time_period = self.args[1] if self.args and len(self.args) > 1 \
-            else 'latest'
+        time_period = self.args[0] if self.args else 'latest'
         if time_period == 'all':
             downloader.all_()
         elif time_period == 'latest':

--- a/ckanext/ga_report/controller.py
+++ b/ckanext/ga_report/controller.py
@@ -21,13 +21,33 @@
     return '%s %s' % (calendar.month_name[d.tm_mon], d.tm_year)
 
 
-def _month_details(cls):
-    '''Returns a list of all the month names'''
+def _month_details(cls, stat_key=None):
+    '''
+    Returns (months, day). months lists (period_name, month name) for each
+    period of cls that has data (only stat_key rows if given), ordered by
+    period_name descending. day is the first row's period_complete_day as
+    an ordinal string such as "21st", or None when it is empty.
+    This may need extending if we add a period_name to the stats
+    '''
     months = []
-    vals = model.Session.query(cls.period_name).filter(cls.period_name!='All').distinct().all()
+    day = None
+
+    q = model.Session.query(cls.period_name,cls.period_complete_day)\
+        .filter(cls.period_name!='All').distinct(cls.period_name)
+    if stat_key:
+        q=  q.filter(cls.stat_name==stat_key)
+
+    vals = q.order_by("period_name desc").all()
+    if vals and vals[0][1]:
+        day = int(vals[0][1])
+        ordinal = 'th' if 11 <= day <= 13 \
+            else {1:'st',2:'nd',3:'rd'}.get(day % 10, 'th')
+        day = "{day}{ordinal}".format(day=day, ordinal=ordinal)
+
     for m in vals:
         months.append( (m[0], _get_month_name(m[0])))
-    return sorted(months, key=operator.itemgetter(0), reverse=True)
+
+    return months, day
 
 
 class GaReport(BaseController):
@@ -35,7 +55,7 @@
     def csv(self, month):
         import csv
 
-        q = model.Session.query(GA_Stat)
+        q = model.Session.query(GA_Stat).filter(GA_Stat.stat_name!='Downloads')
         if month != 'all':
             q = q.filter(GA_Stat.period_name==month)
         entries = q.order_by('GA_Stat.period_name, GA_Stat.stat_name, GA_Stat.key').all()
@@ -52,11 +72,31 @@
                              entry.key.encode('utf-8'),
                              entry.value.encode('utf-8')])
 
+    def csv_downloads(self, month):
+        import csv
+        # Export the stored 'Downloads' stats (one month, or every period when month is 'all') as CSV
+        q = model.Session.query(GA_Stat).filter(GA_Stat.stat_name=='Downloads')
+        if month != 'all':
+            q = q.filter(GA_Stat.period_name==month)
+        entries = q.order_by('GA_Stat.period_name, GA_Stat.key').all()
+
+        response.headers['Content-Type'] = "text/csv; charset=utf-8"
+        response.headers['Content-Disposition'] = str('attachment; filename=downloads_%s.csv' % (month,))
+
+        writer = csv.writer(response)
+        writer.writerow(["Period", "Resource URL", "Count"])
+
+        for entry in entries:
+            writer.writerow([entry.period_name.encode('utf-8'),
+                             entry.key.encode('utf-8'),
+                             entry.value.encode('utf-8')])
+
+
     def index(self):
 
         # Get the month details by fetching distinct values and determining the
         # month names from the values.
-        c.months = _month_details(GA_Stat)
+        c.months, c.day = _month_details(GA_Stat)
 
         # Work out which month to show, based on query params of the first item
         c.month_desc = 'all months'
@@ -97,7 +137,7 @@
                 if k in ['Total page views', 'Total visits']:
                     v = sum(v)
                 else:
-                    v = float(sum(v))/len(v)
+                    v = float(sum(v))/float(len(v))
                 key, val = clean_key(k,v)
 
                 c.global_totals.append((key, val))
@@ -162,6 +202,35 @@
 
         return render('ga_report/site/index.html')
 
+    def downloads(self):
+        '''Render the downloads page: summed 'Downloads' counts per resource, for ?month= or all months.'''
+        # Get the month details by fetching distinct values and determining the
+        # month names from the values.
+        c.months, c.day = _month_details(GA_Stat, "Downloads")
+
+        # Work out which month to show from the 'month' query parameter
+        c.month_desc = 'all months'
+        c.month = request.params.get('month', '')
+        if c.month:
+            c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month])
+
+        c.downloads = []
+        q = model.Session.query(GA_Stat).filter(GA_Stat.stat_name=='Downloads')
+        q = q.filter(GA_Stat.period_name==c.month) if c.month else q
+        q = q.order_by("ga_stat.value::int desc")
+        # Sum the counts per Resource; stat keys whose URL matches no Resource are skipped
+        data = collections.defaultdict(int)
+        for entry in q.all():
+            r = model.Session.query(model.Resource).filter(model.Resource.url==entry.key).first()
+            if not r:
+                continue
+            data[r] += int(entry.value)
+
+        c.downloads = [(k,v,) for k,v in data.iteritems()]
+        c.downloads = sorted(c.downloads, key=operator.itemgetter(1), reverse=True)
+
+        return render('ga_report/site/downloads.html')
+
 
 class GaDatasetReport(BaseController):
     """
@@ -220,7 +289,7 @@
 
         # Get the month details by fetching distinct values and determining the
         # month names from the values.
-        c.months = _month_details(GA_Url)
+        c.months, c.day = _month_details(GA_Url)
 
         # Work out which month to show, based on query params of the first item
         c.month = request.params.get('month', '')
@@ -232,7 +301,7 @@
         return render('ga_report/publisher/index.html')
 
     def _get_packages(self, publisher=None, count=-1):
-        '''Returns the datasets in order of visits'''
+        '''Returns the datasets in order of views'''
         if count == -1:
             count = sys.maxint
 
@@ -244,7 +313,7 @@
         if publisher:
             q = q.filter(GA_Url.department_id==publisher.name)
         q = q.filter(GA_Url.period_name==month)
-        q = q.order_by('ga_url.visits::int desc')
+        q = q.order_by('ga_url.pageviews::int desc')
         top_packages = []
         for entry,package in q.limit(count):
             if package:
@@ -278,7 +347,7 @@
 
         # Get the month details by fetching distinct values and determining the
         # month names from the values.
-        c.months = _month_details(GA_Url)
+        c.months, c.day = _month_details(GA_Url)
 
         # Work out which month to show, based on query params of the first item
         c.month = request.params.get('month', '')
@@ -329,7 +398,7 @@
 def _get_publishers():
     '''
     Returns a list of all publishers. Each item is a tuple:
-      (names, title)
+      (name, title)
     '''
     publishers = []
     for pub in model.Session.query(model.Group).\

--- a/ckanext/ga_report/download_analytics.py
+++ b/ckanext/ga_report/download_analytics.py
@@ -13,6 +13,7 @@
 FORMAT_MONTH = '%Y-%m'
 MIN_VIEWS = 50
 MIN_VISITS = 20
+MIN_DOWNLOADS = 10
 
 class DownloadAnalytics(object):
     '''Downloads and stores analytics info'''
@@ -126,7 +127,7 @@
                 ga_model.update_publisher_stats(period_name) # about 30 seconds.
 
             log.info('Downloading and storing analytics for site-wide stats')
-            self.sitewide_stats( period_name )
+            self.sitewide_stats( period_name, period_complete_day )
 
             log.info('Downloading and storing analytics for social networks')
             self.update_social_info(period_name, start_date, end_date)
@@ -153,7 +154,8 @@
         data = collections.defaultdict(list)
         rows = results.get('rows',[])
         for row in rows:
-            data[_normalize_url(row[0])].append( (row[1], int(row[2]),) )
+            url = _normalize_url('http:/' + row[0])
+            data[url].append( (row[1], int(row[2]),) )
         ga_model.update_social(period_name, data)
 
 
@@ -194,7 +196,7 @@
         if 'url' in data:
             ga_model.update_url_stats(period_name, period_complete_day, data['url'])
 
-    def sitewide_stats(self, period_name):
+    def sitewide_stats(self, period_name, period_complete_day):
         import calendar
         year, month = period_name.split('-')
         _, last_day_of_month = calendar.monthrange(int(year), int(month))
@@ -202,10 +204,10 @@
         start_date = '%s-01' % period_name
         end_date = '%s-%s' % (period_name, last_day_of_month)
         funcs = ['_totals_stats', '_social_stats', '_os_stats',
-                 '_locale_stats', '_browser_stats', '_mobile_stats']
+                 '_locale_stats', '_browser_stats', '_mobile_stats', '_download_stats']
         for f in funcs:
             log.info('Downloading analytics for %s' % f.split('_')[1])
-            getattr(self, f)(start_date, end_date, period_name)
+            getattr(self, f)(start_date, end_date, period_name, period_complete_day)
 
     def _get_results(result_data, f):
         data = {}
@@ -214,7 +216,7 @@
             data[key] = data.get(key,0) + result[1]
         return data
 
-    def _totals_stats(self, start_date, end_date, period_name):
+    def _totals_stats(self, start_date, end_date, period_name, period_complete_day):
         """ Fetches distinct totals, total pageviews etc """
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
@@ -224,7 +226,8 @@
                                  max_results=10000,
                                  end_date=end_date).execute()
         result_data = results.get('rows')
-        ga_model.update_sitewide_stats(period_name, "Totals", {'Total page views': result_data[0][0]})
+        ga_model.update_sitewide_stats(period_name, "Totals", {'Total page views': result_data[0][0]},
+            period_complete_day)
 
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
@@ -239,7 +242,7 @@
             'New visits': result_data[0][2],
             'Total visits': result_data[0][3],
         }
-        ga_model.update_sitewide_stats(period_name, "Totals", data)
+        ga_model.update_sitewide_stats(period_name, "Totals", data, period_complete_day)
 
         # Bounces from / or another configurable page.
         path = '/%s%s' % (config.get('googleanalytics.account'),
@@ -248,7 +251,7 @@
                                  ids='ga:' + self.profile_id,
                                  filters='ga:pagePath==%s' % (path,),
                                  start_date=start_date,
-                                 metrics='ga:bounces,ga:pageviews',
+                                 metrics='ga:visitBounceRate',
                                  dimensions='ga:pagePath',
                                  max_results=10000,
                                  end_date=end_date).execute()
@@ -258,13 +261,14 @@
                       path, result_data)
             return
         results = result_data[0]
-        bounces, total = [float(x) for x in result_data[0][1:]]
-        pct = 100 * bounces/total
-        log.info('%d bounces from %d total == %s', bounces, total, pct)
-        ga_model.update_sitewide_stats(period_name, "Totals", {'Bounce rate (home page)': pct})
-
-
-    def _locale_stats(self, start_date, end_date, period_name):
+        bounces = float(results[1])
+        # visitBounceRate is already a %
+        log.info('Google reports visitBounceRate as %s', bounces)
+        ga_model.update_sitewide_stats(period_name, "Totals", {'Bounce rate (home page)': float(bounces)},
+            period_complete_day)
+
+
+    def _locale_stats(self, start_date, end_date, period_name, period_complete_day):
         """ Fetches stats about language and country """
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
@@ -279,16 +283,41 @@
         for result in result_data:
             data[result[0]] = data.get(result[0], 0) + int(result[2])
         self._filter_out_long_tail(data, MIN_VIEWS)
-        ga_model.update_sitewide_stats(period_name, "Languages", data)
+        ga_model.update_sitewide_stats(period_name, "Languages", data, period_complete_day)
 
         data = {}
         for result in result_data:
             data[result[1]] = data.get(result[1], 0) + int(result[2])
         self._filter_out_long_tail(data, MIN_VIEWS)
-        ga_model.update_sitewide_stats(period_name, "Country", data)
-
-
-    def _social_stats(self, start_date, end_date, period_name):
+        ga_model.update_sitewide_stats(period_name, "Country", data, period_complete_day)
+
+
+    def _download_stats(self, start_date, end_date, period_name, period_complete_day):
+        """ Fetches the total 'download' events per event label (presumably the resource URL) """
+        results = self.service.data().ga().get(
+                                 ids='ga:' + self.profile_id,
+                                 start_date=start_date,
+                                 filters='ga:eventAction==download',
+                                 metrics='ga:totalEvents',
+                                 sort='-ga:totalEvents',
+                                 dimensions="ga:eventLabel",
+                                 max_results=10000,
+                                 end_date=end_date).execute()
+        result_data = results.get('rows')
+        if not result_data:
+            # We may not have data for this time period, so we need to bail
+            # early.
+            log.info("There is no download data for this time period")
+            return
+
+        # rows look like [[url, count], [url, count], ...]; counts are summed per url
+        data = {}
+        for result in result_data:
+            data[result[0]] = data.get(result[0], 0) + int(result[1])
+        self._filter_out_long_tail(data, MIN_DOWNLOADS)
+        ga_model.update_sitewide_stats(period_name, "Downloads", data, period_complete_day)
+
+    def _social_stats(self, start_date, end_date, period_name, period_complete_day):
         """ Finds out which social sites people are referred from """
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
@@ -304,10 +333,10 @@
             if not result[0] == '(not set)':
                 data[result[0]] = data.get(result[0], 0) + int(result[2])
         self._filter_out_long_tail(data, 3)
-        ga_model.update_sitewide_stats(period_name, "Social sources", data)
-
-
-    def _os_stats(self, start_date, end_date, period_name):
+        ga_model.update_sitewide_stats(period_name, "Social sources", data, period_complete_day)
+
+
+    def _os_stats(self, start_date, end_date, period_name, period_complete_day):
         """ Operating system stats """
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
@@ -322,17 +351,17 @@
         for result in result_data:
             data[result[0]] = data.get(result[0], 0) + int(result[2])
         self._filter_out_long_tail(data, MIN_VIEWS)
-        ga_model.update_sitewide_stats(period_name, "Operating Systems", data)
+        ga_model.update_sitewide_stats(period_name, "Operating Systems", data, period_complete_day)
 
         data = {}
         for result in result_data:
             if int(result[2]) >= MIN_VIEWS:
                 key = "%s %s" % (result[0],result[1])
                 data[key] = result[2]
-        ga_model.update_sitewide_stats(period_name, "Operating Systems versions", data)
-
-
-    def _browser_stats(self, start_date, end_date, period_name):
+        ga_model.update_sitewide_stats(period_name, "Operating Systems versions", data, period_complete_day)
+
+
+    def _browser_stats(self, start_date, end_date, period_name, period_complete_day):
         """ Information about browsers and browser versions """
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
@@ -349,14 +378,14 @@
         for result in result_data:
             data[result[0]] = data.get(result[0], 0) + int(result[2])
         self._filter_out_long_tail(data, MIN_VIEWS)
-        ga_model.update_sitewide_stats(period_name, "Browsers", data)
+        ga_model.update_sitewide_stats(period_name, "Browsers", data, period_complete_day)
 
         data = {}
         for result in result_data:
             key = "%s %s" % (result[0], self._filter_browser_version(result[0], result[1]))
             data[key] = data.get(key, 0) + int(result[2])
         self._filter_out_long_tail(data, MIN_VIEWS)
-        ga_model.update_sitewide_stats(period_name, "Browser versions", data)
+        ga_model.update_sitewide_stats(period_name, "Browser versions", data, period_complete_day)
 
     @classmethod
     def _filter_browser_version(cls, browser, version_str):
@@ -380,7 +409,7 @@
                 ver = ver[0] + ver[1] + 'X' * num_hidden_digits
         return ver
 
-    def _mobile_stats(self, start_date, end_date, period_name):
+    def _mobile_stats(self, start_date, end_date, period_name, period_complete_day):
         """ Info about mobile devices """
 
         results = self.service.data().ga().get(
@@ -397,13 +426,13 @@
         for result in result_data:
             data[result[0]] = data.get(result[0], 0) + int(result[2])
         self._filter_out_long_tail(data, MIN_VIEWS)
-        ga_model.update_sitewide_stats(period_name, "Mobile brands", data)
+        ga_model.update_sitewide_stats(period_name, "Mobile brands", data, period_complete_day)
 
         data = {}
         for result in result_data:
             data[result[1]] = data.get(result[1], 0) + int(result[2])
         self._filter_out_long_tail(data, MIN_VIEWS)
-        ga_model.update_sitewide_stats(period_name, "Mobile devices", data)
+        ga_model.update_sitewide_stats(period_name, "Mobile devices", data, period_complete_day)
 
     @classmethod
     def _filter_out_long_tail(cls, data, threshold=10):

--- a/ckanext/ga_report/ga_model.py
+++ b/ckanext/ga_report/ga_model.py
@@ -9,6 +9,8 @@
 
 import ckan.model as model
 from ckan.lib.base import *
+
+log = __import__('logging').getLogger(__name__)
 
 def make_uuid():
     return unicode(uuid.uuid4())
@@ -45,6 +47,7 @@
                   Column('id', types.UnicodeText, primary_key=True,
                          default=make_uuid),
                   Column('period_name', types.UnicodeText),
+                  Column('period_complete_day', types.UnicodeText),
                   Column('stat_name', types.UnicodeText),
                   Column('key', types.UnicodeText),
                   Column('value', types.UnicodeText), )
@@ -132,7 +135,7 @@
             return None, publisher_match.groups()[0]
     return None, None
 
-def update_sitewide_stats(period_name, stat_name, data):
+def update_sitewide_stats(period_name, stat_name, data, period_complete_day):
     for k,v in data.iteritems():
         item = model.Session.query(GA_Stat).\
             filter(GA_Stat.period_name==period_name).\
@@ -142,11 +145,13 @@
             item.period_name = period_name
             item.key = k
             item.value = v
+            item.period_complete_day = period_complete_day
             model.Session.add(item)
         else:
             # create the row
             values = {'id': make_uuid(),
                      'period_name': period_name,
+                     'period_complete_day': period_complete_day,
                      'key': k,
                      'value': v,
                      'stat_name': stat_name
@@ -156,10 +161,20 @@
 
 
 def pre_update_url_stats(period_name):
+    log.debug("Deleting '%s' records" % period_name)
     model.Session.query(GA_Url).\
             filter(GA_Url.period_name==period_name).delete()
-    model.Session.query(GA_Url).\
-            filter(GA_Url.period_name=='All').delete()
+    # The 'All' rows are removed as well; the count() query only feeds the debug log.
+    count = model.Session.query(GA_Url).\
+            filter(GA_Url.period_name == 'All').count()
+    log.debug("Deleting %d 'All' records" % count)
+    count = model.Session.query(GA_Url).\
+            filter(GA_Url.period_name == 'All').delete()
+    log.debug("Deleted %d 'All' records" % count)
+    # NOTE(review): flush() + commit() before commit_and_remove() look redundant -- confirm
+    model.Session.flush()
+    model.Session.commit()
+    model.repo.commit_and_remove()
 
 
 def update_url_stats(period_name, period_complete_day, url_data):
@@ -211,8 +226,8 @@
                       'period_name': 'All',
                       'period_complete_day': 0,
                       'url': url,
-                      'pageviews': sum([int(e.pageviews) for e in entries]) + old_pageviews,
-                      'visits': sum([int(e.visits) for e in entries]) + old_visits,
+                      'pageviews': sum([int(e.pageviews) for e in entries]) + int(old_pageviews),
+                      'visits': sum([int(e.visits or 0) for e in entries]) + int(old_visits),
                       'department_id': publisher,
                       'package_id': package
                      }
@@ -338,26 +353,39 @@
     '''
     for object_type in (GA_Url, GA_Stat, GA_Publisher, GA_ReferralStat):
         q = model.Session.query(object_type)
-        if period_name != 'all':
+        if period_name != 'All':
             q = q.filter_by(period_name=period_name)
         q.delete()
-    model.Session.commit()
+    model.repo.commit_and_remove()
 
 def get_score_for_dataset(dataset_name):
+    '''
+    Returns a "current popularity" score for a dataset: views per day this
+    month plus half of last month's views per day, times 100, as an int.
+    '''
     import datetime
     now = datetime.datetime.now()
-    period_names = ['%s-%02d' % (now.year, now.month),
-                    '%s-%02d' % (now.year, now.month-1)]
-
-    entry = model.Session.query(GA_Url)\
-        .filter(GA_Url.period_name==period_names[0])\
-        .filter(GA_Url.package_id==dataset_name).first()
-    score = int(entry.pageviews) if entry else 0
-
-    entry = model.Session.query(GA_Url)\
-        .filter(GA_Url.period_name==period_names[1])\
-        .filter(GA_Url.package_id==dataset_name).first()
-    val = int(entry.pageviews) if entry else 0
-    score += val/2 if val else 0
-
-    return 0
+    last_month = now.replace(day=1) - datetime.timedelta(days=1) # "now - 30 days" is still this month on the 31st
+    period_names = ['%s-%02d' % (last_month.year, last_month.month),
+                    '%s-%02d' % (now.year, now.month),
+                    ]
+
+    score = 0
+    for period_name in period_names:
+        score /= 2 # previous periods are discounted by 50%
+        entry = model.Session.query(GA_Url)\
+                .filter(GA_Url.period_name==period_name)\
+                .filter(GA_Url.package_id==dataset_name).first()
+        # normalise to views per day so a partly-complete month stays comparable
+        if entry:
+            views = float(entry.pageviews)
+            if entry.period_complete_day:
+                views_per_day = views / entry.period_complete_day
+            else:
+                views_per_day = views / 15 # guess
+            score += views_per_day
+
+    score = int(score * 100)
+    log.debug('Popularity %s: %s', score, dataset_name)
+    return score
+

--- a/ckanext/ga_report/helpers.py
+++ b/ckanext/ga_report/helpers.py
@@ -50,9 +50,12 @@
             dataset = model.Package.get(ga_url.url[len('/dataset/'):])
             if dataset and not dataset.state == 'active':
                 dataset = None
-                count += 1
-                if count > 10:
-                    break
+            # When testing, it is possible that top datasets are not available
+            # so only go round this loop a few times before falling back on
+            # a random dataset.
+            count += 1
+            if count > 10:
+                break
     if not dataset:
         # fallback
         dataset = model.Session.query(model.Package)\

--- a/ckanext/ga_report/plugin.py
+++ b/ckanext/ga_report/plugin.py
@@ -42,6 +42,16 @@
             controller='ckanext.ga_report.controller:GaReport',
             action='csv'
         )
+        map.connect(
+            '/data/site-usage/downloads',
+            controller='ckanext.ga_report.controller:GaReport',
+            action='downloads'
+        )
+        map.connect(
+            '/data/site-usage/downloads_{month}.csv',
+            controller='ckanext.ga_report.controller:GaReport',
+            action='csv_downloads'
+        )
 
         # GaDatasetReport
         map.connect(

--- a/ckanext/ga_report/templates/ga_report/ga_util.html
+++ b/ckanext/ga_report/templates/ga_report/ga_util.html
@@ -5,6 +5,14 @@
   xmlns:xi="http://www.w3.org/2001/XInclude"
   py:strip=""
   >
+
+<select name="month" py:def="month_selector(current_month, months, day)">
+    <option value='' py:attrs="{'selected': 'selected' if not current_month else None}">All months</option>
+  <py:for each="i, (val,desc) in enumerate(months)">
+    <option value='${val}' py:attrs="{'selected': 'selected' if current_month == val else None}">${desc}<py:if test="i == 0 and day"> (up to ${day})</py:if></option>
+  </py:for>
+</select>
+
 
 <table py:def="social_table(items, with_source=False)" class="table table-condensed table-bordered table-striped">
     <tr>
@@ -36,6 +44,23 @@
  </table>
 
 
+<table py:def="downloads_table(items)" class="table table-condensed table-bordered table-striped">
+    <tr>
+        <th>Dataset and resource</th>
+        <th>Downloads</th>
+    </tr>
+    <py:for each="resource, value in items">
+    <tr>
+        <td><!--! name and description may both be empty/None, so fall back to '' before strip() and then to "No name" -->
+          <strong>${resource.resource_group.package.title}</strong><br/>
+          ${h.link_to((resource.name or resource.description or '').strip() or "No name", h.url_for(controller='package', action='resource_read', id=resource.resource_group.package.name, resource_id=resource.id))}<br/>
+        </td>
+        <td>${value}</td>
+    </tr>
+    </py:for>
+ </table>
+
+
 <div py:def="usage_nav(active_name)" id="minornavigation">
     <div id="minornavigation-bg-left">
     <div id="minornavigation-bg-right">
@@ -47,6 +72,9 @@
         <li py:attrs="{'class': 'active' if active_name=='Datasets' else None}">
                 <a py:attrs="{'class': 'active' if active_name=='Datasets' else None}"  href="${h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport',action='read')}"><img src="/images/icons/page_white.png" height="16px" width="16px" alt="None" class="inline-icon "/> Datasets</a>
         </li>
+        <li py:attrs="{'class': 'active' if active_name=='Downloads' else None}">
+                <a py:attrs="{'class': 'active' if active_name=='Downloads' else None}"  href="${h.url_for(controller='ckanext.ga_report.controller:GaReport',action='downloads')}"><img src="/images/icons/page_white.png" height="16px" width="16px" alt="None" class="inline-icon "/> Downloads</a>
+        </li>
       </ul>
     </div>
     </div>

--- a/ckanext/ga_report/templates/ga_report/notes.html
+++ b/ckanext/ga_report/templates/ga_report/notes.html
@@ -6,8 +6,8 @@
     <li class="widget-container boxed widget_text">
       <h4>Notes</h4>
       <ul>
-          <li>"Views" is the number of sessions during which the page was viewed one or more times (technically known as "unique pageviews").</li>
-          <li>"Visits" is the number of unique user visits to a page, counted once for each visitor for each session.</li>
+          <li>"Views" is the number of times a page was loaded in users' browsers.</li>
+          <li>"Visits" is the number of unique user visits to a page, counted once for each visitor for each of their browsing sessions.</li>
           <li>These usage statistics are confined to users with javascript enabled, which excludes web crawlers and API calls.</li>
           <li>The results are not shown when the number of views/visits is tiny. Where these relate to site pages, results are available in full in the CSV download. Where these relate to users' web browser information, results are not disclosed, for privacy reasons.</li>
       </ul>

--- a/ckanext/ga_report/templates/ga_report/publisher/index.html
+++ b/ckanext/ga_report/templates/ga_report/publisher/index.html
@@ -27,13 +27,9 @