Cap the current month's download period at today when that month is requested explicitly
When downloading the current month by naming it explicitly (rather than using "latest"), record that the data only runs up to today instead of to the end of the month.

--- a/ckanext/ga_report/command.py
+++ b/ckanext/ga_report/command.py
@@ -55,6 +55,36 @@
         init_service('token.dat',
                       self.args[0] if self.args
                                    else 'credentials.json')
+
+class FixTimePeriods(CkanCommand):
+    """
+    Fixes the 'All' records for GA_Urls
+
+    It is possible that older urls that haven't recently been visited
+    do not have All records.  This command will traverse through those
+    records and generate valid All records for them.
+    """
+    # The docstring opens with a newline; strip it so line one is the summary.
+    summary = __doc__.strip().split('\n')[0]
+    usage = __doc__
+    max_args = 0
+    min_args = 0
+
+    def __init__(self, name):
+        super(FixTimePeriods, self).__init__(name)
+
+    def command(self):
+        """Load the config, rebind the session and create missing 'All' rows."""
+        import ckan.model as model
+        from ga_model import post_update_url_stats
+        self._load_config()
+        model.Session.remove()
+        model.Session.configure(bind=model.meta.engine)
+        log = logging.getLogger('ckanext.ga_report')
+        log.info("Updating 'All' records for old URLs")
+        post_update_url_stats()
+        log.info("Processing complete")
+
 
 
 class LoadAnalytics(CkanCommand):

--- a/ckanext/ga_report/controller.py
+++ b/ckanext/ga_report/controller.py
@@ -13,6 +13,7 @@
 
 log = logging.getLogger('ckanext.ga-report')
 
+DOWNLOADS_AVAILABLE_FROM = '2012-12'
 
 def _get_month_name(strdate):
     import calendar
@@ -38,6 +39,7 @@
         q=  q.filter(cls.stat_name==stat_key)
 
     vals = q.order_by("period_name desc").all()
+
     if vals and vals[0][1]:
         day = int(vals[0][1])
         ordinal = 'th' if 11 <= day <= 13 \
@@ -69,25 +71,6 @@
         for entry in entries:
             writer.writerow([entry.period_name.encode('utf-8'),
                              entry.stat_name.encode('utf-8'),
-                             entry.key.encode('utf-8'),
-                             entry.value.encode('utf-8')])
-
-    def csv_downloads(self, month):
-        import csv
-
-        q = model.Session.query(GA_Stat).filter(GA_Stat.stat_name=='Downloads')
-        if month != 'all':
-            q = q.filter(GA_Stat.period_name==month)
-        entries = q.order_by('GA_Stat.period_name, GA_Stat.key').all()
-
-        response.headers['Content-Type'] = "text/csv; charset=utf-8"
-        response.headers['Content-Disposition'] = str('attachment; filename=downloads_%s.csv' % (month,))
-
-        writer = csv.writer(response)
-        writer.writerow(["Period", "Resource URL", "Count"])
-
-        for entry in entries:
-            writer.writerow([entry.period_name.encode('utf-8'),
                              entry.key.encode('utf-8'),
                              entry.value.encode('utf-8')])
 
@@ -202,35 +185,6 @@
 
         return render('ga_report/site/index.html')
 
-    def downloads(self):
-
-        # Get the month details by fetching distinct values and determining the
-        # month names from the values.
-        c.months, c.day = _month_details(GA_Stat, "Downloads")
-
-        # Work out which month to show, based on query params of the first item
-        c.month_desc = 'all months'
-        c.month = request.params.get('month', '')
-        if c.month:
-            c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month])
-
-        c.downloads = []
-        q = model.Session.query(GA_Stat).filter(GA_Stat.stat_name=='Downloads')
-        q = q.filter(GA_Stat.period_name==c.month) if c.month else q
-        q = q.order_by("ga_stat.value::int desc")
-
-        data = collections.defaultdict(int)
-        for entry in q.all():
-            r = model.Session.query(model.Resource).filter(model.Resource.url==entry.key).first()
-            if not r:
-                continue
-            data[r] += int(entry.value)
-
-        c.downloads = [(k,v,) for k,v in data.iteritems()]
-        c.downloads = sorted(c.downloads, key=operator.itemgetter(1), reverse=True)
-
-        return render('ga_report/site/downloads.html')
-
 
 class GaDatasetReport(BaseController):
     """
@@ -275,13 +229,14 @@
             str('attachment; filename=datasets_%s_%s.csv' % (c.publisher_name, month,))
 
         writer = csv.writer(response)
-        writer.writerow(["Dataset Title", "Dataset Name", "Views", "Visits", "Period Name"])
-
-        for package,view,visit in packages:
+        writer.writerow(["Dataset Title", "Dataset Name", "Views", "Visits", "Resource downloads", "Period Name"])
+
+        for package,view,visit,downloads in packages:
             writer.writerow([package.title.encode('utf-8'),
                              package.name.encode('utf-8'),
                              view,
                              visit,
+                             downloads,
                              month])
 
     def publishers(self):
@@ -302,10 +257,10 @@
 
     def _get_packages(self, publisher=None, count=-1):
         '''Returns the datasets in order of views'''
-        if count == -1:
-            count = sys.maxint
-
+        have_download_data = True
         month = c.month or 'All'
+        if month != 'All':
+            have_download_data = month >= DOWNLOADS_AVAILABLE_FROM
 
         q = model.Session.query(GA_Url,model.Package)\
             .filter(model.Package.name==GA_Url.package_id)\
@@ -315,9 +270,25 @@
         q = q.filter(GA_Url.period_name==month)
         q = q.order_by('ga_url.pageviews::int desc')
         top_packages = []
-        for entry,package in q.limit(count):
+        if count == -1:
+            entries = q.all()
+        else:
+            entries = q.limit(count)
+
+        for entry,package in entries:
             if package:
-                top_packages.append((package, entry.pageviews, entry.visits))
+                # Downloads ....
+                if have_download_data:
+                    dls = model.Session.query(GA_Stat).\
+                        filter(GA_Stat.stat_name=='Downloads').\
+                        filter(GA_Stat.key==package.name)
+                    if month != 'All':  # Fetch everything unless the month is specific
+                        dls = dls.filter(GA_Stat.period_name==month)
+
+                    downloads = sum(int(d.value) for d in dls.all())
+                else:
+                    downloads = 'No data'
+                top_packages.append((package, entry.pageviews, entry.visits, downloads))
             else:
                 log.warning('Could not find package associated package')
 

--- a/ckanext/ga_report/download_analytics.py
+++ b/ckanext/ga_report/download_analytics.py
@@ -32,6 +32,11 @@
         first_of_this_month = datetime.datetime(date.year, date.month, 1)
         _, last_day_of_month = calendar.monthrange(int(date.year), int(date.month))
         last_of_this_month =  datetime.datetime(date.year, date.month, last_day_of_month)
+        # if this is the latest month, note that it is only up until today
+        now = datetime.datetime.now()
+        if now.year == date.year and now.month == date.month:
+            last_day_of_month = now.day
+            last_of_this_month = now
         periods = ((date.strftime(FORMAT_MONTH),
                     last_day_of_month,
                     first_of_this_month, last_of_this_month),)
@@ -123,8 +128,12 @@
                 log.info('Storing publisher views (%i rows)', len(data.get('url')))
                 self.store(period_name, period_complete_day, data,)
 
+                # Make sure the All records are correct.
+                ga_model.post_update_url_stats()
+
                 log.info('Aggregating datasets by publisher')
                 ga_model.update_publisher_stats(period_name) # about 30 seconds.
+
 
             log.info('Downloading and storing analytics for site-wide stats')
             self.sitewide_stats( period_name, period_complete_day )
@@ -180,6 +189,7 @@
                                  end_date=end_date).execute()
 
         packages = []
+        log.info("There are %d results" % results['totalResults'])
         for entry in results.get('rows'):
             (loc,pageviews,visits) = entry
             url = _normalize_url('http:/' + loc) # strips off domain e.g. www.data.gov.uk or data.gov.uk
@@ -294,6 +304,10 @@
 
     def _download_stats(self, start_date, end_date, period_name, period_complete_day):
         """ Fetches stats about language and country """
+        import ckan.model as model
+
+        data = {}
+
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
                                  start_date=start_date,
@@ -310,10 +324,37 @@
             log.info("There is no download data for this time period")
             return
 
-        # [[url, count], [url],count]
-        data = {}
-        for result in result_data:
-            data[result[0]] = data.get(result[0], 0) + int(result[1])
+        def process_result_data(result_data, cached=False):
+            # Add each row's event count to data[<package name>]. cached=True
+            # matches the URL against Resource.cache_url, otherwise Resource.url.
+            for result in result_data:
+                url = result[0].strip()
+                # Get package id associated with the resource that has this URL.
+                q = model.Session.query(model.Resource)
+                if cached:
+                    r = q.filter(model.Resource.cache_url.like("%s%%" % url)).first()
+                else:
+                    r = q.filter(model.Resource.url.like("%s%%" % url)).first()
+                package_name = r.resource_group.package.name if r else ""
+                if package_name:
+                    data[package_name] = data.get(package_name, 0) + int(result[1])
+                else:
+                    log.warning(u"Could not find resource for URL: {url}".format(url=url))
+
+        process_result_data(results.get('rows'))
+        results = self.service.data().ga().get(
+                                 ids='ga:' + self.profile_id,
+                                 start_date=start_date,
+                                 filters='ga:eventAction==download-cache',
+                                 metrics='ga:totalEvents',
+                                 sort='-ga:totalEvents',
+                                 dimensions="ga:eventLabel",
+                                 max_results=10000,
+                                 end_date=end_date).execute()
+        # 'rows' may be missing when nothing matched, so fall back to an empty list.
+        # NOTE(review): cached=False leaves the cache_url branch unused - confirm.
+        process_result_data(results.get('rows') or [], cached=False)
+
         self._filter_out_long_tail(data, MIN_DOWNLOADS)
         ga_model.update_sitewide_stats(period_name, "Downloads", data, period_complete_day)
 

--- a/ckanext/ga_report/ga_model.py
+++ b/ckanext/ga_report/ga_model.py
@@ -175,6 +175,42 @@
     model.Session.flush()
     model.Session.commit()
     model.repo.commit_and_remove()
+
+def post_update_url_stats():
+    """ Check the distinct url field in ga_url and make sure
+        it has an All record.  If not then create one.
+
+        After running this then every URL should have an All
+        record regardless of whether the URL has an entry for
+        the month being currently processed.
+    """
+    query = """select url, pageviews::int, visits::int
+               from ga_url
+               where url not in (select url from ga_url where period_name ='All')"""
+    connection = model.Session.connection()
+    res = connection.execute(query)
+
+    views, visits = {}, {}
+    # url, views, visits
+    for row in res:
+        views[row[0]] = views.get(row[0], 0) + row[1]
+        visits[row[0]] = visits.get(row[0], 0) + row[2]
+
+    for key in views.keys():
+        package, publisher = _get_package_and_publisher(key)
+
+        values = {'id': make_uuid(),
+                  'period_name': "All",
+                  'period_complete_day': 0,
+                  'url': key,
+                  'pageviews': views[key],
+                  'visits': visits[key],
+                  'department_id': publisher,
+                  # the package resolved for this URL (first value unpacked above)
+                  'package_id': package
+                  }
+        model.Session.add(GA_Url(**values))
+    model.Session.commit()
 
 
 def update_url_stats(period_name, period_complete_day, url_data):

--- a/ckanext/ga_report/templates/ga_report/ga_util.html
+++ b/ckanext/ga_report/templates/ga_report/ga_util.html
@@ -44,23 +44,6 @@
  </table>
 
 
-<table py:def="downloads_table(items)" class="table table-condensed table-bordered table-striped">
-    <tr>
-        <th>Dataset and resource</th>
-        <th>Downloads</th>
-    </tr>
-    <py:for each="resource, value in items">
-    <tr>
-        <td>
-          <strong>${resource.resource_group.package.title}</strong><br/>
-          ${h.link_to((resource.name or resource.description).strip() or "No name", h.url_for(controller='package', action='resource_read', id=resource.resource_group.package.name, resource_id=resource.id))}<br/>
-        </td>
-        <td>${value}</td>
-    </tr>
-    </py:for>
- </table>
-
-
 <div py:def="usage_nav(active_name)" id="minornavigation">
     <div id="minornavigation-bg-left">
     <div id="minornavigation-bg-right">
@@ -72,9 +55,6 @@
         <li py:attrs="{'class': 'active' if active_name=='Datasets' else None}">
                 <a py:attrs="{'class': 'active' if active_name=='Datasets' else None}"  href="${h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport',action='read')}"><img src="/images/icons/page_white.png" height="16px" width="16px" alt="None" class="inline-icon "/> Datasets</a>
         </li>
-        <li py:attrs="{'class': 'active' if active_name=='Downloads' else None}">
-                <a py:attrs="{'class': 'active' if active_name=='Downloads' else None}"  href="${h.url_for(controller='ckanext.ga_report.controller:GaReport',action='downloads')}"><img src="/images/icons/page_white.png" height="16px" width="16px" alt="None" class="inline-icon "/> Downloads</a>
-        </li>
       </ul>
     </div>
     </div>

--- a/ckanext/ga_report/templates/ga_report/notes.html
+++ b/ckanext/ga_report/templates/ga_report/notes.html
@@ -7,7 +7,7 @@
       <h4>Notes</h4>
       <ul>
           <li>"Views" is the number of times a page was loaded in users' browsers.</li>
-          <li>"Visits" is the number of unique user visits to a page, counted once for each visitor for each of their browsing sessions.</li>
+          <li>"Downloads" is the number of times a user has clicked to download either an original or cached resource for a particular dataset since December 2012.</li>
           <li>These usage statistics are confined to users with javascript enabled, which excludes web crawlers and API calls.</li>
           <li>The results are not shown when the number of views/visits is tiny. Where these relate to site pages, results are available in full in the CSV download. Where these relate to users' web browser information, results are not disclosed, for privacy reasons.</li>
       </ul>

--- a/ckanext/ga_report/templates/ga_report/publisher/read.html
+++ b/ckanext/ga_report/templates/ga_report/publisher/read.html
@@ -44,15 +44,15 @@
      <table py:if="c.top_packages" class="table table-condensed table-bordered table-striped">
 	 <tr>
 	   <th>Dataset</th>
-<!--	   <th>Visits</th> -->
 	   <th>Views</th>
+     <th>Downloads</th>
 	 </tr>
-        <py:for each="package, views, visits in c.top_packages">
+        <py:for each="package, views, visits,downloads in c.top_packages">
 	  <tr>
 	    <td>${h.link_to(package.title or package.name, h.url_for(controller='package', action='read', id=package.name))}
 	    </td>
-<!--	    <td>${visits}</td> -->
 	    <td>${views}</td>
+      <td>${downloads}</td>
 	  </tr>
         </py:for>
      </table>

file:a/setup.py -> file:b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -33,6 +33,7 @@
         loadanalytics = ckanext.ga_report.command:LoadAnalytics
         initdb = ckanext.ga_report.command:InitDB
         getauthtoken = ckanext.ga_report.command:GetAuthToken
+        fixtimeperiods = ckanext.ga_report.command:FixTimePeriods
 	""",
 )