Template fix for broken publisher links
Template fix for broken publisher links

file:a/README.rst -> file:b/README.rst
--- a/README.rst
+++ b/README.rst
@@ -32,6 +32,7 @@
 
       googleanalytics.id = UA-1010101-1
       googleanalytics.account = Account name (e.g. data.gov.uk, see top level item at https://www.google.com/analytics)
+      googleanalytics.token.filepath = ~/pyenv/token.dat
       ga-report.period = monthly
       ga-report.bounce_url = /
 
@@ -82,13 +83,17 @@
 
     $ paster getauthtoken --config=../ckan/development.ini
 
+Now ensure you reference the correct path to your token.dat in your CKAN config file (e.g. development.ini)::
+
+    googleanalytics.token.filepath = ~/pyenv/token.dat
+
 
 Tutorial
 --------
 
 Download some GA data and store it in CKAN's database. (Ensure your CKAN pyenv is still activated, run the command from ``src/ckanext-ga-report``, alter the ``--config`` option to point to your site config file) and specifying the name of your auth file (token.dat by default) from the previous step::
 
-    $ paster loadanalytics token.dat latest --config=../ckan/development.ini
+    $ paster loadanalytics latest --config=../ckan/development.ini
 
 The value after the token file is how much data you want to retrieve, this can be
 

--- a/ckanext/ga_report/command.py
+++ b/ckanext/ga_report/command.py
@@ -1,5 +1,8 @@
 import logging
 import datetime
+import os
+
+from pylons import config
 
 from ckan.lib.cli import CkanCommand
 # No other CKAN imports allowed until _load_config is run,
@@ -58,20 +61,17 @@
     """Get data from Google Analytics API and save it
     in the ga_model
 
-    Usage: paster loadanalytics <tokenfile> <time-period>
+    Usage: paster loadanalytics <time-period>
 
-    Where <tokenfile> is the name of the auth token file from
-    the getauthtoken step.
-
-    And where <time-period> is:
+    Where <time-period> is:
         all         - data for all time
         latest      - (default) just the 'latest' data
         YYYY-MM     - just data for the specific month
     """
     summary = __doc__.split('\n')[0]
     usage = __doc__
-    max_args = 2
-    min_args = 1
+    max_args = 1
+    min_args = 0
 
     def __init__(self, name):
         super(LoadAnalytics, self).__init__(name)
@@ -92,19 +92,25 @@
         from download_analytics import DownloadAnalytics
         from ga_auth import (init_service, get_profile_id)
 
+        ga_token_filepath = os.path.expanduser(config.get('googleanalytics.token.filepath', ''))
+        if not ga_token_filepath:
+            print 'ERROR: In the CKAN config you need to specify the filepath of the ' \
+                  'Google Analytics token file under key: googleanalytics.token.filepath'
+            return
+
         try:
-            svc = init_service(self.args[0], None)
+            svc = init_service(ga_token_filepath, None)
         except TypeError:
             print ('Have you correctly run the getauthtoken task and '
-                   'specified the correct token file?')
+                   'specified the correct token file in the CKAN config under '
+                   '"googleanalytics.token.filepath"?')
             return
 
         downloader = DownloadAnalytics(svc, profile_id=get_profile_id(svc),
                                        delete_first=self.options.delete_first,
                                        skip_url_stats=self.options.skip_url_stats)
 
-        time_period = self.args[1] if self.args and len(self.args) > 1 \
-            else 'latest'
+        time_period = self.args[0] if self.args else 'latest'
         if time_period == 'all':
             downloader.all_()
         elif time_period == 'latest':

--- a/ckanext/ga_report/controller.py
+++ b/ckanext/ga_report/controller.py
@@ -22,12 +22,29 @@
 
 
 def _month_details(cls):
-    '''Returns a list of all the month names'''
+    '''
+    Returns a list of all the periods for which we have data, unfortunately
+    knows too much about the type of the cls being passed as GA_Url has a
+    more complex query
+
+    This may need extending if we add a period_name to the stats
+    '''
     months = []
-    vals = model.Session.query(cls.period_name).filter(cls.period_name!='All').distinct().all()
+    day = None
+
+    vals = model.Session.query(cls.period_name,cls.period_complete_day)\
+        .filter(cls.period_name!='All').distinct(cls.period_name)\
+        .order_by("period_name desc").all()
+    if vals and vals[0][1]:
+        day = int(vals[0][1])
+        ordinal = 'th' if 11 <= day <= 13 \
+            else {1:'st',2:'nd',3:'rd'}.get(day % 10, 'th')
+        day = "{day}{ordinal}".format(day=day, ordinal=ordinal)
+
     for m in vals:
         months.append( (m[0], _get_month_name(m[0])))
-    return sorted(months, key=operator.itemgetter(0), reverse=True)
+
+    return months, day
 
 
 class GaReport(BaseController):
@@ -56,7 +73,7 @@
 
         # Get the month details by fetching distinct values and determining the
         # month names from the values.
-        c.months = _month_details(GA_Stat)
+        c.months, c.day = _month_details(GA_Stat)
 
         # Work out which month to show, based on query params of the first item
         c.month_desc = 'all months'
@@ -220,7 +237,7 @@
 
         # Get the month details by fetching distinct values and determining the
         # month names from the values.
-        c.months = _month_details(GA_Url)
+        c.months, c.day = _month_details(GA_Url)
 
         # Work out which month to show, based on query params of the first item
         c.month = request.params.get('month', '')
@@ -278,7 +295,7 @@
 
         # Get the month details by fetching distinct values and determining the
         # month names from the values.
-        c.months = _month_details(GA_Url)
+        c.months, c.day = _month_details(GA_Url)
 
         # Work out which month to show, based on query params of the first item
         c.month = request.params.get('month', '')

--- a/ckanext/ga_report/download_analytics.py
+++ b/ckanext/ga_report/download_analytics.py
@@ -126,7 +126,7 @@
                 ga_model.update_publisher_stats(period_name) # about 30 seconds.
 
             log.info('Downloading and storing analytics for site-wide stats')
-            self.sitewide_stats( period_name )
+            self.sitewide_stats( period_name, period_complete_day )
 
             log.info('Downloading and storing analytics for social networks')
             self.update_social_info(period_name, start_date, end_date)
@@ -153,7 +153,8 @@
         data = collections.defaultdict(list)
         rows = results.get('rows',[])
         for row in rows:
-            data[_normalize_url(row[0])].append( (row[1], int(row[2]),) )
+            url = _normalize_url('http:/' + row[0])
+            data[url].append( (row[1], int(row[2]),) )
         ga_model.update_social(period_name, data)
 
 
@@ -194,7 +195,7 @@
         if 'url' in data:
             ga_model.update_url_stats(period_name, period_complete_day, data['url'])
 
-    def sitewide_stats(self, period_name):
+    def sitewide_stats(self, period_name, period_complete_day):
         import calendar
         year, month = period_name.split('-')
         _, last_day_of_month = calendar.monthrange(int(year), int(month))
@@ -205,7 +206,7 @@
                  '_locale_stats', '_browser_stats', '_mobile_stats']
         for f in funcs:
             log.info('Downloading analytics for %s' % f.split('_')[1])
-            getattr(self, f)(start_date, end_date, period_name)
+            getattr(self, f)(start_date, end_date, period_name, period_complete_day)
 
     def _get_results(result_data, f):
         data = {}
@@ -214,7 +215,7 @@
             data[key] = data.get(key,0) + result[1]
         return data
 
-    def _totals_stats(self, start_date, end_date, period_name):
+    def _totals_stats(self, start_date, end_date, period_name, period_complete_day):
         """ Fetches distinct totals, total pageviews etc """
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
@@ -224,7 +225,8 @@
                                  max_results=10000,
                                  end_date=end_date).execute()
         result_data = results.get('rows')
-        ga_model.update_sitewide_stats(period_name, "Totals", {'Total page views': result_data[0][0]})
+        ga_model.update_sitewide_stats(period_name, "Totals", {'Total page views': result_data[0][0]},
+            period_complete_day)
 
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
@@ -239,7 +241,7 @@
             'New visits': result_data[0][2],
             'Total visits': result_data[0][3],
         }
-        ga_model.update_sitewide_stats(period_name, "Totals", data)
+        ga_model.update_sitewide_stats(period_name, "Totals", data, period_complete_day)
 
         # Bounces from / or another configurable page.
         path = '/%s%s' % (config.get('googleanalytics.account'),
@@ -261,10 +263,11 @@
         bounces, total = [float(x) for x in result_data[0][1:]]
         pct = 100 * bounces/total
         log.info('%d bounces from %d total == %s', bounces, total, pct)
-        ga_model.update_sitewide_stats(period_name, "Totals", {'Bounce rate (home page)': pct})
-
-
-    def _locale_stats(self, start_date, end_date, period_name):
+        ga_model.update_sitewide_stats(period_name, "Totals", {'Bounce rate (home page)': pct},
+            period_complete_day)
+
+
+    def _locale_stats(self, start_date, end_date, period_name, period_complete_day):
         """ Fetches stats about language and country """
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
@@ -279,16 +282,16 @@
         for result in result_data:
             data[result[0]] = data.get(result[0], 0) + int(result[2])
         self._filter_out_long_tail(data, MIN_VIEWS)
-        ga_model.update_sitewide_stats(period_name, "Languages", data)
+        ga_model.update_sitewide_stats(period_name, "Languages", data, period_complete_day)
 
         data = {}
         for result in result_data:
             data[result[1]] = data.get(result[1], 0) + int(result[2])
         self._filter_out_long_tail(data, MIN_VIEWS)
-        ga_model.update_sitewide_stats(period_name, "Country", data)
-
-
-    def _social_stats(self, start_date, end_date, period_name):
+        ga_model.update_sitewide_stats(period_name, "Country", data, period_complete_day)
+
+
+    def _social_stats(self, start_date, end_date, period_name, period_complete_day):
         """ Finds out which social sites people are referred from """
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
@@ -304,10 +307,10 @@
             if not result[0] == '(not set)':
                 data[result[0]] = data.get(result[0], 0) + int(result[2])
         self._filter_out_long_tail(data, 3)
-        ga_model.update_sitewide_stats(period_name, "Social sources", data)
-
-
-    def _os_stats(self, start_date, end_date, period_name):
+        ga_model.update_sitewide_stats(period_name, "Social sources", data, period_complete_day)
+
+
+    def _os_stats(self, start_date, end_date, period_name, period_complete_day):
         """ Operating system stats """
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
@@ -322,17 +325,17 @@
         for result in result_data:
             data[result[0]] = data.get(result[0], 0) + int(result[2])
         self._filter_out_long_tail(data, MIN_VIEWS)
-        ga_model.update_sitewide_stats(period_name, "Operating Systems", data)
+        ga_model.update_sitewide_stats(period_name, "Operating Systems", data, period_complete_day)
 
         data = {}
         for result in result_data:
             if int(result[2]) >= MIN_VIEWS:
                 key = "%s %s" % (result[0],result[1])
                 data[key] = result[2]
-        ga_model.update_sitewide_stats(period_name, "Operating Systems versions", data)
-
-
-    def _browser_stats(self, start_date, end_date, period_name):
+        ga_model.update_sitewide_stats(period_name, "Operating Systems versions", data, period_complete_day)
+
+
+    def _browser_stats(self, start_date, end_date, period_name, period_complete_day):
         """ Information about browsers and browser versions """
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
@@ -349,14 +352,14 @@
         for result in result_data:
             data[result[0]] = data.get(result[0], 0) + int(result[2])
         self._filter_out_long_tail(data, MIN_VIEWS)
-        ga_model.update_sitewide_stats(period_name, "Browsers", data)
+        ga_model.update_sitewide_stats(period_name, "Browsers", data, period_complete_day)
 
         data = {}
         for result in result_data:
             key = "%s %s" % (result[0], self._filter_browser_version(result[0], result[1]))
             data[key] = data.get(key, 0) + int(result[2])
         self._filter_out_long_tail(data, MIN_VIEWS)
-        ga_model.update_sitewide_stats(period_name, "Browser versions", data)
+        ga_model.update_sitewide_stats(period_name, "Browser versions", data, period_complete_day)
 
     @classmethod
     def _filter_browser_version(cls, browser, version_str):
@@ -380,7 +383,7 @@
                 ver = ver[0] + ver[1] + 'X' * num_hidden_digits
         return ver
 
-    def _mobile_stats(self, start_date, end_date, period_name):
+    def _mobile_stats(self, start_date, end_date, period_name, period_complete_day):
         """ Info about mobile devices """
 
         results = self.service.data().ga().get(
@@ -397,13 +400,13 @@
         for result in result_data:
             data[result[0]] = data.get(result[0], 0) + int(result[2])
         self._filter_out_long_tail(data, MIN_VIEWS)
-        ga_model.update_sitewide_stats(period_name, "Mobile brands", data)
+        ga_model.update_sitewide_stats(period_name, "Mobile brands", data, period_complete_day)
 
         data = {}
         for result in result_data:
             data[result[1]] = data.get(result[1], 0) + int(result[2])
         self._filter_out_long_tail(data, MIN_VIEWS)
-        ga_model.update_sitewide_stats(period_name, "Mobile devices", data)
+        ga_model.update_sitewide_stats(period_name, "Mobile devices", data, period_complete_day)
 
     @classmethod
     def _filter_out_long_tail(cls, data, threshold=10):

--- a/ckanext/ga_report/ga_model.py
+++ b/ckanext/ga_report/ga_model.py
@@ -9,6 +9,8 @@
 
 import ckan.model as model
 from ckan.lib.base import *
+
+log = __import__('logging').getLogger(__name__)
 
 def make_uuid():
     return unicode(uuid.uuid4())
@@ -45,6 +47,7 @@
                   Column('id', types.UnicodeText, primary_key=True,
                          default=make_uuid),
                   Column('period_name', types.UnicodeText),
+                  Column('period_complete_day', types.UnicodeText),
                   Column('stat_name', types.UnicodeText),
                   Column('key', types.UnicodeText),
                   Column('value', types.UnicodeText), )
@@ -132,7 +135,7 @@
             return None, publisher_match.groups()[0]
     return None, None
 
-def update_sitewide_stats(period_name, stat_name, data):
+def update_sitewide_stats(period_name, stat_name, data, period_complete_day):
     for k,v in data.iteritems():
         item = model.Session.query(GA_Stat).\
             filter(GA_Stat.period_name==period_name).\
@@ -142,11 +145,13 @@
             item.period_name = period_name
             item.key = k
             item.value = v
+            item.period_complete_day = period_complete_day
             model.Session.add(item)
         else:
             # create the row
             values = {'id': make_uuid(),
                      'period_name': period_name,
+                     'period_complete_day': period_complete_day,
                      'key': k,
                      'value': v,
                      'stat_name': stat_name
@@ -212,7 +217,7 @@
                       'period_complete_day': 0,
                       'url': url,
                       'pageviews': sum([int(e.pageviews) for e in entries]) + old_pageviews,
-                      'visits': sum([int(e.visits) for e in entries]) + old_visits,
+                      'visits': sum([int(e.visits or 0) for e in entries]) + old_visits,
                       'department_id': publisher,
                       'package_id': package
                      }
@@ -344,20 +349,33 @@
     model.Session.commit()
 
 def get_score_for_dataset(dataset_name):
+    '''
+    Returns a "current popularity" score for a dataset,
+    based on how many views it has had recently.
+    '''
     import datetime
     now = datetime.datetime.now()
-    period_names = ['%s-%02d' % (now.year, now.month),
-                    '%s-%02d' % (now.year, now.month-1)]
-
-    entry = model.Session.query(GA_Url)\
-        .filter(GA_Url.period_name==period_names[0])\
-        .filter(GA_Url.package_id==dataset_name).first()
-    score = int(entry.pageviews) if entry else 0
-
-    entry = model.Session.query(GA_Url)\
-        .filter(GA_Url.period_name==period_names[1])\
-        .filter(GA_Url.package_id==dataset_name).first()
-    val = int(entry.pageviews) if entry else 0
-    score += val/2 if val else 0
-
-    return 0
+    last_month = now - datetime.timedelta(days=30)
+    period_names = ['%s-%02d' % (last_month.year, last_month.month),
+                    '%s-%02d' % (now.year, now.month),
+                    ]
+
+    score = 0
+    for period_name in period_names:
+        score /= 2 # previous periods are discounted by 50%
+        entry = model.Session.query(GA_Url)\
+                .filter(GA_Url.period_name==period_name)\
+                .filter(GA_Url.package_id==dataset_name).first()
+        # score
+        if entry:
+            views = float(entry.pageviews)
+            if entry.period_complete_day:
+                views_per_day = views / entry.period_complete_day
+            else:
+                views_per_day = views / 15 # guess
+            score += views_per_day
+
+    score = int(score * 100)
+    log.debug('Popularity %s: %s', score, dataset_name)
+    return score
+

--- a/ckanext/ga_report/helpers.py
+++ b/ckanext/ga_report/helpers.py
@@ -50,9 +50,12 @@
             dataset = model.Package.get(ga_url.url[len('/dataset/'):])
             if dataset and not dataset.state == 'active':
                 dataset = None
-                count += 1
-                if count > 10:
-                    break
+            # When testing, it is possible that top datasets are not available
+            # so only go round this loop a few times before falling back on
+            # a random dataset.
+            count += 1
+            if count > 10:
+                break
     if not dataset:
         # fallback
         dataset = model.Session.query(model.Package)\

--- a/ckanext/ga_report/templates/ga_report/ga_util.html
+++ b/ckanext/ga_report/templates/ga_report/ga_util.html
@@ -5,6 +5,14 @@
   xmlns:xi="http://www.w3.org/2001/XInclude"
   py:strip=""
   >
+
+<select name="month" py:def="month_selector(current_month, months, day)">
+    <option value='' py:attrs="{'selected': 'selected' if not current_month else None}">All months</option>
+  <py:for each="i, (val,desc) in enumerate(months)">
+    <option value='${val}' py:attrs="{'selected': 'selected' if current_month == val else None}">${desc}<py:if test="i == 0 and day"> (up to ${day})</py:if></option>
+  </py:for>
+</select>
+
 
 <table py:def="social_table(items, with_source=False)" class="table table-condensed table-bordered table-striped">
     <tr>

--- a/ckanext/ga_report/templates/ga_report/publisher/index.html
+++ b/ckanext/ga_report/templates/ga_report/publisher/index.html
@@ -27,13 +27,9 @@
 
       <form class="form-inline" action="${h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport',action='publishers')}" method="get">
           <div class="controls">
-          <select name="month">
-                <option value='' py:attrs="{'selected': 'selected' if not c.month else None}">All months</option>
 
-              <py:for each="val,desc in c.months">
-                <option value='${val}' py:attrs="{'selected': 'selected' if c.month == val else None}">${desc}</option>
-              </py:for>
-          </select>
+          ${month_selector(c.month, c.months, c.day)}
+
            <input class="btn button btn-primary" type='submit' value="Update"/>
           </div>
        </form>
@@ -41,14 +37,13 @@
        <table class="table table-condensed table-bordered table-striped">
 	 <tr>
 	   <th>Publisher</th>
-<!--	   <th>Dataset Visits</th>-->
 	   <th>Dataset Views</th>
 	 </tr>
         <py:for each="publisher, views, visits in c.top_publishers">
 	  <tr>
-	    <td>${h.link_to(publisher.title, h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport', action='read_publisher', id=publisher.name))}
+	    <td>
+	        ${h.link_to(publisher.title, h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport', action='read_publisher', id=publisher.name) + (("?month=" + c.month) if c.month else ''))}
 	    </td>
-<!--	    <td>${visits}</td> -->
 	    <td>${views}</td>
 	  </tr>
         </py:for>

--- a/ckanext/ga_report/templates/ga_report/publisher/read.html
+++ b/ckanext/ga_report/templates/ga_report/publisher/read.html
@@ -25,12 +25,9 @@
 
      <form class="form-inline" action="${h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport',action='read')}" method="get">
         <div class="controls">
-          <select name="month">
-                <option value='' py:attrs="{'selected': 'selected' if not c.month else None}">All months</option>
-              <py:for each="val,desc in c.months">
-                <option value='${val}' py:attrs="{'selected': 'selected' if c.month == val else None}">${desc}</option>
-              </py:for>
-          </select>
+
+          ${month_selector(c.month, c.months, c.day)}
+
           <select name="publisher">
                 <option value='' py:attrs="{'selected': 'selected' if not c.publisher else None}">All publishers</option>
               <py:for each="val,desc in c.publishers">

--- a/ckanext/ga_report/templates/ga_report/site/index.html
+++ b/ckanext/ga_report/templates/ga_report/site/index.html
@@ -24,13 +24,9 @@
 
       <form class="form-inline" action="${h.url_for(controller='ckanext.ga_report.controller:GaReport',action='index')}" method="get">
           <div class="controls">
-          <select name="month">
-                <option value='' py:attrs="{'selected': 'selected' if not c.month else None}">All months</option>
 
-              <py:for each="val,desc in c.months">
-                <option value='${val}' py:attrs="{'selected': 'selected' if c.month == val else None}">${desc}</option>
-              </py:for>
-          </select>
+          ${month_selector(c.month, c.months, c.day)}
+
            <input class="btn button btn-primary" type='submit' value="Update"/>
           </div>
        </form>