Change the entry for the current month to include the date up to which reports are available.
Change the entry for the current month to include the date up to which reports are available.

- Requires a migration:
      alter table ga_stat add column period_complete_day text;
- Makes sure that GA_Stat entries have the period_complete_day set
- Cleans up the template and moves the drop-down to the util template

file:a/README.rst -> file:b/README.rst
--- a/README.rst
+++ b/README.rst
@@ -32,11 +32,11 @@
 
       googleanalytics.id = UA-1010101-1
       googleanalytics.account = Account name (e.g. data.gov.uk, see top level item at https://www.google.com/analytics)
+      googleanalytics.token.filepath = ~/pyenv/token.dat
       ga-report.period = monthly
-      ga-report.bounce_url = /data
+      ga-report.bounce_url = /
 
-   The ga-report.bounce_url specifies the path to use when calculating bounces. For DGU this is /data
-   but you may want to set this to /.
+   The ga-report.bounce_url specifies a particular path to record the bounce rate for. Typically it is / (the home page).
 
 3. Set up this extension's database tables using a paster command. (Ensure your CKAN pyenv is still activated, run the command from ``src/ckanext-ga-report``, alter the ``--config`` option to point to your site config file)::
 
@@ -83,13 +83,17 @@
 
     $ paster getauthtoken --config=../ckan/development.ini
 
+Now ensure you reference the correct path to your token.dat in your CKAN config file (e.g. development.ini)::
+
+    googleanalytics.token.filepath = ~/pyenv/token.dat
+
 
 Tutorial
 --------
 
 Download some GA data and store it in CKAN's database. (Ensure your CKAN pyenv is still activated, run the command from ``src/ckanext-ga-report``, alter the ``--config`` option to point to your site config file) and specifying the name of your auth file (token.dat by default) from the previous step::
 
-    $ paster loadanalytics token.dat latest --config=../ckan/development.ini
+    $ paster loadanalytics latest --config=../ckan/development.ini
 
 The value after the token file is how much data you want to retrieve, this can be
 

--- a/ckanext/ga_report/command.py
+++ b/ckanext/ga_report/command.py
@@ -1,5 +1,8 @@
 import logging
 import datetime
+import os
+
+from pylons import config
 
 from ckan.lib.cli import CkanCommand
 # No other CKAN imports allowed until _load_config is run,
@@ -58,20 +61,17 @@
     """Get data from Google Analytics API and save it
     in the ga_model
 
-    Usage: paster loadanalytics <tokenfile> <time-period>
+    Usage: paster loadanalytics <time-period>
 
-    Where <tokenfile> is the name of the auth token file from
-    the getauthtoken step.
-
-    And where <time-period> is:
+    Where <time-period> is:
         all         - data for all time
         latest      - (default) just the 'latest' data
         YYYY-MM     - just data for the specific month
     """
     summary = __doc__.split('\n')[0]
     usage = __doc__
-    max_args = 2
-    min_args = 1
+    max_args = 1
+    min_args = 0
 
     def __init__(self, name):
         super(LoadAnalytics, self).__init__(name)
@@ -80,6 +80,11 @@
                                default=False,
                                dest='delete_first',
                                help='Delete data for the period first')
+        self.parser.add_option('-s', '--skip_url_stats',
+                               action='store_true',
+                               default=False,
+                               dest='skip_url_stats',
+                               help='Skip the download of URL data - just do site-wide stats')
 
     def command(self):
         self._load_config()
@@ -87,18 +92,25 @@
         from download_analytics import DownloadAnalytics
         from ga_auth import (init_service, get_profile_id)
 
+        ga_token_filepath = os.path.expanduser(config.get('googleanalytics.token.filepath', ''))
+        if not ga_token_filepath:
+            print 'ERROR: In the CKAN config you need to specify the filepath of the ' \
+                  'Google Analytics token file under key: googleanalytics.token.filepath'
+            return
+
         try:
-            svc = init_service(self.args[0], None)
+            svc = init_service(ga_token_filepath, None)
         except TypeError:
             print ('Have you correctly run the getauthtoken task and '
-                   'specified the correct token file?')
+                   'specified the correct token file in the CKAN config under '
+                   '"googleanalytics.token.filepath"?')
             return
 
         downloader = DownloadAnalytics(svc, profile_id=get_profile_id(svc),
-                                       delete_first=self.options.delete_first)
+                                       delete_first=self.options.delete_first,
+                                       skip_url_stats=self.options.skip_url_stats)
 
-        time_period = self.args[1] if self.args and len(self.args) > 1 \
-            else 'latest'
+        time_period = self.args[0] if self.args else 'latest'
         if time_period == 'all':
             downloader.all_()
         elif time_period == 'latest':

--- a/ckanext/ga_report/controller.py
+++ b/ckanext/ga_report/controller.py
@@ -22,12 +22,29 @@
 
 
 def _month_details(cls):
-    '''Returns a list of all the month names'''
+    '''
+    Returns a list of all the periods for which we have data, unfortunately
+    knows too much about the type of the cls being passed as GA_Url has a
+    more complex query
+
+    This may need extending if we add a period_name to the stats
+    '''
     months = []
-    vals = model.Session.query(cls.period_name).filter(cls.period_name!='All').distinct().all()
+    day = None
+
+    vals = model.Session.query(cls.period_name,cls.period_complete_day)\
+        .filter(cls.period_name!='All').distinct(cls.period_name)\
+        .order_by("period_name desc").all()
+    if vals and vals[0][1]:
+        day = int(vals[0][1])
+        ordinal = 'th' if 11 <= day <= 13 \
+            else {1:'st',2:'nd',3:'rd'}.get(day % 10, 'th')
+        day = "{day}{ordinal}".format(day=day, ordinal=ordinal)
+
     for m in vals:
         months.append( (m[0], _get_month_name(m[0])))
-    return sorted(months, key=operator.itemgetter(0), reverse=True)
+
+    return months, day
 
 
 class GaReport(BaseController):
@@ -56,7 +73,7 @@
 
         # Get the month details by fetching distinct values and determining the
         # month names from the values.
-        c.months = _month_details(GA_Stat)
+        c.months, c.day = _month_details(GA_Stat)
 
         # Work out which month to show, based on query params of the first item
         c.month_desc = 'all months'
@@ -71,13 +88,13 @@
         entries = q.order_by('ga_stat.key').all()
 
         def clean_key(key, val):
-            if key in ['Average time on site', 'Pages per visit', 'New visits', 'Bounces']:
+            if key in ['Average time on site', 'Pages per visit', 'New visits', 'Bounce rate (home page)']:
                 val =  "%.2f" % round(float(val), 2)
                 if key == 'Average time on site':
                     mins, secs = divmod(float(val), 60)
                     hours, mins = divmod(mins, 60)
                     val = '%02d:%02d:%02d (%s seconds) ' % (hours, mins, secs, val)
-                if key in ['New visits','Bounces']:
+                if key in ['New visits','Bounce rate (home page)']:
                     val = "%s%%" % val
             if key in ['Total page views', 'Total visits']:
                 val = int(val)
@@ -220,7 +237,7 @@
 
         # Get the month details by fetching distinct values and determining the
         # month names from the values.
-        c.months = _month_details(GA_Url)
+        c.months, c.day = _month_details(GA_Url)
 
         # Work out which month to show, based on query params of the first item
         c.month = request.params.get('month', '')
@@ -232,7 +249,7 @@
         return render('ga_report/publisher/index.html')
 
     def _get_packages(self, publisher=None, count=-1):
-        '''Returns the datasets in order of visits'''
+        '''Returns the datasets in order of views'''
         if count == -1:
             count = sys.maxint
 
@@ -244,11 +261,11 @@
         if publisher:
             q = q.filter(GA_Url.department_id==publisher.name)
         q = q.filter(GA_Url.period_name==month)
-        q = q.order_by('ga_url.visitors::int desc')
+        q = q.order_by('ga_url.pageviews::int desc')
         top_packages = []
         for entry,package in q.limit(count):
             if package:
-                top_packages.append((package, entry.pageviews, entry.visitors))
+                top_packages.append((package, entry.pageviews, entry.visits))
             else:
                 log.warning('Could not find package associated package')
 
@@ -278,7 +295,7 @@
 
         # Get the month details by fetching distinct values and determining the
         # month names from the values.
-        c.months = _month_details(GA_Url)
+        c.months, c.day = _month_details(GA_Url)
 
         # Work out which month to show, based on query params of the first item
         c.month = request.params.get('month', '')
@@ -306,13 +323,13 @@
     month = c.month or 'All'
     connection = model.Session.connection()
     q = """
-        select department_id, sum(pageviews::int) views, sum(visitors::int) visits
+        select department_id, sum(pageviews::int) views, sum(visits::int) visits
         from ga_url
         where department_id <> ''
           and package_id <> ''
           and url like '/dataset/%%'
           and period_name=%s
-        group by department_id order by visits desc
+        group by department_id order by views desc
         """
     if limit:
         q = q + " limit %s;" % (limit)
@@ -329,7 +346,7 @@
 def _get_publishers():
     '''
     Returns a list of all publishers. Each item is a tuple:
-      (names, title)
+      (name, title)
     '''
     publishers = []
     for pub in model.Session.query(model.Group).\

--- a/ckanext/ga_report/download_analytics.py
+++ b/ckanext/ga_report/download_analytics.py
@@ -17,11 +17,13 @@
 class DownloadAnalytics(object):
     '''Downloads and stores analytics info'''
 
-    def __init__(self, service=None, profile_id=None, delete_first=False):
+    def __init__(self, service=None, profile_id=None, delete_first=False,
+                 skip_url_stats=False):
         self.period = config['ga-report.period']
         self.service = service
         self.profile_id = profile_id
         self.delete_first = delete_first
+        self.skip_url_stats = skip_url_stats
 
     def specific_month(self, date):
         import calendar
@@ -92,33 +94,41 @@
 
     def download_and_store(self, periods):
         for period_name, period_complete_day, start_date, end_date in periods:
+            log.info('Period "%s" (%s - %s)',
+                     self.get_full_period_name(period_name, period_complete_day),
+                     start_date.strftime('%Y-%m-%d'),
+                     end_date.strftime('%Y-%m-%d'))
+
             if self.delete_first:
-                log.info('Deleting existing Analytics for period "%s"',
+                log.info('Deleting existing Analytics for this period "%s"',
                          period_name)
                 ga_model.delete(period_name)
-            log.info('Downloading Analytics for period "%s" (%s - %s)',
-                     self.get_full_period_name(period_name, period_complete_day),
-                     start_date.strftime('%Y %m %d'),
-                     end_date.strftime('%Y %m %d'))
-
-            # Clean up the entries before we run this
-            ga_model.pre_update_url_stats(period_name)
-
-            accountName = config.get('googleanalytics.account')
-
-            data = self.download(start_date, end_date, '~/%s/dataset/[a-z0-9-_]+' % accountName)
-            log.info('Storing Dataset Analytics for period "%s"',
-                     self.get_full_period_name(period_name, period_complete_day))
-            self.store(period_name, period_complete_day, data, )
-
-            data = self.download(start_date, end_date, '~/%s/publisher/[a-z0-9-_]+' % accountName)
-            log.info('Storing Publisher Analytics for period "%s"',
-                     self.get_full_period_name(period_name, period_complete_day))
-            self.store(period_name, period_complete_day, data,)
-
-            ga_model.update_publisher_stats(period_name) # about 30 seconds.
-            self.sitewide_stats( period_name )
-
+
+            if not self.skip_url_stats:
+                # Clean out old url data before storing the new
+                ga_model.pre_update_url_stats(period_name)
+
+                accountName = config.get('googleanalytics.account')
+
+                log.info('Downloading analytics for dataset views')
+                data = self.download(start_date, end_date, '~/%s/dataset/[a-z0-9-_]+' % accountName)
+
+                log.info('Storing dataset views (%i rows)', len(data.get('url')))
+                self.store(period_name, period_complete_day, data, )
+
+                log.info('Downloading analytics for publisher views')
+                data = self.download(start_date, end_date, '~/%s/publisher/[a-z0-9-_]+' % accountName)
+
+                log.info('Storing publisher views (%i rows)', len(data.get('url')))
+                self.store(period_name, period_complete_day, data,)
+
+                log.info('Aggregating datasets by publisher')
+                ga_model.update_publisher_stats(period_name) # about 30 seconds.
+
+            log.info('Downloading and storing analytics for site-wide stats')
+            self.sitewide_stats( period_name, period_complete_day )
+
+            log.info('Downloading and storing analytics for social networks')
             self.update_social_info(period_name, start_date, end_date)
 
 
@@ -152,8 +162,8 @@
         start_date = start_date.strftime('%Y-%m-%d')
         end_date = end_date.strftime('%Y-%m-%d')
         query = 'ga:pagePath=%s$' % path
-        metrics = 'ga:uniquePageviews, ga:visits'
-        sort = '-ga:uniquePageviews'
+        metrics = 'ga:pageviews, ga:visits'
+        sort = '-ga:pageviews'
 
         # Supported query params at
         # https://developers.google.com/analytics/devguides/reporting/core/v3/reference
@@ -170,8 +180,12 @@
         packages = []
         for entry in results.get('rows'):
             (loc,pageviews,visits) = entry
-            url = _normalize_url('http:/' + loc)
+            url = _normalize_url('http:/' + loc) # strips off domain e.g. www.data.gov.uk or data.gov.uk
+
             if not url.startswith('/dataset/') and not url.startswith('/publisher/'):
+                # filter out strays like:
+                # /data/user/login?came_from=http://data.gov.uk/dataset/os-code-point-open
+                # /403.html?page=/about&from=http://data.gov.uk/publisher/planning-inspectorate
                 continue
             packages.append( (url, pageviews, visits,) ) # Temporary hack
         return dict(url=packages)
@@ -180,20 +194,18 @@
         if 'url' in data:
             ga_model.update_url_stats(period_name, period_complete_day, data['url'])
 
-    def sitewide_stats(self, period_name):
+    def sitewide_stats(self, period_name, period_complete_day):
         import calendar
         year, month = period_name.split('-')
         _, last_day_of_month = calendar.monthrange(int(year), int(month))
 
         start_date = '%s-01' % period_name
         end_date = '%s-%s' % (period_name, last_day_of_month)
-        print 'Sitewide_stats for %s (%s -> %s)' % (period_name, start_date, end_date)
-
         funcs = ['_totals_stats', '_social_stats', '_os_stats',
                  '_locale_stats', '_browser_stats', '_mobile_stats']
         for f in funcs:
-            print ' + Fetching %s stats' % f.split('_')[1]
-            getattr(self, f)(start_date, end_date, period_name)
+            log.info('Downloading analytics for %s' % f.split('_')[1])
+            getattr(self, f)(start_date, end_date, period_name, period_complete_day)
 
     def _get_results(result_data, f):
         data = {}
@@ -202,17 +214,18 @@
             data[key] = data.get(key,0) + result[1]
         return data
 
-    def _totals_stats(self, start_date, end_date, period_name):
+    def _totals_stats(self, start_date, end_date, period_name, period_complete_day):
         """ Fetches distinct totals, total pageviews etc """
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
                                  start_date=start_date,
-                                 metrics='ga:uniquePageviews',
-                                 sort='-ga:uniquePageviews',
-                                 max_results=10000,
-                                 end_date=end_date).execute()
-        result_data = results.get('rows')
-        ga_model.update_sitewide_stats(period_name, "Totals", {'Total page views': result_data[0][0]})
+                                 metrics='ga:pageviews',
+                                 sort='-ga:pageviews',
+                                 max_results=10000,
+                                 end_date=end_date).execute()
+        result_data = results.get('rows')
+        ga_model.update_sitewide_stats(period_name, "Totals", {'Total page views': result_data[0][0]},
+            period_complete_day)
 
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
@@ -227,36 +240,39 @@
             'New visits': result_data[0][2],
             'Total visits': result_data[0][3],
         }
-        ga_model.update_sitewide_stats(period_name, "Totals", data)
-
-        # Bounces from /data. This url is specified in configuration because
-        # for DGU we don't want /.
-        path = config.get('ga-report.bounce_url','/')
-        print path
-        results = self.service.data().ga().get(
-                                 ids='ga:' + self.profile_id,
-                                 filters='ga:pagePath=~%s$' % (path,),
-                                 start_date=start_date,
-                                 metrics='ga:bounces,ga:uniquePageviews',
+        ga_model.update_sitewide_stats(period_name, "Totals", data, period_complete_day)
+
+        # Bounces from / or another configurable page.
+        path = '/%s%s' % (config.get('googleanalytics.account'),
+                          config.get('ga-report.bounce_url', '/'))
+        results = self.service.data().ga().get(
+                                 ids='ga:' + self.profile_id,
+                                 filters='ga:pagePath==%s' % (path,),
+                                 start_date=start_date,
+                                 metrics='ga:bounces,ga:pageviews',
                                  dimensions='ga:pagePath',
                                  max_results=10000,
                                  end_date=end_date).execute()
         result_data = results.get('rows')
-        for results in result_data:
-            if results[0] == path:
-                bounce, total = [float(x) for x in results[1:]]
-                pct = 100 * bounce/total
-                print "%d bounces from %d total == %s" % (bounce, total, pct)
-                ga_model.update_sitewide_stats(period_name, "Totals", {'Bounces': pct})
-
-
-    def _locale_stats(self, start_date, end_date, period_name):
+        if not result_data or len(result_data) != 1:
+            log.error('Could not pinpoint the bounces for path: %s. Got results: %r',
+                      path, result_data)
+            return
+        results = result_data[0]
+        bounces, total = [float(x) for x in result_data[0][1:]]
+        pct = 100 * bounces/total
+        log.info('%d bounces from %d total == %s', bounces, total, pct)
+        ga_model.update_sitewide_stats(period_name, "Totals", {'Bounce rate (home page)': pct},
+            period_complete_day)
+
+
+    def _locale_stats(self, start_date, end_date, period_name, period_complete_day):
         """ Fetches stats about language and country """
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
                                  start_date=start_date,
-                                 metrics='ga:uniquePageviews',
-                                 sort='-ga:uniquePageviews',
+                                 metrics='ga:pageviews',
+                                 sort='-ga:pageviews',
                                  dimensions="ga:language,ga:country",
                                  max_results=10000,
                                  end_date=end_date).execute()
@@ -265,22 +281,22 @@
         for result in result_data:
             data[result[0]] = data.get(result[0], 0) + int(result[2])
         self._filter_out_long_tail(data, MIN_VIEWS)
-        ga_model.update_sitewide_stats(period_name, "Languages", data)
+        ga_model.update_sitewide_stats(period_name, "Languages", data, period_complete_day)
 
         data = {}
         for result in result_data:
             data[result[1]] = data.get(result[1], 0) + int(result[2])
         self._filter_out_long_tail(data, MIN_VIEWS)
-        ga_model.update_sitewide_stats(period_name, "Country", data)
-
-
-    def _social_stats(self, start_date, end_date, period_name):
+        ga_model.update_sitewide_stats(period_name, "Country", data, period_complete_day)
+
+
+    def _social_stats(self, start_date, end_date, period_name, period_complete_day):
         """ Finds out which social sites people are referred from """
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
                                  start_date=start_date,
-                                 metrics='ga:uniquePageviews',
-                                 sort='-ga:uniquePageviews',
+                                 metrics='ga:pageviews',
+                                 sort='-ga:pageviews',
                                  dimensions="ga:socialNetwork,ga:referralPath",
                                  max_results=10000,
                                  end_date=end_date).execute()
@@ -290,16 +306,16 @@
             if not result[0] == '(not set)':
                 data[result[0]] = data.get(result[0], 0) + int(result[2])
         self._filter_out_long_tail(data, 3)
-        ga_model.update_sitewide_stats(period_name, "Social sources", data)
-
-
-    def _os_stats(self, start_date, end_date, period_name):
+        ga_model.update_sitewide_stats(period_name, "Social sources", data, period_complete_day)
+
+
+    def _os_stats(self, start_date, end_date, period_name, period_complete_day):
         """ Operating system stats """
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
                                  start_date=start_date,
-                                 metrics='ga:uniquePageviews',
-                                 sort='-ga:uniquePageviews',
+                                 metrics='ga:pageviews',
+                                 sort='-ga:pageviews',
                                  dimensions="ga:operatingSystem,ga:operatingSystemVersion",
                                  max_results=10000,
                                  end_date=end_date).execute()
@@ -308,23 +324,23 @@
         for result in result_data:
             data[result[0]] = data.get(result[0], 0) + int(result[2])
         self._filter_out_long_tail(data, MIN_VIEWS)
-        ga_model.update_sitewide_stats(period_name, "Operating Systems", data)
+        ga_model.update_sitewide_stats(period_name, "Operating Systems", data, period_complete_day)
 
         data = {}
         for result in result_data:
             if int(result[2]) >= MIN_VIEWS:
                 key = "%s %s" % (result[0],result[1])
                 data[key] = result[2]
-        ga_model.update_sitewide_stats(period_name, "Operating Systems versions", data)
-
-
-    def _browser_stats(self, start_date, end_date, period_name):
+        ga_model.update_sitewide_stats(period_name, "Operating Systems versions", data, period_complete_day)
+
+
+    def _browser_stats(self, start_date, end_date, period_name, period_complete_day):
         """ Information about browsers and browser versions """
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
                                  start_date=start_date,
-                                 metrics='ga:uniquePageviews',
-                                 sort='-ga:uniquePageviews',
+                                 metrics='ga:pageviews',
+                                 sort='-ga:pageviews',
                                  dimensions="ga:browser,ga:browserVersion",
                                  max_results=10000,
                                  end_date=end_date).execute()
@@ -335,14 +351,14 @@
         for result in result_data:
             data[result[0]] = data.get(result[0], 0) + int(result[2])
         self._filter_out_long_tail(data, MIN_VIEWS)
-        ga_model.update_sitewide_stats(period_name, "Browsers", data)
+        ga_model.update_sitewide_stats(period_name, "Browsers", data, period_complete_day)
 
         data = {}
         for result in result_data:
             key = "%s %s" % (result[0], self._filter_browser_version(result[0], result[1]))
             data[key] = data.get(key, 0) + int(result[2])
         self._filter_out_long_tail(data, MIN_VIEWS)
-        ga_model.update_sitewide_stats(period_name, "Browser versions", data)
+        ga_model.update_sitewide_stats(period_name, "Browser versions", data, period_complete_day)
 
     @classmethod
     def _filter_browser_version(cls, browser, version_str):
@@ -366,14 +382,14 @@
                 ver = ver[0] + ver[1] + 'X' * num_hidden_digits
         return ver
 
-    def _mobile_stats(self, start_date, end_date, period_name):
+    def _mobile_stats(self, start_date, end_date, period_name, period_complete_day):
         """ Info about mobile devices """
 
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
                                  start_date=start_date,
-                                 metrics='ga:uniquePageviews',
-                                 sort='-ga:uniquePageviews',
+                                 metrics='ga:pageviews',
+                                 sort='-ga:pageviews',
                                  dimensions="ga:mobileDeviceBranding, ga:mobileDeviceInfo",
                                  max_results=10000,
                                  end_date=end_date).execute()
@@ -383,13 +399,13 @@
         for result in result_data:
             data[result[0]] = data.get(result[0], 0) + int(result[2])
         self._filter_out_long_tail(data, MIN_VIEWS)
-        ga_model.update_sitewide_stats(period_name, "Mobile brands", data)
+        ga_model.update_sitewide_stats(period_name, "Mobile brands", data, period_complete_day)
 
         data = {}
         for result in result_data:
             data[result[1]] = data.get(result[1], 0) + int(result[2])
         self._filter_out_long_tail(data, MIN_VIEWS)
-        ga_model.update_sitewide_stats(period_name, "Mobile devices", data)
+        ga_model.update_sitewide_stats(period_name, "Mobile devices", data, period_complete_day)
 
     @classmethod
     def _filter_out_long_tail(cls, data, threshold=10):

--- a/ckanext/ga_report/ga_model.py
+++ b/ckanext/ga_report/ga_model.py
@@ -9,6 +9,8 @@
 
 import ckan.model as model
 from ckan.lib.base import *
+
+log = __import__('logging').getLogger(__name__)
 
 def make_uuid():
     return unicode(uuid.uuid4())
@@ -27,7 +29,7 @@
                       Column('period_name', types.UnicodeText),
                       Column('period_complete_day', types.Integer),
                       Column('pageviews', types.UnicodeText),
-                      Column('visitors', types.UnicodeText),
+                      Column('visits', types.UnicodeText),
                       Column('url', types.UnicodeText),
                       Column('department_id', types.UnicodeText),
                       Column('package_id', types.UnicodeText),
@@ -45,6 +47,7 @@
                   Column('id', types.UnicodeText, primary_key=True,
                          default=make_uuid),
                   Column('period_name', types.UnicodeText),
+                  Column('period_complete_day', types.UnicodeText),
                   Column('stat_name', types.UnicodeText),
                   Column('key', types.UnicodeText),
                   Column('value', types.UnicodeText), )
@@ -63,7 +66,7 @@
                   Column('period_name', types.UnicodeText),
                   Column('publisher_name', types.UnicodeText),
                   Column('views', types.UnicodeText),
-                  Column('visitors', types.UnicodeText),
+                  Column('visits', types.UnicodeText),
                   Column('toplevel', types.Boolean, default=False),
                   Column('subpublishercount', types.Integer, default=0),
                   Column('parent', types.UnicodeText),
@@ -111,9 +114,7 @@
     >>> normalize_url('http://data.gov.uk/dataset/weekly_fuel_prices')
     '/dataset/weekly_fuel_prices'
     '''
-    # Deliberately leaving a /
-    url = url.replace('http:/','')
-    return '/' + '/'.join(url.split('/')[2:])
+    return '/' + '/'.join(url.split('/')[3:])
 
 
 def _get_package_and_publisher(url):
@@ -134,7 +135,7 @@
             return None, publisher_match.groups()[0]
     return None, None
 
-def update_sitewide_stats(period_name, stat_name, data):
+def update_sitewide_stats(period_name, stat_name, data, period_complete_day):
     for k,v in data.iteritems():
         item = model.Session.query(GA_Stat).\
             filter(GA_Stat.period_name==period_name).\
@@ -144,11 +145,13 @@
             item.period_name = period_name
             item.key = k
             item.value = v
+            item.period_complete_day = period_complete_day
             model.Session.add(item)
         else:
             # create the row
             values = {'id': make_uuid(),
                      'period_name': period_name,
+                     'period_complete_day': period_complete_day,
                      'key': k,
                      'value': v,
                      'stat_name': stat_name
@@ -157,25 +160,6 @@
         model.Session.commit()
 
 
-def update_url_stat_totals(period_name):
-
-    """
-        items = model.Session.query(GA_Url).\
-            filter(GA_Url.period_name != "All").\
-            filter(GA_Url.url==url).all()
-        values = {'id': make_uuid(),
-                  'period_name': "All",
-                  'period_complete_day': "0",
-                  'url': url,
-                  'pageviews': sum([int(x.pageviews) for x in items]),
-                  'visitors': sum([int(x.visitors) for x in items]),
-                  'department_id': department_id,
-                  'package_id': package
-                 }
-        model.Session.add(GA_Url(**values))
-        model.Session.commit()
-    """
-
 def pre_update_url_stats(period_name):
     model.Session.query(GA_Url).\
             filter(GA_Url.period_name==period_name).delete()
@@ -184,16 +168,25 @@
 
 
 def update_url_stats(period_name, period_complete_day, url_data):
-
-    for url, views, visitors in url_data:
+    '''
+    Given a list of urls and number of hits for each during a given period,
+    stores them in GA_Url under the period and recalculates the totals for
+    the 'All' period.
+    '''
+    for url, views, visits in url_data:
         package, publisher = _get_package_and_publisher(url)
+
 
         item = model.Session.query(GA_Url).\
             filter(GA_Url.period_name==period_name).\
             filter(GA_Url.url==url).first()
         if item:
             item.pageviews = item.pageviews + views
-            item.visitors = item.visitors + visitors
+            item.visits = item.visits + visits
+            if not item.package_id:
+                item.package_id = package
+            if not item.department_id:
+                item.department_id = publisher
             model.Session.add(item)
         else:
             values = {'id': make_uuid(),
@@ -201,7 +194,7 @@
                       'period_complete_day': period_complete_day,
                       'url': url,
                       'pageviews': views,
-                      'visitors': visitors,
+                      'visits': visits,
                       'department_id': publisher,
                       'package_id': package
                      }
@@ -209,6 +202,13 @@
         model.Session.commit()
 
         if package:
+            old_pageviews, old_visits = 0, 0
+            old = model.Session.query(GA_Url).\
+                filter(GA_Url.period_name=='All').\
+                filter(GA_Url.url==url).all()
+            old_pageviews = sum([int(o.pageviews) for o in old])
+            old_visits = sum([int(o.visits) for o in old])
+
             entries = model.Session.query(GA_Url).\
                 filter(GA_Url.period_name!='All').\
                 filter(GA_Url.url==url).all()
@@ -216,15 +216,14 @@
                       'period_name': 'All',
                       'period_complete_day': 0,
                       'url': url,
-                      'pageviews': sum([int(e.pageviews) for e in entries]),
-                      'visitors': sum([int(e.visitors) for e in entries]),
+                      'pageviews': sum([int(e.pageviews) for e in entries]) + old_pageviews,
+                      'visits': sum([int(e.visits or 0) for e in entries]) + old_visits,
                       'department_id': publisher,
                       'package_id': package
                      }
 
             model.Session.add(GA_Url(**values))
             model.Session.commit()
-
 
 
 
@@ -268,7 +267,7 @@
         filter(model.Group.type=='publisher').\
         filter(model.Group.state=='active').all()
     for publisher in publishers:
-        views, visitors, subpub = update_publisher(period_name, publisher, publisher.name)
+        views, visits, subpub = update_publisher(period_name, publisher, publisher.name)
         parent, parents = '', publisher.get_groups('publisher')
         if parents:
             parent = parents[0].name
@@ -277,7 +276,7 @@
             filter(GA_Publisher.publisher_name==publisher.name).first()
         if item:
             item.views = views
-            item.visitors = visitors
+            item.visits = visits
             item.publisher_name = publisher.name
             item.toplevel = publisher in toplevel
             item.subpublishercount = subpub
@@ -289,7 +288,7 @@
                      'period_name': period_name,
                      'publisher_name': publisher.name,
                      'views': views,
-                     'visitors': visitors,
+                     'visits': visits,
                      'toplevel': publisher in toplevel,
                      'subpublishercount': subpub,
                      'parent': p