Tidy logging.
Tidy logging.

--- a/ckanext/ga_report/controller.py
+++ b/ckanext/ga_report/controller.py
@@ -22,8 +22,9 @@
 
 
 def _month_details(cls):
+    '''Returns a list of all the month names'''
     months = []
-    vals = model.Session.query(cls.period_name).distinct().all()
+    vals = model.Session.query(cls.period_name).filter(cls.period_name!='All').distinct().all()
     for m in vals:
         months.append( (m[0], _get_month_name(m[0])))
     return sorted(months, key=operator.itemgetter(0), reverse=True)
@@ -236,43 +237,23 @@
         if count == -1:
             count = sys.maxint
 
-        q = model.Session.query(GA_Url)\
+        month = c.month or 'All'
+
+        q = model.Session.query(GA_Url,model.Package)\
+            .filter(model.Package.name==GA_Url.package_id)\
             .filter(GA_Url.url.like('/dataset/%'))
         if publisher:
             q = q.filter(GA_Url.department_id==publisher.name)
-        if c.month:
-            q = q.filter(GA_Url.period_name==c.month)
+        q = q.filter(GA_Url.period_name==month)
         q = q.order_by('ga_url.visitors::int desc')
-
-        if c.month:
-            top_packages = []
-            for entry in q.limit(count):
-                package_name = entry.url[len('/dataset/'):]
-                p = model.Package.get(package_name)
-                if p:
-                    top_packages.append((p, entry.pageviews, entry.visitors))
-                else:
-                    log.warning('Could not find package "%s"', package_name)
-        else:
-            ds = {}
-            for entry in q:
-                if len(ds) >= count:
-                    break
-                package_name = entry.url[len('/dataset/'):]
-                p = model.Package.get(package_name)
-                if p:
-                    if not p in ds:
-                        ds[p] = {'views': 0, 'visits': 0}
-                    ds[p]['views'] = ds[p]['views'] + int(entry.pageviews)
-                    ds[p]['visits'] = ds[p]['visits'] + int(entry.visitors)
-                else:
-                    log.warning('Could not find package "%s"', package_name)
-
-            results = []
-            for k, v in ds.iteritems():
-                results.append((k,v['views'],v['visits']))
-
-            top_packages = sorted(results, key=operator.itemgetter(1), reverse=True)
+        top_packages = []
+
+        for entry,package in q.limit(count):
+            if package:
+                top_packages.append((package, entry.pageviews, entry.visitors))
+            else:
+                log.warning('Could not find package associated package')
+
         return top_packages
 
     def read(self):
@@ -308,15 +289,12 @@
         else:
             c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month])
 
+        month = c.mnth or 'All'
         c.publisher_page_views = 0
         q = model.Session.query(GA_Url).\
             filter(GA_Url.url=='/publisher/%s' % c.publisher_name)
-        if c.month:
-            entry = q.filter(GA_Url.period_name==c.month).first()
-            c.publisher_page_views = entry.pageviews if entry else 0
-        else:
-            for e in q.all():
-                c.publisher_page_views = c.publisher_page_views  + int(e.pageviews)
+        entry = q.filter(GA_Url.period_name==c.month).first()
+        c.publisher_page_views = entry.pageviews if entry else 0
 
         c.top_packages = self._get_packages(c.publisher, 20)
 

--- a/ckanext/ga_report/download_analytics.py
+++ b/ckanext/ga_report/download_analytics.py
@@ -92,28 +92,34 @@
 
     def download_and_store(self, periods):
         for period_name, period_complete_day, start_date, end_date in periods:
+            log.info('Period "%s" (%s - %s)',
+                     self.get_full_period_name(period_name, period_complete_day),
+                     start_date.strftime('%Y-%m-%d'),
+                     end_date.strftime('%Y-%m-%d'))
+ 
             if self.delete_first:
-                log.info('Deleting existing Analytics for period "%s"',
+                log.info('Deleting existing Analytics for this period "%s"',
                          period_name)
                 ga_model.delete(period_name)
-            log.info('Downloading Analytics for period "%s" (%s - %s)',
-                     self.get_full_period_name(period_name, period_complete_day),
-                     start_date.strftime('%Y %m %d'),
-                     end_date.strftime('%Y %m %d'))
-
+
+            log.info('Downloading analytics for dataset views')
             data = self.download(start_date, end_date, '~/dataset/[a-z0-9-_]+')
-            log.info('Storing Dataset Analytics for period "%s"',
-                     self.get_full_period_name(period_name, period_complete_day))
+
+            log.info('Storing dataset views (%i rows)', len(data.get('url')))
             self.store(period_name, period_complete_day, data, )
 
+            log.info('Downloading analytics for publisher views')
             data = self.download(start_date, end_date, '~/publisher/[a-z0-9-_]+')
-            log.info('Storing Publisher Analytics for period "%s"',
-                     self.get_full_period_name(period_name, period_complete_day))
+            log.info('Storing publisher views (%i rows)', len(data.get('url')))
             self.store(period_name, period_complete_day, data,)
 
+            log.info('Aggregating datasets by publisher')
             ga_model.update_publisher_stats(period_name) # about 30 seconds.
+
+            log.info('Downloading and storing analytics for site-wide stats')
             self.sitewide_stats( period_name )
 
+            log.info('Downloading and storing analytics for social networks')
             self.update_social_info(period_name, start_date, end_date)
 
     def update_social_info(self, period_name, start_date, end_date):
@@ -147,7 +153,7 @@
         start_date = start_date.strftime('%Y-%m-%d')
         end_date = end_date.strftime('%Y-%m-%d')
         query = 'ga:pagePath=%s$' % path
-        metrics = 'ga:uniquePageviews, ga:visitors'
+        metrics = 'ga:uniquePageviews, ga:visits'
         sort = '-ga:uniquePageviews'
 
         # Supported query params at
@@ -179,12 +185,10 @@
 
         start_date = '%s-01' % period_name
         end_date = '%s-%s' % (period_name, last_day_of_month)
-        print 'Sitewide_stats for %s (%s -> %s)' % (period_name, start_date, end_date)
-
         funcs = ['_totals_stats', '_social_stats', '_os_stats',
                  '_locale_stats', '_browser_stats', '_mobile_stats']
         for f in funcs:
-            print ' + Fetching %s stats' % f.split('_')[1]
+            log.info('Downloading analytics for %s' % f.split('_')[1])
             getattr(self, f)(start_date, end_date, period_name)
 
     def _get_results(result_data, f):
@@ -209,7 +213,7 @@
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
                                  start_date=start_date,
-                                 metrics='ga:pageviewsPerVisit,ga:avgTimeOnSite,ga:percentNewVisits,ga:visitors',
+                                 metrics='ga:pageviewsPerVisit,ga:avgTimeOnSite,ga:percentNewVisits,ga:visits',
                                  max_results=10000,
                                  end_date=end_date).execute()
         result_data = results.get('rows')

--- a/ckanext/ga_report/ga_model.py
+++ b/ckanext/ga_report/ga_model.py
@@ -1,10 +1,10 @@
 import re
 import uuid
 
-from sqlalchemy import Table, Column, MetaData
+from sqlalchemy import Table, Column, MetaData, ForeignKey
 from sqlalchemy import types
 from sqlalchemy.sql import select
-from sqlalchemy.orm import mapper
+from sqlalchemy.orm import mapper, relation
 from sqlalchemy import func
 
 import ckan.model as model
@@ -14,8 +14,6 @@
     return unicode(uuid.uuid4())
 
 metadata = MetaData()
-
-
 
 class GA_Url(object):
 
@@ -32,6 +30,7 @@
                       Column('visitors', types.UnicodeText),
                       Column('url', types.UnicodeText),
                       Column('department_id', types.UnicodeText),
+                      Column('package_id', types.UnicodeText),
                 )
 mapper(GA_Url, url_table)
 
@@ -163,6 +162,10 @@
         url = _normalize_url(url)
         department_id = _get_department_id_of_url(url)
 
+        package = None
+        if url.startswith('/dataset/'):
+            package = url[len('/dataset/'):]
+
         # see if the row for this url & month is in the table already
         item = model.Session.query(GA_Url).\
             filter(GA_Url.period_name==period_name).\
@@ -172,6 +175,7 @@
             item.pageviews = views
             item.visitors = visitors
             item.department_id = department_id
+            item.package_id = package
             model.Session.add(item)
         else:
             # create the row
@@ -181,9 +185,31 @@
                       'url': url,
                       'pageviews': views,
                       'visitors': visitors,
-                      'department_id': department_id
+                      'department_id': department_id,
+                      'package_id': package
                      }
             model.Session.add(GA_Url(**values))
+
+        # We now need to recaculate the ALL time_period from the data we have
+        # Delete the old 'All'
+        old = model.Session.query(GA_Url).\
+            filter(GA_Url.period_name == "All").\
+            filter(GA_Url.url==url).delete()
+
+        items = model.Session.query(GA_Url).\
+            filter(GA_Url.period_name != "All").\
+            filter(GA_Url.url==url).all()
+        values = {'id': make_uuid(),
+                  'period_name': "All",
+                  'period_complete_day': "0",
+                  'url': url,
+                  'pageviews': sum([int(x.pageviews) for x in items]),
+                  'visitors': sum([int(x.visitors) for x in items]),
+                  'department_id': department_id,
+                  'package_id': package
+                 }
+        model.Session.add(GA_Url(**values))
+
         model.Session.commit()
 
 

--- a/ckanext/ga_report/helpers.py
+++ b/ckanext/ga_report/helpers.py
@@ -1,7 +1,9 @@
 import logging
 import operator
+
 import ckan.lib.base as base
 import ckan.model as model
+from ckan.logic import get_action
 
 from ckanext.ga_report.ga_model import GA_Url, GA_Publisher
 from ckanext.ga_report.controller import _get_publishers
@@ -39,25 +41,38 @@
                    order_by('ga_url.pageviews::int desc')
     num_top_datasets = top_datasets.count()
 
+    dataset = None
     if num_top_datasets:
-        dataset = None
+        count = 0
         while not dataset:
             rand = random.randrange(0, min(top, num_top_datasets))
             ga_url = top_datasets[rand]
             dataset = model.Package.get(ga_url.url[len('/dataset/'):])
             if dataset and not dataset.state == 'active':
                 dataset = None
-    else:
+                count += 1
+                if count > 10:
+                    break
+    if not dataset:
+        # fallback
         dataset = model.Session.query(model.Package)\
                   .filter_by(state='active').first()
-    publisher = dataset.get_groups('publisher')[0]
-    return {
-        'dataset': dataset,
-        'publisher': publisher
-    }
+        if not dataset:
+            return None
+    dataset_dict = get_action('package_show')({'model': model,
+                                               'session': model.Session},
+                                              {'id':dataset.id})
+    return dataset_dict
 
 def single_popular_dataset_html(top=20):
-    context = single_popular_dataset(top)
+    dataset_dict = single_popular_dataset(top)
+    groups = package.get('groups', [])
+    publishers = [ g for g in groups if g.get('type') == 'publisher' ]
+    publisher = publishers[0] if publishers else {'name':'', 'title': ''}
+    context = {
+        'dataset': dataset_dict,
+        'publisher': publisher_dict
+        }
     return base.render_snippet('ga_report/ga_popular_single.html', **context)
 
 

--- a/ckanext/ga_report/templates/ga_report/ga_popular_single.html
+++ b/ckanext/ga_report/templates/ga_report/ga_popular_single.html
@@ -8,14 +8,14 @@
     <h2>Featured dataset</h2>
 
     <div class="dataset-summary boxed">
-          <a class="dataset-header" href="${h.url_for(controller='package', action='read', id=dataset.name)}">
-            <h3>${dataset.title}</h3>
+          <a class="dataset-header" href="${h.url_for(controller='package', action='read', id=dataset['name'])}">
+            <h3>${dataset['title']}</h3>
           </a>
           <h4>
             <strong>Publisher</strong> :
-            <a href="/publisher/${publisher.name}">${publisher.title}</a>
+            <a href="/publisher/${publisher['name']}">${publisher['title']}</a>
           </h4>
-          <div>${h.truncate(dataset.notes, length=200, whole_word=True)}</div>
+          <div>${h.truncate(dataset['notes_rendered'], length=200, whole_word=True)}</div>
     </div>
     <div>
       <a href="${h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport',action='read')}" class="btn">Other popular datasets</a>

--- a/ckanext/ga_report/templates/ga_report/notes.html
+++ b/ckanext/ga_report/templates/ga_report/notes.html
@@ -6,11 +6,11 @@
     <li class="widget-container boxed widget_text">
       <h4>Notes</h4>
       <ul>
-          <li>'Views' is the number of sessions during which that page was viewed one or more times ('Unique Pageviews').</li>
-<!--          <li>'Visits' is the number of individual sessions initiated by all the visitors to your site, counted once for each visitor for each session.</li>-->
-          <li>'Visitors' is the number of unique users visiting the site (whether once or more times).</li>
+          <li>"Views" is the number of sessions during which the page was viewed one or more times (technically known as "unique pageviews").</li>
+          <li>"Visits" is the number of unique user visits to a page, counted once for each visitor for each session.</li>
+<!--!          <li>"Visitors" is the number of unique users visiting the site (whether once or more times).</li> -->
           <li>These usage statistics are confined to users with javascript enabled, which excludes web crawlers and API calls.</li>
-          <li>The results for only small numbers of views/visits are not shown. Where these relate to site pages, then they are available in full in the CSV download. Where these relate to users' web browser information, they are not disclosed, for privacy reasons.</li>
+          <li>The results are not shown when the number of views/visits is tiny. Where these relate to site pages, results are available in full in the CSV download. Where these relate to users' web browser information, results are not disclosed, for privacy reasons.</li>
       </ul>
     </li>
 </html>