Changes to support % of bounces from /
Changes to support % of bounces from /

file:a/README.rst -> file:b/README.rst
--- a/README.rst
+++ b/README.rst
@@ -33,6 +33,10 @@
       googleanalytics.id = UA-1010101-1
       googleanalytics.account = Account name (i.e. data.gov.uk, see top level item at https://www.google.com/analytics)
       ga-report.period = monthly
+      ga-report.bounce_url = /data
+
+   The ga-report.bounce_url option specifies the page path to use when calculating the bounce rate. For DGU this is /data,
+   but you may want to set it to / (which is also the default when the option is not set).
 
    Note that your credentials will be readable by system administrators on your server. Rather than use sensitive account details, it is suggested you give access to the GA account to a new Google account that you create just for this purpose.
 

--- a/ckanext/ga_report/controller.py
+++ b/ckanext/ga_report/controller.py
@@ -70,15 +70,15 @@
         entries = q.order_by('ga_stat.key').all()
 
         def clean_key(key, val):
-            if key in ['Average time on site', 'Pages per visit', 'New visits']:
+            if key in ['Average time on site', 'Pages per visit', 'New visits', 'Bounces']:
                 val =  "%.2f" % round(float(val), 2)
                 if key == 'Average time on site':
                     mins, secs = divmod(float(val), 60)
                     hours, mins = divmod(mins, 60)
                     val = '%02d:%02d:%02d (%s seconds) ' % (hours, mins, secs, val)
-                if key == 'New visits':
+                if key in ['New visits','Bounces']:
                     val = "%s%%" % val
-            if key in ['Bounces', 'Total page views', 'Total visits']:
+            if key in ['Total page views', 'Total visits']:
                 val = int(val)
 
             return key, val
@@ -93,11 +93,12 @@
             for e in entries:
                 d[e.key].append(float(e.value))
             for k, v in d.iteritems():
-                if k in ['Bounces', 'Total page views', 'Total visits']:
+                if k in ['Total page views', 'Total visits']:
                     v = sum(v)
                 else:
                     v = float(sum(v))/len(v)
                 key, val = clean_key(k,v)
+
                 c.global_totals.append((key, val))
                 c.global_totals = sorted(c.global_totals, key=operator.itemgetter(0))
 
@@ -172,17 +173,13 @@
                 entries.append((key,val,))
             entries = sorted(entries, key=operator.itemgetter(1), reverse=True)
 
-            def percent(num, total):
-                p = 100 * float(num)/float(total)
-                return "%.2f%%" % round(p, 2)
-
             # Get the total for each set of values and then set the value as
             # a percentage of the total
             if k == 'Social sources':
                 total = sum([x for n,x in c.global_totals if n == 'Total visits'])
             else:
                 total = sum([num for _,num in entries])
-            setattr(c, v, [(k,percent(v,total)) for k,v in entries ])
+            setattr(c, v, [(k,_percent(v,total)) for k,v in entries ])
 
         return render('ga_report/site/index.html')
 
@@ -201,7 +198,7 @@
         writer = csv.writer(response)
         writer.writerow(["Publisher", "Views", "Visits", "Period Name"])
 
-        for publisher,view,visit in self._get_publishers(None):
+        for publisher,view,visit in _get_publishers(None):
             writer.writerow([publisher.title.encode('utf-8'),
                              view,
                              visit,
@@ -244,38 +241,10 @@
         if c.month:
             c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month])
 
-        c.top_publishers = self._get_publishers()
+        c.top_publishers = _get_publishers()
 
         return render('ga_report/publisher/index.html')
 
-    def _get_publishers(self, limit=20):
-        connection = model.Session.connection()
-        q = """
-            select department_id, sum(pageviews::int) views, sum(visitors::int) visits
-            from ga_url
-            where department_id <> ''"""
-        if c.month:
-            q = q + """
-                    and period_name=%s
-            """
-        q = q + """
-                group by department_id order by views desc
-            """
-        if limit:
-            q = q + " limit %s;" % (limit)
-
-        # Add this back (before and period_name =%s) if you want to ignore publisher
-        # homepage views
-        # and not url like '/publisher/%%'
-
-        top_publishers = []
-        res = connection.execute(q, c.month)
-
-        for row in res:
-            g = model.Group.get(row[0])
-            if g:
-                top_publishers.append((g, row[1], row[2]))
-        return top_publishers
 
     def _get_packages(self, publisher, count=-1):
         if count == -1:
@@ -345,3 +314,57 @@
 
         return render('ga_report/publisher/read.html')
 
+def _get_publishers(limit=20):
+    """
+    Return the most viewed publishers as (group, views, visits) tuples.
+
+    Rows are restricted to the period in c.month when one is set, ordered
+    by views descending and capped at `limit` rows (a falsy limit returns
+    every publisher). Rows whose department_id has no matching Group are
+    dropped.
+    """
+    connection = model.Session.connection()
+    q = """
+        select department_id, sum(pageviews::int) views, sum(visitors::int) visits
+        from ga_url
+        where department_id <> ''"""
+    # Bind parameters must line up with the %s placeholders in the query,
+    # so the month is only passed when its clause is present.
+    params = []
+    if c.month:
+        q = q + """
+                and period_name=%s
+        """
+        params.append(c.month)
+    q = q + """
+            group by department_id order by views desc
+        """
+    if limit:
+        # int() keeps anything but a number out of the SQL text.
+        q = q + " limit %d;" % int(limit)
+
+    # Add this back (before and period_name =%s) if you want to ignore publisher
+    # homepage views
+    # and not url like '/publisher/%%'
+
+    top_publishers = []
+    res = connection.execute(q, *params)
+
+    for row in res:
+        g = model.Group.get(row[0])
+        if g:
+            top_publishers.append((g, row[1], row[2]))
+    return top_publishers
+
+
+def _percent(num, total):
+    """
+    Format num as a percentage of total with two decimal places, e.g.
+    '12.50%'. A zero total yields '0.00%' rather than a ZeroDivisionError.
+    """
+    total = float(total)
+    if not total:
+        return "0.00%"
+    p = 100 * float(num) / total
+    return "%.2f%%" % round(p, 2)
+

--- a/ckanext/ga_report/download_analytics.py
+++ b/ckanext/ga_report/download_analytics.py
@@ -94,6 +94,7 @@
                      self.get_full_period_name(period_name, period_complete_day),
                      start_date.strftime('%Y %m %d'),
                      end_date.strftime('%Y %m %d'))
+
             data = self.download(start_date, end_date, '~/dataset/[a-z0-9-_]+')
             log.info('Storing Dataset Analytics for period "%s"',
                      self.get_full_period_name(period_name, period_complete_day))
@@ -155,11 +156,6 @@
                                  max_results=10000,
                                  end_date=end_date).execute()
 
-        if os.getenv('DEBUG'):
-            import pprint
-            pprint.pprint(results)
-            print 'Total results: %s' % results.get('totalResults')
-
         packages = []
         for entry in results.get('rows'):
             (loc,pageviews,visits) = entry
@@ -207,18 +203,43 @@
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
                                  start_date=start_date,
-                                 metrics='ga:pageviewsPerVisit,ga:bounces,ga:avgTimeOnSite,ga:percentNewVisits,ga:visitors',
+                                 metrics='ga:pageviewsPerVisit,ga:avgTimeOnSite,ga:percentNewVisits,ga:visitors',
                                  max_results=10000,
                                  end_date=end_date).execute()
         result_data = results.get('rows')
         data = {
             'Pages per visit': result_data[0][0],
-            'Bounces': result_data[0][1],
-            'Average time on site': result_data[0][2],
-            'New visits': result_data[0][3],
-            'Total visits': result_data[0][4],
+            'Average time on site': result_data[0][1],
+            'New visits': result_data[0][2],
+            'Total visits': result_data[0][3],
         }
         ga_model.update_sitewide_stats(period_name, "Totals", data)
+
+        # Bounce rate for one landing page, stored as a percentage of its
+        # unique page views. The path comes from configuration because for
+        # DGU we don't want /.
+        path = config.get('ga-report.bounce_url', '/')
+        results = self.service.data().ga().get(
+                                 ids='ga:' + self.profile_id,
+                                 filters='ga:pagePath=~%s$' % (path,),
+                                 start_date=start_date,
+                                 metrics='ga:bounces,ga:uniquePageviews',
+                                 dimensions='ga:pagePath',
+                                 max_results=10000,
+                                 end_date=end_date).execute()
+        # 'rows' may be absent from the response (presumably when nothing
+        # matched the filter), so fall back to an empty list.
+        for row in results.get('rows') or []:
+            # The filter is a regex match, so keep only the exact path.
+            if row[0] != path:
+                continue
+            bounce, total = [float(x) for x in row[1:]]
+            if not total:
+                # No page views recorded: a percentage cannot be computed.
+                continue
+            pct = 100 * bounce / total
+            log.info('%d bounces from %d total == %s', bounce, total, pct)
+            ga_model.update_sitewide_stats(period_name, "Totals", {'Bounces': pct})
 
 
     def _locale_stats(self, start_date, end_date, period_name):

--- a/ckanext/ga_report/helpers.py
+++ b/ckanext/ga_report/helpers.py
@@ -3,15 +3,92 @@
 import ckan.lib.base as base
 import ckan.model as model
 
+from ckanext.ga_report.ga_model import GA_Url, GA_Publisher
+from ckanext.ga_report.controller import _get_publishers
 _log = logging.getLogger(__name__)
 
+def popular_datasets(count=10):
+    """
+    Render the popular-datasets snippet for a randomly chosen publisher.
+
+    The publisher is picked at random from the 30 most viewed ones, skipping
+    any that is not active or has no datasets to show. Returns an empty
+    string when no publisher qualifies.
+    """
+    import random
+
+    # Shuffling and taking the first match is a random pick that still
+    # terminates when nothing matches.
+    candidates = [p[0] for p in _get_publishers(30)]
+    random.shuffle(candidates)
+    for publisher in candidates:
+        if publisher.state != 'active':
+            continue
+        datasets = _datasets_for_publisher(publisher, 10)[:count]
+        if datasets:
+            ctx = {
+                'datasets': datasets,
+                'publisher': publisher
+            }
+            return base.render_snippet('ga_report/ga_popular_datasets.html', **ctx)
+
+    _log.warning("No active publisher with datasets found for 'popular_datasets'")
+    return ""
+
+def single_popular_dataset(top=20):
+    """
+    Render the featured-dataset snippet for one dataset picked at random
+    from the `top` most viewed /dataset/ URLs.
+
+    URLs whose dataset no longer exists or is not active are skipped.
+    Returns an empty string when none of the candidates can be used.
+    """
+    import random
+
+    entries = model.Session.query(GA_Url).\
+        filter(GA_Url.url.like('/dataset/%')).\
+        order_by('ga_url.pageviews::int desc').\
+        limit(top).all()
+    # Try the candidates in random order so that a deleted or inactive
+    # dataset falls through to the next one instead of looping forever.
+    random.shuffle(entries)
+
+    for entry in entries:
+        dataset = model.Package.get(entry.url[len('/dataset/'):])
+        if dataset and dataset.state == 'active':
+            ctx = {
+                'dataset': dataset,
+                'publisher': model.Group.get(entry.department_id)
+            }
+            return base.render_snippet('ga_report/ga_popular_single.html', **ctx)
+
+    _log.warning("No active dataset found for 'single_popular_dataset'")
+    return ""
+
+
 def most_popular_datasets(publisher, count=20):
-    from ckanext.ga_report.ga_model import GA_Url
+    """
+    Render the snippet listing the most viewed datasets of `publisher`.
+    Returns an empty string when no publisher is given.
+    """
 
     if not publisher:
         _log.error("No valid publisher passed to 'most_popular_datasets'")
         return ""
 
+    results = _datasets_for_publisher(publisher, count)
+
+    ctx = {
+        # One result row per dataset, so this is the dataset count.
+        'dataset_count': len(results),
+        'datasets': results,
+
+        'publisher': publisher
+    }
+
+    return base.render_snippet('ga_report/publisher/popular.html', **ctx)
+
+def _datasets_for_publisher(publisher, count):
     datasets = {}
     entries = model.Session.query(GA_Url).\
         filter(GA_Url.department_id==publisher.name).\
@@ -29,14 +106,5 @@
     for k, v in datasets.iteritems():
         results.append((k,v['views'],v['visits']))
 
-    results = sorted(results, key=operator.itemgetter(1), reverse=True)
+    return sorted(results, key=operator.itemgetter(1), reverse=True)
 
-    ctx = {
-        'dataset_count': len(datasets),
-        'datasets': results,
-
-        'publisher': publisher
-    }
-
-    return base.render_snippet('ga_report/publisher/popular.html', **ctx)
-

--- a/ckanext/ga_report/plugin.py
+++ b/ckanext/ga_report/plugin.py
@@ -2,6 +2,10 @@
 import ckan.lib.helpers as h
 import ckan.plugins as p
 from ckan.plugins import implements, toolkit
+
+from ckanext.ga_report.helpers import (most_popular_datasets,
+                                       popular_datasets,
+                                       single_popular_dataset)
 
 log = logging.getLogger('ckanext.ga-report')
 
@@ -19,10 +23,11 @@
         A dictionary of extra helpers that will be available to provide
         ga report info to templates.
         """
-        from ckanext.ga_report.helpers import most_popular_datasets
         return {
             'ga_report_installed': lambda: True,
+            'popular_datasets': popular_datasets,
             'most_popular_datasets': most_popular_datasets,
+            'single_popular_dataset': single_popular_dataset
         }
 
     def after_map(self, map):

--- a/ckanext/ga_report/report_model.py
+++ /dev/null

--- /dev/null
+++ b/ckanext/ga_report/templates/ga_report/ga_popular_datasets.html
@@ -1,1 +1,27 @@
+<html xmlns:py="http://genshi.edgewall.org/"
+  xmlns:i18n="http://genshi.edgewall.org/i18n"
+  xmlns:xi="http://www.w3.org/2001/XInclude"
+  py:strip="">
 
+<div class="popular_datasets">
+    <div class="pull-right">
+        <a href="${h.url_for(controller='ckanext.ga_report.controller:GaPublisherReport',action='index')}" class="btn btn-primary">More popular datasets »</a>
+    </div>
+    <h2>Popular datasets</h2>
+    <h4>${publisher.title}</h4>
+    <ul>
+        <py:for each="dataset, _, _ in datasets">
+            <li>
+                <span>${h.link_to(dataset.title, h.url_for(controller='package', action='read', id=dataset.name))}</span>
+        <div>${h.truncate(dataset.notes, length=80, whole_word=True)}</div>
+            </li>
+        </py:for>
+    </ul>
+
+ </div>
+
+</html>
+
+
+
+

--- /dev/null
+++ b/ckanext/ga_report/templates/ga_report/ga_popular_single.html
@@ -1,1 +1,23 @@
+<html xmlns:py="http://genshi.edgewall.org/"
+  xmlns:i18n="http://genshi.edgewall.org/i18n"
+  xmlns:xi="http://www.w3.org/2001/XInclude"
+  py:strip="">
 
+
+<div class="popular_datasets">
+    <h2>Featured dataset</h2>
+
+    <h3>${h.link_to(dataset.title, h.url_for(controller='package', action='read', id=dataset.name))}</h3>        <div>${h.truncate(dataset.notes, length=200, whole_word=True)}</div>
+    <p></p>
+    <div>
+<a href="${h.url_for(controller='ckanext.ga_report.controller:GaPublisherReport',action='index')}" class="btn btn-primary">More popular datasets</a>
+<a href="${h.url_for(controller='ckanext.ga_report.controller:GaReport',action='index')}" class="btn btn-primary">All usage data</a>
+    </div>
+ </div>
+
+
+</html>
+
+
+
+

--- a/ckanext/ga_report/templates/ga_report/ga_util.html
+++ b/ckanext/ga_report/templates/ga_report/ga_util.html
@@ -5,15 +5,6 @@
   xmlns:xi="http://www.w3.org/2001/XInclude"
   py:strip=""
   >
-
- <table py:def="publisher_list(groups)" class="groups">
-   <py:for each="group,title in groups">
-   <tr>
-     <td><a href="/publisher/${group.name}">${title}</a></td>
-   </tr>
-   </py:for>
- </table>
-
 
 <table py:def="social_table(items, with_source=False)" class="table table-condensed table-bordered table-striped">
     <tr>
@@ -49,12 +40,12 @@
     <div id="minornavigation-bg-left">
     <div id="minornavigation-bg-right">
         <ul class="nav nav-pills">
-        <li py:attrs="{'class': 'active' if active_name=='Site-wide' else None}"><a py:attrs="{'class': 'active' if active_name=='Site-wide' else None}"  href="${h.url_for(controller='ckanext.ga_report.controller:GaReport',action='index')}"><img src="/images/icons/page_white_gear.png" height="16px" width="16px" alt="None" class="inline-icon "/> Site-wide</a></li>
+        <li py:attrs="{'class': 'active' if active_name=='Site-wide' else None}"><a py:attrs="{'class': 'active' if active_name=='Site-wide' else None}"  href="${h.url_for(controller='ckanext.ga_report.controller:GaReport',action='index')}"><img src="/images/icons/page_white.png" height="16px" width="16px" alt="None" class="inline-icon "/> Site-wide</a></li>
         <li py:attrs="{'class': 'active' if active_name=='Publishers' else None}">
-                <a py:attrs="{'class': 'active' if active_name=='Publishers' else None}"  href="${h.url_for(controller='ckanext.ga_report.controller:GaPublisherReport',action='index')}"><img src="/images/icons/page_white_gear.png" height="16px" width="16px" alt="None" class="inline-icon "/> Publishers</a>
+                <a py:attrs="{'class': 'active' if active_name=='Publishers' else None}"  href="${h.url_for(controller='ckanext.ga_report.controller:GaPublisherReport',action='index')}"><img src="/images/icons/page_white.png" height="16px" width="16px" alt="None" class="inline-icon "/> Publishers</a>
         </li>
         <li py:if="publisher" class="active">
-                <a class="active"  href="${h.url_for(controller='ckanext.ga_report.controller:GaPublisherReport',action='read', id=publisher.name)}"><img src="/images/icons/page_white_gear.png" height="16px" width="16px" alt="None" class="inline-icon "/>${publisher.title}</a>
+                <a class="active"  href="${h.url_for(controller='ckanext.ga_report.controller:GaPublisherReport',action='read', id=publisher.name)}"><img src="/images/icons/page_white.png" height="16px" width="16px" alt="None" class="inline-icon "/>${publisher.title}</a>
         </li>
 
         </ul>

--- a/ckanext/ga_report/templates/ga_report/publisher/index.html
+++ b/ckanext/ga_report/templates/ga_report/publisher/index.html
@@ -27,6 +27,7 @@
 
 
   <div py:match="content">
+
       <h1>Site Usage</h1>
 
       ${usage_nav('Publishers', None)}