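Changes to fix some issues around bounces and presentation

Summary of changes:
  * command.py: correct the misspelled --slip_url_stats option to
    --skip_url_stats.
  * download_analytics.py / controller.py: store the home-page bounce
    figure under the key 'Bounce rate (home page)' and format it under
    that same key (it was previously written as 'Bounce rate' but
    formatted under 'Bounces').
  * download_analytics.py: request ga:pageviews rather than
    ga:uniquePageviews, and cope with an empty result set when looking
    up bounces.
  * ga_model.py, controller.py, helpers.py: rename the 'visitors'
    column/attribute to 'visits', and remove the update_url_stat_totals
    stub, whose body was only a docstring.
  * controller.py: order the top publishers by views instead of visits.
  * templates: comment out the Visits columns and drop the
    commented-out "Visitors" note.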

--- a/ckanext/ga_report/command.py
+++ b/ckanext/ga_report/command.py
@@ -80,7 +80,7 @@
                                default=False,
                                dest='delete_first',
                                help='Delete data for the period first')
-        self.parser.add_option('-s', '--slip_url_stats',
+        self.parser.add_option('-s', '--skip_url_stats',
                                action='store_true',
                                default=False,
                                dest='skip_url_stats',

--- a/ckanext/ga_report/controller.py
+++ b/ckanext/ga_report/controller.py
@@ -71,13 +71,13 @@
         entries = q.order_by('ga_stat.key').all()
 
         def clean_key(key, val):
-            if key in ['Average time on site', 'Pages per visit', 'New visits', 'Bounces']:
+            if key in ['Average time on site', 'Pages per visit', 'New visits', 'Bounce rate (home page)']:
                 val =  "%.2f" % round(float(val), 2)
                 if key == 'Average time on site':
                     mins, secs = divmod(float(val), 60)
                     hours, mins = divmod(mins, 60)
                     val = '%02d:%02d:%02d (%s seconds) ' % (hours, mins, secs, val)
-                if key in ['New visits','Bounces']:
+                if key in ['New visits','Bounce rate (home page)']:
                     val = "%s%%" % val
             if key in ['Total page views', 'Total visits']:
                 val = int(val)
@@ -244,11 +244,11 @@
         if publisher:
             q = q.filter(GA_Url.department_id==publisher.name)
         q = q.filter(GA_Url.period_name==month)
-        q = q.order_by('ga_url.visitors::int desc')
+        q = q.order_by('ga_url.visits::int desc')
         top_packages = []
         for entry,package in q.limit(count):
             if package:
-                top_packages.append((package, entry.pageviews, entry.visitors))
+                top_packages.append((package, entry.pageviews, entry.visits))
             else:
                 log.warning('Could not find package associated package')
 
@@ -306,13 +306,13 @@
     month = c.month or 'All'
     connection = model.Session.connection()
     q = """
-        select department_id, sum(pageviews::int) views, sum(visitors::int) visits
+        select department_id, sum(pageviews::int) views, sum(visits::int) visits
         from ga_url
         where department_id <> ''
           and package_id <> ''
           and url like '/dataset/%%'
           and period_name=%s
-        group by department_id order by visits desc
+        group by department_id order by views desc
         """
     if limit:
         q = q + " limit %s;" % (limit)

--- a/ckanext/ga_report/download_analytics.py
+++ b/ckanext/ga_report/download_analytics.py
@@ -98,7 +98,7 @@
                      self.get_full_period_name(period_name, period_complete_day),
                      start_date.strftime('%Y-%m-%d'),
                      end_date.strftime('%Y-%m-%d'))
- 
+
             if self.delete_first:
                 log.info('Deleting existing Analytics for this period "%s"',
                          period_name)
@@ -162,8 +162,8 @@
         start_date = start_date.strftime('%Y-%m-%d')
         end_date = end_date.strftime('%Y-%m-%d')
         query = 'ga:pagePath=%s$' % path
-        metrics = 'ga:uniquePageviews, ga:visits'
-        sort = '-ga:uniquePageviews'
+        metrics = 'ga:pageviews, ga:visits'
+        sort = '-ga:pageviews'
 
         # Supported query params at
         # https://developers.google.com/analytics/devguides/reporting/core/v3/reference
@@ -219,8 +219,8 @@
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
                                  start_date=start_date,
-                                 metrics='ga:uniquePageviews',
-                                 sort='-ga:uniquePageviews',
+                                 metrics='ga:pageviews',
+                                 sort='-ga:pageviews',
                                  max_results=10000,
                                  end_date=end_date).execute()
         result_data = results.get('rows')
@@ -248,12 +248,12 @@
                                  ids='ga:' + self.profile_id,
                                  filters='ga:pagePath==%s' % (path,),
                                  start_date=start_date,
-                                 metrics='ga:bounces,ga:uniquePageviews',
+                                 metrics='ga:bounces,ga:pageviews',
                                  dimensions='ga:pagePath',
                                  max_results=10000,
                                  end_date=end_date).execute()
         result_data = results.get('rows')
-        if len(result_data) != 1:
+        if not result_data or len(result_data) != 1:
             log.error('Could not pinpoint the bounces for path: %s. Got results: %r',
                       path, result_data)
             return
@@ -261,7 +261,7 @@
         bounces, total = [float(x) for x in result_data[0][1:]]
         pct = 100 * bounces/total
         log.info('%d bounces from %d total == %s', bounces, total, pct)
-        ga_model.update_sitewide_stats(period_name, "Totals", {'Bounce rate': pct})
+        ga_model.update_sitewide_stats(period_name, "Totals", {'Bounce rate (home page)': pct})
 
 
     def _locale_stats(self, start_date, end_date, period_name):
@@ -269,8 +269,8 @@
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
                                  start_date=start_date,
-                                 metrics='ga:uniquePageviews',
-                                 sort='-ga:uniquePageviews',
+                                 metrics='ga:pageviews',
+                                 sort='-ga:pageviews',
                                  dimensions="ga:language,ga:country",
                                  max_results=10000,
                                  end_date=end_date).execute()
@@ -293,8 +293,8 @@
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
                                  start_date=start_date,
-                                 metrics='ga:uniquePageviews',
-                                 sort='-ga:uniquePageviews',
+                                 metrics='ga:pageviews',
+                                 sort='-ga:pageviews',
                                  dimensions="ga:socialNetwork,ga:referralPath",
                                  max_results=10000,
                                  end_date=end_date).execute()
@@ -312,8 +312,8 @@
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
                                  start_date=start_date,
-                                 metrics='ga:uniquePageviews',
-                                 sort='-ga:uniquePageviews',
+                                 metrics='ga:pageviews',
+                                 sort='-ga:pageviews',
                                  dimensions="ga:operatingSystem,ga:operatingSystemVersion",
                                  max_results=10000,
                                  end_date=end_date).execute()
@@ -337,8 +337,8 @@
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
                                  start_date=start_date,
-                                 metrics='ga:uniquePageviews',
-                                 sort='-ga:uniquePageviews',
+                                 metrics='ga:pageviews',
+                                 sort='-ga:pageviews',
                                  dimensions="ga:browser,ga:browserVersion",
                                  max_results=10000,
                                  end_date=end_date).execute()
@@ -386,8 +386,8 @@
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
                                  start_date=start_date,
-                                 metrics='ga:uniquePageviews',
-                                 sort='-ga:uniquePageviews',
+                                 metrics='ga:pageviews',
+                                 sort='-ga:pageviews',
                                  dimensions="ga:mobileDeviceBranding, ga:mobileDeviceInfo",
                                  max_results=10000,
                                  end_date=end_date).execute()

--- a/ckanext/ga_report/ga_model.py
+++ b/ckanext/ga_report/ga_model.py
@@ -27,7 +27,7 @@
                       Column('period_name', types.UnicodeText),
                       Column('period_complete_day', types.Integer),
                       Column('pageviews', types.UnicodeText),
-                      Column('visitors', types.UnicodeText),
+                      Column('visits', types.UnicodeText),
                       Column('url', types.UnicodeText),
                       Column('department_id', types.UnicodeText),
                       Column('package_id', types.UnicodeText),
@@ -63,7 +63,7 @@
                   Column('period_name', types.UnicodeText),
                   Column('publisher_name', types.UnicodeText),
                   Column('views', types.UnicodeText),
-                  Column('visitors', types.UnicodeText),
+                  Column('visits', types.UnicodeText),
                   Column('toplevel', types.Boolean, default=False),
                   Column('subpublishercount', types.Integer, default=0),
                   Column('parent', types.UnicodeText),
@@ -155,25 +155,6 @@
         model.Session.commit()
 
 
-def update_url_stat_totals(period_name):
-
-    """
-        items = model.Session.query(GA_Url).\
-            filter(GA_Url.period_name != "All").\
-            filter(GA_Url.url==url).all()
-        values = {'id': make_uuid(),
-                  'period_name': "All",
-                  'period_complete_day': "0",
-                  'url': url,
-                  'pageviews': sum([int(x.pageviews) for x in items]),
-                  'visitors': sum([int(x.visitors) for x in items]),
-                  'department_id': department_id,
-                  'package_id': package
-                 }
-        model.Session.add(GA_Url(**values))
-        model.Session.commit()
-    """
-
 def pre_update_url_stats(period_name):
     model.Session.query(GA_Url).\
             filter(GA_Url.period_name==period_name).delete()
@@ -187,7 +168,7 @@
     stores them in GA_Url under the period and recalculates the totals for
     the 'All' period.
     '''
-    for url, views, visitors in url_data:
+    for url, views, visits in url_data:
         package, publisher = _get_package_and_publisher(url)
 
 
@@ -196,7 +177,7 @@
             filter(GA_Url.url==url).first()
         if item:
             item.pageviews = item.pageviews + views
-            item.visitors = item.visitors + visitors
+            item.visits = item.visits + visits
             if not item.package_id:
                 item.package_id = package
             if not item.department_id:
@@ -208,7 +189,7 @@
                       'period_complete_day': period_complete_day,
                       'url': url,
                       'pageviews': views,
-                      'visitors': visitors,
+                      'visits': visits,
                       'department_id': publisher,
                       'package_id': package
                      }
@@ -221,7 +202,7 @@
                 filter(GA_Url.period_name=='All').\
                 filter(GA_Url.url==url).all()
             old_pageviews = sum([int(o.pageviews) for o in old])
-            old_visits = sum([int(o.visitors) for o in old])
+            old_visits = sum([int(o.visits) for o in old])
 
             entries = model.Session.query(GA_Url).\
                 filter(GA_Url.period_name!='All').\
@@ -231,7 +212,7 @@
                       'period_complete_day': 0,
                       'url': url,
                       'pageviews': sum([int(e.pageviews) for e in entries]) + old_pageviews,
-                      'visitors': sum([int(e.visitors) for e in entries]) + old_visits,
+                      'visits': sum([int(e.visits) for e in entries]) + old_visits,
                       'department_id': publisher,
                       'package_id': package
                      }
@@ -281,7 +262,7 @@
         filter(model.Group.type=='publisher').\
         filter(model.Group.state=='active').all()
     for publisher in publishers:
-        views, visitors, subpub = update_publisher(period_name, publisher, publisher.name)
+        views, visits, subpub = update_publisher(period_name, publisher, publisher.name)
         parent, parents = '', publisher.get_groups('publisher')
         if parents:
             parent = parents[0].name
@@ -290,7 +271,7 @@
             filter(GA_Publisher.publisher_name==publisher.name).first()
         if item:
             item.views = views
-            item.visitors = visitors
+            item.visits = visits
             item.publisher_name = publisher.name
             item.toplevel = publisher in toplevel
             item.subpublishercount = subpub
@@ -302,7 +283,7 @@
                      'period_name': period_name,
                      'publisher_name': publisher.name,
                      'views': views,
-                     'visitors': visitors,
+                     'visits': visits,
                      'toplevel': publisher in toplevel,
                      'subpublishercount': subpub,
                      'parent': parent
@@ -312,7 +293,7 @@
 
 
 def update_publisher(period_name, pub, part=''):
-    views,visitors,subpub = 0, 0, 0
+    views,visits,subpub = 0, 0, 0
     for publisher in go_down_tree(pub):
         subpub = subpub + 1
         items = model.Session.query(GA_Url).\
@@ -320,9 +301,9 @@
                 filter(GA_Url.department_id==publisher.name).all()
         for item in items:
             views = views + int(item.pageviews)
-            visitors = visitors + int(item.visitors)
-
-    return views, visitors, (subpub-1)
+            visits = visits + int(item.visits)
+
+    return views, visits, (subpub-1)
 
 
 def get_top_level():

--- a/ckanext/ga_report/helpers.py
+++ b/ckanext/ga_report/helpers.py
@@ -106,7 +106,7 @@
             if not p in datasets:
                 datasets[p] = {'views':0, 'visits': 0}
             datasets[p]['views'] = datasets[p]['views'] + int(entry.pageviews)
-            datasets[p]['visits'] = datasets[p]['visits'] + int(entry.visitors)
+            datasets[p]['visits'] = datasets[p]['visits'] + int(entry.visits)
 
     results = []
     for k, v in datasets.iteritems():

--- a/ckanext/ga_report/templates/ga_report/notes.html
+++ b/ckanext/ga_report/templates/ga_report/notes.html
@@ -8,7 +8,6 @@
       <ul>
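-          <li>"Views" is the number of sessions during which the page was viewed one or more times (technically known as "unique pageviews").</li>
+          <li>"Views" is the total number of times the page was viewed, with repeated views of the same page in one session each counted (technically known as "pageviews").</li>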
           <li>"Visits" is the number of unique user visits to a page, counted once for each visitor for each session.</li>
-<!--!          <li>"Visitors" is the number of unique users visiting the site (whether once or more times).</li> -->
           <li>These usage statistics are confined to users with javascript enabled, which excludes web crawlers and API calls.</li>
           <li>The results are not shown when the number of views/visits is tiny. Where these relate to site pages, results are available in full in the CSV download. Where these relate to users' web browser information, results are not disclosed, for privacy reasons.</li>
       </ul>

--- a/ckanext/ga_report/templates/ga_report/publisher/index.html
+++ b/ckanext/ga_report/templates/ga_report/publisher/index.html
@@ -41,14 +41,14 @@
        <table class="table table-condensed table-bordered table-striped">
 	 <tr>
 	   <th>Publisher</th>
-	   <th>Dataset Visits</th>
+<!--	   <th>Dataset Visits</th>-->
 	   <th>Dataset Views</th>
 	 </tr>
         <py:for each="publisher, views, visits in c.top_publishers">
 	  <tr>
 	    <td>${h.link_to(publisher.title, h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport', action='read_publisher', id=publisher.name))}
 	    </td>
-	    <td>${visits}</td>
+<!--	    <td>${visits}</td> -->
 	    <td>${views}</td>
 	  </tr>
         </py:for>

--- a/ckanext/ga_report/templates/ga_report/publisher/read.html
+++ b/ckanext/ga_report/templates/ga_report/publisher/read.html
@@ -47,14 +47,14 @@
      <table py:if="c.top_packages" class="table table-condensed table-bordered table-striped">
 	 <tr>
 	   <th>Dataset</th>
-	   <th>Visits</th>
+<!--	   <th>Visits</th> -->
 	   <th>Views</th>
 	 </tr>
         <py:for each="package, views, visits in c.top_packages">
 	  <tr>
 	    <td>${h.link_to(package.title or package.name, h.url_for(controller='package', action='read', id=package.name))}
 	    </td>
-	    <td>${visits}</td>
+<!--	    <td>${visits}</td> -->
 	    <td>${views}</td>
 	  </tr>
         </py:for>