Force len in avg calc to be a float too
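
Besides casting the divisor to float in the global-totals average, this
change also:
* download_analytics: prefixes social-stat URLs with 'http:/' before
  normalising them, and requests ga:visitBounceRate directly instead of
  computing bounces / pageviews.
* ga_model: logs and commits the deletion of per-period and 'All' URL
  records, coerces the old pageview/visit totals to int, compares the
  period name against 'All' (not 'all') when deleting, and uses
  model.repo.commit_and_remove().
* helpers: counts every iteration of the top-dataset loop, not only the
  iterations that hit an inactive dataset.
* publisher template: drops the commented-out visits column and
  parenthesises the "?month=" suffix so the link URL is kept when no
  month is selected.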

--- a/ckanext/ga_report/controller.py
+++ b/ckanext/ga_report/controller.py
@@ -114,7 +114,7 @@
                 if k in ['Total page views', 'Total visits']:
                     v = sum(v)
                 else:
-                    v = float(sum(v))/len(v)
+                    v = float(sum(v))/float(len(v))
                 key, val = clean_key(k,v)
 
                 c.global_totals.append((key, val))

--- a/ckanext/ga_report/download_analytics.py
+++ b/ckanext/ga_report/download_analytics.py
@@ -153,7 +153,8 @@
         data = collections.defaultdict(list)
         rows = results.get('rows',[])
         for row in rows:
-            data[_normalize_url(row[0])].append( (row[1], int(row[2]),) )
+            url = _normalize_url('http:/' + row[0])
+            data[url].append( (row[1], int(row[2]),) )
         ga_model.update_social(period_name, data)
 
 
@@ -249,7 +250,7 @@
                                  ids='ga:' + self.profile_id,
                                  filters='ga:pagePath==%s' % (path,),
                                  start_date=start_date,
-                                 metrics='ga:bounces,ga:pageviews',
+                                 metrics='ga:visitBounceRate',
                                  dimensions='ga:pagePath',
                                  max_results=10000,
                                  end_date=end_date).execute()
@@ -259,10 +260,10 @@
                       path, result_data)
             return
         results = result_data[0]
-        bounces, total = [float(x) for x in result_data[0][1:]]
-        pct = 100 * bounces/total
-        log.info('%d bounces from %d total == %s', bounces, total, pct)
-        ga_model.update_sitewide_stats(period_name, "Totals", {'Bounce rate (home page)': pct},
+        bounces = float(results[1])
+        # ga:visitBounceRate is already a percentage, so no bounces/pageviews division is needed
+        log.info('Google reports visitBounceRate as %s', bounces)
+        ga_model.update_sitewide_stats(period_name, "Totals", {'Bounce rate (home page)': float(bounces)},
             period_complete_day)
 
 

--- a/ckanext/ga_report/ga_model.py
+++ b/ckanext/ga_report/ga_model.py
@@ -161,10 +161,20 @@
 
 
 def pre_update_url_stats(period_name):
+    log.debug("Deleting '%s' records" % period_name)
     model.Session.query(GA_Url).\
             filter(GA_Url.period_name==period_name).delete()
-    model.Session.query(GA_Url).\
-            filter(GA_Url.period_name=='All').delete()
+
+    count = model.Session.query(GA_Url).\
+            filter(GA_Url.period_name == 'All').count()
+    log.debug("Deleting %d 'All' records" % count)
+    count = model.Session.query(GA_Url).\
+            filter(GA_Url.period_name == 'All').delete()
+    log.debug("Deleted %d 'All' records" % count)
+
+    model.Session.flush()
+    model.Session.commit()
+    model.repo.commit_and_remove()
 
 
 def update_url_stats(period_name, period_complete_day, url_data):
@@ -216,8 +226,8 @@
                       'period_name': 'All',
                       'period_complete_day': 0,
                       'url': url,
-                      'pageviews': sum([int(e.pageviews) for e in entries]) + old_pageviews,
-                      'visits': sum([int(e.visits or 0) for e in entries]) + old_visits,
+                      'pageviews': sum([int(e.pageviews) for e in entries]) + int(old_pageviews),
+                      'visits': sum([int(e.visits or 0) for e in entries]) + int(old_visits),
                       'department_id': publisher,
                       'package_id': package
                      }
@@ -343,10 +353,10 @@
     '''
     for object_type in (GA_Url, GA_Stat, GA_Publisher, GA_ReferralStat):
         q = model.Session.query(object_type)
-        if period_name != 'all':
+        if period_name != 'All':
             q = q.filter_by(period_name=period_name)
         q.delete()
-    model.Session.commit()
+    model.repo.commit_and_remove()
 
 def get_score_for_dataset(dataset_name):
     '''

--- a/ckanext/ga_report/helpers.py
+++ b/ckanext/ga_report/helpers.py
@@ -50,9 +50,12 @@
             dataset = model.Package.get(ga_url.url[len('/dataset/'):])
             if dataset and not dataset.state == 'active':
                 dataset = None
-                count += 1
-                if count > 10:
-                    break
+            # When testing, it is possible that top datasets are not available
+            # so only go round this loop a few times before falling back on
+            # a random dataset.
+            count += 1
+            if count > 10:
+                break
     if not dataset:
         # fallback
         dataset = model.Session.query(model.Package)\

--- a/ckanext/ga_report/templates/ga_report/publisher/index.html
+++ b/ckanext/ga_report/templates/ga_report/publisher/index.html
@@ -37,14 +37,13 @@
        <table class="table table-condensed table-bordered table-striped">
 	 <tr>
 	   <th>Publisher</th>
-<!--	   <th>Dataset Visits</th>-->
 	   <th>Dataset Views</th>
 	 </tr>
         <py:for each="publisher, views, visits in c.top_publishers">
 	  <tr>
-	    <td>${h.link_to(publisher.title, h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport', action='read_publisher', id=publisher.name) + ("?month=" + c.month) if c.month else '')}
+	    <td>
+	        ${h.link_to(publisher.title, h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport', action='read_publisher', id=publisher.name) + (("?month=" + c.month) if c.month else ''))}
 	    </td>
-<!--	    <td>${visits}</td> -->
 	    <td>${views}</td>
 	  </tr>
         </py:for>