Force len in avg calc to be a float too
--- a/ckanext/ga_report/controller.py
+++ b/ckanext/ga_report/controller.py
@@ -114,7 +114,7 @@
if k in ['Total page views', 'Total visits']:
v = sum(v)
else:
- v = float(sum(v))/len(v)
+ v = float(sum(v))/float(len(v))
key, val = clean_key(k,v)
c.global_totals.append((key, val))
--- a/ckanext/ga_report/download_analytics.py
+++ b/ckanext/ga_report/download_analytics.py
@@ -153,7 +153,8 @@
data = collections.defaultdict(list)
rows = results.get('rows',[])
for row in rows:
- data[_normalize_url(row[0])].append( (row[1], int(row[2]),) )
+ url = _normalize_url('http:/' + row[0])
+ data[url].append( (row[1], int(row[2]),) )
ga_model.update_social(period_name, data)
@@ -249,7 +250,7 @@
ids='ga:' + self.profile_id,
filters='ga:pagePath==%s' % (path,),
start_date=start_date,
- metrics='ga:bounces,ga:pageviews',
+ metrics='ga:visitBounceRate',
dimensions='ga:pagePath',
max_results=10000,
end_date=end_date).execute()
@@ -259,10 +260,10 @@
path, result_data)
return
results = result_data[0]
- bounces, total = [float(x) for x in result_data[0][1:]]
- pct = 100 * bounces/total
- log.info('%d bounces from %d total == %s', bounces, total, pct)
- ga_model.update_sitewide_stats(period_name, "Totals", {'Bounce rate (home page)': pct},
+ bounces = float(results[1])
+ # visitBounceRate is already a %
+ log.info('Google reports visitBounceRate as %s', bounces)
+ ga_model.update_sitewide_stats(period_name, "Totals", {'Bounce rate (home page)': float(bounces)},
period_complete_day)
--- a/ckanext/ga_report/ga_model.py
+++ b/ckanext/ga_report/ga_model.py
@@ -161,10 +161,20 @@
def pre_update_url_stats(period_name):
+ log.debug("Deleting '%s' records" % period_name)
model.Session.query(GA_Url).\
filter(GA_Url.period_name==period_name).delete()
- model.Session.query(GA_Url).\
- filter(GA_Url.period_name=='All').delete()
+
+ count = model.Session.query(GA_Url).\
+ filter(GA_Url.period_name == 'All').count()
+ log.debug("Deleting %d 'All' records" % count)
+ count = model.Session.query(GA_Url).\
+ filter(GA_Url.period_name == 'All').delete()
+ log.debug("Deleted %d 'All' records" % count)
+
+ model.Session.flush()
+ model.Session.commit()
+ model.repo.commit_and_remove()
def update_url_stats(period_name, period_complete_day, url_data):
@@ -216,8 +226,8 @@
'period_name': 'All',
'period_complete_day': 0,
'url': url,
- 'pageviews': sum([int(e.pageviews) for e in entries]) + old_pageviews,
- 'visits': sum([int(e.visits or 0) for e in entries]) + old_visits,
+ 'pageviews': sum([int(e.pageviews) for e in entries]) + int(old_pageviews),
+ 'visits': sum([int(e.visits or 0) for e in entries]) + int(old_visits),
'department_id': publisher,
'package_id': package
}
@@ -343,10 +353,10 @@
'''
for object_type in (GA_Url, GA_Stat, GA_Publisher, GA_ReferralStat):
q = model.Session.query(object_type)
- if period_name != 'all':
+ if period_name != 'All':
q = q.filter_by(period_name=period_name)
q.delete()
- model.Session.commit()
+ model.repo.commit_and_remove()
def get_score_for_dataset(dataset_name):
'''
--- a/ckanext/ga_report/helpers.py
+++ b/ckanext/ga_report/helpers.py
@@ -50,9 +50,12 @@
dataset = model.Package.get(ga_url.url[len('/dataset/'):])
if dataset and not dataset.state == 'active':
dataset = None
- count += 1
- if count > 10:
- break
+ # When testing, it is possible that top datasets are not available
+ # so only go round this loop a few times before falling back on
+ # a random dataset.
+ count += 1
+ if count > 10:
+ break
if not dataset:
# fallback
dataset = model.Session.query(model.Package)\
--- a/ckanext/ga_report/templates/ga_report/publisher/index.html
+++ b/ckanext/ga_report/templates/ga_report/publisher/index.html
@@ -37,14 +37,13 @@
<table class="table table-condensed table-bordered table-striped">
<tr>
<th>Publisher</th>
-<!-- <th>Dataset Visits</th>-->
<th>Dataset Views</th>
</tr>
<py:for each="publisher, views, visits in c.top_publishers">
<tr>
- <td>${h.link_to(publisher.title, h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport', action='read_publisher', id=publisher.name) + ("?month=" + c.month) if c.month else '')}
+ <td>
+ ${h.link_to(publisher.title, h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport', action='read_publisher', id=publisher.name) + (("?month=" + c.month) if c.month else ''))}
</td>
-<!-- <td>${visits}</td> -->
<td>${views}</td>
</tr>
</py:for>