Merge branch 'master' of github.com:datagovuk/ckanext-ga-report
--- a/ckanext/ga_report/download_analytics.py
+++ b/ckanext/ga_report/download_analytics.py
@@ -92,33 +92,40 @@
def download_and_store(self, periods):
for period_name, period_complete_day, start_date, end_date in periods:
+ log.info('Period "%s" (%s - %s)',
+ self.get_full_period_name(period_name, period_complete_day),
+ start_date.strftime('%Y-%m-%d'),
+ end_date.strftime('%Y-%m-%d'))
+
if self.delete_first:
- log.info('Deleting existing Analytics for period "%s"',
+ log.info('Deleting existing Analytics for this period "%s"',
period_name)
ga_model.delete(period_name)
- log.info('Downloading Analytics for period "%s" (%s - %s)',
- self.get_full_period_name(period_name, period_complete_day),
- start_date.strftime('%Y %m %d'),
- end_date.strftime('%Y %m %d'))
# Clean up the entries before we run this
ga_model.pre_update_url_stats(period_name)
accountName = config.get('googleanalytics.account')
+ log.info('Downloading analytics for dataset views')
data = self.download(start_date, end_date, '~/%s/dataset/[a-z0-9-_]+' % accountName)
- log.info('Storing Dataset Analytics for period "%s"',
- self.get_full_period_name(period_name, period_complete_day))
+
+ log.info('Storing dataset views (%i rows)', len(data.get('url')))
self.store(period_name, period_complete_day, data, )
+ log.info('Downloading analytics for publisher views')
data = self.download(start_date, end_date, '~/%s/publisher/[a-z0-9-_]+' % accountName)
- log.info('Storing Publisher Analytics for period "%s"',
- self.get_full_period_name(period_name, period_complete_day))
+
+ log.info('Storing publisher views (%i rows)', len(data.get('url')))
self.store(period_name, period_complete_day, data,)
+ log.info('Aggregating datasets by publisher')
ga_model.update_publisher_stats(period_name) # about 30 seconds.
+
+ log.info('Downloading and storing analytics for site-wide stats')
self.sitewide_stats( period_name )
+ log.info('Downloading and storing analytics for social networks')
self.update_social_info(period_name, start_date, end_date)
@@ -187,12 +194,10 @@
start_date = '%s-01' % period_name
end_date = '%s-%s' % (period_name, last_day_of_month)
- print 'Sitewide_stats for %s (%s -> %s)' % (period_name, start_date, end_date)
-
funcs = ['_totals_stats', '_social_stats', '_os_stats',
'_locale_stats', '_browser_stats', '_mobile_stats']
for f in funcs:
- print ' + Fetching %s stats' % f.split('_')[1]
+ log.info('Downloading analytics for %s' % f.split('_')[1])
getattr(self, f)(start_date, end_date, period_name)
def _get_results(result_data, f):
--- a/ckanext/ga_report/ga_model.py
+++ b/ckanext/ga_report/ga_model.py
@@ -184,9 +184,14 @@
def update_url_stats(period_name, period_complete_day, url_data):
-
+ '''
+ Given an iterable of (url, views, visitors) tuples for a given period,
+ records them in GA_Url under that period and, for urls that map to a
+ package, recalculates the totals for the 'All' period.
+ '''
for url, views, visitors in url_data:
package, publisher = _get_package_and_publisher(url)
+
item = model.Session.query(GA_Url).\
filter(GA_Url.period_name==period_name).\
@@ -194,6 +199,10 @@
if item:
item.pageviews = item.pageviews + views
item.visitors = item.visitors + visitors
+ if not item.package_id:
+ item.package_id = package
+ if not item.department_id:
+ item.department_id = publisher
model.Session.add(item)
else:
values = {'id': make_uuid(),
@@ -209,6 +218,13 @@
model.Session.commit()
if package:
+ old_pageviews, old_visits = 0, 0
+ old = model.Session.query(GA_Url).\
+ filter(GA_Url.period_name=='All').\
+ filter(GA_Url.url==url).all()
+ old_pageviews = sum([int(o.pageviews) for o in old])
+ old_visits = sum([int(o.visitors) for o in old])
+
entries = model.Session.query(GA_Url).\
filter(GA_Url.period_name!='All').\
filter(GA_Url.url==url).all()
@@ -216,15 +232,14 @@
'period_name': 'All',
'period_complete_day': 0,
'url': url,
- 'pageviews': sum([int(e.pageviews) for e in entries]),
- 'visitors': sum([int(e.visitors) for e in entries]),
+ 'pageviews': sum([int(e.pageviews) for e in entries]) + old_pageviews,
+ 'visitors': sum([int(e.visitors) for e in entries]) + old_visits,
'department_id': publisher,
'package_id': package
}
model.Session.add(GA_Url(**values))
model.Session.commit()
-
--- a/ckanext/ga_report/helpers.py
+++ b/ckanext/ga_report/helpers.py
@@ -60,7 +60,8 @@
if not dataset:
return None
dataset_dict = get_action('package_show')({'model': model,
- 'session': model.Session},
+ 'session': model.Session,
+ 'validate': False},
{'id':dataset.id})
return dataset_dict
--- a/ckanext/ga_report/templates/ga_report/notes.html
+++ b/ckanext/ga_report/templates/ga_report/notes.html
@@ -6,11 +6,11 @@
<li class="widget-container boxed widget_text">
<h4>Notes</h4>
<ul>
- <li>'Views' is the number of sessions during which that page was viewed one or more times ('Unique Pageviews').</li>
-<!-- <li>'Visits' is the number of individual sessions initiated by all the visitors to your site, counted once for each visitor for each session.</li>-->
- <li>'Visitors' is the number of unique users visiting the site (whether once or more times).</li>
+ <li>"Views" is the number of sessions during which the page was viewed one or more times (technically known as "unique pageviews").</li>
+ <li>"Visits" is the number of unique user visits to a page, counted once for each visitor for each session.</li>
+<!--! <li>"Visitors" is the number of unique users visiting the site (whether once or more times).</li> -->
<li>These usage statistics are confined to users with javascript enabled, which excludes web crawlers and API calls.</li>
- <li>The results for only small numbers of views/visits are not shown. Where these relate to site pages, then they are available in full in the CSV download. Where these relate to users' web browser information, they are not disclosed, for privacy reasons.</li>
+ <li>The results are not shown when the number of views/visits is tiny. Where these relate to site pages, results are available in full in the CSV download. Where these relate to users' web browser information, results are not disclosed, for privacy reasons.</li>
</ul>
</li>
</html>