From: Ross Jones Date: Fri, 04 Jan 2013 13:50:21 +0000 Subject: Update to handle missing downloads data X-Git-Url: https://maxious.lambdacomplex.org/git/?p=ckanext-ga-report.git&a=commitdiff&h=fb2cf804c3fc7d54a87b987399d7aa4037c1c57e --- Update to handle missing downloads data --- --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ *.py[co] *.py~ .gitignore +ckan.log # Packages *.egg --- a/ckanext/ga_report/controller.py +++ b/ckanext/ga_report/controller.py @@ -52,7 +52,7 @@ def csv(self, month): import csv - q = model.Session.query(GA_Stat) + q = model.Session.query(GA_Stat).filter(GA_Stat.stat_name!='Downloads') if month != 'all': q = q.filter(GA_Stat.period_name==month) entries = q.order_by('GA_Stat.period_name, GA_Stat.stat_name, GA_Stat.key').all() @@ -68,6 +68,26 @@ entry.stat_name.encode('utf-8'), entry.key.encode('utf-8'), entry.value.encode('utf-8')]) + + def csv_downloads(self, month): + import csv + + q = model.Session.query(GA_Stat).filter(GA_Stat.stat_name=='Downloads') + if month != 'all': + q = q.filter(GA_Stat.period_name==month) + entries = q.order_by('GA_Stat.period_name, GA_Stat.key').all() + + response.headers['Content-Type'] = "text/csv; charset=utf-8" + response.headers['Content-Disposition'] = str('attachment; filename=downloads_%s.csv' % (month,)) + + writer = csv.writer(response) + writer.writerow(["Period", "Resource URL", "Count"]) + + for entry in entries: + writer.writerow([entry.period_name.encode('utf-8'), + entry.key.encode('utf-8'), + entry.value.encode('utf-8')]) + def index(self): @@ -114,7 +134,7 @@ if k in ['Total page views', 'Total visits']: v = sum(v) else: - v = float(sum(v))/len(v) + v = float(sum(v))/float(len(v)) key, val = clean_key(k,v) c.global_totals.append((key, val)) @@ -178,6 +198,37 @@ setattr(c, v, [(k,_percent(v,total)) for k,v in entries ]) return render('ga_report/site/index.html') + + def downloads(self): + + # Get the month details by fetching distinct values and determining the + # month names from the 
values. + c.months, c.day = _month_details(GA_Stat) + + # Work out which month to show, based on query params of the first item + c.month_desc = 'all months' + c.month = request.params.get('month', '') + if c.month: + c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month]) + + c.downloads = [] + q = model.Session.query(GA_Stat).filter(GA_Stat.stat_name=='Downloads') + q = q.filter(GA_Stat.period_name==c.month) if c.month else q + q = q.order_by("ga_stat.value::int desc") + + data = collections.defaultdict(int) + for entry in q.all(): + r = model.Session.query(model.Resource).filter(model.Resource.url==entry.key).first() + if not r: + continue + data[r] += int(entry.value) + + for k,v in data.iteritems(): + c.downloads.append((k,v)) + + c.downloads = sorted(c.downloads, key=operator.itemgetter(1), reverse=True) + + return render('ga_report/site/downloads.html') class GaDatasetReport(BaseController): --- a/ckanext/ga_report/download_analytics.py +++ b/ckanext/ga_report/download_analytics.py @@ -13,6 +13,7 @@ FORMAT_MONTH = '%Y-%m' MIN_VIEWS = 50 MIN_VISITS = 20 +MIN_DOWNLOADS = 10 class DownloadAnalytics(object): '''Downloads and stores analytics info''' @@ -203,7 +204,7 @@ start_date = '%s-01' % period_name end_date = '%s-%s' % (period_name, last_day_of_month) funcs = ['_totals_stats', '_social_stats', '_os_stats', - '_locale_stats', '_browser_stats', '_mobile_stats'] + '_locale_stats', '_browser_stats', '_mobile_stats', '_download_stats'] for f in funcs: log.info('Downloading analytics for %s' % f.split('_')[1]) getattr(self, f)(start_date, end_date, period_name, period_complete_day) @@ -250,7 +251,7 @@ ids='ga:' + self.profile_id, filters='ga:pagePath==%s' % (path,), start_date=start_date, - metrics='ga:bounces,ga:pageviews', + metrics='ga:visitBounceRate', dimensions='ga:pagePath', max_results=10000, end_date=end_date).execute() @@ -260,10 +261,10 @@ path, result_data) return results = result_data[0] - bounces, total = [float(x) for x in 
result_data[0][1:]] - pct = 100 * bounces/total - log.info('%d bounces from %d total == %s', bounces, total, pct) - ga_model.update_sitewide_stats(period_name, "Totals", {'Bounce rate (home page)': pct}, + bounces = float(results[1]) + # visitBounceRate is already a % + log.info('Google reports visitBounceRate as %s', bounces) + ga_model.update_sitewide_stats(period_name, "Totals", {'Bounce rate (home page)': float(bounces)}, period_complete_day) @@ -290,6 +291,31 @@ self._filter_out_long_tail(data, MIN_VIEWS) ga_model.update_sitewide_stats(period_name, "Country", data, period_complete_day) + + def _download_stats(self, start_date, end_date, period_name, period_complete_day): + """ Fetches the number of download events per event label (resource URL) """ + results = self.service.data().ga().get( + ids='ga:' + self.profile_id, + start_date=start_date, + filters='ga:eventAction==download', + metrics='ga:totalEvents', + sort='-ga:totalEvents', + dimensions="ga:eventLabel", + max_results=10000, + end_date=end_date).execute() + result_data = results.get('rows') + if not result_data: + # We may not have data for this time period, so we need to bail + # early. 
+ log.info("There is no downloads data for this time period") + return + + # [[url, count], [url, count]] + data = {} + for result in result_data: + data[result[0]] = data.get(result[0], 0) + int(result[1]) + self._filter_out_long_tail(data, MIN_DOWNLOADS) + ga_model.update_sitewide_stats(period_name, "Downloads", data, period_complete_day) def _social_stats(self, start_date, end_date, period_name, period_complete_day): """ Finds out which social sites people are referred from """ --- a/ckanext/ga_report/ga_model.py +++ b/ckanext/ga_report/ga_model.py @@ -168,8 +168,12 @@ count = model.Session.query(GA_Url).\ filter(GA_Url.period_name == 'All').count() log.debug("Deleting %d 'All' records" % count) - model.Session.query(GA_Url).\ + count = model.Session.query(GA_Url).\ filter(GA_Url.period_name == 'All').delete() + log.debug("Deleted %d 'All' records" % count) + + model.Session.flush() + model.Session.commit() model.repo.commit_and_remove() --- a/ckanext/ga_report/plugin.py +++ b/ckanext/ga_report/plugin.py @@ -42,6 +42,16 @@ controller='ckanext.ga_report.controller:GaReport', action='csv' ) + map.connect( + '/data/site-usage/downloads', + controller='ckanext.ga_report.controller:GaReport', + action='downloads' + ) + map.connect( + '/data/site-usage/downloads_{month}.csv', + controller='ckanext.ga_report.controller:GaReport', + action='csv_downloads' + ) # GaDatasetReport map.connect( --- a/ckanext/ga_report/templates/ga_report/ga_util.html +++ b/ckanext/ga_report/templates/ga_report/ga_util.html @@ -44,6 +44,23 @@ + + + + + + + + + + + +
NameDownloads
+ ${resource.resource_group.package.title}
+ ${h.link_to((resource.name or resource.description).strip() or "No name", h.url_for(controller='package', action='resource_read', id=resource.resource_group.package.name, resource_id=resource.id))}
+
${value}
+ +
@@ -55,6 +72,9 @@
  • None Datasets
  • +
  • + None Downloads +
  • --- /dev/null +++ b/ckanext/ga_report/templates/ga_report/site/downloads.html @@ -1,1 +1,59 @@ + + + + Downloads + + +
  • +

    Download

    +

    + Download as CSV
    +

    +
  • + + +
    + +
    +

    Downloads

    + ${usage_nav('Downloads')} + +
    +
    + + ${month_selector(c.month, c.months, c.day)} + + +
    +
    + + + ${downloads_table(c.downloads)} + + +

    No data

    +

    There is no download data available for this month

    +
    +
    + + + + + + + + + + +