From: Ross Jones Date: Thu, 25 Oct 2012 11:17:04 +0000 Subject: Minor template tweaks X-Git-Url: http://maxious.lambdacomplex.org/git/?p=ckanext-ga-report.git&a=commitdiff&h=ae8ef255ba7b46ba15ab4f65a16ee5905c514aec --- Minor template tweaks --- --- a/README.rst +++ b/README.rst @@ -26,7 +26,7 @@ 1. Activate you CKAN python environment and install this extension's software:: $ pyenv/bin/activate - $ pip install -e git+https://github.com/okfn/ckanext-ga-report.git#egg=ckanext-ga-report + $ pip install -e git+https://github.com/datagovuk/ckanext-ga-report.git#egg=ckanext-ga-report 2. Ensure you development.ini (or similar) contains the info about your Google Analytics account and configuration:: --- a/ckanext/ga_report/controller.py +++ b/ckanext/ga_report/controller.py @@ -1,10 +1,142 @@ import logging -from ckan.lib.base import BaseController, c, render -import report_model +import operator +from ckan.lib.base import BaseController, c, render, request, response + +import sqlalchemy +from sqlalchemy import func, cast, Integer +import ckan.model as model +from ga_model import GA_Url, GA_Stat log = logging.getLogger('ckanext.ga-report') + +def _get_month_name(str): + import calendar + from time import strptime + d = strptime('2012-10', '%Y-%m') + return '%s %s' % (calendar.month_name[d.tm_mon], d.tm_year) + + +def _month_details(cls): + months = [] + vals = model.Session.query(cls.period_name).distinct().all() + for m in vals: + months.append( (m[0], _get_month_name(m))) + return sorted(months, key=operator.itemgetter(0), reverse=True) + + class GaReport(BaseController): + + def csv(self, month): + import csv + + entries = model.Session.query(GA_Stat).\ + filter(GA_Stat.period_name==month).\ + order_by('GA_Stat.stat_name, GA_Stat.key').all() + + response.headers['Content-disposition'] = 'attachment; filename=dgu_analytics_%s.csv' % (month) + response.headers['Content-Type'] = "text/csv; charset=utf-8" + + writer = csv.writer(response) + writer.writerow(["Period", 
"Statistic", "Key", "Value"]) + + for entry in entries: + writer.writerow([entry.period_name.encode('utf-8'), + entry.stat_name.encode('utf-8'), + entry.key.encode('utf-8'), + entry.value.encode('utf-8')]) + def index(self): - return render('index.html') + # Get the month details by fetching distinct values and determining the + # month names from the values. + c.months = _month_details(GA_Stat) + + # Work out which month to show, based on query params of the first item + c.month = request.params.get('month', c.months[0][0] if c.months else '') + c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month]) + + entries = model.Session.query(GA_Stat).\ + filter(GA_Stat.stat_name=='Totals').\ + filter(GA_Stat.period_name==c.month).all() + c.global_totals = [(s.key, s.value) for s in entries ] + + keys = { + 'Browser versions': 'browsers', + 'Operating Systems versions': 'os', + 'Social sources': 'social_networks', + 'Languages': 'languages', + 'Country': 'country' + } + + for k, v in keys.iteritems(): + entries = model.Session.query(GA_Stat).\ + filter(GA_Stat.stat_name==k).\ + filter(GA_Stat.period_name==c.month).\ + order_by('ga_stat.value::int desc').all() + setattr(c, v, [(s.key, s.value) for s in entries ]) + + + return render('ga_report/site/index.html') + + +class GaPublisherReport(BaseController): + """ + Displays the pageview and visit count for specific publishers based on + the datasets associated with the publisher. + """ + + def index(self): + # Get the month details by fetching distinct values and determining the + # month names from the values. 
+ c.months = _month_details(GA_Url) + + # Work out which month to show, based on query params of the first item + c.month = request.params.get('month', c.months[0][0] if c.months else '') + c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month]) + + connection = model.Session.connection() + q = """ + select department_id, sum(pageviews::int) views, sum(visitors::int) visits + from ga_url + where department_id <> '' + and not url like '/publisher/%%' + and period_name=%s + group by department_id order by views desc limit 20; + """ + c.top_publishers = [] + res = connection.execute(q, c.month) + for row in res: + c.top_publishers.append((model.Group.get(row[0]), row[1], row[2])) + + return render('ga_report/publisher/index.html') + + + def read(self, id): + c.publisher = model.Group.get(id) + c.top_packages = [] # package, dataset_views in c.top_packages + + # Get the month details by fetching distinct values and determining the + # month names from the values. + c.months = _month_details(GA_Url) + + # Work out which month to show, based on query params of the first item + c.month = request.params.get('month', c.months[0][0] if c.months else '') + c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month]) + + entry = model.Session.query(GA_Url).\ + filter(GA_Url.url=='/publisher/%s' % c.publisher.name).\ + filter(GA_Url.period_name==c.month).first() + c.publisher_page_views = entry.pageviews if entry else 0 + + entries = model.Session.query(GA_Url).\ + filter(GA_Url.department_id==c.publisher.name).\ + filter(GA_Url.period_name==c.month).\ + order_by('ga_url.pageviews::int desc')[:20] + for entry in entries: + if entry.url.startswith('/dataset/'): + p = model.Package.get(entry.url[len('/dataset/'):]) + c.top_packages.append((p,entry.pageviews,entry.visitors)) + + return render('ga_report/publisher/read.html') + --- a/ckanext/ga_report/download_analytics.py +++ b/ckanext/ga_report/download_analytics.py @@ -85,7 +85,7 @@ 
self.get_full_period_name(period_name, period_complete_day), start_date.strftime('%Y %m %d'), end_date.strftime('%Y %m %d')) - + """ data = self.download(start_date, end_date, '~/dataset/[a-z0-9-_]+') log.info('Storing Dataset Analytics for period "%s"', self.get_full_period_name(period_name, period_complete_day)) @@ -95,8 +95,8 @@ log.info('Storing Publisher Analytics for period "%s"', self.get_full_period_name(period_name, period_complete_day)) self.store(period_name, period_complete_day, data,) - ga_model.update_publisher_stats(period_name) - + """ + ga_model.update_publisher_stats(period_name) # about 30 seconds. self.sitewide_stats( period_name ) @@ -105,7 +105,7 @@ start_date = start_date.strftime('%Y-%m-%d') end_date = end_date.strftime('%Y-%m-%d') query = 'ga:pagePath=%s$' % path - metrics = 'ga:uniquePageviews, ga:visits' + metrics = 'ga:uniquePageviews, ga:visitors' sort = '-ga:uniquePageviews' # Supported query params at @@ -135,7 +135,6 @@ if 'url' in data: ga_model.update_url_stats(period_name, period_complete_day, data['url']) - def sitewide_stats(self, period_name): import calendar year, month = period_name.split('-') @@ -151,6 +150,12 @@ print ' + Fetching %s stats' % f.split('_')[1] getattr(self, f)(start_date, end_date, period_name) + def _get_results(result_data, f): + data = {} + for result in result_data: + key = f(result) + data[key] = data.get(key,0) + result[1] + return data def _totals_stats(self, start_date, end_date, period_name): """ Fetches distinct totals, total pageviews etc """ @@ -264,7 +269,7 @@ data = {} for result in result_data: - key = "%s (%s)" % (result[0],result[1]) + key = "%s (%s)" % (result[0], result[1]) data[key] = result[2] ga_model.update_sitewide_stats(period_name, "Browser versions", data) --- a/ckanext/ga_report/ga_model.py +++ b/ckanext/ga_report/ga_model.py @@ -41,7 +41,7 @@ Column('period_name', types.UnicodeText), Column('period_complete_day', types.Integer), Column('pageviews', types.UnicodeText), - 
Column('visits', types.UnicodeText), + Column('visitors', types.UnicodeText), Column('url', types.UnicodeText), Column('department_id', types.UnicodeText), ) @@ -63,7 +63,10 @@ Column('period_name', types.UnicodeText), Column('publisher_name', types.UnicodeText), Column('views', types.UnicodeText), - Column('visits', types.UnicodeText), + Column('visitors', types.UnicodeText), + Column('toplevel', types.Boolean, default=False), + Column('subpublishercount', types.Integer, default=0), + Column('parent', types.UnicodeText), ) mapper(GA_Publisher, pub_table) @@ -136,7 +139,7 @@ def update_url_stats(period_name, period_complete_day, url_data): - for url, views, visits in url_data: + for url, views, visitors in url_data: url = _normalize_url(url) department_id = _get_department_id_of_url(url) @@ -147,7 +150,7 @@ if item: item.period_name = period_name item.pageviews = views - item.visits = visits + item.visitors = visitors item.department_id = department_id model.Session.add(item) else: @@ -157,7 +160,7 @@ 'period_complete_day': period_complete_day, 'url': url, 'pageviews': views, - 'visits': visits, + 'visitors': visitors, 'department_id': department_id } model.Session.add(GA_Url(**values)) @@ -166,16 +169,30 @@ def update_publisher_stats(period_name): - publishers = get_top_level() + """ + Updates the publisher stats from the data retrieved for /dataset/* + and /publisher/*. Will run against each dataset and generates the + totals for the entire tree beneath each publisher. 
+ """ + toplevel = get_top_level() + publishers = model.Session.query(model.Group).\ + filter(model.Group.type=='publisher').\ + filter(model.Group.state=='active').all() for publisher in publishers: - views, visits = update_publisher(period_name, publisher, publisher.name) + views, visitors, subpub = update_publisher(period_name, publisher, publisher.name) + parent, parents = '', publisher.get_groups('publisher') + if parents: + parent = parents[0].name item = model.Session.query(GA_Publisher).\ filter(GA_Publisher.period_name==period_name).\ filter(GA_Publisher.publisher_name==publisher.name).first() if item: item.views = views - item.visits = visits + item.visitors = visitors item.publisher_name = publisher.name + item.toplevel = publisher in toplevel + item.subpublishercount = subpub + item.parent = parent model.Session.add(item) else: # create the row @@ -183,23 +200,27 @@ 'period_name': period_name, 'publisher_name': publisher.name, 'views': views, - 'visits': visits, + 'visitors': visitors, + 'toplevel': publisher in toplevel, + 'subpublishercount': subpub, + 'parent': parent } model.Session.add(GA_Publisher(**values)) model.Session.commit() def update_publisher(period_name, pub, part=''): - views,visits = 0, 0 + views,visitors,subpub = 0, 0, 0 for publisher in go_down_tree(pub): - f = model.Session.query(GA_Url).\ + subpub = subpub + 1 + items = model.Session.query(GA_Url).\ filter(GA_Url.period_name==period_name).\ - filter(GA_Url.url=='/publisher/' + publisher.name).first() - if f: - views = views + int(f.pageviews) - visits = visits + int(f.visits) - - return views, visits + filter(GA_Url.department_id==publisher.name).all() + for item in items: + views = views + int(item.pageviews) + visitors = visitors + int(item.visitors) + + return views, visitors, (subpub-1) def get_top_level(): --- a/ckanext/ga_report/plugin.py +++ b/ckanext/ga_report/plugin.py @@ -1,13 +1,14 @@ import logging import ckan.lib.helpers as h +import ckan.plugins as p from ckan.plugins 
import implements, toolkit -import gasnippet -import commands -import dbutil +#import gasnippet +#import commands +#import dbutil log = logging.getLogger('ckanext.ga-report') -class GoogleAnalyticsPlugin(p.SingletonPlugin): +class GAReportPlugin(p.SingletonPlugin): implements(p.IConfigurer, inherit=True) implements(p.IRoutes, inherit=True) @@ -17,9 +18,24 @@ def after_map(self, map): map.connect( - '/data/analytics/index', - controller='ckanext.ga-report.controller:GaReport', + '/data/analytics', + controller='ckanext.ga_report.controller:GaReport', action='index' + ) + map.connect( + '/data/analytics_{month}.csv', + controller='ckanext.ga_report.controller:GaReport', + action='csv' + ) + map.connect( + '/data/analytics/publisher/', + controller='ckanext.ga_report.controller:GaPublisherReport', + action='index' + ) + map.connect( + '/data/analytics/publisher/{id}', + controller='ckanext.ga_report.controller:GaPublisherReport', + action='read' ) return map --- /dev/null +++ b/ckanext/ga_report/templates/ga_report/publisher/index.html @@ -1,1 +1,63 @@ + + Publisher Analytics for ${g.site_title} + + +
  • +

    Publishers

    +
  • +
    + +
    +

    Publisher Analytics

    +

    The top 20 publishers

    + +
    +
    + + +
    +
    + + + + + + + + + + + + + + +
    Publisher | Dataset Views | Visits
    ${h.link_to(publisher.title, h.url_for(controller='ckanext.ga_report.controller:GaPublisherReport', action='read', id=publisher.name))} + ${views}${visits}
    + + +
    + + + + + + + + + + + + --- /dev/null +++ b/ckanext/ga_report/templates/ga_report/publisher/read.html @@ -1,1 +1,65 @@ + + Analytics for ${g.site_title} + + +
  • +

    ${c.publisher.title}

    +

    + The table shows the top 20 most viewed datasets belonging to ${c.publisher.title}. +

    +

    + As well as showing the number of views within ${c.month_desc}, it will also show the + number of visitors that viewed each dataset. +

    +

    +

    The dataset list page for ${c.publisher.title} was viewed ${c.publisher_page_views} times during ${c.month_desc}

    +

    +
  • +
    + +
    +

    Analytics for ${c.publisher.title}

    + +

    Top 20 most viewed datasets

    +

    Note: this data does not include API calls

    + +
    +
    + + +
    +
    + + + + + + + + + + + + + + +
    Dataset | Views | Visits
    ${h.link_to(package.title or package.name, h.url_for(controller='package', action='read', id=package.name))} + ${views}${visits}
    + + +
    + + + + + + + --- /dev/null +++ b/ckanext/ga_report/templates/ga_report/site/index.html @@ -1,1 +1,147 @@ + + Site analytics + + +
  • +

    Statistics

    +
  • +

    It is possible to export the data as CSV, which contains all of the information for ${c.month_desc}

    + +
    + +
    +

    Site statistics

    + +
    +
    + + +
    +
    + +
    + +
    +
    + + + + + + + + + + + +
    Name | Value
    ${name}${value}
    +
    +
    + + + + + + + + + + + +
    Name | Value
    ${name}${value}
    +
    +
    + + + + + + + + + + + +
    Name | Value
    ${name}${value}
    +
    +
    + + + + + + + + + + + +
    Name | Value
    ${name}${value}
    +
    +
    + + + + + + + + + + + +
    Name | Value
    ${name}${value}
    +
    +
    + + + + + + + + + + + +
    Name | Value
    ${name}${value}
    +
    + + +
    +
    + + + +
    + + + + + + + + + + + --- a/setup.py +++ b/setup.py @@ -27,11 +27,12 @@ """ [ckan.plugins] # Add plugins here, eg - ga-report=ckanext.ga_report.plugin:GaReportPlugin + ga-report=ckanext.ga_report.plugin:GAReportPlugin [paste.paster_command] loadanalytics = ckanext.ga_report.command:LoadAnalytics initdb = ckanext.ga_report.command:InitDB + getauthtoken = ckanext.ga_report.command:GetAuthToken """, )