Moved minor nav into util file and implemented
Moved minor nav into util file and implemented

import logging import logging
import datetime import datetime
   
from ckan.lib.cli import CkanCommand from ckan.lib.cli import CkanCommand
# No other CKAN imports allowed until _load_config is run, # No other CKAN imports allowed until _load_config is run,
# or logging is disabled # or logging is disabled
   
   
class InitDB(CkanCommand): class InitDB(CkanCommand):
"""Initialise the extension's database tables """Initialise the extension's database tables
""" """
summary = __doc__.split('\n')[0] summary = __doc__.split('\n')[0]
usage = __doc__ usage = __doc__
max_args = 0 max_args = 0
min_args = 0 min_args = 0
   
def command(self): def command(self):
self._load_config() self._load_config()
   
import ckan.model as model import ckan.model as model
model.Session.remove() model.Session.remove()
model.Session.configure(bind=model.meta.engine) model.Session.configure(bind=model.meta.engine)
log = logging.getLogger('ckanext.ga-report') log = logging.getLogger('ckanext.ga-report')
   
import ga_model import ga_model
ga_model.init_tables() ga_model.init_tables()
log.info("DB tables are setup") log.info("DB tables are setup")
   
   
class GetAuthToken(CkanCommand): class GetAuthToken(CkanCommand):
""" Get's the Google auth token """ Get's the Google auth token
   
Usage: paster getauthtoken <credentials_file> Usage: paster getauthtoken <credentials_file>
   
Where <credentials_file> is the file name containing the details Where <credentials_file> is the file name containing the details
for the service (obtained from https://code.google.com/apis/console). for the service (obtained from https://code.google.com/apis/console).
By default this is set to credentials.json By default this is set to credentials.json
""" """
summary = __doc__.split('\n')[0] summary = __doc__.split('\n')[0]
usage = __doc__ usage = __doc__
max_args = 0 max_args = 0
min_args = 0 min_args = 0
   
def command(self): def command(self):
""" """
In this case we don't want a valid service, but rather just to In this case we don't want a valid service, but rather just to
force the user through the auth flow. We allow this to complete to force the user through the auth flow. We allow this to complete to
act as a form of verification instead of just getting the token and act as a form of verification instead of just getting the token and
assuming it is correct. assuming it is correct.
""" """
from ga_auth import init_service from ga_auth import init_service
init_service('token.dat', init_service('token.dat',
self.args[0] if self.args self.args[0] if self.args
else 'credentials.json') else 'credentials.json')
   
   
class LoadAnalytics(CkanCommand): class LoadAnalytics(CkanCommand):
"""Get data from Google Analytics API and save it """Get data from Google Analytics API and save it
in the ga_model in the ga_model
   
Usage: paster loadanalytics <tokenfile> <time-period> Usage: paster loadanalytics <tokenfile> <time-period>
   
Where <tokenfile> is the name of the auth token file from Where <tokenfile> is the name of the auth token file from
the getauthtoken step. the getauthtoken step.
   
And where <time-period> is: And where <time-period> is:
all - data for all time all - data for all time
latest - (default) just the 'latest' data latest - (default) just the 'latest' data
YYYY-MM-DD - just data for all time periods going YYYY-MM - just data for the specific month
back to (and including) this date  
""" """
summary = __doc__.split('\n')[0] summary = __doc__.split('\n')[0]
usage = __doc__ usage = __doc__
max_args = 2 max_args = 2
min_args = 1 min_args = 1
   
def command(self): def command(self):
self._load_config() self._load_config()
   
from download_analytics import DownloadAnalytics from download_analytics import DownloadAnalytics
from ga_auth import (init_service, get_profile_id) from ga_auth import (init_service, get_profile_id)
   
try: try:
svc = init_service(self.args[0], None) svc = init_service(self.args[0], None)
except TypeError: except TypeError:
print ('Have you correctly run the getauthtoken task and ' print ('Have you correctly run the getauthtoken task and '
'specified the correct file here') 'specified the correct file here')
return return
   
downloader = DownloadAnalytics(svc, profile_id=get_profile_id(svc)) downloader = DownloadAnalytics(svc, profile_id=get_profile_id(svc))
   
time_period = self.args[1] if self.args and len(self.args) > 1 \ time_period = self.args[1] if self.args and len(self.args) > 1 \
else 'latest' else 'latest'
if time_period == 'all': if time_period == 'all':
downloader.all_() downloader.all_()
elif time_period == 'latest': elif time_period == 'latest':
downloader.latest() downloader.latest()
else: else:
since_date = datetime.datetime.strptime(time_period, '%Y-%m-%d') # The month to use
downloader.since_date(since_date) for_date = datetime.datetime.strptime(time_period, '%Y-%m')
  downloader.specific_month(for_date)
   
  import re
import logging import logging
import operator import operator
from ckan.lib.base import BaseController, c, render, request, response import collections
  from ckan.lib.base import BaseController, c, render, request, response, abort
   
import sqlalchemy import sqlalchemy
from sqlalchemy import func, cast, Integer from sqlalchemy import func, cast, Integer
import ckan.model as model import ckan.model as model
from ga_model import GA_Url, GA_Stat from ga_model import GA_Url, GA_Stat
   
log = logging.getLogger('ckanext.ga-report') log = logging.getLogger('ckanext.ga-report')
   
   
def _get_month_name(str): def _get_month_name(strdate):
import calendar import calendar
from time import strptime from time import strptime
d = strptime('2012-10', '%Y-%m') d = strptime(strdate, '%Y-%m')
return '%s %s' % (calendar.month_name[d.tm_mon], d.tm_year) return '%s %s' % (calendar.month_name[d.tm_mon], d.tm_year)
   
   
def _month_details(cls): def _month_details(cls):
months = [] months = []
vals = model.Session.query(cls.period_name).distinct().all() vals = model.Session.query(cls.period_name).distinct().all()
for m in vals: for m in vals:
months.append( (m[0], _get_month_name(m))) months.append( (m[0], _get_month_name(m[0])))
return sorted(months, key=operator.itemgetter(0), reverse=True) return sorted(months, key=operator.itemgetter(0), reverse=True)
   
   
class GaReport(BaseController): class GaReport(BaseController):
   
def csv(self, month): def csv(self, month):
import csv import csv
   
entries = model.Session.query(GA_Stat).\ q = model.Session.query(GA_Stat)
filter(GA_Stat.period_name==month).\ if month != 'all':
order_by('GA_Stat.stat_name, GA_Stat.key').all() q = q.filter(GA_Stat.period_name==month)
  entries = q.order_by('GA_Stat.period_name, GA_Stat.stat_name, GA_Stat.key').all()
response.headers['Content-disposition'] = 'attachment; filename=dgu_analytics_%s.csv' % (month)  
response.headers['Content-Type'] = "text/csv; charset=utf-8" response.headers['Content-Type'] = "text/csv; charset=utf-8"
   
writer = csv.writer(response) writer = csv.writer(response)
writer.writerow(["Period", "Statistic", "Key", "Value"]) writer.writerow(["Period", "Statistic", "Key", "Value"])
   
for entry in entries: for entry in entries:
writer.writerow([entry.period_name.encode('utf-8'), writer.writerow([entry.period_name.encode('utf-8'),
entry.stat_name.encode('utf-8'), entry.stat_name.encode('utf-8'),
entry.key.encode('utf-8'), entry.key.encode('utf-8'),
entry.value.encode('utf-8')]) entry.value.encode('utf-8')])
   
def index(self): def index(self):
   
# Get the month details by fetching distinct values and determining the # Get the month details by fetching distinct values and determining the
# month names from the values. # month names from the values.
c.months = _month_details(GA_Stat) c.months = _month_details(GA_Stat)
   
# Work out which month to show, based on query params of the first item # Work out which month to show, based on query params of the first item
c.month = request.params.get('month', c.months[0][0] if c.months else '') c.month_desc = 'all time'
c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month]) c.month = request.params.get('month', '')
  if c.month:
entries = model.Session.query(GA_Stat).\ c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month])
filter(GA_Stat.stat_name=='Totals').\  
filter(GA_Stat.period_name==c.month).all() q = model.Session.query(GA_Stat).\
c.global_totals = [(s.key, s.value) for s in entries ] filter(GA_Stat.stat_name=='Totals')
  if c.month:
  q = q.filter(GA_Stat.period_name==c.month)
  entries = q.order_by('ga_stat.key').all()
   
  def clean_key(key, val):
  if key in ['Average time on site', 'Pages per visit', 'Percent new visits']:
  val = "%.2f" % round(float(val), 2)
  if key == 'Average time on site':
  mins, secs = divmod(float(val), 60)
  hours, mins = divmod(mins, 60)
  val = '%02d:%02d:%02d (%s seconds) ' % (hours, mins, secs, val)
  key = '%s *' % key
  if key in ['Bounces', 'Total pageviews']:
  val = int(val)
  return key, val
   
  c.global_totals = []
  if c.month:
  for e in entries:
  key, val = clean_key(e.key, e.value)
  c.global_totals.append((key, val))
  else:
  d = collections.defaultdict(list)
  for e in entries:
  d[e.key].append(float(e.value))
  for k, v in d.iteritems():
  if k in ['Bounces', 'Total pageviews']:
  v = sum(v)
  else:
  v = float(sum(v))/len(v)
  key, val = clean_key(k,v)
  c.global_totals.append((key, val))
  c.global_totals = sorted(c.global_totals, key=operator.itemgetter(0))
   
keys = { keys = {
'Browser versions': 'browsers', 'Browser versions': 'browsers',
'Operating Systems versions': 'os', 'Operating Systems versions': 'os',
'Social sources': 'social_networks', 'Social sources': 'social_networks',
'Languages': 'languages', 'Languages': 'languages',
'Country': 'country' 'Country': 'country'
} }
   
  browser_version_re = re.compile("(.*)\((.*)\)")
for k, v in keys.iteritems(): for k, v in keys.iteritems():
entries = model.Session.query(GA_Stat).\  
filter(GA_Stat.stat_name==k).\ def clean_field(key):
filter(GA_Stat.period_name==c.month).\ if k != 'Browser versions':
order_by('ga_stat.value::int desc').all() return key
setattr(c, v, [(s.key, s.value) for s in entries ]) m = browser_version_re.match(key)
  browser = m.groups()[0].strip()
  ver = m.groups()[1]
  parts = ver.split('.')
  if len(parts) > 1:
  if parts[1][0] == '0':
  ver = parts[0]
  else:
  ver = "%s.%s" % (parts[0],parts[1])
  if browser in ['Safari','Android Browser']: # Special case complex version nums
  ver = parts[0]
  if len(ver) > 2:
  ver = "%s%sX" % (ver[0], ver[1])
   
  return "%s (%s)" % (browser, ver,)
   
  q = model.Session.query(GA_Stat).\
  filter(GA_Stat.stat_name==k)
  if c.month:
  entries = []
  q = q.filter(GA_Stat.period_name==c.month).\
  order_by('ga_stat.value::int desc')
   
  d = collections.defaultdict(int)
  for e in q.all():
  d[clean_field(e.key)] += int(e.value)
  entries = []
  for key, val in d.iteritems():
  entries.append((key,val,))
  entries = sorted(entries, key=operator.itemgetter(1), reverse=True)
   
  setattr(c, v, [(k,v) for k,v in entries ])
   
   
   
return render('ga_report/site/index.html') return render('ga_report/site/index.html')
   
   
class GaPublisherReport(BaseController): class GaPublisherReport(BaseController):
""" """
Displays the pageview and visit count for specific publishers based on Displays the pageview and visit count for specific publishers based on
the datasets associated with the publisher. the datasets associated with the publisher.
""" """
   
def index(self): def index(self):
   
# Get the month details by fetching distinct values and determining the # Get the month details by fetching distinct values and determining the
# month names from the values. # month names from the values.
c.months = _month_details(GA_Url) c.months = _month_details(GA_Url)
   
# Work out which month to show, based on query params of the first item # Work out which month to show, based on query params of the first item
c.month = request.params.get('month', c.months[0][0] if c.months else '') c.month = request.params.get('month', '')
c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month]) c.month_desc = 'all time'
  if c.month:
  c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month])
   
connection = model.Session.connection() connection = model.Session.connection()
q = """ q = """
select department_id, sum(pageviews::int) views, sum(visitors::int) visits select department_id, sum(pageviews::int) views, sum(visitors::int) visits
from ga_url from ga_url
where department_id <> '' where department_id <> ''"""
and not url like '/publisher/%%' if c.month:
and period_name=%s q = q + """
group by department_id order by views desc limit 20; and period_name=%s
""" """
  q = q + """
  group by department_id order by views desc limit 20;
  """
   
  # Add this back (before and period_name =%s) if you want to ignore publisher
  # homepage views
  # and not url like '/publisher/%%'
   
c.top_publishers = [] c.top_publishers = []
res = connection.execute(q, c.month) res = connection.execute(q, c.month)
   
for row in res: for row in res:
c.top_publishers.append((model.Group.get(row[0]), row[1], row[2])) c.top_publishers.append((model.Group.get(row[0]), row[1], row[2]))
   
return render('ga_report/publisher/index.html') return render('ga_report/publisher/index.html')
   
   
def read(self, id): def read(self, id):
  count = 20
   
c.publisher = model.Group.get(id) c.publisher = model.Group.get(id)
  if not c.publisher:
  abort(404, 'A publisher with that name could not be found')
c.top_packages = [] # package, dataset_views in c.top_packages c.top_packages = [] # package, dataset_views in c.top_packages
   
# Get the month details by fetching distinct values and determining the # Get the month details by fetching distinct values and determining the
# month names from the values. # month names from the values.
c.months = _month_details(GA_Url) c.months = _month_details(GA_Url)
   
# Work out which month to show, based on query params of the first item # Work out which month to show, based on query params of the first item
c.month = request.params.get('month', c.months[0][0] if c.months else '') c.month = request.params.get('month', '')
c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month]) if not c.month:
  c.month_desc = 'all time'
entry = model.Session.query(GA_Url).\ else:
filter(GA_Url.url=='/publisher/%s' % c.publisher.name).\ c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month])
filter(GA_Url.period_name==c.month).first()  
c.publisher_page_views = entry.pageviews if entry else 0 c.publisher_page_views = 0
  q = model.Session.query(GA_Url).\
entries = model.Session.query(GA_Url).\ filter(GA_Url.url=='/publisher/%s' % c.publisher.name)
  if c.month:
  entry = q.filter(GA_Url.period_name==c.month).first()
  c.publisher_page_views = entry.pageviews if entry else 0
  else:
  for e in q.all():
  c.publisher_page_views = c.publisher_page_views + int(e.pageviews)
   
   
  q = model.Session.query(GA_Url).\
filter(GA_Url.department_id==c.publisher.name).\ filter(GA_Url.department_id==c.publisher.name).\
filter(GA_Url.period_name==c.month).\ filter(GA_Url.url.like('/dataset/%'))
order_by('ga_url.pageviews::int desc')[:20] if c.month:
for entry in entries: q = q.filter(GA_Url.period_name==c.month)
if entry.url.startswith('