Changes related to presentation, and new tabs.
Changes related to presentation, and new tabs.

- Changed nearly all values to percentages of the total
- Added non-version info for OS and browsers
- Made tabs into dropdowns for Browser/OS

  import re
  import csv
  import sys
import logging import logging
import operator import operator
from ckan.lib.base import BaseController, c, render, request, response import collections
  from ckan.lib.base import BaseController, c, render, request, response, abort
   
import sqlalchemy import sqlalchemy
from sqlalchemy import func, cast, Integer from sqlalchemy import func, cast, Integer
import ckan.model as model import ckan.model as model
from ga_model import GA_Url, GA_Stat from ga_model import GA_Url, GA_Stat
   
log = logging.getLogger('ckanext.ga-report') log = logging.getLogger('ckanext.ga-report')
   
   
def _get_month_name(strdate): def _get_month_name(strdate):
import calendar import calendar
from time import strptime from time import strptime
d = strptime(strdate, '%Y-%m') d = strptime(strdate, '%Y-%m')
return '%s %s' % (calendar.month_name[d.tm_mon], d.tm_year) return '%s %s' % (calendar.month_name[d.tm_mon], d.tm_year)
   
   
def _month_details(cls): def _month_details(cls):
months = [] months = []
vals = model.Session.query(cls.period_name).distinct().all() vals = model.Session.query(cls.period_name).distinct().all()
for m in vals: for m in vals:
months.append( (m[0], _get_month_name(m[0]))) months.append( (m[0], _get_month_name(m[0])))
return sorted(months, key=operator.itemgetter(0), reverse=True) return sorted(months, key=operator.itemgetter(0), reverse=True)
   
   
class GaReport(BaseController): class GaReport(BaseController):
   
def csv(self, month): def csv(self, month):
import csv import csv
   
entries = model.Session.query(GA_Stat).\ q = model.Session.query(GA_Stat)
filter(GA_Stat.period_name==month).\ if month != 'all':
order_by('GA_Stat.stat_name, GA_Stat.key').all() q = q.filter(GA_Stat.period_name==month)
  entries = q.order_by('GA_Stat.period_name, GA_Stat.stat_name, GA_Stat.key').all()
   
response.headers['Content-Type'] = "text/csv; charset=utf-8" response.headers['Content-Type'] = "text/csv; charset=utf-8"
  response.headers['Content-Disposition'] = str('attachment; filename=stats_%s.csv' % (month,))
   
writer = csv.writer(response) writer = csv.writer(response)
writer.writerow(["Period", "Statistic", "Key", "Value"]) writer.writerow(["Period", "Statistic", "Key", "Value"])
   
for entry in entries: for entry in entries:
writer.writerow([entry.period_name.encode('utf-8'), writer.writerow([entry.period_name.encode('utf-8'),
entry.stat_name.encode('utf-8'), entry.stat_name.encode('utf-8'),
entry.key.encode('utf-8'), entry.key.encode('utf-8'),
entry.value.encode('utf-8')]) entry.value.encode('utf-8')])
   
def index(self): def index(self):
   
# Get the month details by fetching distinct values and determining the # Get the month details by fetching distinct values and determining the
# month names from the values. # month names from the values.
c.months = _month_details(GA_Stat) c.months = _month_details(GA_Stat)
   
# Work out which month to show, based on query params of the first item # Work out which month to show, based on query params of the first item
c.month = request.params.get('month', c.months[0][0] if c.months else '') c.month_desc = 'all time'
c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month]) c.month = request.params.get('month', '')
  if c.month:
entries = model.Session.query(GA_Stat).\ c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month])
filter(GA_Stat.stat_name=='Totals').\  
filter(GA_Stat.period_name==c.month).\ q = model.Session.query(GA_Stat).\
order_by('ga_stat.key').all() filter(GA_Stat.stat_name=='Totals')
  if c.month:
  q = q.filter(GA_Stat.period_name==c.month)
  entries = q.order_by('ga_stat.key').all()
   
  def clean_key(key, val):
  if key in ['Average time on site', 'Pages per visit', 'Percent new visits']:
  val = "%.2f" % round(float(val), 2)
  if key == 'Average time on site':
  mins, secs = divmod(float(val), 60)
  hours, mins = divmod(mins, 60)
  val = '%02d:%02d:%02d (%s seconds) ' % (hours, mins, secs, val)
  if key == 'Percent new visits':
  key = 'New visits'
  val = "%s%%" % val
  if key in ['Bounces', 'Total pageviews']:
  val = int(val)
  if key == 'Total pageviews':
  key = 'Total page views'
   
  return key, val
   
c.global_totals = [] c.global_totals = []
for e in entries: if c.month:
val = e.value for e in entries:
if e.key in ['Average time on site', 'Pages per visit', 'Percent new visits']: key, val = clean_key(e.key, e.value)
val = "%.2f" % round(float(e.value), 2) c.global_totals.append((key, val))
e.key = '%s *' % e.key else:
c.global_totals.append((e.key, val)) d = collections.defaultdict(list)
  for e in entries:
  d[e.key].append(float(e.value))
  for k, v in d.iteritems():
  if k in ['Bounces', 'Total pageviews']:
  v = sum(v)
  else:
  v = float(sum(v))/len(v)
  key, val = clean_key(k,v)
  c.global_totals.append((key, val))
  c.global_totals = sorted(c.global_totals, key=operator.itemgetter(0))
   
keys = { keys = {
'Browser versions': 'browsers', 'Browser versions': 'browser_versions',
'Operating Systems versions': 'os', 'Browsers': 'browsers',
  'Operating Systems versions': 'os_versions',
  'Operating Systems': 'os',
'Social sources': 'social_networks', 'Social sources': 'social_networks',
'Languages': 'languages', 'Languages': 'languages',
'Country': 'country' 'Country': 'country'
} }
   
  browser_version_re = re.compile("(.*)\((.*)\)")
for k, v in keys.iteritems(): for k, v in keys.iteritems():
entries = model.Session.query(GA_Stat).\  
filter(GA_Stat.stat_name==k).\ def clean_field(key):
filter(GA_Stat.period_name==c.month).\ if k != 'Browser versions':
order_by('ga_stat.value::int desc').all() return key
setattr(c, v, [(s.key, s.value) for s in entries ]) m = browser_version_re.match(key)
  browser = m.groups()[0].strip()
  ver = m.groups()[1]
  parts = ver.split('.')
  if len(parts) > 1:
  if parts[1][0] == '0':
  ver = parts[0]
  else:
  ver = "%s.%s" % (parts[0],parts[1])
  if browser in ['Safari','Android Browser']: # Special case complex version nums
  ver = parts[0]
  if len(ver) > 2:
  ver = "%s%sX" % (ver[0], ver[1])
   
  return "%s (%s)" % (browser, ver,)
   
  q = model.Session.query(GA_Stat).\
  filter(GA_Stat.stat_name==k)
  if c.month:
  entries = []
  q = q.filter(GA_Stat.period_name==c.month).\
  order_by('ga_stat.value::int desc')
   
  d = collections.defaultdict(int)
  for e in q.all():
  d[clean_field(e.key)] += int(e.value)
  entries = []
  for key, val in d.iteritems():
  entries.append((key,val,))
  entries = sorted(entries, key=operator.itemgetter(1), reverse=True)
   
  def percent(num, total):
  p = 100 * float(num)/float(total)
  return "%.2f%%" % round(p, 2)
   
  # Get the total for each set of values and then set the value as
  # a percentage of the total
  total = sum([num for _,num in entries])
  setattr(c, v, [(k,percent(v,total)) for k,v in entries ])
   
return render('ga_report/site/index.html') return render('ga_report/site/index.html')
   
   
class GaPublisherReport(BaseController): class GaPublisherReport(BaseController):
""" """
Displays the pageview and visit count for specific publishers based on Displays the pageview and visit count for specific publishers based on
the datasets associated with the publisher. the datasets associated with the publisher.
""" """
  def csv(self, month):
   
  c.month = month if not month =='all' else ''
  response.headers['Content-Type'] = "text/csv; charset=utf-8"
  response.headers['Content-Disposition'] = str('attachment; filename=publishers_%s.csv' % (month,))
   
  writer = csv.writer(response)
  writer.writerow(["Publisher", "Views", "Visits", "Period Name"])
   
  for publisher,view,visit in self._get_publishers(None):
  writer.writerow([publisher.title.encode('utf-8'),
  view,
  visit,
  month])
   
   
   
  def publisher_csv(self, id, month):
   
  c.month = month if not month =='all' else ''
  c.publisher = model.Group.get(id)
  if not c.publisher:
  abort(404, 'A publisher with that name could not be found')
   
  packages = self._get_packages(c.publisher)
  response.headers['Content-Type'] = "text/csv; charset=utf-8"
  response.headers['Content-Disposition'] = \
  str('attachment; filename=%s_%s.csv' % (c.publisher.name, month,))
   
  writer = csv.writer(response)
  writer.writerow(["Publisher", "Views", "Visits", "Period Name"])
   
  for package,view,visit in packages:
  writer.writerow([package.title.encode('utf-8'),
  view,
  visit,
  month])
   
   
   
def index(self): def index(self):
   
# Get the month details by fetching distinct values and determining the # Get the month details by fetching distinct values and determining the
# month names from the values. # month names from the values.
c.months = _month_details(GA_Url) c.months = _month_details(GA_Url)
   
# Work out which month to show, based on query params of the first item # Work out which month to show, based on query params of the first item
c.month = request.params.get('month', c.months[0][0] if c.months else '') c.month = request.params.get('month', '')
c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month]) c.month_desc = 'all time'
  if c.month:
  c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month])
   
  c.top_publishers = self._get_publishers()
   
  return render('ga_report/publisher/index.html')
   
  def _get_publishers(self, limit=20):
connection = model.Session.connection() connection = model.Session.connection()
q = """ q = """
select department_id, sum(pageviews::int) views, sum(visitors::int) visits select department_id, sum(pageviews::int) views, sum(visitors::int) visits
from ga_url from ga_url
where department_id <> '' where department_id <> ''"""
and period_name=%s if c.month:
group by department_id order by views desc limit 20; q = q + """
""" and period_name=%s
  """
  q = q + """
  group by department_id order by views desc
  """
  if limit:
  q = q + " limit %s;" % (limit)
   
# Add this back (before and period_name =%s) if you want to ignore publisher # Add this back (before and period_name =%s) if you want to ignore publisher
# homepage views # homepage views
# and not url like '/publisher/%%' # and not url like '/publisher/%%'
   
c.top_publishers = [] top_publishers = []
res = connection.execute(q, c.month) res = connection.execute(q, c.month)
   
for row in res: for row in res:
c.top_publishers.append((model.Group.get(row[0]), row[1], row[2])) g = model.Group.get(row[0])
  if g:
return render('ga_report/publisher/index.html') top_publishers.append((g, row[1], row[2]))
  return top_publishers
   
  def _get_packages(self, publisher, count=-1):
  if count == -1:
  count = sys.maxint
   
  top_packages = []
  q = model.Session.query(GA_Url).\
  filter(GA_Url.department_id==publisher.name).\
  filter(GA_Url.url.like('/dataset/%'))
  if c.month:
  q = q.filter(GA_Url.period_name==c.month)
  q = q.order_by('ga_url.pageviews::int desc')
   
  if c.month:
  for entry in q[:count]:
  p = model.Package.get(entry.url[len('/dataset/'):])
  top_packages.append((p,entry.pageviews,entry.visitors))
  else:
  ds = {}
  for entry in q.all():
  if len(ds) >= count:
  break
  p = model.Package.get(entry.url[len('/dataset/'):])
  if not p in ds:
  ds[p] = {'views':0, 'visits': 0}
  ds[p]['views'] = ds[p]['views'] + int(entry.pageviews)
  ds[p]['visits'] = ds[p]['visits'] + int(entry.visitors)
   
  results = []
  for k, v in ds.iteritems():
  results.append((k,v['views'],v['visits']))
   
  top_packages = sorted(results, key=operator.itemgetter(1), reverse=True)
  return top_packages
   
   
def read(self, id): def read(self, id):
  count = 20
   
c.publisher = model.Group.get(id) c.publisher = model.Group.get(id)
  if not c.publisher:
  abort(404, 'A publisher with that name could not be found')
c.top_packages = [] # package, dataset_views in c.top_packages c.top_packages = [] # package, dataset_views in c.top_packages
   
# Get the month details by fetching distinct values and determining the # Get the month details by fetching distinct values and determining the
# month names from the values. # month names from the values.
c.months = _month_details(GA_Url) c.months = _month_details(GA_Url)
   
# Work out which month to show, based on query params of the first item # Work out which month to show, based on query params of the first item
c.month = request.params.get('month', c.months[0][0] if c.months else '') c.month = request.params.get('month', '')
c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month]) if not c.month:
  c.month_desc = 'all time'
entry = model.Session.query(GA_Url).\ else:
filter(GA_Url.url=='/publisher/%s' % c.publisher.name).\ c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month])
filter(GA_Url.period_name==c.month).first()  
c.publisher_page_views = entry.pageviews if entry else 0 c.publisher_page_views = 0
  q = model.Session.query(GA_Url).\
entries = model.Session.query(GA_Url).\ filter(GA_Url.url=='/publisher/%s' % c.publisher.name)
filter(GA_Url.department_id==c.publisher.name).\ if c.month:
filter(GA_Url.period_name==c.month).\ entry = q.filter(GA_Url.period_name==c.month).first()
order_by('ga_url.pageviews::int desc')[:20] c.publisher_page_views = entry.pageviews if entry else 0
for entry in entries: else:
if entry.url.startswith('/dataset/'): for e in q.all():
p = model.Package.get(entry.url[len('/dataset/'):]) c.publisher_page_views = c.publisher_page_views + int(e.pageviews)
c.top_packages.append((p,entry.pageviews,entry.visitors))  
  c.top_packages = self._get_packages(c.publisher, 20)
   
return render('ga_report/publisher/read.html') return render('ga_report/publisher/read.html')
   
  import logging
  import operator
  import ckan.lib.base as base
  import ckan.model as model
 
  _log = logging.getLogger(__name__)
 
  def most_popular_datasets(publisher, count=20):
  from ckanext.ga_report.ga_model import GA_Url
 
  if not publisher:
  _log.e