Fixes to show All Time data
Fixes to show All Time data

import logging import logging
import operator import operator
from ckan.lib.base import BaseController, c, render, request, response, abort from ckan.lib.base import BaseController, c, render, request, response, abort
   
import sqlalchemy import sqlalchemy
from sqlalchemy import func, cast, Integer from sqlalchemy import func, cast, Integer
import ckan.model as model import ckan.model as model
from ga_model import GA_Url, GA_Stat from ga_model import GA_Url, GA_Stat
   
log = logging.getLogger('ckanext.ga-report') log = logging.getLogger('ckanext.ga-report')
   
   
def _get_month_name(strdate):
    """Return a human-readable label for a ``YYYY-MM`` period string.

    :param strdate: period name in ``%Y-%m`` form, e.g. ``'2012-03'``.
    :returns: ``'<month name> <year>'``, e.g. ``'March 2012'``.
    :raises ValueError: if ``strdate`` does not match ``%Y-%m``.
    """
    import calendar
    from time import strptime

    parsed = strptime(strdate, '%Y-%m')
    return '%s %s' % (calendar.month_name[parsed.tm_mon], parsed.tm_year)
   
   
def _month_details(cls):
    """List the distinct periods stored for ``cls``, newest first.

    :param cls: a model class exposing a ``period_name`` column
        (called with GA_Stat and GA_Url in this module).
    :returns: list of ``(period_name, display_name)`` tuples sorted by
        ``period_name`` in descending order.
    """
    rows = model.Session.query(cls.period_name).distinct().all()
    months = [(row[0], _get_month_name(row[0])) for row in rows]
    return sorted(months, key=operator.itemgetter(0), reverse=True)
   
   
class GaReport(BaseController):
    """Site-wide analytics pages built from the GA_Stat table."""

    def csv(self, month):
        """Write every GA_Stat row for ``month`` to the response as CSV.

        :param month: period name matched against ``GA_Stat.period_name``.
        """
        import csv

        entries = model.Session.query(GA_Stat).\
            filter(GA_Stat.period_name==month).\
            order_by('GA_Stat.stat_name, GA_Stat.key').all()

        response.headers['Content-Type'] = "text/csv; charset=utf-8"

        writer = csv.writer(response)
        writer.writerow(["Period", "Statistic", "Key", "Value"])

        for entry in entries:
            writer.writerow([entry.period_name.encode('utf-8'),
                             entry.stat_name.encode('utf-8'),
                             entry.key.encode('utf-8'),
                             entry.value.encode('utf-8')])

    def index(self):
        """Render the site report for the selected (or most recent) month.

        Populates ``c.months``, ``c.month``, ``c.month_desc``,
        ``c.global_totals`` and one attribute per entry in ``keys`` below,
        then renders ``ga_report/site/index.html``.
        """
        # Get the month details by fetching distinct values and determining the
        # month names from the values.
        c.months = _month_details(GA_Stat)

        # Work out which month to show, based on query params of the first item
        c.month = request.params.get('month', c.months[0][0] if c.months else '')
        c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month])

        entries = model.Session.query(GA_Stat).\
            filter(GA_Stat.stat_name=='Totals').\
            filter(GA_Stat.period_name==c.month).\
            order_by('ga_stat.key').all()

        c.global_totals = []
        for e in entries:
            # Work on local copies: assigning to e.key would mark the
            # session-bound GA_Stat row as modified.
            label = e.key
            val = e.value
            if e.key in ['Average time on site', 'Pages per visit', 'Percent new visits']:
                val = "%.2f" % round(float(e.value), 2)
                if e.key == 'Average time on site':
                    mins, secs = divmod(float(val), 60)
                    hours, mins = divmod(mins, 60)
                    val = '%02d:%02d:%02d (%s seconds) ' % (hours, mins, secs, val)
                label = '%s *' % e.key
            c.global_totals.append((label, val))

        # stat_name in GA_Stat -> attribute name set on the template context
        keys = {
            'Browser versions': 'browsers',
            'Operating Systems versions': 'os',
            'Social sources': 'social_networks',
            'Languages': 'languages',
            'Country': 'country'
        }

        for k, v in keys.items():
            entries = model.Session.query(GA_Stat).\
                filter(GA_Stat.stat_name==k).\
                filter(GA_Stat.period_name==c.month).\
                order_by('ga_stat.value::int desc').all()
            setattr(c, v, [(s.key, s.value) for s in entries])

        return render('ga_report/site/index.html')
   
   
class GaPublisherReport(BaseController):
    """
    Displays the pageview and visit count for specific publishers based on
    the datasets associated with the publisher.
    """

    def index(self):
        """Render the top 20 publishers for one month, or for all time.

        An empty or missing ``month`` request parameter selects "all time",
        in which case the query is not restricted by ``period_name``.
        """
        # Get the month details by fetching distinct values and determining the
        # month names from the values.
        c.months = _month_details(GA_Url)

        # Work out which month to show, based on query params of the first item
        c.month = request.params.get('month', '')
        c.month_desc = 'all time'
        if c.month:
            c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month])

        connection = model.Session.connection()
        q = """
            select department_id, sum(pageviews::int) views, sum(visitors::int) visits
            from ga_url
            where department_id <> ''"""
        # Bind parameters must match the placeholders actually present in the
        # SQL, so the month is only passed when its %s is appended.
        params = []
        if c.month:
            q = q + """
                and period_name=%s
            """
            params.append(c.month)
        q = q + """
            group by department_id order by views desc limit 20;
            """

        # Add this back (before and period_name =%s) if you want to ignore publisher
        # homepage views
        # and not url like '/publisher/%%'

        c.top_publishers = []
        res = connection.execute(q, *params)

        for row in res:
            c.top_publishers.append((model.Group.get(row[0]), row[1], row[2]))

        return render('ga_report/publisher/index.html')

    def read(self, id):
        """Render the most viewed datasets of one publisher.

        :param id: reference passed to ``model.Group.get``; a 404 is raised
            when no group is found.

        With a ``month`` request parameter the figures are those of that
        period; without one they are summed over every stored period.
        """
        count = 20

        c.publisher = model.Group.get(id)
        if not c.publisher:
            abort(404, 'A publisher with that name could not be found')
        c.top_packages = []  # package, dataset_views in c.top_packages

        # Get the month details by fetching distinct values and determining the
        # month names from the values.
        c.months = _month_details(GA_Url)

        # Work out which month to show, based on query params of the first item
        c.month = request.params.get('month', '')
        if not c.month:
            c.month_desc = 'all time'
        else:
            c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month])

        # Views of the publisher's own page.
        c.publisher_page_views = 0
        q = model.Session.query(GA_Url).\
            filter(GA_Url.url=='/publisher/%s' % c.publisher.name)
        if c.month:
            entry = q.filter(GA_Url.period_name==c.month).first()
            c.publisher_page_views = entry.pageviews if entry else 0
        else:
            for e in q.all():
                c.publisher_page_views = c.publisher_page_views + int(e.pageviews)

        # Views of the publisher's dataset pages.
        q = model.Session.query(GA_Url).\
            filter(GA_Url.department_id==c.publisher.name).\
            filter(GA_Url.url.like('/dataset/%'))
        if c.month:
            q = q.filter(GA_Url.period_name==c.month)
        q = q.order_by('ga_url.pageviews::int desc')

        # NOTE(review): model.Package.get() presumably returns None for a
        # dataset that no longer exists; such rows are kept, as before.
        if c.month:
            for entry in q[:count]:
                p = model.Package.get(entry.url[len('/dataset/'):])
                c.top_packages.append((p, entry.pageviews, entry.visitors))
        else:
            # Sum every period for every dataset first and only then keep the
            # top `count`. Stopping once `count` datasets have been seen would
            # drop later rows of those datasets and rank by one month's views.
            totals = {}
            for entry in q.all():
                p = model.Package.get(entry.url[len('/dataset/'):])
                if p not in totals:
                    totals[p] = {'views': 0, 'visits': 0}
                totals[p]['views'] += int(entry.pageviews)
                totals[p]['visits'] += int(entry.visitors)

            results = [(p, v['views'], v['visits']) for p, v in totals.items()]
            results.sort(key=operator.itemgetter(1), reverse=True)
            c.top_packages = results[:count]

        return render('ga_report/publisher/read.html')
   
import logging import logging
import operator import operator
import ckan.lib.base as base import ckan.lib.base as base
import ckan.model as model import ckan.model as model
   
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)
   
def most_popular_datasets(publisher, count=20):
    """Render a snippet listing a publisher's most viewed datasets.

    Views and visits are summed per dataset over every stored GA_Url row
    whose url starts with ``/dataset/``.

    :param publisher: the publisher whose ``name`` is matched against
        ``GA_Url.department_id``.
    :param count: maximum number of datasets to list.
    :returns: the rendered ``ga_report/publisher/popular.html`` snippet, or
        ``""`` when no publisher is given.
    """
    from ckanext.ga_report.ga_model import GA_Url

    if not publisher:
        _log.error("No valid publisher passed to 'most_popular_datasets'")
        return ""

    entries = model.Session.query(GA_Url).\
        filter(GA_Url.department_id==publisher.name).\
        filter(GA_Url.url.like('/dataset/%')).\
        order_by('ga_url.pageviews::int desc').all()

    # Aggregate every row before truncating. Capping inside the loop would
    # ignore later periods of datasets already seen and pick the "top"
    # datasets by a single period's views.
    datasets = {}
    for entry in entries:
        p = model.Package.get(entry.url[len('/dataset/'):])
        if p not in datasets:
            datasets[p] = {'views': 0, 'visits': 0}
        datasets[p]['views'] += int(entry.pageviews)
        datasets[p]['visits'] += int(entry.visitors)

    results = [(p, v['views'], v['visits']) for p, v in datasets.items()]
    results.sort(key=operator.itemgetter(1), reverse=True)
    results = results[:count]

    ctx = {
        # Number of datasets actually listed (never more than `count`).
        'dataset_count': len(results),
        'datasets': results,
        'publisher': publisher
    }

    return base.render_snippet('ga_report/publisher/popular.html', **ctx)
   
<html xmlns:py="http://genshi.edgewall.org/"
  xmlns:i18n="http://genshi.edgewall.org/i18n"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  py:strip="">

  <!--! Publisher analytics index: lists c.top_publishers for c.month, where an empty month means "all time". -->

  <py:def function="page_title">Publisher Analytics for ${g.site_title}</py:def>

  <py:match path="primarysidebar">
    <li class="widget-container boxed widget_text">
      <h4>Publishers</h4>
      <p>The table shows the top 20 publishers as recorded by page views of datasets owned by that publisher, and the number of visits to each publisher's home page.</p>
    </li>
  </py:match>

  <div py:match="content">
    <h1>Publisher Analytics</h1>
    <h2>The top 20 publishers of ${c.month_desc}</h2>

    <form class="form-inline" action="${h.url_for(controller='ckanext.ga_report.controller:GaPublisherReport',action='index')}" method="get">
      <div class="controls">
        <select name="month">
          <option value='' py:attrs="{'selected': 'selected' if not c.month else None}">All time</option>

          <py:for each="val,desc in c.months">
            <option value='${val}' py:attrs="{'selected': 'selected' if c.month == val else None}">${desc}</option>
          </py:for>
        </select>
        <input class="btn button" type='submit' value="Update"/>
      </div>
    </form>

    <table class="table table-condensed table-bordered table-striped">
      <tr>
        <th>Publisher</th>
        <th>Dataset Views</th>
        <th>Visits</th>
      </tr>
      <py:for each="publisher, views, visits in c.top_publishers">
        <tr>
          <td>${h.link_to(publisher.title, h.url_for(controller='ckanext.ga_report.controller:GaPublisherReport', action='read', id=publisher.name))}
          </td>
          <td>${views}</td>
          <td>${visits}</td>
        </tr>
      </py:for>
    </table>


  </div>

  <xi:include href="../../layout.html" />

  <py:def function="optional_footer">
    <script type='text/javascript'>
      $('.nav-tabs li a').click(function (e) {
        e.preventDefault();
        $(this).tab('show');
      })
    </script>
  </py:def>

</html>
   
   
   
   
<html xmlns:py="http://genshi.edgewall.org/" <html xmlns:py="http://genshi.edgewall.org/"
xmlns:i18n="http://genshi.edgewall.org/i18n" xmlns:i18n="http://genshi.edgewall.org/i18n"
xmlns:xi="http://www.w3.org/2001/XInclude" xmlns:xi="http://www.w3.org/2001/XInclude"
py:strip=""> py:strip="">
   
<py:def function="page_title">Analytics for ${g.site_title}</py:def> <py:def function="page_title">Analytics for ${g.site_title}</py:def>
   
<py:match path="primarysidebar"> <py:match path="primarysidebar">
   
<li class="widget-container boxed widget_text"> <li class="widget-container boxed widget_text">
<h4>${c.publisher.title}</h4> <h4>${c.publisher.title}</h4>
<p> <p>
The table shows the top 20 most viewed datasets belonging to ${c.publisher.title}. The table shows the top 20 most viewed datasets belonging to ${c.publisher.title}.
</p> </p>
<p> <p>
As well as showing the number of views within ${c.month_desc}, it will also show the As well as showing the number of views for ${c.month_desc}, it will also show the