gitphp 0.2.9.1 :: ckanext-ga-report.git/commitdiff

Finished integrating graphs into site_usage/publishers and site_usage/datasets, including some interesting queries.

16 files changed: (show all)
ckanext/ga_report/command.py
ckanext/ga_report/controller.py
ckanext/ga_report/download_analytics.py
ckanext/ga_report/ga_model.py
ckanext/ga_report/public/css/ga_report.css (new)
ckanext/ga_report/public/scripts/vendor/d3.layout.min.js (new)
ckanext/ga_report/public/scripts/vendor/d3.v2.js (new)
ckanext/ga_report/public/scripts/vendor/jquery.sparkline.modified.js (new)
ckanext/ga_report/public/scripts/vendor/rickshaw.min.css (new)
ckanext/ga_report/public/scripts/vendor/rickshaw.min.js (new)
ckanext/ga_report/templates/ga_report/ga_util.html
ckanext/ga_report/templates/ga_report/notes.html
ckanext/ga_report/templates/ga_report/publisher/index.html
ckanext/ga_report/templates/ga_report/publisher/read.html
ckanext/ga_report/templates/ga_report/site/index.html
setup.py

file:a/ckanext/ga_report/command.py -> file:b/ckanext/ga_report/command.py

import logging	import logging
import datetime	import datetime
import os	import os

from pylons import config	from pylons import config

from ckan.lib.cli import CkanCommand	from ckan.lib.cli import CkanCommand
# No other CKAN imports allowed until _load_config is run,	# No other CKAN imports allowed until _load_config is run,
# or logging is disabled	# or logging is disabled


class InitDB(CkanCommand):	class InitDB(CkanCommand):
"""Initialise the extension's database tables	"""Initialise the extension's database tables
"""	"""
summary = __doc__.split('\n')[0]	summary = __doc__.split('\n')[0]
usage = __doc__	usage = __doc__
max_args = 0	max_args = 0
min_args = 0	min_args = 0

def command(self):	def command(self):
self._load_config()	self._load_config()

import ckan.model as model	import ckan.model as model
model.Session.remove()	model.Session.remove()
model.Session.configure(bind=model.meta.engine)	model.Session.configure(bind=model.meta.engine)
log = logging.getLogger('ckanext.ga-report')	log = logging.getLogger('ckanext.ga-report')

import ga_model	import ga_model
ga_model.init_tables()	ga_model.init_tables()
log.info("DB tables are setup")	log.info("DB tables are setup")


class GetAuthToken(CkanCommand):	class GetAuthToken(CkanCommand):
""" Get's the Google auth token	""" Get's the Google auth token

Usage: paster getauthtoken <credentials_file>	Usage: paster getauthtoken <credentials_file>

Where <credentials_file> is the file name containing the details	Where <credentials_file> is the file name containing the details
for the service (obtained from https://code.google.com/apis/console).	for the service (obtained from https://code.google.com/apis/console).
By default this is set to credentials.json	By default this is set to credentials.json
"""	"""
summary = __doc__.split('\n')[0]	summary = __doc__.split('\n')[0]
usage = __doc__	usage = __doc__
max_args = 0	max_args = 0
min_args = 0	min_args = 0

def command(self):	def command(self):
"""	"""
In this case we don't want a valid service, but rather just to	In this case we don't want a valid service, but rather just to
force the user through the auth flow. We allow this to complete to	force the user through the auth flow. We allow this to complete to
act as a form of verification instead of just getting the token and	act as a form of verification instead of just getting the token and
assuming it is correct.	assuming it is correct.
"""	"""
from ga_auth import init_service	from ga_auth import init_service
init_service('token.dat',	init_service('token.dat',
self.args[0] if self.args	self.args[0] if self.args
else 'credentials.json')	else 'credentials.json')

	class FixTimePeriods(CkanCommand):
	    """Fixes the 'All' records for GA_Urls

	    It is possible that older urls that haven't recently been visited
	    do not have All records. This command will traverse through those
	    records and generate valid All records for them.
	    """
	    # The summary is the first line of the docstring, so the docstring
	    # must not begin with a blank line (that would yield an empty summary).
	    summary = __doc__.split('\n')[0]
	    usage = __doc__
	    max_args = 0
	    min_args = 0

	    def command(self):
	        """Load the CKAN config, rebind the session and run
	        ga_model.post_update_url_stats().
	        """
	        # The config has to be loaded before any further CKAN import,
	        # otherwise logging is disabled and the messages below are lost.
	        self._load_config()

	        import ckan.model as model
	        from ga_model import post_update_url_stats

	        model.Session.remove()
	        model.Session.configure(bind=model.meta.engine)

	        # Use the same logger name as InitDB so one logging config
	        # section covers every command of this extension.
	        log = logging.getLogger('ckanext.ga-report')

	        log.info("Updating 'All' records for old URLs")
	        post_update_url_stats()
	        log.info("Processing complete")



class LoadAnalytics(CkanCommand):	class LoadAnalytics(CkanCommand):
"""Get data from Google Analytics API and save it	"""Get data from Google Analytics API and save it
in the ga_model	in the ga_model

Usage: paster loadanalytics <time-period>	Usage: paster loadanalytics <time-period>

Where <time-period> is:	Where <time-period> is:
all - data for all time	all - data for all time
latest - (default) just the 'latest' data	latest - (default) just the 'latest' data
YYYY-MM - just data for the specific month	YYYY-MM - just data for the specific month
"""	"""
summary = __doc__.split('\n')[0]	summary = __doc__.split('\n')[0]
usage = __doc__	usage = __doc__
max_args = 1	max_args = 1
min_args = 0	min_args = 0

def __init__(self, name):	def __init__(self, name):
super(LoadAnalytics, self).__init__(name)	super(LoadAnalytics, self).__init__(name)
self.parser.add_option('-d', '--delete-first',	self.parser.add_option('-d', '--delete-first',
action='store_true',	action='store_true',
default=False,	default=False,
dest='delete_first',	dest='delete_first',
help='Delete data for the period first')	help='Delete data for the period first')
self.parser.add_option('-s', '--skip_url_stats',	self.parser.add_option('-s', '--skip_url_stats',
action='store_true',	action='store_true',
default=False,	default=False,
dest='skip_url_stats',	dest='skip_url_stats',
help='Skip the download of URL data - just do site-wide stats')	help='Skip the download of URL data - just do site-wide stats')

def command(self):	def command(self):
self._load_config()	self._load_config()

from download_analytics import DownloadAnalytics	from download_analytics import DownloadAnalytics
from ga_auth import (init_service, get_profile_id)	from ga_auth import (init_service, get_profile_id)

ga_token_filepath = os.path.expanduser(config.get('googleanalytics.token.filepath', ''))	ga_token_filepath = os.path.expanduser(config.get('googleanalytics.token.filepath', ''))
if not ga_token_filepath:	if not ga_token_filepath:
print 'ERROR: In the CKAN config you need to specify the filepath of the ' \	print 'ERROR: In the CKAN config you need to specify the filepath of the ' \
'Google Analytics token file under key: googleanalytics.token.filepath'	'Google Analytics token file under key: googleanalytics.token.filepath'
return	return

try:	try:
svc = init_service(ga_token_filepath, None)	svc = init_service(ga_token_filepath, None)
except TypeError:	except TypeError:
print ('Have you correctly run the getauthtoken task and '	print ('Have you correctly run the getauthtoken task and '
'specified the correct token file in the CKAN config under '	'specified the correct token file in the CKAN config under '
'"googleanalytics.token.filepath"?')	'"googleanalytics.token.filepath"?')
return	return

downloader = DownloadAnalytics(svc, profile_id=get_profile_id(svc),	downloader = DownloadAnalytics(svc, profile_id=get_profile_id(svc),
delete_first=self.options.delete_first,	delete_first=self.options.delete_first,
skip_url_stats=self.options.skip_url_stats)	skip_url_stats=self.options.skip_url_stats)

time_period = self.args[0] if self.args else 'latest'	time_period = self.args[0] if self.args else 'latest'
if time_period == 'all':	if time_period == 'all':
downloader.all_()	downloader.all_()
elif time_period == 'latest':	elif time_period == 'latest':
downloader.latest()	downloader.latest()
else:	else:
# The month to use	# The month to use
for_date = datetime.datetime.strptime(time_period, '%Y-%m')	for_date = datetime.datetime.strptime(time_period, '%Y-%m')
downloader.specific_month(for_date)	downloader.specific_month(for_date)

file:a/ckanext/ga_report/controller.py -> file:b/ckanext/ga_report/controller.py

import re	import re
import csv	import csv
import sys	import sys
	import json
import logging	import logging
import operator	import operator
import collections	import collections
from ckan.lib.base import (BaseController, c, g, render, request, response, abort)	from ckan.lib.base import (BaseController, c, g, render, request, response, abort)

import sqlalchemy	import sqlalchemy
from sqlalchemy import func, cast, Integer	from sqlalchemy import func, cast, Integer
import ckan.model as model	import ckan.model as model
from ga_model import GA_Url, GA_Stat, GA_ReferralStat, GA_Publisher	from ga_model import GA_Url, GA_Stat, GA_ReferralStat, GA_Publisher

log = logging.getLogger('ckanext.ga-report')	log = logging.getLogger('ckanext.ga-report')

	DOWNLOADS_AVAILABLE_FROM = '2012-12'

def _get_month_name(strdate):	def _get_month_name(strdate):
import calendar	import calendar
from time import strptime	from time import strptime
d = strptime(strdate, '%Y-%m')	d = strptime(strdate, '%Y-%m')
return '%s %s' % (calendar.month_name[d.tm_mon], d.tm_year)	return '%s %s' % (calendar.month_name[d.tm_mon], d.tm_year)

	def _get_unix_epoch(strdate):
def _month_details(cls):	from time import strptime,mktime
	d = strptime(strdate, '%Y-%m')
	return int(mktime(d))

	def _month_details(cls, stat_key=None):
'''	'''
Returns a list of all the periods for which we have data, unfortunately	Returns a list of all the periods for which we have data, unfortunately
knows too much about the type of the cls being passed as GA_Url has a	knows too much about the type of the cls being passed as GA_Url has a
more complex query	more complex query

This may need extending if we add a period_name to the stats	This may need extending if we add a period_name to the stats
'''	'''
months = []	months = []
day = None	day = None

vals = model.Session.query(cls.period_name,cls.period_complete_day)\	q = model.Session.query(cls.period_name,cls.period_complete_day)\
.filter(cls.period_name!='All').distinct(cls.period_name)\	.filter(cls.period_name!='All').distinct(cls.period_name)
.order_by("period_name desc").all()	if stat_key:
	q= q.filter(cls.stat_name==stat_key)

	vals = q.order_by("period_name desc").all()

if vals and vals[0][1]:	if vals and vals[0][1]:
day = int(vals[0][1])	day = int(vals[0][1])
ordinal = 'th' if 11 <= day <= 13 \	ordinal = 'th' if 11 <= day <= 13 \
else {1:'st',2:'nd',3:'rd'}.get(day % 10, 'th')	else {1:'st',2:'nd',3:'rd'}.get(day % 10, 'th')
day = "{day}{ordinal}".format(day=day, ordinal=ordinal)	day = "{day}{ordinal}".format(day=day, ordinal=ordinal)

for m in vals:	for m in vals:
months.append( (m[0], _get_month_name(m[0])))	months.append( (m[0], _get_month_name(m[0])))

return months, day	return months, day


class GaReport(BaseController):	class GaReport(BaseController):

def csv(self, month):	def csv(self, month):
import csv	import csv

q = model.Session.query(GA_Stat).filter(GA_Stat.stat_name!='Downloads')	q = model.Session.query(GA_Stat).filter(GA_Stat.stat_name!='Downloads')
if month != 'all':	if month != 'all':
q = q.filter(GA_Stat.period_name==month)	q = q.filter(GA_Stat.period_name==month)
entries = q.order_by('GA_Stat.period_name, GA_Stat.stat_name, GA_Stat.key').all()	entries = q.order_by('GA_Stat.period_name, GA_Stat.stat_name, GA_Stat.key').all()

response.headers['Content-Type'] = "text/csv; charset=utf-8"	response.headers['Content-Type'] = "text/csv; charset=utf-8"
response.headers['Content-Disposition'] = str('attachment; filename=stats_%s.csv' % (month,))	response.headers['Content-Disposition'] = str('attachment; filename=stats_%s.csv' % (month,))

writer = csv.writer(response)	writer = csv.writer(response)
writer.writerow(["Period", "Statistic", "Key", "Value"])	writer.writerow(["Period", "Statistic", "Key", "Value"])

for entry in entries:	for entry in entries:
writer.writerow([entry.period_name.encode('utf-8'),	writer.writerow([entry.period_name.encode('utf-8'),
entry.stat_name.encode('utf-8'),	entry.stat_name.encode('utf-8'),
entry.key.encode('utf-8'),
entry.value.encode('utf-8')])

def csv_downloads(self, month):
import csv

q = model.Session.query(GA_Stat).filter(GA_Stat.stat_name=='Downloads')
if month != 'all':
q = q.filter(GA_Stat.period_name==month)
entries = q.order_by('GA_Stat.period_name, GA_Stat.key').all()

response.headers['Content-Type'] = "text/csv; charset=utf-8"
response.headers['Content-Disposition'] = str('attachment; filename=downloads_%s.csv' % (month,))

writer = csv.writer(response)
writer.writerow(["Period", "Resource URL", "Count"])

for entry in entries:
writer.writerow([entry.period_name.encode('utf-8'),
entry.key.encode('utf-8'),	entry.key.encode('utf-8'),
entry.value.encode('utf-8')])	entry.value.encode('utf-8')])


def index(self):	def index(self):

# Get the month details by fetching distinct values and determining the	# Get the month details by fetching distinct values and determining the
# month names from the values.	# month names from the values.
c.months, c.day = _month_details(GA_Stat)	c.months, c.day = _month_details(GA_Stat)

# Work out which month to show, based on query params of the first item	# Work out which month to show, based on query params of the first item
c.month_desc = 'all months'	c.month_desc = 'all months'
c.month = request.params.get('month', '')	c.month = request.params.get('month', '')
if c.month:	if c.month:
c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month])	c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month])

q = model.Session.query(GA_Stat).\	q = model.Session.query(GA_Stat).\
filter(GA_Stat.stat_name=='Totals')	filter(GA_Stat.stat_name=='Totals')
if c.month:	if c.month:
q = q.filter(GA_Stat.period_name==c.month)	q = q.filter(GA_Stat.period_name==c.month)
entries = q.order_by('ga_stat.key').all()	entries = q.order_by('ga_stat.key').all()

def clean_key(key, val):	def clean_key(key, val):
if key in ['Average time on site', 'Pages per visit', 'New visits', 'Bounce rate (home page)']:	if key in ['Average time on site', 'Pages per visit', 'New visits', 'Bounce rate (home page)']:
val = "%.2f" % round(float(val), 2)	val = "%.2f" % round(float(val), 2)
if key == 'Average time on site':	if key == 'Average time on site':
mins, secs = divmod(float(val), 60)	mins, secs = divmod(float(val), 60)
hours, mins = divmod(mins, 60)	hours, mins = divmod(mins, 60)
val = '%02d:%02d:%02d (%s seconds) ' % (hours, mins, secs, val)	val = '%02d:%02d:%02d (%s seconds) ' % (hours, mins, secs, val)
if key in ['New visits','Bounce rate (home page)']:	if key in ['New visits','Bounce rate (home page)']:
val = "%s%%" % val	val = "%s%%" % val
if key in ['Total page views', 'Total visits']:	if key in ['Total page views', 'Total visits']:
val = int(val)	val = int(val)

return key, val	return key, val

	# Query historic values for sparkline rendering
	sparkline_query = model.Session.query(GA_Stat)\
	.filter(GA_Stat.stat_name=='Totals')\
	.order_by(GA_Stat.period_name)
	sparkline_data = {}
	for x in sparkline_query:
	sparkline_data[x.key] = sparkline_data.get(x.key,[])
	key, val = clean_key(x.key,float(x.value))
	tooltip = '%s: %s' % (_get_month_name(x.period_name), val)
	sparkline_data[x.key].append( (tooltip,x.value) )
	# Trim the latest month, as it looks like a huge dropoff
	for key in sparkline_data:
	sparkline_data[key] = sparkline_data[key][:-1]

c.global_totals = []	c.global_totals = []
if c.month:	if c.month:
for e in entries:	for e in entries:
key, val = clean_key(e.key, e.value)	key, val = clean_key(e.key, e.value)
c.global_totals.append((key, val))	sparkline = sparkline_data[e.key]
	c.global_totals.append((key, val, sparkline))
else:	else:
d = collections.defaultdict(list)	d = collections.defaultdict(list)
for e in entries:	for e in entries:
d[e.key].append(float(e.value))	d[e.key].append(float(e.value))
for k, v in d.iteritems():	for k, v in d.iteritems():
if k in ['Total page views', 'Total visits']:	if k in ['Total page views', 'Total visits']:
v = sum(v)	v = sum(v)
else:	else:
v = float(sum(v))/float(len(v))	v = float(sum(v))/float(len(v))
	sparkline = sparkline_data[k]
key, val = clean_key(k,v)	key, val = clean_key(k,v)

c.global_totals.append((key, val))	c.global_totals.append((key, val, sparkline))
c.global_totals = sorted(c.global_totals, key=operator.itemgetter(0))	# Sort the global totals into a more pleasant order
	def sort_func(x):
	key = x[0]
	total_order = ['Total page views','Total visits','Pages per visit']
&nbs