gitphp 0.2.9.1 :: ckanext-ga-report.git/commitdiff

Added new method to get a recent score for a dataset

8 files changed: (show all)
ckanext/ga_report/command.py
ckanext/ga_report/controller.py
ckanext/ga_report/download_analytics.py
ckanext/ga_report/ga_model.py
ckanext/ga_report/helpers.py
ckanext/ga_report/templates/ga_report/notes.html
ckanext/ga_report/templates/ga_report/publisher/index.html
ckanext/ga_report/templates/ga_report/publisher/read.html

file:a/ckanext/ga_report/command.py -> file:b/ckanext/ga_report/command.py

import logging	import logging
import datetime	import datetime

from ckan.lib.cli import CkanCommand	from ckan.lib.cli import CkanCommand
# No other CKAN imports allowed until _load_config is run,	# No other CKAN imports allowed until _load_config is run,
# or logging is disabled	# or logging is disabled


class InitDB(CkanCommand):
    """Initialise the extension's database tables
    """
    # summary is the first docstring line; usage is the whole docstring.
    summary = __doc__.split('\n')[0]
    usage = __doc__
    # This command takes no positional arguments.
    max_args = 0
    min_args = 0

    def command(self):
        """Load the CKAN config, bind the session and call
        ga_model.init_tables().
        """
        self._load_config()

        # CKAN modules may only be imported after _load_config() has run,
        # otherwise logging is disabled.
        import ckan.model as model
        model.Session.remove()
        model.Session.configure(bind=model.meta.engine)
        log = logging.getLogger('ckanext.ga-report')

        import ga_model
        ga_model.init_tables()
        log.info("DB tables are setup")


class GetAuthToken(CkanCommand):
    """ Gets the Google auth token

    Usage: paster getauthtoken <credentials_file>

    Where <credentials_file> is the file name containing the details
    for the service (obtained from https://code.google.com/apis/console).
    By default this is set to credentials.json
    """
    # summary is the first docstring line; usage is the whole docstring.
    summary = __doc__.split('\n')[0]
    usage = __doc__
    # The credentials file is an optional positional argument, so one
    # argument must be permitted (a limit of 0 would make the documented
    # <credentials_file> impossible to pass).
    max_args = 1
    min_args = 0

    def command(self):
        """
        In this case we don't want a valid service, but rather just to
        force the user through the auth flow. We allow this to complete to
        act as a form of verification instead of just getting the token and
        assuming it is correct.
        """
        from ga_auth import init_service

        # Fall back to the documented default when no file name was given.
        credentials_file = self.args[0] if self.args else 'credentials.json'
        init_service('token.dat', credentials_file)


class LoadAnalytics(CkanCommand):
    """Get data from Google Analytics API and save it
    in the ga_model

    Usage: paster loadanalytics <tokenfile> <time-period>

    Where <tokenfile> is the name of the auth token file from
    the getauthtoken step.

    And where <time-period> is:
        all         - data for all time
        latest      - (default) just the 'latest' data
        YYYY-MM     - just data for the specific month
    """
    # summary is the first docstring line; usage is the whole docstring.
    summary = __doc__.split('\n')[0]
    usage = __doc__
    # <tokenfile> is required, <time-period> is optional.
    max_args = 2
    min_args = 1

    def __init__(self, name):
        """Register the command-line flags on top of the base command's."""
        super(LoadAnalytics, self).__init__(name)
        self.parser.add_option('-d', '--delete-first',
                               action='store_true',
                               default=False,
                               dest='delete_first',
                               help='Delete data for the period first')
        self.parser.add_option('-s', '--skip_url_stats',
                               action='store_true',
                               default=False,
                               dest='skip_url_stats',
                               help='Skip the download of URL data - just do site-wide stats')

    def command(self):
        """Download analytics for the requested period.

        The period is the second positional argument: 'all', 'latest'
        (used when omitted) or a month given as YYYY-MM. A malformed
        period is reported and the command returns before contacting
        Google.
        """
        self._load_config()

        from download_analytics import DownloadAnalytics
        from ga_auth import (init_service, get_profile_id)

        time_period = self.args[1] if self.args and len(self.args) > 1 \
            else 'latest'

        # Validate the period first so that a typo fails fast with a clear
        # message instead of a strptime traceback after authenticating.
        for_date = None
        if time_period not in ('all', 'latest'):
            try:
                for_date = datetime.datetime.strptime(time_period, '%Y-%m')
            except ValueError:
                print ('Invalid time period %r: expected "all", "latest" '
                       'or a month as YYYY-MM' % (time_period,))
                return

        try:
            svc = init_service(self.args[0], None)
        except TypeError:
            print ('Have you correctly run the getauthtoken task and '
                   'specified the correct token file?')
            return

        downloader = DownloadAnalytics(svc, profile_id=get_profile_id(svc),
                                       delete_first=self.options.delete_first,
                                       skip_url_stats=self.options.skip_url_stats)

        if time_period == 'all':
            downloader.all_()
        elif time_period == 'latest':
            downloader.latest()
        else:
            # The month to use
            downloader.specific_month(for_date)

file:a/ckanext/ga_report/controller.py -> file:b/ckanext/ga_report/controller.py

import re	import re
import csv	import csv
import sys	import sys
import logging	import logging
import operator	import operator
import collections	import collections
from ckan.lib.base import (BaseController, c, g, render, request, response, abort)	from ckan.lib.base import (BaseController, c, g, render, request, response, abort)

import sqlalchemy	import sqlalchemy
from sqlalchemy import func, cast, Integer	from sqlalchemy import func, cast, Integer
import ckan.model as model	import ckan.model as model
from ga_model import GA_Url, GA_Stat, GA_ReferralStat, GA_Publisher	from ga_model import GA_Url, GA_Stat, GA_ReferralStat, GA_Publisher

log = logging.getLogger('ckanext.ga-report')	log = logging.getLogger('ckanext.ga-report')


def _get_month_name(strdate):	def _get_month_name(strdate):
import calendar	import calendar
from time import strptime	from time import strptime
d = strptime(strdate, '%Y-%m')	d = strptime(strdate, '%Y-%m')
return '%s %s' % (calendar.month_name[d.tm_mon], d.tm_year)	return '%s %s' % (calendar.month_name[d.tm_mon], d.tm_year)


def _month_details(cls):
    '''Returns the months for which `cls` has data.

    :param cls: a model class with a `period_name` column
    :returns: a list of (period_name, display_name) tuples, one per
              distinct period_name other than 'All', sorted by
              period_name in descending order
    '''
    rows = model.Session.query(cls.period_name) \
        .filter(cls.period_name != 'All').distinct().all()
    # Each row is a one-element tuple holding the period name.
    months = [(row[0], _get_month_name(row[0])) for row in rows]
    return sorted(months, key=operator.itemgetter(0), reverse=True)


class GaReport(BaseController):
    """Site-wide statistics views built from GA_Stat and GA_ReferralStat."""

    def csv(self, month):
        """Write the GA_Stat rows as CSV to the response.

        :param month: a period name to restrict the rows to, or 'all'
                      for every period
        """
        import csv

        q = model.Session.query(GA_Stat)
        if month != 'all':
            q = q.filter(GA_Stat.period_name==month)
        entries = q.order_by('GA_Stat.period_name, GA_Stat.stat_name, GA_Stat.key').all()

        response.headers['Content-Type'] = "text/csv; charset=utf-8"
        response.headers['Content-Disposition'] = str('attachment; filename=stats_%s.csv' % (month,))

        writer = csv.writer(response)
        writer.writerow(["Period", "Statistic", "Key", "Value"])

        for entry in entries:
            writer.writerow([entry.period_name.encode('utf-8'),
                             entry.stat_name.encode('utf-8'),
                             entry.key.encode('utf-8'),
                             entry.value.encode('utf-8')])

    def index(self):
        """Populate the template context with the site-wide statistics
        (totals, social referrers and the per-category breakdowns) for
        the month in the `month` query parameter, or for all months when
        it is absent, then render ga_report/site/index.html.
        """
        # Get the month details by fetching distinct values and determining the
        # month names from the values.
        c.months = _month_details(GA_Stat)

        # Work out which month to show, based on query params of the first item
        c.month_desc = 'all months'
        c.month = request.params.get('month', '')
        if c.month:
            c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month])

        q = model.Session.query(GA_Stat).\
            filter(GA_Stat.stat_name=='Totals')
        if c.month:
            q = q.filter(GA_Stat.period_name==c.month)
        entries = q.order_by('ga_stat.key').all()

        def clean_key(key, val):
            """Format a 'Totals' value for display according to its key."""
            if key in ['Average time on site', 'Pages per visit', 'New visits', 'Bounce rate (home page)']:
                val = "%.2f" % round(float(val), 2)
                if key == 'Average time on site':
                    mins, secs = divmod(float(val), 60)
                    hours, mins = divmod(mins, 60)
                    val = '%02d:%02d:%02d (%s seconds) ' % (hours, mins, secs, val)
                if key in ['New visits','Bounce rate (home page)']:
                    val = "%s%%" % val
            if key in ['Total page views', 'Total visits']:
                val = int(val)

            return key, val

        c.global_totals = []
        if c.month:
            for e in entries:
                key, val = clean_key(e.key, e.value)
                c.global_totals.append((key, val))
        else:
            # No month selected: combine the per-period values. Counts are
            # summed, every other statistic is averaged across periods.
            by_key = collections.defaultdict(list)
            for e in entries:
                by_key[e.key].append(float(e.value))
            for stat_key, values in by_key.iteritems():
                if stat_key in ['Total page views', 'Total visits']:
                    combined = sum(values)
                else:
                    combined = float(sum(values))/len(values)
                key, val = clean_key(stat_key, combined)

                c.global_totals.append((key, val))
            c.global_totals = sorted(c.global_totals, key=operator.itemgetter(0))

        # GA_Stat.stat_name -> name of the attribute set on c
        keys = {
            'Browser versions': 'browser_versions',
            'Browsers': 'browsers',
            'Operating Systems versions': 'os_versions',
            'Operating Systems': 'os',
            'Social sources': 'social_networks',
            'Languages': 'languages',
            'Country': 'country'
        }

        def shorten_name(name, length=60):
            """Truncate name to `length` characters, appending '..'."""
            # Compare against `length`, not a fixed 60, so that a caller
            # passing another limit gets a consistent result.
            return (name[:length] + '..') if len(name) > length else name

        def fill_out_url(url):
            """Make url absolute by joining it onto g.site_url."""
            import urlparse
            return urlparse.urljoin(g.site_url, url)

        c.social_referrer_totals, c.social_referrers = [], []
        q = model.Session.query(GA_ReferralStat)
        q = q.filter(GA_ReferralStat.period_name==c.month) if c.month else q
        q = q.order_by('ga_referrer.count::int desc')
        for entry in q.all():
            c.social_referrers.append((shorten_name(entry.url), fill_out_url(entry.url),
                                       entry.source,entry.count))

        q = model.Session.query(GA_ReferralStat.url,
                                func.sum(GA_ReferralStat.count).label('count'))
        q = q.filter(GA_ReferralStat.period_name==c.month) if c.month else q
        q = q.order_by('count desc').group_by(GA_ReferralStat.url)
        for entry in q.all():
            c.social_referrer_totals.append((shorten_name(entry[0]), fill_out_url(entry[0]),'',
                                             entry[1]))

        for stat_name, attr_name in keys.iteritems():
            q = model.Session.query(GA_Stat).\
                filter(GA_Stat.stat_name==stat_name)
            if c.month:
                q = q.filter(GA_Stat.period_name==c.month).\
                    order_by('ga_stat.value::int desc')

            # Sum the values per key (a key can occur once per period when
            # no month is selected), then order by the summed value.
            summed = collections.defaultdict(int)
            for e in q.all():
                summed[e.key] += int(e.value)
            entries = sorted(summed.iteritems(),
                             key=operator.itemgetter(1), reverse=True)

            # Get the total for each set of values and then set the value as
            # a percentage of the total
            if stat_name == 'Social sources':
                total = sum([x for n,x in c.global_totals if n == 'Total visits'])
            else:
                total = sum([num for _,num in entries])
            # Distinct names here: Python 2 list comprehensions leak their
            # variables and would otherwise overwrite the loop variables.
            setattr(c, attr_name,
                    [(name, _percent(num, total)) for name, num in entries])

        return render('ga_report/site/index.html')


class GaDatasetReport(BaseController):	class GaDatasetReport(BaseController):
"""	"""
Displays the pageview and visit count for datasets	Displays the pageview and visit count for datasets
with options to filter by publisher and time period.	with options to filter by publisher and time period.
"""	"""
def publisher_csv(self, month):
    '''
    Returns a CSV of each publisher with the total number of dataset
    views & visits.

    :param month: The time period, or 'all'
    '''
    # 'all' is stored as an empty c.month.
    # NOTE(review): _get_top_publishers presumably reads c.month to pick
    # the period - confirm against its definition.
    c.month = '' if month == 'all' else month
    response.headers['Content-Type'] = "text/csv; charset=utf-8"
    response.headers['Content-Disposition'] = str('attachment; filename=publishers_%s.csv' % (month,))

    writer = csv.writer(response)
    writer.writerow(["Publisher Title", "Publisher Name", "Views", "Visits", "Period Name"])

    for publisher, view, visit in _get_top_publishers(None):
        writer.writerow([publisher.title.encode('utf-8'),
                         publisher.name.encode('utf-8'),
                         view,
                         visit,
                         month])

def dataset_csv(self, id='all', month='all'):	def dataset_csv(self, id='all', month='all'):
'''	'''
Returns a CSV with the number of views & visits for each dataset.	Returns a CSV with the number of views & visits for each dataset.

:param id: A Publisher ID or None if you want for all	:param id: A Publisher ID or None if you want for all
:param month: The time period, or 'all'	:param month: The time period, or 'all'
'''	'''
c.month = month if not month == 'all' else ''	c.month = month if not month == 'all' else ''
if id != 'all':	if id != 'all':
c.publisher = model.Group.get(id)	c.publisher = model.Group.get(id)
if not c.publisher:	if not c.publisher:
abort(404, 'A publisher with that name could not be found')	abort(404, 'A publisher with that name could not be found')

packages = self._get_packages(c.publisher)	packages = self._get_packages(c.publisher)
response.headers['Content-Type'] = "text/csv; charset=utf-8"	response.headers['Content-Type'] = "text/csv; charset=utf-8"
response.headers['Content-Disposition'] = \	response.headers['Content-Disposition'] = \
str('attachment; filename=datasets_%s_%s.csv' % (c.publisher_name,