# Fix to date fetching

import logging
import datetime

from ckan.lib.cli import CkanCommand
# No other CKAN imports allowed until _load_config is run,
# or logging is disabled
   
   
class InitDB(CkanCommand):
    """Initialise the extension's database tables
    """
    # The paster summary is the first line of the docstring; the full
    # docstring doubles as the usage text.
    summary = __doc__.split('\n')[0]
    usage = __doc__
    max_args = 0
    min_args = 0

    def command(self):
        """Load the CKAN config, rebind the session and create the tables."""
        self._load_config()

        # CKAN imports are deferred until the config has been loaded
        # (see the note next to the module imports).
        import ckan.model as model
        model.Session.remove()
        model.Session.configure(bind=model.meta.engine)
        log = logging.getLogger('ckanext.ga-report')

        import ga_model
        ga_model.init_tables()
        log.info("DB tables are setup")
   
   
class GetAuthToken(CkanCommand):
    """ Get's the Google auth token

    Usage: paster getauthtoken <credentials_file>

    Where <credentials_file> is the file name containing the details
    for the service (obtained from https://code.google.com/apis/console).
    By default this is set to credentials.json
    """
    summary = __doc__.split('\n')[0]
    usage = __doc__
    # The usage text documents one optional positional argument and
    # command() reads self.args[0], so a single argument must be accepted.
    max_args = 1
    min_args = 0

    def command(self):
        """
        In this case we don't want a valid service, but rather just to
        force the user through the auth flow. We allow this to complete to
        act as a form of verification instead of just getting the token and
        assuming it is correct.
        """
        from ga_auth import init_service

        credentials_file = self.args[0] if self.args else 'credentials.json'
        init_service('token.dat', credentials_file)
   
   
class LoadAnalytics(CkanCommand):
    """Get data from Google Analytics API and save it
    in the ga_model

    Usage: paster loadanalytics <tokenfile> <time-period>

    Where <tokenfile> is the name of the auth token file from
    the getauthtoken step.

    And where <time-period> is:
        all         - data for all time
        latest      - (default) just the 'latest' data
        YYYY-MM     - just data for the specific month
    """
    summary = __doc__.split('\n')[0]
    usage = __doc__
    max_args = 2
    min_args = 1

    def command(self):
        """Download analytics for the requested period and store them."""
        self._load_config()

        from download_analytics import DownloadAnalytics
        from ga_auth import (init_service, get_profile_id)

        try:
            svc = init_service(self.args[0], None)
        except TypeError:
            print ('Have you correctly run the getauthtoken task and '
                   'specified the correct file here')
            return

        downloader = DownloadAnalytics(svc, profile_id=get_profile_id(svc))

        time_period = self.args[1] if len(self.args) > 1 else 'latest'
        if time_period == 'all':
            downloader.all_()
        elif time_period == 'latest':
            downloader.latest()
        else:
            # Anything else must name the month to use, as YYYY-MM.
            try:
                for_date = datetime.datetime.strptime(time_period, '%Y-%m')
            except ValueError:
                # Report bad input the same way as the auth failure above
                # instead of surfacing a traceback.
                print ('Unrecognised time period "%s": expected all, '
                       'latest or YYYY-MM' % time_period)
                return
            downloader.specific_month(for_date)
   
import logging
import operator
from ckan.lib.base import BaseController, c, render, request, response

import sqlalchemy
from sqlalchemy import func, cast, Integer
import ckan.model as model
from ga_model import GA_Url, GA_Stat

log = logging.getLogger('ckanext.ga-report')
   
   
def _get_month_name(str): def _get_month_name(strdate):
import calendar import calendar
from time import strptime from time import strptime
d = strptime('2012-10', '%Y-%m') d = strptime(strdate, '%Y-%m')
return '%s %s' % (calendar.month_name[d.tm_mon], d.tm_year) return '%s %s' % (calendar.month_name[d.tm_mon], d.tm_year)
   
   
def _month_details(cls):
    """Return (period_name, display_name) pairs for cls, newest first.

    cls is a model class with a period_name column (GA_Stat and GA_Url are
    the classes passed in this file). Each distinct period_name becomes one
    pair; the list is sorted on period_name in descending order.
    """
    vals = model.Session.query(cls.period_name).distinct().all()
    # Each row is a one-element tuple, so the period name is m[0].
    months = [(m[0], _get_month_name(m[0])) for m in vals]
    return sorted(months, key=operator.itemgetter(0), reverse=True)
   
   
class GaReport(BaseController):
    """Site-wide analytics report page and CSV export."""

    def csv(self, month):
        """Write every GA_Stat row for the given period to the response as CSV."""
        import csv

        entries = model.Session.query(GA_Stat).\
            filter(GA_Stat.period_name==month).\
            order_by('GA_Stat.stat_name, GA_Stat.key').all()

        response.headers['Content-disposition'] = 'attachment; filename=dgu_analytics_%s.csv' % (month)
        response.headers['Content-Type'] = "text/csv; charset=utf-8"

        writer = csv.writer(response)
        writer.writerow(["Period", "Statistic", "Key", "Value"])

        for entry in entries:
            writer.writerow([entry.period_name.encode('utf-8'),
                             entry.stat_name.encode('utf-8'),
                             entry.key.encode('utf-8'),
                             entry.value.encode('utf-8')])

    def index(self):
        """Populate the template context with the chosen month's statistics."""

        # Get the month details by fetching distinct values and determining the
        # month names from the values.
        c.months = _month_details(GA_Stat)

        # Work out which month to show, based on query params of the first item
        c.month = request.params.get('month', c.months[0][0] if c.months else '')
        c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month])

        entries = model.Session.query(GA_Stat).\
            filter(GA_Stat.stat_name=='Totals').\
            filter(GA_Stat.period_name==c.month).all()
        c.global_totals = [(s.key, s.value) for s in entries]

        # Maps the stored stat_name to the context attribute it is exposed as.
        keys = {
            'Browser versions': 'browsers',
            'Operating Systems versions': 'os',
            'Social sources': 'social_networks',
            'Languages': 'languages',
            'Country': 'country'
        }

        # items() rather than iteritems() so this runs on Python 2 and 3.
        for k, v in keys.items():
            entries = model.Session.query(GA_Stat).\
                filter(GA_Stat.stat_name==k).\
                filter(GA_Stat.period_name==c.month).\
                order_by('ga_stat.value::int desc').all()
            setattr(c, v, [(s.key, s.value) for s in entries])

        return render('ga_report/site/index.html')
   
   
class GaPublisherReport(BaseController):
    """
    Displays the pageview and visit count for specific publishers based on
    the datasets associated with the publisher.
    """

    def index(self):
        """List the 20 publishers with the most pageviews for the month."""
        # Get the month details by fetching distinct values and determining the
        # month names from the values.
        c.months = _month_details(GA_Url)

        # Work out which month to show, based on query params of the first item
        c.month = request.params.get('month', c.months[0][0] if c.months else '')
        c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month])

        connection = model.Session.connection()
        # The month is passed as a bound parameter; %% is a literal percent
        # sign for the LIKE pattern.
        q = """
            select department_id, sum(pageviews::int) views, sum(visitors::int) visits
            from ga_url
            where department_id <> ''
                and not url like '/publisher/%%'
                and period_name=%s
            group by department_id order by views desc limit 20;
        """
        c.top_publishers = []
        res = connection.execute(q, c.month)
        for row in res:
            c.top_publishers.append((model.Group.get(row[0]), row[1], row[2]))

        return render('ga_report/publisher/index.html')

    def read(self, id):
        """Show one publisher's page views and its top 20 URLs by pageviews.

        Responds with 404 when no group matches id.
        """
        c.publisher = model.Group.get(id)
        if c.publisher is None:
            # Without this guard the c.publisher.name lookups below raise
            # AttributeError for an unknown id.
            from ckan.lib.base import abort
            abort(404, 'Publisher not found')
        c.top_packages = []  # package, dataset_views in c.top_packages

        # Get the month details by fetching distinct values and determining the
        # month names from the values.
        c.months = _month_details(GA_Url)

        # Work out which month to show, based on query params of the first item
        c.month = request.params.get('month', c.months[0][0] if c.months else '')
        c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month])

        entry = model.Session.query(GA_Url).\
            filter(GA_Url.url=='/publisher/%s' % c.publisher.name).\
            filter(GA_Url.period_name==c.month).first()
        c.publisher_page_views = entry.pageviews if entry else 0

        entries = model.Session.query(GA_Url).\
            filter(GA_Url.department_id==c.publisher.name).\
            filter(GA_Url.period_name==c.month).\
            order_by('ga_url.pageviews::int desc')[:20]
        for entry in entries:
            # Only dataset URLs are listed; the text after the prefix is
            # what is handed to Package.get.
            if entry.url.startswith('/dataset/'):
                p = model.Package.get(entry.url[len('/dataset/'):])
                c.top_packages.append((p, entry.pageviews, entry.visitors))

        return render('ga_report/publisher/read.html')
   
import os
import logging
import datetime

from pylons import config

import ga_model

#from ga_client import GA

log = logging.getLogger('ckanext.ga-report')

FORMAT_MONTH = '%Y-%m'
   
class DownloadAnalytics(object):
    '''Downloads and stores analytics info'''

    def __init__(self, service=None, profile_id=None):
        '''Remember the GA service and profile id; read the period from config.'''
        self.period = config['ga-report.period']
        self.service = service
        self.profile_id = profile_id

    def all_(self):
        '''Download every period from January 2010 up to now.'''
        self.for_date(datetime.datetime(2010, 1, 1))

    def specific_month(self, date):
        '''Download and store the whole calendar month that contains date.'''
        import calendar

        first_of_this_month = datetime.datetime(date.year, date.month, 1)
        _, last_day_of_month = calendar.monthrange(int(date.year), int(date.month))
        last_of_this_month = datetime.datetime(date.year, date.month, last_day_of_month)
        # (period_name, period_complete_day, start_date, end_date)
        periods = ((date.strftime(FORMAT_MONTH),
                    last_day_of_month,
                    first_of_this_month, last_of_this_month),)
        self.download_and_store(periods)

    def latest(self):
        '''Download the current period: first of this month up to now.

        Raises NotImplementedError for any period other than 'monthly'.
        '''
        if self.period == 'monthly':
            # from first of this month to today
            now = datetime.datetime.now()
            first_of_this_month = datetime.datetime(now.year, now.month, 1)
            periods = ((now.strftime(FORMAT_MONTH),
                        now.day,
                        first_of_this_month, now),)
        else:
            raise NotImplementedError
        self.download_and_store(periods)

    def for_date(self, for_date):
        '''Download every monthly period from for_date's month up to now.

        Completed months get period_complete_day 0; the current month gets
        today's day number. A date in a future month yields no periods.
        Raises NotImplementedError for any period other than 'monthly'.
        '''
        assert isinstance(for_date, datetime.datetime)
        periods = []  # (period_name, period_complete_day, start_date, end_date)
        if self.period == 'monthly':
            year = for_date.year
            month = for_date.month
            now = datetime.datetime.now()
            first_of_this_month = datetime.datetime(now.year, now.month, 1)
            while True:
                first_of_the_month = datetime.datetime(year, month, 1)
                if first_of_the_month == first_of_this_month:
                    periods.append((now.strftime(FORMAT_MONTH),
                                    now.day,
                                    first_of_this_month, now))
                    break
                elif first_of_the_month < first_of_this_month:
                    # 40 days after the 1st always lands in the next month;
                    # the day before that month's 1st is this month's end.
                    in_the_next_month = first_of_the_month + datetime.timedelta(40)
                    last_of_the_month = datetime.datetime(in_the_next_month.year,
                                                          in_the_next_month.month, 1)\
                                        - datetime.timedelta(1)
                    # Name the period after the month being fetched, not
                    # after the current month.
                    periods.append((first_of_the_month.strftime(FORMAT_MONTH), 0,
                                    first_of_the_month, last_of_the_month))
                else:
                    # first_of_the_month has got to the future somehow
                    break
                month += 1
                if month > 12:
                    year += 1
                    month = 1
        else:
            raise NotImplementedError
        self.download_and_store(periods)

    def since_date(self, since_date):
        '''Earlier name of for_date, kept so existing callers keep working.'''
        self.for_date(since_date)

    @staticmethod
    def get_full_period_name(period_name, period_complete_day):
        '''Return period_name, suffixed with ' (up to Nth)' when the day is non-zero.'''
        if period_complete_day:
            return period_name + ' (up to %ith)' % period_complete_day
        else:
            return period_name

    def download_and_store(self, periods):
        '''Fetch and store dataset, publisher and site-wide stats per period.

        periods is an iterable of
        (period_name, period_complete_day, start_date, end_date) tuples.
        '''
        for period_name, period_complete_day, start_date, end_date in periods:
            full_name = self.get_full_period_name(period_name, period_complete_day)
            log.info('Downloading Analytics for period "%s" (%s - %s)',
                     full_name,
                     start_date.strftime('%Y %m %d'),
                     end_date.strftime('%Y %m %d'))

            data = self.download(start_date, end_date, '~/dataset/[a-z0-9-_]+')
            log.info('Storing Dataset Analytics for period "%s"', full_name)
            self.store(period_name, period_complete_day, data)

            data = self.download(start_date, end_date, '~/publisher/[a-z0-9-_]+')
            log.info('Storing Publisher Analytics for period "%s"', full_name)
            self.store(period_name, period_complete_day, data)

            ga_model.update_publisher_stats(period_name)  # about 30 seconds.
            self.sitewide_stats(period_name)

    def download(self, start_date, end_date, path='~/dataset/[a-z0-9-_]+'):
        '''Get data from GA for a given time period

        Returns dict(url=[(url, pageviews, visits), ...]) for the page paths
        matching path; the list is empty when GA returns no rows.
        '''
        start_date = start_date.strftime('%Y-%m-%d')
        end_date = end_date.strftime('%Y-%m-%d')
        query = 'ga:pagePath=%s$' % path
        metrics = 'ga:uniquePageviews, ga:visitors'
        sort = '-ga:uniquePageviews'

        # Supported query params at
        # https://developers.google.com/analytics/devguides/reporting/core/v3/reference
        results = self.service.data().ga().get(
                                 ids='ga:' + self.profile_id,
                                 filters=query,
                                 start_date=start_date,
                                 metrics=metrics,
                                 sort=sort,
                                 dimensions="ga:pagePath",
                                 max_results=10000,
                                 end_date=end_date).execute()

        if os.getenv('DEBUG'):
            import pprint
            pprint.pprint(results)
            print('Total results: %s' % results.get('totalResults'))

        packages = []
        # 'rows' may be missing from the response; treat that as no data.
        for entry in results.get('rows') or []:
            (loc, pageviews, visits) = entry
            packages.append(('http:/' + loc, pageviews, visits,))  # Temporary hack
        return dict(url=packages)

    def store(self, period_name, period_complete_day, data):
        '''Save the 'url' entries of data, if present, via ga_model.'''
        if 'url' in data:
            ga_model.update_url_stats(period_name, period_complete_day, data['url'])

    def sitewide_stats(self, period_name):
        '''Fetch and store all site-wide stats for a 'YYYY-MM' period.

        The date range always covers the whole calendar month.
        '''
        import calendar
        year, month = period_name.split('-')
        _, last_day_of_month = calendar.monthrange(int(year), int(month))

        start_date = '%s-01' % period_name
        end_date = '%s-%s' % (period_name, last_day_of_month)
        print('Sitewide_stats for %s (%s -> %s)' % (period_name, start_date, end_date))

        funcs = ['_totals_stats', '_social_stats', '_os_stats',
                 '_locale_stats', '_browser_stats', '_mobile_stats']
        for f in funcs:
            print(' + Fetching %s stats' % f.split('_')[1])
            getattr(self, f)(start_date, end_date, period_name)

    @staticmethod
    def _get_results(result_data, f):
        '''Sum result[1] over result_data, grouped by the key f(result).'''
        data = {}
        for result in result_data:
            key = f(result)
            data[key] = data.get(key, 0) + result[1]
        return data

    def _totals_stats(self, start_date, end_date, period_name):
        """ Fetches distinct totals, total pageviews etc """
        results = self.service.data().ga().get(
            ids='ga:' + self.profile_id,
            start_date=start_date,
            metrics='ga:uniquePageviews',
            sort='-ga:uniquePageviews',
            max_results=10000,
            end_date=end_date).execute()
        result_data = results.get('rows') or []
        # Skip the update when GA returned no rows rather than failing on
        # result_data[0].
        if result_data:
            ga_model.update_sitewide_stats(period_name, "Totals",
                                           {'Total pageviews': result_data[0][0]})

        results = self.service.data().ga().get(
            ids='ga:' + self.profile_id,
            start_date=start_date,
            metrics='ga:pageviewsPerVisit,ga:bounces,ga:avgTimeOnSite,ga:percentNewVisits',
            max_results=10000,
            end_date=end_date).execute()
        result_data = results.get('rows') or []
        if result_data:
            # Values come back in the order the metrics were requested.
            data = {
                'Pages per visit': result_data[0][0],
                'Bounces': result_data[0][1],
                'Average time on site': result_data[0][2],
                'Percent new visits': result_data[0][3],
            }
            ga_model.update_sitewide_stats(period_name, "Totals", data)

    def _locale_stats(self, start_date, end_date, period_name):
        """ Fetches stats about language and country """
        results = self.service.data().ga().get(
            ids='ga:' + self.profile_id,
            start_date=start_date,
            metrics='ga:uniquePageviews',
            sort='-ga:uniquePageviews',
            dimensions="ga:language,ga:country",
            max_results=10000,
            end_date=end_date).execute()
        # Each row is (language, country, pageviews).
        result_data = results.get('rows') or []
        data = {}
        for result in result_data:
            data[result[0]] = data.get(result[0], 0) + int(result[2])
        ga_model.update_sitewide_stats(period_name, "Languages", data)

        data = {}
        for result in result_data:
            data[result[1]] = data.get(result[1], 0) + int(result[2])
        ga_model.update_sitewide_stats(period_name, "Country", data)

    def _social_stats(self, start_date, end_date, period_name):
        """ Finds out which social sites people are referred from """
        results = self.service.data().ga().get(
            ids='ga:' + self.profile_id,
            start_date=start_date,
            metrics='ga:uniquePageviews',
            sort='-ga:uniquePageviews',
            dimensions="ga:socialNetwork,ga:referralPath",
            max_results=10000,
            end_date=end_date).execute()
        # Each row is (social network, referral path, pageviews).
        result_data = results.get('rows') or []
        data = {}
        for result in result_data:
            if not result[0] == '(not set)':
                data[result[0]] = data.get(result[0], 0) + int(result[2])
        ga_model.update_sitewide_stats(period_name, "Social sources", data)

    def _os_stats(self, start_date, end_date, period_name):
        """ Operating system stats """
        results = self.service.data().ga().get(
            ids='ga:' + self.profile_id,
            start_date=start_date,
            metrics='ga:uniquePageviews',
            sort='-ga:uniquePageviews',
            dimensions="ga:operatingSystem,ga:operatingSystemVersion",
            max_results=10000,
            end_date=end_date).execute()
        # Each row is (operating system, version, pageviews).
        result_data = results.get('rows') or []
        data = {}
        for result in result_data:
            data[result[0]] = data.get(result[0], 0) + int(result[2])
        ga_model.update_sitewide_stats(period_name, "Operating Systems", data)

        data = {}
        for result in result_data:
            key = "%s (%s)" % (result[0], result[1])
            data[key] = result[2]
        ga_model.update_sitewide_stats(period_name, "Operating Systems versions", data)

    def _browser_stats(self, start_date, end_date, period_name):
        """ Information about browsers and browser versions """
        results = self.service.data().ga().get(
            ids='ga:' + self.profile_id,
            start_date=start_date,
            metrics='ga:uniquePageviews',
            sort='-ga:uniquePageviews',
            dimensions="ga:browser,ga:browserVersion",
            max_results=10000,
            end_date=end_date).execute()
        # Each row is (browser, version, pageviews).
        result_data = results.get('rows') or []
        data = {}
        for result in result_data:
            data[result[0]] = data.get(result[0], 0) + int(result[2])
        ga_model.update_sitewide_stats(period_name, "Browsers", data)

        data = {}
        for result in result_data:
            key = "%s (%s)" % (result[0], result[1])
            data[key] = result[2]
        ga_model.update_sitewide_stats(period_name, "Browser versions", data)

    def _mobile_stats(self, start_date, end_date, period_name):
        """ Info about mobile devices """

        results = self.service.data().ga().get(
            ids='ga:' + self.profile_id,
            start_date=start_date,
            metrics='ga:uniquePageviews',
            sort='-ga:uniquePageviews',
            dimensions="ga:mobileDeviceBranding, ga:mobileDeviceInfo",
            max_results=10000,
            end_date=end_date).execute()

        # Each row is (device branding, device info, pageviews).
        result_data = results.get('rows') or []
        data = {}
        for result in result_data:
            data[result[0]] = data.get(result[0], 0) + int(result[2])
        ga_model.update_sitewide_stats(period_name, "Mobile brands", data)

        data = {}
        for result in result_data:
            data[result[1]] = data.get(result[1], 0) + int(result[2])
        ga_model.update_sitewide_stats(period_name, "Mobile devices", data)
   
import os
import datetime
from nose.tools import assert_equal
from ckanext.ga_report.download_analytics import DownloadAnalytics
from ckanext.ga_report.ga_auth import (init_service, get_profile_id)
from ckanext.ga_report.ga_model import init_tables
   
class TestAPI:
    """Tests that call the live Google Analytics API.

    They expect token.dat and credentials.json (produced by the auth
    process) in the working directory.
    """

    @classmethod
    def setup_class(cls):
        """Warn when the auth files are missing, then create the tables."""
        if not os.path.exists("token.dat") or not os.path.exists("credentials.json"):
            print('*' * 60)
            print("Tests may not run without first having run the auth process")
            print('*' * 60)
        init_tables()

    @classmethod
    def teardown_class(cls):
        pass

    def test_latest(self):
        """latest() completes without raising."""
        svc = init_service("token.dat", "credentials.json")
        try:
            downloader = DownloadAnalytics(svc, profile_id=get_profile_id(svc))
            downloader.latest()
        except Exception as e:
            assert False, e

    def test_since(self):
        """for_date() completes without raising for a date 30 days ago."""
        svc = init_service("token.dat", "credentials.json")
        downloader = DownloadAnalytics(svc, profile_id=get_profile_id(svc))
        try:
            # Subtract a positive delta: a negative one gives a date in the
            # future, for which for_date builds no periods at all.
            downloader.for_date(datetime.datetime.now() - datetime.timedelta(days=30))
        except Exception as e:
            assert False, e