import os |
import os |
import logging |
import logging |
import datetime |
import datetime |
|
|
from pylons import config |
from pylons import config |
|
|
import ga_model |
import ga_model |
|
|
#from ga_client import GA |
#from ga_client import GA |
|
|
log = logging.getLogger('ckanext.ga-report') |
log = logging.getLogger('ckanext.ga-report') |
|
|
FORMAT_MONTH = '%Y-%m' |
FORMAT_MONTH = '%Y-%m' |
|
|
class DownloadAnalytics(object): |
class DownloadAnalytics(object): |
'''Downloads and stores analytics info''' |
'''Downloads and stores analytics info''' |
|
|
def __init__(self, service=None, profile_id=None): |
def __init__(self, service=None, profile_id=None): |
self.period = config['ga-report.period'] |
self.period = config['ga-report.period'] |
self.service = service |
self.service = service |
self.profile_id = profile_id |
self.profile_id = profile_id |
|
|
|
|
def all_(self): |
def all_(self): |
self.since_date(datetime.datetime(2010, 1, 1)) |
self.since_date(datetime.datetime(2010, 1, 1)) |
|
|
def latest(self): |
def latest(self): |
if self.period == 'monthly': |
if self.period == 'monthly': |
# from first of this month to today |
# from first of this month to today |
now = datetime.datetime.now() |
now = datetime.datetime.now() |
first_of_this_month = datetime.datetime(now.year, now.month, 1) |
first_of_this_month = datetime.datetime(now.year, now.month, 1) |
periods = ((now.strftime(FORMAT_MONTH), |
periods = ((now.strftime(FORMAT_MONTH), |
now.day, |
now.day, |
first_of_this_month, now),) |
first_of_this_month, now),) |
else: |
else: |
raise NotImplementedError |
raise NotImplementedError |
self.download_and_store(periods) |
self.download_and_store(periods) |
|
|
|
|
def since_date(self, since_date): |
def since_date(self, since_date): |
assert isinstance(since_date, datetime.datetime) |
assert isinstance(since_date, datetime.datetime) |
periods = [] # (period_name, period_complete_day, start_date, end_date) |
periods = [] # (period_name, period_complete_day, start_date, end_date) |
if self.period == 'monthly': |
if self.period == 'monthly': |
first_of_the_months_until_now = [] |
first_of_the_months_until_now = [] |
year = since_date.year |
year = since_date.year |
month = since_date.month |
month = since_date.month |
now = datetime.datetime.now() |
now = datetime.datetime.now() |
first_of_this_month = datetime.datetime(now.year, now.month, 1) |
first_of_this_month = datetime.datetime(now.year, now.month, 1) |
while True: |
while True: |
first_of_the_month = datetime.datetime(year, month, 1) |
first_of_the_month = datetime.datetime(year, month, 1) |
if first_of_the_month == first_of_this_month: |
if first_of_the_month == first_of_this_month: |
periods.append((now.strftime(FORMAT_MONTH), |
periods.append((now.strftime(FORMAT_MONTH), |
now.day, |
now.day, |
first_of_this_month, now)) |
first_of_this_month, now)) |
break |
break |
elif first_of_the_month < first_of_this_month: |
elif first_of_the_month < first_of_this_month: |
in_the_next_month = first_of_the_month + datetime.timedelta(40) |
in_the_next_month = first_of_the_month + datetime.timedelta(40) |
last_of_the_month = datetime.datetime(in_the_next_month.year, |
last_of_the_month = datetime.datetime(in_the_next_month.year, |
in_the_next_month.month, 1)\ |
in_the_next_month.month, 1)\ |
- datetime.timedelta(1) |
- datetime.timedelta(1) |
periods.append((now.strftime(FORMAT_MONTH), 0, |
periods.append((now.strftime(FORMAT_MONTH), 0, |
first_of_the_month, last_of_the_month)) |
first_of_the_month, last_of_the_month)) |
else: |
else: |
# first_of_the_month has got to the future somehow |
# first_of_the_month has got to the future somehow |
break |
break |
month += 1 |
month += 1 |
if month > 12: |
if month > 12: |
year += 1 |
year += 1 |
month = 1 |
month = 1 |
else: |
else: |
raise NotImplementedError |
raise NotImplementedError |
self.download_and_store(periods) |
self.download_and_store(periods) |
|
|
@staticmethod |
@staticmethod |
def get_full_period_name(period_name, period_complete_day): |
def get_full_period_name(period_name, period_complete_day): |
if period_complete_day: |
if period_complete_day: |
return period_name + ' (up to %ith)' % period_complete_day |
return period_name + ' (up to %ith)' % period_complete_day |
else: |
else: |
return period_name |
return period_name |
|
|
|
|
def download_and_store(self, periods): |
def download_and_store(self, periods): |
for period_name, period_complete_day, start_date, end_date in periods: |
for period_name, period_complete_day, start_date, end_date in periods: |
log.info('Downloading Analytics for period "%s" (%s - %s)', |
log.info('Downloading Analytics for period "%s" (%s - %s)', |
self.get_full_period_name(period_name, period_complete_day), |
self.get_full_period_name(period_name, period_complete_day), |
start_date.strftime('%Y %m %d'), |
start_date.strftime('%Y %m %d'), |
end_date.strftime('%Y %m %d')) |
end_date.strftime('%Y %m %d')) |
|
""" |
data = self.download(start_date, end_date, '~/dataset/[a-z0-9-_]+') |
data = self.download(start_date, end_date, '~/dataset/[a-z0-9-_]+') |
log.info('Storing Dataset Analytics for period "%s"', |
log.info('Storing Dataset Analytics for period "%s"', |
self.get_full_period_name(period_name, period_complete_day)) |
self.get_full_period_name(period_name, period_complete_day)) |
self.store(period_name, period_complete_day, data, ) |
self.store(period_name, period_complete_day, data, ) |
|
|
data = self.download(start_date, end_date, '~/publisher/[a-z0-9-_]+') |
data = self.download(start_date, end_date, '~/publisher/[a-z0-9-_]+') |
log.info('Storing Publisher Analytics for period "%s"', |
log.info('Storing Publisher Analytics for period "%s"', |
self.get_full_period_name(period_name, period_complete_day)) |
self.get_full_period_name(period_name, period_complete_day)) |
self.store(period_name, period_complete_day, data,) |
self.store(period_name, period_complete_day, data,) |
ga_model.update_publisher_stats(period_name) |
""" |
|
ga_model.update_publisher_stats(period_name) # about 30 seconds. |
self.sitewide_stats( period_name ) |
self.sitewide_stats( period_name ) |
|
|
|
|
def download(self, start_date, end_date, path='~/dataset/[a-z0-9-_]+'): |
def download(self, start_date, end_date, path='~/dataset/[a-z0-9-_]+'): |
'''Get data from GA for a given time period''' |
'''Get data from GA for a given time period''' |
start_date = start_date.strftime('%Y-%m-%d') |
start_date = start_date.strftime('%Y-%m-%d') |
end_date = end_date.strftime('%Y-%m-%d') |
end_date = end_date.strftime('%Y-%m-%d') |
query = 'ga:pagePath=%s$' % path |
query = 'ga:pagePath=%s$' % path |
metrics = 'ga:uniquePageviews, ga:visits' |
metrics = 'ga:uniquePageviews, ga:visitors' |
sort = '-ga:uniquePageviews' |
sort = '-ga:uniquePageviews' |
|
|
# Supported query params at |
# Supported query params at |
# https://developers.google.com/analytics/devguides/reporting/core/v3/reference |
# https://developers.google.com/analytics/devguides/reporting/core/v3/reference |
results = self.service.data().ga().get( |
results = self.service.data().ga().get( |
ids='ga:' + self.profile_id, |
ids='ga:' + self.profile_id, |
filters=query, |
filters=query, |
start_date=start_date, |
start_date=start_date, |
metrics=metrics, |
metrics=metrics, |
sort=sort, |
sort=sort, |
dimensions="ga:pagePath", |
dimensions="ga:pagePath", |
max_results=10000, |
max_results=10000, |
end_date=end_date).execute() |
end_date=end_date).execute() |
|
|
if os.getenv('DEBUG'): |
if os.getenv('DEBUG'): |
import pprint |
import pprint |
pprint.pprint(results) |
pprint.pprint(results) |
print 'Total results: %s' % results.get('totalResults') |
print 'Total results: %s' % results.get('totalResults') |
|
|
packages = [] |
packages = [] |
for entry in results.get('rows'): |
for entry in results.get('rows'): |
(loc,pageviews,visits) = entry |
(loc,pageviews,visits) = entry |
packages.append( ('http:/' + loc, pageviews, visits,) ) # Temporary hack |
packages.append( ('http:/' + loc, pageviews, visits,) ) # Temporary hack |
return dict(url=packages) |
return dict(url=packages) |
|
|
def store(self, period_name, period_complete_day, data): |
def store(self, period_name, period_complete_day, data): |
if 'url' in data: |
if 'url' in data: |
ga_model.update_url_stats(period_name, period_complete_day, data['url']) |
ga_model.update_url_stats(period_name, period_complete_day, data['url']) |
|
|
|
|
def sitewide_stats(self, period_name): |
def sitewide_stats(self, period_name): |
import calendar |
import calendar |
year, month = period_name.split('-') |
year, month = period_name.split('-') |
_, last_day_of_month = calendar.monthrange(int(year), int(month)) |
_, last_day_of_month = calendar.monthrange(int(year), int(month)) |
|
|
start_date = '%s-01' % period_name |
start_date = '%s-01' % period_name |
end_date = '%s-%s' % (period_name, last_day_of_month) |
end_date = '%s-%s' % (period_name, last_day_of_month) |
print 'Sitewide_stats for %s (%s -> %s)' % (period_name, start_date, end_date) |
print 'Sitewide_stats for %s (%s -> %s)' % (period_name, start_date, end_date) |
|
|
funcs = ['_totals_stats', '_social_stats', '_os_stats', |
funcs = ['_totals_stats', '_social_stats', '_os_stats', |
'_locale_stats', '_browser_stats', '_mobile_stats'] |
'_locale_stats', '_browser_stats', '_mobile_stats'] |
for f in funcs: |
for f in funcs: |
print ' + Fetching %s stats' % f.split('_')[1] |
print ' + Fetching %s stats' % f.split('_')[1] |
getattr(self, f)(start_date, end_date, period_name) |
getattr(self, f)(start_date, end_date, period_name) |
|
|
|
def _get_results(result_data, f): |
|
data = {} |
|
for result in result_data: |
|
key = f(result) |
|
data[key] = data.get(key,0) + result[1] |
|
return data |
|
|
def _totals_stats(self, start_date, end_date, period_name): |
def _totals_stats(self, start_date, end_date, period_name): |
""" Fetches distinct totals, total pageviews etc """ |
""" Fetches distinct totals, total pageviews etc """ |
results = self.service.data().ga().get( |
results = self.service.data().ga().get( |
ids='ga:' + self.profile_id, |
ids='ga:' + self.profile_id, |
start_date=start_date, |
start_date=start_date, |
metrics='ga:uniquePageviews', |
metrics='ga:uniquePageviews', |
sort='-ga:uniquePageviews', |
sort='-ga:uniquePageviews', |
max_results=10000, |
max_results=10000, |
end_date=end_date).execute() |
end_date=end_date).execute() |
result_data = results.get('rows') |
result_data = results.get('rows') |
ga_model.update_sitewide_stats(period_name, "Totals", {'Total pageviews': result_data[0][0]}) |
ga_model.update_sitewide_stats(period_name, "Totals", {'Total pageviews': result_data[0][0]}) |
|
|
results = self.service.data().ga().get( |
results = self.service.data().ga().get( |
ids='ga:' + self.profile_id, |
ids='ga:' + self.profile_id, |
start_date=start_date, |
start_date=start_date, |
metrics='ga:pageviewsPerVisit,ga:bounces,ga:avgTimeOnSite,ga:percentNewVisits', |
metrics='ga:pageviewsPerVisit,ga:bounces,ga:avgTimeOnSite,ga:percentNewVisits', |
max_results=10000, |
max_results=10000, |
end_date=end_date).execute() |
end_date=end_date).execute() |
result_data = results.get('rows') |
result_data = results.get('rows') |
data = { |
data = { |
'Pages per visit': result_data[0][0], |
'Pages per visit': result_data[0][0], |
'Bounces': result_data[0][1], |
'Bounces': result_data[0][1], |
'Average time on site': result_data[0][2], |
'Average time on site': result_data[0][2], |
'Percent new visits': result_data[0][3], |
'Percent new visits': result_data[0][3], |
} |
} |
ga_model.update_sitewide_stats(period_name, "Totals", data) |
ga_model.update_sitewide_stats(period_name, "Totals", data) |
|
|
|
|
def _locale_stats(self, start_date, end_date, period_name): |
def _locale_stats(self, start_date, end_date, period_name): |
""" Fetches stats about language and country """ |
""" Fetches stats about language and country """ |
results = self.service.data().ga().get( |
results = self.service.data().ga().get( |
ids='ga:' + self.profile_id, |
ids='ga:' + self.profile_id, |
start_date=start_date, |
start_date=start_date, |
metrics='ga:uniquePageviews', |
metrics='ga:uniquePageviews', |
sort='-ga:uniquePageviews', |
sort='-ga:uniquePageviews', |
dimensions="ga:language,ga:country", |
dimensions="ga:language,ga:country", |
max_results=10000, |
max_results=10000, |
end_date=end_date).execute() |
end_date=end_date).execute() |
result_data = results.get('rows') |
result_data = results.get('rows') |
data = {} |
data = {} |
for result in result_data: |
for result in result_data: |
data[result[0]] = data.get(result[0], 0) + int(result[2]) |
data[result[0]] = data.get(result[0], 0) + int(result[2]) |
ga_model.update_sitewide_stats(period_name, "Languages", data) |
ga_model.update_sitewide_stats(period_name, "Languages", data) |
|
|
data = {} |
data = {} |
for result in result_data: |
for result in result_data: |
data[result[1]] = data.get(result[1], 0) + int(result[2]) |
data[result[1]] = data.get(result[1], 0) + int(result[2]) |
ga_model.update_sitewide_stats(period_name, "Country", data) |
ga_model.update_sitewide_stats(period_name, "Country", data) |
|
|
|
|
def _social_stats(self, start_date, end_date, period_name): |
def _social_stats(self, start_date, end_date, period_name): |
""" Finds out which social sites people are referred from """ |
""" Finds out which social sites people are referred from """ |
results = self.service.data().ga().get( |
results = self.service.data().ga().get( |
ids='ga:' + self.profile_id, |
ids='ga:' + self.profile_id, |
start_date=start_date, |
start_date=start_date, |
metrics='ga:uniquePageviews', |
metrics='ga:uniquePageviews', |
sort='-ga:uniquePageviews', |
sort='-ga:uniquePageviews', |
dimensions="ga:socialNetwork,ga:referralPath", |
dimensions="ga:socialNetwork,ga:referralPath", |
max_results=10000, |
max_results=10000, |
end_date=end_date).execute() |
end_date=end_date).execute() |
result_data = results.get('rows') |
result_data = results.get('rows') |
twitter_links = [] |
twitter_links = [] |
data = {} |
data = {} |
for result in result_data: |
for result in result_data: |
if not result[0] == '(not set)': |
if not result[0] == '(not set)': |
data[result[0]] = data.get(result[0], 0) + int(result[2]) |
data[result[0]] = data.get(result[0], 0) + int(result[2]) |
if result[0] == 'Twitter': |
if result[0] == 'Twitter': |
twitter_links.append(result[1]) |
twitter_links.append(result[1]) |
ga_model.update_sitewide_stats(period_name, "Social sources", data) |
ga_model.update_sitewide_stats(period_name, "Social sources", data) |
|
|
|
|
def _os_stats(self, start_date, end_date, period_name): |
def _os_stats(self, start_date, end_date, period_name): |
""" Operating system stats """ |
""" Operating system stats """ |
results = self.service.data().ga().get( |
results = self.service.data().ga().get( |
ids='ga:' + self.profile_id, |
ids='ga:' + self.profile_id, |
start_date=start_date, |
start_date=start_date, |
metrics='ga:uniquePageviews', |
metrics='ga:uniquePageviews', |
sort='-ga:uniquePageviews', |
sort='-ga:uniquePageviews', |
dimensions="ga:operatingSystem,ga:operatingSystemVersion", |
dimensions="ga:operatingSystem,ga:operatingSystemVersion", |
max_results=10000, |
max_results=10000, |
end_date=end_date).execute() |
end_date=end_date).execute() |
result_data = results.get('rows') |
result_data = results.get('rows') |
data = {} |
data = {} |
for result in result_data: |
for result in result_data: |
data[result[0]] = data.get(result[0], 0) + int(result[2]) |
data[result[0]] = data.get(result[0], 0) + int(result[2]) |
ga_model.update_sitewide_stats(period_name, "Operating Systems", data) |
ga_model.update_sitewide_stats(period_name, "Operating Systems", data) |
|
|
data = {} |
data = {} |
for result in result_data: |
for result in result_data: |
key = "%s (%s)" % (result[0],result[1]) |
key = "%s (%s)" % (result[0],result[1]) |
data[key] = result[2] |
data[key] = result[2] |
ga_model.update_sitewide_stats(period_name, "Operating Systems versions", data) |
ga_model.update_sitewide_stats(period_name, "Operating Systems versions", data) |
|
|
|
|
def _browser_stats(self, start_date, end_date, period_name): |
def _browser_stats(self, start_date, end_date, period_name): |
""" Information about browsers and browser versions """ |
""" Information about browsers and browser versions """ |
results = self.service.data().ga().get( |
results = self.service.data().ga().get( |
ids='ga:' + self.profile_id, |
ids='ga:' + self.profile_id, |
start_date=start_date, |
start_date=start_date, |
metrics='ga:uniquePageviews', |
metrics='ga:uniquePageviews', |
sort='-ga:uniquePageviews', |
sort='-ga:uniquePageviews', |
dimensions="ga:browser,ga:browserVersion", |
dimensions="ga:browser,ga:browserVersion", |
max_results=10000, |
max_results=10000, |
end_date=end_date).execute() |
end_date=end_date).execute() |
result_data = results.get('rows') |
result_data = results.get('rows') |
data = {} |
data = {} |
for result in result_data: |
for result in result_data: |
data[result[0]] = data.get(result[0], 0) + int(result[2]) |
data[result[0]] = data.get(result[0], 0) + int(result[2]) |
ga_model.update_sitewide_stats(period_name, "Browsers", data) |
ga_model.update_sitewide_stats(period_name, "Browsers", data) |
|
|
data = {} |
data = {} |
for result in result_data: |
for result in result_data: |
key = "%s (%s)" % (result[0],result[1]) |
key = "%s (%s)" % (result[0], result[1]) |
data[key] = result[2] |
data[key] = result[2] |
ga_model.update_sitewide_stats(period_name, "Browser versions", data) |
ga_model.update_sitewide_stats(period_name, "Browser versions", data) |
|
|
|
|
def _mobile_stats(self, start_date, end_date, period_name): |
def _mobile_stats(self, start_date, end_date, period_name): |
""" Info about mobile devices """ |
""" Info about mobile devices """ |
|
|
results = self.service.data().ga().get( |
results = self.service.data().ga().get( |
ids='ga:' + self.profile_id, |
ids='ga:' + self.profile_id, |
start_date=start_date, |
start_date=start_date, |
metrics='ga:uniquePageviews', |
metrics='ga:uniquePageviews', |
sort='-ga:uniquePageviews', |
sort='-ga:uniquePageviews', |
dimensions="ga:mobileDeviceBranding, ga:mobileDeviceInfo", |
dimensions="ga:mobileDeviceBranding, ga:mobileDeviceInfo", |
max_results=10000, |
max_results=10000, |
end_date=end_date).execute() |
end_date=end_date).execute() |
|
|
result_data = results.get('rows') |
result_data = results.get('rows') |
data = {} |
data = {} |
for result in result_data: |
for result in result_data: |
data[result[0]] = data.get(result[0], 0) + int(result[2]) |
data[result[0]] = data.get(result[0], 0) + int(result[2]) |
ga_model.update_sitewide_stats(period_name, "Mobile brands", data) |
ga_model.update_sitewide_stats(period_name, "Mobile brands", data) |
|
|
data = {} |
data = {} |
for result in result_data: |
for result in result_data: |
data[result[1]] = data.get(result[1], 0) + int(result[2]) |
data[result[1]] = data.get(result[1], 0) + int(result[2]) |
ga_model.update_sitewide_stats(period_name, "Mobile devices", data) |
ga_model.update_sitewide_stats(period_name, "Mobile devices", data) |
|
|