import os |
import os |
import logging |
import logging |
import datetime |
import datetime |
import httplib |
import httplib |
import collections |
import collections |
import requests |
import requests |
import json |
import json |
from pylons import config |
from pylons import config |
from ga_model import _normalize_url |
from ga_model import _normalize_url |
import ga_model |
import ga_model |
|
|
#from ga_client import GA |
#from ga_client import GA |
|
|
# Module-level logger shared by everything in this file.
log = logging.getLogger('ckanext.ga-report')

# strftime pattern used to build a period name such as '2013-01'.
FORMAT_MONTH = '%Y-%m'

# Minimum-count thresholds. They are not referenced in the visible part of
# this file -- presumably used as cut-offs further down; TODO confirm.
MIN_VIEWS = 50
MIN_VISITS = 20
MIN_DOWNLOADS = 10
|
|
class DownloadAnalytics(object): |
class DownloadAnalytics(object): |
'''Downloads and stores analytics info''' |
'''Downloads and stores analytics info''' |
|
|
def __init__(self, service=None, token=None, profile_id=None, delete_first=False,
             skip_url_stats=False):
    '''Remember the settings used by the download/store methods.

    :param service: stored as ``self.service``.
    :param token: stored as ``self.token``; used to build the
        ``authorization: Bearer`` header in ``update_social_info``.
    :param profile_id: stored as ``self.profile_id``; sent to the API as
        ``'ga:' + profile_id``.
    :param delete_first: when true, ``download_and_store`` deletes the
        existing analytics for a period before storing new data.
    :param skip_url_stats: when true, ``download_and_store`` skips the
        ``ga_model.pre_update_url_stats`` clean-out step.

    The reporting period is read from the ``ga-report.period`` config key
    (a missing key fails on the subscript). Only ``'monthly'`` is handled
    by ``latest`` and ``for_date``.
    '''
    self.period = config['ga-report.period']
    self.service = service
    self.token = token
    self.profile_id = profile_id
    self.delete_first = delete_first
    self.skip_url_stats = skip_url_stats
|
|
def specific_month(self, date):
    '''Download and store analytics for the calendar month containing ``date``.

    :param date: any object exposing ``year``, ``month`` and ``strftime``
        (e.g. a ``datetime.datetime``).

    The period runs from the first to the last day of that month. If the
    month is the current one, the period ends now and the "complete day"
    is today's day-of-month.
    '''
    import calendar

    month_start = datetime.datetime(date.year, date.month, 1)
    # monthrange() returns (weekday of the 1st, number of days in the month).
    _, complete_day = calendar.monthrange(date.year, date.month)
    month_end = datetime.datetime(date.year, date.month, complete_day)

    # If this is the latest month, the data only runs up until today.
    now = datetime.datetime.now()
    if (now.year, now.month) == (date.year, date.month):
        complete_day = now.day
        month_end = now

    # NOTE(review): a finished month is reported here with its last day as
    # the "complete day", whereas for_date() passes 0 for finished months.
    periods = ((date.strftime(FORMAT_MONTH), complete_day,
                month_start, month_end),)
    self.download_and_store(periods)
|
|
|
|
def latest(self):
    '''Download and store analytics from the first of this month to now.

    :raises NotImplementedError: if ``self.period`` is not ``'monthly'``.
    '''
    if self.period != 'monthly':
        raise NotImplementedError

    # From the first of this month to today.
    now = datetime.datetime.now()
    month_start = datetime.datetime(now.year, now.month, 1)
    periods = ((now.strftime(FORMAT_MONTH), now.day, month_start, now),)
    self.download_and_store(periods)
|
|
|
|
def for_date(self, for_date):
    '''Download and store analytics for every month from ``for_date`` to now.

    :param for_date: ``datetime.datetime`` inside the first month wanted.
    :raises AssertionError: if ``for_date`` is not a ``datetime.datetime``.
    :raises NotImplementedError: if ``self.period`` is not ``'monthly'``.

    One period tuple ``(period_name, period_complete_day, start_date,
    end_date)`` is built per month. Finished months use a complete-day of
    0; the current month uses today's day-of-month and ends now. If
    ``for_date`` lies in a future month, no periods are produced.
    '''
    # The original asserted on an undefined name (``since_date``), which
    # raised NameError on every call.
    assert isinstance(for_date, datetime.datetime)
    if self.period != 'monthly':
        raise NotImplementedError

    periods = []  # (period_name, period_complete_day, start_date, end_date)
    now = datetime.datetime.now()
    first_of_this_month = datetime.datetime(now.year, now.month, 1)
    year, month = for_date.year, for_date.month

    while True:
        first_of_the_month = datetime.datetime(year, month, 1)
        if first_of_the_month > first_of_this_month:
            # first_of_the_month has got to the future somehow
            break
        if first_of_the_month == first_of_this_month:
            # Current, still incomplete month: up to today.
            periods.append((now.strftime(FORMAT_MONTH), now.day,
                            first_of_this_month, now))
            break

        # A finished month. Adding 40 days to the 1st always lands in the
        # following month; step back one day from that month's 1st to get
        # the last day of this one.
        in_the_next_month = first_of_the_month + datetime.timedelta(40)
        first_of_next_month = datetime.datetime(in_the_next_month.year,
                                                in_the_next_month.month, 1)
        last_of_the_month = first_of_next_month - datetime.timedelta(1)
        # Name the period after the month it covers, not after the
        # current month.
        periods.append((first_of_the_month.strftime(FORMAT_MONTH), 0,
                        first_of_the_month, last_of_the_month))

        month += 1
        if month > 12:
            year += 1
            month = 1

    self.download_and_store(periods)
|
|
@staticmethod
def get_full_period_name(period_name, period_complete_day):
    '''Return the display name for a period.

    :param period_name: base name, e.g. ``'2013-01'``.
    :param period_complete_day: day-of-month the data runs up to; a falsy
        value (0 or None) means no suffix is added.
    :returns: ``period_name`` alone, or ``period_name`` followed by
        ``' (up to <day>th)'``. The suffix is always "th", whatever the day.
    '''
    if not period_complete_day:
        return period_name
    return period_name + ' (up to %ith)' % period_complete_day
|
|
|
|
def download_and_store(self, periods):
    '''Download and store all analytics for each of the given periods.

    :param periods: iterable of ``(period_name, period_complete_day,
        start_date, end_date)`` tuples; the two dates must support
        ``strftime``.

    For each period, in order: optionally delete existing analytics
    (``self.delete_first``), optionally clean out old URL stats (unless
    ``self.skip_url_stats``), download and store dataset views, download
    and store publisher views, refresh the URL stats and publisher stats
    in ``ga_model``, then process site-wide stats and social-network info.
    '''
    for period_name, period_complete_day, start_date, end_date in periods:
        log.info('Period "%s" (%s - %s)',
                 self.get_full_period_name(period_name, period_complete_day),
                 start_date.strftime('%Y-%m-%d'),
                 end_date.strftime('%Y-%m-%d'))

        if self.delete_first:
            log.info('Deleting existing Analytics for this period "%s"',
                     period_name)
            ga_model.delete(period_name)

        if not self.skip_url_stats:
            # Clean out old url data before storing the new
            ga_model.pre_update_url_stats(period_name)

            accountName = config.get('googleanalytics.account')

            # NOTE(review): the source this was recovered from carried two
            # variants of the path filters below. The other variant ignores
            # the account prefix and uses '~^/dataset/[a-z0-9-_]+' and
            # '~^/organization/[a-z0-9-_]+'. Confirm which one matches the
            # paths recorded for this site.
            log.info('Downloading analytics for dataset views')
            data = self.download(start_date, end_date,
                                 '~/%s/dataset/[a-z0-9-_]+' % accountName)

            log.info('Storing dataset views (%i rows)', len(data.get('url')))
            self.store(period_name, period_complete_day, data)

            log.info('Downloading analytics for publisher views')
            data = self.download(start_date, end_date,
                                 '~/%s/publisher/[a-z0-9-_]+' % accountName)

            log.info('Storing publisher views (%i rows)', len(data.get('url')))
            self.store(period_name, period_complete_day, data)

            # Make sure the All records are correct.
            ga_model.post_update_url_stats()

            log.info('Associating datasets with their publisher')
            ga_model.update_publisher_stats(period_name)  # about 30 seconds.

        log.info('Downloading and storing analytics for site-wide stats')
        self.sitewide_stats(period_name, period_complete_day)

        log.info('Downloading and storing analytics for social networks')
        self.update_social_info(period_name, start_date, end_date)
|
|
|
|
def update_social_info(self, period_name, start_date, end_date): |
def update_social_info(self, period_name, start_date, end_date): |
start_date = start_date.strftime('%Y-%m-%d') |
start_date = start_date.strftime('%Y-%m-%d') |
end_date = end_date.strftime('%Y-%m-%d') |
end_date = end_date.strftime('%Y-%m-%d') |
query = 'ga:hasSocialSourceReferral=~Yes$' |
query = 'ga:hasSocialSourceReferral=~Yes$' |
metrics = 'ga:entrances' |
metrics = 'ga:entrances' |
sort = '-ga:entrances' |
sort = '-ga:entrances' |
|
|
try: |
try: |
# Because of issues of invalid responses, we are going to make these requests |
# Because of issues of invalid responses, we are going to make these requests |
# ourselves. |
# ourselves. |
headers = {'authorization': 'Bearer ' + self.token} |
headers = {'authorization': 'Bearer ' + self.token} |
|
|
args = dict(ids='ga:' + self.profile_id, |
args = dict(ids='ga:' + self.profile_id, |
filters=query, |
filters=query, |
metrics=metrics, |
metrics=metrics, |
sort=sort, |
sort=sort, |
dimensions="ga:landingPagePath,ga:socialNetwork", |
dimensions="ga:landingPagePath,ga:socialNetwork", |
max_results=10000) |
max_results=10000) |
|
|