# flake8

import datetime
import logging

from ckan.lib.cli import CkanCommand
# No other CKAN imports allowed until _load_config is run,
# or logging is disabled
   
   
class InitDB(CkanCommand):
    """Initialise the extension's database tables
    """
    summary = __doc__.split('\n')[0]
    usage = __doc__
    max_args = 0
    min_args = 0

    def command(self):
        """Create the ga-report tables in the configured CKAN database."""
        self._load_config()

        # CKAN imports are deferred until after _load_config() so that
        # logging/config are set up first (see note at top of file).
        import ckan.model as model
        model.Session.remove()
        model.Session.configure(bind=model.meta.engine)
        log = logging.getLogger('ckanext.ga-report')

        import ga_model
        ga_model.init_tables()
        log.info("DB tables are setup")
   
   
class GetAuthToken(CkanCommand):
    """ Gets the Google auth token

    Usage: paster getauthtoken <credentials_file>

    Where <credentials_file> is the file name containing the details
    for the service (obtained from https://code.google.com/apis/console).
    By default this is set to credentials.json
    """
    summary = __doc__.split('\n')[0]
    usage = __doc__
    max_args = 0
    min_args = 0

    def command(self):
        """
        In this case we don't want a valid service, but rather just to
        force the user through the auth flow. We allow this to complete to
        act as a form of verification instead of just getting the token and
        assuming it is correct.
        """
        from ga_auth import init_service
        # Bug fix: previously called the undefined name
        # `initialize_service`; the imported helper is `init_service`.
        init_service('token.dat',
                     self.args[0] if self.args
                     else 'credentials.json')
   
   
class LoadAnalytics(CkanCommand):
    """Get data from Google Analytics API and save it
    in the ga_model

    Usage: paster loadanalytics <tokenfile> <time-period>

    Where <tokenfile> is the name of the auth token file from
    the getauthtoken step.

    And where <time-period> is:
        all         - data for all time
        latest      - (default) just the 'latest' data
        YYYY-MM-DD  - just data for all time periods going
                      back to (and including) this date
    """
    summary = __doc__.split('\n')[0]
    usage = __doc__
    max_args = 2
    min_args = 1

    def command(self):
        """Authenticate against GA and download stats for the requested
        time period, storing them via DownloadAnalytics."""
        self._load_config()

        from download_analytics import DownloadAnalytics
        from ga_auth import (init_service, get_profile_id)

        try:
            svc = init_service(self.args[0], None)
        except TypeError:
            # init_service raises TypeError when the token file is
            # missing/unusable and no credentials file was supplied.
            print ('Have you correctly run the getauthtoken task and '
                   'specified the correct file here')
            return

        downloader = DownloadAnalytics(svc, profile_id=get_profile_id(svc))

        time_period = self.args[1] if self.args and len(self.args) > 1 \
            else 'latest'
        if time_period == 'all':
            downloader.all_()
        elif time_period == 'latest':
            downloader.latest()
        else:
            # Anything else must be a YYYY-MM-DD start date.
            since_date = datetime.datetime.strptime(time_period, '%Y-%m-%d')
            downloader.since_date(since_date)
   
   
import httplib2 import httplib2
from apiclient.discovery import build from apiclient.discovery import build
from oauth2client.client import flow_from_clientsecrets from oauth2client.client import flow_from_clientsecrets
from oauth2client.file import Storage from oauth2client.file import Storage
from oauth2client.tools import run from oauth2client.tools import run
   
from pylons import config from pylons import config
   
   
def _prepare_credentials(token_filename, credentials_filename):
    """
    Either returns the user's oauth credentials or uses the credentials
    file to generate a token (by forcing the user to login in the browser)
    """
    storage = Storage(token_filename)
    credentials = storage.get()

    if credentials is not None and not credentials.invalid:
        return credentials

    # No usable stored token: run the browser-based OAuth flow, which
    # also persists the new token into `storage`.
    flow = flow_from_clientsecrets(
        credentials_filename,
        scope='https://www.googleapis.com/auth/analytics.readonly',
        message="Can't find the credentials file")
    return run(flow, storage)
   
def init_service(token_file, credentials_file):
    """
    Given a file containing the user's oauth token (and another with
    credentials in case we need to generate the token) will return a
    service object representing the analytics API.
    """
    credentials = _prepare_credentials(token_file, credentials_file)
    # authorize the http object before handing it to the API client
    http = credentials.authorize(httplib2.Http())
    return build('analytics', 'v3', http=http)
   
   
def get_profile_id(service):
    """
    Get the profile ID for this user and the service specified by the
    'googleanalytics.id' configuration option.
    """
    accounts = service.management().accounts().list().execute()
    account_items = accounts.get('items')
    if not account_items:
        return None

    account_id = account_items[0].get('id')
    web_property_id = config.get('googleanalytics.id')
    profiles = service.management().profiles().list(
        accountId=account_id, webPropertyId=web_property_id).execute()

    profile_items = profiles.get('items')
    if profile_items:
        return profile_items[0].get('id')

    return None
   
import re import re
import uuid import uuid
   
from sqlalchemy import Table, Column, MetaData from sqlalchemy import Table, Column, MetaData
from sqlalchemy import types from sqlalchemy import types
from sqlalchemy.sql import select, text from sqlalchemy.sql import select
from sqlalchemy import func from sqlalchemy import func
   
import ckan.model as model import ckan.model as model
from ckan.model.types import JsonType from ckan.model.types import JsonType
from ckan.lib.base import * from ckan.lib.base import *
   
   
def make_uuid():
    """Return a fresh random UUID as a unicode string (row-id default)."""
    return unicode(uuid.uuid4())
   
   
def init_tables():
    """Create the ga_url stats table in the CKAN database (no-op if it
    already exists)."""
    metadata = MetaData()
    Table('ga_url', metadata,
          Column('id', types.UnicodeText, primary_key=True,
                 default=make_uuid),
          Column('period_name', types.UnicodeText),
          Column('period_complete_day', types.Integer),
          # Columns required by update_url_stats(), which selects on
          # table.c.url and writes views/department_id — previously
          # missing from the schema.
          Column('url', types.UnicodeText),
          Column('views', types.Integer),
          Column('department_id', types.String(60)),
          Column('visits', types.Integer),
          Column('group_id', types.String(60)),
          Column('next_page', JsonType),
          )
    metadata.create_all(model.meta.engine)
   
   
# Module-level cache of reflected Table objects, keyed by table name.
cached_tables = {}


def get_table(name):
    """Return the SQLAlchemy Table called *name*, reflecting it from the
    CKAN database engine on first access and caching it thereafter."""
    if name not in cached_tables:
        metadata = MetaData()
        metadata.reflect(bind=model.meta.engine)
        cached_tables[name] = metadata.tables[name]
    return cached_tables[name]
   
   
def _normalize_url(url): def _normalize_url(url):
'''Strip off the hostname etc. Do this before storing it. '''Strip off the hostname etc. Do this before storing it.
   
>>> normalize_url('http://data.gov.uk/dataset/weekly_fuel_prices') >>> normalize_url('http://data.gov.uk/dataset/weekly_fuel_prices')
'/dataset/weekly_fuel_prices' '/dataset/weekly_fuel_prices'
''' '''
url = re.sub('https?://(www\.)?data.gov.uk', '', url) url = re.sub('https?://(www\.)?data.gov.uk', '', url)
return url return url
   
   
def _get_department_id_of_url(url): def _get_department_id_of_url(url):
# e.g. /dataset/fuel_prices # e.g. /dataset/fuel_prices
# e.g. /dataset/fuel_prices/resource/e63380d4 # e.g. /dataset/fuel_prices/resource/e63380d4
dataset_match = re.match('/dataset/([^/]+)(/.*)?', url) dataset_match = re.match('/dataset/([^/]+)(/.*)?', url)
if dataset_match: if dataset_match:
dataset_ref = dataset_match.groups()[0] dataset_ref = dataset_match.groups()[0]
dataset = model.Package.get(dataset_ref) dataset = model.Package.get(dataset_ref)
if dataset: if dataset:
publisher_groups = dataset.get_groups('publisher') publisher_groups = dataset.get_groups('publisher')
if publisher_groups: if publisher_groups:
return publisher_groups[0].id return publisher_groups[0].id
   
   
def update_url_stats(period_name, period_complete_day, url_data):
    """Upsert one ga_url row per (period_name, url) from *url_data*,
    an iterable of (url, views, next_page) tuples."""
    table = get_table('ga_url')
    connection = model.Session.connection()
    for url, views, next_page in url_data:
        url = _normalize_url(url)
        department_id = _get_department_id_of_url(url)

        # See if the row for this url & month is in the table already.
        # Bug fixes: `id_col` was undefined (now table.c.id) and
        # multiple positional where-clauses are invalid — chain
        # generative .where() calls instead.
        s = select([func.count(table.c.id)])\
            .where(table.c.period_name == period_name)\
            .where(table.c.url == url)
        count = connection.execute(s).fetchone()
        if count and count[0]:
            # update the existing row
            connection.execute(
                table.update()
                     .where(table.c.period_name == period_name)
                     .where(table.c.url == url)
                     .values(period_complete_day=period_complete_day,
                             views=views,
                             department_id=department_id,
                             next_page=next_page))
        else:
            # create the row (bug fix: `stats` was undefined — insert
            # into the reflected `table`)
            values = {'period_name': period_name,
                      'period_complete_day': period_complete_day,
                      'url': url,
                      'views': views,
                      'department_id': department_id,
                      'next_page': next_page}
            connection.execute(table.insert().values(**values))