import logging | import logging |
import datetime | |
from ckan.lib.cli import CkanCommand | from ckan.lib.cli import CkanCommand |
# No other CKAN imports allowed until _load_config is run, or logging is disabled | # No other CKAN imports allowed until _load_config is run, |
# or logging is disabled | |
class InitDB(CkanCommand):
    """Initialise the extension's database tables
    """
    summary = __doc__.split('\n')[0]
    usage = __doc__
    max_args = 0
    min_args = 0

    def command(self):
        # Config must be loaded before any further CKAN imports.
        self._load_config()

        import ckan.model as model
        # Rebind the session to the configured engine for this process.
        model.Session.remove()
        model.Session.configure(bind=model.meta.engine)
        log = logging.getLogger('ckanext.ga-report')

        import ga_model
        ga_model.init_tables()
        log.info("DB tables are setup")
class GetAuthToken(CkanCommand):
    """ Get's the Google auth token

    Usage: paster getauthtoken <credentials_file>

    Where <credentials_file> is the file name containing the details
    for the service (obtained from https://code.google.com/apis/console).
    By default this is set to credentials.json
    """
    summary = __doc__.split('\n')[0]
    usage = __doc__
    max_args = 0
    min_args = 0

    def command(self):
        """
        In this case we don't want a valid service, but rather just to
        force the user through the auth flow. We allow this to complete to
        act as a form of verification instead of just getting the token and
        assuming it is correct.
        """
        from ga_auth import init_service

        # Fix: the imported name is init_service; the previous call to
        # initialize_service raised NameError at runtime.
        init_service('token.dat',
                     self.args[0] if self.args
                     else 'credentials.json')
class LoadAnalytics(CkanCommand):
    """Get data from Google Analytics API and save it
    in the ga_model

    Usage: paster loadanalytics <tokenfile> <time-period>

    Where <tokenfile> is the name of the auth token file from
    the getauthtoken step.

    And where <time-period> is:
        all         - data for all time
        latest      - (default) just the 'latest' data
        YYYY-MM-DD  - just data for all time periods going
                      back to (and including) this date
    """
    summary = __doc__.split('\n')[0]
    usage = __doc__
    max_args = 2
    min_args = 1

    def command(self):
        # Config must be loaded before any further CKAN imports.
        self._load_config()

        # Consolidated imports (init_service and get_profile_id were
        # previously imported redundantly in three separate statements).
        from download_analytics import DownloadAnalytics
        from ga_auth import init_service, get_profile_id

        try:
            svc = init_service(self.args[0], None)
        except TypeError:
            print ('Have you correctly run the getauthtoken task and '
                   'specified the correct file here')
            return

        downloader = DownloadAnalytics(svc, profile_id=get_profile_id(svc))

        # Second positional arg selects the period; defaults to 'latest'.
        time_period = self.args[1] if self.args and len(self.args) > 1 \
            else 'latest'
        if time_period == 'all':
            downloader.all_()
        elif time_period == 'latest':
            downloader.latest()
        else:
            # Anything else must be a YYYY-MM-DD start date.
            since_date = datetime.datetime.strptime(time_period, '%Y-%m-%d')
            downloader.since_date(since_date)
import httplib2 | import httplib2 |
from apiclient.discovery import build | from apiclient.discovery import build |
from oauth2client.client import flow_from_clientsecrets | from oauth2client.client import flow_from_clientsecrets |
from oauth2client.file import Storage | from oauth2client.file import Storage |
from oauth2client.tools import run | from oauth2client.tools import run |
from pylons import config | from pylons import config |
def _prepare_credentials(token_filename, credentials_filename):
    """
    Either returns the user's oauth credentials or uses the credentials
    file to generate a token (by forcing the user to login in the browser)
    """
    store = Storage(token_filename)
    creds = store.get()

    # Missing or expired token: run the browser-based oauth flow and
    # persist the result back into the token file via `store`.
    if creds is None or creds.invalid:
        flow = flow_from_clientsecrets(
            credentials_filename,
            scope='https://www.googleapis.com/auth/analytics.readonly',
            message="Can't find the credentials file")
        creds = run(flow, store)

    return creds
def init_service(token_file, credentials_file):
    """
    Given a file containing the user's oauth token (and another with
    credentials in case we need to generate the token) will return a
    service object representing the analytics API.

    Fix: removed a duplicated `def init_service(...)` header line which
    left the first definition with an empty body (a syntax error).
    """
    http = httplib2.Http()
    credentials = _prepare_credentials(token_file, credentials_file)
    http = credentials.authorize(http)  # authorize the http object
    return build('analytics', 'v3', http=http)
def get_profile_id(service):
    """
    Get the profile ID for this user and the service specified by the
    'googleanalytics.id' configuration option.

    Returns None when the user has no accounts or no matching profile.
    """
    accounts = service.management().accounts().list().execute()
    items = accounts.get('items')
    if not items:
        return None

    account_id = items[0].get('id')
    web_property_id = config.get('googleanalytics.id')
    profiles = service.management().profiles().list(
        accountId=account_id, webPropertyId=web_property_id).execute()

    matches = profiles.get('items')
    return matches[0].get('id') if matches else None
import re | import re |
import uuid | import uuid |
from sqlalchemy import Table, Column, MetaData | from sqlalchemy import Table, Column, MetaData |
from sqlalchemy import types | from sqlalchemy import types |
from sqlalchemy.sql import select, text | from sqlalchemy.sql import select |
from sqlalchemy import func | from sqlalchemy import func |
import ckan.model as model | import ckan.model as model |
from ckan.model.types import JsonType | from ckan.model.types import JsonType |
from ckan.lib.base import * | from ckan.lib.base import * |
def make_uuid():
    """Return a freshly generated random UUID as a unicode string."""
    new_id = uuid.uuid4()
    return unicode(new_id)
def init_tables():
    """Create the extension's 'ga_url' stats table in CKAN's database.

    Idempotent: MetaData.create_all only creates tables that do not
    already exist.
    """
    metadata = MetaData()
    # Fix: the table was bound to an unused local misleadingly named
    # `package_stats`; the Table registers itself on `metadata`, so no
    # local binding is needed.
    Table('ga_url', metadata,
          Column('id', types.UnicodeText, primary_key=True,
                 default=make_uuid),
          Column('period_name', types.UnicodeText),
          Column('period_complete_day', types.Integer),
          Column('visits', types.Integer),
          Column('group_id', types.String(60)),
          Column('next_page', JsonType),
          )
    metadata.create_all(model.meta.engine)
# Module-level cache of reflected SQLAlchemy Table objects, keyed by name,
# so the (expensive) schema reflection runs at most once per table.
cached_tables = {}


def get_table(name):
    """Return the SQLAlchemy Table called `name`, reflecting it on first use."""
    if name not in cached_tables:
        meta = MetaData()
        meta.reflect(bind=model.meta.engine)
        cached_tables[name] = meta.tables[name]
    return cached_tables[name]
def _normalize_url(url): | def _normalize_url(url): |
'''Strip off the hostname etc. Do this before storing it. | '''Strip off the hostname etc. Do this before storing it. |
>>> normalize_url('http://data.gov.uk/dataset/weekly_fuel_prices') | >>> normalize_url('http://data.gov.uk/dataset/weekly_fuel_prices') |
'/dataset/weekly_fuel_prices' | '/dataset/weekly_fuel_prices' |
''' | ''' |
url = re.sub('https?://(www\.)?data.gov.uk', '', url) | url = re.sub('https?://(www\.)?data.gov.uk', '', url) |
return url | return url |
def _get_department_id_of_url(url): | def _get_department_id_of_url(url): |
# e.g. /dataset/fuel_prices | # e.g. /dataset/fuel_prices |
# e.g. /dataset/fuel_prices/resource/e63380d4 | # e.g. /dataset/fuel_prices/resource/e63380d4 |
dataset_match = re.match('/dataset/([^/]+)(/.*)?', url) | dataset_match = re.match('/dataset/([^/]+)(/.*)?', url) |
if dataset_match: | if dataset_match: |
dataset_ref = dataset_match.groups()[0] | dataset_ref = dataset_match.groups()[0] |
dataset = model.Package.get(dataset_ref) | dataset = model.Package.get(dataset_ref) |
if dataset: | if dataset: |
publisher_groups = dataset.get_groups('publisher') | publisher_groups = dataset.get_groups('publisher') |
if publisher_groups: | if publisher_groups: |
return publisher_groups[0].id | return publisher_groups[0].id |
def update_url_stats(period_name, period_complete_day, url_data):
    """Upsert per-URL visit stats into the ga_url table for one period.

    url_data is an iterable of (url, views, next_page) tuples. The URL is
    normalized before storage; the row keyed by (period_name, url) is
    updated when present and inserted otherwise.
    """
    table = get_table('ga_url')
    connection = model.Session.connection()
    for url, views, next_page in url_data:
        url = _normalize_url(url)
        department_id = _get_department_id_of_url(url)

        # see if the row for this url & month is in the table already.
        # Fixes: `id_col` was undefined (NameError) - count the table's
        # id column; the two conditions are combined explicitly, since
        # the second positional arg of select() is from_obj, not another
        # where-clause.
        match_row = (table.c.period_name == period_name) & \
                    (table.c.url == url)
        s = select([func.count(table.c.id)], match_row)
        count = connection.execute(s).fetchone()
        if count and count[0]:
            # update the row
            connection.execute(table.update()
                               .where(match_row)
                               .values(period_complete_day=period_complete_day,
                                       views=views,
                                       department_id=department_id,
                                       next_page=next_page))
        else:
            # create the row
            # Fix: `stats` was undefined here - insert into `table`.
            values = {'period_name': period_name,
                      'period_complete_day': period_complete_day,
                      'url': url,
                      'views': views,
                      'department_id': department_id,
                      'next_page': next_page}
            connection.execute(table.insert().values(**values))