gitphp 0.2.9.1 :: ckanext-ga-report.git/blobdiff

blob:a/ckanext/ga_report/ga_model.py -> blob:b/ckanext/ga_report/ga_model.py

import re	import re
import uuid	import uuid

from sqlalchemy import Table, Column, MetaData	from sqlalchemy import Table, Column, MetaData
from sqlalchemy import types	from sqlalchemy import types
from sqlalchemy.sql import select, text	from sqlalchemy.sql import select
from sqlalchemy import func	from sqlalchemy import func

import ckan.model as model	import ckan.model as model
from ckan.model.types import JsonType	from ckan.model.types import JsonType
from ckan.lib.base import *	from ckan.lib.base import *


def make_uuid():	def make_uuid():
return unicode(uuid.uuid4())	return unicode(uuid.uuid4())


def init_tables():	def init_tables():
metadata = MetaData()	metadata = MetaData()
package_stats = Table('ga_url', metadata,	package_stats = Table('ga_url', metadata,
Column('id', types.UnicodeText, primary_key=True, default=make_uuid),	Column('id', types.UnicodeText, primary_key=True,
	default=make_uuid),
Column('period_name', types.UnicodeText),	Column('period_name', types.UnicodeText),
Column('period_complete_day', types.Integer),	Column('period_complete_day', types.Integer),
Column('visits', types.Integer),	Column('visits', types.Integer),
Column('group_id', types.String(60)),	Column('group_id', types.String(60)),
Column('next_page', JsonType),	Column('next_page', JsonType),
)	)
metadata.create_all(model.meta.engine)	metadata.create_all(model.meta.engine)


cached_tables = {}	cached_tables = {}


def get_table(name):	def get_table(name):
if name not in cached_tables:	if name not in cached_tables:
meta = MetaData()	meta = MetaData()
meta.reflect(bind=model.meta.engine)	meta.reflect(bind=model.meta.engine)
table = meta.tables[name]	table = meta.tables[name]
cached_tables[name] = table	cached_tables[name] = table
return cached_tables[name]	return cached_tables[name]


def _normalize_url(url):	def _normalize_url(url):
'''Strip off the hostname etc. Do this before storing it.	'''Strip off the hostname etc. Do this before storing it.

>>> normalize_url('http://data.gov.uk/dataset/weekly_fuel_prices')	>>> normalize_url('http://data.gov.uk/dataset/weekly_fuel_prices')
'/dataset/weekly_fuel_prices'	'/dataset/weekly_fuel_prices'
'''	'''
url = re.sub('https?://(www\.)?data.gov.uk', '', url)	url = re.sub('https?://(www\.)?data.gov.uk', '', url)
return url	return url


def _get_department_id_of_url(url):	def _get_department_id_of_url(url):
# e.g. /dataset/fuel_prices	# e.g. /dataset/fuel_prices
# e.g. /dataset/fuel_prices/resource/e63380d4	# e.g. /dataset/fuel_prices/resource/e63380d4
dataset_match = re.match('/dataset/([^/]+)(/.*)?', url)	dataset_match = re.match('/dataset/([^/]+)(/.*)?', url)
if dataset_match:	if dataset_match:
dataset_ref = dataset_match.groups()[0]	dataset_ref = dataset_match.groups()[0]
dataset = model.Package.get(dataset_ref)	dataset = model.Package.get(dataset_ref)
if dataset:	if dataset:
publisher_groups = dataset.get_groups('publisher')	publisher_groups = dataset.get_groups('publisher')
if publisher_groups:	if publisher_groups:
return publisher_groups[0].id	return publisher_groups[0].id


def update_url_stats(period_name, period_complete_day, url_data):	def update_url_stats(period_name, period_complete_day, url_data):
table = get_table('ga_url')	table = get_table('ga_url')
connection = model.Session.connection()	connection = model.Session.connection()
for url, views, next_page in url_data:	for url, views, next_page in url_data:
url = _normalize_url(url)	url = _normalize_url(url)
department_id = _get_department_id_of_url(url)	department_id = _get_department_id_of_url(url)
# see if the row for this url & month is in the table already	# see if the row for this url & month is in the table already
s = select([func.count(id_col)],	s = select([func.count(id_col)],
table.c.period_name == period_name,	table.c.period_name == period_name,
table.c.url == url)	table.c.url == url)
count = connection.execute(s).fetchone()	count = connection.execute(s).fetchone()
if count and count[0]:	if count and count[0]:
# update the row	# update the row
connection.execute(table.update()\	connection.execute(table.update()
.where(table.c.period_name == period_name,	.where(table.c.period_name == period_name,
table.c.url == url)\	table.c.url == url)
.values(period_complete_day=period_complete_day,	.values(period_complete_day=period_complete_day,
views=views,	views=views,
department_id=department_id,	department_id=department_id,
next_page=next_page))	next_page=next_page))
else:	else:
# create the row	# create the row
values = {'period_name': period_name,	values = {'period_name': period_name,
'period_complete_day': period_complete_day,	'period_complete_day': period_complete_day,
'url': url,	'url': url,
'views': views,	'views': views,
'department_id': department_id,	'department_id': department_id,
'next_page': next_page}	'next_page': next_page}
connection.execute(stats.insert()\	connection.execute(stats.insert().
.values(**values))	values(**values))