--- a/ckanext/ga_report/ga_model.py +++ b/ckanext/ga_report/ga_model.py @@ -1,1 +1,91 @@ +import re +import uuid +from sqlalchemy import Table, Column, MetaData +from sqlalchemy import types +from sqlalchemy.sql import select, text +from sqlalchemy import func + +import ckan.model as model +from ckan.model.types import JsonType +from ckan.lib.base import * + +def make_uuid(): + return unicode(uuid.uuid4()) + +def init_tables(): + metadata = MetaData() + package_stats = Table('ga_url', metadata, + Column('id', types.UnicodeText, primary_key=True, default=make_uuid), + Column('period_name', types.UnicodeText), + Column('period_complete_day', types.Integer), + Column('visits', types.Integer), + Column('group_id', types.String(60)), + Column('next_page', JsonType), + ) + metadata.create_all(model.meta.engine) + + +cached_tables = {} + +def get_table(name): + if name not in cached_tables: + meta = MetaData() + meta.reflect(bind=model.meta.engine) + table = meta.tables[name] + cached_tables[name] = table + return cached_tables[name] + + +def _normalize_url(url): + '''Strip off the hostname etc. Do this before storing it. + + >>> normalize_url('http://data.gov.uk/dataset/weekly_fuel_prices') + '/dataset/weekly_fuel_prices' + ''' + url = re.sub('https?://(www\.)?data.gov.uk', '', url) + return url + +def _get_department_id_of_url(url): + # e.g. /dataset/fuel_prices + # e.g. /dataset/fuel_prices/resource/e63380d4 + dataset_match = re.match('/dataset/([^/]+)(/.*)?', url) + if dataset_match: + dataset_ref = dataset_match.groups()[0] + dataset = model.Package.get(dataset_ref) + if dataset: + publisher_groups = dataset.get_groups('publisher') + if publisher_groups: + return publisher_groups[0].id + +def update_url_stats(period_name, period_complete_day, url_data): + table = get_table('ga_url') + connection = model.Session.connection() + for url, views, next_page in url_data: + url = _normalize_url(url) + department_id = _get_department_id_of_url(url) + # see if the row for this url & month is in the table already + s = select([func.count(id_col)], + table.c.period_name == period_name, + table.c.url == url) + count = connection.execute(s).fetchone() + if count and count[0]: + # update the row + connection.execute(table.update()\ + .where(table.c.period_name == period_name, + table.c.url == url)\ + .values(period_complete_day=period_complete_day, + views=views, + department_id=department_id, + next_page=next_page)) + else: + # create the row + values = {'period_name': period_name, + 'period_complete_day': period_complete_day, + 'url': url, + 'views': views, + 'department_id': department_id, + 'next_page': next_page} + connection.execute(stats.insert()\ + .values(**values)) +