--- a/ckanext/ga_report/ga_model.py +++ b/ckanext/ga_report/ga_model.py @@ -1,10 +1,10 @@ import re import uuid -from sqlalchemy import Table, Column, MetaData +from sqlalchemy import Table, Column, MetaData, ForeignKey from sqlalchemy import types from sqlalchemy.sql import select -from sqlalchemy.orm import mapper +from sqlalchemy.orm import mapper, relation from sqlalchemy import func import ckan.model as model @@ -13,7 +13,7 @@ def make_uuid(): return unicode(uuid.uuid4()) - +metadata = MetaData() class GA_Url(object): @@ -21,20 +21,6 @@ for k,v in kwargs.items(): setattr(self, k, v) -class GA_Stat(object): - - def __init__(self, **kwargs): - for k,v in kwargs.items(): - setattr(self, k, v) - -class GA_Publisher(object): - - def __init__(self, **kwargs): - for k,v in kwargs.items(): - setattr(self, k, v) - - -metadata = MetaData() url_table = Table('ga_url', metadata, Column('id', types.UnicodeText, primary_key=True, default=make_uuid), @@ -44,8 +30,16 @@ Column('visitors', types.UnicodeText), Column('url', types.UnicodeText), Column('department_id', types.UnicodeText), + Column('package_id', types.UnicodeText), ) mapper(GA_Url, url_table) + + +class GA_Stat(object): + + def __init__(self, **kwargs): + for k,v in kwargs.items(): + setattr(self, k, v) stat_table = Table('ga_stat', metadata, Column('id', types.UnicodeText, primary_key=True, @@ -56,6 +50,12 @@ Column('value', types.UnicodeText), ) mapper(GA_Stat, stat_table) + +class GA_Publisher(object): + + def __init__(self, **kwargs): + for k,v in kwargs.items(): + setattr(self, k, v) pub_table = Table('ga_publisher', metadata, Column('id', types.UnicodeText, primary_key=True, @@ -71,6 +71,24 @@ mapper(GA_Publisher, pub_table) +class GA_ReferralStat(object): + + def __init__(self, **kwargs): + for k,v in kwargs.items(): + setattr(self, k, v) + +referrer_table = Table('ga_referrer', metadata, + Column('id', types.UnicodeText, primary_key=True, + default=make_uuid), + Column('period_name', types.UnicodeText), + Column('source', types.UnicodeText), + Column('url', types.UnicodeText), + Column('count', types.Integer), + ) +mapper(GA_ReferralStat, referrer_table) + + + def init_tables(): metadata.create_all(model.meta.engine) @@ -93,8 +111,9 @@ >>> normalize_url('http://data.gov.uk/dataset/weekly_fuel_prices') '/dataset/weekly_fuel_prices' ''' - url = re.sub('https?://(www\.)?data.gov.uk', '', url) - return url + # Deliberately leaving a / + url = url.replace('http:/','') + return '/' + '/'.join(url.split('/')[2:]) def _get_department_id_of_url(url): @@ -137,36 +156,100 @@ model.Session.commit() +def update_url_stat_totals(period_name): + + """ + items = model.Session.query(GA_Url).\ + filter(GA_Url.period_name != "All").\ + filter(GA_Url.url==url).all() + values = {'id': make_uuid(), + 'period_name': "All", + 'period_complete_day': "0", + 'url': url, + 'pageviews': sum([int(x.pageviews) for x in items]), + 'visitors': sum([int(x.visitors) for x in items]), + 'department_id': department_id, + 'package_id': package + } + model.Session.add(GA_Url(**values)) + model.Session.commit() + """ + +def pre_update_url_stats(period_name): + model.Session.query(GA_Url).\ + filter(GA_Url.period_name==period_name).delete() + model.Session.query(GA_Url).\ + filter(GA_Url.period_name=='All').delete() + def update_url_stats(period_name, period_complete_day, url_data): + for url, views, visitors in url_data: - url = _normalize_url(url) department_id = _get_department_id_of_url(url) - # see if the row for this url & month is in the table already - item = model.Session.query(GA_Url).\ - filter(GA_Url.period_name==period_name).\ - filter(GA_Url.url==url).first() - if item: - item.period_name = period_name - item.pageviews = views - item.visitors = visitors - item.department_id = department_id - model.Session.add(item) - else: - # create the row + package = None + if url.startswith('/dataset/'): + package = url[len('/dataset/'):] + + values = {'id': make_uuid(), + 'period_name': period_name, + 'period_complete_day': period_complete_day, + 'url': url, + 'pageviews': views, + 'visitors': visitors, + 'department_id': department_id, + 'package_id': package + } + model.Session.add(GA_Url(**values)) + model.Session.commit() + + if package: + entries = model.Session.query(GA_Url).\ + filter(GA_Url.period_name!='All').\ + filter(GA_Url.url==url).all() values = {'id': make_uuid(), - 'period_name': period_name, - 'period_complete_day': period_complete_day, + 'period_name': 'All', + 'period_complete_day': 0, 'url': url, - 'pageviews': views, - 'visitors': visitors, - 'department_id': department_id + 'pageviews': sum([int(e.pageviews) for e in entries]), + 'visitors': sum([int(e.visitors) for e in entries]), + 'department_id': department_id, + 'package_id': package } model.Session.add(GA_Url(**values)) - model.Session.commit() - - + model.Session.commit() + + + + + +def update_social(period_name, data): + # Clean up first. + model.Session.query(GA_ReferralStat).\ + filter(GA_ReferralStat.period_name==period_name).delete() + + for url,data in data.iteritems(): + for entry in data: + source = entry[0] + count = entry[1] + + item = model.Session.query(GA_ReferralStat).\ + filter(GA_ReferralStat.period_name==period_name).\ + filter(GA_ReferralStat.source==source).\ + filter(GA_ReferralStat.url==url).first() + if item: + item.count = item.count + count + model.Session.add(item) + else: + # create the row + values = {'id': make_uuid(), + 'period_name': period_name, + 'source': source, + 'url': url, + 'count': count, + } + model.Session.add(GA_ReferralStat(**values)) + model.Session.commit() def update_publisher_stats(period_name): """ @@ -248,3 +331,15 @@ for grandchild in go_down_tree(child): yield grandchild +def delete(period_name): + ''' + Deletes table data for the specified period, or specify 'all' + for all periods. + ''' + for object_type in (GA_Url, GA_Stat, GA_Publisher, GA_ReferralStat): + q = model.Session.query(object_type) + if period_name != 'all': + q = q.filter_by(period_name=period_name) + q.delete() + model.Session.commit() +