--- a/ckanext/ga_report/ga_model.py +++ b/ckanext/ga_report/ga_model.py @@ -13,6 +13,8 @@ def make_uuid(): return unicode(uuid.uuid4()) +metadata = MetaData() + class GA_Url(object): @@ -21,31 +23,24 @@ for k,v in kwargs.items(): setattr(self, k, v) -class GA_Stat(object): - - def __init__(self, **kwargs): - for k,v in kwargs.items(): - setattr(self, k, v) - -class GA_Publisher(object): - - def __init__(self, **kwargs): - for k,v in kwargs.items(): - setattr(self, k, v) - - -metadata = MetaData() url_table = Table('ga_url', metadata, Column('id', types.UnicodeText, primary_key=True, default=make_uuid), Column('period_name', types.UnicodeText), Column('period_complete_day', types.Integer), Column('pageviews', types.UnicodeText), - Column('visits', types.UnicodeText), + Column('visitors', types.UnicodeText), Column('url', types.UnicodeText), Column('department_id', types.UnicodeText), ) mapper(GA_Url, url_table) + + +class GA_Stat(object): + + def __init__(self, **kwargs): + for k,v in kwargs.items(): + setattr(self, k, v) stat_table = Table('ga_stat', metadata, Column('id', types.UnicodeText, primary_key=True, @@ -57,15 +52,42 @@ mapper(GA_Stat, stat_table) +class GA_Publisher(object): + + def __init__(self, **kwargs): + for k,v in kwargs.items(): + setattr(self, k, v) + pub_table = Table('ga_publisher', metadata, Column('id', types.UnicodeText, primary_key=True, default=make_uuid), Column('period_name', types.UnicodeText), Column('publisher_name', types.UnicodeText), Column('views', types.UnicodeText), - Column('visits', types.UnicodeText), + Column('visitors', types.UnicodeText), + Column('toplevel', types.Boolean, default=False), + Column('subpublishercount', types.Integer, default=0), + Column('parent', types.UnicodeText), ) mapper(GA_Publisher, pub_table) + + +class GA_ReferralStat(object): + + def __init__(self, **kwargs): + for k,v in kwargs.items(): + setattr(self, k, v) + +referrer_table = Table('ga_referrer', metadata, + Column('id', types.UnicodeText, primary_key=True, + default=make_uuid), + Column('period_name', types.UnicodeText), + Column('source', types.UnicodeText), + Column('url', types.UnicodeText), + Column('count', types.Integer), + ) +mapper(GA_ReferralStat, referrer_table) + def init_tables(): @@ -90,8 +112,9 @@ >>> normalize_url('http://data.gov.uk/dataset/weekly_fuel_prices') '/dataset/weekly_fuel_prices' ''' - url = re.sub('https?://(www\.)?data.gov.uk', '', url) - return url + # Deliberately leaving a / + url = url.replace('http:/','') + return '/' + '/'.join(url.split('/')[2:]) def _get_department_id_of_url(url): @@ -136,7 +159,7 @@ def update_url_stats(period_name, period_complete_day, url_data): - for url, views, visits in url_data: + for url, views, visitors in url_data: url = _normalize_url(url) department_id = _get_department_id_of_url(url) @@ -147,7 +170,7 @@ if item: item.period_name = period_name item.pageviews = views - item.visits = visits + item.visitors = visitors item.department_id = department_id model.Session.add(item) else: @@ -157,25 +180,66 @@ 'period_complete_day': period_complete_day, 'url': url, 'pageviews': views, - 'visits': visits, + 'visitors': visitors, 'department_id': department_id } model.Session.add(GA_Url(**values)) model.Session.commit() +def update_social(period_name, data): + # Clean up first. + model.Session.query(GA_ReferralStat).\ + filter(GA_ReferralStat.period_name==period_name).delete() + + for url,data in data.iteritems(): + for entry in data: + source = entry[0] + count = entry[1] + + item = model.Session.query(GA_ReferralStat).\ + filter(GA_ReferralStat.period_name==period_name).\ + filter(GA_ReferralStat.source==source).\ + filter(GA_ReferralStat.url==url).first() + if item: + item.count = item.count + count + model.Session.add(item) + else: + # create the row + values = {'id': make_uuid(), + 'period_name': period_name, + 'source': source, + 'url': url, + 'count': count, + } + model.Session.add(GA_ReferralStat(**values)) + model.Session.commit() def update_publisher_stats(period_name): - publishers = get_top_level() + """ + Updates the publisher stats from the data retrieved for /dataset/* + and /publisher/*. Will run against each dataset and generates the + totals for the entire tree beneath each publisher. + """ + toplevel = get_top_level() + publishers = model.Session.query(model.Group).\ + filter(model.Group.type=='publisher').\ + filter(model.Group.state=='active').all() for publisher in publishers: - views, visits = update_publisher(period_name, publisher, publisher.name) + views, visitors, subpub = update_publisher(period_name, publisher, publisher.name) + parent, parents = '', publisher.get_groups('publisher') + if parents: + parent = parents[0].name item = model.Session.query(GA_Publisher).\ filter(GA_Publisher.period_name==period_name).\ filter(GA_Publisher.publisher_name==publisher.name).first() if item: item.views = views - item.visits = visits + item.visitors = visitors item.publisher_name = publisher.name + item.toplevel = publisher in toplevel + item.subpublishercount = subpub + item.parent = parent model.Session.add(item) else: # create the row @@ -183,23 +247,27 @@ 'period_name': period_name, 'publisher_name': publisher.name, 'views': views, - 'visits': visits, + 'visitors': visitors, + 'toplevel': publisher in toplevel, + 'subpublishercount': subpub, + 'parent': parent } model.Session.add(GA_Publisher(**values)) model.Session.commit() def update_publisher(period_name, pub, part=''): - views,visits = 0, 0 + views,visitors,subpub = 0, 0, 0 for publisher in go_down_tree(pub): - f = model.Session.query(GA_Url).\ + subpub = subpub + 1 + items = model.Session.query(GA_Url).\ filter(GA_Url.period_name==period_name).\ - filter(GA_Url.url=='/publisher/' + publisher.name).first() - if f: - views = views + int(f.pageviews) - visits = visits + int(f.visits) - - return views, visits + filter(GA_Url.department_id==publisher.name).all() + for item in items: + views = views + int(item.pageviews) + visitors = visitors + int(item.visitors) + + return views, visitors, (subpub-1) def get_top_level():