--- a/ckanext/ga_report/ga_model.py +++ b/ckanext/ga_report/ga_model.py @@ -1,10 +1,10 @@ import re import uuid -from sqlalchemy import Table, Column, MetaData +from sqlalchemy import Table, Column, MetaData, ForeignKey from sqlalchemy import types from sqlalchemy.sql import select -from sqlalchemy.orm import mapper +from sqlalchemy.orm import mapper, relation from sqlalchemy import func import ckan.model as model @@ -14,8 +14,6 @@ return unicode(uuid.uuid4()) metadata = MetaData() - - class GA_Url(object): @@ -32,6 +30,7 @@ Column('visitors', types.UnicodeText), Column('url', types.UnicodeText), Column('department_id', types.UnicodeText), + Column('package_id', types.UnicodeText), ) mapper(GA_Url, url_table) @@ -112,12 +111,10 @@ >>> normalize_url('http://data.gov.uk/dataset/weekly_fuel_prices') '/dataset/weekly_fuel_prices' ''' - # Deliberately leaving a / - url = url.replace('http:/','') - return '/' + '/'.join(url.split('/')[2:]) - - -def _get_department_id_of_url(url): + return '/' + '/'.join(url.split('/')[3:]) + + +def _get_package_and_publisher(url): # e.g. /dataset/fuel_prices # e.g. /dataset/fuel_prices/resource/e63380d4 dataset_match = re.match('/dataset/([^/]+)(/.*)?', url) @@ -127,12 +124,13 @@ if dataset: publisher_groups = dataset.get_groups('publisher') if publisher_groups: - return publisher_groups[0].name + return dataset_ref,publisher_groups[0].name + return dataset_ref, None else: publisher_match = re.match('/publisher/([^/]+)(/.*)?', url) if publisher_match: - return publisher_match.groups()[0] - + return None, publisher_match.groups()[0] + return None, None def update_sitewide_stats(period_name, stat_name, data): for k,v in data.iteritems(): @@ -157,34 +155,91 @@ model.Session.commit() +def update_url_stat_totals(period_name): + + """ + items = model.Session.query(GA_Url).\ + filter(GA_Url.period_name != "All").\ + filter(GA_Url.url==url).all() + values = {'id': make_uuid(), + 'period_name': "All", + 'period_complete_day': "0", + 'url': url, + 'pageviews': sum([int(x.pageviews) for x in items]), + 'visitors': sum([int(x.visitors) for x in items]), + 'department_id': department_id, + 'package_id': package + } + model.Session.add(GA_Url(**values)) + model.Session.commit() + """ + +def pre_update_url_stats(period_name): + model.Session.query(GA_Url).\ + filter(GA_Url.period_name==period_name).delete() + model.Session.query(GA_Url).\ + filter(GA_Url.period_name=='All').delete() + def update_url_stats(period_name, period_complete_day, url_data): + ''' + Given a list of urls and number of hits for each during a given period, + stores them in GA_Url under the period and recalculates the totals for + the 'All' period. + ''' for url, views, visitors in url_data: - url = _normalize_url(url) - department_id = _get_department_id_of_url(url) - - # see if the row for this url & month is in the table already + package, publisher = _get_package_and_publisher(url) + + item = model.Session.query(GA_Url).\ filter(GA_Url.period_name==period_name).\ filter(GA_Url.url==url).first() if item: - item.period_name = period_name - item.pageviews = views - item.visitors = visitors - item.department_id = department_id + item.pageviews = item.pageviews + views + item.visitors = item.visitors + visitors + if not item.package_id: + item.package_id = package + if not item.department_id: + item.department_id = publisher model.Session.add(item) else: - # create the row values = {'id': make_uuid(), 'period_name': period_name, 'period_complete_day': period_complete_day, 'url': url, 'pageviews': views, 'visitors': visitors, - 'department_id': department_id + 'department_id': publisher, + 'package_id': package } model.Session.add(GA_Url(**values)) model.Session.commit() + + if package: + old_pageviews, old_visits = 0, 0 + old = model.Session.query(GA_Url).\ + filter(GA_Url.period_name=='All').\ + filter(GA_Url.url==url).all() + old_pageviews = sum([int(o.pageviews) for o in old]) + old_visits = sum([int(o.visitors) for o in old]) + + entries = model.Session.query(GA_Url).\ + filter(GA_Url.period_name!='All').\ + filter(GA_Url.url==url).all() + values = {'id': make_uuid(), + 'period_name': 'All', + 'period_complete_day': 0, + 'url': url, + 'pageviews': sum([int(e.pageviews) for e in entries]) + old_pageviews, + 'visitors': sum([int(e.visitors) for e in entries]) + old_visits, + 'department_id': publisher, + 'package_id': package + } + + model.Session.add(GA_Url(**values)) + model.Session.commit() + + def update_social(period_name, data):