From: Ross Jones Date: Wed, 17 Oct 2012 14:22:52 +0000 Subject: Fixed the publisher information (to enable leaderboard) to show the number X-Git-Url: http://maxious.lambdacomplex.org/git/?p=ckanext-ga-report.git&a=commitdiff&h=7dad03298ce0c10bf85ebbbbf638f9af04726629 --- Fixed the publisher information (to enable leaderboard) to show the number of children publishers and the total for all sub-publishers --- --- a/ckanext/ga_report/download_analytics.py +++ b/ckanext/ga_report/download_analytics.py @@ -85,7 +85,7 @@ self.get_full_period_name(period_name, period_complete_day), start_date.strftime('%Y %m %d'), end_date.strftime('%Y %m %d')) - + """ data = self.download(start_date, end_date, '~/dataset/[a-z0-9-_]+') log.info('Storing Dataset Analytics for period "%s"', self.get_full_period_name(period_name, period_complete_day)) @@ -95,8 +95,8 @@ log.info('Storing Publisher Analytics for period "%s"', self.get_full_period_name(period_name, period_complete_day)) self.store(period_name, period_complete_day, data,) - ga_model.update_publisher_stats(period_name) - + """ + ga_model.update_publisher_stats(period_name) # about 30 seconds. 
self.sitewide_stats( period_name ) @@ -105,7 +105,7 @@ start_date = start_date.strftime('%Y-%m-%d') end_date = end_date.strftime('%Y-%m-%d') query = 'ga:pagePath=%s$' % path - metrics = 'ga:uniquePageviews, ga:visits' + metrics = 'ga:uniquePageviews, ga:visitors' sort = '-ga:uniquePageviews' # Supported query params at @@ -135,7 +135,6 @@ if 'url' in data: ga_model.update_url_stats(period_name, period_complete_day, data['url']) - def sitewide_stats(self, period_name): import calendar year, month = period_name.split('-') @@ -151,6 +150,12 @@ print ' + Fetching %s stats' % f.split('_')[1] getattr(self, f)(start_date, end_date, period_name) + def _get_results(result_data, f): + data = {} + for result in result_data: + key = f(result) + data[key] = data.get(key,0) + result[1] + return data def _totals_stats(self, start_date, end_date, period_name): """ Fetches distinct totals, total pageviews etc """ @@ -264,7 +269,7 @@ data = {} for result in result_data: - key = "%s (%s)" % (result[0],result[1]) + key = "%s (%s)" % (result[0], result[1]) data[key] = result[2] ga_model.update_sitewide_stats(period_name, "Browser versions", data) --- a/ckanext/ga_report/ga_model.py +++ b/ckanext/ga_report/ga_model.py @@ -41,7 +41,7 @@ Column('period_name', types.UnicodeText), Column('period_complete_day', types.Integer), Column('pageviews', types.UnicodeText), - Column('visits', types.UnicodeText), + Column('visitors', types.UnicodeText), Column('url', types.UnicodeText), Column('department_id', types.UnicodeText), ) @@ -63,7 +63,10 @@ Column('period_name', types.UnicodeText), Column('publisher_name', types.UnicodeText), Column('views', types.UnicodeText), - Column('visits', types.UnicodeText), + Column('visitors', types.UnicodeText), + Column('toplevel', types.Boolean, default=False), + Column('subpublishercount', types.Integer, default=0), + Column('parent', types.UnicodeText), ) mapper(GA_Publisher, pub_table) @@ -136,7 +139,7 @@ def update_url_stats(period_name, 
period_complete_day, url_data): - for url, views, visits in url_data: + for url, views, visitors in url_data: url = _normalize_url(url) department_id = _get_department_id_of_url(url) @@ -147,7 +150,7 @@ if item: item.period_name = period_name item.pageviews = views - item.visits = visits + item.visitors = visitors item.department_id = department_id model.Session.add(item) else: @@ -157,7 +160,7 @@ 'period_complete_day': period_complete_day, 'url': url, 'pageviews': views, - 'visits': visits, + 'visitors': visitors, 'department_id': department_id } model.Session.add(GA_Url(**values)) @@ -166,16 +169,30 @@ def update_publisher_stats(period_name): - publishers = get_top_level() + """ + Updates the publisher stats from the data retrieved for /dataset/* + and /publisher/*. Will run against each dataset and generates the + totals for the entire tree beneath each publisher. + """ + toplevel = get_top_level() + publishers = model.Session.query(model.Group).\ + filter(model.Group.type=='publisher').\ + filter(model.Group.state=='active').all() for publisher in publishers: - views, visits = update_publisher(period_name, publisher, publisher.name) + views, visitors, subpub = update_publisher(period_name, publisher, publisher.name) + parent, parents = '', publisher.get_groups('publisher') + if parents: + parent = parents[0].name item = model.Session.query(GA_Publisher).\ filter(GA_Publisher.period_name==period_name).\ filter(GA_Publisher.publisher_name==publisher.name).first() if item: item.views = views - item.visits = visits + item.visitors = visitors item.publisher_name = publisher.name + item.toplevel = publisher in toplevel + item.subpublishercount = subpub + item.parent = parent model.Session.add(item) else: # create the row @@ -183,23 +200,27 @@ 'period_name': period_name, 'publisher_name': publisher.name, 'views': views, - 'visits': visits, + 'visitors': visitors, + 'toplevel': publisher in toplevel, + 'subpublishercount': subpub, + 'parent': parent } 
model.Session.add(GA_Publisher(**values)) model.Session.commit() def update_publisher(period_name, pub, part=''): - views,visits = 0, 0 + views,visitors,subpub = 0, 0, 0 for publisher in go_down_tree(pub): - f = model.Session.query(GA_Url).\ + subpub = subpub + 1 + items = model.Session.query(GA_Url).\ filter(GA_Url.period_name==period_name).\ - filter(GA_Url.url=='/publisher/' + publisher.name).first() - if f: - views = views + int(f.pageviews) - visits = visits + int(f.visits) - - return views, visits + filter(GA_Url.department_id==publisher.name).all() + for item in items: + views = views + int(item.pageviews) + visitors = visitors + int(item.visitors) + + return views, visitors, (subpub-1) def get_top_level():