From: Ross Jones Date: Wed, 17 Oct 2012 15:29:33 +0000 Subject: Fixing up setup.py and making sure routes exist for site and publisher reports X-Git-Url: http://maxious.lambdacomplex.org/git/?p=ckanext-ga-report.git&a=commitdiff&h=70d647d2e5780f39b1d74434f5ec79d0bf748a27 --- Fixing up setup.py and making sure routes exist for site and publisher reports --- --- a/ckanext/ga_report/controller.py +++ b/ckanext/ga_report/controller.py @@ -6,5 +6,11 @@ class GaReport(BaseController): def index(self): - return render('index.html') + return render('ga_report/site/index.html') + +class GaPublisherReport(BaseController): + + def index(self, id): + return render('ga_report/publisher/index.html') + --- a/ckanext/ga_report/download_analytics.py +++ b/ckanext/ga_report/download_analytics.py @@ -85,18 +85,27 @@ self.get_full_period_name(period_name, period_complete_day), start_date.strftime('%Y %m %d'), end_date.strftime('%Y %m %d')) - data = self.download(start_date, end_date) - log.info('Storing Analytics for period "%s"', + """ + data = self.download(start_date, end_date, '~/dataset/[a-z0-9-_]+') + log.info('Storing Dataset Analytics for period "%s"', self.get_full_period_name(period_name, period_complete_day)) - self.store(period_name, period_complete_day, data) - - - def download(self, start_date, end_date): + self.store(period_name, period_complete_day, data, ) + + data = self.download(start_date, end_date, '~/publisher/[a-z0-9-_]+') + log.info('Storing Publisher Analytics for period "%s"', + self.get_full_period_name(period_name, period_complete_day)) + self.store(period_name, period_complete_day, data,) + """ + ga_model.update_publisher_stats(period_name) # about 30 seconds. + self.sitewide_stats( period_name ) + + + def download(self, start_date, end_date, path='~/dataset/[a-z0-9-_]+'): '''Get data from GA for a given time period''' start_date = start_date.strftime('%Y-%m-%d') end_date = end_date.strftime('%Y-%m-%d') - query = 'ga:pagePath=~/dataset/[a-z0-9-]+$' - metrics = 'ga:uniquePageviews' + query = 'ga:pagePath=%s$' % path + metrics = 'ga:uniquePageviews, ga:visitors' sort = '-ga:uniquePageviews' # Supported query params at @@ -118,11 +127,173 @@ packages = [] for entry in results.get('rows'): - (loc,size,) = entry - packages.append( ('http:/' + loc,size, '',) ) # Temporary hack + (loc,pageviews,visits) = entry + packages.append( ('http:/' + loc, pageviews, visits,) ) # Temporary hack return dict(url=packages) def store(self, period_name, period_complete_day, data): if 'url' in data: ga_model.update_url_stats(period_name, period_complete_day, data['url']) + def sitewide_stats(self, period_name): + import calendar + year, month = period_name.split('-') + _, last_day_of_month = calendar.monthrange(int(year), int(month)) + + start_date = '%s-01' % period_name + end_date = '%s-%s' % (period_name, last_day_of_month) + print 'Sitewide_stats for %s (%s -> %s)' % (period_name, start_date, end_date) + + funcs = ['_totals_stats', '_social_stats', '_os_stats', + '_locale_stats', '_browser_stats', '_mobile_stats'] + for f in funcs: + print ' + Fetching %s stats' % f.split('_')[1] + getattr(self, f)(start_date, end_date, period_name) + + def _get_results(result_data, f): + data = {} + for result in result_data: + key = f(result) + data[key] = data.get(key,0) + result[1] + return data + + def _totals_stats(self, start_date, end_date, period_name): + """ Fetches distinct totals, total pageviews etc """ + results = self.service.data().ga().get( + ids='ga:' + self.profile_id, + start_date=start_date, + metrics='ga:uniquePageviews', + sort='-ga:uniquePageviews', + max_results=10000, + end_date=end_date).execute() + result_data = results.get('rows') + ga_model.update_sitewide_stats(period_name, "Totals", {'Total pageviews': result_data[0][0]}) + + results = self.service.data().ga().get( + ids='ga:' + self.profile_id, + start_date=start_date, + metrics='ga:pageviewsPerVisit,ga:bounces,ga:avgTimeOnSite,ga:percentNewVisits', + max_results=10000, + end_date=end_date).execute() + result_data = results.get('rows') + data = { + 'Pages per visit': result_data[0][0], + 'Bounces': result_data[0][1], + 'Average time on site': result_data[0][2], + 'Percent new visits': result_data[0][3], + } + ga_model.update_sitewide_stats(period_name, "Totals", data) + + + def _locale_stats(self, start_date, end_date, period_name): + """ Fetches stats about language and country """ + results = self.service.data().ga().get( + ids='ga:' + self.profile_id, + start_date=start_date, + metrics='ga:uniquePageviews', + sort='-ga:uniquePageviews', + dimensions="ga:language,ga:country", + max_results=10000, + end_date=end_date).execute() + result_data = results.get('rows') + data = {} + for result in result_data: + data[result[0]] = data.get(result[0], 0) + int(result[2]) + ga_model.update_sitewide_stats(period_name, "Languages", data) + + data = {} + for result in result_data: + data[result[1]] = data.get(result[1], 0) + int(result[2]) + ga_model.update_sitewide_stats(period_name, "Country", data) + + + def _social_stats(self, start_date, end_date, period_name): + """ Finds out which social sites people are referred from """ + results = self.service.data().ga().get( + ids='ga:' + self.profile_id, + start_date=start_date, + metrics='ga:uniquePageviews', + sort='-ga:uniquePageviews', + dimensions="ga:socialNetwork,ga:referralPath", + max_results=10000, + end_date=end_date).execute() + result_data = results.get('rows') + twitter_links = [] + data = {} + for result in result_data: + if not result[0] == '(not set)': + data[result[0]] = data.get(result[0], 0) + int(result[2]) + if result[0] == 'Twitter': + twitter_links.append(result[1]) + ga_model.update_sitewide_stats(period_name, "Social sources", data) + + + def _os_stats(self, start_date, end_date, period_name): + """ Operating system stats """ + results = self.service.data().ga().get( + ids='ga:' + self.profile_id, + start_date=start_date, + metrics='ga:uniquePageviews', + sort='-ga:uniquePageviews', + dimensions="ga:operatingSystem,ga:operatingSystemVersion", + max_results=10000, + end_date=end_date).execute() + result_data = results.get('rows') + data = {} + for result in result_data: + data[result[0]] = data.get(result[0], 0) + int(result[2]) + ga_model.update_sitewide_stats(period_name, "Operating Systems", data) + + data = {} + for result in result_data: + key = "%s (%s)" % (result[0],result[1]) + data[key] = result[2] + ga_model.update_sitewide_stats(period_name, "Operating Systems versions", data) + + + def _browser_stats(self, start_date, end_date, period_name): + """ Information about browsers and browser versions """ + results = self.service.data().ga().get( + ids='ga:' + self.profile_id, + start_date=start_date, + metrics='ga:uniquePageviews', + sort='-ga:uniquePageviews', + dimensions="ga:browser,ga:browserVersion", + max_results=10000, + end_date=end_date).execute() + result_data = results.get('rows') + data = {} + for result in result_data: + data[result[0]] = data.get(result[0], 0) + int(result[2]) + ga_model.update_sitewide_stats(period_name, "Browsers", data) + + data = {} + for result in result_data: + key = "%s (%s)" % (result[0], result[1]) + data[key] = result[2] + ga_model.update_sitewide_stats(period_name, "Browser versions", data) + + + def _mobile_stats(self, start_date, end_date, period_name): + """ Info about mobile devices """ + + results = self.service.data().ga().get( + ids='ga:' + self.profile_id, + start_date=start_date, + metrics='ga:uniquePageviews', + sort='-ga:uniquePageviews', + dimensions="ga:mobileDeviceBranding, ga:mobileDeviceInfo", + max_results=10000, + end_date=end_date).execute() + + result_data = results.get('rows') + data = {} + for result in result_data: + data[result[0]] = data.get(result[0], 0) + int(result[2]) + ga_model.update_sitewide_stats(period_name, "Mobile brands", data) + + data = {} + for result in result_data: + data[result[1]] = data.get(result[1], 0) + int(result[2]) + ga_model.update_sitewide_stats(period_name, "Mobile devices", data) + --- a/ckanext/ga_report/ga_model.py +++ b/ckanext/ga_report/ga_model.py @@ -16,6 +16,18 @@ class GA_Url(object): + + def __init__(self, **kwargs): + for k,v in kwargs.items(): + setattr(self, k, v) + +class GA_Stat(object): + + def __init__(self, **kwargs): + for k,v in kwargs.items(): + setattr(self, k, v) + +class GA_Publisher(object): def __init__(self, **kwargs): for k,v in kwargs.items(): @@ -28,12 +40,35 @@ default=make_uuid), Column('period_name', types.UnicodeText), Column('period_complete_day', types.Integer), - Column('metric', types.UnicodeText), - Column('value', types.UnicodeText), + Column('pageviews', types.UnicodeText), + Column('visitors', types.UnicodeText), Column('url', types.UnicodeText), Column('department_id', types.UnicodeText), ) mapper(GA_Url, url_table) + +stat_table = Table('ga_stat', metadata, + Column('id', types.UnicodeText, primary_key=True, + default=make_uuid), + Column('period_name', types.UnicodeText), + Column('stat_name', types.UnicodeText), + Column('key', types.UnicodeText), + Column('value', types.UnicodeText), ) +mapper(GA_Stat, stat_table) + + +pub_table = Table('ga_publisher', metadata, + Column('id', types.UnicodeText, primary_key=True, + default=make_uuid), + Column('period_name', types.UnicodeText), + Column('publisher_name', types.UnicodeText), + Column('views', types.UnicodeText), + Column('visitors', types.UnicodeText), + Column('toplevel', types.Boolean, default=False), + Column('subpublishercount', types.Integer, default=0), + Column('parent', types.UnicodeText), +) +mapper(GA_Publisher, pub_table) def init_tables(): @@ -73,21 +108,49 @@ publisher_groups = dataset.get_groups('publisher') if publisher_groups: return publisher_groups[0].name + else: + publisher_match = re.match('/publisher/([^/]+)(/.*)?', url) + if publisher_match: + return publisher_match.groups()[0] + + +def update_sitewide_stats(period_name, stat_name, data): + for k,v in data.iteritems(): + item = model.Session.query(GA_Stat).\ + filter(GA_Stat.period_name==period_name).\ + filter(GA_Stat.key==k).\ + filter(GA_Stat.stat_name==stat_name).first() + if item: + item.period_name = period_name + item.key = k + item.value = v + model.Session.add(item) + else: + # create the row + values = {'id': make_uuid(), + 'period_name': period_name, + 'key': k, + 'value': v, + 'stat_name': stat_name + } + model.Session.add(GA_Stat(**values)) + model.Session.commit() + def update_url_stats(period_name, period_complete_day, url_data): - for url, views, next_page in url_data: + for url, views, visitors in url_data: url = _normalize_url(url) department_id = _get_department_id_of_url(url) # see if the row for this url & month is in the table already item = model.Session.query(GA_Url).\ filter(GA_Url.period_name==period_name).\ - filter(GA_Url.url==url).\ - filter(GA_Url.metric == 'Total views').first() + filter(GA_Url.url==url).first() if item: - item.period_name = period_complete_day = period_complete_day - item.value = views + item.period_name = period_name + item.pageviews = views + item.visitors = visitors item.department_id = department_id model.Session.add(item) else: @@ -96,10 +159,92 @@ 'period_name': period_name, 'period_complete_day': period_complete_day, 'url': url, - 'value': views, - 'metric': 'Total views', + 'pageviews': views, + 'visitors': visitors, 'department_id': department_id } model.Session.add(GA_Url(**values)) model.Session.commit() + + +def update_publisher_stats(period_name): + """ + Updates the publisher stats from the data retrieved for /dataset/* + and /publisher/*. Will run against each dataset and generates the + totals for the entire tree beneath each publisher. + """ + toplevel = get_top_level() + publishers = model.Session.query(model.Group).\ + filter(model.Group.type=='publisher').\ + filter(model.Group.state=='active').all() + for publisher in publishers: + views, visitors, subpub = update_publisher(period_name, publisher, publisher.name) + parent, parents = '', publisher.get_groups('publisher') + if parents: + parent = parents[0].name + item = model.Session.query(GA_Publisher).\ + filter(GA_Publisher.period_name==period_name).\ + filter(GA_Publisher.publisher_name==publisher.name).first() + if item: + item.views = views + item.visitors = visitors + item.publisher_name = publisher.name + item.toplevel = publisher in toplevel + item.subpublishercount = subpub + item.parent = parent + model.Session.add(item) + else: + # create the row + values = {'id': make_uuid(), + 'period_name': period_name, + 'publisher_name': publisher.name, + 'views': views, + 'visitors': visitors, + 'toplevel': publisher in toplevel, + 'subpublishercount': subpub, + 'parent': parent + } + model.Session.add(GA_Publisher(**values)) + model.Session.commit() + + +def update_publisher(period_name, pub, part=''): + views,visitors,subpub = 0, 0, 0 + for publisher in go_down_tree(pub): + subpub = subpub + 1 + items = model.Session.query(GA_Url).\ + filter(GA_Url.period_name==period_name).\ + filter(GA_Url.department_id==publisher.name).all() + for item in items: + views = views + int(item.pageviews) + visitors = visitors + int(item.visitors) + + return views, visitors, (subpub-1) + + +def get_top_level(): + '''Returns the top level publishers.''' + return model.Session.query(model.Group).\ + outerjoin(model.Member, model.Member.table_id == model.Group.id and \ + model.Member.table_name == 'group' and \ + model.Member.state == 'active').\ + filter(model.Member.id==None).\ + filter(model.Group.type=='publisher').\ + order_by(model.Group.name).all() + +def get_children(publisher): + '''Finds child publishers for the given publisher (object). (Not recursive)''' + from ckan.model.group import HIERARCHY_CTE + return model.Session.query(model.Group).\ + from_statement(HIERARCHY_CTE).params(id=publisher.id, type='publisher').\ + all() + +def go_down_tree(publisher): + '''Provided with a publisher object, it walks down the hierarchy and yields each publisher, + including the one you supply.''' + yield publisher + for child in get_children(publisher): + for grandchild in go_down_tree(child): + yield grandchild + --- a/ckanext/ga_report/plugin.py +++ b/ckanext/ga_report/plugin.py @@ -1,13 +1,14 @@ import logging import ckan.lib.helpers as h +import ckan.plugins as p from ckan.plugins import implements, toolkit -import gasnippet -import commands -import dbutil +#import gasnippet +#import commands +#import dbutil log = logging.getLogger('ckanext.ga-report') -class GoogleAnalyticsPlugin(p.SingletonPlugin): +class GAReportPlugin(p.SingletonPlugin): implements(p.IConfigurer, inherit=True) implements(p.IRoutes, inherit=True) @@ -17,8 +18,13 @@ def after_map(self, map): map.connect( - '/data/analytics/index', - controller='ckanext.ga-report.controller:GaReport', + '/data/analytics/usage', + controller='ckanext.ga_report.controller:GaReport', + action='index' + ) + map.connect( + '/data/analytics/publisher/{id}', + controller='ckanext.ga_report.controller:GaPublisherReport', action='index' ) return map --- /dev/null +++ b/ckanext/ga_report/templates/ga_report/publisher/index.html @@ -1,1 +1,1 @@ - +HAI --- /dev/null +++ b/ckanext/ga_report/templates/ga_report/site/index.html @@ -1,1 +1,1 @@ - +HAI Site --- a/ckanext/ga_report/tests/test_api.py +++ b/ckanext/ga_report/tests/test_api.py @@ -36,16 +36,3 @@ except Exception as e: assert False, e -""" - downloader = DownloadAnalytics(svc, profile_id=get_profile_id(svc)) - - time_period = self.args[1] if self.args and len(self.args) > 1 \ - else 'latest' - if time_period == 'all': - downloader.all_() - elif time_period == 'latest': - downloader.latest() - else: - since_date = datetime.datetime.strptime(time_period, '%Y-%m-%d') - downloader.since_date(since_date) -""" --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ """ [ckan.plugins] # Add plugins here, eg - ga-report=ckanext.ga_report.plugin:GaReportPlugin + ga-report=ckanext.ga_report.plugin:GAReportPlugin [paste.paster_command] loadanalytics = ckanext.ga_report.command:LoadAnalytics