From: David Read Date: Wed, 07 Nov 2012 13:38:29 +0000 Subject: Tidy logging. X-Git-Url: http://maxious.lambdacomplex.org/git/?p=ckanext-ga-report.git&a=commitdiff&h=753e746cceb78d2cb57df91505b36e76fa4ad38e --- Tidy logging. --- --- a/ckanext/ga_report/controller.py +++ b/ckanext/ga_report/controller.py @@ -9,7 +9,7 @@ import sqlalchemy from sqlalchemy import func, cast, Integer import ckan.model as model -from ga_model import GA_Url, GA_Stat, GA_ReferralStat, GA_Publisher +from ga_model import GA_Url, GA_Stat, GA_ReferralStat log = logging.getLogger('ckanext.ga-report') @@ -229,6 +229,7 @@ c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month]) c.top_publishers = _get_top_publishers() + return render('ga_report/publisher/index.html') def _get_packages(self, publisher=None, count=-1): @@ -246,6 +247,7 @@ q = q.filter(GA_Url.period_name==month) q = q.order_by('ga_url.visitors::int desc') top_packages = [] + for entry,package in q.limit(count): if package: top_packages.append((package, entry.pageviews, entry.visitors)) @@ -287,7 +289,7 @@ else: c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month]) - month = c.month or 'All' + month = c.month or 'All' c.publisher_page_views = 0 q = model.Session.query(GA_Url).\ filter(GA_Url.url=='/publisher/%s' % c.publisher_name) @@ -303,26 +305,33 @@ Returns a list of the top 20 publishers by dataset visits. 
(The number to show can be varied with 'limit') ''' - month = c.month or 'All' connection = model.Session.connection() q = """ select department_id, sum(pageviews::int) views, sum(visitors::int) visits from ga_url - where department_id <> '' - and period_name=%s - group by department_id order by visits desc + where department_id <> ''""" + if c.month: + q = q + """ + and period_name=%s + """ + q = q + """ + group by department_id order by visits desc """ if limit: q = q + " limit %s;" % (limit) + # Add this back (before and period_name =%s) if you want to ignore publisher + # homepage views + # and not url like '/publisher/%%' + top_publishers = [] - res = connection.execute(q, month) + res = connection.execute(q, c.month) + for row in res: g = model.Group.get(row[0]) if g: top_publishers.append((g, row[1], row[2])) return top_publishers - def _get_publishers(): ''' --- a/ckanext/ga_report/download_analytics.py +++ b/ckanext/ga_report/download_analytics.py @@ -3,7 +3,7 @@ import datetime import collections from pylons import config -from ga_model import _normalize_url + import ga_model #from ga_client import GA @@ -92,35 +92,35 @@ def download_and_store(self, periods): for period_name, period_complete_day, start_date, end_date in periods: + log.info('Period "%s" (%s - %s)', + self.get_full_period_name(period_name, period_complete_day), + start_date.strftime('%Y-%m-%d'), + end_date.strftime('%Y-%m-%d')) + if self.delete_first: - log.info('Deleting existing Analytics for period "%s"', + log.info('Deleting existing Analytics for this period "%s"', period_name) ga_model.delete(period_name) - log.info('Downloading Analytics for period "%s" (%s - %s)', - self.get_full_period_name(period_name, period_complete_day), - start_date.strftime('%Y %m %d'), - end_date.strftime('%Y %m %d')) - - # Clean up the entries before we run this - ga_model.pre_update_url_stats(period_name) - - accountName = config.get('googleanalytics.account') - - data = self.download(start_date, end_date, 
'~/%s/dataset/[a-z0-9-_]+' % accountName) - log.info('Storing Dataset Analytics for period "%s"', - self.get_full_period_name(period_name, period_complete_day)) + + log.info('Downloading analytics for dataset views') + data = self.download(start_date, end_date, '~/dataset/[a-z0-9-_]+') + + log.info('Storing dataset views (%i rows)', len(data.get('url'))) self.store(period_name, period_complete_day, data, ) - data = self.download(start_date, end_date, '~/%s/publisher/[a-z0-9-_]+' % accountName) - log.info('Storing Publisher Analytics for period "%s"', - self.get_full_period_name(period_name, period_complete_day)) + log.info('Downloading analytics for publisher views') + data = self.download(start_date, end_date, '~/publisher/[a-z0-9-_]+') + log.info('Storing publisher views (%i rows)', len(data.get('url'))) self.store(period_name, period_complete_day, data,) + log.info('Aggregating datasets by publisher') ga_model.update_publisher_stats(period_name) # about 30 seconds. + + log.info('Downloading and storing analytics for site-wide stats') self.sitewide_stats( period_name ) + log.info('Downloading and storing analytics for social networks') self.update_social_info(period_name, start_date, end_date) - def update_social_info(self, period_name, start_date, end_date): start_date = start_date.strftime('%Y-%m-%d') @@ -143,11 +143,12 @@ data = collections.defaultdict(list) rows = results.get('rows',[]) for row in rows: + from ga_model import _normalize_url data[_normalize_url(row[0])].append( (row[1], int(row[2]),) ) ga_model.update_social(period_name, data) - def download(self, start_date, end_date, path=None): + def download(self, start_date, end_date, path='~/dataset/[a-z0-9-_]+'): '''Get data from GA for a given time period''' start_date = start_date.strftime('%Y-%m-%d') end_date = end_date.strftime('%Y-%m-%d') @@ -170,10 +171,7 @@ packages = [] for entry in results.get('rows'): (loc,pageviews,visits) = entry - url = _normalize_url('http:/' + loc) - if not 
url.startswith('/dataset/') and not url.startswith('/publisher/'): - continue - packages.append( (url, pageviews, visits,) ) # Temporary hack + packages.append( ('http:/' + loc, pageviews, visits,) ) # Temporary hack return dict(url=packages) def store(self, period_name, period_complete_day, data): @@ -187,12 +185,10 @@ start_date = '%s-01' % period_name end_date = '%s-%s' % (period_name, last_day_of_month) - print 'Sitewide_stats for %s (%s -> %s)' % (period_name, start_date, end_date) - funcs = ['_totals_stats', '_social_stats', '_os_stats', '_locale_stats', '_browser_stats', '_mobile_stats'] for f in funcs: - print ' + Fetching %s stats' % f.split('_')[1] + log.info('Downloading analytics for %s' % f.split('_')[1]) getattr(self, f)(start_date, end_date, period_name) def _get_results(result_data, f): --- a/ckanext/ga_report/ga_model.py +++ b/ckanext/ga_report/ga_model.py @@ -156,9 +156,46 @@ model.Session.commit() -def update_url_stat_totals(period_name): - - """ + +def update_url_stats(period_name, period_complete_day, url_data): + for url, views, visitors in url_data: + url = _normalize_url(url) + department_id = _get_department_id_of_url(url) + + package = None + if url.startswith('/dataset/'): + package = url[len('/dataset/'):] + + # see if the row for this url & month is in the table already + item = model.Session.query(GA_Url).\ + filter(GA_Url.period_name==period_name).\ + filter(GA_Url.url==url).first() + if item: + item.period_name = period_name + item.pageviews = views + item.visitors = visitors + item.department_id = department_id + item.package_id = package + model.Session.add(item) + else: + # create the row + values = {'id': make_uuid(), + 'period_name': period_name, + 'period_complete_day': period_complete_day, + 'url': url, + 'pageviews': views, + 'visitors': visitors, + 'department_id': department_id, + 'package_id': package + } + model.Session.add(GA_Url(**values)) + + # We now need to recalculate the ALL time_period from the data we have + # 
Delete the old 'All' + old = model.Session.query(GA_Url).\ + filter(GA_Url.period_name == "All").\ + filter(GA_Url.url==url).delete() + items = model.Session.query(GA_Url).\ filter(GA_Url.period_name != "All").\ filter(GA_Url.url==url).all() @@ -172,55 +209,8 @@ 'package_id': package } model.Session.add(GA_Url(**values)) + model.Session.commit() - """ - -def pre_update_url_stats(period_name): - model.Session.query(GA_Url).\ - filter(GA_Url.period_name==period_name).delete() - model.Session.query(GA_Url).\ - filter(GA_Url.period_name=='All').delete() - - -def update_url_stats(period_name, period_complete_day, url_data): - - for url, views, visitors in url_data: - department_id = _get_department_id_of_url(url) - - package = None - if url.startswith('/dataset/'): - package = url[len('/dataset/'):] - - values = {'id': make_uuid(), - 'period_name': period_name, - 'period_complete_day': period_complete_day, - 'url': url, - 'pageviews': views, - 'visitors': visitors, - 'department_id': department_id, - 'package_id': package - } - model.Session.add(GA_Url(**values)) - model.Session.commit() - - if package: - entries = model.Session.query(GA_Url).\ - filter(GA_Url.period_name!='All').\ - filter(GA_Url.url==url).all() - values = {'id': make_uuid(), - 'period_name': 'All', - 'period_complete_day': 0, - 'url': url, - 'pageviews': sum([int(e.pageviews) for e in entries]), - 'visitors': sum([int(e.visitors) for e in entries]), - 'department_id': department_id, - 'package_id': package - } - model.Session.add(GA_Url(**values)) - model.Session.commit() - - - def update_social(period_name, data): --- a/ckanext/ga_report/templates/ga_report/notes.html +++ b/ckanext/ga_report/templates/ga_report/notes.html @@ -6,11 +6,11 @@
  • Notes

  • --- a/ckanext/ga_report/templates/ga_report/publisher/index.html +++ b/ckanext/ga_report/templates/ga_report/publisher/index.html @@ -41,15 +41,15 @@ + - + -
    PublisherDataset Views Dataset VisitsDataset Views
    ${h.link_to(publisher.title, h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport', action='read_publisher', id=publisher.name))} ${views} ${visits}${views}
    --- a/ckanext/ga_report/templates/ga_report/publisher/read.html +++ b/ckanext/ga_report/templates/ga_report/publisher/read.html @@ -47,15 +47,15 @@ + - + -
    DatasetViews VisitsViews
    ${h.link_to(package.title or package.name, h.url_for(controller='package', action='read', id=package.name))} ${views} ${visits}${views}