From: David Read Date: Wed, 07 Nov 2012 15:06:53 +0000 Subject: Fix display of publisher in Featured Dataset. X-Git-Url: http://maxious.lambdacomplex.org/git/?p=ckanext-ga-report.git&a=commitdiff&h=d57ef52c79800397fb95a1cf1272c732cf60fb73 --- Fix display of publisher in Featured Dataset. --- --- a/ckanext/ga_report/controller.py +++ b/ckanext/ga_report/controller.py @@ -309,8 +309,6 @@ select department_id, sum(pageviews::int) views, sum(visitors::int) visits from ga_url where department_id <> '' - and package_id <> '' - and url like '/dataset/%%' and period_name=%s group by department_id order by visits desc """ --- a/ckanext/ga_report/download_analytics.py +++ b/ckanext/ga_report/download_analytics.py @@ -92,33 +92,40 @@ def download_and_store(self, periods): for period_name, period_complete_day, start_date, end_date in periods: + log.info('Period "%s" (%s - %s)', + self.get_full_period_name(period_name, period_complete_day), + start_date.strftime('%Y-%m-%d'), + end_date.strftime('%Y-%m-%d')) + if self.delete_first: - log.info('Deleting existing Analytics for period "%s"', + log.info('Deleting existing Analytics for this period "%s"', period_name) ga_model.delete(period_name) - log.info('Downloading Analytics for period "%s" (%s - %s)', - self.get_full_period_name(period_name, period_complete_day), - start_date.strftime('%Y %m %d'), - end_date.strftime('%Y %m %d')) # Clean up the entries before we run this ga_model.pre_update_url_stats(period_name) accountName = config.get('googleanalytics.account') + log.info('Downloading analytics for dataset views') data = self.download(start_date, end_date, '~/%s/dataset/[a-z0-9-_]+' % accountName) - log.info('Storing Dataset Analytics for period "%s"', - self.get_full_period_name(period_name, period_complete_day)) + + log.info('Storing dataset views (%i rows)', len(data.get('url'))) self.store(period_name, period_complete_day, data, ) + log.info('Downloading analytics for publisher views') data = self.download(start_date, end_date, '~/%s/publisher/[a-z0-9-_]+' % accountName) - log.info('Storing Publisher Analytics for period "%s"', - self.get_full_period_name(period_name, period_complete_day)) + + log.info('Storing publisher views (%i rows)', len(data.get('url'))) self.store(period_name, period_complete_day, data,) + log.info('Aggregating datasets by publisher') ga_model.update_publisher_stats(period_name) # about 30 seconds. + + log.info('Downloading and storing analytics for site-wide stats') self.sitewide_stats( period_name ) + log.info('Downloading and storing analytics for social networks') self.update_social_info(period_name, start_date, end_date) @@ -187,12 +194,10 @@ start_date = '%s-01' % period_name end_date = '%s-%s' % (period_name, last_day_of_month) - print 'Sitewide_stats for %s (%s -> %s)' % (period_name, start_date, end_date) - funcs = ['_totals_stats', '_social_stats', '_os_stats', '_locale_stats', '_browser_stats', '_mobile_stats'] for f in funcs: - print ' + Fetching %s stats' % f.split('_')[1] + log.info('Downloading analytics for %s' % f.split('_')[1]) getattr(self, f)(start_date, end_date, period_name) def _get_results(result_data, f): --- a/ckanext/ga_report/ga_model.py +++ b/ckanext/ga_report/ga_model.py @@ -116,7 +116,7 @@ return '/' + '/'.join(url.split('/')[2:]) -def _get_package_and_publisher(url): +def _get_department_id_of_url(url): # e.g. /dataset/fuel_prices # e.g. /dataset/fuel_prices/resource/e63380d4 dataset_match = re.match('/dataset/([^/]+)(/.*)?', url) @@ -126,13 +126,12 @@ if dataset: publisher_groups = dataset.get_groups('publisher') if publisher_groups: - return dataset_ref,publisher_groups[0].name - return dataset_ref, None + return publisher_groups[0].name else: publisher_match = re.match('/publisher/([^/]+)(/.*)?', url) if publisher_match: - return None, publisher_match.groups()[0] - return None, None + return publisher_match.groups()[0] + def update_sitewide_stats(period_name, stat_name, data): for k,v in data.iteritems(): @@ -184,28 +183,28 @@ def update_url_stats(period_name, period_complete_day, url_data): - + ''' + Given a list of urls and number of hits for each during a given period, + stores them in GA_Url under the period and recalculates the totals for + the 'All' period. + ''' for url, views, visitors in url_data: - package, publisher = _get_package_and_publisher(url) - - item = model.Session.query(GA_Url).\ - filter(GA_Url.period_name==period_name).\ - filter(GA_Url.url==url).first() - if item: - item.pageviews = item.pageviews + views - item.visitors = item.visitors + visitors - model.Session.add(item) - else: - values = {'id': make_uuid(), - 'period_name': period_name, - 'period_complete_day': period_complete_day, - 'url': url, - 'pageviews': views, - 'visitors': visitors, - 'department_id': publisher, - 'package_id': package - } - model.Session.add(GA_Url(**values)) + department_id = _get_department_id_of_url(url) + + package = None + if url.startswith('/dataset/'): + package = url[len('/dataset/'):] + + values = {'id': make_uuid(), + 'period_name': period_name, + 'period_complete_day': period_complete_day, + 'url': url, + 'pageviews': views, + 'visitors': visitors, + 'department_id': department_id, + 'package_id': package + } + model.Session.add(GA_Url(**values)) model.Session.commit() if package: @@ -218,10 +217,9 @@ 'url': url, 'pageviews': sum([int(e.pageviews) for e in entries]), 'visitors': sum([int(e.visitors) for e in entries]), - 'department_id': publisher, + 'department_id': department_id, 'package_id': package } - model.Session.add(GA_Url(**values)) model.Session.commit() --- a/ckanext/ga_report/helpers.py +++ b/ckanext/ga_report/helpers.py @@ -60,7 +60,8 @@ if not dataset: return None dataset_dict = get_action('package_show')({'model': model, - 'session': model.Session}, + 'session': model.Session, + 'validate': False}, {'id':dataset.id}) return dataset_dict --- a/ckanext/ga_report/templates/ga_report/notes.html +++ b/ckanext/ga_report/templates/ga_report/notes.html @@ -6,11 +6,11 @@
  • Notes