--- a/ckanext/ga_report/download_analytics.py +++ b/ckanext/ga_report/download_analytics.py @@ -1,7 +1,7 @@ import os import logging import datetime - +import collections from pylons import config import ga_model @@ -21,8 +21,17 @@ self.profile_id = profile_id - def all_(self): - self.since_date(datetime.datetime(2010, 1, 1)) + def specific_month(self, date): + import calendar + + first_of_this_month = datetime.datetime(date.year, date.month, 1) + _, last_day_of_month = calendar.monthrange(int(date.year), int(date.month)) + last_of_this_month = datetime.datetime(date.year, date.month, last_day_of_month) + periods = ((date.strftime(FORMAT_MONTH), + last_day_of_month, + first_of_this_month, last_of_this_month),) + self.download_and_store(periods) + def latest(self): if self.period == 'monthly': @@ -37,13 +46,13 @@ self.download_and_store(periods) - def since_date(self, since_date): + def for_date(self, for_date): assert isinstance(since_date, datetime.datetime) periods = [] # (period_name, period_complete_day, start_date, end_date) if self.period == 'monthly': first_of_the_months_until_now = [] - year = since_date.year - month = since_date.month + year = for_date.year + month = for_date.month now = datetime.datetime.now() first_of_this_month = datetime.datetime(now.year, now.month, 1) while True: @@ -95,9 +104,36 @@ log.info('Storing Publisher Analytics for period "%s"', self.get_full_period_name(period_name, period_complete_day)) self.store(period_name, period_complete_day, data,) - ga_model.update_publisher_stats(period_name) - + + ga_model.update_publisher_stats(period_name) # about 30 seconds. self.sitewide_stats( period_name ) + + self.update_social_info(period_name, start_date, end_date) + + def update_social_info(self, period_name, start_date, end_date): + start_date = start_date.strftime('%Y-%m-%d') + end_date = end_date.strftime('%Y-%m-%d') + query = 'ga:hasSocialSourceReferral=~Yes$' + metrics = 'ga:entrances' + sort = '-ga:entrances' + + # Supported query params at + # https://developers.google.com/analytics/devguides/reporting/core/v3/reference + results = self.service.data().ga().get( + ids='ga:' + self.profile_id, + filters=query, + start_date=start_date, + metrics=metrics, + sort=sort, + dimensions="ga:landingPagePath,ga:socialNetwork", + max_results=10000, + end_date=end_date).execute() + data = collections.defaultdict(list) + rows = results.get('rows',[]) + for row in rows: + from ga_model import _normalize_url + data[_normalize_url(row[0])].append( (row[1], int(row[2]),) ) + ga_model.update_social(period_name, data) def download(self, start_date, end_date, path='~/dataset/[a-z0-9-_]+'): @@ -105,7 +141,7 @@ start_date = start_date.strftime('%Y-%m-%d') end_date = end_date.strftime('%Y-%m-%d') query = 'ga:pagePath=%s$' % path - metrics = 'ga:uniquePageviews, ga:visits' + metrics = 'ga:uniquePageviews, ga:visitors' sort = '-ga:uniquePageviews' # Supported query params at @@ -119,11 +155,6 @@ dimensions="ga:pagePath", max_results=10000, end_date=end_date).execute() - - if os.getenv('DEBUG'): - import pprint - pprint.pprint(results) - print 'Total results: %s' % results.get('totalResults') packages = [] for entry in results.get('rows'): @@ -135,7 +166,6 @@ if 'url' in data: ga_model.update_url_stats(period_name, period_complete_day, data['url']) - def sitewide_stats(self, period_name): import calendar year, month = period_name.split('-') @@ -151,6 +181,12 @@ print ' + Fetching %s stats' % f.split('_')[1] getattr(self, f)(start_date, end_date, period_name) + def _get_results(result_data, f): + data = {} + for result in result_data: + key = f(result) + data[key] = data.get(key,0) + result[1] + return data def _totals_stats(self, start_date, end_date, period_name): """ Fetches distinct totals, total pageviews etc """ @@ -162,22 +198,42 @@ max_results=10000, end_date=end_date).execute() result_data = results.get('rows') - ga_model.update_sitewide_stats(period_name, "Totals", {'Total pageviews': result_data[0][0]}) - - results = self.service.data().ga().get( - ids='ga:' + self.profile_id, - start_date=start_date, - metrics='ga:pageviewsPerVisit,ga:bounces,ga:avgTimeOnSite,ga:percentNewVisits', + ga_model.update_sitewide_stats(period_name, "Totals", {'Total page views': result_data[0][0]}) + + results = self.service.data().ga().get( + ids='ga:' + self.profile_id, + start_date=start_date, + metrics='ga:pageviewsPerVisit,ga:avgTimeOnSite,ga:percentNewVisits,ga:visitors', max_results=10000, end_date=end_date).execute() result_data = results.get('rows') data = { 'Pages per visit': result_data[0][0], - 'Bounces': result_data[0][1], - 'Average time on site': result_data[0][2], - 'Percent new visits': result_data[0][3], + 'Average time on site': result_data[0][1], + 'New visits': result_data[0][2], + 'Total visits': result_data[0][3], } ga_model.update_sitewide_stats(period_name, "Totals", data) + + # Bounces from /data. This url is specified in configuration because + # for DGU we don't want /. + path = config.get('ga-report.bounce_url','/') + print path + results = self.service.data().ga().get( + ids='ga:' + self.profile_id, + filters='ga:pagePath=~%s$' % (path,), + start_date=start_date, + metrics='ga:bounces,ga:uniquePageviews', + dimensions='ga:pagePath', + max_results=10000, + end_date=end_date).execute() + result_data = results.get('rows') + for results in result_data: + if results[0] == path: + bounce, total = [float(x) for x in results[1:]] + pct = 100 * bounce/total + print "%d bounces from %d total == %s" % (bounce, total, pct) + ga_model.update_sitewide_stats(period_name, "Totals", {'Bounces': pct}) def _locale_stats(self, start_date, end_date, period_name): @@ -264,7 +320,7 @@ data = {} for result in result_data: - key = "%s (%s)" % (result[0],result[1]) + key = "%s (%s)" % (result[0], result[1]) data[key] = result[2] ga_model.update_sitewide_stats(period_name, "Browser versions", data)