--- a/ckanext/ga_report/download_analytics.py
+++ b/ckanext/ga_report/download_analytics.py
@@ -85,18 +85,27 @@
                      self.get_full_period_name(period_name, period_complete_day),
                      start_date.strftime('%Y %m %d'),
                      end_date.strftime('%Y %m %d'))
-            data = self.download(start_date, end_date)
-            log.info('Storing Analytics for period "%s"',
+
+            data = self.download(start_date, end_date, '~/dataset/[a-z0-9-_]+')
+            log.info('Storing Dataset Analytics for period "%s"',
                      self.get_full_period_name(period_name, period_complete_day))
-            self.store(period_name, period_complete_day, data)
-
-
-    def download(self, start_date, end_date):
+            self.store(period_name, period_complete_day, data, )
+
+            data = self.download(start_date, end_date, '~/publisher/[a-z0-9-_]+')
+            log.info('Storing Publisher Analytics for period "%s"',
+                     self.get_full_period_name(period_name, period_complete_day))
+            self.store(period_name, period_complete_day, data,)
+            ga_model.update_publisher_stats(period_name)
+
+            self.sitewide_stats( period_name )
+
+
+    def download(self, start_date, end_date, path='~/dataset/[a-z0-9-_]+'):
         '''Get data from GA for a given time period'''
         start_date = start_date.strftime('%Y-%m-%d')
         end_date = end_date.strftime('%Y-%m-%d')
-        query = 'ga:pagePath=~/dataset/[a-z0-9-]+$'
-        metrics = 'ga:uniquePageviews'
+        query = 'ga:pagePath=%s$' % path
+        metrics = 'ga:uniquePageviews, ga:visits'
         sort = '-ga:uniquePageviews'
 
         # Supported query params at
@@ -118,11 +127,169 @@
 
         packages = []
         for entry in results.get('rows'):
-            (loc,size,) = entry
-            packages.append( ('http:/' + loc,size, '',) ) # Temporary hack
+            (loc,pageviews,visits) = entry
+            packages.append( ('http:/' + loc, pageviews, visits,) ) # Temporary hack
         return dict(url=packages)
 
 
     def store(self, period_name, period_complete_day, data):
         if 'url' in data:
             ga_model.update_url_stats(period_name, period_complete_day, data['url'])
+
+    def sitewide_stats(self, period_name):
+        """ Runs every sitewide stats fetcher for period_name ('YYYY-MM') """
+        import calendar
+        year, month = period_name.split('-')
+        _, last_day_of_month = calendar.monthrange(int(year), int(month))
+
+        start_date = '%s-01' % period_name
+        end_date = '%s-%s' % (period_name, last_day_of_month)
+        print 'Sitewide_stats for %s (%s -> %s)' % (period_name, start_date, end_date)
+
+        funcs = ['_totals_stats', '_social_stats', '_os_stats',
+                 '_locale_stats', '_browser_stats', '_mobile_stats']
+        for f in funcs:
+            print ' + Fetching %s stats' % f.split('_')[1]
+            getattr(self, f)(start_date, end_date, period_name)
+
+
+    def _totals_stats(self, start_date, end_date, period_name):
+        """ Fetches distinct totals, total pageviews etc """
+        results = self.service.data().ga().get(
+            ids='ga:' + self.profile_id,
+            start_date=start_date,
+            metrics='ga:uniquePageviews',
+            sort='-ga:uniquePageviews',
+            max_results=10000,
+            end_date=end_date).execute()
+        # 'rows' may be missing from the response; treat that as "no data".
+        result_data = results.get('rows') or []
+        if result_data:
+            ga_model.update_sitewide_stats(period_name, "Totals", {'Total pageviews': result_data[0][0]})
+
+        results = self.service.data().ga().get(
+            ids='ga:' + self.profile_id,
+            start_date=start_date,
+            metrics='ga:pageviewsPerVisit,ga:bounces,ga:avgTimeOnSite,ga:percentNewVisits',
+            max_results=10000,
+            end_date=end_date).execute()
+        result_data = results.get('rows') or []
+        if result_data:
+            data = {
+                'Pages per visit': result_data[0][0],
+                'Bounces': result_data[0][1],
+                'Average time on site': result_data[0][2],
+                'Percent new visits': result_data[0][3],
+            }
+            ga_model.update_sitewide_stats(period_name, "Totals", data)
+
+
+    def _locale_stats(self, start_date, end_date, period_name):
+        """ Fetches stats about language and country """
+        results = self.service.data().ga().get(
+            ids='ga:' + self.profile_id,
+            start_date=start_date,
+            metrics='ga:uniquePageviews',
+            sort='-ga:uniquePageviews',
+            dimensions="ga:language,ga:country",
+            max_results=10000,
+            end_date=end_date).execute()
+        result_data = results.get('rows') or []
+        data = {}
+        for result in result_data:
+            data[result[0]] = data.get(result[0], 0) + int(result[2])
+        ga_model.update_sitewide_stats(period_name, "Languages", data)
+
+        data = {}
+        for result in result_data:
+            data[result[1]] = data.get(result[1], 0) + int(result[2])
+        ga_model.update_sitewide_stats(period_name, "Country", data)
+
+
+    def _social_stats(self, start_date, end_date, period_name):
+        """ Finds out which social sites people are referred from """
+        results = self.service.data().ga().get(
+            ids='ga:' + self.profile_id,
+            start_date=start_date,
+            metrics='ga:uniquePageviews',
+            sort='-ga:uniquePageviews',
+            dimensions="ga:socialNetwork,ga:referralPath",
+            max_results=10000,
+            end_date=end_date).execute()
+        result_data = results.get('rows') or []
+        data = {}
+        for result in result_data:
+            if result[0] != '(not set)':
+                data[result[0]] = data.get(result[0], 0) + int(result[2])
+        ga_model.update_sitewide_stats(period_name, "Social sources", data)
+
+
+    def _os_stats(self, start_date, end_date, period_name):
+        """ Operating system stats """
+        results = self.service.data().ga().get(
+            ids='ga:' + self.profile_id,
+            start_date=start_date,
+            metrics='ga:uniquePageviews',
+            sort='-ga:uniquePageviews',
+            dimensions="ga:operatingSystem,ga:operatingSystemVersion",
+            max_results=10000,
+            end_date=end_date).execute()
+        result_data = results.get('rows') or []
+        data = {}
+        for result in result_data:
+            data[result[0]] = data.get(result[0], 0) + int(result[2])
+        ga_model.update_sitewide_stats(period_name, "Operating Systems", data)
+
+        data = {}
+        for result in result_data:
+            key = "%s (%s)" % (result[0],result[1])
+            data[key] = result[2]
+        ga_model.update_sitewide_stats(period_name, "Operating Systems versions", data)
+
+
+    def _browser_stats(self, start_date, end_date, period_name):
+        """ Information about browsers and browser versions """
+        results = self.service.data().ga().get(
+            ids='ga:' + self.profile_id,
+            start_date=start_date,
+            metrics='ga:uniquePageviews',
+            sort='-ga:uniquePageviews',
+            dimensions="ga:browser,ga:browserVersion",
+            max_results=10000,
+            end_date=end_date).execute()
+        result_data = results.get('rows') or []
+        data = {}
+        for result in result_data:
+            data[result[0]] = data.get(result[0], 0) + int(result[2])
+        ga_model.update_sitewide_stats(period_name, "Browsers", data)
+
+        data = {}
+        for result in result_data:
+            key = "%s (%s)" % (result[0],result[1])
+            data[key] = result[2]
+        ga_model.update_sitewide_stats(period_name, "Browser versions", data)
+
+
+    def _mobile_stats(self, start_date, end_date, period_name):
+        """ Info about mobile devices """
+
+        results = self.service.data().ga().get(
+            ids='ga:' + self.profile_id,
+            start_date=start_date,
+            metrics='ga:uniquePageviews',
+            sort='-ga:uniquePageviews',
+            dimensions="ga:mobileDeviceBranding,ga:mobileDeviceInfo",
+            max_results=10000,
+            end_date=end_date).execute()
+
+        result_data = results.get('rows') or []
+        data = {}
+        for result in result_data:
+            data[result[0]] = data.get(result[0], 0) + int(result[2])
+        ga_model.update_sitewide_stats(period_name, "Mobile brands", data)
+
+        data = {}
+        for result in result_data:
+            data[result[1]] = data.get(result[1], 0) + int(result[2])
+        ga_model.update_sitewide_stats(period_name, "Mobile devices", data)
+