import datetime
import logging
import pprint

from pylons import config

import ga_model

#from ga_client import GA

log = logging.getLogger('ckanext.ga-report')

# strftime pattern used to build the name of a monthly period, e.g. '2012-10'.
FORMAT_MONTH = '%Y-%m'


class DownloadAnalytics(object):
    '''Downloads and stores analytics info.

    A "period" is passed around as a 4-tuple:
    ``(period_name, period_complete_day, start_date, end_date)`` where
    ``period_complete_day`` is the day of the month the data runs up to for
    a period still in progress, or 0 for a period that is complete.
    '''

    def __init__(self, service=None, profile_id=None):
        '''
        :param service: object whose ``data().ga().get(...).execute()`` chain
            is used by ``download`` to run the query.
        :param profile_id: profile id; ``download`` prefixes it with ``'ga:'``.
        '''
        # Only 'monthly' is handled by latest() and since_date().
        self.period = config['ga-report.period']
        self.service = service
        self.profile_id = profile_id

    def all_(self):
        '''Download and store every period from January 2010 until now.'''
        self.since_date(datetime.datetime(2010, 1, 1))

    def latest(self):
        '''Download and store the current (incomplete) period only.

        :raises NotImplementedError: if the configured period is not
            'monthly'.
        '''
        if self.period != 'monthly':
            raise NotImplementedError

        # from first of this month to today
        now = datetime.datetime.now()
        first_of_this_month = datetime.datetime(now.year, now.month, 1)
        periods = ((now.strftime(FORMAT_MONTH),
                    now.day,
                    first_of_this_month,
                    now),)
        self.download_and_store(periods)

    def since_date(self, since_date):
        '''Download and store every period from that of since_date until now.

        Each complete month is named after that month and has a
        period_complete_day of 0; the current month runs from its first day
        until now and has today's day number as period_complete_day.

        :param since_date: datetime.datetime within the first month wanted.
        :raises NotImplementedError: if the configured period is not
            'monthly'.
        '''
        assert isinstance(since_date, datetime.datetime)
        if self.period != 'monthly':
            raise NotImplementedError

        # (period_name, period_complete_day, start_date, end_date)
        periods = []
        now = datetime.datetime.now()
        first_of_this_month = datetime.datetime(now.year, now.month, 1)
        year, month = since_date.year, since_date.month
        while True:
            first_of_the_month = datetime.datetime(year, month, 1)
            if first_of_the_month > first_of_this_month:
                # first_of_the_month has got to the future somehow
                break
            if first_of_the_month == first_of_this_month:
                # The month in progress: only complete up to today.
                periods.append((now.strftime(FORMAT_MONTH), now.day,
                                first_of_this_month, now))
                break

            # A complete month in the past. Adding 40 days to the 1st always
            # lands in the following month; the day before the 1st of that
            # month is the last day of this one.
            in_the_next_month = first_of_the_month + datetime.timedelta(40)
            last_of_the_month = datetime.datetime(
                in_the_next_month.year, in_the_next_month.month, 1) \
                - datetime.timedelta(1)
            # Name the period after the month it covers, not after the
            # current month, so each month is stored under its own name.
            periods.append((first_of_the_month.strftime(FORMAT_MONTH), 0,
                            first_of_the_month, last_of_the_month))

            month += 1
            if month > 12:
                year += 1
                month = 1

        self.download_and_store(periods)

    @staticmethod
    def get_full_period_name(period_name, period_complete_day):
        '''Return period_name, noting the day it is complete up to, if any.

        :param period_name: e.g. '2012-10'
        :param period_complete_day: day of the month the period is complete
            up to, or a false value (0) if the whole period is complete.
        '''
        if not period_complete_day:
            return period_name
        # NOTE(review): the suffix is always 'th' (so '1th', '2th', ...).
        return period_name + ' (up to %ith)' % period_complete_day

    def download_and_store(self, periods):
        '''Download then store the analytics for each of the given periods.

        :param periods: iterable of
            (period_name, period_complete_day, start_date, end_date) tuples.
        '''
        for period_name, period_complete_day, start_date, end_date in periods:
            full_name = self.get_full_period_name(period_name,
                                                  period_complete_day)
            log.info('Downloading Analytics for period "%s" (%s - %s)',
                     full_name,
                     start_date.strftime('%Y %m %d'),
                     end_date.strftime('%Y %m %d'))
            data = self.download(start_date, end_date)
            log.info('Storing Analytics for period "%s"', full_name)
            self.store(period_name, period_complete_day, data)

    def download(self, start_date, end_date):
        '''Get data from GA for a given time period

        :param start_date: datetime of the first day to include
        :param end_date: datetime of the last day to include
        :returns: dict with a single key 'url', whose value is a list of
            ('http:/' + page path, unique page views, '') tuples, one per row
            of the response (an empty list if the response has no rows).
        '''
        start = start_date.strftime('%Y-%m-%d')
        end = end_date.strftime('%Y-%m-%d')
        # url
        #query = 'ga:pagePath=~^%s,ga:pagePath=~^%s' % \
        #        (PACKAGE_URL, self.resource_url_tag)
        query = 'ga:pagePath=~/dataset/[a-z0-9-]+$'
        metrics = 'ga:uniquePageviews'
        sort = '-ga:uniquePageviews'
        # Supported query params at
        # https://developers.google.com/analytics/devguides/reporting/core/v3/reference
        results = self.service.data().ga().get(
            ids='ga:' + self.profile_id,
            filters=query,
            start_date=start,
            metrics=metrics,
            sort=sort,
            dimensions="ga:pagePath",
            end_date=end).execute()
        pprint.pprint(results)
        # Single-argument print(...) gives the same output whether it is
        # parsed as a print statement or as a function call.
        print('Total results: %s' % results.get('totalResults'))

        # 'rows' may be missing when nothing matched the query, so fall back
        # to an empty list rather than iterating over None.
        rows = results.get('rows') or []
        # Each row is (page path, unique page views) - one dimension and one
        # metric were requested above.
        # NOTE(review): 'http:/' + path has no host name; presumably the
        # path starts with '/' - confirm this is the URL form wanted. The
        # meaning of the trailing '' element is not visible here.
        packages = [('http:/' + loc, size, '') for loc, size in rows]
        return dict(url=packages)

    def print_results(self, results):
        '''Print the raw results followed by a short summary of them.

        :param results: response dict, or a false value if there was none.
        '''
        pprint.pprint(results)
        if not results:
            print('No results found')
            return
        # Fall back to defaults so that a response lacking 'profileInfo' or
        # having no rows is still summarised instead of raising.
        profile_info = results.get('profileInfo') or {}
        rows = results.get('rows') or [[-1]]
        print('Profile: %s' % profile_info.get('profileName'))
        print('Total Visits: %s' % rows[0][0])

    def store(self, period_name, period_complete_day, data):
        '''Store the downloaded data for a period using ga_model.

        :param data: dict as returned by download(); only its 'url' key, if
            present, is stored.
        '''
        if 'url' in data:
            ga_model.update_url_stats(period_name, period_complete_day,
                                      data['url'])