--- a/ckanext/ga_report/download_analytics.py +++ b/ckanext/ga_report/download_analytics.py @@ -121,13 +121,13 @@ accountName = config.get('googleanalytics.account') log.info('Downloading analytics for dataset views') - data = self.download(start_date, end_date, '~/%s/dataset/[a-z0-9-_]+' % accountName) + data = self.download(start_date, end_date, '~^/dataset/[a-z0-9-_]+') log.info('Storing dataset views (%i rows)', len(data.get('url'))) self.store(period_name, period_complete_day, data, ) log.info('Downloading analytics for publisher views') - data = self.download(start_date, end_date, '~/%s/publisher/[a-z0-9-_]+' % accountName) + data = self.download(start_date, end_date, '~^/organization/[a-z0-9-_]+') log.info('Storing publisher views (%i rows)', len(data.get('url'))) self.store(period_name, period_complete_day, data,) @@ -177,7 +177,7 @@ data = collections.defaultdict(list) rows = results.get('rows',[]) for row in rows: - url = _normalize_url('http:/' + row[0]) + url = row[0] data[url].append( (row[1], int(row[2]),) ) ga_model.update_social(period_name, data) @@ -192,11 +192,8 @@ # Supported query params at # https://developers.google.com/analytics/devguides/reporting/core/v3/reference - try: - # Because of issues of invalid responses, we are going to make these requests - # ourselves. - headers = {'authorization': 'Bearer ' + self.token} - + # https://ga-dev-tools.appspot.com/explorer/ + try: args = {} args["sort"] = "-ga:pageviews" args["max-results"] = 100000 @@ -207,25 +204,22 @@ args["ids"] = "ga:" + self.profile_id args["filters"] = query args["alt"] = "json" - - r = requests.get("https://www.googleapis.com/analytics/v3/data/ga", params=args, headers=headers) - if r.status_code != 200: - raise Exception("Request with params: %s failed" % args) - - results = json.loads(r.content) - print len(results.keys()) - except Exception, e: - log.exception(e) - #return dict(url=[]) - raise e + print args + results = self._get_json(args) + + except Exception, e: + log.exception(e) + return dict(url=[]) packages = [] log.info("There are %d results" % results['totalResults']) - for entry in results.get('rows'): + if results['totalResults'] > 0: + for entry in results.get('rows'): (loc,pageviews,visits) = entry - url = _normalize_url('http:/' + loc) # strips off domain e.g. www.data.gov.uk or data.gov.uk - - if not url.startswith('/dataset/') and not url.startswith('/publisher/'): + #url = _normalize_url('http:/' + loc) # strips off domain e.g. www.data.gov.uk or data.gov.uk + url = loc + #print url + if not url.startswith('/dataset/') and not url.startswith('/organization/'): # filter out strays like: # /data/user/login?came_from=http://data.gov.uk/dataset/os-code-point-open # /403.html?page=/about&from=http://data.gov.uk/publisher/planning-inspectorate @@ -258,37 +252,32 @@ return data def _get_json(self, params, prev_fail=False): - if prev_fail: - import os - ga_token_filepath = os.path.expanduser(config.get('googleanalytics.token.filepath', '')) - if not ga_token_filepath: - print 'ERROR: In the CKAN config you need to specify the filepath of the ' \ - 'Google Analytics token file under key: googleanalytics.token.filepath' - return - - try: - self.token, svc = init_service(ga_token_filepath, None) - except TypeError: - print ('Have you correctly run the getauthtoken task and ' - 'specified the correct token file in the CKAN config under ' - '"googleanalytics.token.filepath"?') - - try: - # Because of issues of invalid responses, we are going to make these requests - # ourselves. + ga_token_filepath = os.path.expanduser(config.get('googleanalytics.token.filepath', '')) + if not ga_token_filepath: + print 'ERROR: In the CKAN config you need to specify the filepath of the ' \ + 'Google Analytics token file under key: googleanalytics.token.filepath' + return + + log.info("Trying to refresh our OAuth token") + try: + from ga_auth import init_service + self.token, svc = init_service(ga_token_filepath, None) + log.info("OAuth token refreshed") + except Exception, auth_exception: + log.error("Oauth refresh failed") + log.exception(auth_exception) + return + + try: headers = {'authorization': 'Bearer ' + self.token} r = requests.get("https://www.googleapis.com/analytics/v3/data/ga", params=params, headers=headers) if r.status_code != 200: - log.info("STATUS: %s" % (r.status_code,)) - log.info("CONTENT: %s" % (r.content,)) - raise Exception("Request with params: %s failed" % params) + log.info("STATUS: %s" % (r.status_code,)) + log.info("CONTENT: %s" % (r.content,)) + raise Exception("Request with params: %s failed" % params) return json.loads(r.content) except Exception, e: - if not prev_fail: - print e - results = self._get_json(self, params, prev_fail=True) - else: log.exception(e) return dict(url=[]) @@ -344,8 +333,7 @@ ga_model.update_sitewide_stats(period_name, "Totals", data, period_complete_day) # Bounces from / or another configurable page. - path = '/%s%s' % (config.get('googleanalytics.account'), - config.get('ga-report.bounce_url', '/')) + path = '/' #% (config.get('googleanalytics.account'), config.get('ga-report.bounce_url', '/')) try: # Because of issues of invalid responses, we are going to make these requests