--- a/ckanext/ga_report/download_analytics.py +++ b/ckanext/ga_report/download_analytics.py @@ -121,13 +121,13 @@ accountName = config.get('googleanalytics.account') log.info('Downloading analytics for dataset views') - data = self.download(start_date, end_date, '~/%s/dataset/[a-z0-9-_]+' % accountName) + data = self.download(start_date, end_date, '~^/dataset/[a-z0-9-_]+') log.info('Storing dataset views (%i rows)', len(data.get('url'))) self.store(period_name, period_complete_day, data, ) log.info('Downloading analytics for publisher views') - data = self.download(start_date, end_date, '~/%s/publisher/[a-z0-9-_]+' % accountName) + data = self.download(start_date, end_date, '~^/organization/[a-z0-9-_]+') log.info('Storing publisher views (%i rows)', len(data.get('url'))) self.store(period_name, period_complete_day, data,) @@ -177,7 +177,7 @@ data = collections.defaultdict(list) rows = results.get('rows',[]) for row in rows: - url = _normalize_url('http:/' + row[0]) + url = row[0] data[url].append( (row[1], int(row[2]),) ) ga_model.update_social(period_name, data) @@ -192,11 +192,8 @@ # Supported query params at # https://developers.google.com/analytics/devguides/reporting/core/v3/reference - try: - # Because of issues of invalid responses, we are going to make these requests - # ourselves. - headers = {'authorization': 'Bearer ' + self.token} - + # https://ga-dev-tools.appspot.com/explorer/ + try: args = {} args["sort"] = "-ga:pageviews" args["max-results"] = 100000 @@ -207,25 +204,22 @@ args["ids"] = "ga:" + self.profile_id args["filters"] = query args["alt"] = "json" - - r = requests.get("https://www.googleapis.com/analytics/v3/data/ga", params=args, headers=headers) - if r.status_code != 200: - raise Exception("Request with params: %s failed" % args) - - results = json.loads(r.content) - print len(results.keys()) - except Exception, e: - log.exception(e) - #return dict(url=[]) - raise e + print args + results = self._get_json(args) + + except Exception, e: + log.exception(e) + return dict(url=[]) packages = [] log.info("There are %d results" % results['totalResults']) - for entry in results.get('rows'): + if results['totalResults'] > 0: + for entry in results.get('rows'): (loc,pageviews,visits) = entry - url = _normalize_url('http:/' + loc) # strips off domain e.g. www.data.gov.uk or data.gov.uk - - if not url.startswith('/dataset/') and not url.startswith('/publisher/'): + #url = _normalize_url('http:/' + loc) # strips off domain e.g. www.data.gov.uk or data.gov.uk + url = loc + #print url + if not url.startswith('/dataset/') and not url.startswith('/organization/'): # filter out strays like: # /data/user/login?came_from=http://data.gov.uk/dataset/os-code-point-open # /403.html?page=/about&from=http://data.gov.uk/publisher/planning-inspectorate @@ -266,6 +260,7 @@ log.info("Trying to refresh our OAuth token") try: + from ga_auth import init_service self.token, svc = init_service(ga_token_filepath, None) log.info("OAuth token refreshed") except Exception, auth_exception: @@ -338,8 +333,7 @@ ga_model.update_sitewide_stats(period_name, "Totals", data, period_complete_day) # Bounces from / or another configurable page. - path = '/%s%s' % (config.get('googleanalytics.account'), - config.get('ga-report.bounce_url', '/')) + path = '/' #% (config.get('googleanalytics.account'), config.get('ga-report.bounce_url', '/')) try: # Because of issues of invalid responses, we are going to make these requests