From: Ross Jones
Date: Fri, 11 Oct 2013 15:02:13 +0000
Subject: Tidy up
X-Git-Url: http://maxious.lambdacomplex.org/git/?p=ckanext-ga-report.git&a=commitdiff&h=468c6046de585364e0e567316f5bdf1f193b2b9f

---

Tidy up

---

--- a/ckanext/ga_report/command.py
+++ b/ckanext/ga_report/command.py
@@ -115,6 +115,7 @@
                           default=False,
                           dest='skip_url_stats',
                           help='Skip the download of URL data - just do site-wide stats')
+        self.token = ""

     def command(self):
         self._load_config()
@@ -129,14 +130,14 @@
             return

         try:
-            svc = init_service(ga_token_filepath, None)
+            self.token, svc = init_service(ga_token_filepath, None)
         except TypeError:
             print ('Have you correctly run the getauthtoken task and '
                    'specified the correct token file in the CKAN config under '
                    '"googleanalytics.token.filepath"?')
             return

-        downloader = DownloadAnalytics(svc, profile_id=get_profile_id(svc),
+        downloader = DownloadAnalytics(svc, self.token, profile_id=get_profile_id(svc),
                                        delete_first=self.options.delete_first,
                                        skip_url_stats=self.options.skip_url_stats)
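Note on the calling convention this hunk introduces: init_service() (changed in ga_auth.py further down) now returns the OAuth access token as well as the built Analytics service, and the token is passed through to DownloadAnalytics. A minimal usage sketch, not part of the patch, using only the config key and helpers this extension already relies on:

```python
# Illustrative wiring only; argument values here are examples.
import os
from pylons import config

from ckanext.ga_report.ga_auth import init_service, get_profile_id
from ckanext.ga_report.download_analytics import DownloadAnalytics

ga_token_filepath = os.path.expanduser(
    config.get('googleanalytics.token.filepath', ''))

# init_service() now returns (access_token, service) instead of just the service.
token, svc = init_service(ga_token_filepath, None)

downloader = DownloadAnalytics(svc, token,
                               profile_id=get_profile_id(svc),
                               delete_first=False,
                               skip_url_stats=False)
# downloader is then driven exactly as before by the paster command.
```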
--- a/ckanext/ga_report/controller.py
+++ b/ckanext/ga_report/controller.py
@@ -211,7 +211,7 @@
         graph_dict = {}
         for stat in graph_query:
             graph_dict[ stat.key ] = graph_dict.get(stat.key,{
-                'name':stat.key,
+                'name':stat.key,
                 'raw': {}
                 })
             graph_dict[ stat.key ]['raw'][stat.period_name] = float(stat.value)
@@ -304,7 +304,9 @@
         graph_data = _get_top_publishers_graph()
         c.top_publishers_graph = json.dumps( _to_rickshaw(graph_data) )

-        return render('ga_report/publisher/index.html')
+        x = render('ga_report/publisher/index.html')
+
+        return x

     def _get_packages(self, publisher=None, month='', count=-1):
         '''Returns the datasets in order of views'''
@@ -412,7 +414,7 @@
 def _to_rickshaw(data, percentageMode=False):
     if data==[]:
         return data
-    # x-axis is every month in c.months. Note that data might not exist
+    # x-axis is every month in c.months. Note that data might not exist
     # for entire history, eg. for recently-added datasets
     x_axis = [x[0] for x in c.months]
     x_axis.reverse() # Ascending order
@@ -444,10 +446,10 @@
         for i in range(len(x_axis)):
             x = _get_unix_epoch(x_axis[i])
             y = 0
-            for series in others:
+            for series in others:
                 y += series['data'][i]['y']
             data_other.append({'x':x,'y':y})
-        data.append({
+        data.append({
             'name':'Other',
             'data': data_other
             })
@@ -505,8 +507,8 @@
     # Query for a history graph of these department ids
     q = model.Session.query(
-            GA_Url.department_id,
-            GA_Url.period_name,
+            GA_Url.department_id,
+            GA_Url.period_name,
             func.sum(cast(GA_Url.pageviews,sqlalchemy.types.INT)))\
         .filter( GA_Url.department_id.in_(department_ids) )\
         .filter( GA_Url.url.like('/dataset/%') )\
@@ -529,7 +531,7 @@
     '''
     publishers = []
     for pub in model.Session.query(model.Group).\
-               filter(model.Group.type=='publisher').\
+               filter(model.Group.type=='organization').\
                filter(model.Group.state=='active').\
                order_by(model.Group.name):
         publishers.append((pub.name, pub.title))
--- a/ckanext/ga_report/download_analytics.py
+++ b/ckanext/ga_report/download_analytics.py
@@ -3,6 +3,8 @@
 import datetime
 import httplib
 import collections
+import requests
+import json
 from pylons import config
 from ga_model import _normalize_url
 import ga_model
@@ -19,13 +21,14 @@
 class DownloadAnalytics(object):
     '''Downloads and stores analytics info'''

-    def __init__(self, service=None, profile_id=None, delete_first=False,
+    def __init__(self, service=None, token=None, profile_id=None, delete_first=False,
                  skip_url_stats=False):
         self.period = config['ga-report.period']
         self.service = service
         self.profile_id = profile_id
         self.delete_first = delete_first
         self.skip_url_stats = skip_url_stats
+        self.token = token

     def specific_month(self, date):
         import calendar
@@ -150,17 +153,27 @@
         metrics = 'ga:entrances'
         sort = '-ga:entrances'

-        # Supported query params at
-        # https://developers.google.com/analytics/devguides/reporting/core/v3/reference
-        results = self.service.data().ga().get(
-            ids='ga:' + self.profile_id,
-            filters=query,
-            start_date=start_date,
-            metrics=metrics,
-            sort=sort,
-            dimensions="ga:landingPagePath,ga:socialNetwork",
-            max_results=10000,
-            end_date=end_date).execute()
+        try:
+            # Because of issues of invalid responses, we are going to make these requests
+            # ourselves.
+            headers = {'authorization': 'Bearer ' + self.token}
+
+            args = dict(ids='ga:' + self.profile_id,
+                        filters=query,
+                        metrics=metrics,
+                        sort=sort,
+                        dimensions="ga:landingPagePath,ga:socialNetwork",
+                        max_results=10000)
+
+            args['start-date'] = start_date
+            args['end-date'] = end_date
+
+            results = self._get_json(args)
+        except Exception, e:
+            log.exception(e)
+            results = dict(url=[])
+
         data = collections.defaultdict(list)
         rows = results.get('rows',[])
         for row in rows:
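The hunk above is the first of several that swap the google-api client call (service.data().ga().get(...).execute()) for a plain HTTP request. A self-contained sketch of that pattern against the Core Reporting API v3 follows; TOKEN and PROFILE_ID are placeholders for the values init_service() and get_profile_id() provide, and the dates are arbitrary examples:

```python
# Query the GA Core Reporting API v3 directly with requests, authenticating
# with the OAuth access token in a Bearer header (the approach this patch adopts).
import json
import requests

TOKEN = "ya29.EXAMPLE"        # placeholder access token
PROFILE_ID = "12345678"       # placeholder GA profile id

headers = {'authorization': 'Bearer ' + TOKEN}
args = {
    'ids': 'ga:' + PROFILE_ID,
    'metrics': 'ga:pageviews',
    'sort': '-ga:pageviews',
    'dimensions': 'ga:pagePath',
    'start-date': '2013-09-01',
    'end-date': '2013-09-30',
    'max-results': 10000,
    'alt': 'json',
}

r = requests.get("https://www.googleapis.com/analytics/v3/data/ga",
                 params=args, headers=headers)
if r.status_code != 200:
    raise Exception("Request with params: %s failed" % args)

results = json.loads(r.content)
rows = results.get('rows', [])   # each row is a list of strings, dimensions first
```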
@@ -180,19 +193,31 @@
         # Supported query params at
         # https://developers.google.com/analytics/devguides/reporting/core/v3/reference
         try:
-            results = self.service.data().ga().get(
-                ids='ga:' + self.profile_id,
-                filters=query,
-                start_date=start_date,
-                metrics=metrics,
-                sort=sort,
-                dimensions="ga:pagePath",
-                max_results=10000,
-                end_date=end_date).execute()
-        except httplib.BadStatusLine:
-            log.error(u"Failed to download data=> ids: ga:{0}, filters: {1}, start_date: {2}, end_date: {3}, metrics: {4}, sort: {5}, dimensions: ga:pagePath".format(
-                self.profile_id, query, start_date, end_date, metrics, sort ))
-            return dict(url=[])
+            # Because of issues of invalid responses, we are going to make these requests
+            # ourselves.
+            headers = {'authorization': 'Bearer ' + self.token}
+
+            args = {}
+            args["sort"] = "-ga:pageviews"
+            args["max-results"] = 100000
+            args["dimensions"] = "ga:pagePath"
+            args["start-date"] = start_date
+            args["end-date"] = end_date
+            args["metrics"] = metrics
+            args["ids"] = "ga:" + self.profile_id
+            args["filters"] = query
+            args["alt"] = "json"
+
+            r = requests.get("https://www.googleapis.com/analytics/v3/data/ga", params=args, headers=headers)
+            if r.status_code != 200:
+                raise Exception("Request with params: %s failed" % args)
+
+            results = json.loads(r.content)
+            print len(results.keys())
+        except Exception, e:
+            log.exception(e)
+            #return dict(url=[])
+            raise e

         packages = []
         log.info("There are %d results" % results['totalResults'])
@@ -232,25 +257,77 @@
             data[key] = data.get(key,0) + result[1]
         return data

+    def _get_json(self, params, prev_fail=False):
+        ga_token_filepath = os.path.expanduser(config.get('googleanalytics.token.filepath', ''))
+        if not ga_token_filepath:
+            print 'ERROR: In the CKAN config you need to specify the filepath of the ' \
+                'Google Analytics token file under key: googleanalytics.token.filepath'
+            return
+
+        log.info("Trying to refresh our OAuth token")
+        try:
+            self.token, svc = init_service(ga_token_filepath, None)
+            log.info("OAuth token refreshed")
+        except Exception, auth_exception:
+            log.error("Oauth refresh failed")
+            log.exception(auth_exception)
+            return
+
+        try:
+            headers = {'authorization': 'Bearer ' + self.token}
+            r = requests.get("https://www.googleapis.com/analytics/v3/data/ga", params=params, headers=headers)
+            if r.status_code != 200:
+                log.info("STATUS: %s" % (r.status_code,))
+                log.info("CONTENT: %s" % (r.content,))
+                raise Exception("Request with params: %s failed" % params)
+
+            return json.loads(r.content)
+        except Exception, e:
+            log.exception(e)
+
+        return dict(url=[])
+
     def _totals_stats(self, start_date, end_date, period_name, period_complete_day):
         """ Fetches distinct totals, total pageviews etc """
-        results = self.service.data().ga().get(
-            ids='ga:' + self.profile_id,
-            start_date=start_date,
-            metrics='ga:pageviews',
-            sort='-ga:pageviews',
-            max_results=10000,
-            end_date=end_date).execute()
+        try:
+            args = {}
+            args["max-results"] = 100000
+            args["start-date"] = start_date
+            args["end-date"] = end_date
+            args["ids"] = "ga:" + self.profile_id
+
+            args["metrics"] = "ga:pageviews"
+            args["sort"] = "-ga:pageviews"
+            args["alt"] = "json"
+
+            results = self._get_json(args)
+        except Exception, e:
+            log.exception(e)
+            results = dict(url=[])
+
         result_data = results.get('rows')
         ga_model.update_sitewide_stats(period_name, "Totals", {'Total page views': result_data[0][0]},
             period_complete_day)

-        results = self.service.data().ga().get(
-            ids='ga:' + self.profile_id,
-            start_date=start_date,
-            metrics='ga:pageviewsPerVisit,ga:avgTimeOnSite,ga:percentNewVisits,ga:visits',
-            max_results=10000,
-            end_date=end_date).execute()
+        try:
+            # Because of issues of invalid responses, we are going to make these requests
+            # ourselves.
+            headers = {'authorization': 'Bearer ' + self.token}
+
+            args = {}
+            args["max-results"] = 100000
+            args["start-date"] = start_date
+            args["end-date"] = end_date
+            args["ids"] = "ga:" + self.profile_id
+
+            args["metrics"] = "ga:pageviewsPerVisit,ga:avgTimeOnSite,ga:percentNewVisits,ga:visits"
+            args["alt"] = "json"
+
+            results = self._get_json(args)
+        except Exception, e:
+            log.exception(e)
+            results = dict(url=[])
+
         result_data = results.get('rows')
         data = {
             'Pages per visit': result_data[0][0],
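The _get_json() helper above refreshes the OAuth token via init_service() and then returns the decoded response body. For orientation, this is roughly the shape of a successful v3 response that the calling code then unpacks with results.get('rows') and results['totalResults']; the field values here are made up:

```python
# Abridged Core Reporting API v3 response as a Python literal (illustrative values only).
results = {
    "kind": "analytics#gaData",
    "totalResults": 2,
    "columnHeaders": [
        {"name": "ga:pagePath",  "columnType": "DIMENSION", "dataType": "STRING"},
        {"name": "ga:pageviews", "columnType": "METRIC",    "dataType": "INTEGER"},
    ],
    "totalsForAllResults": {"ga:pageviews": "215"},
    "rows": [
        ["/dataset/example-dataset", "187"],
        ["/dataset/another-dataset", "28"],
    ],
}

# Every cell comes back as a string, which is why the download code casts
# metrics with int()/float() before storing them.
total_views = sum(int(views) for _path, views in results.get("rows", []))
```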
@@ -263,14 +340,28 @@
         # Bounces from / or another configurable page.
         path = '/%s%s' % (config.get('googleanalytics.account'), config.get('ga-report.bounce_url', '/'))
-        results = self.service.data().ga().get(
-            ids='ga:' + self.profile_id,
-            filters='ga:pagePath==%s' % (path,),
-            start_date=start_date,
-            metrics='ga:visitBounceRate',
-            dimensions='ga:pagePath',
-            max_results=10000,
-            end_date=end_date).execute()
+
+        try:
+            # Because of issues of invalid responses, we are going to make these requests
+            # ourselves.
+            headers = {'authorization': 'Bearer ' + self.token}
+
+            args = {}
+            args["max-results"] = 100000
+            args["start-date"] = start_date
+            args["end-date"] = end_date
+            args["ids"] = "ga:" + self.profile_id
+
+            args["filters"] = 'ga:pagePath==%s' % (path,)
+            args["dimensions"] = 'ga:pagePath'
+            args["metrics"] = "ga:visitBounceRate"
+            args["alt"] = "json"
+
+            results = self._get_json(args)
+        except Exception, e:
+            log.exception(e)
+            results = dict(url=[])
+
         result_data = results.get('rows')
         if not result_data or len(result_data) != 1:
             log.error('Could not pinpoint the bounces for path: %s. Got results: %r',
@@ -286,14 +377,28 @@
     def _locale_stats(self, start_date, end_date, period_name, period_complete_day):
         """ Fetches stats about language and country """
-        results = self.service.data().ga().get(
-            ids='ga:' + self.profile_id,
-            start_date=start_date,
-            metrics='ga:pageviews',
-            sort='-ga:pageviews',
-            dimensions="ga:language,ga:country",
-            max_results=10000,
-            end_date=end_date).execute()
+
+        try:
+            # Because of issues of invalid responses, we are going to make these requests
+            # ourselves.
+            headers = {'authorization': 'Bearer ' + self.token}
+
+            args = {}
+            args["max-results"] = 100000
+            args["start-date"] = start_date
+            args["end-date"] = end_date
+            args["ids"] = "ga:" + self.profile_id
+
+            args["dimensions"] = "ga:language,ga:country"
+            args["metrics"] = "ga:pageviews"
+            args["sort"] = "-ga:pageviews"
+            args["alt"] = "json"
+
+            results = self._get_json(args)
+        except Exception, e:
+            log.exception(e)
+            results = dict(url=[])
+
         result_data = results.get('rows')
         data = {}
         for result in result_data:
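The bounce, locale, social, OS, browser and mobile handlers all end the same way: fold the returned rows into a dict keyed by the first dimension, then hand the dict to ga_model.update_sitewide_stats(). A sketch of that fold, with illustrative rows rather than a verbatim copy of the unchanged code:

```python
# Example rows for dimensions=ga:language,ga:country and metrics=ga:pageviews.
result_data = [
    [u"en-gb", u"United Kingdom", u"1200"],
    [u"en-us", u"United States", u"300"],
    [u"en-gb", u"Ireland", u"45"],
]

data = {}
for result in result_data:
    # Key on the first dimension and add up the string-valued metric.
    data[result[0]] = data.get(result[0], 0) + int(result[-1])

# data == {u'en-gb': 1245, u'en-us': 300}; it is then persisted per period via
# ga_model.update_sitewide_stats(period_name, <stat group>, data, period_complete_day)
```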
@@ -314,15 +419,27 @@
         data = {}

-        results = self.service.data().ga().get(
-            ids='ga:' + self.profile_id,
-            start_date=start_date,
-            filters='ga:eventAction==download',
-            metrics='ga:totalEvents',
-            sort='-ga:totalEvents',
-            dimensions="ga:eventLabel",
-            max_results=10000,
-            end_date=end_date).execute()
+        try:
+            # Because of issues of invalid responses, we are going to make these requests
+            # ourselves.
+            headers = {'authorization': 'Bearer ' + self.token}
+
+            args = {}
+            args["max-results"] = 100000
+            args["start-date"] = start_date
+            args["end-date"] = end_date
+            args["ids"] = "ga:" + self.profile_id
+
+            args["filters"] = 'ga:eventAction==download'
+            args["dimensions"] = "ga:eventLabel"
+            args["metrics"] = "ga:totalEvents"
+            args["alt"] = "json"
+
+            results = self._get_json(args)
+        except Exception, e:
+            log.exception(e)
+            results = dict(url=[])
+
         result_data = results.get('rows')
         if not result_data:
             # We may not have data for this time period, so we need to bail
@@ -361,15 +478,25 @@
         log.info('Associating downloads of resource URLs with their respective datasets')
         process_result_data(results.get('rows'))

-        results = self.service.data().ga().get(
-            ids='ga:' + self.profile_id,
-            start_date=start_date,
-            filters='ga:eventAction==download-cache',
-            metrics='ga:totalEvents',
-            sort='-ga:totalEvents',
-            dimensions="ga:eventLabel",
-            max_results=10000,
-            end_date=end_date).execute()
+        try:
+            # Because of issues of invalid responses, we are going to make these requests
+            # ourselves.
+            headers = {'authorization': 'Bearer ' + self.token}
+
+            args = dict( ids='ga:' + self.profile_id,
+                         filters='ga:eventAction==download-cache',
+                         metrics='ga:totalEvents',
+                         sort='-ga:totalEvents',
+                         dimensions="ga:eventLabel",
+                         max_results=10000)
+            args['start-date'] = start_date
+            args['end-date'] = end_date
+
+            results = self._get_json(args)
+        except Exception, e:
+            log.exception(e)
+            results = dict(url=[])
+
         log.info('Associating downloads of cache resource URLs with their respective datasets')
         process_result_data(results.get('rows'), cached=False)

@@ -378,14 +505,25 @@
     def _social_stats(self, start_date, end_date, period_name, period_complete_day):
         """ Finds out which social sites people are referred from """
-        results = self.service.data().ga().get(
-            ids='ga:' + self.profile_id,
-            start_date=start_date,
-            metrics='ga:pageviews',
-            sort='-ga:pageviews',
-            dimensions="ga:socialNetwork,ga:referralPath",
-            max_results=10000,
-            end_date=end_date).execute()
+
+        try:
+            # Because of issues of invalid responses, we are going to make these requests
+            # ourselves.
+            headers = {'authorization': 'Bearer ' + self.token}
+
+            args = dict( ids='ga:' + self.profile_id,
+                         metrics='ga:pageviews',
+                         sort='-ga:pageviews',
+                         dimensions="ga:socialNetwork,ga:referralPath",
+                         max_results=10000)
+            args['start-date'] = start_date
+            args['end-date'] = end_date
+
+            results = self._get_json(args)
+        except Exception, e:
+            log.exception(e)
+            results = dict(url=[])
+
         result_data = results.get('rows')
         data = {}
         for result in result_data:
@@ -397,14 +535,24 @@
     def _os_stats(self, start_date, end_date, period_name, period_complete_day):
         """ Operating system stats """
-        results = self.service.data().ga().get(
-            ids='ga:' + self.profile_id,
-            start_date=start_date,
-            metrics='ga:pageviews',
-            sort='-ga:pageviews',
-            dimensions="ga:operatingSystem,ga:operatingSystemVersion",
-            max_results=10000,
-            end_date=end_date).execute()
+        try:
+            # Because of issues of invalid responses, we are going to make these requests
+            # ourselves.
+            headers = {'authorization': 'Bearer ' + self.token}
+
+            args = dict( ids='ga:' + self.profile_id,
+                         metrics='ga:pageviews',
+                         sort='-ga:pageviews',
+                         dimensions="ga:operatingSystem,ga:operatingSystemVersion",
+                         max_results=10000)
+            args['start-date'] = start_date
+            args['end-date'] = end_date
+
+            results = self._get_json(args)
+        except Exception, e:
+            log.exception(e)
+            results = dict(url=[])
+
         result_data = results.get('rows')
         data = {}
         for result in result_data:
@@ -422,14 +570,27 @@
     def _browser_stats(self, start_date, end_date, period_name, period_complete_day):
         """ Information about browsers and browser versions """
-        results = self.service.data().ga().get(
-            ids='ga:' + self.profile_id,
-            start_date=start_date,
-            metrics='ga:pageviews',
-            sort='-ga:pageviews',
-            dimensions="ga:browser,ga:browserVersion",
-            max_results=10000,
-            end_date=end_date).execute()
+
+        try:
+            # Because of issues of invalid responses, we are going to make these requests
+            # ourselves.
+            headers = {'authorization': 'Bearer ' + self.token}
+
+            args = dict( ids='ga:' + self.profile_id,
+                         metrics='ga:pageviews',
+                         sort='-ga:pageviews',
+                         dimensions="ga:browser,ga:browserVersion",
+                         max_results=10000)
+
+            args['start-date'] = start_date
+            args['end-date'] = end_date
+
+            results = self._get_json(args)
+        except Exception, e:
+            log.exception(e)
+            results = dict(url=[])
+
         result_data = results.get('rows')

         # e.g. [u'Firefox', u'19.0', u'20']
@@ -471,14 +632,24 @@
     def _mobile_stats(self, start_date, end_date, period_name, period_complete_day):
         """ Info about mobile devices """
-        results = self.service.data().ga().get(
-            ids='ga:' + self.profile_id,
-            start_date=start_date,
-            metrics='ga:pageviews',
-            sort='-ga:pageviews',
-            dimensions="ga:mobileDeviceBranding, ga:mobileDeviceInfo",
-            max_results=10000,
-            end_date=end_date).execute()
+        try:
+            # Because of issues of invalid responses, we are going to make these requests
+            # ourselves.
+            headers = {'authorization': 'Bearer ' + self.token}
+
+            args = dict( ids='ga:' + self.profile_id,
+                         metrics='ga:pageviews',
+                         sort='-ga:pageviews',
+                         dimensions="ga:mobileDeviceBranding, ga:mobileDeviceInfo",
+                         max_results=10000)
+            args['start-date'] = start_date
+            args['end-date'] = end_date
+
+            results = self._get_json(args)
+        except Exception, e:
+            log.exception(e)
+            results = dict(url=[])
+
         result_data = results.get('rows')
         data = {}
--- a/ckanext/ga_report/ga_auth.py
+++ b/ckanext/ga_report/ga_auth.py
@@ -36,7 +36,7 @@
     credentials = _prepare_credentials(token_file, credentials_file)
     http = credentials.authorize(http)  # authorize the http object

-    return build('analytics', 'v3', http=http)
+    return credentials.access_token, build('analytics', 'v3', http=http)

 def get_profile_id(service):
--- a/ckanext/ga_report/ga_model.py
+++ b/ckanext/ga_report/ga_model.py
@@ -125,7 +125,7 @@
         dataset_ref = dataset_match.groups()[0]
         dataset = model.Package.get(dataset_ref)
         if dataset:
-            publisher_groups = dataset.get_groups('publisher')
+            publisher_groups = dataset.get_groups('organization')
             if publisher_groups:
                 return dataset_ref,publisher_groups[0].name
     return dataset_ref, None
@@ -323,11 +323,11 @@
     """
     toplevel = get_top_level()
     publishers = model.Session.query(model.Group).\
-        filter(model.Group.type=='publisher').\
+        filter(model.Group.type=='organization').\
         filter(model.Group.state=='active').all()
     for publisher in publishers:
         views, visits, subpub = update_publisher(period_name, publisher, publisher.name)
-        parent, parents = '', publisher.get_groups('publisher')
+        parent, parents = '', publisher.get_groups('organization')
         if parents:
             parent = parents[0].name
         item = model.Session.query(GA_Publisher).\
@@ -377,7 +377,7 @@
                    model.Member.table_name == 'group' and \
                    model.Member.state == 'active').\
            filter(model.Member.id==None).\
-           filter(model.Group.type=='publisher').\
+           filter(model.Group.type=='organization').\
            order_by(model.Group.name).all()

 def get_children(publisher):
--- a/ckanext/ga_report/helpers.py
+++ b/ckanext/ga_report/helpers.py
@@ -71,7 +71,7 @@
 def single_popular_dataset_html(top=20):
     dataset_dict = single_popular_dataset(top)
     groups = package.get('groups', [])
-    publishers = [ g for g in groups if g.get('type') == 'publisher' ]
+    publishers = [ g for g in groups if g.get('type') == 'organization' ]
     publisher = publishers[0] if publishers else {'name':'', 'title': ''}
     context = {
         'dataset': dataset_dict,
@@ -118,7 +118,7 @@
         if not p in datasets:
             datasets[p] = {'views':0, 'visits': 0}
-
+
         datasets[p]['views'] = datasets[p]['views'] + int(entry.pageviews)
         datasets[p]['visits'] = datasets[p]['visits'] + int(entry.visits)
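The repeated 'publisher' to 'organization' substitutions (here and in controller.py above) track CKAN's move from publisher-type groups to organizations. A minimal sketch of the query pattern the patch now uses, relying only on the CKAN model calls already present in these files; 'example-dataset' is a hypothetical dataset name:

```python
from ckan import model

# Active organizations, as the reporting queries now select them.
active_orgs = model.Session.query(model.Group)\
    .filter(model.Group.type == 'organization')\
    .filter(model.Group.state == 'active')\
    .order_by(model.Group.name)\
    .all()

# Resolving the organization a dataset belongs to.
dataset = model.Package.get('example-dataset')
orgs = dataset.get_groups('organization') if dataset else []
org_name = orgs[0].name if orgs else None
```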

--- a/ckanext/ga_report/templates/ga_report/ga_util.html
+++ b/ckanext/ga_report/templates/ga_report/ga_util.html
@@ -62,13 +62,10 @@
 Site-wide
 [surrounding template markup is not recoverable from this extract]
--- a/ckanext/ga_report/templates/ga_report/publisher/index.html
+++ b/ckanext/ga_report/templates/ga_report/publisher/index.html
@@ -8,9 +8,11 @@
 Usage by Publisher
 ${ga_sidebar(download_link=h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport',action='publisher_csv',month=c.month or 'all'))}
 [surrounding template markup is not recoverable from this extract]
@@ -36,7 +38,7 @@
 Statistics for
-    ${month_selector(c.month, c.months, c.day)}
+    ${month_selector(c.month, c.months, c.day)}
--- a/ckanext/ga_report/templates/ga_report/publisher/popular.html
+++ b/ckanext/ga_report/templates/ga_report/publisher/popular.html
@@ -15,14 +15,12 @@
 [template markup not recoverable from this extract]
--- a/ckanext/ga_report/templates/ga_report/publisher/read.html
+++ b/ckanext/ga_report/templates/ga_report/publisher/read.html
@@ -19,9 +19,9 @@
 ${ga_sidebar(download_link=h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport',action='dataset_csv',id=c.publisher_name or 'all',month=c.month or 'all'))}
 Site Usage ${usage_nav('Datasets')}
 [surrounding template markup is not recoverable from this extract]
--- a/ckanext/ga_report/templates/ga_report/site/downloads.html
+++ b/ckanext/ga_report/templates/ga_report/site/downloads.html
@@ -7,9 +7,9 @@
 Downloads
 ${ga_sidebar(download_link=h.url_for(controller='ckanext.ga_report.controller:GaReport',action='csv_downloads',month=c.month or 'all'))}
 Downloads ${usage_nav('Downloads')}
 [surrounding template markup is not recoverable from this extract]
--- a/ckanext/ga_report/templates/ga_report/site/index.html
+++ b/ckanext/ga_report/templates/ga_report/site/index.html
@@ -19,9 +19,9 @@
 ${ga_sidebar(download_link=h.url_for(controller='ckanext.ga_report.controller:GaReport',action='csv',month=c.month or 'all'))}
 Site Usage ${usage_nav('Site-wide')}
 [surrounding template markup is not recoverable from this extract]
@@ -168,7 +168,7 @@
     CKAN.GA_Reports.bind_sparklines();
     CKAN.GA_Reports.bind_sidebar();
     CKAN.GA_Reports.bind_month_selector();
-  });
+  });