Merge branch 'master' of git+ssh://maxious.lambdacomplex.org/git/ckanext-ga-report
Merge branch 'master' of git+ssh://maxious.lambdacomplex.org/git/ckanext-ga-report

--- a/ckanext/ga_report/command.py
+++ b/ckanext/ga_report/command.py
@@ -52,9 +52,7 @@
         assuming it is correct.
         """
         from ga_auth import init_service
-        init_service('token.dat',
-                      self.args[0] if self.args
-                                   else 'credentials.json')
+        init_service('token.dat', 'credentials.json')
 
 class FixTimePeriods(CkanCommand):
     """
@@ -115,6 +113,7 @@
                                default=False,
                                dest='skip_url_stats',
                                help='Skip the download of URL data - just do site-wide stats')
+        self.token = ""
 
     def command(self):
         self._load_config()
@@ -129,14 +128,14 @@
             return
 
         try:
-            svc = init_service(ga_token_filepath, None)
+            self.token, svc = init_service(ga_token_filepath, None)
         except TypeError:
             print ('Have you correctly run the getauthtoken task and '
                    'specified the correct token file in the CKAN config under '
                    '"googleanalytics.token.filepath"?')
             return
 
-        downloader = DownloadAnalytics(svc, profile_id=get_profile_id(svc),
+        downloader = DownloadAnalytics(svc, self.token, profile_id=get_profile_id(svc),
                                        delete_first=self.options.delete_first,
                                        skip_url_stats=self.options.skip_url_stats)
 

--- a/ckanext/ga_report/controller.py
+++ b/ckanext/ga_report/controller.py
@@ -211,7 +211,7 @@
             graph_dict = {}
             for stat in graph_query:
                 graph_dict[ stat.key ] = graph_dict.get(stat.key,{
-                    'name':stat.key, 
+                    'name':stat.key,
                     'raw': {}
                     })
                 graph_dict[ stat.key ]['raw'][stat.period_name] = float(stat.value)
@@ -304,7 +304,9 @@
         graph_data = _get_top_publishers_graph()
         c.top_publishers_graph = json.dumps( _to_rickshaw(graph_data) )
 
-        return render('ga_report/publisher/index.html')
+        x =  render('ga_report/publisher/index.html')
+
+        return x
 
     def _get_packages(self, publisher=None, month='', count=-1):
         '''Returns the datasets in order of views'''
@@ -340,7 +342,8 @@
                         downloads += int(x.value)
                 else:
                     downloads = 'No data'
-                top_packages.append((package, entry.pageviews, entry.visits, downloads))
+                if package.private == False:
+                    top_packages.append((package, entry.pageviews, entry.visits, downloads))
             else:
                 log.warning('Could not find package associated package')
 
@@ -412,7 +415,7 @@
 def _to_rickshaw(data, percentageMode=False):
     if data==[]:
         return data
-    # x-axis is every month in c.months. Note that data might not exist 
+    # x-axis is every month in c.months. Note that data might not exist
     # for entire history, eg. for recently-added datasets
     x_axis = [x[0] for x in c.months]
     x_axis.reverse() # Ascending order
@@ -444,10 +447,10 @@
         for i in range(len(x_axis)):
             x = _get_unix_epoch(x_axis[i])
             y = 0
-            for series in others: 
+            for series in others:
                 y += series['data'][i]['y']
             data_other.append({'x':x,'y':y})
-        data.append({ 
+        data.append({
             'name':'Other',
             'data': data_other
             })
@@ -505,8 +508,8 @@
 
     # Query for a history graph of these department ids
     q = model.Session.query(
-            GA_Url.department_id, 
-            GA_Url.period_name, 
+            GA_Url.department_id,
+            GA_Url.period_name,
             func.sum(cast(GA_Url.pageviews,sqlalchemy.types.INT)))\
         .filter( GA_Url.department_id.in_(department_ids) )\
         .filter( GA_Url.url.like('/dataset/%') )\
@@ -529,7 +532,7 @@
     '''
     publishers = []
     for pub in model.Session.query(model.Group).\
-               filter(model.Group.type=='publisher').\
+               filter(model.Group.type=='organization').\
                filter(model.Group.state=='active').\
                order_by(model.Group.name):
         publishers.append((pub.name, pub.title))

--- a/ckanext/ga_report/download_analytics.py
+++ b/ckanext/ga_report/download_analytics.py
@@ -1,7 +1,12 @@
 import os
 import logging
 import datetime
+import httplib
+import urllib
 import collections
+import requests
+import json
+import re
 from pylons import config
 from ga_model import _normalize_url
 import ga_model
@@ -18,13 +23,14 @@
 class DownloadAnalytics(object):
     '''Downloads and stores analytics info'''
 
-    def __init__(self, service=None, profile_id=None, delete_first=False,
+    def __init__(self, service=None, token=None, profile_id=None, delete_first=False,
                  skip_url_stats=False):
         self.period = config['ga-report.period']
         self.service = service
         self.profile_id = profile_id
         self.delete_first = delete_first
         self.skip_url_stats = skip_url_stats
+        self.token = token
 
     def specific_month(self, date):
         import calendar
@@ -117,13 +123,13 @@
                 accountName = config.get('googleanalytics.account')
 
                 log.info('Downloading analytics for dataset views')
-                data = self.download(start_date, end_date, '~/%s/dataset/[a-z0-9-_]+' % accountName)
+                data = self.download(start_date, end_date, '~^/dataset/[a-z0-9-_]+')
 
                 log.info('Storing dataset views (%i rows)', len(data.get('url')))
                 self.store(period_name, period_complete_day, data, )
 
                 log.info('Downloading analytics for publisher views')
-                data = self.download(start_date, end_date, '~/%s/publisher/[a-z0-9-_]+' % accountName)
+                data = self.download(start_date, end_date, '~^/organization/[a-z0-9-_]+')
 
                 log.info('Storing publisher views (%i rows)', len(data.get('url')))
                 self.store(period_name, period_complete_day, data,)
@@ -149,21 +155,31 @@
         metrics = 'ga:entrances'
         sort = '-ga:entrances'
 
-        # Supported query params at
-        # https://developers.google.com/analytics/devguides/reporting/core/v3/reference
-        results = self.service.data().ga().get(
-                                 ids='ga:' + self.profile_id,
-                                 filters=query,
-                                 start_date=start_date,
-                                 metrics=metrics,
-                                 sort=sort,
-                                 dimensions="ga:landingPagePath,ga:socialNetwork",
-                                 max_results=10000,
-                                 end_date=end_date).execute()
+        try:
+            # Because of issues of invalid responses, we are going to make these requests
+            # ourselves.
+            headers = {'authorization': 'Bearer ' + self.token}
+
+            args = dict(ids='ga:' + self.profile_id,
+                       filters=query,
+                       metrics=metrics,
+                       sort=sort,
+                       dimensions="ga:landingPagePath,ga:socialNetwork",
+                       max_results=10000)
+
+            args['start-date'] = start_date
+            args['end-date'] = end_date
+
+            results = self._get_json(args)
+        except Exception, e:
+            log.exception(e)
+            results = dict(url=[])
+
+
         data = collections.defaultdict(list)
         rows = results.get('rows',[])
         for row in rows:
-            url = _normalize_url('http:/' + row[0])
+            url = row[0]
             data[url].append( (row[1], int(row[2]),) )
         ga_model.update_social(period_name, data)
 
@@ -178,23 +194,34 @@
 
         # Supported query params at
         # https://developers.google.com/analytics/devguides/reporting/core/v3/reference
-        results = self.service.data().ga().get(
-                                 ids='ga:' + self.profile_id,
-                                 filters=query,
-                                 start_date=start_date,
-                                 metrics=metrics,
-                                 sort=sort,
-                                 dimensions="ga:pagePath",
-                                 max_results=10000,
-                                 end_date=end_date).execute()
+	# https://ga-dev-tools.appspot.com/explorer/
+        try:
+            args = {}
+            args["sort"] = "-ga:pageviews"
+            args["max-results"] = 100000
+            args["dimensions"] = "ga:pagePath"
+            args["start-date"] = start_date
+            args["end-date"] = end_date
+            args["metrics"] = metrics
+            args["ids"] = "ga:" + self.profile_id
+            args["filters"] = query
+            args["alt"] = "json"
+            print args
+            results = self._get_json(args)
+
+        except Exception, e:
+            log.exception(e)
+            return dict(url=[])
 
         packages = []
         log.info("There are %d results" % results['totalResults'])
-        for entry in results.get('rows'):
+	if results['totalResults'] > 0:
+          for entry in results.get('rows'):
             (loc,pageviews,visits) = entry
-            url = _normalize_url('http:/' + loc) # strips off domain e.g. www.data.gov.uk or data.gov.uk
-
-            if not url.startswith('/dataset/') and not url.startswith('/publisher/'):
+            #url = _normalize_url('http:/' + loc) # strips off domain e.g. www.data.gov.uk or data.gov.uk
+            url = loc
+	    #print url
+            if not url.startswith('/dataset/') and not url.startswith('/organization/'):
                 # filter out strays like:
                 # /data/user/login?came_from=http://data.gov.uk/dataset/os-code-point-open
                 # /403.html?page=/about&from=http://data.gov.uk/publisher/planning-inspectorate
@@ -226,25 +253,78 @@
             data[key] = data.get(key,0) + result[1]
         return data
 
+    def _get_json(self, params, prev_fail=False):
+        ga_token_filepath = os.path.expanduser(config.get('googleanalytics.token.filepath', ''))
+        if not ga_token_filepath:
+            print 'ERROR: In the CKAN config you need to specify the filepath of the ' \
+                'Google Analytics token file under key: googleanalytics.token.filepath'
+            return
+
+        log.info("Trying to refresh our OAuth token")
+        try:
+            from ga_auth import init_service
+            self.token, svc = init_service(ga_token_filepath, None)
+            log.info("OAuth token refreshed")
+        except Exception, auth_exception:
+            log.error("Oauth refresh failed")
+            log.exception(auth_exception)
+            return
+
+        try:
+            headers = {'authorization': 'Bearer ' + self.token}
+            r = requests.get("https://www.googleapis.com/analytics/v3/data/ga", params=params, headers=headers)
+            if r.status_code != 200:
+                log.info("STATUS: %s" % (r.status_code,))
+                log.info("CONTENT: %s" % (r.content,))
+                raise Exception("Request with params: %s failed" % params)
+
+            return json.loads(r.content)
+        except Exception, e:
+              log.exception(e)
+
+        return dict(url=[])
+
     def _totals_stats(self, start_date, end_date, period_name, period_complete_day):
         """ Fetches distinct totals, total pageviews etc """
-        results = self.service.data().ga().get(
-                                 ids='ga:' + self.profile_id,
-                                 start_date=start_date,
-                                 metrics='ga:pageviews',
-                                 sort='-ga:pageviews',
-                                 max_results=10000,
-                                 end_date=end_date).execute()
+        try:
+            args = {}
+            args["max-results"] = 100000
+            args["start-date"] = start_date
+            args["end-date"] = end_date
+            args["ids"] = "ga:" + self.profile_id
+
+            args["metrics"] = "ga:pageviews"
+            args["sort"] = "-ga:pageviews"
+            args["alt"] = "json"
+
+            results = self._get_json(args)
+        except Exception, e:
+            log.exception(e)
+            results = dict(url=[])
+
         result_data = results.get('rows')
         ga_model.update_sitewide_stats(period_name, "Totals", {'Total page views': result_data[0][0]},
             period_complete_day)
 
-        results = self.service.data().ga().get(
-                                 ids='ga:' + self.profile_id,
-                                 start_date=start_date,
-                                 metrics='ga:pageviewsPerVisit,ga:avgTimeOnSite,ga:percentNewVisits,ga:visits',
-                                 max_results=10000,
-                                 end_date=end_date).execute()
+        try:
+            # Because of issues of invalid responses, we are going to make these requests
+            # ourselves.
+            headers = {'authorization': 'Bearer ' + self.token}
+
+            args = {}
+            args["max-results"] = 100000
+            args["start-date"] = start_date
+            args["end-date"] = end_date
+            args["ids"] = "ga:" + self.profile_id
+
+            args["metrics"] = "ga:pageviewsPerVisit,ga:avgTimeOnSite,ga:percentNewVisits,ga:visits"
+            args["alt"] = "json"
+
+            results = self._get_json(args)
+        except Exception, e:
+            log.exception(e)
+            results = dict(url=[])
+
         result_data = results.get('rows')
         data = {
             'Pages per visit': result_data[0][0],
@@ -255,16 +335,29 @@
         ga_model.update_sitewide_stats(period_name, "Totals", data, period_complete_day)
 
         # Bounces from / or another configurable page.
-        path = '/%s%s' % (config.get('googleanalytics.account'),
-                          config.get('ga-report.bounce_url', '/'))
-        results = self.service.data().ga().get(
-                                 ids='ga:' + self.profile_id,
-                                 filters='ga:pagePath==%s' % (path,),
-                                 start_date=start_date,
-                                 metrics='ga:visitBounceRate',
-                                 dimensions='ga:pagePath',
-                                 max_results=10000,
-                                 end_date=end_date).execute()
+        path = '/' #% (config.get('googleanalytics.account'),                          config.get('ga-report.bounce_url', '/'))
+
+        try:
+            # Because of issues of invalid responses, we are going to make these requests
+            # ourselves.
+            headers = {'authorization': 'Bearer ' + self.token}
+
+            args = {}
+            args["max-results"] = 100000
+            args["start-date"] = start_date
+            args["end-date"] = end_date
+            args["ids"] = "ga:" + self.profile_id
+
+            args["filters"] = 'ga:pagePath==%s' % (path,)
+            args["dimensions"] = 'ga:pagePath'
+            args["metrics"] = "ga:visitBounceRate"
+            args["alt"] = "json"
+
+            results = self._get_json(args)
+        except Exception, e:
+            log.exception(e)
+            results = dict(url=[])
+
         result_data = results.get('rows')
         if not result_data or len(result_data) != 1:
             log.error('Could not pinpoint the bounces for path: %s. Got results: %r',
@@ -280,14 +373,28 @@
 
     def _locale_stats(self, start_date, end_date, period_name, period_complete_day):
         """ Fetches stats about language and country """
-        results = self.service.data().ga().get(
-                                 ids='ga:' + self.profile_id,
-                                 start_date=start_date,
-                                 metrics='ga:pageviews',
-                                 sort='-ga:pageviews',
-                                 dimensions="ga:language,ga:country",
-                                 max_results=10000,
-                                 end_date=end_date).execute()
+
+        try:
+            # Because of issues of invalid responses, we are going to make these requests
+            # ourselves.
+            headers = {'authorization': 'Bearer ' + self.token}
+
+            args = {}
+            args["max-results"] = 100000
+            args["start-date"] = start_date
+            args["end-date"] = end_date
+            args["ids"] = "ga:" + self.profile_id
+
+            args["dimensions"] = "ga:language,ga:country"
+            args["metrics"] = "ga:pageviews"
+            args["sort"] = "-ga:pageviews"
+            args["alt"] = "json"
+
+            results = self._get_json(args)
+        except Exception, e:
+            log.exception(e)
+            results = dict(url=[])
+
         result_data = results.get('rows')
         data = {}
         for result in result_data:
@@ -308,15 +415,27 @@
 
         data = {}
 
-        results = self.service.data().ga().get(
-                                 ids='ga:' + self.profile_id,
-                                 start_date=start_date,
-                                 filters='ga:eventAction==download',
-                                 metrics='ga:totalEvents',
-                                 sort='-ga:totalEvents',
-                                 dimensions="ga:eventLabel",
-                                 max_results=10000,
-                                 end_date=end_date).execute()
+        try:
+            # Because of issues of invalid responses, we are going to make these requests
+            # ourselves.
+            headers = {'authorization': 'Bearer ' + self.token}
+
+            args = {}
+            args["max-results"] = 100000
+            args["start-date"] = start_date
+            args["end-date"] = end_date
+            args["ids"] = "ga:" + self.profile_id
+
+            args["filters"] = 'ga:eventAction==Download'
+            args["dimensions"] = "ga:eventLabel"
+            args["metrics"] = "ga:totalEvents"
+            args["alt"] = "json"
+
+            results = self._get_json(args)
+        except Exception, e:
+            log.exception(e)
+            results = dict(url=[])
+
         result_data = results.get('rows')
         if not result_data:
             # We may not have data for this time period, so we need to bail
@@ -333,7 +452,7 @@
                 if progress_count % 100 == 0:
                     log.debug('.. %d/%d done so far', progress_count, progress_total)
 
-                url = result[0].strip()
+                url = urllib.unquote(result[0].strip())
 
                 # Get package id associated with the resource that has this URL.
                 q = model.Session.query(model.Resource)
@@ -341,8 +460,15 @@
                     r = q.filter(model.Resource.cache_url.like("%s%%" % url)).first()
                 else:
                     r = q.filter(model.Resource.url.like("%s%%" % url)).first()
+		
+		# new style internal download links
+		if re.search('(?:/resource/)(.*)(?:/download/)',url):
+		    resource_id = re.search('(?:/resource/)(.*)(?:/download/)',url)
+                    r = q.filter(model.Resource.id.like("%s%%" % resource_id.group(1))).first()
 
                 package_name = r.resource_group.package.name if r else ""
+
+
                 if package_name:
                     data[package_name] = data.get(package_name, 0) + int(result[1])
                 else:
@@ -355,31 +481,52 @@
         log.info('Associating downloads of resource URLs with their respective datasets')
         process_result_data(results.get('rows'))
 
-        results = self.service.data().ga().get(
-                                 ids='ga:' + self.profile_id,
-                                 start_date=start_date,
-                                 filters='ga:eventAction==download-cache',
-                                 metrics='ga:totalEvents',
-                                 sort='-ga:totalEvents',
-                                 dimensions="ga:eventLabel",
-                                 max_results=10000,
-                                 end_date=end_date).execute()
+        '''try:
+            # Because of issues of invalid responses, we are going to make these requests
+            # ourselves.
+            headers = {'authorization': 'Bearer ' + self.token}
+
+            args = dict( ids='ga:' + self.profile_id,
+                         filters='ga:eventAction==download-cache',
+                         metrics='ga:totalEvents',
+                         sort='-ga:totalEvents',
+                         dimensions="ga:eventLabel",
+                         max_results=10000)
+            args['start-date'] = start_date
+            args['end-date'] = end_date
+
+            results = self._get_json(args)
+        except Exception, e:
+            log.exception(e)
+            results = dict(url=[])
+
         log.info('Associating downloads of cache resource URLs with their respective datasets')
-        process_result_data(results.get('rows'), cached=False)
+        process_result_data(results.get('rows'), cached=False)'''
 
         self._filter_out_long_tail(data, MIN_DOWNLOADS)
         ga_model.update_sitewide_stats(period_name, "Downloads", data, period_complete_day)
 
     def _social_stats(self, start_date, end_date, period_name, period_complete_day):
         """ Finds out which social sites people are referred from """
-        results = self.service.data().ga().get(
-                                 ids='ga:' + self.profile_id,
-                                 start_date=start_date,
-                                 metrics='ga:pageviews',
-                                 sort='-ga:pageviews',
-                                 dimensions="ga:socialNetwork,ga:referralPath",
-                                 max_results=10000,
-                                 end_date=end_date).execute()
+
+        try:
+            # Because of issues of invalid responses, we are going to make these requests
+            # ourselves.
+            headers = {'authorization': 'Bearer ' + self.token}
+
+            args = dict( ids='ga:' + self.profile_id,
+                         metrics='ga:pageviews',
+                         sort='-ga:pageviews',
+                         dimensions="ga:socialNetwork,ga:referralPath",
+                         max_results=10000)
+            args['start-date'] = start_date
+            args['end-date'] = end_date
+
+            results = self._get_json(args)
+        except Exception, e:
+            log.exception(e)
+            results = dict(url=[])
+
         result_data = results.get('rows')
         data = {}
         for result in result_data:
@@ -391,14 +538,24 @@
 
     def _os_stats(self, start_date, end_date, period_name, period_complete_day):
         """ Operating system stats """
-        results = self.service.data().ga().get(
-                                 ids='ga:' + self.profile_id,
-                                 start_date=start_date,
-                                 metrics='ga:pageviews',
-                                 sort='-ga:pageviews',
-                                 dimensions="ga:operatingSystem,ga:operatingSystemVersion",
-                                 max_results=10000,
-                                 end_date=end_date).execute()
+        try:
+            # Because of issues of invalid responses, we are going to make these requests
+            # ourselves.
+            headers = {'authorization': 'Bearer ' + self.token}
+
+            args = dict( ids='ga:' + self.profile_id,
+                         metrics='ga:pageviews',
+                         sort='-ga:pageviews',
+                         dimensions="ga:operatingSystem,ga:operatingSystemVersion",
+                         max_results=10000)
+            args['start-date'] = start_date
+            args['end-date'] = end_date
+
+            results = self._get_json(args)
+        except Exception, e:
+            log.exception(e)
+            results = dict(url=[])
+
         result_data = results.get('rows')
         data = {}
         for result in result_data:
@@ -416,14 +573,27 @@
 
     def _browser_stats(self, start_date, end_date, period_name, period_complete_day):
         """ Information about browsers and browser versions """
-        results = self.service.data().ga().get(
-                                 ids='ga:' + self.profile_id,
-                                 start_date=start_date,
-                                 metrics='ga:pageviews',
-                                 sort='-ga:pageviews',
-                                 dimensions="ga:browser,ga:browserVersion",
-                                 max_results=10000,
-                                 end_date=end_date).execute()
+
+        try:
+            # Because of issues of invalid responses, we are going to make these requests
+            # ourselves.
+            headers = {'authorization': 'Bearer ' + self.token}
+
+            args = dict( ids='ga:' + self.profile_id,
+                         metrics='ga:pageviews',
+                         sort='-ga:pageviews',
+                         dimensions="ga:browser,ga:browserVersion",
+                         max_results=10000)
+
+            args['start-date'] = start_date
+            args['end-date'] = end_date
+
+            results = self._get_json(args)
+        except Exception, e:
+            log.exception(e)
+            results = dict(url=[])
+
+
         result_data = results.get('rows')
         # e.g. [u'Firefox', u'19.0', u'20']
 
@@ -465,14 +635,24 @@
     def _mobile_stats(self, start_date, end_date, period_name, period_complete_day):
         """ Info about mobile devices """
 
-        results = self.service.data().ga().get(
-                                 ids='ga:' + self.profile_id,
-                                 start_date=start_date,
-                                 metrics='ga:pageviews',
-                                 sort='-ga:pageviews',
-                                 dimensions="ga:mobileDeviceBranding, ga:mobileDeviceInfo",
-                                 max_results=10000,
-                                 end_date=end_date).execute()
+        try:
+            # Because of issues of invalid responses, we are going to make these requests
+            # ourselves.
+            headers = {'authorization': 'Bearer ' + self.token}
+
+            args = dict( ids='ga:' + self.profile_id,
+                         metrics='ga:pageviews',
+                         sort='-ga:pageviews',
+                         dimensions="ga:mobileDeviceBranding, ga:mobileDeviceInfo",
+                         max_results=10000)
+            args['start-date'] = start_date
+            args['end-date'] = end_date
+
+            results = self._get_json(args)
+        except Exception, e:
+            log.exception(e)
+            results = dict(url=[])
+
 
         result_data = results.get('rows')
         data = {}

--- a/ckanext/ga_report/ga_auth.py
+++ b/ckanext/ga_report/ga_auth.py
@@ -36,7 +36,7 @@
     credentials = _prepare_credentials(token_file, credentials_file)
     http = credentials.authorize(http)  # authorize the http object
 
-    return build('analytics', 'v3', http=http)
+    return credentials.access_token, build('analytics', 'v3', http=http)
 
 
 def get_profile_id(service):

--- a/ckanext/ga_report/ga_model.py
+++ b/ckanext/ga_report/ga_model.py
@@ -114,7 +114,7 @@
     >>> normalize_url('http://data.gov.uk/dataset/weekly_fuel_prices')
     '/dataset/weekly_fuel_prices'
     '''
-    return '/' + '/'.join(url.split('/')[3:])
+    return url #'/' + '/'.join(url.split('/')[3:])
 
 
 def _get_package_and_publisher(url):
@@ -125,12 +125,12 @@
         dataset_ref = dataset_match.groups()[0]
         dataset = model.Package.get(dataset_ref)
         if dataset:
-            publisher_groups = dataset.get_groups('publisher')
+            publisher_groups = dataset.get_groups('organization')
             if publisher_groups:
                 return dataset_ref,publisher_groups[0].name
         return dataset_ref, None
     else:
-        publisher_match = re.match('/publisher/([^/]+)(/.*)?', url)
+        publisher_match = re.match('/organization/([^/]+)(/.*)?', url)
         if publisher_match:
             return None, publisher_match.groups()[0]
     return None, None
@@ -323,11 +323,11 @@
     """
     toplevel = get_top_level()
     publishers = model.Session.query(model.Group).\
-        filter(model.Group.type=='publisher').\
+        filter(model.Group.type=='organization').\
         filter(model.Group.state=='active').all()
     for publisher in publishers:
         views, visits, subpub = update_publisher(period_name, publisher, publisher.name)
-        parent, parents = '', publisher.get_groups('publisher')
+        parent, parents = '', publisher.get_parent_groups(type='organization')
         if parents:
             parent = parents[0].name
         item = model.Session.query(GA_Publisher).\
@@ -377,15 +377,12 @@
                      model.Member.table_name == 'group' and \
                      model.Member.state == 'active').\
            filter(model.Member.id==None).\
-           filter(model.Group.type=='publisher').\
+           filter(model.Group.type=='organization').\
            order_by(model.Group.name).all()
 
 def get_children(publisher):
-    '''Finds child publishers for the given publisher (object). (Not recursive)'''
-    from ckan.model.group import HIERARCHY_CTE
-    return model.Session.query(model.Group).\
-           from_statement(HIERARCHY_CTE).params(id=publisher.id, type='publisher').\
-           all()
+    '''Finds child publishers for the given publisher (object). (Not recursive i.e. returns one level)'''
+    return publisher.get_children_groups(type='organization')
 
 def go_down_tree(publisher):
     '''Provided with a publisher object, it walks down the hierarchy and yields each publisher,

--- a/ckanext/ga_report/helpers.py
+++ b/ckanext/ga_report/helpers.py
@@ -71,7 +71,7 @@
 def single_popular_dataset_html(top=20):
     dataset_dict = single_popular_dataset(top)
     groups = package.get('groups', [])
-    publishers = [ g for g in groups if g.get('type') == 'publisher' ]
+    publishers = [ g for g in groups if g.get('type') == 'organization' ]
     publisher = publishers[0] if publishers else {'name':'', 'title': ''}
     context = {
         'dataset': dataset_dict,
@@ -107,12 +107,22 @@
     for entry in entries:
         if len(datasets) < count:
             p = model.Package.get(entry.url[len('/dataset/'):])
+
             if not p:
                 _log.warning("Could not find Package for {url}".format(url=entry.url))
                 continue
 
+            if not p.state == 'active':
+                _log.warning("Package {0} is not active, it is {1}".format(p.name, p.state))
+                continue
+
+            if not p.private == False:
+                _log.warning("Package {0} is private {1}".format(p.name, p.state))
+                continue
+
             if not p in datasets:
                 datasets[p] = {'views':0, 'visits': 0}
+
             datasets[p]['views'] = datasets[p]['views'] + int(entry.pageviews)
             datasets[p]['visits'] = datasets[p]['visits'] + int(entry.visits)
 

--- a/ckanext/ga_report/plugin.py
+++ b/ckanext/ga_report/plugin.py
@@ -35,49 +35,49 @@
     def after_map(self, map):
         # GaReport
         map.connect(
-            '/data/site-usage',
+            '/site-usage',