Fixed the publisher information (to enable leaderboard) to show the number
Fixed the publisher information (to enable leaderboard) to show the number
of child publishers and the total for all sub-publishers

file:a/README.rst -> file:b/README.rst
--- a/README.rst
+++ b/README.rst
@@ -31,11 +31,10 @@
 2. Ensure you development.ini (or similar) contains the info about your Google Analytics account and configuration::
 
       googleanalytics.id = UA-1010101-1
-      googleanalytics.username = googleaccount@gmail.com
-      googleanalytics.password = googlepassword
+      googleanalytics.account = Account name (e.g. data.gov.uk, see top level item at https://www.google.com/analytics)
       ga-report.period = monthly
 
-   Note that your password will be readable by system administrators on your server. Rather than use sensitive account details, it is suggested you give access to the GA account to a new Google account that you create just for this purpose.
+   Note that your credentials will be readable by system administrators on your server. Rather than use sensitive account details, it is suggested you give access to the GA account to a new Google account that you create just for this purpose.
 
 3. Set up this extension's database tables using a paster command. (Ensure your CKAN pyenv is still activated, run the command from ``src/ckanext-ga-report``, alter the ``--config`` option to point to your site config file)::
 

--- a/ckanext/ga_report/command.py
+++ b/ckanext/ga_report/command.py
@@ -1,7 +1,10 @@
 import logging
+import datetime
 
 from ckan.lib.cli import CkanCommand
-# No other CKAN imports allowed until _load_config is run, or logging is disabled
+# No other CKAN imports allowed until _load_config is run,
+# or logging is disabled
+
 
 class InitDB(CkanCommand):
     """Initialise the extension's database tables
@@ -26,6 +29,12 @@
 
 class GetAuthToken(CkanCommand):
     """ Get's the Google auth token
+
+    Usage: paster getauthtoken <credentials_file>
+
+    Where <credentials_file> is the file name containing the details
+    for the service (obtained from https://code.google.com/apis/console).
+    By default this is set to credentials.json
     """
     summary = __doc__.split('\n')[0]
     usage = __doc__
@@ -33,10 +42,17 @@
     min_args = 0
 
     def command(self):
-        from ga_auth import initialize_service
-        initialize_service('token.dat',
-                           self.args[0] if self.args
-                                        else 'credentials.json')
+        """
+        In this case we don't want a valid service, but rather just to
+        force the user through the auth flow. We allow this to complete to
+        act as a form of verification instead of just getting the token and
+        assuming it is correct.
+        """
+        from ga_auth import init_service
+        init_service('token.dat',
+                      self.args[0] if self.args
+                                   else 'credentials.json')
+
 
 class LoadAnalytics(CkanCommand):
     """Get data from Google Analytics API and save it
@@ -61,18 +77,20 @@
     def command(self):
         self._load_config()
 
-        from ga_auth import initialize_service
+        from download_analytics import DownloadAnalytics
+        from ga_auth import (init_service, get_profile_id)
+
         try:
-            svc = initialize_service(self.args[0], None)
+            svc = init_service(self.args[0], None)
         except TypeError:
-            print 'Have you correctly run the getauthtoken task and specified the correct file here'
+            print ('Have you correctly run the getauthtoken task and '
+                   'specified the correct file here')
             return
 
-        from download_analytics import DownloadAnalytics
-        from ga_auth import get_profile_id
         downloader = DownloadAnalytics(svc, profile_id=get_profile_id(svc))
 
-        time_period = self.args[1] if self.args and len(self.args) > 1 else 'latest'
+        time_period = self.args[1] if self.args and len(self.args) > 1 \
+            else 'latest'
         if time_period == 'all':
             downloader.all_()
         elif time_period == 'latest':
@@ -81,4 +99,3 @@
             since_date = datetime.datetime.strptime(time_period, '%Y-%m-%d')
             downloader.since_date(since_date)
 
-

--- a/ckanext/ga_report/download_analytics.py
+++ b/ckanext/ga_report/download_analytics.py
@@ -1,3 +1,4 @@
+import os
 import logging
 import datetime
 
@@ -84,22 +85,27 @@
                      self.get_full_period_name(period_name, period_complete_day),
                      start_date.strftime('%Y %m %d'),
                      end_date.strftime('%Y %m %d'))
-            data = self.download(start_date, end_date)
-            log.info('Storing Analytics for period "%s"',
+            """
+            data = self.download(start_date, end_date, '~/dataset/[a-z0-9-_]+')
+            log.info('Storing Dataset Analytics for period "%s"',
                      self.get_full_period_name(period_name, period_complete_day))
-            self.store(period_name, period_complete_day, data)
-
-
-    def download(self, start_date, end_date):
+            self.store(period_name, period_complete_day, data, )
+
+            data = self.download(start_date, end_date, '~/publisher/[a-z0-9-_]+')
+            log.info('Storing Publisher Analytics for period "%s"',
+                     self.get_full_period_name(period_name, period_complete_day))
+            self.store(period_name, period_complete_day, data,)
+            """
+            ga_model.update_publisher_stats(period_name) # about 30 seconds.
+            self.sitewide_stats( period_name )
+
+
+    def download(self, start_date, end_date, path='~/dataset/[a-z0-9-_]+'):
         '''Get data from GA for a given time period'''
         start_date = start_date.strftime('%Y-%m-%d')
         end_date = end_date.strftime('%Y-%m-%d')
-        # url
-        #query = 'ga:pagePath=~^%s,ga:pagePath=~^%s' % \
-        #        (PACKAGE_URL, self.resource_url_tag)
-        query = 'ga:pagePath=~^/dataset/'
-        #query = 'ga:pagePath=~^/User/'
-        metrics = 'ga:uniquePageviews'
+        query = 'ga:pagePath=%s$' % path
+        metrics = 'ga:uniquePageviews, ga:visitors'
         sort = '-ga:uniquePageviews'
 
         # Supported query params at
@@ -110,35 +116,184 @@
                                  start_date=start_date,
                                  metrics=metrics,
                                  sort=sort,
-                                 end_date=end_date).execute()
-        self.print_results(results)
-
-#        for entry in GA.ga_query(query_filter=query,
-#                                 from_date=start_date,
-#                                 metrics=metrics,
-#                                 sort=sort,
-#                                 to_date=end_date):
-#            print entry, type(entry)
-#            import pdb; pdb.set_trace()
-#            for dim in entry.dimension:
-#                if dim.name == "ga:pagePath":
-#                    package = dim.value
-#                    count = entry.get_metric(
-#                        'ga:uniquePageviews').value or 0
-#                    packages[package] = int(count)
-        return []
-
-    def print_results(self, results):
-        import pprint
-        pprint.pprint(results)
-        if results:
-            print 'Profile: %s' % results.get('profileInfo').get('profileName')
+                                 dimensions="ga:pagePath",
+                                 max_results=10000,
+                                 end_date=end_date).execute()
+
+        if os.getenv('DEBUG'):
+            import pprint
+            pprint.pprint(results)
             print 'Total results: %s' % results.get('totalResults')
-            print 'Total Visits: %s' % results.get('rows', [[-1]])[0][0]
-        else:
-            print 'No results found'
+
+        packages = []
+        for entry in results.get('rows'):
+            (loc,pageviews,visits) = entry
+            packages.append( ('http:/' + loc, pageviews, visits,) ) # Temporary hack
+        return dict(url=packages)
 
     def store(self, period_name, period_complete_day, data):
         if 'url' in data:
             ga_model.update_url_stats(period_name, period_complete_day, data['url'])
 
+    def sitewide_stats(self, period_name):  # period_name is split on '-' into year and month
+        import calendar
+        year, month = period_name.split('-')
+        _, last_day_of_month = calendar.monthrange(int(year), int(month))
+        # Query window: the first to the last calendar day of that month.
+        start_date = '%s-01' % period_name
+        end_date = '%s-%s' % (period_name, last_day_of_month)
+        print 'Sitewide_stats for %s (%s -> %s)' % (period_name, start_date, end_date)
+        # Each method named below is called with (start_date, end_date, period_name).
+        funcs = ['_totals_stats', '_social_stats', '_os_stats',
+                 '_locale_stats', '_browser_stats', '_mobile_stats']
+        for f in funcs:
+            print ' + Fetching %s stats' % f.split('_')[1]
+            getattr(self, f)(start_date, end_date, period_name)
+
+    def _get_results(self, result_data, f):  # sums int(result[1]) per key f(result)
+        data = {}
+        for result in result_data:
+            key = f(result)
+            data[key] = data.get(key, 0) + int(result[1])  # int() as in the sibling *_stats methods
+        return data
+
+    def _totals_stats(self, start_date, end_date, period_name):
+        """ Stores sitewide figures under the "Totals" stat: unique pageviews, then per-visit metrics """
+        results = self.service.data().ga().get(
+                                 ids='ga:' + self.profile_id,
+                                 start_date=start_date,
+                                 metrics='ga:uniquePageviews',
+                                 sort='-ga:uniquePageviews',
+                                 max_results=10000,
+                                 end_date=end_date).execute()
+        result_data = results.get('rows')  # NOTE(review): indexed below without a None check - confirm GA always returns a row here
+        ga_model.update_sitewide_stats(period_name, "Totals", {'Total pageviews': result_data[0][0]})
+        # Second query: four more metrics, read from the first row and stored under "Totals" too.
+        results = self.service.data().ga().get(
+                                 ids='ga:' + self.profile_id,
+                                 start_date=start_date,
+                                 metrics='ga:pageviewsPerVisit,ga:bounces,ga:avgTimeOnSite,ga:percentNewVisits',
+                                 max_results=10000,
+                                 end_date=end_date).execute()
+        result_data = results.get('rows')
+        data = {
+            'Pages per visit': result_data[0][0],
+            'Bounces': result_data[0][1],
+            'Average time on site': result_data[0][2],
+            'Percent new visits': result_data[0][3],
+        }
+        ga_model.update_sitewide_stats(period_name, "Totals", data)
+
+
+    def _locale_stats(self, start_date, end_date, period_name):
+        """ Stores unique pageviews per language ("Languages") and per country ("Country") """
+        results = self.service.data().ga().get(
+                                 ids='ga:' + self.profile_id,
+                                 start_date=start_date,
+                                 metrics='ga:uniquePageviews',
+                                 sort='-ga:uniquePageviews',
+                                 dimensions="ga:language,ga:country",
+                                 max_results=10000,
+                                 end_date=end_date).execute()
+        result_data = results.get('rows') or []  # no 'rows' when GA has nothing to report
+        data = {}
+        for result in result_data:
+            data[result[0]] = data.get(result[0], 0) + int(result[2])
+        ga_model.update_sitewide_stats(period_name, "Languages", data)
+        # Same rows again, this time totalled by the second dimension (country).
+        data = {}
+        for result in result_data:
+            data[result[1]] = data.get(result[1], 0) + int(result[2])
+        ga_model.update_sitewide_stats(period_name, "Country", data)
+
+
+    def _social_stats(self, start_date, end_date, period_name):
+        """ Stores unique pageviews per referring social network as "Social sources" """
+        results = self.service.data().ga().get(
+                                 ids='ga:' + self.profile_id,
+                                 start_date=start_date,
+                                 metrics='ga:uniquePageviews',
+                                 sort='-ga:uniquePageviews',
+                                 dimensions="ga:socialNetwork,ga:referralPath",
+                                 max_results=10000,
+                                 end_date=end_date).execute()
+        result_data = results.get('rows') or []  # no 'rows' when GA has nothing to report
+        data = {}
+        for result in result_data:
+            # Each row is [socialNetwork, referralPath, uniquePageviews];
+            # rows whose network is '(not set)' are left out of the totals.
+            # (The referral path is requested but not stored.)
+            if result[0] != '(not set)':
+                data[result[0]] = data.get(result[0], 0) + int(result[2])
+        ga_model.update_sitewide_stats(period_name, "Social sources", data)
+
+
+    def _os_stats(self, start_date, end_date, period_name):
+        """ Stores unique pageviews per operating system and per "OS (version)" pair """
+        results = self.service.data().ga().get(
+                                 ids='ga:' + self.profile_id,
+                                 start_date=start_date,
+                                 metrics='ga:uniquePageviews',
+                                 sort='-ga:uniquePageviews',
+                                 dimensions="ga:operatingSystem,ga:operatingSystemVersion",
+                                 max_results=10000,
+                                 end_date=end_date).execute()
+        result_data = results.get('rows') or []  # no 'rows' when GA has nothing to report
+        data = {}
+        for result in result_data:
+            data[result[0]] = data.get(result[0], 0) + int(result[2])
+        ga_model.update_sitewide_stats(period_name, "Operating Systems", data)
+        # Per-version figures: one entry per row, value kept as returned by GA.
+        data = {}
+        for result in result_data:
+            key = "%s (%s)" % (result[0],result[1])
+            data[key] = result[2]
+        ga_model.update_sitewide_stats(period_name, "Operating Systems versions", data)
+
+
+    def _browser_stats(self, start_date, end_date, period_name):
+        """ Stores unique pageviews per browser and per "browser (version)" pair """
+        results = self.service.data().ga().get(
+                                 ids='ga:' + self.profile_id,
+                                 start_date=start_date,
+                                 metrics='ga:uniquePageviews',
+                                 sort='-ga:uniquePageviews',
+                                 dimensions="ga:browser,ga:browserVersion",
+                                 max_results=10000,
+                                 end_date=end_date).execute()
+        result_data = results.get('rows') or []  # no 'rows' when GA has nothing to report
+        data = {}
+        for result in result_data:
+            data[result[0]] = data.get(result[0], 0) + int(result[2])
+        ga_model.update_sitewide_stats(period_name, "Browsers", data)
+        # Per-version figures: one entry per row, value kept as returned by GA.
+        data = {}
+        for result in result_data:
+            key = "%s (%s)" % (result[0], result[1])
+            data[key] = result[2]
+        ga_model.update_sitewide_stats(period_name, "Browser versions", data)
+
+
+    def _mobile_stats(self, start_date, end_date, period_name):
+        """ Stores unique pageviews per mobile brand and per mobile device """
+
+        results = self.service.data().ga().get(
+                                 ids='ga:' + self.profile_id,
+                                 start_date=start_date,
+                                 metrics='ga:uniquePageviews',
+                                 sort='-ga:uniquePageviews',
+                                 dimensions="ga:mobileDeviceBranding, ga:mobileDeviceInfo",
+                                 max_results=10000,
+                                 end_date=end_date).execute()
+
+        result_data = results.get('rows') or []  # no 'rows' when GA has nothing to report
+        data = {}
+        for result in result_data:
+            data[result[0]] = data.get(result[0], 0) + int(result[2])
+        ga_model.update_sitewide_stats(period_name, "Mobile brands", data)
+        # Same rows again, this time totalled by the second dimension (device).
+        data = {}
+        for result in result_data:
+            data[result[1]] = data.get(result[1], 0) + int(result[2])
+        ga_model.update_sitewide_stats(period_name, "Mobile devices", data)
+

--- a/ckanext/ga_report/ga_auth.py
+++ b/ckanext/ga_report/ga_auth.py
@@ -1,3 +1,4 @@
+import os
 import httplib2
 from apiclient.discovery import build
 from oauth2client.client import flow_from_clientsecrets
@@ -7,8 +8,12 @@
 from pylons import config
 
 
-def _prepare_credentials( token_filename, credentials_filename ):
-    storage = Storage( token_filename )
+def _prepare_credentials(token_filename, credentials_filename):
+    """
+    Either returns the user's oauth credentials or uses the credentials
+    file to generate a token (by forcing the user to login in the browser)
+    """
+    storage = Storage(token_filename)
     credentials = storage.get()
 
     if credentials is None or credentials.invalid:
@@ -19,7 +24,13 @@
 
     return credentials
 
-def initialize_service( token_file, credentials_file ):
+
+def init_service(token_file, credentials_file):
+    """
+    Given a file containing the user's oauth token (and another with
+    credentials in case we need to generate the token) will return a
+    service object representing the analytics API.
+    """
     http = httplib2.Http()
 
     credentials = _prepare_credentials(token_file, credentials_file)
@@ -27,19 +38,33 @@
 
     return build('analytics', 'v3', http=http)
 
+
 def get_profile_id(service):
-    # Get a list of all Google Analytics accounts for this user
+    """
+    Get the ID of the first profile for the web property given by the
+    'googleanalytics.id' configuration option. The account is chosen
+    by matching its name against 'googleanalytics.account' (in case
+    the user who authorised the service has several accounts).
+    Returns None if no account, matching account or profile is found.
+    """
     accounts = service.management().accounts().list().execute()
 
-    if accounts.get('items'):
-        firstAccountId = accounts.get('items')[0].get('id')
-        webPropertyId = config.get('googleanalytics.id')
-        profiles = service.management().profiles().list(
-                    accountId=firstAccountId,
-                    webPropertyId=webPropertyId).execute()
+    if not accounts.get('items'):
+        return None
 
-        if profiles.get('items'):
-            # return the first Profile ID
-            return profiles.get('items')[0].get('id')
+    accountName = config.get('googleanalytics.account')
+    webPropertyId = config.get('googleanalytics.id')
+    accountId = None
+    for acc in accounts.get('items'):
+        if acc.get('name') == accountName:
+            accountId = acc.get('id')
+    if accountId is None:
+        return None  # no account carries the configured name
+
+    profiles = service.management().profiles().list(
+        accountId=accountId, webPropertyId=webPropertyId).execute()
+    if profiles.get('items'):
+        return profiles.get('items')[0].get('id')
 
     return None
+

--- a/ckanext/ga_report/ga_model.py
+++ b/ckanext/ga_report/ga_model.py
@@ -3,30 +3,80 @@
 
 from sqlalchemy import Table, Column, MetaData
 from sqlalchemy import types
-from sqlalchemy.sql import select, text
+from sqlalchemy.sql import select
+from sqlalchemy.orm import mapper
 from sqlalchemy import func
 
 import ckan.model as model
-from ckan.model.types import JsonType
 from ckan.lib.base import *
 
 def make_uuid():
     return unicode(uuid.uuid4())
 
+
+
+class GA_Url(object):
+    """Class mapped to the ga_url table; keyword arguments become instance attributes."""
+    def __init__(self, **kwargs):
+        for k,v in kwargs.items():
+            setattr(self, k, v)
+
+class GA_Stat(object):
+    """Class mapped to the ga_stat table; keyword arguments become instance attributes."""
+    def __init__(self, **kwargs):
+        for k,v in kwargs.items():
+            setattr(self, k, v)
+
+class GA_Publisher(object):
+    """Class mapped to the ga_publisher table; keyword arguments become instance attributes."""
+    def __init__(self, **kwargs):
+        for k,v in kwargs.items():
+            setattr(self, k, v)
+
+
+metadata = MetaData()  # holds the three tables below; init_tables() calls create_all() on it
+url_table = Table('ga_url', metadata,
+                      Column('id', types.UnicodeText, primary_key=True,
+                             default=make_uuid),
+                      Column('period_name', types.UnicodeText),
+                      Column('period_complete_day', types.Integer),
+                      Column('pageviews', types.UnicodeText),  # text column; update_publisher() applies int()
+                      Column('visitors', types.UnicodeText),  # text column; update_publisher() applies int()
+                      Column('url', types.UnicodeText),
+                      Column('department_id', types.UnicodeText),
+                )
+mapper(GA_Url, url_table)
+# ga_stat: update_sitewide_stats() keeps one row per (period_name, stat_name, key).
+stat_table = Table('ga_stat', metadata,
+                  Column('id', types.UnicodeText, primary_key=True,
+                         default=make_uuid),
+                  Column('period_name', types.UnicodeText),
+                  Column('stat_name', types.UnicodeText),
+                  Column('key', types.UnicodeText),
+                  Column('value', types.UnicodeText), )
+mapper(GA_Stat, stat_table)
+
+# ga_publisher: update_publisher_stats() keeps one row per (period_name, publisher_name).
+pub_table = Table('ga_publisher', metadata,
+                  Column('id', types.UnicodeText, primary_key=True,
+                         default=make_uuid),
+                  Column('period_name', types.UnicodeText),
+                  Column('publisher_name', types.UnicodeText),
+                  Column('views', types.UnicodeText),
+                  Column('visitors', types.UnicodeText),
+                  Column('toplevel', types.Boolean, default=False),
+                  Column('subpublishercount', types.Integer, default=0),
+                  Column('parent', types.UnicodeText),
+)
+mapper(GA_Publisher, pub_table)
+
+
 def init_tables():
-    metadata = MetaData()
-    package_stats = Table('ga_url', metadata,
-                          Column('id', types.UnicodeText, primary_key=True, default=make_uuid),
-                          Column('period_name', types.UnicodeText),
-                          Column('period_complete_day', types.Integer),
-                          Column('visits', types.Integer),
-                          Column('group_id', types.String(60)),
-                          Column('next_page', JsonType),
-                          )
     metadata.create_all(model.meta.engine)
 
 
 cached_tables = {}
+
 
 def get_table(name):
     if name not in cached_tables:
@@ -45,6 +95,7 @@
     '''
     url = re.sub('https?://(www\.)?data.gov.uk', '', url)
     return url
+
 
 def _get_department_id_of_url(url):
     # e.g. /dataset/fuel_prices
@@ -56,36 +107,144 @@
         if dataset:
             publisher_groups = dataset.get_groups('publisher')
             if publisher_groups:
-                return publisher_groups[0].id
+                return publisher_groups[0].name
+    else:
+        publisher_match = re.match('/publisher/([^/]+)(/.*)?', url)
+        if publisher_match:
+            return publisher_match.groups()[0]
+
+
+def update_sitewide_stats(period_name, stat_name, data):  # data: dict of key -> value for this stat
+    for k,v in data.iteritems():
+        item = model.Session.query(GA_Stat).\
+            filter(GA_Stat.period_name==period_name).\
+            filter(GA_Stat.key==k).\
+            filter(GA_Stat.stat_name==stat_name).first()
+        if item:
+            item.period_name = period_name
+            item.key = k
+            item.value = v
+            model.Session.add(item)
+        else:
+            # no row yet for this period / stat / key: create it
+            values = {'id': make_uuid(),
+                     'period_name': period_name,
+                     'key': k,
+                     'value': v,
+                     'stat_name': stat_name
+                     }
+            model.Session.add(GA_Stat(**values))
+        model.Session.commit()  # committed once per key
+
+
 
 def update_url_stats(period_name, period_complete_day, url_data):
-    table = get_table('ga_url')
-    connection = model.Session.connection()
-    for url, views, next_page in url_data:
+    for url, views, visitors in url_data:
         url = _normalize_url(url)
         department_id = _get_department_id_of_url(url)
+
         # see if the row for this url & month is in the table already
-        s = select([func.count(id_col)],
-                   table.c.period_name == period_name,
-                   table.c.url == url)
-        count = connection.execute(s).fetchone()
-        if count and count[0]:
-            # update the row
-            connection.execute(table.update()\
-                .where(table.c.period_name == period_name,
-                       table.c.url == url)\
-                .values(period_complete_day=period_complete_day,
-                        views=views,
-                        department_id=department_id,
-                        next_page=next_page))
+        item = model.Session.query(GA_Url).\
+            filter(GA_Url.period_name==period_name).\
+            filter(GA_Url.url==url).first()
+        if item:
+            item.period_complete_day = period_complete_day  # refreshed with the figures, as the removed UPDATE did
+            item.pageviews = views
+            item.visitors = visitors
+            item.department_id = department_id
+            model.Session.add(item)
         else:
             # create the row
-            values = {'period_name': period_name,
+            values = {'id': make_uuid(),
+                      'period_name': period_name,
                       'period_complete_day': period_complete_day,
                       'url': url,
-                      'views': views,
-                      'department_id': department_id,
-                      'next_page': next_page}
-            connection.execute(stats.insert()\
-                               .values(**values))
-
+                      'pageviews': views,
+                      'visitors': visitors,
+                      'department_id': department_id
+                     }
+            model.Session.add(GA_Url(**values))
+        model.Session.commit()
+
+
+
+def update_publisher_stats(period_name):
+    """
+    Updates the publisher stats from the data retrieved for /dataset/*
+    and /publisher/*. Runs against each active publisher and stores the
+    totals for the entire tree beneath it in its ga_publisher row.
+    """
+    toplevel = get_top_level()
+    publishers = model.Session.query(model.Group).\
+        filter(model.Group.type=='publisher').\
+        filter(model.Group.state=='active').all()
+    for publisher in publishers:
+        views, visitors, subpub = update_publisher(period_name, publisher, publisher.name)
+        parent, parents = '', publisher.get_groups('publisher')  # parent stays '' when there are no parent groups
+        if parents:
+            parent = parents[0].name
+        item = model.Session.query(GA_Publisher).\
+            filter(GA_Publisher.period_name==period_name).\
+            filter(GA_Publisher.publisher_name==publisher.name).first()
+        if item:
+            item.views = views
+            item.visitors = visitors
+            item.publisher_name = publisher.name
+            item.toplevel = publisher in toplevel
+            item.subpublishercount = subpub
+            item.parent = parent
+            model.Session.add(item)
+        else:
+            # no row yet for this period / publisher: create it
+            values = {'id': make_uuid(),
+                     'period_name': period_name,
+                     'publisher_name': publisher.name,
+                     'views': views,
+                     'visitors': visitors,
+                     'toplevel': publisher in toplevel,
+                     'subpublishercount': subpub,
+                     'parent': parent
+                     }
+            model.Session.add(GA_Publisher(**values))
+        model.Session.commit()  # committed once per publisher
+
+
+def update_publisher(period_name, pub, part=''):  # 'part' is not read in the body
+    views,visitors,subpub = 0, 0, 0
+    for publisher in go_down_tree(pub):
+        subpub = subpub + 1
+        items = model.Session.query(GA_Url).\
+                filter(GA_Url.period_name==period_name).\
+                filter(GA_Url.department_id==publisher.name).all()
+        for item in items:
+            views = views + int(item.pageviews)
+            visitors = visitors + int(item.visitors)
+    # go_down_tree() also yields pub itself, hence the -1 on the sub-publisher count.
+    return views, visitors, (subpub-1)
+
+
+def get_top_level():
+    '''Returns the top level publishers: those that no active 'group' Member row points at.'''
+    return model.Session.query(model.Group).\
+           outerjoin(model.Member, (model.Member.table_id == model.Group.id) &
+                     (model.Member.table_name == 'group') &
+                     (model.Member.state == 'active')).\
+           filter(model.Member.id==None).\
+           filter(model.Group.type=='publisher').\
+           order_by(model.Group.name).all()
+
+def get_children(publisher):
+    '''Finds child publishers for the given publisher (object). (Not recursive)'''
+    from ckan.model.group import HIERARCHY_CTE
+    return model.Session.query(model.Group).\
+           from_statement(HIERARCHY_CTE).params(id=publisher.id, type='publisher').\
+           all()  # HIERARCHY_CTE is run with the publisher's id and type 'publisher' as parameters
+
+def go_down_tree(publisher):
+    '''Walks down the hierarchy from the given publisher object, yielding it first and then,
+    for each result of get_children(), that child followed by everything beneath it.'''
+    yield publisher
+    for child in get_children(publisher):
+        for grandchild in go_down_tree(child):
+            yield grandchild
+

--- /dev/null
+++ b/ckanext/ga_report/tests/test_api.py
@@ -1,1 +1,38 @@
+import os
+import datetime
+from nose.tools import assert_equal
+from ckanext.ga_report.download_analytics import DownloadAnalytics
+from ckanext.ga_report.ga_auth import (init_service, get_profile_id)
+from ckanext.ga_report.ga_model import init_tables
 
+class TestAPI:
+    """Runs real downloads through the service built from token.dat / credentials.json."""
+    @classmethod
+    def setup_class(cls):
+        if not os.path.exists("token.dat") or not os.path.exists("credentials.json"):
+            print '*' * 60
+            print "Tests may not run without first having run the auth process"
+            print '*' * 60
+        init_tables()
+
+    @classmethod
+    def teardown_class(cls):
+        pass
+
+    def test_latest(self):
+        svc = init_service("token.dat", "credentials.json")
+        try:
+            downloader = DownloadAnalytics(svc, profile_id=get_profile_id(svc))
+            downloader.latest()
+        except Exception as e:
+            assert False, e
+
+    def test_since(self):
+        svc = init_service("token.dat", "credentials.json")
+        downloader = DownloadAnalytics(svc, profile_id=get_profile_id(svc))
+        try:
+            # Start 30 days in the past; subtracting timedelta(days=-30) gave a future date.
+            downloader.since_date(datetime.datetime.now() - datetime.timedelta(days=30))
+        except Exception as e:
+            assert False, e
+

--- /dev/null
+++ b/ckanext/ga_report/tests/test_auth.py
@@ -1,1 +1,41 @@
+import os
+from nose.tools import assert_equal
+from ckanext.ga_report.ga_auth import (init_service, get_profile_id)
 
+class TestAuth:
+
+    @classmethod
+    def setup_class(cls):
+        if not os.path.exists("token.dat") or not os.path.exists("credentials.json"):
+            print '*' * 60
+            print "Tests may not run without first having run the auth process"
+            print '*' * 60
+
+    @classmethod
+    def teardown_class(cls):
+        pass
+
+    def test_init(self):
+        try:
+            res = init_service(None, None)
+            assert False, "Init service worked without credentials or tokens"
+        except TypeError:
+            pass
+
+    def test_init_with_token(self):
+        res = init_service("token.dat", None)
+        assert res is not None, "Init service worked without credentials"
+
+    def test_init_with_token_and_credentials(self):
+