Change the rules for top datasets
Change the rules for top datasets

file:a/README.rst -> file:b/README.rst
--- a/README.rst
+++ b/README.rst
@@ -26,7 +26,7 @@
 1. Activate you CKAN python environment and install this extension's software::
 
     $ pyenv/bin/activate
-    $ pip install -e  git+https://github.com/okfn/ckanext-ga-report.git#egg=ckanext-ga-report
+    $ pip install -e  git+https://github.com/datagovuk/ckanext-ga-report.git#egg=ckanext-ga-report
 
 2. Ensure you development.ini (or similar) contains the info about your Google Analytics account and configuration::
 

--- a/ckanext/ga_report/command.py
+++ b/ckanext/ga_report/command.py
@@ -66,8 +66,7 @@
     And where <time-period> is:
         all         - data for all time
         latest      - (default) just the 'latest' data
-        YYYY-MM-DD  - just data for all time periods going
-                      back to (and including) this date
+        YYYY-MM     - just data for the specific month
     """
     summary = __doc__.split('\n')[0]
     usage = __doc__
@@ -96,6 +95,7 @@
         elif time_period == 'latest':
             downloader.latest()
         else:
-            since_date = datetime.datetime.strptime(time_period, '%Y-%m-%d')
-            downloader.since_date(since_date)
+            # The month to use
+            for_date = datetime.datetime.strptime(time_period, '%Y-%m')
+            downloader.specific_month(for_date)
 

--- a/ckanext/ga_report/controller.py
+++ b/ckanext/ga_report/controller.py
@@ -1,10 +1,142 @@
 import logging
-from ckan.lib.base import BaseController, c, render
-import report_model
+import operator
+from ckan.lib.base import BaseController, c, render, request, response
+
+import sqlalchemy
+from sqlalchemy import func, cast, Integer
+import ckan.model as model
+from ga_model import GA_Url, GA_Stat
 
 log = logging.getLogger('ckanext.ga-report')
 
+
+def _get_month_name(strdate):
+    import calendar
+    import time
+    year, month = time.strptime(strdate, '%Y-%m')[:2]  # 'YYYY-MM' -> (year, month)
+    return '%s %s' % (calendar.month_name[month], year)  # e.g. 'October 2012'
+
+
+def _month_details(cls):
+    """Return (period_name, month label) pairs for cls, highest period_name first."""
+    periods = model.Session.query(cls.period_name).distinct().all()
+    labelled = [(row[0], _get_month_name(row[0])) for row in periods]
+    labelled.sort(key=operator.itemgetter(0), reverse=True)
+    return labelled
+
+
 class GaReport(BaseController):
+
+    def csv(self, month):
+        """Write every GA_Stat row for `month` to the response as UTF-8 CSV."""
+        import csv
+
+        # Run the query before touching the response.
+        query = model.Session.query(GA_Stat).filter(GA_Stat.period_name==month)
+        stats = query.order_by('GA_Stat.stat_name, GA_Stat.key').all()
+
+        response.headers['Content-Type'] = "text/csv; charset=utf-8"
+
+        out = csv.writer(response)
+        out.writerow(["Period", "Statistic", "Key", "Value"])
+
+        for stat in stats:
+            # Every column is written as UTF-8 encoded bytes.
+            columns = (stat.period_name, stat.stat_name, stat.key, stat.value)
+            out.writerow([text.encode('utf-8') for text in columns])
+
     def index(self):
-        return render('index.html')
 
+        # Get the month details by fetching distinct values and determining the
+        # month names from the values.
+        c.months = _month_details(GA_Stat)
+
+        # Work out which month to show, based on query params of the first item
+        c.month = request.params.get('month', c.months[0][0] if c.months else '')
+        c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month])
+
+        entries = model.Session.query(GA_Stat).\
+            filter(GA_Stat.stat_name=='Totals').\
+            filter(GA_Stat.period_name==c.month).\
+            order_by('ga_stat.key').all()
+        c.global_totals = [(s.key, s.value) for s in entries ]
+
+        keys = {
+            'Browser versions': 'browsers',
+            'Operating Systems versions': 'os',
+            'Social sources': 'social_networks',
+            'Languages': 'languages',
+            'Country': 'country'
+        }
+
+        for k, v in keys.iteritems():
+            entries = model.Session.query(GA_Stat).\
+                filter(GA_Stat.stat_name==k).\
+                filter(GA_Stat.period_name==c.month).\
+                order_by('ga_stat.value::int desc').all()
+            setattr(c, v, [(s.key, s.value) for s in entries ])
+
+
+        return render('ga_report/site/index.html')
+
+
+class GaPublisherReport(BaseController):
+    """
+    Displays the pageview and visit count for specific publishers based on
+    the datasets associated with the publisher.
+    """
+
+    def _set_month(self):
+        """Sets c.months, c.month and c.month_desc from the GA_Url periods."""
+        c.months = _month_details(GA_Url)
+
+        # Use the month in the query string, else the first entry of c.months
+        c.month = request.params.get('month', c.months[0][0] if c.months else '')
+        c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month])
+
+    def index(self):
+        """Lists the 20 publishers with the most page views in the month."""
+        self._set_month()
+
+        connection = model.Session.connection()
+        q = """
+            select department_id, sum(pageviews::int) views, sum(visitors::int) visits
+            from ga_url
+            where department_id <> ''
+                and period_name=%s
+            group by department_id order by views desc limit 20;
+        """
+        c.top_publishers = []
+        res = connection.execute(q, c.month)
+        for row in res:
+            publisher = model.Group.get(row[0])
+            if publisher is not None:  # the template dereferences the group
+                c.top_publishers.append((publisher, row[1], row[2]))
+
+        return render('ga_report/publisher/index.html')
+
+    def read(self, id):
+        """Shows one publisher's page views and its most viewed datasets."""
+        c.publisher = model.Group.get(id)
+        if c.publisher is None:
+            from ckan.lib.base import abort
+            abort(404, 'Publisher not found')
+        c.top_packages = [] # package, dataset_views in c.top_packages
+        self._set_month()
+
+        entry = model.Session.query(GA_Url).\
+            filter(GA_Url.url=='/publisher/%s' % c.publisher.name).\
+            filter(GA_Url.period_name==c.month).first()
+        c.publisher_page_views = entry.pageviews if entry else 0
+
+        entries = model.Session.query(GA_Url).\
+            filter(GA_Url.department_id==c.publisher.name).\
+            filter(GA_Url.period_name==c.month).\
+            order_by('ga_url.pageviews::int desc')[:20]
+        for entry in entries:
+            if entry.url.startswith('/dataset/'):
+                p = model.Package.get(entry.url[len('/dataset/'):])
+                c.top_packages.append((p,entry.pageviews,entry.visitors))
+
+        return render('ga_report/publisher/read.html')
+

--- a/ckanext/ga_report/download_analytics.py
+++ b/ckanext/ga_report/download_analytics.py
@@ -1,3 +1,4 @@
+import os
 import logging
 import datetime
 
@@ -20,8 +21,17 @@
         self.profile_id = profile_id
 
 
-    def all_(self):
-        self.since_date(datetime.datetime(2010, 1, 1))
+    def specific_month(self, date):
+        """Download and store analytics for the calendar month containing `date`."""
+        import calendar
+
+        year, month = date.year, date.month
+        days_in_month = calendar.monthrange(int(year), int(month))[1]
+        month_start = datetime.datetime(year, month, 1)
+        month_end = datetime.datetime(year, month, days_in_month)
+        period = (date.strftime(FORMAT_MONTH), days_in_month, month_start, month_end)
+        self.download_and_store((period,))
+
 
     def latest(self):
         if self.period == 'monthly':
@@ -36,13 +46,13 @@
         self.download_and_store(periods)
 
 
-    def since_date(self, since_date):
+    def for_date(self, for_date):
         assert isinstance(since_date, datetime.datetime)
         periods = [] # (period_name, period_complete_day, start_date, end_date)
         if self.period == 'monthly':
             first_of_the_months_until_now = []
-            year = since_date.year
-            month = since_date.month
+            year = for_date.year
+            month = for_date.month
             now = datetime.datetime.now()
             first_of_this_month = datetime.datetime(now.year, now.month, 1)
             while True:
@@ -84,18 +94,27 @@
                      self.get_full_period_name(period_name, period_complete_day),
                      start_date.strftime('%Y %m %d'),
                      end_date.strftime('%Y %m %d'))
-            data = self.download(start_date, end_date)
-            log.info('Storing Analytics for period "%s"',
+
+            data = self.download(start_date, end_date, '~/dataset/[a-z0-9-_]+')
+            log.info('Storing Dataset Analytics for period "%s"',
                      self.get_full_period_name(period_name, period_complete_day))
-            self.store(period_name, period_complete_day, data)
-
-
-    def download(self, start_date, end_date):
+            self.store(period_name, period_complete_day, data, )
+
+            data = self.download(start_date, end_date, '~/publisher/[a-z0-9-_]+')
+            log.info('Storing Publisher Analytics for period "%s"',
+                     self.get_full_period_name(period_name, period_complete_day))
+            self.store(period_name, period_complete_day, data,)
+
+            ga_model.update_publisher_stats(period_name) # about 30 seconds.
+            self.sitewide_stats( period_name )
+
+
+    def download(self, start_date, end_date, path='~/dataset/[a-z0-9-_]+'):
         '''Get data from GA for a given time period'''
         start_date = start_date.strftime('%Y-%m-%d')
         end_date = end_date.strftime('%Y-%m-%d')
-        query = 'ga:pagePath=~/dataset/[a-z0-9-]+$'
-        metrics = 'ga:uniquePageviews'
+        query = 'ga:pagePath=%s$' % path
+        metrics = 'ga:uniquePageviews, ga:visitors'
         sort = '-ga:uniquePageviews'
 
         # Supported query params at
@@ -110,18 +129,180 @@
                                  max_results=10000,
                                  end_date=end_date).execute()
 
-
-        import pprint
-        pprint.pprint(results)
-        print 'Total results: %s' % results.get('totalResults')
+        if os.getenv('DEBUG'):
+            import pprint
+            pprint.pprint(results)
+            print 'Total results: %s' % results.get('totalResults')
 
         packages = []
         for entry in results.get('rows'):
-            (loc,size,) = entry
-            packages.append( ('http:/' + loc,size, '',) ) # Temporary hack
+            (loc,pageviews,visits) = entry
+            packages.append( ('http:/' + loc, pageviews, visits,) ) # Temporary hack
         return dict(url=packages)
 
     def store(self, period_name, period_complete_day, data):
         if 'url' in data:
             ga_model.update_url_stats(period_name, period_complete_day, data['url'])
 
+    def sitewide_stats(self, period_name):
+        """Fetch and store every site-wide statistic for a 'YYYY-MM' period."""
+        import calendar
+        year, month = period_name.split('-')
+        last_day_of_month = calendar.monthrange(int(year), int(month))[1]
+
+        start_date = period_name + '-01'
+        end_date = '%s-%s' % (period_name, last_day_of_month)
+        print 'Sitewide_stats for %s (%s -> %s)' % (period_name, start_date, end_date)
+
+        for kind in ('totals', 'social', 'os', 'locale', 'browser', 'mobile'):
+            print ' + Fetching %s stats' % kind
+            fetch = getattr(self, '_%s_stats' % kind)
+            fetch(start_date, end_date, period_name)
+
+    @staticmethod  # takes no `self`; sums result[1] grouped by the key f(result)
+    def _get_results(result_data, f):
+        data = {}
+        for key, value in ((f(r), r[1]) for r in result_data):
+            data[key] = data.get(key, 0) + value
+        return data
+
+    def _totals_stats(self, start_date, end_date, period_name):
+        """ Stores the site-wide "Totals" stats, skipping any GA reply without rows """
+        results = self.service.data().ga().get(
+                                 ids='ga:' + self.profile_id,
+                                 start_date=start_date,
+                                 metrics='ga:uniquePageviews',
+                                 sort='-ga:uniquePageviews',
+                                 max_results=10000,
+                                 end_date=end_date).execute()
+        result_data = results.get('rows')  # no 'rows' key when GA has no data
+        if result_data:
+            ga_model.update_sitewide_stats(period_name, "Totals", {'Total pageviews': result_data[0][0]})
+
+        results = self.service.data().ga().get(
+                                 ids='ga:' + self.profile_id,
+                                 start_date=start_date,
+                                 metrics='ga:pageviewsPerVisit,ga:bounces,ga:avgTimeOnSite,ga:percentNewVisits',
+                                 max_results=10000,
+                                 end_date=end_date).execute()
+        result_data = results.get('rows')
+        if result_data:
+            # assumes a single row holding the metrics in the order requested
+            row = result_data[0]
+            data = {'Pages per visit': row[0], 'Bounces': row[1],
+                    'Average time on site': row[2], 'Percent new visits': row[3]}
+            ga_model.update_sitewide_stats(period_name, "Totals", data)
+
+
+    def _locale_stats(self, start_date, end_date, period_name):
+        """
+        Stores unique pageviews summed per language ("Languages") and per
+        country ("Country"); stores nothing when GA returns no rows.
+        """
+        results = self.service.data().ga().get(
+                                 ids='ga:' + self.profile_id,
+                                 start_date=start_date,
+                                 metrics='ga:uniquePageviews',
+                                 sort='-ga:uniquePageviews',
+                                 dimensions="ga:language,ga:country",
+                                 max_results=10000,
+                                 end_date=end_date).execute()
+        # 'rows' is missing from the response when GA has nothing to report.
+        languages, countries = {}, {}
+        for language, country, views in results.get('rows', []):
+            languages[language] = languages.get(language, 0) + int(views)
+            countries[country] = countries.get(country, 0) + int(views)
+        ga_model.update_sitewide_stats(period_name, "Languages", languages)
+        ga_model.update_sitewide_stats(period_name, "Country", countries)
+
+
+    def _social_stats(self, start_date, end_date, period_name):
+        """
+        Stores unique pageviews per referring social network as the
+        "Social sources" stat; rows without a network are left out.
+        """
+        results = self.service.data().ga().get(
+                                 ids='ga:' + self.profile_id,
+                                 start_date=start_date,
+                                 metrics='ga:uniquePageviews',
+                                 sort='-ga:uniquePageviews',
+                                 dimensions="ga:socialNetwork,ga:referralPath",
+                                 max_results=10000,
+                                 end_date=end_date).execute()
+        data = {}
+        # 'rows' is missing from the response when GA has nothing to report.
+        for result in results.get('rows', []):
+            if result[0] != '(not set)':
+                data[result[0]] = data.get(result[0], 0) + int(result[2])
+        ga_model.update_sitewide_stats(period_name, "Social sources", data)
+
+
+    def _os_stats(self, start_date, end_date, period_name):
+        """ Stores pageviews per operating system, and per (system, version) pair """
+        results = self.service.data().ga().get(
+                                 ids='ga:' + self.profile_id,
+                                 start_date=start_date,
+                                 metrics='ga:uniquePageviews',
+                                 sort='-ga:uniquePageviews',
+                                 dimensions="ga:operatingSystem,ga:operatingSystemVersion",
+                                 max_results=10000,
+                                 end_date=end_date).execute()
+        result_data = results.get('rows', [])  # no 'rows' key when GA has no data
+        data = {}
+        for result in result_data:
+            data[result[0]] = data.get(result[0], 0) + int(result[2])
+        ga_model.update_sitewide_stats(period_name, "Operating Systems", data)
+
+        data = {}
+        for result in result_data:
+            key = "%s (%s)" % (result[0],result[1])
+            data[key] = result[2]
+        ga_model.update_sitewide_stats(period_name, "Operating Systems versions", data)
+
+
+    def _browser_stats(self, start_date, end_date, period_name):
+        """ Stores pageviews per browser, and per (browser, version) pair """
+        results = self.service.data().ga().get(
+                                 ids='ga:' + self.profile_id,
+                                 start_date=start_date,
+                                 metrics='ga:uniquePageviews',
+                                 sort='-ga:uniquePageviews',
+                                 dimensions="ga:browser,ga:browserVersion",
+                                 max_results=10000,
+                                 end_date=end_date).execute()
+        result_data = results.get('rows', [])  # no 'rows' key when GA has no data
+        data = {}
+        for result in result_data:
+            data[result[0]] = data.get(result[0], 0) + int(result[2])
+        ga_model.update_sitewide_stats(period_name, "Browsers", data)
+
+        data = {}
+        for result in result_data:
+            key = "%s (%s)" % (result[0], result[1])
+            data[key] = result[2]
+        ga_model.update_sitewide_stats(period_name, "Browser versions", data)
+
+
+    def _mobile_stats(self, start_date, end_date, period_name):
+        """ Stores pageviews per mobile device brand and per mobile device """
+
+        results = self.service.data().ga().get(
+                                 ids='ga:' + self.profile_id,
+                                 start_date=start_date,
+                                 metrics='ga:uniquePageviews',
+                                 sort='-ga:uniquePageviews',
+                                 dimensions="ga:mobileDeviceBranding, ga:mobileDeviceInfo",
+                                 max_results=10000,
+                                 end_date=end_date).execute()
+
+        result_data = results.get('rows', [])  # no 'rows' key when GA has no data
+        data = {}
+        for result in result_data:
+            data[result[0]] = data.get(result[0], 0) + int(result[2])
+        ga_model.update_sitewide_stats(period_name, "Mobile brands", data)
+
+        data = {}
+        for result in result_data:
+            data[result[1]] = data.get(result[1], 0) + int(result[2])
+        ga_model.update_sitewide_stats(period_name, "Mobile devices", data)
+

--- a/ckanext/ga_report/ga_model.py
+++ b/ckanext/ga_report/ga_model.py
@@ -16,6 +16,18 @@
 
 
 class GA_Url(object):
+
+    def __init__(self, **kwargs):
+        for k,v in kwargs.items():
+            setattr(self, k, v)
+
+class GA_Stat(object):
+    """A ga_stat record; each keyword argument becomes an attribute."""
+    def __init__(self, **kwargs):
+        for name in kwargs:
+            setattr(self, name, kwargs[name])
+
+class GA_Publisher(object):
 
     def __init__(self, **kwargs):
         for k,v in kwargs.items():
@@ -28,12 +40,35 @@
                              default=make_uuid),
                       Column('period_name', types.UnicodeText),
                       Column('period_complete_day', types.Integer),
-                      Column('metric', types.UnicodeText),
-                      Column('value', types.UnicodeText),
+                      Column('pageviews', types.UnicodeText),
+                      Column('visitors', types.UnicodeText),
                       Column('url', types.UnicodeText),
                       Column('department_id', types.UnicodeText),
                 )
 mapper(GA_Url, url_table)
+
+stat_table = Table('ga_stat', metadata,
+                  Column('id', types.UnicodeText, primary_key=True,
+                         default=make_uuid),
+                  Column('period_name', types.UnicodeText),
+                  Column('stat_name', types.UnicodeText),
+                  Column('key', types.UnicodeText),
+                  Column('value', types.UnicodeText), )
+mapper(GA_Stat, stat_table)
+
+
+pub_table = Table('ga_publisher', metadata,
+                  Column('id', types.UnicodeText, primary_key=True,
+                         default=make_uuid),
+                  Column('period_name', types.UnicodeText),
+                  Column('publisher_name', types.UnicodeText),
+                  Column('views', types.UnicodeText),
+                  Column('visitors', types.UnicodeText),
+                  Column('toplevel', types.Boolean, default=False),
+                  Column('subpublishercount', types.Integer, default=0),
+                  Column('parent', types.UnicodeText),
+)
+mapper(GA_Publisher, pub_table)
 
 
 def init_tables():
@@ -73,22 +108,49 @@
             publisher_groups = dataset.get_groups('publisher')
             if publisher_groups:
                 return publisher_groups[0].name
+    else:
+        publisher_match = re.match('/publisher/([^/]+)(/.*)?', url)
+        if publisher_match:
+            return publisher_match.groups()[0]
+
+
+def update_sitewide_stats(period_name, stat_name, data):
+    """
+    Upserts one GA_Stat row per key of `data` for the given period and
+    statistic, committing after every row.
+    """
+    for key, value in data.iteritems():
+        existing = model.Session.query(GA_Stat).\
+            filter(GA_Stat.period_name==period_name).\
+            filter(GA_Stat.key==key).\
+            filter(GA_Stat.stat_name==stat_name).first()
+        if not existing:
+            # create the row
+            existing = GA_Stat(id=make_uuid(), period_name=period_name,
+                               key=key, value=value, stat_name=stat_name)
+        else:
+            # refresh the stored value
+            existing.period_name = period_name
+            existing.key = key
+            existing.value = value
+        model.Session.add(existing)
+        model.Session.commit()
+
 
 
 def update_url_stats(period_name, period_complete_day, url_data):
-    table = get_table('ga_url')
-    for url, views, next_page in url_data:
+    for url, views, visitors in url_data:
         url = _normalize_url(url)
         department_id = _get_department_id_of_url(url)
 
         # see if the row for this url & month is in the table already
         item = model.Session.query(GA_Url).\
             filter(GA_Url.period_name==period_name).\
-            filter(GA_Url.url==url).\
-            filter(GA_Url.metric == 'Total views').first()
+            filter(GA_Url.url==url).first()
         if item:
-            item.period_name = period_complete_day = period_complete_day
-            item.value = views
+            item.period_name = period_name
+            item.pageviews = views
+            item.visitors = visitors
             item.department_id = department_id
             model.Session.add(item)
         else:
@@ -97,10 +159,92 @@
                       'period_name': period_name,
                       'period_complete_day': period_complete_day,
                       'url': url,
-                      'value': views,
-                      'metric': 'Total views',
+                      'pageviews': views,
+                      'visitors': visitors,
                       'department_id': department_id
                      }
             model.Session.add(GA_Url(**values))
         model.Session.commit()
 
+
+
+def update_publisher_stats(period_name):
+    """
+    Rebuilds the GA_Publisher row of every active publisher for the given
+    period. Views and visitors come from update_publisher(), i.e. they are
+    summed over the publisher itself and the whole tree beneath it; the row
+    also records the first parent publisher and whether the publisher is
+    top level. Each row is committed as soon as it is written.
+    """
+    toplevel = get_top_level()
+    publishers = model.Session.query(model.Group).\
+        filter(model.Group.type=='publisher').\
+        filter(model.Group.state=='active').all()
+    for publisher in publishers:
+        views, visitors, subpub = update_publisher(period_name, publisher, publisher.name)
+
+        # a publisher with several parents is filed under the first one only
+        parents = publisher.get_groups('publisher')
+        fields = {'views': views,
+                  'visitors': visitors,
+                  'publisher_name': publisher.name,
+                  'toplevel': publisher in toplevel,
+                  'subpublishercount': subpub,
+                  'parent': parents[0].name if parents else '',
+                  }
+
+        item = model.Session.query(GA_Publisher).\
+            filter(GA_Publisher.period_name==period_name).\
+            filter(GA_Publisher.publisher_name==publisher.name).first()
+        if item:
+            # refresh the existing row in place
+            for attr, value in fields.items():
+                setattr(item, attr, value)
+        else:
+            # create the row
+            item = GA_Publisher(id=make_uuid(), period_name=period_name,
+                                **fields)
+
+        model.Session.add(item)
+        model.Session.commit()
+
+
+def update_publisher(period_name, pub, part=''):
+    """Returns (views, visitors, sub-publisher count) for pub's subtree; part is unused."""
+    views = visitors = 0
+    subpub = -1  # go_down_tree yields pub itself first, which is not a sub-publisher
+    for publisher in go_down_tree(pub):
+        subpub += 1
+        for item in model.Session.query(GA_Url).\
+                filter(GA_Url.period_name==period_name).\
+                filter(GA_Url.department_id==publisher.name).all():
+            views += int(item.pageviews)
+            visitors += int(item.visitors)
+    return views, visitors, subpub
+
+
+def get_top_level():
+    '''Returns the top level publishers: those with no active 'group' Member row pointing at them.'''
+    from sqlalchemy import and_  # Python's `and` silently kept only the first condition
+    return model.Session.query(model.Group).\
+           outerjoin(model.Member, and_(model.Member.table_id == model.Group.id,
+                                        model.Member.table_name == 'group',
+                                        model.Member.state == 'active')).\
+           filter(model.Member.id==None).filter(model.Group.type=='publisher').\
+           order_by(model.Group.name).all()
+
+def get_children(publisher):
+    '''Returns the publisher Groups that CKAN's HIERARCHY_CTE statement yields for the given publisher (object).'''
+    from ckan.model.group import HIERARCHY_CTE
+    query = model.Session.query(model.Group).from_statement(HIERARCHY_CTE)
+    # the statement is parameterised by the parent's id and the group type
+    return query.params(id=publisher.id, type='publisher').all()
+
+def go_down_tree(publisher):
+    '''Yields the given publisher, then every publisher beneath it, depth first.'''
+    pending = [publisher]
+    while pending:
+        current = pending.pop()
+        yield current
+        pending.extend(reversed(get_children(current)))
+

--- a/ckanext/ga_report/plugin.py
+++ b/ckanext/ga_report/plugin.py
@@ -1,13 +1,14 @@
 import logging
 import ckan.lib.helpers as h
+import ckan.plugins as p
 from ckan.plugins import implements, toolkit
-import gasnippet
-import commands
-import dbutil
+#import gasnippet
+#import commands
+#import dbutil
 
 log = logging.getLogger('ckanext.ga-report')
 
-class GoogleAnalyticsPlugin(p.SingletonPlugin):
+class GAReportPlugin(p.SingletonPlugin):
     implements(p.IConfigurer, inherit=True)
     implements(p.IRoutes, inherit=True)
 
@@ -17,9 +18,24 @@
 
     def after_map(self, map):
         map.connect(
-            '/data/analytics/index',
-            controller='ckanext.ga-report.controller:GaReport',
+            '/data/analytics/publisher',
+            controller='ckanext.ga_report.controller:GaPublisherReport',
             action='index'
+        )
+        map.connect(
+            '/data/analytics/publisher/{id}',
+            controller='ckanext.ga_report.controller:GaPublisherReport',
+            action='read'
+        )
+        map.connect(
+            '/data/analytics',
+            controller='ckanext.ga_report.controller:GaReport',
+            action='index'
+        )
+        map.connect(
+            '/data/analytics/data_{month}.csv',
+            controller='ckanext.ga_report.controller:GaReport',
+            action='csv'
         )
         return map
 

--- /dev/null
+++ b/ckanext/ga_report/templates/ga_report/publisher/index.html
@@ -1,1 +1,64 @@
+<html xmlns:py="http://genshi.edgewall.org/"
+  xmlns:i18n="http://genshi.edgewall.org/i18n"
+  xmlns:xi="http://www.w3.org/2001/XInclude"
+  py:strip="">
 
+  <py:def function="page_title">Publisher Analytics for ${g.site_title}</py:def>
+
+  <py:match path="primarysidebar">
+    <li class="widget-container boxed widget_text">
+      <h4>Publishers</h4>
+      <p>The table shows the top 20 publishers ranked by page views of the pages attributed to each publisher (its datasets and its own publisher page), together with the number of visitors recorded for those pages.</p>
+    </li>
+  </py:match>
+
+  <div py:match="content">
+      <h1>Publisher Analytics</h1>
+      <h2>The top 20 publishers</h2>
+
+      <form class="form-inline" action="${h.url_for(controller='ckanext.ga_report.controller:GaPublisherReport',action='index')}" method="get">
+          <div class="controls">
+          <select name="month">
+              <py:for each="val,desc in c.months">
+                <option value='${val}' py:attrs="{'selected': 'selected' if c.month == val else None}">${desc}</option>
+              </py:for>
+          </select>
+           <input class="btn button" type='submit' value="Update"/>
+          </div>
+       </form>
+
+       <table class="table table-condensed table-bordered table-striped">
+	 <tr>
+	   <th>Publisher</th>
+	   <th>Dataset Views</th>
+	   <th>Visits</th>
+	 </tr>
+        <py:for each="publisher, views, visits in c.top_publishers">
+	  <tr>
+	    <td>${h.link_to(publisher.title, h.url_for(controller='ckanext.ga_report.controller:GaPublisherReport', action='read', id=publisher.name))}
+	    </td>
+	    <td>${views}</td>
+	    <td>${visits}</td>
+	  </tr>
+        </py:for>
+       </table>
+
+
+  </div>
+
+  <xi:include href="../../layout.html" />
+
+  <py:def function="optional_footer">
+    <script type='text/javascript'>
+        $('.nav-tabs li a').click(function (e) {
+          e.preventDefault();
+          $(this).tab('show');
+        })
+    </script>
+  </py:def>
+
+</html>
+
+
+
+

file: