Fixed the publishr information (to enable leaderboard) to show the number
Fixed the publishr information (to enable leaderboard) to show the number
of children publishers and the total for all sub-publishers

--- a/ckanext/ga_report/download_analytics.py
+++ b/ckanext/ga_report/download_analytics.py
@@ -85,7 +85,7 @@
                      self.get_full_period_name(period_name, period_complete_day),
                      start_date.strftime('%Y %m %d'),
                      end_date.strftime('%Y %m %d'))
-
+            """
             data = self.download(start_date, end_date, '~/dataset/[a-z0-9-_]+')
             log.info('Storing Dataset Analytics for period "%s"',
                      self.get_full_period_name(period_name, period_complete_day))
@@ -95,8 +95,8 @@
             log.info('Storing Publisher Analytics for period "%s"',
                      self.get_full_period_name(period_name, period_complete_day))
             self.store(period_name, period_complete_day, data,)
-            ga_model.update_publisher_stats(period_name)
-
+            """
+            ga_model.update_publisher_stats(period_name) # about 30 seconds.
             self.sitewide_stats( period_name )
 
 
@@ -105,7 +105,7 @@
         start_date = start_date.strftime('%Y-%m-%d')
         end_date = end_date.strftime('%Y-%m-%d')
         query = 'ga:pagePath=%s$' % path
-        metrics = 'ga:uniquePageviews, ga:visits'
+        metrics = 'ga:uniquePageviews, ga:visitors'
         sort = '-ga:uniquePageviews'
 
         # Supported query params at
@@ -135,7 +135,6 @@
         if 'url' in data:
             ga_model.update_url_stats(period_name, period_complete_day, data['url'])
 
-
     def sitewide_stats(self, period_name):
         import calendar
         year, month = period_name.split('-')
@@ -151,6 +150,12 @@
             print ' + Fetching %s stats' % f.split('_')[1]
             getattr(self, f)(start_date, end_date, period_name)
 
+    def _get_results(result_data, f):
+        data = {}
+        for result in result_data:
+            key = f(result)
+            data[key] = data.get(key,0) + result[1]
+        return data
 
     def _totals_stats(self, start_date, end_date, period_name):
         """ Fetches distinct totals, total pageviews etc """
@@ -264,7 +269,7 @@
 
         data = {}
         for result in result_data:
-            key = "%s (%s)" % (result[0],result[1])
+            key = "%s (%s)" % (result[0], result[1])
             data[key] = result[2]
         ga_model.update_sitewide_stats(period_name, "Browser versions", data)
 

--- a/ckanext/ga_report/ga_model.py
+++ b/ckanext/ga_report/ga_model.py
@@ -41,7 +41,7 @@
                       Column('period_name', types.UnicodeText),
                       Column('period_complete_day', types.Integer),
                       Column('pageviews', types.UnicodeText),
-                      Column('visits', types.UnicodeText),
+                      Column('visitors', types.UnicodeText),
                       Column('url', types.UnicodeText),
                       Column('department_id', types.UnicodeText),
                 )
@@ -63,7 +63,10 @@
                   Column('period_name', types.UnicodeText),
                   Column('publisher_name', types.UnicodeText),
                   Column('views', types.UnicodeText),
-                  Column('visits', types.UnicodeText),
+                  Column('visitors', types.UnicodeText),
+                  Column('toplevel', types.Boolean, default=False),
+                  Column('subpublishercount', types.Integer, default=0),
+                  Column('parent', types.UnicodeText),
 )
 mapper(GA_Publisher, pub_table)
 
@@ -136,7 +139,7 @@
 
 
 def update_url_stats(period_name, period_complete_day, url_data):
-    for url, views, visits in url_data:
+    for url, views, visitors in url_data:
         url = _normalize_url(url)
         department_id = _get_department_id_of_url(url)
 
@@ -147,7 +150,7 @@
         if item:
             item.period_name = period_name
             item.pageviews = views
-            item.visits = visits
+            item.visitors = visitors
             item.department_id = department_id
             model.Session.add(item)
         else:
@@ -157,7 +160,7 @@
                       'period_complete_day': period_complete_day,
                       'url': url,
                       'pageviews': views,
-                      'visits': visits,
+                      'visitors': visitors,
                       'department_id': department_id
                      }
             model.Session.add(GA_Url(**values))
@@ -166,16 +169,30 @@
 
 
 def update_publisher_stats(period_name):
-    publishers = get_top_level()
+    """
+    Updates the publisher stats from the data retrieved for /dataset/*
+    and /publisher/*. Will run against each dataset and generates the
+    totals for the entire tree beneath each publisher.
+    """
+    toplevel = get_top_level()
+    publishers = model.Session.query(model.Group).\
+        filter(model.Group.type=='publisher').\
+        filter(model.Group.state=='active').all()
     for publisher in publishers:
-        views, visits = update_publisher(period_name, publisher, publisher.name)
+        views, visitors, subpub = update_publisher(period_name, publisher, publisher.name)
+        parent, parents = '', publisher.get_groups('publisher')
+        if parents:
+            parent = parents[0].name
         item = model.Session.query(GA_Publisher).\
             filter(GA_Publisher.period_name==period_name).\
             filter(GA_Publisher.publisher_name==publisher.name).first()
         if item:
             item.views = views
-            item.visits = visits
+            item.visitors = visitors
             item.publisher_name = publisher.name
+            item.toplevel = publisher in toplevel
+            item.subpublishercount = subpub
+            item.parent = parent
             model.Session.add(item)
         else:
             # create the row
@@ -183,23 +200,27 @@
                      'period_name': period_name,
                      'publisher_name': publisher.name,
                      'views': views,
-                     'visits': visits,
+                     'visitors': visitors,
+                     'toplevel': publisher in toplevel,
+                     'subpublishercount': subpub,
+                     'parent': parent
                      }
             model.Session.add(GA_Publisher(**values))
         model.Session.commit()
 
 
 def update_publisher(period_name, pub, part=''):
-    views,visits = 0, 0
+    views,visitors,subpub = 0, 0, 0
     for publisher in go_down_tree(pub):
-        f = model.Session.query(GA_Url).\
+        subpub = subpub + 1
+        items = model.Session.query(GA_Url).\
                 filter(GA_Url.period_name==period_name).\
-                filter(GA_Url.url=='/publisher/' + publisher.name).first()
-        if f:
-            views = views + int(f.pageviews)
-            visits = visits + int(f.visits)
-
-    return views, visits
+                filter(GA_Url.department_id==publisher.name).all()
+        for item in items:
+            views = views + int(item.pageviews)
+            visitors = visitors + int(item.visitors)
+
+    return views, visitors, (subpub-1)
 
 
 def get_top_level():