Adjust popularity score to take account of number of days in the month.
Adjust popularity score to take account of number of days in the month.

--- a/ckanext/ga_report/command.py
+++ b/ckanext/ga_report/command.py
@@ -80,7 +80,7 @@
                                default=False,
                                dest='delete_first',
                                help='Delete data for the period first')
-        self.parser.add_option('-s', '--slip_url_stats',
+        self.parser.add_option('-s', '--skip_url_stats',
                                action='store_true',
                                default=False,
                                dest='skip_url_stats',

--- a/ckanext/ga_report/controller.py
+++ b/ckanext/ga_report/controller.py
@@ -71,13 +71,13 @@
         entries = q.order_by('ga_stat.key').all()
 
         def clean_key(key, val):
-            if key in ['Average time on site', 'Pages per visit', 'New visits', 'Bounces']:
+            if key in ['Average time on site', 'Pages per visit', 'New visits', 'Bounce rate (home page)']:
                 val =  "%.2f" % round(float(val), 2)
                 if key == 'Average time on site':
                     mins, secs = divmod(float(val), 60)
                     hours, mins = divmod(mins, 60)
                     val = '%02d:%02d:%02d (%s seconds) ' % (hours, mins, secs, val)
-                if key in ['New visits','Bounces']:
+                if key in ['New visits','Bounce rate (home page)']:
                     val = "%s%%" % val
             if key in ['Total page views', 'Total visits']:
                 val = int(val)
@@ -232,7 +232,7 @@
         return render('ga_report/publisher/index.html')
 
     def _get_packages(self, publisher=None, count=-1):
-        '''Returns the datasets in order of visits'''
+        '''Returns the datasets in order of views'''
         if count == -1:
             count = sys.maxint
 
@@ -244,11 +244,11 @@
         if publisher:
             q = q.filter(GA_Url.department_id==publisher.name)
         q = q.filter(GA_Url.period_name==month)
-        q = q.order_by('ga_url.visitors::int desc')
+        q = q.order_by('ga_url.pageviews::int desc')
         top_packages = []
         for entry,package in q.limit(count):
             if package:
-                top_packages.append((package, entry.pageviews, entry.visitors))
+                top_packages.append((package, entry.pageviews, entry.visits))
             else:
                 log.warning('Could not find package associated package')
 
@@ -306,13 +306,13 @@
     month = c.month or 'All'
     connection = model.Session.connection()
     q = """
-        select department_id, sum(pageviews::int) views, sum(visitors::int) visits
+        select department_id, sum(pageviews::int) views, sum(visits::int) visits
         from ga_url
         where department_id <> ''
           and package_id <> ''
           and url like '/dataset/%%'
           and period_name=%s
-        group by department_id order by visits desc
+        group by department_id order by views desc
         """
     if limit:
         q = q + " limit %s;" % (limit)
@@ -329,7 +329,7 @@
 def _get_publishers():
     '''
     Returns a list of all publishers. Each item is a tuple:
-      (names, title)
+      (name, title)
     '''
     publishers = []
     for pub in model.Session.query(model.Group).\

--- a/ckanext/ga_report/download_analytics.py
+++ b/ckanext/ga_report/download_analytics.py
@@ -98,7 +98,7 @@
                      self.get_full_period_name(period_name, period_complete_day),
                      start_date.strftime('%Y-%m-%d'),
                      end_date.strftime('%Y-%m-%d'))
- 
+
             if self.delete_first:
                 log.info('Deleting existing Analytics for this period "%s"',
                          period_name)
@@ -162,8 +162,8 @@
         start_date = start_date.strftime('%Y-%m-%d')
         end_date = end_date.strftime('%Y-%m-%d')
         query = 'ga:pagePath=%s$' % path
-        metrics = 'ga:uniquePageviews, ga:visits'
-        sort = '-ga:uniquePageviews'
+        metrics = 'ga:pageviews, ga:visits'
+        sort = '-ga:pageviews'
 
         # Supported query params at
         # https://developers.google.com/analytics/devguides/reporting/core/v3/reference
@@ -219,8 +219,8 @@
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
                                  start_date=start_date,
-                                 metrics='ga:uniquePageviews',
-                                 sort='-ga:uniquePageviews',
+                                 metrics='ga:pageviews',
+                                 sort='-ga:pageviews',
                                  max_results=10000,
                                  end_date=end_date).execute()
         result_data = results.get('rows')
@@ -248,12 +248,12 @@
                                  ids='ga:' + self.profile_id,
                                  filters='ga:pagePath==%s' % (path,),
                                  start_date=start_date,
-                                 metrics='ga:bounces,ga:uniquePageviews',
+                                 metrics='ga:bounces,ga:pageviews',
                                  dimensions='ga:pagePath',
                                  max_results=10000,
                                  end_date=end_date).execute()
         result_data = results.get('rows')
-        if len(result_data) != 1:
+        if not result_data or len(result_data) != 1:
             log.error('Could not pinpoint the bounces for path: %s. Got results: %r',
                       path, result_data)
             return
@@ -261,7 +261,7 @@
         bounces, total = [float(x) for x in result_data[0][1:]]
         pct = 100 * bounces/total
         log.info('%d bounces from %d total == %s', bounces, total, pct)
-        ga_model.update_sitewide_stats(period_name, "Totals", {'Bounce rate': pct})
+        ga_model.update_sitewide_stats(period_name, "Totals", {'Bounce rate (home page)': pct})
 
 
     def _locale_stats(self, start_date, end_date, period_name):
@@ -269,8 +269,8 @@
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
                                  start_date=start_date,
-                                 metrics='ga:uniquePageviews',
-                                 sort='-ga:uniquePageviews',
+                                 metrics='ga:pageviews',
+                                 sort='-ga:pageviews',
                                  dimensions="ga:language,ga:country",
                                  max_results=10000,
                                  end_date=end_date).execute()
@@ -293,8 +293,8 @@
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
                                  start_date=start_date,
-                                 metrics='ga:uniquePageviews',
-                                 sort='-ga:uniquePageviews',
+                                 metrics='ga:pageviews',
+                                 sort='-ga:pageviews',
                                  dimensions="ga:socialNetwork,ga:referralPath",
                                  max_results=10000,
                                  end_date=end_date).execute()
@@ -312,8 +312,8 @@
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
                                  start_date=start_date,
-                                 metrics='ga:uniquePageviews',
-                                 sort='-ga:uniquePageviews',
+                                 metrics='ga:pageviews',
+                                 sort='-ga:pageviews',
                                  dimensions="ga:operatingSystem,ga:operatingSystemVersion",
                                  max_results=10000,
                                  end_date=end_date).execute()
@@ -337,8 +337,8 @@
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
                                  start_date=start_date,
-                                 metrics='ga:uniquePageviews',
-                                 sort='-ga:uniquePageviews',
+                                 metrics='ga:pageviews',
+                                 sort='-ga:pageviews',
                                  dimensions="ga:browser,ga:browserVersion",
                                  max_results=10000,
                                  end_date=end_date).execute()
@@ -386,8 +386,8 @@
         results = self.service.data().ga().get(
                                  ids='ga:' + self.profile_id,
                                  start_date=start_date,
-                                 metrics='ga:uniquePageviews',
-                                 sort='-ga:uniquePageviews',
+                                 metrics='ga:pageviews',
+                                 sort='-ga:pageviews',
                                  dimensions="ga:mobileDeviceBranding, ga:mobileDeviceInfo",
                                  max_results=10000,
                                  end_date=end_date).execute()

--- a/ckanext/ga_report/ga_model.py
+++ b/ckanext/ga_report/ga_model.py
@@ -9,6 +9,8 @@
 
 import ckan.model as model
 from ckan.lib.base import *
+
+log = __import__('logging').getLogger(__name__)
 
 def make_uuid():
     return unicode(uuid.uuid4())
@@ -27,7 +29,7 @@
                       Column('period_name', types.UnicodeText),
                       Column('period_complete_day', types.Integer),
                       Column('pageviews', types.UnicodeText),
-                      Column('visitors', types.UnicodeText),
+                      Column('visits', types.UnicodeText),
                       Column('url', types.UnicodeText),
                       Column('department_id', types.UnicodeText),
                       Column('package_id', types.UnicodeText),
@@ -63,7 +65,7 @@
                   Column('period_name', types.UnicodeText),
                   Column('publisher_name', types.UnicodeText),
                   Column('views', types.UnicodeText),
-                  Column('visitors', types.UnicodeText),
+                  Column('visits', types.UnicodeText),
                   Column('toplevel', types.Boolean, default=False),
                   Column('subpublishercount', types.Integer, default=0),
                   Column('parent', types.UnicodeText),
@@ -155,25 +157,6 @@
         model.Session.commit()
 
 
-def update_url_stat_totals(period_name):
-
-    """
-        items = model.Session.query(GA_Url).\
-            filter(GA_Url.period_name != "All").\
-            filter(GA_Url.url==url).all()
-        values = {'id': make_uuid(),
-                  'period_name': "All",
-                  'period_complete_day': "0",
-                  'url': url,
-                  'pageviews': sum([int(x.pageviews) for x in items]),
-                  'visitors': sum([int(x.visitors) for x in items]),
-                  'department_id': department_id,
-                  'package_id': package
-                 }
-        model.Session.add(GA_Url(**values))
-        model.Session.commit()
-    """
-
 def pre_update_url_stats(period_name):
     model.Session.query(GA_Url).\
             filter(GA_Url.period_name==period_name).delete()
@@ -187,7 +170,7 @@
     stores them in GA_Url under the period and recalculates the totals for
     the 'All' period.
     '''
-    for url, views, visitors in url_data:
+    for url, views, visits in url_data:
         package, publisher = _get_package_and_publisher(url)
 
 
@@ -196,7 +179,7 @@
             filter(GA_Url.url==url).first()
         if item:
             item.pageviews = item.pageviews + views
-            item.visitors = item.visitors + visitors
+            item.visits = item.visits + visits
             if not item.package_id:
                 item.package_id = package
             if not item.department_id:
@@ -208,7 +191,7 @@
                       'period_complete_day': period_complete_day,
                       'url': url,
                       'pageviews': views,
-                      'visitors': visitors,
+                      'visits': visits,
                       'department_id': publisher,
                       'package_id': package
                      }
@@ -221,7 +204,7 @@
                 filter(GA_Url.period_name=='All').\
                 filter(GA_Url.url==url).all()
             old_pageviews = sum([int(o.pageviews) for o in old])
-            old_visits = sum([int(o.visitors) for o in old])
+            old_visits = sum([int(o.visits) for o in old])
 
             entries = model.Session.query(GA_Url).\
                 filter(GA_Url.period_name!='All').\
@@ -231,7 +214,7 @@
                       'period_complete_day': 0,
                       'url': url,
                       'pageviews': sum([int(e.pageviews) for e in entries]) + old_pageviews,
-                      'visitors': sum([int(e.visitors) for e in entries]) + old_visits,
+                      'visits': sum([int(e.visits or 0) for e in entries]) + old_visits,
                       'department_id': publisher,
                       'package_id': package
                      }
@@ -281,7 +264,7 @@
         filter(model.Group.type=='publisher').\
         filter(model.Group.state=='active').all()
     for publisher in publishers:
-        views, visitors, subpub = update_publisher(period_name, publisher, publisher.name)
+        views, visits, subpub = update_publisher(period_name, publisher, publisher.name)
         parent, parents = '', publisher.get_groups('publisher')
         if parents:
             parent = parents[0].name
@@ -290,7 +273,7 @@
             filter(GA_Publisher.publisher_name==publisher.name).first()
         if item:
             item.views = views
-            item.visitors = visitors
+            item.visits = visits
             item.publisher_name = publisher.name
             item.toplevel = publisher in toplevel
             item.subpublishercount = subpub
@@ -302,7 +285,7 @@
                      'period_name': period_name,
                      'publisher_name': publisher.name,
                      'views': views,
-                     'visitors': visitors,
+                     'visits': visits,
                      'toplevel': publisher in toplevel,
                      'subpublishercount': subpub,
                      'parent': parent
@@ -312,7 +295,7 @@
 
 
 def update_publisher(period_name, pub, part=''):
-    views,visitors,subpub = 0, 0, 0
+    views,visits,subpub = 0, 0, 0
     for publisher in go_down_tree(pub):
         subpub = subpub + 1
         items = model.Session.query(GA_Url).\
@@ -320,9 +303,9 @@
                 filter(GA_Url.department_id==publisher.name).all()
         for item in items:
             views = views + int(item.pageviews)
-            visitors = visitors + int(item.visitors)
-
-    return views, visitors, (subpub-1)
+            visits = visits + int(item.visits)
+
+    return views, visits, (subpub-1)
 
 
 def get_top_level():
@@ -362,3 +345,34 @@
         q.delete()
     model.Session.commit()
 
+def get_score_for_dataset(dataset_name):
+    '''
+    Returns a "current popularity" score for a dataset,
+    based on how many views it has had recently.
+    '''
+    import datetime
+    now = datetime.datetime.now()
+    last_month = now - datetime.timedelta(days=30)
+    period_names = ['%s-%02d' % (last_month.year, last_month.month),
+                    '%s-%02d' % (now.year, now.month),
+                    ]
+
+    score = 0
+    for period_name in period_names:
+        score /= 2 # previous periods are discounted by 50%
+        entry = model.Session.query(GA_Url)\
+                .filter(GA_Url.period_name==period_name)\
+                .filter(GA_Url.package_id==dataset_name).first()
+        # score
+        if entry:
+            views = float(entry.pageviews)
+            if entry.period_complete_day:
+                views_per_day = views / entry.period_complete_day
+            else:
+                views_per_day = views / 15 # guess
+            score += views_per_day
+
+    score = int(score * 100)
+    log.debug('Popularity %s: %s', score, dataset_name)
+    return score
+

--- a/ckanext/ga_report/helpers.py
+++ b/ckanext/ga_report/helpers.py
@@ -106,7 +106,7 @@
             if not p in datasets:
                 datasets[p] = {'views':0, 'visits': 0}
             datasets[p]['views'] = datasets[p]['views'] + int(entry.pageviews)
-            datasets[p]['visits'] = datasets[p]['visits'] + int(entry.visitors)
+            datasets[p]['visits'] = datasets[p]['visits'] + int(entry.visits)
 
     results = []
     for k, v in datasets.iteritems():

--- a/ckanext/ga_report/templates/ga_report/notes.html
+++ b/ckanext/ga_report/templates/ga_report/notes.html
@@ -6,9 +6,8 @@
     <li class="widget-container boxed widget_text">
       <h4>Notes</h4>
       <ul>
-          <li>"Views" is the number of sessions during which the page was viewed one or more times (technically known as "unique pageviews").</li>
-          <li>"Visits" is the number of unique user visits to a page, counted once for each visitor for each session.</li>
-<!--!          <li>"Visitors" is the number of unique users visiting the site (whether once or more times).</li> -->
+          <li>"Views" is the number of times a page was loaded in users' browsers.</li>
+          <li>"Visits" is the number of unique user visits to a page, counted once for each visitor for each of their browsing sessions.</li>
           <li>These usage statistics are confined to users with javascript enabled, which excludes web crawlers and API calls.</li>
           <li>The results are not shown when the number of views/visits is tiny. Where these relate to site pages, results are available in full in the CSV download. Where these relate to users' web browser information, results are not disclosed, for privacy reasons.</li>
       </ul>

--- a/ckanext/ga_report/templates/ga_report/publisher/index.html
+++ b/ckanext/ga_report/templates/ga_report/publisher/index.html
@@ -41,14 +41,14 @@
        <table class="table table-condensed table-bordered table-striped">
 	 <tr>
 	   <th>Publisher</th>
-	   <th>Dataset Visits</th>
+<!--	   <th>Dataset Visits</th>-->
 	   <th>Dataset Views</th>
 	 </tr>
         <py:for each="publisher, views, visits in c.top_publishers">
 	  <tr>
 	    <td>${h.link_to(publisher.title, h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport', action='read_publisher', id=publisher.name))}
 	    </td>
-	    <td>${visits}</td>
+<!--	    <td>${visits}</td> -->
 	    <td>${views}</td>
 	  </tr>
         </py:for>

--- a/ckanext/ga_report/templates/ga_report/publisher/read.html
+++ b/ckanext/ga_report/templates/ga_report/publisher/read.html
@@ -47,14 +47,14 @@
      <table py:if="c.top_packages" class="table table-condensed table-bordered table-striped">
 	 <tr>
 	   <th>Dataset</th>
-	   <th>Visits</th>
+<!--	   <th>Visits</th> -->
 	   <th>Views</th>
 	 </tr>
         <py:for each="package, views, visits in c.top_packages">
 	  <tr>
 	    <td>${h.link_to(package.title or package.name, h.url_for(controller='package', action='read', id=package.name))}
 	    </td>
-	    <td>${visits}</td>
+<!--	    <td>${visits}</td> -->
 	    <td>${views}</td>
 	  </tr>
         </py:for>