[423] Handle many more graphing edge-cases.
[423] Handle many more graphing edge-cases.

Prepare graphs for an expected time period (since July 2012) rather than for the time period found in the DB, which can be reduced to absurdity with certain queries. Because graphs now always have a consistent X-axis, the ugly logic to combine disparate data series can be removed.

On 'Publisher' and 'Dataset' tabs, always graph the *top 20* series regardless of the month currently rendered in the table. This makes more sense from a usability point of view.

Finally, some client-side error checking was improved.

--- a/ckanext/ga_report/controller.py
+++ b/ckanext/ga_report/controller.py
@@ -212,12 +212,9 @@
             for stat in graph_query:
                 graph_dict[ stat.key ] = graph_dict.get(stat.key,{
                     'name':stat.key, 
-                    'data': []
+                    'raw': {}
                     })
-                graph_dict[ stat.key ]['data'].append({
-                    'x':_get_unix_epoch(stat.period_name),
-                    'y':float(stat.value)
-                    })
+                graph_dict[ stat.key ]['raw'][stat.period_name] = float(stat.value)
             stats_in_table = [x[0] for x in entries]
             stats_not_in_table = set(graph_dict.keys()) - set(stats_in_table)
             stats = stats_in_table + sorted(list(stats_not_in_table))
@@ -252,7 +249,7 @@
         writer = csv.writer(response)
         writer.writerow(["Publisher Title", "Publisher Name", "Views", "Visits", "Period Name"])
 
-        top_publishers, top_publishers_graph = _get_top_publishers(None)
+        top_publishers = _get_top_publishers(limit=None)
 
         for publisher,view,visit in top_publishers:
             writer.writerow([publisher.title.encode('utf-8'),
@@ -274,7 +271,7 @@
             if not c.publisher:
                 abort(404, 'A publisher with that name could not be found')
 
-        packages = self._get_packages(c.publisher)
+        packages = self._get_packages(publisher=c.publisher, month=c.month)
         response.headers['Content-Type'] = "text/csv; charset=utf-8"
         response.headers['Content-Disposition'] = \
             str('attachment; filename=datasets_%s_%s.csv' % (c.publisher_name, month,))
@@ -303,15 +300,16 @@
         if c.month:
             c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month])
 
-        c.top_publishers, graph_data = _get_top_publishers()
+        c.top_publishers = _get_top_publishers()
+        graph_data = _get_top_publishers_graph()
         c.top_publishers_graph = json.dumps( _to_rickshaw(graph_data) )
 
         return render('ga_report/publisher/index.html')
 
-    def _get_packages(self, publisher=None, count=-1):
+    def _get_packages(self, publisher=None, month='', count=-1):
         '''Returns the datasets in order of views'''
         have_download_data = True
-        month = c.month or 'All'
+        month = month or 'All'
         if month != 'All':
             have_download_data = month >= DOWNLOADS_AVAILABLE_FROM
 
@@ -388,28 +386,25 @@
         entry = q.filter(GA_Url.period_name==c.month).first()
         c.publisher_page_views = entry.pageviews if entry else 0
 
-        c.top_packages = self._get_packages(c.publisher, 20)
+        c.top_packages = self._get_packages(publisher=c.publisher, count=20, month=c.month)
 
         # Graph query
-        top_package_names = [ x[0].name for x in c.top_packages ]
+        top_packages_all_time = self._get_packages(publisher=c.publisher, count=20, month='All')
+        top_package_names = [ x[0].name for x in top_packages_all_time ]
         graph_query = model.Session.query(GA_Url,model.Package)\
             .filter(model.Package.name==GA_Url.package_id)\
             .filter(GA_Url.url.like('/dataset/%'))\
             .filter(GA_Url.package_id.in_(top_package_names))
-        graph_dict = {}
+        all_series = {}
         for entry,package in graph_query:
             if not package: continue
             if entry.period_name=='All': continue
-            graph_dict[package.name] = graph_dict.get(package.name,{
+            all_series[package.name] = all_series.get(package.name,{
                 'name':package.title,
-                'data':[]
+                'raw': {}
                 })
-            graph_dict[package.name]['data'].append({
-                'x':_get_unix_epoch(entry.period_name),
-                'y':int(entry.pageviews),
-                })
-        graph = [ graph_dict[x] for x in top_package_names ]
-
+            all_series[package.name]['raw'][entry.period_name] = int(entry.pageviews)
+        graph = [ all_series[series_name] for series_name in top_package_names ]
         c.graph_data = json.dumps( _to_rickshaw(graph) )
 
         return render('ga_report/publisher/read.html')
@@ -417,53 +412,45 @@
 def _to_rickshaw(data, percentageMode=False):
     if data==[]:
         return data
-    # Create a consistent x-axis between all series
-    num_points = [ len(series['data']) for series in data ]
-    ideal_index = num_points.index( max(num_points) )
-    x_axis = []
+    # x-axis is every month in c.months. Note that data might not exist 
+    # for entire history, eg. for recently-added datasets
+    x_axis = [x[0] for x in c.months]
+    x_axis.reverse() # Ascending order
+    x_axis = x_axis[:-1] # Remove latest month
+    totals = {}
     for series in data:
+        series['data'] = []
+        for x_string in x_axis:
+            x = _get_unix_epoch( x_string )
+            y = series['raw'].get(x_string,0)
+            series['data'].append({'x':x,'y':y})
+            totals[x] = totals.get(x,0)+y
+    if not percentageMode:
+        return data
+    # Turn all data into percentages
+    # Roll insignificant series into a catch-all
+    THRESHOLD = 1
+    raw_data = data
+    data = []
+    for series in raw_data:
         for point in series['data']:
-            x_axis.append(point['x'])
-    x_axis = sorted( list( set(x_axis) ) )
-    # Zero pad any missing values
-    for series in data:
-        xs = [ point['x'] for point in series['data'] ]
-        for x in set(x_axis).difference(set(xs)):
-            series['data'].append( {'x':x, 'y':0} )
-    if percentageMode:
-        def get_totals(series_list):
-            totals = {}
-            for series in series_list:
-                for point in series['data']:
-                    totals[point['x']] = totals.get(point['x'],0) + point['y']
-            return totals
-        # Transform data into percentage stacks
-        totals = get_totals(data)
-        # Roll insignificant series into a catch-all
-        THRESHOLD = 0.01
-        raw_data = data
-        data = []
-        for series in raw_data:
-            for point in series['data']:
-                fraction = float(point['y']) / totals[point['x']]
-                if not (series in data) and fraction>THRESHOLD:
-                    data.append(series)
-        # Overwrite data with a set of interesting series
-        others = [ x for x in raw_data if not (x in data) ]
-        if len(others):
-            data.append({ 
-                'name':'Other',
-                'data': [ {'x':x,'y':y} for x,y in get_totals(others).items() ] 
-                })
-        # Turn each point into a percentage
-        for series in data:
-            for point in series['data']:
-                point['y'] = (point['y']*100) / totals[point['x']]
-    # Sort the points
-    for series in data:
-        series['data'] = sorted( series['data'], key=lambda x:x['x'] )
-        # Strip the latest month's incomplete analytics
-        series['data'] = series['data'][:-1]
+            percentage = (100*float(point['y'])) / totals[point['x']]
+            if not (series in data) and percentage>THRESHOLD:
+                data.append(series)
+            point['y'] = percentage
+    others = [ x for x in raw_data if not (x in data) ]
+    if len(others):
+        data_other = []
+        for i in range(len(x_axis)):
+            x = _get_unix_epoch(x_axis[i])
+            y = 0
+            for series in others: 
+                y += series['data'][i]['y']
+            data_other.append({'x':x,'y':y})
+        data.append({ 
+            'name':'Other',
+            'data': data_other
+            })
     return data
 
 
@@ -488,39 +475,51 @@
 
     top_publishers = []
     res = connection.execute(q, month)
-    department_ids = []
     for row in res:
         g = model.Group.get(row[0])
         if g:
-            department_ids.append(row[0])
             top_publishers.append((g, row[1], row[2]))
-
-    graph = []
-    if limit is not None:
-        # Query for a history graph of these publishers
-        q = model.Session.query(
-                GA_Url.department_id, 
-                GA_Url.period_name, 
-                func.sum(cast(GA_Url.pageviews,sqlalchemy.types.INT)))\
-            .filter( GA_Url.department_id.in_(department_ids) )\
-            .filter( GA_Url.period_name!='All' )\
-            .filter( GA_Url.url.like('/dataset/%') )\
-            .filter( GA_Url.package_id!='' )\
-            .group_by( GA_Url.department_id, GA_Url.period_name )
-        graph_dict = {}
-        for dept_id,period_name,views in q:
-            graph_dict[dept_id] = graph_dict.get( dept_id, {
-                'name' : model.Group.get(dept_id).title,
-                'data' : []
-                })
-            graph_dict[dept_id]['data'].append({
-                'x': _get_unix_epoch(period_name),
-                'y': views
-                })
-        # Sort dict into ordered list
-        for id in department_ids:
-            graph.append( graph_dict[id] )
-    return top_publishers, graph
+    return top_publishers
+
+
+def _get_top_publishers_graph(limit=20):
+    '''
+    Returns a list of the top 20 publishers by dataset visits.
+    (The number to show can be varied with 'limit')
+    '''
+    connection = model.Session.connection()
+    q = """
+        select department_id, sum(pageviews::int) views
+        from ga_url
+        where department_id <> ''
+          and package_id <> ''
+          and url like '/dataset/%%'
+          and period_name='All'
+        group by department_id order by views desc
+        """
+    if limit:
+        q = q + " limit %s;" % (limit)
+
+    res = connection.execute(q)
+    department_ids = [ row[0] for row in res ]
+
+    # Query for a history graph of these department ids
+    q = model.Session.query(
+            GA_Url.department_id, 
+            GA_Url.period_name, 
+            func.sum(cast(GA_Url.pageviews,sqlalchemy.types.INT)))\
+        .filter( GA_Url.department_id.in_(department_ids) )\
+        .filter( GA_Url.url.like('/dataset/%') )\
+        .filter( GA_Url.package_id!='' )\
+        .group_by( GA_Url.department_id, GA_Url.period_name )
+    graph_dict = {}
+    for dept_id,period_name,views in q:
+        graph_dict[dept_id] = graph_dict.get( dept_id, {
+            'name' : model.Group.get(dept_id).title,
+            'raw' : {}
+            })
+        graph_dict[dept_id]['raw'][period_name] = views
+    return [ graph_dict[id] for id in department_ids ]
 
 
 def _get_publishers():

--- a/ckanext/ga_report/helpers.py
+++ b/ckanext/ga_report/helpers.py
@@ -122,3 +122,17 @@
 
     return sorted(results, key=operator.itemgetter(1), reverse=True)
 
+def month_option_title(month_iso, months, day):
+    month_isos = [ iso_code for (iso_code,name) in months ]
+    try:
+        index = month_isos.index(month_iso)
+    except ValueError:
+        _log.error('Month "%s" not found in list of months.' % month_iso)
+        return month_iso
+    month_name = months[index][1]
+    if index==0:
+        return month_name + (' (up to %s)'%day)
+    return month_name
+
+
+

--- a/ckanext/ga_report/plugin.py
+++ b/ckanext/ga_report/plugin.py
@@ -5,7 +5,8 @@
 
 from ckanext.ga_report.helpers import (most_popular_datasets,
                                        popular_datasets,
-                                       single_popular_dataset)
+                                       single_popular_dataset,
+                                       month_option_title)
 
 log = logging.getLogger('ckanext.ga-report')
 
@@ -27,7 +28,8 @@
             'ga_report_installed': lambda: True,
             'popular_datasets': popular_datasets,
             'most_popular_datasets': most_popular_datasets,
-            'single_popular_dataset': single_popular_dataset
+            'single_popular_dataset': single_popular_dataset,
+            'month_option_title': month_option_title
         }
 
     def after_map(self, map):

--- a/ckanext/ga_report/public/css/ga_report.css
+++ b/ckanext/ga_report/public/css/ga_report.css
@@ -61,4 +61,9 @@
 .ga-reports-table .td-numeric {
   text-align: center;
 }
+.ga-reports-heading {
+  padding-right: 10px;
+  margin-top: 4px;
+  float: left;
+}
 

--- a/ckanext/ga_report/public/scripts/ckanext_ga_reports.js
+++ b/ckanext/ga_report/public/scripts/ckanext_ga_reports.js
@@ -4,14 +4,22 @@
 CKAN.GA_Reports.render_rickshaw = function( css_name, data, mode, colorscheme ) {
     var graphLegends = $('#graph-legend-container');
 
-    if (!Modernizr.svg) {
+    function renderError(alertClass,alertText,legendText) {
         $("#chart_"+css_name)
-          .html( '<div class="alert">Your browser does not support vector graphics. No graphs can be rendered.</div>')
+          .html( '<div class="alert '+alertClass+'">'+alertText+'</div>')
           .closest('.rickshaw_chart_container').css('height',50);
         var myLegend = $('<div id="legend_'+css_name+'"/>')
-          .html('(Graph cannot be rendered)')
+          .html(legendText)
           .appendTo(graphLegends);
+    }
+
+    if (!Modernizr.svg) {
+        renderError('','Your browser does not support vector graphics. No graphs can be rendered.','(Graph cannot be rendered)');
         return;
+    }
+    if (data.length==0) {
+        renderError('alert-info','There is not enough data to render a graph.','(No graph available)');
+        return
     }
     var myLegend = $('<div id="legend_'+css_name+'"/>').appendTo(graphLegends);
 
@@ -30,7 +38,9 @@
         series: data ,
         height: 328
     });
-    var x_axis = new Rickshaw.Graph.Axis.Time( { graph: graph } );
+    var x_axis = new Rickshaw.Graph.Axis.Time( { 
+        graph: graph 
+    } );
     var y_axis = new Rickshaw.Graph.Axis.Y( {
         graph: graph,
         orientation: 'left',

--- a/ckanext/ga_report/templates/ga_report/ga_util.html
+++ b/ckanext/ga_report/templates/ga_report/ga_util.html
@@ -8,8 +8,8 @@
 
 <select name="month" py:def="month_selector(current_month, months, day)">
     <option value='' py:attrs="{'selected': 'selected' if not current_month else None}">All months</option>
-  <py:for each="i, (val,desc) in enumerate(months)">
-    <option value='${val}' py:attrs="{'selected': 'selected' if current_month == val else None}">${desc}<py:if test="i == 0 and day"> (up to ${day})</py:if></option>
+  <py:for each="(iso_code,string_name) in months">
+    <option value='${iso_code}' py:attrs="{'selected': 'selected' if current_month == iso_code else None}">${h.month_option_title(iso_code,months,day)}</option>
   </py:for>
 </select>
 
@@ -37,7 +37,6 @@
     <script type="text/javascript">
       $(function() {
           var items = $items_json;
-          if (items.length==0) { return; }
           CKAN.GA_Reports.render_rickshaw('$id', items, '$mode', '$colorscheme');
       });
     </script>

--- a/ckanext/ga_report/templates/ga_report/publisher/index.html
+++ b/ckanext/ga_report/templates/ga_report/publisher/index.html
@@ -30,9 +30,11 @@
     <div class="boxed">
 
        ${rickshaw_graph(c.top_publishers_graph,'publishers')}
+
+       <hr/>
        <form class="form-inline" action="${h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport',action='publishers')}" method="get">
           <div class="controls">
-            <label>Show stats table for:</label>
+            <h4 class="ga-reports-heading">Statistics for</h4>
             ${month_selector(c.month, c.months, c.day)}
           </div>
        </form>

--- a/ckanext/ga_report/templates/ga_report/publisher/read.html
+++ b/ckanext/ga_report/templates/ga_report/publisher/read.html
@@ -29,9 +29,7 @@
     <div class="boxed">
      <h3 py:if="c.publisher"><a href="${h.url_for(controller='ckanext.dgu.controllers.publisher:PublisherController',action='read',id=c.publisher.name)}">${c.publisher.title}</a></h3>
 
-     <p py:if="not c.top_packages">No page views in this period</p>
-
-     <py:if test="c.top_packages">
+     <py:if test="c.graph_data">
        ${rickshaw_graph(c.graph_data,'dataset-downloads',debug=True)}
      </py:if>
      <form class="form-inline" action="${h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport',action='read')}" method="get">
@@ -48,6 +46,13 @@
           <input class="btn button btn-primary" type='submit' value="Update"/>
         </div>
      </form>
+     <py:if test="c.month">
+       <h4>Statistics for ${h.month_option_title(c.month,c.months,c.day)}:</h4>
+     </py:if>
+     <py:if test="not c.month">
+       <h4>Statistics for all months:</h4>
+     </py:if>
+     <div class="alert alert-info" py:if="not c.top_packages">No page views in this period.</div>
      <py:if test="c.top_packages">
        <table class="ga-reports-table table table-condensed table-bordered table-striped">
          <tr>

--- a/ckanext/ga_report/templates/ga_report/site/index.html
+++ b/ckanext/ga_report/templates/ga_report/site/index.html
@@ -59,7 +59,7 @@
       <div class="tab-content">
         <div class="tab-pane active" id="totals">
              <form class="form-inline" action="${h.url_for(controller='ckanext.ga_report.controller:GaReport',action='index')}" method="get">
-               <label>Show stats table for:</label>
+               <h4 class="ga-reports-heading">Show stats table for:</h4>
                ${month_selector(c.month, c.months, c.day)}
              </form>
              <table class="ga-reports-table table table-condensed table-bordered table-striped">
@@ -83,33 +83,37 @@
         </div>
          <div class="tab-pane" id="browsers_versions">
              ${rickshaw_graph(c.browser_versions_graph,'browser-versions',mode='stack')}
+             <hr/>
              <p>Note: Where a browser has a large number of versions, these have been grouped together.</p>
              <form class="form-inline" action="${h.url_for(controller='ckanext.ga_report.controller:GaReport',action='index')}" method="get">
-               <label>Show stats table for:</label>
+               <h4 class="ga-reports-heading">Show stats table for:</h4>
                ${month_selector(c.month, c.months, c.day)}
              </form>
              ${stat_table(c.browser_versions)}
          </div>
          <div class="tab-pane" id="browsers_names">
              ${rickshaw_graph(c.browsers_graph,'browsers',mode='stack')}
+             <hr/>
              <form class="form-inline" action="${h.url_for(controller='ckanext.ga_report.controller:GaReport',action='index')}" method="get">
-               <label>Show stats table for:</label>
+               <h4 class="ga-reports-heading">Show stats table for:</h4>
                ${month_selector(c.month, c.months, c.day)}
              </form>
              ${stat_table(c.browsers)}
          </div>
          <div class="tab-pane" id="os">
              ${rickshaw_graph(c.os_graph,'os',mode='stack')}
+             <hr/>
              <form class="form-inline" action="${h.url_for(controller='ckanext.ga_report.controller:GaReport',action='index')}" method="get">
-               <label>Show stats table for:</label>
+               <h4 class="ga-reports-heading">Show stats table for:</h4>
                ${month_selector(c.month, c.months, c.day)}
              </form>
              ${stat_table(c.os)}
          </div>
          <div class="tab-pane" id="os_versions">
              ${rickshaw_graph(c.os_versions_graph,'os_versions',mode='stack')}
+             <hr/>
              <form class="form-inline" action="${h.url_for(controller='ckanext.ga_report.controller:GaReport',action='index')}" method="get">
-               <label>Show stats table for:</label>
+               <h4 class="ga-reports-heading">Show stats table for:</h4>
                ${month_selector(c.month, c.months, c.day)}
              </form>
              ${stat_table(c.os_versions)}
@@ -117,32 +121,35 @@
         <div class="tab-pane" id="social_referrals_totals">
             <p>Number of visits that were referred from social networks</p>
              <form class="form-inline" action="${h.url_for(controller='ckanext.ga_report.controller:GaReport',action='index')}" method="get">
-               <label>Show stats table for:</label>
+               <h4 class="ga-reports-heading">Show stats table for:</h4>
                ${month_selector(c.month, c.months, c.day)}
              </form>
             ${social_table(c.social_referrer_totals)}
         </div>
         <div class="tab-pane" id="social_networks">
              ${rickshaw_graph(c.social_networks_graph, 'social_networks',mode='stack')}
+             <hr/>
             <p>Percentage of visits that were referred from these social networks</p>
              <form class="form-inline" action="${h.url_for(controller='ckanext.ga_report.controller:GaReport',action='index')}" method="get">
-               <label>Show stats table for:</label>
+               <h4 class="ga-reports-heading">Show stats table for:</h4>
                ${month_selector(c.month, c.months, c.day)}
              </form>
              ${stat_table(c.social_networks, 'Visits')}
         </div>
         <div class="tab-pane" id="languages">
              ${rickshaw_graph(c.languages_graph,'languages',mode='stack')}
+             <hr/>
              <form class="form-inline" action="${h.url_for(controller='ckanext.ga_report.controller:GaReport',action='index')}" method="get">
-               <label>Show stats table for:</label>
+               <h4 class="ga-reports-heading">Show stats table for:</h4>
                ${month_selector(c.month, c.months, c.day)}
              </form>
              ${stat_table(c.languages)}
         </div>
         <div class="tab-pane" id="country">
              ${rickshaw_graph(c.country_graph,'country',mode='stack')}
+             <hr/>
              <form class="form-inline" action="${h.url_for(controller='ckanext.ga_report.controller:GaReport',action='index')}" method="get">
-               <label>Show stats table for:</label>
+               <h4 class="ga-reports-heading">Show stats table for:</h4>
                ${month_selector(c.month, c.months, c.day)}
              </form>
              ${stat_table(c.country)}