From: Tom Rees Date: Thu, 24 Jan 2013 18:44:25 +0000 Subject: #167 Fixing for /dataset and /publishers X-Git-Url: http://maxious.lambdacomplex.org/git/?p=ckanext-ga-report.git&a=commitdiff&h=78c6e5902e57571e97b795027bb194693ae22acc --- #167 Fixing for /dataset and /publishers --- --- a/ckanext/ga_report/command.py +++ b/ckanext/ga_report/command.py @@ -23,7 +23,7 @@ import ckan.model as model model.Session.remove() model.Session.configure(bind=model.meta.engine) - log = logging.getLogger('ckanext.ga-report') + log = logging.getLogger('ckanext.ga_report') import ga_model ga_model.init_tables() --- a/ckanext/ga_report/controller.py +++ b/ckanext/ga_report/controller.py @@ -445,9 +445,15 @@ # Roll insignificant series into one data = significant_series data.append(catch_all) + # Turn each point into a percentage + for package in data: + for point in package['data']: + point['y'] = (point['y']*100) / totals[point['x']] # Sort the points for package in data: package['data'] = sorted( package['data'], key=lambda x:x['x'] ) + # Strip the latest month's incomplete analytics + package['data'] = package['data'][:-1] return data --- a/ckanext/ga_report/download_analytics.py +++ b/ckanext/ga_report/download_analytics.py @@ -32,6 +32,11 @@ first_of_this_month = datetime.datetime(date.year, date.month, 1) _, last_day_of_month = calendar.monthrange(int(date.year), int(date.month)) last_of_this_month = datetime.datetime(date.year, date.month, last_day_of_month) + # if this is the latest month, note that it is only up until today + now = datetime.datetime.now() + if now.year == date.year and now.month == date.month: + last_day_of_month = now.day + last_of_this_month = now periods = ((date.strftime(FORMAT_MONTH), last_day_of_month, first_of_this_month, last_of_this_month),) @@ -126,7 +131,7 @@ # Make sure the All records are correct. ga_model.post_update_url_stats() - log.info('Aggregating datasets by publisher') + log.info('Associating datasets with their publisher') ga_model.update_publisher_stats(period_name) # about 30 seconds. @@ -298,7 +303,7 @@ def _download_stats(self, start_date, end_date, period_name, period_complete_day): - """ Fetches stats about language and country """ + """ Fetches stats about data downloads """ import ckan.model as model data = {} @@ -320,7 +325,14 @@ return def process_result_data(result_data, cached=False): + progress_total = len(result_data) + progress_count = 0 + resources_not_matched = [] for result in result_data: + progress_count += 1 + if progress_count % 100 == 0: + log.debug('.. %d/%d done so far', progress_count, progress_total) + url = result[0].strip() # Get package id associated with the resource that has this URL. @@ -334,9 +346,13 @@ if package_name: data[package_name] = data.get(package_name, 0) + int(result[1]) else: - log.warning(u"Could not find resource for URL: {url}".format(url=url)) + resources_not_matched.append(url) continue - + if resources_not_matched: + log.debug('Could not match %i or %i resource URLs to datasets. e.g. %r', + len(resources_not_matched), progress_total, resources_not_matched[:3]) + + log.info('Associating downloads of resource URLs with their respective datasets') process_result_data(results.get('rows')) results = self.service.data().ga().get( @@ -348,6 +364,7 @@ dimensions="ga:eventLabel", max_results=10000, end_date=end_date).execute() + log.info('Associating downloads of cache resource URLs with their respective datasets') process_result_data(results.get('rows'), cached=False) self._filter_out_long_tail(data, MIN_DOWNLOADS) --- a/ckanext/ga_report/ga_model.py +++ b/ckanext/ga_report/ga_model.py @@ -161,20 +161,20 @@ def pre_update_url_stats(period_name): - log.debug("Deleting '%s' records" % period_name) - model.Session.query(GA_Url).\ - filter(GA_Url.period_name==period_name).delete() - - count = model.Session.query(GA_Url).\ - filter(GA_Url.period_name == 'All').count() - log.debug("Deleting %d 'All' records" % count) - count = model.Session.query(GA_Url).\ - filter(GA_Url.period_name == 'All').delete() - log.debug("Deleted %d 'All' records" % count) + q = model.Session.query(GA_Url).\ + filter(GA_Url.period_name==period_name) + log.debug("Deleting %d '%s' records" % (q.count(), period_name)) + q.delete() + + q = model.Session.query(GA_Url).\ + filter(GA_Url.period_name == 'All') + log.debug("Deleting %d 'All' records..." % q.count()) + q.delete() model.Session.flush() model.Session.commit() model.repo.commit_and_remove() + log.debug('...done') def post_update_url_stats(): @@ -185,6 +185,7 @@ record regardless of whether the URL has an entry for the month being currently processed. """ + log.debug('Post-processing "All" records...') query = """select url, pageviews::int, visits::int from ga_url where url not in (select url from ga_url where period_name ='All')""" @@ -197,7 +198,13 @@ views[row[0]] = views.get(row[0], 0) + row[1] visits[row[0]] = visits.get(row[0], 0) + row[2] + progress_total = len(views.keys()) + progress_count = 0 for key in views.keys(): + progress_count += 1 + if progress_count % 100 == 0: + log.debug('.. %d/%d done so far', progress_count, progress_total) + package, publisher = _get_package_and_publisher(key) values = {'id': make_uuid(), @@ -207,10 +214,11 @@ 'pageviews': views[key], 'visits': visits[key], 'department_id': publisher, - 'package_id': publisher + 'package_id': package } model.Session.add(GA_Url(**values)) model.Session.commit() + log.debug('..done') def update_url_stats(period_name, period_complete_day, url_data): @@ -219,9 +227,14 @@ stores them in GA_Url under the period and recalculates the totals for the 'All' period. ''' + progress_total = len(url_data) + progress_count = 0 for url, views, visits in url_data: + progress_count += 1 + if progress_count % 100 == 0: + log.debug('.. %d/%d done so far', progress_count, progress_total) + package, publisher = _get_package_and_publisher(url) - item = model.Session.query(GA_Url).\ filter(GA_Url.period_name==period_name).\ --- a/ckanext/ga_report/helpers.py +++ b/ckanext/ga_report/helpers.py @@ -106,6 +106,10 @@ for entry in entries: if len(datasets) < count: p = model.Package.get(entry.url[len('/dataset/'):]) + if not p: + _log.warning("Could not find Package for {url}".format(url=entry.url)) + continue + if not p in datasets: datasets[p] = {'views':0, 'visits': 0} datasets[p]['views'] = datasets[p]['views'] + int(entry.pageviews) --- a/ckanext/ga_report/public/css/ga_report.css +++ b/ckanext/ga_report/public/css/ga_report.css @@ -16,16 +16,8 @@ bottom: 0; } .rickshaw_legend { - position: absolute; - right: 0; - top: 0; - margin-left: 15px; - padding: 0 5px; background: transparent; - max-width: 150px; - overflow: hidden; - background: rgba(0,0,0,0.05); - border-radius:5px; + width: 100%; } .rickshaw_y_axis { position: absolute; --- /dev/null +++ b/ckanext/ga_report/public/scripts/ckanext_ga_reports.js @@ -1,1 +1,114 @@ +var CKAN = CKAN || {}; +CKAN.GA_Reports = {}; + +CKAN.GA_Reports.render_rickshaw = function( css_name, data, mode, colorscheme ) { + var graphLegends = $('#graph-legend-container'); + var myLegend = $('
').appendTo(graphLegends); + + var palette = new Rickshaw.Color.Palette( { scheme: colorscheme } ); + $.each(data, function(i, object) { + object['color'] = palette.color(); + }); + + var graphElement = document.querySelector("#chart_"+css_name); + + var graph = new Rickshaw.Graph( { + element: document.querySelector("#chart_"+css_name), + renderer: mode, + series: data , + height: 328 + }); + var x_axis = new Rickshaw.Graph.Axis.Time( { graph: graph } ); + var y_axis = new Rickshaw.Graph.Axis.Y( { + graph: graph, + orientation: 'left', + tickFormat: Rickshaw.Fixtures.Number.formatKMBT, + element: document.getElementById('y_axis_'+css_name), + } ); + var legend = new Rickshaw.Graph.Legend( { + element: document.querySelector('#legend_'+css_name), + graph: graph + } ); + var hoverDetail = new Rickshaw.Graph.HoverDetail( { + graph: graph, + formatter: function(series, x, y) { + var date = '' + new Date(x * 1000).toUTCString() + ''; + var swatch = ''; + var content = swatch + series.name + ": " + parseInt(y) + '
' + date; + return content; + } + } ); + graph.render(); +}; + +CKAN.GA_Reports.bind_sparklines = function() { + /* + * Bind to the 'totals' tab being on screen, when the + * Sparkline graphs should be drawn. + * Note that they cannot be drawn sooner. + */ + $('a[href="#totals"]').on( + 'shown', + function() { + var sparkOptions = { + enableTagOptions: true, + type: 'line', + width: 100, + height: 26, + chartRangeMin: 0, + spotColor: '', + maxSpotColor: '', + minSpotColor: '', + highlightSpotColor: '000000', + lineColor: '3F8E6D', + fillColor: 'B7E66B' + }; + $('.sparkline').sparkline('html',sparkOptions); + } + ); +}; + +CKAN.GA_Reports.bind_sidebar = function() { + /* + * Bind to changes in the tab behaviour: + * Show the correct rickshaw graph in the sidebar. + * Not to be called before all graphs load. + */ + $('a[data-toggle="hashchange"]').on( + 'shown', + function(e) { + var href = $(e.target).attr('href'); + var pane = $(href); + if (!pane.length) { console.err('bad href',href); return; } + var legend_name = "none"; + var graph = pane.find('.rickshaw_chart'); + if (graph.length) { + legend_name = graph.attr('id').replace('chart_',''); + } + legend_name = '#legend_'+legend_name; + $('#graph-legend-container > *').hide(); + $(legend_name).show(); + } + ); +}; + +/* + * Custom bootstrap plugin for handling data-toggle="hashchange". + * Behaves like data-toggle="tab" but I respond to the hashchange. + * Page state is memo-ized in the URL this way. Why doesn't Bootstrap do this? + */ +$(function() { + var mapping = {}; + $('a[data-toggle="hashchange"]').each( + function(i,link) { + link = $(link); + mapping[link.attr('href')] = link; + } + ); + $(window).hashchange(function() { + var link = mapping[window.location.hash]; + if (link) { link.tab('show'); } + }); +}); + --- a/ckanext/ga_report/templates/ga_report/ga_util.html +++ b/ckanext/ga_report/templates/ga_report/ga_util.html @@ -30,36 +30,13 @@ -
+
-
--- a/ckanext/ga_report/templates/ga_report/notes.html +++ b/ckanext/ga_report/templates/ga_report/notes.html @@ -7,7 +7,7 @@

Notes

  • "Views" is the number of times a page was loaded in users' browsers.
  • -
  • "Downloads" is the number of times a user has clicked to download either an original or cached resource for a particular dataset since December 2012
  • +
  • "Downloads" is the number of times a user has clicked to download either an original or cached resource for a particular dataset. Download information is only available from 2nd December 2012; 'No data' is shown for records before that date.
  • These usage statistics are confined to users with javascript enabled, which excludes web crawlers and API calls.
  • The results are not shown when the number of views/visits is tiny. Where these relate to site pages, results are available in full in the CSV download. Where these relate to users' web browser information, results are not disclosed, for privacy reasons.
--- a/ckanext/ga_report/templates/ga_report/publisher/index.html +++ b/ckanext/ga_report/templates/ga_report/publisher/index.html @@ -14,12 +14,18 @@ Download as CSV

+
  • +

    Graph Legend

    +
    +
    +
  • + --- a/ckanext/ga_report/templates/ga_report/publisher/read.html +++ b/ckanext/ga_report/templates/ga_report/publisher/read.html @@ -10,6 +10,7 @@ + @@ -22,6 +23,11 @@

    Download as CSV

    + +
  • +

    Graph Legend

    +
    +
  • --- a/ckanext/ga_report/templates/ga_report/site/index.html +++ b/ckanext/ga_report/templates/ga_report/site/index.html @@ -10,6 +10,7 @@ + @@ -22,6 +23,12 @@

    Download as CSV

    + +
  • +

    Graph Legend

    +
    +
    (No graph loaded)
    +
  • @@ -42,38 +49,36 @@
    -
    +
    @@ -135,20 +140,14 @@
    Name