Merge commit 'd0db210'
--- a/ckanext/ga_report/command.py
+++ b/ckanext/ga_report/command.py
@@ -23,7 +23,7 @@
import ckan.model as model
model.Session.remove()
model.Session.configure(bind=model.meta.engine)
- log = logging.getLogger('ckanext.ga-report')
+ log = logging.getLogger('ckanext.ga_report')
import ga_model
ga_model.init_tables()
--- a/ckanext/ga_report/controller.py
+++ b/ckanext/ga_report/controller.py
@@ -445,6 +445,10 @@
# Roll insignificant series into one
data = significant_series
data.append(catch_all)
+ # Turn each point into a percentage
+ for package in data:
+ for point in package['data']:
+ point['y'] = (point['y']*100) / totals[point['x']]
# Sort the points
for package in data:
package['data'] = sorted( package['data'], key=lambda x:x['x'] )
--- a/ckanext/ga_report/download_analytics.py
+++ b/ckanext/ga_report/download_analytics.py
@@ -32,6 +32,11 @@
first_of_this_month = datetime.datetime(date.year, date.month, 1)
_, last_day_of_month = calendar.monthrange(int(date.year), int(date.month))
last_of_this_month = datetime.datetime(date.year, date.month, last_day_of_month)
+ # if this is the latest month, note that it is only up until today
+ now = datetime.datetime.now()
+ if now.year == date.year and now.month == date.month:
+ last_day_of_month = now.day
+ last_of_this_month = now
periods = ((date.strftime(FORMAT_MONTH),
last_day_of_month,
first_of_this_month, last_of_this_month),)
@@ -126,7 +131,7 @@
# Make sure the All records are correct.
ga_model.post_update_url_stats()
- log.info('Aggregating datasets by publisher')
+ log.info('Associating datasets with their publisher')
ga_model.update_publisher_stats(period_name) # about 30 seconds.
@@ -298,7 +303,7 @@
def _download_stats(self, start_date, end_date, period_name, period_complete_day):
- """ Fetches stats about language and country """
+ """ Fetches stats about data downloads """
import ckan.model as model
data = {}
@@ -320,7 +325,14 @@
return
def process_result_data(result_data, cached=False):
+ progress_total = len(result_data)
+ progress_count = 0
+ resources_not_matched = []
for result in result_data:
+ progress_count += 1
+ if progress_count % 100 == 0:
+ log.debug('.. %d/%d done so far', progress_count, progress_total)
+
url = result[0].strip()
# Get package id associated with the resource that has this URL.
@@ -334,9 +346,13 @@
if package_name:
data[package_name] = data.get(package_name, 0) + int(result[1])
else:
- log.warning(u"Could not find resource for URL: {url}".format(url=url))
+ resources_not_matched.append(url)
continue
-
+ if resources_not_matched:  # one summary line: count unmatched, total, and up to 3 sample URLs
+ log.debug('Could not match %i of %i resource URLs to datasets. e.g. %r',
+ len(resources_not_matched), progress_total, resources_not_matched[:3])
+
+ log.info('Associating downloads of resource URLs with their respective datasets')
process_result_data(results.get('rows'))
results = self.service.data().ga().get(
@@ -348,6 +364,7 @@
dimensions="ga:eventLabel",
max_results=10000,
end_date=end_date).execute()
+ log.info('Associating downloads of cache resource URLs with their respective datasets')
process_result_data(results.get('rows'), cached=False)
self._filter_out_long_tail(data, MIN_DOWNLOADS)
--- a/ckanext/ga_report/ga_model.py
+++ b/ckanext/ga_report/ga_model.py
@@ -161,20 +161,20 @@
def pre_update_url_stats(period_name):
- log.debug("Deleting '%s' records" % period_name)
- model.Session.query(GA_Url).\
- filter(GA_Url.period_name==period_name).delete()
-
- count = model.Session.query(GA_Url).\
- filter(GA_Url.period_name == 'All').count()
- log.debug("Deleting %d 'All' records" % count)
- count = model.Session.query(GA_Url).\
- filter(GA_Url.period_name == 'All').delete()
- log.debug("Deleted %d 'All' records" % count)
+ q = model.Session.query(GA_Url).\
+ filter(GA_Url.period_name==period_name)
+ log.debug("Deleting %d '%s' records" % (q.count(), period_name))
+ q.delete()
+
+ q = model.Session.query(GA_Url).\
+ filter(GA_Url.period_name == 'All')
+ log.debug("Deleting %d 'All' records..." % q.count())
+ q.delete()
model.Session.flush()
model.Session.commit()
model.repo.commit_and_remove()
+ log.debug('...done')
def post_update_url_stats():
@@ -185,6 +185,7 @@
record regardless of whether the URL has an entry for
the month being currently processed.
"""
+ log.debug('Post-processing "All" records...')
query = """select url, pageviews::int, visits::int
from ga_url
where url not in (select url from ga_url where period_name ='All')"""
@@ -197,7 +198,13 @@
views[row[0]] = views.get(row[0], 0) + row[1]
visits[row[0]] = visits.get(row[0], 0) + row[2]
+ progress_total = len(views.keys())
+ progress_count = 0
for key in views.keys():
+ progress_count += 1
+ if progress_count % 100 == 0:
+ log.debug('.. %d/%d done so far', progress_count, progress_total)
+
package, publisher = _get_package_and_publisher(key)
values = {'id': make_uuid(),
@@ -211,6 +218,7 @@
}
model.Session.add(GA_Url(**values))
model.Session.commit()
+ log.debug('..done')
def update_url_stats(period_name, period_complete_day, url_data):
@@ -219,9 +227,14 @@
stores them in GA_Url under the period and recalculates the totals for
the 'All' period.
'''
+ progress_total = len(url_data)  # number of rows in url_data; only used for the progress log
+ progress_count = 0  # rows handled so far; logged every 100 rows
for url, views, visits in url_data:
+ progress_count += 1
+ if progress_count % 100 == 0:
+ log.debug('.. %d/%d done so far', progress_count, progress_total)
+
package, publisher = _get_package_and_publisher(url)
-
item = model.Session.query(GA_Url).\
filter(GA_Url.period_name==period_name).\
--- /dev/null
+++ b/ckanext/ga_report/public/scripts/ckanext_ga_reports.js
@@ -1,1 +1,42 @@
+var CKAN = CKAN || {};
+CKAN.GA_Reports = {};
+// Draws a Rickshaw graph into #chart_<css_name> with time/Y axes, a legend in #legend_<css_name> and hover detail.
+CKAN.GA_Reports.render_rickshaw = function( css_name, data, mode, colorscheme ) {
+ var palette = new Rickshaw.Color.Palette( { scheme: colorscheme } );
+ $.each(data, function(i, object) {
+ object['color'] = palette.color(); // give each series its own colour from the palette
+ });
+
+ var graphElement = document.querySelector("#chart_"+css_name);
+
+ var graph = new Rickshaw.Graph( {
+ element: graphElement, // reuse the node looked up above instead of querying the DOM again
+ renderer: mode,
+ series: data ,
+ height: 328
+ });
+ var x_axis = new Rickshaw.Graph.Axis.Time( { graph: graph } );
+ var y_axis = new Rickshaw.Graph.Axis.Y( {
+ graph: graph,
+ orientation: 'left',
+ tickFormat: Rickshaw.Fixtures.Number.formatKMBT,
+ element: document.getElementById('y_axis_'+css_name)
+ } );
+ var legend = new Rickshaw.Graph.Legend( {
+ element: document.querySelector('#legend_'+css_name),
+ graph: graph
+ } );
+ var hoverDetail = new Rickshaw.Graph.HoverDetail( {
+ graph: graph,
+ formatter: function(series, x, y) {
+ var date = '<span class="date">' + new Date(x * 1000).toUTCString() + '</span>'; // x is multiplied by 1000 to get the milliseconds Date expects
+ var swatch = '<span class="detail_swatch" style="background-color: ' + series.color + '"></span>';
+ var content = swatch + series.name + ": " + parseInt(y, 10) + '<br>' + date; // show y as a whole number
+ return content;
+ }
+ } );
+ graph.render();
+};
+
+
--- a/ckanext/ga_report/templates/ga_report/ga_util.html
+++ b/ckanext/ga_report/templates/ga_report/ga_util.html
@@ -37,29 +37,7 @@
<div id="legend_$id" class="rickshaw_legend"></div>
<script type="text/javascript">
$(function() {
- var all_series = ${items_json};
- var palette = new Rickshaw.Color.Palette( { scheme: '$colorscheme' } );
- $.each(all_series, function(i, object) {
- object['color'] = palette.color();
- });
- var graph = new Rickshaw.Graph( {
- element: document.querySelector("#chart_$id"),
- renderer: '$mode',
- series: all_series ,
- height: 328
- });
- var x_axis = new Rickshaw.Graph.Axis.Time( { graph: graph } );
- var y_axis = new Rickshaw.Graph.Axis.Y( {
- graph: graph,
- orientation: 'left',
- tickFormat: Rickshaw.Fixtures.Number.formatKMBT,
- element: document.getElementById('y_axis_$id'),
- } );
- var legend = new Rickshaw.Graph.Legend( {
- element: document.querySelector('#legend_$id'),
- graph: graph
- } );
- graph.render();
+ CKAN.GA_Reports.render_rickshaw('$id', $items_json, '$mode', '$colorscheme');
});
</script>
</div>
--- a/ckanext/ga_report/templates/ga_report/publisher/index.html
+++ b/ckanext/ga_report/templates/ga_report/publisher/index.html
@@ -20,6 +20,7 @@
<py:def function="optional_head">
<link rel="stylesheet" type="text/css" href="/scripts/vendor/rickshaw.min.css"/>
<link rel="stylesheet" type="text/css" href="/css/ga_report.css"/>
+ <script type="text/javascript" src="/scripts/ckanext_ga_reports.js"></script>
<script type="text/javascript" src="/scripts/vendor/jquery.sparkline.modified.js"></script>
<script type="text/javascript" src="/scripts/vendor/d3.v2.js"></script>
<script type="text/javascript" src="/scripts/vendor/d3.layout.min.js"></script>
--- a/ckanext/ga_report/templates/ga_report/publisher/read.html
+++ b/ckanext/ga_report/templates/ga_report/publisher/read.html
@@ -10,6 +10,7 @@
<py:def function="optional_head">
<link rel="stylesheet" type="text/css" href="/scripts/vendor/rickshaw.min.css"/>
<link rel="stylesheet" type="text/css" href="/css/ga_report.css"/>
+ <script type="text/javascript" src="/scripts/ckanext_ga_reports.js"></script>
<script type="text/javascript" src="/scripts/vendor/jquery.sparkline.modified.js"></script>
<script type="text/javascript" src="/scripts/vendor/d3.v2.js"></script>
<script type="text/javascript" src="/scripts/vendor/d3.layout.min.js"></script>
--- a/ckanext/ga_report/templates/ga_report/site/index.html
+++ b/ckanext/ga_report/templates/ga_report/site/index.html
@@ -10,6 +10,7 @@
<py:def function="optional_head">
<link rel="stylesheet" type="text/css" href="/scripts/vendor/rickshaw.min.css"/>
<link rel="stylesheet" type="text/css" href="/css/ga_report.css"/>
+ <script type="text/javascript" src="/scripts/ckanext_ga_reports.js"></script>
<script type="text/javascript" src="/scripts/vendor/jquery.sparkline.modified.js"></script>
<script type="text/javascript" src="/scripts/vendor/d3.v2.js"></script>
<script type="text/javascript" src="/scripts/vendor/d3.layout.min.js"></script>