From: Tom Rees Date: Fri, 25 Jan 2013 12:06:59 +0000 Subject: #167 Refactoring controller's logic to create better Percentage graphs X-Git-Url: http://maxious.lambdacomplex.org/git/?p=ckanext-ga-report.git&a=commitdiff&h=66388ec20aa1030e4cd7333f4ee6f4c75876b1c3 --- #167 Refactoring controller's logic to create better Percentage graphs --- --- a/ckanext/ga_report/controller.py +++ b/ckanext/ga_report/controller.py @@ -411,49 +411,52 @@ if data==[]: return data # Create a consistent x-axis - num_points = [ len(package['data']) for package in data ] + num_points = [ len(series['data']) for series in data ] ideal_index = num_points.index( max(num_points) ) x_axis = [ point['x'] for point in data[ideal_index]['data'] ] - for package in data: - xs = [ point['x'] for point in package['data'] ] + for series in data: + xs = [ point['x'] for point in series['data'] ] assert set(xs).issubset( set(x_axis) ), (xs, x_axis) # Zero pad any missing values for x in set(x_axis).difference(set(xs)): - package['data'].append( {'x':x, 'y':0} ) - assert len(package['data'])==len(x_axis), (len(package['data']),len(x_axis),package['data'],x_axis,set(x_axis).difference(set(xs))) + series['data'].append( {'x':x, 'y':0} ) + assert len(series['data'])==len(x_axis), (len(series['data']),len(x_axis),series['data'],x_axis,set(x_axis).difference(set(xs))) if percentageMode: + def get_totals(series_list): + totals = {} + for series in series_list: + for point in series['data']: + totals[point['x']] = totals.get(point['x'],0) + point['y'] + lengths = [ len(series['data']) for series in series_list ] + assert len(set(lengths))==1 + assert lengths[0] == len(totals) + return totals # Transform data into percentage stacks - totals = {} - for x in x_axis: - for package in data: - for point in package['data']: - totals[ point['x'] ] = totals.get(point['x'],0) + point['y'] + totals = get_totals(data) # Roll insignificant series into a catch-all THRESHOLD = 0.01 - significant_series = [] - for package in data: - for point in package['data']: + raw_data = data + data = [] + for series in raw_data: + for point in series['data']: fraction = float(point['y']) / totals[point['x']] - if fraction>THRESHOLD and not (package in significant_series): - significant_series.append(package) - temp = {} - for package in data: - if package in significant_series: continue - for point in package['data']: - temp[point['x']] = temp.get(point['x'],0) + point['y'] - catch_all = { 'name':'Other','data': [ {'x':x,'y':y} for x,y in temp.items() ] } - # Roll insignificant series into one - data = significant_series - data.append(catch_all) + if not (series in data) and fraction>THRESHOLD: + data.append(series) + # Overwrite data with a set of intereting series + others = [ x for x in raw_data if not (x in data) ] + data.append({ + 'name':'Other', + 'data': [ {'x':x,'y':y} for x,y in get_totals(others).items() ] + }) # Turn each point into a percentage - for package in data: - for point in package['data']: + for series in data: + for point in series['data']: point['y'] = (point['y']*100) / totals[point['x']] # Sort the points - for package in data: - package['data'] = sorted( package['data'], key=lambda x:x['x'] ) + for series in data: + series['data'] = sorted( series['data'], key=lambda x:x['x'] ) # Strip the latest month's incomplete analytics - package['data'] = package['data'][:-1] + series['data'] = series['data'][:-1] return data