--- a/ckanext/ga_report/controller.py +++ b/ckanext/ga_report/controller.py @@ -9,7 +9,7 @@ import sqlalchemy from sqlalchemy import func, cast, Integer import ckan.model as model -from ga_model import GA_Url, GA_Stat, GA_ReferralStat +from ga_model import GA_Url, GA_Stat, GA_ReferralStat, GA_Publisher log = logging.getLogger('ckanext.ga-report') @@ -22,11 +22,29 @@ def _month_details(cls): + ''' + Returns a list of all the periods for which we have data, unfortunately + knows too much about the type of the cls being passed as GA_Url has a + more complex query + + This may need extending if we add a period_name to the stats + ''' months = [] - vals = model.Session.query(cls.period_name).distinct().all() + day = None + + vals = model.Session.query(cls.period_name,cls.period_complete_day)\ + .filter(cls.period_name!='All').distinct(cls.period_name)\ + .order_by("period_name desc").all() + if vals and vals[0][1]: + day = int(vals[0][1]) + ordinal = 'th' if 11 <= day <= 13 \ + else {1:'st',2:'nd',3:'rd'}.get(day % 10, 'th') + day = "{day}{ordinal}".format(day=day, ordinal=ordinal) + for m in vals: months.append( (m[0], _get_month_name(m[0]))) - return sorted(months, key=operator.itemgetter(0), reverse=True) + + return months, day class GaReport(BaseController): @@ -34,7 +52,7 @@ def csv(self, month): import csv - q = model.Session.query(GA_Stat) + q = model.Session.query(GA_Stat).filter(GA_Stat.stat_name!='Downloads') if month != 'all': q = q.filter(GA_Stat.period_name==month) entries = q.order_by('GA_Stat.period_name, GA_Stat.stat_name, GA_Stat.key').all() @@ -51,11 +69,31 @@ entry.key.encode('utf-8'), entry.value.encode('utf-8')]) + def csv_downloads(self, month): + import csv + + q = model.Session.query(GA_Stat).filter(GA_Stat.stat_name=='Downloads') + if month != 'all': + q = q.filter(GA_Stat.period_name==month) + entries = q.order_by('GA_Stat.period_name, GA_Stat.key').all() + + response.headers['Content-Type'] = "text/csv; charset=utf-8" + 
response.headers['Content-Disposition'] = str('attachment; filename=downloads_%s.csv' % (month,)) + + writer = csv.writer(response) + writer.writerow(["Period", "Resource URL", "Count"]) + + for entry in entries: + writer.writerow([entry.period_name.encode('utf-8'), + entry.key.encode('utf-8'), + entry.value.encode('utf-8')]) + + def index(self): # Get the month details by fetching distinct values and determining the # month names from the values. - c.months = _month_details(GA_Stat) + c.months, c.day = _month_details(GA_Stat) # Work out which month to show, based on query params of the first item c.month_desc = 'all months' @@ -70,15 +108,15 @@ entries = q.order_by('ga_stat.key').all() def clean_key(key, val): - if key in ['Average time on site', 'Pages per visit', 'New visits']: + if key in ['Average time on site', 'Pages per visit', 'New visits', 'Bounce rate (home page)']: val = "%.2f" % round(float(val), 2) if key == 'Average time on site': mins, secs = divmod(float(val), 60) hours, mins = divmod(mins, 60) val = '%02d:%02d:%02d (%s seconds) ' % (hours, mins, secs, val) - if key == 'New visits': + if key in ['New visits','Bounce rate (home page)']: val = "%s%%" % val - if key in ['Bounces', 'Total page views', 'Total visits']: + if key in ['Total page views', 'Total visits']: val = int(val) return key, val @@ -93,11 +131,12 @@ for e in entries: d[e.key].append(float(e.value)) for k, v in d.iteritems(): - if k in ['Bounces', 'Total page views', 'Total visits']: + if k in ['Total page views', 'Total visits']: v = sum(v) else: - v = float(sum(v))/len(v) + v = float(sum(v))/float(len(v)) key, val = clean_key(k,v) + c.global_totals.append((key, val)) c.global_totals = sorted(c.global_totals, key=operator.itemgetter(0)) @@ -134,29 +173,7 @@ c.social_referrer_totals.append((shorten_name(entry[0]), fill_out_url(entry[0]),'', entry[1])) - - browser_version_re = re.compile("(.*)\((.*)\)") for k, v in keys.iteritems(): - - def clean_field(key): - if k != 'Browser 
versions': - return key - m = browser_version_re.match(key) - browser = m.groups()[0].strip() - ver = m.groups()[1] - parts = ver.split('.') - if len(parts) > 1: - if parts[1][0] == '0': - ver = parts[0] - else: - ver = "%s.%s" % (parts[0],parts[1]) - if browser in ['Safari','Android Browser']: # Special case complex version nums - ver = parts[0] - if len(ver) > 2: - ver = "%s%sX" % (ver[0], ver[1]) - - return "%s (%s)" % (browser, ver,) - q = model.Session.query(GA_Stat).\ filter(GA_Stat.stat_name==k) if c.month: @@ -172,19 +189,42 @@ entries.append((key,val,)) entries = sorted(entries, key=operator.itemgetter(1), reverse=True) - def percent(num, total): - p = 100 * float(num)/float(total) - return "%.2f%%" % round(p, 2) - # Get the total for each set of values and then set the value as # a percentage of the total if k == 'Social sources': total = sum([x for n,x in c.global_totals if n == 'Total visits']) else: total = sum([num for _,num in entries]) - setattr(c, v, [(k,percent(v,total)) for k,v in entries ]) + setattr(c, v, [(k,_percent(v,total)) for k,v in entries ]) return render('ga_report/site/index.html') + + def downloads(self): + + # Get the month details by fetching distinct values and determining the + # month names from the values. 
+ c.months, c.day = _month_details(GA_Stat) + + # Work out which month to show, based on query params of the first item + c.month_desc = 'all months' + c.month = request.params.get('month', '') + if c.month: + c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month]) + + c.downloads = [] + q = model.Session.query(GA_Stat).filter(GA_Stat.stat_name=='Downloads') + q = q.filter(GA_Stat.period_name==c.month) if c.month else q + q = q.order_by("ga_stat.value::int desc") + + for entry in q.all(): + log.debug("Looking up resource for %s", entry.key) + r = model.Session.query(model.Resource).filter(model.Resource.url==entry.key).first() + if r: + c.downloads.append((r,entry.value)) + else: + log.info("Failed to find resource for %s", entry.key) + + return render('ga_report/site/downloads.html') class GaDatasetReport(BaseController): @@ -244,7 +284,7 @@ # Get the month details by fetching distinct values and determining the # month names from the values. - c.months = _month_details(GA_Url) + c.months, c.day = _month_details(GA_Url) # Work out which month to show, based on query params of the first item c.month = request.params.get('month', '') @@ -253,51 +293,29 @@ c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month]) c.top_publishers = _get_top_publishers() - return render('ga_report/publisher/index.html') def _get_packages(self, publisher=None, count=-1): - '''Returns the datasets in order of visits''' + '''Returns the datasets in order of views''' if count == -1: count = sys.maxint - q = model.Session.query(GA_Url)\ + month = c.month or 'All' + + q = model.Session.query(GA_Url,model.Package)\ + .filter(model.Package.name==GA_Url.package_id)\ + .filter(GA_Url.url.like('/dataset/%')) if publisher: q = q.filter(GA_Url.department_id==publisher.name) - if c.month: - q = q.filter(GA_Url.period_name==c.month) - q = q.order_by('ga_url.visitors::int desc') - - if c.month: - top_packages = [] - for entry in q.limit(count): - package_name = entry.url[len('/dataset/'):] - p = 
model.Package.get(package_name) - if p: - top_packages.append((p, entry.pageviews, entry.visitors)) - else: - log.warning('Could not find package "%s"', package_name) - else: - ds = {} - for entry in q: - if len(ds) >= count: - break - package_name = entry.url[len('/dataset/'):] - p = model.Package.get(package_name) - if p: - if not p in ds: - ds[p] = {'views': 0, 'visits': 0} - ds[p]['views'] = ds[p]['views'] + int(entry.pageviews) - ds[p]['visits'] = ds[p]['visits'] + int(entry.visitors) - else: - log.warning('Could not find package "%s"', package_name) - - results = [] - for k, v in ds.iteritems(): - results.append((k,v['views'],v['visits'])) - - top_packages = sorted(results, key=operator.itemgetter(1), reverse=True) + q = q.filter(GA_Url.period_name==month) + q = q.order_by('ga_url.pageviews::int desc') + top_packages = [] + for entry,package in q.limit(count): + if package: + top_packages.append((package, entry.pageviews, entry.visits)) + else: + log.warning('Could not find package associated with "%s"', entry.url) + return top_packages def read(self): @@ -324,7 +342,7 @@ # Get the month details by fetching distinct values and determining the # month names from the values. 
- c.months = _month_details(GA_Url) + c.months, c.day = _month_details(GA_Url) # Work out which month to show, based on query params of the first item c.month = request.params.get('month', '') @@ -333,15 +351,12 @@ else: c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month]) + month = c.month or 'All' c.publisher_page_views = 0 q = model.Session.query(GA_Url).\ filter(GA_Url.url=='/publisher/%s' % c.publisher_name) - if c.month: - entry = q.filter(GA_Url.period_name==c.month).first() - c.publisher_page_views = entry.pageviews if entry else 0 - else: - for e in q.all(): - c.publisher_page_views = c.publisher_page_views + int(e.pageviews) + entry = q.filter(GA_Url.period_name==month).first() + c.publisher_page_views = entry.pageviews if entry else 0 c.top_packages = self._get_packages(c.publisher, 20) @@ -352,38 +367,33 @@ Returns a list of the top 20 publishers by dataset visits. (The number to show can be varied with 'limit') ''' + month = c.month or 'All' connection = model.Session.connection() q = """ - select department_id, sum(pageviews::int) views, sum(visitors::int) visits + select department_id, sum(pageviews::int) views, sum(visits::int) visits from ga_url - where department_id <> ''""" - if c.month: - q = q + """ - and period_name=%s - """ - q = q + """ - group by department_id order by visits desc + where department_id <> '' + and package_id <> '' + and url like '/dataset/%%' + and period_name=%s + group by department_id order by views desc """ if limit: q = q + " limit %s;" % (limit) - # Add this back (before and period_name =%s) if you want to ignore publisher - # homepage views - # and not url like '/publisher/%%' - top_publishers = [] - res = connection.execute(q, c.month) - + res = connection.execute(q, month) for row in res: g = model.Group.get(row[0]) if g: top_publishers.append((g, row[1], row[2])) return top_publishers + def _get_publishers(): ''' Returns a list of all publishers. 
Each item is a tuple: - (names, title) + (name, title) ''' publishers = [] for pub in model.Session.query(model.Group).\ @@ -393,3 +403,7 @@ publishers.append((pub.name, pub.title)) return publishers +def _percent(num, total): + p = 100 * float(num)/float(total) if total else 0.0 + return "%.2f%%" % round(p, 2) +