1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 | import logging import operator from ckan.lib.base import BaseController, c, render, request, response, abort import sqlalchemy from sqlalchemy import func, cast, Integer import ckan.model as model from ga_model import GA_Url, GA_Stat log = logging.getLogger('ckanext.ga-report') def _get_month_name(strdate): import calendar from time import strptime d = strptime(strdate, '%Y-%m') return '%s %s' % (calendar.month_name[d.tm_mon], d.tm_year) def _month_details(cls): months = [] vals = model.Session.query(cls.period_name).distinct().all() for m in vals: months.append( (m[0], _get_month_name(m[0]))) return sorted(months, key=operator.itemgetter(0), reverse=True) class GaReport(BaseController): def csv(self, month): import csv entries = model.Session.query(GA_Stat).\ filter(GA_Stat.period_name==month).\ order_by('GA_Stat.stat_name, GA_Stat.key').all() response.headers['Content-Type'] = "text/csv; charset=utf-8" writer = csv.writer(response) writer.writerow(["Period", "Statistic", "Key", "Value"]) for entry in entries: writer.writerow([entry.period_name.encode('utf-8'), entry.stat_name.encode('utf-8'), entry.key.encode('utf-8'), entry.value.encode('utf-8')]) def index(self): # Get the month details by fetching distinct values and determining the # month names from the values. c.months = _month_details(GA_Stat) # Work out which month to show, based on query params of the first item c.month = request.params.get('month', c.months[0][0] if c.months else '') c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month]) entries = model.Session.query(GA_Stat).\ filter(GA_Stat.stat_name=='Totals').\ filter(GA_Stat.period_name==c.month).\ order_by('ga_stat.key').all() c.global_totals = [] for e in entries: val = e.value if e.key in ['Average time on site', 'Pages per visit', 'Percent new visits']: val = "%.2f" % round(float(e.value), 2) if e.key == 'Average time on site': mins, secs = divmod(float(val), 60) hours, mins = divmod(mins, 60) val = '%02d:%02d:%02d (%s seconds) ' % (hours, mins, secs, val) e.key = '%s *' % e.key c.global_totals.append((e.key, val)) keys = { 'Browser versions': 'browsers', 'Operating Systems versions': 'os', 'Social sources': 'social_networks', 'Languages': 'languages', 'Country': 'country' } for k, v in keys.iteritems(): entries = model.Session.query(GA_Stat).\ filter(GA_Stat.stat_name==k).\ filter(GA_Stat.period_name==c.month).\ order_by('ga_stat.value::int desc').all() setattr(c, v, [(s.key, s.value) for s in entries ]) return render('ga_report/site/index.html') class GaPublisherReport(BaseController): """ Displays the pageview and visit count for specific publishers based on the datasets associated with the publisher. """ def index(self): # Get the month details by fetching distinct values and determining the # month names from the values. c.months = _month_details(GA_Url) # Work out which month to show, based on query params of the first item c.month = request.params.get('month', c.months[0][0] if c.months else '') c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month]) connection = model.Session.connection() q = """ select department_id, sum(pageviews::int) views, sum(visitors::int) visits from ga_url where department_id <> '' and period_name=%s group by department_id order by views desc limit 20; """ # Add this back (before and period_name =%s) if you want to ignore publisher # homepage views # and not url like '/publisher/%%' c.top_publishers = [] res = connection.execute(q, c.month) for row in res: c.top_publishers.append((model.Group.get(row[0]), row[1], row[2])) return render('ga_report/publisher/index.html') def read(self, id): c.publisher = model.Group.get(id) if not c.publisher: abort(404, '404 Not Found') c.top_packages = [] # package, dataset_views in c.top_packages # Get the month details by fetching distinct values and determining the # month names from the values. c.months = _month_details(GA_Url) # Work out which month to show, based on query params of the first item c.month = request.params.get('month', c.months[0][0] if c.months else '') c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month]) entry = model.Session.query(GA_Url).\ filter(GA_Url.url=='/publisher/%s' % c.publisher.name).\ filter(GA_Url.period_name==c.month).first() c.publisher_page_views = entry.pageviews if entry else 0 entries = model.Session.query(GA_Url).\ filter(GA_Url.department_id==c.publisher.name).\ filter(GA_Url.period_name==c.month).\ order_by('ga_url.pageviews::int desc')[:20] for entry in entries: if entry.url.startswith('/dataset/'): p = model.Package.get(entry.url[len('/dataset/'):]) c.top_packages.append((p,entry.pageviews,entry.visitors)) return render('ga_report/publisher/read.html') |