From: Ross Jones Date: Wed, 07 Nov 2012 19:20:34 +0000 Subject: Changed to pageviews instead of unique pageviews X-Git-Url: http://maxious.lambdacomplex.org/git/?p=ckanext-ga-report.git&a=commitdiff&h=4b80158f040905c08d3a097be0f0e929eb9f0afd --- Changed to pageviews instead of unique pageviews --- --- a/ckanext/ga_report/controller.py +++ b/ckanext/ga_report/controller.py @@ -244,11 +244,11 @@ if publisher: q = q.filter(GA_Url.department_id==publisher.name) q = q.filter(GA_Url.period_name==month) - q = q.order_by('ga_url.visitors::int desc') + q = q.order_by('ga_url.visits::int desc') top_packages = [] for entry,package in q.limit(count): if package: - top_packages.append((package, entry.pageviews, entry.visitors)) + top_packages.append((package, entry.pageviews, entry.visits)) else: log.warning('Could not find package associated package') @@ -306,9 +306,11 @@ month = c.month or 'All' connection = model.Session.connection() q = """ - select department_id, sum(pageviews::int) views, sum(visitors::int) visits + select department_id, sum(pageviews::int) views, sum(visits::int) visits from ga_url where department_id <> '' + and package_id <> '' + and url like '/dataset/%%' and period_name=%s group by department_id order by visits desc """ --- a/ckanext/ga_report/download_analytics.py +++ b/ckanext/ga_report/download_analytics.py @@ -96,7 +96,7 @@ self.get_full_period_name(period_name, period_complete_day), start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d')) - + if self.delete_first: log.info('Deleting existing Analytics for this period "%s"', period_name) @@ -159,8 +159,8 @@ start_date = start_date.strftime('%Y-%m-%d') end_date = end_date.strftime('%Y-%m-%d') query = 'ga:pagePath=%s$' % path - metrics = 'ga:uniquePageviews, ga:visits' - sort = '-ga:uniquePageviews' + metrics = 'ga:pageviews, ga:visits' + sort = '-ga:pageviews' # Supported query params at # https://developers.google.com/analytics/devguides/reporting/core/v3/reference @@ -212,8 +212,8 @@ results = self.service.data().ga().get( ids='ga:' + self.profile_id, start_date=start_date, - metrics='ga:uniquePageviews', - sort='-ga:uniquePageviews', + metrics='ga:pageviews', + sort='-ga:pageviews', max_results=10000, end_date=end_date).execute() result_data = results.get('rows') @@ -242,7 +242,7 @@ ids='ga:' + self.profile_id, filters='ga:pagePath=~%s$' % (path,), start_date=start_date, - metrics='ga:bounces,ga:uniquePageviews', + metrics='ga:bounces,ga:pageviews', dimensions='ga:pagePath', max_results=10000, end_date=end_date).execute() @@ -260,8 +260,8 @@ results = self.service.data().ga().get( ids='ga:' + self.profile_id, start_date=start_date, - metrics='ga:uniquePageviews', - sort='-ga:uniquePageviews', + metrics='ga:pageviews', + sort='-ga:pageviews', dimensions="ga:language,ga:country", max_results=10000, end_date=end_date).execute() @@ -284,8 +284,8 @@ results = self.service.data().ga().get( ids='ga:' + self.profile_id, start_date=start_date, - metrics='ga:uniquePageviews', - sort='-ga:uniquePageviews', + metrics='ga:pageviews', + sort='-ga:pageviews', dimensions="ga:socialNetwork,ga:referralPath", max_results=10000, end_date=end_date).execute() @@ -303,8 +303,8 @@ results = self.service.data().ga().get( ids='ga:' + self.profile_id, start_date=start_date, - metrics='ga:uniquePageviews', - sort='-ga:uniquePageviews', + metrics='ga:pageviews', + sort='-ga:pageviews', dimensions="ga:operatingSystem,ga:operatingSystemVersion", max_results=10000, end_date=end_date).execute() @@ -328,8 +328,8 @@ results = self.service.data().ga().get( ids='ga:' + self.profile_id, start_date=start_date, - metrics='ga:uniquePageviews', - sort='-ga:uniquePageviews', + metrics='ga:pageviews', + sort='-ga:pageviews', dimensions="ga:browser,ga:browserVersion", max_results=10000, end_date=end_date).execute() @@ -377,8 +377,8 @@ results = self.service.data().ga().get( ids='ga:' + self.profile_id, start_date=start_date, - metrics='ga:uniquePageviews', - sort='-ga:uniquePageviews', + metrics='ga:pageviews', + sort='-ga:pageviews', dimensions="ga:mobileDeviceBranding, ga:mobileDeviceInfo", max_results=10000, end_date=end_date).execute() --- a/ckanext/ga_report/ga_model.py +++ b/ckanext/ga_report/ga_model.py @@ -27,7 +27,7 @@ Column('period_name', types.UnicodeText), Column('period_complete_day', types.Integer), Column('pageviews', types.UnicodeText), - Column('visitors', types.UnicodeText), + Column('visits', types.UnicodeText), Column('url', types.UnicodeText), Column('department_id', types.UnicodeText), Column('package_id', types.UnicodeText), @@ -63,7 +63,7 @@ Column('period_name', types.UnicodeText), Column('publisher_name', types.UnicodeText), Column('views', types.UnicodeText), - Column('visitors', types.UnicodeText), + Column('visits', types.UnicodeText), Column('toplevel', types.Boolean, default=False), Column('subpublishercount', types.Integer, default=0), Column('parent', types.UnicodeText), @@ -116,7 +116,7 @@ return '/' + '/'.join(url.split('/')[2:]) -def _get_department_id_of_url(url): +def _get_package_and_publisher(url): # e.g. /dataset/fuel_prices # e.g. /dataset/fuel_prices/resource/e63380d4 dataset_match = re.match('/dataset/([^/]+)(/.*)?', url) @@ -126,12 +126,13 @@ if dataset: publisher_groups = dataset.get_groups('publisher') if publisher_groups: - return publisher_groups[0].name + return dataset_ref,publisher_groups[0].name + return dataset_ref, None else: publisher_match = re.match('/publisher/([^/]+)(/.*)?', url) if publisher_match: - return publisher_match.groups()[0] - + return None, publisher_match.groups()[0] + return None, None def update_sitewide_stats(period_name, stat_name, data): for k,v in data.iteritems(): @@ -156,25 +157,6 @@ model.Session.commit() -def update_url_stat_totals(period_name): - - """ - items = model.Session.query(GA_Url).\ - filter(GA_Url.period_name != "All").\ - filter(GA_Url.url==url).all() - values = {'id': make_uuid(), - 'period_name': "All", - 'period_complete_day': "0", - 'url': url, - 'pageviews': sum([int(x.pageviews) for x in items]), - 'visitors': sum([int(x.visitors) for x in items]), - 'department_id': department_id, - 'package_id': package - } - model.Session.add(GA_Url(**values)) - model.Session.commit() - """ - def pre_update_url_stats(period_name): model.Session.query(GA_Url).\ filter(GA_Url.period_name==period_name).delete() @@ -183,31 +165,43 @@ def update_url_stats(period_name, period_complete_day, url_data): - ''' - Given a list of urls and number of hits for each during a given period, - stores them in GA_Url under the period and recalculates the totals for - the 'All' period. - ''' - for url, views, visitors in url_data: - department_id = _get_department_id_of_url(url) - - package = None - if url.startswith('/dataset/'): - package = url[len('/dataset/'):] - - values = {'id': make_uuid(), - 'period_name': period_name, - 'period_complete_day': period_complete_day, - 'url': url, - 'pageviews': views, - 'visitors': visitors, - 'department_id': department_id, - 'package_id': package - } - model.Session.add(GA_Url(**values)) + + for url, views, visits in url_data: + package, publisher = _get_package_and_publisher(url) + + + item = model.Session.query(GA_Url).\ + filter(GA_Url.period_name==period_name).\ + filter(GA_Url.url==url).first() + if item: + item.pageviews = item.pageviews + views + item.visits = item.visits + visits + if not item.package_id: + item.package_id = package + if not item.department_id: + item.department_id = publisher + model.Session.add(item) + else: + values = {'id': make_uuid(), + 'period_name': period_name, + 'period_complete_day': period_complete_day, + 'url': url, + 'pageviews': views, + 'visits': visits, + 'department_id': publisher, + 'package_id': package + } + model.Session.add(GA_Url(**values)) model.Session.commit() if package: + old_pageviews, old_visits = 0, 0 + old = model.Session.query(GA_Url).\ + filter(GA_Url.period_name=='All').\ + filter(GA_Url.url==url).all() + old_pageviews = sum([int(o.pageviews) for o in old]) + old_visits = sum([int(o.visits) for o in old]) + entries = model.Session.query(GA_Url).\ filter(GA_Url.period_name!='All').\ filter(GA_Url.url==url).all() @@ -215,14 +209,14 @@ 'period_name': 'All', 'period_complete_day': 0, 'url': url, - 'pageviews': sum([int(e.pageviews) for e in entries]), - 'visitors': sum([int(e.visitors) for e in entries]), - 'department_id': department_id, + 'pageviews': sum([int(e.pageviews) for e in entries]) + old_pageviews, + 'visits': sum([int(e.visits) for e in entries]) + old_visits, + 'department_id': publisher, 'package_id': package } + model.Session.add(GA_Url(**values)) model.Session.commit() - @@ -266,7 +260,7 @@ filter(model.Group.type=='publisher').\ filter(model.Group.state=='active').all() for publisher in publishers: - views, visitors, subpub = update_publisher(period_name, publisher, publisher.name) + views, visits, subpub = update_publisher(period_name, publisher, publisher.name) parent, parents = '', publisher.get_groups('publisher') if parents: parent = parents[0].name @@ -275,7 +269,7 @@ filter(GA_Publisher.publisher_name==publisher.name).first() if item: item.views = views - item.visitors = visitors + item.visits = visits item.publisher_name = publisher.name item.toplevel = publisher in toplevel item.subpublishercount = subpub @@ -287,7 +281,7 @@ 'period_name': period_name, 'publisher_name': publisher.name, 'views': views, - 'visitors': visitors, + 'visits': visits, 'toplevel': publisher in toplevel, 'subpublishercount': subpub, 'parent': parent @@ -297,7 +291,7 @@ def update_publisher(period_name, pub, part=''): - views,visitors,subpub = 0, 0, 0 + views,visits,subpub = 0, 0, 0 for publisher in go_down_tree(pub): subpub = subpub + 1 items = model.Session.query(GA_Url).\ @@ -305,9 +299,9 @@ filter(GA_Url.department_id==publisher.name).all() for item in items: views = views + int(item.pageviews) - visitors = visitors + int(item.visitors) - - return views, visitors, (subpub-1) + visits = visits + int(item.visits) + + return views, visits, (subpub-1) def get_top_level(): --- a/ckanext/ga_report/helpers.py +++ b/ckanext/ga_report/helpers.py @@ -60,8 +60,7 @@ if not dataset: return None dataset_dict = get_action('package_show')({'model': model, - 'session': model.Session, - 'validate': False}, + 'session': model.Session}, {'id':dataset.id}) return dataset_dict @@ -106,7 +105,7 @@ if not p in datasets: datasets[p] = {'views':0, 'visits': 0} datasets[p]['views'] = datasets[p]['views'] + int(entry.pageviews) - datasets[p]['visits'] = datasets[p]['visits'] + int(entry.visitors) + datasets[p]['visits'] = datasets[p]['visits'] + int(entry.visits) results = [] for k, v in datasets.iteritems(): --- a/ckanext/ga_report/templates/ga_report/notes.html +++ b/ckanext/ga_report/templates/ga_report/notes.html @@ -8,7 +8,6 @@