From: David Read Date: Wed, 07 Nov 2012 13:38:29 +0000 Subject: Tidy logging. X-Git-Url: http://maxious.lambdacomplex.org/git/?p=ckanext-ga-report.git&a=commitdiff&h=753e746cceb78d2cb57df91505b36e76fa4ad38e --- Tidy logging. --- --- a/ckanext/ga_report/controller.py +++ b/ckanext/ga_report/controller.py @@ -22,8 +22,9 @@ def _month_details(cls): + '''Returns a list of all the month names''' months = [] - vals = model.Session.query(cls.period_name).distinct().all() + vals = model.Session.query(cls.period_name).filter(cls.period_name!='All').distinct().all() for m in vals: months.append( (m[0], _get_month_name(m[0]))) return sorted(months, key=operator.itemgetter(0), reverse=True) @@ -236,43 +237,23 @@ if count == -1: count = sys.maxint - q = model.Session.query(GA_Url)\ + month = c.month or 'All' + + q = model.Session.query(GA_Url,model.Package)\ + .filter(model.Package.name==GA_Url.package_id)\ .filter(GA_Url.url.like('/dataset/%')) if publisher: q = q.filter(GA_Url.department_id==publisher.name) - if c.month: - q = q.filter(GA_Url.period_name==c.month) + q = q.filter(GA_Url.period_name==month) q = q.order_by('ga_url.visitors::int desc') - - if c.month: - top_packages = [] - for entry in q.limit(count): - package_name = entry.url[len('/dataset/'):] - p = model.Package.get(package_name) - if p: - top_packages.append((p, entry.pageviews, entry.visitors)) - else: - log.warning('Could not find package "%s"', package_name) - else: - ds = {} - for entry in q: - if len(ds) >= count: - break - package_name = entry.url[len('/dataset/'):] - p = model.Package.get(package_name) - if p: - if not p in ds: - ds[p] = {'views': 0, 'visits': 0} - ds[p]['views'] = ds[p]['views'] + int(entry.pageviews) - ds[p]['visits'] = ds[p]['visits'] + int(entry.visitors) - else: - log.warning('Could not find package "%s"', package_name) - - results = [] - for k, v in ds.iteritems(): - results.append((k,v['views'],v['visits'])) - - top_packages = sorted(results, key=operator.itemgetter(1), 
reverse=True) + top_packages = [] + + for entry,package in q.limit(count): + if package: + top_packages.append((package, entry.pageviews, entry.visitors)) + else: + log.warning('Could not find package associated package') + return top_packages def read(self): @@ -308,15 +289,12 @@ else: c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month]) + month = c.mnth or 'All' c.publisher_page_views = 0 q = model.Session.query(GA_Url).\ filter(GA_Url.url=='/publisher/%s' % c.publisher_name) - if c.month: - entry = q.filter(GA_Url.period_name==c.month).first() - c.publisher_page_views = entry.pageviews if entry else 0 - else: - for e in q.all(): - c.publisher_page_views = c.publisher_page_views + int(e.pageviews) + entry = q.filter(GA_Url.period_name==c.month).first() + c.publisher_page_views = entry.pageviews if entry else 0 c.top_packages = self._get_packages(c.publisher, 20) --- a/ckanext/ga_report/download_analytics.py +++ b/ckanext/ga_report/download_analytics.py @@ -92,28 +92,34 @@ def download_and_store(self, periods): for period_name, period_complete_day, start_date, end_date in periods: + log.info('Period "%s" (%s - %s)', + self.get_full_period_name(period_name, period_complete_day), + start_date.strftime('%Y-%m-%d'), + end_date.strftime('%Y-%m-%d')) + if self.delete_first: - log.info('Deleting existing Analytics for period "%s"', + log.info('Deleting existing Analytics for this period "%s"', period_name) ga_model.delete(period_name) - log.info('Downloading Analytics for period "%s" (%s - %s)', - self.get_full_period_name(period_name, period_complete_day), - start_date.strftime('%Y %m %d'), - end_date.strftime('%Y %m %d')) - + + log.info('Downloading analytics for dataset views') data = self.download(start_date, end_date, '~/dataset/[a-z0-9-_]+') - log.info('Storing Dataset Analytics for period "%s"', - self.get_full_period_name(period_name, period_complete_day)) + + log.info('Storing dataset views (%i rows)', len(data.get('url'))) self.store(period_name, 
period_complete_day, data, ) + log.info('Downloading analytics for publisher views') data = self.download(start_date, end_date, '~/publisher/[a-z0-9-_]+') - log.info('Storing Publisher Analytics for period "%s"', - self.get_full_period_name(period_name, period_complete_day)) + log.info('Storing publisher views (%i rows)', len(data.get('url'))) self.store(period_name, period_complete_day, data,) + log.info('Aggregating datasets by publisher') ga_model.update_publisher_stats(period_name) # about 30 seconds. + + log.info('Downloading and storing analytics for site-wide stats') self.sitewide_stats( period_name ) + log.info('Downloading and storing analytics for social networks') self.update_social_info(period_name, start_date, end_date) def update_social_info(self, period_name, start_date, end_date): @@ -147,7 +153,7 @@ start_date = start_date.strftime('%Y-%m-%d') end_date = end_date.strftime('%Y-%m-%d') query = 'ga:pagePath=%s$' % path - metrics = 'ga:uniquePageviews, ga:visitors' + metrics = 'ga:uniquePageviews, ga:visits' sort = '-ga:uniquePageviews' # Supported query params at @@ -179,12 +185,10 @@ start_date = '%s-01' % period_name end_date = '%s-%s' % (period_name, last_day_of_month) - print 'Sitewide_stats for %s (%s -> %s)' % (period_name, start_date, end_date) - funcs = ['_totals_stats', '_social_stats', '_os_stats', '_locale_stats', '_browser_stats', '_mobile_stats'] for f in funcs: - print ' + Fetching %s stats' % f.split('_')[1] + log.info('Downloading analytics for %s' % f.split('_')[1]) getattr(self, f)(start_date, end_date, period_name) def _get_results(result_data, f): @@ -209,7 +213,7 @@ results = self.service.data().ga().get( ids='ga:' + self.profile_id, start_date=start_date, - metrics='ga:pageviewsPerVisit,ga:avgTimeOnSite,ga:percentNewVisits,ga:visitors', + metrics='ga:pageviewsPerVisit,ga:avgTimeOnSite,ga:percentNewVisits,ga:visits', max_results=10000, end_date=end_date).execute() result_data = results.get('rows') --- 
a/ckanext/ga_report/ga_model.py +++ b/ckanext/ga_report/ga_model.py @@ -1,10 +1,10 @@ import re import uuid -from sqlalchemy import Table, Column, MetaData +from sqlalchemy import Table, Column, MetaData, ForeignKey from sqlalchemy import types from sqlalchemy.sql import select -from sqlalchemy.orm import mapper +from sqlalchemy.orm import mapper, relation from sqlalchemy import func import ckan.model as model @@ -14,8 +14,6 @@ return unicode(uuid.uuid4()) metadata = MetaData() - - class GA_Url(object): @@ -32,6 +30,7 @@ Column('visitors', types.UnicodeText), Column('url', types.UnicodeText), Column('department_id', types.UnicodeText), + Column('package_id', types.UnicodeText), ) mapper(GA_Url, url_table) @@ -163,6 +162,10 @@ url = _normalize_url(url) department_id = _get_department_id_of_url(url) + package = None + if url.startswith('/dataset/'): + package = url[len('/dataset/'):] + # see if the row for this url & month is in the table already item = model.Session.query(GA_Url).\ filter(GA_Url.period_name==period_name).\ @@ -172,6 +175,7 @@ item.pageviews = views item.visitors = visitors item.department_id = department_id + item.package_id = package model.Session.add(item) else: # create the row @@ -181,9 +185,31 @@ 'url': url, 'pageviews': views, 'visitors': visitors, - 'department_id': department_id + 'department_id': department_id, + 'package_id': package } model.Session.add(GA_Url(**values)) + + # We now need to recaculate the ALL time_period from the data we have + # Delete the old 'All' + old = model.Session.query(GA_Url).\ + filter(GA_Url.period_name == "All").\ + filter(GA_Url.url==url).delete() + + items = model.Session.query(GA_Url).\ + filter(GA_Url.period_name != "All").\ + filter(GA_Url.url==url).all() + values = {'id': make_uuid(), + 'period_name': "All", + 'period_complete_day': "0", + 'url': url, + 'pageviews': sum([int(x.pageviews) for x in items]), + 'visitors': sum([int(x.visitors) for x in items]), + 'department_id': department_id, + 
'package_id': package + } + model.Session.add(GA_Url(**values)) + model.Session.commit() --- a/ckanext/ga_report/helpers.py +++ b/ckanext/ga_report/helpers.py @@ -1,7 +1,9 @@ import logging import operator + import ckan.lib.base as base import ckan.model as model +from ckan.logic import get_action from ckanext.ga_report.ga_model import GA_Url, GA_Publisher from ckanext.ga_report.controller import _get_publishers @@ -39,25 +41,38 @@ order_by('ga_url.pageviews::int desc') num_top_datasets = top_datasets.count() + dataset = None if num_top_datasets: - dataset = None + count = 0 while not dataset: rand = random.randrange(0, min(top, num_top_datasets)) ga_url = top_datasets[rand] dataset = model.Package.get(ga_url.url[len('/dataset/'):]) if dataset and not dataset.state == 'active': dataset = None - else: + count += 1 + if count > 10: + break + if not dataset: + # fallback dataset = model.Session.query(model.Package)\ .filter_by(state='active').first() - publisher = dataset.get_groups('publisher')[0] - return { - 'dataset': dataset, - 'publisher': publisher - } + if not dataset: + return None + dataset_dict = get_action('package_show')({'model': model, + 'session': model.Session}, + {'id':dataset.id}) + return dataset_dict def single_popular_dataset_html(top=20): - context = single_popular_dataset(top) + dataset_dict = single_popular_dataset(top) + groups = package.get('groups', []) + publishers = [ g for g in groups if g.get('type') == 'publisher' ] + publisher = publishers[0] if publishers else {'name':'', 'title': ''} + context = { + 'dataset': dataset_dict, + 'publisher': publisher_dict + } return base.render_snippet('ga_report/ga_popular_single.html', **context) --- a/ckanext/ga_report/templates/ga_report/ga_popular_single.html +++ b/ckanext/ga_report/templates/ga_report/ga_popular_single.html @@ -8,14 +8,14 @@

Featured dataset

- -

${dataset.title}

+
+

${dataset['title']}

Publisher : - ${publisher.title} + ${publisher['title']}

-
${h.truncate(dataset.notes, length=200, whole_word=True)}
+
${h.truncate(dataset['notes_rendered'], length=200, whole_word=True)}
Other popular datasets --- a/ckanext/ga_report/templates/ga_report/notes.html +++ b/ckanext/ga_report/templates/ga_report/notes.html @@ -6,11 +6,11 @@
  • Notes

      -
    • 'Views' is the number of sessions during which that page was viewed one or more times ('Unique Pageviews').
    • - -
    • 'Visitors' is the number of unique users visiting the site (whether once or more times).
    • +
    • "Views" is the number of sessions during which the page was viewed one or more times (technically known as "unique pageviews").
    • +
    • "Visits" is the number of unique user visits to a page, counted once for each visitor for each session.
    • +
    • These usage statistics are confined to users with JavaScript enabled, which excludes web crawlers and API calls.
    • -
    • The results for only small numbers of views/visits are not shown. Where these relate to site pages, then they are available in full in the CSV download. Where these relate to users' web browser information, they are not disclosed, for privacy reasons.
    • +
    • The results are not shown when the number of views/visits is tiny. Where these relate to site pages, results are available in full in the CSV download. Where these relate to users' web browser information, results are not disclosed, for privacy reasons.