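Tidy logging; add a package_id column and a precomputed "All" period to GA_Url; switch the downloaded metric from ga:visitors to ga:visits; return the featured dataset as a dict.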
--- a/ckanext/ga_report/controller.py
+++ b/ckanext/ga_report/controller.py
@@ -22,8 +22,9 @@
def _month_details(cls):
+ '''Returns (period_name, _get_month_name(period_name)) tuples for every period except "All", sorted by period_name descending'''
months = []
- vals = model.Session.query(cls.period_name).distinct().all()
+ vals = model.Session.query(cls.period_name).filter(cls.period_name!='All').distinct().all()
for m in vals:
months.append( (m[0], _get_month_name(m[0])))
return sorted(months, key=operator.itemgetter(0), reverse=True)
@@ -236,43 +237,23 @@
if count == -1:
count = sys.maxint
- q = model.Session.query(GA_Url)\
+ month = c.month or 'All'
+
+ q = model.Session.query(GA_Url,model.Package)\
+ .filter(model.Package.name==GA_Url.package_id)\
.filter(GA_Url.url.like('/dataset/%'))
if publisher:
q = q.filter(GA_Url.department_id==publisher.name)
- if c.month:
- q = q.filter(GA_Url.period_name==c.month)
+ q = q.filter(GA_Url.period_name==month)
q = q.order_by('ga_url.visitors::int desc')
-
- if c.month:
- top_packages = []
- for entry in q.limit(count):
- package_name = entry.url[len('/dataset/'):]
- p = model.Package.get(package_name)
- if p:
- top_packages.append((p, entry.pageviews, entry.visitors))
- else:
- log.warning('Could not find package "%s"', package_name)
- else:
- ds = {}
- for entry in q:
- if len(ds) >= count:
- break
- package_name = entry.url[len('/dataset/'):]
- p = model.Package.get(package_name)
- if p:
- if not p in ds:
- ds[p] = {'views': 0, 'visits': 0}
- ds[p]['views'] = ds[p]['views'] + int(entry.pageviews)
- ds[p]['visits'] = ds[p]['visits'] + int(entry.visitors)
- else:
- log.warning('Could not find package "%s"', package_name)
-
- results = []
- for k, v in ds.iteritems():
- results.append((k,v['views'],v['visits']))
-
- top_packages = sorted(results, key=operator.itemgetter(1), reverse=True)
+ top_packages = []
+
+ for entry,package in q.limit(count):
+ if package:
+ top_packages.append((package, entry.pageviews, entry.visitors))
+ else:
+ log.warning('Could not find package associated with url "%s"', entry.url)
+
return top_packages
def read(self):
@@ -308,15 +289,12 @@
else:
c.month_desc = ''.join([m[1] for m in c.months if m[0]==c.month])
+ month = c.month or 'All'  # no month selected -> use the aggregated "All" period
c.publisher_page_views = 0
q = model.Session.query(GA_Url).\
filter(GA_Url.url=='/publisher/%s' % c.publisher_name)
- if c.month:
- entry = q.filter(GA_Url.period_name==c.month).first()
- c.publisher_page_views = entry.pageviews if entry else 0
- else:
- for e in q.all():
- c.publisher_page_views = c.publisher_page_views + int(e.pageviews)
+ entry = q.filter(GA_Url.period_name==month).first()
+ c.publisher_page_views = entry.pageviews if entry else 0
c.top_packages = self._get_packages(c.publisher, 20)
--- a/ckanext/ga_report/download_analytics.py
+++ b/ckanext/ga_report/download_analytics.py
@@ -92,28 +92,34 @@
def download_and_store(self, periods):
for period_name, period_complete_day, start_date, end_date in periods:
+ log.info('Period "%s" (%s - %s)',
+ self.get_full_period_name(period_name, period_complete_day),
+ start_date.strftime('%Y-%m-%d'),
+ end_date.strftime('%Y-%m-%d'))
+
if self.delete_first:
- log.info('Deleting existing Analytics for period "%s"',
+ log.info('Deleting existing Analytics for this period "%s"',
period_name)
ga_model.delete(period_name)
- log.info('Downloading Analytics for period "%s" (%s - %s)',
- self.get_full_period_name(period_name, period_complete_day),
- start_date.strftime('%Y %m %d'),
- end_date.strftime('%Y %m %d'))
-
+
+ log.info('Downloading analytics for dataset views')
data = self.download(start_date, end_date, '~/dataset/[a-z0-9-_]+')
- log.info('Storing Dataset Analytics for period "%s"',
- self.get_full_period_name(period_name, period_complete_day))
+
+ log.info('Storing dataset views (%i rows)', len(data.get('url')))
self.store(period_name, period_complete_day, data, )
+ log.info('Downloading analytics for publisher views')
data = self.download(start_date, end_date, '~/publisher/[a-z0-9-_]+')
- log.info('Storing Publisher Analytics for period "%s"',
- self.get_full_period_name(period_name, period_complete_day))
+ log.info('Storing publisher views (%i rows)', len(data.get('url')))
self.store(period_name, period_complete_day, data,)
+ log.info('Aggregating datasets by publisher')
ga_model.update_publisher_stats(period_name) # about 30 seconds.
+
+ log.info('Downloading and storing analytics for site-wide stats')
self.sitewide_stats( period_name )
+ log.info('Downloading and storing analytics for social networks')
self.update_social_info(period_name, start_date, end_date)
def update_social_info(self, period_name, start_date, end_date):
@@ -147,7 +153,7 @@
start_date = start_date.strftime('%Y-%m-%d')
end_date = end_date.strftime('%Y-%m-%d')
query = 'ga:pagePath=%s$' % path
- metrics = 'ga:uniquePageviews, ga:visitors'
+ metrics = 'ga:uniquePageviews, ga:visits'
sort = '-ga:uniquePageviews'
# Supported query params at
@@ -179,12 +185,10 @@
start_date = '%s-01' % period_name
end_date = '%s-%s' % (period_name, last_day_of_month)
- print 'Sitewide_stats for %s (%s -> %s)' % (period_name, start_date, end_date)
-
funcs = ['_totals_stats', '_social_stats', '_os_stats',
'_locale_stats', '_browser_stats', '_mobile_stats']
for f in funcs:
- print ' + Fetching %s stats' % f.split('_')[1]
+ log.info('Downloading analytics for %s', f.split('_')[1])
getattr(self, f)(start_date, end_date, period_name)
def _get_results(result_data, f):
@@ -209,7 +213,7 @@
results = self.service.data().ga().get(
ids='ga:' + self.profile_id,
start_date=start_date,
- metrics='ga:pageviewsPerVisit,ga:avgTimeOnSite,ga:percentNewVisits,ga:visitors',
+ metrics='ga:pageviewsPerVisit,ga:avgTimeOnSite,ga:percentNewVisits,ga:visits',
max_results=10000,
end_date=end_date).execute()
result_data = results.get('rows')
--- a/ckanext/ga_report/ga_model.py
+++ b/ckanext/ga_report/ga_model.py
@@ -1,10 +1,10 @@
import re
import uuid
-from sqlalchemy import Table, Column, MetaData
+from sqlalchemy import Table, Column, MetaData, ForeignKey
from sqlalchemy import types
from sqlalchemy.sql import select
-from sqlalchemy.orm import mapper
+from sqlalchemy.orm import mapper, relation
from sqlalchemy import func
import ckan.model as model
@@ -14,8 +14,6 @@
return unicode(uuid.uuid4())
metadata = MetaData()
-
-
class GA_Url(object):
@@ -32,6 +30,7 @@
Column('visitors', types.UnicodeText),
Column('url', types.UnicodeText),
Column('department_id', types.UnicodeText),
+ Column('package_id', types.UnicodeText),
)
mapper(GA_Url, url_table)
@@ -163,6 +162,10 @@
url = _normalize_url(url)
department_id = _get_department_id_of_url(url)
+ package = None
+ if url.startswith('/dataset/'):
+ package = url[len('/dataset/'):]
+
# see if the row for this url & month is in the table already
item = model.Session.query(GA_Url).\
filter(GA_Url.period_name==period_name).\
@@ -172,6 +175,7 @@
item.pageviews = views
item.visitors = visitors
item.department_id = department_id
+ item.package_id = package
model.Session.add(item)
else:
# create the row
@@ -181,9 +185,31 @@
'url': url,
'pageviews': views,
'visitors': visitors,
- 'department_id': department_id
+ 'department_id': department_id,
+ 'package_id': package
}
model.Session.add(GA_Url(**values))
+
+ # We now need to recalculate the "All" time period from the data we have
+ # Delete the old 'All'
+ old = model.Session.query(GA_Url).\
+ filter(GA_Url.period_name == "All").\
+ filter(GA_Url.url==url).delete()
+
+ items = model.Session.query(GA_Url).\
+ filter(GA_Url.period_name != "All").\
+ filter(GA_Url.url==url).all()
+ values = {'id': make_uuid(),
+ 'period_name': "All",
+ 'period_complete_day': "0",
+ 'url': url,
+ 'pageviews': sum([int(x.pageviews) for x in items]),
+ 'visitors': sum([int(x.visitors) for x in items]),
+ 'department_id': department_id,
+ 'package_id': package
+ }
+ model.Session.add(GA_Url(**values))
+
model.Session.commit()
--- a/ckanext/ga_report/helpers.py
+++ b/ckanext/ga_report/helpers.py
@@ -1,7 +1,9 @@
import logging
import operator
+
import ckan.lib.base as base
import ckan.model as model
+from ckan.logic import get_action
from ckanext.ga_report.ga_model import GA_Url, GA_Publisher
from ckanext.ga_report.controller import _get_publishers
@@ -39,25 +41,38 @@
order_by('ga_url.pageviews::int desc')
num_top_datasets = top_datasets.count()
+ dataset = None
if num_top_datasets:
- dataset = None
+ count = 0
while not dataset:
rand = random.randrange(0, min(top, num_top_datasets))
ga_url = top_datasets[rand]
dataset = model.Package.get(ga_url.url[len('/dataset/'):])
if dataset and not dataset.state == 'active':
dataset = None
- else:
+ count += 1
+ if count > 10:
+ break
+ if not dataset:
+ # fallback
dataset = model.Session.query(model.Package)\
.filter_by(state='active').first()
- publisher = dataset.get_groups('publisher')[0]
- return {
- 'dataset': dataset,
- 'publisher': publisher
- }
+ if not dataset:
+ return None
+ dataset_dict = get_action('package_show')({'model': model,
+ 'session': model.Session},
+ {'id':dataset.id})
+ return dataset_dict
def single_popular_dataset_html(top=20):
- context = single_popular_dataset(top)
+ dataset_dict = single_popular_dataset(top)
+ groups = dataset_dict.get('groups', [])
+ publishers = [ g for g in groups if g.get('type') == 'publisher' ]
+ publisher = publishers[0] if publishers else {'name':'', 'title': ''}
+ context = {
+ 'dataset': dataset_dict,
+ 'publisher': publisher
+ }
return base.render_snippet('ga_report/ga_popular_single.html', **context)
--- a/ckanext/ga_report/templates/ga_report/ga_popular_single.html
+++ b/ckanext/ga_report/templates/ga_report/ga_popular_single.html
@@ -8,14 +8,14 @@
<h2>Featured dataset</h2>
<div class="dataset-summary boxed">
- <a class="dataset-header" href="${h.url_for(controller='package', action='read', id=dataset.name)}">
- <h3>${dataset.title}</h3>
+ <a class="dataset-header" href="${h.url_for(controller='package', action='read', id=dataset['name'])}">
+ <h3>${dataset['title']}</h3>
</a>
<h4>
<strong>Publisher</strong> :
- <a href="/publisher/${publisher.name}">${publisher.title}</a>
+ <a href="/publisher/${publisher['name']}">${publisher['title']}</a>
</h4>
- <div>${h.truncate(dataset.notes, length=200, whole_word=True)}</div>
+ <div>${h.truncate(dataset['notes_rendered'], length=200, whole_word=True)}</div>
</div>
<div>
<a href="${h.url_for(controller='ckanext.ga_report.controller:GaDatasetReport',action='read')}" class="btn">Other popular datasets</a>
--- a/ckanext/ga_report/templates/ga_report/notes.html
+++ b/ckanext/ga_report/templates/ga_report/notes.html
@@ -6,11 +6,11 @@
<li class="widget-container boxed widget_text">
<h4>Notes</h4>
<ul>
- <li>'Views' is the number of sessions during which that page was viewed one or more times ('Unique Pageviews').</li>
-<!-- <li>'Visits' is the number of individual sessions initiated by all the visitors to your site, counted once for each visitor for each session.</li>-->
- <li>'Visitors' is the number of unique users visiting the site (whether once or more times).</li>
+ <li>"Views" is the number of sessions during which the page was viewed one or more times (technically known as "unique pageviews").</li>
+ <li>"Visits" is the number of unique user visits to a page, counted once for each visitor for each session.</li>
+<!--! <li>"Visitors" is the number of unique users visiting the site (whether once or more times).</li> -->
<li>These usage statistics are confined to users with javascript enabled, which excludes web crawlers and API calls.</li>
- <li>The results for only small numbers of views/visits are not shown. Where these relate to site pages, then they are available in full in the CSV download. Where these relate to users' web browser information, they are not disclosed, for privacy reasons.</li>
+ <li>The results are not shown when the number of views/visits is tiny. Where these relate to site pages, results are available in full in the CSV download. Where these relate to users' web browser information, results are not disclosed, for privacy reasons.</li>
</ul>
</li>
</html>