Token is now referenced in the config, making it easier for the gov_daily script to kick off downloads.
--- a/README.rst
+++ b/README.rst
@@ -32,6 +32,7 @@
googleanalytics.id = UA-1010101-1
googleanalytics.account = Account name (e.g. data.gov.uk, see top level item at https://www.google.com/analytics)
+ googleanalytics.token.filepath = ~/pyenv/token.dat
ga-report.period = monthly
ga-report.bounce_url = /
@@ -82,13 +83,17 @@
$ paster getauthtoken --config=../ckan/development.ini
+Now ensure you reference the correct path to your token.dat in your CKAN config file (e.g. development.ini)::
+
+ googleanalytics.token.filepath = ~/pyenv/token.dat
+
Tutorial
--------
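-Download some GA data and store it in CKAN's database. (Ensure your CKAN pyenv is still activated, run the command from ``src/ckanext-ga-report``, alter the ``--config`` option to point to your site config file) and specifying the name of your auth file (token.dat by default) from the previous step::
+Download some GA data and store it in CKAN's database. (Ensure your CKAN pyenv is still activated, run the command from ``src/ckanext-ga-report``, and alter the ``--config`` option to point to your site config file; the auth token file is read from the ``googleanalytics.token.filepath`` setting in that config)::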
- $ paster loadanalytics token.dat latest --config=../ckan/development.ini
+ $ paster loadanalytics latest --config=../ckan/development.ini
-The value after the token file is how much data you want to retrieve, this can be
+The value after ``loadanalytics`` is how much data you want to retrieve; this can be
--- a/ckanext/ga_report/command.py
+++ b/ckanext/ga_report/command.py
@@ -1,5 +1,8 @@
import logging
import datetime
+import os
+
+from pylons import config
from ckan.lib.cli import CkanCommand
# No other CKAN imports allowed until _load_config is run,
@@ -58,20 +61,17 @@
"""Get data from Google Analytics API and save it
in the ga_model
- Usage: paster loadanalytics <tokenfile> <time-period>
+ Usage: paster loadanalytics <time-period>
- Where <tokenfile> is the name of the auth token file from
- the getauthtoken step.
-
- And where <time-period> is:
+ Where <time-period> is:
all - data for all time
latest - (default) just the 'latest' data
YYYY-MM - just data for the specific month
"""
summary = __doc__.split('\n')[0]
usage = __doc__
- max_args = 2
- min_args = 1
+ max_args = 1
+ min_args = 0
def __init__(self, name):
super(LoadAnalytics, self).__init__(name)
@@ -92,19 +92,25 @@
from download_analytics import DownloadAnalytics
from ga_auth import (init_service, get_profile_id)
+ ga_token_filepath = os.path.expanduser(config.get('googleanalytics.token.filepath', ''))
+ if not ga_token_filepath:
+ print 'ERROR: In the CKAN config you need to specify the filepath of the ' \
+ 'Google Analytics token file under key: googleanalytics.token.filepath'
+ return
+
try:
- svc = init_service(self.args[0], None)
+ svc = init_service(ga_token_filepath, None)
except TypeError:
print ('Have you correctly run the getauthtoken task and '
- 'specified the correct token file?')
+ 'specified the correct token file in the CKAN config under '
+ '"googleanalytics.token.filepath"?')
return
downloader = DownloadAnalytics(svc, profile_id=get_profile_id(svc),
delete_first=self.options.delete_first,
skip_url_stats=self.options.skip_url_stats)
- time_period = self.args[1] if self.args and len(self.args) > 1 \
- else 'latest'
+ time_period = self.args[0] if self.args else 'latest'
if time_period == 'all':
downloader.all_()
elif time_period == 'latest':
--- a/ckanext/ga_report/ga_model.py
+++ b/ckanext/ga_report/ga_model.py
@@ -9,6 +9,8 @@
import ckan.model as model
from ckan.lib.base import *
+
+log = __import__('logging').getLogger(__name__)
def make_uuid():
return unicode(uuid.uuid4())
@@ -212,7 +214,7 @@
'period_complete_day': 0,
'url': url,
'pageviews': sum([int(e.pageviews) for e in entries]) + old_pageviews,
- 'visits': sum([int(e.visits) for e in entries]) + old_visits,
+ 'visits': sum([int(e.visits or 0) for e in entries]) + old_visits,
'department_id': publisher,
'package_id': package
}
@@ -344,20 +346,33 @@
model.Session.commit()
def get_score_for_dataset(dataset_name):
+ '''
+ Returns a "current popularity" score for a dataset: its average daily
+ pageviews this month plus half of last month's, multiplied by 100.
+ '''
import datetime
now = datetime.datetime.now()
- period_names = ['%s-%02d' % (now.year, now.month),
- '%s-%02d' % (now.year, now.month-1)]
-
- entry = model.Session.query(GA_Url)\
- .filter(GA_Url.period_name==period_names[0])\
- .filter(GA_Url.package_id==dataset_name).first()
- score = int(entry.pageviews) if entry else 0
-
- entry = model.Session.query(GA_Url)\
- .filter(GA_Url.period_name==period_names[1])\
- .filter(GA_Url.package_id==dataset_name).first()
- val = int(entry.pageviews) if entry else 0
- score += val/2 if val else 0
-
- return 0
+ last_month = now.replace(day=1) - datetime.timedelta(days=1) # always in the previous month (now - 30 days is not)
+ period_names = ['%s-%02d' % (last_month.year, last_month.month),
+ '%s-%02d' % (now.year, now.month),
+ ]
+
+ score = 0
+ for period_name in period_names:
+ score /= 2 # previous periods are discounted by 50%
+ entry = model.Session.query(GA_Url)\
+ .filter(GA_Url.period_name==period_name)\
+ .filter(GA_Url.package_id==dataset_name).first()
+ # add this period's average pageviews per day to the score
+ if entry:
+ views = float(entry.pageviews)
+ if entry.period_complete_day:
+ views_per_day = views / entry.period_complete_day
+ else:
+ views_per_day = views / 15 # no completed-day count: guess mid-month
+ score += views_per_day
+
+ score = int(score * 100)
+ log.debug('Popularity %s: %s', score, dataset_name)
+ return score
+