Initial iteration
[ckanext-ga-report.git] / ckanext / ga_report / download_analytics.py
David Read



















































































































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import logging
import datetime
 
from pylons import config
 
import ga_model
from ga_client import GA
 
# Module-level logger; records are emitted under the 'ckanext.ga-report' name.
log = logging.getLogger('ckanext.ga-report')
 
# strftime() pattern used to build the name of a monthly period (year-month).
FORMAT_MONTH = '%Y-%m'
 
class DownloadAnalytics(object):
    '''Downloads Google Analytics data and stores it through ga_model.

    Work is organised in "periods". A period is a 4-tuple:

        (period_name, period_complete_day, start_date, end_date)

    * period_name - start of the period formatted with FORMAT_MONTH
    * period_complete_day - day of the month the data runs up to for a
      period that is still in progress, or 0 for a finished period
    * start_date / end_date - datetime.datetime bounds (inclusive) of the
      query sent to Google Analytics
    '''

    def __init__(self):
        # Reporting granularity read from the Pylons config. Only 'monthly'
        # is implemented; a missing setting raises KeyError here.
        self.period = config['ga-report.period']

    def all_(self):
        '''Placeholder - not implemented yet, does nothing.'''
        pass

    @staticmethod
    def _current_month_period(now):
        '''Return the period tuple running from the 1st of now's month to now.'''
        first_of_this_month = datetime.datetime(now.year, now.month, 1)
        return (now.strftime(FORMAT_MONTH), now.day,
                first_of_this_month, now)

    def latest(self):
        '''Download and store the data for the period currently in progress.

        Raises NotImplementedError when the configured period is not
        'monthly'.
        '''
        if self.period != 'monthly':
            raise NotImplementedError
        # from first of this month to today
        periods = (self._current_month_period(datetime.datetime.now()),)
        self.download_and_store(periods)

    def since_date(self, since_date):
        '''Download and store every period from since_date's up to now.

        since_date must be a datetime.datetime. Each month already finished
        yields a whole-month period (period_complete_day = 0); the current
        month yields a partial period ending now. A since_date lying in a
        future month yields no periods at all.

        Raises NotImplementedError when the configured period is not
        'monthly'.
        '''
        assert isinstance(since_date, datetime.datetime)
        if self.period != 'monthly':
            raise NotImplementedError

        now = datetime.datetime.now()
        first_of_this_month = datetime.datetime(now.year, now.month, 1)
        one_day = datetime.timedelta(days=1)

        periods = []  # (period_name, period_complete_day, start_date, end_date)
        first_of_the_month = datetime.datetime(since_date.year,
                                               since_date.month, 1)
        while first_of_the_month < first_of_this_month:
            # Step to the 1st of the following month, rolling the year over
            # after December.
            if first_of_the_month.month == 12:
                first_of_next_month = datetime.datetime(
                    first_of_the_month.year + 1, 1, 1)
            else:
                first_of_next_month = datetime.datetime(
                    first_of_the_month.year, first_of_the_month.month + 1, 1)
            last_of_the_month = first_of_next_month - one_day
            # The name is taken from the month being described, not from
            # "now", so that each historic period gets its own name.
            periods.append((first_of_the_month.strftime(FORMAT_MONTH), 0,
                            first_of_the_month, last_of_the_month))
            first_of_the_month = first_of_next_month

        # The loop ends either exactly on this month (add the in-progress
        # period) or beyond it, when since_date was in the future (add
        # nothing).
        if first_of_the_month == first_of_this_month:
            periods.append(self._current_month_period(now))
        self.download_and_store(periods)

    @staticmethod
    def get_full_period_name(period_name, period_complete_day):
        '''Return a display name for a period.

        A truthy period_complete_day marks a period still in progress and is
        appended as " (up to <day>th)"; otherwise period_name is returned
        unchanged.
        '''
        if period_complete_day:
            return period_name + ' (up to %ith)' % period_complete_day
        else:
            return period_name

    def download_and_store(self, periods):
        '''Download, then store, the analytics for each period in turn.

        periods is an iterable of
        (period_name, period_complete_day, start_date, end_date) tuples.
        '''
        for period_name, period_complete_day, start_date, end_date in periods:
            full_name = self.get_full_period_name(period_name,
                                                  period_complete_day)
            log.info('Downloading Analytics for period "%s" (%s - %s)',
                     full_name,
                     start_date.strftime('%Y %m %d'),
                     end_date.strftime('%Y %m %d'))
            data = self.download(start_date, end_date)
            log.info('Storing Analytics for period "%s"', full_name)
            self.store(period_name, period_complete_day, data)

    @classmethod
    def download(cls, start_date, end_date):
        '''Get data from GA for a given time period.

        start_date and end_date are datetime objects; they are sent to GA as
        'YYYY-MM-DD' strings.

        Returns a dict with the single key 'url', which is the shape store()
        looks for. Its value maps each reported ga:pagePath to that page's
        ga:uniquePageviews count as an int.
        '''
        start_date = start_date.strftime('%Y-%m-%d')
        end_date = end_date.strftime('%Y-%m-%d')
        # Only dataset pages are queried for now.
        # TODO: also match resource URLs (needs PACKAGE_URL and a
        # resource_url_tag, neither of which is defined in this module yet).
        query = 'ga:pagePath=~^/dataset/'
        metrics = 'ga:uniquePageviews'
        sort = '-ga:uniquePageviews'
        packages = {}
        for entry in GA.ga_query(query_filter=query,
                                 from_date=start_date,
                                 metrics=metrics,
                                 sort=sort,
                                 to_date=end_date):
            for dim in entry.dimension:
                if dim.name == "ga:pagePath":
                    package = dim.value
                    # A missing/empty metric value is counted as zero views.
                    count = entry.get_metric(
                        'ga:uniquePageviews').value or 0
                    packages[package] = int(count)
        return {'url': packages}

    def store(self, period_name, period_complete_day, data):
        '''Save downloaded data for a period using ga_model.

        Only the 'url' entry of data is handled; when it is absent nothing is
        stored.
        '''
        if 'url' in data:
            ga_model.update_url_stats(period_name, period_complete_day,
                                      data['url'])