Initial iteration
[ckanext-ga-report.git] / ckanext / ga_report / download_analytics.py
blob:a/ckanext/ga_report/download_analytics.py -> blob:b/ckanext/ga_report/download_analytics.py
--- a/ckanext/ga_report/download_analytics.py
+++ b/ckanext/ga_report/download_analytics.py
@@ -1,1 +1,116 @@
+import logging
+import datetime
 
+from pylons import config
+
+import ga_model
+from ga_client import GA
+
+log = logging.getLogger('ckanext.ga-report')  # logger used by this module
+# strftime pattern used to name a monthly period, e.g. '2012-10'
+FORMAT_MONTH = '%Y-%m'
+
+class DownloadAnalytics(object):
+    '''Downloads and stores analytics info'''
+    def __init__(self):
+        self.period = config['ga-report.period']
+    
+    def all_(self):
+        pass
+    
+    def latest(self):
+        if self.period == 'monthly':
+            # from first of this month to today
+            now = datetime.datetime.now()
+            first_of_this_month = datetime.datetime(now.year, now.month, 1)
+            periods = ((now.strftime(FORMAT_MONTH),
+                        now.day,
+                        first_of_this_month, now),)
+        else:
+            raise NotImplementedError
+        self.download_and_store(periods)
+
+
+    def since_date(self, since_date):
+        assert isinstance(since_date, datetime.datetime)
+        periods = [] # (period_name, period_complete_day, start_date, end_date)
+        if self.period == 'monthly':
+            first_of_the_months_until_now = []
+            year = since_date.year
+            month = since_date.month
+            now = datetime.datetime.now()
+            first_of_this_month = datetime.datetime(now.year, now.month, 1)
+            while True:
+                first_of_the_month = datetime.datetime(year, month, 1)
+                if first_of_the_month == first_of_this_month:
+                    periods.append((now.strftime(FORMAT_MONTH),
+                                    now.day,
+                                    first_of_this_month, now))
+                    break
+                elif first_of_the_month < first_of_this_month:
+                    in_the_next_month = first_of_the_month + datetime.timedelta(40)
+                    last_of_the_month == datetime.datetime(in_the_next_month.year,
+                                                           in_the_next_month.month, a)\
+                                                           - datetime.timedelta(1)
+                    periods.append((now.strftime(FORMAT_MONTH), 0,
+                                    first_of_the_month, last_of_the_month))
+                else:
+                    # first_of_the_month has got to the future somehow
+                    break
+                month += 1
+                if month > 12:
+                    year += 1
+                    month = 1
+        else:
+            raise NotImplementedError
+        self.download_and_store(periods)
+
+    @staticmethod
+    def get_full_period_name(period_name, period_complete_day):
+        if period_complete_day:
+            return period_name + ' (up to %ith)' % period_complete_day
+        else:
+            return period_name
+        
+
+    def download_and_store(self, periods):
+        for period_name, period_complete_day, start_date, end_date in periods:
+            log.info('Downloading Analytics for period "%s" (%s - %s)',
+                     self.get_full_period_name(period_name, period_complete_day),
+                     start_date.strftime('%Y %m %d'),
+                     end_date.strftime('%Y %m %d'))
+            data = self.download(start_date, end_date)
+            log.info('Storing Analytics for period "%s"',
+                     self.get_full_period_name(period_name, period_complete_day))
+            self.store(period_name, period_complete_day, data)
+
+    @classmethod
+    def download(cls, start_date, end_date):
+        '''Get data from GA for a given time period'''
+        start_date = start_date.strftime('%Y-%m-%d')
+        end_date = end_date.strftime('%Y-%m-%d')
+        # url
+        #query = 'ga:pagePath=~^%s,ga:pagePath=~^%s' % \
+        #        (PACKAGE_URL, self.resource_url_tag)
+        query = 'ga:pagePath=~^/dataset/'
+        metrics = 'ga:uniquePageviews'
+        sort = '-ga:uniquePageviews'
+        for entry in GA.ga_query(query_filter=query,
+                                 from_date=start_date,
+                                 metrics=metrics,
+                                 sort=sort,
+                                 to_date=end_date):
+            print entry
+            import pdb; pdb.set_trace()
+            for dim in entry.dimension:
+                if dim.name == "ga:pagePath":
+                    package = dim.value
+                    count = entry.get_metric(
+                        'ga:uniquePageviews').value or 0
+                    packages[package] = int(count)
+        return packages
+
+    def store(self, period_name, period_complete_day, data):
+        if 'url' in data:
+            ga_model.update_url_stats(period_name, period_complete_day, data['url'])
+