Initial iteration
[ckanext-ga-report.git] / ckanext / ga_report / download_analytics.py
blob:a/ckanext/ga_report/download_analytics.py -> blob:b/ckanext/ga_report/download_analytics.py
--- a/ckanext/ga_report/download_analytics.py
+++ b/ckanext/ga_report/download_analytics.py
@@ -1,10 +1,12 @@
+import os
 import logging
 import datetime
 
 from pylons import config
 
 import ga_model
-from ga_client import GA
+
+#from ga_client import GA
 
 log = logging.getLogger('ckanext.ga-report')
 
@@ -12,12 +14,16 @@
 
 class DownloadAnalytics(object):
     '''Downloads and stores analytics info'''
-    def __init__(self):
+
+    def __init__(self, service=None, profile_id=None):
         self.period = config['ga-report.period']
-    
+        self.service = service
+        self.profile_id = profile_id
+
+
     def all_(self):
-        pass
-    
+        self.since_date(datetime.datetime(2010, 1, 1))
+
     def latest(self):
         if self.period == 'monthly':
             # from first of this month to today
@@ -49,8 +55,8 @@
                     break
                 elif first_of_the_month < first_of_this_month:
                     in_the_next_month = first_of_the_month + datetime.timedelta(40)
-                    last_of_the_month == datetime.datetime(in_the_next_month.year,
-                                                           in_the_next_month.month, a)\
+                    last_of_the_month = datetime.datetime(in_the_next_month.year,
+                                                           in_the_next_month.month, 1)\
                                                            - datetime.timedelta(1)
                     periods.append((now.strftime(FORMAT_MONTH), 0,
                                     first_of_the_month, last_of_the_month))
@@ -71,7 +77,7 @@
             return period_name + ' (up to %ith)' % period_complete_day
         else:
             return period_name
-        
+
 
     def download_and_store(self, periods):
         for period_name, period_complete_day, start_date, end_date in periods:
@@ -84,31 +90,37 @@
                      self.get_full_period_name(period_name, period_complete_day))
             self.store(period_name, period_complete_day, data)
 
-    @classmethod
-    def download(cls, start_date, end_date):
+
+    def download(self, start_date, end_date):
         '''Get data from GA for a given time period'''
         start_date = start_date.strftime('%Y-%m-%d')
         end_date = end_date.strftime('%Y-%m-%d')
-        # url
-        #query = 'ga:pagePath=~^%s,ga:pagePath=~^%s' % \
-        #        (PACKAGE_URL, self.resource_url_tag)
-        query = 'ga:pagePath=~^/dataset/'
+        query = 'ga:pagePath=~/dataset/[a-z0-9-]+$'
         metrics = 'ga:uniquePageviews'
         sort = '-ga:uniquePageviews'
-        for entry in GA.ga_query(query_filter=query,
-                                 from_date=start_date,
+
+        # Supported query params at
+        # https://developers.google.com/analytics/devguides/reporting/core/v3/reference
+        results = self.service.data().ga().get(
+                                 ids='ga:' + self.profile_id,
+                                 filters=query,
+                                 start_date=start_date,
                                  metrics=metrics,
                                  sort=sort,
-                                 to_date=end_date):
-            print entry
-            import pdb; pdb.set_trace()
-            for dim in entry.dimension:
-                if dim.name == "ga:pagePath":
-                    package = dim.value
-                    count = entry.get_metric(
-                        'ga:uniquePageviews').value or 0
-                    packages[package] = int(count)
-        return packages
+                                 dimensions="ga:pagePath",
+                                 max_results=10000,
+                                 end_date=end_date).execute()
+
+        if os.getenv('DEBUG'):
+            import pprint
+            pprint.pprint(results)
+            print 'Total results: %s' % results.get('totalResults')
+
+        packages = []
+        # 'rows' can be absent when the query matches nothing; treat as empty.
+        for loc, size in results.get('rows') or []:
+            packages.append(('http:/' + loc, size, '',))  # Temporary hack
+        return dict(url=packages)
 
     def store(self, period_name, period_complete_day, data):
         if 'url' in data: