Initial iteration
[ckanext-ga-report.git] / ckanext / ga_report / download_analytics.py
blob:a/ckanext/ga_report/download_analytics.py -> blob:b/ckanext/ga_report/download_analytics.py
  import logging
  import datetime
   
  from pylons import config
   
  import ga_model
  from ga_client import GA
   
  log = logging.getLogger('ckanext.ga-report')
   
  FORMAT_MONTH = '%Y-%m'
   
  class DownloadAnalytics(object):
  '''Downloads and stores analytics info'''
  def __init__(self):
  self.period = config['ga-report.period']
   
  def all_(self):
  pass
   
  def latest(self):
  if self.period == 'monthly':
  # from first of this month to today
  now = datetime.datetime.now()
  first_of_this_month = datetime.datetime(now.year, now.month, 1)
  periods = ((now.strftime(FORMAT_MONTH),
  now.day,
  first_of_this_month, now),)
  else:
  raise NotImplementedError
  self.download_and_store(periods)
   
   
  def since_date(self, since_date):
  assert isinstance(since_date, datetime.datetime)
  periods = [] # (period_name, period_complete_day, start_date, end_date)
  if self.period == 'monthly':
  first_of_the_months_until_now = []
  year = since_date.year
  month = since_date.month
  now = datetime.datetime.now()
  first_of_this_month = datetime.datetime(now.year, now.month, 1)
  while True:
  first_of_the_month = datetime.datetime(year, month, 1)
  if first_of_the_month == first_of_this_month:
  periods.append((now.strftime(FORMAT_MONTH),
  now.day,
  first_of_this_month, now))
  break
  elif first_of_the_month < first_of_this_month:
  in_the_next_month = first_of_the_month + datetime.timedelta(40)
  last_of_the_month == datetime.datetime(in_the_next_month.year,
  in_the_next_month.month, a)\
  - datetime.timedelta(1)
  periods.append((now.strftime(FORMAT_MONTH), 0,
  first_of_the_month, last_of_the_month))
  else:
  # first_of_the_month has got to the future somehow
  break
  month += 1
  if month > 12:
  year += 1
  month = 1
  else:
  raise NotImplementedError
  self.download_and_store(periods)
   
  @staticmethod
  def get_full_period_name(period_name, period_complete_day):
  if period_complete_day:
  return period_name + ' (up to %ith)' % period_complete_day
  else:
  return period_name
   
   
  def download_and_store(self, periods):
  for period_name, period_complete_day, start_date, end_date in periods:
  log.info('Downloading Analytics for period "%s" (%s - %s)',
  self.get_full_period_name(period_name, period_complete_day),
  start_date.strftime('%Y %m %d'),
  end_date.strftime('%Y %m %d'))
  data = self.download(start_date, end_date)
  log.info('Storing Analytics for period "%s"',
  self.get_full_period_name(period_name, period_complete_day))
  self.store(period_name, period_complete_day, data)
   
  @classmethod
  def download(cls, start_date, end_date):
  '''Get data from GA for a given time period'''
  start_date = start_date.strftime('%Y-%m-%d')
  end_date = end_date.strftime('%Y-%m-%d')
  # url
  #query = 'ga:pagePath=~^%s,ga:pagePath=~^%s' % \
  # (PACKAGE_URL, self.resource_url_tag)
  query = 'ga:pagePath=~^/dataset/'
  metrics = 'ga:uniquePageviews'
  sort = '-ga:uniquePageviews'
  for entry in GA.ga_query(query_filter=query,
  from_date=start_date,
  metrics=metrics,
  sort=sort,
  to_date=end_date):
  print entry
  import pdb; pdb.set_trace()
  for dim in entry.dimension:
  if dim.name == "ga:pagePath":
  package = dim.value
  count = entry.get_metric(
  'ga:uniquePageviews').value or 0
  packages[package] = int(count)
  return packages
   
  def store(self, period_name, period_complete_day, data):
  if 'url' in data:
  ga_model.update_url_stats(period_name, period_complete_day, data['url'])