Initial iteration
[ckanext-ga-report.git] / ckanext / ga_report / ga_model.py
blob:a/ckanext/ga_report/ga_model.py -> blob:b/ckanext/ga_report/ga_model.py
  import re
  import uuid
   
  from sqlalchemy import Table, Column, MetaData
  from sqlalchemy import types
  from sqlalchemy.sql import select, text
  from sqlalchemy import func
   
  import ckan.model as model
  from ckan.model.types import JsonType
  from ckan.lib.base import *
   
  def make_uuid():
  return unicode(uuid.uuid4())
   
  def init_tables():
  metadata = MetaData()
  package_stats = Table('ga_url', metadata,
  Column('id', types.UnicodeText, primary_key=True, default=make_uuid),
  Column('period_name', types.UnicodeText),
  Column('period_complete_day', types.Integer),
  Column('visits', types.Integer),
  Column('group_id', types.String(60)),
  Column('next_page', JsonType),
  )
  metadata.create_all(model.meta.engine)
   
   
  cached_tables = {}
   
  def get_table(name):
  if name not in cached_tables:
  meta = MetaData()
  meta.reflect(bind=model.meta.engine)
  table = meta.tables[name]
  cached_tables[name] = table
  return cached_tables[name]
   
   
  def _normalize_url(url):
  '''Strip off the hostname etc. Do this before storing it.
   
  >>> normalize_url('http://data.gov.uk/dataset/weekly_fuel_prices')
  '/dataset/weekly_fuel_prices'
  '''
  url = re.sub('https?://(www\.)?data.gov.uk', '', url)
  return url
   
  def _get_department_id_of_url(url):
  # e.g. /dataset/fuel_prices
  # e.g. /dataset/fuel_prices/resource/e63380d4
  dataset_match = re.match('/dataset/([^/]+)(/.*)?', url)
  if dataset_match:
  dataset_ref = dataset_match.groups()[0]
  dataset = model.Package.get(dataset_ref)
  if dataset:
  publisher_groups = dataset.get_groups('publisher')
  if publisher_groups:
  return publisher_groups[0].id
   
  def update_url_stats(period_name, period_complete_day, url_data):
  table = get_table('ga_url')
  connection = model.Session.connection()
  for url, views, next_page in url_data:
  url = _normalize_url(url)
  department_id = _get_department_id_of_url(url)
  # see if the row for this url & month is in the table already
  s = select([func.count(id_col)],
  table.c.period_name == period_name,
  table.c.url == url)
  count = connection.execute(s).fetchone()
  if count and count[0]:
  # update the row
  connection.execute(table.update()\
  .where(table.c.period_name == period_name,
  table.c.url == url)\
  .values(period_complete_day=period_complete_day,
  views=views,
  department_id=department_id,
  next_page=next_page))
  else:
  # create the row
  values = {'period_name': period_name,
  'period_complete_day': period_complete_day,
  'url': url,
  'views': views,
  'department_id': department_id,
  'next_page': next_page}
  connection.execute(stats.insert()\
  .values(**values))