gitphp 0.2.9.1 :: ckanext-ga-report.git/blobdiff

import re

import uuid

from sqlalchemy import Table, Column, MetaData

from sqlalchemy import types

from sqlalchemy.sql import select, text

from sqlalchemy import func

import ckan.model as model

from ckan.model.types import JsonType

from ckan.lib.base import *

def make_uuid():

return unicode(uuid.uuid4())

def init_tables():

metadata = MetaData()

package_stats = Table('ga_url', metadata,

Column('id', types.UnicodeText, primary_key=True, default=make_uuid),

Column('period_name', types.UnicodeText),

Column('period_complete_day', types.Integer),

Column('visits', types.Integer),

Column('group_id', types.String(60)),

Column('next_page', JsonType),

)

metadata.create_all(model.meta.engine)

cached_tables = {}

def get_table(name):

if name not in cached_tables:

meta = MetaData()

meta.reflect(bind=model.meta.engine)

table = meta.tables[name]

cached_tables[name] = table

return cached_tables[name]

def _normalize_url(url):

'''Strip off the hostname etc. Do this before storing it.

>>> normalize_url('http://data.gov.uk/dataset/weekly_fuel_prices')

'/dataset/weekly_fuel_prices'

'''

url = re.sub('https?://(www\.)?data.gov.uk', '', url)

return url

def _get_department_id_of_url(url):

# e.g. /dataset/fuel_prices

# e.g. /dataset/fuel_prices/resource/e63380d4

dataset_match = re.match('/dataset/([^/]+)(/.*)?', url)

if dataset_match:

dataset_ref = dataset_match.groups()[0]

dataset = model.Package.get(dataset_ref)

if dataset:

publisher_groups = dataset.get_groups('publisher')

if publisher_groups:

return publisher_groups[0].id

def update_url_stats(period_name, period_complete_day, url_data):

table = get_table('ga_url')

connection = model.Session.connection()

for url, views, next_page in url_data:

url = _normalize_url(url)

department_id = _get_department_id_of_url(url)

# see if the row for this url & month is in the table already

s = select([func.count(id_col)],

table.c.period_name == period_name,

table.c.url == url)

count = connection.execute(s).fetchone()

if count and count[0]:

# update the row

connection.execute(table.update()\

.where(table.c.period_name == period_name,

table.c.url == url)\

.values(period_complete_day=period_complete_day,

views=views,

department_id=department_id,

next_page=next_page))

else:

# create the row

values = {'period_name': period_name,

'period_complete_day': period_complete_day,

'url': url,

'views': views,

'department_id': department_id,

'next_page': next_page}

connection.execute(stats.insert()\

.values(**values))