Initial iteration
[ckanext-ga-report.git] / ckanext / ga_report / ga_model.py
blob:a/ckanext/ga_report/ga_model.py -> blob:b/ckanext/ga_report/ga_model.py
--- a/ckanext/ga_report/ga_model.py
+++ b/ckanext/ga_report/ga_model.py
@@ -1,1 +1,91 @@
+import re
+import uuid
 
+from sqlalchemy import Table, Column, MetaData
+from sqlalchemy import types
+from sqlalchemy.sql import select, text
+from sqlalchemy import func
+
+import ckan.model as model
+from ckan.model.types import JsonType
+from ckan.lib.base import *
+
+def make_uuid():
+    return unicode(uuid.uuid4())
+
+def init_tables():
+    metadata = MetaData()
+    package_stats = Table('ga_url', metadata,
+                          Column('id', types.UnicodeText, primary_key=True, default=make_uuid),
+                          Column('period_name', types.UnicodeText),
+                          Column('period_complete_day', types.Integer),
+                          Column('visits', types.Integer),
+                          Column('group_id', types.String(60)),
+                          Column('next_page', JsonType),
+                          )
+    metadata.create_all(model.meta.engine)
+
+
+cached_tables = {}
+
+def get_table(name):
+    if name not in cached_tables:
+        meta = MetaData()
+        meta.reflect(bind=model.meta.engine)
+        table = meta.tables[name]
+        cached_tables[name] = table
+    return cached_tables[name]
+
+
+def _normalize_url(url):
+    '''Strip off the hostname etc. Do this before storing it.
+
+    >>> normalize_url('http://data.gov.uk/dataset/weekly_fuel_prices')
+    '/dataset/weekly_fuel_prices'
+    '''
+    url = re.sub('https?://(www\.)?data.gov.uk', '', url)
+    return url
+
+def _get_department_id_of_url(url):
+    # e.g. /dataset/fuel_prices
+    # e.g. /dataset/fuel_prices/resource/e63380d4
+    dataset_match = re.match('/dataset/([^/]+)(/.*)?', url)
+    if dataset_match:
+        dataset_ref = dataset_match.groups()[0]
+        dataset = model.Package.get(dataset_ref)
+        if dataset:
+            publisher_groups = dataset.get_groups('publisher')
+            if publisher_groups:
+                return publisher_groups[0].id
+
+def update_url_stats(period_name, period_complete_day, url_data):
+    table = get_table('ga_url')
+    connection = model.Session.connection()
+    for url, views, next_page in url_data:
+        url = _normalize_url(url)
+        department_id = _get_department_id_of_url(url)
+        # see if the row for this url & month is in the table already
+        s = select([func.count(id_col)],
+                   table.c.period_name == period_name,
+                   table.c.url == url)
+        count = connection.execute(s).fetchone()
+        if count and count[0]:
+            # update the row
+            connection.execute(table.update()\
+                .where(table.c.period_name == period_name,
+                       table.c.url == url)\
+                .values(period_complete_day=period_complete_day,
+                        views=views,
+                        department_id=department_id,
+                        next_page=next_page))
+        else:
+            # create the row
+            values = {'period_name': period_name,
+                      'period_complete_day': period_complete_day,
+                      'url': url,
+                      'views': views,
+                      'department_id': department_id,
+                      'next_page': next_page}
+            connection.execute(stats.insert()\
+                               .values(**values))
+