Fix missing package definition
Fix missing package definition

file:b/.gitignore (new)
  *.pyc
 
 Binary files a/ckanext/__init__.pyc and b/ckanext/__init__.pyc differ
 Binary files a/ckanext/dga_stats/__init__.pyc and b/ckanext/dga_stats/__init__.pyc differ
 Binary files a/ckanext/dga_stats/controller.pyc and b/ckanext/dga_stats/controller.pyc differ
 Binary files a/ckanext/dga_stats/plugin.pyc and b/ckanext/dga_stats/plugin.pyc differ
import datetime import datetime
   
from pylons import config from pylons import config
from sqlalchemy import Table, select, func, and_ from sqlalchemy import Table, select, func, and_
   
import ckan.plugins as p import ckan.plugins as p
import ckan.model as model import ckan.model as model
   
cache_enabled = p.toolkit.asbool(config.get('ckanext.stats.cache_enabled', 'True')) cache_enabled = p.toolkit.asbool(config.get('ckanext.stats.cache_enabled', 'True'))
   
if cache_enabled: if cache_enabled:
from pylons import cache from pylons import cache
our_cache = cache.get_cache('stats', type='dbm') our_cache = cache.get_cache('stats', type='dbm')
   
DATE_FORMAT = '%Y-%m-%d' DATE_FORMAT = '%Y-%m-%d'
   
def table(name): def table(name):
return Table(name, model.meta.metadata, autoload=True) return Table(name, model.meta.metadata, autoload=True)
   
def datetime2date(datetime_): def datetime2date(datetime_):
return datetime.date(datetime_.year, datetime_.month, datetime_.day) return datetime.date(datetime_.year, datetime_.month, datetime_.day)
   
   
class Stats(object): class Stats(object):
@classmethod @classmethod
def top_rated_packages(cls, limit=10): def top_rated_packages(cls, limit=10):
# NB Not using sqlalchemy as sqla 0.4 doesn't work using both group_by # NB Not using sqlalchemy as sqla 0.4 doesn't work using both group_by
# and apply_avg # and apply_avg
package = table('package') package = table('package')
rating = table('rating') rating = table('rating')
sql = select([package.c.id, func.avg(rating.c.rating), func.count(rating.c.rating)], from_obj=[package.join(rating)]).\ sql = select([package.c.id, func.avg(rating.c.rating), func.count(rating.c.rating)], from_obj=[package.join(rating)]).\
where(package.c.private == 'f').\ where(package.c.private == 'f').\
group_by(package.c.id).\ group_by(package.c.id).\
order_by(func.avg(rating.c.rating).desc(), func.count(rating.c.rating).desc()).\ order_by(func.avg(rating.c.rating).desc(), func.count(rating.c.rating).desc()).\
limit(limit) limit(limit)
res_ids = model.Session.execute(sql).fetchall() res_ids = model.Session.execute(sql).fetchall()
res_pkgs = [(model.Session.query(model.Package).get(unicode(pkg_id)), avg, num) for pkg_id, avg, num in res_ids] res_pkgs = [(model.Session.query(model.Package).get(unicode(pkg_id)), avg, num) for pkg_id, avg, num in res_ids]
return res_pkgs return res_pkgs
   
@classmethod @classmethod
def most_edited_packages(cls, limit=10): def most_edited_packages(cls, limit=10):
package_revision = table('package_revision') package_revision = table('package_revision')
package = table('package') package = table('package')
s = select([package_revision.c.id, func.count(package_revision.c.revision_id)], from_obj=[package_revision.join(package)]).\ s = select([package_revision.c.id, func.count(package_revision.c.revision_id)], from_obj=[package_revision.join(package)]).\
where(package.c.private == 'f').\ where(package.c.private == 'f').\
group_by(package_revision.c.id).\ group_by(package_revision.c.id).\
order_by(func.count(package_revision.c.revision_id).desc()).\ order_by(func.count(package_revision.c.revision_id).desc()).\
limit(limit) limit(limit)
res_ids = model.Session.execute(s).fetchall() res_ids = model.Session.execute(s).fetchall()
res_pkgs = [(model.Session.query(model.Package).get(unicode(pkg_id)), val) for pkg_id, val in res_ids] res_pkgs = [(model.Session.query(model.Package).get(unicode(pkg_id)), val) for pkg_id, val in res_ids]
return res_pkgs return res_pkgs
   
@classmethod @classmethod
def largest_groups(cls, limit=10): def largest_groups(cls, limit=10):
member = table('member') member = table('member')
s = select([member.c.group_id, func.count(member.c.table_id)]).\ s = select([member.c.group_id, func.count(member.c.table_id)]).\
group_by(member.c.group_id).\ group_by(member.c.group_id).\
where(and_(member.c.group_id!=None, member.c.table_name=='package')).\ where(and_(member.c.group_id!=None, member.c.table_name=='package')).\
order_by(func.count(member.c.table_id).desc()).\ order_by(func.count(member.c.table_id).desc()).\
limit(limit) limit(limit)
   
res_ids = model.Session.execute(s).fetchall() res_ids = model.Session.execute(s).fetchall()
res_groups = [(model.Session.query(model.Group).get(unicode(group_id)), val) for group_id, val in res_ids] res_groups = [(model.Session.query(model.Group).get(unicode(group_id)), val) for group_id, val in res_ids]
return res_groups return res_groups
   
@classmethod @classmethod
def by_org(cls, limit=10): def by_org(cls, limit=10):
group = table('group') group = table('group')
package = table('package') package = table('package')
s = select([group.c.id, package.c.private, func.count(package.c.private)]).\ s = select([group.c.id, package.c.private, func.count(package.c.private)]).\
group_by(group.c.id, package.c.private).\ group_by(group.c.id, package.c.private).\
order_by(group.c.id).\ order_by(group.c.id).\
limit(limit) limit(limit)
   
res_ids = model.Session.execute(s).fetchall() res_ids = model.Session.execute(s).fetchall()
res_groups = [(model.Session.query(model.Group).get(unicode(group_id)), private, val) for group_id, private, val in res_ids] res_groups = [(model.Session.query(model.Group).get(unicode(group_id)), private, val) for group_id, private, val in res_ids]
return res_groups return res_groups
   
@classmethod @classmethod
def top_tags(cls, limit=10, returned_tag_info='object'): # by package def top_tags(cls, limit=10, returned_tag_info='object'): # by package
assert returned_tag_info in ('name', 'id', 'object') assert returned_tag_info in ('name', 'id', 'object')
tag = table('tag') tag = table('tag')
package_tag = table('package_tag') package_tag = table('package_tag')
package = table('package') package = table('package')
#TODO filter out tags with state=deleted #TODO filter out tags with state=deleted
if returned_tag_info == 'name': if returned_tag_info == 'name':
from_obj = [package_tag.join(tag)] from_obj = [package_tag.join(tag)]
tag_column = tag.c.name tag_column = tag.c.name
else: else:
from_obj = None from_obj = None
tag_column = package_tag.c.tag_id tag_column = package_tag.c.tag_id
s = select([tag_column, func.count(package_tag.c.package_id)], s = select([tag_column, func.count(package_tag.c.package_id)],
from_obj=from_obj) from_obj=from_obj)
s = s.group_by(tag_column).\ s = s.group_by(tag_column).\
where(package.c.private == 'f').\ where(package.c.private == 'f').\
order_by(func.count(package_tag.c.package_id).desc()).\ order_by(func.count(package_tag.c.package_id).desc()).\
limit(limit) limit(limit)
res_col = model.Session.execute(s).fetchall() res_col = model.Session.execute(s).fetchall()
if returned_tag_info in ('id', 'name'): if returned_tag_info in ('id', 'name'):
return res_col return res_col
elif returned_tag_info == 'object': elif returned_tag_info == 'object':
res_tags = [(model.Session.query(model.Tag).get(unicode(tag_id)), val) for tag_id, val in res_col] res_tags = [(model.Session.query(model.Tag).get(unicode(tag_id)), val) for tag_id, val in res_col]
return res_tags return res_tags
   
@classmethod @classmethod
def top_package_owners(cls, limit=10): def top_package_owners(cls, limit=10):
package_role = table('package_role') package_role = table('package_role')
user_object_role = table('user_object_role') user_object_role = table('user_object_role')
package = table('package') package = table('package')
# join(package, package_role.c.package_id == package.c.id).\ # join(package, package_role.c.package_id == package.c.id).\
# where(package.c.private == 'f').\ # where(package.c.private == 'f').\
s = select([user_object_role.c.user_id, func.count(user_object_role.c.role)], from_obj=[user_object_role.join(package_role)]).\ s = select([user_object_role.c.user_id, func.count(user_object_role.c.role)], from_obj=[user_object_role.join(package_role)]).\
where(user_object_role.c.role==model.authz.Role.ADMIN).\ where(user_object_role.c.role==model.authz.Role.ADMIN).\
where(user_object_role.c.user_id!=None).\ where(user_object_role.c.user_id!=None).\
group_by(user_object_role.c.user_id).\ group_by(user_object_role.c.user_id).\
order_by(func.count(user_object_role.c.role).desc()).\ order_by(func.count(user_object_role.c.role).desc()).\
limit(limit) limit(limit)
res_ids = model.Session.execute(s).fetchall() res_ids = model.Session.execute(s).fetchall()
res_users = [(model.Session.query(model.User).get(unicode(user_id)), val) for user_id, val in res_ids] res_users = [(model.Session.query(model.User).get(unicode(user_id)), val) for user_id, val in res_ids]
return res_users return res_users
   
@classmethod @classmethod
def summary_stats(cls): def summary_stats(cls):
connection = model.Session.connection() connection = model.Session.connection()
# select name,role from user_object_role inner join \"user\" on user_object_role.user_id = \"user\".id where name not in ('logged_in','visitor') group by name,role" # select name,role from user_object_role inner join \"user\" on user_object_role.user_id = \"user\".id where name not in ('logged_in','visitor') group by name,role"
   
res = connection.execute("SELECT 'Total Organisations', count(*) from \"group\" where type = 'organization' and state = 'active' union \ res = connection.execute("SELECT 'Total Organisations', count(*) from \"group\" where type = 'organization' and state = 'active' union \
select 'Total Datasets', count(*) from package where state='active' or state='draft' or state='draft-complete' union \ select 'Total Datasets', count(*) from package where state='active' or state='draft' or state='draft-complete' union \
select 'Total Data Files/Resources', count(*) from resource where state='active'").fetchall(); select 'Total Data Files/Resources', count(*) from resource where state='active'").fetchall();
return res return res
   
   
@classmethod @classmethod
def activity_counts(cls): def activity_counts(cls):
connection = model.Session.connection() connection = model.Session.connection()
res = connection.execute("select to_char(timestamp, 'YYYY-MM') as month,activity_type, count(*) from activity group by month, activity_type order by month;").fetchall(); res = connection.execute("select to_char(timestamp, 'YYYY-MM') as month,activity_type, count(*) from activity group by month, activity_type order by month;").fetchall();
return res return res
   
   
class RevisionStats(object): class RevisionStats(object):
@classmethod @classmethod
def package_addition_rate(cls, weeks_ago=0): def package_addition_rate(cls, weeks_ago=0):
week_commenced = cls.get_date_weeks_ago(weeks_ago) week_commenced = cls.get_date_weeks_ago(weeks_ago)
return cls.get_objects_in_a_week(week_commenced, return cls.get_objects_in_a_week(week_commenced,
type_='package_addition_rate') type_='package_addition_rate')
   
@classmethod @classmethod
def package_revision_rate(cls, weeks_ago=0): def package_revision_rate(cls, weeks_ago=0):
week_commenced = cls.get_date_weeks_ago(weeks_ago) week_commenced = cls.get_date_weeks_ago(weeks_ago)
return cls.get_objects_in_a_week(week_commenced, return cls.get_objects_in_a_week(week_commenced,
type_='package_revision_rate') type_='package_revision_rate')
   
@classmethod @classmethod
def get_date_weeks_ago(cls, weeks_ago): def get_date_weeks_ago(cls, weeks_ago):
''' '''
@param weeks_ago: specify how many weeks ago to give count for @param weeks_ago: specify how many weeks ago to give count for
(0 = this week so far) (0 = this week so far)
''' '''
date_ = datetime.date.today() date_ = datetime.date.today()
return date_ - datetime.timedelta(days= return date_ - datetime.timedelta(days=
datetime.date.weekday(date_) + 7 * weeks_ago) datetime.date.weekday(date_) + 7 * weeks_ago)
   
@classmethod @classmethod
def get_week_dates(cls, weeks_ago): def get_week_dates(cls, weeks_ago):
''' '''
@param weeks_ago: specify how many weeks ago to give count for @param weeks_ago: specify how many weeks ago to give count for
(0 = this week so far) (0 = this week so far)
''' '''
package_revision = table('package_revision') package_revision = table('package_revision')
revision = table('revision') revision = table('revision')
today = datetime.date.today() today = datetime.date.today()
date_from = datetime.datetime(today.year, today.month, today.day) -\ date_from = datetime.datetime(today.year, today.month, today.day) -\
datetime.timedelta(days=datetime.date.weekday(today) + \ datetime.timedelta(days=datetime.date.weekday(today) + \
7 * weeks_ago) 7 * weeks_ago)
date_to = date_from + datetime.timedelta(days=7) date_to = date_from + datetime.timedelta(days=7)
return (date_from, date_to) return (date_from, date_to)
   
@classmethod @classmethod
def get_date_week_started(cls, date_): def get_date_week_started(cls, date_):
assert isinstance(date_, datetime.date) assert isinstance(date_, datetime.date)
if isinstance(date_, datetime.datetime): if isinstance(date_, datetime.datetime):
date_ = datetime2date(date_) date_ = datetime2date(date_)
return date_ - datetime.timedelta(days=datetime.date.weekday(date_)) return date_ - datetime.timedelta(days=datetime.date.weekday(date_))
   
@classmethod @classmethod
def get_package_revisions(cls): def get_package_revisions(cls):
''' '''
@return: Returns list of revisions and date of them, in @return: Returns list of revisions and date of them, in
format: [(id, date), ...] format: [(id, date), ...]
''' '''
package_revision = table('package_revision') package_revision = table('package_revision')
revision = table('revision') revision = table('revision')
s = select([package_revision.c.id, revision.c.timestamp], from_obj=[package_revision.join(revision)]).order_by(revision.c.timestamp) s = select([package_revision.c.id, revision.c.timestamp], from_obj=[package_revision.join(revision)]).order_by(revision.c.timestamp)
res = model.Session.execute(s).fetchall() # [(id, datetime), ...] res = model.Session.execute(s).fetchall() # [(id, datetime), ...]
return res return res
   
@classmethod @classmethod
def get_new_packages(cls): def get_new_packages(cls):
''' '''
@return: Returns list of new pkgs and date when they were created, in @return: Returns list of new pkgs and date when they were created, in
format: [(id, date_ordinal), ...] format: [(id, date_ordinal), ...]
''' '''
def new_packages(): def new_packages():
# Can't filter by time in select because 'min' function has to # Can't filter by time in select because 'min' function has to
# be 'for all time' else you get first revision in the time period. # be 'for all time' else you get first revision in the time period.
package_revision = table('package_revision') package_revision = table('package_revision')
revision = table('revision') revision = table('revision')
  package = table('package')
s = select([package_revision.c.id, func.min(revision.c.timestamp)], from_obj=[package_revision.join(revision)]).\ s = select([package_revision.c.id, func.min(revision.c.timestamp)], from_obj=[package_revision.join(revision)]).\
where(package.c.private == 'f').\ where(package.c.private == 'f').\
group_by(package_revision.c.id).order_by(func.min(revision.c.timestamp)) group_by(package_revision.c.id).order_by(func.min(revision.c.timestamp))
res = model.Session.execute(s).fetchall() # [(id, datetime), ...] res = model.Session.execute(s).fetchall() # [(id, datetime), ...]
res_pickleable = [] res_pickleable = []
for pkg_id, created_datetime in res: for pkg_id, created_datetime in res:
res_pickleable.append((pkg_id, created_datetime.toordinal())) res_pickleable.append((pkg_id, created_datetime.toordinal()))
return res_pickleable return res_pickleable
if cache_enabled: if cache_enabled:
week_commences = cls.get_date_week_started(datetime.date.today()) week_commences = cls.get_date_week_started(datetime.date.today())
key = 'all_new_packages_%s' + week_commences.strftime(DATE_FORMAT) key = 'all_new_packages_%s' + week_commences.strftime(DATE_FORMAT)
new_packages = our_cache.get_value(key=key, new_packages = our_cache.get_value(key=key,
createfunc=new_packages) createfunc=new_packages)
else: else:
new_packages = new_packages() new_packages = new_packages()
return new_packages return new_packages
   
@classmethod @classmethod
def get_deleted_packages(cls): def get_deleted_packages(cls):
''' '''
@return: Returns list of deleted pkgs and date when they were deleted, in @return: Returns list of deleted pkgs and date when they were deleted, in
format: [(id, date_ordinal), ...] format: [(id, date_ordinal), ...]
''' '''
def deleted_packages(): def deleted_packages():
# Can't filter by time in select because 'min' function has to # Can't filter by time in select because 'min' function has to
# be 'for all time' else you get first revision in the time period. # be 'for all time' else you get first revision in the time period.
package_revision = table('package_revision') package_revision = table('package_revision')
revision = table('revision') revision = table('revision')
s = select([package_revision.c.id, func.min(revision.c.timestamp)], from_obj=[package_revision.join(revision)]).\ s = select([package_revision.c.id, func.min(revision.c.timestamp)], from_obj=[package_revision.join(revision)]).\
where(package_revision.c.state==model.State.DELETED).\ where(package_revision.c.state==model.State.DELETED).\
group_by(package_revision.c.id).\ group_by(package_revision.c.id).\
order_by(func.min(revision.c.timestamp)) order_by(func.min(revision.c.timestamp))
res = model.Session.execute(s).fetchall() # [(id, datetime), ...] res = model.Session.execute(s).fetchall() # [(id, datetime), ...]
res_pickleable = [] res_pickleable = []
for pkg_id, deleted_datetime in res: for pkg_id, deleted_datetime in res:
res_pickleable.append((pkg_id, deleted_datetime.toordinal())) res_pickleable.append((pkg_id, deleted_datetime.toordinal()))
return res_pickleable return res_pickleable
if cache_enabled: if cache_enabled:
week_commences = cls.get_date_week_started(datetime.date.today()) week_commences = cls.get_date_week_started(datetime.date.today())
key = 'all_deleted_packages_%s' + week_commences.strftime(DATE_FORMAT) key = 'all_deleted_packages_%s' + week_commences.strftime(DATE_FORMAT)
deleted_packages = our_cache.get_value(key=key, deleted_packages = our_cache.get_value(key=key,
createfunc=deleted_packages) createfunc=deleted_packages)
else: else:
deleted_packages = deleted_packages() deleted_packages = deleted_packages()
return deleted_packages return deleted_packages
   
@classmethod @classmethod
def get_num_packages_by_week(cls): def get_num_packages_by_week(cls):
def num_packages(): def num_packages():
new_packages_by_week = cls.get_by_week('new_packages') new_packages_by_week = cls.get_by_week('new_packages')
deleted_packages_by_week = cls.get_by_week('deleted_packages') deleted_packages_by_week = cls.get_by_week('deleted_packages')
first_date = (min(datetime.datetime.strptime(new_packages_by_week[0][0], DATE_FORMAT), first_date = (min(datetime.datetime.strptime(new_packages_by_week[0][0], DATE_FORMAT),
datetime.datetime.strptime(deleted_packages_by_week[0][0], DATE_FORMAT))).date() datetime.datetime.strptime(deleted_packages_by_week[0][0], DATE_FORMAT))).date()
cls._cumulative_num_pkgs = 0 cls._cumulative_num_pkgs = 0
new_pkgs = [] new_pkgs = []
deleted_pkgs = [] deleted_pkgs = []
def build_weekly_stats(week_commences, new_pkg_ids, deleted_pkg_ids): def build_weekly_stats(week_commences, new_pkg_ids, deleted_pkg_ids):
num_pkgs = len(new_pkg_ids) - len(deleted_pkg_ids) num_pkgs = len(new_pkg_ids) - len(deleted_pkg_ids)
new_pkgs.extend([model.Session.query(model.Package).get(id).name for id in new_pkg_ids]) new_pkgs.extend([model.Session.query(model.Package).get(id).name for id in new_pkg_ids])
deleted_pkgs.extend([model.Session.query(model.Package).get(id).name for id in deleted_pkg_ids]) deleted_pkgs.extend([model.Session.query(model.Package).get(id).name for id in deleted_pkg_ids])
cls._cumulative_num_pkgs += num_pkgs cls._cumulative_num_pkgs += num_pkgs
return (week_commences.strftime(DATE_FORMAT), return (week_commences.strftime(DATE_FORMAT),
num_pkgs, cls._cumulative_num_pkgs) num_pkgs, cls._cumulative_num_pkgs)
week_ends = first_date week_ends = first_date
today = datetime.date.today() today = datetime.date.today()
new_package_week_index = 0 new_package_week_index = 0
deleted_package_week_index = 0 deleted_package_week_index = 0
weekly_numbers = [] # [(week_commences, num_packages, cumulative_num_pkgs])] weekly_numbers = [] # [(week_commences, num_packages, cumulative_num_pkgs])]
while week_ends <= today: while week_ends <= today:
week_commences = week_ends week_commences = week_ends
week_ends = week_commences + datetime.timedelta(days=7) week_ends = week_commences + datetime.timedelta(days=7)
if datetime.datetime.strptime(new_packages_by_week[new_package_week_index][0], DATE_FORMAT).date() == week_commences: if datetime.datetime.strptime(new_packages_by_week[new_package_week_index][0], DATE_FORMAT).date() == week_commences:
new_pkg_ids = new_packages_by_week[new_package_week_index][1] new_pkg_ids = new_packages_by_week[new_package_week_index][1]
new_package_week_index += 1 new_package_week_index += 1