import datetime

from pylons import config
from sqlalchemy import Table, select, func, and_
from sqlalchemy.sql.expression import text

import ckan.plugins as p
import ckan.model as model
|
|
# Caching of statistics is controlled by the 'ckanext.stats.cache_enabled'
# config option; it is treated as enabled when the option is absent.
cache_enabled = p.toolkit.asbool(config.get('ckanext.stats.cache_enabled', 'True'))

if cache_enabled:
    # The cache is only imported and created when caching is switched on, so
    # ``our_cache`` does not exist otherwise; guard uses with ``cache_enabled``.
    from pylons import cache
    our_cache = cache.get_cache('stats', type='dbm')

# strftime format used for the date part of cache keys.
DATE_FORMAT = '%Y-%m-%d'
|
|
def table(name):
    '''
    @param name: name of the database table
    @return: sqlalchemy Table for ``name``, bound to the CKAN model metadata
             and created with ``autoload=True``
    '''
    return Table(name, model.meta.metadata, autoload=True)
|
|
def datetime2date(datetime_):
    '''
    @param datetime_: object with ``year``, ``month`` and ``day`` attributes
                      (e.g. a datetime.datetime)
    @return: datetime.date for the same calendar day, dropping any time part
    '''
    return datetime.date(datetime_.year, datetime_.month, datetime_.day)
|
|
|
|
class Stats(object):
    '''Statistics queries, each run through ``model.Session``.

    All methods are classmethods and return lists of tuples. Where a query
    yields ids, they are resolved to model objects with one
    ``Session.query(...).get(...)`` call per row.
    '''

    @classmethod
    def top_rated_packages(cls, limit=10):
        '''
        @param limit: maximum number of rows to return
        @return: list of (package, average rating, number of ratings),
                 highest average first, ties broken by number of ratings;
                 only packages whose ``private`` column is 'f' are included
        '''
        # NB Not using sqlalchemy as sqla 0.4 doesn't work using both group_by
        # and apply_avg
        package = table('package')
        rating = table('rating')
        sql = select([package.c.id,
                      func.avg(rating.c.rating),
                      func.count(rating.c.rating)],
                     from_obj=[package.join(rating)]).\
            where(package.c.private == 'f').\
            group_by(package.c.id).\
            order_by(func.avg(rating.c.rating).desc(),
                     func.count(rating.c.rating).desc()).\
            limit(limit)
        res_ids = model.Session.execute(sql).fetchall()
        return [(model.Session.query(model.Package).get(unicode(pkg_id)), avg, num)
                for pkg_id, avg, num in res_ids]

    @classmethod
    def most_edited_packages(cls, limit=10):
        '''
        @param limit: maximum number of rows to return
        @return: list of (package, number of revisions), most revised first;
                 only packages whose ``private`` column is 'f' are included
        '''
        package_revision = table('package_revision')
        package = table('package')
        s = select([package_revision.c.id,
                    func.count(package_revision.c.revision_id)],
                   from_obj=[package_revision.join(package)]).\
            where(package.c.private == 'f').\
            group_by(package_revision.c.id).\
            order_by(func.count(package_revision.c.revision_id).desc()).\
            limit(limit)
        res_ids = model.Session.execute(s).fetchall()
        return [(model.Session.query(model.Package).get(unicode(pkg_id)), val)
                for pkg_id, val in res_ids]

    @classmethod
    def largest_groups(cls, limit=10):
        '''
        @param limit: accepted for interface compatibility but NOT applied:
                      the ``limit(limit)`` clause is disabled, so every
                      matching group is returned
        @return: list of (group, number of members), largest first, counting
                 only members with table_name 'package' and capacity 'public'
        '''
        member = table('member')
        # ``!= None`` is deliberate: it is a SQLAlchemy column comparison that
        # renders as IS NOT NULL, not a Python identity test.
        s = select([member.c.group_id, func.count(member.c.table_id)]).\
            group_by(member.c.group_id).\
            where(member.c.group_id != None).\
            where(member.c.table_name == 'package').\
            where(member.c.capacity == 'public').\
            order_by(func.count(member.c.table_id).desc())
        # limit(limit) is intentionally left disabled (see docstring).
        res_ids = model.Session.execute(s).fetchall()
        return [(model.Session.query(model.Group).get(unicode(group_id)), val)
                for group_id, val in res_ids]

    @classmethod
    def by_org(cls, limit=10):
        '''
        @param limit: accepted for interface compatibility; the raw SQL below
                      does not apply it
        @return: list of (group, private flag, number of active packages) per
                 owning organisation, ordered by group name then private flag
        '''
        connection = model.Session.connection()
        res = connection.execute(
            "select package.owner_org, package.private, count(*) from package "
            "inner join \"group\" on package.owner_org = \"group\".id "
            "where package.state='active' "
            "group by package.owner_org,\"group\".name, package.private "
            "order by \"group\".name, package.private;").fetchall()
        return [(model.Session.query(model.Group).get(unicode(group_id)), private, val)
                for group_id, private, val in res]

    @classmethod
    def top_tags(cls, limit=10, returned_tag_info='object'):  # by package
        '''
        @param limit: maximum number of rows to return
        @param returned_tag_info: 'name', 'id' or 'object' - what the first
                                  element of each returned tuple should be
        @return: list of (tag name / tag id / tag object, number of packages
                 whose ``private`` column is 'f' carrying the tag), most used
                 first
        '''
        assert returned_tag_info in ('name', 'id', 'object')
        tag = table('tag')
        package_tag = table('package_tag')
        package = table('package')
        # Join package explicitly. Filtering on package.private without a
        # join makes sqlalchemy add ``package`` to FROM as a cross join, which
        # multiplies every count by the number of matching packages.
        package_on = package_tag.c.package_id == package.c.id
        #TODO filter out tags with state=deleted
        if returned_tag_info == 'name':
            from_obj = [package_tag.join(tag).join(package, package_on)]
            tag_column = tag.c.name
        else:
            from_obj = [package_tag.join(package, package_on)]
            tag_column = package_tag.c.tag_id
        s = select([tag_column, func.count(package_tag.c.package_id)],
                   from_obj=from_obj)
        s = s.group_by(tag_column).\
            where(package.c.private == 'f').\
            order_by(func.count(package_tag.c.package_id).desc()).\
            limit(limit)
        res_col = model.Session.execute(s).fetchall()
        if returned_tag_info in ('id', 'name'):
            return res_col
        elif returned_tag_info == 'object':
            return [(model.Session.query(model.Tag).get(unicode(tag_id)), val)
                    for tag_id, val in res_col]

    @classmethod
    def top_package_owners(cls, limit=10):
        '''
        @param limit: maximum number of rows to return
        @return: list of (user, number of ADMIN roles held on packages whose
                 ``private`` column is 'f'), highest count first
        '''
        package_role = table('package_role')
        user_object_role = table('user_object_role')
        package = table('package')
        roles_on_packages = user_object_role.join(package_role).join(
            package, package_role.c.package_id == package.c.id)
        s = select([user_object_role.c.user_id,
                    func.count(user_object_role.c.role)],
                   from_obj=[roles_on_packages]).\
            where(user_object_role.c.role == model.authz.Role.ADMIN).\
            where(package.c.private == 'f').\
            where(user_object_role.c.user_id != None).\
            group_by(user_object_role.c.user_id).\
            order_by(func.count(user_object_role.c.role).desc()).\
            limit(limit)
        res_ids = model.Session.execute(s).fetchall()
        return [(model.Session.query(model.User).get(unicode(user_id)), val)
                for user_id, val in res_ids]

    @classmethod
    def summary_stats(cls):
        '''
        @return: list of (label, count) rows, one per headline figure:
                 organisations, datasets, archived (private) datasets,
                 resources and machine readable resources. The rows come
                 from a UNION, so their order is not specified by the query.
        '''
        connection = model.Session.connection()
        # NOTE(review): the last select compares webstore_url with 'active';
        # kept exactly as found - confirm that this is the intended test.
        res = connection.execute(
            "SELECT 'Total Organisations', count(*) from \"group\" "
            "where type = 'organization' and state = 'active' union "
            "select 'Total Datasets', count(*) from package "
            "where (state='active' or state='draft' or state='draft-complete') "
            "and private = 'f' union "
            "select 'Total Archived Datasets', count(*) from package "
            "where (state='active' or state='draft' or state='draft-complete') "
            "and private = 't' union "
            "select 'Total Data Files/Resources', count(*) from resource "
            "where state='active' union "
            "select 'Total Machine Readable/Data API Resources', count(*) "
            "from resource where state='active' and webstore_url = 'active'"
        ).fetchall()
        return res

    @classmethod
    def activity_counts(cls):
        '''
        @return: list of (month as 'YYYY-MM', activity type, count) rows from
                 the activity table, ordered by month
        '''
        connection = model.Session.connection()
        res = connection.execute(
            "select to_char(timestamp, 'YYYY-MM') as month,activity_type, "
            "count(*) from activity group by month, activity_type "
            "order by month;").fetchall()
        return res

    @classmethod
    def user_access_list(cls):
        '''
        @return: list of (user name, sysadmin flag, role) rows for every user
                 except 'logged_in' and 'visitor', sysadmins first; users
                 without any role are still listed (right outer join)
        '''
        connection = model.Session.connection()
        res = connection.execute(
            "select name,sysadmin,role from user_object_role "
            "right outer join \"user\" on user_object_role.user_id = \"user\".id "
            "where name not in ('logged_in','visitor') "
            "group by name,sysadmin,role "
            "order by sysadmin desc, role asc;").fetchall()
        return res

    @classmethod
    def recent_datasets(cls):
        '''
        @return: list of (date of latest activity, package, activity type) for
                 activity in the last 60 days on packages whose ``private``
                 column is 'f', oldest first; one row per package and
                 activity type
        '''
        activity = table('activity')
        package = table('package')
        s = select([func.max(activity.c.timestamp),
                    package.c.id,
                    activity.c.activity_type],
                   from_obj=[activity.join(
                       package, activity.c.object_id == package.c.id)]).\
            where(package.c.private == 'f').\
            where(activity.c.timestamp > func.now() - text("interval '60 day'")).\
            group_by(package.c.id, activity.c.activity_type).\
            order_by(func.max(activity.c.timestamp))
        result = model.Session.execute(s).fetchall()
        return [(datetime2date(timestamp),
                 model.Session.query(model.Package).get(unicode(package_id)),
                 activity_type)
                for timestamp, package_id, activity_type in result]
|
|
|
|
|
|
class RevisionStats(object):
    '''Week-based statistics derived from package revisions.

    NOTE(review): ``get_objects_in_a_week`` is called below but is not
    defined in the part of the class shown here - presumably it is defined
    further on; confirm.
    '''

    @classmethod
    def package_addition_rate(cls, weeks_ago=0):
        '''
        @param weeks_ago: specify how many weeks ago to give count for
                          (0 = this week so far)
        @return: whatever ``get_objects_in_a_week`` returns for that week
                 with type_='package_addition_rate'
        '''
        week_commenced = cls.get_date_weeks_ago(weeks_ago)
        return cls.get_objects_in_a_week(week_commenced,
                                         type_='package_addition_rate')

    @classmethod
    def package_revision_rate(cls, weeks_ago=0):
        '''
        @param weeks_ago: specify how many weeks ago to give count for
                          (0 = this week so far)
        @return: whatever ``get_objects_in_a_week`` returns for that week
                 with type_='package_revision_rate'
        '''
        week_commenced = cls.get_date_weeks_ago(weeks_ago)
        return cls.get_objects_in_a_week(week_commenced,
                                         type_='package_revision_rate')

    @classmethod
    def get_date_weeks_ago(cls, weeks_ago):
        '''
        @param weeks_ago: specify how many weeks ago to give count for
                          (0 = this week so far)
        @return: datetime.date of the Monday that started that week
        '''
        date_ = datetime.date.today()
        # weekday() is 0 on Monday, so subtracting it lands on this Monday.
        return date_ - datetime.timedelta(days=date_.weekday() + 7 * weeks_ago)

    @classmethod
    def get_week_dates(cls, weeks_ago):
        '''
        @param weeks_ago: specify how many weeks ago to give count for
                          (0 = this week so far)
        @return: (date_from, date_to) datetimes: midnight on the Monday that
                 started the week, and midnight seven days later
        '''
        today = datetime.date.today()
        date_from = datetime.datetime(today.year, today.month, today.day) - \
            datetime.timedelta(days=today.weekday() + 7 * weeks_ago)
        date_to = date_from + datetime.timedelta(days=7)
        return (date_from, date_to)

    @classmethod
    def get_date_week_started(cls, date_):
        '''
        @param date_: a datetime.date or datetime.datetime
        @return: datetime.date of the Monday of the week containing ``date_``
        '''
        assert isinstance(date_, datetime.date)
        # datetime.datetime is a subclass of datetime.date; strip the time
        # part so that the result is a plain date.
        if isinstance(date_, datetime.datetime):
            date_ = datetime2date(date_)
        return date_ - datetime.timedelta(days=date_.weekday())

    @classmethod
    def get_package_revisions(cls):
        '''
        @return: Returns list of revisions and date of them, in
                 format: [(id, date), ...]
        '''
        package_revision = table('package_revision')
        revision = table('revision')
        s = select([package_revision.c.id, revision.c.timestamp],
                   from_obj=[package_revision.join(revision)]).\
            order_by(revision.c.timestamp)
        res = model.Session.execute(s).fetchall()  # [(id, datetime), ...]
        return res

    @classmethod
    def get_new_packages(cls):
        '''
        @return: Returns list of new pkgs and date when they were created, in
                 format: [(id, date_ordinal), ...]
        '''
        def fetch_new_packages():
            # Can't filter by time in select because 'min' function has to
            # be 'for all time' else you get first revision in the time period.
            package_revision = table('package_revision')
            revision = table('revision')
            package = table('package')
            s = select([package_revision.c.id, func.min(revision.c.timestamp)],
                       from_obj=[package_revision.join(revision).join(package)]).\
                where(package.c.private == 'f').\
                group_by(package_revision.c.id).\
                order_by(func.min(revision.c.timestamp))
            res = model.Session.execute(s).fetchall()  # [(id, datetime), ...]
            # Store ordinals rather than datetimes so the result is pickleable.
            return [(pkg_id, created_datetime.toordinal())
                    for pkg_id, created_datetime in res]

        if not cache_enabled:
            return fetch_new_packages()
        # One cache entry per week: the key carries the date the week started.
        week_commences = cls.get_date_week_started(datetime.date.today())
        key = 'all_new_packages_%s' % week_commences.strftime(DATE_FORMAT)
        return our_cache.get_value(key=key, createfunc=fetch_new_packages)