From: root Date: Tue, 22 Apr 2014 07:51:00 +0000 Subject: Merge branch 'master' of git+ssh://maxious.lambdacomplex.org/git/ckanext-dga-stats X-Git-Url: http://maxious.lambdacomplex.org/git/?p=ckanext-dga-stats.git&a=commitdiff&h=190db120ec3138a683f2782fdd6e535632bd8c92 --- Merge branch 'master' of git+ssh://maxious.lambdacomplex.org/git/ckanext-dga-stats Conflicts: ckanext/dga_stats/stats.py --- --- a/ckanext/dga_stats/controller.py +++ b/ckanext/dga_stats/controller.py @@ -12,12 +12,14 @@ c.top_rated_packages = stats.top_rated_packages() c.most_edited_packages = stats.most_edited_packages() c.largest_groups = stats.largest_groups() - c.top_tags = stats.top_tags() c.top_package_owners = stats.top_package_owners() c.summary_stats = stats.summary_stats() c.activity_counts = stats.activity_counts() c.by_org = stats.by_org() + c.res_by_org = stats.res_by_org() + c.top_active_orgs = stats.top_active_orgs() c.user_access_list = stats.user_access_list() + c.recent_datasets = stats.recent_datasets() c.new_packages_by_week = rev_stats.get_by_week('new_packages') c.deleted_packages_by_week = rev_stats.get_by_week('deleted_packages') c.num_packages_by_week = rev_stats.get_num_packages_by_week() --- a/ckanext/dga_stats/stats.py +++ b/ckanext/dga_stats/stats.py @@ -2,9 +2,12 @@ from pylons import config from sqlalchemy import Table, select, func, and_ +from sqlalchemy.sql.expression import text import ckan.plugins as p import ckan.model as model + +import re cache_enabled = p.toolkit.asbool(config.get('ckanext.stats.cache_enabled', 'True')) @@ -55,9 +58,11 @@ member = table('member') s = select([member.c.group_id, func.count(member.c.table_id)]).\ group_by(member.c.group_id).\ - where(and_(member.c.group_id!=None, member.c.table_name=='package')).\ - order_by(func.count(member.c.table_id).desc()).\ - limit(limit) + where(member.c.group_id!=None).\ + where(member.c.table_name=='package').\ + where(member.c.capacity=='public').\ + order_by(func.count(member.c.table_id).desc()) + #limit(limit) res_ids = model.Session.execute(s).fetchall() res_groups = [(model.Session.query(model.Group).get(unicode(group_id)), val) for group_id, val in res_ids] @@ -65,52 +70,63 @@ @classmethod def by_org(cls, limit=10): - group = table('group') - package = table('package') - s = select([group.c.id, package.c.private, func.count(package.c.private)]).\ - group_by(group.c.id, package.c.private).\ - order_by(group.c.id).\ - limit(limit) - - res_ids = model.Session.execute(s).fetchall() - res_groups = [(model.Session.query(model.Group).get(unicode(group_id)), private, val) for group_id, private, val in res_ids] + connection = model.Session.connection() + res = connection.execute("select package.owner_org, package.private, count(*) from package \ + inner join (select distinct package_id from resource_group inner join resource on resource.resource_group_id = resource_group.id) as r on package.id = r.package_id \ + inner join \"group\" on package.owner_org = \"group\".id \ + where package.state='active'\ + group by package.owner_org,\"group\".name, package.private \ + order by \"group\".name, package.private;").fetchall(); + res_groups = [(model.Session.query(model.Group).get(unicode(group_id)), private, val) for group_id, private, val in res] return res_groups @classmethod - def top_tags(cls, limit=10, returned_tag_info='object'): # by package - assert returned_tag_info in ('name', 'id', 'object') - tag = table('tag') - package_tag = table('package_tag') - package = table('package') - #TODO filter out tags with state=deleted - if returned_tag_info == 'name': - from_obj = [package_tag.join(tag)] - tag_column = tag.c.name - else: - from_obj = None - tag_column = package_tag.c.tag_id - s = select([tag_column, func.count(package_tag.c.package_id)], - from_obj=from_obj) - s = s.group_by(tag_column).\ - where(package.c.private == 'f').\ - order_by(func.count(package_tag.c.package_id).desc()).\ - limit(limit) - res_col = model.Session.execute(s).fetchall() - if returned_tag_info in ('id', 'name'): - return res_col - elif returned_tag_info == 'object': - res_tags = [(model.Session.query(model.Tag).get(unicode(tag_id)), val) for tag_id, val in res_col] - return res_tags + def res_by_org(cls, limit=10): + connection = model.Session.connection() + reses = connection.execute("select owner_org,format,count(*) from \ + resource inner join resource_group on resource.resource_group_id = resource_group.id \ + inner join package on resource_group.package_id = package.id group by owner_org,format order by count desc;").fetchall(); + group_ids = [] + group_tab = {} + group_spatial = {} + group_other = {} + for group_id,format,count in reses: + if group_id not in group_ids: + group_ids.append(group_id) + group_tab[group_id] = 0 + group_spatial[group_id] = 0 + group_other[group_id] = 0 + if re.search('xls|csv|ms-excel|spreadsheetml.sheet|zip|netcdf',format, re.IGNORECASE): + group_tab[group_id] = group_tab[group_id] + count + elif re.search('wms|wfs|wcs|shp|kml|kmz',format, re.IGNORECASE): + group_spatial[group_id] = group_spatial[group_id] + count + else: + group_other[group_id] = group_other[group_id] + count + return [(model.Session.query(model.Group).get(unicode(group_id)), group_tab[group_id],group_spatial[group_id],group_other[group_id], group_tab[group_id]+group_spatial[group_id]+group_other[group_id]) for group_id in group_ids] + + @classmethod + def top_active_orgs(cls, limit=10): + connection = model.Session.connection() + res = connection.execute("select package.owner_org, count(*) from package \ + inner join (select distinct package_id from resource_group inner join resource on resource.resource_group_id = resource_group.id) as r on package.id = r.package_id \ + inner join \"group\" on package.owner_org = \"group\".id \ + inner join (select distinct object_id from activity where activity.timestamp > (now() - interval '60 day')) \ + latestactivities on latestactivities.object_id = package.id \ + where package.state='active' \ + and package.private = 'f' \ + group by package.owner_org \ + order by count(*) desc;").fetchall(); + res_groups = [(model.Session.query(model.Group).get(unicode(group_id)), val) for group_id, val in res] + return res_groups @classmethod def top_package_owners(cls, limit=10): package_role = table('package_role') user_object_role = table('user_object_role') package = table('package') -# join(package, package_role.c.package_id == package.c.id).\ -# where(package.c.private == 'f').\ - s = select([user_object_role.c.user_id, func.count(user_object_role.c.role)], from_obj=[user_object_role.join(package_role)]).\ + s = select([user_object_role.c.user_id, func.count(user_object_role.c.role)], from_obj=[user_object_role.join(package_role).join(package, package_role.c.package_id == package.c.id)]).\ where(user_object_role.c.role==model.authz.Role.ADMIN).\ + where(package.c.private == 'f').\ where(user_object_role.c.user_id!=None).\ group_by(user_object_role.c.user_id).\ order_by(func.count(user_object_role.c.role).desc()).\ @@ -124,8 +140,10 @@ connection = model.Session.connection() res = connection.execute("SELECT 'Total Organisations', count(*) from \"group\" where type = 'organization' and state = 'active' union \ - select 'Total Datasets', count(*) from package where state='active' or state='draft' or state='draft-complete' union \ - select 'Total Data Files/Resources', count(*) from resource where state='active'").fetchall(); + select 'Total Datasets', count(*) from package inner join (select distinct package_id from resource_group inner join resource on resource.resource_group_id = resource_group.id) as r on package.id = r.package_id where (package.state='active' or package.state='draft' or package.state='draft-complete') and private = 'f' union \ + select 'Total Archived Datasets', count(*) from package where (state='active' or state='draft' or state='draft-complete') and private = 't' union \ + select 'Total Data Files/Resources', count(*) from resource where state='active' union \ + select 'Total Machine Readable/Data API Resources', count(*) from resource where state='active' and (webstore_url = 'active' or format='wms')").fetchall(); return res @@ -140,6 +158,16 @@ connection = model.Session.connection() res = connection.execute("select name,sysadmin,role from user_object_role right outer join \"user\" on user_object_role.user_id = \"user\".id where name not in ('logged_in','visitor') group by name,sysadmin,role order by sysadmin desc, role asc;").fetchall(); return res + + @classmethod + def recent_datasets(cls): + activity = table('activity') + package = table('package') + s = select([func.max(activity.c.timestamp),package.c.id, activity.c.activity_type], from_obj=[activity.join(package,activity.c.object_id == package.c.id)]).where(package.c.private == 'f').\ + where(activity.c.timestamp > func.now() - text("interval '60 day'")).group_by(package.c.id,activity.c.activity_type).order_by(func.max(activity.c.timestamp)) + result = model.Session.execute(s).fetchall() + return [(datetime2date(timestamp), model.Session.query(model.Package).get(unicode(package_id)), activity_type) for timestamp,package_id,activity_type in result] + class RevisionStats(object): @@ -211,7 +239,7 @@ package_revision = table('package_revision') revision = table('revision') package = table('package') - s = select([package_revision.c.id, func.min(revision.c.timestamp)], from_obj=[package_revision.join(revision)]).\ + s = select([package_revision.c.id, func.min(revision.c.timestamp)], from_obj=[package_revision.join(revision).join(package)]).\ where(package.c.private == 'f').\ group_by(package_revision.c.id).order_by(func.min(revision.c.timestamp)) res = model.Session.execute(s).fetchall() # [(id, datetime), ...] @@ -239,8 +267,10 @@ # be 'for all time' else you get first revision in the time period. package_revision = table('package_revision') revision = table('revision') - s = select([package_revision.c.id, func.min(revision.c.timestamp)], from_obj=[package_revision.join(revision)]).\ + package = table('package') + s = select([package_revision.c.id, func.min(revision.c.timestamp)], from_obj=[package_revision.join(revision).join(package)]).\ where(package_revision.c.state==model.State.DELETED).\ + where(package.c.private == 'f').\ group_by(package_revision.c.id).\ order_by(func.min(revision.c.timestamp)) res = model.Session.execute(s).fetchall() # [(id, datetime), ...] --- a/ckanext/dga_stats/templates/ckanext/stats/index.html +++ b/ckanext/dga_stats/templates/ckanext/stats/index.html @@ -6,30 +6,8 @@ {% block primary_content %}
-
-

{{ _('Summary') }}

- {% if c.summary_stats %} - - - - - - - - - {% for measure,value in c.summary_stats %} - - - - - {% endfor %} - -
{{ _('Measure') }}{{ _('Value') }}
{{measure}}{{ value }}
- {% else %} -

{{ _('No groups') }}

- {% endif %} -
-
+{% if h.check_access('sysadmin') %} +

{{ _('Site Activity Log') }}

{% if c.activity_counts %} @@ -54,8 +32,32 @@

{{ _('No groups') }}

{% endif %} -{% if h.check_access('sysadmin') %} -
+
+

{{ _('Recent Datasets') }}

+ {% if c.recent_datasets %} +
+ + + + + + + + + {% for date,package,newmodified in c.recent_datasets %} + + + + + + {% endfor %} + +
{{ _('Date') }}{{ _('Dataset') }}{{ _('New/Modified') }}
{{ date }}{{ h.link_to(package.title or package.name, h.url_for(controller='package', action='read', id=package.name)) }}{{ newmodified }}
+ {% else %} +

{{ _('No groups') }}

+ {% endif %} +
+

{{ _('User Access List') }}

{% if c.user_access_list %} @@ -129,31 +131,6 @@
-
-

{{ _('Top Rated Datasets') }}

- {% if c.top_rated_packages %} - - - - - - - - - - {% for package, rating, num_ratings in c.top_rated_packages %} - - - - - - {% endfor %} - -
Dataset{{ _('Average rating') }}{{ _('Number of ratings') }}
{{ h.link_to(package.title or package.name, h.url_for(controller='package', action='read', id=package.name)) }}{{ rating }}{{ num_ratings }}
- {% else %} -

{{ _('No ratings') }}

- {% endif %} -

{{ _('Most Edited Datasets') }}

@@ -202,59 +179,20 @@

{{ _('No groups') }}

{% endif %}
- -
-

{{ _('Top Tags') }}

- - - - - - - - - {% for tag, num_packages in c.top_tags %} - - - - - {% endfor %} - -
{{ _('Tag Name') }}{{ _('Number of Datasets') }}
{{ h.link_to(tag.name, h.url_for(controller='package', action='search', tags=tag.name)) }}{{ num_packages }}
-
- -
-

{{ _('Users Owning Most Datasets') }}

- - - - - - - - - {% for user, num_packages in c.top_package_owners %} - - - - - {% endfor %} - -
{{ _('User') }}{{ _('Number of Datasets') }}
{{ h.linked_user(user) }}{{ num_packages }}
-

{{ _('Datasets by Organization') }}

{% if c.by_org %} - + {% for group,private, num_packages in c.by_org %} + {% if private == False or h.check_access('sysadmin') %} {% if private == True %} @@ -264,14 +202,91 @@ {% endif %} - {% endfor %} - -
{{ _('Group') }}{{ _('Organisation') }} {{ _('Public/Archived') }} {{ _('Number of datasets') }}
{{ h.link_to(group.title or group.name, h.url_for(controller='organization', action='read', id=group.name)) }}{{ num_packages }}
- {% else %} -

{{ _('No groups') }}

- {% endif %} -
- + {% endif %} + {% endfor %} + + + {% else %} +

{{ _('No groups') }}

+ {% endif %} +
+
+

{{ _('Resources by Organization') }}

+ {% if c.res_by_org %} + + + + + + + + + + + + {% for group,t,s,o,tot in c.res_by_org %} + + + + + + + + {% endfor %} + +
{{ _('Organisation') }}{{ _('Tabular') }}{{ _('Spatial') }}{{ _('Other') }}{{ _('Total') }}
{{ h.link_to(group.title or group.name, h.url_for(controller='organization', action='read', id=group.name)) }}{{ t }}{{ s }}{{ o }}{{ tot }}
+ {% else %} +

{{ _('No groups') }}

+ {% endif %} +
+
+

{{ _('Most Active Organisations') }}

+ {% if c.top_active_orgs %} + + + + + + + + + {% for group, num_packages in c.top_active_orgs %} + + + + + {% endfor %} + +
{{ _('Organisation') }}{{ _('Number of datasets updated recently') }}
{{ h.link_to(group.title or group.name, h.url_for(controller='organization', action='read', id=group.name)) }}{{ num_packages }}
+ {% else %} +

{{ _('No groups') }}

+ {% endif %} +
+
+

{{ _('Summary') }}

+ {% if c.summary_stats %} + + + + + + + + + {% for measure,value in c.summary_stats %} + {% if 'Archived' not in measure or h.check_access('sysadmin') %} + + + + + {% endif %} + {% endfor %} + +
{{ _('Measure') }}{{ _('Value') }}
{{measure}}{{ value }}
+ {% else %} +

{{ _('No groups') }}

+ {% endif %} +
{% endblock %} @@ -280,18 +295,18 @@

{{ _('Statistics Menu') }}