Fix total datasets
[ckanext-dga-stats.git] / ckanext / dga_stats / stats.py
blob:a/ckanext/dga_stats/stats.py -> blob:b/ckanext/dga_stats/stats.py
--- a/ckanext/dga_stats/stats.py
+++ b/ckanext/dga_stats/stats.py
@@ -6,6 +6,8 @@
 
 import ckan.plugins as p
 import ckan.model as model
+
+import re
 
 cache_enabled = p.toolkit.asbool(config.get('ckanext.stats.cache_enabled', 'True'))
 
@@ -56,7 +58,9 @@
          member = table('member')
          s = select([member.c.group_id, func.count(member.c.table_id)]).\
             group_by(member.c.group_id).\
-            where(and_(member.c.group_id!=None, member.c.table_name=='package')).\
+            where(member.c.group_id!=None).\
+	    where(member.c.table_name=='package').\
+	    where(member.c.capacity=='public').\
             order_by(func.count(member.c.table_id).desc())
             #limit(limit)
 
@@ -66,43 +70,54 @@
 
     @classmethod
     def by_org(cls, limit=10):
-        group = table('group')
-        package = table('package')
-        s = select([group.c.id, package.c.private, func.count('*')], group_by=[group.c.id, package.c.private]).\
-	    where(group.c.is_organization == True).\
-            group_by(group.c.id, package.c.private).\
-            order_by(group.c.name)
-            #limit(limit)
-
-        res_ids = model.Session.execute(s).fetchall()
-        res_groups = [(model.Session.query(model.Group).get(unicode(group_id)), private, val) for group_id, private, val in res_ids]
+        """Return (Group, private flag, package count) rows for active packages that have resources, ordered by organisation name; ``limit`` is not applied."""
+        res = model.Session.connection().execute("select package.owner_org, package.private, count(*) from package \
+		inner join (select distinct package_id from resource_group inner join resource on resource.resource_group_id = resource_group.id) as r on package.id = r.package_id \
+		inner join \"group\" on package.owner_org = \"group\".id \
+		where package.state='active'\
+		group by package.owner_org,\"group\".name, package.private \
+		order by \"group\".name, package.private;").fetchall()
+        res_groups = [(model.Session.query(model.Group).get(unicode(org_id)), is_private, n_packages) for org_id, is_private, n_packages in res]
         return res_groups
 
     @classmethod
-    def top_tags(cls, limit=10, returned_tag_info='object'): # by package
-        assert returned_tag_info in ('name', 'id', 'object')
-        tag = table('tag')
-        package_tag = table('package_tag')
-        package = table('package')
-        #TODO filter out tags with state=deleted
-        if returned_tag_info == 'name':
-            from_obj = [package_tag.join(tag)]
-            tag_column = tag.c.name
-        else:
-            from_obj = None
-            tag_column = package_tag.c.tag_id
-        s = select([tag_column, func.count(package_tag.c.package_id)],
-                    from_obj=from_obj)
-        s = s.group_by(tag_column).\
-	      where(package.c.private == 'f').\
-            order_by(func.count(package_tag.c.package_id).desc()).\
-            limit(limit)
-        res_col = model.Session.execute(s).fetchall()
-        if returned_tag_info in ('id', 'name'):
-            return res_col
-        elif returned_tag_info == 'object':
-            res_tags = [(model.Session.query(model.Tag).get(unicode(tag_id)), val) for tag_id, val in res_col]
-            return res_tags
+    def res_by_org(cls, limit=10):
+        """Count resources per organisation, split into tabular, spatial and other formats.
+
+        Returns [(Group, tabular, spatial, other, total), ...]; ``limit`` is accepted but not applied.
+        """
+        rows = model.Session.connection().execute("select owner_org,format,count(*) from \
+		resource inner join resource_group on resource.resource_group_id = resource_group.id \
+		inner join package on resource_group.package_id = package.id group by owner_org,format order by count desc;").fetchall()
+        org_ids = []  # organisation ids in first-seen order (the query sorts by count, highest first)
+        tallies = {}  # organisation id -> [tabular, spatial, other]
+        for org_id, fmt, n in rows:
+            if org_id not in tallies:
+                org_ids.append(org_id)
+                tallies[org_id] = [0, 0, 0]
+            fmt = fmt or ''  # a NULL format arrives as None, which re.search rejects with TypeError; count it as "other"
+            if re.search('xls|csv|ms-excel|spreadsheetml.sheet|zip|netcdf', fmt, re.IGNORECASE):
+                tallies[org_id][0] += n
+            elif re.search('wms|wfs|wcs|shp|kml|kmz', fmt, re.IGNORECASE):
+                tallies[org_id][1] += n
+            else:
+                tallies[org_id][2] += n
+        return [(model.Session.query(model.Group).get(unicode(i)),) + tuple(tallies[i]) + (sum(tallies[i]),) for i in org_ids]
+
+    @classmethod
+    def top_active_orgs(cls, limit=10):
+        """Return (Group, package count) pairs for public, active packages with resources and activity in the last 60 days, highest count first; ``limit`` is not applied."""
+        rows = model.Session.connection().execute("select package.owner_org, count(*) from package \
+		inner join (select distinct package_id from resource_group inner join resource on resource.resource_group_id = resource_group.id) as r on package.id = r.package_id \
+		inner join \"group\" on package.owner_org = \"group\".id \
+                inner join (select distinct object_id from activity where activity.timestamp > (now() - interval '60 day')) \
+                latestactivities on latestactivities.object_id = package.id \
+                where package.state='active' \
+                and package.private = 'f' \
+                group by package.owner_org \
+                order by count(*) desc;").fetchall()
+        # Swap each owner_org id for its Group object (get() presumably yields None for an unknown id - confirm).
+        return [(model.Session.query(model.Group).get(unicode(org_id)), n_packages) for org_id, n_packages in rows]
 
     @classmethod
     def top_package_owners(cls, limit=10):
@@ -124,10 +139,11 @@
     def summary_stats(cls):
        connection = model.Session.connection()
 
-#				select 'Total Archived Datasets', count(*) from package where (state='active' or state='draft' or state='draft-complete') and private = 't' union \
        res = connection.execute("SELECT 'Total Organisations', count(*) from \"group\" where type = 'organization' and state = 'active' union \
-				select 'Total Datasets', count(*) from package where (state='active' or state='draft' or state='draft-complete') and private = 'f' union \
-				select 'Total Data Files/Resources', count(*) from resource where state='active'").fetchall();
+				select 'Total Datasets', count(*) from package inner join (select distinct package_id from resource_group inner join resource on resource.resource_group_id = resource_group.id) as r on package.id = r.package_id where (package.state='active' or package.state='draft' or package.state='draft-complete') and private = 'f' union \
+				select 'Total Archived Datasets', count(*) from package where (state='active' or state='draft' or state='draft-complete') and private = 't' union \
+				select 'Total Data Files/Resources', count(*) from resource where state='active' union \
+				select 'Total Machine Readable/Data API Resources', count(*) from resource where state='active' and (webstore_url = 'active' or format='wms')").fetchall();  # one (label, count) row per statistic; NOTE(review): the UNION has no ORDER BY, so row order is presumably not guaranteed - confirm
        return res