Merge branch 'master' of git+ssh://maxious.lambdacomplex.org/git/ckanext-dga-stats
Merge branch 'master' of git+ssh://maxious.lambdacomplex.org/git/ckanext-dga-stats

Conflicts:
ckanext/dga_stats/stats.py

--- a/ckanext/dga_stats/controller.py
+++ b/ckanext/dga_stats/controller.py
@@ -12,11 +12,12 @@
         c.top_rated_packages = stats.top_rated_packages()
         c.most_edited_packages = stats.most_edited_packages()
         c.largest_groups = stats.largest_groups()
-        c.top_tags = stats.top_tags()
         c.top_package_owners = stats.top_package_owners()
         c.summary_stats = stats.summary_stats()
         c.activity_counts = stats.activity_counts()
         c.by_org = stats.by_org()
+        c.res_by_org = stats.res_by_org()
+        c.top_active_orgs = stats.top_active_orgs()
         c.user_access_list = stats.user_access_list()
         c.recent_datasets = stats.recent_datasets()
         c.new_packages_by_week = rev_stats.get_by_week('new_packages')

--- a/ckanext/dga_stats/stats.py
+++ b/ckanext/dga_stats/stats.py
@@ -6,6 +6,8 @@
 
 import ckan.plugins as p
 import ckan.model as model
+
+import re
 
 cache_enabled = p.toolkit.asbool(config.get('ckanext.stats.cache_enabled', 'True'))
 
@@ -56,7 +58,9 @@
          member = table('member')
          s = select([member.c.group_id, func.count(member.c.table_id)]).\
             group_by(member.c.group_id).\
-            where(and_(member.c.group_id!=None, member.c.table_name=='package')).\
+            where(member.c.group_id!=None).\
+	    where(member.c.table_name=='package').\
+	    where(member.c.capacity=='public').\
             order_by(func.count(member.c.table_id).desc())
             #limit(limit)
 
@@ -66,43 +70,54 @@
 
     @classmethod
     def by_org(cls, limit=10):
-        group = table('group')
-        package = table('package')
-        s = select([group.c.id, package.c.private, func.count('*')], group_by=[group.c.id, package.c.private]).\
-	    where(group.c.is_organization == True).\
-            group_by(group.c.id, package.c.private).\
-            order_by(group.c.name)
-            #limit(limit)
-
-        res_ids = model.Session.execute(s).fetchall()
-        res_groups = [(model.Session.query(model.Group).get(unicode(group_id)), private, val) for group_id, private, val in res_ids]
+        """Count active packages that have at least one resource, grouped by owning organisation and private flag."""
+        res = model.Session.connection().execute("select package.owner_org, package.private, count(*) from package \
+		inner join (select distinct package_id from resource_group inner join resource on resource.resource_group_id = resource_group.id) as r on package.id = r.package_id \
+		inner join \"group\" on package.owner_org = \"group\".id \
+		where package.state='active'\
+		group by package.owner_org,\"group\".name, package.private \
+		order by \"group\".name, package.private;").fetchall()
+        res_groups = [(model.Session.query(model.Group).get(unicode(org_id)), is_private, num) for org_id, is_private, num in res]
         return res_groups
 
     @classmethod
-    def top_tags(cls, limit=10, returned_tag_info='object'): # by package
-        assert returned_tag_info in ('name', 'id', 'object')
-        tag = table('tag')
-        package_tag = table('package_tag')
-        package = table('package')
-        #TODO filter out tags with state=deleted
-        if returned_tag_info == 'name':
-            from_obj = [package_tag.join(tag)]
-            tag_column = tag.c.name
-        else:
-            from_obj = None
-            tag_column = package_tag.c.tag_id
-        s = select([tag_column, func.count(package_tag.c.package_id)],
-                    from_obj=from_obj)
-        s = s.group_by(tag_column).\
-	      where(package.c.private == 'f').\
-            order_by(func.count(package_tag.c.package_id).desc()).\
-            limit(limit)
-        res_col = model.Session.execute(s).fetchall()
-        if returned_tag_info in ('id', 'name'):
-            return res_col
-        elif returned_tag_info == 'object':
-            res_tags = [(model.Session.query(model.Tag).get(unicode(tag_id)), val) for tag_id, val in res_col]
-            return res_tags
+    def res_by_org(cls, limit=10):
+        """Count resources per organisation, split by format family.
+
+        Returns (group, tabular, spatial, other, total) tuples for packages that have an owner_org; ``limit`` is unused.
+        """
+        rows = model.Session.connection().execute("select owner_org,format,count(*) from \
+                resource inner join resource_group on resource.resource_group_id = resource_group.id \
+                inner join package on resource_group.package_id = package.id where owner_org is not null group by owner_org,format order by count desc;").fetchall()
+        org_ids, totals = [], {}  # first-seen order of org ids; org id -> [tabular, spatial, other]
+        for org_id, fmt, num in rows:
+            if org_id not in totals:
+                org_ids.append(org_id)
+                totals[org_id] = [0, 0, 0]
+            fmt = fmt or ''  # a NULL format would make re.search() raise TypeError
+            if re.search('xls|csv|ms-excel|spreadsheetml.sheet|zip|netcdf', fmt, re.IGNORECASE):
+                bucket = 0
+            elif re.search('wms|wfs|wcs|shp|kml|kmz', fmt, re.IGNORECASE):
+                bucket = 1
+            else:
+                bucket = 2
+            totals[org_id][bucket] += num
+        return [(model.Session.query(model.Group).get(unicode(org_id)),) + tuple(totals[org_id]) + (sum(totals[org_id]),) for org_id in org_ids]
+
+    @classmethod
+    def top_active_orgs(cls, limit=10):
+        """Return (group, count) pairs: public active packages with resources and activity in the last 60 days, most first."""
+        # ``limit`` is accepted but never applied; every matching organisation is returned.
+        rows = model.Session.connection().execute("select package.owner_org, count(*) from package \
+		inner join (select distinct package_id from resource_group inner join resource on resource.resource_group_id = resource_group.id) as r on package.id = r.package_id \
+		inner join \"group\" on package.owner_org = \"group\".id \
+                inner join (select distinct object_id from activity where activity.timestamp > (now() - interval '60 day')) \
+                latestactivities on latestactivities.object_id = package.id \
+                where package.state='active' \
+                and package.private = 'f' \
+                group by package.owner_org \
+                order by count(*) desc;").fetchall()
+        return [(model.Session.query(model.Group).get(unicode(org_id)), num) for org_id, num in rows]
 
     @classmethod
     def top_package_owners(cls, limit=10):
@@ -124,10 +139,11 @@
     def summary_stats(cls):
        connection = model.Session.connection()
 
-#				select 'Total Archived Datasets', count(*) from package where (state='active' or state='draft' or state='draft-complete') and private = 't' union \
        res = connection.execute("SELECT 'Total Organisations', count(*) from \"group\" where type = 'organization' and state = 'active' union \
-				select 'Total Datasets', count(*) from package where (state='active' or state='draft' or state='draft-complete') and private = 'f' union \
-				select 'Total Data Files/Resources', count(*) from resource where state='active'").fetchall();
+				select 'Total Datasets', count(*) from package inner join (select distinct package_id from resource_group inner join resource on resource.resource_group_id = resource_group.id) as r on package.id = r.package_id where (package.state='active' or package.state='draft' or package.state='draft-complete') and private = 'f' union \
+				select 'Total Archived Datasets', count(*) from package where (state='active' or state='draft' or state='draft-complete') and private = 't' union \
+				select 'Total Data Files/Resources', count(*) from resource where state='active' union \
+				select 'Total Machine Readable/Data API Resources', count(*) from resource where state='active' and (webstore_url = 'active' or format='wms')").fetchall();
        return res
 
 

--- a/ckanext/dga_stats/templates/ckanext/stats/index.html
+++ b/ckanext/dga_stats/templates/ckanext/stats/index.html
@@ -6,31 +6,8 @@
 
 {% block primary_content %}
   <article class="module">
-    <section id="summary" class="module-content tab-content">
-      <h2>{{ _('Summary') }}</h2>
-      {% if c.summary_stats %}
-        <table class="table table-chunky table-bordered table-striped">
-          <thead>
-            <tr>
-              <th>{{ _('Measure') }}</th>
-              <th class="metric">{{ _('Value') }}</th>
-            </tr>
-          </thead>
-          <tbody>
-            {% for measure,value in c.summary_stats %}
-              <tr>
-                <td>{{measure}}</td>
-                <td class="metric">{{ value }}</td>
-              </tr>
-            {% endfor %}
-          </tbody>
-        </table>
-      {% else %}
-        <p class="empty">{{ _('No groups') }}</p>
-      {% endif %}
-    </section>
 {% if h.check_access('sysadmin') %}
-    <section id="activity-counts" class="module-content tab-content">
+    <section id="stats-activity-counts" class="module-content tab-content">
       <h2>{{ _('Site Activity Log') }}</h2>
       {% if c.activity_counts %}
         <table class="table table-chunky table-bordered table-striped">
@@ -55,7 +32,7 @@
         <p class="empty">{{ _('No groups') }}</p>
       {% endif %}
     </section>
-    <section id="recent-datasets" class="module-content tab-content">
+    <section id="stats-recent-datasets" class="module-content tab-content">
       <h2>{{ _('Recent Datasets') }}</h2>
       {% if c.recent_datasets %}
         <table class="table table-chunky table-bordered table-striped">
@@ -80,7 +57,7 @@
         <p class="empty">{{ _('No groups') }}</p>
       {% endif %}
     </section>
-    <section id="user-access-list" class="module-content tab-content">
+    <section id="stats-user-access-list" class="module-content tab-content">
       <h2>{{ _('User Access List') }}</h2>
       {% if c.user_access_list %}
         <table class="table table-chunky table-bordered table-striped">
@@ -154,31 +131,6 @@
       </table>
     </section>
 
-    <section id="stats-top-rated" class="module-content tab-content">
-      <h2>{{ _('Top Rated Datasets') }}</h2>
-      {% if c.top_rated_packages %}
-        <table class="table table-chunky table-bordered table-striped">
-          <thead>
-            <tr>
-              <th>Dataset</th>
-              <th class="metric">{{ _('Average rating') }}</th>
-              <th class="metric">{{ _('Number of ratings') }}</th>
-            </tr>
-          </thead>
-          <tbody>
-            {% for package, rating, num_ratings in c.top_rated_packages %}
-              <tr>
-                <th>{{ h.link_to(package.title or package.name, h.url_for(controller='package', action='read', id=package.name)) }}</th>
-                <td class="metric">{{ rating }}</td>
-                <td class="metric">{{ num_ratings }}</td>
-              </tr>
-            {% endfor %}
-          </tbody>
-        </table>
-      {% else %}
-        <p class="empty">{{ _('No ratings') }}</p>
-      {% endif %}
-    </section>
 
     <section id="stats-most-edited" class="module-content tab-content">
       <h2>{{ _('Most Edited Datasets') }}</h2>
@@ -227,59 +179,20 @@
        <p class="empty">{{ _('No groups') }}</p>
       {% endif %}
     </section>
-<!--
-    <section id="stats-top-tags" class="module-content tab-content">
-      <h2>{{ _('Top Tags') }}</h2>
-      <table class="table table-chunky table-bordered table-striped">
-        <thead>
-          <tr>
-            <th>{{ _('Tag Name') }}</th>
-            <th class="metric">{{ _('Number of Datasets') }}</th>
-          </tr>
-        </thead>
-        <tbody>
-          {% for tag, num_packages in c.top_tags %}
-            <tr>
-              <td>{{ h.link_to(tag.name, h.url_for(controller='package', action='search', tags=tag.name)) }}</td>
-              <td class="metric">{{ num_packages }}</td>
-            </tr>
-          {% endfor %}
-        </tbody>
-      </table>
-    </section>
--->
-    <section id="stats-most-owned" class="module-content tab-content">
-      <h2>{{ _('Users Owning Most Datasets') }}</h2>
-      <table class="table table-chunky table-bordered table-striped">
-        <thead>
-          <tr>
-            <th>{{ _('User') }}</th>
-            <th class="metric">{{ _('Number of Datasets') }}</th>
-          </tr>
-        </thead>
-        <tbody>
-          {% for user, num_packages in c.top_package_owners %}
-            <tr>
-              <td class="media">{{ h.linked_user(user) }}</td>
-              <td class="metric">{{ num_packages }}</td>
-            </tr>
-          {% endfor %}
-        </tbody>
-      </table>
-    </section>
-<!--    <section id="stats-by-org" class="module-content tab-content">
+    <section id="stats-by-org" class="module-content tab-content">
       <h2>{{ _('Datasets by Organization') }}</h2>
       {% if c.by_org %}
         <table class="table table-chunky table-bordered table-striped">
           <thead>
             <tr>
-              <th>{{ _('Group') }}</th>
+              <th>{{ _('Organisation') }}</th>
               <th>{{ _('Public/Archived') }}</th>
               <th class="metric">{{ _('Number of datasets') }}</th>
             </tr>
           </thead>
           <tbody>
             {% for group,private, num_packages in c.by_org %}
+		{% if private == False or h.check_access('sysadmin') %}
               <tr>
                 <td>{{ h.link_to(group.title or group.name, h.url_for(controller='organization', action='read', id=group.name)) }}</td>
 		{% if private == True %}
@@ -289,14 +202,91 @@
 		{% endif %}
                 <td class="metric">{{ num_packages }}</td>
               </tr>
-            {% endfor %}
-          </tbody>
-        </table>
-      {% else %}
-        <p class="empty">{{ _('No groups') }}</p>
-      {% endif %}
-    </section>
--->
+		{% endif %}
+            {% endfor %}
+          </tbody>
+        </table>
+      {% else %}
+        <p class="empty">{{ _('No groups') }}</p>
+      {% endif %}
+    </section>
+    <section id="stats-res-by-org" class="module-content tab-content">
+      <h2>{{ _('Resources by Organization') }}</h2>
+      {% if c.res_by_org %}
+        <table class="table table-chunky table-bordered table-striped">
+          <thead>
+            <tr>
+              <th>{{ _('Organisation') }}</th>
+              <th>{{ _('Tabular') }}</th>
+              <th>{{ _('Spatial') }}</th>
+              <th>{{ _('Other') }}</th>
+              <th class="metric">{{ _('Total') }}</th>
+            </tr>
+          </thead>
+          <tbody>
+            {% for group,t,s,o,tot in c.res_by_org %}
+              <tr>
+                <td>{{ h.link_to(group.title or group.name, h.url_for(controller='organization', action='read', id=group.name)) }}</td>
+                <td>{{ t }}</td>
+                <td>{{ s }}</td>
+                <td>{{ o }}</td>
+                <td class="metric">{{ tot }}</td>
+              </tr>
+            {% endfor %}
+          </tbody>
+        </table>
+      {% else %}
+        <p class="empty">{{ _('No groups') }}</p>
+      {% endif %}
+    </section>
+    <section id="stats-activity-org" class="module-content tab-content">
+      <h2>{{ _('Most Active Organisations') }}</h2>
+      {% if c.top_active_orgs %}
+        <table class="table table-chunky table-bordered table-striped">
+          <thead>
+            <tr>
+              <th>{{ _('Organisation') }}</th>
+              <th class="metric">{{ _('Number of datasets updated recently') }}</th>
+            </tr>
+          </thead>
+          <tbody>
+            {% for group, num_packages in c.top_active_orgs %}
+              <tr>
+                <td>{{ h.link_to(group.title or group.name, h.url_for(controller='organization', action='read', id=group.name)) }}</td>
+                <td class="metric">{{ num_packages }}</td>
+              </tr>
+            {% endfor %}
+          </tbody>
+        </table>
+      {% else %}
+        <p class="empty">{{ _('No groups') }}</p>
+      {% endif %}
+    </section>
+    <section id="stats-summary" class="module-content tab-content">
+      <h2>{{ _('Summary') }}</h2>
+      {% if c.summary_stats %}
+        <table class="table table-chunky table-bordered table-striped">
+          <thead>
+            <tr>
+              <th>{{ _('Measure') }}</th>
+              <th class="metric">{{ _('Value') }}</th>
+            </tr>
+          </thead>
+          <tbody>
+            {% for measure,value in c.summary_stats %}
+		{%  if 'Archived' not in measure or h.check_access('sysadmin') %}
+              <tr>
+                <td>{{measure}}</td>
+                <td class="metric">{{ value }}</td>
+              </tr>
+		{% endif %}
+            {% endfor %}
+          </tbody>
+        </table>
+      {% else %}
+        <p class="empty">{{ _('No groups') }}</p>
+      {% endif %}
+    </section>
   </article>
 {% endblock %}
 
@@ -305,19 +295,18 @@
     <h2 class="module-heading"><i class="icon-bar-chart icon-medium"></i> {{ _('Statistics Menu') }}</h2>
     <nav data-module="stats-nav">
       <ul class="unstyled nav nav-simple">
-        <li class="nav-item active"><a href="#summary" data-toggle="tab">{{ _('Summary') }}</a></li>
 {% if h.check_access('sysadmin') %}
-        <li class="nav-item"><a href="#activity-counts" data-toggle="tab">{{ _('Site Activity Log') }}</a></li>
-        <li class="nav-item"><a href="#recent-datasets" data-toggle="tab">{{ _('Recent Datasets') }}</a></li>
-        <li class="nav-item"><a href="#user-access-list" data-toggle="tab">{{ _('User Access List') }}</a></li>
+        <li class="nav-item"><a href="#stats-recent-datasets" data-toggle="tab">{{ _('Recent Datasets') }}</a></li>
+        <li class="nav-item"><a href="#stats-user-access-list" data-toggle="tab">{{ _('User Access List') }}</a></li>
 {% endif %}
         <li class="nav-item"><a href="#stats-total-datasets" data-toggle="tab">{{ _('Total Number of Datasets') }}</a></li>
         <li class="nav-item"><a href="#stats-dataset-revisions" data-toggle="tab">{{ _('Dataset Revisions per Week') }}</a></li>
-<!--        <li class="nav-item"><a href="#stats-top-rated" data-toggle="tab">{{ _('Top Rated Datasets') }}</a></li> -->
         <li class="nav-item"><a href="#stats-most-edited" data-toggle="tab">{{ _('Most Edited Datasets') }}</a></li>
-<!--        <li class="nav-item"><a href="#stats-top-tags" data-toggle="tab">{{ _('Top Tags') }}</a></li> -->
-        <li class="nav-item"><a href="#stats-most-owned" data-toggle="tab">{{ _('Users Owning Most Datasets') }}</a></li>
-<!--        <li class="nav-item"><a href="#stats-by-org" data-toggle="tab">{{ _('Datasets by Organization') }}</a></li> -->
+        <li class="nav-item"><a href="#stats-by-org" data-toggle="tab">{{ _('Datasets by Organization') }}</a></li> 
+        <li class="nav-item"><a href="#stats-res-by-org" data-toggle="tab">{{ _('Resources by Organization') }}</a></li> 
+        <li class="nav-item"><a href="#stats-activity-org" data-toggle="tab">{{ _('Most Active Organisations') }}</a></li>
+        <li class="nav-item"><a href="//data.gov.au/site-usage">{{ _('Site Analytics') }}</a></li> {# external page, not a tab pane: no data-toggle, and only Summary is marked active #}
+        <li class="nav-item active"><a href="#stats-summary" data-toggle="tab">{{ _('Summary') }}</a></li> 
       </ul>
     </nav>
   </section>