fix copy pasta error master
fix copy pasta error

file:a/.gitignore -> file:b/.gitignore
*.pyc *.py[cod]
   
  # C extensions
  *.so
   
  # Packages
  *.egg
  *.egg-info
  dist
  build
  eggs
  parts
  bin
  var
  sdist
  develop-eggs
  .installed.cfg
  lib
  lib64
   
  # Installer logs
  pip-log.txt
   
  # Unit test / coverage reports
  .coverage
  .tox
  nosetests.xml
   
  # Translations
  *.mo
   
  # Mr Developer
  .mr.developer.cfg
  .project
  .pydevproject
   
file:a/ckanext/__init__.pyc (deleted)
 Binary files a/ckanext/__init__.pyc and /dev/null differ
 Binary files a/ckanext/dga_stats/__init__.pyc and /dev/null differ
import ckan.plugins as p import ckan.plugins as p
from ckan.lib.base import BaseController, config from ckan.lib.base import BaseController, config
import stats as stats_lib import stats as stats_lib
import ckan.lib.helpers as h import ckan.lib.helpers as h
   
class StatsController(BaseController): class StatsController(BaseController):
   
def index(self): def index(self):
c = p.toolkit.c c = p.toolkit.c
stats = stats_lib.Stats() stats = stats_lib.Stats()
rev_stats = stats_lib.RevisionStats() rev_stats = stats_lib.RevisionStats()
c.top_rated_packages = stats.top_rated_packages() c.top_rated_packages = stats.top_rated_packages()
c.most_edited_packages = stats.most_edited_packages() c.most_edited_packages = stats.most_edited_packages()
c.largest_groups = stats.largest_groups() c.largest_groups = stats.largest_groups()
c.top_tags = stats.top_tags()  
c.top_package_owners = stats.top_package_owners() c.top_package_owners = stats.top_package_owners()
c.summary_stats = stats.summary_stats() c.summary_stats = stats.summary_stats()
c.activity_counts = stats.activity_counts() c.activity_counts = stats.activity_counts()
c.by_org = stats.by_org() c.by_org = stats.by_org()
  c.res_by_org = stats.res_by_org()
  c.top_active_orgs = stats.top_active_orgs()
  c.user_access_list = stats.user_access_list()
  c.recent_datasets = stats.recent_datasets()
c.new_packages_by_week = rev_stats.get_by_week('new_packages') c.new_packages_by_week = rev_stats.get_by_week('new_packages')
c.deleted_packages_by_week = rev_stats.get_by_week('deleted_packages') c.deleted_packages_by_week = rev_stats.get_by_week('deleted_packages')
c.num_packages_by_week = rev_stats.get_num_packages_by_week() c.num_packages_by_week = rev_stats.get_num_packages_by_week()
c.package_revisions_by_week = rev_stats.get_by_week('package_revisions') c.package_revisions_by_week = rev_stats.get_by_week('package_revisions')
   
# Used in the legacy CKAN templates. # Used in the legacy CKAN templates.
c.packages_by_week = [] c.packages_by_week = []
   
# Used in new CKAN templates gives more control to the templates for formatting. # Used in new CKAN templates gives more control to the templates for formatting.
c.raw_packages_by_week = [] c.raw_packages_by_week = []
for week_date, num_packages, cumulative_num_packages in c.num_packages_by_week: for week_date, num_packages, cumulative_num_packages in c.num_packages_by_week:
c.packages_by_week.append('[new Date(%s), %s]' % (week_date.replace('-', ','), cumulative_num_packages)) c.packages_by_week.append('[new Date(%s), %s]' % (week_date.replace('-', ','), cumulative_num_packages))
c.raw_packages_by_week.append({'date': h.date_str_to_datetime(week_date), 'total_packages': cumulative_num_packages}) c.raw_packages_by_week.append({'date': h.date_str_to_datetime(week_date), 'total_packages': cumulative_num_packages})
   
c.all_package_revisions = [] c.all_package_revisions = []
c.raw_all_package_revisions = [] c.raw_all_package_revisions = []
for week_date, revs, num_revisions, cumulative_num_revisions in c.package_revisions_by_week: for week_date, revs, num_revisions, cumulative_num_revisions in c.package_revisions_by_week:
c.all_package_revisions.append('[new Date(%s), %s]' % (week_date.replace('-', ','), num_revisions)) c.all_package_revisions.append('[new Date(%s), %s]' % (week_date.replace('-', ','), num_revisions))
c.raw_all_package_revisions.append({'date': h.date_str_to_datetime(week_date), 'total_revisions': num_revisions}) c.raw_all_package_revisions.append({'date': h.date_str_to_datetime(week_date), 'total_revisions': num_revisions})
   
c.new_datasets = [] c.new_datasets = []
c.raw_new_datasets = [] c.raw_new_datasets = []
for week_date, pkgs, num_packages, cumulative_num_packages in c.new_packages_by_week: for week_date, pkgs, num_packages, cumulative_num_packages in c.new_packages_by_week:
c.new_datasets.append('[new Date(%s), %s]' % (week_date.replace('-', ','), num_packages)) c.new_datasets.append('[new Date(%s), %s]' % (week_date.replace('-', ','), num_packages))
c.raw_new_datasets.append({'date': h.date_str_to_datetime(week_date), 'new_packages': num_packages}) c.raw_new_datasets.append({'date': h.date_str_to_datetime(week_date), 'new_packages': num_packages})
   
return p.toolkit.render('ckanext/stats/index.html') return p.toolkit.render('ckanext/stats/index.html')
   
def leaderboard(self, id=None): def leaderboard(self, id=None):
c = p.toolkit.c c = p.toolkit.c
c.solr_core_url = config.get('ckanext.stats.solr_core_url', c.solr_core_url = config.get('ckanext.stats.solr_core_url',
'http://solr.okfn.org/solr/ckan') 'http://solr.okfn.org/solr/ckan')
return p.toolkit.render('ckanext/stats/leaderboard.html') return p.toolkit.render('ckanext/stats/leaderboard.html')
   
   
 Binary files a/ckanext/dga_stats/controller.pyc and /dev/null differ
 Binary files a/ckanext/dga_stats/plugin.pyc and /dev/null differ
import datetime import datetime
   
from pylons import config from pylons import config
from sqlalchemy import Table, select, func, and_ from sqlalchemy import Table, select, func, and_
  from sqlalchemy.sql.expression import text
   
import ckan.plugins as p import ckan.plugins as p
import ckan.model as model import ckan.model as model
   
  import re
   
cache_enabled = p.toolkit.asbool(config.get('ckanext.stats.cache_enabled', 'True')) cache_enabled = p.toolkit.asbool(config.get('ckanext.stats.cache_enabled', 'True'))
   
if cache_enabled: if cache_enabled:
from pylons import cache from pylons import cache
our_cache = cache.get_cache('stats', type='dbm') our_cache = cache.get_cache('stats', type='dbm')
   
DATE_FORMAT = '%Y-%m-%d' DATE_FORMAT = '%Y-%m-%d'
   
def table(name): def table(name):
return Table(name, model.meta.metadata, autoload=True) return Table(name, model.meta.metadata, autoload=True)
   
def datetime2date(datetime_): def datetime2date(datetime_):
return datetime.date(datetime_.year, datetime_.month, datetime_.day) return datetime.date(datetime_.year, datetime_.month, datetime_.day)
   
   
class Stats(object): class Stats(object):
@classmethod @classmethod
def top_rated_packages(cls, limit=10): def top_rated_packages(cls, limit=10):
# NB Not using sqlalchemy as sqla 0.4 doesn't work using both group_by # NB Not using sqlalchemy as sqla 0.4 doesn't work using both group_by
# and apply_avg # and apply_avg
package = table('package') package = table('package')
rating = table('rating') rating = table('rating')
sql = select([package.c.id, func.avg(rating.c.rating), func.count(rating.c.rating)], from_obj=[package.join(rating)]).\ sql = select([package.c.id, func.avg(rating.c.rating), func.count(rating.c.rating)], from_obj=[package.join(rating)]).\
where(package.c.private == 'f').\ where(package.c.private == 'f').\
group_by(package.c.id).\ group_by(package.c.id).\
order_by(func.avg(rating.c.rating).desc(), func.count(rating.c.rating).desc()).\ order_by(func.avg(rating.c.rating).desc(), func.count(rating.c.rating).desc()).\
limit(limit) limit(limit)
res_ids = model.Session.execute(sql).fetchall() res_ids = model.Session.execute(sql).fetchall()
res_pkgs = [(model.Session.query(model.Package).get(unicode(pkg_id)), avg, num) for pkg_id, avg, num in res_ids] res_pkgs = [(model.Session.query(model.Package).get(unicode(pkg_id)), avg, num) for pkg_id, avg, num in res_ids]
return res_pkgs return res_pkgs
   
@classmethod @classmethod
def most_edited_packages(cls, limit=10): def most_edited_packages(cls, limit=10):
package_revision = table('package_revision') package_revision = table('package_revision')
package = table('package') package = table('package')
s = select([package_revision.c.id, func.count(package_revision.c.revision_id)], from_obj=[package_revision.join(package)]).\ s = select([package_revision.c.id, func.count(package_revision.c.revision_id)], from_obj=[package_revision.join(package)]).\
where(package.c.private == 'f').\ where(package.c.private == 'f').\
group_by(package_revision.c.id).\ group_by(package_revision.c.id).\
order_by(func.count(package_revision.c.revision_id).desc()).\ order_by(func.count(package_revision.c.revision_id).desc()).\
limit(limit) limit(limit)
res_ids = model.Session.execute(s).fetchall() res_ids = model.Session.execute(s).fetchall()
res_pkgs = [(model.Session.query(model.Package).get(unicode(pkg_id)), val) for pkg_id, val in res_ids] res_pkgs = [(model.Session.query(model.Package).get(unicode(pkg_id)), val) for pkg_id, val in res_ids]
return res_pkgs return res_pkgs
   
@classmethod @classmethod
def largest_groups(cls, limit=10): def largest_groups(cls, limit=10):
member = table('member') member = table('member')
s = select([member.c.group_id, func.count(member.c.table_id)]).\ s = select([member.c.group_id, func.count(member.c.table_id)]).\
group_by(member.c.group_id).\ group_by(member.c.group_id).\
where(and_(member.c.group_id!=None, member.c.table_name=='package')).\ where(member.c.group_id!=None).\
order_by(func.count(member.c.table_id).desc()).\ where(member.c.table_name=='package').\
limit(limit) where(member.c.capacity=='public').\
  order_by(func.count(member.c.table_id).desc())
  #limit(limit)
   
res_ids = model.Session.execute(s).fetchall() res_ids = model.Session.execute(s).fetchall()
res_groups = [(model.Session.query(model.Group).get(unicode(group_id)), val) for group_id, val in res_ids] res_groups = [(model.Session.query(model.Group).get(unicode(group_id)), val) for group_id, val in res_ids]
return res_groups return res_groups
   
@classmethod @classmethod
def by_org(cls, limit=10): def by_org(cls, limit=10):
group = table('group') connection = model.Session.connection()
package = table('package') res = connection.execute("select package.owner_org, package.private, count(*) from package \
s = select([group.c.id, package.c.private, func.count(package.c.private)]).\ inner join (select distinct package_id from resource_group inner join resource on resource.resource_group_id = resource_group.id) as r on package.id = r.package_id \
group_by(group.c.id, package.c.private).\ inner join \"group\" on package.owner_org = \"group\".id \
order_by(group.c.id).\ where package.state='active'\
limit(limit) group by package.owner_org,\"group\".name, package.private \
  order by \"group\".name, package.private;").fetchall();
res_ids = model.Session.execute(s).fetchall() res_groups = [(model.Session.query(model.Group).get(unicode(group_id)), private, val) for group_id, private, val in res]
res_groups = [(model.Session.query(model.Group).get(unicode(group_id)), private, val) for group_id, private, val in res_ids]  
return res_groups return res_groups
   
@classmethod @classmethod
def top_tags(cls, limit=10, returned_tag_info='object'): # by package def res_by_org(cls, limit=10):
assert returned_tag_info in ('name', 'id', 'object') connection = model.Session.connection()
tag = table('tag') reses = connection.execute("select owner_org,format,count(*) from \
package_tag = table('package_tag') resource inner join resource_group on resource.resource_group_id = resource_group.id \
package = table('package') inner join package on resource_group.package_id = package.id group by owner_org,format order by count desc;").fetchall();
#TODO filter out tags with state=deleted group_ids = []
if returned_tag_info == 'name': group_tab = {}
from_obj = [package_tag.join(tag)] group_spatial = {}
tag_column = tag.c.name group_other = {}
else: for group_id,format,count in reses:
from_obj = None if group_id not in group_ids:
tag_column = package_tag.c.tag_id group_ids.append(group_id)
s = select([tag_column, func.count(package_tag.c.package_id)], group_tab[group_id] = 0
from_obj=from_obj) group_spatial[group_id] = 0
s = s.group_by(tag_column).\ group_other[group_id] = 0
where(package.c.private == 'f').\ if re.search('xls|csv|ms-excel|spreadsheetml.sheet|zip|netcdf',format, re.IGNORECASE):
order_by(func.count(package_tag.c.package_id).desc()).\ group_tab[group_id] = group_tab[group_id] + count
limit(limit) elif re.search('wms|wfs|wcs|shp|kml|kmz',format, re.IGNORECASE):
res_col = model.Session.execute(s).fetchall() group_spatial[group_id] = group_spatial[group_id] + count
if returned_tag_info in ('id', 'name'): else:
return res_col group_other[group_id] = group_other[group_id] + count
elif returned_tag_info == 'object': return [(model.Session.query(model.Group).get(unicode(group_id)), group_tab[group_id],group_spatial[group_id],group_other[group_id], group_tab[group_id]+group_spatial[group_id]+group_other[group_id]) for group_id in group_ids]
res_tags = [(model.Session.query(model.Tag).get(unicode(tag_id)), val) for tag_id, val in res_col]  
return res_tags @classmethod
  def top_active_orgs(cls, limit=10):
  connection = model.Session.connection()
  res = connection.execute("select package.owner_org, count(*) from package \
  inner join (select distinct package_id from resource_group inner join resource on resource.resource_group_id = resource_group.id) as r on package.id = r.package_id \
  inner join \"group\" on package.owner_org = \"group\".id \
  inner join (select distinct object_id from activity where activity.timestamp > (now() - interval '60 day')) \
  latestactivities on latestactivities.object_id = package.id \
  where package.state='active' \
  and package.private = 'f' \
  group by package.owner_org \
  order by count(*) desc;").fetchall();
  res_groups = [(model.Session.query(model.Group).get(unicode(group_id)), val) for group_id, val in res]
  return res_groups
   
@classmethod @classmethod
def top_package_owners(cls, limit=10): def top_package_owners(cls, limit=10):
package_role = table('package_role') package_role = table('package_role')
user_object_role = table('user_object_role') user_object_role = table('user_object_role')
package = table('package') package = table('package')
# join(package, package_role.c.package_id == package.c.id).\ s = select([user_object_role.c.user_id, func.count(user_object_role.c.role)], from_obj=[user_object_role.join(package_role).join(package, package_role.c.package_id == package.c.id)]).\
# where(package.c.private == 'f').\  
s = select([user_object_role.c.user_id, func.count(user_object_role.c.role)], from_obj=[user_object_role.join(package_role)]).\  
where(user_object_role.c.role==model.authz.Role.ADMIN).\ where(user_object_role.c.role==model.authz.Role.ADMIN).\
  where(package.c.private == 'f').\
where(user_object_role.c.user_id!=None).\ where(user_object_role.c.user_id!=None).\
group_by(user_object_role.c.user_id).\ group_by(user_object_role.c.user_id).\
order_by(func.count(user_object_role.c.role).desc()).\ order_by(func.count(user_object_role.c.role).desc()).\
limit(limit) limit(limit)
res_ids = model.Session.execute(s).fetchall() res_ids = model.Session.execute(s).fetchall()
res_users = [(model.Session.query(model.User).get(unicode(user_id)), val) for user_id, val in res_ids] res_users = [(model.Session.query(model.User).get(unicode(user_id)), val) for user_id, val in res_ids]
return res_users return res_users
   
@classmethod @classmethod
def summary_stats(cls): def summary_stats(cls):
connection = model.Session.connection() connection = model.Session.connection()
# select name,role from user_object_role inner join \"user\" on user_object_role.user_id = \"user\".id where name not in ('logged_in','visitor') group by name,role"  
   
res = connection.execute("SELECT 'Total Organisations', count(*) from \"group\" where type = 'organization' and state = 'active' union \ res = connection.execute("SELECT 'Total Organisations', count(*) from \"group\" where type = 'organization' and state = 'active' union \
select 'Total Datasets', count(*) from package where state='active' or state='draft' or state='draft-complete' union \ select 'Total Datasets', count(*) from package inner join (select distinct package_id from resource_group inner join resource on resource.resource_group_id = resource_group.id) as r on package.id = r.package_id where (package.state='active' or package.state='draft' or package.state='draft-complete') and private = 'f' union \
select 'Total Data Files/Resources', count(*) from resource where state='active'").fetchall(); select 'Total Archived Datasets', count(*) from package where (state='active' or state='draft' or state='draft-complete') and private = 't' union \
  select 'Total Data Files/Resources', count(*) from resource where state='active' union \
  select 'Total Machine Readable/Data API Resources', count(*) from resource where state='active' and (webstore_url = 'active' or format='wms')").fetchall();
return res return res
   
   
@classmethod @classmethod
def activity_counts(cls): def activity_counts(cls):
connection = model.Session.connection() connection = model.Session.connection()
res = connection.execute("select to_char(timestamp, 'YYYY-MM') as month,activity_type, count(*) from activity group by month, activity_type order by month;").fetchall(); res = connection.execute("select to_char(timestamp, 'YYYY-MM') as month,activity_type, count(*) from activity group by month, activity_type order by month;").fetchall();
return res return res
   
  @classmethod
  def user_access_list(cls):
  connection = model.Session.connection()
  res = connection.execute("select name,sysadmin,role from user_object_role right outer join \"user\" on user_object_role.user_id = \"user\".id where name not in ('logged_in','visitor') group by name,sysadmin,role order by sysadmin desc, role asc;").fetchall();
  return res
   
  @classmethod
  def recent_datasets(cls):
  activity = table('activity')
  package = table('package')
  s = select([func.max(activity.c.timestamp),package.c.id, activity.c.activity_type], from_obj=[activity.join(package,activity.c.object_id == package.c.id)]).where(package.c.private == 'f').\
  where(activity.c.timestamp > func.now() - text("interval '60 day'")).group_by(package.c.id,activity.c.activity_type).order_by(func.max(activity.c.timestamp))
  result = model.Session.execute(s).fetchall()
  return [(datetime2date(timestamp), model.Session.query(model.Package).get(unicode(package_id)), activity_type) for timestamp,package_id,activity_type in result]
   
   
   
class RevisionStats(object): class RevisionStats(object):
@classmethod @classmethod
def package_addition_rate(cls, weeks_ago=0): def package_addition_rate(cls, weeks_ago=0):
week_commenced = cls.get_date_weeks_ago(weeks_ago) week_commenced = cls.get_date_weeks_ago(weeks_ago)
return cls.get_objects_in_a_week(week_commenced, return cls.get_objects_in_a_week(week_commenced,
type_='package_addition_rate') type_='package_addition_rate')
   
@classmethod @classmethod
def package_revision_rate(cls, weeks_ago=0): def package_revision_rate(cls, weeks_ago=0):
week_commenced = cls.get_date_weeks_ago(weeks_ago) week_commenced = cls.get_date_weeks_ago(weeks_ago)
return cls.get_objects_in_a_week(week_commenced, return cls.get_objects_in_a_week(week_commenced,
type_='package_revision_rate') type_='package_revision_rate')
   
@classmethod @classmethod
def get_date_weeks_ago(cls, weeks_ago): def get_date_weeks_ago(cls, weeks_ago):
''' '''
@param weeks_ago: specify how many weeks ago to give count for @param weeks_ago: specify how many weeks ago to give count for