varnish config dec 2013
varnish config dec 2013

# This is a basic VCL configuration file for varnish. See the vcl(7) # This is a basic VCL configuration file for varnish. See the vcl(7)
# man page for details on VCL syntax and semantics. # man page for details on VCL syntax and semantics.
# #
# Default backend definition. Set this to point to your content # Default backend definition. Set this to point to your content
# server. # server.
# #
backend default { backend default {
.host = "127.0.0.1"; .host = "127.0.0.1";
.port = "8080"; .port = "8080";
} }
  # Second backend, named "geoserver". vcl_recv switches req.backend to it
  # for request URLs that start with /geoserver/; everything else goes to
  # the "default" backend.
  backend geoserver {
  .host = "172.31.18.207";
  .port = "8983";
  }
   
sub vcl_fetch { sub vcl_fetch {
set beresp.grace = 1h; set beresp.grace = 1h;
  unset beresp.http.Server;
if (beresp.http.content-type ~ "(text|application)") { # These status codes should always pass through and never cache.
  if ( beresp.status >= 500 ) {
  set beresp.ttl = 0s;
  }
  if (beresp.http.content-type ~ "(text|javascript|json|xml|html)") {
set beresp.do_gzip = true; set beresp.do_gzip = true;
} }
  # CKAN cache headers are used by Varnish cache, but should not be propagated to
  # the Internet. Tell browsers and proxies not to cache. This means Varnish always
  # gets the responsibility to serve the right content at all times.
  if (beresp.http.Cache-Control ~ "max-age") {
  unset beresp.http.set-cookie;
  set beresp.http.Cache-Control = "no-cache";
  }
   
  # Encourage assets to be cached by proxies and browsers
  # JS and CSS may be gzipped depending on headers
  # see https://developers.google.com/speed/docs/best-practices/caching
  if (req.url ~ "\.(css|js)") {
  set beresp.http.Vary = "Accept-Encoding";
  }
   
  # Encourage assets to be cached by proxies and browsers for 1 day
  if (req.url ~ "\.(png|gif|jpg|swf|css|js)") {
  unset beresp.http.set-cookie;
  set beresp.http.Cache-Control = "public, max-age=86400";
  set beresp.ttl = 1d;
  }
   
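  # Encourage CKAN vendor assets (which are versioned) to be cached
  # by proxies and browsers for 1 year (max-age=31536000 seconds).
  # NOTE(review): the Varnish TTL set below is `12m`; in VCL the `m` suffix
  # means minutes, so Varnish itself keeps these objects for 12 minutes only.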
  if (req.url ~ "^/scripts/vendor/") {
  unset beresp.http.set-cookie;
  set beresp.http.Cache-Control = "public, max-age=31536000";
  set beresp.ttl = 12m;
  }
  }
  sub vcl_recv {
  if (req.http.user-agent ~ "Ezooms" || req.http.user-agent ~ "Ahrefs") {
  error 403;
  }
  if (req.url ~ "^/geoserver/") {
  set req.backend = geoserver;
  } else {
  set req.backend = default;
  #redirect secure traffic to https
  if ( (req.http.Cookie ~ "auth_tkt" || req.http.Cookie ~ "ckan" || req.url ~ "user/(reset|login)") && req.http.X-Forwarded-Proto !~ "(?i)https") {
  set req.http.x-Redir-Url = "https://data.gov.au" + req.url;
  error 753 req.http.x-Redir-Url;
  }
  # remove locale links
  if (req.url ~ "/((?!js)..|.._..|sr_Latn)/") {
  set req.http.x-Redir-Url = regsub(req.url, "/((?!js)..|.._..|sr_Latn)/", "/");
  error 751 req.http.x-Redir-Url;
  }
  # rewrite broken resources
  if (req.url ~ "leaflet") {
  set req.url = regsub(req.url, "fanstatic/ckanext-spatial/:version:2013-09-13T02:32:17.87/:bundle:js/vendor/leaflet/images", "js/vendor/leaflet/images");
  }
  # remove old hostnames
  if (req.http.host ~ "data.australia.gov.au") {
  set req.http.x-Redir-Url = "http://data.gov.au" + req.url;
  error 751 req.http.x-Redir-Url;
  }
   
  if (req.url ~ "^/_tracking") {
  // exclude web spiders from statistics
  if (req.http.user-agent ~ "Googlebot" || req.http.user-agent ~ "baidu" || req.http.user-agent ~ "bing") {
  error 200;
  } else {
  return (pass);
  }
  }
if (req.url ~ "\.(png|gif|jpg|jpeg|swf|css|js|woff|eot)$") { if (req.url ~ "\.(png|gif|jpg|jpeg|swf|css|js|woff|eot)$") {
unset beresp.http.set-cookie; //Varnish to deliver content from cache even if the request otherwise indicates that the request should be passed
  return(lookup);
} }
} }
sub vcl_recv { // Remove has_js and Google Analytics cookies. Evan added sharethis cookies
if (req.url ~ "^/_tracking") { set req.http.Cookie = regsuball(req.http.Cookie, "(^|;\s*)(__[a-z]+|has_js|cookie-agreed-en|_csoot|_csuid|_chartbeat2)=[^;]*", "");
return (pass);  
} // Remove a ";" prefix, if present.
if (req.url ~ "\.(png|gif|jpg|jpeg|swf|css|js|woff|eot)$") { set req.http.Cookie = regsub(req.http.Cookie, "^;\s*", "");
return(lookup); // Remove empty cookies.
} if (req.http.Cookie ~ "^\s*$") {
if (req.url ~ "/(..|.._..)/") { unset req.http.Cookie;
set req.url = regsub(req.url, "/((?!js)..|.._..)/", "/"); }
}  
if (req.http.Cookie) { remove req.http.X-Forwarded-For;
set req.http.Cookie = regsuball(req.http.Cookie, "(^|; ) *__utm.=[^;]+;? *", "\1"); # removes all cookies named __utm? (utma, utmb...) - tracking thing set req.http.X-Forwarded-For = req.http.X-Real-IP;
   
if (req.http.Cookie == "") {  
remove req.http.Cookie;  
}  
}  
} }
sub vcl_hash { sub vcl_hash {
# http://serverfault.com/questions/112531/ignoring-get-parameters-in-varnish-vcl # http://serverfault.com/questions/112531/ignoring-get-parameters-in-varnish-vcl
set req.url = regsub(req.url, "(?:(.com|.au))/((?!js)..|.._..)/", "/"); set req.url = regsub(req.url, "(?:(.com|.au))/((?!js)..|.._..|sr_Latn)/", "/");
hash_data(req.url); hash_data(req.url);
if (req.http.host) { if (req.http.host) {
hash_data(req.http.host); hash_data(req.http.host);
} else { } else {
hash_data(server.ip); hash_data(server.ip);
} }
return (hash); if (req.http.Cookie) {
  hash_data(req.http.Cookie);
  }
} }
sub vcl_deliver { sub vcl_deliver {
if (!resp.http.Vary) { if (!resp.http.Vary) {
set resp.http.Vary = "Accept-Encoding"; set resp.http.Vary = "Accept-Encoding";
} else if (resp.http.Vary !~ "(?i)Accept-Encoding") { } else if (resp.http.Vary !~ "(?i)Accept-Encoding") {
set resp.http.Vary = resp.http.Vary + ",Accept-Encoding"; set resp.http.Vary = resp.http.Vary + ",Accept-Encoding";
} }
if (obj.hits > 0) { remove resp.http.X-Varnish;
set resp.http.X-Cache = "HIT"; remove resp.http.Via;
} else { remove resp.http.Age;
set resp.http.X-Cache = "MISS"; remove resp.http.X-Powered-By;
} if (req.url ~ "^/geoserver/") {
  set resp.http.Access-Control-Allow-Origin = "*";
  set resp.http.Access-Control-Allow-Methods = "GET, POST, PUT, DELETE";
  set resp.http.Access-Control-Allow-Headers = "Origin, X-Requested-With, Content-Type, Accept";
  }
} }
  # Error handler. vcl_recv uses the made-up status codes 751 and 753 as
  # "redirect" signals: it calls `error 751 <url>` / `error 753 <url>` with the
  # target URL as the error text. This subroutine turns those into real
  # HTTP 301 responses whose Location header is that URL. Any other status
  # is not returned from here, so it falls through to the built-in vcl_error
  # logic that Varnish appends after this code.
  sub vcl_error {
  # Strip the Server header from the synthetic response.
  remove obj.http.Server;
  # 751: raised by vcl_recv for locale-prefix removal and for the old
  # data.australia.gov.au hostname.
  if (obj.status == 751) {
  # obj.response holds the error text, i.e. the redirect target URL.
  set obj.http.Location = obj.response;
  set obj.status = 301;
  return (deliver);
  }
  # 753: raised by vcl_recv to send authenticated/login traffic to https.
  # Handled identically to 751.
  if (obj.status == 753) {
  set obj.http.Location = obj.response;
  set obj.status = 301;
  return (deliver);
  }
  }
# #
# Below is a commented-out copy of the default VCL logic. If you # Below is a commented-out copy of the default VCL logic. If you
# redefine any of these subroutines, the built-in logic will be # redefine any of these subroutines, the built-in logic will be
# appended to your code. # appended to your code.
# sub vcl_recv { # sub vcl_recv {
# if (req.restarts == 0) { # if (req.restarts == 0) {
# if (req.http.x-forwarded-for) { # if (req.http.x-forwarded-for) {
# set req.http.X-Forwarded-For = # set req.http.X-Forwarded-For =
# req.http.X-Forwarded-For + ", " + client.ip; # req.http.X-Forwarded-For + ", " + client.ip;
# } else { # } else {
# set req.http.X-Forwarded-For = client.ip; # set req.http.X-Forwarded-For = client.ip;
# } # }
# } # }
# if (req.request != "GET" && # if (req.request != "GET" &&
# req.request != "HEAD" && # req.request != "HEAD" &&
# req.request != "PUT" && # req.request != "PUT" &&
# req.request != "POST" && # req.request != "POST" &&
# req.request != "TRACE" && # req.request != "TRACE" &&
# req.request != "OPTIONS" && # req.request != "OPTIONS" &&
# req.request != "DELETE") { # req.request != "DELETE") {
# /* Non-RFC2616 or CONNECT which is weird. */ # /* Non-RFC2616 or CONNECT which is weird. */
# return (pipe); # return (pipe);
# } # }
# if (req.request != "GET" && req.request != "HEAD") { # if (req.request != "GET" && req.request != "HEAD") {
# /* We only deal with GET and HEAD by default */ # /* We only deal with GET and HEAD by default */
# return (pass); # return (pass);
# } # }
# if (req.http.Authorization || req.http.Cookie) { # if (req.http.Authorization || req.http.Cookie) {
# /* Not cacheable by default */ # /* Not cacheable by default */
# return (pass); # return (pass);
# } # }
# return (lookup); # return (lookup);
# } # }
# #
# sub vcl_pipe { # sub vcl_pipe {
# # Note that only the first request to the backend will have # # Note that only the first request to the backend will have
# # X-Forwarded-For set. If you use X-Forwarded-For and want to # # X-Forwarded-For set. If you use X-Forwarded-For and want to
# # have it set for all requests, make sure to have: # # have it set for all requests, make sure to have:
# # set bereq.http.connection = "close"; # # set bereq.http.connection = "close";
# # here. It is not set by default as it might break some broken web # # here. It is not set by default as it might break some broken web
# # applications, like IIS with NTLM authentication. # # applications, like IIS with NTLM authentication.
# return (pipe); # return (pipe);
# } # }
# #
# sub vcl_pass { # sub vcl_pass {
# return (pass); # return (pass);
# } # }
# #
# sub vcl_hash { # sub vcl_hash {
# hash_data(req.url); # hash_data(req.url);
# if (req.http.host) { # if (req.http.host) {
# hash_data(req.http.host); # hash_data(req.http.host);
# } else { # } else {
# hash_data(server.ip); # hash_data(server.ip);
# } # }
# return (hash); # return (hash);
# } # }
# #
# sub vcl_hit { # sub vcl_hit {
# return (deliver); # return (deliver);
# } # }
# #
# sub vcl_miss { # sub vcl_miss {
# return (fetch); # return (fetch);
# } # }
# #
# sub vcl_fetch { # sub vcl_fetch {
# if (beresp.ttl <= 0s || # if (beresp.ttl <= 0s ||
# beresp.http.Set-Cookie || # beresp.http.Set-Cookie ||
# beresp.http.Vary == "*") { # beresp.http.Vary == "*") {
# /* # /*
# * Mark as "Hit-For-Pass" for the next 2 minutes # * Mark as "Hit-For-Pass" for the next 2 minutes
# */ # */
# set beresp.ttl = 120 s; # set beresp.ttl = 120 s;
# return (hit_for_pass); # return (hit_for_pass);
# } # }
# return (deliver); # return (deliver);
# } # }
# #
# sub vcl_deliver { # sub vcl_deliver {
# return (deliver); # return (deliver);
# } # }
# #
# sub vcl_error { # sub vcl_error {
# set obj.http.Content-Type = "text/html; charset=utf-8"; # set obj.http.Content-Type = "text/html; charset=utf-8";
# set obj.http.Retry-After = "5"; # set obj.http.Retry-After = "5";
# synthetic {" # synthetic {"
# <?xml version="1.0" encoding="utf-8"?> # <?xml version="1.0" encoding="utf-8"?>
# <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" # <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
# "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> # "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
# <html> # <html>
# <head> # <head>
# <title>"} + obj.status + " " + obj.response + {"</title> # <title>"} + obj.status + " " + obj.response + {"</title>
# </head> # </head>
# <body> # <body>
# <h1>Error "} + obj.status + " " + obj.response + {"</h1> # <h1>Error "} + obj.status + " " + obj.response + {"</h1>
# <p>"} + obj.response + {"</p> # <p>"} + obj.response + {"</p>
# <h3>Guru Meditation:</h3> # <h3>Guru Meditation:</h3>
# <p>XID: "} + req.xid + {"</p> # <p>XID: "} + req.xid + {"</p>
# <hr> # <hr>
# <p>Varnish cache server</p> # <p>Varnish cache server</p>
# </body> # </body>
# </html> # </html>
# "}; # "};
# return (deliver); # return (deliver);
# } # }
# #
# sub vcl_init { # sub vcl_init {
# return (ok); # return (ok);
# } # }
# #
# sub vcl_fini { # sub vcl_fini {
# return (ok); # return (ok);
# } # }
   
import logging import logging
   
import ckan.plugins as plugins import ckan.plugins as plugins
import ckan.lib as lib import ckan.lib as lib
import ckan.lib.dictization.model_dictize as model_dictize import ckan.lib.dictization.model_dictize as model_dictize
import ckan.plugins.toolkit as tk import ckan.plugins.toolkit as tk
import ckan.model as model import ckan.model as model
from pylons import config from pylons import config
from routes.mapper import SubMapper, Mapper as _Mapper from routes.mapper import SubMapper, Mapper as _Mapper
   
from sqlalchemy import orm from sqlalchemy import orm
import ckan.model import ckan.model
   
#parse the activity feed for last active non-system user #parse the activity feed for last active non-system user
def get_last_active_user(id): def get_last_active_user(id):
system_user = lib.helpers.get_action('user_show',{'id': config.get('ckan.site_id', 'ckan_site_user')}) system_user = lib.helpers.get_action('user_show',{'id': config.get('ckan.site_id', 'ckan_site_user')})
user_list = [x for x in lib.helpers.get_action('package_activity_list',{'id':id}) if x['user_id'] != system_user['id']] user_list = [x for x in lib.helpers.get_action('package_activity_list',{'id':id}) if x['user_id'] != system_user['id']]
user = None user = None
if len(user_list) > 0: if len(user_list) > 0:
user = user_list[0].get('user_id', None) user = user_list[0].get('user_id', None)
if user is None: if user is None:
return system_user return system_user
else: else:
return lib.helpers.get_action('user_show',{'id':user}) return lib.helpers.get_action('user_show',{'id':user})
   
# get user created datasets and those they have edited # get user created datasets and those they have edited
def get_user_datasets(user_dict): def get_user_datasets(user_dict):
created_datasets_list = user_dict['datasets'] created_datasets_list = user_dict['datasets']
active_datasets_list = [x['data']['package'] for x in active_datasets_list = [x['data']['package'] for x in
lib.helpers.get_action('user_activity_list',{'id':user_dict['id']}) if x['data'].get('package')] lib.helpers.get_action('user_activity_list',{'id':user_dict['id']}) if x['data'].get('package')]
return created_datasets_list + active_datasets_list raw_list = created_datasets_list + active_datasets_list
  filtered_dict = {}
  for dataset in raw_list:
  if dataset['id'] not in filtered_dict.keys():
  filtered_dict[dataset['id']] = dataset
  return filtered_dict.values()
   
class DataGovAuPlugin(plugins.SingletonPlugin, class DataGovAuPlugin(plugins.SingletonPlugin,
tk.DefaultDatasetForm): tk.DefaultDatasetForm):
'''An example IDatasetForm CKAN plugin. '''An example IDatasetForm CKAN plugin.
   
Uses a tag vocabulary to add a custom metadata field to datasets. Uses a tag vocabulary to add a custom metadata field to datasets.
   
''' '''
plugins.implements(plugins.IConfigurer, inherit=False) plugins.implements(plugins.IConfigurer, inherit=False)
plugins.implements(plugins.IDatasetForm, inherit=False) plugins.implements(plugins.IDatasetForm, inherit=False)
plugins.implements(plugins.ITemplateHelpers, inherit=False) plugins.implements(plugins.ITemplateHelpers, inherit=False)
plugins.implements(plugins.IRoutes, inherit=True) plugins.implements(plugins.IRoutes, inherit=True)
   
def before_map(self, map): def before_map(self, map):
   
# Helpers to reduce code clutter # Helpers to reduce code clutter
GET = dict(method=['GET']) GET = dict(method=['GET'])
PUT = dict(method=['PUT']) PUT = dict(method=['PUT'])
POST = dict(method=['POST']) POST = dict(method=['POST'])
DELETE = dict(method=['DELETE']) DELETE = dict(method=['DELETE'])
GET_POST = dict(method=['GET', 'POST']) GET_POST = dict(method=['GET', 'POST'])
# intercept API calls that we want to capture analytics on # intercept API calls that we want to capture analytics on
register_list = [ register_list = [
'package', 'package',
'dataset', 'dataset',
'resource', 'resource',
'tag', 'tag',
'group', 'group',
'related', 'related',
'revision', 'revision',
'licenses', 'licenses',
'rating', 'rating',
'user', 'user',
'activity' 'activity'
] ]
register_list_str = '|'.join(register_list) register_list_str = '|'.join(register_list)
# /api ver 3 or none # /api ver 3 or none
with SubMapper(map, controller='ckanext.datagovau.controller:DGAApiController', path_prefix='/api{ver:/3|}', with SubMapper(map, controller='ckanext.datagovau.controller:DGAApiController', path_prefix='/api{ver:/3|}',
ver='/3') as m: ver='/3') as m:
m.connect('/action/{logic_function}', action='action', m.connect('/action/{logic_function}', action='action',
conditions=GET_POST) conditions=GET_POST)
   
# /api ver 1, 2, 3 or none # /api ver 1, 2, 3 or none
with SubMapper(map, controller='ckanext.datagovau.controller:DGAApiController', path_prefix='/api{ver:/1|/2|/3|}', with SubMapper(map, controller='ckanext.datagovau.controller:DGAApiController', path_prefix='/api{ver:/1|/2|/3|}',
ver='/1') as m: ver='/1') as m:
m.connect('/search/{register}', action='search') m.connect('/search/{register}', action='search')
# /api/rest ver 1, 2 or none # /api/rest ver 1, 2 or none
with SubMapper(map, controller='ckanext.datagovau.controller:DGAApiController', path_prefix='/api{ver:/1|/2|}', with SubMapper(map, controller='ckanext.datagovau.controller:DGAApiController', path_prefix='/api{ver:/1|/2|}',
ver='/1', requirements=dict(register=register_list_str) ver='/1', requirements=dict(register=register_list_str)
) as m: ) as m:
   
m.connect('/rest/{register}', action='list', conditions=GET) m.connect('/rest/{register}', action='list', conditions=GET)
m.connect('/rest/{register}', action=�