varnish config dec 2013
--- a/README.rst
+++ b/README.rst
@@ -4,9 +4,11 @@
* A CKAN Extension "plugin" at ``ckanext/datagovau/plugin.py`` which, when
loaded, overrides various settings in the core ``ini``-file to provide:
- * A path to local customisations of the core templates
+ * A path to local customisations of the core templates to include AGLS/Dublin Core minimum metadata
* A custom Package edit form that defaults to cc-by licence
* A custom n3/rdf output format
+ * Replaces links with http/https protocol independent versions
+ * Provides HTML to users to embed data previews on their own website
* A cut down licenses.json file
--- a/admin/default.vcl
+++ b/admin/default.vcl
@@ -8,45 +8,117 @@
.host = "127.0.0.1";
.port = "8080";
}
+backend geoserver {
+ .host = "172.31.18.207";
+ .port = "8983";
+}
sub vcl_fetch {
     # Keep objects for up to an hour past their TTL.
     set beresp.grace = 1h;
-
- if (beresp.http.content-type ~ "(text|application)") {
+    # Do not reveal the backend server software to clients.
+    unset beresp.http.Server;
+    # These status codes should always pass through and never cache.
+    if ( beresp.status >= 500 ) {
+        set beresp.ttl = 0s;
+    }
+    # Compress textual responses only.
+    if (beresp.http.content-type ~ "(text|javascript|json|xml|html)") {
         set beresp.do_gzip = true;
     }
+    # CKAN cache headers are used by Varnish cache, but should not be propagated to
+    # the Internet. Tell browsers and proxies not to cache. This means Varnish always
+    # gets the responsibility to serve the right content at all times.
+    if (beresp.http.Cache-Control ~ "max-age") {
+        unset beresp.http.set-cookie;
+        set beresp.http.Cache-Control = "no-cache";
+    }
+
+    # Encourage assets to be cached by proxies and browsers
+    # JS and CSS may be gzipped depending on headers
+    # see https://developers.google.com/speed/docs/best-practices/caching
+    # The extension must end the path (optionally followed by a query string),
+    # so that e.g. ".json" or ".jsp" URLs are not treated as static assets.
+    if (req.url ~ "\.(css|js)(\?|$)") {
+        set beresp.http.Vary = "Accept-Encoding";
+    }
+
+    # Encourage assets to be cached by proxies and browsers for 1 day
+    if (req.url ~ "\.(png|gif|jpg|swf|css|js)(\?|$)") {
+        unset beresp.http.set-cookie;
+        set beresp.http.Cache-Control = "public, max-age=86400";
+        set beresp.ttl = 1d;
+    }
+
+    # Encourage CKAN vendor assets (which are versioned) to be cached
+    # by proxies and browsers for 1 year
+    if (req.url ~ "^/scripts/vendor/") {
+        unset beresp.http.set-cookie;
+        set beresp.http.Cache-Control = "public, max-age=31536000";
+        # "m" means minutes in VCL, so "12m" was only twelve minutes;
+        # 365d matches the one-year max-age sent to clients.
+        set beresp.ttl = 365d;
+    }
+}
+sub vcl_recv {
+ if (req.http.user-agent ~ "Ezooms" || req.http.user-agent ~ "Ahrefs") {
+ error 403;
+ }
+if (req.url ~ "^/geoserver/") {
+ set req.backend = geoserver;
+ } else {
+ set req.backend = default;
+ #redirect secure traffic to https
+ if ( (req.http.Cookie ~ "auth_tkt" || req.http.Cookie ~ "ckan" || req.url ~ "user/(reset|login)") && req.http.X-Forwarded-Proto !~ "(?i)https") {
+ set req.http.x-Redir-Url = "https://data.gov.au" + req.url;
+ error 753 req.http.x-Redir-Url;
+ }
+ # remove locale links
+ if (req.url ~ "/((?!js)..|.._..|sr_Latn)/") {
+ set req.http.x-Redir-Url = regsub(req.url, "/((?!js)..|.._..|sr_Latn)/", "/");
+ error 751 req.http.x-Redir-Url;
+ }
+ # rewrite broken resources
+ if (req.url ~ "leaflet") {
+ set req.url = regsub(req.url, "fanstatic/ckanext-spatial/:version:2013-09-13T02:32:17.87/:bundle:js/vendor/leaflet/images", "js/vendor/leaflet/images");
+ }
+ # remove old hostnames
+ if (req.http.host ~ "data.australia.gov.au") {
+ set req.http.x-Redir-Url = "http://data.gov.au" + req.url;
+ error 751 req.http.x-Redir-Url;
+ }
+
+ if (req.url ~ "^/_tracking") {
+ // exclude web spiders from statistics
+ if (req.http.user-agent ~ "Googlebot" || req.http.user-agent ~ "baidu" || req.http.user-agent ~ "bing") {
+ error 200;
+ } else {
+ return (pass);
+ }
+ }
if (req.url ~ "\.(png|gif|jpg|jpeg|swf|css|js|woff|eot)$") {
- unset beresp.http.set-cookie;
+ // Tell Varnish to deliver content from cache even if the request otherwise indicates that it should be passed
+ return(lookup);
}
}
-sub vcl_recv {
- if (req.url ~ "^/_tracking") {
- return (pass);
- }
- if (req.url ~ "\.(png|gif|jpg|jpeg|swf|css|js|woff|eot)$") {
- return(lookup);
- }
-if (req.url ~ "/(..|.._..)/") {
- set req.url = regsub(req.url, "/((?!js)..|.._..)/", "/");
-}
- if (req.http.Cookie) {
- set req.http.Cookie = regsuball(req.http.Cookie, "(^|; ) *__utm.=[^;]+;? *", "\1"); # removes all cookies named __utm? (utma, utmb...) - tracking thing
-
- if (req.http.Cookie == "") {
- remove req.http.Cookie;
- }
- }
+ // Remove has_js and Google Analytics cookies. Evan added sharethis cookies
+ set req.http.Cookie = regsuball(req.http.Cookie, "(^|;\s*)(__[a-z]+|has_js|cookie-agreed-en|_csoot|_csuid|_chartbeat2)=[^;]*", "");
+
+ // Remove a ";" prefix, if present.
+ set req.http.Cookie = regsub(req.http.Cookie, "^;\s*", "");
+ // Remove empty cookies.
+ if (req.http.Cookie ~ "^\s*$") {
+ unset req.http.Cookie;
+ }
+
+ remove req.http.X-Forwarded-For;
+ set req.http.X-Forwarded-For = req.http.X-Real-IP;
}
sub vcl_hash {
# http://serverfault.com/questions/112531/ignoring-get-parameters-in-varnish-vcl
- set req.url = regsub(req.url, "(?:(.com|.au))/((?!js)..|.._..)/", "/");
+ set req.url = regsub(req.url, "(?:(.com|.au))/((?!js)..|.._..|sr_Latn)/", "/");
hash_data(req.url);
if (req.http.host) {
hash_data(req.http.host);
} else {
hash_data(server.ip);
}
- return (hash);
+ if (req.http.Cookie) {
+ hash_data(req.http.Cookie);
+}
}
sub vcl_deliver {
if (!resp.http.Vary) {
@@ -54,13 +126,29 @@
} else if (resp.http.Vary !~ "(?i)Accept-Encoding") {
set resp.http.Vary = resp.http.Vary + ",Accept-Encoding";
}
- if (obj.hits > 0) {
- set resp.http.X-Cache = "HIT";
- } else {
- set resp.http.X-Cache = "MISS";
- }
+ remove resp.http.X-Varnish;
+ remove resp.http.Via;
+ remove resp.http.Age;
+ remove resp.http.X-Powered-By;
+if (req.url ~ "^/geoserver/") {
+ set resp.http.Access-Control-Allow-Origin = "*";
+ set resp.http.Access-Control-Allow-Methods = "GET, POST, PUT, DELETE";
+ set resp.http.Access-Control-Allow-Headers = "Origin, X-Requested-With, Content-Type, Accept";
+}
}
-
+sub vcl_error {
+    # Synthetic responses should not reveal the server software either.
+    remove obj.http.Server;
+    # 751 and 753 are internal pseudo-statuses raised by "error" in vcl_recv
+    # with the target URL as the response text. Both are turned into a
+    # permanent redirect to that URL.
+    if (obj.status == 751 || obj.status == 753) {
+        set obj.http.Location = obj.response;
+        set obj.status = 301;
+        return (deliver);
+    }
+}
#
# Below is a commented-out copy of the default VCL logic. If you
# redefine any of these subroutines, the built-in logic will be
--- /dev/null
+++ b/admin/reset.sh
@@ -1,1 +1,17 @@
+paster --plugin=ckan db clean --config=development.ini
+echo "drop extension postgis cascade;" | psql -d ckantest
+paster --plugin=ckan db clean --config=development.ini
+#to initiate for first time instead of load from dump
+#paster --plugin=ckan db init --config=development.in
+#paster --plugin=ckan user add maxious password=snmc email=maxious@gmail.com
+#paster --plugin=ckan sysadmin add maxious
+#paster --plugin=ckan db dump dump.db
+
+#paster --plugin=ckan db load --config=development.ini dump.db
+paster --plugin=ckan db load --config=development.ini dump.harvest.db
+echo "create extension postgis;" | psql -d ckantest
+#sleep 2
+paster --plugin=ckan search-index rebuild --config=development.ini
+#rm -r /tmp/pairtree_*
+
--- /dev/null
+++ b/ckanext/datagovau/controller.py
@@ -1,1 +1,84 @@
+import urllib
+import json
+from pprint import pprint
+import logging
+import ckan.logic as logic
+import hashlib
+import threading
+from ckan.common import _, c, request, response
+from pylons import config
+from webob.multidict import UnicodeMultiDict
+from paste.util.multidict import MultiDict
+log = logging.getLogger(__name__)
+
+from ckan.controllers.api import ApiController
+
+class DGAApiController(ApiController):
+
+ def _post_analytics(self,user,request_obj_type,request_function,request_id):
+ if (config.get('googleanalytics.id') != None):
+ data = urllib.urlencode({
+ "v":1,
+ "tid":config.get('googleanalytics.id'),
+ "cid":hashlib.md5(user).hexdigest(),
+ "t":"event",
+ "dh":c.environ['HTTP_HOST'],
+ "dp":c.environ['PATH_INFO'],
+ "dr":c.environ.get('HTTP_REFERER',''),
+ "ec":"CKAN API Request",
+ "ea":request_obj_type+request_function,
+ "el":request_id,
+ })
+ log.debug("Sending API Analytics Data: "+data)
+ # send analytics asynchronously
+ threading.Thread(target=urllib.urlopen,args=("http://www.google-analytics.com/collect", data)).start()
+
+
+ def action(self, logic_function, ver=None):
+ try:
+ function = logic.get_action(logic_function)
+ except Exception,e:
+ log.debug(e)
+ pass
+ try:
+ side_effect_free = getattr(function, 'side_effect_free', False)
+ request_data = self._get_request_data(try_url_params=side_effect_free)
+ if isinstance(request_data, dict):
+ id = request_data.get('id','')
+ if 'q' in request_data.keys():
+ id = request_data['q']
+ if 'query' in request_data.keys():
+ id = request_data['query']
+ self._post_analytics(c.user,logic_function,'', id)
+ except Exception,e:
+ print log.debug(e)
+ pass
+
+ return ApiController.action(self,logic_function, ver)
+
+ def list(self, ver=None, register=None, subregister=None, id=None):
+ self._post_analytics(c.user,register+("_"+str(subregister) if subregister else ""),"list",id)
+ return ApiController.list(self,ver, register, subregister, id)
+ def show(self, ver=None, register=None, subregister=None, id=None, id2=None):
+ self._post_analytics(c.user,register+("_"+str(subregister) if subregister else ""),"show",id)
+ return ApiController.show(self,ver, register, subregister, id,id2)
+ def update(self, ver=None, register=None, subregister=None, id=None, id2=None):
+ self._post_analytics(c.user,register+("_"+str(subregister) if subregister else ""),"update",id)
+ return ApiController.update(self,ver, register, subregister, id,id2)
+ def delete(self, ver=None, register=None, subregister=None, id=None, id2=None):
+ self._post_analytics(c.user,register+("_"+str(subregister) if subregister else ""),"delete",id)
+ return ApiController.delete(self,ver, register, subregister, id,id2)
+ def search(self, ver=None, register=None):
+ id = None
+ try:
+ params = MultiDict(self._get_search_params(request.params))
+ if 'q' in params.keys():
+ id = params['q']
+ if 'query' in params.keys():
+ id = params['query']
+ except ValueError, e:
+ print str(e)
+ pass
+ self._post_analytics(c.user,register,"search",id)
+
--- a/ckanext/datagovau/plugin.py
+++ b/ckanext/datagovau/plugin.py
@@ -6,6 +6,10 @@
import ckan.plugins.toolkit as tk
import ckan.model as model
from pylons import config
+from routes.mapper import SubMapper, Mapper as _Mapper
+
+from sqlalchemy import orm
+import ckan.model
#parse the activity feed for last active non-system user
def get_last_active_user(id):
@@ -24,7 +28,12 @@
created_datasets_list = user_dict['datasets']
active_datasets_list = [x['data']['package'] for x in
lib.helpers.get_action('user_activity_list',{'id':user_dict['id']}) if x['data'].get('package')]
- return created_datasets_list + active_datasets_list
+ raw_list = created_datasets_list + active_datasets_list
+ filtered_dict = {}
+ for dataset in raw_list:
+ if dataset['id'] not in filtered_dict.keys():
+ filtered_dict[dataset['id']] = dataset
+ return filtered_dict.values()
class DataGovAuPlugin(plugins.SingletonPlugin,
tk.DefaultDatasetForm):
@@ -36,6 +45,55 @@
plugins.implements(plugins.IConfigurer, inherit=False)
plugins.implements(plugins.IDatasetForm, inherit=False)
plugins.implements(plugins.ITemplateHelpers, inherit=False)
+ plugins.implements(plugins.IRoutes, inherit=True)
+
+ def before_map(self, map):
+
+ # Helpers to reduce code clutter
+ GET = dict(method=['GET'])
+ PUT = dict(method=['PUT'])
+ POST = dict(method=['POST'])
+ DELETE = dict(method=['DELETE'])
+ GET_POST = dict(method=['GET', 'POST'])
+ # intercept API calls that we want to capture analytics on
+ register_list = [
+ 'package',
+ 'dataset',
+ 'resource',
+ 'tag',
+ 'group',
+ 'related',
+ 'revision',
+ 'licenses',
+ 'rating',
+ 'user',
+ 'activity'
+ ]
+ register_list_str = '|'.join(register_list)
+ # /api ver 3 or none
+ with SubMapper(map, controller='ckanext.datagovau.controller:DGAApiController', path_prefix='/api{ver:/3|}',
+ ver='/3') as m:
+ m.connect('/action/{logic_function}', action='action',
+ conditions=GET_POST)
+
+ # /api ver 1, 2, 3 or none
+ with SubMapper(map, controller='ckanext.datagovau.controller:DGAApiController', path_prefix='/api{ver:/1|/2|/3|}',
+ ver='/1') as m:
+ m.connect('/search/{register}', action='search')
+
+ # /api/rest ver 1, 2 or none
+ with SubMapper(map, controller='ckanext.datagovau.controller:DGAApiController', path_prefix='/api{ver:/1|/2|}',
+ ver='/1', requirements=dict(register=register_list_str)
+ ) as m:
+
+ m.connect('/rest/{register}', action='list', conditions=GET)
+ m.connect('/rest/{register}', action='create', conditions=POST)
+ m.connect('/rest/{register}/{id}', action='show', conditions=GET)
+ m.connect('/rest/{register}/{id}', action='update', conditions=PUT)
+ m.connect('/rest/{register}/{id}', action='update', conditions=POST)
+ m.connect('/rest/{register}/{id}', action='delete', conditions=DELETE)
+
+ return map
def update_config(self, config):
# Add this plugin's templates dir to CKAN's extra_template_paths, so
--- /dev/null
+++ b/ckanext/datagovau/templates/base.html
@@ -1,1 +1,7 @@
+{% ckan_extends %}
+ {% block meta %}
+ {{ super() }}
+<meta name="google-site-verification" content="B8scmW2_LMm4IW_ogXK9lojznO57GBpYlWrFjfGz3X8" />
+ {% endblock %}
+
--- /dev/null
+++ b/ckanext/datagovau/templates/dataviewer/base.html
@@ -1,1 +1,11 @@
+{% ckan_extends %}
+{% block scripts %}
+ {{ super() }}
+ <script>
+ preload_resource.url = preload_resource.url.replace("https:","").replace("http:","");
+ preload_resource.original_url = preload_resource.original_url.replace("https:","").replace("http:","");
+ </script>
+{% endblock %}
+
+
--- /dev/null
+++ b/ckanext/datagovau/templates/dataviewer/snippets/data_preview.html
@@ -1,1 +1,32 @@
+<div class="module-content ckanext-datapreview">
+ {% if embed %}
+ {# images can be embedded directly #}
+ <img src="{{ resource_url }}"></img>
+ {% else %}
+ <div class="data-viewer-error js-hide">
+ <p class="text-error">
+ <i class="icon-info-sign"></i>
+ {{ _('This resource can not be previewed at the moment.') }}
+ <a href="#" data-toggle="collapse" data-target="#data-view-error">
+ {{ _('Click here for more information.') }}
+ </a>
+ </p>
+ <p id="data-view-error" class="collapse"></p>
+ <p>
+ <a href="{{ raw_resource_url }}" class="btn btn-large resource-url-analytics" target="_blank">
+ <i class="icon-large icon-download"></i>
+ {{ _('Download resource') }}
+ </a>
+ </p>
+ </div>
+ <iframe src="{{ resource_url }}" frameborder="0" width="100%" data-module="data-viewer">
+ <p>{{ _('Your browser does not support iframes.') }}</p>
+ </iframe>
+ {% endif %}
+</div>
+<div class="embedhint" style="padding-left: 15px;">
+<h4>Embed this visualisation in your own website...</h4>
+Copy the HTML in the box below and you can display this visualisation on your own website.<br/>
+<textarea style="width:97%"><iframe src="{{ resource_url }}" frameborder="0" width="100%" height="600px"></iframe></textarea>
+</div>
--- a/ckanext/datagovau/templates/package/read.html
+++ b/ckanext/datagovau/templates/package/read.html
@@ -1,7 +1,6 @@
{% ckan_extends %}
{% block primary_content_inner %}
{{ super() }}
-
<div class="module-content">
{{ h.disqus_comments() }}
</div>
@@ -128,3 +127,13 @@
{% endblock %}
+{% block secondary_content %}
+ {{ super() }}
+
+ {% set dataset_extent = h.get_pkg_dict_extra(c.pkg_dict, 'spatial', '') %}
+ {% if dataset_extent %}
+ {% snippet "spatial/snippets/dataset_map_sidebar.html", extent=dataset_extent %}
+ {% endif %}
+
+{% endblock %}
+
--- /dev/null
+++ b/ckanext/datagovau/templates/package/search.html
@@ -1,1 +1,7 @@
+{% ckan_extends %}
+{% block secondary_content %}
+{{ super() }}
+ {% snippet "spatial/snippets/spatial_query.html", default_extent="[[-11, 114], [-42, 154]]" %}
+{% endblock %}
+