From: root Date: Fri, 20 Dec 2013 00:32:03 +0000 Subject: varnish config dec 2013 X-Git-Url: https://maxious.lambdacomplex.org/git/?p=ckanext-datagovau.git&a=commitdiff&h=b18448b25ae578c68b7d643d9a6754153a7411b7 --- varnish config dec 2013 --- --- a/README.rst +++ b/README.rst @@ -4,9 +4,11 @@ * A CKAN Extension "plugin" at ``ckanext/datagovau/plugin.py`` which, when loaded, overrides various settings in the core ``ini``-file to provide: - * A path to local customisations of the core templates + * A path to local customisations of the core templates to include AGLS/Dublin Core minimum metadata * A custom Package edit form that defaults to cc-by licence * A custom n3/rdf output format + * Replaces links with http/https protocol independent versions + * Provides HTML to users to embed data previews on their own website * A cut down licenses.json file --- a/admin/default.vcl +++ b/admin/default.vcl @@ -8,45 +8,117 @@ .host = "127.0.0.1"; .port = "8080"; } +backend geoserver { + .host = "172.31.18.207"; + .port = "8983"; +} sub vcl_fetch { set beresp.grace = 1h; - - if (beresp.http.content-type ~ "(text|application)") { + unset beresp.http.Server; + # These status codes should always pass through and never cache. + if ( beresp.status >= 500 ) { + set beresp.ttl = 0s; + } + if (beresp.http.content-type ~ "(text|javascript|json|xml|html)") { set beresp.do_gzip = true; } + # CKAN cache headers are used by Varnish cache, but should not be propagated to + # the Internet. Tell browsers and proxies not to cache. This means Varnish always + # gets the responsibility to server the right content at all times. 
+ if (beresp.http.Cache-Control ~ "max-age") { + unset beresp.http.set-cookie; + set beresp.http.Cache-Control = "no-cache"; + } + + # Encourage assets to be cached by proxies and browsers + # JS and CSS may be gzipped depending on headers + # see https://developers.google.com/speed/docs/best-practices/caching + if (req.url ~ "\.(css|js)") { + set beresp.http.Vary = "Accept-Encoding"; + } + + # Encourage assets to be cached by proxies and browsers for 1 day + if (req.url ~ "\.(png|gif|jpg|swf|css|js)") { + unset beresp.http.set-cookie; + set beresp.http.Cache-Control = "public, max-age=86400"; + set beresp.ttl = 1d; + } + + # Encourage CKAN vendor assets (which are versioned) to be cached + # by proxies and browsers for 1 year (VCL "m" means minutes, so the TTL is given in days) + if (req.url ~ "^/scripts/vendor/") { + unset beresp.http.set-cookie; + set beresp.http.Cache-Control = "public, max-age=31536000"; + set beresp.ttl = 365d; + } +} +sub vcl_recv { + if (req.http.user-agent ~ "Ezooms" || req.http.user-agent ~ "Ahrefs") { + error 403; + } +if (req.url ~ "^/geoserver/") { + set req.backend = geoserver; + } else { + set req.backend = default; + #redirect secure traffic to https + if ( (req.http.Cookie ~ "auth_tkt" || req.http.Cookie ~ "ckan" || req.url ~ "user/(reset|login)") && req.http.X-Forwarded-Proto !~ "(?i)https") { + set req.http.x-Redir-Url = "https://data.gov.au" + req.url; + error 753 req.http.x-Redir-Url; + } + # remove locale links + if (req.url ~ "/((?!js)..|.._..|sr_Latn)/") { + set req.http.x-Redir-Url = regsub(req.url, "/((?!js)..|.._..|sr_Latn)/", "/"); + error 751 req.http.x-Redir-Url; + } + # rewrite broken resources + if (req.url ~ "leaflet") { + set req.url = regsub(req.url, "fanstatic/ckanext-spatial/:version:2013-09-13T02:32:17.87/:bundle:js/vendor/leaflet/images", "js/vendor/leaflet/images"); + } + # remove old hostnames + if (req.http.host ~ "data.australia.gov.au") { + set req.http.x-Redir-Url = "http://data.gov.au" + req.url; + error 751 req.http.x-Redir-Url; + } + + if (req.url ~ 
"^/_tracking") { + // exclude web spiders from statistics + if (req.http.user-agent ~ "Googlebot" || req.http.user-agent ~ "baidu" || req.http.user-agent ~ "bing") { + error 200; + } else { + return (pass); + } + } if (req.url ~ "\.(png|gif|jpg|jpeg|swf|css|js|woff|eot)$") { - unset beresp.http.set-cookie; + // Tell Varnish to deliver static assets from cache even if the request otherwise indicates that it should be passed + return(lookup); } } -sub vcl_recv { - if (req.url ~ "^/_tracking") { - return (pass); - } - if (req.url ~ "\.(png|gif|jpg|jpeg|swf|css|js|woff|eot)$") { - return(lookup); - } -if (req.url ~ "/(..|.._..)/") { - set req.url = regsub(req.url, "/((?!js)..|.._..)/", "/"); -} - if (req.http.Cookie) { - set req.http.Cookie = regsuball(req.http.Cookie, "(^|; ) *__utm.=[^;]+;? *", "\1"); # removes all cookies named __utm? (utma, utmb...) - tracking thing - - if (req.http.Cookie == "") { - remove req.http.Cookie; - } - } + // Remove has_js and Google Analytics cookies. Evan added sharethis cookies + set req.http.Cookie = regsuball(req.http.Cookie, "(^|;\s*)(__[a-z]+|has_js|cookie-agreed-en|_csoot|_csuid|_chartbeat2)=[^;]*", ""); + + // Remove a ";" prefix, if present. + set req.http.Cookie = regsub(req.http.Cookie, "^;\s*", ""); + // Remove empty cookies. 
+ if (req.http.Cookie ~ "^\s*$") { + unset req.http.Cookie; + } + + remove req.http.X-Forwarded-For; + set req.http.X-Forwarded-For = req.http.X-Real-IP; } sub vcl_hash { # http://serverfault.com/questions/112531/ignoring-get-parameters-in-varnish-vcl - set req.url = regsub(req.url, "(?:(.com|.au))/((?!js)..|.._..)/", "/"); + set req.url = regsub(req.url, "(?:(.com|.au))/((?!js)..|.._..|sr_Latn)/", "/"); hash_data(req.url); if (req.http.host) { hash_data(req.http.host); } else { hash_data(server.ip); } - return (hash); + if (req.http.Cookie) { + hash_data(req.http.Cookie); +} } sub vcl_deliver { if (!resp.http.Vary) { @@ -54,13 +126,29 @@ } else if (resp.http.Vary !~ "(?i)Accept-Encoding") { set resp.http.Vary = resp.http.Vary + ",Accept-Encoding"; } - if (obj.hits > 0) { - set resp.http.X-Cache = "HIT"; - } else { - set resp.http.X-Cache = "MISS"; - } + remove resp.http.X-Varnish; + remove resp.http.Via; + remove resp.http.Age; + remove resp.http.X-Powered-By; +if (req.url ~ "^/geoserver/") { + set resp.http.Access-Control-Allow-Origin = "*"; + set resp.http.Access-Control-Allow-Methods = "GET, POST, PUT, DELETE"; + set resp.http.Access-Control-Allow-Headers = "Origin, X-Requested-With, Content-Type, Accept"; +} } - +sub vcl_error { + remove obj.http.Server; + if (obj.status == 751) { + set obj.http.Location = obj.response; + set obj.status = 301; + return (deliver); + } + if (obj.status == 753) { + set obj.http.Location = obj.response; + set obj.status = 301; + return (deliver); + } +} # # Below is a commented-out copy of the default VCL logic. 
If you # redefine any of these subroutines, the built-in logic will be --- /dev/null +++ b/admin/reset.sh @@ -1,1 +1,17 @@ +paster --plugin=ckan db clean --config=development.ini +echo "drop extension postgis cascade;" | psql -d ckantest +paster --plugin=ckan db clean --config=development.ini +#to initiate for first time instead of load from dump +#paster --plugin=ckan db init --config=development.in +#paster --plugin=ckan user add maxious password=snmc email=maxious@gmail.com +#paster --plugin=ckan sysadmin add maxious +#paster --plugin=ckan db dump dump.db + +#paster --plugin=ckan db load --config=development.ini dump.db +paster --plugin=ckan db load --config=development.ini dump.harvest.db +echo "create extension postgis;" | psql -d ckantest +#sleep 2 +paster --plugin=ckan search-index rebuild --config=development.ini +#rm -r /tmp/pairtree_* + --- /dev/null +++ b/ckanext/datagovau/controller.py @@ -1,1 +1,84 @@ +import urllib +import json +from pprint import pprint +import logging +import ckan.logic as logic +import hashlib +import threading +from ckan.common import _, c, request, response +from pylons import config +from webob.multidict import UnicodeMultiDict +from paste.util.multidict import MultiDict +log = logging.getLogger(__name__) + +from ckan.controllers.api import ApiController + +class DGAApiController(ApiController): + + def _post_analytics(self,user,request_obj_type,request_function,request_id): + if (config.get('googleanalytics.id') != None): + data = urllib.urlencode({ + "v":1, + "tid":config.get('googleanalytics.id'), + "cid":hashlib.md5(user).hexdigest(), + "t":"event", + "dh":c.environ['HTTP_HOST'], + "dp":c.environ['PATH_INFO'], + "dr":c.environ.get('HTTP_REFERER',''), + "ec":"CKAN API Request", + "ea":request_obj_type+request_function, + "el":request_id, + }) + log.debug("Sending API Analytics Data: "+data) + # send analytics asynchronously + threading.Thread(target=urllib.urlopen,args=("http://www.google-analytics.com/collect", 
data)).start() + + + def action(self, logic_function, ver=None): + try: + function = logic.get_action(logic_function) + except Exception,e: + log.debug(e) + pass + try: + side_effect_free = getattr(function, 'side_effect_free', False) + request_data = self._get_request_data(try_url_params=side_effect_free) + if isinstance(request_data, dict): + id = request_data.get('id','') + if 'q' in request_data.keys(): + id = request_data['q'] + if 'query' in request_data.keys(): + id = request_data['query'] + self._post_analytics(c.user,logic_function,'', id) + except Exception,e: + print log.debug(e) + pass + + return ApiController.action(self,logic_function, ver) + + def list(self, ver=None, register=None, subregister=None, id=None): + self._post_analytics(c.user,register+("_"+str(subregister) if subregister else ""),"list",id) + return ApiController.list(self,ver, register, subregister, id) + def show(self, ver=None, register=None, subregister=None, id=None, id2=None): + self._post_analytics(c.user,register+("_"+str(subregister) if subregister else ""),"show",id) + return ApiController.show(self,ver, register, subregister, id,id2) + def update(self, ver=None, register=None, subregister=None, id=None, id2=None): + self._post_analytics(c.user,register+("_"+str(subregister) if subregister else ""),"update",id) + return ApiController.update(self,ver, register, subregister, id,id2) + def delete(self, ver=None, register=None, subregister=None, id=None, id2=None): + self._post_analytics(c.user,register+("_"+str(subregister) if subregister else ""),"delete",id) + return ApiController.delete(self,ver, register, subregister, id,id2) + def search(self, ver=None, register=None): + id = None + try: + params = MultiDict(self._get_search_params(request.params)) + if 'q' in params.keys(): + id = params['q'] + if 'query' in params.keys(): + id = params['query'] + except ValueError, e: + print str(e) + pass + self._post_analytics(c.user,register,"search",id) + --- 
a/ckanext/datagovau/plugin.py +++ b/ckanext/datagovau/plugin.py @@ -6,6 +6,10 @@ import ckan.plugins.toolkit as tk import ckan.model as model from pylons import config +from routes.mapper import SubMapper, Mapper as _Mapper + +from sqlalchemy import orm +import ckan.model #parse the activity feed for last active non-system user def get_last_active_user(id): @@ -24,7 +28,12 @@ created_datasets_list = user_dict['datasets'] active_datasets_list = [x['data']['package'] for x in lib.helpers.get_action('user_activity_list',{'id':user_dict['id']}) if x['data'].get('package')] - return created_datasets_list + active_datasets_list + raw_list = created_datasets_list + active_datasets_list + filtered_dict = {} + for dataset in raw_list: + if dataset['id'] not in filtered_dict.keys(): + filtered_dict[dataset['id']] = dataset + return filtered_dict.values() class DataGovAuPlugin(plugins.SingletonPlugin, tk.DefaultDatasetForm): @@ -36,6 +45,55 @@ plugins.implements(plugins.IConfigurer, inherit=False) plugins.implements(plugins.IDatasetForm, inherit=False) plugins.implements(plugins.ITemplateHelpers, inherit=False) + plugins.implements(plugins.IRoutes, inherit=True) + + def before_map(self, map): + + # Helpers to reduce code clutter + GET = dict(method=['GET']) + PUT = dict(method=['PUT']) + POST = dict(method=['POST']) + DELETE = dict(method=['DELETE']) + GET_POST = dict(method=['GET', 'POST']) + # intercept API calls that we want to capture analytics on + register_list = [ + 'package', + 'dataset', + 'resource', + 'tag', + 'group', + 'related', + 'revision', + 'licenses', + 'rating', + 'user', + 'activity' + ] + register_list_str = '|'.join(register_list) + # /api ver 3 or none + with SubMapper(map, controller='ckanext.datagovau.controller:DGAApiController', path_prefix='/api{ver:/3|}', + ver='/3') as m: + m.connect('/action/{logic_function}', action='action', + conditions=GET_POST) + + # /api ver 1, 2, 3 or none + with SubMapper(map, 
controller='ckanext.datagovau.controller:DGAApiController', path_prefix='/api{ver:/1|/2|/3|}', + ver='/1') as m: + m.connect('/search/{register}', action='search') + + # /api/rest ver 1, 2 or none + with SubMapper(map, controller='ckanext.datagovau.controller:DGAApiController', path_prefix='/api{ver:/1|/2|}', + ver='/1', requirements=dict(register=register_list_str) + ) as m: + + m.connect('/rest/{register}', action='list', conditions=GET) + m.connect('/rest/{register}', action='create', conditions=POST) + m.connect('/rest/{register}/{id}', action='show', conditions=GET) + m.connect('/rest/{register}/{id}', action='update', conditions=PUT) + m.connect('/rest/{register}/{id}', action='update', conditions=POST) + m.connect('/rest/{register}/{id}', action='delete', conditions=DELETE) + + return map def update_config(self, config): # Add this plugin's templates dir to CKAN's extra_template_paths, so --- /dev/null +++ b/ckanext/datagovau/templates/base.html @@ -1,1 +1,7 @@ +{% ckan_extends %} + {% block meta %} + {{ super() }} + + {% endblock %} + --- /dev/null +++ b/ckanext/datagovau/templates/dataviewer/base.html @@ -1,1 +1,11 @@ +{% ckan_extends %} +{% block scripts %} + {{ super() }} + +{% endblock %} + + --- /dev/null +++ b/ckanext/datagovau/templates/dataviewer/snippets/data_preview.html @@ -1,1 +1,32 @@ +
+ {% if embed %} + {# images can be embedded directly #} + + {% else %} +
+

+ + {{ _('This resource can not be previewed at the moment.') }} + + {{ _('Click here for more information.') }} + +

+

+

+ + + {{ _('Download resource') }} + +

+
+ + {% endif %} +
+
+

Embed this visualisation in your own website...

+Copy the HTML in the box below and you can display this visualisation on your own website.
+ +
--- a/ckanext/datagovau/templates/package/read.html +++ b/ckanext/datagovau/templates/package/read.html @@ -1,7 +1,6 @@ {% ckan_extends %} {% block primary_content_inner %} {{ super() }} -
{{ h.disqus_comments() }}
@@ -128,3 +127,13 @@ {% endblock %} +{% block secondary_content %} + {{ super() }} + + {% set dataset_extent = h.get_pkg_dict_extra(c.pkg_dict, 'spatial', '') %} + {% if dataset_extent %} + {% snippet "spatial/snippets/dataset_map_sidebar.html", extent=dataset_extent %} + {% endif %} + +{% endblock %} + --- /dev/null +++ b/ckanext/datagovau/templates/package/search.html @@ -1,1 +1,7 @@ +{% ckan_extends %} +{% block secondary_content %} +{{ super() }} + {% snippet "spatial/snippets/spatial_query.html", default_extent="[[-11, 114], [-42, 154]]" %} +{% endblock %} +