varnish config dec 2013
varnish config dec 2013

file:a/README.rst -> file:b/README.rst
--- a/README.rst
+++ b/README.rst
@@ -4,9 +4,11 @@
 
 * A CKAN Extension "plugin" at ``ckanext/datagovau/plugin.py`` which, when
   loaded, overrides various settings in the core ``ini``-file to provide:
-  * A path to local customisations of the core templates
+  * A path to local customisations of the core templates to include AGLS/Dublin Core minimum metadata
   * A custom Package edit form that defaults to cc-by licence
   * A custom n3/rdf output format
+  * Replaces links with http/https protocol independent versions
+  * Provides HTML to users to embed data previews on their own website
 
 * A cut down licenses.json file
 

--- a/admin/default.vcl
+++ b/admin/default.vcl
@@ -8,45 +8,117 @@
     .host = "127.0.0.1";
     .port = "8080";
 }
+backend geoserver {
+    .host = "172.31.18.207";
+    .port = "8983";
+}
 
 sub vcl_fetch {
     set beresp.grace = 1h;
- 
-    if (beresp.http.content-type ~ "(text|application)") {
+    unset beresp.http.Server;
+    # These status codes should always pass through and never cache.
+  if ( beresp.status >= 500 ) {
+    set beresp.ttl = 0s;
+  }
+    if (beresp.http.content-type ~ "(text|javascript|json|xml|html)") {
         set beresp.do_gzip = true;
     }
+  # CKAN cache headers are used by Varnish cache, but should not be propagated to
+  # the Internet. Tell browsers and proxies not to cache. This means Varnish always
+  # gets the responsibility to serve the right content at all times.
+  if (beresp.http.Cache-Control ~ "max-age") {
+    unset beresp.http.set-cookie;
+    set beresp.http.Cache-Control = "no-cache";
+  }
+
+  # Encourage assets to be cached by proxies and browsers
+  # JS and CSS may be gzipped depending on headers
+  # see https://developers.google.com/speed/docs/best-practices/caching
+  if (req.url ~ "\.(css|js)") {
+    set beresp.http.Vary = "Accept-Encoding";
+  }
+
+  # Encourage assets to be cached by proxies and browsers for 1 day
+  if (req.url ~ "\.(png|gif|jpg|swf|css|js)") {
+    unset beresp.http.set-cookie;
+    set beresp.http.Cache-Control = "public, max-age=86400";
+    set beresp.ttl = 1d;
+  }
+
+  # Encourage CKAN vendor assets (which are versioned) to be cached
+  # by proxies and browsers for 1 year
+  if (req.url ~ "^/scripts/vendor/") {
+    unset beresp.http.set-cookie;
+    set beresp.http.Cache-Control = "public, max-age=31536000";
+    set beresp.ttl = 365d;  # one year; the VCL suffix "m" means minutes, so "12m" was only 12 minutes
+  }
+}
+sub vcl_recv {
+    if (req.http.user-agent ~ "Ezooms" || req.http.user-agent ~ "Ahrefs") {
+	error 403;
+    } 
+if (req.url ~ "^/geoserver/") {
+        set req.backend = geoserver;
+    } else {
+        set req.backend = default;
+	#redirect secure traffic to https
+	if ( (req.http.Cookie ~ "auth_tkt" || req.http.Cookie ~ "ckan" || req.url ~ "user/(reset|login)") && req.http.X-Forwarded-Proto !~ "(?i)https") {
+		set req.http.x-Redir-Url = "https://data.gov.au" + req.url;
+		error 753 req.http.x-Redir-Url;
+	}
+	# remove locale links
+	if (req.url ~ "/((?!js)..|.._..|sr_Latn)/") {
+	        set req.http.x-Redir-Url = regsub(req.url, "/((?!js)..|.._..|sr_Latn)/", "/");
+		error 751 req.http.x-Redir-Url;
+	}
+	# rewrite broken resources
+	if (req.url ~ "leaflet") {
+	        set req.url = regsub(req.url, "fanstatic/ckanext-spatial/:version:2013-09-13T02:32:17.87/:bundle:js/vendor/leaflet/images", "js/vendor/leaflet/images");
+	}
+	# remove old hostnames
+	if (req.http.host ~ "data.australia.gov.au") {
+		set req.http.x-Redir-Url = "http://data.gov.au" + req.url;
+		error 751 req.http.x-Redir-Url;
+	}
+
+	if (req.url ~ "^/_tracking") {
+	// exclude web spiders from statistics
+	    	if (req.http.user-agent ~ "Googlebot" || req.http.user-agent ~ "baidu" || req.http.user-agent ~ "bing") {
+			error 200;
+	    	} else {
+			return (pass);
+   		}
+ 	}
  if (req.url ~ "\.(png|gif|jpg|jpeg|swf|css|js|woff|eot)$") {
-   unset beresp.http.set-cookie;
+   //Varnish to deliver content from cache even if the request otherwise indicates that the request should be passed
+   return(lookup);
  }
 }
-sub vcl_recv {
-  if (req.url ~ "^/_tracking") {
-    return (pass);
-  }
- if (req.url ~ "\.(png|gif|jpg|jpeg|swf|css|js|woff|eot)$") {
-    return(lookup);
- }
-if (req.url ~ "/(..|.._..)/") {
-    set req.url = regsub(req.url, "/((?!js)..|.._..)/", "/");
-}
-  if (req.http.Cookie) {
-    set req.http.Cookie = regsuball(req.http.Cookie, "(^|; ) *__utm.=[^;]+;? *", "\1"); # removes all cookies named __utm? (utma, utmb...) - tracking thing
-
-    if (req.http.Cookie == "") {
-        remove req.http.Cookie;
-    }
-  }
+  // Remove has_js and Google Analytics cookies. Evan added sharethis cookies
+  set req.http.Cookie = regsuball(req.http.Cookie, "(^|;\s*)(__[a-z]+|has_js|cookie-agreed-en|_csoot|_csuid|_chartbeat2)=[^;]*", "");
+
+  // Remove a ";" prefix, if present.
+  set req.http.Cookie = regsub(req.http.Cookie, "^;\s*", "");
+  // Remove empty cookies.
+  if (req.http.Cookie ~ "^\s*$") {
+    unset req.http.Cookie;
+  }
+
+  remove req.http.X-Forwarded-For;
+  set req.http.X-Forwarded-For = req.http.X-Real-IP;
 } 
 sub vcl_hash {
      # http://serverfault.com/questions/112531/ignoring-get-parameters-in-varnish-vcl
-     set req.url = regsub(req.url, "(?:(.com|.au))/((?!js)..|.._..)/", "/");
+     set req.url = regsub(req.url, "(?:(.com|.au))/((?!js)..|.._..|sr_Latn)/", "/");
      hash_data(req.url);
      if (req.http.host) {
          hash_data(req.http.host);
      } else {
          hash_data(server.ip);
      }
-     return (hash);
+  if (req.http.Cookie) {
+    hash_data(req.http.Cookie);
+}
 }
 sub vcl_deliver {
     if (!resp.http.Vary) {
@@ -54,13 +126,29 @@
     } else if (resp.http.Vary !~ "(?i)Accept-Encoding") {
         set resp.http.Vary = resp.http.Vary + ",Accept-Encoding";
     }    
-    if (obj.hits > 0) {
-        set resp.http.X-Cache = "HIT";
-    } else {
-        set resp.http.X-Cache = "MISS";
-    }
+    remove resp.http.X-Varnish;
+    remove resp.http.Via;
+    remove resp.http.Age;
+    remove resp.http.X-Powered-By;
+if (req.url ~ "^/geoserver/") {
+  set resp.http.Access-Control-Allow-Origin = "*";
+  set resp.http.Access-Control-Allow-Methods = "GET, POST, PUT, DELETE";
+  set resp.http.Access-Control-Allow-Headers = "Origin, X-Requested-With, Content-Type, Accept";
+}
 }   
-
+sub vcl_error {
+    remove obj.http.Server;
+	if (obj.status == 751) {
+		set obj.http.Location = obj.response;
+		set obj.status = 301;
+		return (deliver);
+	}
+	if (obj.status == 753) {
+		set obj.http.Location = obj.response;
+		set obj.status = 301;
+		return (deliver);
+	}
+}
 # 
 # Below is a commented-out copy of the default VCL logic.  If you
 # redefine any of these subroutines, the built-in logic will be

--- a/ckanext/datagovau/plugin.py
+++ b/ckanext/datagovau/plugin.py
@@ -28,8 +28,12 @@
     created_datasets_list = user_dict['datasets']
     active_datasets_list = [x['data']['package'] for x in 
 				lib.helpers.get_action('user_activity_list',{'id':user_dict['id']}) if x['data'].get('package')]
-    return created_datasets_list + active_datasets_list
-
+    raw_list = created_datasets_list + active_datasets_list
+    filtered_dict = {}
+    for dataset in raw_list:
+	if dataset['id'] not in filtered_dict.keys():
+		filtered_dict[dataset['id']] = dataset
+    return filtered_dict.values()
 
 class DataGovAuPlugin(plugins.SingletonPlugin,
                                 tk.DefaultDatasetForm):

--- /dev/null
+++ b/ckanext/datagovau/templates/dataviewer/snippets/data_preview.html
@@ -1,1 +1,32 @@
+<div class="module-content ckanext-datapreview">
+  {% if embed %}
+    {# images can be embedded directly #}
+    <img src="{{ resource_url }}"></img>
+  {% else %}
+  <div class="data-viewer-error js-hide">
+    <p class="text-error">
+      <i class="icon-info-sign"></i>
+      {{ _('This resource can not be previewed at the moment.') }}
+      <a href="#" data-toggle="collapse" data-target="#data-view-error">
+        {{ _('Click here for more information.') }}
+      </a>
+    </p>
+    <p id="data-view-error" class="collapse"></p>
+    <p>
+      <a href="{{ raw_resource_url }}" class="btn btn-large resource-url-analytics" target="_blank">
+        <i class="icon-large icon-download"></i>
+        {{ _('Download resource') }}
+      </a>
+    </p>
+  </div>
+  <iframe src="{{ resource_url }}" frameborder="0" width="100%" data-module="data-viewer">
+    <p>{{ _('Your browser does not support iframes.') }}</p>
+  </iframe>
+  {% endif %}
+</div>
+<div class="embedhint" style="padding-left: 15px;">
+<h4>Embed this visualisation in your own website...</h4>
+Copy the HTML in the box below and you can display this visualisation on your own website.<br/>
+<textarea style="width:97%">&lt;iframe src="{{ resource_url }}" frameborder="0" width="100%" height="600px"&gt;&lt;/iframe&gt;</textarea>
+</div>