<?xml version="1.0" encoding="UTF-8"?> | <?xml version="1.0" encoding="UTF-8"?> |
<project version="4"> | <project version="4"> |
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_6" assert-keyword="true" jdk-15="true"> | <component name="ProjectRootManager" version="2" languageLevel="JDK_1_6" assert-keyword="true" jdk-15="true"> |
<output url="file://$PROJECT_DIR$/out" /> | <output url="file://$PROJECT_DIR$/out" /> |
</component> | </component> |
<component name="RegexUtilComponent" text="1900-01-01 2007/08/13 1900.01.01 1900 01 01 1900-01.01 1900 13 01 1900 02 31" flags="0" regex="(19|20)\d\d([- /.])(0[1-9]|1[012])\2(0[1-9]|[12][0-9]|3[01])" mode="0" /> | |
</project> | </project> |
This CKAN Extension customises a CKAN instance for the hosting of data.gov.au. | This CKAN Extension customises a CKAN instance for the hosting of data.gov.au. |
It comprises: | It comprises: |
* A CKAN Extension "plugin" at ``ckanext/datagovau/plugin.py`` which, when | * A custom Package edit form that defaults to cc-by licence |
loaded, overrides various settings in the core ``ini``-file to provide: | * Replaces links with http/https protocol independent versions |
* A path to local customisations of the core templates | * Provides HTML to users to embed data previews on their own website |
* A custom Package edit form that defaults to cc-by licence | * A cut down licenses.json file |
* A custom n3/rdf output format | |
* A cut down licenses.json file | This extension is complemented by ckanext-agls for AGLS metadata, ckanext-googleanalytics for Google Analytics tracking of API usage and ckanext-dga-stats for the customised site statistics page. |
Installation | Installation |
============ | ============ |
To install this package, from your CKAN virtualenv, run the following from your CKAN base folder (e.g. ``pyenv/``):: | To install this package, from your CKAN virtualenv, run the following from your CKAN base folder (e.g. ``pyenv/``):: |
pip install -e git+https://github.com/okfn/ckanext-datagovau#egg=ckanext-datagovau | pip install -e git+https://github.com/okfn/ckanext-datagovau#egg=ckanext-datagovau |
Then activate it by setting ``ckan.plugins = datagovau`` in your main ``ini``-file. | Then activate it by setting ``ckan.plugins = datagovau`` in your main ``ini``-file. |
To add the cut down licenses.json set ``licenses_group_url = http://%(ckan.site_url)/licenses.json`` | To add the cut down licenses.json set ``licenses_group_url = http://%(ckan.site_url)/licenses.json`` |
or copy ``ckanext/datagovau/theme/public/licenses.json`` to the same folder as your CKAN config ini file | or copy ``ckanext/datagovau/theme/public/licenses.json`` to the same folder as your CKAN config ini file |
and set ``licenses_group_url = file://%(here)s/licenses.json`` | and set ``licenses_group_url = file://%(here)s/licenses.json`` |
<VirtualHost 127.0.0.1:8080> | <VirtualHost 127.0.0.1:8080> |
ServerAdmin webmaster@localhost | ServerAdmin webmaster@localhost |
ServerName data.gov.au | ServerName data.gov.au |
ServerAlias www.data.gov.au, opendata.linkdigital.com.au | ServerAlias www.data.gov.au, opendata.linkdigital.com.au |
# allow access to local static content, e.g. logos | # allow access to local static content, e.g. logos |
Alias /index.html /var/www/index.html | Alias /index.html /var/www/index.html |
Alias /logos /var/www/logos | Alias /logos /var/www/logos |
Alias /tools /var/www/tools.shtml | Alias /tools /var/www/tools.shtml |
Alias /opendata /var/www/opendata.shtml | Alias /opendata /var/www/opendata.shtml |
Alias /opengov /var/www/opengov.shtml | Alias /opengov /var/www/opengov.shtml |
DocumentRoot /var/www | DocumentRoot /var/www |
<IfModule mod_expires.c> | <IfModule mod_expires.c> |
<FilesMatch "\.(jpe?g|png|gif|js|css)$"> | <FilesMatch "\.(jpe?g|png|gif|js|css)$"> |
ExpiresActive On | ExpiresActive On |
ExpiresDefault "access plus 1 week" | ExpiresDefault "access plus 1 week" |
</FilesMatch> | </FilesMatch> |
</IfModule> | </IfModule> |
AddType application/octet-stream .woff | |
<FilesMatch "\.(eot|otf|woff|ttf)$"> | |
SetEnvIf Origin "^http(s)?://(.+\.)?(data.gov.au)$" origin_is=$0 | |
Header set Access-Control-Allow-Origin %{origin_is}e env=origin_is | |
</FilesMatch> | |
<Directory /> | <Directory /> |
Options FollowSymLinks | Options FollowSymLinks |
AllowOverride None | AllowOverride None |
</Directory> | </Directory> |
<Directory /var/www/> | <Directory /var/www/> |
Options -Indexes -FollowSymLinks -MultiViews +Includes | Options -Indexes -FollowSymLinks -MultiViews +Includes |
AllowOverride None | AllowOverride None |
Order allow,deny | Order allow,deny |
allow from all | allow from all |
AddType text/html .shtml | AddType text/html .shtml |
AddOutputFilter INCLUDES .shtml | AddOutputFilter INCLUDES .shtml |
</Directory> | </Directory> |
ErrorLog ${APACHE_LOG_DIR}/error.log | ErrorLog ${APACHE_LOG_DIR}/error.log |
# Possible values include: debug, info, notice, warn, error, crit, alert, emerg. | # Possible values include: debug, info, notice, warn, error, crit, alert, emerg. |
LogLevel warn | LogLevel warn |
CustomLog ${APACHE_LOG_DIR}/access.log varnishcombined | CustomLog ${APACHE_LOG_DIR}/access.log varnishcombined |
RewriteEngine On | RewriteEngine On |
RewriteMap lc int:tolower | RewriteMap lc int:tolower |
# RewriteRule ^/apps/antenna-mate/?$ http://antennamate.com/ [PT] | # RewriteRule ^/apps/antenna-mate/?$ http://antennamate.com/ [PT] |
# RewriteRule ^/apps/postcode-finder/?$ http://www.aus-emaps.com/postcode_finder.php [PT] | # RewriteRule ^/apps/postcode-finder/?$ http://www.aus-emaps.com/postcode_finder.php [PT] |
# RewriteRule ^/apps/the-australian-cost-of-living-heatmap/?$ http://www.creditcardcompare.com.au/tools/cost-of-living-heatmap/ [PT] | # RewriteRule ^/apps/the-australian-cost-of-living-heatmap/?$ http://www.creditcardcompare.com.au/tools/cost-of-living-heatmap/ [PT] |
RewriteRule ^/catalogues/?$ /organization [PT] | RewriteRule ^/catalogues/?$ /organization [PT] |
RewriteRule ^/dataset/“cycling-in-new-south-wales-what-the-data-tells-us”-and-related-data/? /dataset/2809cycling-in-new-south-wales-what-the-data-tells-us2809-and-related-data [PT] | RewriteRule ^/dataset/“cycling-in-new-south-wales-what-the-data-tells-us”-and-related-data/? /dataset/2809cycling-in-new-south-wales-what-the-data-tells-us2809-and-related-data [PT] |
RewriteRule ^/dataset/apvma-pubcris-dataset-for-registered-agricultural-and-veterinary-chemical-products-and-approved-actives/? /dataset/apvma-pubcris-dataset-for-registered-agricultural-and-veterinary-chemical-products-and-approved-acti [PT] | RewriteRule ^/dataset/apvma-pubcris-dataset-for-registered-agricultural-and-veterinary-chemical-products-and-approved-actives/? /dataset/apvma-pubcris-dataset-for-registered-agricultural-and-veterinary-chemical-products-and-approved-acti [PT] |
RewriteRule ^/dataset_category/business/?$ /group/business [PT] | RewriteRule ^/dataset_category/business/?$ /group/business [PT] |
RewriteRule ^/dataset_category/community/?$ /group/community [PT] | RewriteRule ^/dataset_category/community/?$ /group/community [PT] |
RewriteRule ^/dataset_category/geography/?$ /group/geography [PT] | RewriteRule ^/dataset_category/geography/?$ /group/geography [PT] |
RewriteRule ^/dataset_category/government/?$ /group/government [PT] | RewriteRule ^/dataset_category/government/?$ /group/government [PT] |
RewriteRule ^/dataset_category/health/?$ /group/health [PT] | RewriteRule ^/dataset_category/health/?$ /group/health [PT] |
RewriteRule ^/dataset_category/?$ /group [PT] | RewriteRule ^/dataset_category/?$ /group [PT] |
RewriteRule ^/feed/?$ /feeds/dataset.atom [PT] | RewriteRule ^/feed/?$ /feeds/dataset.atom [PT] |
RewriteRule ^/jurisdiction/australian-capital-territory/?$ /dataset?q=act [PT] | RewriteRule ^/jurisdiction/australian-capital-territory/?$ /dataset?q=act [PT] |
RewriteRule ^/jurisdiction/new-south-wales/?$ /dataset?q=new+south+wales [PT] | RewriteRule ^/jurisdiction/new-south-wales/?$ /dataset?q=new+south+wales [PT] |
RewriteRule ^/jurisdiction/queensland/?$ /dataset?q=queensland [PT] | RewriteRule ^/jurisdiction/queensland/?$ /dataset?q=queensland [PT] |
RewriteRule ^/jurisdiction/south-australia/?$ /dataset?q=south+australia [PT] | RewriteRule ^/jurisdiction/south-australia/?$ /dataset?q=south+australia [PT] |
RewriteRule ^/jurisdiction/victoria/?$ /dataset?q=victoria [PT] | RewriteRule ^/jurisdiction/victoria/?$ /dataset?q=victoria [PT] |
RewriteRule ^/jurisdiction/western-australia/?$ /dataset?q=western+australia [PT] | RewriteRule ^/jurisdiction/western-australia/?$ /dataset?q=western+australia [PT] |
RewriteRule ^/jurisdiction/?$ /dataset [PT] | RewriteRule ^/jurisdiction/?$ /dataset [PT] |
# RewriteCond %{QUERY_STRING} ^category=([a-z]+) | # RewriteCond %{QUERY_STRING} ^category=([a-z]+) |
# RewriteRule ^/data/?$ /group/${lc:%1}? [PT] | # RewriteRule ^/data/?$ /group/${lc:%1}? [PT] |
# RewriteCond %{QUERY_STRING} ^format=Shapefile | # RewriteCond %{QUERY_STRING} ^format=Shapefile |
# RewriteRule ^/data/?$ /dataset?res_format=shp [PT] | # RewriteRule ^/data/?$ /dataset?res_format=shp [PT] |
RewriteRule ^/data/?$ /dataset? [PT] | RewriteRule ^/data/?$ /dataset? [PT] |
RewriteCond %{QUERY_STRING} ^s=(.*) | RewriteCond %{QUERY_STRING} ^s=(.*) |
RewriteRule ^/ /dataset?q=%1 [PT] | RewriteRule ^/ /dataset?q=%1 [PT] |
FilterDeclare OPENDATA | FilterDeclare OPENDATA |
FilterProvider OPENDATA SUBSTITUTE resp=Content-Type $text/ | FilterProvider OPENDATA SUBSTITUTE resp=Content-Type $text/ |
FilterProvider OPENDATA SUBSTITUTE resp=Content-Type $/javascript | FilterProvider OPENDATA SUBSTITUTE resp=Content-Type $/javascript |
FilterProvider OPENDATA SUBSTITUTE resp=Content-Type $/json | FilterProvider OPENDATA SUBSTITUTE resp=Content-Type $/json |
FilterProvider OPENDATA SUBSTITUTE resp=Content-Type $/xml | FilterProvider OPENDATA SUBSTITUTE resp=Content-Type $/xml |
FilterChain OPENDATA | FilterChain OPENDATA |
Substitute "s#//opendata.linkdigital.com.au#//data.gov.au#" | Substitute "s#//opendata.linkdigital.com.au#//data.gov.au#" |
WSGIScriptAlias / /var/lib/ckan/dga/pyenv/bin/dga.py | WSGIScriptAlias / /var/lib/ckan/dga/pyenv/bin/dga.py |
WSGIPassAuthorization On | WSGIPassAuthorization On |
ErrorLog /var/log/apache2/ckandga.error.log | ErrorLog /var/log/apache2/ckandga.error.log |
CustomLog /var/log/apache2/ckandga.custom.log combined | CustomLog /var/log/apache2/ckandga.custom.log combined |
</VirtualHost> | </VirtualHost> |
# This is a basic VCL configuration file for varnish. See the vcl(7) | # This is a basic VCL configuration file for varnish. See the vcl(7) |
# man page for details on VCL syntax and semantics. | # man page for details on VCL syntax and semantics. |
# | # |
# Default backend definition. Set this to point to your content | # Default backend definition. Set this to point to your content |
# server. | # server. |
# | # |
backend default { | backend default { |
.host = "127.0.0.1"; | .host = "127.0.0.1"; |
.port = "8080"; | .port = "8080"; |
} | } |
backend geoserver { | |
.host = "172.31.18.207"; | |
.port = "8983"; | |
} | |
sub vcl_fetch { | sub vcl_fetch { |
set beresp.grace = 1h; | set beresp.grace = 1h; |
unset beresp.http.Server; | |
if (beresp.http.content-type ~ "(text|application)") { | # These status codes should always pass through and never cache. |
if ( beresp.status >= 500 ) { | |
set beresp.ttl = 0s; | |
} | |
if (beresp.http.content-type ~ "(text|javascript|json|xml|html)") { | |
set beresp.do_gzip = true; | set beresp.do_gzip = true; |
} | } |
# CKAN cache headers are used by Varnish cache, but should not be propagated to | |
# the Internet. Tell browsers and proxies not to cache. This means Varnish always | |
# gets the responsibility to serve the right content at all times. | |
if (beresp.http.Cache-Control ~ "max-age") { | |
unset beresp.http.set-cookie; | |
set beresp.http.Cache-Control = "no-cache"; | |
} | |
# Encourage assets to be cached by proxies and browsers | |
# JS and CSS may be gzipped depending on headers | |
# see https://developers.google.com/speed/docs/best-practices/caching | |
if (req.url ~ "\.(css|js)") { | |
set beresp.http.Vary = "Accept-Encoding"; | |
} | |
# Encourage assets to be cached by proxies and browsers for 1 day | |
if (req.url ~ "\.(png|gif|jpg|swf|css|js)") { | |
unset beresp.http.set-cookie; | |
set beresp.http.Cache-Control = "public, max-age=86400"; | |
set beresp.ttl = 1d; | |
} | |
# Encourage CKAN vendor assets (which are versioned) to be cached by | |
# proxies and browsers for 1 year | |
if (req.url ~ "^/scripts/vendor/") { | |
unset beresp.http.set-cookie; | |
set beresp.http.Cache-Control = "public, max-age=31536000"; | |
set beresp.ttl = 12m; | |
} | |
} | |
sub vcl_recv { | |
if (req.http.user-agent ~ "Ezooms" || req.http.user-agent ~ "Ahrefs") { | |
error 403; | |
} | |
if (req.url ~ "^/geoserver/") { | |
set req.backend = geoserver; | |
} else { | |
set req.backend = default; | |
#redirect secure traffic to https | |
if ( (req.http.Cookie ~ "auth_tkt" || req.http.Cookie ~ "ckan" || req.url ~ "user/(reset|login)") && req.http.X-Forwarded-Proto !~ "(?i)https") { | |
set req.http.x-Redir-Url = "https://data.gov.au" + req.url; | |
error 753 req.http.x-Redir-Url; | |
} | |
# remove locale links | |
if (req.url ~ "/((?!js)..|.._..|sr_Latn)/") { | |
set req.http.x-Redir-Url = regsub(req.url, "/((?!js)..|.._..|sr_Latn)/", "/"); | |
error 751 req.http.x-Redir-Url; | |
} | |
# rewrite broken resources | |
if (req.url ~ "leaflet") { | |
set req.url = regsub(req.url, "fanstatic/ckanext-spatial/:version:2013-09-13T02:32:17.87/:bundle:js/vendor/leaflet/images", "js/vendor/leaflet/images"); | |
} | |
# remove old hostnames | |
if (req.http.host ~ "data.australia.gov.au") { | |
set req.http.x-Redir-Url = "http://data.gov.au" + req.url; | |
error 751 req.http.x-Redir-Url; | |
} | |
if (req.url ~ "^/_tracking") { | |
// exclude web spiders from statistics | |
if (req.http.user-agent ~ "Googlebot" || req.http.user-agent ~ "baidu" || req.http.user-agent ~ "bing") { | |
error 200; | |
} else { | |
return (pass); | |
} | |
} | |
if (req.url ~ "\.(png|gif|jpg|jpeg|swf|css|js|woff|eot)$") { | if (req.url ~ "\.(png|gif|jpg|jpeg|swf|css|js|woff|eot)$") { |
unset beresp.http.set-cookie; | //Varnish to deliver content from cache even if the request otherwise indicates that the request should be passed |
return(lookup); | |
} | } |
} | } |
sub vcl_recv { | // Remove has_js and Google Analytics cookies. Evan added sharethis cookies |
if (req.url ~ "^/_tracking") { | set req.http.Cookie = regsuball(req.http.Cookie, "(^|;\s*)(__[a-z]+|has_js|cookie-agreed-en|_csoot|_csuid|_chartbeat2)=[^;]*", ""); |
return (pass); | |
} | // Remove a ";" prefix, if present. |
if (req.url ~ "\.(png|gif|jpg|jpeg|swf|css|js|woff|eot)$") { | set req.http.Cookie = regsub(req.http.Cookie, "^;\s*", ""); |
return(lookup); | // Remove empty cookies. |
} | if (req.http.Cookie ~ "^\s*$") { |
unset req.http.Cookie; | |
} | |
remove req.http.X-Forwarded-For; | |
set req.http.X-Forwarded-For = req.http.X-Real-IP; | |
} | |
sub vcl_hash { | |
# http://serverfault.com/questions/112531/ignoring-get-parameters-in-varnish-vcl | |
set req.url = regsub(req.url, "(?:(.com|.au))/((?!js)..|.._..|sr_Latn)/", "/"); | |
hash_data(req.url); | |
if (req.http.host) { | |
hash_data(req.http.host); | |
} else { | |
hash_data(server.ip); | |
} | |
if (req.http.Cookie) { | if (req.http.Cookie) { |
set req.http.Cookie = regsuball(req.http.Cookie, "(^|; ) *__utm.=[^;]+;? *", "\1"); # removes all cookies named __utm? (utma, utmb...) - tracking thing | hash_data(req.http.Cookie); |
} | |
if (req.http.Cookie == "") { | } |
remove req.http.Cookie; | |
} | |
} | |
} | |
sub vcl_deliver { | sub vcl_deliver { |
if (!resp.http.Vary) { | if (!resp.http.Vary) { |
set resp.http.Vary = "Accept-Encoding"; | set resp.http.Vary = "Accept-Encoding"; |
} else if (resp.http.Vary !~ "(?i)Accept-Encoding") { | } else if (resp.http.Vary !~ "(?i)Accept-Encoding") { |
set resp.http.Vary = resp.http.Vary + ",Accept-Encoding"; | set resp.http.Vary = resp.http.Vary + ",Accept-Encoding"; |
} | } |
if (obj.hits > 0) { | remove resp.http.X-Varnish; |
set resp.http.X-Cache = "HIT"; | remove resp.http.Via; |
} else { | remove resp.http.Age; |
set resp.http.X-Cache = "MISS"; | remove resp.http.X-Powered-By; |
} | if (req.url ~ "^/geoserver/") { |
set resp.http.Access-Control-Allow-Origin = "*"; | |
set resp.http.Access-Control-Allow-Methods = "GET, POST, PUT, DELETE"; | |
set resp.http.Access-Control-Allow-Headers = "Origin, X-Requested-With, Content-Type, Accept"; | |
} | |
} | } |
sub vcl_error { | |
remove obj.http.Server; | |
if (obj.status == 751) { | |
set obj.http.Location = obj.response; | |
set obj.status = 301; | |
return (deliver); | |
} | |
if (obj.status == 753) { | |
set obj.http.Location = obj.response; | |
set obj.status = 301; | |
return (deliver); | |
} | |
} | |
# | # |
# Below is a commented-out copy of the default VCL logic. If you | # Below is a commented-out copy of the default VCL logic. If you |
# redefine any of these subroutines, the built-in logic will be | # redefine any of these subroutines, the built-in logic will be |
# appended to your code. | # appended to your code. |
# sub vcl_recv { | # sub vcl_recv { |
# if (req.restarts == 0) { | # if (req.restarts == 0) { |
# if (req.http.x-forwarded-for) { | # if (req.http.x-forwarded-for) { |
# set req.http.X-Forwarded-For = | # set req.http.X-Forwarded-For = |
# req.http.X-Forwarded-For + ", " + client.ip; | # req.http.X-Forwarded |