<?php

//
// Open Web Analytics - An Open Source Web Analytics Framework
//
// Copyright 2006-2010 Peter Adams. All rights reserved.
//
// Licensed under GPL v2.0 http://www.gnu.org/copyleft/gpl.html
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// $Id$
//

/**
 * Sanitize Class
 *
 * Responsible for sanitizing input and escaping output. All methods are
 * static; the class holds no state.
 *
 * @author      Peter Adams <peter@openwebanalytics.com>
 * @copyright   Copyright © 2006-2010 Peter Adams <peter@openwebanalytics.com>
 * @license     http://www.gnu.org/copyleft/gpl.html GPL v2.0
 * @category    owa
 * @package     owa
 * @version     $Revision$
 * @since       owa 1.3.0
 */

class owa_sanitize {

    /**
     * Remove non alpha-numeric characters
     *
     * Every character that is not a-z, A-Z, 0-9 or one of the characters
     * listed in $exceptions is removed.
     *
     * @param   string|array    $input      String, or array of strings, to sanitize.
     *                                      Array keys are preserved.
     * @param   array           $exceptions Additional characters that should be allowed.
     * @return  string|array    The sanitized string or array
     * @access  public
     * @static
     */
    public static function removeNonAlphaNumeric($input, $exceptions = array()) {

        $allowed_chars = '';

        // Add the exceptions to the allowed part of the character class.
        // preg_quote() escapes regex meta characters only, so an exception
        // such as 's' or 'd' stays a literal letter instead of turning into
        // the \s or \d character class.
        if ( !empty( $exceptions ) ) {
            foreach ( (array) $exceptions as $value ) {
                $allowed_chars .= preg_quote( (string) $value, '/' );
            }
        }

        $regex = "/[^{$allowed_chars}a-zA-Z0-9]/";

        // Arrays are sanitized item by item.
        if ( is_array( $input ) ) {
            $sanitized = array();
            foreach ( $input as $key => $item ) {
                $sanitized[$key] = preg_replace( $regex, '', $item );
            }
            return $sanitized;
        }

        // Anything else is treated as a single string.
        return preg_replace( $regex, '', $input );
    }

    /**
     * Escapes a string for use in display output
     *
     * @param   string  $string     The string to be escaped
     * @param   string  $encoding   The charset to use in encoding.
     * @param   string  $quotes     The php constant for encoding quotations used by htmlentities.
     *                              Any falsy value selects ENT_QUOTES.
     * @return  string  html encoded string
     * @link    http://www.php.net/manual/en/function.htmlentities.php
     * @access  public
     * @static
     */
    public static function escapeForDisplay($string, $encoding = 'UTF-8', $quotes = '') {

        // Default to the mode that converts both single and double quotes.
        // NOTE(review): ENT_NOQUOTES has the value 0, so passing it also ends
        // up here and is replaced by ENT_QUOTES - confirm that is acceptable.
        if ( !$quotes ) {
            $quotes = ENT_QUOTES;
        }

        return htmlentities( $string, $quotes, $encoding );
    }

    /**
     * Strip Whitespace
     *
     * Removes runs of newlines, carriage returns and tabs entirely, then
     * collapses any remaining run of two or more whitespace characters into
     * a single space.
     *
     * @param   string  $input  String to strip
     * @return  string  whitespace sanitized input
     * @access  public
     * @static
     */
    public static function stripWhitespace( $input ) {

        $output = preg_replace( '/[\n\r\t]+/', '', $input );
        return preg_replace( '/\s{2,}/', ' ', $output );
    }

    /**
     * Strip IMG html tags
     *
     * Images that carry an alt attribute are replaced by their alt text
     * followed by a line break (a surrounding link is kept around the text);
     * all remaining img tags are removed.
     *
     * @param   string  $input  String to sanitize
     * @return  string  String with no img tags
     * @access  public
     * @static
     */
    public static function stripImages( $input ) {

        // linked image with alt text: keep the link, use the alt text as its label
        $output = preg_replace('/(<a[^>]*>)(<img[^>]+alt=")([^"]*)("[^>]*>)(<\/a>)/i', '$1$3$5<br />', $input);
        // stand-alone image with alt text: keep just the alt text
        $output = preg_replace('/(<img[^>]+alt=")([^"]*)("[^>]*>)/i', '$2<br />', $output);
        // whatever img tags are left
        $output = preg_replace('/<img[^>]*>/i', '', $output);

        return $output;
    }

    /**
     * Strip Scripts and Stylesheets
     *
     * Besides script and style elements and stylesheet links, the pattern
     * also removes img tags, double-quoted inline style attributes and
     * html comments.
     *
     * @param   string  $input  String to sanitize
     * @return  string  String with <script>, <style>, <link> elements removed.
     * @access  public
     * @static
     */
    public static function stripScriptsAndCss( $input ) {

        return preg_replace(
            '/(<link[^>]+rel="[^"]*stylesheet"[^>]*>|<img[^>]*>|style="[^"]*")|<script[^>]*>.*?<\/script>|<style[^>]*>.*?<\/style>|<!--.*?-->/is',
            '',
            $input );
    }

    /**
     * Strip scripts, stylesheets, images and all remaining html tags
     *
     * Runs stripScriptsAndCss(), stripImages() and stripHtml() in that order.
     * Whitespace is left untouched.
     *
     * @param   string  $input  String to sanitize
     * @return  string  sanitized string
     * @access  public
     * @static
     */
    public static function stripAllTags( $input = '' ) {

        $output = self::stripScriptsAndCss( $input );
        $output = self::stripImages( $output );
        $output = self::stripHtml( $output );

        return $output;
    }

    /**
     * Strips specified html tags
     *
     * When $tags is empty every tag is removed with strip_tags(). Otherwise
     * only the opening and closing tags of the listed elements are removed;
     * the content between them is kept.
     *
     * @param   string          $input  String to sanitize
     * @param   array|string    $tags   Tag name(s) to remove, e.g. array('b', 'i')
     * @return  string          sanitized String
     * @access  public
     * @static
     */
    public static function stripHtml( $input = '', $tags = array() ) {

        if ( empty( $tags ) ) {
            return strip_tags( $input );
        }

        // Each pass must work on the result of the previous one, otherwise
        // only the last tag in the list would end up being stripped.
        $output = $input;

        foreach ( (array) $tags as $tag ) {
            $name = preg_quote( $tag, '/' );
            // \b keeps a short name such as "b" from matching <body> / </body>
            $output = preg_replace( '/<' . $name . '\b[^>]*>/i', '', $output );
            $output = preg_replace( '/<\/' . $name . '\b[^>]*>/i', '', $output );
        }

        return $output;
    }

    /**
     * Removes hidden space characters
     *
     * Removes every 0xCA byte from the input.
     *
     * NOTE(review): the inner replacement swaps a space for a space as it is
     * written here; the search string was presumably a non-breaking space
     * that got lost in an encoding conversion - confirm against upstream.
     * NOTE(review): 0xCA is also a lead byte of some multi-byte UTF-8
     * sequences, so this may damage such characters - verify with callers.
     *
     * @param   string  $input  String to sanitize
     * @return  string  sanitized string
     * @access  public
     * @static
     */
    public static function removeHiddenSpaces( $input = '' ) {

        return str_replace( chr( 0xCA ), '', str_replace( ' ', ' ', $input ) );
    }

    /**
     * Rewrites numeric character references (&#NNN;)
     *
     * NOTE(review): pattern and replacement produce the same text, so as
     * written this leaves the input unchanged. The pattern was presumably
     * meant to match the double-encoded form - confirm before changing.
     *
     * @param   string  $input  String to process
     * @return  string
     * @access  public
     * @static
     */
    public static function escapeUnicode( $input = '' ) {

        return preg_replace( "/&#([0-9]+);/s", "&#\\1;", $input );
    }

    /**
     * Rewrites backslashes that are not followed by "&#" or "?#"
     *
     * NOTE(review): a matched backslash is replaced by a single backslash,
     * so as written the input comes back unchanged - confirm intent.
     *
     * @param   string  $input  String to process
     * @return  string
     * @access  public
     * @static
     */
    public static function escapeBackslash( $input = '' ) {

        return preg_replace( "/\\\(?!&#|\?#)/", "\\", $input );
    }

    /**
     * Removes carriage returns (\r)
     *
     * @param   string  $input  String to sanitize
     * @return  string  String without carriage returns
     * @access  public
     * @static
     */
    public static function stripCarriageReturns( $input = '' ) {

        return str_replace( "\r", "", $input );
    }

    /**
     * Misspelled alias of stripCarriageReturns(), kept so that existing
     * callers of the old name keep working.
     *
     * @deprecated  use stripCarriageReturns()
     * @param   string  $input  String to sanitize
     * @return  string  String without carriage returns
     * @access  public
     * @static
     */
    public static function stirpCarriageReturns( $input = '' ) {

        return self::stripCarriageReturns( $input );
    }

    /**
     * Replaces a backslash-escaped dollar sign (\$) with a plain dollar sign
     *
     * NOTE(review): despite the name this removes the escaping backslash
     * rather than adding one - confirm intent against callers.
     *
     * @param   string  $input  String to process
     * @return  string
     * @access  public
     * @static
     */
    public static function escapeDollarSigns( $input = '' ) {

        return str_replace( "\\\$", "$", $input );
    }

    /**
     * Removes percent-encoded octets (%XX)
     *
     * Octets are removed repeatedly until none is left, because removing one
     * octet can join the surrounding characters into a new one. If anything
     * was removed, runs of spaces are collapsed and the result is trimmed.
     *
     * @param   string  $input  String to sanitize
     * @return  string  String without percent-encoded octets
     * @access  public
     * @static
     */
    public static function escapeOctets( $input = '' ) {

        $match = array();
        $found = false;

        while ( preg_match( '/%[a-f0-9]{2}/i', $input, $match ) ) {
            $input = str_replace( $match[0], '', $input );
            $found = true;
        }

        if ( $found ) {
            // Strip out the whitespace that may now exist after removing the octets.
            $input = trim( preg_replace( '/ +/', ' ', $input ) );
        }

        return $input;
    }

    /**
     * Sanitizes for safe input. Takes an array of options, applied in this
     * order when enabled:
     *
     * - hidden_spaces  - removeHiddenSpaces()      (default true)
     * - remove_html    - stripAllTags()            (default false)
     * - dollar         - escapeDollarSigns()       (default true)
     * - carriage       - stripCarriageReturns()    (default true)
     * - unicode        - escapeUnicode()           (default true)
     * - escape_html    - escapeForDisplay()        (default true)
     * - backslash      - escapeBackslash()         (default true)
     *
     * An `encode` option is accepted for backwards compatibility but is not
     * consulted by this method.
     *
     * Arrays are sanitized recursively with the same options; keys are kept.
     *
     * NOTE(review): every value that is empty() - including '0' and 0 -
     * yields null. Kept as is because callers may rely on it.
     *
     * @param   mixed   $input      Data to sanitize
     * @param   array   $options    Option overrides, see above
     * @return  mixed   Sanitized data, or null when $input is empty
     * @access  public
     * @static
     */
    public static function cleanInput($input, $options = array()) {

        if ( empty( $input ) ) {
            return null;
        }

        $options = array_merge(
            array(
                'hidden_spaces' => true,
                'remove_html'   => false,
                'encode'        => true,
                'dollar'        => true,
                'carriage'      => true,
                'unicode'       => true,
                'escape_html'   => true,
                'backslash'     => true),
            $options);

        if ( is_array( $input ) ) {

            $output = array();
            foreach ( $input as $k => $v ) {
                $output[$k] = self::cleanInput( $v, $options );
            }

            return $output;
        }

        // Start from the raw value so that every step has something to work
        // on, even when the steps before it are switched off.
        $output = $input;

        if ( $options['hidden_spaces'] ) {
            $output = self::removeHiddenSpaces( $output );
        }

        if ( $options['remove_html'] ) {
            $output = self::stripAllTags( $output );
        }

        if ( $options['dollar'] ) {
            $output = self::escapeDollarSigns( $output );
        }

        if ( $options['carriage'] ) {
            $output = self::stripCarriageReturns( $output );
        }

        if ( $options['unicode'] ) {
            $output = self::escapeUnicode( $output );
        }

        if ( $options['escape_html'] ) {
            $output = self::escapeForDisplay( $output );
        }

        if ( $options['backslash'] ) {
            $output = self::escapeBackslash( $output );
        }

        return $output;
    }

    /**
     * Removes url and path fragments from a file name
     *
     * Strips "http://", "/", "\", "..", "?" and "%00" in a single pass. If
     * the result still contains "%00", ".." or a NUL byte - which can only
     * happen when the input was crafted so that the removals join up into a
     * new fragment - the name is rejected and an empty string is returned.
     *
     * @param   string  $str    File name to sanitize
     * @return  string  Sanitized file name, or '' when the name is rejected
     * @access  public
     * @static
     */
    public static function cleanFilename( $str ) {

        // Replacements run in list order. No separate '../' entry is needed:
        // once every '/' is gone that sequence cannot occur any more.
        $str = str_replace(
            array( "http://", "/", "\\", "..", "?", "%00" ),
            "",
            $str );

        // strpos() returns 0 for a match at the very start of the string, so
        // the result has to be compared against false explicitly.
        if ( strpos( $str, '%00' ) !== false
            || strpos( $str, "\0" ) !== false
            || strpos( $str, '..' ) !== false ) {
            return '';
        }

        return $str;
    }

    /**
     * Placeholder for url sanitizing
     *
     * Not implemented: the argument is ignored and nothing is returned.
     *
     * @param   string  $url    Url to sanitize
     * @return  null
     * @access  public
     * @static
     */
    public static function cleanUrl( $url ) {

        return;
    }
}