TYPO3 CMS  TYPO3_7-6
RemoveXSS.php
Go to the documentation of this file.
1 <?php
/**
 * Keyword based XSS filter.
 *
 * Potentially dangerous HTML tags, event-handler attributes and script
 * protocols are not removed; instead a marker string is injected into the
 * keyword so that a browser no longer recognises it.
 */
class RemoveXSS
{
    /**
     * Neutralises potential XSS keywords in the given string.
     *
     * The value is processed in three stages:
     *  1. control characters are stripped,
     *  2. numeric character references of plain printable ASCII characters
     *     are decoded, so encoded keywords become visible,
     *  3. every tag, attribute or protocol keyword found gets $replaceString
     *     inserted after its second character (e.g. "script" becomes
     *     "sc<x>ript"). A matched keyword is emitted in its canonical
     *     lower-case spelling, without any whitespace that was embedded in it.
     *
     * @param string $value Input that may contain markup
     * @param string $replaceString Marker injected into matched keywords; an
     *        empty value falls back to '<x>' because an empty marker would
     *        leave the keyword intact
     * @return string The input with all matched keywords broken up
     */
    public static function process($value, $replaceString = '<x>')
    {
        // Loose comparison on purpose: null and false fall back to the default as well.
        if ($replaceString == '') {
            $replaceString = '<x>';
        }

        // Strip control characters 0x00-0x08, 0x0b-0x0c and 0x0e-0x19.
        // TAB (0x09), LF (0x0a) and CR (0x0d) are kept because they are legitimate
        // in some inputs; splits using them are handled by the keyword patterns below.
        // The range stops at 0x19, so 0x1a-0x1f are left in place as well.
        // This prevents character re-spacing such as <java\0script>.
        $value = preg_replace('/([\x00-\x08]|[\x0b-\x0c]|[\x0e-\x19])/', '', $value);

        // Decode hexadecimal and decimal character references of printable ASCII
        // characters; a user never needs to encode these. This defeats input such as
        // <IMG SRC=&#X40&#X61&#X76&#X61&#X73&#X63&#X72&#X69&#X70&#X74&#X3A...>
        $searchHexEncodings = '/&#[xX]0{0,8}(21|22|23|24|25|26|27|28|29|2a|2b|2d|2f|30|31|32|33|34|35|36|37|38|39|3a|3b|3d|3f|40|41|42|43|44|45|46|47|48|49|4a|4b|4c|4d|4e|4f|50|51|52|53|54|55|56|57|58|59|5a|5b|5c|5d|5e|5f|60|61|62|63|64|65|66|67|68|69|6a|6b|6c|6d|6e|6f|70|71|72|73|74|75|76|77|78|79|7a|7b|7c|7d|7e);?/i';
        $searchUnicodeEncodings = '/&#0{0,8}(33|34|35|36|37|38|39|40|41|42|43|45|47|48|49|50|51|52|53|54|55|56|57|58|59|61|63|64|65|66|67|68|69|70|71|72|73|74|75|76|77|78|79|80|81|82|83|84|85|86|87|88|89|90|91|92|93|94|95|96|97|98|99|100|101|102|103|104|105|106|107|108|109|110|111|112|113|114|115|116|117|118|119|120|121|122|123|124|125|126);?/i';
        // Repeat until stable: decoding one reference can reveal another one.
        while (preg_match($searchHexEncodings, $value) || preg_match($searchUnicodeEncodings, $value)) {
            $value = preg_replace_callback(
                $searchHexEncodings,
                function ($matches) {
                    return chr(hexdec($matches[1]));
                },
                $value
            );
            $value = preg_replace_callback(
                $searchUnicodeEncodings,
                function ($matches) {
                    return chr((int)$matches[1]);
                },
                $value
            );
        }

        // Now the only remaining whitespace attacks are \t, \n, and \r
        $protocolKeywords = ['javascript', 'vbscript', 'expression'];
        $tagKeywords = ['applet', 'meta', 'xml', 'blink', 'link', 'style', 'script', 'embed', 'object', 'iframe', 'frame',
            'frameset', 'ilayer', 'layer', 'bgsound', 'title', 'base', 'video', 'audio', 'track', 'canvas'];
        $attributeKeywords = ['style', 'onabort', 'onactivate', 'onafterprint', 'onafterupdate', 'onbeforeactivate',
            'onbeforecopy', 'onbeforecut', 'onbeforedeactivate', 'onbeforeeditfocus', 'onbeforepaste', 'onbeforeprint',
            'onbeforeunload', 'onbeforeupdate', 'onblur', 'onbounce', 'oncanplay', 'oncanplaythrough', 'oncellchange', 'onchange',
            'onclick', 'oncontextmenu', 'oncontrolselect', 'oncopy', 'oncuechange', 'oncut', 'ondataavailable', 'ondatasetchanged',
            'ondatasetcomplete', 'ondblclick', 'ondeactivate', 'ondrag', 'ondragend', 'ondragenter', 'ondragleave', 'ondragover',
            'ondragstart', 'ondrop', 'ondurationchange', 'onemptied', 'onended', 'onerror', 'onerrorupdate', 'onfilterchange',
            'onfinish', 'onfocus', 'onfocusin', 'onfocusout', 'onhashchange', 'onhelp', 'oninput', 'oninvalid', 'onkeydown',
            'onkeypress', 'onkeyup', 'onlayoutcomplete', 'onload', 'onloadeddata', 'onloadedmetadata', 'onloadstart',
            'onlosecapture', 'onmessage', 'onmousedown', 'onmouseenter', 'onmouseleave', 'onmousemove', 'onmouseout',
            'onmouseover', 'onmouseup', 'onmousewheel', 'onmove', 'onmoveend', 'onmovestart', 'onoffline', 'ononline',
            'onpagehide', 'onpageshow', 'onpaste', 'onpause', 'onplay', 'onplaying', 'onpopstate', 'onprogress',
            'onpropertychange', 'onratechange', 'onreadystatechange', 'onredo', 'onreset', 'onresize', 'onresizeend',
            'onresizestart', 'onrowenter', 'onrowexit', 'onrowsdelete', 'onrowsinserted', 'onscroll', 'onseeked', 'onseeking',
            'onselect', 'onselectionchange', 'onselectstart', 'onshow', 'onstalled', 'onstart', 'onstop', 'onstorage', 'onsubmit',
            'onsuspend', 'ontimeupdate', 'onundo', 'onunload', 'onvolumechange', 'onwaiting'];

        // Map every keyword to the type(s) it has to be checked as. The map is derived
        // from the typed lists so that a keyword can never be present in one list but
        // missing from the overall list. Some keywords (e.g. "style") have several types.
        $keywordTypes = [];
        $typedKeywordLists = [
            'protocol' => $protocolKeywords,
            'tag' => $tagKeywords,
            'attribute' => $attributeKeywords,
        ];
        foreach ($typedKeywordLists as $type => $keywords) {
            foreach ($keywords as $keyword) {
                $keywordTypes[$keyword][] = $type;
            }
        }

        // Remove whitespace and encoded TAB/LF/CR (decimal 9|10|13, hex 9|a|d; hex b is
        // kept from the previous implementation) so the cheap substring test below
        // also sees keywords that were split up.
        $valueForQuickCheck = preg_replace('/(&#[xX]?0{0,8}(9|10|13|a|b|d);?)*\s*/i', '', $value);

        // stripos() is faster than the regular expressions used later, and because the
        // keywords only contain characters < 0x80 the non-multibyte version is safe.
        // Patterns are built only for keywords that can occur at all, and only once.
        $replacements = [];
        foreach ($keywordTypes as $keyword => $types) {
            if (stripos($valueForQuickCheck, $keyword) === false) {
                continue;
            }
            // Inject the marker after the second character to render the keyword harmless
            $replacement = substr_replace($keyword, $replaceString, 2, 0);
            foreach ($types as $type) {
                $replacements[] = [self::buildKeywordPattern($keyword, $type), $replacement];
            }
        }

        if (!empty($replacements)) {
            // Keep replacing as long as the previous round changed something
            do {
                $valueBeforeReplacement = $value;
                foreach ($replacements as $replacementItem) {
                    list($pattern, $replacement) = $replacementItem;
                    $value = preg_replace($pattern, $replacement, $value);
                }
            } while ($valueBeforeReplacement !== $value);
        }
        return $value;
    }

    /**
     * Builds the case-insensitive regular expression that finds one keyword,
     * tolerating (encoded) whitespace between its letters.
     *
     * @param string $keyword Keyword consisting of plain letters
     * @param string $type One of 'protocol', 'tag' or 'attribute'
     * @return string Complete PCRE pattern including delimiters and modifier
     */
    private static function buildKeywordPattern($keyword, $type)
    {
        // Literal whitespace or an encoded TAB/LF/CR: hex 9, a, d (b is kept from the
        // previous implementation) or decimal 9, 10, 13.
        $whitespace = '((&#[xX]0{0,8}([9abd]);?)|(&#0{0,8}(9|10|13);?)|\s)*';
        $pattern = implode($whitespace, str_split($keyword));
        // Handle each type a little different (extra conditions to reduce false positives)
        switch ($type) {
            case 'protocol':
                // These take the form of e.g. 'javascript:'
                $pattern .= $whitespace . '(?=:)';
                break;
            case 'tag':
                // These take the form of e.g. '<SCRIPT[^\da-z] ....'
                $pattern = '(?<=<)' . $pattern . $whitespace . '(?=[^\da-z])';
                break;
            case 'attribute':
                // These take the form of e.g. 'onload='. Beware that a lot of characters
                // are allowed between the attribute and the equal sign!
                $pattern .= '[\s\!\#\$\%\&\(\)\*\~\+\-\_\.\,\:\;\?\@\[\/\|\\\\\]\^\`]*(?==)';
                break;
        }
        return '/' . $pattern . '/i';
    }
}
static process($value, $replaceString='<x>')
Definition: RemoveXSS.php:30