TYPO3 CMS  TYPO3_8-7
RemoveXSS.php
Go to the documentation of this file.
1 <?php
20 
/**
 * Heuristic XSS filter: defuses dangerous tag names, event-handler attributes
 * and script protocols by injecting a marker string into the matched keyword.
 *
 * @deprecated process() reports every call through GeneralUtility::logDeprecatedFunction().
 */
class RemoveXSS
{
    /**
     * Keywords to neutralise, grouped by the context in which they are dangerous.
     * A keyword may appear in several groups (e.g. 'style' is both a tag and an
     * attribute); it is then checked once per group with the matching pattern.
     *
     * @var array
     */
    private static $keywordsByType = [
        'protocol' => ['javascript', 'vbscript', 'expression'],
        'tag' => [
            'applet', 'meta', 'xml', 'blink', 'link', 'style', 'script', 'embed', 'object', 'iframe', 'frame',
            'frameset', 'ilayer', 'layer', 'bgsound', 'title', 'base', 'video', 'audio', 'track', 'canvas',
        ],
        'attribute' => [
            'style', 'onabort', 'onactivate', 'onafterprint', 'onafterupdate', 'onbeforeactivate',
            'onbeforecopy', 'onbeforecut', 'onbeforedeactivate', 'onbeforeeditfocus', 'onbeforepaste', 'onbeforeprint',
            'onbeforeunload', 'onbeforeupdate', 'onblur', 'onbounce', 'oncanplay', 'oncanplaythrough', 'oncellchange',
            'onchange', 'onclick', 'oncontextmenu', 'oncontrolselect', 'oncopy', 'oncuechange', 'oncut',
            'ondataavailable', 'ondatasetchanged', 'ondatasetcomplete', 'ondblclick', 'ondeactivate', 'ondrag',
            'ondragend', 'ondragenter', 'ondragleave', 'ondragover', 'ondragstart', 'ondrop', 'ondurationchange',
            'onemptied', 'onended', 'onerror', 'onerrorupdate', 'onfilterchange', 'onfinish', 'onfocus', 'onfocusin',
            'onfocusout', 'onhashchange', 'onhelp', 'oninput', 'oninvalid', 'onkeydown', 'onkeypress', 'onkeyup',
            'onlayoutcomplete', 'onload', 'onloadeddata', 'onloadedmetadata', 'onloadstart', 'onlosecapture',
            'onmessage', 'onmousedown', 'onmouseenter', 'onmouseleave', 'onmousemove', 'onmouseout', 'onmouseover',
            'onmouseup', 'onmousewheel', 'onmove', 'onmoveend', 'onmovestart', 'onoffline', 'ononline', 'onpagehide',
            'onpageshow', 'onpaste', 'onpause', 'onplay', 'onplaying', 'onpopstate', 'onprogress', 'onpropertychange',
            'onratechange', 'onreadystatechange', 'onredo', 'onreset', 'onresize', 'onresizeend', 'onresizestart',
            'onrowenter', 'onrowexit', 'onrowsdelete', 'onrowsinserted', 'onscroll', 'onseeked', 'onseeking',
            'onselect', 'onselectionchange', 'onselectstart', 'onshow', 'onstalled', 'onstart', 'onstop', 'onstorage',
            'onsubmit', 'onsuspend', 'ontimeupdate', 'onundo', 'onunload', 'onvolumechange', 'onwaiting',
        ],
    ];

    /**
     * Neutralises potential XSS keywords in $value.
     *
     * Steps: strip control characters, decode numeric character references of
     * printable ASCII, then repeatedly inject $replaceString after the second
     * character of every keyword that occurs in a dangerous context, until a
     * full pass leaves the value unchanged.
     *
     * @param string $value Input to be cleaned
     * @param string $replaceString Marker inserted into matched keywords; an empty value falls back to '<x>'
     * @return string The cleaned value
     * @deprecated Calls GeneralUtility::logDeprecatedFunction() on every invocation.
     */
    public static function process($value, $replaceString = '<x>')
    {
        GeneralUtility::logDeprecatedFunction();
        // An empty marker would make every replacement a no-op, so fall back to the default.
        if ((string)$replaceString === '') {
            $replaceString = '<x>';
        }
        // Strip control characters; TAB (0x09), LF (0x0a) and CR (0x0d) are kept.
        // This prevents keyword splitting such as <java\0script>. TAB/LF/CR splits are
        // handled by the keyword patterns below because those characters are legal input.
        // NOTE(review): the upper bound \x19 leaves \x1a-\x1f untouched - possibly \x1f was intended; confirm.
        $value = preg_replace('/([\x00-\x08]|[\x0b-\x0c]|[\x0e-\x19])/', '', $value);
        $value = self::decodePrintableCharacterReferences($value);

        $potentialKeywords = self::findPotentialKeywords($value);
        if (empty($potentialKeywords)) {
            return $value;
        }

        // Patterns and replacements do not depend on $value, so build them once.
        $replacements = [];
        foreach ($potentialKeywords as list($keyword, $type)) {
            $replacements[] = [
                self::buildKeywordPattern($keyword, $type),
                // Inject the marker after the second character to render the keyword harmless
                substr_replace($keyword, $replaceString, 2, 0),
            ];
        }

        // Keep replacing as long as the previous pass changed something
        do {
            $valueBeforeReplacement = $value;
            foreach ($replacements as list($pattern, $replacement)) {
                $value = preg_replace($pattern, $replacement, $value);
            }
        } while ($valueBeforeReplacement !== $value);

        return $value;
    }

    /**
     * Decodes hexadecimal and decimal character references of printable ASCII
     * characters (the codes for ',', '.', '<' and '>' are not in the lists), e.g.
     * <IMG SRC=&#X6A&#X61&#X76&#X61...>. Repeats until no reference is left,
     * because decoding '&' can reveal a new reference.
     *
     * @param string $value
     * @return string
     */
    private static function decodePrintableCharacterReferences($value)
    {
        $searchHexEncodings = '/&#[xX]0{0,8}(21|22|23|24|25|26|27|28|29|2a|2b|2d|2f|30|31|32|33|34|35|36|37|38|39|3a|3b|3d|3f|40|41|42|43|44|45|46|47|48|49|4a|4b|4c|4d|4e|4f|50|51|52|53|54|55|56|57|58|59|5a|5b|5c|5d|5e|5f|60|61|62|63|64|65|66|67|68|69|6a|6b|6c|6d|6e|6f|70|71|72|73|74|75|76|77|78|79|7a|7b|7c|7d|7e);?/i';
        $searchUnicodeEncodings = '/&#0{0,8}(33|34|35|36|37|38|39|40|41|42|43|45|47|48|49|50|51|52|53|54|55|56|57|58|59|61|63|64|65|66|67|68|69|70|71|72|73|74|75|76|77|78|79|80|81|82|83|84|85|86|87|88|89|90|91|92|93|94|95|96|97|98|99|100|101|102|103|104|105|106|107|108|109|110|111|112|113|114|115|116|117|118|119|120|121|122|123|124|125|126);?/i';
        while (preg_match($searchHexEncodings, $value) || preg_match($searchUnicodeEncodings, $value)) {
            $value = preg_replace_callback(
                $searchHexEncodings,
                function ($matches) {
                    return chr(hexdec($matches[1]));
                },
                $value
            );
            $value = preg_replace_callback(
                $searchUnicodeEncodings,
                function ($matches) {
                    return chr((int)$matches[1]);
                },
                $value
            );
        }
        return $value;
    }

    /**
     * Cheap pre-filter: returns [keyword, type] pairs for every keyword that occurs
     * in $value once whitespace and encoded TAB/LF/CR references are removed.
     *
     * @param string $value
     * @return array List of [keyword, type] pairs, type being 'protocol', 'tag' or 'attribute'
     */
    private static function findPotentialKeywords($value)
    {
        // Remove the potential &#xxx; whitespace references for testing
        $valueForQuickCheck = preg_replace('/(&#[xX]?0{0,8}(9|10|13|a|b|d);?)*\s*/i', '', $value);
        $potentialKeywords = [];
        foreach (self::$keywordsByType as $type => $keywords) {
            foreach ($keywords as $keyword) {
                // stripos is faster than the regular expressions used later, and since all
                // keywords consist of characters < 0x80 the non-multibyte version is safe.
                if (stripos($valueForQuickCheck, $keyword) !== false) {
                    $potentialKeywords[] = [$keyword, $type];
                }
            }
        }
        return $potentialKeywords;
    }

    /**
     * Builds the case-insensitive regular expression that matches $keyword with
     * optional (encoded) whitespace between its letters, constrained by $type.
     *
     * @param string $keyword
     * @param string $type One of 'protocol', 'tag', 'attribute'
     * @return string Delimited PCRE pattern
     */
    private static function buildKeywordPattern($keyword, $type)
    {
        // Literal whitespace, or TAB/LF/CR as hex (9, a, d) or decimal (9, 10, 13) reference.
        // Hex b (VT) is kept because earlier versions of this pattern matched it.
        $whitespace = '((&#[xX]0{0,8}([9abd]);?)|(&#0{0,8}(9|10|13);?)|\s)*';
        $pattern = implode($whitespace, str_split($keyword));
        // Handle each type a little differently (extra conditions to reduce false positives)
        switch ($type) {
            case 'protocol':
                // These take the form of e.g. 'javascript:'
                $pattern .= $whitespace . '(?=:)';
                break;
            case 'tag':
                // These take the form of e.g. '<SCRIPT[^\da-z] ....';
                $pattern = '(?<=<)' . $pattern . $whitespace . '(?=[^\da-z])';
                break;
            case 'attribute':
                // These take the form of e.g. 'onload='. Beware that a lot of characters are
                // allowed between the attribute name and the equal sign!
                $pattern .= '[\s\!\#\$\%\&\(\)\*\~\+\-\_\.\,\:\;\?\@\[\/\|\\\\\]\^\`]*(?==)';
                break;
        }
        return '/' . $pattern . '/i';
    }
}
static process($value, $replaceString='<x>')
Definition: RemoveXSS.php:38