‪TYPO3CMS  ‪main
HtmlParser.php
Go to the documentation of this file.
1 <?php
2 
3 /*
4  * This file is part of the TYPO3 CMS project.
5  *
6  * It is free software; you can redistribute it and/or modify it under
7  * the terms of the GNU General Public License, either version 2
8  * of the License, or any later version.
9  *
10  * For the full copyright and license information, please read the
11  * LICENSE.txt file that was distributed with this source code.
12  *
13  * The TYPO3 project - inspiring people to share!
14  */
15 
16 namespace ‪TYPO3\CMS\Core\Html;
17 
20 
26 {
27  protected array ‪$caseShift_cache = [];
28 
29  // Void elements that do not have closing tags, as defined by HTML5, except link element
30  public const ‪VOID_ELEMENTS = 'area|base|br|col|command|embed|hr|img|input|keygen|meta|param|source|track|wbr';
31 
32  /************************************
33  *
34  * Parsing HTML code
35  *
36  ************************************/
/**
 * Splits $content at the top-level blocks formed by the given tags.
 *
 * For well-formed input the result alternates: even indexes hold the text
 * between blocks, odd indexes hold one complete block (start tag, nested
 * content, end tag). Nested occurrences of the listed tags stay inside
 * their enclosing block.
 *
 * @param string $tag Comma separated list of tag names
 * @param string $content HTML content to split
 * @param bool $eliminateExtraEndTags If set, end tags without a matching start tag are dropped
 * @return string[] Empty array if the content could not be split
 */
public function splitIntoBlock($tag, $content, $eliminateExtraEndTags = false)
{
    $quotedNames = array_map(
        static fn ($name) => preg_quote($name, '/'),
        array_unique(GeneralUtility::trimExplode(',', $tag, true))
    );
    $pattern = '/\\<\\/?(' . implode('|', $quotedNames) . ')(\\s*\\>|\\s[^\\>]*\\>)/si';
    $pieces = preg_split($pattern, $content);
    if (empty($pieces)) {
        return [];
    }
    // Text in front of the first matched tag
    $current = array_shift($pieces);
    $offset = strlen($current);
    $result = [];
    $depth = 0;
    // Every remaining piece is the text that follows one matched tag. The tag
    // itself is not part of the piece, so it is read back from $content.
    foreach ($pieces as $textAfterTag) {
        $remaining = substr($content, $offset);
        $tagLength = strcspn($remaining, '>') + 1;
        if (!str_starts_with($remaining, '</')) {
            // Start tag: on ground level the text collected so far is flushed
            if ($depth === 0) {
                $result[] = $current;
                $current = '';
            }
            $depth++;
            $chunk = substr($content, $offset, strlen($textAfterTag) + $tagLength);
            $offset += strlen($chunk);
            $current .= $chunk;
            continue;
        }
        // End tag
        $depth--;
        $dropped = $eliminateExtraEndTags && $depth < 0;
        if ($dropped) {
            $depth = 0;
        } else {
            $current .= substr($content, $offset, $tagLength);
        }
        $offset += $tagLength;
        // Back on ground level by a real end tag: the block is complete
        if ($depth === 0 && !$dropped) {
            $result[] = $current;
            $current = '';
        }
        $chunk = substr($content, $offset, strlen($textAfterTag));
        $offset += strlen($chunk);
        $current .= $chunk;
    }
    $result[] = $current;
    return $result;
}
111 
/**
 * Splits $content with splitIntoBlock() and processes it recursively.
 *
 * Text between blocks is passed to $procObj->{$callBackContent}(); each
 * block is broken into start tag, recursively processed inner content and
 * a rebuilt end tag, which are passed to $procObj->{$callBackTags}().
 *
 * @param string $tag Comma separated list of tag names
 * @param string $content HTML content
 * @param object $procObj Object holding the callback methods
 * @param string $callBackContent Method name called as ($text, $level); skipped if empty
 * @param string $callBackTags Method name called as ($tagsArray, $level); skipped if empty
 * @param int $level Current nesting level, starting at 0
 * @return string Processed content
 */
public function splitIntoBlockRecursiveProc($tag, $content, &$procObj, $callBackContent, $callBackTags, $level = 0)
{
    $segments = $this->splitIntoBlock($tag, $content, true);
    foreach ($segments as $index => $segment) {
        if ($index % 2 === 0) {
            // Even index: text outside of the blocks
            if ($callBackContent) {
                $segments[$index] = $procObj->{$callBackContent}($segment, $level);
            }
            continue;
        }
        // Odd index: a complete block
        $originalTagName = $this->getFirstTagName($segment, true);
        $block = [
            'tag_start' => $this->getFirstTag($segment),
            'tag_end' => '</' . $originalTagName . '>',
            'tag_name' => strtolower($originalTagName),
            'content' => $this->splitIntoBlockRecursiveProc(
                $tag,
                $this->removeFirstAndLastTag($segment),
                $procObj,
                $callBackContent,
                $callBackTags,
                $level + 1
            ),
        ];
        if ($callBackTags) {
            $block = $procObj->{$callBackTags}($block, $level);
        }
        $segments[$index] = $block['tag_start'] . $block['content'] . $block['tag_end'];
    }
    return implode('', $segments);
}
147 
/**
 * Splits $content at every single occurrence of the given tags.
 *
 * Unlike splitIntoBlock() no nesting is tracked: even indexes of the result
 * hold the text between tags, odd indexes hold one matched tag each.
 *
 * @param string $tag Comma separated list of tag names
 * @param string $content HTML content to split
 * @return string[] Empty array if the content could not be split
 */
public function splitTags($tag, $content)
{
    $quotedNames = array_map(
        static fn ($name) => preg_quote($name, '/'),
        GeneralUtility::trimExplode(',', $tag, true)
    );
    $pattern = '/\\<(' . implode('|', $quotedNames) . ')(\\s[^>]*)?\\/?>/si';
    $pieces = preg_split($pattern, $content);
    if (empty($pieces)) {
        return [];
    }
    // Text in front of the first matched tag
    $leadingText = array_shift($pieces);
    $result = [$leadingText];
    $offset = strlen($leadingText);
    foreach ($pieces as $textAfterTag) {
        // The matched tag is not part of the split result; read it from $content
        $tagLength = strcspn(substr($content, $offset), '>') + 1;
        $tagMarkup = substr($content, $offset, $tagLength);
        $result[] = $tagMarkup;
        $result[] = $textAfterTag;
        $offset += strlen($tagMarkup) + strlen($textAfterTag);
    }
    return $result;
}
189 
/**
 * Returns everything between the first and the last element node of $str.
 *
 * @param string $str HTML string
 * @return string Inner content; empty if there is no element node or only a single one
 */
public function removeFirstAndLastTag($str)
{
    $parser = SimpleParser::fromString($str);
    $firstElement = $parser->getFirstNode(SimpleNode::TYPE_ELEMENT);
    $lastElement = $parser->getLastNode(SimpleNode::TYPE_ELEMENT);
    if ($firstElement === null || $firstElement === $lastElement) {
        return '';
    }
    $nodes = $parser->getNodes();
    $start = $firstElement->getIndex() + 1;
    $length = $lastElement->getIndex() - $firstElement->getIndex() - 1;
    $inner = '';
    foreach (array_slice($nodes, $start, $length) as $node) {
        $inner .= (string)$node;
    }
    return $inner;
}
212 
/**
 * Returns $str from its beginning up to and including the first element node.
 *
 * @param string $str HTML string
 * @return string Leading content plus first tag; empty if there is no element node
 */
public function getFirstTag($str)
{
    $parser = SimpleParser::fromString($str);
    $firstElement = $parser->getFirstNode(SimpleNode::TYPE_ELEMENT);
    if ($firstElement === null) {
        return '';
    }
    $nodes = $parser->getNodes();
    $result = '';
    foreach (array_slice($nodes, 0, $firstElement->getIndex() + 1) as $node) {
        $result .= (string)$node;
    }
    return $result;
}
234 
/**
 * Returns the name of the first element node in $str that has a name.
 *
 * @param string $str HTML string
 * @param bool $preserveCase If set, the name is returned as written; otherwise in upper case
 * @return string Tag name; empty if no named element node was found
 */
public function getFirstTagName($str, $preserveCase = false)
{
    $parser = SimpleParser::fromString($str);
    foreach ($parser->getNodes(SimpleNode::TYPE_ELEMENT) as $element) {
        $name = $element->getElementName();
        if ($name !== null) {
            return $preserveCase ? $name : strtoupper($name);
        }
    }
    return '';
}
256 
/**
 * Parses the attributes of a tag (or of a plain attribute string).
 *
 * @param string $tag Tag or attribute string
 * @param bool $deHSC If set, attribute values are passed through htmlspecialchars_decode()
 * @return array Two arrays: [0] attribute values keyed by lower-cased name,
 *               [1] meta data per attribute ("origTag": name as written,
 *               "dashType": quote character around the value, if it had a value)
 */
public function get_tag_attributes($tag, $deHSC = false)
{
    [$components, $metaC] = $this->split_tag_attributes($tag);
    if (!is_array($components)) {
        return [null, null];
    }
    $attributes = [];
    $attributesMeta = [];
    // Name of the attribute that is waiting for its value
    $pendingName = '';
    // True directly after an "=" token
    $expectValue = false;
    foreach ($components as $index => $token) {
        if ($token === '=') {
            $expectValue = true;
            continue;
        }
        if ($expectValue) {
            // A value is only assigned if an attribute name is waiting for it
            if ($pendingName) {
                $attributes[$pendingName] = $deHSC ? htmlspecialchars_decode($token) : $token;
                $attributesMeta[$pendingName]['dashType'] = $metaC[$index];
                $pendingName = '';
            }
        } else {
            $rawName = preg_replace('/[^[:alnum:]_\\:\\-]/', '', $token) ?? '';
            if ($rawName) {
                $pendingName = strtolower((string)$rawName);
                $attributesMeta[$pendingName] = ['origTag' => $rawName];
                $attributes[$pendingName] = '';
            }
        }
        $expectValue = false;
    }
    return [$attributes, $attributesMeta];
}
304 
/**
 * Tokenizes the attribute part of a tag.
 *
 * A leading "<tagname " and a trailing ">" are removed first. The rest is
 * split into quoted strings, "=" signs and unquoted words.
 *
 * @param string $tag Tag or attribute string
 * @return array Two parallel lists: [0] the tokens with surrounding quotes
 *               removed, [1] the quote character of each token ('' if unquoted)
 */
public function split_tag_attributes($tag)
{
    $tagMatch = [];
    if (preg_match('/(\\<[^\\s]+\\s+)?(.*?)\\s*(\\>)?$/s', $tag, $tagMatch) !== 1) {
        return [[], []];
    }
    $values = [];
    $quoteChars = [];
    $tokens = [];
    $tokenCount = preg_match_all('/("[^"]*"|\'[^\']*\'|[^\\s"\'\\=]+|\\=)/s', $tagMatch[2], $tokens);
    if ($tokenCount > 0) {
        foreach ($tokens[1] as $token) {
            $isQuoted = $token[0] === '"' || $token[0] === '\'';
            $quoteChars[] = $isQuoted ? $token[0] : '';
            $values[] = $isQuoted ? substr($token, 1, -1) : $token;
        }
    }
    return [$values, $quoteChars];
}
343 
344  /*********************************
345  *
346  * Clean HTML code
347  *
348  *********************************/
/**
 * Cleans HTML content tag by tag according to $tags.
 *
 * The content is split at "<". Comments and CDATA sections of the form
 * "<![CDATA[*/ ... /*]]>*/" are passed through unchanged. Every other tag is
 * looked up by its lower-cased name in $tags:
 *  - not listed: dropped, or kept if $keepAll is set
 *  - listed with a non-array value: kept as is
 *  - listed with an array: processed using these optional keys:
 *    overrideAttribs, allowedAttribs ("0" removes all attributes),
 *    _allowedAttribs (pre-split list), fixAttrib (per attribute: set, unset,
 *    default, always, trim, intval, lower, upper, range, list,
 *    casesensitiveComp, removeIfFalse, removeIfEquals, prefixRelPathWith,
 *    userFunc), protect, remap, rmTagIfNoAttrib, nesting (truthy or "global")
 *
 * Start tags registered for nesting that never receive their end tag are
 * removed from the output again.
 *
 * @param string $content HTML content
 * @param array $tags Tag configuration keyed by lower-cased tag name
 * @param mixed $keepAll If set, tags not listed in $tags are kept; 'protect' converts their brackets to entities
 * @param int $hSC Mode passed to bidir_htmlspecialchars() for all non-tag content
 * @param array $addConfig Additional configuration, passed to stripEmptyTagsIfConfigured()
 * @return string Cleaned content
 */
public function HTMLcleaner($content, $tags = [], $keepAll = 0, $hSC = 0, $addConfig = [])
{
    $newContent = [];
    $tokArr = explode('<', $content);
    $newContent[] = $this->bidir_htmlspecialchars(current($tokArr), $hSC);
    // The first token (text before the first "<") was handled above
    $tokArrSliced = array_slice($tokArr, 1, null, true);
    $c = 1;
    $tagRegister = [];
    $tagStack = [];
    $inComment = false;
    $inCdata = false;
    $skipTag = false;
    foreach ($tokArrSliced as $tok) {
        if ($inComment) {
            if (($eocPos = strpos($tok, '-->')) === false) {
                // End of comment is not in this token. Go on until it is found in a later token.
                $newContent[$c++] = '<' . $tok;
                continue;
            }
            // Comment ends in the middle of the token: add comment and proceed with rest of the token
            $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 3);
            $tok = substr($tok, $eocPos + 3);
            $inComment = false;
            $skipTag = true;
        } elseif ($inCdata) {
            if (($eocPos = strpos($tok, '/*]]>*/')) === false) {
                // End of CDATA is not in this token. Go on until it is found in a later token.
                $newContent[$c++] = '<' . $tok;
                continue;
            }
            // CDATA ends in the middle of the token: add it and proceed with rest of the token
            // NOTE(review): the end marker is 7 characters long but 10 are consumed - confirm intent
            $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 10);
            $tok = substr($tok, $eocPos + 10);
            $inCdata = false;
            $skipTag = true;
        } elseif (str_starts_with($tok, '!--')) {
            if (($eocPos = strpos($tok, '-->')) === false) {
                // Comment starts in this token but does not end in it. Skip until the end of the comment.
                $newContent[$c++] = '<' . $tok;
                $inComment = true;
                continue;
            }
            // Start and end of comment are both in the current token. Add comment and proceed with rest of the token
            $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 3);
            $tok = substr($tok, $eocPos + 3);
            $skipTag = true;
        } elseif (str_starts_with($tok, '![CDATA[*/')) {
            if (($eocPos = strpos($tok, '/*]]>*/')) === false) {
                // CDATA starts in this token but does not end in it. Skip until the end of the CDATA section.
                $newContent[$c++] = '<' . $tok;
                $inCdata = true;
                continue;
            }
            // Start and end of CDATA are both in the current token. Add it and proceed with rest of the token
            // NOTE(review): see above regarding the offset of 10
            $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 10);
            $tok = substr($tok, $eocPos + 10);
            $skipTag = true;
        }
        $firstChar = $tok[0] ?? null;
        // It is a tag if the first char is alphanumeric or "/". This also avoids triggering on <?xml..> and <!DOCTYPE..>
        if (!$skipTag && preg_match('/[[:alnum:]\\/]/', (string)$firstChar) === 1) {
            $tagEnd = strpos($tok, '>');
            // If there is an end-bracket... tagEnd can't be 0 as the first character can't be a >
            if ($tagEnd) {
                $endTag = $firstChar === '/' ? 1 : 0;
                $tagContent = substr($tok, $endTag, $tagEnd - $endTag);
                $tagParts = preg_split('/\\s+/s', $tagContent, 2);
                $tagName = strtolower($tagParts[0]);
                $emptyTag = 0;
                if (isset($tags[$tagName])) {
                    // If there is processing to do for the tag:
                    if (is_array($tags[$tagName])) {
                        // Void elements get " /" appended when the tag is written
                        if (preg_match('/^(' . self::VOID_ELEMENTS . ')$/i', $tagName)) {
                            $emptyTag = 1;
                        }
                        // If NOT an endtag, do attribute processing
                        if (!$endTag) {
                            // Override attributes
                            if (isset($tags[$tagName]['overrideAttribs']) && (string)$tags[$tagName]['overrideAttribs'] !== '') {
                                $tagParts[1] = $tags[$tagName]['overrideAttribs'];
                            }
                            // Allowed attributes
                            if (isset($tags[$tagName]['allowedAttribs']) && (string)$tags[$tagName]['allowedAttribs'] !== '') {
                                // No attribs allowed
                                if ((string)$tags[$tagName]['allowedAttribs'] === '0') {
                                    $tagParts[1] = '';
                                } elseif (isset($tagParts[1]) && trim($tagParts[1])) {
                                    $tagAttrib = $this->get_tag_attributes($tagParts[1]);
                                    $tagParts[1] = '';
                                    $newTagAttrib = [];
                                    $tList = (array)(
                                        $tags[$tagName]['_allowedAttribs']
                                        ?? GeneralUtility::trimExplode(',', strtolower($tags[$tagName]['allowedAttribs']), true)
                                    );
                                    foreach ($tList as $allowTag) {
                                        if (isset($tagAttrib[0][$allowTag])) {
                                            $newTagAttrib[$allowTag] = $tagAttrib[0][$allowTag];
                                        }
                                    }

                                    $tagParts[1] = $this->compileTagAttribs($newTagAttrib, $tagAttrib[1]);
                                }
                            }
                            // Fixed attrib values
                            if (isset($tags[$tagName]['fixAttrib']) && is_array($tags[$tagName]['fixAttrib'])) {
                                $tagAttrib = $this->get_tag_attributes($tagParts[1] ?? '');
                                $tagParts[1] = '';
                                foreach ($tags[$tagName]['fixAttrib'] as $attr => $params) {
                                    if (isset($params['set']) && $params['set'] !== '') {
                                        $tagAttrib[0][$attr] = $params['set'];
                                    }
                                    if (!empty($params['unset'])) {
                                        unset($tagAttrib[0][$attr]);
                                    }
                                    if (!empty($params['default']) && !isset($tagAttrib[0][$attr])) {
                                        $tagAttrib[0][$attr] = $params['default'];
                                    }
                                    if (($params['always'] ?? false) || isset($tagAttrib[0][$attr])) {
                                        // With "always" the attribute may be absent here, and it may be
                                        // unset further down; all reads therefore fall back to a default.
                                        $caseSensitive = (bool)($params['casesensitiveComp'] ?? false);
                                        if ($params['trim'] ?? false) {
                                            $tagAttrib[0][$attr] = trim($tagAttrib[0][$attr] ?? '');
                                        }
                                        if ($params['intval'] ?? false) {
                                            $tagAttrib[0][$attr] = (int)($tagAttrib[0][$attr] ?? 0);
                                        }
                                        if ($params['lower'] ?? false) {
                                            $tagAttrib[0][$attr] = strtolower($tagAttrib[0][$attr] ?? '');
                                        }
                                        if ($params['upper'] ?? false) {
                                            $tagAttrib[0][$attr] = strtoupper($tagAttrib[0][$attr] ?? '');
                                        }
                                        if ($params['range'] ?? false) {
                                            if (isset($params['range'][1])) {
                                                $tagAttrib[0][$attr] = MathUtility::forceIntegerInRange($tagAttrib[0][$attr] ?? 0, (int)$params['range'][0], (int)$params['range'][1]);
                                            } else {
                                                $tagAttrib[0][$attr] = MathUtility::forceIntegerInRange($tagAttrib[0][$attr] ?? 0, (int)$params['range'][0]);
                                            }
                                        }
                                        if (isset($params['list']) && is_array($params['list'])) {
                                            // For the class attribute, remove from the attribute value any class not in the list
                                            // Classes are case sensitive
                                            if ($attr === 'class') {
                                                $newClasses = [];
                                                $classes = GeneralUtility::trimExplode(' ', $tagAttrib[0][$attr] ?? '', true);
                                                foreach ($classes as $class) {
                                                    if (in_array($class, $params['list'])) {
                                                        $newClasses[] = $class;
                                                    }
                                                }
                                                if (!empty($newClasses)) {
                                                    $tagAttrib[0][$attr] = implode(' ', $newClasses);
                                                } else {
                                                    $tagAttrib[0][$attr] = $params['list'][0];
                                                }
                                            } else {
                                                // The cache key includes the attribute name so that several
                                                // attributes of the same tag do not share one cached list.
                                                $allowedValues = (array)$this->caseShift($params['list'], $caseSensitive, $tagName . ':' . $attr);
                                                if (!in_array($this->caseShift($tagAttrib[0][$attr] ?? '', $caseSensitive), $allowedValues)) {
                                                    $tagAttrib[0][$attr] = $params['list'][0];
                                                }
                                            }
                                        }
                                        if (
                                            (($params['removeIfFalse'] ?? false) && $params['removeIfFalse'] !== 'blank' && empty($tagAttrib[0][$attr]))
                                            || (($params['removeIfFalse'] ?? false) && $params['removeIfFalse'] === 'blank' && (string)($tagAttrib[0][$attr] ?? '') === '')
                                        ) {
                                            unset($tagAttrib[0][$attr]);
                                        }
                                        if (
                                            (string)($params['removeIfEquals'] ?? '') !== ''
                                            && $this->caseShift($tagAttrib[0][$attr] ?? '', $caseSensitive) === $this->caseShift($params['removeIfEquals'], $caseSensitive)
                                        ) {
                                            unset($tagAttrib[0][$attr]);
                                        }
                                        if ($params['prefixRelPathWith'] ?? false) {
                                            $urlParts = parse_url($tagAttrib[0][$attr] ?? '');
                                            if (is_array($urlParts) && empty($urlParts['scheme']) && !empty($urlParts['path']) && !str_starts_with($urlParts['path'], '/')) {
                                                // If it is NOT an absolute URL (by http: or starting "/")
                                                $tagAttrib[0][$attr] = $params['prefixRelPathWith'] . $tagAttrib[0][$attr];
                                            }
                                        }
                                        if ($params['userFunc'] ?? false) {
                                            if (is_array($params['userFunc.'] ?? null)) {
                                                $params['userFunc.']['attributeValue'] = $tagAttrib[0][$attr] ?? null;
                                            } else {
                                                $params['userFunc.'] = $tagAttrib[0][$attr] ?? null;
                                            }
                                            $tagAttrib[0][$attr] = GeneralUtility::callUserFunction($params['userFunc'], $params['userFunc.'], $this);
                                        }
                                    }
                                }
                                $tagParts[1] = $this->compileTagAttribs($tagAttrib[0], $tagAttrib[1]);
                            }
                        } else {
                            // If endTag, remove any possible attributes:
                            $tagParts[1] = '';
                        }
                        // Protecting the tag by converting < and > to &lt; and &gt;
                        if (!empty($tags[$tagName]['protect'])) {
                            $lt = '&lt;';
                            $gt = '&gt;';
                        } else {
                            $lt = '<';
                            $gt = '>';
                        }
                        // Remapping tag name?
                        if (!empty($tags[$tagName]['remap'])) {
                            $tagParts[0] = $tags[$tagName]['remap'];
                        }
                        // rmTagIfNoAttrib
                        if ($endTag || empty($tags[$tagName]['rmTagIfNoAttrib']) || trim($tagParts[1] ?? '')) {
                            $setTag = true;
                            // Remove this closing tag if the tag is configured with exactly
                            // allowedAttribs = 0 and rmTagIfNoAttrib = 1
                            if ($endTag
                                && isset($tags[$tagName]['allowedAttribs']) && $tags[$tagName]['allowedAttribs'] === 0
                                && isset($tags[$tagName]['rmTagIfNoAttrib']) && $tags[$tagName]['rmTagIfNoAttrib'] === 1
                            ) {
                                $setTag = false;
                            }
                            if (isset($tags[$tagName]['nesting'])) {
                                if (!isset($tagRegister[$tagName])) {
                                    $tagRegister[$tagName] = [];
                                }
                                if ($endTag) {
                                    $correctTag = true;
                                    if ($tags[$tagName]['nesting'] === 'global') {
                                        $lastEl = end($tagStack);
                                        if ($tagName !== $lastEl) {
                                            if (in_array($tagName, $tagStack, true)) {
                                                // Drop all start tags that were opened after the matching one
                                                while (!empty($tagStack) && $tagName !== $lastEl) {
                                                    $elPos = end($tagRegister[$lastEl]);
                                                    unset($newContent[$elPos]);
                                                    array_pop($tagRegister[$lastEl]);
                                                    array_pop($tagStack);
                                                    $lastEl = end($tagStack);
                                                }
                                            } else {
                                                // The end tag has no open start tag on the stack
                                                $correctTag = false;
                                            }
                                        }
                                    }
                                    if (empty($tagRegister[$tagName]) || !$correctTag) {
                                        $setTag = false;
                                    } else {
                                        array_pop($tagRegister[$tagName]);
                                        if ($tags[$tagName]['nesting'] === 'global') {
                                            array_pop($tagStack);
                                        }
                                    }
                                } else {
                                    // Remember the output position of the start tag
                                    $tagRegister[$tagName][] = $c;
                                    if ($tags[$tagName]['nesting'] === 'global') {
                                        $tagStack[] = $tagName;
                                    }
                                }
                            }
                            if ($setTag) {
                                // Setting the tag
                                $newContent[$c++] = $lt . ($endTag ? '/' : '') . trim($tagParts[0] . ' ' . ($tagParts[1] ?? '')) . ($emptyTag ? ' /' : '') . $gt;
                            }
                        }
                    } else {
                        $newContent[$c++] = '<' . ($endTag ? '/' : '') . $tagContent . '>';
                    }
                } elseif ($keepAll) {
                    // This is if the tag was not defined in the array for processing:
                    if ($keepAll === 'protect') {
                        $lt = '&lt;';
                        $gt = '&gt;';
                    } else {
                        $lt = '<';
                        $gt = '>';
                    }
                    $newContent[$c++] = $lt . ($endTag ? '/' : '') . $tagContent . $gt;
                }
                $newContent[$c++] = $this->bidir_htmlspecialchars(substr($tok, $tagEnd + 1), $hSC);
            } else {
                $newContent[$c++] = $this->bidir_htmlspecialchars('<' . $tok, $hSC);
            }
        } else {
            $newContent[$c++] = $this->bidir_htmlspecialchars(($skipTag ? '' : '<') . $tok, $hSC);
            // It was not a tag anyways
            $skipTag = false;
        }
    }
    // Remove start tags that are still registered, i.e. never got their end tag
    foreach ($tagRegister as $tag => $positions) {
        foreach ($positions as $pKey) {
            unset($newContent[$pKey]);
        }
    }
    $newContent = implode('', $newContent);
    $newContent = $this->stripEmptyTagsIfConfigured($newContent, $addConfig);
    return $newContent;
}
681 
/**
 * Converts special HTML characters in $value in the requested direction.
 *
 * @param string $value Input value
 * @param int $dir 1: htmlspecialchars() with default flags,
 *                 2: htmlspecialchars() without double encoding of existing entities,
 *                 -1: htmlspecialchars_decode(),
 *                 anything else: value is returned unchanged
 * @return string Converted value
 */
public function bidir_htmlspecialchars($value, $dir)
{
    return match ((int)$dir) {
        1 => htmlspecialchars($value),
        2 => htmlspecialchars($value, ENT_COMPAT, 'UTF-8', false),
        -1 => htmlspecialchars_decode($value),
        default => $value,
    };
}
702 
/**
 * Prefixes relative resource paths in HTML content.
 *
 * Handles the attributes background (td, body, table), src (img, input,
 * script, embed), href (link, a), action (form), value (param with
 * name="movie") and srcset (source), plus url(...) references inside
 * <style> blocks. A tag is only rewritten if one of its paths was processed.
 *
 * @param string $main_prefix Prefix used unless $alternatives has an entry
 * @param string $content HTML content
 * @param array $alternatives Alternative prefixes, looked up by upper-cased tag name; "style" (lower case) for <style> blocks
 * @param string $suffix Suffix appended to every prefixed path
 * @return string Processed content
 */
public function prefixResourcePath($main_prefix, $content, $alternatives = [], $suffix = '')
{
    // Tags that carry their resource path in exactly one attribute
    $pathAttributeByTag = [
        'td' => 'background',
        'body' => 'background',
        'table' => 'background',
        'img' => 'src',
        'input' => 'src',
        'script' => 'src',
        'embed' => 'src',
        'link' => 'href',
        'a' => 'href',
        'form' => 'action',
    ];
    $segments = $this->splitTags('embed,td,table,body,img,input,form,link,script,a,param,source', $content);
    foreach ($segments as $index => $segment) {
        // Only odd indexes hold tags
        if ($index % 2 === 0) {
            continue;
        }
        $attributes = $this->get_tag_attributes($segment);
        // Detect tag-ending so that it is re-applied correctly.
        $closing = substr($segment, -2) === '/>' ? ' />' : '>';
        $firstTagName = $this->getFirstTagName($segment);
        $tagPrefix = $alternatives[strtoupper($firstTagName)] ?? $main_prefix;
        $lowerTagName = strtolower($firstTagName);
        $changed = false;
        if (isset($pathAttributeByTag[$lowerTagName])) {
            $attributeName = $pathAttributeByTag[$lowerTagName];
            if (isset($attributes[0][$attributeName])) {
                $attributes[0][$attributeName] = $this->prefixRelPath($tagPrefix, $attributes[0][$attributeName], $suffix);
                $changed = true;
            }
        } elseif ($lowerTagName === 'param') {
            if (isset($attributes[0]['name']) && $attributes[0]['name'] === 'movie' && isset($attributes[0]['value'])) {
                $attributes[0]['value'] = $this->prefixRelPath($tagPrefix, $attributes[0]['value'], $suffix);
                $changed = true;
            }
        } elseif ($lowerTagName === 'source') {
            if (isset($attributes[0]['srcset'])) {
                $candidates = GeneralUtility::trimExplode(',', $attributes[0]['srcset']);
                foreach ($candidates as $position => $candidate) {
                    $candidates[$position] = $this->prefixRelPath($tagPrefix, $candidate, $suffix);
                }
                $attributes[0]['srcset'] = implode(', ', $candidates);
                $changed = true;
            }
        }
        if ($changed) {
            $compiled = $this->compileTagAttribs($attributes[0], $attributes[1]);
            $segments[$index] = '<' . trim($lowerTagName . ' ' . $compiled) . $closing;
        }
    }
    $content = implode('', $segments);
    // Fix <style> section:
    $stylePrefix = $alternatives['style'] ?? $main_prefix;
    if ((string)$stylePrefix === '') {
        return $content;
    }
    $segments = $this->splitIntoBlock('style', $content);
    foreach ($segments as $index => $segment) {
        if ($index % 2) {
            $segments[$index] = preg_replace(
                '/(url[[:space:]]*\\([[:space:]]*["\']?)([^"\')]*)(["\']?[[:space:]]*\\))/i',
                '\\1' . $stylePrefix . '\\2' . $suffix . '\\3',
                $segment
            );
        }
    }
    return implode('', $segments);
}
795 
/**
 * Adds $prefix and $suffix to a relative path.
 *
 * Values starting with "/" (absolute path) or "#" (link to a section within
 * the page) and URLs with a scheme are returned unchanged. An empty value is
 * treated as relative, so the result is prefix plus suffix.
 *
 * @param string $prefix Prefix to add
 * @param string $srcVal Path or URL
 * @param string $suffix Suffix to add
 * @return string Processed value
 */
public function prefixRelPath($prefix, $srcVal, $suffix = '')
{
    $srcVal = (string)$srcVal;
    // str_starts_with() instead of $srcVal[0]: reading offset 0 of an empty
    // string raises an "Uninitialized string offset" warning.
    if (str_starts_with($srcVal, '/') || str_starts_with($srcVal, '#')) {
        return $srcVal;
    }
    // Only prefix URLs without a scheme. parse_url() returns false for
    // seriously malformed URLs; those are prefixed as well.
    $urlParts = parse_url($srcVal);
    if (!isset($urlParts['scheme'])) {
        return $prefix . $srcVal . $suffix;
    }
    return $srcVal;
}
818 
/**
 * Prepares a string or an array of strings for comparison.
 *
 * For case-sensitive comparison the input is returned untouched; otherwise
 * it is converted to upper case. Converted arrays are stored in
 * $this->caseShift_cache if a cache key is given.
 *
 * @param string|array $str Value(s) to convert
 * @param bool $caseSensitiveComparison If set, no conversion takes place
 * @param string $cacheKey Cache key for array input; no caching if empty
 * @return string|array Converted value(s)
 */
public function caseShift($str, $caseSensitiveComparison, $cacheKey = '')
{
    if ($caseSensitiveComparison) {
        return $str;
    }
    if (!is_array($str)) {
        return strtoupper($str);
    }
    // A cached result wins over the array that was passed in
    if ($cacheKey && isset($this->caseShift_cache[$cacheKey])) {
        return $this->caseShift_cache[$cacheKey];
    }
    $upperCased = array_map('strtoupper', $str);
    if ($cacheKey) {
        $this->caseShift_cache[$cacheKey] = $upperCased;
    }
    return $upperCased;
}
850 
/**
 * Compiles an attribute array into an attribute string.
 *
 * An attribute is written with a value if the value is not empty or if the
 * meta data holds a quote type for it. Without a quote type from the meta
 * data, integers stay unquoted and everything else gets double quotes.
 *
 * @param array $tagAttrib Attribute values keyed by attribute name
 * @param array $meta Meta data per attribute ("origTag", "dashType") as returned by get_tag_attributes()
 * @return string Attributes separated by single spaces
 */
public function compileTagAttribs($tagAttrib, $meta = [])
{
    $compiled = [];
    foreach ($tagAttrib as $name => $value) {
        // Prefer the name as it was originally written
        $attribute = $meta[$name]['origTag'] ?? $name;
        if (strcmp($value, '') !== 0 || isset($meta[$name]['dashType'])) {
            $quote = $meta[$name]['dashType'] ?? null;
            if ($quote === null) {
                $quote = MathUtility::canBeInterpretedAsInteger($value) ? '' : '"';
            }
            $attribute .= '=' . $quote . $value . $quote;
        }
        $compiled[] = $attribute;
    }
    return implode(' ', $compiled);
}
872 
/**
 * Converts a TypoScript-style configuration array into the arguments
 * expected by HTMLcleaner().
 *
 * Evaluated keys: allowTags, tags., localNesting, globalNesting,
 * rmTagIfNoAttrib, noAttrib, removeTags, stripEmptyTags, stripEmptyTags.,
 * keepNonMatchedTags and htmlSpecialChars.
 *
 * @param array $TSconfig Configuration array
 * @param array $keepTags Tag configuration to start with; merged over the tags from "allowTags"
 * @return array [0] tag configuration, [1] keepNonMatchedTags as string,
 *               [2] htmlSpecialChars as int, [3] additional configuration
 */
public function HTMLparserConfig($TSconfig, $keepTags = [])
{
    // Allow tags (base list, merged with incoming array)
    $allowedTags = array_flip(GeneralUtility::trimExplode(',', strtolower($TSconfig['allowTags'] ?? ''), true));
    $keepTags = array_merge($allowedTags, $keepTags);

    $tagConfigurations = $TSconfig['tags.'] ?? null;
    if (is_array($tagConfigurations)) {
        // First pass: plain "0" / "1" values remove or add a tag
        foreach ($tagConfigurations as $tagKey => $tagConfig) {
            if (is_array($tagConfig) || $tagKey != strtolower($tagKey)) {
                continue;
            }
            $flag = (string)$tagConfig;
            if ($flag === '0') {
                unset($keepTags[$tagKey]);
            } elseif ($flag === '1' && !isset($keepTags[$tagKey])) {
                $keepTags[$tagKey] = 1;
            }
        }
        // Second pass: array values ("tagname.") hold the tag properties
        foreach ($tagConfigurations as $tagKey => $tagConfig) {
            if (!is_array($tagConfig) || $tagKey != strtolower($tagKey)) {
                continue;
            }
            // Remove the trailing dot
            $tagName = substr($tagKey, 0, -1);
            if (!is_array($keepTags[$tagName] ?? null)) {
                $keepTags[$tagName] = [];
            }
            $attributeConfigurations = $tagConfig['fixAttrib.'] ?? null;
            if (is_array($attributeConfigurations)) {
                foreach ($attributeConfigurations as $attributeKey => $attributeConfig) {
                    if (!is_array($attributeConfig)) {
                        continue;
                    }
                    $attributeName = substr($attributeKey, 0, -1);
                    $merged = $keepTags[$tagName]['fixAttrib'][$attributeName] ?? null;
                    if (!is_array($merged)) {
                        $merged = [];
                    }
                    $merged = array_merge($merged, $attributeConfig);
                    // "range" and "list" are configured as comma separated strings
                    foreach (['range', 'list'] as $listOption) {
                        if ((string)($merged[$listOption] ?? '') !== '') {
                            $merged[$listOption] = GeneralUtility::trimExplode(',', $merged[$listOption]);
                        }
                    }
                    $keepTags[$tagName]['fixAttrib'][$attributeName] = $merged;
                }
            }
            unset($tagConfig['fixAttrib.'], $tagConfig['fixAttrib']);
            if (!empty($tagConfig['rmTagIfNoAttrib']) && empty($tagConfig['nesting'])) {
                $tagConfig['nesting'] = 1;
            }
            $keepTags[$tagName] = array_merge($keepTags[$tagName], $tagConfig);
        }
    }

    // Options holding a tag list whose settings are applied to already known tags only
    $settingsByOption = [
        'localNesting' => ['nesting' => 1],
        'globalNesting' => ['nesting' => 'global'],
        'rmTagIfNoAttrib' => ['rmTagIfNoAttrib' => 1],
        'noAttrib' => ['allowedAttribs' => 0],
    ];
    foreach ($settingsByOption as $option => $settings) {
        if (empty($TSconfig[$option])) {
            continue;
        }
        foreach (GeneralUtility::trimExplode(',', strtolower($TSconfig[$option]), true) as $tagName) {
            if (!isset($keepTags[$tagName])) {
                continue;
            }
            if (!is_array($keepTags[$tagName])) {
                $keepTags[$tagName] = [];
            }
            foreach ($settings as $settingName => $settingValue) {
                $keepTags[$tagName][$settingName] = $settingValue;
            }
            // rmTagIfNoAttrib additionally enables nesting unless it is configured already
            if ($option === 'rmTagIfNoAttrib' && empty($keepTags[$tagName]['nesting'])) {
                $keepTags[$tagName]['nesting'] = 1;
            }
        }
    }
    // removeTags replaces any existing configuration of the listed tags
    if (!empty($TSconfig['removeTags'])) {
        foreach (GeneralUtility::trimExplode(',', strtolower($TSconfig['removeTags']), true) as $tagName) {
            $keepTags[$tagName] = [
                'allowedAttribs' => 0,
                'rmTagIfNoAttrib' => 1,
            ];
        }
    }
    // Create additional configuration:
    $addConfig = [];
    if (isset($TSconfig['stripEmptyTags'])) {
        $addConfig['stripEmptyTags'] = $TSconfig['stripEmptyTags'];
        if (isset($TSconfig['stripEmptyTags.'])) {
            $addConfig['stripEmptyTags.'] = $TSconfig['stripEmptyTags.'];
        }
    }
    return [
        $keepTags,
        (string)($TSconfig['keepNonMatchedTags'] ?? ''),
        (int)($TSconfig['htmlSpecialChars'] ?? 0),
        $addConfig,
    ];
}
1000 
/**
 * Removes tags that are empty or contain only spaces, repeating until no
 * further tag can be removed (so nested empty tags vanish as well).
 *
 * Tag names are matched as whole names: a list entry "b" does not affect
 * <blockquote>, and a start tag only pairs with an end tag of the same name.
 *
 * @param string $content HTML content
 * @param string $tagList Comma separated list of tag names; all tags if empty
 * @param bool $treatNonBreakingSpaceAsEmpty If set, "&nbsp;" counts as empty content too
 * @param bool $keepTags If set, $tagList names the tags to keep and all other empty tags are removed
 * @return string Content without the empty tags
 */
public function stripEmptyTags($content, $tagList = '', $treatNonBreakingSpaceAsEmpty = false, $keepTags = false)
{
    // A tag name is complete if whitespace, "/" or ">" follows
    $nameEnd = '(?=[\\s\\/>])';
    // All characters until a > or space is reached
    $tagRegEx = '[^ >]+';
    if (!empty($tagList)) {
        $quotedNames = array_map(
            static fn ($name) => preg_quote($name, '/'),
            GeneralUtility::trimExplode(',', $tagList, true)
        );
        // A list without any usable name is handled like an empty list
        if ($quotedNames !== []) {
            $alternation = '(?:' . implode('|', $quotedNames) . ')';
            $tagRegEx = $keepTags
                ? '(?!' . $alternation . $nameEnd . ')[^ >]+'
                : $alternation;
        }
    }
    $nbspRegex = $treatNonBreakingSpaceAsEmpty ? '|(&nbsp;)' : '';
    $finalRegex = '/<(' . $tagRegEx . ')' . $nameEnd . '[^>]*>( ' . $nbspRegex . ')*<\\/\\1(?=[\\s>])[^>]*>/i';
    do {
        $count = 0;
        $stripped = preg_replace($finalRegex, '', $content, -1, $count);
        if ($stripped === null) {
            // PCRE error: keep what was achieved so far
            break;
        }
        $content = $stripped;
    } while ($count !== 0);
    return $content;
}
1029 
/**
 * Runs stripEmptyTags() on $value if the configuration asks for it.
 *
 * "stripEmptyTags" enables the feature. From "stripEmptyTags." the keys
 * "keepTags" (takes precedence), "tags" and "treatNonBreakingSpaceAsEmpty"
 * are evaluated.
 *
 * @param string $value HTML content
 * @param array $configuration Additional configuration as built by HTMLparserConfig()
 * @return string Processed content
 */
protected function stripEmptyTagsIfConfigured(string $value, array $configuration): string
{
    if (empty($configuration['stripEmptyTags'])) {
        return $value;
    }

    $options = $configuration['stripEmptyTags.'] ?? [];
    $keepTags = !empty($options['keepTags']);
    $tags = null;
    if ($keepTags) {
        $tags = $options['keepTags'];
    } elseif (!empty($options['tags'])) {
        $tags = $options['tags'];
    }

    return $this->stripEmptyTags(
        $value,
        $tags,
        !empty($options['treatNonBreakingSpaceAsEmpty']),
        $keepTags
    );
}
1052 }
‪TYPO3\CMS\Core\Html
Definition: DefaultSanitizerBuilder.php:18
‪TYPO3\CMS\Core\Utility\GeneralUtility\trimExplode
‪static list< string > trimExplode($delim, $string, $removeEmptyValues=false, $limit=0)
Definition: GeneralUtility.php:916
‪TYPO3\CMS\Core\Html\HtmlParser\HTMLparserConfig
‪array HTMLparserConfig($TSconfig, $keepTags=[])
Definition: HtmlParser.php:881
‪TYPO3\CMS\Core\Html\HtmlParser\getFirstTagName
‪string getFirstTagName($str, $preserveCase=false)
Definition: HtmlParser.php:243
‪TYPO3\CMS\Core\Html\HtmlParser\split_tag_attributes
‪array split_tag_attributes($tag)
Definition: HtmlParser.php:319
‪TYPO3\CMS\Core\Html\HtmlParser\VOID_ELEMENTS
‪const VOID_ELEMENTS
Definition: HtmlParser.php:30
‪TYPO3\CMS\Core\Html\HtmlParser\caseShift
‪array string caseShift($str, $caseSensitiveComparison, $cacheKey='')
Definition: HtmlParser.php:828
‪TYPO3\CMS\Core\Html\HtmlParser\stripEmptyTags
‪string stripEmptyTags($content, $tagList='', $treatNonBreakingSpaceAsEmpty=false, $keepTags=false)
Definition: HtmlParser.php:1011
‪TYPO3\CMS\Core\Html\HtmlParser\$caseShift_cache
‪array $caseShift_cache
Definition: HtmlParser.php:27
‪TYPO3\CMS\Core\Html\HtmlParser
Definition: HtmlParser.php:26
‪$parser
‪$parser
Definition: annotationChecker.php:108
‪TYPO3\CMS\Core\Html\HtmlParser\stripEmptyTagsIfConfigured
‪stripEmptyTagsIfConfigured(string $value, array $configuration)
Definition: HtmlParser.php:1033
‪$dir
‪$dir
Definition: validateRstFiles.php:257
‪TYPO3\CMS\Core\Html\HtmlParser\getFirstTag
‪string getFirstTag($str)
Definition: HtmlParser.php:220
‪TYPO3\CMS\Core\Html\HtmlParser\prefixRelPath
‪string prefixRelPath($prefix, $srcVal, $suffix='')
Definition: HtmlParser.php:805
‪TYPO3\CMS\Core\Html\HtmlParser\splitTags
‪array splitTags($tag, $content)
Definition: HtmlParser.php:159
‪TYPO3\CMS\Core\Html\HtmlParser\prefixResourcePath
‪string prefixResourcePath($main_prefix, $content, $alternatives=[], $suffix='')
Definition: HtmlParser.php:713
‪TYPO3\CMS\Core\Html\HtmlParser\get_tag_attributes
‪array get_tag_attributes($tag, $deHSC=false)
Definition: HtmlParser.php:269
‪TYPO3\CMS\Core\Html\HtmlParser\compileTagAttribs
‪string compileTagAttribs($tagAttrib, $meta=[])
Definition: HtmlParser.php:859
‪TYPO3\CMS\Core\Html\HtmlParser\HTMLcleaner
‪string HTMLcleaner($content, $tags=[], $keepAll=0, $hSC=0, $addConfig=[])
Definition: HtmlParser.php:387
‪TYPO3\CMS\Core\Html\HtmlParser\removeFirstAndLastTag
‪string removeFirstAndLastTag($str)
Definition: HtmlParser.php:197
‪TYPO3\CMS\Core\Html\HtmlParser\splitIntoBlockRecursiveProc
‪string splitIntoBlockRecursiveProc($tag, $content, &$procObj, $callBackContent, $callBackTags, $level=0)
Definition: HtmlParser.php:124
‪TYPO3\CMS\Core\Html\HtmlParser\splitIntoBlock
‪array splitIntoBlock($tag, $content, $eliminateExtraEndTags=false)
Definition: HtmlParser.php:49
‪TYPO3\CMS\Core\Utility\MathUtility
Definition: MathUtility.php:24
‪TYPO3\CMS\Core\Html\HtmlParser\bidir_htmlspecialchars
‪string bidir_htmlspecialchars($value, $dir)
Definition: HtmlParser.php:689
‪TYPO3\CMS\Core\Utility\GeneralUtility
Definition: GeneralUtility.php:51