‪TYPO3CMS  ‪main
HtmlParser.php
Go to the documentation of this file.
1 <?php
2 
3 /*
4  * This file is part of the TYPO3 CMS project.
5  *
6  * It is free software; you can redistribute it and/or modify it under
7  * the terms of the GNU General Public License, either version 2
8  * of the License, or any later version.
9  *
10  * For the full copyright and license information, please read the
11  * LICENSE.txt file that was distributed with this source code.
12  *
13  * The TYPO3 project - inspiring people to share!
14  */
15 
16 namespace ‪TYPO3\CMS\Core\Html;
17 
20 
26 {
27  // Void elements that do not have closing tags, as defined by HTML5, except link element
28  public const ‪VOID_ELEMENTS = 'area|base|br|col|command|embed|hr|img|input|keygen|meta|param|source|track|wbr';
29 
30  /************************************
31  *
32  * Parsing HTML code
33  *
34  ************************************/
/**
 * Splits $content into pieces outside and inside blocks of the given tags.
 *
 * The content is cut at every start and end tag whose name is in $tag. For
 * well-nested input the returned list alternates: even indexes hold content
 * outside the blocks, odd indexes hold one complete block (start tag, inner
 * content and end tag). Listed tags nested inside an open block stay part of
 * that outer block.
 *
 * @param string $tag Comma-separated list of tag names to split by
 * @param string $content HTML content to split
 * @param bool $eliminateExtraEndTags If set, end tags met on ground level (without an open start tag) are dropped
 * @return string[] Pieces of $content; an empty array if splitting yields nothing
 */
public function splitIntoBlock($tag, $content, $eliminateExtraEndTags = false)
{
    $tags = array_unique(GeneralUtility::trimExplode(',', $tag, true));
    array_walk($tags, static function (string &$tag): void {
        $tag = preg_quote($tag, '/');
    });
    $regexStr = '/\\<\\/?(' . implode('|', $tags) . ')(\\s*\\>|\\s[^\\>]*\\>)/si';
    $parts = preg_split($regexStr, $content);
    if (empty($parts)) {
        return [];
    }
    $newParts = [];
    // $pointer always sits on the "<" of the tag that precedes the current part
    $pointer = strlen($parts[0]);
    $buffer = $parts[0];
    $nested = 0;
    // The first element was consumed above, the loop handles the remaining ones
    foreach (array_slice($parts, 1, null, true) as $v) {
        $isEndTag = substr($content, $pointer, 2) === '</';
        // Scan from $pointer in place instead of copying the rest of the document for every tag
        $tagLen = strcspn($content, '>', $pointer) + 1;
        // We meet a start-tag:
        if (!$isEndTag) {
            // Ground level:
            if (!$nested) {
                // Previous buffer stored
                $newParts[] = $buffer;
                $buffer = '';
            }
            // We are inside now!
            $nested++;
            // New buffer set and pointer increased
            $mbuffer = substr($content, $pointer, strlen($v) + $tagLen);
            $pointer += strlen($mbuffer);
            $buffer .= $mbuffer;
        } else {
            // If we meet an endtag:
            // Decrease nested-level
            $nested--;
            $eliminated = 0;
            if ($eliminateExtraEndTags && $nested < 0) {
                $nested = 0;
                $eliminated = 1;
            } else {
                // In any case, add the endtag to current buffer and increase pointer
                $buffer .= substr($content, $pointer, $tagLen);
            }
            $pointer += $tagLen;
            // If we're back on ground level (and not by eliminating tags), the block is complete
            if (!$nested && !$eliminated) {
                $newParts[] = $buffer;
                $buffer = '';
            }
            // New buffer set and pointer increased
            $mbuffer = substr($content, $pointer, strlen($v));
            $pointer += strlen($mbuffer);
            $buffer .= $mbuffer;
        }
    }
    $newParts[] = $buffer;
    return $newParts;
}
109 
/**
 * Splits $content by the given block tags, recursively processes the inside
 * of every block and lets callback methods on $procObj rewrite the pieces.
 *
 * @param string $tag Comma-separated list of tag names, passed to splitIntoBlock()
 * @param string $content HTML content to process
 * @param object $procObj Object providing the callback methods
 * @param string $callBackContent Name of the method on $procObj called with (content, level) for content outside blocks; skipped if empty
 * @param string $callBackTags Name of the method on $procObj called with (tag array, level) for each block; skipped if empty
 * @param int $level Current recursion depth, handed to the callbacks
 * @return string The re-assembled content
 */
public function splitIntoBlockRecursiveProc($tag, $content, &$procObj, $callBackContent, $callBackTags, $level = 0)
{
    $segments = $this->splitIntoBlock($tag, $content, true);
    foreach ($segments as $index => $segment) {
        if (!($index % 2)) {
            // Even index: content outside of the blocks
            if ($callBackContent) {
                $segments[$index] = $procObj->{$callBackContent}($segment, $level);
            }
            continue;
        }
        // Odd index: a complete block, its inner content is processed one level deeper
        $originalTagName = $this->getFirstTagName($segment, true);
        $block = [
            'tag_start' => $this->getFirstTag($segment),
            'tag_end' => '</' . $originalTagName . '>',
            'tag_name' => strtolower($originalTagName),
            'content' => $this->splitIntoBlockRecursiveProc(
                $tag,
                $this->removeFirstAndLastTag($segment),
                $procObj,
                $callBackContent,
                $callBackTags,
                $level + 1
            ),
        ];
        if ($callBackTags) {
            $block = $procObj->{$callBackTags}($block, $level);
        }
        $segments[$index] = $block['tag_start'] . $block['content'] . $block['tag_end'];
    }
    return implode('', $segments);
}
145 
/**
 * Splits $content at every occurrence of the given (single) tags.
 *
 * The returned list alternates: even indexes hold the content between tags,
 * odd indexes hold one matched tag each.
 *
 * @param string $tag Comma-separated list of tag names to split by
 * @param string $content HTML content to split
 * @return string[] Pieces of $content; an empty array if splitting yields nothing
 */
public function splitTags($tag, $content)
{
    $tags = GeneralUtility::trimExplode(',', $tag, true);
    array_walk($tags, static function (string &$tag): void {
        $tag = preg_quote($tag, '/');
    });
    $regexStr = '/\\<(' . implode('|', $tags) . ')(\\s[^>]*)?\\/?>/si';
    $parts = preg_split($regexStr, $content);
    if (empty($parts)) {
        return [];
    }
    // $pointer always sits on the "<" of the tag that precedes the current part
    $pointer = strlen($parts[0]);
    $newParts = [$parts[0]];
    // The first element was consumed above, the loop handles the remaining ones
    foreach (array_slice($parts, 1, null, true) as $v) {
        // Scan from $pointer in place instead of copying the rest of the document for every tag
        $tagLen = strcspn($content, '>', $pointer) + 1;
        // Set tag:
        $matchedTag = substr($content, $pointer, $tagLen);
        $newParts[] = $matchedTag;
        $pointer += strlen($matchedTag);
        // Set content:
        $newParts[] = $v;
        $pointer += strlen($v);
    }
    return $newParts;
}
187 
/**
 * Returns everything between the first and the last element node of $str.
 *
 * @param string $str HTML content, parsed with SimpleParser
 * @return string The serialized nodes in between; empty string if there is no element node or first and last are the same node
 */
public function removeFirstAndLastTag($str)
{
    $parser = SimpleParser::fromString($str);
    $firstElement = $parser->getFirstNode(SimpleNode::TYPE_ELEMENT);
    $lastElement = $parser->getLastNode(SimpleNode::TYPE_ELEMENT);
    if ($firstElement === null || $firstElement === $lastElement) {
        return '';
    }
    $offset = $firstElement->getIndex() + 1;
    $length = $lastElement->getIndex() - $firstElement->getIndex() - 1;
    $inner = '';
    foreach (array_slice($parser->getNodes(), $offset, $length) as $node) {
        $inner .= strval($node);
    }
    return $inner;
}
210 
/**
 * Returns $str from its beginning up to and including the first element node.
 *
 * @param string $str HTML content, parsed with SimpleParser
 * @return string Serialized leading nodes including the first element; empty string if there is no element node
 */
public function getFirstTag($str)
{
    $parser = SimpleParser::fromString($str);
    $firstElement = $parser->getFirstNode(SimpleNode::TYPE_ELEMENT);
    if ($firstElement === null) {
        return '';
    }
    $leading = '';
    foreach (array_slice($parser->getNodes(), 0, $firstElement->getIndex() + 1) as $node) {
        $leading .= strval($node);
    }
    return $leading;
}
232 
/**
 * Returns the name of the first element node in $str that reports an element name.
 *
 * @param string $str HTML content, parsed with SimpleParser
 * @param bool $preserveCase If set, the name is returned as found; otherwise it is upper-cased
 * @return string The tag name, or an empty string if none was found
 */
public function getFirstTagName($str, $preserveCase = false)
{
    $elementNodes = SimpleParser::fromString($str)->getNodes(SimpleNode::TYPE_ELEMENT);
    foreach ($elementNodes as $elementNode) {
        $elementName = $elementNode->getElementName();
        if ($elementName !== null) {
            return $preserveCase ? $elementName : strtoupper($elementName);
        }
    }
    return '';
}
254 
/**
 * Parses the attributes of a tag into a name => value array plus meta data.
 *
 * Attribute names are lower-cased for the array keys. The meta array keeps,
 * per attribute, the original spelling ("origTag") and - if a value was
 * given - the quote character that surrounded it ("dashType").
 *
 * @param string $tag Tag or attribute string, split with split_tag_attributes()
 * @param bool $deHSC If set, values are passed through htmlspecialchars_decode()
 * @return array Index 0: attribute values by lower-cased name, index 1: meta data by lower-cased name
 */
public function get_tag_attributes($tag, $deHSC = false)
{
    [$components, $metaC] = $this->split_tag_attributes($tag);
    if (!is_array($components)) {
        return [null, null];
    }
    $attributes = [];
    $attributesMeta = [];
    // Name of the attribute that is currently waiting for a value
    $pendingName = '';
    // True right after a "=" was seen, i.e. the next component is a value
    $expectValue = false;
    foreach ($components as $position => $component) {
        if ($component === '=') {
            $expectValue = true;
            continue;
        }
        if ($expectValue) {
            // A value is only assigned if an attribute name is waiting for it
            if ($pendingName) {
                $attributes[$pendingName] = $deHSC ? htmlspecialchars_decode($component) : $component;
                $attributesMeta[$pendingName]['dashType'] = $metaC[$position];
                $pendingName = '';
            }
        } else {
            $originalName = preg_replace('/[^[:alnum:]_\\:\\-]/', '', $component) ?? '';
            if ($originalName) {
                $pendingName = strtolower((string)$originalName);
                $attributesMeta[$pendingName] = ['origTag' => $originalName];
                $attributes[$pendingName] = '';
            }
        }
        $expectValue = false;
    }
    return [$attributes, $attributesMeta];
}
302 
/**
 * Splits the attribute part of a tag into its raw components.
 *
 * A leading "<tagname " and a trailing ">" are ignored. The remainder is
 * tokenized into quoted values, unquoted words and "=" signs.
 *
 * @param string $tag Tag or attribute string
 * @return array Index 0: list of components (quotes removed), index 1: for each component the quote character that surrounded it, or an empty string
 */
public function split_tag_attributes($tag)
{
    $tagMatch = [];
    if (preg_match('/(\\<[^\\s]+\\s+)?(.*?)\\s*(\\>)?$/s', $tag, $tagMatch) !== 1) {
        return [[], []];
    }
    $attributeString = $tagMatch[2];
    $components = [];
    $quoteChars = [];
    $tokenMatch = [];
    $found = preg_match_all('/("[^"]*"|\'[^\']*\'|[^\\s"\'\\=]+|\\=)/s', $attributeString, $tokenMatch);
    if ($found > 0) {
        foreach ($tokenMatch[1] as $token) {
            $isQuoted = $token[0] === '"' || $token[0] === '\'';
            $quoteChars[] = $isQuoted ? $token[0] : '';
            // Quoted tokens lose their surrounding quote characters
            $components[] = $isQuoted ? substr($token, 1, -1) : $token;
        }
    }
    return [$components, $quoteChars];
}
341 
342  /*********************************
343  *
344  * Clean HTML code
345  *
346  *********************************/
/**
 * Cleans HTML content tag by tag according to the given tag configuration.
 *
 * The content is tokenized at every "<". HTML comments and blocks enclosed in
 * "<![CDATA[*/" ... "/*]]>*/" are passed on unchanged. Every other tag is looked up
 * by its lower-cased name in $tags:
 * - not listed: dropped, unless $keepAll is set
 * - listed with a non-array value: kept as it is
 * - listed with an array value: processed with the options read below
 *   (overrideAttribs, allowedAttribs, fixAttrib, protect, remap,
 *   rmTagIfNoAttrib, nesting)
 *
 * @param string $content HTML content to clean
 * @param array $tags Tag configuration, keys are lower-cased tag names
 * @param mixed $keepAll If set, tags not listed in $tags are kept; the string "protect" converts their brackets to &lt; / &gt;
 * @param int $hSC Conversion mode for content outside of tags, see bidir_htmlspecialchars()
 * @param array $addConfig Additional configuration, handed to stripEmptyTagsIfConfigured()
 * @return string The cleaned content
 */
public function HTMLcleaner($content, $tags = [], $keepAll = 0, $hSC = 0, $addConfig = [])
{
    $newContent = [];
    $tokArr = explode('<', $content);
    $newContent[] = $this->bidir_htmlspecialchars(current($tokArr), $hSC);
    // We skip the first element in foreach loop
    $tokArrSliced = array_slice($tokArr, 1, null, true);
    $c = 1;
    // Positions in $newContent of the still open start tags, per tag name
    $tagRegister = [];
    // Names of the currently open tags with "global" nesting
    $tagStack = [];
    $inComment = false;
    $inCdata = false;
    $skipTag = false;
    foreach ($tokArrSliced as $tok) {
        if ($inComment) {
            if (($eocPos = strpos($tok, '-->')) === false) {
                // End of comment is not found in the token. Go further until end of comment is found in other tokens.
                $newContent[$c++] = '<' . $tok;
                continue;
            }
            // Comment ends in the middle of the token: add comment and proceed with rest of the token
            $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 3);
            $tok = substr($tok, $eocPos + 3);
            $inComment = false;
            $skipTag = true;
        } elseif ($inCdata) {
            if (($eocPos = strpos($tok, '/*]]>*/')) === false) {
                // End of CDATA is not found in the token. Go further until it is found in other tokens.
                $newContent[$c++] = '<' . $tok;
                continue;
            }
            // CDATA ends in the middle of the token: add it and proceed with rest of the token
            // NOTE(review): the end marker is 7 characters long, the offset of 10 looks off by 3 - confirm before changing
            $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 10);
            $tok = substr($tok, $eocPos + 10);
            $inCdata = false;
            $skipTag = true;
        } elseif (str_starts_with($tok, '!--')) {
            if (($eocPos = strpos($tok, '-->')) === false) {
                // Comment started in this token but it does not end in the same token. Set a flag to skip till the end of comment
                $newContent[$c++] = '<' . $tok;
                $inComment = true;
                continue;
            }
            // Start and end of comment are both in the current token. Add comment and proceed with rest of the token
            $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 3);
            $tok = substr($tok, $eocPos + 3);
            $skipTag = true;
        } elseif (str_starts_with($tok, '![CDATA[*/')) {
            if (($eocPos = strpos($tok, '/*]]>*/')) === false) {
                // CDATA started in this token but it does not end in the same token. Set a flag to skip till its end
                $newContent[$c++] = '<' . $tok;
                $inCdata = true;
                continue;
            }
            // Start and end of CDATA are both in the current token. Add it and proceed with rest of the token
            // NOTE(review): same offset of 10 for the 7 character end marker as above - confirm before changing
            $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 10);
            $tok = substr($tok, $eocPos + 10);
            $skipTag = true;
        }
        $firstChar = $tok[0] ?? null;
        // It is a tag... (first char is a-z0-9 or /). This also avoids triggering on <?xml..> and <!DOCTYPE..>
        if (!$skipTag && preg_match('/[[:alnum:]\\/]/', (string)$firstChar) === 1) {
            $tagEnd = strpos($tok, '>');
            // If there is an end-bracket... tagEnd can't be 0 as the first character can't be a >
            if ($tagEnd) {
                $endTag = $firstChar === '/' ? 1 : 0;
                $tagContent = substr($tok, $endTag, $tagEnd - $endTag);
                $tagParts = preg_split('/\\s+/s', $tagContent, 2);
                $tagName = strtolower(rtrim($tagParts[0], '/'));
                $emptyTag = 0;
                if (isset($tags[$tagName])) {
                    // If there is processing to do for the tag:
                    if (is_array($tags[$tagName])) {
                        // Void elements get a " /" ending. The pattern must not contain
                        // whitespace, otherwise the last element of the list never matches.
                        if (preg_match('/^(' . self::VOID_ELEMENTS . ')$/i', $tagName)) {
                            $emptyTag = 1;
                        }
                        // If NOT an endtag, do attribute processing
                        if (!$endTag) {
                            // Override attributes
                            if (isset($tags[$tagName]['overrideAttribs']) && (string)$tags[$tagName]['overrideAttribs'] !== '') {
                                $tagParts[1] = $tags[$tagName]['overrideAttribs'];
                            }
                            // Allowed attributes
                            if (isset($tags[$tagName]['allowedAttribs']) && (string)$tags[$tagName]['allowedAttribs'] !== '') {
                                // No attribs allowed
                                if ((string)$tags[$tagName]['allowedAttribs'] === '0') {
                                    $tagParts[1] = '';
                                } elseif (isset($tagParts[1]) && trim($tagParts[1])) {
                                    $tagAttrib = $this->get_tag_attributes($tagParts[1]);
                                    $tagParts[1] = '';
                                    $newTagAttrib = [];
                                    $tList = (array)(
                                        $tags[$tagName]['_allowedAttribs']
                                        ?? GeneralUtility::trimExplode(',', strtolower($tags[$tagName]['allowedAttribs']), true)
                                    );
                                    foreach ($tList as $allowTag) {
                                        if (isset($tagAttrib[0][$allowTag])) {
                                            $newTagAttrib[$allowTag] = $tagAttrib[0][$allowTag];
                                        }
                                    }

                                    $tagParts[1] = $this->compileTagAttribs($newTagAttrib, $tagAttrib[1]);
                                }
                            }
                            // Fixed attrib values
                            if (isset($tags[$tagName]['fixAttrib']) && is_array($tags[$tagName]['fixAttrib'])) {
                                $tagAttrib = $this->get_tag_attributes($tagParts[1] ?? '');
                                $tagParts[1] = '';
                                foreach ($tags[$tagName]['fixAttrib'] as $attr => $params) {
                                    if (isset($params['set']) && $params['set'] !== '') {
                                        $tagAttrib[0][$attr] = $params['set'];
                                    }
                                    if (!empty($params['unset'])) {
                                        unset($tagAttrib[0][$attr]);
                                    }
                                    if (!empty($params['default']) && !isset($tagAttrib[0][$attr])) {
                                        $tagAttrib[0][$attr] = $params['default'];
                                    }
                                    if (($params['always'] ?? false) || isset($tagAttrib[0][$attr])) {
                                        if ($params['trim'] ?? false) {
                                            $tagAttrib[0][$attr] = trim($tagAttrib[0][$attr]);
                                        }
                                        if ($params['intval'] ?? false) {
                                            $tagAttrib[0][$attr] = (int)$tagAttrib[0][$attr];
                                        }
                                        if ($params['lower'] ?? false) {
                                            $tagAttrib[0][$attr] = strtolower($tagAttrib[0][$attr]);
                                        }
                                        if ($params['upper'] ?? false) {
                                            $tagAttrib[0][$attr] = strtoupper($tagAttrib[0][$attr]);
                                        }
                                        if ($params['range'] ?? false) {
                                            if (isset($params['range'][1])) {
                                                $tagAttrib[0][$attr] = MathUtility::forceIntegerInRange($tagAttrib[0][$attr], (int)$params['range'][0], (int)$params['range'][1]);
                                            } else {
                                                $tagAttrib[0][$attr] = MathUtility::forceIntegerInRange($tagAttrib[0][$attr], (int)$params['range'][0]);
                                            }
                                        }
                                        if (isset($params['list']) && is_array($params['list'])) {
                                            // For the class attribute, remove from the attribute value any class not in the list
                                            // Classes are case sensitive
                                            if ($attr === 'class') {
                                                $newClasses = [];
                                                $classes = GeneralUtility::trimExplode(' ', $tagAttrib[0][$attr] ?? '', true);
                                                foreach ($classes as $class) {
                                                    if (in_array($class, $params['list'])) {
                                                        $newClasses[] = $class;
                                                    }
                                                }
                                                if (!empty($newClasses)) {
                                                    $tagAttrib[0][$attr] = implode(' ', $newClasses);
                                                } else {
                                                    $tagAttrib[0][$attr] = $params['list'][0];
                                                }
                                            } else {
                                                $normalizedSearchWord = $tagAttrib[0][$attr] ?? '';
                                                $normalizedSearchList = $params['list'];
                                                if (!($params['casesensitiveComp'] ?? false)) {
                                                    // Case-sensitive comparison is not wanted, normalize all values.
                                                    // array_map() is required here: array_walk() hands value AND key to
                                                    // the callback and discards its return value.
                                                    $normalizedSearchWord = strtoupper($tagAttrib[0][$attr] ?? '');
                                                    $normalizedSearchList = array_map(
                                                        static fn ($listItem): string => strtoupper((string)$listItem),
                                                        $normalizedSearchList
                                                    );
                                                }
                                                if (!in_array($normalizedSearchWord, $normalizedSearchList, true)) {
                                                    $tagAttrib[0][$attr] = $params['list'][0];
                                                }
                                            }
                                        }
                                        if (
                                            (($params['removeIfFalse'] ?? false) && $params['removeIfFalse'] !== 'blank' && !$tagAttrib[0][$attr])
                                            || (($params['removeIfFalse'] ?? false) && $params['removeIfFalse'] === 'blank' && (string)$tagAttrib[0][$attr] === '')
                                        ) {
                                            unset($tagAttrib[0][$attr]);
                                        }
                                        if ((string)($params['removeIfEquals'] ?? '') !== '') {
                                            $normalizedAttribute = $tagAttrib[0][$attr];
                                            $normalizedRemoveIfEquals = $params['removeIfEquals'];
                                            if (!($params['casesensitiveComp'] ?? false)) {
                                                // Case-sensitive comparison is not wanted, normalize all values
                                                $normalizedAttribute = strtoupper($tagAttrib[0][$attr]);
                                                $normalizedRemoveIfEquals = strtoupper($params['removeIfEquals']);
                                            }

                                            if ($normalizedAttribute === $normalizedRemoveIfEquals) {
                                                unset($tagAttrib[0][$attr]);
                                            }
                                        }
                                        if ($params['prefixRelPathWith'] ?? false) {
                                            $urlParts = parse_url($tagAttrib[0][$attr]);
                                            if (is_array($urlParts) && empty($urlParts['scheme']) && !empty($urlParts['path']) && !str_starts_with($urlParts['path'], '/')) {
                                                // If it is NOT an absolute URL (by http: or starting "/")
                                                $tagAttrib[0][$attr] = $params['prefixRelPathWith'] . $tagAttrib[0][$attr];
                                            }
                                        }
                                        if ($params['userFunc'] ?? false) {
                                            if (is_array($params['userFunc.'] ?? null)) {
                                                $params['userFunc.']['attributeValue'] = $tagAttrib[0][$attr];
                                            } else {
                                                $params['userFunc.'] = $tagAttrib[0][$attr];
                                            }
                                            $tagAttrib[0][$attr] = GeneralUtility::callUserFunction($params['userFunc'], $params['userFunc.'], $this);
                                        }
                                    }
                                }
                                $tagParts[1] = $this->compileTagAttribs($tagAttrib[0], $tagAttrib[1]);
                            }
                        } else {
                            // If endTag, remove any possible attributes:
                            $tagParts[1] = '';
                        }
                        // Protecting the tag by converting < and > to &lt; and &gt;
                        if (!empty($tags[$tagName]['protect'])) {
                            $lt = '&lt;';
                            $gt = '&gt;';
                        } else {
                            $lt = '<';
                            $gt = '>';
                        }
                        // Remapping tag name?
                        if (!empty($tags[$tagName]['remap'])) {
                            $tagParts[0] = $tags[$tagName]['remap'];
                        }
                        // rmTagIfNoAttrib
                        if ($endTag || empty($tags[$tagName]['rmTagIfNoAttrib']) || trim($tagParts[1] ?? '')) {
                            $setTag = true;
                            // Remove this closing tag if allowedAttribs is 0 and rmTagIfNoAttrib is 1
                            // (the combination HTMLparserConfig() sets for "removeTags")
                            if ($endTag
                                && isset($tags[$tagName]['allowedAttribs']) && $tags[$tagName]['allowedAttribs'] === 0
                                && isset($tags[$tagName]['rmTagIfNoAttrib']) && $tags[$tagName]['rmTagIfNoAttrib'] === 1
                            ) {
                                $setTag = false;
                            }
                            if (isset($tags[$tagName]['nesting'])) {
                                if (!isset($tagRegister[$tagName])) {
                                    $tagRegister[$tagName] = [];
                                }
                                if ($endTag) {
                                    $correctTag = true;
                                    if ($tags[$tagName]['nesting'] === 'global') {
                                        $lastEl = end($tagStack);
                                        if ($tagName !== $lastEl) {
                                            if (in_array($tagName, $tagStack, true)) {
                                                // Drop the start tags that were opened after this tag and never closed
                                                while (!empty($tagStack) && $tagName !== $lastEl) {
                                                    $elPos = end($tagRegister[$lastEl]);
                                                    unset($newContent[$elPos]);
                                                    array_pop($tagRegister[$lastEl]);
                                                    array_pop($tagStack);
                                                    $lastEl = end($tagStack);
                                                }
                                            } else {
                                                // The end tag has no start tag on the stack
                                                $correctTag = false;
                                            }
                                        }
                                    }
                                    if (empty($tagRegister[$tagName]) || !$correctTag) {
                                        $setTag = false;
                                    } else {
                                        array_pop($tagRegister[$tagName]);
                                        if ($tags[$tagName]['nesting'] === 'global') {
                                            array_pop($tagStack);
                                        }
                                    }
                                } else {
                                    $tagRegister[$tagName][] = $c;
                                    if ($tags[$tagName]['nesting'] === 'global') {
                                        $tagStack[] = $tagName;
                                    }
                                }
                            }
                            if ($setTag) {
                                // Setting the tag
                                $newContent[$c++] = $lt . ($endTag ? '/' : '') . trim($tagParts[0] . ' ' . ($tagParts[1] ?? '')) . ($emptyTag ? ' /' : '') . $gt;
                            }
                        }
                    } else {
                        $newContent[$c++] = '<' . ($endTag ? '/' : '') . $tagContent . '>';
                    }
                } elseif ($keepAll) {
                    // This is if the tag was not defined in the array for processing:
                    if ($keepAll === 'protect') {
                        $lt = '&lt;';
                        $gt = '&gt;';
                    } else {
                        $lt = '<';
                        $gt = '>';
                    }
                    $newContent[$c++] = $lt . ($endTag ? '/' : '') . $tagContent . $gt;
                }
                $newContent[$c++] = $this->bidir_htmlspecialchars(substr($tok, $tagEnd + 1), $hSC);
            } else {
                $newContent[$c++] = $this->bidir_htmlspecialchars('<' . $tok, $hSC);
            }
        } else {
            $newContent[$c++] = $this->bidir_htmlspecialchars(($skipTag ? '' : '<') . $tok, $hSC);
            // It was not a tag anyways
            $skipTag = false;
        }
    }
    // Unsetting start tags that are still registered, i.e. were never closed:
    foreach ($tagRegister as $tag => $positions) {
        foreach ($positions as $pKey) {
            unset($newContent[$pKey]);
        }
    }
    $newContent = implode('', $newContent);
    $newContent = $this->stripEmptyTagsIfConfigured($newContent, $addConfig);
    return $newContent;
}
693 
/**
 * Converts special characters of $value to or from HTML entities.
 *
 * @param string $value Text to convert
 * @param int $dir Mode, cast to int: 1 = htmlspecialchars(), 2 = htmlspecialchars() without double encoding, -1 = htmlspecialchars_decode(), anything else returns $value unchanged
 * @return string Converted text
 */
public function bidir_htmlspecialchars($value, $dir)
{
    return match ((int)$dir) {
        1 => htmlspecialchars($value),
        2 => htmlspecialchars($value, ENT_COMPAT, 'UTF-8', false),
        -1 => htmlspecialchars_decode($value),
        default => $value,
    };
}
714 
/**
 * Prefixes relative resource paths in HTML tags and in <style> blocks.
 *
 * Handled attributes: "background" (td, body, table), "src" (img, input,
 * script, embed), "href" (link, a), "action" (form), "value" of param tags
 * named "movie" and "srcset" (source). Inside <style> blocks every url(...)
 * is prefixed. Tags that got a path prefixed are re-compiled with a
 * lower-cased tag name.
 *
 * @param string $main_prefix Prefix used unless $alternatives has an entry for the tag
 * @param string $content HTML content to process
 * @param array $alternatives Alternative prefixes, keyed by upper-cased tag name; the key "style" applies to <style> blocks
 * @param string $suffix Suffix appended to every prefixed path
 * @return string Processed content
 */
public function prefixResourcePath($main_prefix, $content, $alternatives = [], $suffix = '')
{
    $parts = $this->splitTags('embed,td,table,body,img,input,form,link,script,a,param,source', $content);
    foreach ($parts as $k => $v) {
        if ($k % 2) {
            $params = $this->get_tag_attributes($v);
            // Detect tag-ending so that it is re-applied correctly.
            $tagEnd = substr($v, -2) === '/>' ? ' />' : '>';
            // The 'name' of the first tag
            $firstTagName = $this->getFirstTagName($v);
            $prefixedRelPath = false;
            $prefix = $alternatives[strtoupper($firstTagName)] ?? $main_prefix;
            switch (strtolower($firstTagName)) {
                case 'td':
                case 'body':
                case 'table':
                    if (isset($params[0]['background'])) {
                        $params[0]['background'] = $this->prefixRelPath($prefix, $params[0]['background'], $suffix);
                        $prefixedRelPath = true;
                    }
                    break;
                case 'img':
                case 'input':
                case 'script':
                case 'embed':
                    if (isset($params[0]['src'])) {
                        $params[0]['src'] = $this->prefixRelPath($prefix, $params[0]['src'], $suffix);
                        $prefixedRelPath = true;
                    }
                    break;
                case 'link':
                case 'a':
                    if (isset($params[0]['href'])) {
                        $params[0]['href'] = $this->prefixRelPath($prefix, $params[0]['href'], $suffix);
                        $prefixedRelPath = true;
                    }
                    break;
                case 'form':
                    if (isset($params[0]['action'])) {
                        $params[0]['action'] = $this->prefixRelPath($prefix, $params[0]['action'], $suffix);
                        $prefixedRelPath = true;
                    }
                    break;
                case 'param':
                    if (isset($params[0]['name']) && $params[0]['name'] === 'movie' && isset($params[0]['value'])) {
                        $params[0]['value'] = $this->prefixRelPath($prefix, $params[0]['value'], $suffix);
                        $prefixedRelPath = true;
                    }
                    break;
                case 'source':
                    if (isset($params[0]['srcset'])) {
                        // Every comma-separated srcset entry is prefixed on its own
                        $srcsetImagePaths = GeneralUtility::trimExplode(',', $params[0]['srcset']);
                        foreach ($srcsetImagePaths as $i => $srcsetImagePath) {
                            $srcsetImagePaths[$i] = $this->prefixRelPath($prefix, $srcsetImagePath, $suffix);
                        }
                        $params[0]['srcset'] = implode(', ', $srcsetImagePaths);
                        $prefixedRelPath = true;
                    }
                    break;
            }
            if ($prefixedRelPath) {
                $compiledAttributes = $this->compileTagAttribs($params[0], $params[1]);
                $parts[$k] = '<' . trim(strtolower($firstTagName) . ' ' . $compiledAttributes) . $tagEnd;
            }
        }
    }
    $content = implode('', $parts);
    // Fix <style> section:
    $prefix = $alternatives['style'] ?? $main_prefix;
    if ((string)$prefix !== '') {
        $parts = $this->splitIntoBlock('style', $content);
        foreach ($parts as $k => &$part) {
            if ($k % 2) {
                // A callback is used instead of a replacement string: a prefix starting with a
                // digit would otherwise merge with "\1" into another backreference, and "$" or
                // "\" in prefix or suffix would be interpreted. On a regex error the part is kept.
                $part = preg_replace_callback(
                    '/(url[[:space:]]*\\([[:space:]]*["\']?)([^"\')]*)(["\']?[[:space:]]*\\))/i',
                    static fn (array $match): string => $match[1] . $prefix . $match[2] . $suffix . $match[3],
                    $part
                ) ?? $part;
            }
        }
        unset($part);
        $content = implode('', $parts);
    }
    return $content;
}
807 
/**
 * Prefixes a relative path with $prefix and appends $suffix.
 *
 * Values starting with "/" or "#" and URLs with a scheme are returned
 * unchanged.
 *
 * @param string $prefix Prefix to put in front of relative paths
 * @param string $srcVal Path or URL to check
 * @param string $suffix Suffix appended to prefixed paths
 * @return string The possibly prefixed value
 */
public function prefixRelPath($prefix, $srcVal, $suffix = '')
{
    // An empty value has no first character; reading offset 0 directly would raise a warning
    $firstChar = $srcVal[0] ?? '';
    // Only prefix if it's not an absolute URL or
    // only a link to a section within the page.
    if ($firstChar !== '/' && $firstChar !== '#') {
        $urlParts = parse_url($srcVal);
        // Only prefix URLs without a scheme
        if (!isset($urlParts['scheme'])) {
            $srcVal = $prefix . $srcVal . $suffix;
        }
    }
    return $srcVal;
}
830 
/**
 * Compiles an attribute array into an attribute string.
 *
 * @param array $tagAttrib Attribute values by name
 * @param array $meta Meta data by name: "origTag" replaces the attribute name, "dashType" is used as quote character
 * @return string Attributes joined by single spaces
 */
public function compileTagAttribs($tagAttrib, $meta = [])
{
    $compiled = [];
    foreach ($tagAttrib as $name => $value) {
        $attribute = $meta[$name]['origTag'] ?? $name;
        // A value part is written for non-empty values and whenever a quote type is known
        if (strcmp($value, '') !== 0 || isset($meta[$name]['dashType'])) {
            // Without a known quote type, integer-like values stay unquoted
            $quote = $meta[$name]['dashType'] ?? (MathUtility::canBeInterpretedAsInteger($value) ? '' : '"');
            $attribute .= '=' . $quote . $value . $quote;
        }
        $compiled[] = $attribute;
    }
    return implode(' ', $compiled);
}
852 
/**
 * Converts a TypoScript-style configuration array into the arguments used by HTMLcleaner().
 *
 * Options read from $TSconfig: allowTags, tags., localNesting, globalNesting,
 * rmTagIfNoAttrib, noAttrib, removeTags, stripEmptyTags, stripEmptyTags.,
 * keepNonMatchedTags and htmlSpecialChars.
 *
 * @param array $TSconfig Configuration array
 * @param array $keepTags Tag configuration to start with; merged over the tags from "allowTags"
 * @return array Index 0: tag configuration, 1: keepNonMatchedTags as string, 2: htmlSpecialChars as int, 3: additional configuration (stripEmptyTags)
 */
public function HTMLparserConfig($TSconfig, $keepTags = [])
{
    // Allow tags (base list, merged with incoming array)
    $allowedTags = array_flip(GeneralUtility::trimExplode(',', strtolower($TSconfig['allowTags'] ?? ''), true));
    $keepTags = array_merge($allowedTags, $keepTags);
    // Set config properties.
    if (is_array($TSconfig['tags.'] ?? null)) {
        // First pass: plain values switch single tags off ("0") or on ("1")
        foreach ($TSconfig['tags.'] as $key => $tagConfig) {
            if (is_array($tagConfig) || $key != strtolower($key)) {
                continue;
            }
            $switch = (string)$tagConfig;
            if ($switch === '0') {
                unset($keepTags[$key]);
            }
            if ($switch === '1' && !isset($keepTags[$key])) {
                $keepTags[$key] = 1;
            }
        }
        // Second pass: array values carry the per-tag configuration
        foreach ($TSconfig['tags.'] as $key => $tagConfig) {
            if (!is_array($tagConfig) || $key != strtolower($key)) {
                continue;
            }
            // Strip the trailing dot of the TypoScript key
            $key = substr($key, 0, -1);
            if (!is_array($keepTags[$key] ?? null)) {
                $keepTags[$key] = [];
            }
            if (is_array($tagConfig['fixAttrib.'] ?? null)) {
                foreach ($tagConfig['fixAttrib.'] as $atName => $atConfig) {
                    if (!is_array($atConfig)) {
                        continue;
                    }
                    $atName = substr($atName, 0, -1);
                    $existing = $keepTags[$key]['fixAttrib'][$atName] ?? null;
                    $attributeConfig = array_merge(is_array($existing) ? $existing : [], $atConfig);
                    // Comma-separated "range" and "list" options are turned into arrays
                    foreach (['range', 'list'] as $listOption) {
                        if ((string)($attributeConfig[$listOption] ?? '') !== '') {
                            $attributeConfig[$listOption] = GeneralUtility::trimExplode(',', $attributeConfig[$listOption]);
                        }
                    }
                    $keepTags[$key]['fixAttrib'][$atName] = $attributeConfig;
                }
            }
            unset($tagConfig['fixAttrib.'], $tagConfig['fixAttrib']);
            if (!empty($tagConfig['rmTagIfNoAttrib']) && empty($tagConfig['nesting'])) {
                $tagConfig['nesting'] = 1;
            }
            $keepTags[$key] = array_merge($keepTags[$key], $tagConfig);
        }
    }
    // LocalNesting
    if (!empty($TSconfig['localNesting'])) {
        foreach (GeneralUtility::trimExplode(',', strtolower($TSconfig['localNesting']), true) as $tagName) {
            if (!isset($keepTags[$tagName])) {
                continue;
            }
            if (!is_array($keepTags[$tagName])) {
                $keepTags[$tagName] = [];
            }
            $keepTags[$tagName]['nesting'] = 1;
        }
    }
    // GlobalNesting
    if (!empty($TSconfig['globalNesting'])) {
        foreach (GeneralUtility::trimExplode(',', strtolower($TSconfig['globalNesting']), true) as $tagName) {
            if (!isset($keepTags[$tagName])) {
                continue;
            }
            if (!is_array($keepTags[$tagName])) {
                $keepTags[$tagName] = [];
            }
            $keepTags[$tagName]['nesting'] = 'global';
        }
    }
    // rmTagIfNoAttrib, which also turns on nesting unless it is already set
    if (!empty($TSconfig['rmTagIfNoAttrib'])) {
        foreach (GeneralUtility::trimExplode(',', strtolower($TSconfig['rmTagIfNoAttrib']), true) as $tagName) {
            if (!isset($keepTags[$tagName])) {
                continue;
            }
            if (!is_array($keepTags[$tagName])) {
                $keepTags[$tagName] = [];
            }
            $keepTags[$tagName]['rmTagIfNoAttrib'] = 1;
            if (empty($keepTags[$tagName]['nesting'])) {
                $keepTags[$tagName]['nesting'] = 1;
            }
        }
    }
    // noAttrib
    if (!empty($TSconfig['noAttrib'])) {
        foreach (GeneralUtility::trimExplode(',', strtolower($TSconfig['noAttrib']), true) as $tagName) {
            if (!isset($keepTags[$tagName])) {
                continue;
            }
            if (!is_array($keepTags[$tagName])) {
                $keepTags[$tagName] = [];
            }
            $keepTags[$tagName]['allowedAttribs'] = 0;
        }
    }
    // removeTags: applies to every listed tag, whether or not it was known before
    if (!empty($TSconfig['removeTags'])) {
        foreach (GeneralUtility::trimExplode(',', strtolower($TSconfig['removeTags']), true) as $tagName) {
            $keepTags[$tagName] = [
                'allowedAttribs' => 0,
                'rmTagIfNoAttrib' => 1,
            ];
        }
    }
    // Create additional configuration:
    $addConfig = [];
    if (isset($TSconfig['stripEmptyTags'])) {
        $addConfig['stripEmptyTags'] = $TSconfig['stripEmptyTags'];
        if (isset($TSconfig['stripEmptyTags.'])) {
            $addConfig['stripEmptyTags.'] = $TSconfig['stripEmptyTags.'];
        }
    }
    return [
        $keepTags,
        '' . ($TSconfig['keepNonMatchedTags'] ?? ''),
        (int)($TSconfig['htmlSpecialChars'] ?? 0),
        $addConfig,
    ];
}
980 
/**
 * Removes tags that contain nothing but spaces, repeated until nothing is left to remove.
 *
 * @param string $content HTML content to process
 * @param string $tagList Comma-separated list of tag names; if empty, every tag is a candidate
 * @param bool $treatNonBreakingSpaceAsEmpty If set, "&nbsp;" counts as empty content as well
 * @param bool $keepTags If set, $tagList names the tags to keep instead of the tags to strip
 * @return string Content without the empty tags
 */
public function stripEmptyTags($content, $tagList = '', $treatNonBreakingSpaceAsEmpty = false, $keepTags = false)
{
    // Default: all characters until you reach a > or space
    $tagRegEx = '[^ >]+';
    if (!empty($tagList)) {
        $tagRegEx = implode('|', GeneralUtility::trimExplode(',', $tagList, true));
        if ($keepTags) {
            // Negative lookahead: any tag name except the listed ones
            $tagRegEx = '(?!' . $tagRegEx . ')[^ >]+';
        }
    }
    $nbspRegex = $treatNonBreakingSpaceAsEmpty ? '|(&nbsp;)' : '';
    $finalRegex = sprintf('/<(%s)[^>]*>( %s)*<\/\\1[^>]*>/i', $tagRegEx, $nbspRegex);
    // Removing an inner empty tag can leave its parent empty, so repeat until a pass removes nothing
    $count = 1;
    do {
        $content = preg_replace($finalRegex, '', $content, -1, $count) ?? $content;
    } while ($count !== 0);
    return $content;
}
1009 
/**
 * Strips empty tags from $value if the configuration asks for it.
 *
 * @param string $value Content to process
 * @param array $configuration Configuration; "stripEmptyTags" enables stripping, "stripEmptyTags." may hold "keepTags", "tags" and "treatNonBreakingSpaceAsEmpty"
 * @return string The processed content, or $value unchanged if stripping is not enabled
 */
protected function stripEmptyTagsIfConfigured(string $value, array $configuration): string
{
    if (empty($configuration['stripEmptyTags'])) {
        return $value;
    }

    $settings = $configuration['stripEmptyTags.'] ?? null;
    $tagList = null;
    $listNamesTagsToKeep = false;
    // "keepTags" takes precedence over "tags"
    if (!empty($settings['keepTags'])) {
        $tagList = $settings['keepTags'];
        $listNamesTagsToKeep = true;
    } elseif (!empty($settings['tags'])) {
        $tagList = $settings['tags'];
    }

    return $this->stripEmptyTags(
        $value,
        $tagList,
        !empty($settings['treatNonBreakingSpaceAsEmpty']),
        $listNamesTagsToKeep
    );
}
1032 }
‪TYPO3\CMS\Core\Html
Definition: DefaultSanitizerBuilder.php:18
‪TYPO3\CMS\Core\Html\HtmlParser\HTMLparserConfig
‪array HTMLparserConfig($TSconfig, $keepTags=[])
Definition: HtmlParser.php:861
‪TYPO3\CMS\Core\Html\HtmlParser\getFirstTagName
‪string getFirstTagName($str, $preserveCase=false)
Definition: HtmlParser.php:241
‪TYPO3\CMS\Core\Html\HtmlParser\split_tag_attributes
‪array split_tag_attributes($tag)
Definition: HtmlParser.php:317
‪TYPO3\CMS\Core\Html\HtmlParser\VOID_ELEMENTS
‪const VOID_ELEMENTS
Definition: HtmlParser.php:28
‪TYPO3\CMS\Core\Html\HtmlParser\stripEmptyTags
‪string stripEmptyTags($content, $tagList='', $treatNonBreakingSpaceAsEmpty=false, $keepTags=false)
Definition: HtmlParser.php:991
‪TYPO3\CMS\Core\Html\HtmlParser
Definition: HtmlParser.php:26
‪$parser
‪$parser
Definition: annotationChecker.php:103
‪TYPO3\CMS\Core\Html\HtmlParser\stripEmptyTagsIfConfigured
‪stripEmptyTagsIfConfigured(string $value, array $configuration)
Definition: HtmlParser.php:1013
‪$dir
‪$dir
Definition: validateRstFiles.php:257
‪TYPO3\CMS\Core\Html\HtmlParser\getFirstTag
‪string getFirstTag($str)
Definition: HtmlParser.php:218
‪TYPO3\CMS\Core\Html\HtmlParser\prefixRelPath
‪string prefixRelPath($prefix, $srcVal, $suffix='')
Definition: HtmlParser.php:817
‪TYPO3\CMS\Core\Html\HtmlParser\splitTags
‪array splitTags($tag, $content)
Definition: HtmlParser.php:157
‪TYPO3\CMS\Core\Html\HtmlParser\prefixResourcePath
‪string prefixResourcePath($main_prefix, $content, $alternatives=[], $suffix='')
Definition: HtmlParser.php:725
‪TYPO3\CMS\Core\Html\HtmlParser\get_tag_attributes
‪array get_tag_attributes($tag, $deHSC=false)
Definition: HtmlParser.php:267
‪TYPO3\CMS\Core\Html\HtmlParser\compileTagAttribs
‪string compileTagAttribs($tagAttrib, $meta=[])
Definition: HtmlParser.php:839
‪TYPO3\CMS\Core\Html\HtmlParser\HTMLcleaner
‪string HTMLcleaner($content, $tags=[], $keepAll=0, $hSC=0, $addConfig=[])
Definition: HtmlParser.php:385
‪TYPO3\CMS\Core\Html\HtmlParser\removeFirstAndLastTag
‪string removeFirstAndLastTag($str)
Definition: HtmlParser.php:195
‪TYPO3\CMS\Core\Html\HtmlParser\splitIntoBlockRecursiveProc
‪string splitIntoBlockRecursiveProc($tag, $content, &$procObj, $callBackContent, $callBackTags, $level=0)
Definition: HtmlParser.php:122
‪TYPO3\CMS\Core\Html\HtmlParser\splitIntoBlock
‪array splitIntoBlock($tag, $content, $eliminateExtraEndTags=false)
Definition: HtmlParser.php:47
‪TYPO3\CMS\Core\Utility\MathUtility
Definition: MathUtility.php:24
‪TYPO3\CMS\Core\Html\HtmlParser\bidir_htmlspecialchars
‪string bidir_htmlspecialchars($value, $dir)
Definition: HtmlParser.php:701
‪TYPO3\CMS\Core\Utility\GeneralUtility
Definition: GeneralUtility.php:52
‪TYPO3\CMS\Core\Utility\GeneralUtility\trimExplode
‪static list< string > trimExplode(string $delim, string $string, bool $removeEmptyValues=false, int $limit=0)
Definition: GeneralUtility.php:822