‪TYPO3CMS  10.4
HtmlParser.php
Go to the documentation of this file.
1 <?php
2 
3 /*
4  * This file is part of the TYPO3 CMS project.
5  *
6  * It is free software; you can redistribute it and/or modify it under
7  * the terms of the GNU General Public License, either version 2
8  * of the License, or any later version.
9  *
10  * For the full copyright and license information, please read the
11  * LICENSE.txt file that was distributed with this source code.
12  *
13  * The TYPO3 project - inspiring people to share!
14  */
15 
16 namespace ‪TYPO3\CMS\Core\Html;
17 
21 
27 {
31  protected ‪$caseShift_cache = [];
32 
33  // Void elements that do not have closing tags, as defined by HTML5, except link element
34  const ‪VOID_ELEMENTS = 'area|base|br|col|command|embed|hr|img|input|keygen|meta|param|source|track|wbr';
35 
36  /************************************
37  *
38  * Parsing HTML code
39  *
40  ************************************/
/**
 * Splits $content at the start/end tags named in $tag and returns the pieces,
 * keeping nested occurrences of those tags inside the block they belong to.
 *
 * A new array entry is started whenever a start tag is met on ground level and
 * whenever an end tag brings the nesting back to ground level. Other methods of
 * this class treat the odd keys of the result as the blocks (wrapping tags
 * included) and the even keys as the content in between.
 *
 * @param string $tag Comma separated list of tag names to split by
 * @param string $content HTML content to split
 * @param bool $eliminateExtraEndTags If set, end tags that would push the nesting level below zero are dropped
 * @return array The pieces in document order; empty array if the split produced nothing
 */
public function splitIntoBlock($tag, $content, $eliminateExtraEndTags = false)
{
    $quotedNames = array_map(
        static function ($name) {
            return preg_quote($name, '/');
        },
        array_unique(GeneralUtility::trimExplode(',', $tag, true))
    );
    $pattern = '/\\<\\/?(' . implode('|', $quotedNames) . ')(\\s*\\>|\\s[^\\>]*\\>)/si';
    $segments = preg_split($pattern, $content);
    if (empty($segments)) {
        return [];
    }

    $result = [];
    // Text before the first matched tag opens the first buffer
    $buffer = $segments[0];
    // Byte offset in $content of the tag that precedes the next segment
    $offset = strlen($segments[0]);
    $depth = 0;

    // Every remaining segment is preceded in $content by exactly one matched tag
    foreach (array_slice($segments, 1) as $segment) {
        $closing = substr($content, $offset, 2) === '</';
        $tagLength = strcspn(substr($content, $offset), '>') + 1;

        if ($closing) {
            $depth--;
            $dropped = $eliminateExtraEndTags && $depth < 0;
            if ($dropped) {
                // Surplus end tag: forget it and stay on ground level
                $depth = 0;
            } else {
                $buffer .= substr($content, $offset, $tagLength);
            }
            $offset += $tagLength;
            // Back on ground level by a real end tag: the block is complete
            if ($depth === 0 && !$dropped) {
                $result[] = $buffer;
                $buffer = '';
            }
            $trailing = substr($content, $offset, strlen($segment));
            $offset += strlen($trailing);
            $buffer .= $trailing;
            continue;
        }

        // Start tag on ground level: flush what was collected so far
        if ($depth === 0) {
            $result[] = $buffer;
            $buffer = '';
        }
        $depth++;
        // Take the tag together with the text that follows it
        $chunk = substr($content, $offset, strlen($segment) + $tagLength);
        $offset += strlen($chunk);
        $buffer .= $chunk;
    }

    $result[] = $buffer;
    return $result;
}
115 
/**
 * Splits $content into blocks with splitIntoBlock() (surplus end tags eliminated)
 * and processes the result recursively through callback methods on $procObj.
 *
 * For every block (odd key) an array with the keys tag_start, tag_end, tag_name
 * and content is built, where content is the recursively processed inner HTML.
 * If $callBackTags is set, that array is passed to $procObj->{$callBackTags}()
 * and replaced by its return value before being glued together again. Content
 * between blocks (even keys) is passed through $procObj->{$callBackContent}()
 * if $callBackContent is set.
 *
 * @param string $tag Comma separated list of tag names to split by
 * @param string $content HTML content to process
 * @param object $procObj Object holding the callback methods
 * @param string $callBackContent Name of the method called for content between blocks: ($content, $level)
 * @param string $callBackTags Name of the method called for each block: ($tagsArray, $level)
 * @param int $level Current recursion depth, passed on to the callbacks
 * @return string The re-assembled content
 */
public function splitIntoBlockRecursiveProc($tag, $content, &$procObj, $callBackContent, $callBackTags, $level = 0)
{
    $pieces = $this->splitIntoBlock($tag, $content, true);
    foreach ($pieces as $index => $piece) {
        if ($index % 2 === 0) {
            // Content outside of any block
            if ($callBackContent) {
                $pieces[$index] = $procObj->{$callBackContent}($pieces[$index], $level);
            }
            continue;
        }

        $tagName = $this->getFirstTagName($piece, true);
        $block = [
            'tag_start' => $this->getFirstTag($piece),
            'tag_end' => '</' . $tagName . '>',
            'tag_name' => strtolower($tagName),
            'content' => $this->splitIntoBlockRecursiveProc(
                $tag,
                $this->removeFirstAndLastTag($piece),
                $procObj,
                $callBackContent,
                $callBackTags,
                $level + 1
            ),
        ];
        if ($callBackTags) {
            $block = $procObj->{$callBackTags}($block, $level);
        }
        $pieces[$index] = $block['tag_start'] . $block['content'] . $block['tag_end'];
    }
    return implode('', $pieces);
}
151 
/**
 * Splits $content at every single tag named in $tag (start tags and
 * self-closing tags; end tags are not matched by the pattern).
 *
 * @param string $tag Comma separated list of tag names to split by
 * @param string $content HTML content to split
 * @return array Even keys hold the content outside the tags, odd keys hold the matched tags; empty array if the split produced nothing
 */
public function splitTags($tag, $content)
{
    $quotedNames = array_map(
        static function ($name) {
            return preg_quote($name, '/');
        },
        GeneralUtility::trimExplode(',', $tag, true)
    );
    $pattern = '/\\<(' . implode('|', $quotedNames) . ')(\\s[^>]*)?\\/?>/si';
    $segments = preg_split($pattern, $content);
    if (empty($segments)) {
        return [];
    }

    $result = [$segments[0]];
    // Byte offset in $content of the tag that precedes the next segment
    $offset = strlen($segments[0]);
    foreach (array_slice($segments, 1) as $text) {
        // The tag runs from the current offset up to and including the next ">"
        $tagLength = strcspn(substr($content, $offset), '>') + 1;
        $tagMarkup = substr($content, $offset, $tagLength);
        $result[] = $tagMarkup;
        $result[] = $text;
        $offset += strlen($tagMarkup) + strlen($text);
    }
    return $result;
}
193 
/**
 * Returns what lies between the first and the last element node of $str,
 * as reported by SimpleParser.
 *
 * @param string $str HTML string, expected to be wrapped in a tag pair
 * @return string The inner content; empty string if there is no element node or the first and last element node are the same
 */
public function removeFirstAndLastTag($str)
{
    $parser = SimpleParser::fromString($str);
    $opening = $parser->getFirstNode(SimpleNode::TYPE_ELEMENT);
    $closing = $parser->getLastNode(SimpleNode::TYPE_ELEMENT);
    if ($opening === null || $opening === $closing) {
        return '';
    }

    $nodes = $parser->getNodes();
    $openingIndex = $opening->getIndex();
    // All nodes strictly between the two element nodes
    $inner = array_slice($nodes, $openingIndex + 1, $closing->getIndex() - $openingIndex - 1);
    return implode('', array_map('strval', $inner));
}
216 
/**
 * Returns the beginning of $str up to and including its first element node,
 * as reported by SimpleParser. Anything preceding that node is part of the result.
 *
 * @param string $str HTML string
 * @return string The leading part of $str ending with the first element node; empty string if there is none
 */
public function getFirstTag($str)
{
    $parser = SimpleParser::fromString($str);
    $firstElement = $parser->getFirstNode(SimpleNode::TYPE_ELEMENT);
    if ($firstElement === null) {
        return '';
    }

    $nodes = $parser->getNodes();
    $leading = array_slice($nodes, 0, $firstElement->getIndex() + 1);
    return implode('', array_map('strval', $leading));
}
238 
/**
 * Returns the name of the first element node in $str that reports an element name.
 *
 * @param string $str HTML string
 * @param bool $preserveCase If set, the name is returned as written; otherwise it is upper-cased
 * @return string The element name; empty string if no element node provides one
 */
public function getFirstTagName($str, $preserveCase = false)
{
    $parser = SimpleParser::fromString($str);
    foreach ($parser->getNodes(SimpleNode::TYPE_ELEMENT) as $node) {
        $elementName = $node->getElementName();
        if ($elementName !== null) {
            return $preserveCase ? $elementName : strtoupper($elementName);
        }
    }
    return '';
}
260 
/**
 * Parses the attributes of a tag into an array keyed by lower-cased attribute name.
 *
 * The tag is tokenized by split_tag_attributes(). A token becomes an attribute
 * name (with an empty value) unless it directly follows "=", in which case it
 * is the value of the most recently seen name.
 *
 * @param string $tag Tag, either complete ("<a href=...>") or only its attribute part
 * @param bool $deHSC If set, values are passed through htmlspecialchars_decode()
 * @return array Index 0: attribute name => value. Index 1: attribute name => meta data
 *               ("origTag" is the name as written, "dashType" the quote character around the value, if a value was given)
 */
public function get_tag_attributes($tag, $deHSC = false)
{
    [$components, $metaC] = $this->split_tag_attributes($tag);
    if (!is_array($components)) {
        return [null, null];
    }

    $attributes = [];
    $attributesMeta = [];
    // Name of the attribute that is still waiting for its value
    $pendingName = '';
    // True directly after "=" was seen
    $expectValue = false;
    foreach ($components as $position => $component) {
        if ($component === '=') {
            $expectValue = true;
            continue;
        }
        if ($expectValue) {
            // Assign the value only if there is an attribute waiting for it
            if ($pendingName) {
                $attributes[$pendingName] = $deHSC ? htmlspecialchars_decode($component) : $component;
                $attributesMeta[$pendingName]['dashType'] = $metaC[$position];
                $pendingName = '';
            }
        } else {
            $writtenName = preg_replace('/[^[:alnum:]_\\:\\-]/', '', $component) ?? '';
            if ($writtenName) {
                $pendingName = strtolower($writtenName);
                $attributesMeta[$pendingName] = [];
                $attributesMeta[$pendingName]['origTag'] = $writtenName;
                $attributes[$pendingName] = '';
            }
        }
        $expectValue = false;
    }
    return [$attributes, $attributesMeta];
}
308 
/**
 * Tokenizes the attribute part of a tag.
 *
 * A leading "<tagname " and a trailing ">" are cut off first. The rest is
 * split into quoted strings (quotes removed), unquoted words and "=" signs.
 *
 * @param string $tag Tag, either complete ("<a href=...>") or only its attribute part
 * @return array Index 0: list of tokens. Index 1: for each token the quote character that surrounded it, or an empty string
 */
public function split_tag_attributes($tag)
{
    $outer = [];
    if (preg_match('/(\\<[^\\s]+\\s+)?(.*?)\\s*(\\>)?$/s', $tag, $outer) !== 1) {
        return [[], []];
    }

    $values = [];
    $quoteMarks = [];
    $tokens = [];
    $found = preg_match_all('/("[^"]*"|\'[^\']*\'|[^\\s"\'\\=]+|\\=)/s', $outer[2], $tokens);
    if ($found > 0) {
        foreach ($tokens[1] as $token) {
            $lead = $token[0];
            $quoted = $lead === '"' || $lead === '\'';
            $quoteMarks[] = $quoted ? $lead : '';
            // Quoted tokens are stored without their surrounding quotes
            $values[] = $quoted ? substr($token, 1, -1) : $token;
        }
    }
    return [$values, $quoteMarks];
}
347 
348  /*********************************
349  *
350  * Clean HTML code
351  *
352  *********************************/
/**
 * Cleans HTML: walks through all tags of $content and keeps, rewrites,
 * protects or removes each of them according to $tags.
 *
 * Comments ("<!-- -->") and CDATA sections written as "<![CDATA[*/ ... /*]]>*/"
 * are copied through untouched. Tokens whose first character after "<" is not
 * alphanumeric or "/" are not treated as tags.
 *
 * Keys read from a tag configuration array ($tags[tagname]):
 * overrideAttribs, allowedAttribs (plus pre-split _allowedAttribs), fixAttrib,
 * protect, remap, rmTagIfNoAttrib, nesting.
 * Keys read from each fixAttrib entry: set, unset, default, always, trim, intval,
 * lower, upper, range, list, casesensitiveComp, removeIfFalse, removeIfEquals,
 * prefixLocalAnchors, prefixRelPathWith, userFunc, "userFunc.".
 *
 * @param string $content HTML content to clean
 * @param array $tags Configuration keyed by lower-case tag name; an array value triggers processing, any other value keeps the tag as it is
 * @param mixed $keepAll If true-ish, tags not present in $tags are kept; the string "protect" converts their brackets to &lt; / &gt;
 * @param int $hSC Mode passed to bidir_htmlspecialchars() for all non-tag content
 * @param array $addConfig Additional configuration, passed to stripEmptyTagsIfConfigured()
 * @return string The cleaned content
 */
public function HTMLcleaner($content, $tags = [], $keepAll = 0, $hSC = 0, $addConfig = [])
{
    $newContent = [];
    $tokArr = explode('<', $content);
    $newContent[] = $this->bidir_htmlspecialchars(current($tokArr), $hSC);
    // We skip the first element in foreach loop
    $tokArrSliced = array_slice($tokArr, 1, null, true);
    // Next free position in $newContent; positions of opened tags are remembered in $tagRegister
    $c = 1;
    $tagRegister = [];
    $tagStack = [];
    $inComment = false;
    $inCdata = false;
    $skipTag = false;
    foreach ($tokArrSliced as $tok) {
        if ($inComment) {
            if (($eocPos = strpos($tok, '-->')) === false) {
                // End of comment is not found in the token. Go further until end of comment is found in other tokens.
                $newContent[$c++] = '<' . $tok;
                continue;
            }
            // Comment ends in the middle of the token: add comment and proceed with rest of the token
            $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 3);
            $tok = substr($tok, $eocPos + 3);
            $inComment = false;
            $skipTag = true;
        } elseif ($inCdata) {
            if (($eocPos = strpos($tok, '/*]]>*/')) === false) {
                // End of CDATA is not found in the token. Go further until it is found in other tokens.
                $newContent[$c++] = '<' . $tok;
                continue;
            }
            // CDATA ends in the middle of the token: add it and proceed with rest of the token
            $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 10);
            $tok = substr($tok, $eocPos + 10);
            $inCdata = false;
            $skipTag = true;
        } elseif (strpos($tok, '!--') === 0) {
            if (($eocPos = strpos($tok, '-->')) === false) {
                // Comment started in this token but does not end in it. Set a flag to skip till the end of comment
                $newContent[$c++] = '<' . $tok;
                $inComment = true;
                continue;
            }
            // Start and end of comment are both in the current token. Add comment and proceed with rest of the token
            $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 3);
            $tok = substr($tok, $eocPos + 3);
            $skipTag = true;
        } elseif (strpos($tok, '![CDATA[*/') === 0) {
            if (($eocPos = strpos($tok, '/*]]>*/')) === false) {
                // CDATA started in this token but does not end in it. Set a flag to skip till its end
                $newContent[$c++] = '<' . $tok;
                $inCdata = true;
                continue;
            }
            // Start and end of CDATA are both in the current token. Add it and proceed with rest of the token
            $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 10);
            $tok = substr($tok, $eocPos + 10);
            $skipTag = true;
        }
        $firstChar = $tok[0] ?? null;
        // It is a tag... (first char is a-z0-9 or /). This also avoids triggering on <?xml..> and <!DOCTYPE..>
        if (!$skipTag && preg_match('/[[:alnum:]\\/]/', (string)$firstChar) === 1) {
            $tagEnd = strpos($tok, '>');
            // If there is an end-bracket... tagEnd can't be 0 as the first character can't be a >
            if ($tagEnd) {
                $endTag = $firstChar === '/' ? 1 : 0;
                $tagContent = substr($tok, $endTag, $tagEnd - $endTag);
                $tagParts = preg_split('/\\s+/s', $tagContent, 2);
                $tagName = strtolower($tagParts[0]);
                $emptyTag = 0;
                if (isset($tags[$tagName])) {
                    // If there is processing to do for the tag:
                    if (is_array($tags[$tagName])) {
                        // No whitespace inside the group: a trailing space would become part of the
                        // last alternative and keep "wbr" from ever matching.
                        if (preg_match('/^(' . self::VOID_ELEMENTS . ')$/i', $tagName)) {
                            $emptyTag = 1;
                        }
                        // If NOT an endtag, do attribute processing
                        if (!$endTag) {
                            // Override attributes
                            if (isset($tags[$tagName]['overrideAttribs']) && (string)$tags[$tagName]['overrideAttribs'] !== '') {
                                $tagParts[1] = $tags[$tagName]['overrideAttribs'];
                            }
                            // Allowed attributes
                            if (isset($tags[$tagName]['allowedAttribs']) && (string)$tags[$tagName]['allowedAttribs'] !== '') {
                                // No attribs allowed
                                if ((string)$tags[$tagName]['allowedAttribs'] === '0') {
                                    $tagParts[1] = '';
                                } elseif (isset($tagParts[1]) && trim($tagParts[1])) {
                                    $tagAttrib = $this->get_tag_attributes($tagParts[1]);
                                    $tagParts[1] = '';
                                    $newTagAttrib = [];
                                    $tList = (array)(
                                        $tags[$tagName]['_allowedAttribs']
                                        ?? GeneralUtility::trimExplode(',', strtolower($tags[$tagName]['allowedAttribs']), true)
                                    );
                                    foreach ($tList as $allowTag) {
                                        if (isset($tagAttrib[0][$allowTag])) {
                                            $newTagAttrib[$allowTag] = $tagAttrib[0][$allowTag];
                                        }
                                    }

                                    $tagParts[1] = $this->compileTagAttribs($newTagAttrib, $tagAttrib[1]);
                                }
                            }
                            // Fixed attrib values
                            if (isset($tags[$tagName]['fixAttrib']) && is_array($tags[$tagName]['fixAttrib'])) {
                                // A tag without attributes has no second part
                                $tagAttrib = $this->get_tag_attributes($tagParts[1] ?? '');
                                $tagParts[1] = '';
                                foreach ($tags[$tagName]['fixAttrib'] as $attr => $params) {
                                    // Every option is optional, so none of them is read without a guard
                                    $caseSensitive = $params['casesensitiveComp'] ?? false;
                                    if (isset($params['set']) && $params['set'] !== '') {
                                        $tagAttrib[0][$attr] = $params['set'];
                                    }
                                    if (!empty($params['unset'])) {
                                        unset($tagAttrib[0][$attr]);
                                    }
                                    if (!empty($params['default']) && !isset($tagAttrib[0][$attr])) {
                                        $tagAttrib[0][$attr] = $params['default'];
                                    }
                                    if (!empty($params['always']) || isset($tagAttrib[0][$attr])) {
                                        if (!empty($params['trim'])) {
                                            $tagAttrib[0][$attr] = trim($tagAttrib[0][$attr]);
                                        }
                                        if (!empty($params['intval'])) {
                                            $tagAttrib[0][$attr] = (int)$tagAttrib[0][$attr];
                                        }
                                        if (!empty($params['lower'])) {
                                            $tagAttrib[0][$attr] = strtolower($tagAttrib[0][$attr]);
                                        }
                                        if (!empty($params['upper'])) {
                                            $tagAttrib[0][$attr] = strtoupper($tagAttrib[0][$attr]);
                                        }
                                        if (!empty($params['range'])) {
                                            if (isset($params['range'][1])) {
                                                $tagAttrib[0][$attr] = MathUtility::forceIntegerInRange($tagAttrib[0][$attr], (int)$params['range'][0], (int)$params['range'][1]);
                                            } else {
                                                $tagAttrib[0][$attr] = MathUtility::forceIntegerInRange($tagAttrib[0][$attr], (int)$params['range'][0]);
                                            }
                                        }
                                        if (isset($params['list']) && is_array($params['list'])) {
                                            // For the class attribute, remove from the attribute value any class not in the list
                                            // Classes are case sensitive
                                            if ($attr === 'class') {
                                                $newClasses = [];
                                                $classes = GeneralUtility::trimExplode(' ', $tagAttrib[0][$attr], true);
                                                foreach ($classes as $class) {
                                                    if (in_array($class, $params['list'])) {
                                                        $newClasses[] = $class;
                                                    }
                                                }
                                                if (!empty($newClasses)) {
                                                    $tagAttrib[0][$attr] = implode(' ', $newClasses);
                                                } else {
                                                    $tagAttrib[0][$attr] = $params['list'][0];
                                                }
                                            } else {
                                                if (!in_array($this->caseShift($tagAttrib[0][$attr], $caseSensitive), $this->caseShift($params['list'], $caseSensitive, $tagName))) {
                                                    $tagAttrib[0][$attr] = $params['list'][0];
                                                }
                                            }
                                        }
                                        $removeIfFalse = $params['removeIfFalse'] ?? null;
                                        if (
                                            ($removeIfFalse && $removeIfFalse !== 'blank' && !$tagAttrib[0][$attr])
                                            || ($removeIfFalse === 'blank' && (string)$tagAttrib[0][$attr] === '')
                                        ) {
                                            unset($tagAttrib[0][$attr]);
                                        }
                                        $removeIfEquals = $params['removeIfEquals'] ?? '';
                                        if ((string)$removeIfEquals !== '' && $this->caseShift($tagAttrib[0][$attr], $caseSensitive) === $this->caseShift($removeIfEquals, $caseSensitive)) {
                                            unset($tagAttrib[0][$attr]);
                                        }
                                        if (!empty($params['prefixLocalAnchors'])) {
                                            if (($tagAttrib[0][$attr][0] ?? '') === '#') {
                                                if ($params['prefixLocalAnchors'] == 2) {
                                                    /** @var ContentObjectRenderer $contentObjectRenderer */
                                                    $contentObjectRenderer = GeneralUtility::makeInstance(ContentObjectRenderer::class);
                                                    $prefix = $contentObjectRenderer->getUrlToCurrentLocation();
                                                } else {
                                                    $prefix = GeneralUtility::getIndpEnv('TYPO3_REQUEST_URL');
                                                }
                                                $tagAttrib[0][$attr] = $prefix . $tagAttrib[0][$attr];
                                            }
                                        }
                                        if (!empty($params['prefixRelPathWith'])) {
                                            $urlParts = parse_url($tagAttrib[0][$attr]);
                                            if (empty($urlParts['scheme']) && ($urlParts['path'][0] ?? '') !== '/') {
                                                // If it is NOT an absolute URL (by http: or starting "/")
                                                $tagAttrib[0][$attr] = $params['prefixRelPathWith'] . $tagAttrib[0][$attr];
                                            }
                                        }
                                        if (!empty($params['userFunc'])) {
                                            if (is_array($params['userFunc.'] ?? null)) {
                                                $params['userFunc.']['attributeValue'] = $tagAttrib[0][$attr];
                                            } else {
                                                $params['userFunc.'] = $tagAttrib[0][$attr];
                                            }
                                            $tagAttrib[0][$attr] = GeneralUtility::callUserFunction($params['userFunc'], $params['userFunc.'], $this);
                                        }
                                    }
                                }
                                $tagParts[1] = $this->compileTagAttribs($tagAttrib[0], $tagAttrib[1]);
                            }
                        } else {
                            // If endTag, remove any possible attributes:
                            $tagParts[1] = '';
                        }
                        // Protecting the tag by converting < and > to &lt; and &gt;
                        if (!empty($tags[$tagName]['protect'])) {
                            $lt = '&lt;';
                            $gt = '&gt;';
                        } else {
                            $lt = '<';
                            $gt = '>';
                        }
                        // Remapping tag name?
                        if (!empty($tags[$tagName]['remap'])) {
                            $tagParts[0] = $tags[$tagName]['remap'];
                        }
                        // rmTagIfNoAttrib: a start tag left without attributes is dropped
                        if ($endTag || empty($tags[$tagName]['rmTagIfNoAttrib']) || trim($tagParts[1] ?? '')) {
                            $setTag = true;
                            // Remove this closing tag if $tagName was among $TSconfig['removeTags']
                            if (
                                $endTag
                                && isset($tags[$tagName]['allowedAttribs'])
                                && $tags[$tagName]['allowedAttribs'] === 0
                                && ($tags[$tagName]['rmTagIfNoAttrib'] ?? null) === 1
                            ) {
                                $setTag = false;
                            }
                            if (isset($tags[$tagName]['nesting'])) {
                                if (!isset($tagRegister[$tagName])) {
                                    $tagRegister[$tagName] = [];
                                }
                                if ($endTag) {
                                    $correctTag = true;
                                    if ($tags[$tagName]['nesting'] === 'global') {
                                        $lastEl = end($tagStack);
                                        if ($tagName !== $lastEl) {
                                            if (in_array($tagName, $tagStack, true)) {
                                                // Drop all start tags opened after the matching one
                                                while (!empty($tagStack) && $tagName !== $lastEl) {
                                                    $elPos = end($tagRegister[$lastEl]);
                                                    unset($newContent[$elPos]);
                                                    array_pop($tagRegister[$lastEl]);
                                                    array_pop($tagStack);
                                                    $lastEl = end($tagStack);
                                                }
                                            } else {
                                                // End tag without any matching start tag on the stack
                                                $correctTag = false;
                                            }
                                        }
                                    }
                                    if (empty($tagRegister[$tagName]) || !$correctTag) {
                                        $setTag = false;
                                    } else {
                                        array_pop($tagRegister[$tagName]);
                                        if ($tags[$tagName]['nesting'] === 'global') {
                                            array_pop($tagStack);
                                        }
                                    }
                                } else {
                                    $tagRegister[$tagName][] = $c;
                                    if ($tags[$tagName]['nesting'] === 'global') {
                                        $tagStack[] = $tagName;
                                    }
                                }
                            }
                            if ($setTag) {
                                // Setting the tag
                                $newContent[$c++] = $lt . ($endTag ? '/' : '') . trim($tagParts[0] . ' ' . ($tagParts[1] ?? '')) . ($emptyTag ? ' /' : '') . $gt;
                            }
                        }
                    } else {
                        $newContent[$c++] = '<' . ($endTag ? '/' : '') . $tagContent . '>';
                    }
                } elseif ($keepAll) {
                    // This is if the tag was not defined in the array for processing:
                    if ($keepAll === 'protect') {
                        $lt = '&lt;';
                        $gt = '&gt;';
                    } else {
                        $lt = '<';
                        $gt = '>';
                    }
                    $newContent[$c++] = $lt . ($endTag ? '/' : '') . $tagContent . $gt;
                }
                $newContent[$c++] = $this->bidir_htmlspecialchars(substr($tok, $tagEnd + 1), $hSC);
            } else {
                $newContent[$c++] = $this->bidir_htmlspecialchars('<' . $tok, $hSC);
            }
        } else {
            $newContent[$c++] = $this->bidir_htmlspecialchars(($skipTag ? '' : '<') . $tok, $hSC);
            // It was not a tag anyways
            $skipTag = false;
        }
    }
    // Unsetting tags: start tags still registered here never got their end tag
    foreach ($tagRegister as $tag => $positions) {
        foreach ($positions as $pKey) {
            unset($newContent[$pKey]);
        }
    }
    $newContent = implode('', $newContent);
    $newContent = $this->stripEmptyTagsIfConfigured($newContent, $addConfig);
    return $newContent;
}
686 
/**
 * Converts $value with htmlspecialchars() or htmlspecialchars_decode(),
 * depending on $dir.
 *
 * @param string $value The string to convert
 * @param int $dir 1: htmlspecialchars(); 2: htmlspecialchars() without double encoding; -1: htmlspecialchars_decode(); anything else: no conversion
 * @return string The converted string
 */
public function bidir_htmlspecialchars($value, $dir)
{
    $mode = (int)$dir;
    if ($mode === 1) {
        return htmlspecialchars($value);
    }
    if ($mode === 2) {
        return htmlspecialchars($value, ENT_COMPAT, 'UTF-8', false);
    }
    if ($mode === -1) {
        return htmlspecialchars_decode($value);
    }
    return $value;
}
707 
/**
 * Prefixes the relative resource paths found in $content.
 *
 * Handled are the "background" attribute of td/body/table, "src" of
 * img/input/script/embed, "href" of link/a, "action" of form, the "value" of
 * <param name="movie"> and url(...) references inside <style> blocks. Whether a
 * path counts as relative is decided by prefixRelPath().
 *
 * @param string $main_prefix Prefix to put in front of relative paths
 * @param string $content HTML content
 * @param array $alternatives Alternative prefixes, keyed by upper-case tag name; the key "style" is used for <style> blocks
 * @param string $suffix Suffix to append to prefixed paths
 * @return string The processed content
 */
public function prefixResourcePath($main_prefix, $content, $alternatives = [], $suffix = '')
{
    // Attribute that carries the resource reference, per tag name
    $resourceAttributes = [
        'td' => 'background',
        'body' => 'background',
        'table' => 'background',
        'img' => 'src',
        'input' => 'src',
        'script' => 'src',
        'embed' => 'src',
        'link' => 'href',
        'a' => 'href',
        'form' => 'action',
    ];

    $parts = $this->splitTags('embed,td,table,body,img,input,form,link,script,a,param', $content);
    foreach ($parts as $k => $v) {
        if ($k % 2 === 0) {
            // Even keys are the content between the tags
            continue;
        }
        $params = $this->get_tag_attributes($v);
        // Detect tag-ending so that it is re-applied correctly.
        $tagEnd = substr($v, -2) === '/>' ? ' />' : '>';
        // The 'name' of the first tag
        $firstTagName = $this->getFirstTagName($v);
        $tagName = strtolower($firstTagName);
        $prefix = $alternatives[strtoupper($firstTagName)] ?? $main_prefix;

        if ($tagName === 'param') {
            // Only <param name="movie"> carries a resource, in its "value" attribute
            $attribute = ($params[0]['name'] ?? '') === 'movie' ? 'value' : null;
        } else {
            $attribute = $resourceAttributes[$tagName] ?? null;
        }
        // Nothing to do if the tag is unknown or the attribute is missing/empty
        if ($attribute === null || empty($params[0][$attribute])) {
            continue;
        }

        $params[0][$attribute] = $this->prefixRelPath($prefix, $params[0][$attribute], $suffix);
        $parts[$k] = '<' . trim($tagName . ' ' . $this->compileTagAttribs($params[0], $params[1])) . $tagEnd;
    }
    $content = implode('', $parts);

    // Fix <style> section:
    $prefix = $alternatives['style'] ?? $main_prefix;
    if ((string)$prefix !== '') {
        $parts = $this->splitIntoBlock('style', $content);
        foreach ($parts as $k => $part) {
            if ($k % 2 === 0) {
                continue;
            }
            // A callback is used so that $prefix and $suffix are inserted literally: inside a
            // replacement string a prefix starting with a digit would merge with "\1" into
            // another back reference, and "$n" or "\n" sequences would be interpreted as well.
            $rewritten = preg_replace_callback(
                '/(url[[:space:]]*\\([[:space:]]*["\']?)([^"\')]*)(["\']?[[:space:]]*\\))/i',
                static function (array $match) use ($prefix, $suffix) {
                    return $match[1] . $prefix . $match[2] . $suffix . $match[3];
                },
                $part
            );
            // Keep the block unchanged if the regular expression engine failed
            if ($rewritten !== null) {
                $parts[$k] = $rewritten;
            }
        }
        $content = implode('', $parts);
    }
    return $content;
}
802 
/**
 * Wraps $srcVal in $prefix and $suffix unless it starts with "/" or "#"
 * or parse_url() finds a scheme in it.
 *
 * @param string $prefix Prefix to put in front of the path
 * @param string $srcVal The path or URL to check
 * @param string $suffix Suffix to append to the path
 * @return string $srcVal, prefixed and suffixed if it was considered relative
 */
public function prefixRelPath($prefix, $srcVal, $suffix = '')
{
    $srcVal = (string)$srcVal;
    // Read the first character without failing on an empty string
    $firstChar = $srcVal[0] ?? '';
    // Only prefix if it's not an absolute path or
    // only a link to a section within the page.
    if ($firstChar === '/' || $firstChar === '#') {
        return $srcVal;
    }
    // parse_url() returns false for seriously malformed URLs and omits the
    // "scheme" key if there is none; both count as "no scheme" here.
    $urlParts = parse_url($srcVal);
    if (empty($urlParts['scheme'])) {
        $srcVal = $prefix . $srcVal . $suffix;
    }
    return $srcVal;
}
825 
/**
 * Prepares a string, or every value of an array, for comparison: unless a
 * case sensitive comparison is requested, everything is converted to upper case.
 *
 * Converted arrays are stored in $this->caseShift_cache if $cacheKey is given;
 * a later call with the same key returns the stored array regardless of $str.
 *
 * @param string|array $str String, or array of strings, to convert
 * @param bool $caseSensitiveComparison If set, $str is returned unchanged
 * @param string $cacheKey Key for caching the converted array
 * @return string|array The converted value
 */
public function caseShift($str, $caseSensitiveComparison, $cacheKey = '')
{
    if ($caseSensitiveComparison) {
        return $str;
    }
    if (!is_array($str)) {
        return strtoupper($str);
    }
    // Fetch from runlevel cache
    if ($cacheKey && isset($this->caseShift_cache[$cacheKey])) {
        return $this->caseShift_cache[$cacheKey];
    }
    $upperCased = array_map('strtoupper', $str);
    if ($cacheKey) {
        $this->caseShift_cache[$cacheKey] = $upperCased;
    }
    return $upperCased;
}
857 
/**
 * Compiles an attribute array back into an attribute string.
 *
 * An attribute gets a value part if its value is not an empty string or if
 * $meta records a quote type for it; otherwise only its name is written.
 * Without a recorded quote type the value is wrapped in double quotes, unless
 * MathUtility::canBeInterpretedAsInteger() accepts it, in which case no quotes are used.
 *
 * @param array $tagAttrib Attribute name => value
 * @param array $meta Attribute name => meta data ("origTag": name as originally written, "dashType": quote character); may lack entries
 * @return string The attributes, separated by single spaces
 */
public function compileTagAttribs($tagAttrib, $meta = [])
{
    $accu = [];
    foreach ($tagAttrib as $k => $v) {
        // Attributes added during processing have no meta data, so every meta read is guarded
        $attr = !empty($meta[$k]['origTag']) ? $meta[$k]['origTag'] : $k;
        if ((string)$v !== '' || isset($meta[$k]['dashType'])) {
            $dash = !empty($meta[$k]['dashType'])
                ? $meta[$k]['dashType']
                : (MathUtility::canBeInterpretedAsInteger($v) ? '' : '"');
            $attr .= '=' . $dash . $v . $dash;
        }
        $accu[] = $attr;
    }
    return implode(' ', $accu);
}
879 
/**
 * Converts a TSconfig style configuration array into the arguments for HTMLcleaner().
 *
 * Keys read from $TSconfig: allowTags, "tags." (per-tag settings; a scalar 0/1
 * removes/adds the tag, an array is merged into the tag configuration),
 * localNesting, globalNesting, rmTagIfNoAttrib, noAttrib, removeTags,
 * stripEmptyTags, "stripEmptyTags.", keepNonMatchedTags, htmlSpecialChars.
 *
 * @param array $TSconfig The configuration to convert
 * @param array $keepTags Tag configuration to start from; overrides the entries created from allowTags
 * @return array Index 0: tag configuration, 1: keepNonMatchedTags as string, 2: htmlSpecialChars as integer, 3: additional configuration (stripEmptyTags)
 */
public function HTMLparserConfig($TSconfig, $keepTags = [])
{
    // Allow tags (base list, merged with incoming array)
    $alTags = array_flip(GeneralUtility::trimExplode(',', strtolower($TSconfig['allowTags'] ?? ''), true));
    $keepTags = array_merge($alTags, $keepTags);
    // Set config properties.
    if (isset($TSconfig['tags.']) && is_array($TSconfig['tags.'])) {
        // First pass: scalar entries switch single tags off (0) or on (1)
        foreach ($TSconfig['tags.'] as $key => $tagC) {
            if (!is_array($tagC) && $key == strtolower($key)) {
                if ((string)$tagC === '0') {
                    unset($keepTags[$key]);
                }
                if ((string)$tagC === '1' && !isset($keepTags[$key])) {
                    $keepTags[$key] = 1;
                }
            }
        }
        // Second pass: array entries ("tagname.") carry the detailed configuration
        foreach ($TSconfig['tags.'] as $key => $tagC) {
            if (is_array($tagC) && $key == strtolower($key)) {
                // Strip the trailing dot
                $key = substr($key, 0, -1);
                if (!is_array($keepTags[$key] ?? null)) {
                    $keepTags[$key] = [];
                }
                if (isset($tagC['fixAttrib.']) && is_array($tagC['fixAttrib.'])) {
                    foreach ($tagC['fixAttrib.'] as $atName => $atConfig) {
                        if (is_array($atConfig)) {
                            $atName = substr($atName, 0, -1);
                            if (!is_array($keepTags[$key]['fixAttrib'][$atName] ?? null)) {
                                $keepTags[$key]['fixAttrib'][$atName] = [];
                            }
                            $keepTags[$key]['fixAttrib'][$atName] = array_merge($keepTags[$key]['fixAttrib'][$atName], $atConfig);
                            // "range" and "list" are comma lists in the configuration; a value that
                            // already is an array (coming from $keepTags) is left untouched.
                            foreach (['range', 'list'] as $listOption) {
                                $optionValue = $keepTags[$key]['fixAttrib'][$atName][$listOption] ?? '';
                                if (!is_array($optionValue) && (string)$optionValue !== '') {
                                    $keepTags[$key]['fixAttrib'][$atName][$listOption] = GeneralUtility::trimExplode(',', $optionValue);
                                }
                            }
                        }
                    }
                }
                unset($tagC['fixAttrib.'], $tagC['fixAttrib']);
                if (!empty($tagC['rmTagIfNoAttrib']) && empty($tagC['nesting'])) {
                    $tagC['nesting'] = 1;
                }
                $keepTags[$key] = array_merge($keepTags[$key], $tagC);
            }
        }
    }
    // LocalNesting
    if (!empty($TSconfig['localNesting'])) {
        $lN = GeneralUtility::trimExplode(',', strtolower($TSconfig['localNesting']), true);
        foreach ($lN as $tn) {
            if (isset($keepTags[$tn])) {
                if (!is_array($keepTags[$tn])) {
                    $keepTags[$tn] = [];
                }
                $keepTags[$tn]['nesting'] = 1;
            }
        }
    }
    // GlobalNesting
    if (!empty($TSconfig['globalNesting'])) {
        $lN = GeneralUtility::trimExplode(',', strtolower($TSconfig['globalNesting']), true);
        foreach ($lN as $tn) {
            if (isset($keepTags[$tn])) {
                if (!is_array($keepTags[$tn])) {
                    $keepTags[$tn] = [];
                }
                $keepTags[$tn]['nesting'] = 'global';
            }
        }
    }
    // Tags to remove if they end up without attributes
    if (!empty($TSconfig['rmTagIfNoAttrib'])) {
        $lN = GeneralUtility::trimExplode(',', strtolower($TSconfig['rmTagIfNoAttrib']), true);
        foreach ($lN as $tn) {
            if (isset($keepTags[$tn])) {
                if (!is_array($keepTags[$tn])) {
                    $keepTags[$tn] = [];
                }
                $keepTags[$tn]['rmTagIfNoAttrib'] = 1;
                if (empty($keepTags[$tn]['nesting'])) {
                    $keepTags[$tn]['nesting'] = 1;
                }
            }
        }
    }
    // Tags that must not carry attributes
    if (!empty($TSconfig['noAttrib'])) {
        $lN = GeneralUtility::trimExplode(',', strtolower($TSconfig['noAttrib']), true);
        foreach ($lN as $tn) {
            if (isset($keepTags[$tn])) {
                if (!is_array($keepTags[$tn])) {
                    $keepTags[$tn] = [];
                }
                $keepTags[$tn]['allowedAttribs'] = 0;
            }
        }
    }
    // Tags to remove: no attributes allowed plus "remove if no attributes"
    if (!empty($TSconfig['removeTags'])) {
        $lN = GeneralUtility::trimExplode(',', strtolower($TSconfig['removeTags']), true);
        foreach ($lN as $tn) {
            $keepTags[$tn] = [];
            $keepTags[$tn]['allowedAttribs'] = 0;
            $keepTags[$tn]['rmTagIfNoAttrib'] = 1;
        }
    }
    // Create additional configuration:
    $addConfig = [];
    if (isset($TSconfig['stripEmptyTags'])) {
        $addConfig['stripEmptyTags'] = $TSconfig['stripEmptyTags'];
        if (isset($TSconfig['stripEmptyTags.'])) {
            $addConfig['stripEmptyTags.'] = $TSconfig['stripEmptyTags.'];
        }
    }
    return [
        $keepTags,
        '' . ($TSconfig['keepNonMatchedTags'] ?? ''),
        (int)($TSconfig['htmlSpecialChars'] ?? 0),
        $addConfig
    ];
}
1007 
/**
 * Strips empty tag pairs from $content, repeating until nothing is left to
 * strip so that tags which only contained empty tags are removed as well.
 *
 * A tag pair is empty if there is nothing but spaces (and optionally &nbsp;
 * entities) between its start and end tag.
 *
 * @param string $content HTML content
 * @param string $tagList Comma separated list of tag names; if empty, all tags are considered
 * @param bool $treatNonBreakingSpaceAsEmpty If set, &nbsp; counts as empty content
 * @param bool $keepTags If set, $tagList names the tags that are NOT to be stripped
 * @return string The content without the empty tags
 */
public function stripEmptyTags($content, $tagList = '', $treatNonBreakingSpaceAsEmpty = false, $keepTags = false)
{
    if (!empty($tagList)) {
        $tagRegEx = implode('|', GeneralUtility::trimExplode(',', $tagList, true));
        if ($keepTags) {
            $tagRegEx = '(?!' . $tagRegEx . ')[^ >]+';
        }
    } else {
        // all characters until you reach a > or space;
        $tagRegEx = '[^ >]+';
    }
    $nbspRegex = $treatNonBreakingSpaceAsEmpty ? '|(&nbsp;)' : '';
    $finalRegex = sprintf('/<(%s)[^>]*>( %s)*<\/\\1[^>]*>/i', $tagRegEx, $nbspRegex);
    do {
        $count = 0;
        $stripped = preg_replace($finalRegex, '', $content, -1, $count);
        if ($stripped === null) {
            // The regular expression engine failed. $count may still be non-zero then,
            // so stop here instead of retrying the same input forever.
            break;
        }
        $content = $stripped;
    } while ($count !== 0);
    return $content;
}
1036 
/**
 * Runs stripEmptyTags() on $value if $configuration['stripEmptyTags'] is set.
 *
 * Options are read from $configuration['stripEmptyTags.']: "keepTags" (tags
 * that must not be stripped) takes precedence over "tags" (tags to strip);
 * "treatNonBreakingSpaceAsEmpty" is passed on as boolean.
 *
 * @param string $value HTML content
 * @param array $configuration Configuration as created by HTMLparserConfig()
 * @return string The processed content, or $value unchanged if stripping is not enabled
 */
protected function stripEmptyTagsIfConfigured($value, $configuration)
{
    if (empty($configuration['stripEmptyTags'])) {
        return $value;
    }

    $settings = $configuration['stripEmptyTags.'] ?? null;
    $tagList = null;
    $invert = false;
    if (!empty($settings['keepTags'])) {
        $tagList = $settings['keepTags'];
        $invert = true;
    } elseif (!empty($settings['tags'])) {
        $tagList = $settings['tags'];
    }

    return $this->stripEmptyTags(
        $value,
        $tagList,
        !empty($settings['treatNonBreakingSpaceAsEmpty']),
        $invert
    );
}
1063 }
‪TYPO3\CMS\Core\Html
Definition: DefaultSanitizerBuilder.php:15
‪TYPO3\CMS\Core\Html\HtmlParser\HTMLparserConfig
‪array HTMLparserConfig($TSconfig, $keepTags=[])
Definition: HtmlParser.php:887
‪TYPO3\CMS\Core\Html\HtmlParser\getFirstTagName
‪string getFirstTagName($str, $preserveCase=false)
Definition: HtmlParser.php:246
‪TYPO3\CMS\Core\Html\HtmlParser\split_tag_attributes
‪array split_tag_attributes($tag)
Definition: HtmlParser.php:322
‪TYPO3\CMS\Core\Html\HtmlParser\VOID_ELEMENTS
‪const VOID_ELEMENTS
Definition: HtmlParser.php:33
‪TYPO3\CMS\Core\Html\HtmlParser\caseShift
‪array string caseShift($str, $caseSensitiveComparison, $cacheKey='')
Definition: HtmlParser.php:834
‪TYPO3\CMS\Core\Html\HtmlParser\stripEmptyTags
‪string stripEmptyTags($content, $tagList='', $treatNonBreakingSpaceAsEmpty=false, $keepTags=false)
Definition: HtmlParser.php:1017
‪TYPO3\CMS\Core\Html\HtmlParser\$caseShift_cache
‪array $caseShift_cache
Definition: HtmlParser.php:30
‪TYPO3\CMS\Core\Html\HtmlParser
Definition: HtmlParser.php:27
‪$parser
‪$parser
Definition: annotationChecker.php:108
‪$dir
‪$dir
Definition: validateRstFiles.php:213
‪TYPO3\CMS\Core\Html\HtmlParser\getFirstTag
‪string getFirstTag($str)
Definition: HtmlParser.php:223
‪TYPO3\CMS\Core\Html\HtmlParser\prefixRelPath
‪string prefixRelPath($prefix, $srcVal, $suffix='')
Definition: HtmlParser.php:811
‪TYPO3\CMS\Core\Html\HtmlParser\splitTags
‪array splitTags($tag, $content)
Definition: HtmlParser.php:162
‪TYPO3\CMS\Core\Html\HtmlParser\prefixResourcePath
‪string prefixResourcePath($main_prefix, $content, $alternatives=[], $suffix='')
Definition: HtmlParser.php:716
‪TYPO3\CMS\Core\Html\HtmlParser\get_tag_attributes
‪array get_tag_attributes($tag, $deHSC=false)
Definition: HtmlParser.php:272
‪TYPO3\CMS\Core\Html\HtmlParser\compileTagAttribs
‪string compileTagAttribs($tagAttrib, $meta=[])
Definition: HtmlParser.php:865
‪TYPO3\CMS\Core\Html\HtmlParser\HTMLcleaner
‪string HTMLcleaner($content, $tags=[], $keepAll=0, $hSC=0, $addConfig=[])
Definition: HtmlParser.php:388
‪TYPO3\CMS\Core\Html\HtmlParser\removeFirstAndLastTag
‪string removeFirstAndLastTag($str)
Definition: HtmlParser.php:200
‪TYPO3\CMS\Core\Html\HtmlParser\splitIntoBlockRecursiveProc
‪string splitIntoBlockRecursiveProc($tag, $content, &$procObj, $callBackContent, $callBackTags, $level=0)
Definition: HtmlParser.php:127
‪TYPO3\CMS\Core\Html\HtmlParser\splitIntoBlock
‪array splitIntoBlock($tag, $content, $eliminateExtraEndTags=false)
Definition: HtmlParser.php:52
‪TYPO3\CMS\Core\Utility\GeneralUtility\trimExplode
‪static string[] trimExplode($delim, $string, $removeEmptyValues=false, $limit=0)
Definition: GeneralUtility.php:1059
‪TYPO3\CMS\Core\Utility\MathUtility
Definition: MathUtility.php:22
‪TYPO3\CMS\Frontend\ContentObject\ContentObjectRenderer
Definition: ContentObjectRenderer.php:97
‪TYPO3\CMS\Core\Html\HtmlParser\bidir_htmlspecialchars
‪string bidir_htmlspecialchars($value, $dir)
Definition: HtmlParser.php:693
‪TYPO3\CMS\Core\Utility\GeneralUtility
Definition: GeneralUtility.php:46
‪TYPO3\CMS\Core\Html\HtmlParser\stripEmptyTagsIfConfigured
‪string stripEmptyTagsIfConfigured($value, $configuration)
Definition: HtmlParser.php:1043