‪TYPO3CMS  9.5
HtmlParser.php
Go to the documentation of this file.
1 <?php
2 namespace ‪TYPO3\CMS\Core\Html;
3 
4 /*
5  * This file is part of the TYPO3 CMS project.
6  *
7  * It is free software; you can redistribute it and/or modify it under
8  * the terms of the GNU General Public License, either version 2
9  * of the License, or any later version.
10  *
11  * For the full copyright and license information, please read the
12  * LICENSE.txt file that was distributed with this source code.
13  *
14  * The TYPO3 project - inspiring people to share!
15  */
16 
20 
26 {
30  protected ‪$caseShift_cache = [];
31 
32  // Void elements that do not have closing tags, as defined by HTML5, except link element
33  const ‪VOID_ELEMENTS = 'area|base|br|col|command|embed|hr|img|input|keygen|meta|param|source|track|wbr';
34 
35  /************************************
36  *
37  * Parsing HTML code
38  *
39  ************************************/
/**
 * Splits $content on the block tags listed in $tag while keeping nested
 * occurrences of those tags together in one chunk.
 *
 * For balanced input the returned list alternates: even indexes hold the
 * content outside the listed tags, odd indexes hold a complete block
 * (start tag, inner content, end tag).
 *
 * @param string $tag Comma-separated list of tag names to split on
 * @param string $content HTML content to split
 * @param bool $eliminateExtraEndTags If set, end tags that would bring the nesting level below zero are dropped
 * @return string[] Chunks of $content in document order
 */
public function splitIntoBlock($tag, $content, $eliminateExtraEndTags = false)
{
    $tagNames = array_map(
        function ($tagName) {
            return preg_quote($tagName, '/');
        },
        array_unique(GeneralUtility::trimExplode(',', $tag, true))
    );
    $pattern = '/\\<\\/?(' . implode('|', $tagNames) . ')(\\s*\\>|\\s[^\\>]*\\>)/si';
    // preg_split() drops the tags themselves; they are re-read from $content via $offset
    $segments = preg_split($pattern, $content);

    $blocks = [];
    $current = $segments[0];
    $offset = strlen($segments[0]);
    $depth = 0;
    // Every segment after the first one is preceded by exactly one matched tag
    foreach (array_slice($segments, 1) as $segment) {
        $tagLength = strcspn(substr($content, $offset), '>') + 1;

        if (substr($content, $offset, 2) !== '</') {
            // Start tag: on ground level the text collected so far is flushed first
            if (!$depth) {
                $blocks[] = $current;
                $current = '';
            }
            $depth++;
            $chunk = substr($content, $offset, strlen($segment) + $tagLength);
            $offset += strlen($chunk);
            $current .= $chunk;
            continue;
        }

        // End tag
        $depth--;
        $dropped = $eliminateExtraEndTags && $depth < 0;
        if ($dropped) {
            // Surplus end tag: reset the level and leave the tag out of the output
            $depth = 0;
        } else {
            $current .= substr($content, $offset, $tagLength);
        }
        $offset += $tagLength;
        // Back on ground level by a real end tag: the block is complete
        if (!$depth && !$dropped) {
            $blocks[] = $current;
            $current = '';
        }
        $chunk = substr($content, $offset, strlen($segment));
        $offset += strlen($chunk);
        $current .= $chunk;
    }
    $blocks[] = $current;
    return $blocks;
}
110 
/**
 * Splits $content with splitIntoBlock() and walks the result recursively,
 * handing each piece to callback methods on $procObj.
 *
 * Content outside the tags is passed to $procObj->{$callBackContent}($content, $level).
 * Each tag block is turned into an array with the keys "tag_start", "tag_end",
 * "tag_name" and "content" (already processed recursively) and passed to
 * $procObj->{$callBackTags}($tagsArray, $level); the returned array is
 * re-assembled as tag_start . content . tag_end.
 *
 * @param string $tag Comma-separated list of tag names to split on
 * @param string $content HTML content to process
 * @param object $procObj Object providing the callback methods
 * @param string $callBackContent Name of the method called for non-tag content; skipped if empty
 * @param string $callBackTags Name of the method called for tag blocks; skipped if empty
 * @param int $level Current nesting level, starting at 0
 * @return string Processed content
 */
public function splitIntoBlockRecursiveProc($tag, $content, &$procObj, $callBackContent, $callBackTags, $level = 0)
{
    $segments = $this->splitIntoBlock($tag, $content, true);
    foreach ($segments as $index => $segment) {
        if ($index % 2 === 0) {
            // Even index: content between the blocks
            if ($callBackContent) {
                $segments[$index] = $procObj->{$callBackContent}($segment, $level);
            }
            continue;
        }

        // Odd index: a complete tag block
        $firstTagName = $this->getFirstTagName($segment, true);
        $tagInfo = [
            'tag_start' => $this->getFirstTag($segment),
            'tag_end' => '</' . $firstTagName . '>',
            'tag_name' => strtolower($firstTagName),
            'content' => $this->splitIntoBlockRecursiveProc(
                $tag,
                $this->removeFirstAndLastTag($segment),
                $procObj,
                $callBackContent,
                $callBackTags,
                $level + 1
            ),
        ];
        if ($callBackTags) {
            $tagInfo = $procObj->{$callBackTags}($tagInfo, $level);
        }
        $segments[$index] = $tagInfo['tag_start'] . $tagInfo['content'] . $tagInfo['tag_end'];
    }
    return implode('', $segments);
}
146 
/**
 * Splits $content on single occurrences of the tags listed in $tag
 * (no nesting is tracked, end tags are not matched).
 *
 * Even indexes of the result hold the content between the tags, odd indexes
 * hold the matched tags themselves.
 *
 * @param string $tag Comma-separated list of tag names to split on
 * @param string $content HTML content to split
 * @return string[] Alternating list of content and tags
 */
public function splitTags($tag, $content)
{
    $tagNames = array_map(
        function ($tagName) {
            return preg_quote($tagName, '/');
        },
        GeneralUtility::trimExplode(',', $tag, true)
    );
    $pattern = '/\\<(' . implode('|', $tagNames) . ')(\\s[^>]*)?\\/?>/si';
    $segments = preg_split($pattern, $content);

    $result = [$segments[0]];
    $offset = strlen($segments[0]);
    // Every further segment follows exactly one matched tag, which is re-read from $content
    foreach (array_slice($segments, 1) as $textAfterTag) {
        $tagLength = strcspn(substr($content, $offset), '>') + 1;
        $tagMarkup = substr($content, $offset, $tagLength);
        $result[] = $tagMarkup;
        $result[] = $textAfterTag;
        $offset += strlen($tagMarkup) + strlen($textAfterTag);
    }
    return $result;
}
184 
/**
 * Returns everything between the first and the last element node of $str.
 *
 * @param string $str HTML fragment
 * @return string Content between the first and last element node; empty if there is no element node or only a single one
 */
public function removeFirstAndLastTag($str)
{
    $parser = SimpleParser::fromString($str);
    $firstElement = $parser->getFirstNode(SimpleNode::TYPE_ELEMENT);
    $lastElement = $parser->getLastNode(SimpleNode::TYPE_ELEMENT);
    if ($firstElement === null || $firstElement === $lastElement) {
        return '';
    }

    $start = $firstElement->getIndex() + 1;
    $length = $lastElement->getIndex() - $firstElement->getIndex() - 1;
    $inner = '';
    foreach (array_slice($parser->getNodes(), $start, $length) as $node) {
        $inner .= (string)$node;
    }
    return $inner;
}
207 
/**
 * Returns $str from its beginning up to and including the first element node.
 *
 * @param string $str HTML fragment
 * @return string Leading nodes plus the first element node; empty if $str contains no element node
 */
public function getFirstTag($str)
{
    $parser = SimpleParser::fromString($str);
    $firstElement = $parser->getFirstNode(SimpleNode::TYPE_ELEMENT);
    if ($firstElement === null) {
        return '';
    }

    $leading = '';
    foreach (array_slice($parser->getNodes(), 0, $firstElement->getIndex() + 1) as $node) {
        $leading .= (string)$node;
    }
    return $leading;
}
229 
/**
 * Returns the name of the first element node in $str that has an element name.
 *
 * @param string $str HTML fragment
 * @param bool $preserveCase If set, the name is returned as written; otherwise it is upper-cased
 * @return string Tag name, or an empty string if none was found
 */
public function getFirstTagName($str, $preserveCase = false)
{
    $parser = SimpleParser::fromString($str);
    foreach ($parser->getNodes(SimpleNode::TYPE_ELEMENT) as $element) {
        $name = $element->getElementName();
        if ($name !== null) {
            return $preserveCase ? $name : strtoupper($name);
        }
    }
    return '';
}
251 
/**
 * Parses the attributes of a tag into name/value pairs.
 *
 * The tag is tokenized by split_tag_attributes(); tokens are then paired up as
 * "name", "=", "value". Attribute names are reduced to the characters
 * [[:alnum:]_:-] and lower-cased for use as array keys.
 *
 * @param string $tag Tag (e.g. '<a href="x">') or just its attribute part
 * @param bool $deHSC If set, attribute values are passed through htmlspecialchars_decode()
 * @return array [0] => attributes keyed by lower-cased name (valueless attributes map to ''),
 *               [1] => meta data keyed by the same name: 'origTag' (name as written) and,
 *                      when a value was given, 'dashType' (the quote character used, or '')
 */
public function get_tag_attributes($tag, $deHSC = false)
{
    list($components, $metaC) = $this->split_tag_attributes($tag);
    // Name of the attribute that is currently waiting for its value
    $name = '';
    $valuemode = false;
    $attributes = [];
    $attributesMeta = [];
    if (is_array($components)) {
        foreach ($components as $key => $val) {
            // Only an unquoted "=" is the assignment sign; a quoted value that
            // happens to be "=" (e.g. alt="=") must be treated as a value.
            if ($val === '=' && ($metaC[$key] ?? '') === '') {
                $valuemode = true;
                continue;
            }
            if ($valuemode) {
                // A value is only assigned if an attribute name is waiting for it
                if ($name) {
                    $attributes[$name] = $deHSC ? htmlspecialchars_decode($val) : $val;
                    $attributesMeta[$name]['dashType'] = $metaC[$key] ?? '';
                    $name = '';
                }
            } elseif ($namekey = preg_replace('/[^[:alnum:]_\\:\\-]/', '', $val)) {
                $name = strtolower($namekey);
                $attributesMeta[$name] = [];
                $attributesMeta[$name]['origTag'] = $namekey;
                $attributes[$name] = '';
            }
            $valuemode = false;
        }
    }
    // Always return the result pair, also when no components could be determined
    return [$attributes, $attributesMeta];
}
298 
/**
 * Tokenizes the attribute part of a tag.
 *
 * A leading "<tagname " and a trailing ">" are stripped; the remainder is
 * split into quoted strings, "=" signs and unquoted words. Surrounding quotes
 * are removed from quoted tokens.
 *
 * @param string $tag Tag (e.g. '<a href="x">') or just its attribute part
 * @return array [0] => list of tokens, [1] => list (same indexes) holding the quote
 *               character that surrounded the token, or '' for unquoted tokens
 */
public function split_tag_attributes($tag)
{
    $tagMatch = [];
    if (preg_match('/(\\<[^\\s]+\\s+)?(.*?)\\s*(\\>)?$/s', $tag, $tagMatch) !== 1) {
        return [[], []];
    }

    $values = [];
    $quotes = [];
    $tokens = [];
    if (preg_match_all('/("[^"]*"|\'[^\']*\'|[^\\s"\'\\=]+|\\=)/s', $tagMatch[2], $tokens) > 0) {
        foreach ($tokens[1] as $token) {
            $isQuoted = $token[0] === '"' || $token[0] === '\'';
            $quotes[] = $isQuoted ? $token[0] : '';
            $values[] = $isQuoted ? substr($token, 1, -1) : $token;
        }
    }
    return [$values, $quotes];
}
337 
338  /*********************************
339  *
340  * Clean HTML code
341  *
342  *********************************/
/**
 * Cleans HTML content according to a per-tag configuration.
 *
 * The content is tokenized on "<". HTML comments and "<![CDATA[*/ ... /*]]>*/"
 * sections are passed through untouched. Every other tag is looked up in $tags:
 * - tag configured with an array: attributes are overridden / filtered / fixed,
 *   the tag may be remapped, protected (brackets turned into entities), removed
 *   when it has no attributes, and checked for correct nesting;
 * - tag configured with a non-array value: the tag is kept as written;
 * - tag not configured: kept only if $keepAll is set.
 *
 * Options read from $tags[<tagname>]: 'overrideAttribs', 'allowedAttribs',
 * '_allowedAttribs', 'fixAttrib', 'protect', 'remap', 'rmTagIfNoAttrib', 'nesting'.
 * Options read from each 'fixAttrib' entry: 'set', 'unset', 'default', 'always',
 * 'trim', 'intval', 'lower', 'upper', 'range', 'list', 'casesensitiveComp',
 * 'removeIfFalse', 'removeIfEquals', 'prefixLocalAnchors', 'prefixRelPathWith',
 * 'userFunc', 'userFunc.'.
 *
 * @param string $content HTML content to clean
 * @param array $tags Tag configuration, keyed by lower-case tag name
 * @param mixed $keepAll If set, tags not found in $tags are kept; the string "protect" keeps them with entity-encoded brackets
 * @param int $hSC Mode passed to bidir_htmlspecialchars() for all non-tag content
 * @param array $addConfig Additional configuration, passed to stripEmptyTagsIfConfigured()
 * @return string Cleaned content
 */
public function HTMLcleaner($content, $tags = [], $keepAll = 0, $hSC = 0, $addConfig = [])
{
    $newContent = [];
    $tokArr = explode('<', $content);
    // The first token is the text before the first "<"
    $newContent[] = $this->bidir_htmlspecialchars(current($tokArr), $hSC);
    $tokArrSliced = array_slice($tokArr, 1, null, true);
    // Index of the next entry in $newContent; positions are remembered in $tagRegister
    $c = 1;
    // Per tag name: positions in $newContent of start tags that are still open
    $tagRegister = [];
    // Open tags with 'nesting' = 'global', in document order
    $tagStack = [];
    $inComment = false;
    $inCdata = false;
    $skipTag = false;
    foreach ($tokArrSliced as $tok) {
        if ($inComment) {
            if (($eocPos = strpos($tok, '-->')) === false) {
                // End of comment is not found in the token. Go further until end of comment is found in other tokens.
                $newContent[$c++] = '<' . $tok;
                continue;
            }
            // Comment ends in the middle of the token: add comment and proceed with rest of the token
            $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 3);
            $tok = substr($tok, $eocPos + 3);
            $inComment = false;
            $skipTag = true;
        } elseif ($inCdata) {
            if (($eocPos = strpos($tok, '/*]]>*/')) === false) {
                // End of CDATA is not found in the token. Go further until it is found in other tokens.
                $newContent[$c++] = '<' . $tok;
                continue;
            }
            // CDATA ends in the middle of the token: add it and proceed with rest of the token
            // NOTE(review): the end marker is 7 bytes long, the offset used here is 10 - kept as is, confirm intent
            $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 10);
            $tok = substr($tok, $eocPos + 10);
            $inCdata = false;
            $skipTag = true;
        } elseif (strpos($tok, '!--') === 0) {
            if (($eocPos = strpos($tok, '-->')) === false) {
                // Comment started in this token but does not end in it. Set a flag to skip till the end of comment
                $newContent[$c++] = '<' . $tok;
                $inComment = true;
                continue;
            }
            // Start and end of comment are both in the current token. Add comment and proceed with rest of the token
            $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 3);
            $tok = substr($tok, $eocPos + 3);
            $skipTag = true;
        } elseif (strpos($tok, '![CDATA[*/') === 0) {
            if (($eocPos = strpos($tok, '/*]]>*/')) === false) {
                // CDATA started in this token but does not end in it. Set a flag to skip till its end
                $newContent[$c++] = '<' . $tok;
                $inCdata = true;
                continue;
            }
            // Start and end of CDATA are both in the current token. Add it and proceed with rest of the token
            // NOTE(review): same 10-byte offset as above, kept as is
            $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 10);
            $tok = substr($tok, $eocPos + 10);
            $skipTag = true;
        }
        $firstChar = $tok[0] ?? null;
        // It is a tag if the first char is alphanumeric or "/". This also avoids triggering on <?xml..> and <!DOCTYPE..>
        if (!$skipTag && preg_match('/[[:alnum:]\\/]/', (string)$firstChar) === 1) {
            $tagEnd = strpos($tok, '>');
            // If there is an end-bracket... tagEnd can't be 0 as the first character can't be a >
            if ($tagEnd) {
                $endTag = $firstChar === '/' ? 1 : 0;
                $tagContent = substr($tok, $endTag, $tagEnd - $endTag);
                $tagParts = preg_split('/\\s+/s', $tagContent, 2);
                $tagName = strtolower($tagParts[0]);
                $emptyTag = 0;
                if (isset($tags[$tagName])) {
                    // If there is processing to do for the tag:
                    if (is_array($tags[$tagName])) {
                        // Void elements are written with a trailing " /". The pattern must not
                        // contain any whitespace, otherwise the last alternative never matches.
                        if (preg_match('/^(' . self::VOID_ELEMENTS . ')$/i', $tagName)) {
                            $emptyTag = 1;
                        }
                        // If NOT an endtag, do attribute processing
                        if (!$endTag) {
                            // Override attributes
                            if (isset($tags[$tagName]['overrideAttribs']) && (string)$tags[$tagName]['overrideAttribs'] !== '') {
                                $tagParts[1] = $tags[$tagName]['overrideAttribs'];
                            }
                            // Allowed attributes
                            if (isset($tags[$tagName]['allowedAttribs']) && (string)$tags[$tagName]['allowedAttribs'] !== '') {
                                if ((string)$tags[$tagName]['allowedAttribs'] === '0') {
                                    // No attributes allowed
                                    $tagParts[1] = '';
                                } elseif (isset($tagParts[1]) && trim($tagParts[1])) {
                                    $tagAttrib = $this->get_tag_attributes($tagParts[1]);
                                    $tagParts[1] = '';
                                    $newTagAttrib = [];
                                    $tList = (array)(
                                        $tags[$tagName]['_allowedAttribs']
                                        ?? GeneralUtility::trimExplode(',', strtolower($tags[$tagName]['allowedAttribs']), true)
                                    );
                                    foreach ($tList as $allowTag) {
                                        if (isset($tagAttrib[0][$allowTag])) {
                                            $newTagAttrib[$allowTag] = $tagAttrib[0][$allowTag];
                                        }
                                    }

                                    $tagParts[1] = $this->compileTagAttribs($newTagAttrib, $tagAttrib[1]);
                                }
                            }
                            // Fixed attribute values
                            if (isset($tags[$tagName]['fixAttrib']) && is_array($tags[$tagName]['fixAttrib'])) {
                                // A tag without attributes has no second part
                                $tagAttrib = $this->get_tag_attributes($tagParts[1] ?? '');
                                $tagParts[1] = '';
                                foreach ($tags[$tagName]['fixAttrib'] as $attr => $params) {
                                    if (isset($params['set']) && $params['set'] !== '') {
                                        $tagAttrib[0][$attr] = $params['set'];
                                    }
                                    if (!empty($params['unset'])) {
                                        unset($tagAttrib[0][$attr]);
                                    }
                                    if (!empty($params['default']) && !isset($tagAttrib[0][$attr])) {
                                        $tagAttrib[0][$attr] = $params['default'];
                                    }
                                    if (!empty($params['always']) || isset($tagAttrib[0][$attr])) {
                                        // With 'always' the attribute may not exist (yet), so every
                                        // read below falls back to an empty string.
                                        $caseSensitive = $params['casesensitiveComp'] ?? false;
                                        if (!empty($params['trim'])) {
                                            $tagAttrib[0][$attr] = trim($tagAttrib[0][$attr] ?? '');
                                        }
                                        if (!empty($params['intval'])) {
                                            $tagAttrib[0][$attr] = (int)($tagAttrib[0][$attr] ?? '');
                                        }
                                        if (!empty($params['lower'])) {
                                            $tagAttrib[0][$attr] = strtolower($tagAttrib[0][$attr] ?? '');
                                        }
                                        if (!empty($params['upper'])) {
                                            $tagAttrib[0][$attr] = strtoupper($tagAttrib[0][$attr] ?? '');
                                        }
                                        if (!empty($params['range'])) {
                                            if (isset($params['range'][1])) {
                                                $tagAttrib[0][$attr] = MathUtility::forceIntegerInRange($tagAttrib[0][$attr] ?? '', (int)$params['range'][0], (int)$params['range'][1]);
                                            } else {
                                                $tagAttrib[0][$attr] = MathUtility::forceIntegerInRange($tagAttrib[0][$attr] ?? '', (int)$params['range'][0]);
                                            }
                                        }
                                        if (isset($params['list']) && is_array($params['list'])) {
                                            // For the class attribute, remove from the attribute value any class not in the list
                                            // Classes are case sensitive
                                            if ($attr === 'class') {
                                                $newClasses = [];
                                                $classes = GeneralUtility::trimExplode(' ', $tagAttrib[0][$attr] ?? '', true);
                                                foreach ($classes as $class) {
                                                    if (in_array($class, $params['list'])) {
                                                        $newClasses[] = $class;
                                                    }
                                                }
                                                if (!empty($newClasses)) {
                                                    $tagAttrib[0][$attr] = implode(' ', $newClasses);
                                                } else {
                                                    $tagAttrib[0][$attr] = $params['list'][0];
                                                }
                                            } else {
                                                // NOTE(review): the case-shifted list is cached under the tag name only,
                                                // so several list-restricted attributes of one tag share one cache entry.
                                                if (!in_array($this->caseShift($tagAttrib[0][$attr] ?? '', $caseSensitive), $this->caseShift($params['list'], $caseSensitive, $tagName))) {
                                                    $tagAttrib[0][$attr] = $params['list'][0];
                                                }
                                            }
                                        }
                                        $removeIfFalse = $params['removeIfFalse'] ?? '';
                                        if (
                                            ($removeIfFalse && $removeIfFalse !== 'blank' && !($tagAttrib[0][$attr] ?? ''))
                                            || ($removeIfFalse === 'blank' && (string)($tagAttrib[0][$attr] ?? '') === '')
                                        ) {
                                            unset($tagAttrib[0][$attr]);
                                        }
                                        $removeIfEquals = (string)($params['removeIfEquals'] ?? '');
                                        if ($removeIfEquals !== '' && $this->caseShift($tagAttrib[0][$attr] ?? '', $caseSensitive) === $this->caseShift($params['removeIfEquals'], $caseSensitive)) {
                                            unset($tagAttrib[0][$attr]);
                                        }
                                        if (!empty($params['prefixLocalAnchors'])) {
                                            if (substr((string)($tagAttrib[0][$attr] ?? ''), 0, 1) === '#') {
                                                if ($params['prefixLocalAnchors'] == 2) {
                                                    $contentObjectRenderer = GeneralUtility::makeInstance(ContentObjectRenderer::class);
                                                    $prefix = $contentObjectRenderer->getUrlToCurrentLocation();
                                                } else {
                                                    $prefix = GeneralUtility::getIndpEnv('TYPO3_REQUEST_URL');
                                                }
                                                $tagAttrib[0][$attr] = $prefix . $tagAttrib[0][$attr];
                                            }
                                        }
                                        if (!empty($params['prefixRelPathWith'])) {
                                            $urlParts = parse_url($tagAttrib[0][$attr] ?? '');
                                            // If it is NOT an absolute URL (by a scheme like http: or starting with "/")
                                            if (empty($urlParts['scheme']) && ($urlParts['path'][0] ?? null) !== '/') {
                                                $tagAttrib[0][$attr] = $params['prefixRelPathWith'] . ($tagAttrib[0][$attr] ?? '');
                                            }
                                        }
                                        if (!empty($params['userFunc'])) {
                                            if (is_array($params['userFunc.'] ?? null)) {
                                                $params['userFunc.']['attributeValue'] = $tagAttrib[0][$attr] ?? '';
                                            } else {
                                                $params['userFunc.'] = $tagAttrib[0][$attr] ?? '';
                                            }
                                            $tagAttrib[0][$attr] = GeneralUtility::callUserFunction($params['userFunc'], $params['userFunc.'], $this);
                                        }
                                    }
                                }
                                $tagParts[1] = $this->compileTagAttribs($tagAttrib[0], $tagAttrib[1]);
                            }
                        } else {
                            // If endTag, remove any possible attributes:
                            $tagParts[1] = '';
                        }
                        // Protecting the tag by converting < and > to &lt; and &gt;
                        if (!empty($tags[$tagName]['protect'])) {
                            $lt = '&lt;';
                            $gt = '&gt;';
                        } else {
                            $lt = '<';
                            $gt = '>';
                        }
                        // Remapping tag name?
                        if (!empty($tags[$tagName]['remap'])) {
                            $tagParts[0] = $tags[$tagName]['remap'];
                        }
                        // rmTagIfNoAttrib: start tags without attributes are dropped if configured
                        if ($endTag || empty($tags[$tagName]['rmTagIfNoAttrib']) || trim($tagParts[1] ?? '')) {
                            $setTag = true;
                            // Remove this closing tag if $tagName was among $TSconfig['removeTags']
                            if (
                                $endTag
                                && isset($tags[$tagName]['allowedAttribs'])
                                && $tags[$tagName]['allowedAttribs'] === 0
                                && ($tags[$tagName]['rmTagIfNoAttrib'] ?? null) === 1
                            ) {
                                $setTag = false;
                            }
                            if (isset($tags[$tagName]['nesting'])) {
                                if (!isset($tagRegister[$tagName])) {
                                    $tagRegister[$tagName] = [];
                                }
                                if ($endTag) {
                                    $correctTag = true;
                                    if ($tags[$tagName]['nesting'] === 'global') {
                                        $lastEl = end($tagStack);
                                        if ($tagName !== $lastEl) {
                                            if (in_array($tagName, $tagStack, true)) {
                                                // Drop all start tags opened after the one that is closed now
                                                while (!empty($tagStack) && $tagName !== $lastEl) {
                                                    $elPos = end($tagRegister[$lastEl]);
                                                    unset($newContent[$elPos]);
                                                    array_pop($tagRegister[$lastEl]);
                                                    array_pop($tagStack);
                                                    $lastEl = end($tagStack);
                                                }
                                            } else {
                                                // The end tag has no open start tag on the stack
                                                $correctTag = false;
                                            }
                                        }
                                    }
                                    if (empty($tagRegister[$tagName]) || !$correctTag) {
                                        $setTag = false;
                                    } else {
                                        array_pop($tagRegister[$tagName]);
                                        if ($tags[$tagName]['nesting'] === 'global') {
                                            array_pop($tagStack);
                                        }
                                    }
                                } else {
                                    $tagRegister[$tagName][] = $c;
                                    if ($tags[$tagName]['nesting'] === 'global') {
                                        $tagStack[] = $tagName;
                                    }
                                }
                            }
                            if ($setTag) {
                                // Setting the tag
                                $newContent[$c++] = $lt . ($endTag ? '/' : '') . trim($tagParts[0] . ' ' . ($tagParts[1] ?? '')) . ($emptyTag ? ' /' : '') . $gt;
                            }
                        }
                    } else {
                        // Tag is configured without options: keep it as written
                        $newContent[$c++] = '<' . ($endTag ? '/' : '') . $tagContent . '>';
                    }
                } elseif ($keepAll) {
                    // This is if the tag was not defined in the array for processing:
                    if ($keepAll === 'protect') {
                        $lt = '&lt;';
                        $gt = '&gt;';
                    } else {
                        $lt = '<';
                        $gt = '>';
                    }
                    $newContent[$c++] = $lt . ($endTag ? '/' : '') . $tagContent . $gt;
                }
                // Text following the tag
                $newContent[$c++] = $this->bidir_htmlspecialchars(substr($tok, $tagEnd + 1), $hSC);
            } else {
                // No closing bracket: treat the token as plain text
                $newContent[$c++] = $this->bidir_htmlspecialchars('<' . $tok, $hSC);
            }
        } else {
            // It was not a tag anyways
            $newContent[$c++] = $this->bidir_htmlspecialchars(($skipTag ? '' : '<') . $tok, $hSC);
            $skipTag = false;
        }
    }
    // Unsetting start tags that were never closed:
    foreach ($tagRegister as $tag => $positions) {
        foreach ($positions as $pKey) {
            unset($newContent[$pKey]);
        }
    }
    $newContent = implode('', $newContent);
    $newContent = $this->stripEmptyTagsIfConfigured($newContent, $addConfig);
    return $newContent;
}
676 
/**
 * Applies htmlspecialchars() or its inverse to $value, depending on $dir.
 *
 * @param string $value Value to convert
 * @param int $dir 1: htmlspecialchars(); 2: htmlspecialchars() without double encoding
 *                 of existing entities; -1: htmlspecialchars_decode(); anything else: no change
 * @return string Converted value
 */
public function bidir_htmlspecialchars($value, $dir)
{
    $mode = (int)$dir;
    if ($mode === 1) {
        return htmlspecialchars($value);
    }
    if ($mode === 2) {
        return htmlspecialchars($value, ENT_COMPAT, 'UTF-8', false);
    }
    if ($mode === -1) {
        return htmlspecialchars_decode($value);
    }
    return $value;
}
697 
/**
 * Prefixes relative resource paths in $content.
 *
 * Handled attributes: "background" of td/body/table, "src" of img/input/script/embed,
 * "href" of link/a, "action" of form and "value" of <param name="movie">.
 * In addition, url(...) references inside <style> blocks are prefixed.
 * Whether a path counts as relative is decided by prefixRelPath().
 *
 * @param string $main_prefix Prefix used for all tags without an entry in $alternatives
 * @param string $content HTML content to process
 * @param array $alternatives Alternative prefixes, keyed by upper-case tag name; the key "style" (lower-case) applies to <style> blocks
 * @param string $suffix String appended to every prefixed path
 * @return string Processed content
 */
public function prefixResourcePath($main_prefix, $content, $alternatives = [], $suffix = '')
{
    // Attribute holding the resource path, per lower-case tag name ("param" is handled separately)
    $pathAttributeByTag = [
        'td' => 'background',
        'body' => 'background',
        'table' => 'background',
        'img' => 'src',
        'input' => 'src',
        'script' => 'src',
        'embed' => 'src',
        'link' => 'href',
        'a' => 'href',
        'form' => 'action',
    ];
    $parts = $this->splitTags('embed,td,table,body,img,input,form,link,script,a,param', $content);
    foreach ($parts as $k => $v) {
        // Only odd indexes hold tags
        if ($k % 2 === 0) {
            continue;
        }
        $params = $this->get_tag_attributes($v);
        // Detect tag-ending so that it is re-applied correctly.
        $tagEnd = substr($v, -2) === '/>' ? ' />' : '>';
        $firstTagName = $this->getFirstTagName($v);
        $tagName = strtolower($firstTagName);
        $prefix = $alternatives[strtoupper($firstTagName)] ?? $main_prefix;

        if ($tagName === 'param') {
            // Only <param name="movie" value="..."> carries a resource path
            $attribute = ($params[0]['name'] ?? '') === 'movie' ? 'value' : null;
        } else {
            $attribute = $pathAttributeByTag[$tagName] ?? null;
        }
        // Nothing to do if the tag has no (non-empty) path attribute
        if ($attribute === null || empty($params[0][$attribute])) {
            continue;
        }

        $params[0][$attribute] = $this->prefixRelPath($prefix, $params[0][$attribute], $suffix);
        // Rebuild the tag from its (changed) attributes
        $compiledAttributes = $this->compileTagAttribs($params[0], $params[1]);
        $parts[$k] = '<' . trim($tagName . ' ' . $compiledAttributes) . $tagEnd;
    }
    $content = implode('', $parts);
    // Fix <style> section:
    $prefix = $alternatives['style'] ?? $main_prefix;
    if ((string)$prefix !== '') {
        $parts = $this->splitIntoBlock('style', $content);
        foreach ($parts as $k => &$part) {
            if ($k % 2) {
                $part = preg_replace('/(url[[:space:]]*\\([[:space:]]*["\']?)([^"\')]*)(["\']?[[:space:]]*\\))/i', '\\1' . $prefix . '\\2' . $suffix . '\\3', $part);
            }
        }
        unset($part);
        $content = implode('', $parts);
    }
    return $content;
}
792 
/**
 * Prefixes $srcVal with $prefix (and appends $suffix) if it is a relative path.
 *
 * Values starting with "/" or "#" and values for which parse_url() finds a
 * scheme are returned unchanged.
 *
 * @param string $prefix Prefix to prepend
 * @param string $srcVal Path or URL to check
 * @param string $suffix Suffix to append when the prefix is applied
 * @return string The possibly prefixed value
 */
public function prefixRelPath($prefix, $srcVal, $suffix = '')
{
    // substr() instead of $srcVal[0]: an empty value has no first character
    $firstChar = substr((string)$srcVal, 0, 1);
    // Only prefix if it's not an absolute path or
    // only a link to a section within the page.
    if ($firstChar !== '/' && $firstChar !== '#') {
        $urlParts = parse_url($srcVal);
        // Only prefix URLs without a scheme; "scheme" is absent for relative paths
        // and parse_url() returns false for seriously malformed URLs
        if (empty($urlParts['scheme'])) {
            $srcVal = $prefix . $srcVal . $suffix;
        }
    }
    return $srcVal;
}
815 
/**
 * Upper-cases a string or all values of an array, unless the comparison is
 * meant to be case sensitive.
 *
 * For arrays the result can be stored in / fetched from $this->caseShift_cache.
 * A cached entry is returned for its key regardless of the array passed in.
 *
 * @param string|array $str String or array of strings to convert
 * @param bool $caseSensitiveComparison If set, $str is returned unchanged
 * @param string $cacheKey Cache key for array input; no caching if empty
 * @return string|array Converted input
 */
public function caseShift($str, $caseSensitiveComparison, $cacheKey = '')
{
    if ($caseSensitiveComparison) {
        return $str;
    }
    if (!is_array($str)) {
        return strtoupper($str);
    }
    // Fetch from runlevel cache
    if ($cacheKey && isset($this->caseShift_cache[$cacheKey])) {
        return $this->caseShift_cache[$cacheKey];
    }
    $upperCased = array_map('strtoupper', $str);
    if ($cacheKey) {
        $this->caseShift_cache[$cacheKey] = $upperCased;
    }
    return $upperCased;
}
847 
/**
 * Compiles an attribute list into the attribute string of a tag.
 *
 * Attributes with an empty value and no 'dashType' meta entry are written
 * without a value. Values are written as given; no escaping is applied.
 *
 * @param array $tagAttrib Attribute values keyed by attribute name
 * @param array $meta Optional meta data keyed by attribute name: 'origTag' (name to write)
 *                    and 'dashType' (quote character to use). Without a 'dashType' integer
 *                    values are written unquoted, all other values in double quotes.
 * @return string Attributes separated by single spaces
 */
public function compileTagAttribs($tagAttrib, $meta = [])
{
    $accu = [];
    foreach ($tagAttrib as $k => $v) {
        // Attributes added after parsing have no meta entry, so both meta keys are optional
        $attr = !empty($meta[$k]['origTag']) ? $meta[$k]['origTag'] : $k;
        if ((string)$v !== '' || isset($meta[$k]['dashType'])) {
            $dash = !empty($meta[$k]['dashType'])
                ? $meta[$k]['dashType']
                : (MathUtility::canBeInterpretedAsInteger($v) ? '' : '"');
            $attr .= '=' . $dash . $v . $dash;
        }
        $accu[] = $attr;
    }
    return implode(' ', $accu);
}
869 
/**
 * Converts an HTMLparser TypoScript configuration into the arguments of HTMLcleaner().
 *
 * Properties read from $TSconfig: 'allowTags', 'tags.', 'localNesting', 'globalNesting',
 * 'rmTagIfNoAttrib', 'noAttrib', 'removeTags', 'stripEmptyTags', 'stripEmptyTags.',
 * 'keepNonMatchedTags', 'htmlSpecialChars'.
 *
 * @param array $TSconfig TypoScript configuration
 * @param array $keepTags Tag configuration to start with, keyed by lower-case tag name; merged over 'allowTags'
 * @return array [0] => tag configuration, [1] => 'keepNonMatchedTags' as string,
 *               [2] => 'htmlSpecialChars' as int, [3] => additional configuration (stripEmptyTags)
 */
public function HTMLparserConfig($TSconfig, $keepTags = [])
{
    // Allow tags (base list, merged with incoming array)
    $alTags = array_flip(GeneralUtility::trimExplode(',', strtolower($TSconfig['allowTags'] ?? ''), true));
    $keepTags = array_merge($alTags, $keepTags);
    // Set config properties.
    if (isset($TSconfig['tags.']) && is_array($TSconfig['tags.'])) {
        // First pass: "tags.<name> = 0|1" removes or adds a tag; only lower-case keys are considered
        foreach ($TSconfig['tags.'] as $key => $tagC) {
            if (!is_array($tagC) && (string)$key === strtolower((string)$key)) {
                if ((string)$tagC === '0') {
                    unset($keepTags[$key]);
                }
                if ((string)$tagC === '1' && !isset($keepTags[$key])) {
                    $keepTags[$key] = 1;
                }
            }
        }
        // Second pass: "tags.<name>." holds the options of a tag
        foreach ($TSconfig['tags.'] as $key => $tagC) {
            if (is_array($tagC) && (string)$key === strtolower((string)$key)) {
                // Strip the trailing dot
                $key = substr($key, 0, -1);
                // The tag may be unknown so far or only be flagged as allowed
                if (!is_array($keepTags[$key] ?? null)) {
                    $keepTags[$key] = [];
                }
                if (isset($tagC['fixAttrib.']) && is_array($tagC['fixAttrib.'])) {
                    foreach ($tagC['fixAttrib.'] as $atName => $atConfig) {
                        if (is_array($atConfig)) {
                            $atName = substr($atName, 0, -1);
                            if (!is_array($keepTags[$key]['fixAttrib'][$atName] ?? null)) {
                                $keepTags[$key]['fixAttrib'][$atName] = [];
                            }
                            $keepTags[$key]['fixAttrib'][$atName] = array_merge($keepTags[$key]['fixAttrib'][$atName], $atConfig);
                            // 'range' and 'list' are comma-separated strings in TypoScript and
                            // are used as arrays later; values that are arrays already are kept
                            foreach (['range', 'list'] as $listProperty) {
                                $listValue = $keepTags[$key]['fixAttrib'][$atName][$listProperty] ?? '';
                                if (!is_array($listValue) && (string)$listValue !== '') {
                                    $keepTags[$key]['fixAttrib'][$atName][$listProperty] = GeneralUtility::trimExplode(',', $listValue);
                                }
                            }
                        }
                    }
                }
                unset($tagC['fixAttrib.'], $tagC['fixAttrib']);
                // Removing attribute-less tags requires nesting information
                if (!empty($tagC['rmTagIfNoAttrib']) && empty($tagC['nesting'])) {
                    $tagC['nesting'] = 1;
                }
                $keepTags[$key] = array_merge($keepTags[$key], $tagC);
            }
        }
    }
    // LocalNesting
    if (!empty($TSconfig['localNesting'])) {
        $lN = GeneralUtility::trimExplode(',', strtolower($TSconfig['localNesting']), true);
        foreach ($lN as $tn) {
            if (isset($keepTags[$tn])) {
                if (!is_array($keepTags[$tn])) {
                    $keepTags[$tn] = [];
                }
                $keepTags[$tn]['nesting'] = 1;
            }
        }
    }
    // GlobalNesting (applied after localNesting, so it wins for tags listed in both)
    if (!empty($TSconfig['globalNesting'])) {
        $lN = GeneralUtility::trimExplode(',', strtolower($TSconfig['globalNesting']), true);
        foreach ($lN as $tn) {
            if (isset($keepTags[$tn])) {
                if (!is_array($keepTags[$tn])) {
                    $keepTags[$tn] = [];
                }
                $keepTags[$tn]['nesting'] = 'global';
            }
        }
    }
    // Tags that are removed if they have no attributes
    if (!empty($TSconfig['rmTagIfNoAttrib'])) {
        $lN = GeneralUtility::trimExplode(',', strtolower($TSconfig['rmTagIfNoAttrib']), true);
        foreach ($lN as $tn) {
            if (isset($keepTags[$tn])) {
                if (!is_array($keepTags[$tn])) {
                    $keepTags[$tn] = [];
                }
                $keepTags[$tn]['rmTagIfNoAttrib'] = 1;
                if (empty($keepTags[$tn]['nesting'])) {
                    $keepTags[$tn]['nesting'] = 1;
                }
            }
        }
    }
    // Tags that must not have any attributes
    if (!empty($TSconfig['noAttrib'])) {
        $lN = GeneralUtility::trimExplode(',', strtolower($TSconfig['noAttrib']), true);
        foreach ($lN as $tn) {
            if (isset($keepTags[$tn])) {
                if (!is_array($keepTags[$tn])) {
                    $keepTags[$tn] = [];
                }
                $keepTags[$tn]['allowedAttribs'] = 0;
            }
        }
    }
    // Tags to remove: no attributes allowed + removed without attributes = always removed
    if (!empty($TSconfig['removeTags'])) {
        $lN = GeneralUtility::trimExplode(',', strtolower($TSconfig['removeTags']), true);
        foreach ($lN as $tn) {
            $keepTags[$tn] = [];
            $keepTags[$tn]['allowedAttribs'] = 0;
            $keepTags[$tn]['rmTagIfNoAttrib'] = 1;
        }
    }
    // Create additional configuration:
    $addConfig = [];
    if (isset($TSconfig['stripEmptyTags'])) {
        $addConfig['stripEmptyTags'] = $TSconfig['stripEmptyTags'];
        if (isset($TSconfig['stripEmptyTags.'])) {
            $addConfig['stripEmptyTags.'] = $TSconfig['stripEmptyTags.'];
        }
    }
    return [
        $keepTags,
        '' . ($TSconfig['keepNonMatchedTags'] ?? ''),
        (int)($TSconfig['htmlSpecialChars'] ?? 0),
        $addConfig
    ];
}
997 
/**
 * Removes empty tag pairs from $content, repeatedly, until none are left.
 *
 * A tag pair is empty if it contains nothing or only space characters and,
 * if $treatNonBreakingSpaceAsEmpty is set, "&nbsp;" entities.
 *
 * @param string $content HTML content to process
 * @param string $tagList Comma-separated list of tag names; if empty, all tags are considered
 * @param bool $treatNonBreakingSpaceAsEmpty If set, "&nbsp;" counts as empty content
 * @param bool $keepTags If set, $tagList names the tags to keep and all other empty tags are removed
 * @return string Content without empty tag pairs
 */
public function stripEmptyTags($content, $tagList = '', $treatNonBreakingSpaceAsEmpty = false, $keepTags = false)
{
    if (!empty($tagList)) {
        $tagRegEx = implode('|', GeneralUtility::trimExplode(',', $tagList, true));
        if ($keepTags) {
            // Exclude exactly the listed names, not every tag that starts with one of them
            $tagRegEx = '(?!(?:' . $tagRegEx . ')(?=[\\s\\/>]))[^ >]+';
        }
    } else {
        $tagRegEx = '[^ >]+'; // all characters until you reach a > or space;
    }
    $nbspRegex = $treatNonBreakingSpaceAsEmpty ? '|(&nbsp;)' : '';
    // The lookaheads after the tag name and after the back reference make sure that
    // complete tag names are compared: "<b></body>" or "<body></body>" for tag "b" do not match.
    $finalRegex = sprintf('/<(%s)(?=[\\s\\/>])[^>]*>( %s)*<\\/\\1(?=[\\s>])[^>]*>/i', $tagRegEx, $nbspRegex);
    // Removing a tag pair may leave its parent empty, so repeat until nothing is replaced
    $count = 1;
    while ($count !== 0) {
        $stripped = preg_replace($finalRegex, '', $content, -1, $count);
        if ($stripped === null) {
            // PCRE error: keep what has been achieved so far
            break;
        }
        $content = $stripped;
    }
    return $content;
}
1026 
/**
 * Calls stripEmptyTags() on $value if "stripEmptyTags" is enabled in $configuration.
 *
 * Options read from $configuration['stripEmptyTags.']: 'keepTags' (tags to keep,
 * takes precedence), 'tags' (tags to strip) and 'treatNonBreakingSpaceAsEmpty'.
 *
 * @param string $value HTML content to process
 * @param array $configuration Configuration holding 'stripEmptyTags' and optionally 'stripEmptyTags.'
 * @return string Processed content, or $value unchanged if stripping is not enabled
 */
protected function stripEmptyTagsIfConfigured($value, $configuration)
{
    if (empty($configuration['stripEmptyTags'])) {
        return $value;
    }

    $hasKeepList = !empty($configuration['stripEmptyTags.']['keepTags']);
    if ($hasKeepList) {
        $tagList = $configuration['stripEmptyTags.']['keepTags'];
    } elseif (!empty($configuration['stripEmptyTags.']['tags'])) {
        $tagList = $configuration['stripEmptyTags.']['tags'];
    } else {
        // No list configured: all tags are considered
        $tagList = null;
    }

    return $this->stripEmptyTags(
        $value,
        $tagList,
        !empty($configuration['stripEmptyTags.']['treatNonBreakingSpaceAsEmpty']),
        $hasKeepList
    );
}
1053 }
‪TYPO3\CMS\Core\Html
Definition: DefaultSanitizerBuilder.php:15
‪TYPO3\CMS\Core\Html\HtmlParser\HTMLparserConfig
‪array HTMLparserConfig($TSconfig, $keepTags=[])
Definition: HtmlParser.php:877
‪TYPO3\CMS\Core\Html\HtmlParser\getFirstTagName
‪string getFirstTagName($str, $preserveCase=false)
Definition: HtmlParser.php:237
‪TYPO3\CMS\Core\Html\HtmlParser\split_tag_attributes
‪array split_tag_attributes($tag)
Definition: HtmlParser.php:312
‪TYPO3\CMS\Core\Html\HtmlParser\VOID_ELEMENTS
‪const VOID_ELEMENTS
Definition: HtmlParser.php:32
‪TYPO3\CMS\Core\Html\HtmlParser\stripEmptyTags
‪string stripEmptyTags($content, $tagList='', $treatNonBreakingSpaceAsEmpty=false, $keepTags=false)
Definition: HtmlParser.php:1007
‪TYPO3\CMS\Core\Html\HtmlParser\$caseShift_cache
‪array $caseShift_cache
Definition: HtmlParser.php:29
‪TYPO3\CMS\Core\Html\HtmlParser
Definition: HtmlParser.php:26
‪TYPO3\CMS\Core\Html\HtmlParser\caseShift
‪string caseShift($str, $caseSensitiveComparison, $cacheKey='')
Definition: HtmlParser.php:824
‪$parser
‪$parser
Definition: annotationChecker.php:100
‪$dir
‪$dir
Definition: validateRstFiles.php:213
‪TYPO3\CMS\Core\Html\HtmlParser\getFirstTag
‪string getFirstTag($str)
Definition: HtmlParser.php:214
‪TYPO3\CMS\Core\Html\HtmlParser\prefixRelPath
‪string prefixRelPath($prefix, $srcVal, $suffix='')
Definition: HtmlParser.php:801
‪TYPO3\CMS\Core\Html\HtmlParser\splitTags
‪array splitTags($tag, $content)
Definition: HtmlParser.php:156
‪TYPO3\CMS\Core\Html\HtmlParser\prefixResourcePath
‪string prefixResourcePath($main_prefix, $content, $alternatives=[], $suffix='')
Definition: HtmlParser.php:706
‪TYPO3\CMS\Core\Html\HtmlParser\get_tag_attributes
‪array get_tag_attributes($tag, $deHSC=false)
Definition: HtmlParser.php:263
‪TYPO3\CMS\Core\Html\HtmlParser\compileTagAttribs
‪string compileTagAttribs($tagAttrib, $meta=[])
Definition: HtmlParser.php:855
‪TYPO3\CMS\Core\Html\HtmlParser\HTMLcleaner
‪string HTMLcleaner($content, $tags=[], $keepAll=0, $hSC=0, $addConfig=[])
Definition: HtmlParser.php:378
‪TYPO3\CMS\Core\Html\HtmlParser\removeFirstAndLastTag
‪string removeFirstAndLastTag($str)
Definition: HtmlParser.php:191
‪TYPO3\CMS\Core\Html\HtmlParser\splitIntoBlockRecursiveProc
‪string splitIntoBlockRecursiveProc($tag, $content, &$procObj, $callBackContent, $callBackTags, $level=0)
Definition: HtmlParser.php:122
‪TYPO3\CMS\Core\Html\HtmlParser\splitIntoBlock
‪array splitIntoBlock($tag, $content, $eliminateExtraEndTags=false)
Definition: HtmlParser.php:50
‪TYPO3\CMS\Core\Utility\MathUtility
Definition: MathUtility.php:21
‪TYPO3\CMS\Frontend\ContentObject\ContentObjectRenderer
Definition: ContentObjectRenderer.php:91
‪TYPO3\CMS\Core\Html\HtmlParser\bidir_htmlspecialchars
‪string bidir_htmlspecialchars($value, $dir)
Definition: HtmlParser.php:683
‪TYPO3\CMS\Core\Utility\GeneralUtility
Definition: GeneralUtility.php:45
‪TYPO3\CMS\Core\Html\HtmlParser\stripEmptyTagsIfConfigured
‪string stripEmptyTagsIfConfigured($value, $configuration)
Definition: HtmlParser.php:1033