‪TYPO3CMS  11.5
RteHtmlParser.php
Go to the documentation of this file.
1 <?php
2 
3 /*
4  * This file is part of the TYPO3 CMS project.
5  *
6  * It is free software; you can redistribute it and/or modify it under
7  * the terms of the GNU General Public License, either version 2
8  * of the License, or any later version.
9  *
10  * For the full copyright and license information, please read the
11  * LICENSE.txt file that was distributed with this source code.
12  *
13  * The TYPO3 project - inspiring people to share!
14  */
15 
16 namespace ‪TYPO3\CMS\Core\Html;
17 
18 use Psr\EventDispatcher\EventDispatcherInterface;
19 use Psr\Log\LoggerAwareInterface;
20 use Psr\Log\LoggerAwareTrait;
27 use TYPO3\HtmlSanitizer\Builder\BuilderInterface;
28 
37 class ‪RteHtmlParser extends ‪HtmlParser implements LoggerAwareInterface
38 {
39  use LoggerAwareTrait;
40 
/**
 * Comma-separated list of tag names used to split content into block and non-block sections.
 * Replaced by the "blockElementList" processing option when that option is non-empty.
 *
 * @var string
 */
protected $blockElementList = 'DIV,TABLE,BLOCKQUOTE,PRE,UL,OL,H1,H2,H3,H4,H5,H6,ADDRESS,DL,DD,HEADER,SECTION,FOOTER,NAV,ARTICLE,ASIDE,FIGURE';

/**
 * Comma-separated list of tags that are always part of the keep-tags list built in getKeepTags(),
 * unless removed again through the "denyTags" processing option.
 *
 * @var string
 */
protected $defaultAllowedTagsList = 'b,i,u,a,img,br,div,center,pre,figure,figcaption,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote,strike,span,abbr,acronym,dfn';

/**
 * Processing options of the current transformation, set by setProcessingConfiguration().
 *
 * @var array
 */
protected $procOptions = [];

/**
 * Remaining nesting depth for TS_transform_db(); guards against endless recursion.
 *
 * @var int
 */
protected $TS_transform_db_safecounter = 100;

/**
 * Per-direction ("rte" / "db") cache of the keep-tags configuration built by getKeepTags().
 *
 * @var array
 */
protected $getKeepTags_cache = [];

/**
 * CSS class names allowed on <p> and <span> tags; an empty list disables class filtering.
 *
 * @var array
 */
protected $allowedClasses = [];

/**
 * Attributes that are kept on <p> tags when content is processed for persistence.
 * Replaced by the "allowAttributes." processing option when that option is set.
 *
 * @var array
 */
protected $allowedAttributesForParagraphTags = [
    'class',
    'align',
    'id',
    'title',
    'dir',
    'lang',
    'xml:lang',
    'itemscope',
    'itemtype',
    'itemprop',
];

/**
 * Tags that may stay outside of <p> tags and are therefore not wrapped into paragraphs.
 * Replaced by the "allowTagsOutside" processing option when that option is set.
 *
 * @var array
 */
protected $allowedTagsOutsideOfParagraphs = [
    'address',
    'article',
    'aside',
    'blockquote',
    'div',
    'footer',
    'figure',
    'figcaption',
    'header',
    'hr',
    'nav',
    'section',
];

/**
 * Dispatcher used to emit the BrokenLinkAnalysisEvent in markBrokenLinks().
 *
 * @var EventDispatcherInterface
 */
protected $eventDispatcher;
126 
/**
 * Stores the event dispatcher that markBrokenLinks() uses to dispatch broken-link analysis events.
 *
 * @param EventDispatcherInterface $eventDispatcher
 */
public function __construct(EventDispatcherInterface $eventDispatcher)
{
    $this->eventDispatcher = $eventDispatcher;
}
131 
/**
 * Stores the processing options and derives the dependent settings from them
 * (allowed classes, block element list, allowed <p> attributes, tags allowed outside of <p>).
 *
 * Settings whose option is absent keep the value they had before this call.
 *
 * @param array $processingConfiguration Processing options for the upcoming transformation
 */
protected function setProcessingConfiguration(array $processingConfiguration): void
{
    $this->procOptions = $processingConfiguration;
    // The keep-tags lists are derived from the options, so cached results are no longer valid
    $this->getKeepTags_cache = [];

    $options = $this->procOptions;

    // The array notation ("allowedClasses.") wins over the comma-separated string notation
    $this->allowedClasses = isset($options['allowedClasses.'])
        ? (array)$options['allowedClasses.']
        : GeneralUtility::trimExplode(',', $options['allowedClasses'] ?? '', true);

    // Dynamic configuration of the block element list
    if (!empty($options['blockElementList'])) {
        $this->blockElementList = $options['blockElementList'];
    }

    // Attributes that are allowed on <p> tags
    if (isset($options['allowAttributes.'])) {
        $this->allowedAttributesForParagraphTags = $options['allowAttributes.'];
    }

    // Tags allowed outside of <p> tags are only overridden when "allowTagsOutside" itself is set
    if (!isset($options['allowTagsOutside'])) {
        return;
    }
    $this->allowedTagsOutsideOfParagraphs = isset($options['allowTagsOutside.'])
        ? (array)$options['allowTagsOutside.']
        : GeneralUtility::trimExplode(',', strtolower($options['allowTagsOutside']), true);
}
167 
/**
 * Transforms content so it can be loaded into the rich text editor.
 *
 * Applies the configured transformation modes in "rte" order, framed by the optional
 * "entryHTMLparser_rte" / "exitHTMLparser_rte" cleaners and line break normalisation.
 *
 * @param string $value Content to transform
 * @param array $processingConfiguration Processing options for this transformation
 * @return string Transformed content
 */
public function transformTextForRichTextEditor(string $value, array $processingConfiguration): string
{
    $this->setProcessingConfiguration($processingConfiguration);
    $modes = $this->resolveAppliedTransformationModes('rte');
    $value = $this->streamlineLineBreaksForProcessing($value);
    // Entry HTML cleaner, only active when configured
    $value = $this->runHtmlParserIfConfigured($value, 'entryHTMLparser_rte');

    foreach ($modes as $cmd) {
        $hookClassName = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd] ?? null;
        if (!empty($hookClassName)) {
            // A user defined transformation is registered for this mode and replaces the default
            trigger_error(
                'The hook "t3lib/class.t3lib_parsehtml_proc.php->transformation"' .
                ' will be removed in TYPO3 v12. ',
                E_USER_DEPRECATED
            );
            $_procObj = GeneralUtility::makeInstance($hookClassName);
            $_procObj->pObj = $this;
            $value = $_procObj->transform_rte($value, $this);
            continue;
        }
        // Default transformations; unknown modes are ignored
        if ($cmd === 'detectbrokenlinks') {
            $value = $this->markBrokenLinks($value);
        } elseif ($cmd === 'css_transform') {
            $value = $this->TS_transform_rte($value);
        }
    }

    // Exit HTML cleaner, only active when configured
    $value = $this->runHtmlParserIfConfigured($value, 'exitHTMLparser_rte');
    // Final clean up of line breaks
    return $this->streamlineLineBreaksAfterProcessing($value);
}
215 
/**
 * Transforms content coming from the rich text editor so it can be stored.
 *
 * Applies the configured transformation modes in "db" order, then the HTML sanitizer,
 * framed by the optional "entryHTMLparser_db" / "exitHTMLparser_db" cleaners and
 * line break normalisation.
 *
 * @param string $value Content to transform
 * @param array $processingConfiguration Processing options for this transformation
 * @return string Transformed content
 */
public function transformTextForPersistence(string $value, array $processingConfiguration): string
{
    $this->setProcessingConfiguration($processingConfiguration);
    $modes = $this->resolveAppliedTransformationModes('db');
    $value = $this->streamlineLineBreaksForProcessing($value);
    // Entry HTML cleaner, only active when configured
    $value = $this->runHtmlParserIfConfigured($value, 'entryHTMLparser_db');

    foreach ($modes as $cmd) {
        $hookClassName = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd] ?? null;
        if (!empty($hookClassName)) {
            // A user defined transformation is registered for this mode and replaces the default
            trigger_error(
                'The hook "t3lib/class.t3lib_parsehtml_proc.php->transformation"' .
                ' will be removed in TYPO3 v12. ',
                E_USER_DEPRECATED
            );
            $_procObj = GeneralUtility::makeInstance($hookClassName);
            $_procObj->pObj = $this;
            $_procObj->transformationKey = $cmd;
            $value = $_procObj->transform_db($value, $this);
            continue;
        }
        // Default transformations; unknown modes are ignored
        if ($cmd === 'detectbrokenlinks') {
            $value = $this->removeBrokenLinkMarkers($value);
        } elseif ($cmd === 'ts_links') {
            $value = $this->TS_links_db($value);
        } elseif ($cmd === 'css_transform') {
            // Turn empty paragraphs into spacing paragraphs
            $value = str_replace('<p></p>', '<p>&nbsp;</p>', $value);
            // Double a trailing spacing paragraph so that divideIntoLines() does not remove it
            $value = preg_replace('/<p>&nbsp;<\/p>$/', '<p>&nbsp;</p><p>&nbsp;</p>', $value) ?? $value;
            $value = $this->TS_transform_db($value);
        }
    }

    // Process the markup with the HTML sanitizer
    $value = $this->htmlSanitize($value, $this->procOptions['HTMLparser_db.'] ?? []);
    // Exit HTML cleaner, only active when configured
    $value = $this->runHtmlParserIfConfigured($value, 'exitHTMLparser_db');
    // Final clean up of line breaks
    return $this->streamlineLineBreaksAfterProcessing($value);
}
272 
/**
 * Resolves the list of transformation modes to apply, in the order they have to run.
 *
 * The "overruleMode" processing option takes precedence over "mode". The shortcut mode
 * "default" expands to "detectbrokenlinks,css_transform,ts_links". A missing "mode" option
 * yields an empty list instead of raising an undefined-index warning, and only a mode that
 * is exactly "default" is expanded, so other mode names containing that word stay intact.
 *
 * @param string $direction Either "rte" or "db"; for "rte" the order is reversed
 * @return array Unique list of mode names
 */
protected function resolveAppliedTransformationModes(string $direction): array
{
    $overruleMode = (string)($this->procOptions['overruleMode'] ?? '');
    $configuredModes = $overruleMode !== ''
        ? $overruleMode
        : (string)($this->procOptions['mode'] ?? '');

    $modes = [];
    foreach (GeneralUtility::trimExplode(',', $configuredModes, true) as $mode) {
        if ($mode === 'default') {
            // Replace the shortcut "default" with all default modes
            $modes[] = 'detectbrokenlinks';
            $modes[] = 'css_transform';
            $modes[] = 'ts_links';
        } else {
            $modes[] = $mode;
        }
    }

    // Make the list unique, keeping the first occurrence of every mode
    $modes = array_unique($modes);

    // Reverse order if direction is "rte"
    return $direction === 'rte' ? array_reverse($modes) : $modes;
}
302 
/**
 * Runs the content through HTMLcleaner() when the given directive is enabled in the processing options.
 *
 * The parser settings are read from the sub-configuration "<directive>."; when the directive is
 * enabled without such a sub-configuration an empty configuration is used instead of reading
 * an undefined array index.
 *
 * @param string $content Content to clean
 * @param string $configurationDirective Name of the processing option, e.g. "entryHTMLparser_db"
 * @return string Cleaned content, or the unchanged content when the directive is not enabled
 */
protected function runHtmlParserIfConfigured($content, $configurationDirective)
{
    if (empty($this->procOptions[$configurationDirective])) {
        return $content;
    }
    $parserConfiguration = $this->procOptions[$configurationDirective . '.'] ?? [];
    [$keepTags, $keepNonMatchedTags, $hscMode, $additionalConfiguration] = $this->HTMLparserConfig($parserConfiguration);
    return $this->HTMLcleaner($content, $keepTags, $keepNonMatchedTags, $hscMode, $additionalConfiguration);
}
322 
323  /************************************
324  *
325  * Specific RTE TRANSFORMATION functions
326  *
327  *************************************/
328 
/**
 * Rewrites the href of every <a> tag into the string representation produced by the LinkService.
 *
 * Anchors without an href attribute are left untouched. Nested content of a link is processed
 * recursively. When the link cannot be handled, the original href is kept unless the resolved
 * link information of this very link provides an "href" value.
 *
 * @param string $value Content to process
 * @return string Content with rewritten links
 */
protected function TS_links_db($value)
{
    $blockSplit = $this->splitIntoBlock('A', $value);
    $linkService = GeneralUtility::makeInstance(LinkService::class);
    foreach ($blockSplit as $k => $v) {
        // Only odd positions contain <a> blocks
        if ($k % 2 === 0) {
            continue;
        }
        [$tagAttributes] = $this->get_tag_attributes($this->getFirstTag($v), true);

        // Anchors would not have an href attribute
        if (!isset($tagAttributes['href'])) {
            continue;
        }

        // Reset per link: without this, a failing resolve() would fall back to the
        // link information of a previously processed link
        $linkInformation = [];
        try {
            $linkInformation = $linkService->resolve($tagAttributes['href']);
            $tagAttributes['href'] = $linkService->asString($linkInformation);
        } catch (UnknownLinkHandlerException $e) {
            $tagAttributes['href'] = $linkInformation['href'] ?? $tagAttributes['href'];
        }

        $blockSplit[$k] = '<a ' . GeneralUtility::implodeAttributes($tagAttributes, true) . '>'
            . $this->TS_links_db($this->removeFirstAndLastTag($blockSplit[$k])) . '</a>';
    }
    return implode('', $blockSplit);
}
365 
/**
 * Transforms block-level content coming from the editor into the form that gets persisted.
 *
 * The content is split by the configured block elements. Container blocks are processed
 * recursively, <pre> blocks are kept as they are, line breaks inside other blocks are replaced
 * by spaces, and everything between blocks is cleaned and divided into paragraph lines.
 *
 * The nesting depth is limited by $TS_transform_db_safecounter. The counter is restored on
 * every exit path (including the depth-exceeded return and exceptions), so one overly deep
 * document does not permanently reduce the depth available to later calls.
 *
 * @param string $value Content to transform
 * @return string Transformed content
 */
protected function TS_transform_db($value)
{
    // Safety... so endless recursion is avoided (it should not occur, but an error could cause it)
    $this->TS_transform_db_safecounter--;
    try {
        if ($this->TS_transform_db_safecounter < 0) {
            return $value;
        }
        // Split the content from the RTE by the occurrence of these blocks
        $blockSplit = $this->splitIntoBlock($this->blockElementList, $value);

        // Avoid superfluous line breaks after the last block by dropping trailing blank parts
        while (count($blockSplit) > 0 && trim(end($blockSplit)) === '') {
            array_pop($blockSplit);
        }

        foreach ($blockSplit as $k => $v) {
            if ($k % 2) {
                // Inside a block
                $tag = $this->getFirstTag($v);
                $tagName = strtolower($this->getFirstTagName($v));
                switch ($tagName) {
                    case 'blockquote':
                    case 'dd':
                    case 'div':
                    case 'header':
                    case 'section':
                    case 'footer':
                    case 'nav':
                    case 'article':
                    case 'aside':
                        // Containers: transform the inner content recursively
                        $blockSplit[$k] = $tag . $this->TS_transform_db($this->removeFirstAndLastTag($blockSplit[$k])) . '</' . $tagName . '>';
                        break;
                    case 'pre':
                        // Preformatted content is kept untouched
                        break;
                    default:
                        // Usually <hx> and <table> tags where no other block elements are within the tags:
                        // eliminate true line breaks inside block element tags
                        $blockSplit[$k] = preg_replace('/[' . LF . ']+/', ' ', $blockSplit[$k]) ?? $blockSplit[$k];
                }
                continue;
            }

            // Outside of a block: drop blank parts
            if (trim($blockSplit[$k]) === '') {
                unset($blockSplit[$k]);
                continue;
            }
            $string = $blockSplit[$k];
            // Normalise self-closing tags without a space, e.g. "<br/>" to "<br />"
            $string = preg_replace('#<([a-z]+)/>#', '<$1 />', $string) ?? $string;
            // Remove line breaks preceding hr tags
            $string = preg_replace('/[' . LF . ']+<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/', '<$1$2/>', $string) ?? '';
            // Remove line breaks following hr tags
            $string = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>[' . LF . ']+/', '<$1$2/>', $string) ?? '';
            // Replace other line breaks with a space
            $string = preg_replace('/[' . LF . ']+/', ' ', $string);
            // Process allowed/removed tags
            $string = $this->HTMLcleaner(
                (string)$string,
                $this->getKeepTags('db'),
                $this->procOptions['HTMLparser_db.']['keepNonMatchedTags'] ?? '',
                (int)($this->procOptions['HTMLparser_db.']['htmlSpecialChars'] ?? 0)
            );
            $blockSplit[$k] = (string)$this->divideIntoLines($string);
        }
        return implode(LF, $blockSplit);
    } finally {
        $this->TS_transform_db_safecounter++;
    }
}
443 
/**
 * Transforms persisted block-level content into the form handed to the rich text editor.
 *
 * The content is split by the configured block elements. Container blocks are processed
 * recursively; everything between blocks is turned into paragraphs via setDivTags().
 *
 * @param string $value Content to transform
 * @return string Transformed content
 */
protected function TS_transform_rte($value)
{
    $containerTagNames = ['blockquote', 'dd', 'div', 'header', 'section', 'footer', 'nav', 'article', 'aside'];
    // Split the content from the database by the occurrence of the block elements
    $blockSplit = $this->splitIntoBlock($this->blockElementList, $value);

    foreach ($blockSplit as $k => $v) {
        if ($k % 2) {
            // Inside one of the blocks
            $tag = $this->getFirstTag($v);
            $tagName = strtolower($this->getFirstTagName($v));
            if (in_array($tagName, $containerTagNames, true)) {
                // Containers: transform the inner content recursively
                $blockSplit[$k] = $tag . $this->TS_transform_rte($this->removeFirstAndLastTag($blockSplit[$k])) . '</' . $tagName . '>';
            }
            // Strip the line break directly following the block from the next part,
            // creating that part when the block is the last element
            $blockSplit[$k + 1] = preg_replace('/^[ ]*' . LF . '/', '', $blockSplit[$k + 1] ?? '');
            continue;
        }

        // Outside of a block
        $nextFTN = $this->getFirstTagName($blockSplit[$k + 1] ?? '');
        $onlyLineBreaks = preg_match('/^[ ]*' . LF . '+[ ]*$/', $blockSplit[$k]) === 1;
        // The part is followed by a block or is the last part
        if (GeneralUtility::inList($this->blockElementList, $nextFTN) || !isset($blockSplit[$k + 1])) {
            $blockSplit[$k] = $onlyLineBreaks
                // Only line breaks: remove the leading line break
                ? preg_replace('/^[ ]*' . LF . '/', '', $blockSplit[$k])
                // More than just line breaks: reduce the number of trailing line breaks by one
                : preg_replace('/(' . LF . '*)' . LF . '[ ]*$/', '$1', $blockSplit[$k]);
        }
        // Blank parts are dropped, unless the part consisted of line breaks only
        if ((string)$blockSplit[$k] === '' && !$onlyLineBreaks) {
            unset($blockSplit[$k]);
        } else {
            $blockSplit[$k] = $this->setDivTags($blockSplit[$k]);
        }
    }
    return implode(LF, $blockSplit);
}
505 
506  /***************************************************************
507  *
508  * Generic RTE transformation, analysis and helper functions
509  *
510  **************************************************************/
511 
/**
 * Cleans content with the keep-tags configuration of the "db" direction;
 * tags that are not part of that configuration are not kept.
 *
 * @param string $content Content to clean
 * @return string Cleaned content
 */
protected function HTMLcleaner_db($content)
{
    return $this->HTMLcleaner($content, $this->getKeepTags('db'), false);
}
526 
/**
 * Builds the keep-tags configuration for HTMLcleaner() for the given direction.
 * The result is cached per direction until the processing configuration changes.
 *
 * @param string $direction Either "rte" or "db"
 * @return array Keep-tags configuration
 */
protected function getKeepTags($direction = 'rte')
{
    $cachedKeepTags = $this->getKeepTags_cache[$direction] ?? null;
    if (is_array($cachedKeepTags)) {
        return $cachedKeepTags;
    }

    // Additional allowed tags come from "allowTags." (array) or "allowTags" (string)
    $hasAllowTagsArray = isset($this->procOptions['allowTags.']) && is_array($this->procOptions['allowTags.']);
    $configuredTags = $hasAllowTagsArray
        ? implode(',', $this->procOptions['allowTags.'])
        : ($this->procOptions['allowTags'] ?? '');
    $tagList = $this->defaultAllowedTagsList . ',' . strtolower($configuredTags);
    $keepTags = array_flip(GeneralUtility::trimExplode(',', $tagList, true));

    // Denied tags are removed from the list again
    foreach (GeneralUtility::trimExplode(',', $this->procOptions['denyTags'] ?? '', true) as $deniedTag) {
        unset($keepTags[$deniedTag]);
    }

    // Direction specific options
    switch ($direction) {
        case 'rte':
            // Convert the configuration from TypoScript notation (with dots) into the
            // plain multi-dimensional array understood by HTMLcleaner()
            [$keepTags] = $this->HTMLparserConfig($this->procOptions['HTMLparser_rte.'] ?? [], $keepTags);
            break;
        case 'db':
            // Span tags, if allowed, only survive with attributes and with allowed classes
            if (isset($keepTags['span'])) {
                $spanConfiguration = [
                    'allowedAttribs' => 'id,class,style,title,lang,xml:lang,dir,itemscope,itemtype,itemprop',
                    'fixAttrib' => [
                        'class' => [
                            'removeIfFalse' => 1,
                        ],
                    ],
                    'rmTagIfNoAttrib' => 1,
                ];
                if (!empty($this->allowedClasses)) {
                    $spanConfiguration['fixAttrib']['class']['list'] = $this->allowedClasses;
                }
                $keepTags['span'] = $spanConfiguration;
            }
            // Further options from the processing options, with fallbacks
            $TSc = $this->procOptions['HTMLparser_db.'] ?? [];
            if (empty($TSc['globalNesting'])) {
                $TSc['globalNesting'] = 'b,i,u,a,center,font,sub,sup,strong,em,strike,span';
            }
            if (empty($TSc['noAttrib'])) {
                $TSc['noAttrib'] = 'b,i,u,br,center,hr,sub,sup,strong,em,li,ul,ol,blockquote,strike';
            }
            // Convert the configuration from TypoScript notation into a regular array
            [$keepTags] = $this->HTMLparserConfig($TSc, $keepTags);
            break;
    }

    // Cache the result in object memory
    $this->getKeepTags_cache[$direction] = $keepTags;
    return $this->getKeepTags_cache[$direction];
}
593 
/**
 * Divides content into lines based on <p> sections, one line per paragraph.
 *
 * Content inside <p> tags is cleaned and re-wrapped; nested <p> sections are flattened
 * recursively up to $count levels. Content outside of <p> tags is stripped down to the
 * tags allowed outside of paragraphs and wrapped into a <p> tag.
 *
 * @param string $value Content to divide
 * @param int $count Remaining recursion depth
 * @param bool $returnArray If set, the lines are returned as array instead of an LF-joined string
 * @return string|array The processed lines; a string is returned whenever no <p> section was found
 */
protected function divideIntoLines($value, $count = 5, $returnArray = false)
{
    // Passing true as third argument eliminates false end-tags
    $paragraphBlocks = $this->splitIntoBlock('p', $value, true);
    // Without any <p> section (or with exhausted depth) the content is returned as is
    if (count($paragraphBlocks) <= 1 || $count <= 0) {
        return $this->sanitizeLineBreaksForContentOnly($value);
    }

    $tagsKeptOutsideParagraphs = '<' . implode('><', $this->allowedTagsOutsideOfParagraphs) . '>';

    foreach ($paragraphBlocks as $k => $v) {
        if ($k % 2 === 0) {
            // Outside of a paragraph: keep only allowed tags and drop positions without content
            $outerContent = trim(strip_tags($paragraphBlocks[$k], $tagsKeptOutsideParagraphs));
            $outerContent = $this->sanitizeLineBreaksForContentOnly($outerContent);
            if ((string)$outerContent === '') {
                unset($paragraphBlocks[$k]);
                continue;
            }
            // Add <p> tags around the text content
            $plainText = strip_tags($outerContent);
            $paragraphBlocks[$k] = str_replace($plainText, '<p>' . $plainText . '</p>', $outerContent);
            continue;
        }

        // Inside a <p> section: fetch "sub-lines", which explodes any further <p> nesting recursively
        $innerContent = $this->removeFirstAndLastTag($v);
        $subLines = $this->divideIntoLines($innerContent, $count - 1, true);
        if (is_array($subLines)) {
            // Nested <p> sections (considered an error) directly become the content of this section
            $paragraphBlocks[$k] = implode(LF, $subLines);
        } else {
            // No nesting found: process it as a true line
            $paragraphBlocks[$k] = $this->processContentWithinParagraph($subLines, $paragraphBlocks[$k]);
        }

        // A line that is just blank (possibly containing a &nbsp;) becomes purely blank, unless it
        // holds an image or its tags carry attributes, which must be considered possible content
        $isBlankLine = trim(strip_tags($paragraphBlocks[$k])) === '&nbsp;'
            && !preg_match('/\\<(img)(\\s[^>]*)?\\/?>/si', $paragraphBlocks[$k])
            && !preg_match('/\\<([^>]*)?( align| class| style| id| title| dir| lang| xml:lang)([^>]*)?>/si', trim($paragraphBlocks[$k]));
        if ($isBlankLine) {
            $paragraphBlocks[$k] = '';
        }
    }

    if ($returnArray) {
        return $paragraphBlocks;
    }
    return implode(LF, $paragraphBlocks);
}
650 
/**
 * Converts every line of the given content into a <p> section for the rich text editor.
 *
 * Blank lines become "&nbsp;" paragraphs. A line is not wrapped when it starts with a tag that
 * is allowed outside of paragraphs, when it contains an <hr> tag, or when it is already fully
 * wrapped in <div> or <p> tags.
 *
 * The <hr> check is run against the line content; matching it against the bare tag name,
 * which never contains "<", could never succeed.
 *
 * @param string $value Content with lines separated by LF
 * @return string Processed content with lines separated by LF
 */
protected function setDivTags($value)
{
    // Configuration for HTMLcleaner(), which processes each line on its way to the RTE
    $keepTags = $this->getKeepTags('rte');
    // Divide the content into lines
    $parts = explode(LF, $value);
    foreach ($parts as $k => $v) {
        if (trim($parts[$k]) === '') {
            // A blank line is set to &nbsp;
            $parts[$k] = '&nbsp;';
        } else {
            // Clean the line content, keeping unknown tags (as they can be removed in the entryHTMLparser)
            $parts[$k] = $this->HTMLcleaner($parts[$k], $keepTags, 'protect');
            // Convert double-encoded &nbsp; into regular &nbsp;; this could be reversed via the exitHTMLparser
            $parts[$k] = str_replace('&amp;nbsp;', '&nbsp;', $parts[$k]);
        }

        $partFirstTagName = strtolower($this->getFirstTagName($parts[$k]));
        if (in_array($partFirstTagName, $this->allowedTagsOutsideOfParagraphs, true)) {
            // The line starts with a tag that may live outside of paragraphs
            continue;
        }
        if (preg_match('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', $parts[$k])) {
            // The line contains an hr tag
            continue;
        }

        // Only set p-tags if the line is not already wrapped in div or p tags
        $testStr = strtolower(trim($parts[$k]));
        $isWrappedInDiv = strpos($testStr, '<div') === 0 && substr($testStr, -6) === '</div>';
        $isWrappedInParagraph = strpos($testStr, '<p') === 0 && substr($testStr, -4) === '</p>';
        if (!$isWrappedInDiv && !$isWrappedInParagraph) {
            $parts[$k] = '<p>' . $parts[$k] . '</p>';
        }
    }
    return implode(LF, $parts);
}
692 
/**
 * Cleans the content of a single paragraph and wraps it into a <p> tag again,
 * keeping only the allowed attributes and allowed classes of the original tag.
 *
 * @param string $content Content inside the paragraph
 * @param string $fullContentWithTag The paragraph including its surrounding tag, used to read the attributes
 * @return string The rebuilt paragraph without line breaks
 */
protected function processContentWithinParagraph(string $content, string $fullContentWithTag)
{
    // Clean up the content
    $content = $this->HTMLcleaner_db($content);
    // Get the <p> tag, whose attributes are validated below
    $fTag = $this->getFirstTag($fullContentWithTag);

    $tagAttributes = [];
    if (!empty($this->allowedAttributesForParagraphTags)) {
        [$tagAttributes] = $this->get_tag_attributes($fTag);
        // Keep only the attributes that are defined to be allowed
        $tagAttributes = array_intersect_key($tagAttributes, array_flip($this->allowedAttributesForParagraphTags));

        // Reduce the class attribute to the classes listed in $this->allowedClasses
        $needsClassFiltering = isset($tagAttributes['class'])
            && trim($tagAttributes['class']) !== ''
            && !empty($this->allowedClasses)
            && !in_array($tagAttributes['class'], $this->allowedClasses, true);
        if ($needsClassFiltering) {
            $classes = array_intersect(
                GeneralUtility::trimExplode(' ', $tagAttributes['class'], true),
                $this->allowedClasses
            );
            if (empty($classes)) {
                unset($tagAttributes['class']);
            } else {
                $tagAttributes['class'] = implode(' ', $classes);
            }
        }
    }

    // Remove any line break and compile the surrounding <p> tag
    $contentWithoutLineBreaks = str_replace(LF, '', $content);
    $openingTag = '<' . rtrim('p ' . $this->compileTagAttribs($tagAttributes)) . '>';
    return $openingTag . $contentWithoutLineBreaks . '</p>';
}
736 
/**
 * Puts every <hr> tag on a line of its own, then replaces doubled line breaks
 * by a single one and removes one leading and one trailing line break.
 *
 * @param string $content Content to process
 * @return string Processed content
 */
protected function sanitizeLineBreaksForContentOnly(string $content)
{
    $horizontalRulePattern = '/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i';
    $withIsolatedRules = preg_replace($horizontalRulePattern, LF . '<$1$2/>' . LF, $content) ?? $content;
    $withoutDoubledBreaks = str_replace(LF . LF, LF, $withIsolatedRules);
    return preg_replace('/(^' . LF . ')|(' . LF . '$)/i', '', $withoutDoubledBreaks) ?? $withoutDoubledBreaks;
}
750 
/**
 * Removes every CR character so that only LF line breaks remain during processing.
 *
 * @param string $content Content to process
 * @return string Content without CR characters
 */
protected function streamlineLineBreaksForProcessing(string $content)
{
    $contentWithoutCarriageReturns = str_replace(CR, '', $content);
    return $contentWithoutCarriageReturns;
}
765 
/**
 * Converts all line breaks to CRLF once processing is done.
 *
 * @param string $content Content to process
 * @return string Content with CRLF line breaks
 */
protected function streamlineLineBreaksAfterProcessing(string $content)
{
    // Strip any CR that has entered in the meantime, then turn every LF into CRLF
    $contentWithLineFeedsOnly = $this->streamlineLineBreaksForProcessing($content);
    return str_replace(LF, CRLF, $contentWithLineFeedsOnly);
}
783 
/**
 * Adds a "data-rte-error" attribute to every <a> tag whose target is reported as broken.
 *
 * For each link with a non-empty href a BrokenLinkAnalysisEvent is dispatched; listeners decide
 * whether the link is broken. Links without a known link handler are marked with the exception
 * message. Links whose file or folder is not accessible for the current user are left unmarked.
 *
 * @param string $content Content to process
 * @return string Content with marked links
 */
protected function markBrokenLinks(string $content): string
{
    $blocks = $this->splitIntoBlock('A', $content);
    $linkService = GeneralUtility::makeInstance(LinkService::class);
    foreach ($blocks as $position => $value) {
        // Only odd positions contain <a> blocks
        if ($position % 2 === 0) {
            continue;
        }
        [$attributes] = $this->get_tag_attributes($this->getFirstTag($value), true);
        if (empty($attributes['href'])) {
            continue;
        }

        try {
            $hrefInformation = $linkService->resolve($attributes['href']);

            $brokenLinkAnalysis = new BrokenLinkAnalysisEvent($hrefInformation['type'], $hrefInformation);
            $this->eventDispatcher->dispatch($brokenLinkAnalysis);
            if ($brokenLinkAnalysis->isBrokenLink()) {
                $attributes['data-rte-error'] = $brokenLinkAnalysis->getReason();
            }
        } catch (\TYPO3\CMS\Core\Resource\Exception\InsufficientFolderAccessPermissionsException $e) {
            // do nothing if user doesn't have access to the file/folder
        } catch (UnknownLinkHandlerException $e) {
            $attributes['data-rte-error'] = $e->getMessage();
        }

        // Always rewrite the block to allow the nested calling even if a page is found
        $blocks[$position] =
            '<a ' . GeneralUtility::implodeAttributes($attributes, true, true) . '>'
            . $this->markBrokenLinks($this->removeFirstAndLastTag($blocks[$position]))
            . '</a>';
    }
    return implode('', $blocks);
}
827 
/**
 * Removes the broken-link markers from every <a> tag that has a non-empty href:
 * the "data-rte-error" attribute and the highlighting inline style.
 *
 * @param string $content Content to process
 * @return string Content without broken-link markers
 */
protected function removeBrokenLinkMarkers(string $content): string
{
    $highlightStyle = 'background-color: yellow; border:2px red solid; color: black;';
    $blocks = $this->splitIntoBlock('A', $content);
    foreach ($blocks as $position => $value) {
        // Only odd positions contain <a> blocks
        if ($position % 2 === 0) {
            continue;
        }
        [$attributes] = $this->get_tag_attributes($this->getFirstTag($value), true);
        if (empty($attributes['href'])) {
            continue;
        }

        // Always remove the markers again (regardless of whether the page was found or not),
        // so they do not end up in the database
        unset($attributes['data-rte-error']);
        if (isset($attributes['style'])) {
            $remainingStyle = trim(str_replace($highlightStyle, '', $attributes['style']));
            if (empty($remainingStyle)) {
                unset($attributes['style']);
            } else {
                $attributes['style'] = $remainingStyle;
            }
        }

        $innerContent = $this->removeBrokenLinkMarkers($this->removeFirstAndLastTag($blocks[$position]));
        $blocks[$position] = '<a ' . GeneralUtility::implodeAttributes($attributes, true, true) . '>'
            . $innerContent
            . '</a>';
    }
    return implode('', $blocks);
}
862 
/**
 * Runs the content through the HTML sanitizer unless sanitizing is switched off.
 *
 * Sanitizing is skipped when the configuration contains an empty "htmlSanitize" value or when
 * the feature "security.backend.htmlSanitizeRte" is disabled. The sanitizer is built from
 * "htmlSanitize.build", which is either the class name of a builder or a name that is handed
 * to the SanitizerBuilderFactory; it falls back to "default".
 *
 * @param string $content Content to sanitize
 * @param array $configuration The "HTMLparser_db." processing options
 * @return string Sanitized content, or the unchanged content when sanitizing is skipped
 */
protected function htmlSanitize(string $content, array $configuration): string
{
    $features = GeneralUtility::makeInstance(Features::class);
    // Either `htmlSanitize = null` or `htmlSanitize = false` ...
    $disabledByConfiguration = array_key_exists('htmlSanitize', $configuration)
        && empty($configuration['htmlSanitize']);
    // ... or the feature flag `security.backend.htmlSanitizeRte` is disabled
    if ($disabledByConfiguration || !$features->isFeatureEnabled('security.backend.htmlSanitizeRte')) {
        return $content;
    }

    $build = $configuration['htmlSanitize.']['build'] ?? 'default';
    $isBuilderClass = class_exists($build) && is_a($build, BuilderInterface::class, true);
    $builder = $isBuilderClass
        ? GeneralUtility::makeInstance($build)
        : GeneralUtility::makeInstance(SanitizerBuilderFactory::class)->build($build);

    $initiator = GeneralUtility::makeInstance(SanitizerInitiator::class, get_class($this));
    return $builder->build()->sanitize($content, $initiator);
}
885 }
‪TYPO3\CMS\Core\Html
Definition: DefaultSanitizerBuilder.php:15
‪TYPO3\CMS\Core\Html\RteHtmlParser\transformTextForPersistence
‪string transformTextForPersistence(string $value, array $processingConfiguration)
Definition: RteHtmlParser.php:214
‪TYPO3\CMS\Core\Html\RteHtmlParser\$eventDispatcher
‪EventDispatcherInterface $eventDispatcher
Definition: RteHtmlParser.php:116
‪TYPO3\CMS\Core\Utility\GeneralUtility\trimExplode
‪static list< string > trimExplode($delim, $string, $removeEmptyValues=false, $limit=0)
Definition: GeneralUtility.php:999
‪TYPO3\CMS\Core\Html\HtmlParser\HTMLparserConfig
‪array HTMLparserConfig($TSconfig, $keepTags=[])
Definition: HtmlParser.php:893
‪TYPO3\CMS\Core\Html\RteHtmlParser\removeBrokenLinkMarkers
‪string removeBrokenLinkMarkers(string $content)
Definition: RteHtmlParser.php:826
‪TYPO3\CMS\Core\Html\HtmlParser\getFirstTagName
‪string getFirstTagName($str, $preserveCase=false)
Definition: HtmlParser.php:246
‪TYPO3\CMS\Core\Resource\Exception\InsufficientFolderAccessPermissionsException
Definition: InsufficientFolderAccessPermissionsException.php:23
‪TYPO3\CMS\Core\Html\HtmlParser
Definition: HtmlParser.php:27
‪TYPO3\CMS\Core\Html\RteHtmlParser\$allowedTagsOutsideOfParagraphs
‪array $allowedTagsOutsideOfParagraphs
Definition: RteHtmlParser.php:99
‪TYPO3\CMS\Core\Html\RteHtmlParser\runHtmlParserIfConfigured
‪string runHtmlParserIfConfigured($content, $configurationDirective)
Definition: RteHtmlParser.php:305
‪TYPO3\CMS\Core\Html\RteHtmlParser\sanitizeLineBreaksForContentOnly
‪string sanitizeLineBreaksForContentOnly(string $content)
Definition: RteHtmlParser.php:734
‪TYPO3\CMS\Core\Html\RteHtmlParser\getKeepTags
‪array getKeepTags($direction='rte')
Definition: RteHtmlParser.php:526
‪TYPO3\CMS\Core\Html\RteHtmlParser\htmlSanitize
‪htmlSanitize(string $content, array $configuration)
Definition: RteHtmlParser.php:854
‪TYPO3\CMS\Core\Html\RteHtmlParser\$TS_transform_db_safecounter
‪int $TS_transform_db_safecounter
Definition: RteHtmlParser.php:61
‪TYPO3\CMS\Core\Html\RteHtmlParser\divideIntoLines
‪string array divideIntoLines($value, $count=5, $returnArray=false)
Definition: RteHtmlParser.php:597
‪TYPO3\CMS\Core\Html\HtmlParser\getFirstTag
‪string getFirstTag($str)
Definition: HtmlParser.php:223
‪TYPO3\CMS\Core\Html\RteHtmlParser\$getKeepTags_cache
‪array $getKeepTags_cache
Definition: RteHtmlParser.php:67
‪TYPO3\CMS\Core\Html\RteHtmlParser\setDivTags
‪string setDivTags($value)
Definition: RteHtmlParser.php:650
‪TYPO3\CMS\Core\Html\RteHtmlParser\processContentWithinParagraph
‪string processContentWithinParagraph(string $content, string $fullContentWithTag)
Definition: RteHtmlParser.php:696
‪TYPO3\CMS\Core\Html\HtmlParser\get_tag_attributes
‪array get_tag_attributes($tag, $deHSC=false)
Definition: HtmlParser.php:272
‪TYPO3\CMS\Core\Html\RteHtmlParser\resolveAppliedTransformationModes
‪array resolveAppliedTransformationModes(string $direction)
Definition: RteHtmlParser.php:270
‪TYPO3\CMS\Core\Html\RteHtmlParser\streamlineLineBreaksAfterProcessing
‪string streamlineLineBreaksAfterProcessing(string $content)
Definition: RteHtmlParser.php:767
‪TYPO3\CMS\Core\Html\HtmlParser\compileTagAttribs
‪string compileTagAttribs($tagAttrib, $meta=[])
Definition: HtmlParser.php:871
‪TYPO3\CMS\Core\Configuration\Features
Definition: Features.php:56
‪TYPO3\CMS\Core\Html\RteHtmlParser
Definition: RteHtmlParser.php:38
‪TYPO3\CMS\Core\Html\HtmlParser\HTMLcleaner
‪string HTMLcleaner($content, $tags=[], $keepAll=0, $hSC=0, $addConfig=[])
Definition: HtmlParser.php:388
‪TYPO3\CMS\Core\Html\HtmlParser\removeFirstAndLastTag
‪string removeFirstAndLastTag($str)
Definition: HtmlParser.php:200
‪TYPO3\CMS\Core\Html\RteHtmlParser\markBrokenLinks
‪string markBrokenLinks(string $content)
Definition: RteHtmlParser.php:783
‪TYPO3\CMS\Core\Html\RteHtmlParser\HTMLcleaner_db
‪string HTMLcleaner_db($content)
Definition: RteHtmlParser.php:512
‪TYPO3\CMS\Core\Html\RteHtmlParser\$blockElementList
‪string $blockElementList
Definition: RteHtmlParser.php:44
‪TYPO3\CMS\Core\Html\HtmlParser\splitIntoBlock
‪array splitIntoBlock($tag, $content, $eliminateExtraEndTags=false)
Definition: HtmlParser.php:52
‪TYPO3\CMS\Core\Html\RteHtmlParser\streamlineLineBreaksForProcessing
‪string streamlineLineBreaksForProcessing(string $content)
Definition: RteHtmlParser.php:752
‪TYPO3\CMS\Core\Html\RteHtmlParser\TS_transform_db
‪string TS_transform_db($value)
Definition: RteHtmlParser.php:365
‪TYPO3\CMS\Core\Html\RteHtmlParser\transformTextForRichTextEditor
‪string transformTextForRichTextEditor(string $value, array $processingConfiguration)
Definition: RteHtmlParser.php:167
‪$GLOBALS
‪$GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['adminpanel']['modules']
Definition: ext_localconf.php:25
‪TYPO3\CMS\Core\Html\RteHtmlParser\setProcessingConfiguration
‪setProcessingConfiguration(array $processingConfiguration)
Definition: RteHtmlParser.php:129
‪TYPO3\CMS\Core\Html\RteHtmlParser\$allowedAttributesForParagraphTags
‪array $allowedAttributesForParagraphTags
Definition: RteHtmlParser.php:80
‪TYPO3\CMS\Core\Html\RteHtmlParser\$defaultAllowedTagsList
‪string $defaultAllowedTagsList
Definition: RteHtmlParser.php:49
‪TYPO3\CMS\Core\Html\RteHtmlParser\$allowedClasses
‪array $allowedClasses
Definition: RteHtmlParser.php:73
‪TYPO3\CMS\Core\Html\RteHtmlParser\$procOptions
‪array $procOptions
Definition: RteHtmlParser.php:55
‪TYPO3\CMS\Core\Utility\GeneralUtility
Definition: GeneralUtility.php:50
‪TYPO3\CMS\Core\Html\RteHtmlParser\TS_links_db
‪string TS_links_db($value)
Definition: RteHtmlParser.php:330
‪TYPO3\CMS\Core\Html\RteHtmlParser\TS_transform_rte
‪string TS_transform_rte($value)
Definition: RteHtmlParser.php:443
‪TYPO3\CMS\Core\Html\RteHtmlParser\__construct
‪__construct(EventDispatcherInterface $eventDispatcher)
Definition: RteHtmlParser.php:118