‪TYPO3CMS  10.4
RteHtmlParser.php
Go to the documentation of this file.
1 <?php
2 
3 /*
4  * This file is part of the TYPO3 CMS project.
5  *
6  * It is free software; you can redistribute it and/or modify it under
7  * the terms of the GNU General Public License, either version 2
8  * of the License, or any later version.
9  *
10  * For the full copyright and license information, please read the
11  * LICENSE.txt file that was distributed with this source code.
12  *
13  * The TYPO3 project - inspiring people to share!
14  */
15 
16 namespace ‪TYPO3\CMS\Core\Html;
17 
18 use Psr\EventDispatcher\EventDispatcherInterface;
19 use Psr\Log\LoggerAwareInterface;
20 use Psr\Log\LoggerAwareTrait;
27 use TYPO3\HtmlSanitizer\Builder\BuilderInterface;
28 
37 class ‪RteHtmlParser extends ‪HtmlParser implements LoggerAwareInterface
38 {
39  use LoggerAwareTrait;
40 
/**
 * Comma-separated list of tag names used to split content into blocks
 * (passed to splitIntoBlock() by the TS_transform_* methods).
 * Can be overridden through the "blockElementList" processing option.
 *
 * @var string
 */
protected $blockElementList = 'DIV,TABLE,BLOCKQUOTE,PRE,UL,OL,H1,H2,H3,H4,H5,H6,ADDRESS,DL,DD,HEADER,SECTION,FOOTER,NAV,ARTICLE,ASIDE,FIGURE';

/**
 * Comma-separated list of tags that getKeepTags() always starts from,
 * before the configured "allowTags" are added and "denyTags" removed.
 *
 * @var string
 */
protected $defaultAllowedTagsList = 'b,i,u,a,img,br,div,center,pre,figure,figcaption,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote,strike,span,abbr,acronym,dfn';

/**
 * Processing options of the current transformation,
 * assigned by setProcessingConfiguration().
 *
 * @var array
 */
protected $procOptions = [];

/**
 * Remaining recursion budget of TS_transform_db().
 *
 * @var int
 */
protected $TS_transform_db_safecounter = 100;

/**
 * Result cache of getKeepTags(), keyed by direction.
 *
 * @var array
 */
protected $getKeepTags_cache = [];

/**
 * Class names that are allowed on <p> and <span> tags; filled from the
 * "allowedClasses" / "allowedClasses." processing options.
 *
 * @var array
 */
protected $allowedClasses = [];

/**
 * Attributes that are kept on <p> tags by processContentWithinParagraph().
 * Can be overridden through the "allowAttributes." processing option.
 *
 * @var array
 */
protected $allowedAttributesForParagraphTags = [
    'class',
    'align',
    'id',
    'title',
    'dir',
    'lang',
    'xml:lang',
    'itemscope',
    'itemtype',
    'itemprop',
];

/**
 * Tags that divideIntoLines() keeps for content found outside of <p> tags.
 * Can be overridden through the "allowTagsOutside" processing option.
 *
 * @var array
 */
protected $allowedTagsOutsideOfParagraphs = [
    'address',
    'article',
    'aside',
    'blockquote',
    'div',
    'footer',
    'figure',
    'figcaption',
    'header',
    'hr',
    'nav',
    'section',
];

/**
 * Dispatcher used by markBrokenLinks() to emit the broken link analysis event.
 *
 * @var EventDispatcherInterface
 */
protected $eventDispatcher;
126 
/**
 * Stores the event dispatcher that markBrokenLinks() uses later on.
 *
 * @param EventDispatcherInterface $eventDispatcher
 */
public function __construct(EventDispatcherInterface $eventDispatcher)
{
    $this->eventDispatcher = $eventDispatcher;
}
131 
/**
 * Legacy initialisation hook. It no longer does any work and only raises
 * a deprecation notice; both arguments are ignored.
 *
 * @param string $elRef Unused
 * @param int $recPid Unused
 * @deprecated will be removed in TYPO3 v11.0
 */
public function init($elRef = '', $recPid = 0)
{
    trigger_error(
        'RteHtmlParser->init() is not needed anymore for RTE transformation, and will be removed in TYPO3 v11.0.',
        E_USER_DEPRECATED
    );
}
143 
/**
 * Takes over the processing options and derives the class, block element,
 * paragraph attribute and "tags outside of paragraphs" settings from them.
 *
 * @param array $processingConfiguration The "proc." options of the RTE configuration
 */
protected function setProcessingConfiguration(array $processingConfiguration): void
{
    $this->procOptions = $processingConfiguration;

    // The array notation ("allowedClasses.") wins over the comma-separated list
    $this->allowedClasses = isset($this->procOptions['allowedClasses.'])
        ? (array)$this->procOptions['allowedClasses.']
        : GeneralUtility::trimExplode(',', $this->procOptions['allowedClasses'] ?? '', true);

    // Dynamic configuration of the block element list
    if (!empty($this->procOptions['blockElementList'])) {
        $this->blockElementList = $this->procOptions['blockElementList'];
    }

    // Attributes that are allowed on <p> tags
    if (isset($this->procOptions['allowAttributes.'])) {
        $this->allowedAttributesForParagraphTags = $this->procOptions['allowAttributes.'];
    }

    // Tags that are allowed outside of <p> tags; only evaluated when the plain option is set
    if (isset($this->procOptions['allowTagsOutside'])) {
        $this->allowedTagsOutsideOfParagraphs = isset($this->procOptions['allowTagsOutside.'])
            ? (array)$this->procOptions['allowTagsOutside.']
            : GeneralUtility::trimExplode(',', strtolower($this->procOptions['allowTagsOutside']), true);
    }
}
177 
/**
 * Transforms content on its way into the rich text editor.
 *
 * The resolved transformation modes are applied in order; a mode with a
 * registered custom transformation class is delegated to that class,
 * otherwise the built-in handling for "detectbrokenlinks" and
 * "css_transform" is used. Entry and exit HTML parsers run before and after
 * the modes if configured.
 *
 * @param string $value Content to transform
 * @param array $processingConfiguration The "proc." options of the RTE configuration
 * @return string Transformed content with CRLF line breaks
 */
public function transformTextForRichTextEditor(string $value, array $processingConfiguration): string
{
    $this->setProcessingConfiguration($processingConfiguration);
    $transformations = $this->resolveAppliedTransformationModes('rte');
    $value = $this->streamlineLineBreaksForProcessing($value);
    // Optional entry HTML cleaner
    $value = $this->runHtmlParserIfConfigured($value, 'entryHTMLparser_rte');

    foreach ($transformations as $mode) {
        $customTransformation = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$mode] ?? null;
        if (!empty($customTransformation)) {
            // User defined transformation takes precedence over the defaults
            $transformer = GeneralUtility::makeInstance($customTransformation);
            $transformer->pObj = $this;
            $value = $transformer->transform_rte($value, $this);
            continue;
        }
        if ($mode === 'detectbrokenlinks') {
            $value = $this->markBrokenLinks($value);
        } elseif ($mode === 'css_transform') {
            $value = $this->TS_transform_rte($value);
        }
        // Any other mode is silently ignored
    }

    // Optional exit HTML cleaner, then the final line break clean up
    $value = $this->runHtmlParserIfConfigured($value, 'exitHTMLparser_rte');
    return $this->streamlineLineBreaksAfterProcessing($value);
}
220 
/**
 * Transforms content coming from the rich text editor for storage.
 *
 * The resolved transformation modes are applied in order; a mode with a
 * registered custom transformation class is delegated to that class,
 * otherwise the built-in handling for "detectbrokenlinks", "ts_links" and
 * "css_transform" is used. Afterwards the markup is passed to htmlSanitize()
 * and to the exit HTML parser if one is configured.
 *
 * @param string $value Content to transform
 * @param array $processingConfiguration The "proc." options of the RTE configuration
 * @return string Transformed content with CRLF line breaks
 */
public function transformTextForPersistence(string $value, array $processingConfiguration): string
{
    $this->setProcessingConfiguration($processingConfiguration);
    $transformations = $this->resolveAppliedTransformationModes('db');
    $value = $this->streamlineLineBreaksForProcessing($value);
    // Optional entry HTML cleaner
    $value = $this->runHtmlParserIfConfigured($value, 'entryHTMLparser_db');

    foreach ($transformations as $mode) {
        $customTransformation = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$mode] ?? null;
        if (!empty($customTransformation)) {
            // User defined transformation takes precedence over the defaults
            $transformer = GeneralUtility::makeInstance($customTransformation);
            $transformer->pObj = $this;
            $transformer->transformationKey = $mode;
            $value = $transformer->transform_db($value, $this);
            continue;
        }
        if ($mode === 'detectbrokenlinks') {
            $value = $this->removeBrokenLinkMarkers($value);
        } elseif ($mode === 'ts_links') {
            $value = $this->TS_links_db($value);
        } elseif ($mode === 'css_transform') {
            // Turn empty paragraphs into spacing paragraphs
            $value = str_replace('<p></p>', '<p>&nbsp;</p>', $value);
            // Double a trailing spacing paragraph so that divideIntoLines() does not remove it
            $value = preg_replace('/<p>&nbsp;<\/p>$/', '<p>&nbsp;</p><p>&nbsp;</p>', $value) ?? $value;
            $value = $this->TS_transform_db($value);
        }
        // Any other mode is silently ignored
    }

    // Process markup with the HTML sanitizer
    $value = $this->htmlSanitize($value, $this->procOptions['HTMLparser_db.'] ?? []);
    // Optional exit HTML cleaner, then the final line break clean up
    $value = $this->runHtmlParserIfConfigured($value, 'exitHTMLparser_db');
    return $this->streamlineLineBreaksAfterProcessing($value);
}
272 
273  /**********************************************
274  *
275  * Main function
276  *
277  **********************************************/
/**
 * Deprecated entry point that forwards to the transformTextFor* methods.
 *
 * @param string $value Content to transform
 * @param mixed $_ Unused
 * @param string $direction "rte" or "db"; any other value returns $value untouched
 * @param array $thisConfig Configuration whose "proc." key holds the processing options
 * @return string Transformed content
 * @deprecated will be removed in TYPO3 v11.0, use the transformTextFor* methods instead
 */
public function RTE_transform($value, $_ = null, $direction = 'rte', $thisConfig = [])
{
    trigger_error('RteHtmlParser->RTE_transform() will be removed in TYPO3 v11.0. Use the transformTextFor* methods in the same class instead', E_USER_DEPRECATED);
    $processingConfiguration = $thisConfig['proc.'] ?? [];
    if ($direction === 'db') {
        return $this->transformTextForPersistence($value, $processingConfiguration);
    }
    if ($direction === 'rte') {
        return $this->transformTextForRichTextEditor($value, $processingConfiguration);
    }
    return $value;
}
305 
/**
 * Builds the ordered, unique list of transformation modes to apply.
 *
 * "overruleMode" takes precedence over "mode". Both options are optional:
 * a missing option is treated as an empty string instead of triggering an
 * undefined index notice. The shortcut "default" expands to
 * "detectbrokenlinks,css_transform,ts_links".
 *
 * @param string $direction "rte" reverses the order of the modes, any other value keeps it
 * @return array List of mode names
 */
protected function resolveAppliedTransformationModes(string $direction): array
{
    // Setting modes / transformations to be called
    $overruleMode = (string)($this->procOptions['overruleMode'] ?? '');
    if ($overruleMode !== '') {
        $modes = GeneralUtility::trimExplode(',', $overruleMode);
    } else {
        $modes = [(string)($this->procOptions['mode'] ?? '')];
    }

    // Replace the shortcut "default" with all default modes
    $modeList = str_replace('default', 'detectbrokenlinks,css_transform,ts_links', implode(',', $modes));

    // Make list unique and drop empty entries
    $modes = array_unique(GeneralUtility::trimExplode(',', $modeList, true));

    // Content going to the editor runs through the modes in reverse order
    if ($direction === 'rte') {
        $modes = array_reverse($modes);
    }

    return $modes;
}
335 
/**
 * Runs HTMLcleaner() on the content if the given directive is enabled in
 * the processing options.
 *
 * The parser settings are read from the directive's sub-configuration
 * (directive name followed by a dot). A directive that is switched on
 * without any sub-configuration falls back to an empty configuration
 * instead of reading an undefined array key.
 *
 * @param string $content Content to clean
 * @param string $configurationDirective Name of the directive, e.g. "entryHTMLparser_db"
 * @return string Cleaned content, or the unchanged content if the directive is not enabled
 */
protected function runHtmlParserIfConfigured($content, $configurationDirective)
{
    if (empty($this->procOptions[$configurationDirective])) {
        return $content;
    }
    [$keepTags, $keepNonMatchedTags, $hscMode, $additionalConfiguration] = $this->HTMLparserConfig(
        $this->procOptions[$configurationDirective . '.'] ?? []
    );
    return $this->HTMLcleaner($content, $keepTags, $keepNonMatchedTags, $hscMode, $additionalConfiguration);
}
355 
356  /************************************
357  *
358  * Specific RTE TRANSFORMATION functions
359  *
360  *************************************/
361 
/**
 * Rewrites the href of every <a> tag into the link service syntax.
 *
 * <a> tags without an href (anchors) are left untouched. Nested <a> blocks
 * are processed recursively.
 *
 * @param string $value Content to process
 * @return string Content with rewritten links
 */
protected function TS_links_db($value)
{
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        // Only odd positions hold <a> blocks
        if ($k % 2 === 0) {
            continue;
        }
        [$tagAttributes] = $this->get_tag_attributes($this->getFirstTag($v), true);

        // Anchors would not have an href attribute
        if (!isset($tagAttributes['href'])) {
            continue;
        }
        $linkService = GeneralUtility::makeInstance(LinkService::class);
        // Reset per link: if resolve() throws, the catch branch must not
        // see the link information of a previously processed link.
        $linkInformation = [];
        // Store the link as <a> tag as default by TYPO3, with the link service syntax
        try {
            $linkInformation = $linkService->resolve($tagAttributes['href']);
            $tagAttributes['href'] = $linkService->asString($linkInformation);
        } catch (UnknownLinkHandlerException $e) {
            $tagAttributes['href'] = $linkInformation['href'] ?? $tagAttributes['href'];
        }

        $blockSplit[$k] = '<a ' . GeneralUtility::implodeAttributes($tagAttributes, true) . '>'
            . $this->TS_links_db($this->removeFirstAndLastTag($blockSplit[$k])) . '</a>';
    }
    return implode('', $blockSplit);
}
398 
/**
 * "css_transform" for content that is about to be stored.
 *
 * Splits the content by the configured block elements. Container-like
 * blocks (blockquote, dd, div, header, section, footer, nav, article, aside)
 * are processed recursively, <pre> blocks are left alone, and line breaks
 * inside all other blocks are replaced by spaces. Content between blocks is
 * cleaned with HTMLcleaner() and split into paragraphs by divideIntoLines().
 *
 * Recursion depth is limited by $TS_transform_db_safecounter. When the limit
 * is hit the value is returned unprocessed, and the counter is restored so
 * that the budget is not permanently reduced for later calls.
 *
 * @param string $value Content to transform
 * @return string Transformed content
 */
protected function TS_transform_db($value)
{
    // Safety... so forever loops are avoided (they should not occur, but an error would potentially do this...)
    $this->TS_transform_db_safecounter--;
    if ($this->TS_transform_db_safecounter < 0) {
        // Undo the decrement above; every regular exit path does the same
        $this->TS_transform_db_safecounter++;
        return $value;
    }
    // Split the content from RTE by the occurrence of these blocks:
    $blockSplit = $this->splitIntoBlock($this->blockElementList, $value);

    // Avoid superfluous linebreaks by transform_db after ending headListTag
    while (count($blockSplit) > 0 && trim(end($blockSplit)) === '') {
        array_pop($blockSplit);
    }

    // Traverse the blocks
    foreach ($blockSplit as $k => $v) {
        if ($k % 2) {
            // Inside block:
            $tag = $this->getFirstTag($v);
            $tagName = strtolower($this->getFirstTagName($v));
            // Process based on the tag:
            switch ($tagName) {
                case 'blockquote':
                case 'dd':
                case 'div':
                case 'header':
                case 'section':
                case 'footer':
                case 'nav':
                case 'article':
                case 'aside':
                    $blockSplit[$k] = $tag . $this->TS_transform_db($this->removeFirstAndLastTag($blockSplit[$k])) . '</' . $tagName . '>';
                    break;
                case 'pre':
                    // Preformatted content keeps its line breaks
                    break;
                default:
                    // usually <hx> tags and <table> tags where no other block elements are within the tags
                    // Eliminate true linebreaks inside block element tags
                    $blockSplit[$k] = preg_replace('/[' . LF . ']+/', ' ', $blockSplit[$k]);
            }
        } else {
            // NON-block:
            if (trim($blockSplit[$k]) !== '') {
                // Normalise self-closing tags without a space before the slash
                $blockSplit[$k] = preg_replace('#<([a-z]+)/>#', '<$1 />', $blockSplit[$k]);
                // Remove linebreaks preceding hr tags
                $blockSplit[$k] = preg_replace('/[' . LF . ']+<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/', '<$1$2/>', $blockSplit[$k]);
                // Remove linebreaks following hr tags
                $blockSplit[$k] = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>[' . LF . ']+/', '<$1$2/>', $blockSplit[$k]);
                // Replace other linebreaks with space
                $blockSplit[$k] = preg_replace('/[' . LF . ']+/', ' ', $blockSplit[$k]);
                // process allowed/removed tags
                $blockSplit[$k] = $this->HTMLcleaner(
                    (string)$blockSplit[$k],
                    $this->getKeepTags('db'),
                    $this->procOptions['HTMLparser_db.']['keepNonMatchedTags'] ?? '',
                    (int)($this->procOptions['HTMLparser_db.']['htmlSpecialChars'] ?? 0)
                );
                $blockSplit[$k] = $this->divideIntoLines($blockSplit[$k]);
            } else {
                unset($blockSplit[$k]);
            }
        }
    }
    $this->TS_transform_db_safecounter++;
    return implode(LF, $blockSplit);
}
475 
/**
 * "css_transform" for content that is about to be shown in the editor.
 *
 * Splits the content by the configured block elements. Container-like
 * blocks (blockquote, dd, div, header, section, footer, nav, article, aside)
 * are processed recursively. Content between blocks gets its surrounding
 * line breaks reduced and is wrapped into paragraphs by setDivTags().
 *
 * @param string $value Content to transform
 * @return string Transformed content
 */
protected function TS_transform_rte($value)
{
    // Split the content from database by the occurrence of the block elements
    $blockSplit = $this->splitIntoBlock($this->blockElementList, $value);
    // Traverse the blocks
    foreach ($blockSplit as $k => $v) {
        if ($k % 2) {
            // Inside one of the blocks:
            $tag = $this->getFirstTag($v);
            $tagName = strtolower($this->getFirstTagName($v));
            // Based on tagname, we do transformations:
            switch ($tagName) {
                case 'blockquote':
                case 'dd':
                case 'div':
                case 'header':
                case 'section':
                case 'footer':
                case 'nav':
                case 'article':
                case 'aside':
                    $blockSplit[$k] = $tag . $this->TS_transform_rte($this->removeFirstAndLastTag($blockSplit[$k])) . '</' . $tagName . '>';
                    break;
            }
            // Strip the line break that directly follows the block. A block may be
            // the last element, so fall back to an empty string for the successor
            // instead of reading an undefined offset.
            $blockSplit[$k + 1] = preg_replace('/^[ ]*' . LF . '/', '', $blockSplit[$k + 1] ?? '');
        } else {
            // NON-block:
            $nextFTN = $this->getFirstTagName($blockSplit[$k + 1] ?? '');
            $onlyLineBreaks = preg_match('/^[ ]*' . LF . '+[ ]*$/', $blockSplit[$k]) === 1;
            // If the line is followed by a block or is the last line:
            if (GeneralUtility::inList($this->blockElementList, $nextFTN) || !isset($blockSplit[$k + 1])) {
                if (!$onlyLineBreaks) {
                    // The line contains more than just linebreaks: reduce the number of trailing linebreaks by 1
                    $blockSplit[$k] = preg_replace('/(' . LF . '*)' . LF . '[ ]*$/', '$1', $blockSplit[$k]);
                } else {
                    // The line contains only linebreaks: remove the leading linebreak
                    $blockSplit[$k] = preg_replace('/^[ ]*' . LF . '/', '', $blockSplit[$k]);
                }
            }
            // If $blockSplit[$k] is blank then unset the line, unless the line only contained linebreaks
            if ((string)$blockSplit[$k] === '' && !$onlyLineBreaks) {
                unset($blockSplit[$k]);
            } else {
                $blockSplit[$k] = $this->setDivTags($blockSplit[$k]);
            }
        }
    }
    return implode(LF, $blockSplit);
}
534 
535  /***************************************************************
536  *
537  * Generic RTE transformation, analysis and helper functions
538  *
539  **************************************************************/
540 
/**
 * Cleans content with HTMLcleaner() using the "db" keep-tags configuration
 * and with the third HTMLcleaner() argument set to false.
 *
 * @param string $content Content to clean
 * @return string Cleaned content
 */
protected function HTMLcleaner_db($content)
{
    return $this->HTMLcleaner($content, $this->getKeepTags('db'), false);
}
555 
/**
 * Returns the keep-tags configuration for HTMLcleaner() for a direction.
 *
 * The list starts from the default allowed tags plus the configured
 * "allowTags", minus the configured "denyTags", and is then run through
 * HTMLparserConfig() with the direction specific parser options.
 * The result is cached per direction in $getKeepTags_cache.
 *
 * @param string $direction "rte" or "db"; other values get no direction specific processing
 * @return array Keep-tags configuration
 */
protected function getKeepTags($direction = 'rte')
{
    $cached = $this->getKeepTags_cache[$direction] ?? null;
    if (is_array($cached)) {
        return $cached;
    }

    // Configured tags: the array notation wins over the comma-separated list
    $hasTagArray = isset($this->procOptions['allowTags.']) && is_array($this->procOptions['allowTags.']);
    $configuredTags = $hasTagArray
        ? implode(',', $this->procOptions['allowTags.'])
        : ($this->procOptions['allowTags'] ?? '');
    $keepTags = array_flip(
        GeneralUtility::trimExplode(',', $this->defaultAllowedTagsList . ',' . strtolower($configuredTags), true)
    );

    // Denied tags are removed again
    foreach (GeneralUtility::trimExplode(',', $this->procOptions['denyTags'] ?? '', true) as $deniedTag) {
        unset($keepTags[$deniedTag]);
    }

    // Direction specific options
    switch ($direction) {
        case 'rte':
            // Convert the TypoScript style (dotted) configuration into the array format HTMLcleaner understands
            [$keepTags] = $this->HTMLparserConfig($this->procOptions['HTMLparser_rte.'] ?? [], $keepTags);
            break;
        case 'db':
            // Span tags, if allowed, get a fixed attribute configuration
            if (isset($keepTags['span'])) {
                $spanConfiguration = [
                    'allowedAttribs' => 'id,class,style,title,lang,xml:lang,dir,itemscope,itemtype,itemprop',
                    'fixAttrib' => [
                        'class' => [
                            'removeIfFalse' => 1
                        ]
                    ],
                    'rmTagIfNoAttrib' => 1
                ];
                if (!empty($this->allowedClasses)) {
                    $spanConfiguration['fixAttrib']['class']['list'] = $this->allowedClasses;
                }
                $keepTags['span'] = $spanConfiguration;
            }
            // Parser options from the processing options, with defaults for nesting and attribute-less tags
            $parserConfiguration = $this->procOptions['HTMLparser_db.'] ?? [];
            if (empty($parserConfiguration['globalNesting'])) {
                $parserConfiguration['globalNesting'] = 'b,i,u,a,center,font,sub,sup,strong,em,strike,span';
            }
            if (empty($parserConfiguration['noAttrib'])) {
                $parserConfiguration['noAttrib'] = 'b,i,u,br,center,hr,sub,sup,strong,em,li,ul,ol,blockquote,strike';
            }
            [$keepTags] = $this->HTMLparserConfig($parserConfiguration, $keepTags);
            break;
    }

    // Cache the result in object memory and return it
    $this->getKeepTags_cache[$direction] = $keepTags;
    return $this->getKeepTags_cache[$direction];
}
622 
/**
 * Splits content into lines based on its <p> sections.
 *
 * Content without <p> sections (or with the nesting budget used up) is only
 * passed through sanitizeLineBreaksForContentOnly(). Otherwise every <p>
 * section is processed (recursively for nested <p> tags) and content found
 * outside of <p> tags is stripped down to the allowed tags and wrapped
 * into a <p> tag.
 *
 * @param string $value Content to split
 * @param int $count Remaining nesting levels to resolve
 * @param bool $returnArray If set, the processed sections are returned as array
 * @return string|array Lines joined by LF, or the sections as array if $returnArray is set
 */
protected function divideIntoLines($value, $count = 5, $returnArray = false)
{
    // The third parameter eliminates false end-tags
    $segments = $this->splitIntoBlock('p', $value, true);
    // No <p> sections found, or nesting budget exhausted: return the plain content
    if (count($segments) <= 1 || $count <= 0) {
        return $this->sanitizeLineBreaksForContentOnly($value);
    }

    foreach ($segments as $index => $segment) {
        if ($index % 2) {
            // Inside a <p> section
            $inner = $this->removeFirstAndLastTag($segment);
            // Resolve nested <p> sections recursively
            $subLines = $this->divideIntoLines($inner, $count - 1, true);
            if (is_array($subLines)) {
                // Nested <p> sections (considered an error) replace the content of this section
                $segments[$index] = implode(LF, $subLines);
            } else {
                // No nesting: process the section as a regular line
                $segments[$index] = $this->processContentWithinParagraph($subLines, $segments[$index]);
            }
            // A line that only consists of &nbsp; becomes blank, unless it contains an image
            // or a tag with attributes, which are treated as possible content.
            $isSpacerOnly = trim(strip_tags($segments[$index])) === '&nbsp;'
                && !preg_match('/\\<(img)(\\s[^>]*)?\\/?>/si', $segments[$index])
                && !preg_match('/\\<([^>]*)?( align| class| style| id| title| dir| lang| xml:lang)([^>]*)?>/si', trim($segments[$index]));
            if ($isSpacerOnly) {
                $segments[$index] = '';
            }
            continue;
        }

        // Outside of a <p> section: keep only the allowed tags
        $allowedTags = '<' . implode('><', $this->allowedTagsOutsideOfParagraphs) . '>';
        $segments[$index] = trim(strip_tags($segments[$index], $allowedTags));
        $segments[$index] = $this->sanitizeLineBreaksForContentOnly($segments[$index]);
        if ((string)$segments[$index] === '') {
            // Nothing left, drop the position
            unset($segments[$index]);
        } else {
            // Wrap the text part of the remaining content into <p> tags
            $plainText = strip_tags($segments[$index]);
            $segments[$index] = str_replace($plainText, '<p>' . $plainText . '</p>', $segments[$index]);
        }
    }

    if ($returnArray) {
        return $segments;
    }
    return implode(LF, $segments);
}
679 
/**
 * Converts the lines of a text into paragraphs for the editor.
 *
 * Every line is cleaned with HTMLcleaner() using the "rte" keep-tags
 * configuration; blank lines become "&nbsp;". A line is wrapped into <p>
 * tags unless it contains an <hr> tag or is already enclosed in a
 * <div> or <p> tag.
 *
 * @param string $value Content with one paragraph per line
 * @return string Content with the lines wrapped in <p> tags
 */
protected function setDivTags($value)
{
    $keepTags = $this->getKeepTags('rte');
    $lines = explode(LF, $value);
    foreach ($lines as $lineNumber => $_) {
        if (trim($lines[$lineNumber]) === '') {
            // Blank lines are represented by a non-breaking space
            $lines[$lineNumber] = '&nbsp;';
        } else {
            // Clean the line content, keeping unknown tags (as they can be removed in the entryHTMLparser)
            $lines[$lineNumber] = $this->HTMLcleaner($lines[$lineNumber], $keepTags, 'protect');
            // Convert double-encoded &nbsp; into regular &nbsp; (this could be reversed via the exitHTMLparser)
            $lines[$lineNumber] = str_replace('&amp;nbsp;', '&nbsp;', $lines[$lineNumber]);
        }

        // Lines containing an hr tag are never wrapped
        if (preg_match('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', $lines[$lineNumber])) {
            continue;
        }
        $probe = strtolower(trim($lines[$lineNumber]));
        $isWrappedInDiv = strpos($probe, '<div') === 0 && substr($probe, -6) === '</div>';
        $isWrappedInParagraph = strpos($probe, '<p') === 0 && substr($probe, -4) === '</p>';
        // Only set p-tags if there is not already div or p tags
        if (!$isWrappedInDiv && !$isWrappedInParagraph) {
            $lines[$lineNumber] = '<p>' . $lines[$lineNumber] . '</p>';
        }
    }
    return implode(LF, $lines);
}
720 
/**
 * Builds a cleaned <p> tag from paragraph content and its original tag.
 *
 * The content is cleaned with HTMLcleaner_db(). Attributes of the original
 * <p> tag are reduced to the allowed attributes, and the class attribute to
 * the allowed classes if such a list is configured.
 *
 * @param string $content Content inside the paragraph
 * @param string $fullContentWithTag The paragraph including its surrounding <p> tag
 * @return string The compiled <p> tag with the cleaned content
 */
protected function processContentWithinParagraph(string $content, string $fullContentWithTag)
{
    $content = $this->HTMLcleaner_db($content);
    // The opening <p> tag carries the attributes to validate
    $openingTag = $this->getFirstTag($fullContentWithTag);

    $tagAttributes = [];
    if (!empty($this->allowedAttributesForParagraphTags)) {
        [$tagAttributes] = $this->get_tag_attributes($openingTag);
        // Keep only the attributes that are defined to be allowed
        $tagAttributes = array_intersect_key($tagAttributes, array_flip($this->allowedAttributesForParagraphTags));

        // Reduce the class attribute to the classes listed in $this->allowedClasses
        $classValue = $tagAttributes['class'] ?? null;
        if (
            $classValue !== null
            && trim($classValue) !== ''
            && !empty($this->allowedClasses)
            && !in_array($classValue, $this->allowedClasses, true)
        ) {
            $keptClasses = array_intersect(
                GeneralUtility::trimExplode(' ', $classValue, true),
                $this->allowedClasses
            );
            if (empty($keptClasses)) {
                unset($tagAttributes['class']);
            } else {
                $tagAttributes['class'] = implode(' ', $keptClasses);
            }
        }
    }

    // Remove any line break, then compile the surrounding <p> tag
    $content = str_replace(LF, '', $content);
    return '<' . rtrim('p ' . $this->compileTagAttribs($tagAttributes)) . '>' . $content . '</p>';
}
764 
/**
 * Puts every <hr> tag on a line of its own, collapses doubled line breaks
 * once and removes a line break at the very start and end of the content.
 *
 * @param string $content Content to process
 * @return string Processed content
 */
protected function sanitizeLineBreaksForContentOnly(string $content)
{
    // Surround hr tags with line breaks; keep the input if the regex fails
    $withIsolatedRulers = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', LF . '<$1$2/>' . LF, $content) ?? $content;
    $collapsed = str_replace(LF . LF, LF, $withIsolatedRulers);
    // Strip one leading and one trailing line break
    return preg_replace('/(^' . LF . ')|(' . LF . '$)/i', '', $collapsed) ?? $collapsed;
}
778 
/**
 * Removes every CR character so that only LF line breaks remain.
 *
 * @param string $content Content to process
 * @return string Content without CR characters
 */
protected function streamlineLineBreaksForProcessing(string $content)
{
    $withoutCarriageReturns = str_replace(CR, '', $content);
    return $withoutCarriageReturns;
}
793 
/**
 * Converts all line breaks to CRLF.
 *
 * CR characters are removed first, so that existing CRLF sequences
 * are not doubled, then every LF is replaced by CRLF.
 *
 * @param string $content Content to process
 * @return string Content with CRLF line breaks
 */
protected function streamlineLineBreaksAfterProcessing(string $content)
{
    return str_replace(LF, CRLF, $this->streamlineLineBreaksForProcessing($content));
}
811 
/**
 * Adds a "data-rte-error" attribute to every <a> tag whose target is
 * reported as broken.
 *
 * Each href is resolved through the link service and handed to listeners of
 * BrokenLinkAnalysisEvent. A link is marked when a listener reports it as
 * broken or when no link handler is known for it. Insufficient folder access
 * permissions are not treated as a broken link. <a> tags without an href
 * are left untouched; nested <a> blocks are processed recursively.
 *
 * @param string $content Content to process
 * @return string Content with marked links
 */
protected function markBrokenLinks(string $content): string
{
    $blocks = $this->splitIntoBlock('A', $content);
    $linkService = GeneralUtility::makeInstance(LinkService::class);
    foreach ($blocks as $position => $value) {
        // Only odd positions hold <a> blocks
        if ($position % 2 === 0) {
            continue;
        }
        [$attributes] = $this->get_tag_attributes($this->getFirstTag($value), true);
        if (empty($attributes['href'])) {
            continue;
        }

        try {
            $hrefInformation = $linkService->resolve($attributes['href']);

            $brokenLinkAnalysis = new BrokenLinkAnalysisEvent($hrefInformation['type'], $hrefInformation);
            $this->eventDispatcher->dispatch($brokenLinkAnalysis);
            if ($brokenLinkAnalysis->isBrokenLink()) {
                $attributes['data-rte-error'] = $brokenLinkAnalysis->getReason();
            }
        } catch (\TYPO3\CMS\Core\Resource\Exception\InsufficientFolderAccessPermissionsException $e) {
            // do nothing if user doesn't have access to the file/folder
        } catch (UnknownLinkHandlerException $e) {
            $attributes['data-rte-error'] = $e->getMessage();
        }

        // Always rewrite the block to allow the nested calling even if a page is found
        $blocks[$position] =
            '<a ' . GeneralUtility::implodeAttributes($attributes, true, true) . '>'
            . $this->markBrokenLinks($this->removeFirstAndLastTag($blocks[$position]))
            . '</a>';
    }
    return implode('', $blocks);
}
855 
/**
 * Removes the broken link markers from all <a> tags that have an href.
 *
 * The "data-rte-error" attribute is dropped and the highlighting style
 * declaration is cut out of the style attribute; a style attribute that is
 * empty afterwards is removed. Nested <a> blocks are processed recursively.
 *
 * @param string $content Content to process
 * @return string Content without broken link markers
 */
protected function removeBrokenLinkMarkers(string $content): string
{
    $segments = $this->splitIntoBlock('A', $content);
    foreach ($segments as $index => $segment) {
        // Even positions hold content outside of <a> blocks
        if ($index % 2 === 0) {
            continue;
        }
        [$linkAttributes] = $this->get_tag_attributes($this->getFirstTag($segment), true);
        if (empty($linkAttributes['href'])) {
            continue;
        }

        // Always remove the markers again (regardless of whether the page was found or not)
        // so they do not end up in the database
        unset($linkAttributes['data-rte-error']);
        if (isset($linkAttributes['style'])) {
            $cleanedStyle = trim(str_replace('background-color: yellow; border:2px red solid; color: black;', '', $linkAttributes['style']));
            if (empty($cleanedStyle)) {
                unset($linkAttributes['style']);
            } else {
                $linkAttributes['style'] = $cleanedStyle;
            }
        }

        $innerContent = $this->removeBrokenLinkMarkers($this->removeFirstAndLastTag($segments[$index]));
        $segments[$index] = '<a ' . GeneralUtility::implodeAttributes($linkAttributes, true, true) . '>'
            . $innerContent
            . '</a>';
    }
    return implode('', $segments);
}
890 
/**
 * Passes the content through the HTML sanitizer.
 *
 * Sanitizing is skipped when the configuration contains an empty
 * "htmlSanitize" entry or when the feature "security.backend.htmlSanitizeRte"
 * is disabled. The builder is taken from "htmlSanitize.build": either a
 * class implementing BuilderInterface or a name that is handed to
 * SanitizerBuilderFactory ("default" if not configured).
 *
 * @param string $content Content to sanitize
 * @param array $configuration The "HTMLparser_db." processing options
 * @return string Sanitized content, or the unchanged content if sanitizing is skipped
 */
protected function htmlSanitize(string $content, array $configuration): string
{
    $features = GeneralUtility::makeInstance(Features::class);
    // either `htmlSanitize = null` or `htmlSanitize = false`
    $explicitlyDisabled = array_key_exists('htmlSanitize', $configuration) && empty($configuration['htmlSanitize']);
    // ... or feature flag `security.backend.htmlSanitizeRte` is disabled
    if ($explicitlyDisabled || !$features->isFeatureEnabled('security.backend.htmlSanitizeRte')) {
        return $content;
    }

    $build = $configuration['htmlSanitize.']['build'] ?? 'default';
    $isBuilderClass = class_exists($build) && is_a($build, BuilderInterface::class, true);
    $builder = $isBuilderClass
        ? GeneralUtility::makeInstance($build)
        : GeneralUtility::makeInstance(SanitizerBuilderFactory::class)->build($build);

    $initiator = GeneralUtility::makeInstance(SanitizerInitiator::class, get_class($this));
    return $builder->build()->sanitize($content, $initiator);
}
913 }
‪TYPO3\CMS\Core\Html
Definition: DefaultSanitizerBuilder.php:15
‪TYPO3\CMS\Core\Html\RteHtmlParser\transformTextForPersistence
‪string transformTextForPersistence(string $value, array $processingConfiguration)
Definition: RteHtmlParser.php:219
‪TYPO3\CMS\Core\Html\RteHtmlParser\$eventDispatcher
‪EventDispatcherInterface $eventDispatcher
Definition: RteHtmlParser.php:116
‪TYPO3\CMS\Core\Html\HtmlParser\HTMLparserConfig
‪array HTMLparserConfig($TSconfig, $keepTags=[])
Definition: HtmlParser.php:887
‪TYPO3\CMS\Core\Html\RteHtmlParser\removeBrokenLinkMarkers
‪string removeBrokenLinkMarkers(string $content)
Definition: RteHtmlParser.php:854
‪TYPO3\CMS\Core\Html\HtmlParser\getFirstTagName
‪string getFirstTagName($str, $preserveCase=false)
Definition: HtmlParser.php:246
‪TYPO3\CMS\Core\Resource\Exception\InsufficientFolderAccessPermissionsException
Definition: InsufficientFolderAccessPermissionsException.php:24
‪TYPO3\CMS\Core\Html\HtmlParser
Definition: HtmlParser.php:27
‪TYPO3\CMS\Core\Html\RteHtmlParser\$allowedTagsOutsideOfParagraphs
‪array $allowedTagsOutsideOfParagraphs
Definition: RteHtmlParser.php:99
‪TYPO3\CMS\Core\Html\RteHtmlParser\runHtmlParserIfConfigured
‪string runHtmlParserIfConfigured($content, $configurationDirective)
Definition: RteHtmlParser.php:338
‪TYPO3\CMS\Core\Html\RteHtmlParser\init
‪init($elRef='', $recPid=0)
Definition: RteHtmlParser.php:130
‪TYPO3\CMS\Core\Html\RteHtmlParser\sanitizeLineBreaksForContentOnly
‪string sanitizeLineBreaksForContentOnly(string $content)
Definition: RteHtmlParser.php:762
‪TYPO3\CMS\Core\Html\RteHtmlParser\getKeepTags
‪array getKeepTags($direction='rte')
Definition: RteHtmlParser.php:555
‪TYPO3\CMS\Core\Html\RteHtmlParser\htmlSanitize
‪htmlSanitize(string $content, array $configuration)
Definition: RteHtmlParser.php:882
‪TYPO3\CMS\Core\Html\RteHtmlParser\$TS_transform_db_safecounter
‪int $TS_transform_db_safecounter
Definition: RteHtmlParser.php:61
‪TYPO3\CMS\Core\Html\RteHtmlParser\divideIntoLines
‪string array divideIntoLines($value, $count=5, $returnArray=false)
Definition: RteHtmlParser.php:626
‪TYPO3\CMS\Core\Html\HtmlParser\getFirstTag
‪string getFirstTag($str)
Definition: HtmlParser.php:223
‪TYPO3\CMS\Core\Html\RteHtmlParser\RTE_transform
‪string RTE_transform($value, $_=null, $direction='rte', $thisConfig=[])
Definition: RteHtmlParser.php:285
‪TYPO3\CMS\Core\Html\RteHtmlParser\$getKeepTags_cache
‪array $getKeepTags_cache
Definition: RteHtmlParser.php:67
‪TYPO3\CMS\Core\Html\RteHtmlParser\setDivTags
‪string setDivTags($value)
Definition: RteHtmlParser.php:679
‪TYPO3\CMS\Core\Html\RteHtmlParser\processContentWithinParagraph
‪string processContentWithinParagraph(string $content, string $fullContentWithTag)
Definition: RteHtmlParser.php:724
‪TYPO3\CMS\Core\Html\HtmlParser\get_tag_attributes
‪array get_tag_attributes($tag, $deHSC=false)
Definition: HtmlParser.php:272
‪TYPO3\CMS\Core\Html\RteHtmlParser\resolveAppliedTransformationModes
‪array resolveAppliedTransformationModes(string $direction)
Definition: RteHtmlParser.php:303
‪TYPO3\CMS\Core\Html\RteHtmlParser\streamlineLineBreaksAfterProcessing
‪string streamlineLineBreaksAfterProcessing(string $content)
Definition: RteHtmlParser.php:795
‪TYPO3\CMS\Core\Html\HtmlParser\compileTagAttribs
‪string compileTagAttribs($tagAttrib, $meta=[])
Definition: HtmlParser.php:865
‪TYPO3\CMS\Core\Configuration\Features
Definition: Features.php:56
‪TYPO3\CMS\Core\Html\RteHtmlParser
Definition: RteHtmlParser.php:38
‪TYPO3\CMS\Core\Html\HtmlParser\HTMLcleaner
‪string HTMLcleaner($content, $tags=[], $keepAll=0, $hSC=0, $addConfig=[])
Definition: HtmlParser.php:388
‪TYPO3\CMS\Core\Html\HtmlParser\removeFirstAndLastTag
‪string removeFirstAndLastTag($str)
Definition: HtmlParser.php:200
‪TYPO3\CMS\Core\Html\RteHtmlParser\markBrokenLinks
‪string markBrokenLinks(string $content)
Definition: RteHtmlParser.php:811
‪TYPO3\CMS\Core\Html\RteHtmlParser\HTMLcleaner_db
‪string HTMLcleaner_db($content)
Definition: RteHtmlParser.php:541
‪TYPO3\CMS\Core\Html\RteHtmlParser\$blockElementList
‪string $blockElementList
Definition: RteHtmlParser.php:44
‪TYPO3\CMS\Core\Html\HtmlParser\splitIntoBlock
‪array splitIntoBlock($tag, $content, $eliminateExtraEndTags=false)
Definition: HtmlParser.php:52
‪TYPO3\CMS\Core\Utility\GeneralUtility\trimExplode
‪static string[] trimExplode($delim, $string, $removeEmptyValues=false, $limit=0)
Definition: GeneralUtility.php:1059
‪TYPO3\CMS\Core\Html\RteHtmlParser\streamlineLineBreaksForProcessing
‪string streamlineLineBreaksForProcessing(string $content)
Definition: RteHtmlParser.php:780
‪TYPO3\CMS\Core\Html\RteHtmlParser\TS_transform_db
‪string TS_transform_db($value)
Definition: RteHtmlParser.php:398
‪TYPO3\CMS\Core\Html\RteHtmlParser\transformTextForRichTextEditor
‪string transformTextForRichTextEditor(string $value, array $processingConfiguration)
Definition: RteHtmlParser.php:177
‪$GLOBALS
‪$GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['adminpanel']['modules']
Definition: ext_localconf.php:5
‪TYPO3\CMS\Core\Html\RteHtmlParser\setProcessingConfiguration
‪setProcessingConfiguration(array $processingConfiguration)
Definition: RteHtmlParser.php:141
‪TYPO3\CMS\Core\Html\RteHtmlParser\$allowedAttributesForParagraphTags
‪array $allowedAttributesForParagraphTags
Definition: RteHtmlParser.php:80
‪TYPO3\CMS\Core\Html\RteHtmlParser\$defaultAllowedTagsList
‪string $defaultAllowedTagsList
Definition: RteHtmlParser.php:49
‪TYPO3\CMS\Core\Html\RteHtmlParser\$allowedClasses
‪array $allowedClasses
Definition: RteHtmlParser.php:73
‪TYPO3\CMS\Core\Html\RteHtmlParser\$procOptions
‪array $procOptions
Definition: RteHtmlParser.php:55
‪TYPO3\CMS\Core\Utility\GeneralUtility
Definition: GeneralUtility.php:46
‪TYPO3\CMS\Core\Html\RteHtmlParser\TS_links_db
‪string TS_links_db($value)
Definition: RteHtmlParser.php:363
‪TYPO3\CMS\Core\Html\RteHtmlParser\TS_transform_rte
‪string TS_transform_rte($value)
Definition: RteHtmlParser.php:475
‪TYPO3\CMS\Core\Html\RteHtmlParser\__construct
‪__construct(EventDispatcherInterface $eventDispatcher)
Definition: RteHtmlParser.php:118