‪TYPO3CMS  9.5
RteHtmlParser.php
Go to the documentation of this file.
1 <?php
2 namespace ‪TYPO3\CMS\Core\Html;
3 
4 /*
5  * This file is part of the TYPO3 CMS project.
6  *
7  * It is free software; you can redistribute it and/or modify it under
8  * the terms of the GNU General Public License, either version 2
9  * of the License, or any later version.
10  *
11  * For the full copyright and license information, please read the
12  * LICENSE.txt file that was distributed with this source code.
13  *
14  * The TYPO3 project - inspiring people to share!
15  */
16 
17 use Psr\Log\LoggerAwareInterface;
18 use Psr\Log\LoggerAwareTrait;
29 use TYPO3\HtmlSanitizer\Builder\BuilderInterface;
30 
39 class ‪RteHtmlParser extends ‪HtmlParser implements LoggerAwareInterface
40 {
41  use LoggerAwareTrait;
44 
46  'blockElementList' => 'Using $blockElementList of class RteHtmlParser from the outside is discouraged, as this property is only used for internal storage.',
47  'recPid' => 'Using $recPid of class RteHtmlParser from the outside is discouraged, as this property is only used for internal storage.',
48  'elRef' => 'Using $elRef of class RteHtmlParser from the outside is discouraged, as this property is only used for internal storage.',
49  'tsConfig' => 'Using $tsConfig of class RteHtmlParser from the outside is discouraged, as this property is only used for internal storage.',
50  'procOptions' => 'Using $procOptions of class RteHtmlParser from the outside is discouraged, as this property is only used for internal storage.',
51  'TS_transform_db_safecounter' => 'Using $TS_transform_db_safecounter of class RteHtmlParser from the outside is discouraged, as this property is only used for internal storage.',
52  'getKeepTags_cache' => 'Using $getKeepTags_cache of class RteHtmlParser from the outside is discouraged, as this property is only used for internal storage.',
53  'allowedClasses' => 'Using $allowedClasses of class RteHtmlParser from the outside is discouraged, as this property is only used for internal storage.',
54  ];
55 
57  'TS_images_db' => 'Using TS_images_db() of class RteHtmlParser from the outside is discouraged, as this method is only available for internal purposes.',
58  'TS_links_db' => 'Using TS_links_db() of class RteHtmlParser from the outside is discouraged, as this method is only available for internal purposes.',
59  'TS_transform_db' => 'Using TS_transform_db() of class RteHtmlParser from the outside is discouraged, as this method is only available for internal purposes.',
60  'TS_transform_rte' => 'Using TS_transform_rte() of class RteHtmlParser from the outside is discouraged, as this method is only available for internal purposes.',
61  'HTMLcleaner_db' => 'Using HTMLcleaner_db() of class RteHtmlParser from the outside is discouraged, as this method is only available for internal purposes.',
62  'getKeepTags' => 'Using getKeepTags() of class RteHtmlParser from the outside is discouraged, as this method is only available for internal purposes.',
63  'divideIntoLines' => 'Using divideIntoLines() of class RteHtmlParser from the outside is discouraged, as this method is only available for internal purposes.',
64  'setDivTags' => 'Using setDivTags() of class RteHtmlParser from the outside is discouraged, as this method is only available for internal purposes.',
65  'getWHFromAttribs' => 'Using getWHFromAttribs() of class RteHtmlParser from the outside is discouraged, as this method is only available for internal purposes.',
66  'urlInfoForLinkTags' => 'Using urlInfoForLinkTags() of class RteHtmlParser from the outside is discouraged, as this method is not in use anymore and will be removed.',
67  'TS_AtagToAbs' => 'Using TS_AtagToAbs() of class RteHtmlParser from the outside is discouraged, as this method is only available for internal purposes.',
68  ];
69 
/**
 * Comma-separated tag names used to split content into block and non-block
 * sections; replaced by the "blockElementList" processing option when that is set.
 *
 * @var string
 */
protected $blockElementList = 'DIV,TABLE,BLOCKQUOTE,PRE,UL,OL,H1,H2,H3,H4,H5,H6,ADDRESS,DL,DD,HEADER,SECTION,FOOTER,NAV,ARTICLE,ASIDE';

/**
 * Comma-separated tag names that are always part of the keep-tags list,
 * before "allowTags" is added and "denyTags" is removed.
 *
 * @var string
 */
protected $defaultAllowedTagsList = 'b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote,strike,span,abbr,acronym,dfn';

/**
 * Page id passed on when the default upload folder for fetched external images is resolved.
 *
 * @var int
 */
protected $recPid = 0;

/**
 * Element reference, split on ":" into table and field name.
 *
 * @var string
 */
protected $elRef = '';

/**
 * Configuration array handed to RTE_transform().
 *
 * @var array
 */
protected $tsConfig = [];

/**
 * The "proc." section of the configuration handed to RTE_transform().
 *
 * @var array
 */
protected $procOptions = [];

/**
 * Remaining recursion budget of TS_transform_db().
 *
 * @var int
 */
protected $TS_transform_db_safecounter = 100;

/**
 * Keep-tags configuration cached per direction.
 *
 * @var array
 */
protected $getKeepTags_cache = [];

/**
 * Class names read from the "allowedClasses" processing option.
 *
 * @var array
 */
protected $allowedClasses = [];
130 
138  'class',
139  'align',
140  'id',
141  'title',
142  'dir',
143  'lang',
144  'xml:lang',
145  'itemscope',
146  'itemtype',
147  'itemprop'
148  ];
149 
158  'address',
159  'article',
160  'aside',
161  'blockquote',
162  'div',
163  'footer',
164  'header',
165  'hr',
166  'nav',
167  'section'
168  ];
169 
/**
 * Stores the record context used by later transformations.
 *
 * @param string $elRef Element reference; split on ":" into table and field when an upload folder is looked up
 * @param int $recPid Page id passed to the upload folder lookup
 */
public function init($elRef = '', $recPid = 0)
{
    $this->elRef = $elRef;
    $this->recPid = $recPid;
}
181 
182  /**********************************************
183  *
184  * Main function
185  *
186  **********************************************/
/**
 * Transforms content in the given direction by running the configured transformation modes.
 *
 * The processing options are read from the "proc." key of $thisConfig. Content is passed
 * through the optional entry HTML parser, every resolved mode, the HTML sanitizer
 * (direction "db" only) and the optional exit HTML parser, in that order.
 *
 * @param string $value Content to transform
 * @param mixed $_ Unused
 * @param string $direction Either "db" or "rte"; any other value skips the mode handling
 * @param array $thisConfig Configuration; its "proc." key holds the processing options
 * @return string Transformed content
 */
public function RTE_transform($value, $_ = null, $direction = 'rte', $thisConfig = [])
{
    $this->tsConfig = $thisConfig;
    // "proc." is optional: fall back to an empty option set instead of reading an undefined index
    $this->procOptions = (array)($thisConfig['proc.'] ?? []);
    if (isset($this->procOptions['allowedClasses.'])) {
        $this->allowedClasses = (array)$this->procOptions['allowedClasses.'];
    } else {
        $this->allowedClasses = GeneralUtility::trimExplode(',', $this->procOptions['allowedClasses'] ?? '', true);
    }

    // Dynamic configuration of blockElementList
    if (!empty($this->procOptions['blockElementList'])) {
        $this->blockElementList = $this->procOptions['blockElementList'];
    }

    // Define which attributes are allowed on <p> tags
    if (isset($this->procOptions['allowAttributes.'])) {
        $this->allowedAttributesForParagraphTags = $this->procOptions['allowAttributes.'];
    } elseif (isset($this->procOptions['keepPDIVattribs'])) {
        trigger_error('HTML parsing option "keepPDIVattribs" will not be evaluated anymore in TYPO3 v10.0. Use "allowedAttributes" instead.', E_USER_DEPRECATED);
        $this->allowedAttributesForParagraphTags = GeneralUtility::trimExplode(',', strtolower($this->procOptions['keepPDIVattribs']), true);
    }
    // Override tags which are allowed outside of <p> tags
    if (isset($this->procOptions['allowTagsOutside'])) {
        if (!isset($this->procOptions['allowTagsOutside.'])) {
            $this->allowedTagsOutsideOfParagraphs = GeneralUtility::trimExplode(',', strtolower($this->procOptions['allowTagsOutside']), true);
        } else {
            $this->allowedTagsOutsideOfParagraphs = (array)$this->procOptions['allowTagsOutside.'];
        }
    }

    // Setting modes / transformations to be called; both options may be absent
    $overruleMode = $this->procOptions['overruleMode'] ?? '';
    if ((string)$overruleMode !== '') {
        $modes = GeneralUtility::trimExplode(',', $overruleMode);
    } else {
        $modes = [$this->procOptions['mode'] ?? ''];
    }
    $modes = $this->resolveAppliedTransformationModes($direction, $modes);

    $value = $this->streamlineLineBreaksForProcessing($value);

    // If an entry HTML cleaner was configured, pass the content through the HTMLcleaner
    $value = $this->runHtmlParserIfConfigured($value, 'entryHTMLparser_' . $direction);

    // Traverse modes
    foreach ($modes as $cmd) {
        // A user defined transformation registered for this mode takes precedence over the defaults
        $customTransformation = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd] ?? null;
        if ($direction === 'db') {
            if (!empty($customTransformation)) {
                $_procObj = GeneralUtility::makeInstance($customTransformation);
                $_procObj->pObj = $this;
                $_procObj->transformationKey = $cmd;
                $value = $_procObj->transform_db($value, $this);
            } else {
                // ... else use defaults:
                switch ($cmd) {
                    case 'detectbrokenlinks':
                        $value = $this->removeBrokenLinkMarkers($value);
                        break;
                    case 'ts_images':
                        $value = $this->TS_images_db($value);
                        break;
                    case 'ts_links':
                        $value = $this->TS_links_db($value);
                        break;
                    case 'css_transform':
                        // Transform empty paragraphs into spacing paragraphs
                        $value = str_replace('<p></p>', '<p>&nbsp;</p>', $value);
                        // Double any trailing spacing paragraph so that it does not get removed by divideIntoLines()
                        $value = preg_replace('/<p>&nbsp;<\/p>$/', '<p>&nbsp;</p>' . '<p>&nbsp;</p>', $value);
                        $value = $this->TS_transform_db($value);
                        break;
                    default:
                        // Do nothing
                }
            }
        } elseif ($direction === 'rte') {
            if (!empty($customTransformation)) {
                $_procObj = GeneralUtility::makeInstance($customTransformation);
                $_procObj->pObj = $this;
                // Same context as in the "db" direction, so a transformation can tell which key it serves
                $_procObj->transformationKey = $cmd;
                $value = $_procObj->transform_rte($value, $this);
            } else {
                // ... else use defaults:
                switch ($cmd) {
                    case 'detectbrokenlinks':
                        $value = $this->markBrokenLinks($value);
                        break;
                    case 'ts_images':
                        $value = $this->TS_images_rte($value);
                        break;
                    case 'ts_links':
                        $value = $this->TS_links_rte($value, true);
                        break;
                    case 'css_transform':
                        $value = $this->TS_transform_rte($value);
                        break;
                    default:
                        // Do nothing
                }
            }
        }
    }

    if ($direction === 'db') {
        // process markup with HTML Sanitizer
        $value = $this->htmlSanitize($value, $this->procOptions['HTMLparser_db.'] ?? []);
    }

    // If an exit HTML cleaner was configured, pass the content through the HTMLcleaner
    $value = $this->runHtmlParserIfConfigured($value, 'exitHTMLparser_' . $direction);

    // Final clean up of linebreaks
    $value = $this->streamlineLineBreaksAfterProcessing($value);

    return $value;
}
314 
/**
 * Builds the ordered, de-duplicated list of transformation modes to run.
 *
 * The shortcut "default" expands to "detectbrokenlinks,css_transform,ts_images,ts_links".
 * Only an entry that is exactly "default" is expanded; a mode name that merely contains
 * that word is left untouched. For direction "rte" the resulting order is reversed.
 *
 * @param string $direction Either "db" or "rte"
 * @param array $modes Configured modes; a single entry may itself be a comma-separated list
 * @return array Mode names in the order they are to be applied
 */
protected function resolveAppliedTransformationModes(string $direction, array $modes)
{
    // Flatten first, since one entry may carry several comma-separated modes
    $requestedModes = GeneralUtility::trimExplode(',', implode(',', $modes), true);

    $resolvedModes = [];
    foreach ($requestedModes as $mode) {
        if ($mode === 'default') {
            // Replace the shortcut "default" with all default modes
            array_push($resolvedModes, 'detectbrokenlinks', 'css_transform', 'ts_images', 'ts_links');
        } else {
            $resolvedModes[] = $mode;
        }
    }

    // Make list unique, keeping the first occurrence of every mode
    $resolvedModes = array_values(array_unique($resolvedModes));
    // Reverse order if direction is "rte"
    if ($direction === 'rte') {
        $resolvedModes = array_reverse($resolvedModes);
    }

    return $resolvedModes;
}
338 
/**
 * Passes content through HTMLcleaner() when the given directive is enabled in the processing options.
 *
 * The cleaner configuration is read from the directive name suffixed with a dot. A directive
 * that is enabled without such a sub-configuration is handled with an empty configuration.
 *
 * @param string $content Content to clean
 * @param string $configurationDirective Name of the processing option, e.g. "entryHTMLparser_db"
 * @return string Cleaned content, or the unchanged content if the directive is not enabled
 */
protected function runHtmlParserIfConfigured($content, $configurationDirective)
{
    if (empty($this->procOptions[$configurationDirective])) {
        return $content;
    }
    $parserConfiguration = $this->procOptions[$configurationDirective . '.'] ?? [];
    list($keepTags, $keepNonMatchedTags, $hscMode, $additionalConfiguration) = $this->HTMLparserConfig($parserConfiguration);
    return $this->HTMLcleaner($content, $keepTags, $keepNonMatchedTags, $hscMode, $additionalConfiguration);
}
358 
359  /************************************
360  *
361  * Specific RTE TRANSFORMATION functions
362  *
363  *************************************/
/**
 * Processes all <img> tags of content on its way to the database.
 *
 * For every image tag the source is resolved, width/height are taken from the tag, and
 * depending on the kind of image a magic image is created, plain image mode settings are
 * applied, an external image is fetched into the default upload folder, or a file uid is
 * looked up for a local file. Finally width/height are stripped from the style attribute,
 * an alt attribute is ensured and the source is made relative to the site URL.
 *
 * @param string $value Content to process
 * @return string Content with rewritten <img> tags
 */
protected function TS_images_db($value)
{
    // Split content by <img> tags and traverse the resulting array for processing:
    $imgSplit = $this->splitTags('img', $value);
    if (count($imgSplit) > 1) {
        $siteUrl = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
        $sitePath = str_replace(GeneralUtility::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
        $resourceFactory = Resource\ResourceFactory::getInstance();
        $magicImageService = GeneralUtility::makeInstance(Resource\Service\MagicImageService::class);
        $magicImageService->setMagicImageMaximumDimensions($this->tsConfig);
        foreach ($imgSplit as $k => $v) {
            // Only odd entries are image tags
            if (!($k % 2)) {
                continue;
            }
            // Get attributes
            list($attribArray) = $this->get_tag_attributes($v, true);
            // It's always an absolute URL coming from the RTE into the Database.
            $absoluteUrl = trim($attribArray['src'] ?? '');
            // Make path absolute if it is relative and we have a site path which is not '/'.
            // The scheme has to come from parse_url(): pathinfo() never reports one.
            $hasScheme = !empty(parse_url($absoluteUrl, PHP_URL_SCHEME));
            if ($sitePath && !$hasScheme && GeneralUtility::isFirstPartOfStr($absoluteUrl, $sitePath)) {
                // If site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl
                $absoluteUrl = substr($absoluteUrl, strlen($sitePath));
                $absoluteUrl = $siteUrl . $absoluteUrl;
            }
            // Image dimensions set in the img tag, if any
            $imgTagDimensions = $this->getWHFromAttribs($attribArray);
            if ($imgTagDimensions[0]) {
                $attribArray['width'] = $imgTagDimensions[0];
            }
            if ($imgTagDimensions[1]) {
                $attribArray['height'] = $imgTagDimensions[1];
            }
            $originalImageFile = null;
            if (!empty($attribArray['data-htmlarea-file-uid'])) {
                // An original image file uid is available
                try {
                    $originalImageFile = $resourceFactory->getFileObject((int)$attribArray['data-htmlarea-file-uid']);
                } catch (Resource\Exception\FileDoesNotExistException $fileDoesNotExistException) {
                    // Log the fact the file could not be retrieved.
                    $message = sprintf('Could not find file with uid "%s"', $attribArray['data-htmlarea-file-uid']);
                    $this->logger->error($message);
                }
            }
            if ($originalImageFile instanceof Resource\File) {
                // Public url of local file is relative to the site url, absolute otherwise
                $publicUrl = $originalImageFile->getPublicUrl();
                if ($absoluteUrl == $publicUrl || $absoluteUrl == $siteUrl . $publicUrl) {
                    // This is a plain image, i.e. reference to the original image
                    if (!empty($this->procOptions['plainImageMode'])) {
                        // "plain image mode" is configured
                        // Find the dimensions of the original image
                        $imageInfo = [
                            $originalImageFile->getProperty('width'),
                            $originalImageFile->getProperty('height')
                        ];
                        if (!$imageInfo[0] || !$imageInfo[1]) {
                            $filePath = $originalImageFile->getForLocalProcessing(false);
                            $imageInfoObject = GeneralUtility::makeInstance(ImageInfo::class, $filePath);
                            $imageInfo = [
                                $imageInfoObject->getWidth(),
                                $imageInfoObject->getHeight()
                            ];
                        }
                        $attribArray = $this->applyPlainImageModeSettings($imageInfo, $attribArray);
                    }
                } else {
                    // Magic image case: get a processed file with the requested configuration
                    $imageConfiguration = [
                        'width' => $imgTagDimensions[0],
                        'height' => $imgTagDimensions[1]
                    ];
                    $magicImage = $magicImageService->createMagicImage($originalImageFile, $imageConfiguration);
                    $attribArray['width'] = $magicImage->getProperty('width');
                    $attribArray['height'] = $magicImage->getProperty('height');
                    $attribArray['src'] = $magicImage->getPublicUrl();
                }
            } elseif (!GeneralUtility::isFirstPartOfStr($absoluteUrl, $siteUrl) && empty($this->procOptions['dontFetchExtPictures']) && TYPO3_MODE === 'BE') {
                // External image from another URL: in that case, fetch image, unless the feature is disabled or we are not in backend mode
                // Fetch the external image
                $externalFile = GeneralUtility::getUrl($absoluteUrl);
                if ($externalFile) {
                    // URLs without a path or without a file extension simply do not qualify
                    $pU = parse_url($absoluteUrl);
                    $pI = pathinfo($pU['path'] ?? '');
                    $extension = strtolower($pI['extension'] ?? '');
                    if (in_array($extension, ['jpg', 'jpeg', 'gif', 'png'], true)) {
                        $fileName = GeneralUtility::shortMD5($absoluteUrl) . '.' . $pI['extension'];
                        // We insert this image into the user default upload folder.
                        // The element reference may lack the ":field" part, hence the padding.
                        list($table, $field) = array_pad(explode(':', $this->elRef), 2, null);
                        $folder = $GLOBALS['BE_USER']->getDefaultUploadFolder($this->recPid, $table, $field);
                        $fileObject = $folder->createFile($fileName)->setContents($externalFile);
                        $imageConfiguration = [
                            'width' => $attribArray['width'] ?? null,
                            'height' => $attribArray['height'] ?? null
                        ];
                        $magicImage = $magicImageService->createMagicImage($fileObject, $imageConfiguration);
                        $attribArray['width'] = $magicImage->getProperty('width');
                        $attribArray['height'] = $magicImage->getProperty('height');
                        $attribArray['data-htmlarea-file-uid'] = $fileObject->getUid();
                        $attribArray['src'] = $magicImage->getPublicUrl();
                    }
                }
            } elseif (GeneralUtility::isFirstPartOfStr($absoluteUrl, $siteUrl)) {
                // Finally, check image as local file (siteURL equals the one of the image)
                // Image has no data-htmlarea-file-uid attribute
                // Relative path, rawurldecoded for special characters.
                $path = rawurldecode(substr($absoluteUrl, strlen($siteUrl)));
                // Absolute filepath, locked to relative path of this project
                $filepath = GeneralUtility::getFileAbsFileName($path);
                // Check file existence (in relative directory to this installation!)
                if ($filepath && @is_file($filepath)) {
                    // Treat it as a plain image
                    if (!empty($this->procOptions['plainImageMode'])) {
                        // If "plain image mode" has been configured
                        // Find the original dimensions of the image
                        $imageInfoObject = GeneralUtility::makeInstance(ImageInfo::class, $filepath);
                        $imageInfo = [
                            $imageInfoObject->getWidth(),
                            $imageInfoObject->getHeight()
                        ];
                        $attribArray = $this->applyPlainImageModeSettings($imageInfo, $attribArray);
                    }
                    // Let's try to find a file uid for this image
                    try {
                        $fileOrFolderObject = $resourceFactory->retrieveFileOrFolderObject($path);
                        if ($fileOrFolderObject instanceof Resource\FileInterface) {
                            $fileIdentifier = $fileOrFolderObject->getIdentifier();
                            $fileObject = $fileOrFolderObject->getStorage()->getFile($fileIdentifier);
                            // @todo if the retrieved file is a processed file, get the original file...
                            $attribArray['data-htmlarea-file-uid'] = $fileObject->getUid();
                        }
                    } catch (Resource\Exception\ResourceDoesNotExistException $resourceDoesNotExistException) {
                        // Nothing to be done if file/folder not found
                    }
                }
            }
            // Remove width and height from style attribute
            $attribArray['style'] = preg_replace('/(?:^|[^-])(\\s*(?:width|height)\\s*:[^;]*(?:$|;))/si', '', $attribArray['style'] ?? '');
            // Must have alt attribute
            if (!isset($attribArray['alt'])) {
                $attribArray['alt'] = '';
            }
            // Convert absolute to relative url
            if (GeneralUtility::isFirstPartOfStr($attribArray['src'] ?? '', $siteUrl)) {
                $attribArray['src'] = substr($attribArray['src'], strlen($siteUrl));
            }
            $imgSplit[$k] = '<img ' . GeneralUtility::implodeAttributes($attribArray, true, true) . ' />';
        }
    }
    return implode('', $imgSplit);
}
531 
/**
 * Processes all <img> tags of content on its way to the RTE.
 *
 * Every image source that is not already an absolute http(s) URL is prefixed with the
 * site URL, and a missing alt attribute is added as an empty one.
 *
 * @param string $value Content to process
 * @return string Content with rewritten <img> tags
 */
public function TS_images_rte($value)
{
    // Split content by <img> tags and traverse the resulting array for processing:
    $imgSplit = $this->splitTags('img', $value);
    if (count($imgSplit) > 1) {
        $siteUrl = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
        $sitePath = str_replace(GeneralUtility::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
        foreach ($imgSplit as $k => $v) {
            // Only odd entries are image tags
            if (!($k % 2)) {
                continue;
            }
            // Get the attributes of the img tag
            list($attribArray) = $this->get_tag_attributes($v, true);
            $source = $attribArray['src'] ?? '';
            // Transform the src attribute into an absolute url, if it not already.
            // Requiring "://" keeps relative paths that merely start with "http" from being mistaken for URLs.
            if (!preg_match('#^https?://#i', trim($source))) {
                // If site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl
                $source = preg_replace('#^' . preg_quote($sitePath, '#') . '#', '', $source);
                $attribArray['src'] = $siteUrl . $source;
            }
            // Must have alt attribute
            if (!isset($attribArray['alt'])) {
                $attribArray['alt'] = '';
            }
            $imgSplit[$k] = '<img ' . GeneralUtility::implodeAttributes($attribArray, true, true) . ' />';
        }
    }
    // Return processed content:
    return implode('', $imgSplit);
}
570 
/**
 * Rewrites the href of all <a> tags for storage in the database.
 *
 * Anchors without an href attribute are left as they are. If the deprecated
 * "modifyParams_LinksDb_PostProc" hook is registered, its processors produce the
 * replacement for the whole block; otherwise the href is re-written through the link
 * service and the content inside the anchor is processed recursively.
 *
 * @param string $value Content to process
 * @return string Content with rewritten anchors
 */
protected function TS_links_db($value)
{
    $segments = $this->splitIntoBlock('A', $value);
    foreach ($segments as $index => $segment) {
        // Even entries are the content between anchor blocks
        if (!($index % 2)) {
            continue;
        }
        list($anchorAttributes) = $this->get_tag_attributes($this->getFirstTag($segment), true);

        // Anchors would not have an href attribute
        if (!isset($anchorAttributes['href'])) {
            continue;
        }
        $linkService = GeneralUtility::makeInstance(LinkService::class);
        $linkInformation = $linkService->resolve($anchorAttributes['href'] ?? '');

        $hookIsRegistered = isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc']);
        if (!$hookIsRegistered) {
            // Default: store the link as <a> tag with the link service syntax
            try {
                $anchorAttributes['href'] = $linkService->asString($linkInformation);
            } catch (UnknownLinkHandlerException $e) {
                $anchorAttributes['href'] = $linkInformation['href'] ?? $anchorAttributes['href'];
            }

            $innerContent = $this->TS_links_db($this->removeFirstAndLastTag($segments[$index]));
            $segments[$index] = '<a ' . GeneralUtility::implodeAttributes($anchorAttributes, true) . '>'
                . $innerContent . '</a>';
            continue;
        }

        // Modify parameters, this hook should be deprecated
        trigger_error('The hook "t3lib/class.t3lib_parsehtml_proc.php->modifyParams_LinksDb_PostProc" will be removed in TYPO3 v10.0, use LinkService syntax to modify links to be stored in the database.', E_USER_DEPRECATED);
        $parameters = [
            'currentBlock' => $segment,
            'linkInformation' => $linkInformation,
            'url' => $linkInformation['href'],
            'attributes' => $anchorAttributes
        ];
        $processorClassNames = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc'] ?? [];
        foreach ($processorClassNames as $className) {
            $processor = GeneralUtility::makeInstance($className);
            $segments[$index] = $processor->modifyParamsLinksDb($parameters, $this);
        }
    }
    return implode('', $segments);
}
624 
/**
 * Converts the TYPO3 pseudo tag "<link>" into <a> tags for the RTE.
 *
 * Calling this method without the second argument triggers a deprecation notice, and so
 * does any content that still contains <link> tags. Anchors are made absolute via
 * TS_AtagToAbs() before the <link> blocks are processed.
 *
 * @param string $value Content to process
 * @param bool|null $internallyCalledFromCore Pass a non-null value to suppress the deprecation notice for the call itself
 * @return string Content with <link> blocks replaced
 */
public function TS_links_rte($value, $internallyCalledFromCore = null)
{
    if ($internallyCalledFromCore === null) {
        trigger_error('RteHtmlParser->TS_links_rte() will be removed in TYPO3 v10.0, use TS_AtagToAbs() directly and do not use <link> syntax anymore.', E_USER_DEPRECATED);
    }
    $value = $this->TS_AtagToAbs($value);
    $linkTagFound = false;
    // Split content by the TYPO3 pseudo tag "<link>"
    $segments = $this->splitIntoBlock('link', $value, true);
    foreach ($segments as $index => $segment) {
        // Even entries are the content between <link> blocks
        if (!($index % 2)) {
            continue;
        }
        $linkTagFound = true;
        // Split away the first "<link " part
        $openingTagWithoutBracket = substr($this->getFirstTag($segment), 0, -1);
        $typoLinkData = explode(' ', $openingTagWithoutBracket, 2)[1];
        $tagCode = GeneralUtility::makeInstance(TypoLinkCodecService::class)->decode($typoLinkData);

        // Parsing the TypoLink data. This parsing is done like in \TYPO3\CMS\Frontend\ContentObject->typoLink()
        $linkService = GeneralUtility::makeInstance(LinkService::class);
        $linkInformation = $linkService->resolve($tagCode['url']);

        try {
            $href = $linkService->asString($linkInformation);
        } catch (UnknownLinkHandlerException $e) {
            $href = '';
        }

        $hookIsRegistered = is_array($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc'] ?? false);
        if (!$hookIsRegistered) {
            // Setting the <a> tag
            $anchorAttributes = [
                'href' => $href,
                'target' => $tagCode['target'],
                'class' => $tagCode['class'],
                'title' => $tagCode['title']
            ];
            $innerContent = $this->TS_links_rte($this->removeFirstAndLastTag($segments[$index]), $internallyCalledFromCore);
            $segments[$index] = '<a ' . GeneralUtility::implodeAttributes($anchorAttributes, true) . '>'
                . $innerContent
                . '</a>';
            continue;
        }

        // Modify parameters by a hook
        trigger_error('The hook "t3lib/class.t3lib_parsehtml_proc.php->modifyParams_LinksRte_PostProc" will be removed in TYPO3 v10.0, use the link service to properly use .', E_USER_DEPRECATED);
        // backwards-compatibility: show an error message if the page is not found
        $error = '';
        if ($linkInformation['type'] === LinkService::TYPE_PAGE) {
            $pageRecord = BackendUtility::getRecord('pages', $linkInformation['pageuid']);
            // Page does not exist
            if (!is_array($pageRecord)) {
                $error = 'Page with ID ' . $linkInformation['pageuid'] . ' not found';
            }
        }
        $parameters = [
            'currentBlock' => $segment,
            'url' => $href,
            'tagCode' => $tagCode,
            'external' => $linkInformation['type'] === LinkService::TYPE_URL,
            'error' => $error
        ];
        foreach ($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc'] as $className) {
            $processor = GeneralUtility::makeInstance($className);
            $segments[$index] = $processor->modifyParamsLinksRte($parameters, $this);
        }
    }
    if ($linkTagFound) {
        trigger_error('Content with <link> syntax was found, update your content to use the t3:// syntax, and migrate your content via the upgrade wizard in the install tool.', E_USER_DEPRECATED);
    }
    return implode('', $segments);
}
707 
/**
 * Transforms content on its way to the database.
 *
 * Content is split by the block elements of $blockElementList. Container-like blocks are
 * processed recursively, <pre> blocks are left untouched, and in all other blocks
 * linebreaks are replaced by spaces. Non-block content is cleaned with HTMLcleaner() and
 * handed to divideIntoLines(); blank non-block sections are dropped.
 *
 * Recursion depth is limited by $TS_transform_db_safecounter. When the limit is hit the
 * content is returned unprocessed and the counter is restored, so the full depth stays
 * available for later calls.
 *
 * @param string $value Content to transform
 * @return string Transformed content
 */
protected function TS_transform_db($value)
{
    // Safety... so forever loops are avoided (they should not occur, but an error would potentially do this...)
    $this->TS_transform_db_safecounter--;
    if ($this->TS_transform_db_safecounter < 0) {
        // Give the slot back, otherwise every overflow would permanently shrink the budget
        $this->TS_transform_db_safecounter++;
        return $value;
    }
    // Split the content from RTE by the occurrence of these blocks:
    $blockSplit = $this->splitIntoBlock($this->blockElementList, $value);

    // Avoid superfluous linebreaks by transform_db after ending headListTag
    while (count($blockSplit) > 0 && trim(end($blockSplit)) === '') {
        array_pop($blockSplit);
    }

    // Traverse the blocks
    foreach ($blockSplit as $k => $v) {
        if ($k % 2) {
            // Inside block:
            $tag = $this->getFirstTag($v);
            $tagName = strtolower($this->getFirstTagName($v));
            // Process based on the tag:
            switch ($tagName) {
                case 'blockquote':
                case 'dd':
                case 'div':
                case 'header':
                case 'section':
                case 'footer':
                case 'nav':
                case 'article':
                case 'aside':
                    // Containers: transform their content recursively and re-wrap it
                    $blockSplit[$k] = $tag . $this->TS_transform_db($this->removeFirstAndLastTag($blockSplit[$k])) . '</' . $tagName . '>';
                    break;
                case 'pre':
                    // Preformatted content is kept as it is
                    break;
                default:
                    // usually <hx> tags and <table> tags where no other block elements are within the tags
                    // Eliminate true linebreaks inside block element tags
                    $blockSplit[$k] = preg_replace('/[' . LF . ']+/', ' ', $blockSplit[$k]);
            }
        } elseif (trim($blockSplit[$k]) !== '') {
            // NON-block:
            $blockSplit[$k] = str_replace('<hr/>', '<hr />', $blockSplit[$k]);
            // Remove linebreaks preceding hr tags
            $blockSplit[$k] = preg_replace('/[' . LF . ']+<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/', '<$1$2/>', $blockSplit[$k]);
            // Remove linebreaks following hr tags
            $blockSplit[$k] = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>[' . LF . ']+/', '<$1$2/>', $blockSplit[$k]);
            // Replace other linebreaks with space
            $blockSplit[$k] = preg_replace('/[' . LF . ']+/', ' ', $blockSplit[$k]);
            // process allowed/removed tags
            $blockSplit[$k] = $this->HTMLcleaner(
                (string)$blockSplit[$k],
                $this->getKeepTags('db'),
                $this->procOptions['HTMLparser_db.']['keepNonMatchedTags'] ?? '',
                (int)($this->procOptions['HTMLparser_db.']['htmlSpecialChars'] ?? 0)
            );
            $blockSplit[$k] = $this->divideIntoLines($blockSplit[$k]);
        } else {
            // Blank non-block sections do not contribute a line
            unset($blockSplit[$k]);
        }
    }
    $this->TS_transform_db_safecounter++;
    return implode(LF, $blockSplit);
}
784 
/**
 * Wraps every <a> tag carrying a style attribute into a <span> that takes over the style.
 *
 * Anchors without a style attribute, or with an "rteerror" attribute, are left unchanged.
 * Calling this method triggers a deprecation notice.
 *
 * @param string $value Content to process
 * @return string Content with styled anchors wrapped
 */
public function transformStyledATags($value)
{
    trigger_error('RteHtmlParser->transformStyledATags() will be removed in TYPO3 v10.0. TYPO3 can handle style attribute in anchor tags properly since TYPO3 v8 LTS.', E_USER_DEPRECATED);
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        // Only odd entries are A-tag blocks
        if (!($k % 2)) {
            continue;
        }
        list($attribArray) = $this->get_tag_attributes($this->getFirstTag($v), true);
        // If "style" attribute is set and rteerror is not set!
        // Both attributes are optional, so they must not be read unchecked.
        if (empty($attribArray['style']) || !empty($attribArray['rteerror'])) {
            continue;
        }
        // Move the style from the anchor to the wrapping span
        $spanAttributes = ['style' => $attribArray['style']];
        unset($attribArray['style']);
        $bTag = '<span ' . GeneralUtility::implodeAttributes($spanAttributes, true) . '><a ' . GeneralUtility::implodeAttributes($attribArray, true) . '>';
        $eTag = '</a></span>';
        $blockSplit[$k] = $bTag . $this->removeFirstAndLastTag($blockSplit[$k]) . $eTag;
    }
    return implode('', $blockSplit);
}
816 
/**
 * Transforms content from the database for the RTE.
 *
 * Content is split by the block elements of $blockElementList. Container-like blocks are
 * processed recursively, and a leading linebreak is removed from the section following
 * any block. Non-block sections get their linebreaks adjusted and are passed to
 * setDivTags(); sections that end up empty without having consisted of linebreaks only
 * are dropped.
 *
 * @param string $value Content to transform
 * @return string Transformed content
 */
protected function TS_transform_rte($value)
{
    // Blocks whose content is transformed recursively
    $recursiveTags = ['blockquote', 'dd', 'div', 'header', 'section', 'footer', 'nav', 'article', 'aside'];
    // Split the content from database by the occurrence of the block elements
    $sections = $this->splitIntoBlock($this->blockElementList, $value);
    foreach ($sections as $index => $section) {
        $followingIndex = $index + 1;
        if ($index % 2) {
            // Inside one of the blocks:
            $openingTag = $this->getFirstTag($section);
            $tagName = strtolower($this->getFirstTagName($section));
            if (in_array($tagName, $recursiveTags, true)) {
                $innerContent = $this->TS_transform_rte($this->removeFirstAndLastTag($sections[$index]));
                $sections[$index] = $openingTag . $innerContent . '</' . $tagName . '>';
            }
            // Drop the linebreak directly following the block
            $sections[$followingIndex] = preg_replace('/^[ ]*' . LF . '/', '', $sections[$followingIndex]);
            continue;
        }

        // NON-block:
        $nextTagName = $this->getFirstTagName($sections[$followingIndex] ?? '');
        $onlyLineBreaks = (preg_match('/^[ ]*' . LF . '+[ ]*$/', $sections[$index]) == 1);
        // If the line is followed by a block or is the last line:
        if (GeneralUtility::inList($this->blockElementList, $nextTagName) || !isset($sections[$followingIndex])) {
            $sections[$index] = $onlyLineBreaks
                // If the line contains only linebreaks, remove the leading linebreak
                ? preg_replace('/^[ ]*' . LF . '/', '', $sections[$index])
                // Otherwise reduce the number of trailing linebreaks by 1
                : preg_replace('/(' . LF . '*)' . LF . '[ ]*$/', '$1', $sections[$index]);
        }
        // A blank section is removed, unless it only contained linebreaks
        if ((string)$sections[$index] === '' && !$onlyLineBreaks) {
            unset($sections[$index]);
            continue;
        }
        $sections[$index] = $this->setDivTags($sections[$index]);
    }
    return implode(LF, $sections);
}
875 
876  /***************************************************************
877  *
878  * Generic RTE transformation, analysis and helper functions
879  *
880  **************************************************************/
881 
/**
 * Cleans content with HTMLcleaner() using the keep-tags configuration for direction "db".
 *
 * Unknown tags are removed unless the deprecated "dontRemoveUnknownTags_db" option
 * evaluates to true; the mere presence of that option triggers a deprecation notice.
 *
 * @param string $content Content to clean
 * @return string Cleaned content
 */
protected function HTMLcleaner_db($content)
{
    $keepTags = $this->getKeepTags('db');
    $legacyOptionIsSet = isset($this->procOptions['dontRemoveUnknownTags_db']);
    if ($legacyOptionIsSet) {
        trigger_error('HTMLParser option "dontRemoveUnknownTags_db" will not be evaluted anymore in TYPO3 v10.0. Remove its usages.', E_USER_DEPRECATED);
    }
    // Default: remove unknown tags.
    $keepUnknownTags = $legacyOptionIsSet ? (bool)$this->procOptions['dontRemoveUnknownTags_db'] : false;
    return $this->HTMLcleaner($content, $keepTags, $keepUnknownTags);
}
901 
/**
 * Builds the tag configuration array that is handed to HTMLcleaner().
 *
 * The result is computed once per direction and kept in
 * $this->getKeepTags_cache for subsequent calls.
 *
 * @param string $direction Direction of the transformation, "rte" or "db"
 * @return array Tag configuration as produced by HTMLparserConfig()
 */
protected function getKeepTags($direction = 'rte')
{
    $cachedConfiguration = $this->getKeepTags_cache[$direction] ?? null;
    if (is_array($cachedConfiguration)) {
        return $cachedConfiguration;
    }

    // Allowed tags come from the processing options: the array form
    // "allowTags." takes precedence over the plain "allowTags" string.
    $hasAllowTagsArray = isset($this->procOptions['allowTags.']) && is_array($this->procOptions['allowTags.']);
    $configuredTags = $hasAllowTagsArray
        ? implode(',', $this->procOptions['allowTags.'])
        : ($this->procOptions['allowTags'] ?? '');
    $allowedTagNames = GeneralUtility::trimExplode(
        ',',
        $this->defaultAllowedTagsList . ',' . strtolower($configuredTags),
        true
    );
    // Tag names become the array keys
    $keepTags = array_flip($allowedTagNames);

    // Denied tags are dropped from the list again
    $deniedTagNames = GeneralUtility::trimExplode(',', $this->procOptions['denyTags'] ?? '', true);
    $keepTags = array_diff_key($keepTags, array_flip($deniedTagNames));

    // Direction specific options
    switch ($direction) {
        case 'rte':
            // Convert the TypoScript style configuration (keys with dots) into the
            // plain multi-dimensional array understood by HTMLcleaner()
            $keepTags = $this->HTMLparserConfig($this->procOptions['HTMLparser_rte.'] ?? [], $keepTags)[0];
            break;
        case 'db':
            // Special handling for <span> if it is allowed at all
            if (isset($keepTags['span'])) {
                $spanConfiguration = [
                    'allowedAttribs' => 'id,class,style,title,lang,xml:lang,dir,itemscope,itemtype,itemprop',
                    'fixAttrib' => [
                        'class' => [
                            'removeIfFalse' => 1
                        ]
                    ],
                    'rmTagIfNoAttrib' => 1
                ];
                if (!empty($this->allowedClasses)) {
                    $spanConfiguration['fixAttrib']['class']['list'] = $this->allowedClasses;
                }
                $keepTags['span'] = $spanConfiguration;
            }
            // Further options from the processing options, with fallbacks
            $parserConfiguration = $this->procOptions['HTMLparser_db.'] ?? [];
            if (empty($parserConfiguration['globalNesting'])) {
                $parserConfiguration['globalNesting'] = 'b,i,u,a,center,font,sub,sup,strong,em,strike,span';
            }
            if (empty($parserConfiguration['noAttrib'])) {
                $parserConfiguration['noAttrib'] = 'b,i,u,br,center,hr,sub,sup,strong,em,li,ul,ol,blockquote,strike';
            }
            // Convert from TypoScript style to regular array
            $keepTags = $this->HTMLparserConfig($parserConfiguration, $keepTags)[0];
            break;
    }

    // Remember the result in object memory for the next call
    $this->getKeepTags_cache[$direction] = $keepTags;
    return $this->getKeepTags_cache[$direction];
}
968 
/**
 * Splits content at <p> sections and turns it into lines for database storage.
 *
 * Content inside a <p> section is processed recursively (nested <p> tags are
 * flattened into separate lines); content outside of <p> sections is stripped
 * down to the tags listed in $this->allowedTagsOutsideOfParagraphs and wrapped
 * in <p> tags if anything is left.
 *
 * @param string $value Content to process
 * @param int $count Remaining recursion depth; at 0 (or below) no further splitting happens
 * @param bool $returnArray If set, the lines are returned as array instead of an LF-joined string
 * @return string|array Processed content; without any <p> section, or with exhausted
 *                      $count, the line-break-sanitized input string is returned
 */
protected function divideIntoLines($value, $count = 5, $returnArray = false)
{
    // The third argument makes splitIntoBlock() eliminate false end-tags
    $segments = $this->splitIntoBlock('p', $value, true);
    if ($count <= 0 || count($segments) <= 1) {
        // No <p> section found (or recursion budget used up): plain content
        return $this->sanitizeLineBreaksForContentOnly($value);
    }

    $tagsKeptOutsideParagraphs = '<' . implode('><', $this->allowedTagsOutsideOfParagraphs) . '>';
    foreach ($segments as $index => $segment) {
        if ($index % 2 === 0) {
            // Even positions are outside of a <p> section: drop them when empty,
            // otherwise wrap the remaining text in <p> tags
            $outsideContent = trim(strip_tags($segment, $tagsKeptOutsideParagraphs));
            $outsideContent = $this->sanitizeLineBreaksForContentOnly($outsideContent);
            if ((string)$outsideContent === '') {
                unset($segments[$index]);
            } else {
                $plainText = strip_tags($outsideContent);
                $segments[$index] = str_replace($plainText, '<p>' . $plainText . '</p>', $outsideContent);
            }
            continue;
        }

        // Odd positions are a <p> section: look for nested <p> sections first
        $innerContent = $this->removeFirstAndLastTag($segment);
        $subLines = $this->divideIntoLines($innerContent, $count - 1, true);
        if (is_array($subLines)) {
            // Nested paragraphs were found (which would be considered 'an error'):
            // their lines replace this section
            $segments[$index] = implode(LF, $subLines);
        } else {
            // A true line without nested paragraphs
            $segments[$index] = $this->processContentWithinParagraph($subLines, $segment);
        }

        // A line consisting of nothing but &nbsp; becomes a pure blank line, unless it
        // contains an image or a tag still carrying attributes, which count as content
        $line = $segments[$index];
        if (trim(strip_tags($line)) === '&nbsp;'
            && !preg_match('/\\<(img)(\\s[^>]*)?\\/?>/si', $line)
            && !preg_match('/\\<([^>]*)?( align| class| style| id| title| dir| lang| xml:lang)([^>]*)?>/si', trim($line))
        ) {
            $segments[$index] = '';
        }
    }

    if ($returnArray) {
        return $segments;
    }
    return implode(LF, $segments);
}
1025 
/**
 * Prepares LF-separated lines for the RTE by wrapping each of them in <p> tags.
 *
 * Blank lines become "&nbsp;", other lines are cleaned with HTMLcleaner() using
 * the "rte" keep-tag configuration. A line is left unwrapped when it contains
 * an <hr> tag or is already enclosed in <div>...</div> or <p>...</p>.
 *
 * @param string $value Content with lines separated by LF
 * @return string The processed lines, joined by LF again
 */
protected function setDivTags($value)
{
    // Configuration for HTMLcleaner(), applied to every non-blank line
    $rteKeepTags = $this->getKeepTags('rte');
    $lines = explode(LF, $value);
    foreach ($lines as $index => $line) {
        if (trim($line) === '') {
            // Blank lines are represented by a non-breaking space
            $line = '&nbsp;';
        } else {
            // Clean the line, keeping unknown tags (they can be removed in the entryHTMLparser),
            // then turn double-encoded &nbsp; back into regular &nbsp;
            $line = str_replace('&amp;nbsp;', '&nbsp;', $this->HTMLcleaner($line, $rteKeepTags, 'protect'));
        }

        $containsHorizontalRule = preg_match('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', $line);
        if (!$containsHorizontalRule) {
            $normalizedLine = strtolower(trim($line));
            $isWrappedInDiv = strpos($normalizedLine, '<div') === 0 && substr($normalizedLine, -6) === '</div>';
            $isWrappedInParagraph = strpos($normalizedLine, '<p') === 0 && substr($normalizedLine, -4) === '</p>';
            // Only add <p> tags if the line is not already wrapped
            if (!$isWrappedInDiv && !$isWrappedInParagraph) {
                $line = '<p>' . $line . '</p>';
            }
        }
        $lines[$index] = $line;
    }
    return implode(LF, $lines);
}
1066 
/**
 * Cleans the content of a single paragraph and rebuilds its surrounding <p> tag.
 *
 * Only attributes listed in $this->allowedAttributesForParagraphTags survive on
 * the rebuilt tag; if $this->allowedClasses is not empty, the class attribute is
 * reduced to the classes contained in that list.
 *
 * @param string $content Content inside the paragraph (without the <p> tag)
 * @param string $fullContentWithTag The paragraph including its <p> tag, used to read the attributes
 * @return string The cleaned content wrapped in the rebuilt <p> tag, without line breaks
 */
protected function processContentWithinParagraph(string $content, string $fullContentWithTag)
{
    $cleanedContent = $this->HTMLcleaner_db($content);
    // The opening tag carries the attributes to validate
    $openingTag = $this->getFirstTag($fullContentWithTag);

    $keptAttributes = [];
    if (!empty($this->allowedAttributesForParagraphTags)) {
        $parsedAttributes = $this->get_tag_attributes($openingTag)[0];
        // Drop every attribute that is not explicitly allowed
        $keptAttributes = array_intersect_key($parsedAttributes, array_flip($this->allowedAttributesForParagraphTags));

        // Restrict classes to the ones whitelisted in $this->allowedClasses
        $classValue = $keptAttributes['class'] ?? null;
        if ($classValue !== null
            && trim($classValue) !== ''
            && !empty($this->allowedClasses)
            && !in_array($classValue, $this->allowedClasses, true)
        ) {
            $permittedClasses = array_intersect(
                GeneralUtility::trimExplode(' ', $classValue, true),
                $this->allowedClasses
            );
            if (empty($permittedClasses)) {
                unset($keptAttributes['class']);
            } else {
                $keptAttributes['class'] = implode(' ', $permittedClasses);
            }
        }
    }

    // Line breaks are not wanted inside a paragraph
    $singleLineContent = str_replace(LF, '', $cleanedContent);
    // rtrim() removes the trailing space of "p " when there are no attributes
    $paragraphTag = '<' . rtrim('p ' . $this->compileTagAttribs($keptAttributes)) . '>';
    return $paragraphTag . $singleLineContent . '</p>';
}
1110 
/**
 * Normalizes line breaks in content that is not wrapped in paragraphs.
 *
 * Each <hr> tag is rewritten to self-closing form and put on a line of its own,
 * every pair of consecutive line feeds is replaced by one (in a single pass),
 * and one line feed at the very beginning and at the very end is removed.
 *
 * @param string $content Content to normalize
 * @return string Content with normalized line breaks
 */
protected function sanitizeLineBreaksForContentOnly(string $content)
{
    $withIsolatedRules = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', LF . '<$1$2/>' . LF, $content);
    $withoutDoubledBreaks = str_replace(LF . LF, LF, $withIsolatedRules);
    return preg_replace('/(^' . LF . ')|(' . LF . '$)/i', '', $withoutDoubledBreaks);
}
1124 
/**
 * Determines width and height from a tag's attributes.
 *
 * Pixel values found in the "style" attribute ("width: NNpx" / "height: NNpx")
 * win; where the style yields no non-zero value, the plain "width" / "height"
 * attributes are used instead. Missing attributes and non-matching styles
 * result in 0 without raising undefined index/offset notices.
 *
 * @param array $attribArray Attributes of the tag, keyed by attribute name
 * @return int[] Array with width at index 0 and height at index 1
 */
protected function getWHFromAttribs($attribArray)
{
    $width = 0;
    $height = 0;
    $style = trim((string)($attribArray['style'] ?? ''));
    if ($style) {
        $pixelValuePattern = '[[:space:]]*:[[:space:]]*([0-9]*)[[:space:]]*px';
        $matches = [];
        // Only read the capture group when the pattern actually matched
        if (preg_match('/width' . $pixelValuePattern . '/i', $style, $matches)) {
            $width = (int)$matches[1];
        }
        if (preg_match('/height' . $pixelValuePattern . '/i', $style, $matches)) {
            $height = (int)$matches[1];
        }
    }
    // Fall back to the dedicated attributes, which may be absent as well
    if (!$width) {
        $width = $attribArray['width'] ?? 0;
    }
    if (!$height) {
        $height = $attribArray['height'] ?? 0;
    }
    return [(int)$width, (int)$height];
}
1155 
/**
 * Analyzes a URL from a link tag and classifies it.
 *
 * The returned array always contains "url" and "type", where type is one of
 * "email", "file", "ext", "anchor" or "page". For URLs on the current site the
 * keys "relScriptPath" and "relUrl" are set as well, and for page links
 * additionally "pageid", "cElement" and "query".
 *
 * Optional URL components (host, path, query, fragment) are read defensively,
 * so URLs lacking them do not raise undefined index/offset notices.
 *
 * @param string $url URL to analyze
 * @return array Information about the URL, see above
 */
protected function urlInfoForLinkTags($url)
{
    $info = [];
    $url = trim($url);

    if (strpos(strtolower($url), 'mailto:') === 0) {
        $info['url'] = trim(substr($url, 7));
        $info['type'] = 'email';
        return $info;
    }
    if (strpos($url, '?file:') !== false) {
        $info['type'] = 'file';
        $info['url'] = rawurldecode(substr($url, strpos($url, '?file:') + 1));
        return $info;
    }

    $curURL = (string)GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
    // Length of the common prefix of $url and the site URL. The bound is the
    // shorter of both strings, so no offset beyond a string's end is read.
    $comparableLength = min(strlen($url), strlen($curURL));
    $commonLength = 0;
    while ($commonLength < $comparableLength && $url[$commonLength] === $curURL[$commonLength]) {
        $commonLength++;
    }
    $info['relScriptPath'] = substr($curURL, $commonLength);
    $info['relUrl'] = substr($url, $commonLength);
    $info['url'] = $url;
    $info['type'] = 'ext';

    // parse_url() omits missing components and may return false for malformed URLs
    $siteUrl_parts = parse_url($url);
    $curUrl_parts = parse_url($curURL);
    // Loose comparison kept on purpose: two missing hosts count as matching
    $hostsMatch = ($siteUrl_parts['host'] ?? null) == ($curUrl_parts['host'] ?? null);
    // The script path has to be empty (FE-EDIT) or start with the TYPO3 main directory
    $scriptPathMatches = !$info['relScriptPath']
        || (defined('TYPO3_mainDir') && strpos($info['relScriptPath'], TYPO3_mainDir) === 0);
    if (!$hostsMatch || !$scriptPathMatches) {
        unset($info['relScriptPath'], $info['relUrl']);
        return $info;
    }

    $uP = parse_url($info['relUrl']);
    $relativePath = $uP['path'] ?? '';
    if ($info['relUrl'] === '#' . ($siteUrl_parts['fragment'] ?? '')) {
        $info['url'] = $info['relUrl'];
        $info['type'] = 'anchor';
    } elseif (!trim($relativePath) || $relativePath === 'index.php') {
        // URL is a page (id parameter)
        $pp = preg_split('/^id=/', $uP['query'] ?? '');
        $queryAfterId = preg_replace('/&id=[^&]*/', '', $pp[1] ?? '');
        $parameters = explode('&', $queryAfterId);
        $id = array_shift($parameters);
        if ($id) {
            $info['pageid'] = $id;
            $info['cElement'] = $uP['fragment'] ?? null;
            $info['url'] = $id . ($info['cElement'] ? '#' . $info['cElement'] : '');
            $info['type'] = 'page';
            $info['query'] = !empty($parameters[0]) ? '&' . implode('&', $parameters) : '';
        }
    } else {
        $info['url'] = $info['relUrl'];
        $info['type'] = 'file';
    }
    return $info;
}
1221 
/**
 * Makes the href of all <a> tags absolute by prefixing TYPO3_SITE_URL.
 *
 * Only hrefs that are not empty and have no scheme are changed; <a> tags
 * without href (anchors) are preserved. Nested <a> tags are processed
 * recursively. Every <a> tag is rebuilt from its parsed attributes.
 *
 * @param string $value Content to transform
 * @return string Content with rebuilt <a> tags
 */
protected function TS_AtagToAbs($value)
{
    if (func_num_args() > 1) {
        trigger_error('Second argument of RteHtmlParser->TS_AtagToAbs() is not in use and will be removed in TYPO3 v10.0, however the argument in the callers code can be removed without side-effects.', E_USER_DEPRECATED);
    }
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        // Only odd positions hold an <a> block
        if (!($k % 2)) {
            continue;
        }
        list($attribArray) = $this->get_tag_attributes($this->getFirstTag($v), true);
        // ONLY touch the href if it has content - the <a> tag COULD be an anchor
        $href = $attribArray['href'] ?? '';
        if ($href !== '') {
            // parse_url() leaves out "scheme" when there is none (and may return false),
            // so the key must not be read unguarded
            $uP = parse_url(strtolower($href));
            if (empty($uP['scheme'])) {
                $attribArray['href'] = GeneralUtility::getIndpEnv('TYPO3_SITE_URL') . $href;
            }
        }
        $bTag = '<a ' . GeneralUtility::implodeAttributes($attribArray, true) . '>';
        $eTag = '</a>';
        $blockSplit[$k] = $bTag . $this->TS_AtagToAbs($this->removeFirstAndLastTag($blockSplit[$k])) . $eTag;
    }
    return implode('', $blockSplit);
}
1253 
/**
 * Adjusts the width/height attributes of an image according to the
 * processing option "plainImageMode".
 *
 * Supported modes:
 * - lockDimensions: width and height are taken from $imageInfo
 * - lockRatioWhenSmaller: width is capped at the image width, height follows the image ratio
 * - lockRatio: height follows the image ratio for the given width
 *
 * If the option is not configured (or empty), the attributes are returned unchanged.
 *
 * @param array $imageInfo Image dimensions, width at index 0 and height at index 1
 * @param array $attribArray Attributes of the image tag
 * @return array The (possibly modified) attributes
 */
protected function applyPlainImageModeSettings($imageInfo, $attribArray)
{
    // The option is optional, do not read it unguarded
    if (empty($this->procOptions['plainImageMode'])) {
        return $attribArray;
    }
    // Perform corrections to aspect ratio based on configuration
    switch ((string)$this->procOptions['plainImageMode']) {
        case 'lockDimensions':
            $attribArray['width'] = $imageInfo[0];
            $attribArray['height'] = $imageInfo[1];
            break;
        case 'lockRatioWhenSmaller':
            if ($attribArray['width'] > $imageInfo[0]) {
                $attribArray['width'] = $imageInfo[0];
            }
            // Guard against division by zero for images without width
            if ($imageInfo[0] > 0) {
                $attribArray['height'] = round($attribArray['width'] * ($imageInfo[1] / $imageInfo[0]));
            }
            break;
        case 'lockRatio':
            if ($imageInfo[0] > 0) {
                $attribArray['height'] = round($attribArray['width'] * ($imageInfo[1] / $imageInfo[0]));
            }
            break;
    }
    return $attribArray;
}
/**
 * Removes every occurrence of the CR constant from the content.
 *
 * @param string $content Content to process
 * @return string Content without CR characters
 */
protected function streamlineLineBreaksForProcessing(string $content)
{
    $contentWithoutCarriageReturns = str_replace(CR, '', $content);
    return $contentWithoutCarriageReturns;
}
/**
 * Converts all line breaks of the content to CRLF.
 *
 * The content is first passed through streamlineLineBreaksForProcessing(), so
 * that no \r\n sequence that entered in the meantime gets doubled, and then
 * every \n is replaced by \r\n.
 *
 * @param string $content Content to process
 * @return string Content with CRLF line breaks
 */
protected function streamlineLineBreaksAfterProcessing(string $content)
{
    return str_replace(LF, CRLF, $this->streamlineLineBreaksForProcessing($content));
}
1321 
/**
 * Marks <a> tags that link to a page which cannot be found.
 *
 * Each <a> tag with a non-empty href is resolved through the LinkService. If it
 * points to a page (other than "current") for which BackendUtility::getRecord()
 * returns no record, the attribute "data-rte-error" is added. All processed
 * <a> tags are rebuilt and their content is handled recursively.
 *
 * @param string $content Content to process
 * @return string Content with broken page links marked
 */
protected function markBrokenLinks(string $content): string
{
    $blocks = $this->splitIntoBlock('A', $content);
    $linkService = GeneralUtility::makeInstance(LinkService::class);
    foreach (array_keys($blocks) as $position) {
        // Even positions hold the content between <a> blocks
        if ($position % 2 === 0) {
            continue;
        }
        $linkBlock = $blocks[$position];
        $attributes = $this->get_tag_attributes($this->getFirstTag($linkBlock), true)[0];
        if (empty($attributes['href'])) {
            continue;
        }
        $linkDetails = $linkService->resolve($attributes['href']);
        $pointsToSpecificPage = $linkDetails['type'] === LinkService::TYPE_PAGE
            && $linkDetails['pageuid'] !== 'current';
        if ($pointsToSpecificPage && !is_array(BackendUtility::getRecord('pages', $linkDetails['pageuid']))) {
            // Page does not exist
            $attributes['data-rte-error'] = 'Page with ID ' . $linkDetails['pageuid'] . ' not found';
        }
        // The block is always rewritten, so nested links get processed even if the page exists
        $openingTag = '<a ' . GeneralUtility::implodeAttributes($attributes, true, true) . '>';
        $blocks[$position] = $openingTag
            . $this->markBrokenLinks($this->removeFirstAndLastTag($linkBlock))
            . '</a>';
    }
    return implode('', $blocks);
}
1358 
/**
 * Removes the markers for broken links from all <a> tags again.
 *
 * For each <a> tag with a non-empty href the attribute "data-rte-error" is
 * dropped and the marker styling is removed from the style attribute (an empty
 * style attribute is dropped entirely). All processed <a> tags are rebuilt and
 * their content is handled recursively.
 *
 * @param string $content Content to process
 * @return string Content without broken link markers
 */
protected function removeBrokenLinkMarkers(string $content): string
{
    $blocks = $this->splitIntoBlock('A', $content);
    foreach (array_keys($blocks) as $position) {
        // Even positions hold the content between <a> blocks
        if ($position % 2 === 0) {
            continue;
        }
        $linkBlock = $blocks[$position];
        $attributes = $this->get_tag_attributes($this->getFirstTag($linkBlock), true)[0];
        if (empty($attributes['href'])) {
            continue;
        }
        // Always remove the marker again (regardless of whether the page was found),
        // so that it does not end up in the database
        unset($attributes['data-rte-error']);
        if (isset($attributes['style'])) {
            $remainingStyle = trim(str_replace('background-color: yellow; border:2px red solid; color: black;', '', $attributes['style']));
            if (empty($remainingStyle)) {
                unset($attributes['style']);
            } else {
                $attributes['style'] = $remainingStyle;
            }
        }
        $openingTag = '<a ' . GeneralUtility::implodeAttributes($attributes, true, true) . '>';
        $blocks[$position] = $openingTag
            . $this->removeBrokenLinkMarkers($this->removeFirstAndLastTag($linkBlock))
            . '</a>';
    }
    return implode('', $blocks);
}
1393 
/**
 * Runs the content through an HTML sanitizer, if sanitizing is enabled.
 *
 * The content is returned unchanged if the configuration contains the key
 * "htmlSanitize" with an empty value (e.g. null or false), or if the feature
 * "security.backend.htmlSanitizeRte" is disabled. Otherwise a sanitizer is
 * built from "htmlSanitize." / "build" (default: "default"): either directly
 * from a class implementing BuilderInterface, or by name through the
 * SanitizerBuilderFactory.
 *
 * @param string $content Content to sanitize
 * @param array $configuration Configuration holding the "htmlSanitize" settings
 * @return string Sanitized (or untouched) content
 */
protected function htmlSanitize(string $content, array $configuration): string
{
    $features = GeneralUtility::makeInstance(Features::class);
    // Explicitly switched off: `htmlSanitize = null` or `htmlSanitize = false`
    $disabledByConfiguration = array_key_exists('htmlSanitize', $configuration)
        && empty($configuration['htmlSanitize']);
    if ($disabledByConfiguration) {
        return $content;
    }
    // Feature flag `security.backend.htmlSanitizeRte` is disabled
    if (!$features->isFeatureEnabled('security.backend.htmlSanitizeRte')) {
        return $content;
    }

    $buildName = $configuration['htmlSanitize.']['build'] ?? 'default';
    $isBuilderClass = class_exists($buildName) && is_a($buildName, BuilderInterface::class, true);
    $builder = $isBuilderClass
        ? GeneralUtility::makeInstance($buildName)
        : GeneralUtility::makeInstance(SanitizerBuilderFactory::class)->build($buildName);

    $htmlSanitizer = $builder->build();
    $sanitizerInitiator = GeneralUtility::makeInstance(SanitizerInitiator::class, get_class($this));
    return $htmlSanitizer->sanitize($content, $sanitizerInitiator);
}
1416 }
‪TYPO3\CMS\Core\Html
Definition: DefaultSanitizerBuilder.php:15
‪TYPO3\CMS\Core\Html\HtmlParser\HTMLparserConfig
‪array HTMLparserConfig($TSconfig, $keepTags=[])
Definition: HtmlParser.php:877
‪TYPO3\CMS\Core\Html\RteHtmlParser\$deprecatedPublicMethods
‪$deprecatedPublicMethods
Definition: RteHtmlParser.php:56
‪TYPO3\CMS\Core\Html\RteHtmlParser\removeBrokenLinkMarkers
‪string removeBrokenLinkMarkers(string $content)
Definition: RteHtmlParser.php:1355
‪TYPO3\CMS\Core\Html\HtmlParser\getFirstTagName
‪string getFirstTagName($str, $preserveCase=false)
Definition: HtmlParser.php:237
‪TYPO3\CMS\Core\Resource\FileInterface
Definition: FileInterface.php:21
‪TYPO3\CMS\Core\Html\HtmlParser
Definition: HtmlParser.php:26
‪TYPO3\CMS\Core\Html\RteHtmlParser\$allowedTagsOutsideOfParagraphs
‪array $allowedTagsOutsideOfParagraphs
Definition: RteHtmlParser.php:146
‪TYPO3\CMS\Core\Html\RteHtmlParser\$deprecatedPublicProperties
‪$deprecatedPublicProperties
Definition: RteHtmlParser.php:45
‪TYPO3\CMS\Core\Html\RteHtmlParser\runHtmlParserIfConfigured
‪string runHtmlParserIfConfigured($content, $configurationDirective)
Definition: RteHtmlParser.php:339
‪TYPO3\CMS\Core\Html\RteHtmlParser\init
‪init($elRef='', $recPid=0)
Definition: RteHtmlParser.php:165
‪TYPO3\CMS\Core\Html\RteHtmlParser\sanitizeLineBreaksForContentOnly
‪string sanitizeLineBreaksForContentOnly(string $content)
Definition: RteHtmlParser.php:1106
‪TYPO3\CMS\Core\Resource\ResourceFactory\getInstance
‪static ResourceFactory getInstance()
Definition: ResourceFactory.php:39
‪TYPO3\CMS\Core\Html\RteHtmlParser\getKeepTags
‪array getKeepTags($direction='rte')
Definition: RteHtmlParser.php:899
‪TYPO3\CMS\Core\Html\RteHtmlParser\TS_images_db
‪string TS_images_db($value)
Definition: RteHtmlParser.php:364
‪TYPO3\CMS\Core\Html\RteHtmlParser\urlInfoForLinkTags
‪array urlInfoForLinkTags($url)
Definition: RteHtmlParser.php:1153
‪TYPO3\CMS\Core\Html\RteHtmlParser\$tsConfig
‪array $tsConfig
Definition: RteHtmlParser.php:96
‪TYPO3\CMS\Core\Html\RteHtmlParser\htmlSanitize
‪htmlSanitize(string $content, array $configuration)
Definition: RteHtmlParser.php:1383
‪TYPO3\CMS\Core\Html\RteHtmlParser\$TS_transform_db_safecounter
‪int $TS_transform_db_safecounter
Definition: RteHtmlParser.php:108
‪TYPO3\CMS\Core\Html\RteHtmlParser\divideIntoLines
‪string array divideIntoLines($value, $count=5, $returnArray=false)
Definition: RteHtmlParser.php:970
‪TYPO3\CMS\Core\Html\HtmlParser\getFirstTag
‪string getFirstTag($str)
Definition: HtmlParser.php:214
‪TYPO3\CMS\Core\Html\RteHtmlParser\RTE_transform
‪string RTE_transform($value, $_=null, $direction='rte', $thisConfig=[])
Definition: RteHtmlParser.php:186
‪TYPO3\CMS\Core\Html\RteHtmlParser\$getKeepTags_cache
‪array $getKeepTags_cache
Definition: RteHtmlParser.php:114
‪TYPO3\CMS\Core\Html\RteHtmlParser\TS_links_rte
‪string TS_links_rte($value, $internallyCalledFromCore=null)
Definition: RteHtmlParser.php:626
‪TYPO3\CMS\Core\Html\HtmlParser\splitTags
‪array splitTags($tag, $content)
Definition: HtmlParser.php:156
‪TYPO3\CMS\Core\Html\RteHtmlParser\setDivTags
‪string setDivTags($value)
Definition: RteHtmlParser.php:1023
‪TYPO3\CMS\Core\Html\RteHtmlParser\processContentWithinParagraph
‪string processContentWithinParagraph(string $content, string $fullContentWithTag)
Definition: RteHtmlParser.php:1068
‪TYPO3\CMS\Core\Html\HtmlParser\get_tag_attributes
‪array get_tag_attributes($tag, $deHSC=false)
Definition: HtmlParser.php:263
‪TYPO3\CMS\Core\Html\RteHtmlParser\streamlineLineBreaksAfterProcessing
‪string streamlineLineBreaksAfterProcessing(string $content)
Definition: RteHtmlParser.php:1303
‪TYPO3\CMS\Core\Html\HtmlParser\compileTagAttribs
‪string compileTagAttribs($tagAttrib, $meta=[])
Definition: HtmlParser.php:855
‪TYPO3\CMS\Core\Resource\Exception\ResourceDoesNotExistException
Definition: ResourceDoesNotExistException.php:21
‪TYPO3\CMS\Core\Resource\File
Definition: File.php:23
‪TYPO3\CMS\Core\Configuration\Features
Definition: Features.php:54
‪TYPO3\CMS\Core\Html\RteHtmlParser
Definition: RteHtmlParser.php:40
‪TYPO3\CMS\Core\Html\HtmlParser\HTMLcleaner
‪string HTMLcleaner($content, $tags=[], $keepAll=0, $hSC=0, $addConfig=[])
Definition: HtmlParser.php:378
‪TYPO3\CMS\Core\Html\HtmlParser\removeFirstAndLastTag
‪string removeFirstAndLastTag($str)
Definition: HtmlParser.php:191
‪TYPO3\CMS\Core\Html\RteHtmlParser\markBrokenLinks
‪string markBrokenLinks(string $content)
Definition: RteHtmlParser.php:1319
‪TYPO3\CMS\Core\Html\RteHtmlParser\TS_AtagToAbs
‪string TS_AtagToAbs($value)
Definition: RteHtmlParser.php:1217
‪TYPO3\CMS\Core\Html\RteHtmlParser\TS_images_rte
‪string TS_images_rte($value)
Definition: RteHtmlParser.php:529
‪TYPO3\CMS\Core\Html\RteHtmlParser\HTMLcleaner_db
‪string HTMLcleaner_db($content)
Definition: RteHtmlParser.php:880
‪TYPO3\CMS\Core\Html\RteHtmlParser\$blockElementList
‪string $blockElementList
Definition: RteHtmlParser.php:73
‪TYPO3\CMS\Core\Html\RteHtmlParser\resolveAppliedTransformationModes
‪array resolveAppliedTransformationModes(string $direction, array $modes)
Definition: RteHtmlParser.php:311
‪TYPO3\CMS\Core\Compatibility\PublicMethodDeprecationTrait
Definition: PublicMethodDeprecationTrait.php:68
‪TYPO3\CMS\Backend\Utility\BackendUtility
Definition: BackendUtility.php:72
‪TYPO3\CMS\Core\Type\File\ImageInfo
Definition: ImageInfo.php:25
‪TYPO3\CMS\Backend\Utility\BackendUtility\getRecord
‪static array null getRecord($table, $uid, $fields=' *', $where='', $useDeleteClause=true)
Definition: BackendUtility.php:130
‪TYPO3\CMS\Core\Html\HtmlParser\splitIntoBlock
‪array splitIntoBlock($tag, $content, $eliminateExtraEndTags=false)
Definition: HtmlParser.php:50
‪TYPO3\CMS\Core\Resource
Definition: generateMimeTypes.php:37
‪TYPO3\CMS\Core\Html\RteHtmlParser\streamlineLineBreaksForProcessing
‪string streamlineLineBreaksForProcessing(string $content)
Definition: RteHtmlParser.php:1288
‪TYPO3\CMS\Core\Resource\Exception
Definition: Exception.php:21
‪TYPO3\CMS\Core\Html\RteHtmlParser\TS_transform_db
‪string TS_transform_db($value)
Definition: RteHtmlParser.php:705
‪$GLOBALS
‪$GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['adminpanel']['modules']
Definition: ext_localconf.php:5
‪TYPO3\CMS\Core\Html\RteHtmlParser\transformStyledATags
‪string transformStyledATags($value)
Definition: RteHtmlParser.php:785
‪TYPO3\CMS\Core\Html\RteHtmlParser\$allowedAttributesForParagraphTags
‪array $allowedAttributesForParagraphTags
Definition: RteHtmlParser.php:127
‪TYPO3\CMS\Core\Compatibility\PublicPropertyDeprecationTrait
Definition: PublicPropertyDeprecationTrait.php:66
‪TYPO3\CMS\Core\Html\RteHtmlParser\$defaultAllowedTagsList
‪string $defaultAllowedTagsList
Definition: RteHtmlParser.php:78
‪TYPO3\CMS\Core\Html\RteHtmlParser\$allowedClasses
‪array $allowedClasses
Definition: RteHtmlParser.php:120
‪TYPO3\CMS\Core\Html\RteHtmlParser\getWHFromAttribs
‪array getWHFromAttribs($attribArray)
Definition: RteHtmlParser.php:1121
‪TYPO3\CMS\Core\Html\RteHtmlParser\$elRef
‪string $elRef
Definition: RteHtmlParser.php:90
‪TYPO3\CMS\Core\Html\RteHtmlParser\$procOptions
‪array $procOptions
Definition: RteHtmlParser.php:102
‪TYPO3\CMS\Core\Html\RteHtmlParser\applyPlainImageModeSettings
‪array applyPlainImageModeSettings($imageInfo, $attribArray)
Definition: RteHtmlParser.php:1251
‪TYPO3\CMS\Core\Utility\GeneralUtility
Definition: GeneralUtility.php:45
‪TYPO3\CMS\Core\Html\RteHtmlParser\TS_links_db
‪string TS_links_db($value)
Definition: RteHtmlParser.php:571
‪TYPO3\CMS\Core\Html\RteHtmlParser\TS_transform_rte
‪string TS_transform_rte($value)
Definition: RteHtmlParser.php:814
‪TYPO3\CMS\Core\Html\RteHtmlParser\$recPid
‪int $recPid
Definition: RteHtmlParser.php:84