main/_rte_html_parser_8php_source.html

<?php


declare(strict_types=1);


/*

 * This file is part of the TYPO3 CMS project.

 *

 * It is free software; you can redistribute it and/or modify it under

 * the terms of the GNU General Public License, either version 2

 * of the License, or any later version.

 *

 * For the full copyright and license information, please read the

 * LICENSE.txt file that was distributed with this source code.

 *

 * The TYPO3 project - inspiring people to share!

 */


namespace TYPO3\CMS\Core\Html;

use Psr\EventDispatcher\EventDispatcherInterface;
use Psr\Log\LoggerAwareInterface;
use Psr\Log\LoggerAwareTrait;
use TYPO3\CMS\Core\Configuration\Features;
use TYPO3\CMS\Core\Html\Event\BrokenLinkAnalysisEvent;
use TYPO3\CMS\Core\LinkHandling\Exception\UnknownLinkHandlerException;
use TYPO3\CMS\Core\LinkHandling\LinkService;
use TYPO3\CMS\Core\Resource\Exception\InsufficientFolderAccessPermissionsException;
use TYPO3\CMS\Core\Utility\GeneralUtility;
use TYPO3\HtmlSanitizer\Builder\BuilderInterface;


/**
 * Transforms rich text content between the form handed to the Rich Text Editor
 * and the form that is persisted.
 *
 * The two public entry points, transformTextForRichTextEditor() and
 * transformTextForPersistence(), apply the transformation modes selected by the
 * processing configuration that is passed to them.
 */
class RteHtmlParser extends HtmlParser implements LoggerAwareInterface
{
    use LoggerAwareTrait;

    /**
     * Comma-separated list of tag names used to split content into block and
     * non-block sections. Replaced by the "blockElementList" processing option
     * when that option is not empty.
     */
    protected string $blockElementList = 'DIV,TABLE,BLOCKQUOTE,PRE,UL,OL,H1,H2,H3,H4,H5,H6,ADDRESS,DL,DD,HEADER,SECTION,FOOTER,NAV,ARTICLE,ASIDE,FIGURE,FIGCAPTION';

    /**
     * Comma-separated list of tags that are always part of the keep-tags list
     * built by getKeepTags() (unless removed again through "denyTags").
     */
    protected string $defaultAllowedTagsList = 'b,i,u,a,img,br,div,center,pre,figure,figcaption,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote,strike,span,abbr,acronym,dfn';

    /**
     * Processing configuration of the current transformation run,
     * set by setProcessingConfiguration().
     */
    protected array $procOptions = [];

    /**
     * Remaining recursion budget of TS_transform_db(); decremented on entry
     * and incremented again on exit of each call.
     */
    protected int $TS_transform_db_safecounter = 100;

    /**
     * Per-direction ("rte" / "db") cache of the result of getKeepTags().
     */
    protected array $getKeepTags_cache = [];

    /**
     * Class names accepted on <p> and <span> tags. An empty list means that
     * no class filtering is applied.
     */
    protected array $allowedClasses = [];

    /**
     * Attribute names that may stay on <p> tags when content is persisted.
     * Replaced by the "allowAttributes." processing option when it is set.
     */
    protected array $allowedAttributesForParagraphTags = [
        'class',
        'align',
        'id',
        'title',
        'dir',
        'lang',
        'xml:lang',
        'itemscope',
        'itemtype',
        'itemprop',
    ];

    /**
     * Lower-case tag names that are neither stripped from content outside of
     * paragraphs nor wrapped in <p> tags. Replaced by the "allowTagsOutside"
     * / "allowTagsOutside." processing options when they are set.
     */
    protected array $allowedTagsOutsideOfParagraphs = [
        'address',
        'article',
        'aside',
        'blockquote',
        'div',
        'footer',
        'figure',
        'figcaption',
        'header',
        'hr',
        'nav',
        'section',
    ];

    /**
     * @param EventDispatcherInterface $eventDispatcher Used to dispatch BrokenLinkAnalysisEvent in markBrokenLinks()
     */
    public function __construct(
        protected readonly EventDispatcherInterface $eventDispatcher
    ) {}

    /**
     * Stores the processing configuration and derives the internal settings from it.
     *
     * Only $allowedClasses is rebuilt unconditionally; the block element list,
     * the paragraph attributes and the tags allowed outside of paragraphs are
     * overridden only when the corresponding option is present.
     *
     * @param array $processingConfiguration Processing options ("proc." style array with dotted sub-keys)
     */
    protected function setProcessingConfiguration(array $processingConfiguration): void
    {
        $this->procOptions = $processingConfiguration;
        // The keep-tags cache is derived from the processing options and must not outlive them
        $this->getKeepTags_cache = [];

        if (isset($this->procOptions['allowedClasses.'])) {
            $this->allowedClasses = (array)$this->procOptions['allowedClasses.'];
        } else {
            $this->allowedClasses = GeneralUtility::trimExplode(',', (string)($this->procOptions['allowedClasses'] ?? ''), true);
        }

        // Dynamic configuration of blockElementList.
        // The casts below keep assignments to typed properties safe under strict_types.
        if (!empty($this->procOptions['blockElementList'])) {
            $this->blockElementList = (string)$this->procOptions['blockElementList'];
        }

        // Define which attributes are allowed on <p> tags
        if (isset($this->procOptions['allowAttributes.'])) {
            $this->allowedAttributesForParagraphTags = (array)$this->procOptions['allowAttributes.'];
        }

        // Override tags which are allowed outside of <p> tags
        if (isset($this->procOptions['allowTagsOutside'])) {
            if (!isset($this->procOptions['allowTagsOutside.'])) {
                $this->allowedTagsOutsideOfParagraphs = GeneralUtility::trimExplode(',', strtolower((string)$this->procOptions['allowTagsOutside']), true);
            } else {
                $this->allowedTagsOutsideOfParagraphs = (array)$this->procOptions['allowTagsOutside.'];
            }
        }
    }

    /**
     * Transforms content on its way into the Rich Text Editor.
     *
     * Carriage returns are stripped first, the optional "entryHTMLparser_rte" is
     * applied, then the resolved modes run in reversed order, followed by the
     * optional "exitHTMLparser_rte". The result uses CRLF line breaks.
     *
     * @param string $value Content to transform
     * @param array $processingConfiguration Processing options for this run
     * @return string The transformed content
     */
    public function transformTextForRichTextEditor(string $value, array $processingConfiguration): string
    {
        $this->setProcessingConfiguration($processingConfiguration);
        $modes = $this->resolveAppliedTransformationModes('rte');
        $value = $this->streamlineLineBreaksForProcessing($value);
        // If an entry HTML cleaner was configured, pass the content through the HTMLcleaner
        $value = $this->runHtmlParserIfConfigured($value, 'entryHTMLparser_rte');
        // Traverse modes
        foreach ($modes as $cmd) {
            switch ($cmd) {
                case 'detectbrokenlinks':
                    $value = $this->markBrokenLinks($value);
                    break;
                case 'css_transform':
                    $value = $this->TS_transform_rte($value);
                    break;
                default:
                    // Do nothing
            }
        }
        // If an exit HTML cleaner was configured, pass the content through the HTMLcleaner
        $value = $this->runHtmlParserIfConfigured($value, 'exitHTMLparser_rte');
        // Final clean up of linebreaks
        return $this->streamlineLineBreaksAfterProcessing($value);
    }

    /**
     * Transforms content coming from the Rich Text Editor for persistence.
     *
     * Carriage returns are stripped first, the optional "entryHTMLparser_db" is
     * applied, then the resolved modes run, then the HTML sanitizer and the
     * optional "exitHTMLparser_db". The result uses CRLF line breaks.
     *
     * @param string $value Content to transform
     * @param array $processingConfiguration Processing options for this run
     * @return string The transformed content
     */
    public function transformTextForPersistence(string $value, array $processingConfiguration): string
    {
        $this->setProcessingConfiguration($processingConfiguration);
        $modes = $this->resolveAppliedTransformationModes('db');
        $value = $this->streamlineLineBreaksForProcessing($value);
        // If an entry HTML cleaner was configured, pass the content through the HTMLcleaner
        $value = $this->runHtmlParserIfConfigured($value, 'entryHTMLparser_db');
        // Traverse modes
        foreach ($modes as $cmd) {
            switch ($cmd) {
                case 'detectbrokenlinks':
                    $value = $this->removeBrokenLinkMarkers($value);
                    break;
                case 'ts_links':
                    $value = $this->TS_links_db($value);
                    break;
                case 'css_transform':
                    // Transform empty paragraphs into spacing paragraphs
                    $value = str_replace('<p></p>', '<p>&nbsp;</p>', $value);
                    // Double any trailing spacing paragraph so that it does not get removed by divideIntoLines()
                    $value = preg_replace('/<p>&nbsp;<\/p>$/', '<p>&nbsp;</p><p>&nbsp;</p>', $value) ?? $value;
                    $value = $this->TS_transform_db($value);
                    break;
                default:
                    // Do nothing
            }
        }
        // process markup with HTML Sanitizer
        $value = $this->htmlSanitize($value, $this->procOptions['HTMLparser_db.'] ?? []);
        // If an exit HTML cleaner was configured, pass the content through the HTMLcleaner
        $value = $this->runHtmlParserIfConfigured($value, 'exitHTMLparser_db');
        // Final clean up of linebreaks
        return $this->streamlineLineBreaksAfterProcessing($value);
    }

    /**
     * Builds the ordered, de-duplicated list of transformation modes to apply.
     *
     * "overruleMode" wins over "mode" when it is a non-empty string. The
     * shortcut "default" expands to "detectbrokenlinks,css_transform,ts_links".
     *
     * @param string $direction Either "rte" or "db"; for "rte" the order of the modes is reversed
     * @return array List of mode names (array keys are not re-indexed)
     */
    protected function resolveAppliedTransformationModes(string $direction): array
    {
        // Setting modes / transformations to be called
        if ((string)($this->procOptions['overruleMode'] ?? '') !== '') {
            $modes = GeneralUtility::trimExplode(',', (string)$this->procOptions['overruleMode']);
        } else {
            // A missing "mode" option results in an empty mode list instead of an undefined index
            $modes = [(string)($this->procOptions['mode'] ?? '')];
        }

        $modeList = implode(',', $modes);

        // Replace the shortcut "default" with all custom modes
        $modeList = str_replace('default', 'detectbrokenlinks,css_transform,ts_links', $modeList);

        // Make list unique
        $modes = array_unique(GeneralUtility::trimExplode(',', $modeList, true));
        // Reverse order if direction is "rte"
        if ($direction === 'rte') {
            $modes = array_reverse($modes);
        }

        return $modes;
    }

    /**
     * Passes content through HTMLcleaner() when the given directive is enabled.
     *
     * The directive itself acts as the on/off switch; its settings are read from
     * the sub-array stored under the same key with a trailing dot.
     *
     * @param string $content Content to clean
     * @param string $configurationDirective Option name, e.g. "entryHTMLparser_db"
     * @return string Cleaned content, or the unchanged content if the directive is empty
     */
    protected function runHtmlParserIfConfigured(string $content, string $configurationDirective): string
    {
        if (!empty($this->procOptions[$configurationDirective])) {
            // Fall back to an empty configuration when the directive is enabled without any settings
            $parserConfiguration = $this->procOptions[$configurationDirective . '.'] ?? [];
            [$keepTags, $keepNonMatchedTags, $hscMode, $additionalConfiguration] = $this->HTMLparserConfig($parserConfiguration);
            $content = $this->HTMLcleaner($content, $keepTags, $keepNonMatchedTags, $hscMode, $additionalConfiguration);
        }
        return $content;
    }

    /************************************
     *
     * Specific RTE TRANSFORMATION functions
     *
     *************************************/

    /**
     * Rewrites the href of every <a> tag into the string produced by the LinkService.
     *
     * Tags without an href attribute are left untouched. Content nested inside a
     * link is processed recursively.
     *
     * @param string $value Content to process
     * @return string Content with rewritten links
     */
    protected function TS_links_db(string $value): string
    {
        $blockSplit = $this->splitIntoBlock('A', $value);
        $linkService = null;
        foreach ($blockSplit as $k => $v) {
            // Odd positions hold the <a>...</a> blocks, even positions the content in between
            if ($k % 2) {
                [$tagAttributes] = $this->get_tag_attributes($this->getFirstTag($v), true);

                // Anchors would not have an href attribute
                if (!isset($tagAttributes['href'])) {
                    continue;
                }
                // Instantiate the link service once, and only if there is a link to process
                $linkService ??= GeneralUtility::makeInstance(LinkService::class);
                // Reset per link: if resolve() throws, the catch block must not see the
                // information of a previously processed link (or an undefined variable)
                $linkInformation = [];
                // Store the link as <a> tag as default by TYPO3, with the link service syntax
                try {
                    $linkInformation = $linkService->resolve($tagAttributes['href']);
                    $tagAttributes['href'] = $linkService->asString($linkInformation);
                } catch (UnknownLinkHandlerException $e) {
                    $tagAttributes['href'] = $linkInformation['href'] ?? $tagAttributes['href'];
                }

                $blockSplit[$k] = '<a ' . GeneralUtility::implodeAttributes($tagAttributes, true) . '>'
                    . $this->TS_links_db($this->removeFirstAndLastTag($blockSplit[$k])) . '</a>';
            }
        }
        return implode('', $blockSplit);
    }

    /**
     * Transforms block-structured content for persistence ("css_transform", direction "db").
     *
     * Content is split by the block element list. Container blocks are processed
     * recursively, <pre> blocks are kept as they are, line breaks inside other
     * blocks are replaced by spaces, and content outside of blocks is cleaned
     * and divided into paragraph lines.
     *
     * @param string $value Content to transform
     * @return string Transformed content, sections joined by LF
     */
    protected function TS_transform_db(string $value): string
    {
        // Safety... so forever loops are avoided (they should not occur, but an error would potentially do this...)
        $this->TS_transform_db_safecounter--;
        if ($this->TS_transform_db_safecounter < 0) {
            // Give the budget back before bailing out, otherwise every bail-out would
            // permanently shrink the recursion depth available to later calls
            $this->TS_transform_db_safecounter++;
            return $value;
        }
        // Split the content from RTE by the occurrence of these blocks:
        $blockSplit = $this->splitIntoBlock($this->blockElementList, $value);

        // Avoid superfluous linebreaks by transform_db after ending headListTag
        while (count($blockSplit) > 0 && trim(end($blockSplit)) === '') {
            array_pop($blockSplit);
        }

        // Traverse the blocks
        foreach ($blockSplit as $k => $v) {
            if ($k % 2) {
                // Inside block:
                // Init:
                $tag = $this->getFirstTag($v);
                $tagName = strtolower($this->getFirstTagName($v));
                // Process based on the tag:
                switch ($tagName) {
                    case 'blockquote':
                    case 'dd':
                    case 'div':
                    case 'header':
                    case 'section':
                    case 'footer':
                    case 'nav':
                    case 'article':
                    case 'aside':
                        $blockSplit[$k] = $tag . $this->TS_transform_db($this->removeFirstAndLastTag($blockSplit[$k])) . '</' . $tagName . '>';
                        break;
                    case 'pre':
                        // Preformatted content is stored as it is
                        break;
                    default:
                        // usually <hx> tags and <table> tags where no other block elements are within the tags
                        // Eliminate true linebreaks inside block element tags
                        $blockSplit[$k] = preg_replace('/[' . LF . ']+/', ' ', $blockSplit[$k]) ?? $blockSplit[$k];
                }
            } else {
                // NON-block:
                if (trim($blockSplit[$k]) !== '') {
                    $string = $blockSplit[$k];
                    // Normalize self-closing tags without attributes: <br/> becomes <br />
                    $string = preg_replace('#<([a-z]+)/>#', '<$1 />', $string) ?? $string;
                    // Remove linebreaks preceding hr tags
                    $string = preg_replace('/[' . LF . ']+<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/', '<$1$2/>', $string) ?? '';
                    // Remove linebreaks following hr tags
                    $string = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>[' . LF . ']+/', '<$1$2/>', $string) ?? '';
                    // Replace other linebreaks with space
                    $string = preg_replace('/[' . LF . ']+/', ' ', $string);
                    // process allowed/removed tags
                    $string = $this->HTMLcleaner(
                        (string)$string,
                        $this->getKeepTags('db'),
                        $this->procOptions['HTMLparser_db.']['keepNonMatchedTags'] ?? '',
                        (int)($this->procOptions['HTMLparser_db.']['htmlSpecialChars'] ?? 0)
                    );
                    $blockSplit[$k] = (string)$this->divideIntoLines($string);
                } else {
                    unset($blockSplit[$k]);
                }
            }
        }
        $this->TS_transform_db_safecounter++;
        return implode(LF, $blockSplit);
    }

    /**
     * Transforms block-structured content for the editor ("css_transform", direction "rte").
     *
     * Content is split by the block element list. Container blocks are processed
     * recursively; content outside of blocks is turned into paragraph lines by
     * setDivTags().
     *
     * @param string $value Content to transform
     * @return string Transformed content, sections joined by LF
     */
    protected function TS_transform_rte(string $value): string
    {
        // Split the content from database by the occurrence of the block elements
        $blockSplit = $this->splitIntoBlock($this->blockElementList, $value);
        // Traverse the blocks
        foreach ($blockSplit as $k => $v) {
            if ($k % 2) {
                // Inside one of the blocks:
                // Init:
                $tag = $this->getFirstTag($v);
                $tagName = strtolower($this->getFirstTagName($v));
                // Based on tagname, we do transformations:
                switch ($tagName) {
                    case 'blockquote':
                    case 'dd':
                    case 'div':
                    case 'header':
                    case 'section':
                    case 'footer':
                    case 'nav':
                    case 'article':
                    case 'aside':
                        $blockSplit[$k] = $tag . $this->TS_transform_rte($this->removeFirstAndLastTag($blockSplit[$k])) . '</' . $tagName . '>';
                        break;
                }
                if (!isset($blockSplit[$k + 1])) {
                    $blockSplit[$k + 1] = '';
                }
                // Drop the single line break that directly follows the block
                $blockSplit[$k + 1] = preg_replace('/^[ ]*' . LF . '/', '', $blockSplit[$k + 1]) ?? $blockSplit[$k + 1];
            } else {
                // NON-block:
                $nextFTN = $this->getFirstTagName($blockSplit[$k + 1] ?? '');
                $onlyLineBreaks = preg_match('/^[ ]*' . LF . '+[ ]*$/', $blockSplit[$k]) === 1;
                // If the line is followed by a block or is the last line:
                if (GeneralUtility::inList($this->blockElementList, $nextFTN) || !isset($blockSplit[$k + 1])) {
                    // If the line contains more than just linebreaks, reduce the number of trailing linebreaks by 1
                    if (!$onlyLineBreaks) {
                        $blockSplit[$k] = preg_replace('/(' . LF . '*)' . LF . '[ ]*$/', '$1', $blockSplit[$k]) ?? $blockSplit[$k];
                    } else {
                        // If the line contains only linebreaks, remove the leading linebreak
                        $blockSplit[$k] = preg_replace('/^[ ]*' . LF . '/', '', $blockSplit[$k]) ?? $blockSplit[$k];
                    }
                }
                // If $blockSplit[$k] is blank then unset the line, unless the line only contained linebreaks
                if ((string)$blockSplit[$k] === '' && !$onlyLineBreaks) {
                    unset($blockSplit[$k]);
                } else {
                    $blockSplit[$k] = $this->setDivTags($blockSplit[$k]);
                }
            }
        }
        return implode(LF, $blockSplit);
    }

    /***************************************************************
     *
     * Generic RTE transformation, analysis and helper functions
     *
     **************************************************************/

    /**
     * Runs HTMLcleaner() with the keep-tags of direction "db", dropping all other tags.
     *
     * @param string $content Content to clean
     * @return string Cleaned content
     */
    protected function HTMLcleaner_db(string $content): string
    {
        $keepTags = $this->getKeepTags('db');
        return $this->HTMLcleaner($content, $keepTags, false);
    }

    /**
     * Builds (and caches per direction) the keep-tags configuration for HTMLcleaner().
     *
     * The list starts with $defaultAllowedTagsList plus the "allowTags" option,
     * minus the "denyTags" option, and is then merged with the "HTMLparser_rte."
     * or "HTMLparser_db." settings through HTMLparserConfig().
     *
     * @param string $direction Either "rte" or "db"
     * @return array Keep-tags configuration
     */
    protected function getKeepTags(string $direction): array
    {
        if (!isset($this->getKeepTags_cache[$direction]) || !is_array($this->getKeepTags_cache[$direction])) {
            // Setting up allowed tags:
            // Default is to get allowed/denied tags from internal array of processing options:
            // Construct default list of tags to keep:
            if (isset($this->procOptions['allowTags.']) && is_array($this->procOptions['allowTags.'])) {
                $keepTags = implode(',', $this->procOptions['allowTags.']);
            } else {
                $keepTags = (string)($this->procOptions['allowTags'] ?? '');
            }
            $keepTags = array_flip(GeneralUtility::trimExplode(',', $this->defaultAllowedTagsList . ',' . strtolower($keepTags), true));
            // For tags to deny, remove them from $keepTags array:
            $denyTags = GeneralUtility::trimExplode(',', (string)($this->procOptions['denyTags'] ?? ''), true);
            foreach ($denyTags as $dKe) {
                // Configured allowTags are lower-cased above, so also try the lower-case form;
                // otherwise an upper-case entry such as "FONT" would silently deny nothing
                unset($keepTags[$dKe], $keepTags[strtolower($dKe)]);
            }
            // Based on the direction of content, set further options:
            switch ($direction) {
                case 'rte':
                    // Transforming keepTags array so it can be understood by the HTMLcleaner function.
                    // This basically converts the format of the array from TypoScript (having dots) to plain multi-dimensional array.
                    [$keepTags] = $this->HTMLparserConfig($this->procOptions['HTMLparser_rte.'] ?? [], $keepTags);
                    break;
                case 'db':
                    // Setting up span tags if they are allowed:
                    if (isset($keepTags['span'])) {
                        $keepTags['span'] = [
                            'allowedAttribs' => 'id,class,style,title,lang,xml:lang,dir,itemscope,itemtype,itemprop',
                            'fixAttrib' => [
                                'class' => [
                                    'removeIfFalse' => 1,
                                ],
                            ],
                            'rmTagIfNoAttrib' => 1,
                        ];
                        if (!empty($this->allowedClasses)) {
                            $keepTags['span']['fixAttrib']['class']['list'] = $this->allowedClasses;
                        }
                    }
                    // Setting further options, getting them from the processing options
                    $TSc = $this->procOptions['HTMLparser_db.'] ?? [];
                    if (empty($TSc['globalNesting'])) {
                        $TSc['globalNesting'] = 'b,i,u,a,center,font,sub,sup,strong,em,strike,span';
                    }
                    if (empty($TSc['noAttrib'])) {
                        $TSc['noAttrib'] = 'b,i,u,br,center,hr,sub,sup,strong,em,li,ul,ol,blockquote,strike';
                    }
                    // Transforming the array from TypoScript to regular array:
                    [$keepTags] = $this->HTMLparserConfig($TSc, $keepTags);
                    break;
            }
            // Caching (internally, in object memory) the result
            $this->getKeepTags_cache[$direction] = $keepTags;
        }
        // Return result:
        return $this->getKeepTags_cache[$direction];
    }

    /**
     * Splits content by <p> sections and turns it into cleaned paragraph lines.
     *
     * Nested <p> sections are resolved recursively, at most $count levels deep.
     * Content outside of <p> tags is stripped of all tags except those in
     * $allowedTagsOutsideOfParagraphs and its text is wrapped in <p> tags.
     *
     * No native return type is declared so that existing overrides in
     * subclasses stay compatible.
     *
     * @param string $value Content to divide
     * @param int $count Remaining recursion depth
     * @param bool $returnArray If set, the list of lines is returned instead of an LF-joined string
     * @return string|array A string if no <p> section was found, $count is exhausted or
     *                      $returnArray is false; otherwise the list of lines
     */
    protected function divideIntoLines(string $value, int $count = 5, bool $returnArray = false)
    {
        // Setting the third param will eliminate false end-tags. Maybe this is a good thing to do...?
        $paragraphBlocks = $this->splitIntoBlock('p', $value, true);
        // Returns plainly the content if there was no p sections in it
        if (count($paragraphBlocks) <= 1 || $count <= 0) {
            return $this->sanitizeLineBreaksForContentOnly($value);
        }

        // Traverse the splitted sections
        foreach ($paragraphBlocks as $k => $v) {
            if ($k % 2) {
                // Inside a <p> section
                $v = $this->removeFirstAndLastTag($v);
                // Fetching 'sub-lines' - which will explode any further p nesting recursively
                $subLines = $this->divideIntoLines($v, $count - 1, true);
                // So, if there happened to be sub-nesting of p, this is written directly as the new content of THIS section. (This would be considered 'an error')
                if (is_array($subLines)) {
                    $paragraphBlocks[$k] = implode(LF, $subLines);
                } else {
                    //... but if NO subsection was found, we process it as a TRUE line without erroneous content:
                    $paragraphBlocks[$k] = $this->processContentWithinParagraph($subLines, $paragraphBlocks[$k]);
                }
                // If it turns out the line is just blank (containing a &nbsp; possibly) then just make it pure blank.
                // But, prevent filtering of lines that are blank in sense above, but whose tags contain attributes.
                // Those attributes should have been filtered before; if they are still there they must be considered as possible content.
                if (trim(strip_tags($paragraphBlocks[$k])) === '&nbsp;' && !preg_match('/\\<(img)(\\s[^>]*)?\\/?>/si', $paragraphBlocks[$k]) && !preg_match('/\\<([^>]*)?( align| class| style| id| title| dir| lang| xml:lang)([^>]*)?>/si', trim($paragraphBlocks[$k]))) {
                    $paragraphBlocks[$k] = '';
                }
            } else {
                // Outside a paragraph, if there is still something in there, just add a <p> tag
                // Remove positions which are outside <p> tags and without content
                $paragraphBlocks[$k] = trim(strip_tags($paragraphBlocks[$k], '<' . implode('><', $this->allowedTagsOutsideOfParagraphs) . '>'));
                $paragraphBlocks[$k] = $this->sanitizeLineBreaksForContentOnly($paragraphBlocks[$k]);
                if ((string)$paragraphBlocks[$k] === '') {
                    unset($paragraphBlocks[$k]);
                } else {
                    // add <p> tags around the content
                    $paragraphBlocks[$k] = str_replace(strip_tags($paragraphBlocks[$k]), '<p>' . strip_tags($paragraphBlocks[$k]) . '</p>', $paragraphBlocks[$k]);
                }
            }
        }
        return $returnArray ? $paragraphBlocks : implode(LF, $paragraphBlocks);
    }

    /**
     * Converts LF-separated lines into paragraphs for the editor.
     *
     * Blank lines become "&nbsp;", other lines are cleaned with the keep-tags of
     * direction "rte". A line is wrapped in <p> tags unless its first tag is
     * listed in $allowedTagsOutsideOfParagraphs or the line is already enclosed
     * in <div> or <p> tags.
     *
     * @param string $value Content to process
     * @return string Processed lines, joined by LF
     */
    protected function setDivTags(string $value): string
    {
        // First, setting configuration for the HTMLcleaner function. This will process each line between the <div>/<p> section on their way to the RTE
        $keepTags = $this->getKeepTags('rte');
        // Divide the content into lines
        $parts = explode(LF, $value);
        foreach (array_keys($parts) as $k) {
            // Processing of line content:
            // If the line is blank, set it to &nbsp;
            if (trim($parts[$k]) === '') {
                $parts[$k] = '&nbsp;';
            } else {
                // Clean the line content, keeping unknown tags (as they can be removed in the entryHTMLparser)
                $parts[$k] = $this->HTMLcleaner($parts[$k], $keepTags, 'protect');
                // convert double-encoded &nbsp; into regular &nbsp; however this could also be reversed via the exitHTMLparser
                // This was previously an option to disable called "dontConvAmpInNBSP_rte"
                $parts[$k] = str_replace('&amp;nbsp;', '&nbsp;', $parts[$k]);
            }
            $partFirstTagName = strtolower($this->getFirstTagName($parts[$k] ?? ''));
            // Wrapping the line in <p> tags if not already wrapped and does not contain an hr tag and is not allowed outside of paragraphs.
            // NOTE(review): the hr pattern below is matched against the tag NAME, which normally
            // contains no "<", so this second condition looks like it can never fail. Matching
            // against $parts[$k] would follow the comment above, but changes which lines get
            // wrapped - confirm the intent before changing it.
            if (!in_array($partFirstTagName, $this->allowedTagsOutsideOfParagraphs, true) && !preg_match('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', $partFirstTagName)) {
                $testStr = strtolower(trim($parts[$k]));
                if (!str_starts_with($testStr, '<div') || !str_ends_with($testStr, '</div>')) {
                    if (!str_starts_with($testStr, '<p') || !str_ends_with($testStr, '</p>')) {
                        // Only set p-tags if there is not already div or p tags:
                        $parts[$k] = '<p>' . $parts[$k] . '</p>';
                    }
                }
            }
        }
        // Implode result:
        return implode(LF, $parts);
    }

    /**
     * Cleans the content of one paragraph and rebuilds its surrounding <p> tag.
     *
     * Only attributes listed in $allowedAttributesForParagraphTags survive. If
     * $allowedClasses is not empty, the class attribute is reduced to the allowed
     * classes, or removed when none of them is allowed.
     *
     * @param string $content Inner content of the paragraph
     * @param string $fullContentWithTag The paragraph including its <p> tag, used to read the attributes
     * @return string The rebuilt paragraph
     */
    protected function processContentWithinParagraph(string $content, string $fullContentWithTag): string
    {
        // clean up the content
        $content = $this->HTMLcleaner_db($content);
        // Get the <p> tag, and validate the attributes
        $fTag = $this->getFirstTag($fullContentWithTag);
        // Check which attributes of the <p> tag to keep
        if (!empty($this->allowedAttributesForParagraphTags)) {
            [$tagAttributes] = $this->get_tag_attributes($fTag);
            // Make sure the tag attributes only contain the ones that are defined to be allowed
            $tagAttributes = array_intersect_key($tagAttributes, array_flip($this->allowedAttributesForParagraphTags));

            // Only allow classes that are whitelisted in $this->allowedClasses
            if (isset($tagAttributes['class']) && trim($tagAttributes['class']) !== '' && !empty($this->allowedClasses) && !in_array($tagAttributes['class'], $this->allowedClasses, true)) {
                $classes = GeneralUtility::trimExplode(' ', $tagAttributes['class'], true);
                $classes = array_intersect($classes, $this->allowedClasses);
                if (!empty($classes)) {
                    $tagAttributes['class'] = implode(' ', $classes);
                } else {
                    unset($tagAttributes['class']);
                }
            }
        } else {
            $tagAttributes = [];
        }
        // Remove any line break
        $content = str_replace(LF, '', $content);
        // Compile the surrounding <p> tag
        return '<' . rtrim('p ' . $this->compileTagAttribs($tagAttributes)) . '>' . $content . '</p>';
    }

    /**
     * Puts every <hr> tag on a line of its own and tidies the line breaks around it.
     *
     * Each <hr> tag is surrounded by LF, pairs of LF are then collapsed into a
     * single LF, and one LF at the very start and end of the content is removed.
     *
     * @param string $content Content to process
     * @return string Processed content
     */
    protected function sanitizeLineBreaksForContentOnly(string $content): string
    {
        $content = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', LF . '<$1$2/>' . LF, $content) ?? $content;
        $content = str_replace(LF . LF, LF, $content);
        $content = preg_replace('/(^' . LF . ')|(' . LF . '$)/i', '', $content) ?? $content;
        return $content;
    }

    /**
     * Removes all carriage returns so that processing only deals with LF line breaks.
     *
     * @param string $content Content to process
     * @return string Content without CR characters
     */
    protected function streamlineLineBreaksForProcessing(string $content): string
    {
        return str_replace(CR, '', $content);
    }

    /**
     * Converts all line breaks of the processed content into CRLF.
     *
     * @param string $content Content to process
     * @return string Content with CRLF line breaks only
     */
    protected function streamlineLineBreaksAfterProcessing(string $content): string
    {
        // Make sure no \r\n sequences has entered in the meantime
        $content = $this->streamlineLineBreaksForProcessing($content);
        // ... and then change all \n into \r\n
        return str_replace(LF, CRLF, $content);
    }

    /**
     * Adds a "data-rte-error" attribute to every <a> tag whose link is considered broken.
     *
     * Each href is resolved through the LinkService and handed to listeners of
     * BrokenLinkAnalysisEvent. An unknown link handler is reported with the
     * exception message; insufficient folder permissions are ignored. Every
     * <a> tag with a non-empty href is rebuilt, and its inner content is
     * processed recursively.
     *
     * @param string $content Content to analyse
     * @return string Content with markers added
     */
    protected function markBrokenLinks(string $content): string
    {
        $blocks = $this->splitIntoBlock('A', $content);
        $linkService = GeneralUtility::makeInstance(LinkService::class);
        foreach ($blocks as $position => $value) {
            // Even positions hold the content between the <a> blocks
            if ($position % 2 === 0) {
                continue;
            }
            [$attributes] = $this->get_tag_attributes($this->getFirstTag($value), true);
            if (empty($attributes['href'])) {
                continue;
            }

            try {
                $hrefInformation = $linkService->resolve($attributes['href']);

                $brokenLinkAnalysis = new BrokenLinkAnalysisEvent($hrefInformation['type'], $hrefInformation);
                $this->eventDispatcher->dispatch($brokenLinkAnalysis);
                if ($brokenLinkAnalysis->isBrokenLink()) {
                    $attributes['data-rte-error'] = $brokenLinkAnalysis->getReason();
                }
            } catch (InsufficientFolderAccessPermissionsException $e) {
                // do nothing if user doesn't have access to the file/folder
            } catch (UnknownLinkHandlerException $e) {
                $attributes['data-rte-error'] = $e->getMessage();
            }

            // Always rewrite the block to allow the nested calling even if a page is found
            $blocks[$position] =
                '<a ' . GeneralUtility::implodeAttributes($attributes, true, true) . '>'
                . $this->markBrokenLinks($this->removeFirstAndLastTag($blocks[$position]))
                . '</a>';
        }
        return implode('', $blocks);
    }

    /**
     * Removes the broken-link markers from all <a> tags that have a non-empty href.
     *
     * Besides the "data-rte-error" attribute, the fixed highlight style
     * declaration is removed from the style attribute; an empty style attribute
     * is dropped entirely. Inner content is processed recursively.
     *
     * @param string $content Content to clean
     * @return string Content without markers
     */
    protected function removeBrokenLinkMarkers(string $content): string
    {
        $blocks = $this->splitIntoBlock('A', $content);
        foreach ($blocks as $position => $value) {
            // Even positions hold the content between the <a> blocks
            if ($position % 2 === 0) {
                continue;
            }
            [$attributes] = $this->get_tag_attributes($this->getFirstTag($value), true);
            if (empty($attributes['href'])) {
                continue;
            }
            // Always remove the styling again (regardless of the page was found or not)
            // so the database does not contain ugly stuff
            unset($attributes['data-rte-error']);
            if (isset($attributes['style'])) {
                $attributes['style'] = trim(str_replace('background-color: yellow; border:2px red solid; color: black;', '', $attributes['style']));
                if (empty($attributes['style'])) {
                    unset($attributes['style']);
                }
            }
            $blocks[$position] =
                '<a ' . GeneralUtility::implodeAttributes($attributes, true, true) . '>'
                . $this->removeBrokenLinkMarkers($this->removeFirstAndLastTag($blocks[$position]))
                . '</a>';
        }
        return implode('', $blocks);
    }

    /**
     * Passes content through the HTML sanitizer unless sanitization is switched off.
     *
     * Sanitization is skipped when the "htmlSanitize" key exists with an empty
     * value, or when the feature "security.backend.htmlSanitizeRte" is disabled.
     * "htmlSanitize.build" names either a class implementing BuilderInterface or
     * a build identifier for the SanitizerBuilderFactory (default: "default").
     *
     * @param string $content Content to sanitize
     * @param array $configuration The "HTMLparser_db." processing options
     * @return string Sanitized (or unchanged) content
     */
    protected function htmlSanitize(string $content, array $configuration): string
    {
        $features = GeneralUtility::makeInstance(Features::class);
        // either `htmlSanitize = null` or `htmlSanitize = false`
        // or feature flag `security.backend.htmlSanitizeRte` is disabled
        if ((array_key_exists('htmlSanitize', $configuration) && empty($configuration['htmlSanitize']))
            || !$features->isFeatureEnabled('security.backend.htmlSanitizeRte')
        ) {
            return $content;
        }

        $build = $configuration['htmlSanitize.']['build'] ?? 'default';
        if (class_exists($build) && is_a($build, BuilderInterface::class, true)) {
            $builder = GeneralUtility::makeInstance($build);
        } else {
            $factory = GeneralUtility::makeInstance(SanitizerBuilderFactory::class);
            $builder = $factory->build($build);
        }
        $sanitizer = $builder->build();
        $initiator = GeneralUtility::makeInstance(SanitizerInitiator::class, static::class);
        return $sanitizer->sanitize($content, $initiator);
    }
}