TYPO3CMS main
HtmlWorker.php
Go to the documentation of this file.
1 <?php
2 
3 declare(strict_types=1);
4 
5 /*
6  * This file is part of the TYPO3 CMS project.
7  *
8  * It is free software; you can redistribute it and/or modify it under
9  * the terms of the GNU General Public License, either version 2
10  * of the License, or any later version.
11  *
12  * For the full copyright and license information, please read the
13  * LICENSE.txt file that was distributed with this source code.
14  *
15  * The TYPO3 project - inspiring people to share!
16  */
17 
19 
20 use Masterminds\HTML5;
25 
/**
 * Parses an HTML fragment into a DOM tree, allows modifying it (adding
 * `nonce` attributes, transforming URI attributes through the link factory)
 * and serializes the result back to a string via `__toString()`.
 *
 * All modifying methods return `$this`, so calls can be chained after `parse()`.
 */
class HtmlWorker
{
    /**
     * Flag for `transformUri()`: when a URI cannot be resolved, remove the
     * whole element from the tree.
     * Takes precedence over the other failure flags if several are combined.
     */
    public const REMOVE_TAG_ON_FAILURE = 1;

    /**
     * Flag for `transformUri()`: when a URI cannot be resolved, remove only
     * the attribute that carried the URI and keep the element.
     */
    public const REMOVE_ATTR_ON_FAILURE = 2;

    /**
     * Flag for `transformUri()`: when a URI cannot be resolved, remove the
     * element itself but keep its children in place of it.
     */
    public const REMOVE_ENCLOSURE_ON_FAILURE = 4;

    /** Wrapper element holding the parsed fragment; `null` until `parse()` was called. */
    protected ?\DOMNode $mount = null;

    /** Owner document of the mount element; `null` until `parse()` was called. */
    protected ?\DOMDocument $document = null;

    public function __construct(
        protected readonly LinkFactory $linkFactory,
        protected readonly HTML5 $parser
    ) {}

    /**
     * Serializes the children of the mount element.
     *
     * @return string the serialized markup, or an empty string if nothing has been parsed yet
     */
    public function __toString(): string
    {
        if (!$this->mount instanceof \DOMNode || !$this->document instanceof \DOMDocument) {
            return '';
        }
        return $this->parser->saveHTML($this->mount->childNodes);
    }

    /**
     * Parses the given markup and makes it the subject of all following operations.
     * A previously parsed fragment is replaced.
     */
    public function parse(string $html): self
    {
        // use document fragment to separate markup from default structure (html, body, ...)
        $fragment = $this->parser->parseFragment($html);
        // mount fragment to make it accessible in current document
        $this->mount = $this->mountFragment($fragment);
        $this->document = $this->mount->ownerDocument;
        return $this;
    }

    /**
     * Adds a `nonce="..."` attribute to elements having one of the given node names.
     *
     * Only elements that carry no attributes at all are matched (`[not(@*)]`).
     * NOTE(review): confirm whether `[not(@nonce)]` was intended instead - as written,
     * an element such as `<script src="...">` does not receive a nonce.
     *
     * Does nothing if no node names are given or nothing has been parsed yet.
     *
     * @param string|ConsumableNonce $nonce value that is cast to string for each matched element
     * @param string ...$nodeNames element names to be processed (used verbatim in an XPath expression)
     */
    public function addNonceAttribute(string|ConsumableNonce $nonce, string ...$nodeNames): self
    {
        if ($nodeNames === []) {
            return $this;
        }
        // same guard as in `__toString()` and `transformUri()`: `new \DOMXPath(null)` would throw
        if (!$this->mount instanceof \DOMNode || !$this->document instanceof \DOMDocument) {
            return $this;
        }
        $xpath = new \DOMXPath($this->document);
        foreach ($nodeNames as $nodeName) {
            $expression = sprintf('//%s[not(@*)]', $nodeName);
            $elements = $xpath->query($expression, $this->mount);
            // `query()` returns `false` for an invalid expression (e.g. malformed node name)
            if ($elements === false) {
                continue;
            }
            /** @var \DOMElement $element */
            foreach ($elements as $element) {
                // the cast is deliberately kept inside the loop, so that the nonce
                // is only stringified when it is actually applied to an element
                $element->setAttribute('nonce', (string)$nonce);
            }
        }
        return $this;
    }

    /**
     * Passes URI attribute values through the link factory and writes the
     * resulting attributes back to the element.
     *
     * Attribute values without a URI scheme are left untouched. If the link
     * factory throws an `UnableToLinkException`, the element is handled
     * according to `$flags`.
     *
     * @param string $selector comma-separated list of `node.attr` items, e.g. `a.href,img.src`;
     *                         an empty node part (`.href`) matches any element
     * @param int $flags bit-mask of the `REMOVE_*_ON_FAILURE` constants, `0` keeps failing elements as they are
     */
    public function transformUri(string $selector, int $flags = 0): self
    {
        if (!$this->mount instanceof \DOMNode || !$this->document instanceof \DOMDocument) {
            return $this;
        }
        $subjects = $this->parseSelector($selector);
        // use xpath to traverse potential candidates having "links"
        $xpath = new \DOMXPath($this->document);
        foreach ($subjects as $subject) {
            $attrName = $subject['attr'];
            $expression = sprintf('//%s[@%s]', $subject['node'], $attrName);
            $elements = $xpath->query($expression, $this->mount);
            // `query()` returns `false` for an invalid expression (e.g. malformed selector parts)
            if ($elements === false) {
                continue;
            }
            /** @var \DOMElement $element */
            foreach ($elements as $element) {
                $elementAttrValue = $element->getAttribute($attrName);
                $scheme = parse_url($elementAttrValue, PHP_URL_SCHEME);
                // skip values not having a URI-scheme
                if (empty($scheme)) {
                    continue;
                }
                try {
                    $linkResult = $this->linkFactory->createUri($elementAttrValue);
                } catch (UnableToLinkException $exception) {
                    $this->onTransformUriFailure($element, $subject, $flags);
                    continue;
                }
                // drop empty attribute values of the link result
                $linkResultAttrValues = array_filter($linkResult->getAttributes());
                // usually link results contain `href` attr value, which needs to be assigned
                // to a different attribute, depending on the selector (e.g. `img.src` instead of `a.href`)
                if (isset($linkResultAttrValues['href']) && $attrName !== 'href') {
                    $element->setAttribute($attrName, $linkResultAttrValues['href']);
                    unset($linkResultAttrValues['href']);
                }
                foreach ($linkResultAttrValues as $name => $value) {
                    $element->setAttribute($name, (string)$value);
                }
            }
        }
        return $this;
    }

    /**
     * Applies the failure strategy selected by `$flags` to an element whose URI
     * could not be transformed. Only the first matching strategy is applied, in
     * the order tag, attribute, enclosure. Without any matching flag the element
     * is left unchanged.
     *
     * @param \DOMElement $element element carrying the URI that failed
     * @param array{node: string, attr: string} $subject selector item that matched the element
     * @param int $flags bit-mask of the `REMOVE_*_ON_FAILURE` constants
     */
    protected function onTransformUriFailure(\DOMElement $element, array $subject, int $flags): void
    {
        if (($flags & self::REMOVE_TAG_ON_FAILURE) === self::REMOVE_TAG_ON_FAILURE) {
            // an element without a parent is already detached, nothing to remove it from
            $element->parentNode?->removeChild($element);
        } elseif (($flags & self::REMOVE_ATTR_ON_FAILURE) === self::REMOVE_ATTR_ON_FAILURE) {
            $element->removeAttribute($subject['attr']);
        } elseif (($flags & self::REMOVE_ENCLOSURE_ON_FAILURE) === self::REMOVE_ENCLOSURE_ON_FAILURE) {
            $parent = $element->parentNode;
            if ($parent === null) {
                return;
            }
            // moves children out of element's enclosure, then removes (empty) element
            // eg `<ELEMENT><a><b><c></ELEMENT><NEXT>`
            // 1) `<a><ELEMENT><b><c></ELEMENT><NEXT>`
            // 2) `<a><b><ELEMENT><c></ELEMENT><NEXT>`
            // 3) `<a><b><c><ELEMENT></ELEMENT><NEXT>`
            // rm `<a><b><c><NEXT>`
            // `childNodes` is a live list - it is copied first, since moving nodes
            // while iterating the live list would skip or reorder children
            foreach (iterator_to_array($element->childNodes, false) as $child) {
                $parent->insertBefore($child, $element);
            }
            $parent->removeChild($element);
        }
    }

    /**
     * Splits a selector like `a.href,img.src` into its node/attribute pairs.
     *
     * Items that do not consist of exactly two dot-separated parts, or that
     * have an empty attribute name, are ignored. An empty node name is
     * replaced by the wildcard `*`.
     *
     * @return array<int, array{node: string, attr: string}> keys of ignored items are missing (not re-indexed)
     */
    protected function parseSelector(string $selector): array
    {
        $items = GeneralUtility::trimExplode(',', $selector, true);
        $items = array_map(
            static function (string $item): ?array {
                $parts = explode('.', $item);
                // an empty attribute name would lead to the invalid XPath expression `//node[@]`
                if (count($parts) !== 2 || $parts[1] === '') {
                    return null;
                }
                return [
                    'node' => $parts[0] ?: '*',
                    'attr' => $parts[1],
                ];
            },
            $items
        );
        return array_filter($items);
    }

    /**
     * Appends a new `div` element to the fragment's owner document and moves
     * the fragment's children into it.
     *
     * @return \DOMNode the created wrapper element
     */
    protected function mountFragment(\DOMDocumentFragment $fragment): \DOMNode
    {
        $document = $fragment->ownerDocument;
        $mount = $document->createElement('div');
        $document->appendChild($mount);
        // appending an empty fragment is skipped
        if ($fragment->hasChildNodes()) {
            $mount->appendChild($fragment);
        }
        return $mount;
    }
}
‪TYPO3\CMS\Frontend\Html
Definition: HtmlWorker.php:18
‪TYPO3\CMS\Core\Security\ContentSecurityPolicy\ConsumableNonce
Definition: ConsumableNonce.php:24
‪$parser
‪$parser
Definition: annotationChecker.php:103
‪TYPO3\CMS\Core\Utility\GeneralUtility
Definition: GeneralUtility.php:52
‪TYPO3\CMS\Core\Utility\GeneralUtility\trimExplode
‪static list< string > trimExplode(string $delim, string $string, bool $removeEmptyValues=false, int $limit=0)
Definition: GeneralUtility.php:822