TYPO3 CMS 11.5
HtmlWorker.php
Go to the documentation of this file.
1 <?php
2 
3 declare(strict_types=1);
4 
5 /*
6  * This file is part of the TYPO3 CMS project.
7  *
8  * It is free software; you can redistribute it and/or modify it under
9  * the terms of the GNU General Public License, either version 2
10  * of the License, or any later version.
11  *
12  * For the full copyright and license information, please read the
13  * LICENSE.txt file that was distributed with this source code.
14  *
15  * The TYPO3 project - inspiring people to share!
16  */
17 
19 
20 use DOMDocument;
21 use DOMDocumentFragment;
22 use DOMElement;
23 use DOMNode;
24 use DOMXPath;
25 use ‪Exception;
26 use Masterminds\HTML5;
29 
/**
 * Parses an HTML fragment into a DOM tree and rewrites URI-carrying attributes
 * (e.g. `a.href`, `img.src`) through the injected link result factory.
 *
 * Typical usage is fluent: `parse($html)->transformUri('a.href', $flags)` followed
 * by a string cast to retrieve the resulting markup.
 */
class HtmlWorker
{
    /**
     * Failure strategy: remove the matched element together with all of its children.
     * Takes precedence over the other two strategies when several flags are combined.
     */
    public const REMOVE_TAG_ON_FAILURE = 1;

    /**
     * Failure strategy: keep the matched element, but drop the attribute that
     * could not be transformed. Only applied if REMOVE_TAG_ON_FAILURE is not set.
     */
    public const REMOVE_ATTR_ON_FAILURE = 2;

    /**
     * Failure strategy: remove the matched element, but keep its children in place
     * (the element is "unwrapped"). Only applied if neither of the other flags is set.
     */
    public const REMOVE_ENCLOSURE_ON_FAILURE = 4;

    protected LinkResultFactory $linkResultFactory;
    protected HTML5 $parser;

    /**
     * Container element holding the parsed fragment; `null` until parse() was called.
     */
    protected ?DOMNode $mount = null;

    /**
     * Owner document of the mount node; `null` until parse() was called.
     */
    protected ?DOMDocument $document = null;

    public function __construct(LinkResultFactory $linkResultFactory, HTML5 $parser)
    {
        $this->linkResultFactory = $linkResultFactory;
        $this->parser = $parser;
    }

    /**
     * Serializes the children of the mount node (i.e. the processed fragment without
     * the artificial container). Returns an empty string if nothing has been parsed yet.
     */
    public function __toString(): string
    {
        if (!$this->mount instanceof DOMNode || !$this->document instanceof DOMDocument) {
            return '';
        }
        return $this->parser->saveHTML($this->mount->childNodes);
    }

    /**
     * Parses the given markup as a fragment and makes it the current working tree.
     * Any previously parsed tree is replaced.
     *
     * @return self the same instance, for fluent chaining
     */
    public function parse(string $html): self
    {
        // use document fragment to separate markup from default structure (html, body, ...)
        $fragment = $this->parser->parseFragment($html);
        // mount fragment to make it accessible in current document
        $this->mount = $this->mountFragment($fragment);
        $this->document = $this->mount->ownerDocument;
        return $this;
    }

    /**
     * Rewrites URI attributes of all elements matching the selector.
     *
     * For every matching element whose attribute value carries a URI scheme, the value
     * is handed to the link result factory. On success the resulting (non-empty)
     * attributes are written back to the element; on failure the element is treated
     * according to `$flags`. Values without a scheme are left untouched.
     *
     * @param string $selector comma-separated list of `node.attr` pairs, e.g. `a.href, img.src`;
     *                         an empty node part (`.href`) matches any element
     * @param int $flags bit mask of the `REMOVE_*_ON_FAILURE` constants; `0` leaves failing elements as they are
     * @return self the same instance, for fluent chaining
     */
    public function transformUri(string $selector, int $flags = 0): self
    {
        if (!$this->mount instanceof DOMNode || !$this->document instanceof DOMDocument) {
            return $this;
        }
        $subjects = $this->parseSelector($selector);
        // use xpath to traverse potential candidates having "links"
        $xpath = new DOMXPath($this->document);
        foreach ($subjects as $subject) {
            $attrName = $subject['attr'];
            // NOTE(review): a leading `//` is evaluated document-wide regardless of the context
            // node; this presumably relies on the mount being the document's only element - confirm
            $expression = sprintf('//%s[@%s]', $subject['node'], $attrName);
            $candidates = $xpath->query($expression, $this->mount);
            if ($candidates === false) {
                // the selector did not form a valid XPath expression - nothing to process for it
                continue;
            }
            /** @var DOMElement $element */
            foreach ($candidates as $element) {
                $elementAttrValue = $element->getAttribute($attrName);
                $scheme = parse_url($elementAttrValue, PHP_URL_SCHEME);
                // skip values not having a URI-scheme
                if (empty($scheme)) {
                    continue;
                }
                try {
                    $linkResult = $this->linkResultFactory->createFromUriString($elementAttrValue);
                } catch (Exception $exception) {
                    // @todo: Link building should be checked and adapted to throw only specific exceptions
                    //        which can then be caught here. Catching generic Exception hides programmatic
                    //        exceptions, which can be hard to track down.
                    $this->onTransformUriFailure($element, $subject, $flags);
                    continue;
                }
                // drop empty attribute values, so they are not rendered as empty attributes
                $linkResultAttrValues = array_filter($linkResult->getAttributes());
                // usually link results contain `href` attr value, which needs to be assigned
                // to a different attribute in case of another selector (e.g. `img.src` instead of `a.href`)
                if (isset($linkResultAttrValues['href']) && $attrName !== 'href') {
                    $element->setAttribute($attrName, $linkResultAttrValues['href']);
                    unset($linkResultAttrValues['href']);
                }
                foreach ($linkResultAttrValues as $name => $value) {
                    $element->setAttribute($name, (string)$value);
                }
            }
        }
        return $this;
    }

    /**
     * Applies the failure strategy encoded in `$flags` to an element whose URI could
     * not be transformed. Exactly one strategy is applied, in the order
     * REMOVE_TAG, REMOVE_ATTR, REMOVE_ENCLOSURE; without any flag nothing happens.
     *
     * @param DOMElement $element element that failed to be transformed
     * @param array $subject selector item as produced by parseSelector(), the `attr` key is used
     * @param int $flags bit mask of the `REMOVE_*_ON_FAILURE` constants
     */
    protected function onTransformUriFailure(DOMElement $element, array $subject, int $flags): void
    {
        if (($flags & self::REMOVE_TAG_ON_FAILURE) === self::REMOVE_TAG_ON_FAILURE) {
            $parent = $element->parentNode;
            // an already detached element has nothing to be removed from
            if ($parent !== null) {
                $parent->removeChild($element);
            }
        } elseif (($flags & self::REMOVE_ATTR_ON_FAILURE) === self::REMOVE_ATTR_ON_FAILURE) {
            $attrName = $subject['attr'];
            $element->removeAttribute($attrName);
        } elseif (($flags & self::REMOVE_ENCLOSURE_ON_FAILURE) === self::REMOVE_ENCLOSURE_ON_FAILURE) {
            $parent = $element->parentNode;
            if ($parent === null) {
                return;
            }
            // moves children out of element's enclosure, then removes (empty) element
            // eg `<ELEMENT><a><b><c></ELEMENT><NEXT>`
            // 1) `<ELEMENT><b><c></ELEMENT><a><NEXT>`
            // 2) `<ELEMENT><c></ELEMENT><a><b><NEXT>`
            // 3) `<ELEMENT></ELEMENT><a><b><c><NEXT>`
            // rm `<a><b><c><NEXT>`
            //
            // `childNodes` is a live list - moving nodes while iterating it skips or reorders
            // children, so iterate a snapshot instead. The reference node is captured once as well,
            // since `$element->nextSibling` would point to the child moved last after the first step.
            $reference = $element->nextSibling;
            foreach (iterator_to_array($element->childNodes, false) as $child) {
                // a `null` reference appends to the parent, i.e. right after the (last) element
                $parent->insertBefore($child, $reference);
            }
            $parent->removeChild($element);
        }
    }

    /**
     * Splits a selector such as `a.href, img.src` into its node/attribute pairs.
     * Items not consisting of exactly two dot-separated parts are discarded; an empty
     * node part is replaced by the wildcard `*`.
     *
     * @return array<int, array{node: string, attr: string}> keys of discarded items are not re-indexed
     */
    protected function parseSelector(string $selector): array
    {
        $items = GeneralUtility::trimExplode(',', $selector, true);
        $items = array_map(
            static function (string $item): ?array {
                $parts = explode('.', $item);
                if (count($parts) !== 2) {
                    return null;
                }
                return [
                    'node' => $parts[0] ?: '*',
                    'attr' => $parts[1],
                ];
            },
            $items
        );
        // removes the `null` placeholders of invalid items
        return array_filter($items);
    }

    /**
     * Wraps the fragment in a new `div` element appended to the fragment's owner document,
     * so the markup becomes reachable for XPath queries and serialization.
     *
     * @return DOMNode the created container element
     */
    protected function mountFragment(DOMDocumentFragment $fragment): DOMNode
    {
        $document = $fragment->ownerDocument;
        $mount = $document->createElement('div');
        $document->appendChild($mount);
        // empty fragments are not appended at all
        if ($fragment->hasChildNodes()) {
            $mount->appendChild($fragment);
        }
        return $mount;
    }
}
‪TYPO3\CMS\Core\Utility\GeneralUtility\trimExplode
‪static list< string > trimExplode($delim, $string, $removeEmptyValues=false, $limit=0)
Definition: GeneralUtility.php:999
‪TYPO3\CMS\Frontend\Html
Definition: HtmlWorker.php:18
‪$parser
‪$parser
Definition: annotationChecker.php:110
‪TYPO3\CMS\Frontend\Exception
Definition: Exception.php:23
‪TYPO3\CMS\Core\Utility\GeneralUtility
Definition: GeneralUtility.php:50