‪TYPO3CMS  ‪main
Typo3XmlParser.php
Go to the documentation of this file.
1 <?php
2 
3 declare(strict_types=1);
4 
5 /*
6  * This file is part of the TYPO3 CMS project.
7  *
8  * It is free software; you can redistribute it and/or modify it under
9  * the terms of the GNU General Public License, either version 2
10  * of the License, or any later version.
11  *
12  * For the full copyright and license information, please read the
13  * LICENSE.txt file that was distributed with this source code.
14  *
15  * The TYPO3 project - inspiring people to share!
16  */
17 
19 
22 
38 {
50  string $xml,
51  ‪Typo3XmlSerializerOptions $options = null
52  ): array|string {
53  try {
54  return $this->‪decode($xml, $options);
55  } catch (\Throwable $e) {
56  return $e->getMessage();
57  }
58  }
59 
/**
 * Decodes an XML string into a PHP array, or into a plain string when the
 * root node has no children and the root node is not included in the result.
 *
 * @param string $xml XML source; leading and trailing whitespace is trimmed before parsing.
 * @param Typo3XmlSerializerOptions|null $options Parsing options; a default instance is created when null.
 * @return array|string The decoded structure. If returnRootNodeName() is enabled and the result
 *                      is an array, the root node name is stored under the key '_DOCUMENT_TAG'.
 * @throws InvalidDataException If the input is empty, libxml reports an error, the document
 *                              contains a document type node, or no root node can be determined.
 */
public function decode(
    string $xml,
    ?Typo3XmlSerializerOptions $options = null
): array|string {
    $xml = trim($xml);
    if ($xml === '') {
        throw new InvalidDataException(
            'Invalid XML data, it can not be empty.',
            1630773210
        );
    }

    $options ??= new Typo3XmlSerializerOptions();

    if ($options->allowUndefinedNamespaces()) {
        $xml = $this->disableNamespaceInNodeNames($xml);
    }

    // Collect libxml errors internally instead of emitting PHP warnings while loading.
    $internalErrors = libxml_use_internal_errors(true);
    libxml_clear_errors();

    $dom = new \DOMDocument();
    try {
        $dom->loadXML($xml, $options->getLoadOptions());
    } finally {
        // Always hand back the previous error handling mode, even if loadXML() throws.
        libxml_use_internal_errors($internalErrors);
    }

    $error = libxml_get_last_error();
    if ($error !== false) {
        libxml_clear_errors();
        throw new InvalidDataException(
            'Line ' . $error->line . ': ' . xml_error_string($error->code),
            1630773230
        );
    }

    // The root node is the first top-level node whose type is not ignored.
    $rootNode = null;
    foreach ($dom->childNodes as $child) {
        if ($child->nodeType === \XML_DOCUMENT_TYPE_NODE) {
            throw new InvalidDataException(
                'Document types are not allowed.',
                1630773261
            );
        }
        if (in_array($child->nodeType, $options->getIgnoredNodeTypes(), true)) {
            continue;
        }
        $rootNode = $child;
        break;
    }
    if ($rootNode === null) {
        throw new InvalidDataException(
            'Root node cannot be determined.',
            1630773276
        );
    }

    $rootNodeName = $rootNode->nodeName;
    if ($options->allowUndefinedNamespaces()) {
        $rootNodeName = $this->reactivateNamespaceInNodeName($rootNodeName);
    }

    // A childless root node contributes its raw node value; otherwise recurse into it.
    $content = $rootNode->hasChildNodes()
        ? $this->parseXml($rootNode, $options)
        : $rootNode->nodeValue;
    $result = $options->includeRootNode() ? [$rootNodeName => $content] : $content;

    if ($options->returnRootNodeName() && is_array($result)) {
        $result['_DOCUMENT_TAG'] = $rootNodeName;
    }

    return $result;
}
144 
/**
 * Rewrites namespaced tag names so the colon separator becomes '___',
 * e.g. '<ns:tag>' turns into '<ns___tag>' and '</ns:tag>' into '</ns___tag>'.
 * decode() applies this when allowUndefinedNamespaces() is enabled.
 *
 * @param string $value XML source to rewrite.
 * @return string The XML source with the colon in opening and closing tag names replaced.
 */
protected function disableNamespaceInNodeNames(string $value): string
{
    $namespacedTagPattern = '#<([/]?)([[:alnum:]_-]*):([[:alnum:]_-]*)([ >]?)#';
    $maskedTag = '<$1$2___$3$4';

    return preg_replace($namespacedTagPattern, $maskedTag, $value);
}
157 
/**
 * Restores the colon separator in tag names that were masked with '___',
 * e.g. '<ns___tag>' turns back into '<ns:tag>'. Input without any '___'
 * is returned untouched without running the regular expression.
 *
 * @param string $value Text that may contain masked tag names.
 * @return string The text with '___' in opening and closing tag names replaced by ':'.
 */
protected function reactivateNamespaceInNodeNames(string $value): string
{
    if (str_contains($value, '___')) {
        $maskedTagPattern = '#<([/]?)([[:alnum:]_-]*)___([[:alnum:]_-]*)([ >]?)#';
        $namespacedTag = '<$1$2:$3$4';

        return preg_replace($maskedTagPattern, $namespacedTag, $value);
    }

    return $value;
}
173 
/**
 * Restores the colon separator in a single node name by replacing every
 * occurrence of '___' with ':'.
 *
 * @param string $value Node name, possibly containing '___'.
 * @return string The node name with '___' replaced by ':'.
 */
protected function reactivateNamespaceInNodeName(string $value): string
{
    return strtr($value, ['___' => ':']);
}
181 
/**
 * Recursively converts a DOM node into a PHP value.
 *
 * - A node without children yields its node value.
 * - A node whose only child is a text or CDATA node yields that child's value.
 * - Any other node yields an array built from its non-ignored children.
 *
 * The array key of a child is its 'index' attribute if present. Otherwise it is the
 * node name, with the configured namespace prefix stripped and names of the form
 * 'n<integer>' turned into the integer. Child values are base64-decoded when a
 * 'base64' attribute is present, or else cast according to the 'type' attribute.
 *
 * @param \DOMNode $node Node to convert.
 * @param Typo3XmlSerializerOptions $options Parsing options.
 * @return array|string|null The converted value.
 */
protected function parseXml(\DOMNode $node, Typo3XmlSerializerOptions $options): array|string|null
{
    if (!$node->hasChildNodes()) {
        return $node->nodeValue;
    }

    // Pure text content: return the text itself instead of descending further.
    $firstChild = $node->firstChild;
    if ($node->childNodes->length === 1
        && in_array($firstChild->nodeType, [\XML_TEXT_NODE, \XML_CDATA_SECTION_NODE], true)
    ) {
        $value = $firstChild->nodeValue;
        if ($options->allowUndefinedNamespaces()) {
            // Tag names inside text content were masked together with the real tags.
            $value = $this->reactivateNamespaceInNodeNames($value);
        }
        return $value;
    }

    $result = [];
    foreach ($node->childNodes as $child) {
        if (in_array($child->nodeType, $options->getIgnoredNodeTypes(), true)) {
            continue;
        }

        $value = $this->parseXml($child, $options);

        if ($child instanceof \DOMElement && $child->hasAttribute('index')) {
            // An explicit index attribute always wins over the node name.
            $key = $child->getAttribute('index');
        } else {
            $key = $child->nodeName;
            if ($options->allowUndefinedNamespaces()) {
                $key = $this->reactivateNamespaceInNodeName($key);
            }
            if ($options->hasNamespacePrefix()
                && str_starts_with($key, $options->getNamespacePrefix())
            ) {
                $key = substr($key, strlen($options->getNamespacePrefix()));
            }
            // Node names like 'n12' become the integer key 12.
            if (str_starts_with($key, 'n')
                && MathUtility::canBeInterpretedAsInteger($index = substr($key, 1))
            ) {
                $key = (int)$index;
            }
        }

        if ($child instanceof \DOMElement && $child->hasAttribute('base64') && is_string($value)) {
            $value = base64_decode($value);
        } elseif ($child instanceof \DOMElement && $child->hasAttribute('type')) {
            $value = match ($child->getAttribute('type')) {
                'integer' => (int)$value,
                'double' => (float)$value,
                'boolean' => (bool)$value,
                'NULL' => null,
                // Note: empty() also treats the string '0' as empty, which yields [].
                'array' => is_array($value) ? $value : (empty(trim($value)) ? [] : (array)$value),
                // Unknown type hints leave the value as it is.
                default => $value,
            };
        }
        $result[$key] = $value;
    }
    return $result;
}
250 }
‪TYPO3\CMS\Core\Serializer\Typo3XmlSerializerOptions\getIgnoredNodeTypes
‪getIgnoredNodeTypes()
Definition: Typo3XmlSerializerOptions.php:55
‪TYPO3\CMS\Core\Serializer\Typo3XmlParser\reactivateNamespaceInNodeName
‪reactivateNamespaceInNodeName(string $value)
Definition: Typo3XmlParser.php:177
‪TYPO3\CMS\Core\Serializer\Typo3XmlSerializerOptions
Definition: Typo3XmlSerializerOptions.php:24
‪TYPO3\CMS\Core\Serializer\Typo3XmlSerializerOptions\hasNamespacePrefix
‪hasNamespacePrefix()
Definition: Typo3XmlSerializerOptions.php:63
‪TYPO3\CMS\Core\Serializer\Typo3XmlParser
Definition: Typo3XmlParser.php:38
‪TYPO3\CMS\Core\Serializer\Exception\InvalidDataException
Definition: InvalidDataException.php:25
‪TYPO3\CMS\Core\Serializer\Typo3XmlParser\decode
‪array string decode(string $xml, Typo3XmlSerializerOptions $options=null)
Definition: Typo3XmlParser.php:66
‪TYPO3\CMS\Core\Utility\MathUtility\canBeInterpretedAsInteger
‪static bool canBeInterpretedAsInteger(mixed $var)
Definition: MathUtility.php:69
‪TYPO3\CMS\Core\Serializer\Typo3XmlParser\parseXml
‪parseXml(\DOMNode $node, Typo3XmlSerializerOptions $options)
Definition: Typo3XmlParser.php:182
‪TYPO3\CMS\Core\Serializer\Typo3XmlSerializerOptions\allowUndefinedNamespaces
‪allowUndefinedNamespaces()
Definition: Typo3XmlSerializerOptions.php:71
‪TYPO3\CMS\Core\Serializer\Typo3XmlParser\decodeWithReturningExceptionAsString
‪array string decodeWithReturningExceptionAsString(string $xml, Typo3XmlSerializerOptions $options=null)
Definition: Typo3XmlParser.php:49
‪TYPO3\CMS\Core\Serializer\Typo3XmlParser\reactivateNamespaceInNodeNames
‪reactivateNamespaceInNodeNames(string $value)
Definition: Typo3XmlParser.php:161
‪TYPO3\CMS\Core\Serializer\Typo3XmlParser\disableNamespaceInNodeNames
‪disableNamespaceInNodeNames(string $value)
Definition: Typo3XmlParser.php:149
‪TYPO3\CMS\Core\Serializer\Typo3XmlSerializerOptions\getNamespacePrefix
‪getNamespacePrefix()
Definition: Typo3XmlSerializerOptions.php:67
‪TYPO3\CMS\Core\Serializer
‪TYPO3\CMS\Core\Utility\MathUtility
Definition: MathUtility.php:24