‪TYPO3CMS  ‪main
CharsetConverterTest.php
Go to the documentation of this file.
1 <?php
2 
3 declare(strict_types=1);
4 
5 /*
6  * This file is part of the TYPO3 CMS project.
7  *
8  * It is free software; you can redistribute it and/or modify it under
9  * the terms of the GNU General Public License, either version 2
10  * of the License, or any later version.
11  *
12  * For the full copyright and license information, please read the
13  * LICENSE.txt file that was distributed with this source code.
14  *
15  * The TYPO3 project - inspiring people to share!
16  */
17 
namespace TYPO3\CMS\Core\Tests\Unit\Charset;

use PHPUnit\Framework\Attributes\DataProvider;
use PHPUnit\Framework\Attributes\Test;
use TYPO3\CMS\Core\Charset\CharsetConverter;
use TYPO3\TestingFramework\Core\Unit\UnitTestCase;
24 
/**
 * Unit tests for CharsetConverter, exercising utf8_decode(), utf8_encode(),
 * utf8_to_numberarray() and specCharsToASCII().
 */
final class CharsetConverterTest extends UnitTestCase
{
    /**
     * A single "A" is returned unchanged when decoded to ascii, both with and
     * without the third utf8_decode() argument set to true.
     */
    #[Test]
    public function utf8DecodeACharacterToAscii(): void
    {
        $charsetConverter = new CharsetConverter();

        $string = "\x41"; // A
        self::assertSame(1, mb_strlen($string));
        self::assertSame(1, strlen($string));
        // Note the candidate order: UTF-8 is listed first here, ASCII first further down.
        self::assertSame('UTF-8', mb_detect_encoding($string, ['UTF-8', 'ASCII']));

        // test decoding to ascii
        self::assertSame('A', $charsetConverter->utf8_decode($string, 'ascii'));
        self::assertSame('A', $charsetConverter->utf8_decode($string, 'ascii', true));

        $targetString = $charsetConverter->utf8_decode($string, 'ascii');
        self::assertSame('ASCII', mb_detect_encoding($targetString, ['ASCII', 'UTF-8']));
    }

    /**
     * The three-byte UTF-8 euro sign becomes "?" (or the hex entity "&#x20ac;"
     * when the third argument is true) in ascii, and a different byte sequence
     * detected as ISO-8859-15 when decoded to iso-8859-15.
     *
     * NOTE(review): the body is identical to
     * utf8DecodeEuroSignCharacterToIso885915() below.
     */
    #[Test]
    public function utf8DecodeACharacterToIso885915(): void
    {
        $charsetConverter = new CharsetConverter();

        $string = "\xE2\x82\xAC"; // €
        self::assertSame(1, mb_strlen($string));
        self::assertSame(3, strlen($string));
        self::assertSame('UTF-8', mb_detect_encoding($string, ['ASCII', 'UTF-8']));

        // test decoding to ascii
        self::assertSame('?', $charsetConverter->utf8_decode($string, 'ascii'));
        self::assertSame('&#x20ac;', $charsetConverter->utf8_decode($string, 'ascii', true));

        // test decoding to iso-8859-15
        $targetString = $charsetConverter->utf8_decode($string, 'iso-8859-15');
        self::assertSame('ISO-8859-15', mb_detect_encoding($targetString, ['ASCII', 'UTF-8', 'ISO-8859-15']));
        self::assertNotSame($string, $targetString);
    }

    /**
     * Same expectations as above for the UTF-8 euro sign: "?" / "&#x20ac;" in
     * ascii, and an ISO-8859-15 detected result that differs from the input.
     */
    #[Test]
    public function utf8DecodeEuroSignCharacterToIso885915(): void
    {
        $charsetConverter = new CharsetConverter();

        $string = "\xE2\x82\xAC"; // €
        self::assertSame(1, mb_strlen($string));
        self::assertSame(3, strlen($string));
        self::assertSame('UTF-8', mb_detect_encoding($string, ['ASCII', 'UTF-8']));

        // test decoding to ascii
        self::assertSame('?', $charsetConverter->utf8_decode($string, 'ascii'));
        self::assertSame('&#x20ac;', $charsetConverter->utf8_decode($string, 'ascii', true));

        // test decoding to iso-8859-15
        $targetString = $charsetConverter->utf8_decode($string, 'iso-8859-15');
        self::assertSame('ISO-8859-15', mb_detect_encoding($targetString, ['ASCII', 'UTF-8', 'ISO-8859-15']));
        self::assertNotSame($string, $targetString);
    }

    /**
     * A three-byte UTF-8 CJK character becomes "?" / "&#x6f00;" in ascii and a
     * different byte sequence detected as BIG-5 when decoded to big5.
     */
    #[Test]
    public function utf8DecodeAKanjiToBig5(): void
    {
        $charsetConverter = new CharsetConverter();

        $string = "\xE6\xBC\x80"; // 漀
        self::assertSame(1, mb_strlen($string));
        self::assertSame(3, strlen($string));
        self::assertSame('UTF-8', mb_detect_encoding($string, ['ASCII', 'UTF-8']));

        // test decoding to ascii
        self::assertSame('?', $charsetConverter->utf8_decode($string, 'ascii'));
        self::assertSame('&#x6f00;', $charsetConverter->utf8_decode($string, 'ascii', true));

        // test decoding to big5
        $targetString = $charsetConverter->utf8_decode($string, 'big5');
        self::assertSame('BIG-5', mb_detect_encoding($targetString, ['ASCII', 'UTF-8', 'BIG-5']));
        self::assertNotSame($string, $targetString);
    }

    /**
     * A four-byte UTF-8 emoji has no ascii representation: it decodes to "?",
     * or to the hex entity "&#x1f602;" when the third argument is true.
     */
    #[Test]
    public function convertingAUtf8EmojiSignToNonExistingAsciiRepresentationResultsInAQuestionMarkSign(): void
    {
        $charsetConverter = new CharsetConverter();

        $string = "\xF0\x9F\x98\x82"; // 😂
        self::assertSame(1, mb_strlen($string));
        self::assertSame(4, strlen($string));
        self::assertSame('UTF-8', mb_detect_encoding($string, ['ASCII', 'UTF-8']));

        // test decoding to ascii
        self::assertSame('?', $charsetConverter->utf8_decode($string, 'ascii'));
        self::assertSame('&#x1f602;', $charsetConverter->utf8_decode($string, 'ascii', true));
    }

    /**
     * Decoding with target charset "utf-8" returns the input bytes unchanged.
     */
    #[Test]
    public function utf8DecodeToUtf8ReturnsTheSameSign(): void
    {
        self::assertSame(
            "\xF0\x9F\x98\x82",
            (new CharsetConverter())->utf8_decode("\xF0\x9F\x98\x82", 'utf-8')
        );
    }

    /**
     * An "A" passed as iso-8859-15 input is returned unchanged by utf8_encode().
     */
    #[Test]
    public function utf8EncodeIso885915ACharacter(): void
    {
        $string = "\x41"; // A
        $targetString = (new CharsetConverter())->utf8_encode($string, 'iso-8859-15');

        self::assertSame(1, strlen($string));
        self::assertSame('A', $targetString);
        self::assertSame(1, mb_strlen($targetString));
        self::assertSame(1, strlen($targetString));
        self::assertSame($string, $targetString);
    }

    /**
     * The single byte 0xA4 passed as iso-8859-15 input is encoded to the
     * three-byte UTF-8 euro sign.
     */
    #[Test]
    public function utf8EncodeIso885915EuroSign(): void
    {
        $string = "\xA4"; // € sign encoded as iso-8859-15
        $targetString = (new CharsetConverter())->utf8_encode($string, 'iso-8859-15');

        self::assertSame('€', $targetString);
        self::assertSame(1, mb_strlen($targetString));
        self::assertSame(3, strlen($targetString));
        self::assertNotSame($string, $targetString);
    }

    /**
     * The two-byte sequence 0xA2 0xC5 passed as big5 input is encoded to a
     * single three-byte UTF-8 character.
     */
    #[Test]
    public function utf8EncodeABig5EncodedSign(): void
    {
        $string = "\xA2\xC5"; // 〣 sign encoded as big5
        $targetString = (new CharsetConverter())->utf8_encode($string, 'big5');

        self::assertSame(2, strlen($string));
        self::assertSame('〣', $targetString);
        self::assertSame(1, mb_strlen($targetString));
        self::assertSame(3, strlen($targetString));
        self::assertNotSame($string, $targetString);
    }

    /**
     * Encoding with source charset "utf-8" returns the input bytes unchanged.
     */
    #[Test]
    public function utf8EncodeAlreadyUtf8EncodedSign(): void
    {
        self::assertSame(
            "\xF0\x9F\x98\x82",
            (new CharsetConverter())->utf8_encode("\xF0\x9F\x98\x82", 'utf-8')
        );
    }

    /**
     * utf8_to_numberarray() is expected to return one array entry per
     * character; the "&ndash;" entity in the input is expected as the single
     * entry "–".
     */
    #[Test]
    public function utf8ToNumberArray(): void
    {
        $string = "\xF0\x9F\x98\x82 &ndash; a joyful emoji";
        $expectedArray = [
            '😂',
            ' ',
            '–',
            ' ',
            'a',
            ' ',
            'j',
            'o',
            'y',
            'f',
            'u',
            'l',
            ' ',
            'e',
            'm',
            'o',
            'j',
            'i',
        ];

        self::assertSame($expectedArray, (new CharsetConverter())->utf8_to_numberarray($string));
    }

    /**
     * Data provider for specCharsToAsciiConvertsUmlautsToAscii().
     *
     * @return array<string, array{0: string, 1: string}> input string and expected result per data set
     */
    public static function validInputForSpecCharsToAscii(): array
    {
        return [
            'scandinavian input' => [
                'Näe ja koe',
                // See issue #20612 - this is actually a wrong transition, but the way the method currently works
                'Naee ja koe',
            ],
            'german input' => [
                'Größere Änderungswünsche Weißräm',
                'Groessere AEnderungswuensche Weissraem',
            ],
        ];
    }

    /**
     * specCharsToASCII() with charset "utf-8" yields the expected transliteration
     * for each data set of validInputForSpecCharsToAscii().
     */
    #[DataProvider('validInputForSpecCharsToAscii')]
    #[Test]
    public function specCharsToAsciiConvertsUmlautsToAscii(
        string $input,
        string $expectedString
    ): void {
        $subject = new CharsetConverter();
        self::assertSame($expectedString, $subject->specCharsToASCII('utf-8', $input));
    }

    /**
     * Data provider for specCharsToAsciiConvertsInvalidInputToEmptyString().
     *
     * @return array<string, array{0: int|float|bool|null}> one non-string input per data set
     */
    public static function invalidInputForSpecCharsToAscii(): array
    {
        return [
            'integer input' => [
                1,
            ],
            'null input' => [
                null,
            ],
            'boolean input' => [
                true,
            ],
            'floating point input' => [
                3.14,
            ],
        ];
    }

    /**
     * specCharsToASCII() is expected to return an empty string for every
     * non-string value supplied by invalidInputForSpecCharsToAscii().
     *
     * @param int|float|bool|null $input
     */
    #[DataProvider('invalidInputForSpecCharsToAscii')]
    #[Test]
    public function specCharsToAsciiConvertsInvalidInputToEmptyString($input): void
    {
        $subject = new CharsetConverter();
        self::assertSame('', $subject->specCharsToASCII('utf-8', $input));
    }
}
‪TYPO3\CMS\Core\Tests\Unit\Charset\CharsetConverterTest\specCharsToAsciiConvertsUmlautsToAscii
‪specCharsToAsciiConvertsUmlautsToAscii(string $input, string $expectedString)
Definition: CharsetConverterTest.php:226
‪TYPO3\CMS\Core\Tests\Unit\Charset\CharsetConverterTest\validInputForSpecCharsToAscii
‪static string[][] validInputForSpecCharsToAscii()
Definition: CharsetConverterTest.php:209
‪TYPO3\CMS\Core\Tests\Unit\Charset\CharsetConverterTest\specCharsToAsciiConvertsInvalidInputToEmptyString
‪specCharsToAsciiConvertsInvalidInputToEmptyString($input)
Definition: CharsetConverterTest.php:262
‪TYPO3\CMS\Core\Tests\Unit\Charset\CharsetConverterTest\utf8EncodeABig5EncodedSign
‪utf8EncodeABig5EncodedSign()
Definition: CharsetConverterTest.php:155
‪TYPO3\CMS\Core\Charset\CharsetConverter
Definition: CharsetConverter.php:54
‪TYPO3\CMS\Core\Tests\Unit\Charset\CharsetConverterTest\utf8DecodeToUtf8ReturnsTheSameSign
‪utf8DecodeToUtf8ReturnsTheSameSign()
Definition: CharsetConverterTest.php:121
‪TYPO3\CMS\Core\Tests\Unit\Charset\CharsetConverterTest\utf8EncodeIso885915ACharacter
‪utf8EncodeIso885915ACharacter()
Definition: CharsetConverterTest.php:130
‪TYPO3\CMS\Core\Tests\Unit\Charset
Definition: CharsetConverterTest.php:18
‪TYPO3\CMS\Core\Tests\Unit\Charset\CharsetConverterTest\utf8DecodeACharacterToIso885915
‪utf8DecodeACharacterToIso885915()
Definition: CharsetConverterTest.php:46
‪TYPO3\CMS\Core\Tests\Unit\Charset\CharsetConverterTest\utf8ToNumberArray
‪utf8ToNumberArray()
Definition: CharsetConverterTest.php:177
‪TYPO3\CMS\Core\Tests\Unit\Charset\CharsetConverterTest\utf8DecodeACharacterToAscii
‪utf8DecodeACharacterToAscii()
Definition: CharsetConverterTest.php:28
‪TYPO3\CMS\Core\Tests\Unit\Charset\CharsetConverterTest\utf8EncodeIso885915EuroSign
‪utf8EncodeIso885915EuroSign()
Definition: CharsetConverterTest.php:143
‪TYPO3\CMS\Core\Tests\Unit\Charset\CharsetConverterTest
Definition: CharsetConverterTest.php:26
‪TYPO3\CMS\Core\Tests\Unit\Charset\CharsetConverterTest\convertingAUtf8EmojiSignToNonExistingAsciiRepresentationResultsInAQuestionMarkSign
‪convertingAUtf8EmojiSignToNonExistingAsciiRepresentationResultsInAQuestionMarkSign()
Definition: CharsetConverterTest.php:106
‪TYPO3\CMS\Core\Tests\Unit\Charset\CharsetConverterTest\utf8DecodeAKanjiToBig5
‪utf8DecodeAKanjiToBig5()
Definition: CharsetConverterTest.php:86
‪TYPO3\CMS\Core\Tests\Unit\Charset\CharsetConverterTest\utf8EncodeAlreadyUtf8EncodedSign
‪utf8EncodeAlreadyUtf8EncodedSign()
Definition: CharsetConverterTest.php:168
‪TYPO3\CMS\Core\Tests\Unit\Charset\CharsetConverterTest\utf8DecodeEuroSignCharacterToIso885915
‪utf8DecodeEuroSignCharacterToIso885915()
Definition: CharsetConverterTest.php:66
‪TYPO3\CMS\Core\Tests\Unit\Charset\CharsetConverterTest\invalidInputForSpecCharsToAscii
‪static array[] invalidInputForSpecCharsToAscii()
Definition: CharsetConverterTest.php:239