‪TYPO3CMS  ‪main
CharsetConverter.php
Go to the documentation of this file.
1 <?php
2 
3 /*
4  * This file is part of the TYPO3 CMS project.
5  *
6  * It is free software; you can redistribute it and/or modify it under
7  * the terms of the GNU General Public License, either version 2
8  * of the License, or any later version.
9  *
10  * For the full copyright and license information, please read the
11  * LICENSE.txt file that was distributed with this source code.
12  *
13  * The TYPO3 project - inspiring people to share!
14  */
15 
17 
22 
54 {
/**
 * Byte value emitted (via chr()) whenever a character has no mapping.
 * 63 is the ASCII code of "?".
 */
58  protected int ‪$noCharByteVal = 63;
59 
/**
 * Parsed conversion tables, keyed by charset name. Each entry is filled by
 * initCharset() with two maps: 'local' (local code => UTF-8 byte sequence)
 * and 'utf8' (UTF-8 byte sequence => local code).
 */
63  protected array ‪$parsedCharsets = [];
64 
/**
 * Transliteration tables, keyed by charset name. Each entry maps a character
 * (as encoded in that charset) to its ASCII replacement string.
 */
68  protected array ‪$toASCII = [];
69 
/**
 * Charsets that utf8_encode() reads as two bytes per character.
 * Only the keys are used (looked up with isset()).
 */
73  protected array ‪$twoByteSets = [
74  'ucs-2' => 1,
75  ];
76 
/**
 * Charsets handled as EUC-style encodings: bytes above 127 start a two-byte
 * character (Shift-JIS has a single-byte range that the methods special-case).
 * Only the keys are used (looked up with isset()).
 */
80  protected array ‪$eucBasedSets = [
81  'gb2312' => 1, // Chinese, simplified.
82  'big5' => 1, // Chinese, traditional.
83  'euc-kr' => 1, // Korean
84  'shift_jis' => 1,
85  ];
86 
87  /********************************************
88  *
89  * Charset Conversion functions
90  *
91  ********************************************/
/**
 * Converts $inputString from one charset to another.
 *
 * Conversion to UTF-8 is first attempted with mbstring. If mbstring does not
 * know the source charset, or for any other target charset, the string is
 * routed through UTF-8 using the parsed conversion tables. Characters that
 * cannot be represented in the target charset become numeric entities.
 *
 * @param string $inputString String to convert
 * @param string $fromCharset Charset $inputString is encoded in
 * @param string $toCharset Charset to convert to
 * @return string The converted string
 */
public function conv(string $inputString, string $fromCharset, string $toCharset): string
{
    if ($fromCharset === $toCharset) {
        return $inputString;
    }
    // PHP-libs don't support fallback to SGML entities, but UTF-8 handles everything
    if ($toCharset === 'utf-8') {
        try {
            $convertedString = mb_convert_encoding($inputString, $toCharset, $fromCharset);
        } catch (\ValueError $e) {
            // Since PHP 8.0 an unsupported charset raises a ValueError instead of
            // returning false; treat both the same and use the table fallback.
            $convertedString = false;
        }
        if ($convertedString !== false) {
            return $convertedString;
        }
    }
    if ($fromCharset !== 'utf-8') {
        $inputString = $this->utf8_encode($inputString, $fromCharset);
    }
    if ($toCharset !== 'utf-8') {
        $inputString = $this->utf8_decode($inputString, $toCharset, true);
    }
    return $inputString;
}
121 
/**
 * Converts a string from the given charset to UTF-8 using the parsed
 * conversion table of that charset.
 *
 * Bytes up to 127 are copied unchanged. Characters without an entry in the
 * table are replaced by the "no char" byte ($this->noCharByteVal). A
 * multi-byte character that is cut off at the end of the input is looked up
 * with a zero second byte, so it is replaced rather than raising a warning.
 *
 * @param string $str String to convert
 * @param string $charset Charset $str is encoded in
 * @return string UTF-8 string; empty string if the charset could not be initialised
 */
public function utf8_encode(string $str, string $charset): string
{
    if ($charset === 'utf-8') {
        return $str;
    }
    // Parse conv. table if not already
    if (!$this->initCharset($charset)) {
        return '';
    }
    // Loop-invariant lookups
    $localToUtf8 = $this->parsedCharsets[$charset]['local'] ?? [];
    $unknownChar = chr($this->noCharByteVal);
    $isTwoByteSet = isset($this->twoByteSets[$charset]);
    $isEucBasedSet = isset($this->eucBasedSets[$charset]);
    $strLen = strlen($str);
    $outStr = '';
    for ($a = 0; $a < $strLen; $a++) {
        $chr = $str[$a];
        $ord = ord($chr);
        if ($isTwoByteSet) {
            // Two bytes per char, big endian assumed. A missing trailing byte counts as 0.
            $a++;
            $ord = $ord << 8 | ($a < $strLen ? ord($str[$a]) : 0);
            $outStr .= $localToUtf8[$ord] ?? $unknownChar;
        } elseif ($ord > 127) {
            // EUC uses two bytes above 127; combine both into a 16 bit number.
            // Shift-JIS: chars between 160 and 223 are single byte.
            if ($isEucBasedSet && ($charset !== 'shift_jis' || $ord < 160 || $ord > 223)) {
                $a++;
                $ord = $ord * 256 + ($a < $strLen ? ord($str[$a]) : 0);
            }
            // Use the table entry if the local char number is known, else the "no char" byte
            $outStr .= $localToUtf8[$ord] ?? $unknownChar;
        } else {
            $outStr .= $chr;
        }
    }
    return $outStr;
}
182 
/**
 * Converts a UTF-8 string to the given charset using the parsed conversion
 * table of that charset.
 *
 * Bytes up to 127 are copied unchanged. A UTF-8 sequence without a table
 * entry becomes either a hexadecimal numeric entity or the "no char" byte,
 * depending on $useEntityForNoChar. A byte above 127 that is not a lead
 * byte (bit 64 not set) always becomes the "no char" byte.
 *
 * @param string $str UTF-8 string to convert
 * @param string $charset Target charset
 * @param bool $useEntityForNoChar Emit "&#x...;" for unmapped characters instead of the "no char" byte
 * @return string Converted string; empty string if the charset could not be initialised
 */
public function utf8_decode(string $str, string $charset, bool $useEntityForNoChar = false): string
{
    if ($charset === 'utf-8') {
        return $str;
    }
    // Make sure the conversion table is loaded
    if (!$this->initCharset($charset)) {
        return '';
    }
    $result = '';
    $length = strlen($str);
    $pos = 0;
    while ($pos < $length) {
        $byte = substr($str, $pos, 1);
        $code = ord($byte);
        if ($code <= 127) {
            // Plain 7 bit char
            $result .= $byte;
            $pos++;
            continue;
        }
        if (($code & 64) === 0) {
            // Not a lead byte: we are in the middle of a sequence
            $result .= chr($this->noCharByteVal);
            $pos++;
            continue;
        }
        // Collect the lead byte plus one following byte per additional leading 1-bit
        $sequence = $byte;
        for ($n = 0; $n < 8; $n++) {
            $code <<= 1;
            if (($code & 128) === 0) {
                break;
            }
            $pos++;
            $sequence .= substr($str, $pos, 1);
        }
        if (isset($this->parsedCharsets[$charset]['utf8'][$sequence])) {
            $localNumber = $this->parsedCharsets[$charset]['utf8'][$sequence];
            // Local numbers above 255 are written as two bytes (16 bit word assumed)
            $result .= $localNumber > 255
                ? chr($localNumber >> 8 & 255) . chr($localNumber & 255)
                : chr($localNumber);
        } elseif ($useEntityForNoChar) {
            $result .= '&#' . $this->utf8CharToUnumber($sequence, true) . ';';
        } else {
            $result .= chr($this->noCharByteVal);
        }
        $pos++;
    }
    return $result;
}
251 
/**
 * Splits a UTF-8 string into an array of its characters.
 *
 * HTML entities are decoded first. Each array element is one character as a
 * UTF-8 byte sequence; a byte above 127 that is not a lead byte is replaced
 * by the "no char" byte.
 *
 * @param string $str UTF-8 string, may contain HTML entities
 * @return array List of single characters
 */
public function utf8_to_numberarray(string $str): array
{
    // Entities must be registered as well
    $decoded = html_entity_decode($str, ENT_COMPAT, 'utf-8');

    $chars = [];
    $length = strlen($decoded);
    for ($pos = 0; $pos < $length; $pos++) {
        $byte = substr($decoded, $pos, 1);
        $code = ord($byte);
        if ($code <= 127) {
            $chars[] = chr($code);
            continue;
        }
        if (($code & 64) === 0) {
            // Continuation byte without a lead byte
            $chars[] = chr($this->noCharByteVal);
            continue;
        }
        // Lead byte: append one following byte per additional leading 1-bit
        $sequence = $byte;
        for ($n = 0; $n < 8; $n++) {
            $code <<= 1;
            if (($code & 128) === 0) {
                break;
            }
            $pos++;
            $sequence .= substr($decoded, $pos, 1);
        }
        $chars[] = $sequence;
    }
    return $chars;
}
301 
/**
 * Converts a Unicode code point number to its UTF-8 byte sequence.
 *
 * Numbers below 128 yield a single byte; larger numbers yield sequences of
 * two to six bytes. Numbers of 2147483648 and above cannot be expressed and
 * yield the "no char" byte.
 *
 * @param int $unicodeInteger Unicode code point number
 * @return string UTF-8 byte sequence
 */
public function UnumberToChar($unicodeInteger)
{
    if ($unicodeInteger < 128) {
        return chr($unicodeInteger);
    }
    // [exclusive upper limit, prefix bits of the lead byte, number of continuation bytes]
    $layouts = [
        [2048, 192, 1],
        [65536, 224, 2],
        [2097152, 240, 3],
        [67108864, 248, 4],
        [2147483648, 252, 5],
    ];
    foreach ($layouts as [$limit, $leadPrefix, $continuationBytes]) {
        if ($unicodeInteger >= $limit) {
            continue;
        }
        $char = chr($leadPrefix | ($unicodeInteger >> (6 * $continuationBytes)));
        // Each continuation byte carries the next six bits, most significant first
        for ($shift = 6 * ($continuationBytes - 1); $shift >= 0; $shift -= 6) {
            $char .= chr(128 | (($unicodeInteger >> $shift) & 63));
        }
        return $char;
    }
    // Cannot express a 32-bit character in UTF-8
    return chr($this->noCharByteVal);
}
361 
/**
 * Converts one UTF-8 character (byte sequence) to its Unicode number.
 *
 * Only the first character of $str is evaluated. If the first byte is not a
 * multi-byte lead byte, its byte value is returned. Continuation bytes that
 * are missing from $str count as zero bits. An empty string yields 0, and
 * lead bytes that carry no payload bits (0xFE, 0xFF) contribute nothing.
 *
 * @param string $str UTF-8 byte sequence of one character
 * @param bool $hex If set, return the number as "x" followed by lowercase hex digits
 * @return int|string Unicode number, or its "x..." hex notation if $hex is set
 */
public function utf8CharToUnumber(string $str, bool $hex = false)
{
    if ($str === '') {
        // Same result as before for empty input, without reading offset 0
        return $hex ? 'x0' : 0;
    }
    $lead = ord($str[0]);
    $int = $lead;
    // This verifies that it IS a multi byte string
    if (($lead & 192) === 192) {
        $int = 0;
        $shifted = $lead;
        $continuationBytes = 0;
        // Every additional leading 1-bit of the lead byte announces one continuation byte
        for (; $continuationBytes < 8; $continuationBytes++) {
            $shifted <<= 1;
            if (($shifted & 128) === 0) {
                break;
            }
            $index = $continuationBytes + 1;
            // Each continuation byte contributes its lower six bits
            $int = ($int << 6) | (isset($str[$index]) ? (ord($str[$index]) & 63) : 0);
        }
        // The lead byte holds the remaining (6 - n) most significant bits, never a negative count
        $leadBits = max(0, 6 - $continuationBytes);
        $int |= ($lead & ((1 << $leadBits) - 1)) << (6 * $continuationBytes);
    }
    return $hex ? 'x' . dechex($int) : $int;
}
397 
398  /********************************************
399  *
400  * Init functions
401  *
402  ********************************************/
/**
 * Loads the conversion table of a charset into $this->parsedCharsets.
 *
 * The table is taken from the cache file below the var path if that file
 * holds a usable table; otherwise the ".tbl" source file is parsed and the
 * result is written to the cache. Only local codes above 127 are recorded.
 * A cache file that does not unserialize to a table is ignored and rebuilt.
 *
 * @param string $charset Charset name, used as the file name of the table
 * @return bool Always true once the table is available
 * @throws UnknownCharsetException If $charset is empty or no conversion table file exists for it
 */
protected function initCharset(string $charset): bool
{
    // Only process if the charset is not yet loaded:
    if (!empty($this->parsedCharsets[$charset])) {
        return true;
    }
    if (!$charset) {
        throw new UnknownCharsetException(sprintf('Empty charset "%s"', $charset), 1508912031);
    }
    // Conversion table filename:
    $charsetConvTableFile = ExtensionManagementUtility::extPath('core') . 'Resources/Private/Charsets/csconvtbl/' . $charset . '.tbl';
    if (!@is_file($charsetConvTableFile)) {
        throw new UnknownCharsetException(sprintf('Unknown charset "%s"', $charset), 1508916031);
    }
    // Cache file for charsets
    // Caching brought parsing time for gb2312 down from 2400 ms to 150 ms. For other charsets we are talking 11 ms down to zero.
    $cacheFile = Environment::getVarPath() . '/charset/charset_' . $charset . '.tbl';
    if (@is_file($cacheFile)) {
        $cached = unserialize((string)file_get_contents($cacheFile), ['allowed_classes' => false]);
        if (is_array($cached) && is_array($cached['local'] ?? null) && is_array($cached['utf8'] ?? null)) {
            $this->parsedCharsets[$charset] = $cached;
            return true;
        }
        // Unreadable or damaged cache: fall through and rebuild it from the source table
    }
    // Parse conversion table into lines:
    $lines = GeneralUtility::trimExplode(LF, (string)file_get_contents($charsetConvTableFile), true);
    $table = ['local' => [], 'utf8' => []];
    // The "whitespaced" type is on the syntax "0x0A 0x000A #LINE FEED" while "ms-token" is like "B9 = U+00B9 : SUPERSCRIPT ONE"
    $whitespacedPattern = '/[[:space:]]*0x([[:xdigit:]]*)[[:space:]]+0x([[:xdigit:]]*)[[:space:]]+/';
    $detectedType = '';
    foreach ($lines as $value) {
        // Comment line or blanks are ignored.
        if (!trim($value) || $value[0] === '#') {
            continue;
        }
        // Detect type if not done yet: (Done on first real line)
        if (!$detectedType) {
            $detectedType = preg_match($whitespacedPattern, $value) ? 'whitespaced' : 'ms-token';
        }
        if ($detectedType === 'ms-token') {
            $parts = preg_split('/[=:]/', $value, 3);
            if ($parts === false || count($parts) < 2) {
                // Line without a separator carries no mapping => skip this item
                continue;
            }
            [$hexbyte, $utf8] = $parts;
        } else {
            $regA = [];
            if (!preg_match($whitespacedPattern, $value, $regA)) {
                // No match => skip this item
                continue;
            }
            $hexbyte = $regA[1];
            $utf8 = 'U+' . $regA[2];
        }
        $decval = hexdec(trim($hexbyte));
        if ($decval > 127) {
            // Strip the "U+" prefix and store the mapping in both directions
            $utf8Char = $this->UnumberToChar((int)hexdec(substr(trim($utf8), 2)));
            $table['local'][$decval] = $utf8Char;
            $table['utf8'][$utf8Char] = $decval;
        }
    }
    $this->parsedCharsets[$charset] = $table;
    GeneralUtility::writeFileToTypo3tempDir($cacheFile, serialize($table));
    return true;
}
474 
/**
 * Builds the UTF-8 to ASCII transliteration table in $this->toASCII['utf-8'].
 *
 * The table is taken from the cache file below the var path if that file
 * holds an array. Otherwise it is derived from UnicodeData.txt (characters
 * up to U+FFFF only) plus the custom Translit.txt, and then cached:
 * decompositions are resolved recursively, combining marks are removed, and
 * only decompositions consisting purely of ASCII characters are kept.
 * Numeric characters without such a decomposition map to their numeric value.
 *
 * @return bool True if the table is available, false if UnicodeData.txt cannot be read
 */
protected function initUnicodeData(): bool
{
    // Only process if the tables are not yet loaded
    if (isset($this->toASCII['utf-8']) && is_array($this->toASCII['utf-8'])) {
        return true;
    }
    // Use cached version if possible
    $cacheFileASCII = Environment::getVarPath() . '/charset/csascii_utf-8.tbl';
    if (@is_file($cacheFileASCII)) {
        $cached = unserialize((string)file_get_contents($cacheFileASCII), ['allowed_classes' => false]);
        if (is_array($cached)) {
            $this->toASCII['utf-8'] = $cached;
            return true;
        }
        // Unreadable or damaged cache: fall through and rebuild it
    }
    // Process main Unicode data file
    $unicodeDataFile = ExtensionManagementUtility::extPath('core') . 'Resources/Private/Charsets/unidata/UnicodeData.txt';
    if (!(GeneralUtility::validPathStr($unicodeDataFile) && @is_file($unicodeDataFile))) {
        return false;
    }
    $fh = fopen($unicodeDataFile, 'rb');
    if (!$fh) {
        return false;
    }
    // Array of temp. decompositions
    $decomposition = [];
    // Array of chars that are marks (eg. composing accents)
    $mark = [];
    // Array of chars that are numbers (eg. digits)
    $number = [];
    // Array of chars to be omitted (eg. Russian hard sign)
    $omit = [];
    // fgets() returns false at end of file (or on a read error), which ends the loop
    while (($line = fgets($fh, 4096)) !== false) {
        // Pad so that short or blank lines do not produce undefined fields
        $fields = array_pad(explode(';', rtrim($line)), 9, '');
        [$char, $name, $cat, , , $decomp, , , $num] = $fields;
        if ($char === '') {
            continue;
        }
        $ord = hexdec($char);
        if ($ord > 65535) {
            // Only process the BMP
            break;
        }
        switch ($cat[0] ?? '') {
            case 'M':
                // mark (accent, umlaut, ...)
                $mark['U+' . $char] = 1;
                break;
            case 'N':
                // numeric value
                if ($ord > 128 && $num !== '') {
                    $number['U+' . $char] = $num;
                }
                break;
        }
        // Accented Latin letters without "official" decomposition
        $match = [];
        if (preg_match('/^LATIN (SMALL|CAPITAL) LETTER ([A-Z]) WITH/', $name, $match) && !$decomp) {
            $c = ord($match[2]);
            if ($match[1] === 'SMALL') {
                $c += 32;
            }
            $decomposition['U+' . $char] = [dechex($c)];
            continue;
        }
        $match = [];
        if (preg_match('/(<.*>)? *(.+)/', $decomp, $match)) {
            // "continue 2" leaves the switch AND skips to the next line of the file
            switch ($match[1]) {
                case '<circle>':
                    // add parenthesis as circle replacement, eg (1)
                    $match[2] = '0028 ' . $match[2] . ' 0029';
                    break;
                case '<square>':
                    // add square brackets as square replacement, eg [1]
                    $match[2] = '005B ' . $match[2] . ' 005D';
                    break;
                case '<compat>':
                    // ignore multi char decompositions that start with a space
                    if (preg_match('/^0020 /', $match[2])) {
                        continue 2;
                    }
                    break;
                case '<initial>':
                case '<medial>':
                case '<final>':
                case '<isolated>':
                case '<vertical>':
                    continue 2;
            }
            $decomposition['U+' . $char] = explode(' ', $match[2]);
        }
    }
    fclose($fh);
    // Process custom decompositions
    $customTranslitFile = ExtensionManagementUtility::extPath('core') . 'Resources/Private/Charsets/unidata/Translit.txt';
    if (GeneralUtility::validPathStr($customTranslitFile) && @is_file($customTranslitFile)) {
        $fh = fopen($customTranslitFile, 'rb');
        if ($fh) {
            while (($line = fgets($fh, 4096)) !== false) {
                // Comment line or blanks are ignored.
                if ($line[0] === '#' || trim($line) === '') {
                    continue;
                }
                // Pad so that a line without ";" yields an empty transliteration
                [$char, $translit] = array_pad(GeneralUtility::trimExplode(';', $line), 2, '');
                if (!$translit) {
                    $omit['U+' . $char] = 1;
                }
                $decomposition['U+' . $char] = explode(' ', $translit);
            }
            fclose($fh);
        }
    }
    // Decompose and remove marks; inspired by unac (Loic Dachary <loic@senga.org>)
    foreach ($decomposition as $from => $to) {
        $code_decomp = [];
        while ($code_value = array_shift($to)) {
            // Do recursive decomposition
            if (isset($decomposition['U+' . $code_value])) {
                foreach (array_reverse($decomposition['U+' . $code_value]) as $cv) {
                    array_unshift($to, $cv);
                }
            } elseif (!isset($mark['U+' . $code_value])) {
                // Keep everything that is not a mark
                $code_decomp[] = $code_value;
            }
        }
        if (!empty($code_decomp) || isset($omit[$from])) {
            $decomposition[$from] = $code_decomp;
        } else {
            unset($decomposition[$from]);
        }
    }
    // Create ascii only mapping
    $this->toASCII['utf-8'] = [];
    foreach ($decomposition as $from => $to) {
        $code_decomp = [];
        while ($code_value = array_shift($to)) {
            $ord = (int)hexdec($code_value);
            if ($ord > 127) {
                // Skip decompositions containing non-ASCII chars
                continue 2;
            }
            $code_decomp[] = chr($ord);
        }
        $this->toASCII['utf-8'][$this->UnumberToChar((int)hexdec(substr($from, 2)))] = implode('', $code_decomp);
    }
    // Add numeric decompositions
    foreach ($number as $from => $to) {
        $utf8_char = $this->UnumberToChar((int)hexdec(substr($from, 2)));
        if (!isset($this->toASCII['utf-8'][$utf8_char])) {
            $this->toASCII['utf-8'][$utf8_char] = $to;
        }
    }
    GeneralUtility::writeFileToTypo3tempDir($cacheFileASCII, serialize($this->toASCII['utf-8']));
    return true;
}
635 
/**
 * Builds the ASCII transliteration table for a non-UTF-8 charset in
 * $this->toASCII[$charset].
 *
 * The table is taken from the cache file below the var path if that file
 * holds an array. Otherwise every character of the charset's conversion
 * table that has a UTF-8 transliteration is mapped (in its local encoding)
 * to that transliteration, and the result is cached. The table is always an
 * array afterwards, even if no character could be mapped.
 *
 * @param string $charset Charset name
 * @return bool True if the table is available, false if a prerequisite table could not be loaded
 */
protected function initToASCII(string $charset): bool
{
    // Only process if the case table is not yet loaded:
    if (isset($this->toASCII[$charset]) && is_array($this->toASCII[$charset])) {
        return true;
    }
    // Use cached version if possible
    $cacheFile = Environment::getVarPath() . '/charset/csascii_' . $charset . '.tbl';
    if (@is_file($cacheFile)) {
        $cached = unserialize((string)file_get_contents($cacheFile), ['allowed_classes' => false]);
        if (is_array($cached)) {
            $this->toASCII[$charset] = $cached;
            return true;
        }
        // Unreadable or damaged cache: fall through and rebuild it
    }
    // Init UTF-8 conversion for this charset
    if (!$this->initCharset($charset)) {
        return false;
    }
    // UTF-8/ASCII transliteration is used as the base conversion table
    if (!$this->initUnicodeData()) {
        return false;
    }
    // Collect locally so that the property is assigned an array in every case
    $map = [];
    foreach ($this->parsedCharsets[$charset]['local'] ?? [] as $utf8) {
        if (isset($this->toASCII['utf-8'][$utf8])) {
            // Reconvert to charset (don't use chr() of numeric value, might be multi-byte)
            $map[$this->utf8_decode($utf8, $charset)] = $this->toASCII['utf-8'][$utf8];
        }
    }
    $this->toASCII[$charset] = $map;
    GeneralUtility::writeFileToTypo3tempDir($cacheFile, serialize($map));
    return true;
}
673 
674  /********************************************
675  *
676  * String operation functions
677  *
678  ********************************************/
679 
/**
 * Replaces special characters of a string by their ASCII transliteration.
 *
 * Dispatches to the UTF-8, EUC or single-byte mapping depending on the
 * charset. Anything that is not a string yields an empty string.
 *
 * @param string $charset Charset $string is encoded in
 * @param mixed $string Input string
 * @return string Transliterated string
 */
public function specCharsToASCII(string $charset, $string): string
{
    if (!is_string($string)) {
        return '';
    }
    if ($charset === 'utf-8') {
        return $this->utf8_char_mapping($string);
    }
    if (isset($this->eucBasedSets[$charset])) {
        return $this->euc_char_mapping($string, $charset);
    }
    // Treat everything else as single-byte encoding
    return $this->sb_char_mapping($string, $charset);
}
702 
703  /********************************************
704  *
705  * Internal string operation functions
706  *
707  ********************************************/
/**
 * Transliterates a string of a single-byte charset to ASCII, byte by byte.
 *
 * Bytes without an entry in the charset's transliteration table are kept.
 *
 * @param string $str String in a single-byte charset
 * @param string $charset Charset of $str
 * @return string Transliterated string; $str unchanged if the table cannot be loaded
 */
public function sb_char_mapping(string $str, string $charset): string
{
    if (!$this->initToASCII($charset)) {
        // Do nothing
        return $str;
    }
    $map = &$this->toASCII[$charset];
    $result = '';
    $length = strlen($str);
    for ($pos = 0; $pos < $length; $pos++) {
        $byte = $str[$pos];
        $result .= $map[$byte] ?? $byte;
    }
    return $result;
}
733 
734  /********************************************
735  *
736  * Internal UTF-8 string operation functions
737  *
738  ********************************************/
739 
/**
 * Transliterates a UTF-8 string to ASCII, character by character.
 *
 * Characters without an entry in the UTF-8 transliteration table are kept.
 * Bytes that are neither 7 bit chars nor lead bytes (stray continuation
 * bytes) are dropped from the output.
 *
 * @param string $str UTF-8 string
 * @return string Transliterated string; $str unchanged if the table cannot be loaded
 */
public function utf8_char_mapping(string $str): string
{
    if (!$this->initUnicodeData()) {
        // Do nothing
        return $str;
    }
    $map = &$this->toASCII['utf-8'];
    $result = '';
    $length = strlen($str);
    $pos = 0;
    while ($pos < $length) {
        $byte = $str[$pos];
        $code = ord($byte);
        if (($code & 128) === 0) {
            // single-byte (0xxxxxx)
            $char = $byte;
            $pos++;
        } elseif (($code & 192) === 192) {
            // multi-byte starting byte (11xxxxxx): the number of leading 1-bits is the byte count
            $byteCount = 0;
            while ($code & 128) {
                $byteCount++;
                $code <<= 1;
            }
            $char = substr($str, $pos, $byteCount);
            $pos += $byteCount;
        } else {
            // continuation byte without lead byte contributes nothing
            $char = '';
            $pos++;
        }
        $result .= $map[$char] ?? $char;
    }
    return $result;
}
778 
779  /********************************************
780  *
781  * Internal EUC string operation functions
782  *
783  * Extended Unix Code:
784  * ASCII compatible 7bit single bytes chars
785  * 8bit two byte chars
786  *
787  * Shift-JIS is treated as a special case.
788  *
789  ********************************************/
790 
/**
 * Transliterates a string of an EUC-style charset to ASCII.
 *
 * Bytes from 128 upwards start a two-byte character. For Shift-JIS, bytes
 * from 160 to 223 are single-byte characters. Characters without an entry
 * in the charset's transliteration table are kept.
 *
 * @param string $str String in an EUC-based charset
 * @param string $charset Charset of $str
 * @return string Transliterated string; $str unchanged if the table cannot be loaded
 */
public function euc_char_mapping(string $str, string $charset): string
{
    if (!$this->initToASCII($charset)) {
        // do nothing
        return $str;
    }
    $map = &$this->toASCII[$charset];
    $isShiftJis = $charset === 'shift_jis';
    $result = '';
    $length = strlen($str);
    for ($pos = 0; $pos < $length; $pos++) {
        $char = $str[$pos];
        $code = ord($char);
        $isDoubleByte = $isShiftJis
            ? ($code >= 128 && $code < 160) || $code >= 224
            : $code >= 128;
        if ($isDoubleByte) {
            $char = substr($str, $pos, 2);
            $pos++;
        }
        $result .= $map[$char] ?? $char;
    }
    return $result;
}
830 }
‪TYPO3\CMS\Core\Charset\CharsetConverter\$noCharByteVal
‪int $noCharByteVal
Definition: CharsetConverter.php:58
‪TYPO3\CMS\Core\Charset\CharsetConverter\initCharset
‪bool initCharset(string $charset)
Definition: CharsetConverter.php:413
‪TYPO3\CMS\Core\Charset\CharsetConverter\UnumberToChar
‪string UnumberToChar($unicodeInteger)
Definition: CharsetConverter.php:325
‪TYPO3\CMS\Core\Charset\CharsetConverter\utf8_encode
‪string utf8_encode(string $str, string $charset)
Definition: CharsetConverter.php:129
‪TYPO3\CMS\Core\Charset\CharsetConverter\$parsedCharsets
‪array $parsedCharsets
Definition: CharsetConverter.php:63
‪TYPO3\CMS\Core\Charset\CharsetConverter\$twoByteSets
‪array $twoByteSets
Definition: CharsetConverter.php:73
‪TYPO3\CMS\Core\Charset\CharsetConverter\utf8CharToUnumber
‪int utf8CharToUnumber(string $str, bool $hex=false)
Definition: CharsetConverter.php:371
‪TYPO3\CMS\Core\Charset\CharsetConverter\conv
‪string conv(string $inputString, string $fromCharset, string $toCharset)
Definition: CharsetConverter.php:100
‪TYPO3\CMS\Core\Charset\CharsetConverter
Definition: CharsetConverter.php:54
‪TYPO3\CMS\Core\Core\Environment\getVarPath
‪static getVarPath()
Definition: Environment.php:197
‪TYPO3\CMS\Core\Utility\ExtensionManagementUtility
Definition: ExtensionManagementUtility.php:32
‪TYPO3\CMS\Core\Charset
Definition: CharsetConverter.php:16
‪TYPO3\CMS\Core\Utility\GeneralUtility\writeFileToTypo3tempDir
‪static string null writeFileToTypo3tempDir(string $filepath, string $content)
Definition: GeneralUtility.php:1561
‪TYPO3\CMS\Core\Utility\ExtensionManagementUtility\extPath
‪static extPath(string $key, string $script='')
Definition: ExtensionManagementUtility.php:82
‪TYPO3\CMS\Core\Charset\CharsetConverter\sb_char_mapping
‪string sb_char_mapping(string $str, string $charset)
Definition: CharsetConverter.php:715
‪TYPO3\CMS\Core\Charset\CharsetConverter\$eucBasedSets
‪array $eucBasedSets
Definition: CharsetConverter.php:80
‪TYPO3\CMS\Core\Charset\UnknownCharsetException
Definition: UnknownCharsetException.php:23
‪TYPO3\CMS\Core\Charset\CharsetConverter\initToASCII
‪bool initToASCII(string $charset)
Definition: CharsetConverter.php:643
‪TYPO3\CMS\Core\Charset\CharsetConverter\euc_char_mapping
‪string euc_char_mapping(string $str, string $charset)
Definition: CharsetConverter.php:798
‪TYPO3\CMS\Core\Charset\CharsetConverter\$toASCII
‪array $toASCII
Definition: CharsetConverter.php:68
‪TYPO3\CMS\Core\Charset\CharsetConverter\utf8_char_mapping
‪string utf8_char_mapping(string $str)
Definition: CharsetConverter.php:746
‪TYPO3\CMS\Core\SingletonInterface
Definition: SingletonInterface.php:22
‪TYPO3\CMS\Core\Core\Environment
Definition: Environment.php:41
‪TYPO3\CMS\Core\Utility\GeneralUtility
Definition: GeneralUtility.php:52
‪TYPO3\CMS\Core\Charset\CharsetConverter\utf8_decode
‪string utf8_decode(string $str, string $charset, bool $useEntityForNoChar=false)
Definition: CharsetConverter.php:191
‪TYPO3\CMS\Core\Charset\CharsetConverter\utf8_to_numberarray
‪array utf8_to_numberarray(string $str)
Definition: CharsetConverter.php:260
‪TYPO3\CMS\Core\Charset\CharsetConverter\initUnicodeData
‪bool initUnicodeData()
Definition: CharsetConverter.php:482
‪TYPO3\CMS\Core\Utility\GeneralUtility\trimExplode
‪static list< string > trimExplode(string $delim, string $string, bool $removeEmptyValues=false, int $limit=0)
Definition: GeneralUtility.php:822
‪TYPO3\CMS\Core\Charset\CharsetConverter\specCharsToASCII
‪string specCharsToASCII(string $charset, $string)
Definition: CharsetConverter.php:687