‪TYPO3CMS  10.4
CharsetConverter.php
Go to the documentation of this file.
1 <?php
2 
3 /*
4  * This file is part of the TYPO3 CMS project.
5  *
6  * It is free software; you can redistribute it and/or modify it under
7  * the terms of the GNU General Public License, either version 2
8  * of the License, or any later version.
9  *
10  * For the full copyright and license information, please read the
11  * LICENSE.txt file that was distributed with this source code.
12  *
13  * The TYPO3 project - inspiring people to share!
14  */
15 
17 
22 
54 {
60  protected ‪$noCharByteVal = 63;
61 
67  protected ‪$parsedCharsets = [];
68 
74  protected ‪$toASCII = [];
75 
81  protected ‪$twoByteSets = [
82  'ucs-2' => 1
83  ];
84 
90  protected ‪$eucBasedSets = [
91  'gb2312' => 1, // Chinese, simplified.
92  'big5' => 1, // Chinese, traditional.
93  'euc-kr' => 1, // Korean
94  'shift_jis' => 1
95  ];
96 
97  /********************************************
98  *
99  * Charset Conversion functions
100  *
101  ********************************************/
/**
 * Converts a string from one charset to another.
 *
 * Identical source and target charsets return the input unchanged. A conversion
 * to UTF-8 is first attempted with mb_convert_encoding(); when mbstring does not
 * know the source charset, the conversion tables of this class are used instead,
 * with UTF-8 as the intermediate encoding.
 *
 * @param string $inputString Input string
 * @param string $fromCharset Charset of the input; compared case-sensitively, so 'utf-8' must be lowercase
 * @param string $toCharset Target charset; compared case-sensitively, so 'utf-8' must be lowercase
 * @return string Converted string
 */
public function conv($inputString, $fromCharset, $toCharset)
{
    if ($fromCharset === $toCharset) {
        return $inputString;
    }
    // PHP-libs don't support fallback to SGML entities, but UTF-8 handles everything
    if ($toCharset === 'utf-8') {
        try {
            // Returns FALSE for unsupported charsets on PHP < 8 ...
            $convertedString = mb_convert_encoding($inputString, $toCharset, $fromCharset);
        } catch (\ValueError $e) {
            // ... and throws on PHP >= 8. Treat both the same way so that the
            // table based conversion below still gets its chance.
            $convertedString = false;
        }
        if ($convertedString !== false) {
            return $convertedString;
        }
    }
    if ($fromCharset !== 'utf-8') {
        $inputString = $this->utf8_encode($inputString, $fromCharset);
    }
    if ($toCharset !== 'utf-8') {
        // TRUE: characters missing in the target charset become numeric entities
        $inputString = $this->utf8_decode($inputString, $toCharset, true);
    }
    return $inputString;
}
131 
/**
 * Converts $str from $charset to UTF-8 using the parsed conversion table.
 *
 * Bytes up to 127 are copied unchanged. Everything else is looked up in the
 * 'local' part of the conversion table; values without an entry are replaced
 * by the character with byte value $this->noCharByteVal.
 *
 * @param string $str String in local charset to convert to UTF-8
 * @param string $charset Charset of $str; must match the table name exactly (no case folding is done here)
 * @return string UTF-8 string, or an empty string if the charset could not be initialized
 */
public function utf8_encode($str, $charset)
{
    if ($charset === 'utf-8') {
        return $str;
    }
    // Parse conv. table if not already done
    if ($this->initCharset($charset)) {
        $strLen = strlen($str);
        $outStr = '';
        // Traverse each byte of the string
        for ($a = 0; $a < $strLen; $a++) {
            $chr = substr($str, $a, 1);
            $ord = ord($chr);
            if (isset($this->twoByteSets[$charset])) {
                // The charset has two bytes per char.
                // TYPO3 cannot convert from ucs-2 as the according conversion table is not present
                // A dangling last byte is paired with 0 instead of reading past the end of the string.
                $ord2 = $a + 1 < $strLen ? ord($str[$a + 1]) : 0;
                // Assume big endian
                $ord = ($ord << 8) | $ord2;
                if (isset($this->parsedCharsets[$charset]['local'][$ord])) {
                    // The local char-number was found in the parsed conv. table
                    $outStr .= $this->parsedCharsets[$charset]['local'][$ord];
                } else {
                    // No char exists
                    $outStr .= chr($this->noCharByteVal);
                }
                // Skip the second byte that was just consumed
                $a++;
            } elseif ($ord > 127) {
                // A value over 127 has to be translated through the conversion table.
                // EUC uses two bytes above 127; we get both, advance the pointer and make $ord a 16bit int.
                if (isset($this->eucBasedSets[$charset])) {
                    // Shift-JIS: chars between 160 and 223 are single byte
                    if ($charset !== 'shift_jis' || ($ord < 160 || $ord > 223)) {
                        $a++;
                        $ord2 = ord(substr($str, $a, 1));
                        $ord = $ord * 256 + $ord2;
                    }
                }
                if (isset($this->parsedCharsets[$charset]['local'][$ord])) {
                    // The local char-number was found in the parsed conv. table
                    $outStr .= $this->parsedCharsets[$charset]['local'][$ord];
                } else {
                    // No char exists
                    $outStr .= chr($this->noCharByteVal);
                }
            } else {
                $outStr .= $chr;
            }
        }
        return $outStr;
    }
    return '';
}
192 
/**
 * Converts a UTF-8 string to $charset using the parsed conversion table.
 *
 * Bytes up to 127 are copied as they are. Each multi-byte sequence is looked up
 * in the 'utf8' part of the table. A sequence without an entry becomes either a
 * numeric entity (&#x...;) or the character with byte value $this->noCharByteVal,
 * depending on $useEntityForNoChar. A byte above 127 that cannot start a
 * sequence always becomes the replacement character.
 *
 * @param string $str UTF-8 string to convert to the local charset
 * @param string $charset Target charset; must match the table name exactly
 * @param bool $useEntityForNoChar If set, characters missing in the target charset are written as numeric entities
 * @return string String in the target charset, or an empty string if the charset could not be initialized
 */
public function utf8_decode($str, $charset, $useEntityForNoChar = false)
{
    if ($charset === 'utf-8') {
        return $str;
    }
    // Make sure the conversion table is available
    if (!$this->initCharset($charset)) {
        return '';
    }
    $replacement = chr($this->noCharByteVal);
    $length = strlen($str);
    $result = '';
    for ($pos = 0; $pos < $length; $pos++) {
        $byte = substr($str, $pos, 1);
        $code = ord($byte);
        if ($code <= 127) {
            // Plain 7bit character
            $result .= $byte;
            continue;
        }
        if (!($code & 64)) {
            // Bit 6 is not set: this is no start byte, we are in the middle of a sequence
            $result .= $replacement;
            continue;
        }
        // Collect the sequence: every further leading 1-bit of the start byte announces one more byte
        $sequence = $byte;
        for ($n = 0; $n < 8; $n++) {
            $code = $code << 1;
            if (!($code & 128)) {
                break;
            }
            $pos++;
            $sequence .= substr($str, $pos, 1);
        }
        if (isset($this->parsedCharsets[$charset]['utf8'][$sequence])) {
            $localNumber = $this->parsedCharsets[$charset]['utf8'][$sequence];
            // Numbers above 255 are written as two bytes (16bit word assumed)
            $result .= $localNumber > 255
                ? chr(($localNumber >> 8) & 255) . chr($localNumber & 255)
                : chr($localNumber);
        } elseif ($useEntityForNoChar) {
            $result .= '&#' . $this->utf8CharToUnumber($sequence, true) . ';';
        } else {
            $result .= $replacement;
        }
    }
    return $result;
}
261 
/**
 * Splits a UTF-8 string into an array with one entry per character.
 *
 * HTML entities are decoded first, so an entity counts as the single character
 * it stands for. Despite the method name the entries are the characters
 * themselves (as byte strings), not numbers. A byte above 127 that cannot start
 * a sequence yields the character with byte value $this->noCharByteVal.
 *
 * @param string $str UTF-8 input string, may contain HTML entities
 * @return array One string per character, in input order
 */
public function utf8_to_numberarray($str)
{
    // Entities must be registered as well
    $decoded = html_entity_decode($str, ENT_COMPAT, 'utf-8');

    $length = strlen($decoded);
    $characters = [];
    $pos = 0;
    while ($pos < $length) {
        $byte = substr($decoded, $pos, 1);
        $code = ord($byte);
        if ($code <= 127) {
            // Plain 7bit character
            $characters[] = chr($code);
        } elseif ($code & 64) {
            // Start byte: every further leading 1-bit announces one more byte of the sequence
            $sequence = $byte;
            for ($n = 0; $n < 8; $n++) {
                $code = $code << 1;
                if (!($code & 128)) {
                    break;
                }
                $pos++;
                $sequence .= substr($decoded, $pos, 1);
            }
            $characters[] = $sequence;
        } else {
            // Bit 6 is not set: we are in the middle of a byte sequence
            $characters[] = chr($this->noCharByteVal);
        }
        $pos++;
    }
    return $characters;
}
311 
/**
 * Converts a Unicode number to its UTF-8 byte sequence.
 *
 * Numbers below 128 result in a single byte. Larger numbers are written with
 * one start byte and one to five continuation bytes carrying 6 bits each.
 * Numbers of 2147483648 and above cannot be expressed and result in the
 * character with byte value $this->noCharByteVal.
 *
 * @param int $unicodeInteger Unicode number
 * @return string UTF-8 byte sequence
 */
public function UnumberToChar($unicodeInteger)
{
    if ($unicodeInteger < 128) {
        return chr($unicodeInteger);
    }
    // Each row: exclusive upper limit, marker bits of the start byte, number of continuation bytes
    $layouts = [
        [2048, 192, 1],
        [65536, 224, 2],
        [2097152, 240, 3],
        [67108864, 248, 4],
        [2147483648, 252, 5],
    ];
    foreach ($layouts as [$limit, $startMarker, $continuationBytes]) {
        if ($unicodeInteger >= $limit) {
            continue;
        }
        // The start byte holds the bits above all continuation bytes
        $bytes = chr($startMarker | ($unicodeInteger >> (6 * $continuationBytes)));
        // Continuation bytes follow from the most to the least significant 6 bit group
        for ($shift = 6 * ($continuationBytes - 1); $shift >= 0; $shift -= 6) {
            $bytes .= chr(128 | (($unicodeInteger >> $shift) & 63));
        }
        return $bytes;
    }
    // Cannot express a 32-bit character in UTF-8
    return chr($this->noCharByteVal);
}
368 
/**
 * Converts a single UTF-8 character to its Unicode number.
 *
 * Only the first character of $str is evaluated. If the first byte is not the
 * start byte of a multi-byte sequence, its byte value is returned as it is.
 *
 * @param string $str UTF-8 character (byte sequence); must not be empty
 * @param bool $hex If set, the number is returned as hexadecimal string prefixed with "x"
 * @return int|string Unicode number, or "x" followed by the hexadecimal number if $hex is set
 */
public function utf8CharToUnumber($str, $hex = false)
{
    $firstByte = ord($str[0]);
    if (($firstByte & 192) !== 192) {
        // Not the start byte of a multi-byte sequence
        $number = $firstByte;
    } else {
        $shifted = $firstByte;
        $trailingBits = '';
        $continuationBytes = 0;
        // Every further leading 1-bit of the start byte announces one continuation byte
        while ($continuationBytes < 8) {
            $shifted = $shifted << 1;
            if (!($shifted & 128)) {
                break;
            }
            // Each continuation byte contributes its lower 6 bits
            $trailingBits .= substr('00000000' . decbin(ord(substr($str, $continuationBytes + 1, 1))), -6);
            $continuationBytes++;
        }
        // The remaining low bits of the start byte are the most significant part of the number
        $leadingBits = substr('00000000' . decbin($firstByte), -(6 - $continuationBytes));
        $number = bindec($leadingBits . $trailingBits);
    }
    if ($hex) {
        return 'x' . dechex($number);
    }
    return $number;
}
404 
405  /********************************************
406  *
407  * Init functions
408  *
409  ********************************************/
/**
 * Makes sure the conversion table for $charset is available in $this->parsedCharsets.
 *
 * The table is taken from a serialized cache file below the var path if a usable
 * one exists. Otherwise the .tbl file shipped with the core extension is parsed
 * and the result is written to the cache file. A cache file that does not contain
 * the expected array structure is ignored and rebuilt.
 *
 * Only values above 127 are registered: 'local' maps the local number to the
 * UTF-8 byte sequence, 'utf8' maps the UTF-8 byte sequence to the local number.
 *
 * @param string $charset Charset name; used as base name of the conversion table file
 * @return int 1 if the table was already loaded, 2 if it was loaded from cache or parsed now
 * @throws UnknownCharsetException if no conversion table file exists for the charset
 */
protected function initCharset($charset)
{
    // Only process if the charset is not yet loaded:
    if (!empty($this->parsedCharsets[$charset])) {
        return 1;
    }
    // Conversion table filename:
    $charsetConvTableFile = ExtensionManagementUtility::extPath('core') . 'Resources/Private/Charsets/csconvtbl/' . $charset . '.tbl';
    if (!($charset && GeneralUtility::validPathStr($charsetConvTableFile) && @is_file($charsetConvTableFile))) {
        throw new UnknownCharsetException(sprintf('Unknown charset "%s"', $charset), 1508916031);
    }
    // Cache file for charsets:
    // Caching brought parsing time for gb2312 down from 2400 ms to 150 ms. For other charsets we are talking 11 ms down to zero.
    $cacheFile = Environment::getVarPath() . '/charset/charset_' . $charset . '.tbl';
    if ($cacheFile && @is_file($cacheFile)) {
        $cached = unserialize((string)file_get_contents($cacheFile), ['allowed_classes' => false]);
        // Only trust the cache if it has the structure that is written below. A truncated or
        // otherwise broken file would leave a non-array in place and break every conversion.
        if (is_array($cached) && is_array($cached['local'] ?? null) && is_array($cached['utf8'] ?? null)) {
            $this->parsedCharsets[$charset] = $cached;
            return 2;
        }
    }
    // Parse conversion table into lines:
    $lines = GeneralUtility::trimExplode(LF, (string)file_get_contents($charsetConvTableFile), true);
    // Initialize the internal variable holding the conv. table:
    $this->parsedCharsets[$charset] = ['local' => [], 'utf8' => []];
    $whitespacedPattern = '/[[:space:]]*0x([[:xdigit:]]*)[[:space:]]+0x([[:xdigit:]]*)[[:space:]]+/';
    $detectedType = '';
    foreach ($lines as $value) {
        // Comment line or blanks are ignored.
        if (!trim($value) || $value[0] === '#') {
            continue;
        }
        // Detect type if not done yet: (Done on first real line)
        // The "whitespaced" type is on the syntax "0x0A 0x000A #LINE FEED" while "ms-token" is like "B9 = U+00B9 : SUPERSCRIPT ONE"
        if (!$detectedType) {
            $detectedType = preg_match($whitespacedPattern, $value) ? 'whitespaced' : 'ms-token';
        }
        $hexbyte = '';
        $utf8 = '';
        if ($detectedType === 'ms-token') {
            // Pad the result so that a line without separators does not cause an undefined offset
            [$hexbyte, $utf8] = array_pad(preg_split('/[=:]/', $value, 3) ?: [], 2, '');
        } elseif ($detectedType === 'whitespaced') {
            $regA = [];
            preg_match($whitespacedPattern, $value, $regA);
            if (empty($regA)) {
                // No match => skip this item
                continue;
            }
            $hexbyte = $regA[1];
            $utf8 = 'U+' . $regA[2];
        }
        $decval = hexdec(trim($hexbyte));
        if ($decval > 127) {
            // Strip the leading "U+" before reading the hexadecimal Unicode number
            $utf8decval = hexdec(substr(trim($utf8), 2));
            $this->parsedCharsets[$charset]['local'][$decval] = $this->UnumberToChar((int)$utf8decval);
            $this->parsedCharsets[$charset]['utf8'][$this->parsedCharsets[$charset]['local'][$decval]] = $decval;
        }
    }
    if ($cacheFile) {
        GeneralUtility::writeFileToTypo3tempDir($cacheFile, serialize($this->parsedCharsets[$charset]));
    }
    return 2;
}
480 
/**
 * Builds the UTF-8 to ASCII transliteration table in $this->toASCII['utf-8'].
 *
 * The table is taken from a serialized cache file below the var path if one
 * exists. Otherwise it is derived from UnicodeData.txt (decompositions, marks,
 * numeric values; only code points up to 65535) and the custom Translit.txt,
 * both shipped with the core extension, and written to the cache file.
 *
 * @return int|bool 1 if the table was already loaded, 2 if it was read from the cache file,
 *                  3 if it was built from the data files, FALSE if UnicodeData.txt cannot be read
 */
protected function initUnicodeData()
{
    // Cache file
    $cacheFileASCII = Environment::getVarPath() . '/charset/csascii_utf-8.tbl';
    // Only process if the tables are not yet loaded
    if (isset($this->toASCII['utf-8']) && is_array($this->toASCII['utf-8'])) {
        return 1;
    }
    // Use cached version if possible
    if ($cacheFileASCII && @is_file($cacheFileASCII)) {
        $this->toASCII['utf-8'] = unserialize((string)file_get_contents($cacheFileASCII), ['allowed_classes' => false]);
        return 2;
    }
    // Process main Unicode data file
    $unicodeDataFile = ExtensionManagementUtility::extPath('core') . 'Resources/Private/Charsets/unidata/UnicodeData.txt';
    if (!(GeneralUtility::validPathStr($unicodeDataFile) && @is_file($unicodeDataFile))) {
        return false;
    }
    $fh = fopen($unicodeDataFile, 'rb');
    if (!$fh) {
        return false;
    }
    // Array of temp. decompositions
    $decomposition = [];
    // Array of chars that are marks (eg. composing accents)
    $mark = [];
    // Array of chars that are numbers (eg. digits)
    $number = [];
    // Array of chars to be omitted (eg. Russian hard sign)
    $omit = [];
    while (!feof($fh)) {
        $line = fgets($fh, 4096);
        if ($line === false) {
            // End of file or read error: nothing more to process
            break;
        }
        if (trim($line) === '') {
            continue;
        }
        // Has a lot of info; pad so that a short line does not cause undefined offsets
        [$char, $name, $cat, , , $decomp, , , $num] = array_pad(explode(';', rtrim($line)), 9, '');
        $ord = hexdec($char);
        if ($ord > 65535) {
            // Only process the BMP
            break;
        }
        switch ($cat[0] ?? '') {
            case 'M':
                // mark (accent, umlaut, ...)
                $mark['U+' . $char] = 1;
                break;
            case 'N':
                // numeric value
                if ($ord > 128 && $num !== '') {
                    $number['U+' . $char] = $num;
                }
                break;
        }
        // Accented Latin letters without "official" decomposition
        $match = [];
        if (preg_match('/^LATIN (SMALL|CAPITAL) LETTER ([A-Z]) WITH/', $name, $match) && !$decomp) {
            $c = ord($match[2]);
            if ($match[1] === 'SMALL') {
                // The name always holds the capital letter; 32 is the distance to the small one
                $c += 32;
            }
            $decomposition['U+' . $char] = [dechex($c)];
            continue;
        }
        $match = [];
        if (preg_match('/(<.*>)? *(.+)/', $decomp, $match)) {
            switch ($match[1]) {
                case '<circle>':
                    // add parenthesis as circle replacement, eg (1)
                    $match[2] = '0028 ' . $match[2] . ' 0029';
                    break;
                case '<square>':
                    // add square brackets as square replacement, eg [1]
                    $match[2] = '005B ' . $match[2] . ' 005D';
                    break;
                case '<compat>':
                    // ignore multi char decompositions that start with a space
                    if (preg_match('/^0020 /', $match[2])) {
                        continue 2;
                    }
                    break;
                case '<initial>':
                case '<medial>':
                case '<final>':
                case '<isolated>':
                case '<vertical>':
                    // These decomposition types are skipped completely
                    continue 2;
            }
            $decomposition['U+' . $char] = explode(' ', $match[2]);
        }
    }
    fclose($fh);
    // Process custom decompositions
    $customTranslitFile = ExtensionManagementUtility::extPath('core') . 'Resources/Private/Charsets/unidata/Translit.txt';
    if (GeneralUtility::validPathStr($customTranslitFile) && @is_file($customTranslitFile)) {
        $fh = fopen($customTranslitFile, 'rb');
        if ($fh) {
            while (!feof($fh)) {
                $line = fgets($fh, 4096);
                if ($line === false) {
                    // End of file or read error; do not retry, a persistent error would loop forever
                    break;
                }
                if ($line[0] !== '#' && trim($line) !== '') {
                    [$char, $translit] = array_pad(GeneralUtility::trimExplode(';', $line), 2, '');
                    if (!$translit) {
                        // An empty transliteration means the char is dropped
                        $omit['U+' . $char] = 1;
                    }
                    $decomposition['U+' . $char] = explode(' ', $translit);
                }
            }
            fclose($fh);
        }
    }
    // Decompose and remove marks; inspired by unac (Loic Dachary <loic@senga.org>)
    foreach ($decomposition as $from => $to) {
        $code_decomp = [];
        while ($code_value = array_shift($to)) {
            if (isset($decomposition['U+' . $code_value])) {
                // Do recursive decomposition: put the replacement in front of the remaining values
                foreach (array_reverse($decomposition['U+' . $code_value]) as $cv) {
                    array_unshift($to, $cv);
                }
            } elseif (!isset($mark['U+' . $code_value])) {
                // Keep everything that is not a mark
                $code_decomp[] = $code_value;
            }
        }
        if (!empty($code_decomp) || isset($omit[$from])) {
            $decomposition[$from] = $code_decomp;
        } else {
            unset($decomposition[$from]);
        }
    }
    // Create ascii only mapping
    $this->toASCII['utf-8'] = [];
    foreach ($decomposition as $from => $to) {
        $code_decomp = [];
        while ($code_value = array_shift($to)) {
            $ord = (int)hexdec($code_value);
            if ($ord > 127) {
                // Skip decompositions containing non-ASCII chars
                continue 2;
            }
            $code_decomp[] = chr($ord);
        }
        $this->toASCII['utf-8'][$this->UnumberToChar((int)hexdec(substr($from, 2)))] = implode('', $code_decomp);
    }
    // Add numeric decompositions
    foreach ($number as $from => $to) {
        $utf8_char = $this->UnumberToChar((int)hexdec(substr($from, 2)));
        if (!isset($this->toASCII['utf-8'][$utf8_char])) {
            $this->toASCII['utf-8'][$utf8_char] = $to;
        }
    }
    if ($cacheFileASCII) {
        GeneralUtility::writeFileToTypo3tempDir($cacheFileASCII, serialize($this->toASCII['utf-8']));
    }
    return 3;
}
643 
/**
 * Builds the ASCII transliteration table for $charset in $this->toASCII[$charset].
 *
 * The table is taken from a serialized cache file below the var path if one
 * exists. Otherwise it is derived from the conversion table of the charset and
 * the UTF-8 transliteration table, and written to the cache file. The keys of
 * the table are characters in the local charset.
 *
 * @param string $charset Charset name
 * @return int|bool 1 if the table was already loaded, 2 if it was read from the cache file,
 *                  3 if it was built now, FALSE if a required table could not be initialized
 */
protected function initToASCII($charset)
{
    // Only process if the case table is not yet loaded:
    if (isset($this->toASCII[$charset]) && is_array($this->toASCII[$charset])) {
        return 1;
    }
    // Use cached version if possible
    $cacheFile = Environment::getVarPath() . '/charset/csascii_' . $charset . '.tbl';
    if ($cacheFile && @is_file($cacheFile)) {
        $this->toASCII[$charset] = unserialize((string)file_get_contents($cacheFile), ['allowed_classes' => false]);
        return 2;
    }
    // Init UTF-8 conversion for this charset
    if (!$this->initCharset($charset)) {
        return false;
    }
    // UTF-8/ASCII transliteration is used as the base conversion table
    if (!$this->initUnicodeData()) {
        return false;
    }
    // Start with an empty table, so that a charset without any transliterable char
    // still results in an array (in memory and in the cache file) instead of an undefined entry.
    $this->toASCII[$charset] = [];
    foreach ($this->parsedCharsets[$charset]['local'] as $ci => $utf8) {
        // Reconvert to charset (don't use chr() of numeric value, might be multi-byte)
        $c = $this->utf8_decode($utf8, $charset);
        if (isset($this->toASCII['utf-8'][$utf8])) {
            $this->toASCII[$charset][$c] = $this->toASCII['utf-8'][$utf8];
        }
    }
    if ($cacheFile) {
        GeneralUtility::writeFileToTypo3tempDir($cacheFile, serialize($this->toASCII[$charset]));
    }
    return 3;
}
683 
684  /********************************************
685  *
686  * String operation functions
687  *
688  ********************************************/
689 
/**
 * Transliterates the special characters of a string to their ASCII equivalents.
 *
 * The charset decides which mapping routine is used: the UTF-8 one, the one for
 * the sets listed in $this->eucBasedSets, or the single-byte one for everything else.
 *
 * @param string $charset Charset of $string; must be given exactly as used in the lookups (e.g. 'utf-8')
 * @param string $string Input string to convert
 * @return string The converted string
 */
public function specCharsToASCII($charset, $string)
{
    if ($charset === 'utf-8') {
        return $this->utf8_char_mapping($string);
    }
    if (isset($this->eucBasedSets[$charset])) {
        return $this->euc_char_mapping($string, $charset);
    }
    // Treat everything else as single-byte encoding
    return $this->sb_char_mapping($string, $charset);
}
709 
710  /********************************************
711  *
712  * Internal string operation functions
713  *
714  ********************************************/
/**
 * Maps all characters of a string in a single-byte charset to their ASCII transliteration.
 *
 * Bytes without an entry in the transliteration table are kept as they are.
 *
 * @param string $str String in a single-byte charset
 * @param string $charset Charset of $str
 * @return string The converted string, or $str unchanged if the table could not be initialized
 */
public function sb_char_mapping($str, $charset)
{
    if (!$this->initToASCII($charset)) {
        // Do nothing
        return $str;
    }
    $table = &$this->toASCII[$charset];
    $converted = '';
    $offset = 0;
    while (isset($str[$offset])) {
        $byte = $str[$offset];
        $converted .= $table[$byte] ?? $byte;
        $offset++;
    }
    return $converted;
}
740 
741  /********************************************
742  *
743  * Internal UTF-8 string operation functions
744  *
745  ********************************************/
746 
/**
 * Maps all characters of a UTF-8 string to their ASCII transliteration.
 *
 * Characters without an entry in the transliteration table are kept as they are.
 * Bytes of the form 10xxxxxx that are not part of a collected sequence are
 * dropped from the result.
 *
 * @param string $str UTF-8 string
 * @return string The converted string, or $str unchanged if the table could not be initialized
 */
public function utf8_char_mapping($str)
{
    if (!$this->initUnicodeData()) {
        // Do nothing
        return $str;
    }
    $table = &$this->toASCII['utf-8'];
    $converted = '';
    for ($pos = 0; isset($str[$pos]); $pos++) {
        $byte = ord($str[$pos]);
        if (($byte & 128) === 0) {
            // single-byte (0xxxxxxx)
            $char = $str[$pos];
        } elseif (($byte & 192) === 192) {
            // multi-byte starting byte (11xxxxxx): the number of leading 1-bits is the number of bytes
            $byteCount = 0;
            while ($byte & 128) {
                $byteCount++;
                $byte = $byte << 1;
            }
            $char = substr($str, $pos, $byteCount);
            // Continue behind the sequence; the loop itself adds the last step
            $pos += $byteCount - 1;
        } else {
            // continuation byte (10xxxxxx) outside of a sequence
            $char = '';
        }
        $converted .= $table[$char] ?? $char;
    }
    return $converted;
}
785 
786  /********************************************
787  *
788  * Internal EUC string operation functions
789  *
790  * Extended Unix Code:
791  * ASCII compatible 7bit single bytes chars
792  * 8bit two byte chars
793  *
794  * Shift-JIS is treated as a special case.
795  *
796  ********************************************/
797 
/**
 * Maps all characters of a string in an EUC based charset to their ASCII transliteration.
 *
 * Bytes below 128 are single-byte characters, everything else is read as a
 * two-byte character. For 'shift_jis' the bytes from 160 to 223 are single-byte
 * characters as well. Characters without an entry in the transliteration table
 * are kept as they are.
 *
 * @param string $str String in an EUC based charset
 * @param string $charset Charset of $str
 * @return string The converted string, or $str unchanged if the table could not be initialized
 */
public function euc_char_mapping($str, $charset)
{
    if (!$this->initToASCII($charset)) {
        // do nothing
        return $str;
    }
    $table = &$this->toASCII[$charset];
    $isShiftJis = $charset === 'shift_jis';
    $converted = '';
    for ($pos = 0; isset($str[$pos]); $pos++) {
        $char = $str[$pos];
        $byte = ord($char);
        if ($isShiftJis) {
            $isDoubleByte = ($byte >= 128 && $byte < 160) || $byte >= 224;
        } else {
            $isDoubleByte = $byte >= 128;
        }
        if ($isDoubleByte) {
            // Take the following byte as well and step over it
            $char = substr($str, $pos, 2);
            $pos++;
        }
        $converted .= $table[$char] ?? $char;
    }
    return $converted;
}
837 }
‪TYPO3\CMS\Core\Charset\CharsetConverter\euc_char_mapping
‪string euc_char_mapping($str, $charset)
Definition: CharsetConverter.php:800
‪TYPO3\CMS\Core\Charset\CharsetConverter\utf8_encode
‪string utf8_encode($str, $charset)
Definition: CharsetConverter.php:134
‪TYPO3\CMS\Core\Charset\CharsetConverter\$noCharByteVal
‪int $noCharByteVal
Definition: CharsetConverter.php:59
‪TYPO3\CMS\Core\Charset\CharsetConverter\UnumberToChar
‪string UnumberToChar($unicodeInteger)
Definition: CharsetConverter.php:327
‪TYPO3\CMS\Core\Charset\CharsetConverter\$parsedCharsets
‪array $parsedCharsets
Definition: CharsetConverter.php:65
‪TYPO3\CMS\Core\Charset\CharsetConverter\$twoByteSets
‪array $twoByteSets
Definition: CharsetConverter.php:77
‪TYPO3\CMS\Core\Charset\CharsetConverter\utf8CharToUnumber
‪int utf8CharToUnumber($str, $hex=false)
Definition: CharsetConverter.php:373
‪TYPO3\CMS\Core\Charset\CharsetConverter\utf8_to_numberarray
‪array utf8_to_numberarray($str)
Definition: CharsetConverter.php:265
‪TYPO3\CMS\Core\Charset\CharsetConverter\conv
‪string conv($inputString, $fromCharset, $toCharset)
Definition: CharsetConverter.php:105
‪TYPO3\CMS\Core\Charset\CharsetConverter
Definition: CharsetConverter.php:54
‪TYPO3\CMS\Core\Charset\CharsetConverter\initCharset
‪int initCharset($charset)
Definition: CharsetConverter.php:415
‪TYPO3\CMS\Core\Utility\ExtensionManagementUtility
Definition: ExtensionManagementUtility.php:43
‪TYPO3\CMS\Core\Charset
Definition: CharsetConverter.php:16
‪TYPO3\CMS\Core\Charset\CharsetConverter\initUnicodeData
‪int initUnicodeData()
Definition: CharsetConverter.php:483
‪TYPO3\CMS\Core\Charset\CharsetConverter\utf8_decode
‪string utf8_decode($str, $charset, $useEntityForNoChar=false)
Definition: CharsetConverter.php:196
‪TYPO3\CMS\Core\Utility\GeneralUtility\writeFileToTypo3tempDir
‪static string writeFileToTypo3tempDir($filepath, $content)
Definition: GeneralUtility.php:1928
‪TYPO3\CMS\Core\Charset\CharsetConverter\$eucBasedSets
‪array $eucBasedSets
Definition: CharsetConverter.php:85
‪TYPO3\CMS\Core\Charset\UnknownCharsetException
Definition: UnknownCharsetException.php:24
‪TYPO3\CMS\Core\Charset\CharsetConverter\specCharsToASCII
‪string specCharsToASCII($charset, $string)
Definition: CharsetConverter.php:692
‪TYPO3\CMS\Core\Utility\GeneralUtility\trimExplode
‪static string[] trimExplode($delim, $string, $removeEmptyValues=false, $limit=0)
Definition: GeneralUtility.php:1059
‪TYPO3\CMS\Core\Charset\CharsetConverter\$toASCII
‪array $toASCII
Definition: CharsetConverter.php:71
‪TYPO3\CMS\Core\SingletonInterface
Definition: SingletonInterface.php:23
‪TYPO3\CMS\Core\Charset\CharsetConverter\utf8_char_mapping
‪string utf8_char_mapping($str)
Definition: CharsetConverter.php:748
‪TYPO3\CMS\Core\Core\Environment
Definition: Environment.php:40
‪TYPO3\CMS\Core\Charset\CharsetConverter\initToASCII
‪int initToASCII($charset)
Definition: CharsetConverter.php:646
‪TYPO3\CMS\Core\Utility\ExtensionManagementUtility\extPath
‪static string extPath($key, $script='')
Definition: ExtensionManagementUtility.php:127
‪TYPO3\CMS\Core\Utility\GeneralUtility
Definition: GeneralUtility.php:46
‪TYPO3\CMS\Core\Charset\CharsetConverter\sb_char_mapping
‪string sb_char_mapping($str, $charset)
Definition: CharsetConverter.php:717
‪TYPO3\CMS\Core\Core\Environment\getVarPath
‪static string getVarPath()
Definition: Environment.php:192