124 'us-ascii' =>
'ascii',
125 'cp819' =>
'iso-8859-1',
126 'ibm819' =>
'iso-8859-1',
127 'iso-ir-100' =>
'iso-8859-1',
128 'iso-ir-101' =>
'iso-8859-2',
129 'iso-ir-109' =>
'iso-8859-3',
130 'iso-ir-110' =>
'iso-8859-4',
131 'iso-ir-144' =>
'iso-8859-5',
132 'iso-ir-127' =>
'iso-8859-6',
133 'iso-ir-126' =>
'iso-8859-7',
134 'iso-ir-138' =>
'iso-8859-8',
135 'iso-ir-148' =>
'iso-8859-9',
136 'iso-ir-157' =>
'iso-8859-10',
137 'iso-ir-179' =>
'iso-8859-13',
138 'iso-ir-199' =>
'iso-8859-14',
139 'iso-ir-203' =>
'iso-8859-15',
140 'csisolatin1' =>
'iso-8859-1',
141 'csisolatin2' =>
'iso-8859-2',
142 'csisolatin3' =>
'iso-8859-3',
143 'csisolatin5' =>
'iso-8859-9',
144 'csisolatin8' =>
'iso-8859-14',
145 'csisolatin9' =>
'iso-8859-15',
146 'csisolatingreek' =>
'iso-8859-7',
147 'iso-celtic' =>
'iso-8859-14',
148 'latin1' =>
'iso-8859-1',
149 'latin2' =>
'iso-8859-2',
150 'latin3' =>
'iso-8859-3',
151 'latin5' =>
'iso-8859-9',
152 'latin6' =>
'iso-8859-10',
153 'latin8' =>
'iso-8859-14',
154 'latin9' =>
'iso-8859-15',
155 'l1' =>
'iso-8859-1',
156 'l2' =>
'iso-8859-2',
157 'l3' =>
'iso-8859-3',
158 'l5' =>
'iso-8859-9',
159 'l6' =>
'iso-8859-10',
160 'l8' =>
'iso-8859-14',
161 'l9' =>
'iso-8859-15',
162 'cyrillic' =>
'iso-8859-5',
163 'arabic' =>
'iso-8859-6',
164 'tis-620' =>
'iso-8859-11',
165 'win874' =>
'windows-874',
166 'win1250' =>
'windows-1250',
167 'win1251' =>
'windows-1251',
168 'win1252' =>
'windows-1252',
169 'win1253' =>
'windows-1253',
170 'win1254' =>
'windows-1254',
171 'win1255' =>
'windows-1255',
172 'win1256' =>
'windows-1256',
173 'win1257' =>
'windows-1257',
174 'win1258' =>
'windows-1258',
175 'cp1250' =>
'windows-1250',
176 'cp1251' =>
'windows-1251',
177 'cp1252' =>
'windows-1252',
178 'ms-ee' =>
'windows-1250',
179 'ms-ansi' =>
'windows-1252',
180 'ms-greek' =>
'windows-1253',
181 'ms-turk' =>
'windows-1254',
182 'winbaltrim' =>
'windows-1257',
183 'koi-8ru' =>
'koi-8r',
187 'macintosh' =>
'macroman',
188 'euc-cn' =>
'gb2312',
189 'x-euc-cn' =>
'gb2312',
195 'sjis' =>
'shift_jis',
196 'shift-jis' =>
'shift_jis',
197 'cp932' =>
'shift_jis',
214 'af' =>
'west_european',
219 'bs' =>
'east_european',
221 'cs' =>
'east_european',
223 'da' =>
'west_european',
225 'de' =>
'west_european',
227 'es' =>
'west_european',
232 'eu' =>
'west_european',
236 'fi' =>
'west_european',
238 'fo' =>
'west_european',
240 'fr' =>
'west_european',
242 'ga' =>
'west_european',
244 'gl' =>
'west_european',
251 'hr' =>
'east_european',
253 'hu' =>
'east_european',
257 'is' =>
'west_european',
259 'it' =>
'west_european',
264 'kl' =>
'west_european',
269 'lt' =>
'lithuanian',
270 'lv' =>
'west_european',
272 'nl' =>
'west_european',
274 'no' =>
'west_european',
276 'nb' =>
'west_european',
278 'nn' =>
'west_european',
280 'pl' =>
'east_european',
282 'pt' =>
'west_european',
284 'ro' =>
'east_european',
288 'sk' =>
'east_european',
290 'sl' =>
'east_european',
294 'sv' =>
'west_european',
301 'vi' =>
'vietnamese',
305 'afk' =>
'west_european',
310 'cat' =>
'west_european',
312 'chs' =>
'simpl_chinese',
313 'cht' =>
'trad_chinese',
314 'csy' =>
'east_european',
316 'dan' =>
'west_european',
318 'deu' =>
'west_european',
320 'dea' =>
'west_european',
322 'des' =>
'west_european',
324 'ena' =>
'west_european',
326 'enc' =>
'west_european',
328 'eng' =>
'west_european',
330 'enz' =>
'west_european',
332 'enu' =>
'west_european',
334 'euq' =>
'west_european',
336 'fos' =>
'west_european',
340 'fin' =>
'west_european',
342 'fra' =>
'west_european',
344 'frb' =>
'west_european',
346 'frc' =>
'west_european',
348 'frs' =>
'west_european',
352 'glg' =>
'west_european',
358 'hun' =>
'east_european',
360 'isl' =>
'west_european',
362 'ita' =>
'west_european',
364 'its' =>
'west_european',
370 'lth' =>
'lithuanian',
371 'lvi' =>
'west_european',
373 'msl' =>
'west_european',
375 'nlb' =>
'west_european',
377 'nld' =>
'west_european',
379 'nor' =>
'west_european',
381 'non' =>
'west_european',
383 'plk' =>
'east_european',
385 'ptg' =>
'west_european',
387 'ptb' =>
'west_european',
389 'rom' =>
'east_european',
393 'slv' =>
'east_european',
395 'sky' =>
'east_european',
397 'srl' =>
'east_european',
401 'esp' =>
'west_european',
403 'esm' =>
'west_european',
405 'esn' =>
'west_european',
407 'sve' =>
'west_european',
416 'afrikaans' =>
'west_european',
417 'albanian' =>
'albanian',
418 'arabic' =>
'arabic',
419 'basque' =>
'west_european',
420 'bosnian' =>
'east_european',
421 'bulgarian' =>
'east_european',
422 'catalan' =>
'west_european',
423 'croatian' =>
'east_european',
424 'czech' =>
'east_european',
425 'danish' =>
'west_european',
426 'dutch' =>
'west_european',
427 'english' =>
'west_european',
428 'esperanto' =>
'unicode',
429 'estonian' =>
'estonian',
430 'faroese' =>
'west_european',
432 'finnish' =>
'west_european',
433 'french' =>
'west_european',
434 'galician' =>
'west_european',
435 'georgian' =>
'unicode',
436 'german' =>
'west_european',
438 'greenlandic' =>
'west_european',
439 'hebrew' =>
'hebrew',
440 'hindi' =>
'unicode',
441 'hungarian' =>
'east_european',
442 'icelandic' =>
'west_european',
443 'italian' =>
'west_european',
444 'khmer' =>
'unicode',
445 'latvian' =>
'west_european',
446 'lettish' =>
'west_european',
447 'lithuanian' =>
'lithuanian',
448 'malay' =>
'west_european',
449 'norwegian' =>
'west_european',
450 'persian' =>
'arabic',
451 'polish' =>
'east_european',
452 'portuguese' =>
'west_european',
453 'russian' =>
'cyrillic',
454 'romanian' =>
'east_european',
455 'serbian' =>
'cyrillic',
456 'slovak' =>
'east_european',
457 'slovenian' =>
'east_european',
458 'spanish' =>
'west_european',
459 'svedish' =>
'west_european',
461 'turkish' =>
'turkish',
462 'ukrainian' =>
'cyrillic' 470 'west_european' =>
'iso-8859-1',
471 'estonian' =>
'iso-8859-1',
472 'east_european' =>
'iso-8859-2',
473 'baltic' =>
'iso-8859-4',
474 'cyrillic' =>
'iso-8859-5',
475 'arabic' =>
'iso-8859-6',
476 'greek' =>
'iso-8859-7',
477 'hebrew' =>
'iso-8859-8',
478 'turkish' =>
'iso-8859-9',
479 'thai' =>
'iso-8859-11',
481 'lithuanian' =>
'iso-8859-13',
482 'chinese' =>
'gb2312',
484 'japanese' =>
'euc-jp',
485 'korean' =>
'euc-kr',
486 'simpl_chinese' =>
'gb2312',
487 'trad_chinese' =>
'big5',
489 'unicode' =>
'utf-8',
490 'albanian' =>
'utf-8' 498 'east_european' =>
'windows-1250',
499 'cyrillic' =>
'windows-1251',
500 'west_european' =>
'windows-1252',
501 'greek' =>
'windows-1253',
502 'turkish' =>
'windows-1254',
503 'hebrew' =>
'windows-1255',
504 'arabic' =>
'windows-1256',
505 'baltic' =>
'windows-1257',
506 'estonian' =>
'windows-1257',
507 'lithuanian' =>
'windows-1257',
508 'vietnamese' =>
'windows-1258',
511 'chinese' =>
'gb2312',
512 'japanese' =>
'shift_jis',
513 'simpl_chinese' =>
'gb2312',
514 'trad_chinese' =>
'big5',
515 'albanian' =>
'windows-1250',
524 'japanese.euc' =>
'euc-jp',
525 'ja_jp.ujis' =>
'euc-jp',
526 'korean.euc' =>
'euc-kr',
527 'sr@Latn' =>
'iso-8859-2',
540 'ar' =>
'iso-8859-6',
541 'ba' =>
'iso-8859-2',
542 'bg' =>
'windows-1251',
544 'ca' =>
'iso-8859-15',
546 'cs' =>
'windows-1250',
547 'cz' =>
'windows-1250',
551 'el' =>
'iso-8859-7',
554 'et' =>
'iso-8859-4',
564 'gr' =>
'iso-8859-7',
568 'hr' =>
'windows-1250',
569 'hu' =>
'iso-8859-2',
579 'lt' =>
'windows-1257',
585 'pl' =>
'iso-8859-2',
589 'ro' =>
'iso-8859-2',
590 'ru' =>
'windows-1251',
592 'si' =>
'windows-1250',
593 'sk' =>
'windows-1250',
594 'sl' =>
'windows-1250',
598 'th' =>
'iso-8859-11',
599 'tr' =>
'iso-8859-9',
600 'ua' =>
'windows-1251',
601 'uk' =>
'windows-1251',
622 $charset = trim(strtolower($charset));
623 if (isset($this->synonyms[$charset])) {
624 $charset = $this->synonyms[$charset];
642 $locale = strtolower($locale);
644 if (isset($this->locale_to_charset[$locale])) {
645 return $this->locale_to_charset[$locale];
648 list($locale, $modifier) = explode(
'@', $locale);
650 list($locale, $charset) = explode(
'.', $locale);
655 if ($modifier ==
'euro') {
656 return 'iso-8859-15';
659 list($language, $country) = explode(
'_', $locale);
660 if (isset($this->lang_to_script[$language])) {
661 $script = $this->lang_to_script[$language];
663 if (TYPO3_OS ==
'WIN') {
664 $cs = $this->script_to_charset_windows[$script] ?:
'windows-1252';
666 $cs = $this->script_to_charset_unix[$script] ?:
'utf-8';
687 public function conv($str, $fromCS, $toCS, $useEntityForNoChar = 0) {
688 if ($fromCS == $toCS) {
692 if ($toCS ==
'utf-8' || !$useEntityForNoChar) {
693 switch (
$GLOBALS[
'TYPO3_CONF_VARS'][
'SYS'][
't3lib_cs_convMethod']) {
695 $conv_str = mb_convert_encoding($str, $toCS, $fromCS);
696 if (FALSE !== $conv_str) {
702 $conv_str = iconv($fromCS, $toCS .
'//TRANSLIT', $str);
703 if (FALSE !== $conv_str) {
708 $conv_str = recode_string($fromCS .
'..' . $toCS, $str);
709 if (FALSE !== $conv_str) {
715 if ($fromCS !=
'utf-8') {
718 if ($toCS !=
'utf-8') {
719 $str = $this->
utf8_decode($str, $toCS, $useEntityForNoChar);
736 public function convArray(&$array, $fromCS, $toCS, $useEntityForNoChar = 0) {
737 foreach ($array as $key => $value) {
738 if (is_array($array[$key])) {
739 $this->
convArray($array[$key], $fromCS, $toCS, $useEntityForNoChar);
740 } elseif (is_string($array[$key])) {
741 $array[$key] = $this->
conv($array[$key], $fromCS, $toCS, $useEntityForNoChar);
755 if ($charset ===
'utf-8') {
764 for ($a = 0; $a < $strLen; $a++) {
765 $chr =
substr($str, $a, 1);
768 if (isset($this->twoByteSets[$charset])) {
769 $ord2 = ord($str[$a + 1]);
771 $ord = $ord << 8 | $ord2;
773 if (isset($this->parsedCharsets[$charset][
'local'][$ord])) {
774 $outStr .= $this->parsedCharsets[$charset][
'local'][$ord];
776 $outStr .= chr($this->noCharByteVal);
780 } elseif ($ord > 127) {
783 if (isset($this->eucBasedSets[$charset])) {
785 if ($charset !=
'shift_jis' || ($ord < 160 || $ord > 223)) {
787 $ord2 = ord(
substr($str, $a, 1));
788 $ord = $ord * 256 + $ord2;
791 if (isset($this->parsedCharsets[$charset][
'local'][$ord])) {
793 $outStr .= $this->parsedCharsets[$charset][
'local'][$ord];
795 $outStr .= chr($this->noCharByteVal);
814 public function utf8_decode($str, $charset, $useEntityForNoChar = 0) {
815 if ($charset ===
'utf-8') {
825 for ($a = 0, $i = 0; $a < $strLen; $a++, $i++) {
826 $chr =
substr($str, $a, 1);
835 for ($b = 0; $b < 8; $b++) {
842 $buf .=
substr($str, $a, 1);
848 if (isset($this->parsedCharsets[$charset][
'utf8'][$buf])) {
850 $mByte = $this->parsedCharsets[$charset][
'utf8'][$buf];
853 $outStr .= chr(($mByte >> 8 & 255)) . chr(($mByte & 255));
855 $outStr .= chr($mByte);
857 } elseif ($useEntityForNoChar) {
861 $outStr .= chr($this->noCharByteVal);
864 $outStr .= chr($this->noCharByteVal);
886 for ($a = 0; $a < $strLen; $a++) {
887 $chr =
substr($str, $a, 1);
896 for ($b = 0; $b < 8; $b++) {
903 $buf .=
substr($str, $a, 1);
910 $outStr .= chr($this->noCharByteVal);
928 if ($alsoStdHtmlEnt) {
929 $trans_tbl = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_COMPAT,
'UTF-8'));
931 $token = md5(microtime());
932 $parts = explode($token, preg_replace(
'/(&([#[:alnum:]]*);)/', $token .
'${2}' . $token, $str));
933 foreach ($parts as $k => $v) {
940 if (
substr($v, $position, 1) ==
'#') {
942 if (
substr($v, $position, 1) ==
'x') {
943 $v = hexdec(
substr($v, ++$position));
945 $v =
substr($v, $position);
948 } elseif ($alsoStdHtmlEnt && isset($trans_tbl[
'&' . $v .
';'])) {
950 $v = $trans_tbl[
'&' . $v .
';'];
954 $parts[$k] =
'&' . $v .
';';
957 return implode(
'', $parts);
979 for ($a = 0; $a < $strLen; $a++) {
980 $chr =
substr($str, $a, 1);
989 for ($b = 0; $b < 8; $b++) {
996 $buf .=
substr($str, $a, 1);
1006 $outArr[] = $retChar ? chr($ord) : $ord;
1036 $str .= chr($cbyte);
1038 if ($cbyte < 2048) {
1039 $str .= chr(192 | $cbyte >> 6);
1040 $str .= chr(128 | $cbyte & 63);
1042 if ($cbyte < 65536) {
1043 $str .= chr(224 | $cbyte >> 12);
1044 $str .= chr(128 | $cbyte >> 6 & 63);
1045 $str .= chr(128 | $cbyte & 63);
1047 if ($cbyte < 2097152) {
1048 $str .= chr(240 | $cbyte >> 18);
1049 $str .= chr(128 | $cbyte >> 12 & 63);
1050 $str .= chr(128 | $cbyte >> 6 & 63);
1051 $str .= chr(128 | $cbyte & 63);
1053 if ($cbyte < 67108864) {
1054 $str .= chr(248 | $cbyte >> 24);
1055 $str .= chr(128 | $cbyte >> 18 & 63);
1056 $str .= chr(128 | $cbyte >> 12 & 63);
1057 $str .= chr(128 | $cbyte >> 6 & 63);
1058 $str .= chr(128 | $cbyte & 63);
1060 if ($cbyte < 2147483648) {
1061 $str .= chr(252 | $cbyte >> 30);
1062 $str .= chr(128 | $cbyte >> 24 & 63);
1063 $str .= chr(128 | $cbyte >> 18 & 63);
1064 $str .= chr(128 | $cbyte >> 12 & 63);
1065 $str .= chr(128 | $cbyte >> 6 & 63);
1066 $str .= chr(128 | $cbyte & 63);
1069 $str .= chr($this->noCharByteVal);
1091 $ord = ord($str[0]);
1093 if (($ord & 192) == 192) {
1096 for ($b = 0; $b < 8; $b++) {
1101 $binBuf .=
substr(
'00000000' . decbin(ord(
substr($str, ($b + 1), 1))), -6);
1106 $binBuf =
substr((
'00000000' . decbin(ord($str[0]))), -(6 - $b)) . $binBuf;
1107 $int = bindec($binBuf);
1111 return $hex ?
'x' . dechex($int) : $int;
1132 if (!is_array($this->parsedCharsets[$charset])) {
1140 if ($cacheFile && @is_file($cacheFile)) {
1146 $this->parsedCharsets[$charset] = array(
'local' => array(),
'utf8' => array());
1149 foreach ($lines as $value) {
1151 if (trim($value) && $value[0] !==
'#') {
1154 if (!$detectedType) {
1155 $detectedType = preg_match(
'/[[:space:]]*0x([[:alnum:]]*)[[:space:]]+0x([[:alnum:]]*)[[:space:]]+/', $value) ?
'whitespaced' :
'ms-token';
1157 if ($detectedType ==
'ms-token') {
1158 list($hexbyte, $utf8) = preg_split(
'/[=:]/', $value, 3);
1159 } elseif ($detectedType ==
'whitespaced') {
1161 preg_match(
'/[[:space:]]*0x([[:alnum:]]*)[[:space:]]+0x([[:alnum:]]*)[[:space:]]+/', $value, $regA);
1162 $hexbyte = $regA[1];
1163 $utf8 =
'U+' . $regA[2];
1165 $decval = hexdec(trim($hexbyte));
1166 if ($decval > 127) {
1167 $utf8decval = hexdec(
substr(trim($utf8), 2));
1168 $this->parsedCharsets[$charset][
'local'][$decval] = $this->
UnumberToChar($utf8decval);
1169 $this->parsedCharsets[$charset][
'utf8'][$this->parsedCharsets[$charset][
'local'][$decval]] = $decval;
1203 if (is_array($this->caseFolding[
'utf-8'])) {
1207 if ($cacheFileCase && @is_file($cacheFileCase)) {
1213 if (is_array($this->toASCII[
'utf-8'])) {
1217 if ($cacheFileASCII && @is_file($cacheFileASCII)) {
1228 $fh = fopen($unicodeDataFile,
'rb');
1234 $this->caseFolding[
'utf-8'] = array();
1235 $utf8CaseFolding = &$this->caseFolding[
'utf-8'];
1237 $utf8CaseFolding[
'toUpper'] = array();
1238 $utf8CaseFolding[
'toLower'] = array();
1239 $utf8CaseFolding[
'toTitle'] = array();
1241 $decomposition = array();
1248 while (!feof($fh)) {
1249 $line = fgets($fh, 4096);
1251 list($char, $name, $cat, , , $decomp, , , $num, , , , $upper, $lower, $title, ) = explode(
';', rtrim($line));
1252 $ord = hexdec($char);
1259 $utf8CaseFolding[
'toUpper'][$utf8_char] = $this->
UnumberToChar(hexdec($upper));
1262 $utf8CaseFolding[
'toLower'][$utf8_char] = $this->
UnumberToChar(hexdec($lower));
1265 if ($title && $title != $upper) {
1266 $utf8CaseFolding[
'toTitle'][$utf8_char] = $this->
UnumberToChar(hexdec($title));
1271 $mark[
'U+' . $char] = 1;
1275 if ($ord > 128 && $num !=
'') {
1276 $number[
'U+' . $char] = $num;
1281 if (preg_match(
'/^LATIN (SMALL|CAPITAL) LETTER ([A-Z]) WITH/', $name, $match) && !$decomp) {
1282 $c = ord($match[2]);
1283 if ($match[1] ==
'SMALL') {
1286 $decomposition[
'U+' . $char] = array(dechex($c));
1290 if (preg_match(
'/(<.*>)? *(.+)/', $decomp, $match)) {
1291 switch ($match[1]) {
1294 $match[2] =
'0028 ' . $match[2] .
' 0029';
1298 $match[2] =
'005B ' . $match[2] .
' 005D';
1302 if (preg_match(
'/^0020 /', $match[2])) {
1317 $decomposition[
'U+' . $char] = explode(
' ', $match[2]);
1324 $fh = fopen($specialCasingFile,
'rb');
1326 while (!feof($fh)) {
1327 $line = fgets($fh, 4096);
1328 if ($line[0] !=
'#' && trim($line) !=
'') {
1330 if ($cond ==
'' || $cond[0] ==
'#') {
1332 if ($char != $lower) {
1333 $arr = explode(
' ', $lower);
1334 for ($i = 0; isset($arr[$i]); $i++) {
1337 $utf8CaseFolding[
'toLower'][$utf8_char] = implode(
'', $arr);
1339 if ($char != $title && $title != $upper) {
1340 $arr = explode(
' ', $title);
1341 for ($i = 0; isset($arr[$i]); $i++) {
1344 $utf8CaseFolding[
'toTitle'][$utf8_char] = implode(
'', $arr);
1346 if ($char != $upper) {
1347 $arr = explode(
' ', $upper);
1348 for ($i = 0; isset($arr[$i]); $i++) {
1351 $utf8CaseFolding[
'toUpper'][$utf8_char] = implode(
'', $arr);
1362 $fh = fopen($customTranslitFile,
'rb');
1364 while (!feof($fh)) {
1365 $line = fgets($fh, 4096);
1366 if ($line[0] !=
'#' && trim($line) !=
'') {
1369 $omit[
'U+' . $char] = 1;
1371 $decomposition[
'U+' . $char] = explode(
' ', $translit);
1378 foreach ($decomposition as $from => $to) {
1379 $code_decomp = array();
1380 while ($code_value = array_shift($to)) {
1382 if (isset($decomposition[
'U+' . $code_value])) {
1383 foreach (array_reverse($decomposition[
'U+' . $code_value]) as $cv) {
1384 array_unshift($to, $cv);
1386 } elseif (!isset($mark[(
'U+' . $code_value)])) {
1388 array_push($code_decomp, $code_value);
1391 if (count($code_decomp) || isset($omit[$from])) {
1392 $decomposition[$from] = $code_decomp;
1394 unset($decomposition[$from]);
1398 $this->toASCII[
'utf-8'] = array();
1399 $ascii = &$this->toASCII[
'utf-8'];
1400 foreach ($decomposition as $from => $to) {
1401 $code_decomp = array();
1402 while ($code_value = array_shift($to)) {
1403 $ord = hexdec($code_value);
1408 array_push($code_decomp, chr($ord));
1411 $ascii[$this->
UnumberToChar(hexdec($from))] = join(
'', $code_decomp);
1414 foreach ($number as $from => $to) {
1416 if (!isset($ascii[$utf8_char])) {
1417 $ascii[$utf8_char] = $to;
1420 if ($cacheFileCase) {
1423 if ($cacheFileASCII) {
1440 if (is_array($this->caseFolding[$charset])) {
1445 if ($cacheFile && @is_file($cacheFile)) {
1457 $nochar = chr($this->noCharByteVal);
1458 foreach ($this->parsedCharsets[$charset][
'local'] as $ci => $utf8) {
1461 $cc = $this->
utf8_decode($this->caseFolding[
'utf-8'][
'toUpper'][$utf8], $charset);
1462 if ($cc !=
'' && $cc != $nochar) {
1463 $this->caseFolding[$charset][
'toUpper'][$c] = $cc;
1465 $cc = $this->
utf8_decode($this->caseFolding[
'utf-8'][
'toLower'][$utf8], $charset);
1466 if ($cc !=
'' && $cc != $nochar) {
1467 $this->caseFolding[$charset][
'toLower'][$c] = $cc;
1469 $cc = $this->
utf8_decode($this->caseFolding[
'utf-8'][
'toTitle'][$utf8], $charset);
1470 if ($cc !=
'' && $cc != $nochar) {
1471 $this->caseFolding[$charset][
'toTitle'][$c] = $cc;
1477 for ($i = $start; $i <= $end; $i++) {
1478 $this->caseFolding[$charset][
'toUpper'][chr($i)] = chr($i - 32);
1482 for ($i = $start; $i <= $end; $i++) {
1483 $this->caseFolding[$charset][
'toLower'][chr($i)] = chr($i + 32);
1502 if (is_array($this->toASCII[$charset])) {
1507 if ($cacheFile && @is_file($cacheFile)) {
1519 $nochar = chr($this->noCharByteVal);
1520 foreach ($this->parsedCharsets[$charset][
'local'] as $ci => $utf8) {
1523 if (isset($this->toASCII[
'utf-8'][$utf8])) {
1524 $this->toASCII[$charset][$c] = $this->toASCII[
'utf-8'][$utf8];
1550 public function substr($charset, $string, $start, $len = NULL) {
1551 if ($len === 0 || $string ===
'') {
1554 if (
$GLOBALS[
'TYPO3_CONF_VARS'][
'SYS'][
't3lib_cs_utils'] ==
'mbstring') {
1558 $enc = mb_internal_encoding();
1559 mb_internal_encoding($charset);
1560 $str = mb_substr($string, $start);
1562 mb_internal_encoding($enc);
1565 return mb_substr($string, $start, $len, $charset);
1567 } elseif (
$GLOBALS[
'TYPO3_CONF_VARS'][
'SYS'][
't3lib_cs_utils'] ==
'iconv') {
1571 $enc = iconv_get_encoding(
'internal_encoding');
1572 iconv_set_encoding(
'internal_encoding', $charset);
1573 $str = iconv_substr($string, $start);
1575 iconv_set_encoding(
'internal_encoding', $enc);
1578 return iconv_substr($string, $start, $len, $charset);
1580 } elseif ($charset ==
'utf-8') {
1582 } elseif ($this->eucBasedSets[$charset]) {
1583 return $this->
euc_substr($string, $start, $charset, $len);
1584 } elseif ($this->twoByteSets[$charset]) {
1585 return substr($string, $start * 2, $len * 2);
1586 } elseif ($this->fourByteSets[$charset]) {
1587 return substr($string, $start * 4, $len * 4);
1590 return $len === NULL ?
substr($string, $start) :
substr($string, $start, $len);
1604 if (
$GLOBALS[
'TYPO3_CONF_VARS'][
'SYS'][
't3lib_cs_utils'] ==
'mbstring') {
1605 return mb_strlen($string, $charset);
1606 } elseif (
$GLOBALS[
'TYPO3_CONF_VARS'][
'SYS'][
't3lib_cs_utils'] ==
'iconv') {
1607 return iconv_strlen($string, $charset);
1608 } elseif ($charset ==
'utf-8') {
1610 } elseif ($this->eucBasedSets[$charset]) {
1612 } elseif ($this->twoByteSets[$charset]) {
1613 return strlen($string) / 2;
1614 } elseif ($this->fourByteSets[$charset]) {
1615 return strlen($string) / 4;
1632 if ((
int)$len === 0 || mb_strlen($string, $charset) <= abs($len)) {
1636 $string = mb_substr($string, 0, $len, $charset) . $crop;
1638 $string = $crop . mb_substr($string, $len, mb_strlen($string, $charset), $charset);
1655 public function crop($charset, $string, $len, $crop =
'') {
1656 if (
$GLOBALS[
'TYPO3_CONF_VARS'][
'SYS'][
't3lib_cs_utils'] ==
'mbstring') {
1657 return $this->
cropMbstring($charset, $string, $len, $crop);
1659 if ((
int)$len === 0) {
1662 if ($charset ==
'utf-8') {
1664 } elseif ($this->eucBasedSets[$charset]) {
1670 $i =
strlen($string) + $len;
1681 if (
strlen($string[$i])) {
1682 return substr($string, 0, $i) . $crop;
1685 if (
strlen($string[$i - 1])) {
1686 return $crop .
substr($string, $i);
1707 if (
$GLOBALS[
'TYPO3_CONF_VARS'][
'SYS'][
't3lib_cs_utils'] ==
'mbstring') {
1708 return mb_strcut($string, 0, $len, $charset);
1709 } elseif ($charset ==
'utf-8') {
1711 } elseif ($this->eucBasedSets[$charset]) {
1713 } elseif ($this->twoByteSets[$charset]) {
1717 } elseif ($this->fourByteSets[$charset]) {
1723 return substr($string, 0, $len);
1742 if (
$GLOBALS[
'TYPO3_CONF_VARS'][
'SYS'][
't3lib_cs_utils'] ==
'mbstring') {
1743 if ($case ==
'toLower') {
1744 $string = mb_strtolower($string, $charset);
1746 $string = mb_strtoupper($string, $charset);
1748 } elseif ($charset ==
'utf-8') {
1750 } elseif (isset($this->eucBasedSets[$charset])) {
1769 $firstChar = $this->
substr($charset, $string, 0, 1);
1770 $firstChar = $this->
conv_case($charset, $firstChar, $case);
1771 $remainder = $this->
substr($charset, $string, 1);
1772 return $firstChar . $remainder;
1784 if ($charset ===
'utf-8') {
1786 } elseif (isset($this->eucBasedSets[$charset])) {
1803 $allLanguageCodes = array();
1804 $selectedLanguage =
'default';
1806 foreach ($this->charSetArray as $typo3Lang => $charSet) {
1807 $allLanguageCodes[$typo3Lang] = $typo3Lang;
1812 foreach ($this->locales->getIsoMapping() as $typo3Lang => $isoLang) {
1813 $isoLang = join(
'-', explode(
'_', $isoLang));
1814 $allLanguageCodes[$typo3Lang] = $isoLang;
1817 $allLanguageCodes = array_flip($allLanguageCodes);
1820 $sortedPreferredLanguages = array();
1821 foreach ($preferredLanguages as $preferredLanguage) {
1823 if (strpos($preferredLanguage,
';q=') !== FALSE) {
1824 list($preferredLanguage, $quality) = explode(
';q=', $preferredLanguage);
1826 $sortedPreferredLanguages[$preferredLanguage] = $quality;
1829 arsort($sortedPreferredLanguages, SORT_NUMERIC);
1830 foreach ($sortedPreferredLanguages as $preferredLanguage => $quality) {
1831 if (isset($allLanguageCodes[$preferredLanguage])) {
1832 $selectedLanguage = $allLanguageCodes[$preferredLanguage];
1836 list($preferredLanguage, $preferredCountry) = explode(
'-', $preferredLanguage);
1837 if (isset($allLanguageCodes[$preferredLanguage])) {
1838 $selectedLanguage = $allLanguageCodes[$preferredLanguage];
1842 if (!$selectedLanguage || $selectedLanguage ==
'en') {
1843 $selectedLanguage =
'default';
1845 return $selectedLanguage;
1870 $map = &$this->caseFolding[$charset][$opt];
1877 $map = &$this->toASCII[$charset];
1883 for ($i = 0;
strlen($str[$i]); $i++) {
1885 if (isset($map[$c])) {
1911 if ((
string)$len ===
'0') {
1915 if ($byte_start === FALSE) {
1923 $str =
substr($str, $byte_start);
1927 if ($byte_end === FALSE) {
1928 return $len < 0 ?
'' : $str;
1931 return substr($str, 0, $byte_end);
1949 for ($i = 0;
strlen($str[$i]); $i++) {
1954 } elseif (($c & 192) == 192) {
1974 if (ord($str[$i]) & 128) {
1975 for (; $i > 0 && !(ord($str[$i]) & 64); $i--) {
1982 for ($bc = 0, $mbs = ord($str[$i]); $mbs & 128; $mbs = $mbs << 1) {
1986 if ($bc + $i > $len) {
1987 return substr($str, 0, $i);
1990 return substr($str, 0, $len);
2004 if (
$GLOBALS[
'TYPO3_CONF_VARS'][
'SYS'][
't3lib_cs_utils'] ==
'mbstring') {
2005 return mb_strpos($haystack, $needle, $offset,
'utf-8');
2006 } elseif (
$GLOBALS[
'TYPO3_CONF_VARS'][
'SYS'][
't3lib_cs_utils'] ==
'iconv') {
2007 return iconv_strpos($haystack, $needle, $offset,
'utf-8');
2010 if ($byte_offset === FALSE) {
2014 $byte_pos = strpos($haystack, $needle, $byte_offset);
2015 if ($byte_pos === FALSE) {
2032 if (
$GLOBALS[
'TYPO3_CONF_VARS'][
'SYS'][
't3lib_cs_utils'] ==
'mbstring') {
2033 return mb_strrpos($haystack, $needle,
'utf-8');
2034 } elseif (
$GLOBALS[
'TYPO3_CONF_VARS'][
'SYS'][
't3lib_cs_utils'] ==
'iconv') {
2035 return iconv_strrpos($haystack, $needle,
'utf-8');
2037 $byte_pos = strrpos($haystack, $needle);
2038 if ($byte_pos === FALSE) {
2066 for (;
strlen($str[$i]) && $n < $p; $i += $d) {
2067 $c = (int)ord($str[$i]);
2071 } elseif (($c & 192) == 192) {
2082 while (ord($str[$i]) & 128 && !(ord($str[$i]) & 64)) {
2104 for ($i = $pos; $i > 0; $i--) {
2105 $c = (int)ord($str[$i]);
2109 } elseif (($c & 192) == 192) {
2138 $map = &$this->caseFolding[
'utf-8'][$opt];
2141 $map = &$this->toASCII[
'utf-8'];
2146 for ($i = 0;
strlen($str[$i]); $i++) {
2151 } elseif (($c & 192) == 192) {
2153 for ($bc = 0; $c & 128; $c = $c << 1) {
2157 $mbc =
substr($str, $i, $bc);
2160 if (isset($map[$mbc])) {
2191 $sjis = $charset ==
'shift_jis';
2192 for ($i = 0;
strlen($str[$i]) && $i < $len; $i++) {
2195 if ($c >= 128 && $c < 160 || $c >= 224) {
2210 return substr($str, 0, $len - 1);
2212 return substr($str, 0, $len);
2226 public function euc_substr($str, $start, $charset, $len = NULL) {
2228 if ($byte_start === FALSE) {
2232 $str =
substr($str, $byte_start);
2236 if ($byte_end === FALSE) {
2239 return substr($str, 0, $byte_end);
2256 $sjis = $charset ==
'shift_jis';
2258 for ($i = 0;
strlen($str[$i]); $i++) {
2261 if ($c >= 128 && $c < 160 || $c >= 224) {
2284 $sjis = $charset ==
'shift_jis';
2296 for (;
strlen($str[$i]) && $n < $p; $i += $d) {
2299 if ($c >= 128 && $c < 160 || $c >= 224) {
2337 $map = &$this->caseFolding[$charset][$opt];
2344 $map = &$this->toASCII[$charset];
2349 $sjis = $charset ==
'shift_jis';
2351 for ($i = 0;
strlen($str[$i]); $i++) {
2356 if ($c >= 128 && $c < 160 || $c >= 224) {
2357 $mbc =
substr($str, $i, 2);
2363 $mbc =
substr($str, $i, 2);
2367 if (isset($map[$mbc])) {
utf8_byte2char_pos($str, $pos)
convCaseFirst($charset, $string, $case)
utf8_strpos($haystack, $needle, $offset=0)
entities_to_utf8($str, $alsoStdHtmlEnt=FALSE)
euc_char_mapping($str, $charset, $mode, $opt='')
utf8_strtrunc($str, $len)
convArray(&$array, $fromCS, $toCS, $useEntityForNoChar=0)
static validPathStr($theFile)
static extPath($key, $script='')
get_locale_charset($locale)
static writeFileToTypo3tempDir($filepath, $content)
utf8CharToUnumber($str, $hex=0)
euc_char2byte_pos($str, $pos, $charset)
static makeInstance($className)
static trimExplode($delim, $string, $removeEmptyValues=FALSE, $limit=0)
utf8_substr($str, $start, $len=NULL)
euc_strlen($str, $charset)
crop($charset, $string, $len, $crop='')
initUnicodeData($mode=NULL)
utf8_decode($str, $charset, $useEntityForNoChar=0)
euc_substr($str, $start, $charset, $len=NULL)
strlen($charset, $string)
utf8_char_mapping($str, $mode, $opt='')
static getUrl($url, $includeHeader=0, $requestHeaders=FALSE, &$report=NULL)
sb_char_mapping($str, $charset, $mode, $opt='')
euc_strtrunc($str, $len, $charset)
utf8_strrpos($haystack, $needle)
conv($str, $fromCS, $toCS, $useEntityForNoChar=0)
substr($charset, $string, $start, $len=NULL)
getPreferredClientLanguage($languageCodesList)
if(!defined('TYPO3_MODE')) $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_userauth.php']['logoff_pre_processing'][]
utf8_char2byte_pos($str, $pos)
utf8_to_numberarray($str, $convEntities=0, $retChar=0)
static getFileAbsFileName($filename, $onlyRelative=TRUE, $relToTYPO3_mainDir=FALSE)
cropMbstring($charset, $string, $len, $crop='')
initCaseFolding($charset)
strtrunc($charset, $string, $len)
conv_case($charset, $string, $case)
specCharsToASCII($charset, $string)
utf8_encode($str, $charset)
$script_to_charset_windows