‪TYPO3CMS  9.5
IdnaConvert.php
Go to the documentation of this file.
1 <?php
2 
3 // {{{ license
4 
5 /* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4 foldmethod=marker: */
6 //
7 // +----------------------------------------------------------------------+
8 // | This library is free software; you can redistribute it and/or modify |
9 // | it under the terms of the GNU Lesser General Public License as |
10 // | published by the Free Software Foundation; either version 2.1 of the |
11 // | License, or (at your option) any later version. |
12 // | |
13 // | This library is distributed in the hope that it will be useful, but |
14 // | WITHOUT ANY WARRANTY; without even the implied warranty of |
15 // | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 // | Lesser General Public License for more details. |
17 // | |
18 // | You should have received a copy of the GNU Lesser General Public |
19 // | License along with this library; if not, write to the Free Software |
20 // | Foundation, Inc., 51 Franklin St, Boston, MA 02110, United States |
21 // +----------------------------------------------------------------------+
22 //
23 // }}}
24 
52 namespace ‪Mso\IdnaConvert;
53 
55 {
56  const ‪Version = '1.1.0';
57  const ‪SubVersion = 'main';
58 
59  // Internal settings, do not touch!
60  protected ‪$encoding = 'utf8'; // Default input charset is UTF-8
61  protected ‪$strictMode = false; // Behave strict or not
62  protected ‪$idnVersion = '2008'; // Can be either 2003 (old) or 2008 (default)
63 
64  protected ‪$NamePrepData = null;
65  protected ‪$UnicodeTranscoder = null;
66 
73  public function ‪__construct($params = null)
74  {
76 
77  // Kept for backwards compatibility. Consider using the setter methods instead.
78  if (!empty($params) && is_array($params)) {
79  if (isset($params['encoding'])) {
80  $this->‪setEncoding($params['encoding']);
81  }
82 
83  if (isset($params['idn_version'])) {
84  $this->‪setIdnVersion($params['idn_version']);
85  }
86 
87  if (isset($params['strict_mode'])) {
88  $this->‪setStrictMode($params['strict_mode']);
89  }
90  }
91 
92  $this->‪setIdnVersion($this->idnVersion);
93  }
94 
/**
 * Build the full version identifier of this class.
 *
 * @return string the Version and SubVersion constants joined by a hyphen
 */
public function getClassVersion()
{
    return sprintf('%s-%s', self::Version, self::SubVersion);
}
99 
/**
 * Report the encoding currently configured on this converter.
 *
 * @return string the value held in the encoding property
 */
public function getEncoding()
{
    $currentEncoding = $this->encoding;

    return $currentEncoding;
}
107 
/**
 * Select the encoding used when no one-time encoding is passed to a call.
 *
 * @param string $encoding one of 'utf8', 'ucs4_string' or 'ucs4_array'
 * @throws \InvalidArgumentException when the value is none of the supported encodings
 */
public function setEncoding($encoding)
{
    $supported = ['utf8', 'ucs4_string', 'ucs4_array'];

    // Loose (==) membership test is intentional; a strict test would reject
    // values that are currently accepted.
    if (!in_array($encoding, $supported)) {
        throw new \InvalidArgumentException(sprintf('Invalid encoding %s', $encoding));
    }

    $this->encoding = $encoding;
}
123 
/**
 * Tell whether strict mode is switched on.
 *
 * @return bool the value held in the strictMode property
 */
public function isStrictMode()
{
    $strict = $this->strictMode;

    return $strict;
}
131 
136  {
137  $this->strictMode = (‪$strictMode) ? true : false;
138  }
139 
/**
 * Report the IDN version this converter is configured for.
 *
 * @return string|int the value held in the idnVersion property ('2008' by default)
 */
public function getIdnVersion()
{
    $version = $this->idnVersion;

    return $version;
}
147 
152  {
153  if (in_array(‪$idnVersion, ['2003', '2008'])) {
154  if (is_null($this->‪NamePrepData) || ‪$idnVersion != $this->idnVersion) {
155  $this->‪NamePrepData = null; // Ought to destroy the object's reference
156  // Re-instantiate with different data set
157  $this->‪NamePrepData = (‪$idnVersion == 2003)
158  ? new ‪NamePrepData2003()
159  : new ‪NamePrepData();
160  }
161 
162  $this->idnVersion = ‪$idnVersion;
163  } else {
164  throw new \InvalidArgumentException(sprintf('Invalid IDN version %d', ‪$idnVersion));
165  }
166  }
167 
/**
 * Decode the Punycode labels of a domain name, email address or URL.
 *
 * How the (trimmed) input is treated depends on its content:
 *  - it contains "@": handled as an email address; the parts before and after
 *    the first "@" are split on "." and every label is decoded;
 *  - otherwise it contains ":", "." or "/": handled as a URL or dotted name;
 *    when parse_url() reports a host only that host is decoded and the URL is
 *    put back together, else every dot-separated label of the input is decoded;
 *  - otherwise: handled as one single label.
 * A label for which the Punycode decoder returns a falsy value is kept as is.
 *
 * @param string $input the encoded domain name, email address or URL
 * @param string|null $one_time_encoding output encoding for this call only
 *        ('utf8', 'ucs4_string' or 'ucs4_array'); the configured encoding is
 *        used when this is empty
 * @return mixed the decoded text for 'utf8'; for the UCS-4 encodings whatever
 *         UnicodeTranscoder::convert() produces
 * @throws \InvalidArgumentException for an unknown encoding, or when an email
 *         address / URL / dotted name is passed while strict mode is on
 */
public function decode($input, $one_time_encoding = null)
{
    $punyCode = $this->punycodeFactory();

    // Validate an explicitly requested output encoding before doing any work
    if ($one_time_encoding) {
        switch ($one_time_encoding) {
            case 'utf8':
            case 'ucs4_string':
            case 'ucs4_array':
                break;
            default:
                throw new \InvalidArgumentException(sprintf('Invalid encoding %s', $one_time_encoding));
        }
    }

    // Make sure to drop any newline characters around
    $input = trim($input);

    // Compare against false explicitly: strpos() yields 0 for a leading "@",
    // which a plain truthiness test would mistake for "not found".
    if (strpos($input, '@') !== false) {
        if ($this->strictMode) {
            throw new \InvalidArgumentException('Only individual domain name parts can be handled in strict mode');
        }
        list($localPart, $domainPart) = explode('@', $input, 2);
        $return = $this->decodeDottedLabels($punyCode, $localPart)
            . '@'
            . $this->decodeDottedLabels($punyCode, $domainPart);
    } elseif (preg_match('![:\./]!', $input)) {
        if ($this->strictMode) {
            throw new \InvalidArgumentException('Only individual domain name parts can be handled in strict mode');
        }
        $parsed = parse_url($input);
        if (isset($parsed['host'])) {
            $parsed['host'] = $this->decodeDottedLabels($punyCode, $parsed['host']);

            // A component is present unless it is missing or the empty string.
            // empty() must not be used here: it would also discard "0" and 0.
            $present = static function ($key) use ($parsed) {
                return isset($parsed[$key]) && $parsed[$key] !== '';
            };

            $return = '';
            if ($present('scheme')) {
                $return .= $parsed['scheme'] . (strtolower($parsed['scheme']) == 'mailto' ? ':' : '://');
            }
            if ($present('user')) {
                $return .= $parsed['user'] . ($present('pass') ? ':' . $parsed['pass'] : '') . '@';
            }
            $return .= $parsed['host'];
            if ($present('port')) {
                $return .= ':' . $parsed['port'];
            }
            if ($present('path')) {
                $return .= $parsed['path'];
            }
            if ($present('query')) {
                $return .= '?' . $parsed['query'];
            }
            if ($present('fragment')) {
                $return .= '#' . $parsed['fragment'];
            }
        } else {
            // parse_url() found no host, fall back to decoding label by label
            $return = $this->decodeDottedLabels($punyCode, $input);
        }
    } else {
        // Neither email nor URL: treat the input as one single label
        $return = $punyCode->decode($input);
        if (!$return) {
            $return = $input;
        }
    }

    // The decoded text is UTF-8; other output formats need conversion here.
    // A one-time encoding takes precedence over the configured one.
    $outputEncoding = ($one_time_encoding) ? $one_time_encoding : $this->encoding;
    switch ($outputEncoding) {
        case 'utf8':
            return $return;
        case 'ucs4_string':
            return $this->UnicodeTranscoder->convert($return, 'utf8', 'ucs4');
        case 'ucs4_array':
            return $this->UnicodeTranscoder->convert($return, 'utf8', 'ucs4array');
        default:
            throw new \InvalidArgumentException(sprintf('Unsupported output encoding %s', $outputEncoding));
    }
}

/**
 * Decode every dot-separated label of a string, keeping labels the decoder
 * returns a falsy value for unchanged.
 *
 * @param Punycode $punyCode decoder to run each label through
 * @param string $dotted text to split on "."
 * @return string the labels joined with "." again
 */
private function decodeDottedLabels($punyCode, $dotted)
{
    $labels = explode('.', $dotted);
    foreach ($labels as $index => $label) {
        $plain = $punyCode->decode($label);
        if ($plain) {
            $labels[$index] = $plain;
        }
    }

    return implode('.', $labels);
}
269 
/**
 * Encode a domain name, email address or URL to its Punycode form.
 *
 * The input is first brought into a UCS-4 code point array. It is then cut
 * into segments at the anchor characters ".", "/", ":", "?" and "@"; the
 * code points 0x3002, 0xFF0E and 0xFF61 are replaced by a plain dot first.
 * Each segment is run through the Punycode encoder; a segment the encoder
 * returns a falsy value for is converted back to UTF-8 unchanged. Anchor
 * characters are copied to the output as they are, including one in the very
 * first position.
 *
 * @param mixed $decoded the text to encode, in the encoding named by
 *        $one_time_encoding or else the configured one
 * @param string|bool $one_time_encoding input encoding for this call only
 *        ('utf8', 'ucs4_string' or 'ucs4_array'); falsy to use the configured one
 * @return string the encoded text, '' for empty input
 * @throws \InvalidArgumentException for an unsupported input encoding, or when
 *         an anchor character is met while strict mode is on
 */
public function encode($decoded, $one_time_encoding = false)
{
    // Force the input into a UCS-4 array; a one-time encoding takes
    // precedence over the configured one.
    $inputEncoding = $one_time_encoding ? $one_time_encoding : $this->encoding;
    switch ($inputEncoding) {
        case 'utf8':
            $decoded = $this->UnicodeTranscoder->convert($decoded, 'utf8', 'ucs4array');
            break;
        case 'ucs4_string':
            $decoded = $this->UnicodeTranscoder->convert($decoded, 'ucs4', 'ucs4array');
            break;
        case 'ucs4_array':
            break;
        default:
            throw new \InvalidArgumentException(sprintf('Unsupported input encoding %s', $inputEncoding));
    }

    // No input, no output
    if (empty($decoded)) {
        return '';
    }

    $punyCode = $this->punycodeFactory();

    // Index at which the segment currently being collected starts
    $last_begin = 0;
    $output = '';
    foreach ($decoded as $k => $v) {
        switch ($v) {
            case 0x3002:
            case 0xFF0E:
            case 0xFF61:
                // Make sure to use just the plain dot, then treat it as an anchor
                $decoded[$k] = 0x2E;
                // no break
            case 0x2E:
            case 0x2F:
            case 0x3A:
            case 0x3F:
            case 0x40:
                // Neither email addresses nor URLs allowed in strict mode
                if ($this->strictMode) {
                    throw new \InvalidArgumentException('Neither email addresses nor URLs are allowed in strict mode.');
                }
                // Nothing precedes an anchor at index 0, so there is no
                // segment to encode in that case.
                if ($k) {
                    $output .= $this->encodeSegment($punyCode, array_slice($decoded, $last_begin, $k - $last_begin));
                }
                // Always keep the anchor itself; skipping it for index 0 would
                // silently drop the first character of the input.
                $output .= chr($decoded[$k]);
                $last_begin = $k + 1;
        }
    }

    // At least one anchor was found: append whatever follows the last one
    if ($last_begin) {
        $inp_len = count($decoded);
        $output .= $this->encodeSegment($punyCode, array_slice($decoded, $last_begin, $inp_len - $last_begin));

        return $output;
    }

    // No anchor at all: the input is one single label
    if (false !== ($output = $punyCode->encode($decoded))) {
        return $output;
    }

    return $this->UnicodeTranscoder->convert($decoded, 'ucs4array', 'utf8');
}

/**
 * Encode one run of code points, falling back to its plain UTF-8 form when
 * the Punycode encoder returns a falsy value.
 *
 * @param Punycode $punyCode encoder to use
 * @param array $codePoints UCS-4 code points of the segment
 * @return string the encoder result, or the segment converted to UTF-8
 */
private function encodeSegment($punyCode, array $codePoints)
{
    $encoded = $punyCode->encode($codePoints);
    if ($encoded) {
        return $encoded;
    }

    return $this->UnicodeTranscoder->convert($codePoints, 'ucs4array', 'utf8');
}
354 
/**
 * Encode the host of a URI to Punycode and leave every other component alone.
 *
 * The URI is taken apart with parse_url(); each dot-separated label of the
 * host is passed to encode() as UTF-8, and a label for which encode() returns
 * a falsy value is kept unchanged. The URI is then put back together from its
 * components.
 *
 * @param string $uri the URI whose host should be encoded
 * @return string the URI with the encoded host
 * @throws \InvalidArgumentException when parse_url() reports no host
 */
public function encodeUri($uri)
{
    $parsed = parse_url($uri);
    if (!isset($parsed['host'])) {
        throw new \InvalidArgumentException('The given string does not look like a URI');
    }

    $labels = explode('.', $parsed['host']);
    foreach ($labels as $index => $label) {
        $conv = $this->encode($label, 'utf8');
        if ($conv) {
            $labels[$index] = $conv;
        }
    }
    $parsed['host'] = implode('.', $labels);

    // A component is present unless it is missing or the empty string.
    // empty() must not be used here: it would also discard "0" and 0,
    // e.g. the query of "http://example.com/?0".
    $present = static function ($key) use ($parsed) {
        return isset($parsed[$key]) && $parsed[$key] !== '';
    };

    $return = '';
    if ($present('scheme')) {
        $return .= $parsed['scheme'] . (strtolower($parsed['scheme']) == 'mailto' ? ':' : '://');
    }
    if ($present('user')) {
        $return .= $parsed['user'] . ($present('pass') ? ':' . $parsed['pass'] : '') . '@';
    }
    $return .= $parsed['host'];
    if ($present('port')) {
        $return .= ':' . $parsed['port'];
    }
    if ($present('path')) {
        $return .= $parsed['path'];
    }
    if ($present('query')) {
        $return .= '?' . $parsed['query'];
    }
    if ($present('fragment')) {
        $return .= '#' . $parsed['fragment'];
    }

    return $return;
}
385 
/**
 * Hand out the Punycode worker for the currently configured IDN version.
 *
 * Workers are kept in a function-static array keyed by IDN version, so a
 * worker is built only the first time a given version is requested and the
 * same object is returned afterwards.
 *
 * @return Punycode worker built from this object's NamePrepData and UnicodeTranscoder
 */
protected function punycodeFactory()
{
    static $instances = [];

    $version = $this->idnVersion;
    if (isset($instances[$version])) {
        return $instances[$version];
    }

    $instances[$version] = new Punycode($this->NamePrepData, $this->UnicodeTranscoder);

    return $instances[$version];
}
401 }
‪Mso\IdnaConvert\IdnaConvert\$encoding
‪$encoding
Definition: IdnaConvert.php:60
‪Mso\IdnaConvert\IdnaConvert\setIdnVersion
‪setIdnVersion($idnVersion)
Definition: IdnaConvert.php:151
‪Mso\IdnaConvert\UnicodeTranscoder\convert
‪static mixed convert($data, $from, $to, $safe_mode=false, $safe_char=0xFFFC)
Definition: UnicodeTranscoder.php:40
‪Mso\IdnaConvert\IdnaConvert\SubVersion
‪const SubVersion
Definition: IdnaConvert.php:57
‪Mso\IdnaConvert\IdnaConvert\encodeUri
‪string encodeUri($uri)
Definition: IdnaConvert.php:362
‪Mso\IdnaConvert\IdnaConvert\setStrictMode
‪setStrictMode($strictMode)
Definition: IdnaConvert.php:135
‪Mso\IdnaConvert\IdnaConvert\$NamePrepData
‪$NamePrepData
Definition: IdnaConvert.php:64
‪Mso\IdnaConvert\IdnaConvert\Version
‪const Version
Definition: IdnaConvert.php:56
‪Mso\IdnaConvert\IdnaConvert\$UnicodeTranscoder
‪$UnicodeTranscoder
Definition: IdnaConvert.php:65
‪Mso\IdnaConvert\IdnaConvert\isStrictMode
‪bool isStrictMode()
Definition: IdnaConvert.php:127
‪Mso\IdnaConvert\IdnaConvert\decode
‪string decode($input, $one_time_encoding=null)
Definition: IdnaConvert.php:174
‪Mso\IdnaConvert\NamePrepData2003
Definition: NamePrepData2003.php:6
‪Mso\IdnaConvert\IdnaConvert
Definition: IdnaConvert.php:55
‪Mso\IdnaConvert\IdnaConvert\setEncoding
‪setEncoding($encoding)
Definition: IdnaConvert.php:111
‪Mso\IdnaConvert
Definition: EncodingHelper.php:8
‪Mso\IdnaConvert\IdnaConvert\$strictMode
‪$strictMode
Definition: IdnaConvert.php:61
‪$output
‪$output
Definition: annotationChecker.php:113
‪Mso\IdnaConvert\IdnaConvert\__construct
‪__construct($params=null)
Definition: IdnaConvert.php:73
‪Mso\IdnaConvert\IdnaConvert\punycodeFactory
‪Mso IdnaConvert Punycode punycodeFactory()
Definition: IdnaConvert.php:392
‪Mso\IdnaConvert\IdnaConvert\$idnVersion
‪$idnVersion
Definition: IdnaConvert.php:62
‪Mso\IdnaConvert\IdnaConvert\getEncoding
‪string getEncoding()
Definition: IdnaConvert.php:103
‪Mso\IdnaConvert\Punycode
Definition: Punycode.php:34
‪Mso\IdnaConvert\NamePrepData
Definition: NamePrepData.php:6
‪Mso\IdnaConvert\IdnaConvert\getIdnVersion
‪int getIdnVersion()
Definition: IdnaConvert.php:143
‪Mso\IdnaConvert\IdnaConvert\getClassVersion
‪getClassVersion()
Definition: IdnaConvert.php:95
‪Mso\IdnaConvert\IdnaConvert\encode
‪string encode($decoded, $one_time_encoding=false)
Definition: IdnaConvert.php:276
‪Mso\IdnaConvert\UnicodeTranscoder
Definition: UnicodeTranscoder.php:20