‪TYPO3CMS  9.5
DoubleMetaPhoneUtility.php
Go to the documentation of this file.
1 <?php
3 
/*
 * This file is part of the TYPO3 CMS project.
 *
 * It is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License, either version 2
 * of the License, or any later version.
 *
 * For the full copyright and license information, please read the
 * LICENSE.txt file that was distributed with this source code.
 *
 * The TYPO3 project - inspiring people to share!
 */
16 
24 {
28  public ‪$original = '';
29 
33  public ‪$primary = '';
34 
38  public ‪$secondary = '';
39 
43  public ‪$length = 0;
44 
48  public ‪$last = 0;
49 
53  public ‪$current = 0;
54 
    // Methods
    // TYPO3 specific API to this class. BEGIN
64  public function ‪metaphone($string, $sys_language_uid = 0)
65  {
66  $res = $this->‪DoubleMetaPhone($string);
67  return $res['primary'];
68  }
69 
    // TYPO3 specific API to this class. END
    // Public method
78  public function ‪DoubleMetaPhone($string)
79  {
80  $this->primary = '';
81  $this->secondary = '';
82  $this->current = 0;
83  $this->current = 0;
84  $this->length = strlen($string);
85  $this->last = $this->length - 1;
86  $this->original = $string . ' ';
87  $this->original = strtoupper($this->original);
88  // skip this at beginning of word
89  if ($this->‪StringAt($this->original, 0, 2, ['GN', 'KN', 'PN', 'WR', 'PS'])) {
90  $this->current++;
91  }
92  // Initial 'X' is pronounced 'Z' e.g. 'Xavier'
93  if ($this->original[0] === 'X') {
94  $this->primary .= 'S';
95  // 'Z' maps to 'S'
96  $this->secondary .= 'S';
97  $this->current++;
98  }
99  // main loop
100  while (strlen($this->primary) < 4 || strlen($this->secondary) < 4) {
101  if ($this->current >= $this->length) {
102  break;
103  }
104  switch (substr($this->original, $this->current, 1)) {
105  case 'A':
106 
107  case 'E':
108 
109  case 'I':
110 
111  case 'O':
112 
113  case 'U':
114 
115  case 'Y':
116  if ($this->current == 0) {
117  // all init vowels now map to 'A'
118  $this->primary .= 'A';
119  $this->secondary .= 'A';
120  }
121  $this->current += 1;
122  break;
123  case 'B':
124  // '-mb', e.g. "dumb", already skipped over ...
125  $this->primary .= 'P';
126  $this->secondary .= 'P';
127  if (substr($this->original, $this->current + 1, 1) === 'B') {
128  $this->current += 2;
129  } else {
130  $this->current += 1;
131  }
132  break;
133  case 'Ç':
134  $this->primary .= 'S';
135  $this->secondary .= 'S';
136  $this->current += 1;
137  break;
138  case 'C':
139  // various gremanic
140  if ($this->current > 1 && !$this->‪IsVowel($this->original, $this->current - 2) && $this->‪StringAt($this->original, $this->current - 1, 3, ['ACH']) && (substr($this->original, $this->current + 2, 1) !== 'I' && (substr($this->original, $this->current + 2, 1) !== 'E' || $this->‪StringAt($this->original, $this->current - 2, 6, ['BACHER', 'MACHER'])))) {
141  $this->primary .= 'K';
142  $this->secondary .= 'K';
143  $this->current += 2;
144  break;
145  }
146  // special case 'caesar'
147  if ($this->current == 0 && $this->‪StringAt($this->original, $this->current, 6, ['CAESAR'])) {
148  $this->primary .= 'S';
149  $this->secondary .= 'S';
150  $this->current += 2;
151  break;
152  }
153  // italian 'chianti'
154  if ($this->‪StringAt($this->original, $this->current, 4, ['CHIA'])) {
155  $this->primary .= 'K';
156  $this->secondary .= 'K';
157  $this->current += 2;
158  break;
159  }
160  if ($this->‪StringAt($this->original, $this->current, 2, ['CH'])) {
161  // find 'michael'
162  if ($this->current > 0 && $this->‪StringAt($this->original, $this->current, 4, ['CHAE'])) {
163  $this->primary .= 'K';
164  $this->secondary .= 'X';
165  $this->current += 2;
166  break;
167  }
168  // greek roots e.g. 'chemistry', 'chorus'
169  if ($this->current == 0 && ($this->‪StringAt($this->original, $this->current + 1, 5, ['HARAC', 'HARIS']) || $this->‪StringAt($this->original, $this->current + 1, 3, ['HOR', 'HYM', 'HIA', 'HEM'])) && !$this->‪StringAt($this->original, 0, 5, ['CHORE'])) {
170  $this->primary .= 'K';
171  $this->secondary .= 'K';
172  $this->current += 2;
173  break;
174  }
175  // germanic, greek, or otherwise 'ch' for 'kh' sound
176  if ($this->‪StringAt($this->original, 0, 4, ['VAN ', 'VON ']) || $this->‪StringAt($this->original, 0, 3, ['SCH']) || $this->‪StringAt($this->original, $this->current - 2, 6, ['ORCHES', 'ARCHIT', 'ORCHID']) || $this->‪StringAt($this->original, $this->current + 2, 1, ['T', 'S']) || ($this->‪StringAt($this->original, $this->current - 1, 1, ['A', 'O', 'U', 'E']) || $this->current == 0) && $this->‪StringAt($this->original, $this->current + 2, 1, ['L', 'R', 'N', 'M', 'B', 'H', 'F', 'V', 'W', ' '])) {
177  $this->primary .= 'K';
178  $this->secondary .= 'K';
179  } else {
180  if ($this->current > 0) {
181  if ($this->‪StringAt($this->original, 0, 2, ['MC'])) {
182  // e.g. 'McHugh'
183  $this->primary .= 'K';
184  $this->secondary .= 'K';
185  } else {
186  $this->primary .= 'X';
187  $this->secondary .= 'K';
188  }
189  } else {
190  $this->primary .= 'X';
191  $this->secondary .= 'X';
192  }
193  }
194  $this->current += 2;
195  break;
196  }
197  // e.g. 'czerny'
198  if ($this->‪StringAt($this->original, $this->current, 2, ['CZ']) && !$this->‪StringAt(
199  $this->original,
200  $this->current - 2,
201  4,
202  ['WICZ']
203  )) {
204  $this->primary .= 'S';
205  $this->secondary .= 'X';
206  $this->current += 2;
207  break;
208  }
209  // e.g. 'focaccia'
210  if ($this->‪StringAt($this->original, $this->current + 1, 3, ['CIA'])) {
211  $this->primary .= 'X';
212  $this->secondary .= 'X';
213  $this->current += 3;
214  break;
215  }
216  // double 'C', but not McClellan'
217  if ($this->‪StringAt($this->original, $this->current, 2, ['CC']) && !($this->current == 1 && $this->original[0] === 'M')) {
218  // 'bellocchio' but not 'bacchus'
219  if ($this->‪StringAt($this->original, $this->current + 2, 1, ['I', 'E', 'H']) && !$this->‪StringAt(
220  $this->original,
221  $this->current + 2,
222  2,
223  ['HU']
224  )) {
225  // 'accident', 'accede', 'succeed'
226  if ($this->current == 1 && substr($this->original, $this->current - 1, 1) === 'A' || $this->‪StringAt($this->original, $this->current - 1, 5, ['UCCEE', 'UCCES'])) {
227  $this->primary .= 'KS';
228  $this->secondary .= 'KS';
229  } else {
230  $this->primary .= 'X';
231  $this->secondary .= 'X';
232  }
233  $this->current += 3;
234  break;
235  }
236  // Pierce's rule
237  $this->primary .= 'K';
238  $this->secondary .= 'K';
239  $this->current += 2;
240  break;
241  }
242  if ($this->‪StringAt($this->original, $this->current, 2, ['CK', 'CG', 'CQ'])) {
243  $this->primary .= 'K';
244  $this->secondary .= 'K';
245  $this->current += 2;
246  break;
247  }
248  if ($this->‪StringAt($this->original, $this->current, 2, ['CI', 'CE', 'CY'])) {
249  // italian vs. english
250  if ($this->‪StringAt($this->original, $this->current, 3, ['CIO', 'CIE', 'CIA'])) {
251  $this->primary .= 'S';
252  $this->secondary .= 'X';
253  } else {
254  $this->primary .= 'S';
255  $this->secondary .= 'S';
256  }
257  $this->current += 2;
258  break;
259  }
260  // else
261  $this->primary .= 'K';
262  $this->secondary .= 'K';
263  // name sent in 'mac caffrey', 'mac gregor'
264  if ($this->‪StringAt($this->original, $this->current + 1, 2, [' C', ' Q', ' G'])) {
265  $this->current += 3;
266  } else {
267  if ($this->‪StringAt($this->original, $this->current + 1, 1, ['C', 'K', 'Q']) && !$this->‪StringAt(
268  $this->original,
269  $this->current + 1,
270  2,
271  ['CE', 'CI']
272  )) {
273  $this->current += 2;
274  } else {
275  $this->current += 1;
276  }
277  }
278  break;
279  case 'D':
280  if ($this->‪StringAt($this->original, $this->current, 2, ['DG'])) {
281  if ($this->‪StringAt($this->original, $this->current + 2, 1, ['I', 'E', 'Y'])) {
282  // e.g. 'edge'
283  $this->primary .= 'J';
284  $this->secondary .= 'J';
285  $this->current += 3;
286  break;
287  }
288  // e.g. 'edgar'
289  $this->primary .= 'TK';
290  $this->secondary .= 'TK';
291  $this->current += 2;
292  break;
293  }
294  if ($this->‪StringAt($this->original, $this->current, 2, ['DT', 'DD'])) {
295  $this->primary .= 'T';
296  $this->secondary .= 'T';
297  $this->current += 2;
298  break;
299  }
300  // else
301  $this->primary .= 'T';
302  $this->secondary .= 'T';
303  $this->current += 1;
304  break;
305  case 'F':
306  if (substr($this->original, $this->current + 1, 1) === 'F') {
307  $this->current += 2;
308  } else {
309  $this->current += 1;
310  }
311  $this->primary .= 'F';
312  $this->secondary .= 'F';
313  break;
314  case 'G':
315  if (substr($this->original, $this->current + 1, 1) === 'H') {
316  if ($this->current > 0 && !$this->‪IsVowel($this->original, $this->current - 1)) {
317  $this->primary .= 'K';
318  $this->secondary .= 'K';
319  $this->current += 2;
320  break;
321  }
322  if ($this->current < 3) {
323  // 'ghislane', 'ghiradelli'
324  if ($this->current == 0) {
325  if (substr($this->original, $this->current + 2, 1) === 'I') {
326  $this->primary .= 'J';
327  $this->secondary .= 'J';
328  } else {
329  $this->primary .= 'K';
330  $this->secondary .= 'K';
331  }
332  $this->current += 2;
333  break;
334  }
335  }
336  // Parker's rule (with some further refinements) - e.g. 'hugh'
337  if ($this->current > 1 && $this->‪StringAt($this->original, $this->current - 2, 1, ['B', 'H', 'D']) || $this->current > 2 && $this->‪StringAt($this->original, $this->current - 3, 1, ['B', 'H', 'D']) || $this->current > 3 && $this->‪StringAt($this->original, $this->current - 4, 1, ['B', 'H'])) {
338  $this->current += 2;
339  break;
340  }
341  // e.g. 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
342  if ($this->current > 2 && substr($this->original, $this->current - 1, 1) === 'U' && $this->‪StringAt($this->original, $this->current - 3, 1, ['C', 'G', 'L', 'R', 'T'])) {
343  $this->primary .= 'F';
344  $this->secondary .= 'F';
345  } elseif ($this->current > 0 && substr($this->original, $this->current - 1, 1) !== 'I') {
346  $this->primary .= 'K';
347  $this->secondary .= 'K';
348  }
349  $this->current += 2;
350  break;
351  }
352  if (substr($this->original, $this->current + 1, 1) === 'N') {
353  if ($this->current == 1 && $this->‪IsVowel($this->original, 0) && !$this->‪SlavoGermanic($this->original)) {
354  $this->primary .= 'KN';
355  $this->secondary .= 'N';
356  } else {
357  // not e.g. 'cagney'
358  if (!$this->‪StringAt($this->original, $this->current + 2, 2, ['EY']) && substr($this->original, $this->current + 1) !== 'Y' && !$this->‪SlavoGermanic($this->original)) {
359  $this->primary .= 'N';
360  $this->secondary .= 'KN';
361  } else {
362  $this->primary .= 'KN';
363  $this->secondary .= 'KN';
364  }
365  }
366  $this->current += 2;
367  break;
368  }
369  // 'tagliaro'
370  if ($this->‪StringAt($this->original, $this->current + 1, 2, ['LI']) && !$this->‪SlavoGermanic($this->original)) {
371  $this->primary .= 'KL';
372  $this->secondary .= 'L';
373  $this->current += 2;
374  break;
375  }
376  // -ges-, -gep-, -gel- at beginning
377  if ($this->current == 0 && (substr($this->original, $this->current + 1, 1) === 'Y' || $this->‪StringAt($this->original, $this->current + 1, 2, [
378  'ES',
379  'EP',
380  'EB',
381  'EL',
382  'EY',
383  'IB',
384  'IL',
385  'IN',
386  'IE',
387  'EI',
388  'ER'
389  ]))) {
390  $this->primary .= 'K';
391  $this->secondary .= 'J';
392  $this->current += 2;
393  break;
394  }
395  // -ger-, -gy-
396  if (($this->‪StringAt($this->original, $this->current + 1, 2, ['ER']) || substr($this->original, $this->current + 1, 1) === 'Y') && !$this->‪StringAt($this->original, 0, 6, ['DANGER', 'RANGER', 'MANGER']) && !$this->‪StringAt(
397  $this->original,
398  $this->current - 1,
399  1,
400  ['E', 'I']
401  ) && !$this->‪StringAt($this->original, $this->current - 1, 3, ['RGY', 'OGY'])) {
402  $this->primary .= 'K';
403  $this->secondary .= 'J';
404  $this->current += 2;
405  break;
406  }
407  // italian e.g. 'biaggi'
408  if ($this->‪StringAt($this->original, $this->current + 1, 1, ['E', 'I', 'Y']) || $this->‪StringAt($this->original, $this->current - 1, 4, ['AGGI', 'OGGI'])) {
409  // obvious germanic
410  if ($this->‪StringAt($this->original, 0, 4, ['VAN ', 'VON ']) || $this->‪StringAt($this->original, 0, 3, ['SCH']) || $this->‪StringAt($this->original, $this->current + 1, 2, ['ET'])) {
411  $this->primary .= 'K';
412  $this->secondary .= 'K';
413  } else {
414  // always soft if french ending
415  if ($this->‪StringAt($this->original, $this->current + 1, 4, ['IER '])) {
416  $this->primary .= 'J';
417  $this->secondary .= 'J';
418  } else {
419  $this->primary .= 'J';
420  $this->secondary .= 'K';
421  }
422  }
423  $this->current += 2;
424  break;
425  }
426  if (substr($this->original, $this->current + 1, 1) === 'G') {
427  $this->current += 2;
428  } else {
429  $this->current += 1;
430  }
431  $this->primary .= 'K';
432  $this->secondary .= 'K';
433  break;
434  case 'H':
435  // only keep if first & before vowel or btw. 2 vowels
436  if (($this->current == 0 || $this->‪IsVowel($this->original, $this->current - 1)) && $this->‪IsVowel($this->original, $this->current + 1)) {
437  $this->primary .= 'H';
438  $this->secondary .= 'H';
439  $this->current += 2;
440  } else {
441  $this->current += 1;
442  }
443  break;
444  case 'J':
445  // obvious spanish, 'jose', 'san jacinto'
446  if ($this->‪StringAt($this->original, $this->current, 4, ['JOSE']) || $this->‪StringAt($this->original, 0, 4, ['SAN '])) {
447  if ($this->current == 0 && substr($this->original, $this->current + 4, 1) === ' ' || $this->‪StringAt($this->original, 0, 4, ['SAN '])) {
448  $this->primary .= 'H';
449  $this->secondary .= 'H';
450  } else {
451  $this->primary .= 'J';
452  $this->secondary .= 'H';
453  }
454  $this->current += 1;
455  break;
456  }
457  if ($this->current == 0 && !$this->‪StringAt($this->original, $this->current, 4, ['JOSE'])) {
458  $this->primary .= 'J';
459  // Yankelovich/Jankelowicz
460  $this->secondary .= 'A';
461  } else {
462  // spanish pron. of .e.g. 'bajador'
463  if ($this->‪IsVowel($this->original, $this->current - 1) && !$this->‪SlavoGermanic($this->original) && (substr($this->original, $this->current + 1, 1) === 'A' || substr($this->original, $this->current + 1, 1) === 'O')) {
464  $this->primary .= 'J';
465  $this->secondary .= 'H';
466  } else {
467  if ($this->current == $this->last) {
468  $this->primary .= 'J';
469  $this->secondary .= '';
470  } else {
471  if (!$this->‪StringAt($this->original, $this->current + 1, 1, ['L', 'T', 'K', 'S', 'N', 'M', 'B', 'Z']) && !$this->‪StringAt(
472  $this->original,
473  $this->current - 1,
474  1,
475  ['S', 'K', 'L']
476  )) {
477  $this->primary .= 'J';
478  $this->secondary .= 'J';
479  }
480  }
481  }
482  }
483  if (substr($this->original, $this->current + 1, 1) === 'J') {
484  // it could happen
485  $this->current += 2;
486  } else {
487  $this->current += 1;
488  }
489  break;
490  case 'K':
491  if (substr($this->original, $this->current + 1, 1) === 'K') {
492  $this->current += 2;
493  } else {
494  $this->current += 1;
495  }
496  $this->primary .= 'K';
497  $this->secondary .= 'K';
498  break;
499  case 'L':
500  if (substr($this->original, $this->current + 1, 1) === 'L') {
501  // spanish e.g. 'cabrillo', 'gallegos'
502  if ($this->current == $this->length - 3 && $this->‪StringAt($this->original, $this->current - 1, 4, ['ILLO', 'ILLA', 'ALLE']) || ($this->‪StringAt($this->original, $this->last - 1, 2, ['AS', 'OS']) || $this->‪StringAt($this->original, $this->last, 1, ['A', 'O'])) && $this->‪StringAt($this->original, $this->current - 1, 4, ['ALLE'])) {
503  $this->primary .= 'L';
504  $this->secondary .= '';
505  $this->current += 2;
506  break;
507  }
508  $this->current += 2;
509  } else {
510  $this->current += 1;
511  }
512  $this->primary .= 'L';
513  $this->secondary .= 'L';
514  break;
515  case 'M':
516  if ($this->‪StringAt($this->original, $this->current - 1, 3, ['UMB']) && ($this->current + 1 == $this->last || $this->‪StringAt($this->original, $this->current + 2, 2, ['ER'])) || substr($this->original, $this->current + 1, 1) === 'M') {
517  $this->current += 2;
518  } else {
519  $this->current += 1;
520  }
521  $this->primary .= 'M';
522  $this->secondary .= 'M';
523  break;
524  case 'N':
525  if (substr($this->original, $this->current + 1, 1) === 'N') {
526  $this->current += 2;
527  } else {
528  $this->current += 1;
529  }
530  $this->primary .= 'N';
531  $this->secondary .= 'N';
532  break;
533  case 'Ñ':
534  $this->current += 1;
535  $this->primary .= 'N';
536  $this->secondary .= 'N';
537  break;
538  case 'P':
539  if (substr($this->original, $this->current + 1, 1) === 'H') {
540  $this->current += 2;
541  $this->primary .= 'F';
542  $this->secondary .= 'F';
543  break;
544  }
545  // also account for "campbell" and "raspberry"
546  if ($this->‪StringAt($this->original, $this->current + 1, 1, ['P', 'B'])) {
547  $this->current += 2;
548  } else {
549  $this->current += 1;
550  }
551  $this->primary .= 'P';
552  $this->secondary .= 'P';
553  break;
554  case 'Q':
555  if (substr($this->original, $this->current + 1, 1) === 'Q') {
556  $this->current += 2;
557  } else {
558  $this->current += 1;
559  }
560  $this->primary .= 'K';
561  $this->secondary .= 'K';
562  break;
563  case 'R':
564  // french e.g. 'rogier', but exclude 'hochmeier'
565  if ($this->current == $this->last && !$this->‪SlavoGermanic($this->original) && $this->‪StringAt($this->original, $this->current - 2, 2, ['IE']) && !$this->‪StringAt(
566  $this->original,
567  $this->current - 4,
568  2,
569  ['ME', 'MA']
570  )) {
571  $this->primary .= '';
572  $this->secondary .= 'R';
573  } else {
574  $this->primary .= 'R';
575  $this->secondary .= 'R';
576  }
577  if (substr($this->original, $this->current + 1, 1) === 'R') {
578  $this->current += 2;
579  } else {
580  $this->current += 1;
581  }
582  break;
583  case 'S':
584  // special cases 'island', 'isle', 'carlisle', 'carlysle'
585  if ($this->‪StringAt($this->original, $this->current - 1, 3, ['ISL', 'YSL'])) {
586  $this->current += 1;
587  break;
588  }
589  // special case 'sugar-'
590  if ($this->current == 0 && $this->‪StringAt($this->original, $this->current, 5, ['SUGAR'])) {
591  $this->primary .= 'X';
592  $this->secondary .= 'S';
593  $this->current += 1;
594  break;
595  }
596  if ($this->‪StringAt($this->original, $this->current, 2, ['SH'])) {
597  // germanic
598  if ($this->‪StringAt($this->original, $this->current + 1, 4, ['HEIM', 'HOEK', 'HOLM', 'HOLZ'])) {
599  $this->primary .= 'S';
600  $this->secondary .= 'S';
601  } else {
602  $this->primary .= 'X';
603  $this->secondary .= 'X';
604  }
605  $this->current += 2;
606  break;
607  }
608  // italian & armenian
609  if ($this->‪StringAt($this->original, $this->current, 3, ['SIO', 'SIA']) || $this->‪StringAt($this->original, $this->current, 4, ['SIAN'])) {
610  if (!$this->‪SlavoGermanic($this->original)) {
611  $this->primary .= 'S';
612  $this->secondary .= 'X';
613  } else {
614  $this->primary .= 'S';
615  $this->secondary .= 'S';
616  }
617  $this->current += 3;
618  break;
619  }
620  // german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
621  // also, -sz- in slavic language although in hungarian it is pronounced 's'
622  if ($this->current == 0 && $this->‪StringAt($this->original, $this->current + 1, 1, ['M', 'N', 'L', 'W']) || $this->‪StringAt($this->original, $this->current + 1, 1, ['Z'])) {
623  $this->primary .= 'S';
624  $this->secondary .= 'X';
625  if ($this->‪StringAt($this->original, $this->current + 1, 1, ['Z'])) {
626  $this->current += 2;
627  } else {
628  $this->current += 1;
629  }
630  break;
631  }
632  if ($this->‪StringAt($this->original, $this->current, 2, ['SC'])) {
633  // Schlesinger's rule
634  if (substr($this->original, $this->current + 2, 1) === 'H') {
635  // dutch origin, e.g. 'school', 'schooner'
636  if ($this->‪StringAt($this->original, $this->current + 3, 2, ['OO', 'ER', 'EN', 'UY', 'ED', 'EM'])) {
637  // 'schermerhorn', 'schenker'
638  if ($this->‪StringAt($this->original, $this->current + 3, 2, ['ER', 'EN'])) {
639  $this->primary .= 'X';
640  $this->secondary .= 'SK';
641  } else {
642  $this->primary .= 'SK';
643  $this->secondary .= 'SK';
644  }
645  $this->current += 3;
646  break;
647  }
648  if ($this->current == 0 && !$this->‪IsVowel($this->original, 3) && substr($this->original, $this->current + 3, 1) !== 'W') {
649  $this->primary .= 'X';
650  $this->secondary .= 'S';
651  } else {
652  $this->primary .= 'X';
653  $this->secondary .= 'X';
654  }
655  $this->current += 3;
656  break;
657  }
658  if ($this->‪StringAt($this->original, $this->current + 2, 1, ['I', 'E', 'Y'])) {
659  $this->primary .= 'S';
660  $this->secondary .= 'S';
661  $this->current += 3;
662  break;
663  }
664  // else
665  $this->primary .= 'SK';
666  $this->secondary .= 'SK';
667  $this->current += 3;
668  break;
669  }
670  // french e.g. 'resnais', 'artois'
671  if ($this->current == $this->last && $this->‪StringAt($this->original, $this->current - 2, 2, ['AI', 'OI'])) {
672  $this->primary .= '';
673  $this->secondary .= 'S';
674  } else {
675  $this->primary .= 'S';
676  $this->secondary .= 'S';
677  }
678  if ($this->‪StringAt($this->original, $this->current + 1, 1, ['S', 'Z'])) {
679  $this->current += 2;
680  } else {
681  $this->current += 1;
682  }
683  break;
684  case 'T':
685  if ($this->‪StringAt($this->original, $this->current, 4, ['TION'])) {
686  $this->primary .= 'X';
687  $this->secondary .= 'X';
688  $this->current += 3;
689  break;
690  }
691  if ($this->‪StringAt($this->original, $this->current, 3, ['TIA', 'TCH'])) {
692  $this->primary .= 'X';
693  $this->secondary .= 'X';
694  $this->current += 3;
695  break;
696  }
697  if ($this->‪StringAt($this->original, $this->current, 2, ['TH']) || $this->‪StringAt($this->original, $this->current, 3, ['TTH'])) {
698  // special case 'thomas', 'thames' or germanic
699  if ($this->‪StringAt($this->original, $this->current + 2, 2, ['OM', 'AM']) || $this->‪StringAt($this->original, 0, 4, ['VAN ', 'VON ']) || $this->‪StringAt($this->original, 0, 3, ['SCH'])) {
700  $this->primary .= 'T';
701  $this->secondary .= 'T';
702  } else {
703  $this->primary .= '0';
704  $this->secondary .= 'T';
705  }
706  $this->current += 2;
707  break;
708  }
709  if ($this->‪StringAt($this->original, $this->current + 1, 1, ['T', 'D'])) {
710  $this->current += 2;
711  } else {
712  $this->current += 1;
713  }
714  $this->primary .= 'T';
715  $this->secondary .= 'T';
716  break;
717  case 'V':
718  if (substr($this->original, $this->current + 1, 1) === 'V') {
719  $this->current += 2;
720  } else {
721  $this->current += 1;
722  }
723  $this->primary .= 'F';
724  $this->secondary .= 'F';
725  break;
726  case 'W':
727  // can also be in middle of word
728  if ($this->‪StringAt($this->original, $this->current, 2, ['WR'])) {
729  $this->primary .= 'R';
730  $this->secondary .= 'R';
731  $this->current += 2;
732  break;
733  }
734  if ($this->current == 0 && ($this->‪IsVowel($this->original, $this->current + 1) || $this->‪StringAt($this->original, $this->current, 2, ['WH']))) {
735  // Wasserman should match Vasserman
736  if ($this->‪IsVowel($this->original, $this->current + 1)) {
737  $this->primary .= 'A';
738  $this->secondary .= 'F';
739  } else {
740  // need Uomo to match Womo
741  $this->primary .= 'A';
742  $this->secondary .= 'A';
743  }
744  }
745  // Arnow should match Arnoff
746  if ($this->current == $this->last && $this->‪IsVowel($this->original, $this->current - 1) || $this->‪StringAt($this->original, $this->current - 1, 5, ['EWSKI', 'EWSKY', 'OWSKI', 'OWSKY']) || $this->‪StringAt($this->original, 0, 3, ['SCH'])) {
747  $this->primary .= '';
748  $this->secondary .= 'F';
749  $this->current += 1;
750  break;
751  }
752  // polish e.g. 'filipowicz'
753  if ($this->‪StringAt($this->original, $this->current, 4, ['WICZ', 'WITZ'])) {
754  $this->primary .= 'TS';
755  $this->secondary .= 'FX';
756  $this->current += 4;
757  break;
758  }
759  // else skip it
760  $this->current += 1;
761  break;
762  case 'X':
763  // french e.g. breaux
764  if (!($this->current == $this->last && ($this->‪StringAt($this->original, $this->current - 3, 3, ['IAU', 'EAU']) || $this->‪StringAt($this->original, $this->current - 2, 2, ['AU', 'OU'])))) {
765  $this->primary .= 'KS';
766  $this->secondary .= 'KS';
767  }
768  if ($this->‪StringAt($this->original, $this->current + 1, 1, ['C', 'X'])) {
769  $this->current += 2;
770  } else {
771  $this->current += 1;
772  }
773  break;
774  case 'Z':
775  // chinese pinyin e.g. 'zhao'
776  if (substr($this->original, $this->current + 1, 1) === 'H') {
777  $this->primary .= 'J';
778  $this->secondary .= 'J';
779  $this->current += 2;
780  break;
781  }
782  if ($this->‪StringAt($this->original, $this->current + 1, 2, ['ZO', 'ZI', 'ZA']) || $this->‪SlavoGermanic($this->original) && ($this->current > 0 && substr($this->original, $this->current - 1, 1) !== 'T')) {
783  $this->primary .= 'S';
784  $this->secondary .= 'TS';
785  } else {
786  $this->primary .= 'S';
787  $this->secondary .= 'S';
788  }
789  if (substr($this->original, $this->current + 1, 1) === 'Z') {
790  $this->current += 2;
791  } else {
792  $this->current += 1;
793  }
794  break;
795  default:
796  $this->current += 1;
797  }
798  }
799  // end while
800  $this->primary = substr($this->primary, 0, 4);
801  $this->secondary = substr($this->secondary, 0, 4);
802  $result['primary'] = ‪$this->primary;
803  $result['secondary'] = ‪$this->secondary;
804  return $result;
805  }
806 
    // end of function DoubleMetaPhone
    // Helper methods (declared public; called by DoubleMetaPhone)
818  public function ‪StringAt($string, $start, ‪$length, $list)
819  {
820  if ($start < 0 || $start >= strlen($string)) {
821  return 0;
822  }
823  $listCount = count($list);
824  for ($i = 0; $i < $listCount; $i++) {
825  if ($list[$i] == substr($string, $start, ‪$length)) {
826  return 1;
827  }
828  }
829  return 0;
830  }
831 
839  public function ‪IsVowel($string, $pos)
840  {
841  return preg_match('/[AEIOUY]/', substr($string, $pos, 1));
842  }
843 
850  public function ‪SlavoGermanic($string)
851  {
852  return preg_match('/W|K|CZ|WITZ/', $string);
853  }
854 }
‪TYPO3\CMS\IndexedSearch\Utility\DoubleMetaPhoneUtility\$original
‪string $original
Definition: DoubleMetaPhoneUtility.php:27
‪TYPO3\CMS\IndexedSearch\Utility\DoubleMetaPhoneUtility
Definition: DoubleMetaPhoneUtility.php:24
‪TYPO3\CMS\IndexedSearch\Utility\DoubleMetaPhoneUtility\DoubleMetaPhone
‪array DoubleMetaPhone($string)
Definition: DoubleMetaPhoneUtility.php:72
‪TYPO3\CMS\IndexedSearch\Utility\DoubleMetaPhoneUtility\$secondary
‪string $secondary
Definition: DoubleMetaPhoneUtility.php:35
‪TYPO3\CMS\IndexedSearch\Utility\DoubleMetaPhoneUtility\$length
‪int $length
Definition: DoubleMetaPhoneUtility.php:39
‪TYPO3\CMS\IndexedSearch\Utility\DoubleMetaPhoneUtility\$primary
‪string $primary
Definition: DoubleMetaPhoneUtility.php:31
‪TYPO3\CMS\IndexedSearch\Utility
Definition: DoubleMetaPhoneUtility.php:2
‪TYPO3\CMS\IndexedSearch\Utility\DoubleMetaPhoneUtility\StringAt
‪bool StringAt($string, $start, $length, $list)
Definition: DoubleMetaPhoneUtility.php:812
‪TYPO3\CMS\IndexedSearch\Utility\DoubleMetaPhoneUtility\SlavoGermanic
‪bool int SlavoGermanic($string)
Definition: DoubleMetaPhoneUtility.php:844
‪TYPO3\CMS\IndexedSearch\Utility\DoubleMetaPhoneUtility\$current
‪int $current
Definition: DoubleMetaPhoneUtility.php:47
‪TYPO3\CMS\IndexedSearch\Utility\DoubleMetaPhoneUtility\$last
‪int $last
Definition: DoubleMetaPhoneUtility.php:43
‪TYPO3\CMS\IndexedSearch\Utility\DoubleMetaPhoneUtility\metaphone
‪string metaphone($string, $sys_language_uid=0)
Definition: DoubleMetaPhoneUtility.php:58
‪TYPO3\CMS\IndexedSearch\Utility\DoubleMetaPhoneUtility\IsVowel
‪bool int IsVowel($string, $pos)
Definition: DoubleMetaPhoneUtility.php:833