‪TYPO3CMS  ‪main
DoubleMetaPhoneUtility.php
Go to the documentation of this file.
1 <?php
2 
3 /*
4  * This file is part of the TYPO3 CMS project.
5  *
6  * It is free software; you can redistribute it and/or modify it under
7  * the terms of the GNU General Public License, either version 2
8  * of the License, or any later version.
9  *
10  * For the full copyright and license information, please read the
11  * LICENSE.txt file that was distributed with this source code.
12  *
13  * The TYPO3 project - inspiring people to share!
14  */
15 
17 
19 
27 {
/** Working copy of the input; DoubleMetaPhone() stores it upper-cased with one trailing blank appended. */
31  public ‪$original = '';
32 
/** Primary key being built; DoubleMetaPhone() truncates it to 4 characters before returning. */
36  public ‪$primary = '';
37 
/** Secondary (alternative) key being built; truncated to 4 characters like the primary key. */
41  public ‪$secondary = '';
42 
/** strlen() of the input string as passed to DoubleMetaPhone(), i.e. without the appended blank. */
46  public ‪$length = 0;
47 
/** Offset of the last input byte ($length - 1). */
51  public ‪$last = 0;
52 
/** Offset in $original that the main loop of DoubleMetaPhone() is currently examining. */
56  public ‪$current = 0;
57 
/** NOTE(review): not read or written by any method visible in this section - presumably set by the calling Indexer; confirm. */
58  public ‪Indexer ‪$pObj;
59 
60  // methods
61  // TYPO3 specific API to this class. BEGIN
/**
 * TYPO3 facing entry point: returns only the primary Double Metaphone key.
 *
 * @param string $string Word to encode
 * @param int $sys_language_uid Accepted for API compatibility; not used by this implementation
 * @return string Primary key as produced by DoubleMetaPhone()
 */
public function metaphone($string, $sys_language_uid = 0)
{
    ['primary' => $primaryKey] = $this->DoubleMetaPhone($string);

    return $primaryKey;
}
74 
75  // TYPO3 specific API to this class. END
76  // Public method
/**
 * Computes the primary and secondary Double Metaphone keys of a word.
 *
 * The word is upper-cased, padded with one trailing blank (so that
 * look-aheads behind the last letter see ' ') and then scanned from left to
 * right. Every switch branch appends zero or more code letters to both keys
 * and advances $this->current by the number of bytes it consumed. Scanning
 * stops once both keys have at least four characters or the input is
 * exhausted; both keys are finally truncated to four characters.
 *
 * The method works on bytes. The two non-ASCII letters it knows (C cedilla
 * and N tilde) are therefore recognised by their two-byte UTF-8 sequences,
 * in upper and lower case, because byte-wise strtoupper() does not reliably
 * upper-case multi-byte letters. Previously the one-byte switch subject could
 * never equal the two-byte case labels, so these letters were skipped.
 *
 * Side effect: all working properties ($original, $primary, $secondary,
 * $length, $last, $current) are overwritten.
 *
 * @param string $string Word to encode
 * @return array{primary: string, secondary: string} Both keys, at most 4 characters each
 */
public function DoubleMetaPhone($string)
{
    $this->primary = '';
    $this->secondary = '';
    $this->current = 0;
    $this->length = strlen($string);
    $this->last = $this->length - 1;
    $this->original = strtoupper($string . ' ');
    // skip this at beginning of word
    if ($this->StringAt($this->original, 0, 2, ['GN', 'KN', 'PN', 'WR', 'PS'])) {
        $this->current++;
    }
    // Initial 'X' is pronounced 'Z' e.g. 'Xavier'
    if ($this->original[0] === 'X') {
        $this->primary .= 'S';
        // 'Z' maps to 'S'
        $this->secondary .= 'S';
        $this->current++;
    }
    // main loop
    while (strlen($this->primary) < 4 || strlen($this->secondary) < 4) {
        if ($this->current >= $this->length) {
            break;
        }
        $letter = substr($this->original, $this->current, 1);
        // Number of bytes the current letter occupies; only the two UTF-8 letters below are wider than 1
        $letterWidth = 1;
        $twoBytes = substr($this->original, $this->current, 2);
        if ($twoBytes === "\xC3\x87" || $twoBytes === "\xC3\xA7") {
            // UTF-8 encoded upper/lower case C cedilla
            $letter = 'Ç';
            $letterWidth = 2;
        } elseif ($twoBytes === "\xC3\x91" || $twoBytes === "\xC3\xB1") {
            // UTF-8 encoded upper/lower case N tilde
            $letter = 'Ñ';
            $letterWidth = 2;
        }
        switch ($letter) {
            case 'A':
            case 'E':
            case 'I':
            case 'O':
            case 'U':
            case 'Y':
                if ($this->current === 0) {
                    // all init vowels now map to 'A'
                    $this->primary .= 'A';
                    $this->secondary .= 'A';
                }
                $this->current += 1;
                break;
            case 'B':
                // '-mb', e.g. "dumb", already skipped over ...
                $this->primary .= 'P';
                $this->secondary .= 'P';
                if (substr($this->original, $this->current + 1, 1) === 'B') {
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                break;
            case 'Ç':
                $this->primary .= 'S';
                $this->secondary .= 'S';
                $this->current += $letterWidth;
                break;
            case 'C':
                // various germanic
                if ($this->current > 1
                    && !$this->IsVowel($this->original, $this->current - 2)
                    && $this->StringAt($this->original, $this->current - 1, 3, ['ACH'])
                    && (
                        substr($this->original, $this->current + 2, 1) !== 'I'
                        && (
                            substr($this->original, $this->current + 2, 1) !== 'E'
                            || $this->StringAt($this->original, $this->current - 2, 6, ['BACHER', 'MACHER'])
                        )
                    )
                ) {
                    $this->primary .= 'K';
                    $this->secondary .= 'K';
                    $this->current += 2;
                    break;
                }
                // special case 'caesar'
                if ($this->current === 0 && $this->StringAt($this->original, $this->current, 6, ['CAESAR'])) {
                    $this->primary .= 'S';
                    $this->secondary .= 'S';
                    $this->current += 2;
                    break;
                }
                // italian 'chianti'
                if ($this->StringAt($this->original, $this->current, 4, ['CHIA'])) {
                    $this->primary .= 'K';
                    $this->secondary .= 'K';
                    $this->current += 2;
                    break;
                }
                if ($this->StringAt($this->original, $this->current, 2, ['CH'])) {
                    // find 'michael'
                    if ($this->current > 0 && $this->StringAt($this->original, $this->current, 4, ['CHAE'])) {
                        $this->primary .= 'K';
                        $this->secondary .= 'X';
                        $this->current += 2;
                        break;
                    }
                    // greek roots e.g. 'chemistry', 'chorus'
                    if ($this->current === 0
                        && (
                            $this->StringAt($this->original, $this->current + 1, 5, ['HARAC', 'HARIS'])
                            || $this->StringAt($this->original, $this->current + 1, 3, ['HOR', 'HYM', 'HIA', 'HEM'])
                        )
                        && !$this->StringAt($this->original, 0, 5, ['CHORE'])
                    ) {
                        $this->primary .= 'K';
                        $this->secondary .= 'K';
                        $this->current += 2;
                        break;
                    }
                    // germanic, greek, or otherwise 'ch' for 'kh' sound
                    if ($this->StringAt($this->original, 0, 4, ['VAN ', 'VON '])
                        || $this->StringAt($this->original, 0, 3, ['SCH'])
                        || $this->StringAt($this->original, $this->current - 2, 6, ['ORCHES', 'ARCHIT', 'ORCHID'])
                        || $this->StringAt($this->original, $this->current + 2, 1, ['T', 'S'])
                        || (
                            ($this->StringAt($this->original, $this->current - 1, 1, ['A', 'O', 'U', 'E']) || $this->current === 0)
                            && $this->StringAt($this->original, $this->current + 2, 1, ['L', 'R', 'N', 'M', 'B', 'H', 'F', 'V', 'W', ' '])
                        )
                    ) {
                        $this->primary .= 'K';
                        $this->secondary .= 'K';
                    } elseif ($this->current > 0) {
                        if ($this->StringAt($this->original, 0, 2, ['MC'])) {
                            // e.g. 'McHugh'
                            $this->primary .= 'K';
                            $this->secondary .= 'K';
                        } else {
                            $this->primary .= 'X';
                            $this->secondary .= 'K';
                        }
                    } else {
                        $this->primary .= 'X';
                        $this->secondary .= 'X';
                    }
                    $this->current += 2;
                    break;
                }
                // e.g. 'czerny'
                if ($this->StringAt($this->original, $this->current, 2, ['CZ'])
                    && !$this->StringAt($this->original, $this->current - 2, 4, ['WICZ'])
                ) {
                    $this->primary .= 'S';
                    $this->secondary .= 'X';
                    $this->current += 2;
                    break;
                }
                // e.g. 'focaccia'
                if ($this->StringAt($this->original, $this->current + 1, 3, ['CIA'])) {
                    $this->primary .= 'X';
                    $this->secondary .= 'X';
                    $this->current += 3;
                    break;
                }
                // double 'C', but not McClellan'
                if ($this->StringAt($this->original, $this->current, 2, ['CC'])
                    && !($this->current === 1 && $this->original[0] === 'M')
                ) {
                    // 'bellocchio' but not 'bacchus'
                    if ($this->StringAt($this->original, $this->current + 2, 1, ['I', 'E', 'H'])
                        && !$this->StringAt($this->original, $this->current + 2, 2, ['HU'])
                    ) {
                        // 'accident', 'accede', 'succeed'
                        if (($this->current === 1 && substr($this->original, $this->current - 1, 1) === 'A')
                            || $this->StringAt($this->original, $this->current - 1, 5, ['UCCEE', 'UCCES'])
                        ) {
                            $this->primary .= 'KS';
                            $this->secondary .= 'KS';
                        } else {
                            $this->primary .= 'X';
                            $this->secondary .= 'X';
                        }
                        $this->current += 3;
                        break;
                    }
                    // Pierce's rule
                    $this->primary .= 'K';
                    $this->secondary .= 'K';
                    $this->current += 2;
                    break;
                }
                if ($this->StringAt($this->original, $this->current, 2, ['CK', 'CG', 'CQ'])) {
                    $this->primary .= 'K';
                    $this->secondary .= 'K';
                    $this->current += 2;
                    break;
                }
                if ($this->StringAt($this->original, $this->current, 2, ['CI', 'CE', 'CY'])) {
                    // italian vs. english
                    if ($this->StringAt($this->original, $this->current, 3, ['CIO', 'CIE', 'CIA'])) {
                        $this->primary .= 'S';
                        $this->secondary .= 'X';
                    } else {
                        $this->primary .= 'S';
                        $this->secondary .= 'S';
                    }
                    $this->current += 2;
                    break;
                }
                // else
                $this->primary .= 'K';
                $this->secondary .= 'K';
                // name sent in 'mac caffrey', 'mac gregor'
                if ($this->StringAt($this->original, $this->current + 1, 2, [' C', ' Q', ' G'])) {
                    $this->current += 3;
                } elseif ($this->StringAt($this->original, $this->current + 1, 1, ['C', 'K', 'Q'])
                    && !$this->StringAt($this->original, $this->current + 1, 2, ['CE', 'CI'])
                ) {
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                break;
            case 'D':
                if ($this->StringAt($this->original, $this->current, 2, ['DG'])) {
                    if ($this->StringAt($this->original, $this->current + 2, 1, ['I', 'E', 'Y'])) {
                        // e.g. 'edge'
                        $this->primary .= 'J';
                        $this->secondary .= 'J';
                        $this->current += 3;
                        break;
                    }
                    // e.g. 'edgar'
                    $this->primary .= 'TK';
                    $this->secondary .= 'TK';
                    $this->current += 2;
                    break;
                }
                if ($this->StringAt($this->original, $this->current, 2, ['DT', 'DD'])) {
                    $this->primary .= 'T';
                    $this->secondary .= 'T';
                    $this->current += 2;
                    break;
                }
                // else
                $this->primary .= 'T';
                $this->secondary .= 'T';
                $this->current += 1;
                break;
            case 'F':
                if (substr($this->original, $this->current + 1, 1) === 'F') {
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                $this->primary .= 'F';
                $this->secondary .= 'F';
                break;
            case 'G':
                if (substr($this->original, $this->current + 1, 1) === 'H') {
                    if ($this->current > 0 && !$this->IsVowel($this->original, $this->current - 1)) {
                        $this->primary .= 'K';
                        $this->secondary .= 'K';
                        $this->current += 2;
                        break;
                    }
                    // 'ghislane', 'ghiradelli'
                    if ($this->current === 0) {
                        if (substr($this->original, $this->current + 2, 1) === 'I') {
                            $this->primary .= 'J';
                            $this->secondary .= 'J';
                        } else {
                            $this->primary .= 'K';
                            $this->secondary .= 'K';
                        }
                        $this->current += 2;
                        break;
                    }
                    // Parker's rule (with some further refinements) - e.g. 'hugh'
                    if (($this->current > 1 && $this->StringAt($this->original, $this->current - 2, 1, ['B', 'H', 'D']))
                        || ($this->current > 2 && $this->StringAt($this->original, $this->current - 3, 1, ['B', 'H', 'D']))
                        || ($this->current > 3 && $this->StringAt($this->original, $this->current - 4, 1, ['B', 'H']))
                    ) {
                        $this->current += 2;
                        break;
                    }
                    // e.g. 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
                    if ($this->current > 2
                        && substr($this->original, $this->current - 1, 1) === 'U'
                        && $this->StringAt($this->original, $this->current - 3, 1, ['C', 'G', 'L', 'R', 'T'])
                    ) {
                        $this->primary .= 'F';
                        $this->secondary .= 'F';
                    } elseif ($this->current > 0 && substr($this->original, $this->current - 1, 1) !== 'I') {
                        $this->primary .= 'K';
                        $this->secondary .= 'K';
                    }
                    $this->current += 2;
                    break;
                }
                if (substr($this->original, $this->current + 1, 1) === 'N') {
                    if ($this->current === 1 && $this->IsVowel($this->original, 0) && !$this->SlavoGermanic($this->original)) {
                        $this->primary .= 'KN';
                        $this->secondary .= 'N';
                    } elseif (!$this->StringAt($this->original, $this->current + 2, 2, ['EY'])
                        // kept as in the original: compares the whole remainder of the string, which starts
                        // with 'N' in this branch, so this term is always true
                        && substr($this->original, $this->current + 1) !== 'Y'
                        && !$this->SlavoGermanic($this->original)
                    ) {
                        // not e.g. 'cagney'
                        $this->primary .= 'N';
                        $this->secondary .= 'KN';
                    } else {
                        $this->primary .= 'KN';
                        $this->secondary .= 'KN';
                    }
                    $this->current += 2;
                    break;
                }
                // 'tagliaro'
                if ($this->StringAt($this->original, $this->current + 1, 2, ['LI']) && !$this->SlavoGermanic($this->original)) {
                    $this->primary .= 'KL';
                    $this->secondary .= 'L';
                    $this->current += 2;
                    break;
                }
                // -ges-, -gep-, -gel- at beginning
                if ($this->current === 0
                    && (
                        substr($this->original, $this->current + 1, 1) === 'Y'
                        || $this->StringAt($this->original, $this->current + 1, 2, [
                            'ES',
                            'EP',
                            'EB',
                            'EL',
                            'EY',
                            'IB',
                            'IL',
                            'IN',
                            'IE',
                            'EI',
                            'ER',
                        ])
                    )
                ) {
                    $this->primary .= 'K';
                    $this->secondary .= 'J';
                    $this->current += 2;
                    break;
                }
                // -ger-, -gy-
                if (($this->StringAt($this->original, $this->current + 1, 2, ['ER']) || substr($this->original, $this->current + 1, 1) === 'Y')
                    && !$this->StringAt($this->original, 0, 6, ['DANGER', 'RANGER', 'MANGER'])
                    && !$this->StringAt($this->original, $this->current - 1, 1, ['E', 'I'])
                    && !$this->StringAt($this->original, $this->current - 1, 3, ['RGY', 'OGY'])
                ) {
                    $this->primary .= 'K';
                    $this->secondary .= 'J';
                    $this->current += 2;
                    break;
                }
                // italian e.g. 'biaggi'
                if ($this->StringAt($this->original, $this->current + 1, 1, ['E', 'I', 'Y'])
                    || $this->StringAt($this->original, $this->current - 1, 4, ['AGGI', 'OGGI'])
                ) {
                    // obvious germanic
                    if ($this->StringAt($this->original, 0, 4, ['VAN ', 'VON '])
                        || $this->StringAt($this->original, 0, 3, ['SCH'])
                        || $this->StringAt($this->original, $this->current + 1, 2, ['ET'])
                    ) {
                        $this->primary .= 'K';
                        $this->secondary .= 'K';
                    } elseif ($this->StringAt($this->original, $this->current + 1, 4, ['IER '])) {
                        // always soft if french ending
                        $this->primary .= 'J';
                        $this->secondary .= 'J';
                    } else {
                        $this->primary .= 'J';
                        $this->secondary .= 'K';
                    }
                    $this->current += 2;
                    break;
                }
                if (substr($this->original, $this->current + 1, 1) === 'G') {
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                $this->primary .= 'K';
                $this->secondary .= 'K';
                break;
            case 'H':
                // only keep if first & before vowel or btw. 2 vowels
                if (($this->current === 0 || $this->IsVowel($this->original, $this->current - 1))
                    && $this->IsVowel($this->original, $this->current + 1)
                ) {
                    $this->primary .= 'H';
                    $this->secondary .= 'H';
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                break;
            case 'J':
                // obvious spanish, 'jose', 'san jacinto'
                if ($this->StringAt($this->original, $this->current, 4, ['JOSE'])
                    || $this->StringAt($this->original, 0, 4, ['SAN '])
                ) {
                    if (($this->current === 0 && substr($this->original, $this->current + 4, 1) === ' ')
                        || $this->StringAt($this->original, 0, 4, ['SAN '])
                    ) {
                        $this->primary .= 'H';
                        $this->secondary .= 'H';
                    } else {
                        $this->primary .= 'J';
                        $this->secondary .= 'H';
                    }
                    $this->current += 1;
                    break;
                }
                if ($this->current === 0 && !$this->StringAt($this->original, $this->current, 4, ['JOSE'])) {
                    $this->primary .= 'J';
                    // Yankelovich/Jankelowicz
                    $this->secondary .= 'A';
                } elseif ($this->IsVowel($this->original, $this->current - 1)
                    && !$this->SlavoGermanic($this->original)
                    && (substr($this->original, $this->current + 1, 1) === 'A' || substr($this->original, $this->current + 1, 1) === 'O')
                ) {
                    // spanish pron. of .e.g. 'bajador'
                    $this->primary .= 'J';
                    $this->secondary .= 'H';
                } elseif ($this->current === $this->last) {
                    $this->primary .= 'J';
                    $this->secondary .= '';
                } elseif (!$this->StringAt($this->original, $this->current + 1, 1, ['L', 'T', 'K', 'S', 'N', 'M', 'B', 'Z'])
                    && !$this->StringAt($this->original, $this->current - 1, 1, ['S', 'K', 'L'])
                ) {
                    $this->primary .= 'J';
                    $this->secondary .= 'J';
                }
                if (substr($this->original, $this->current + 1, 1) === 'J') {
                    // it could happen
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                break;
            case 'K':
                if (substr($this->original, $this->current + 1, 1) === 'K') {
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                $this->primary .= 'K';
                $this->secondary .= 'K';
                break;
            case 'L':
                if (substr($this->original, $this->current + 1, 1) === 'L') {
                    // spanish e.g. 'cabrillo', 'gallegos'
                    if (($this->current === $this->length - 3 && $this->StringAt($this->original, $this->current - 1, 4, ['ILLO', 'ILLA', 'ALLE']))
                        || (
                            ($this->StringAt($this->original, $this->last - 1, 2, ['AS', 'OS']) || $this->StringAt($this->original, $this->last, 1, ['A', 'O']))
                            && $this->StringAt($this->original, $this->current - 1, 4, ['ALLE'])
                        )
                    ) {
                        $this->primary .= 'L';
                        $this->secondary .= '';
                        $this->current += 2;
                        break;
                    }
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                $this->primary .= 'L';
                $this->secondary .= 'L';
                break;
            case 'M':
                if (($this->StringAt($this->original, $this->current - 1, 3, ['UMB'])
                        && ($this->current + 1 === $this->last || $this->StringAt($this->original, $this->current + 2, 2, ['ER'])))
                    || substr($this->original, $this->current + 1, 1) === 'M'
                ) {
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                $this->primary .= 'M';
                $this->secondary .= 'M';
                break;
            case 'N':
                if (substr($this->original, $this->current + 1, 1) === 'N') {
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                $this->primary .= 'N';
                $this->secondary .= 'N';
                break;
            case 'Ñ':
                $this->current += $letterWidth;
                $this->primary .= 'N';
                $this->secondary .= 'N';
                break;
            case 'P':
                if (substr($this->original, $this->current + 1, 1) === 'H') {
                    $this->current += 2;
                    $this->primary .= 'F';
                    $this->secondary .= 'F';
                    break;
                }
                // also account for "campbell" and "raspberry"
                if ($this->StringAt($this->original, $this->current + 1, 1, ['P', 'B'])) {
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                $this->primary .= 'P';
                $this->secondary .= 'P';
                break;
            case 'Q':
                if (substr($this->original, $this->current + 1, 1) === 'Q') {
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                $this->primary .= 'K';
                $this->secondary .= 'K';
                break;
            case 'R':
                // french e.g. 'rogier', but exclude 'hochmeier'
                if ($this->current === $this->last
                    && !$this->SlavoGermanic($this->original)
                    && $this->StringAt($this->original, $this->current - 2, 2, ['IE'])
                    && !$this->StringAt($this->original, $this->current - 4, 2, ['ME', 'MA'])
                ) {
                    $this->primary .= '';
                    $this->secondary .= 'R';
                } else {
                    $this->primary .= 'R';
                    $this->secondary .= 'R';
                }
                if (substr($this->original, $this->current + 1, 1) === 'R') {
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                break;
            case 'S':
                // special cases 'island', 'isle', 'carlisle', 'carlysle'
                if ($this->StringAt($this->original, $this->current - 1, 3, ['ISL', 'YSL'])) {
                    $this->current += 1;
                    break;
                }
                // special case 'sugar-'
                if ($this->current === 0 && $this->StringAt($this->original, $this->current, 5, ['SUGAR'])) {
                    $this->primary .= 'X';
                    $this->secondary .= 'S';
                    $this->current += 1;
                    break;
                }
                if ($this->StringAt($this->original, $this->current, 2, ['SH'])) {
                    // germanic
                    if ($this->StringAt($this->original, $this->current + 1, 4, ['HEIM', 'HOEK', 'HOLM', 'HOLZ'])) {
                        $this->primary .= 'S';
                        $this->secondary .= 'S';
                    } else {
                        $this->primary .= 'X';
                        $this->secondary .= 'X';
                    }
                    $this->current += 2;
                    break;
                }
                // italian & armenian
                if ($this->StringAt($this->original, $this->current, 3, ['SIO', 'SIA'])
                    || $this->StringAt($this->original, $this->current, 4, ['SIAN'])
                ) {
                    if (!$this->SlavoGermanic($this->original)) {
                        $this->primary .= 'S';
                        $this->secondary .= 'X';
                    } else {
                        $this->primary .= 'S';
                        $this->secondary .= 'S';
                    }
                    $this->current += 3;
                    break;
                }
                // german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
                // also, -sz- in slavic language although in hungarian it is pronounced 's'
                if (($this->current === 0 && $this->StringAt($this->original, $this->current + 1, 1, ['M', 'N', 'L', 'W']))
                    || $this->StringAt($this->original, $this->current + 1, 1, ['Z'])
                ) {
                    $this->primary .= 'S';
                    $this->secondary .= 'X';
                    if ($this->StringAt($this->original, $this->current + 1, 1, ['Z'])) {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;
                }
                if ($this->StringAt($this->original, $this->current, 2, ['SC'])) {
                    // Schlesinger's rule
                    if (substr($this->original, $this->current + 2, 1) === 'H') {
                        // dutch origin, e.g. 'school', 'schooner'
                        if ($this->StringAt($this->original, $this->current + 3, 2, ['OO', 'ER', 'EN', 'UY', 'ED', 'EM'])) {
                            // 'schermerhorn', 'schenker'
                            if ($this->StringAt($this->original, $this->current + 3, 2, ['ER', 'EN'])) {
                                $this->primary .= 'X';
                                $this->secondary .= 'SK';
                            } else {
                                $this->primary .= 'SK';
                                $this->secondary .= 'SK';
                            }
                            $this->current += 3;
                            break;
                        }
                        if ($this->current === 0
                            && !$this->IsVowel($this->original, 3)
                            && substr($this->original, $this->current + 3, 1) !== 'W'
                        ) {
                            $this->primary .= 'X';
                            $this->secondary .= 'S';
                        } else {
                            $this->primary .= 'X';
                            $this->secondary .= 'X';
                        }
                        $this->current += 3;
                        break;
                    }
                    if ($this->StringAt($this->original, $this->current + 2, 1, ['I', 'E', 'Y'])) {
                        $this->primary .= 'S';
                        $this->secondary .= 'S';
                        $this->current += 3;
                        break;
                    }
                    // else
                    $this->primary .= 'SK';
                    $this->secondary .= 'SK';
                    $this->current += 3;
                    break;
                }
                // french e.g. 'resnais', 'artois'
                if ($this->current === $this->last && $this->StringAt($this->original, $this->current - 2, 2, ['AI', 'OI'])) {
                    $this->primary .= '';
                    $this->secondary .= 'S';
                } else {
                    $this->primary .= 'S';
                    $this->secondary .= 'S';
                }
                if ($this->StringAt($this->original, $this->current + 1, 1, ['S', 'Z'])) {
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                break;
            case 'T':
                if ($this->StringAt($this->original, $this->current, 4, ['TION'])) {
                    $this->primary .= 'X';
                    $this->secondary .= 'X';
                    $this->current += 3;
                    break;
                }
                if ($this->StringAt($this->original, $this->current, 3, ['TIA', 'TCH'])) {
                    $this->primary .= 'X';
                    $this->secondary .= 'X';
                    $this->current += 3;
                    break;
                }
                if ($this->StringAt($this->original, $this->current, 2, ['TH'])
                    || $this->StringAt($this->original, $this->current, 3, ['TTH'])
                ) {
                    // special case 'thomas', 'thames' or germanic
                    if ($this->StringAt($this->original, $this->current + 2, 2, ['OM', 'AM'])
                        || $this->StringAt($this->original, 0, 4, ['VAN ', 'VON '])
                        || $this->StringAt($this->original, 0, 3, ['SCH'])
                    ) {
                        $this->primary .= 'T';
                        $this->secondary .= 'T';
                    } else {
                        // the digit zero is the primary code emitted for 'TH'
                        $this->primary .= '0';
                        $this->secondary .= 'T';
                    }
                    $this->current += 2;
                    break;
                }
                if ($this->StringAt($this->original, $this->current + 1, 1, ['T', 'D'])) {
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                $this->primary .= 'T';
                $this->secondary .= 'T';
                break;
            case 'V':
                if (substr($this->original, $this->current + 1, 1) === 'V') {
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                $this->primary .= 'F';
                $this->secondary .= 'F';
                break;
            case 'W':
                // can also be in middle of word
                if ($this->StringAt($this->original, $this->current, 2, ['WR'])) {
                    $this->primary .= 'R';
                    $this->secondary .= 'R';
                    $this->current += 2;
                    break;
                }
                if ($this->current === 0
                    && ($this->IsVowel($this->original, $this->current + 1) || $this->StringAt($this->original, $this->current, 2, ['WH']))
                ) {
                    // Wasserman should match Vasserman
                    if ($this->IsVowel($this->original, $this->current + 1)) {
                        $this->primary .= 'A';
                        $this->secondary .= 'F';
                    } else {
                        // need Uomo to match Womo
                        $this->primary .= 'A';
                        $this->secondary .= 'A';
                    }
                }
                // Arnow should match Arnoff
                if (($this->current === $this->last && $this->IsVowel($this->original, $this->current - 1))
                    || $this->StringAt($this->original, $this->current - 1, 5, ['EWSKI', 'EWSKY', 'OWSKI', 'OWSKY'])
                    || $this->StringAt($this->original, 0, 3, ['SCH'])
                ) {
                    $this->primary .= '';
                    $this->secondary .= 'F';
                    $this->current += 1;
                    break;
                }
                // polish e.g. 'filipowicz'
                if ($this->StringAt($this->original, $this->current, 4, ['WICZ', 'WITZ'])) {
                    $this->primary .= 'TS';
                    $this->secondary .= 'FX';
                    $this->current += 4;
                    break;
                }
                // else skip it
                $this->current += 1;
                break;
            case 'X':
                // french e.g. breaux
                if (!(
                    $this->current === $this->last
                    && (
                        $this->StringAt($this->original, $this->current - 3, 3, ['IAU', 'EAU'])
                        || $this->StringAt($this->original, $this->current - 2, 2, ['AU', 'OU'])
                    )
                )) {
                    $this->primary .= 'KS';
                    $this->secondary .= 'KS';
                }
                if ($this->StringAt($this->original, $this->current + 1, 1, ['C', 'X'])) {
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                break;
            case 'Z':
                // chinese pinyin e.g. 'zhao'
                if (substr($this->original, $this->current + 1, 1) === 'H') {
                    $this->primary .= 'J';
                    $this->secondary .= 'J';
                    $this->current += 2;
                    break;
                }
                if ($this->StringAt($this->original, $this->current + 1, 2, ['ZO', 'ZI', 'ZA'])
                    || (
                        $this->SlavoGermanic($this->original)
                        && ($this->current > 0 && substr($this->original, $this->current - 1, 1) !== 'T')
                    )
                ) {
                    $this->primary .= 'S';
                    $this->secondary .= 'TS';
                } else {
                    $this->primary .= 'S';
                    $this->secondary .= 'S';
                }
                if (substr($this->original, $this->current + 1, 1) === 'Z') {
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                break;
            default:
                // anything else (digits, blanks, unknown bytes) contributes nothing to the keys
                $this->current += 1;
        }
    }
    // end while
    $this->primary = substr($this->primary, 0, 4);
    $this->secondary = substr($this->secondary, 0, 4);

    return [
        'primary' => $this->primary,
        'secondary' => $this->secondary,
    ];
}
811 
812  // end of function DoubleMetaPhone
813  // Helper methods (note: declared public)
/**
 * Tells whether the slice of $string that starts at $start and is up to
 * $length bytes long is identical to one of the candidate strings.
 *
 * Candidates are compared strictly as strings. A loose comparison would
 * treat two numeric-looking strings such as '1e1' and '10' as equal.
 *
 * @param string $string Haystack to take the slice from
 * @param int $start Zero-based byte offset; negative or past-the-end offsets never match
 * @param int $length Maximum number of bytes in the slice
 * @param string[] $list Candidate strings
 * @return bool TRUE if the slice equals one of the candidates
 */
public function StringAt($string, $start, $length, $list)
{
    if ($start < 0 || $start >= strlen($string)) {
        return false;
    }

    return in_array(substr($string, $start, $length), $list, true);
}
836 
/**
 * Tells whether the byte at offset $pos of $string is one of A, E, I, O, U, Y
 * (upper case only).
 *
 * Negative offsets are rejected explicitly, in line with StringAt().
 * Without the guard substr() would count a negative offset from the end
 * of the string and report on an unrelated character.
 *
 * @param string $string String to inspect
 * @param int $pos Zero-based byte offset
 * @return int|false 1 for a vowel, 0 otherwise (the preg_match() result; FALSE only on a regex error)
 */
public function IsVowel($string, $pos)
{
    if ($pos < 0 || $pos >= strlen($string)) {
        return 0;
    }

    return preg_match('/[AEIOUY]/', substr($string, $pos, 1));
}
848 
/**
 * Checks the given string for one of the markers W, K, CZ or WITZ, which
 * the encoder uses as a hint for a Slavic or Germanic spelling.
 *
 * @param string $string Upper-cased word to inspect
 * @return int|false 1 if a marker occurs anywhere in the string, 0 otherwise (the preg_match() result)
 */
public function SlavoGermanic($string)
{
    $markerPattern = '/W|K|CZ|WITZ/';
    $matchCount = preg_match($markerPattern, $string);

    return $matchCount;
}
859 }
‪TYPO3\CMS\IndexedSearch\Utility\DoubleMetaPhoneUtility\$original
‪string $original
Definition: DoubleMetaPhoneUtility.php:30
‪TYPO3\CMS\Core\Security\ContentSecurityPolicy\length
‪@ length
Definition: HashType.php:33
‪TYPO3\CMS\IndexedSearch\Utility\DoubleMetaPhoneUtility
Definition: DoubleMetaPhoneUtility.php:27
‪TYPO3\CMS\IndexedSearch\Utility\DoubleMetaPhoneUtility\$pObj
‪Indexer $pObj
Definition: DoubleMetaPhoneUtility.php:52
‪TYPO3\CMS\IndexedSearch\Utility\DoubleMetaPhoneUtility\DoubleMetaPhone
‪array DoubleMetaPhone($string)
Definition: DoubleMetaPhoneUtility.php:77
‪TYPO3\CMS\IndexedSearch\Utility\DoubleMetaPhoneUtility\$secondary
‪string $secondary
Definition: DoubleMetaPhoneUtility.php:38
‪TYPO3\CMS\IndexedSearch\Utility\DoubleMetaPhoneUtility\$length
‪int $length
Definition: DoubleMetaPhoneUtility.php:42
‪TYPO3\CMS\IndexedSearch\Utility\DoubleMetaPhoneUtility\$primary
‪string $primary
Definition: DoubleMetaPhoneUtility.php:34
‪TYPO3\CMS\IndexedSearch\Utility
Definition: DoubleMetaPhoneUtility.php:16
‪TYPO3\CMS\IndexedSearch\Utility\DoubleMetaPhoneUtility\StringAt
‪bool StringAt($string, $start, $length, $list)
Definition: DoubleMetaPhoneUtility.php:817
‪TYPO3\CMS\IndexedSearch\Utility\DoubleMetaPhoneUtility\SlavoGermanic
‪bool int SlavoGermanic($string)
Definition: DoubleMetaPhoneUtility.php:849
‪TYPO3\CMS\IndexedSearch\Utility\DoubleMetaPhoneUtility\$current
‪int $current
Definition: DoubleMetaPhoneUtility.php:50
‪TYPO3\CMS\IndexedSearch\Utility\DoubleMetaPhoneUtility\$last
‪int $last
Definition: DoubleMetaPhoneUtility.php:46
‪TYPO3\CMS\IndexedSearch\Utility\DoubleMetaPhoneUtility\metaphone
‪string metaphone($string, $sys_language_uid=0)
Definition: DoubleMetaPhoneUtility.php:63
‪TYPO3\CMS\IndexedSearch\Utility\DoubleMetaPhoneUtility\IsVowel
‪bool int IsVowel($string, $pos)
Definition: DoubleMetaPhoneUtility.php:838
‪TYPO3\CMS\IndexedSearch\Indexer
Definition: Indexer.php:39