TYPO3 CMS  TYPO3_8-7
DoubleMetaPhoneUtility.php
Go to the documentation of this file.
1 <?php
3 
4 /*
5  * This file is part of the TYPO3 CMS project.
6  *
7  * It is free software; you can redistribute it and/or modify it under
8  * the terms of the GNU General Public License, either version 2
9  * of the License, or any later version.
10  *
11  * For the full copyright and license information, please read the
12  * LICENSE.txt file that was distributed with this source code.
13  *
14  * The TYPO3 project - inspiring people to share!
15  */
16 
23 {
// Upper-cased copy of the input with one trailing space appended; rebuilt by every DoubleMetaPhone() call.
27  public $original = '';
28 
// Primary key built up by DoubleMetaPhone(); cut to at most 4 characters before it is returned.
32  public $primary = '';
33 
// Secondary (alternate) key built up by DoubleMetaPhone(); cut to at most 4 characters before it is returned.
37  public $secondary = '';
38 
// strlen() of the input as passed to DoubleMetaPhone(), i.e. without the appended space.
42  public $length = 0;
43 
// Index of the last input character ($length - 1).
47  public $last = 0;
48 
// Index into $original of the character the main loop is currently examining.
52  public $current = 0;
53 
54  // methods
55  // TYPO3 specific API to this class. BEGIN
/**
 * TYPO3-facing entry point: returns only the primary Double Metaphone key.
 *
 * @param string $string Text to encode; passed unchanged to DoubleMetaPhone().
 * @param int $sys_language_uid Not used by this method.
 * @return string The 'primary' entry of the array returned by DoubleMetaPhone().
 */
public function metaphone($string, $sys_language_uid = 0)
{
    return $this->DoubleMetaPhone($string)['primary'];
}
68 
69  // TYPO3 specific API to this class. END
70  // Public method
/**
 * Builds the primary and secondary Double Metaphone keys for a string.
 *
 * The input is upper-cased with strtoupper() and padded with one trailing
 * space, then scanned left to right. Each switch case appends zero or more
 * code letters to $this->primary / $this->secondary and advances
 * $this->current by the number of input characters it consumed.
 * All string handling is byte-based (strlen/substr).
 *
 * Side effects: overwrites $this->original, $this->primary, $this->secondary,
 * $this->length, $this->last and $this->current on every call.
 *
 * @param string $string Word or name to encode
 * @return array Keys 'primary' and 'secondary', each at most 4 characters long
 */
77  public function DoubleMetaPhone($string)
78  {
79  $this->primary = '';
80  $this->secondary = '';
    // NOTE(review): the next two statements are identical; the second one has no additional effect.
81  $this->current = 0;
82  $this->current = 0;
    // length/last describe the input as passed in, i.e. before the padding space below is appended.
83  $this->length = strlen($string);
84  $this->last = $this->length - 1;
    // The trailing space makes index 0 exist even for an empty input and lets
    // patterns that end in a space (e.g. 'IER ') match at the end of the word.
85  $this->original = $string . ' ';
86  $this->original = strtoupper($this->original);
87  // skip this at beginning of word
88  if ($this->StringAt($this->original, 0, 2, ['GN', 'KN', 'PN', 'WR', 'PS'])) {
89  $this->current++;
90  }
91  // Initial 'X' is pronounced 'Z' e.g. 'Xavier'
92  if ($this->original[0] === 'X') {
93  $this->primary .= 'S';
94  // 'Z' maps to 'S'
95  $this->secondary .= 'S';
96  $this->current++;
97  }
98  // main loop
    // Runs while either key is still shorter than 4 characters and input remains.
99  while (strlen($this->primary) < 4 || strlen($this->secondary) < 4) {
100  if ($this->current >= $this->length) {
101  break;
102  }
    // The switch subject is exactly one byte of the upper-cased input.
103  switch (substr($this->original, $this->current, 1)) {
    // All vowel cases fall through to the shared body under 'Y'.
104  case 'A':
105 
106  case 'E':
107 
108  case 'I':
109 
110  case 'O':
111 
112  case 'U':
113 
114  case 'Y':
115  if ($this->current == 0) {
116  // all init vowels now map to 'A'
117  $this->primary .= 'A';
118  $this->secondary .= 'A';
119  }
120  $this->current += 1;
121  break;
122  case 'B':
123  // '-mb', e.g. "dumb", already skipped over ...
124  $this->primary .= 'P';
125  $this->secondary .= 'P';
126  if (substr($this->original, $this->current + 1, 1) === 'B') {
127  $this->current += 2;
128  } else {
129  $this->current += 1;
130  }
131  break;
    // NOTE(review): the switch subject is a one-byte substr(); if this file and the input are UTF-8,
    // 'Ç' is two bytes and this case can never match - confirm the intended encoding.
132  case 'Ç':
133  $this->primary .= 'S';
134  $this->secondary .= 'S';
135  $this->current += 1;
136  break;
137  case 'C':
138  // various germanic
139  if ($this->current > 1 && !$this->IsVowel($this->original, ($this->current - 2)) && $this->StringAt($this->original, $this->current - 1, 3, ['ACH']) && (substr($this->original, $this->current + 2, 1) !== 'I' && (substr($this->original, $this->current + 2, 1) !== 'E' || $this->StringAt($this->original, $this->current - 2, 6, ['BACHER', 'MACHER'])))) {
140  $this->primary .= 'K';
141  $this->secondary .= 'K';
142  $this->current += 2;
143  break;
144  }
145  // special case 'caesar'
146  if ($this->current == 0 && $this->StringAt($this->original, $this->current, 6, ['CAESAR'])) {
147  $this->primary .= 'S';
148  $this->secondary .= 'S';
149  $this->current += 2;
150  break;
151  }
152  // italian 'chianti'
153  if ($this->StringAt($this->original, $this->current, 4, ['CHIA'])) {
154  $this->primary .= 'K';
155  $this->secondary .= 'K';
156  $this->current += 2;
157  break;
158  }
159  if ($this->StringAt($this->original, $this->current, 2, ['CH'])) {
160  // find 'michael'
161  if ($this->current > 0 && $this->StringAt($this->original, $this->current, 4, ['CHAE'])) {
162  $this->primary .= 'K';
163  $this->secondary .= 'X';
164  $this->current += 2;
165  break;
166  }
167  // greek roots e.g. 'chemistry', 'chorus'
168  if ($this->current == 0 && ($this->StringAt($this->original, $this->current + 1, 5, ['HARAC', 'HARIS']) || $this->StringAt($this->original, $this->current + 1, 3, ['HOR', 'HYM', 'HIA', 'HEM'])) && !$this->StringAt($this->original, 0, 5, ['CHORE'])) {
169  $this->primary .= 'K';
170  $this->secondary .= 'K';
171  $this->current += 2;
172  break;
173  }
174  // germanic, greek, or otherwise 'ch' for 'kh' sound
    // StringAt() returns 0 for a negative start, so the current - 2 / current - 1 look-behinds are safe near the start of the word.
175  if ($this->StringAt($this->original, 0, 4, ['VAN ', 'VON ']) || $this->StringAt($this->original, 0, 3, ['SCH']) || $this->StringAt($this->original, $this->current - 2, 6, ['ORCHES', 'ARCHIT', 'ORCHID']) || $this->StringAt($this->original, $this->current + 2, 1, ['T', 'S']) || ($this->StringAt($this->original, $this->current - 1, 1, ['A', 'O', 'U', 'E']) || $this->current == 0) && $this->StringAt($this->original, $this->current + 2, 1, ['L', 'R', 'N', 'M', 'B', 'H', 'F', 'V', 'W', ' '])) {
176  $this->primary .= 'K';
177  $this->secondary .= 'K';
178  } else {
179  if ($this->current > 0) {
180  if ($this->StringAt($this->original, 0, 2, ['MC'])) {
181  // e.g. 'McHugh'
182  $this->primary .= 'K';
183  $this->secondary .= 'K';
184  } else {
185  $this->primary .= 'X';
186  $this->secondary .= 'K';
187  }
188  } else {
189  $this->primary .= 'X';
190  $this->secondary .= 'X';
191  }
192  }
193  $this->current += 2;
194  break;
195  }
196  // e.g. 'czerny'
197  if ($this->StringAt($this->original, $this->current, 2, ['CZ']) && !$this->StringAt($this->original, ($this->current - 2), 4, ['WICZ'])) {
198  $this->primary .= 'S';
199  $this->secondary .= 'X';
200  $this->current += 2;
201  break;
202  }
203  // e.g. 'focaccia'
204  if ($this->StringAt($this->original, $this->current + 1, 3, ['CIA'])) {
205  $this->primary .= 'X';
206  $this->secondary .= 'X';
207  $this->current += 3;
208  break;
209  }
210  // double 'C', but not McClellan'
211  if ($this->StringAt($this->original, $this->current, 2, ['CC']) && !($this->current == 1 && $this->original[0] === 'M')) {
212  // 'bellocchio' but not 'bacchus'
213  if ($this->StringAt($this->original, $this->current + 2, 1, ['I', 'E', 'H']) && !$this->StringAt($this->original, ($this->current + 2), 2, ['HU'])) {
214  // 'accident', 'accede', 'succeed'
215  if ($this->current == 1 && substr($this->original, $this->current - 1, 1) === 'A' || $this->StringAt($this->original, $this->current - 1, 5, ['UCCEE', 'UCCES'])) {
216  $this->primary .= 'KS';
217  $this->secondary .= 'KS';
218  } else {
219  $this->primary .= 'X';
220  $this->secondary .= 'X';
221  }
222  $this->current += 3;
223  break;
224  }
225  // Pierce's rule
226  $this->primary .= 'K';
227  $this->secondary .= 'K';
228  $this->current += 2;
229  break;
230  }
231  if ($this->StringAt($this->original, $this->current, 2, ['CK', 'CG', 'CQ'])) {
232  $this->primary .= 'K';
233  $this->secondary .= 'K';
234  $this->current += 2;
235  break;
236  }
237  if ($this->StringAt($this->original, $this->current, 2, ['CI', 'CE', 'CY'])) {
238  // italian vs. english
239  if ($this->StringAt($this->original, $this->current, 3, ['CIO', 'CIE', 'CIA'])) {
240  $this->primary .= 'S';
241  $this->secondary .= 'X';
242  } else {
243  $this->primary .= 'S';
244  $this->secondary .= 'S';
245  }
246  $this->current += 2;
247  break;
248  }
249  // else
250  $this->primary .= 'K';
251  $this->secondary .= 'K';
252  // name sent in 'mac caffrey', 'mac gregor'
253  if ($this->StringAt($this->original, $this->current + 1, 2, [' C', ' Q', ' G'])) {
254  $this->current += 3;
255  } else {
256  if ($this->StringAt($this->original, $this->current + 1, 1, ['C', 'K', 'Q']) && !$this->StringAt($this->original, ($this->current + 1), 2, ['CE', 'CI'])) {
257  $this->current += 2;
258  } else {
259  $this->current += 1;
260  }
261  }
262  break;
263  case 'D':
264  if ($this->StringAt($this->original, $this->current, 2, ['DG'])) {
265  if ($this->StringAt($this->original, $this->current + 2, 1, ['I', 'E', 'Y'])) {
266  // e.g. 'edge'
267  $this->primary .= 'J';
268  $this->secondary .= 'J';
269  $this->current += 3;
270  break;
271  }
272  // e.g. 'edgar'
273  $this->primary .= 'TK';
274  $this->secondary .= 'TK';
275  $this->current += 2;
276  break;
277  }
278  if ($this->StringAt($this->original, $this->current, 2, ['DT', 'DD'])) {
279  $this->primary .= 'T';
280  $this->secondary .= 'T';
281  $this->current += 2;
282  break;
283  }
284  // else
285  $this->primary .= 'T';
286  $this->secondary .= 'T';
287  $this->current += 1;
288  break;
289  case 'F':
290  if (substr($this->original, $this->current + 1, 1) === 'F') {
291  $this->current += 2;
292  } else {
293  $this->current += 1;
294  }
295  $this->primary .= 'F';
296  $this->secondary .= 'F';
297  break;
298  case 'G':
299  if (substr($this->original, $this->current + 1, 1) === 'H') {
300  if ($this->current > 0 && !$this->IsVowel($this->original, ($this->current - 1))) {
301  $this->primary .= 'K';
302  $this->secondary .= 'K';
303  $this->current += 2;
304  break;
305  }
306  if ($this->current < 3) {
307  // 'ghislane', 'ghiradelli'
308  if ($this->current == 0) {
309  if (substr($this->original, $this->current + 2, 1) === 'I') {
310  $this->primary .= 'J';
311  $this->secondary .= 'J';
312  } else {
313  $this->primary .= 'K';
314  $this->secondary .= 'K';
315  }
316  $this->current += 2;
317  break;
318  }
319  }
320  // Parker's rule (with some further refinements) - e.g. 'hugh'
    // When this matches, 'GH' is skipped without appending any code.
321  if ($this->current > 1 && $this->StringAt($this->original, $this->current - 2, 1, ['B', 'H', 'D']) || $this->current > 2 && $this->StringAt($this->original, $this->current - 3, 1, ['B', 'H', 'D']) || $this->current > 3 && $this->StringAt($this->original, $this->current - 4, 1, ['B', 'H'])) {
322  $this->current += 2;
323  break;
324  }
325  // e.g. 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
326  if ($this->current > 2 && substr($this->original, $this->current - 1, 1) === 'U' && $this->StringAt($this->original, $this->current - 3, 1, ['C', 'G', 'L', 'R', 'T'])) {
327  $this->primary .= 'F';
328  $this->secondary .= 'F';
329  } elseif ($this->current > 0 && substr($this->original, $this->current - 1, 1) !== 'I') {
330  $this->primary .= 'K';
331  $this->secondary .= 'K';
332  }
333  $this->current += 2;
334  break;
335  }
336  if (substr($this->original, $this->current + 1, 1) === 'N') {
337  if ($this->current == 1 && $this->IsVowel($this->original, 0) && !$this->SlavoGermanic($this->original)) {
338  $this->primary .= 'KN';
339  $this->secondary .= 'N';
340  } else {
341  // not e.g. 'cagney'
    // NOTE(review): the middle substr() call has no length argument, so it compares the whole
    // remainder of the string with 'Y'. The character at current + 1 is already known to be 'N'
    // here, so that test is always true; presumably a one-character check was intended - confirm.
342  if (!$this->StringAt($this->original, ($this->current + 2), 2, ['EY']) && substr($this->original, $this->current + 1) !== 'Y' && !$this->SlavoGermanic($this->original)) {
343  $this->primary .= 'N';
344  $this->secondary .= 'KN';
345  } else {
346  $this->primary .= 'KN';
347  $this->secondary .= 'KN';
348  }
349  }
350  $this->current += 2;
351  break;
352  }
353  // 'tagliaro'
354  if ($this->StringAt($this->original, $this->current + 1, 2, ['LI']) && !$this->SlavoGermanic($this->original)) {
355  $this->primary .= 'KL';
356  $this->secondary .= 'L';
357  $this->current += 2;
358  break;
359  }
360  // -ges-, -gep-, -gel- at beginning
361  if ($this->current == 0 && (substr($this->original, $this->current + 1, 1) === 'Y' || $this->StringAt($this->original, $this->current + 1, 2, [
362  'ES',
363  'EP',
364  'EB',
365  'EL',
366  'EY',
367  'IB',
368  'IL',
369  'IN',
370  'IE',
371  'EI',
372  'ER'
373  ]))) {
374  $this->primary .= 'K';
375  $this->secondary .= 'J';
376  $this->current += 2;
377  break;
378  }
379  // -ger-, -gy-
380  if (($this->StringAt($this->original, $this->current + 1, 2, ['ER']) || substr($this->original, $this->current + 1, 1) === 'Y') && !$this->StringAt($this->original, 0, 6, ['DANGER', 'RANGER', 'MANGER']) && !$this->StringAt($this->original, ($this->current - 1), 1, ['E', 'I']) && !$this->StringAt($this->original, ($this->current - 1), 3, ['RGY', 'OGY'])) {
381  $this->primary .= 'K';
382  $this->secondary .= 'J';
383  $this->current += 2;
384  break;
385  }
386  // italian e.g. 'biaggi'
387  if ($this->StringAt($this->original, $this->current + 1, 1, ['E', 'I', 'Y']) || $this->StringAt($this->original, $this->current - 1, 4, ['AGGI', 'OGGI'])) {
388  // obvious germanic
389  if ($this->StringAt($this->original, 0, 4, ['VAN ', 'VON ']) || $this->StringAt($this->original, 0, 3, ['SCH']) || $this->StringAt($this->original, $this->current + 1, 2, ['ET'])) {
390  $this->primary .= 'K';
391  $this->secondary .= 'K';
392  } else {
393  // always soft if french ending
394  if ($this->StringAt($this->original, $this->current + 1, 4, ['IER '])) {
395  $this->primary .= 'J';
396  $this->secondary .= 'J';
397  } else {
398  $this->primary .= 'J';
399  $this->secondary .= 'K';
400  }
401  }
402  $this->current += 2;
403  break;
404  }
405  if (substr($this->original, $this->current + 1, 1) === 'G') {
406  $this->current += 2;
407  } else {
408  $this->current += 1;
409  }
410  $this->primary .= 'K';
411  $this->secondary .= 'K';
412  break;
413  case 'H':
414  // only keep if first & before vowel or btw. 2 vowels
415  if (($this->current == 0 || $this->IsVowel($this->original, $this->current - 1)) && $this->IsVowel($this->original, $this->current + 1)) {
416  $this->primary .= 'H';
417  $this->secondary .= 'H';
418  $this->current += 2;
419  } else {
420  $this->current += 1;
421  }
422  break;
423  case 'J':
424  // obvious spanish, 'jose', 'san jacinto'
425  if ($this->StringAt($this->original, $this->current, 4, ['JOSE']) || $this->StringAt($this->original, 0, 4, ['SAN '])) {
426  if ($this->current == 0 && substr($this->original, $this->current + 4, 1) === ' ' || $this->StringAt($this->original, 0, 4, ['SAN '])) {
427  $this->primary .= 'H';
428  $this->secondary .= 'H';
429  } else {
430  $this->primary .= 'J';
431  $this->secondary .= 'H';
432  }
433  $this->current += 1;
434  break;
435  }
436  if ($this->current == 0 && !$this->StringAt($this->original, $this->current, 4, ['JOSE'])) {
437  $this->primary .= 'J';
438  // Yankelovich/Jankelowicz
439  $this->secondary .= 'A';
440  } else {
441  // spanish pron. of .e.g. 'bajador'
442  if ($this->IsVowel($this->original, $this->current - 1) && !$this->SlavoGermanic($this->original) && (substr($this->original, $this->current + 1, 1) === 'A' || substr($this->original, $this->current + 1, 1) === 'O')) {
443  $this->primary .= 'J';
444  $this->secondary .= 'H';
445  } else {
446  if ($this->current == $this->last) {
447  $this->primary .= 'J';
448  $this->secondary .= '';
449  } else {
450  if (!$this->StringAt($this->original, ($this->current + 1), 1, ['L', 'T', 'K', 'S', 'N', 'M', 'B', 'Z']) && !$this->StringAt($this->original, ($this->current - 1), 1, ['S', 'K', 'L'])) {
451  $this->primary .= 'J';
452  $this->secondary .= 'J';
453  }
454  }
455  }
456  }
457  if (substr($this->original, $this->current + 1, 1) === 'J') {
458  // it could happen
459  $this->current += 2;
460  } else {
461  $this->current += 1;
462  }
463  break;
464  case 'K':
465  if (substr($this->original, $this->current + 1, 1) === 'K') {
466  $this->current += 2;
467  } else {
468  $this->current += 1;
469  }
470  $this->primary .= 'K';
471  $this->secondary .= 'K';
472  break;
473  case 'L':
474  if (substr($this->original, $this->current + 1, 1) === 'L') {
475  // spanish e.g. 'cabrillo', 'gallegos'
476  if ($this->current == $this->length - 3 && $this->StringAt($this->original, $this->current - 1, 4, ['ILLO', 'ILLA', 'ALLE']) || ($this->StringAt($this->original, $this->last - 1, 2, ['AS', 'OS']) || $this->StringAt($this->original, $this->last, 1, ['A', 'O'])) && $this->StringAt($this->original, $this->current - 1, 4, ['ALLE'])) {
477  $this->primary .= 'L';
478  $this->secondary .= '';
479  $this->current += 2;
480  break;
481  }
482  $this->current += 2;
483  } else {
484  $this->current += 1;
485  }
486  $this->primary .= 'L';
487  $this->secondary .= 'L';
488  break;
489  case 'M':
490  if ($this->StringAt($this->original, $this->current - 1, 3, ['UMB']) && ($this->current + 1 == $this->last || $this->StringAt($this->original, $this->current + 2, 2, ['ER'])) || substr($this->original, $this->current + 1, 1) === 'M') {
491  $this->current += 2;
492  } else {
493  $this->current += 1;
494  }
495  $this->primary .= 'M';
496  $this->secondary .= 'M';
497  break;
498  case 'N':
499  if (substr($this->original, $this->current + 1, 1) === 'N') {
500  $this->current += 2;
501  } else {
502  $this->current += 1;
503  }
504  $this->primary .= 'N';
505  $this->secondary .= 'N';
506  break;
    // NOTE(review): same one-byte switch subject as for 'Ç' above; a multi-byte 'Ñ' cannot match - confirm the intended encoding.
507  case 'Ñ':
508  $this->current += 1;
509  $this->primary .= 'N';
510  $this->secondary .= 'N';
511  break;
512  case 'P':
513  if (substr($this->original, $this->current + 1, 1) === 'H') {
514  $this->current += 2;
515  $this->primary .= 'F';
516  $this->secondary .= 'F';
517  break;
518  }
519  // also account for "campbell" and "raspberry"
520  if ($this->StringAt($this->original, $this->current + 1, 1, ['P', 'B'])) {
521  $this->current += 2;
522  } else {
523  $this->current += 1;
524  }
525  $this->primary .= 'P';
526  $this->secondary .= 'P';
527  break;
528  case 'Q':
529  if (substr($this->original, $this->current + 1, 1) === 'Q') {
530  $this->current += 2;
531  } else {
532  $this->current += 1;
533  }
534  $this->primary .= 'K';
535  $this->secondary .= 'K';
536  break;
537  case 'R':
538  // french e.g. 'rogier', but exclude 'hochmeier'
539  if ($this->current == $this->last && !$this->SlavoGermanic($this->original) && $this->StringAt($this->original, $this->current - 2, 2, ['IE']) && !$this->StringAt($this->original, ($this->current - 4), 2, ['ME', 'MA'])) {
540  $this->primary .= '';
541  $this->secondary .= 'R';
542  } else {
543  $this->primary .= 'R';
544  $this->secondary .= 'R';
545  }
546  if (substr($this->original, $this->current + 1, 1) === 'R') {
547  $this->current += 2;
548  } else {
549  $this->current += 1;
550  }
551  break;
552  case 'S':
553  // special cases 'island', 'isle', 'carlisle', 'carlysle'
554  if ($this->StringAt($this->original, $this->current - 1, 3, ['ISL', 'YSL'])) {
555  $this->current += 1;
556  break;
557  }
558  // special case 'sugar-'
559  if ($this->current == 0 && $this->StringAt($this->original, $this->current, 5, ['SUGAR'])) {
560  $this->primary .= 'X';
561  $this->secondary .= 'S';
562  $this->current += 1;
563  break;
564  }
565  if ($this->StringAt($this->original, $this->current, 2, ['SH'])) {
566  // germanic
567  if ($this->StringAt($this->original, $this->current + 1, 4, ['HEIM', 'HOEK', 'HOLM', 'HOLZ'])) {
568  $this->primary .= 'S';
569  $this->secondary .= 'S';
570  } else {
571  $this->primary .= 'X';
572  $this->secondary .= 'X';
573  }
574  $this->current += 2;
575  break;
576  }
577  // italian & armenian
578  if ($this->StringAt($this->original, $this->current, 3, ['SIO', 'SIA']) || $this->StringAt($this->original, $this->current, 4, ['SIAN'])) {
579  if (!$this->SlavoGermanic($this->original)) {
580  $this->primary .= 'S';
581  $this->secondary .= 'X';
582  } else {
583  $this->primary .= 'S';
584  $this->secondary .= 'S';
585  }
586  $this->current += 3;
587  break;
588  }
589  // german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
590  // also, -sz- in slavic language although in hungarian it is pronounced 's'
591  if ($this->current == 0 && $this->StringAt($this->original, $this->current + 1, 1, ['M', 'N', 'L', 'W']) || $this->StringAt($this->original, $this->current + 1, 1, ['Z'])) {
592  $this->primary .= 'S';
593  $this->secondary .= 'X';
594  if ($this->StringAt($this->original, $this->current + 1, 1, ['Z'])) {
595  $this->current += 2;
596  } else {
597  $this->current += 1;
598  }
599  break;
600  }
601  if ($this->StringAt($this->original, $this->current, 2, ['SC'])) {
602  // Schlesinger's rule
603  if (substr($this->original, $this->current + 2, 1) === 'H') {
604  // dutch origin, e.g. 'school', 'schooner'
605  if ($this->StringAt($this->original, $this->current + 3, 2, ['OO', 'ER', 'EN', 'UY', 'ED', 'EM'])) {
606  // 'schermerhorn', 'schenker'
607  if ($this->StringAt($this->original, $this->current + 3, 2, ['ER', 'EN'])) {
608  $this->primary .= 'X';
609  $this->secondary .= 'SK';
610  } else {
611  $this->primary .= 'SK';
612  $this->secondary .= 'SK';
613  }
614  $this->current += 3;
615  break;
616  }
617  if ($this->current == 0 && !$this->IsVowel($this->original, 3) && substr($this->original, $this->current + 3, 1) !== 'W') {
618  $this->primary .= 'X';
619  $this->secondary .= 'S';
620  } else {
621  $this->primary .= 'X';
622  $this->secondary .= 'X';
623  }
624  $this->current += 3;
625  break;
626  }
627  if ($this->StringAt($this->original, $this->current + 2, 1, ['I', 'E', 'Y'])) {
628  $this->primary .= 'S';
629  $this->secondary .= 'S';
630  $this->current += 3;
631  break;
632  }
633  // else
634  $this->primary .= 'SK';
635  $this->secondary .= 'SK';
636  $this->current += 3;
637  break;
638  }
639  // french e.g. 'resnais', 'artois'
640  if ($this->current == $this->last && $this->StringAt($this->original, $this->current - 2, 2, ['AI', 'OI'])) {
641  $this->primary .= '';
642  $this->secondary .= 'S';
643  } else {
644  $this->primary .= 'S';
645  $this->secondary .= 'S';
646  }
647  if ($this->StringAt($this->original, $this->current + 1, 1, ['S', 'Z'])) {
648  $this->current += 2;
649  } else {
650  $this->current += 1;
651  }
652  break;
653  case 'T':
654  if ($this->StringAt($this->original, $this->current, 4, ['TION'])) {
655  $this->primary .= 'X';
656  $this->secondary .= 'X';
657  $this->current += 3;
658  break;
659  }
660  if ($this->StringAt($this->original, $this->current, 3, ['TIA', 'TCH'])) {
661  $this->primary .= 'X';
662  $this->secondary .= 'X';
663  $this->current += 3;
664  break;
665  }
666  if ($this->StringAt($this->original, $this->current, 2, ['TH']) || $this->StringAt($this->original, $this->current, 3, ['TTH'])) {
667  // special case 'thomas', 'thames' or germanic
668  if ($this->StringAt($this->original, $this->current + 2, 2, ['OM', 'AM']) || $this->StringAt($this->original, 0, 4, ['VAN ', 'VON ']) || $this->StringAt($this->original, 0, 3, ['SCH'])) {
669  $this->primary .= 'T';
670  $this->secondary .= 'T';
671  } else {
    // The primary key gets the digit '0' (zero, not the letter O) for 'TH'; the secondary gets 'T'.
672  $this->primary .= '0';
673  $this->secondary .= 'T';
674  }
675  $this->current += 2;
676  break;
677  }
678  if ($this->StringAt($this->original, $this->current + 1, 1, ['T', 'D'])) {
679  $this->current += 2;
680  } else {
681  $this->current += 1;
682  }
683  $this->primary .= 'T';
684  $this->secondary .= 'T';
685  break;
686  case 'V':
687  if (substr($this->original, $this->current + 1, 1) === 'V') {
688  $this->current += 2;
689  } else {
690  $this->current += 1;
691  }
692  $this->primary .= 'F';
693  $this->secondary .= 'F';
694  break;
695  case 'W':
696  // can also be in middle of word
697  if ($this->StringAt($this->original, $this->current, 2, ['WR'])) {
698  $this->primary .= 'R';
699  $this->secondary .= 'R';
700  $this->current += 2;
701  break;
702  }
    // This initial-W branch only appends codes; it neither advances nor breaks,
    // so the checks below still run for the same character.
703  if ($this->current == 0 && ($this->IsVowel($this->original, $this->current + 1) || $this->StringAt($this->original, $this->current, 2, ['WH']))) {
704  // Wasserman should match Vasserman
705  if ($this->IsVowel($this->original, $this->current + 1)) {
706  $this->primary .= 'A';
707  $this->secondary .= 'F';
708  } else {
709  // need Uomo to match Womo
710  $this->primary .= 'A';
711  $this->secondary .= 'A';
712  }
713  }
714  // Arnow should match Arnoff
715  if ($this->current == $this->last && $this->IsVowel($this->original, $this->current - 1) || $this->StringAt($this->original, $this->current - 1, 5, ['EWSKI', 'EWSKY', 'OWSKI', 'OWSKY']) || $this->StringAt($this->original, 0, 3, ['SCH'])) {
716  $this->primary .= '';
717  $this->secondary .= 'F';
718  $this->current += 1;
719  break;
720  }
721  // polish e.g. 'filipowicz'
722  if ($this->StringAt($this->original, $this->current, 4, ['WICZ', 'WITZ'])) {
723  $this->primary .= 'TS';
724  $this->secondary .= 'FX';
725  $this->current += 4;
726  break;
727  }
728  // else skip it
729  $this->current += 1;
730  break;
731  case 'X':
732  // french e.g. breaux
733  if (!($this->current == $this->last && ($this->StringAt($this->original, $this->current - 3, 3, ['IAU', 'EAU']) || $this->StringAt($this->original, $this->current - 2, 2, ['AU', 'OU'])))) {
734  $this->primary .= 'KS';
735  $this->secondary .= 'KS';
736  }
737  if ($this->StringAt($this->original, $this->current + 1, 1, ['C', 'X'])) {
738  $this->current += 2;
739  } else {
740  $this->current += 1;
741  }
742  break;
743  case 'Z':
744  // chinese pinyin e.g. 'zhao'
745  if (substr($this->original, $this->current + 1, 1) === 'H') {
746  $this->primary .= 'J';
747  $this->secondary .= 'J';
748  $this->current += 2;
749  break;
750  }
751  if ($this->StringAt($this->original, $this->current + 1, 2, ['ZO', 'ZI', 'ZA']) || $this->SlavoGermanic($this->original) && ($this->current > 0 && substr($this->original, $this->current - 1, 1) !== 'T')) {
752  $this->primary .= 'S';
753  $this->secondary .= 'TS';
754  } else {
755  $this->primary .= 'S';
756  $this->secondary .= 'S';
757  }
758  if (substr($this->original, $this->current + 1, 1) === 'Z') {
759  $this->current += 2;
760  } else {
761  $this->current += 1;
762  }
763  break;
    // Any other byte (digits, spaces, punctuation, ...) is skipped without emitting a code.
764  default:
765  $this->current += 1;
766  }
767  }
768  // end while
    // A rule can append two letters at once, so the keys may exceed 4 characters; cut both to 4.
769  $this->primary = substr($this->primary, 0, 4);
770  $this->secondary = substr($this->secondary, 0, 4);
771  $result['primary'] = $this->primary;
772  $result['secondary'] = $this->secondary;
773  return $result;
774  }
775 
776  // end of function DoubleMetaPhone
777  // Helper methods (declared public)
/**
 * Tests whether the slice of $string that starts at $start and spans
 * $length bytes is equal to one of the candidate strings in $list.
 *
 * The comparison is a strict string comparison, so numeric-looking
 * values such as '10' and '1e1' are not treated as equal.
 *
 * @param string $string Haystack to read the slice from
 * @param int $start Zero-based byte offset; a negative or past-the-end offset never matches
 * @param int $length Number of bytes to compare
 * @param array $list Candidate strings (any array keys are accepted)
 * @return int 1 if any candidate equals the slice, otherwise 0
 */
public function StringAt($string, $start, $length, $list)
{
    if ($start < 0 || $start >= strlen($string)) {
        return 0;
    }
    // The slice does not depend on the candidate, so take it once.
    $slice = (string)substr($string, $start, $length);
    foreach ($list as $candidate) {
        if ((string)$candidate === $slice) {
            return 1;
        }
    }
    return 0;
}
800 
/**
 * Reports whether the byte at position $pos of $string is one of A, E, I, O, U or Y.
 *
 * Only upper-case letters are recognised. Positions outside the string
 * (negative or past the end) are never vowels; without this guard a
 * negative $pos would make substr() count from the end of the string.
 *
 * @param string $string String to inspect
 * @param int $pos Zero-based byte offset
 * @return int 1 if the byte is a vowel, otherwise 0
 */
public function IsVowel($string, $pos)
{
    if ($pos < 0 || $pos >= strlen($string)) {
        return 0;
    }
    return preg_match('/[AEIOUY]/', substr($string, $pos, 1));
}
812 
/**
 * Reports whether $string contains 'W', 'K', 'CZ' or 'WITZ' anywhere.
 *
 * The match is case-sensitive. Any string containing 'WITZ' also
 * contains 'W', so that last alternative never changes the result.
 *
 * @param string $string String to inspect
 * @return int Result of preg_match(): 1 on a match, 0 otherwise
 */
public function SlavoGermanic($string)
{
    $pattern = '/W|K|CZ|WITZ/';
    $matched = preg_match($pattern, $string);

    return $matched;
}
823 }