TYPO3 CMS  TYPO3_7-6
DoubleMetaPhoneUtility.php
Go to the documentation of this file.
1 <?php
3 
4 /*
5  * This file is part of the TYPO3 CMS project.
6  *
7  * It is free software; you can redistribute it and/or modify it under
8  * the terms of the GNU General Public License, either version 2
9  * of the License, or any later version.
10  *
11  * For the full copyright and license information, please read the
12  * LICENSE.txt file that was distributed with this source code.
13  *
14  * The TYPO3 project - inspiring people to share!
15  */
16 
23 {
// Upper-cased copy of the input with one trailing blank appended; set by DoubleMetaPhone().
27  public $original = '';
28 
// Primary code built up by DoubleMetaPhone(); cut to at most 4 characters before it is returned.
32  public $primary = '';
33 
// Secondary (alternative) code built up by DoubleMetaPhone(); cut to at most 4 characters as well.
37  public $secondary = '';
38 
// Byte length of the input string as reported by strlen(), without the appended blank.
42  public $length = 0;
43 
// Index of the last input character ($length - 1).
47  public $last = 0;
48 
// Read position inside $original while the main loop of DoubleMetaPhone() runs.
52  public $current = 0;
53 
54  // methods
55  // TYPO3 specific API to this class. BEGIN
/**
 * TYPO3-facing entry point: returns only the primary code of a string.
 *
 * @param string $string Text to encode
 * @param int $sys_language_uid Accepted for API compatibility; not used by this implementation
 * @return string The 'primary' element of the DoubleMetaPhone() result
 */
public function metaphone($string, $sys_language_uid = 0)
{
    // The secondary code is computed as well but deliberately dropped here.
    return $this->DoubleMetaPhone($string)['primary'];
}
68 
69  // TYPO3 specific API to this class. END
70  // Public method
/**
 * Computes the primary and secondary Double Metaphone codes of a string.
 *
 * The input is upper-cased and padded with one trailing blank, then scanned
 * left to right. Each letter (or letter group) appends zero or more characters
 * to both codes and advances the read position. Scanning stops as soon as both
 * codes hold at least four characters or the input is exhausted; both codes are
 * finally cut to four characters.
 *
 * Side effects: overwrites $original, $primary, $secondary, $length, $last and
 * $current of this instance.
 *
 * NOTE(review): all string access is byte-wise (strlen/substr/strtoupper), so the
 * 'Ç' and 'Ñ' case labels can only match when source file and input use the same
 * single-byte encoding - confirm against the callers' charset.
 *
 * @param string $string Input word or name
 * @return array Array with the keys 'primary' and 'secondary'
 */
public function DoubleMetaPhone($string)
{
    $this->primary = '';
    $this->secondary = '';
    $this->current = 0;
    $this->length = strlen($string);
    $this->last = $this->length - 1;
    // The trailing blank lets look-ahead rules test for the end of the word (e.g. 'IER ').
    $this->original = strtoupper($string . ' ');
    // skip this at beginning of word
    if ($this->StringAt($this->original, 0, 2, ['GN', 'KN', 'PN', 'WR', 'PS'])) {
        $this->current++;
    }
    // Initial 'X' is pronounced 'Z' e.g. 'Xavier'
    if ($this->original[0] === 'X') {
        $this->primary .= 'S';
        // 'Z' maps to 'S'
        $this->secondary .= 'S';
        $this->current++;
    }
    // main loop: runs until BOTH codes have four characters or the input ends
    while (strlen($this->primary) < 4 || strlen($this->secondary) < 4) {
        if ($this->current >= $this->length) {
            break;
        }
        switch (substr($this->original, $this->current, 1)) {
            case 'A':
            case 'E':
            case 'I':
            case 'O':
            case 'U':
            case 'Y':
                if ($this->current == 0) {
                    // all init vowels now map to 'A'
                    $this->primary .= 'A';
                    $this->secondary .= 'A';
                }
                $this->current += 1;
                break;
            case 'B':
                // '-mb', e.g. "dumb", already skipped over ...
                $this->primary .= 'P';
                $this->secondary .= 'P';
                if (substr($this->original, $this->current + 1, 1) == 'B') {
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                break;
            case 'Ç':
                $this->primary .= 'S';
                $this->secondary .= 'S';
                $this->current += 1;
                break;
            case 'C':
                // various germanic
                if ($this->current > 1
                    && !$this->IsVowel($this->original, ($this->current - 2))
                    && $this->StringAt($this->original, $this->current - 1, 3, ['ACH'])
                    && (substr($this->original, $this->current + 2, 1) != 'I'
                        && (substr($this->original, $this->current + 2, 1) != 'E'
                            || $this->StringAt($this->original, $this->current - 2, 6, ['BACHER', 'MACHER'])))
                ) {
                    $this->primary .= 'K';
                    $this->secondary .= 'K';
                    $this->current += 2;
                    break;
                }
                // special case 'caesar'
                if ($this->current == 0 && $this->StringAt($this->original, $this->current, 6, ['CAESAR'])) {
                    $this->primary .= 'S';
                    $this->secondary .= 'S';
                    $this->current += 2;
                    break;
                }
                // italian 'chianti'
                if ($this->StringAt($this->original, $this->current, 4, ['CHIA'])) {
                    $this->primary .= 'K';
                    $this->secondary .= 'K';
                    $this->current += 2;
                    break;
                }
                if ($this->StringAt($this->original, $this->current, 2, ['CH'])) {
                    // find 'michael'
                    if ($this->current > 0 && $this->StringAt($this->original, $this->current, 4, ['CHAE'])) {
                        $this->primary .= 'K';
                        $this->secondary .= 'X';
                        $this->current += 2;
                        break;
                    }
                    // greek roots e.g. 'chemistry', 'chorus'
                    if ($this->current == 0
                        && ($this->StringAt($this->original, $this->current + 1, 5, ['HARAC', 'HARIS'])
                            || $this->StringAt($this->original, $this->current + 1, 3, ['HOR', 'HYM', 'HIA', 'HEM']))
                        && !$this->StringAt($this->original, 0, 5, ['CHORE'])
                    ) {
                        $this->primary .= 'K';
                        $this->secondary .= 'K';
                        $this->current += 2;
                        break;
                    }
                    // germanic, greek, or otherwise 'ch' for 'kh' sound
                    if ($this->StringAt($this->original, 0, 4, ['VAN ', 'VON '])
                        || $this->StringAt($this->original, 0, 3, ['SCH'])
                        || $this->StringAt($this->original, $this->current - 2, 6, ['ORCHES', 'ARCHIT', 'ORCHID'])
                        || $this->StringAt($this->original, $this->current + 2, 1, ['T', 'S'])
                        || ($this->StringAt($this->original, $this->current - 1, 1, ['A', 'O', 'U', 'E']) || $this->current == 0)
                        && $this->StringAt($this->original, $this->current + 2, 1, ['L', 'R', 'N', 'M', 'B', 'H', 'F', 'V', 'W', ' '])
                    ) {
                        $this->primary .= 'K';
                        $this->secondary .= 'K';
                    } else {
                        if ($this->current > 0) {
                            if ($this->StringAt($this->original, 0, 2, ['MC'])) {
                                // e.g. 'McHugh'
                                $this->primary .= 'K';
                                $this->secondary .= 'K';
                            } else {
                                $this->primary .= 'X';
                                $this->secondary .= 'K';
                            }
                        } else {
                            $this->primary .= 'X';
                            $this->secondary .= 'X';
                        }
                    }
                    $this->current += 2;
                    break;
                }
                // e.g. 'czerny'
                if ($this->StringAt($this->original, $this->current, 2, ['CZ'])
                    && !$this->StringAt($this->original, ($this->current - 2), 4, ['WICZ'])
                ) {
                    $this->primary .= 'S';
                    $this->secondary .= 'X';
                    $this->current += 2;
                    break;
                }
                // e.g. 'focaccia'
                if ($this->StringAt($this->original, $this->current + 1, 3, ['CIA'])) {
                    $this->primary .= 'X';
                    $this->secondary .= 'X';
                    $this->current += 3;
                    break;
                }
                // double 'C', but not McClellan'
                if ($this->StringAt($this->original, $this->current, 2, ['CC'])
                    && !($this->current == 1 && $this->original[0] === 'M')
                ) {
                    // 'bellocchio' but not 'bacchus'
                    if ($this->StringAt($this->original, $this->current + 2, 1, ['I', 'E', 'H'])
                        && !$this->StringAt($this->original, ($this->current + 2), 2, ['HU'])
                    ) {
                        // 'accident', 'accede', 'succeed'
                        if ($this->current == 1 && substr($this->original, $this->current - 1, 1) == 'A'
                            || $this->StringAt($this->original, $this->current - 1, 5, ['UCCEE', 'UCCES'])
                        ) {
                            $this->primary .= 'KS';
                            $this->secondary .= 'KS';
                        } else {
                            $this->primary .= 'X';
                            $this->secondary .= 'X';
                        }
                        $this->current += 3;
                        break;
                    } else {
                        // Pierce's rule
                        $this->primary .= 'K';
                        $this->secondary .= 'K';
                        $this->current += 2;
                        break;
                    }
                }
                if ($this->StringAt($this->original, $this->current, 2, ['CK', 'CG', 'CQ'])) {
                    $this->primary .= 'K';
                    $this->secondary .= 'K';
                    $this->current += 2;
                    break;
                }
                if ($this->StringAt($this->original, $this->current, 2, ['CI', 'CE', 'CY'])) {
                    // italian vs. english
                    if ($this->StringAt($this->original, $this->current, 3, ['CIO', 'CIE', 'CIA'])) {
                        $this->primary .= 'S';
                        $this->secondary .= 'X';
                    } else {
                        $this->primary .= 'S';
                        $this->secondary .= 'S';
                    }
                    $this->current += 2;
                    break;
                }
                // else
                $this->primary .= 'K';
                $this->secondary .= 'K';
                // name sent in 'mac caffrey', 'mac gregor'
                if ($this->StringAt($this->original, $this->current + 1, 2, [' C', ' Q', ' G'])) {
                    $this->current += 3;
                } else {
                    if ($this->StringAt($this->original, $this->current + 1, 1, ['C', 'K', 'Q'])
                        && !$this->StringAt($this->original, ($this->current + 1), 2, ['CE', 'CI'])
                    ) {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                }
                break;
            case 'D':
                if ($this->StringAt($this->original, $this->current, 2, ['DG'])) {
                    if ($this->StringAt($this->original, $this->current + 2, 1, ['I', 'E', 'Y'])) {
                        // e.g. 'edge'
                        $this->primary .= 'J';
                        $this->secondary .= 'J';
                        $this->current += 3;
                        break;
                    } else {
                        // e.g. 'edgar'
                        $this->primary .= 'TK';
                        $this->secondary .= 'TK';
                        $this->current += 2;
                        break;
                    }
                }
                if ($this->StringAt($this->original, $this->current, 2, ['DT', 'DD'])) {
                    $this->primary .= 'T';
                    $this->secondary .= 'T';
                    $this->current += 2;
                    break;
                }
                // else
                $this->primary .= 'T';
                $this->secondary .= 'T';
                $this->current += 1;
                break;
            case 'F':
                if (substr($this->original, $this->current + 1, 1) == 'F') {
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                $this->primary .= 'F';
                $this->secondary .= 'F';
                break;
            case 'G':
                if (substr($this->original, $this->current + 1, 1) == 'H') {
                    if ($this->current > 0 && !$this->IsVowel($this->original, ($this->current - 1))) {
                        $this->primary .= 'K';
                        $this->secondary .= 'K';
                        $this->current += 2;
                        break;
                    }
                    if ($this->current < 3) {
                        // 'ghislane', 'ghiradelli'
                        if ($this->current == 0) {
                            if (substr($this->original, $this->current + 2, 1) == 'I') {
                                $this->primary .= 'J';
                                $this->secondary .= 'J';
                            } else {
                                $this->primary .= 'K';
                                $this->secondary .= 'K';
                            }
                            $this->current += 2;
                            break;
                        }
                    }
                    // Parker's rule (with some further refinements) - e.g. 'hugh'
                    if ($this->current > 1 && $this->StringAt($this->original, $this->current - 2, 1, ['B', 'H', 'D'])
                        || $this->current > 2 && $this->StringAt($this->original, $this->current - 3, 1, ['B', 'H', 'D'])
                        || $this->current > 3 && $this->StringAt($this->original, $this->current - 4, 1, ['B', 'H'])
                    ) {
                        $this->current += 2;
                        break;
                    } else {
                        // e.g. 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
                        if ($this->current > 2
                            && substr($this->original, $this->current - 1, 1) == 'U'
                            && $this->StringAt($this->original, $this->current - 3, 1, ['C', 'G', 'L', 'R', 'T'])
                        ) {
                            $this->primary .= 'F';
                            $this->secondary .= 'F';
                        } elseif ($this->current > 0 && substr($this->original, $this->current - 1, 1) != 'I') {
                            $this->primary .= 'K';
                            $this->secondary .= 'K';
                        }
                        $this->current += 2;
                        break;
                    }
                }
                if (substr($this->original, $this->current + 1, 1) == 'N') {
                    if ($this->current == 1 && $this->IsVowel($this->original, 0) && !$this->SlavoGermanic($this->original)) {
                        $this->primary .= 'KN';
                        $this->secondary .= 'N';
                    } else {
                        // not e.g. 'cagney'
                        if (!$this->StringAt($this->original, ($this->current + 2), 2, ['EY'])
                            && substr($this->original, $this->current + 1, 1) != 'Y'
                            && !$this->SlavoGermanic($this->original)
                        ) {
                            $this->primary .= 'N';
                            $this->secondary .= 'KN';
                        } else {
                            $this->primary .= 'KN';
                            $this->secondary .= 'KN';
                        }
                    }
                    $this->current += 2;
                    break;
                }
                // 'tagliaro'
                if ($this->StringAt($this->original, $this->current + 1, 2, ['LI']) && !$this->SlavoGermanic($this->original)) {
                    $this->primary .= 'KL';
                    $this->secondary .= 'L';
                    $this->current += 2;
                    break;
                }
                // -ges-, -gep-, -gel- at beginning
                if ($this->current == 0 && (substr($this->original, $this->current + 1, 1) == 'Y' || $this->StringAt($this->original, $this->current + 1, 2, [
                    'ES',
                    'EP',
                    'EB',
                    'EL',
                    'EY',
                    'IB',
                    'IL',
                    'IN',
                    'IE',
                    'EI',
                    'ER'
                ]))) {
                    $this->primary .= 'K';
                    $this->secondary .= 'J';
                    $this->current += 2;
                    break;
                }
                // -ger-, -gy-
                if (($this->StringAt($this->original, $this->current + 1, 2, ['ER']) || substr($this->original, $this->current + 1, 1) == 'Y')
                    && !$this->StringAt($this->original, 0, 6, ['DANGER', 'RANGER', 'MANGER'])
                    && !$this->StringAt($this->original, ($this->current - 1), 1, ['E', 'I'])
                    && !$this->StringAt($this->original, ($this->current - 1), 3, ['RGY', 'OGY'])
                ) {
                    $this->primary .= 'K';
                    $this->secondary .= 'J';
                    $this->current += 2;
                    break;
                }
                // italian e.g. 'biaggi'
                if ($this->StringAt($this->original, $this->current + 1, 1, ['E', 'I', 'Y'])
                    || $this->StringAt($this->original, $this->current - 1, 4, ['AGGI', 'OGGI'])
                ) {
                    // obvious germanic
                    if ($this->StringAt($this->original, 0, 4, ['VAN ', 'VON '])
                        || $this->StringAt($this->original, 0, 3, ['SCH'])
                        || $this->StringAt($this->original, $this->current + 1, 2, ['ET'])
                    ) {
                        $this->primary .= 'K';
                        $this->secondary .= 'K';
                    } else {
                        // always soft if french ending
                        if ($this->StringAt($this->original, $this->current + 1, 4, ['IER '])) {
                            $this->primary .= 'J';
                            $this->secondary .= 'J';
                        } else {
                            $this->primary .= 'J';
                            $this->secondary .= 'K';
                        }
                    }
                    $this->current += 2;
                    break;
                }
                if (substr($this->original, $this->current + 1, 1) == 'G') {
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                $this->primary .= 'K';
                $this->secondary .= 'K';
                break;
            case 'H':
                // only keep if first & before vowel or btw. 2 vowels
                if (($this->current == 0 || $this->IsVowel($this->original, $this->current - 1))
                    && $this->IsVowel($this->original, $this->current + 1)
                ) {
                    $this->primary .= 'H';
                    $this->secondary .= 'H';
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                break;
            case 'J':
                // obvious spanish, 'jose', 'san jacinto'
                if ($this->StringAt($this->original, $this->current, 4, ['JOSE'])
                    || $this->StringAt($this->original, 0, 4, ['SAN '])
                ) {
                    if ($this->current == 0 && substr($this->original, $this->current + 4, 1) == ' '
                        || $this->StringAt($this->original, 0, 4, ['SAN '])
                    ) {
                        $this->primary .= 'H';
                        $this->secondary .= 'H';
                    } else {
                        $this->primary .= 'J';
                        $this->secondary .= 'H';
                    }
                    $this->current += 1;
                    break;
                }
                if ($this->current == 0 && !$this->StringAt($this->original, $this->current, 4, ['JOSE'])) {
                    $this->primary .= 'J';
                    // Yankelovich/Jankelowicz
                    $this->secondary .= 'A';
                } else {
                    // spanish pron. of .e.g. 'bajador'
                    if ($this->IsVowel($this->original, $this->current - 1)
                        && !$this->SlavoGermanic($this->original)
                        && (substr($this->original, $this->current + 1, 1) == 'A' || substr($this->original, $this->current + 1, 1) == 'O')
                    ) {
                        $this->primary .= 'J';
                        $this->secondary .= 'H';
                    } else {
                        if ($this->current == $this->last) {
                            $this->primary .= 'J';
                            $this->secondary .= '';
                        } else {
                            if (!$this->StringAt($this->original, ($this->current + 1), 1, ['L', 'T', 'K', 'S', 'N', 'M', 'B', 'Z'])
                                && !$this->StringAt($this->original, ($this->current - 1), 1, ['S', 'K', 'L'])
                            ) {
                                $this->primary .= 'J';
                                $this->secondary .= 'J';
                            }
                        }
                    }
                }
                if (substr($this->original, $this->current + 1, 1) == 'J') {
                    // it could happen
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                break;
            case 'K':
                if (substr($this->original, $this->current + 1, 1) == 'K') {
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                $this->primary .= 'K';
                $this->secondary .= 'K';
                break;
            case 'L':
                if (substr($this->original, $this->current + 1, 1) == 'L') {
                    // spanish e.g. 'cabrillo', 'gallegos'
                    if ($this->current == $this->length - 3 && $this->StringAt($this->original, $this->current - 1, 4, ['ILLO', 'ILLA', 'ALLE'])
                        || ($this->StringAt($this->original, $this->last - 1, 2, ['AS', 'OS']) || $this->StringAt($this->original, $this->last, 1, ['A', 'O']))
                        && $this->StringAt($this->original, $this->current - 1, 4, ['ALLE'])
                    ) {
                        $this->primary .= 'L';
                        $this->secondary .= '';
                        $this->current += 2;
                        break;
                    }
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                $this->primary .= 'L';
                $this->secondary .= 'L';
                break;
            case 'M':
                if ($this->StringAt($this->original, $this->current - 1, 3, ['UMB'])
                    && ($this->current + 1 == $this->last || $this->StringAt($this->original, $this->current + 2, 2, ['ER']))
                    || substr($this->original, $this->current + 1, 1) == 'M'
                ) {
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                $this->primary .= 'M';
                $this->secondary .= 'M';
                break;
            case 'N':
                if (substr($this->original, $this->current + 1, 1) == 'N') {
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                $this->primary .= 'N';
                $this->secondary .= 'N';
                break;
            case 'Ñ':
                $this->current += 1;
                $this->primary .= 'N';
                $this->secondary .= 'N';
                break;
            case 'P':
                if (substr($this->original, $this->current + 1, 1) == 'H') {
                    $this->current += 2;
                    $this->primary .= 'F';
                    $this->secondary .= 'F';
                    break;
                }
                // also account for "campbell" and "raspberry"
                if ($this->StringAt($this->original, $this->current + 1, 1, ['P', 'B'])) {
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                $this->primary .= 'P';
                $this->secondary .= 'P';
                break;
            case 'Q':
                if (substr($this->original, $this->current + 1, 1) == 'Q') {
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                $this->primary .= 'K';
                $this->secondary .= 'K';
                break;
            case 'R':
                // french e.g. 'rogier', but exclude 'hochmeier'
                if ($this->current == $this->last
                    && !$this->SlavoGermanic($this->original)
                    && $this->StringAt($this->original, $this->current - 2, 2, ['IE'])
                    && !$this->StringAt($this->original, ($this->current - 4), 2, ['ME', 'MA'])
                ) {
                    $this->primary .= '';
                    $this->secondary .= 'R';
                } else {
                    $this->primary .= 'R';
                    $this->secondary .= 'R';
                }
                if (substr($this->original, $this->current + 1, 1) == 'R') {
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                break;
            case 'S':
                // special cases 'island', 'isle', 'carlisle', 'carlysle'
                if ($this->StringAt($this->original, $this->current - 1, 3, ['ISL', 'YSL'])) {
                    $this->current += 1;
                    break;
                }
                // special case 'sugar-'
                if ($this->current == 0 && $this->StringAt($this->original, $this->current, 5, ['SUGAR'])) {
                    $this->primary .= 'X';
                    $this->secondary .= 'S';
                    $this->current += 1;
                    break;
                }
                if ($this->StringAt($this->original, $this->current, 2, ['SH'])) {
                    // germanic
                    if ($this->StringAt($this->original, $this->current + 1, 4, ['HEIM', 'HOEK', 'HOLM', 'HOLZ'])) {
                        $this->primary .= 'S';
                        $this->secondary .= 'S';
                    } else {
                        $this->primary .= 'X';
                        $this->secondary .= 'X';
                    }
                    $this->current += 2;
                    break;
                }
                // italian & armenian
                if ($this->StringAt($this->original, $this->current, 3, ['SIO', 'SIA'])
                    || $this->StringAt($this->original, $this->current, 4, ['SIAN'])
                ) {
                    if (!$this->SlavoGermanic($this->original)) {
                        $this->primary .= 'S';
                        $this->secondary .= 'X';
                    } else {
                        $this->primary .= 'S';
                        $this->secondary .= 'S';
                    }
                    $this->current += 3;
                    break;
                }
                // german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
                // also, -sz- in slavic language altho in hungarian it is pronounced 's'
                if ($this->current == 0 && $this->StringAt($this->original, $this->current + 1, 1, ['M', 'N', 'L', 'W'])
                    || $this->StringAt($this->original, $this->current + 1, 1, ['Z'])
                ) {
                    $this->primary .= 'S';
                    $this->secondary .= 'X';
                    if ($this->StringAt($this->original, $this->current + 1, 1, ['Z'])) {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;
                }
                if ($this->StringAt($this->original, $this->current, 2, ['SC'])) {
                    // Schlesinger's rule
                    if (substr($this->original, $this->current + 2, 1) == 'H') {
                        // dutch origin, e.g. 'school', 'schooner'
                        if ($this->StringAt($this->original, $this->current + 3, 2, ['OO', 'ER', 'EN', 'UY', 'ED', 'EM'])) {
                            // 'schermerhorn', 'schenker'
                            if ($this->StringAt($this->original, $this->current + 3, 2, ['ER', 'EN'])) {
                                $this->primary .= 'X';
                                $this->secondary .= 'SK';
                            } else {
                                $this->primary .= 'SK';
                                $this->secondary .= 'SK';
                            }
                            $this->current += 3;
                            break;
                        } else {
                            if ($this->current == 0
                                && !$this->IsVowel($this->original, 3)
                                && substr($this->original, $this->current + 3, 1) != 'W'
                            ) {
                                $this->primary .= 'X';
                                $this->secondary .= 'S';
                            } else {
                                $this->primary .= 'X';
                                $this->secondary .= 'X';
                            }
                            $this->current += 3;
                            break;
                        }
                    }
                    if ($this->StringAt($this->original, $this->current + 2, 1, ['I', 'E', 'Y'])) {
                        $this->primary .= 'S';
                        $this->secondary .= 'S';
                        $this->current += 3;
                        break;
                    }
                    // else
                    $this->primary .= 'SK';
                    $this->secondary .= 'SK';
                    $this->current += 3;
                    break;
                }
                // french e.g. 'resnais', 'artois'
                if ($this->current == $this->last && $this->StringAt($this->original, $this->current - 2, 2, ['AI', 'OI'])) {
                    $this->primary .= '';
                    $this->secondary .= 'S';
                } else {
                    $this->primary .= 'S';
                    $this->secondary .= 'S';
                }
                if ($this->StringAt($this->original, $this->current + 1, 1, ['S', 'Z'])) {
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                break;
            case 'T':
                if ($this->StringAt($this->original, $this->current, 4, ['TION'])) {
                    $this->primary .= 'X';
                    $this->secondary .= 'X';
                    $this->current += 3;
                    break;
                }
                if ($this->StringAt($this->original, $this->current, 3, ['TIA', 'TCH'])) {
                    $this->primary .= 'X';
                    $this->secondary .= 'X';
                    $this->current += 3;
                    break;
                }
                if ($this->StringAt($this->original, $this->current, 2, ['TH'])
                    || $this->StringAt($this->original, $this->current, 3, ['TTH'])
                ) {
                    // special case 'thomas', 'thames' or germanic
                    if ($this->StringAt($this->original, $this->current + 2, 2, ['OM', 'AM'])
                        || $this->StringAt($this->original, 0, 4, ['VAN ', 'VON '])
                        || $this->StringAt($this->original, 0, 3, ['SCH'])
                    ) {
                        $this->primary .= 'T';
                        $this->secondary .= 'T';
                    } else {
                        $this->primary .= '0';
                        $this->secondary .= 'T';
                    }
                    $this->current += 2;
                    break;
                }
                if ($this->StringAt($this->original, $this->current + 1, 1, ['T', 'D'])) {
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                $this->primary .= 'T';
                $this->secondary .= 'T';
                break;
            case 'V':
                if (substr($this->original, $this->current + 1, 1) == 'V') {
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                $this->primary .= 'F';
                $this->secondary .= 'F';
                break;
            case 'W':
                // can also be in middle of word
                if ($this->StringAt($this->original, $this->current, 2, ['WR'])) {
                    $this->primary .= 'R';
                    $this->secondary .= 'R';
                    $this->current += 2;
                    break;
                }
                if ($this->current == 0
                    && ($this->IsVowel($this->original, $this->current + 1) || $this->StringAt($this->original, $this->current, 2, ['WH']))
                ) {
                    // Wasserman should match Vasserman
                    if ($this->IsVowel($this->original, $this->current + 1)) {
                        $this->primary .= 'A';
                        $this->secondary .= 'F';
                    } else {
                        // need Uomo to match Womo
                        $this->primary .= 'A';
                        $this->secondary .= 'A';
                    }
                }
                // Arnow should match Arnoff
                if ($this->current == $this->last && $this->IsVowel($this->original, $this->current - 1)
                    || $this->StringAt($this->original, $this->current - 1, 5, ['EWSKI', 'EWSKY', 'OWSKI', 'OWSKY'])
                    || $this->StringAt($this->original, 0, 3, ['SCH'])
                ) {
                    $this->primary .= '';
                    $this->secondary .= 'F';
                    $this->current += 1;
                    break;
                }
                // polish e.g. 'filipowicz'
                if ($this->StringAt($this->original, $this->current, 4, ['WICZ', 'WITZ'])) {
                    $this->primary .= 'TS';
                    $this->secondary .= 'FX';
                    $this->current += 4;
                    break;
                }
                // else skip it
                $this->current += 1;
                break;
            case 'X':
                // french e.g. breaux
                if (!($this->current == $this->last
                    && ($this->StringAt($this->original, $this->current - 3, 3, ['IAU', 'EAU'])
                        || $this->StringAt($this->original, $this->current - 2, 2, ['AU', 'OU'])))
                ) {
                    $this->primary .= 'KS';
                    $this->secondary .= 'KS';
                }
                if ($this->StringAt($this->original, $this->current + 1, 1, ['C', 'X'])) {
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                break;
            case 'Z':
                // chinese pinyin e.g. 'zhao'
                if (substr($this->original, $this->current + 1, 1) == 'H') {
                    $this->primary .= 'J';
                    $this->secondary .= 'J';
                    $this->current += 2;
                    break;
                } elseif ($this->StringAt($this->original, $this->current + 1, 2, ['ZO', 'ZI', 'ZA'])
                    || $this->SlavoGermanic($this->original)
                    && ($this->current > 0 && substr($this->original, $this->current - 1, 1) != 'T')
                ) {
                    $this->primary .= 'S';
                    $this->secondary .= 'TS';
                } else {
                    $this->primary .= 'S';
                    $this->secondary .= 'S';
                }
                if (substr($this->original, $this->current + 1, 1) == 'Z') {
                    $this->current += 2;
                } else {
                    $this->current += 1;
                }
                break;
            default:
                // any other byte (digits, blanks, punctuation, ...) contributes nothing
                $this->current += 1;
        }
    }
    // end while
    // A single rule may append two characters, so cut both codes back to four.
    $this->primary = substr($this->primary, 0, 4);
    $this->secondary = substr($this->secondary, 0, 4);

    return [
        'primary' => $this->primary,
        'secondary' => $this->secondary,
    ];
}
778 
779  // end of function DoubleMetaPhone
780  // Helper methods (declared public, used by DoubleMetaPhone)
/**
 * Tells whether the substring of $string that starts at $start and spans
 * $length bytes is identical to one of the candidates in $list.
 *
 * A start offset outside of the string (negative, or at/after its end)
 * never matches.
 *
 * @param string $string Haystack
 * @param int $start Zero-based byte offset of the slice
 * @param int $length Byte length of the slice
 * @param array $list Candidate strings
 * @return int 1 if a candidate equals the slice, otherwise 0
 */
public function StringAt($string, $start, $length, $list)
{
    if ($start < 0 || $start >= strlen($string)) {
        return 0;
    }
    // The slice does not depend on the candidate, so cut it only once.
    $slice = substr($string, $start, $length);

    // Strict comparison: loose "==" treats numeric-looking strings such as
    // '1e3' and '1000' as equal, which is wrong for a textual match.
    return in_array($slice, $list, true) ? 1 : 0;
}
803 
/**
 * Tells whether the character at byte offset $pos of $string is one of
 * the upper-case vowels A, E, I, O, U or Y.
 *
 * Offsets outside of the string are reported as "no vowel". Without this
 * guard a negative offset would make substr() count from the end of the
 * string and test an unrelated character.
 *
 * @param string $string String to inspect (expected to be upper-cased)
 * @param int $pos Zero-based byte offset
 * @return int 1 if the character is a vowel, otherwise 0
 */
public function IsVowel($string, $pos)
{
    if ($pos < 0 || $pos >= strlen($string)) {
        return 0;
    }

    return preg_match('/[AEIOUY]/', substr($string, $pos, 1));
}
815 
/**
 * Checks a string for the letter groups this class treats as a sign of
 * Slavic or Germanic origin: 'W', 'K', 'CZ' or 'WITZ' anywhere in it.
 *
 * @param string $string String to inspect (expected to be upper-cased)
 * @return int Result of preg_match(): 1 if a marker occurs, otherwise 0
 */
public function SlavoGermanic($string)
{
    $markerPattern = '/W|K|CZ|WITZ/';
    $found = preg_match($markerPattern, $string);

    return $found;
}
826 }