TYPO3 CMS  TYPO3_8-7
HtmlParserTest.php
Go to the documentation of this file.
1 <?php
3 
4 /*
5  * This file is part of the TYPO3 CMS project.
6  *
7  * It is free software; you can redistribute it and/or modify it under
8  * the terms of the GNU General Public License, either version 2
9  * of the License, or any later version.
10  *
11  * For the full copyright and license information, please read the
12  * LICENSE.txt file that was distributed with this source code.
13  *
14  * The TYPO3 project - inspiring people to share!
15  */
16 
18 
22 class HtmlParserTest extends \TYPO3\TestingFramework\Core\Unit\UnitTestCase
23 {
/**
 * Parser instance under test; assigned a fresh HtmlParser in setUp().
 *
 * @var HtmlParser
 */
protected $subject = null;
28 
/**
 * Prepares the fixture: stores a newly constructed HtmlParser as the test subject.
 */
protected function setUp()
{
    $parser = new HtmlParser();
    $this->subject = $parser;
}
33 
38  {
39  return [
40  'single-line CDATA' => [
41  '/*<![CDATA[*/ <hello world> /*]]>*/',
42  '/*<![CDATA[*/ <hello world> /*]]>*/',
43  ],
44  'multi-line CDATA #1' => [
45  '/*<![CDATA[*/' . LF . '<hello world> /*]]>*/',
46  '/*<![CDATA[*/' . LF . '<hello world> /*]]>*/',
47  ],
48  'multi-line CDATA #2' => [
49  '/*<![CDATA[*/ <hello world>' . LF . '/*]]>*/',
50  '/*<![CDATA[*/ <hello world>' . LF . '/*]]>*/',
51  ],
52  'multi-line CDATA #3' => [
53  '/*<![CDATA[*/' . LF . '<hello world>' . LF . '/*]]>*/',
54  '/*<![CDATA[*/' . LF . '<hello world>' . LF . '/*]]>*/',
55  ],
56  ];
57  }
58 
/**
 * Named data sets with four entries each: a comma-separated tag list,
 * the HTML content, a boolean flag and the expected list of parts.
 * The shape matches the parameters of splitIntoBlock() in this class.
 *
 * @return array
 */
public function splitIntoBlockDataProvider()
{
    $dataSets = [];

    $dataSets['splitBlock'] = [
        'h1,span',
        '<body><h1>Title</h1><span>Note</span></body>',
        false,
        [
            '<body>',
            '<h1>Title</h1>',
            '',
            '<span>Note</span>',
            '</body>',
        ],
    ];

    $dataSets['splitBlock br'] = [
        'h1,span',
        '<body><h1>Title</h1><br /><span>Note</span><br /></body>',
        false,
        [
            '<body>',
            '<h1>Title</h1>',
            '<br />',
            '<span>Note</span>',
            '<br /></body>',
        ],
    ];

    $dataSets['splitBlock with attribute'] = [
        'h1,span',
        '<body><h1 class="title">Title</h1><span>Note</span></body>',
        false,
        [
            '<body>',
            '<h1 class="title">Title</h1>',
            '',
            '<span>Note</span>',
            '</body>',
        ],
    ];

    // Only "span" is a split tag here, so the h1 stays attached to the leading part.
    $dataSets['splitBlock span with attribute'] = [
        'span',
        '<body><h1>Title</h1><span class="title">Note</span></body>',
        false,
        [
            '<body><h1>Title</h1>',
            '<span class="title">Note</span>',
            '</body>',
        ],
    ];

    // Flag set to true: the stray closing </div> is not expected in the result.
    $dataSets['splitBlock without extra end tags'] = [
        'h1,span,div',
        '<body><h1>Title</h1><span>Note</span></body></div>',
        true,
        [
            '<body>',
            '<h1>Title</h1>',
            '',
            '<span>Note</span>',
            '</body>',
        ],
    ];

    return $dataSets;
}
117 
/**
 * Checks that HtmlParser::splitIntoBlock() returns exactly the expected parts.
 *
 * The method name has no "test" prefix, so PHPUnit only picks it up through
 * the @test annotation and only supplies arguments through @dataProvider.
 *
 * @test
 * @dataProvider splitIntoBlockDataProvider
 * @param string $tag Comma-separated list of tag names
 * @param string $content HTML content to split
 * @param bool $eliminateExtraEndTags Passed through as the third argument
 * @param array $expected Expected list of parts (compared with assertSame)
 */
public function splitIntoBlock($tag, $content, $eliminateExtraEndTags, $expected)
{
    $actual = $this->subject->splitIntoBlock($tag, $content, $eliminateExtraEndTags);

    $this->assertSame($expected, $actual);
}
130 
/**
 * Runs HTMLcleaner() with an empty tag configuration and 1 as third argument
 * and expects the result to be identical to $expected.
 *
 * NOTE(review): no @test/@dataProvider annotation is visible for this method
 * in this excerpt — confirm how PHPUnit discovers it and which provider feeds it.
 *
 * @param string $source Content handed to HTMLcleaner()
 * @param string $expected Value the cleaned content must be identical to
 */
public function xHtmlCleaningDoesNotModifyCDATA($source, $expected)
{
    $this->assertSame(
        $expected,
        $this->subject->HTMLcleaner($source, [], 1)
    );
}
142 
147  {
148  return [
149  'Span tag with no attrib' => [
150  '<span>text</span>',
151  'text'
152  ],
153  'Span tag with allowed id attrib' => [
154  '<span id="id">text</span>',
155  '<span id="id">text</span>'
156  ],
157  'Span tag with disallowed style attrib' => [
158  '<span style="line-height: 12px;">text</span>',
159  'text'
160  ]
161  ];
162  }
163 
/**
 * Cleans $content with a configuration that allows only span tags, permits
 * only the "id" attribute on them and sets rmTagIfNoAttrib for span.
 *
 * NOTE(review): no @test/@dataProvider annotation is visible for this method
 * in this excerpt — confirm how PHPUnit discovers it and which provider feeds it.
 *
 * @param string $content Content to clean
 * @param string $expectedResult Value the cleaned content is compared against
 */
public function tagCorrectlyRemovedWhenRmTagIfNoAttribIsConfigured($content, $expectedResult)
{
    $spanConfig = [
        'allowedAttribs' => 'id',
        'rmTagIfNoAttrib' => 1,
    ];
    $tsConfig = [
        'allowTags' => 'span',
        'tags.' => [
            'span.' => $spanConfig,
        ],
    ];

    $actual = $this->parseConfigAndCleanHtml($tsConfig, $content);

    $this->assertEquals($expectedResult, $actual);
}
183 
188  {
189  $tsConfig = [
190  'allowTags' => 'div,span',
191  'rmTagIfNoAttrib' => 'span',
192  'globalNesting' => 'div,span'
193  ];
194  $content = '<span></span><span id="test"><div></span></div>';
195  $expectedResult = '<span id="test"></span>';
196  $this->assertEquals($expectedResult, $this->parseConfigAndCleanHtml($tsConfig, $content));
197  }
198 
205  {
206  return [
207  'Valid nesting is untouched' => [
208  '<B><I></B></I>',
209  '<B><I></B></I>'
210  ],
211  'Valid nesting with content is untouched' => [
212  'testa<B>test1<I>test2</B>test3</I>testb',
213  'testa<B>test1<I>test2</B>test3</I>testb'
214  ],
215  'Superflous tags are removed' => [
216  '</B><B><I></B></I></B>',
217  '<B><I></B></I>'
218  ],
219  'Superflous tags with content are removed' => [
220  'test1</B>test2<B>test3<I>test4</B>test5</I>test6</B>test7',
221  'test1test2<B>test3<I>test4</B>test5</I>test6test7'
222  ],
223  'Another valid nesting test' => [
224  '<span><div></span></div>',
225  '<span><div></span></div>',
226  ],
227  ];
228  }
229 
/**
 * Cleans $content with div, span, b and i both allowed and listed under
 * "localNesting", then compares the outcome with $expectedResult.
 *
 * NOTE(review): no @test/@dataProvider annotation is visible for this method
 * in this excerpt — confirm how PHPUnit discovers it and which provider feeds it.
 *
 * @param string $content Content to clean
 * @param string $expectedResult Value the cleaned content is compared against
 */
public function localNestingCorrectlyRemovesInvalidTags($content, $expectedResult)
{
    $tsConfig = [];
    $tsConfig['allowTags'] = 'div,span,b,i';
    $tsConfig['localNesting'] = 'div,span,b,i';

    $actual = $this->parseConfigAndCleanHtml($tsConfig, $content);

    $this->assertEquals($expectedResult, $actual);
}
244 
251  {
252  return [
253  'Valid nesting is untouched' => [
254  '<B><I></I></B>',
255  '<B><I></I></B>'
256  ],
257  'Valid nesting with content is untouched' => [
258  'testa<B>test1<I>test2</I>test3</B>testb',
259  'testa<B>test1<I>test2</I>test3</B>testb'
260  ],
261  'Invalid nesting is cleaned' => [
262  '</B><B><I></B></I></B>',
263  '<B></B>'
264  ],
265  'Invalid nesting with content is cleaned' => [
266  'test1</B>test2<B>test3<I>test4</B>test5</I>test6</B>test7',
267  'test1test2<B>test3test4</B>test5test6test7'
268  ],
269  'Another invalid nesting test' => [
270  '<span><div></span></div>',
271  '<span></span>',
272  ],
273  ];
274  }
275 
/**
 * Cleans $content with span, div, b and i both allowed and listed under
 * "globalNesting", then compares the outcome with $expectedResult.
 *
 * NOTE(review): no @test/@dataProvider annotation is visible for this method
 * in this excerpt — confirm how PHPUnit discovers it and which provider feeds it.
 *
 * @param string $content Content to clean
 * @param string $expectedResult Value the cleaned content is compared against
 */
public function globalNestingCorrectlyRemovesInvalidTags($content, $expectedResult)
{
    $tsConfig = [];
    $tsConfig['allowTags'] = 'span,div,b,i';
    $tsConfig['globalNesting'] = 'span,div,b,i';

    $actual = $this->parseConfigAndCleanHtml($tsConfig, $content);

    $this->assertEquals($expectedResult, $actual);
}
290 
/**
 * Data sets with five entries each: the stripEmptyTags switch, the tag list,
 * the "treat non-breaking space as empty" flag, the content and the
 * expected result. The shape matches the parameters of stripEmptyTags().
 *
 * The first group uses null (or an explicit list) as tag list, the second
 * group repeats the unrestricted cases with an empty string instead.
 *
 * @return array
 */
public function emptyTagsDataProvider()
{
    $nullOrExplicitTagList = [
        [0, null, false, '<h1></h1>', '<h1></h1>'],
        [1, null, false, '<h1></h1>', ''],
        [1, null, false, '<h1>hallo</h1>', '<h1>hallo</h1>'],
        [1, null, false, '<h1 class="something"></h1>', ''],
        [1, null, false, '<h1 class="something"></h1><h2></h2>', ''],
        [1, 'h2', false, '<h1 class="something"></h1><h2></h2>', '<h1 class="something"></h1>'],
        [1, 'h2, h1', false, '<h1 class="something"></h1><h2></h2>', ''],
        [1, null, false, '<div><p></p></div>', ''],
        [1, null, false, '<div><p>&nbsp;</p></div>', '<div><p>&nbsp;</p></div>'],
        [1, null, true, '<div><p>&nbsp;&nbsp;</p></div>', ''],
        [1, null, true, '<div>&nbsp;&nbsp;<p></p></div>', ''],
        [1, null, false, '<div>Some content<p></p></div>', '<div>Some content</div>'],
        [1, null, true, '<div>Some content<p></p></div>', '<div>Some content</div>'],
        [1, null, false, '<div>Some content</div>', '<div>Some content</div>'],
        [1, null, true, '<div>Some content</div>', '<div>Some content</div>'],
        [1, null, false, '<a href="#skiplinks">Skiplinks </a><b></b>', '<a href="#skiplinks">Skiplinks </a>'],
        [1, null, true, '<a href="#skiplinks">Skiplinks </a><b></b>', '<a href="#skiplinks">Skiplinks </a>'],
    ];

    $emptyStringTagList = [
        [0, '', false, '<h1></h1>', '<h1></h1>'],
        [1, '', false, '<h1></h1>', ''],
        [1, '', false, '<h1>hallo</h1>', '<h1>hallo</h1>'],
        [1, '', false, '<h1 class="something"></h1>', ''],
        [1, '', false, '<h1 class="something"></h1><h2></h2>', ''],
        [1, '', false, '<div><p></p></div>', ''],
        [1, '', false, '<div><p>&nbsp;</p></div>', '<div><p>&nbsp;</p></div>'],
        [1, '', true, '<div><p>&nbsp;&nbsp;</p></div>', ''],
        [1, '', true, '<div>&nbsp;&nbsp;<p></p></div>', ''],
        [1, '', false, '<div>Some content<p></p></div>', '<div>Some content</div>'],
        [1, '', true, '<div>Some content<p></p></div>', '<div>Some content</div>'],
        [1, '', false, '<div>Some content</div>', '<div>Some content</div>'],
        [1, '', true, '<div>Some content</div>', '<div>Some content</div>'],
        [1, '', false, '<a href="#skiplinks">Skiplinks </a><b></b>', '<a href="#skiplinks">Skiplinks </a>'],
        [1, '', true, '<a href="#skiplinks">Skiplinks </a><b></b>', '<a href="#skiplinks">Skiplinks </a>'],
    ];

    // Both groups are plain lists, so merging keeps the order and yields keys 0..31.
    return array_merge($nullOrExplicitTagList, $emptyStringTagList);
}
331 
/**
 * Cleans $content with a configuration built from the stripEmptyTags
 * settings in the data set and compares the outcome with $expectedResult.
 *
 * The method name has no "test" prefix, so PHPUnit only picks it up through
 * the @test annotation and only supplies arguments through @dataProvider.
 *
 * @test
 * @dataProvider emptyTagsDataProvider
 * @param int $stripOn Value used for the "stripEmptyTags" option
 * @param string|null $tagList Value used for the "stripEmptyTags.tags" option
 * @param bool $treatNonBreakingSpaceAsEmpty Value used for the option of the same name
 * @param string $content Content to clean
 * @param string $expectedResult Value the cleaned content is compared against
 */
public function stripEmptyTags($stripOn, $tagList, $treatNonBreakingSpaceAsEmpty, $content, $expectedResult)
{
    $stripOptions = [
        'tags' => $tagList,
        'treatNonBreakingSpaceAsEmpty' => $treatNonBreakingSpaceAsEmpty,
    ];
    $tsConfig = [
        'keepNonMatchedTags' => 1,
        'stripEmptyTags' => $stripOn,
        'stripEmptyTags.' => $stripOptions,
    ];

    $this->assertEquals($expectedResult, $this->parseConfigAndCleanHtml($tsConfig, $content));
}
355 
360  {
361  return [
362  [
363  'tr,td',
364  false,
365  '<div><p><tr><td></td></tr></p></div><div class="test"></div><tr></tr><p></p><td></td><i></i>',
366  '<div><p><tr><td></td></tr></p></div><tr></tr><td></td>'
367  ],
368  [
369  'tr,td',
370  true,
371  '<div><p><tr><td></td></tr></p></div><p class="test"> &nbsp; </p><tr></tr><p></p><td></td><i></i>',
372  '<div><p><tr><td></td></tr></p></div><tr></tr><td></td>'
373  ],
374  ];
375  }
376 
/**
 * Cleans $content with stripEmptyTags switched on and $tagList passed as the
 * "keepTags" option, then compares the outcome with $expectedResult.
 *
 * NOTE(review): no @test/@dataProvider annotation is visible for this method
 * in this excerpt — confirm how PHPUnit discovers it and which provider feeds it.
 *
 * @param string $tagList Value used for the "stripEmptyTags.keepTags" option
 * @param bool $treatNonBreakingSpaceAsEmpty Value used for the option of the same name
 * @param string $content Content to clean
 * @param string $expectedResult Value the cleaned content is compared against
 */
public function stripEmptyTagsKeepsConfiguredTags($tagList, $treatNonBreakingSpaceAsEmpty, $content, $expectedResult)
{
    $stripOptions = [
        'keepTags' => $tagList,
        'treatNonBreakingSpaceAsEmpty' => $treatNonBreakingSpaceAsEmpty,
    ];
    $tsConfig = [
        'keepNonMatchedTags' => 1,
        'stripEmptyTags' => 1,
        'stripEmptyTags.' => $stripOptions,
    ];

    $this->assertEquals($expectedResult, $this->parseConfigAndCleanHtml($tsConfig, $content));
}
399 
/**
 * Helper: feeds $tsConfig to HTMLparserConfig() and passes the first four
 * entries of its result, in order, to HTMLcleaner() together with $content.
 *
 * @param array $tsConfig Configuration handed to HTMLparserConfig()
 * @param string $content Content handed to HTMLcleaner()
 * @return mixed Whatever HTMLcleaner() returns
 */
protected function parseConfigAndCleanHtml(array $tsConfig, $content)
{
    $cleanerArguments = $this->subject->HTMLparserConfig($tsConfig);

    return $this->subject->HTMLcleaner(
        $content,
        $cleanerArguments[0],
        $cleanerArguments[1],
        $cleanerArguments[2],
        $cleanerArguments[3]
    );
}
412 
/**
 * Data sets with two entries each: an input string and the expected value.
 * The shape matches the parameters of getFirstTag() in this class.
 *
 * @return array
 */
public function getFirstTagDataProvider()
{
    $dataSets = [];

    $dataSets[] = ['<body><span></span></body>', '<body>'];
    $dataSets[] = ['<span>Wrapper<div>Some content</div></span>', '<span>'];
    // Text in front of the first tag is expected to be part of the result.
    $dataSets[] = [
        'Something before<span>Wrapper<div>Some content</div></span>Something after',
        'Something before<span>',
    ];
    // Input without any tag: an empty string is expected.
    $dataSets[] = ['Something without tag', ''];

    return $dataSets;
}
427 
/**
 * Checks the return value of HtmlParser::getFirstTag() for the given input.
 *
 * The method name has no "test" prefix, so PHPUnit only picks it up through
 * the @test annotation and only supplies arguments through @dataProvider.
 *
 * @test
 * @dataProvider getFirstTagDataProvider
 * @param string $str Input passed to getFirstTag()
 * @param string $expected Value the result is compared against
 */
public function getFirstTag($str, $expected)
{
    $actual = $this->subject->getFirstTag($str);

    $this->assertEquals($expected, $actual);
}
442 
/**
 * Data sets with three entries each: an input string, a boolean flag and the
 * expected tag name. The shape matches the parameters of getFirstTagName().
 *
 * @return array
 */
public function getFirstTagNameDataProvider()
{
    $dataSets = [];

    // Same markup with the flag off and on: upper-case vs. original-case name.
    $dataSets[] = ['<body><span></span></body>', false, 'BODY'];
    $dataSets[] = ['<body><span></span></body>', true, 'body'];
    $dataSets[] = ['<div class="test"><span></span></div>', false, 'DIV'];
    $dataSets[] = ['<div><span class="test"></span></div>', false, 'DIV'];
    $dataSets[] = ['<br /><span class="test"></span>', false, 'BR'];
    $dataSets[] = ['<img src="test.jpg" />', false, 'IMG'];

    return $dataSets;
}
471 
/**
 * Checks the return value of HtmlParser::getFirstTagName() for the given input.
 *
 * The method name has no "test" prefix, so PHPUnit only picks it up through
 * the @test annotation and only supplies arguments through @dataProvider.
 *
 * @test
 * @dataProvider getFirstTagNameDataProvider
 * @param string $str Input passed to getFirstTagName()
 * @param bool $preserveCase Passed through as the second argument
 * @param string $expected Value the result is compared against
 */
public function getFirstTagName($str, $preserveCase, $expected)
{
    $actual = $this->subject->getFirstTagName($str, $preserveCase);

    $this->assertEquals($expected, $actual);
}
486 
491  {
492  return [
493  ['<span>Wrapper<div>Some content</div></span>', 'Wrapper<div>Some content</div>'],
494  ['<td><tr>Some content</tr></td>', '<tr>Some content</tr>'],
495  ['Something before<span>Wrapper<div>Some content</div></span>Something after', 'Wrapper<div>Some content</div>'],
496  ['<span class="hidden">Wrapper<div>Some content</div></span>', 'Wrapper<div>Some content</div>'],
497  ['<span>Wrapper<div class="hidden">Some content</div></span>', 'Wrapper<div class="hidden">Some content</div>'],
498  ['Some stuff before <span>Wrapper<div class="hidden">Some content</div></span> and after', 'Wrapper<div class="hidden">Some content</div>'],
499  ];
500  }
501 
/**
 * Compares the return value of HtmlParser::removeFirstAndLastTag() with
 * $expectedResult.
 *
 * NOTE(review): no @test/@dataProvider annotation is visible for this method
 * in this excerpt — confirm how PHPUnit discovers it and which provider feeds it.
 *
 * @param string $str Input passed to removeFirstAndLastTag()
 * @param string $expectedResult Value the result is compared against
 */
public function removeFirstAndLastTag($str, $expectedResult)
{
    $actual = $this->subject->removeFirstAndLastTag($str);

    $this->assertEquals($expectedResult, $actual);
}
515 
520  {
521  return [
522  [
523  '<a href="" data-shortCut="DXB" required>',
524  [
525  ['href' => '', 'data-shortcut' => 'DXB', 'required' => ''],
526  ['href' => ['origTag' => 'href', 'dashType' => '"'], 'data-shortcut' => ['origTag' => 'data-shortCut', 'dashType' => '"'], 'required' => ['origTag' => 'required']]
527  ]
528  ],
529  [
530  '<ul STYLE=\'background-image: (url: "fra.png")\' data-shortcut=FRA>',
531  [
532  ['style' => 'background-image: (url: "fra.png")', 'data-shortcut' => 'FRA'],
533  ['style' => ['origTag' => 'STYLE', 'dashType' => '\''], 'data-shortcut' => ['origTag' => 'data-shortcut', 'dashType' => '']]
534  ]
535  ]
536 
537  ];
538  }
539 
/**
 * Compares the return value of HtmlParser::get_tag_attributes() with
 * $expectedResult.
 *
 * NOTE(review): no @test/@dataProvider annotation is visible for this method
 * in this excerpt — confirm how PHPUnit discovers it and which provider feeds it.
 *
 * @param string $tag Input passed to get_tag_attributes()
 * @param array $expectedResult Value the result is compared against
 */
public function getTagAttributes($tag, $expectedResult)
{
    $actual = $this->subject->get_tag_attributes($tag);

    $this->assertEquals($expectedResult, $actual);
}
553 
/**
 * Data sets with four entries each: the content, the tag list, the
 * "treat non-breaking space as empty" flag and the expected result.
 * The shape matches the parameters of rawStripEmptyTagsTest().
 *
 * @return array
 */
public function stripEmptyTagsDataProvider()
{
    $dataSets = [];

    // Wrongly encapsulated tags and mixed upper/lower-case tag names
    $dataSets[] = [
        '<div>Denpassar</div><p> Bali</P><p></p><P></p><ul><li></li></ul>',
        '',
        false,
        '<div>Denpassar</div><p> Bali</P>',
    ];

    // Incomplete tags (a <p> that is never closed)
    $dataSets[] = [
        '<p><div>Klungklung</div></p><p> Semarapura<p></p><p></p><ul><li></li></ul>',
        '',
        false,
        '<p><div>Klungklung</div></p><p> Semarapura',
    ];

    // Third parameter set to true: a tag holding only &nbsp; is expected to be removed
    $dataSets[] = [
        '<p><div>Badung</div></p><ul> Mangupura<p></p><p></p><ul><li>&nbsp;</li><li>Uluwatu</li></ul>',
        '',
        true,
        '<p><div>Badung</div></p><ul> Mangupura<ul><li>Uluwatu</li></ul>',
    ];

    // Tag list "p,div": the empty <p> tags are expected to go, the empty <ul><li> stays
    $dataSets[] = [
        '<p><div>Badung</div></p><ul> Mangupura<p></p><p></p><ul><li></li></ul>',
        'p,div',
        true,
        '<p><div>Badung</div></p><ul> Mangupura<ul><li></li></ul>',
    ];

    return $dataSets;
}
591 
/**
 * Calls HtmlParser::stripEmptyTags() directly (without going through
 * HTMLparserConfig()/HTMLcleaner()) and compares the result with $expectedResult.
 *
 * The method name has no "test" prefix, so PHPUnit only picks it up through
 * the @test annotation and only supplies arguments through @dataProvider.
 *
 * @test
 * @dataProvider stripEmptyTagsDataProvider
 * @param string $content Content passed as the first argument
 * @param string $tagList Tag list passed as the second argument
 * @param bool $treatNonBreakingSpaceAsEmpty Flag passed as the third argument
 * @param string $expectedResult Value the result is compared against
 */
public function rawStripEmptyTagsTest($content, $tagList, $treatNonBreakingSpaceAsEmpty, $expectedResult)
{
    $actual = $this->subject->stripEmptyTags($content, $tagList, $treatNonBreakingSpaceAsEmpty);

    $this->assertEquals($expectedResult, $actual);
}
607 }
tagCorrectlyRemovedWhenRmTagIfNoAttribIsConfigured($content, $expectedResult)
rawStripEmptyTagsTest($content, $tagList, $treatNonBreakingSpaceAsEmpty, $expectedResult)
stripEmptyTags($stripOn, $tagList, $treatNonBreakingSpaceAsEmpty, $content, $expectedResult)
globalNestingCorrectlyRemovesInvalidTags($content, $expectedResult)
getFirstTagName($str, $preserveCase, $expected)
stripEmptyTagsKeepsConfiguredTags($tagList, $treatNonBreakingSpaceAsEmpty, $content, $expectedResult)
parseConfigAndCleanHtml(array $tsConfig, $content)
splitIntoBlock($tag, $content, $eliminateExtraEndTags, $expected)
localNestingCorrectlyRemovesInvalidTags($content, $expectedResult)