TYPO3 CMS  TYPO3_7-6
HtmlParserTest.php
Go to the documentation of this file.
1 <?php
3 
4 /*
5  * This file is part of the TYPO3 CMS project.
6  *
7  * It is free software; you can redistribute it and/or modify it under
8  * the terms of the GNU General Public License, either version 2
9  * of the License, or any later version.
10  *
11  * For the full copyright and license information, please read the
12  * LICENSE.txt file that was distributed with this source code.
13  *
14  * The TYPO3 project - inspiring people to share!
15  */
16 
18 
23 {
/**
 * Parser instance exercised by the tests; assigned in setUp().
 *
 * @var HtmlParser|null
 */
protected $subject;

/**
 * Stores a newly constructed HtmlParser in $this->subject.
 */
protected function setUp()
{
    $parser = new HtmlParser();
    $this->subject = $parser;
}
33 
38  {
39  return [
40  'single-line CDATA' => [
41  '/*<![CDATA[*/ <hello world> /*]]>*/',
42  '/*<![CDATA[*/ <hello world> /*]]>*/',
43  ],
44  'multi-line CDATA #1' => [
45  '/*<![CDATA[*/' . LF . '<hello world> /*]]>*/',
46  '/*<![CDATA[*/' . LF . '<hello world> /*]]>*/',
47  ],
48  'multi-line CDATA #2' => [
49  '/*<![CDATA[*/ <hello world>' . LF . '/*]]>*/',
50  '/*<![CDATA[*/ <hello world>' . LF . '/*]]>*/',
51  ],
52  'multi-line CDATA #3' => [
53  '/*<![CDATA[*/' . LF . '<hello world>' . LF . '/*]]>*/',
54  '/*<![CDATA[*/' . LF . '<hello world>' . LF . '/*]]>*/',
55  ],
56  ];
57  }
58 
/**
 * Data sets for splitting markup into blocks.
 *
 * Each set holds, in order: the comma-separated tag list, the markup to
 * split, the "eliminate extra end tags" flag and the expected list of parts.
 * The order mirrors the parameters of splitIntoBlock() in this class.
 *
 * @return array
 */
public function splitIntoBlockDataProvider()
{
    $dataSets = [];

    $dataSets['splitBlock'] = [
        'h1,span',
        '<body><h1>Title</h1><span>Note</span></body>',
        false,
        ['<body>', '<h1>Title</h1>', '', '<span>Note</span>', '</body>'],
    ];

    $dataSets['splitBlock br'] = [
        'h1,span',
        '<body><h1>Title</h1><br /><span>Note</span><br /></body>',
        false,
        ['<body>', '<h1>Title</h1>', '<br />', '<span>Note</span>', '<br /></body>'],
    ];

    $dataSets['splitBlock with attribute'] = [
        'h1,span',
        '<body><h1 class="title">Title</h1><span>Note</span></body>',
        false,
        ['<body>', '<h1 class="title">Title</h1>', '', '<span>Note</span>', '</body>'],
    ];

    $dataSets['splitBlock span with attribute'] = [
        'span',
        '<body><h1>Title</h1><span class="title">Note</span></body>',
        false,
        ['<body><h1>Title</h1>', '<span class="title">Note</span>', '</body>'],
    ];

    // Same expectation as 'splitBlock': the trailing </div> is expected to vanish
    // when the flag is true.
    $dataSets['splitBlock without extra end tags'] = [
        'h1,span,div',
        '<body><h1>Title</h1><span>Note</span></body></div>',
        true,
        ['<body>', '<h1>Title</h1>', '', '<span>Note</span>', '</body>'],
    ];

    return $dataSets;
}
117 
/**
 * Checks that the parser's splitIntoBlock() returns exactly the expected parts.
 *
 * Presumably fed by splitIntoBlockDataProvider(); the annotation is not
 * visible in this excerpt.
 *
 * @param string $tag Comma-separated list of tag names to split on
 * @param string $content Markup handed to the parser
 * @param bool $eliminateExtraEndTags Passed through as the third argument
 * @param array $expected Parts the parser must return (compared strictly)
 */
public function splitIntoBlock($tag, $content, $eliminateExtraEndTags, $expected)
{
    $actual = $this->subject->splitIntoBlock($tag, $content, $eliminateExtraEndTags);

    $this->assertSame($expected, $actual);
}
130 
/**
 * Checks that XHTML_clean() yields exactly the expected string for the source.
 *
 * @param string $source Markup passed to XHTML_clean()
 * @param string $expected Result the cleaner must produce (compared strictly)
 */
public function xHtmlCleaningDoesNotModifyCDATA($source, $expected)
{
    $this->assertSame($expected, $this->subject->XHTML_clean($source));
}
142 
147  {
148  return [
149  'Span tag with no attrib' => [
150  '<span>text</span>',
151  'text'
152  ],
153  'Span tag with allowed id attrib' => [
154  '<span id="id">text</span>',
155  '<span id="id">text</span>'
156  ],
157  'Span tag with disallowed style attrib' => [
158  '<span style="line-height: 12px;">text</span>',
159  'text'
160  ]
161  ];
162  }
163 
/**
 * Cleans the content with a configuration that allows only <span>, permits
 * only its "id" attribute and sets rmTagIfNoAttrib for it, then compares
 * the outcome with the expected markup.
 *
 * @param string $content Markup to clean
 * @param string $expectedResult Markup expected after cleaning
 */
public function tagCorrectlyRemovedWhenRmTagIfNoAttribIsConfigured($content, $expectedResult)
{
    $spanRules = [
        'allowedAttribs' => 'id',
        'rmTagIfNoAttrib' => 1,
    ];
    $configuration = [
        'allowTags' => 'span',
        'tags.' => ['span.' => $spanRules],
    ];

    $cleaned = $this->parseConfigAndCleanHtml($configuration, $content);

    $this->assertEquals($expectedResult, $cleaned);
}
183 
188  {
189  $tsConfig = [
190  'allowTags' => 'div,span',
191  'rmTagIfNoAttrib' => 'span',
192  'globalNesting' => 'div,span'
193  ];
194  $content = '<span></span><span id="test"><div></span></div>';
195  $expectedResult = '<span id="test"></span>';
196  $this->assertEquals($expectedResult, $this->parseConfigAndCleanHtml($tsConfig, $content));
197  }
198 
205  {
206  return [
207  'Valid nesting is untouched' => [
208  '<B><I></B></I>',
209  '<B><I></B></I>'
210  ],
211  'Valid nesting with content is untouched' => [
212  'testa<B>test1<I>test2</B>test3</I>testb',
213  'testa<B>test1<I>test2</B>test3</I>testb'
214  ],
215  'Superflous tags are removed' => [
216  '</B><B><I></B></I></B>',
217  '<B><I></B></I>'
218  ],
219  'Superflous tags with content are removed' => [
220  'test1</B>test2<B>test3<I>test4</B>test5</I>test6</B>test7',
221  'test1test2<B>test3<I>test4</B>test5</I>test6test7'
222  ],
223  'Another valid nesting test' => [
224  '<span><div></span></div>',
225  '<span><div></span></div>',
226  ],
227  ];
228  }
229 
/**
 * Cleans the content with localNesting configured for div, span, b and i
 * and compares the outcome with the expected markup.
 *
 * @param string $content Markup to clean
 * @param string $expectedResult Markup expected after cleaning
 */
public function localNestingCorrectlyRemovesInvalidTags($content, $expectedResult)
{
    $configuration = ['allowTags' => 'div,span,b,i', 'localNesting' => 'div,span,b,i'];
    $cleaned = $this->parseConfigAndCleanHtml($configuration, $content);

    $this->assertEquals($expectedResult, $cleaned);
}
244 
251  {
252  return [
253  'Valid nesting is untouched' => [
254  '<B><I></I></B>',
255  '<B><I></I></B>'
256  ],
257  'Valid nesting with content is untouched' => [
258  'testa<B>test1<I>test2</I>test3</B>testb',
259  'testa<B>test1<I>test2</I>test3</B>testb'
260  ],
261  'Invalid nesting is cleaned' => [
262  '</B><B><I></B></I></B>',
263  '<B></B>'
264  ],
265  'Invalid nesting with content is cleaned' => [
266  'test1</B>test2<B>test3<I>test4</B>test5</I>test6</B>test7',
267  'test1test2<B>test3test4</B>test5test6test7'
268  ],
269  'Another invalid nesting test' => [
270  '<span><div></span></div>',
271  '<span></span>',
272  ],
273  ];
274  }
275 
/**
 * Cleans the content with globalNesting configured for span, div, b and i
 * and compares the outcome with the expected markup.
 *
 * @param string $content Markup to clean
 * @param string $expectedResult Markup expected after cleaning
 */
public function globalNestingCorrectlyRemovesInvalidTags($content, $expectedResult)
{
    $configuration = ['allowTags' => 'span,div,b,i', 'globalNesting' => 'span,div,b,i'];
    $cleaned = $this->parseConfigAndCleanHtml($configuration, $content);

    $this->assertEquals($expectedResult, $cleaned);
}
290 
/**
 * Data sets for the empty-tag stripping test.
 *
 * Row layout, mirroring the parameters of stripEmptyTags() in this class:
 * [stripOn, tagList, treatNonBreakingSpaceAsEmpty, content, expectedResult].
 *
 * @return array
 */
public function emptyTagsDataProvider()
{
    $rows = [
        // Stripping switched off
        [0, null, false, '<h1></h1>', '<h1></h1>'],
        // Stripping switched on, with and without an explicit tag list
        [1, null, false, '<h1></h1>', ''],
        [1, null, false, '<h1>hallo</h1>', '<h1>hallo</h1>'],
        [1, null, false, '<h1 class="something"></h1>', ''],
        [1, null, false, '<h1 class="something"></h1><h2></h2>', ''],
        [1, 'h2', false, '<h1 class="something"></h1><h2></h2>', '<h1 class="something"></h1>'],
        [1, 'h2, h1', false, '<h1 class="something"></h1><h2></h2>', ''],
        // Nested tags and &nbsp; handling
        [1, null, false, '<div><p></p></div>', ''],
        [1, null, false, '<div><p>&nbsp;</p></div>', '<div><p>&nbsp;</p></div>'],
        [1, null, true, '<div><p>&nbsp;&nbsp;</p></div>', ''],
        [1, null, true, '<div>&nbsp;&nbsp;<p></p></div>', ''],
        // Tags with real content next to empty ones
        [1, null, false, '<div>Some content<p></p></div>', '<div>Some content</div>'],
        [1, null, true, '<div>Some content<p></p></div>', '<div>Some content</div>'],
        [1, null, false, '<div>Some content</div>', '<div>Some content</div>'],
        [1, null, true, '<div>Some content</div>', '<div>Some content</div>'],
        [1, null, false, '<a href="#skiplinks">Skiplinks </a><b></b>', '<a href="#skiplinks">Skiplinks </a>'],
        [1, null, true, '<a href="#skiplinks">Skiplinks </a><b></b>', '<a href="#skiplinks">Skiplinks </a>'],
    ];

    return $rows;
}
316 
/**
 * Cleans the content with keepNonMatchedTags enabled and the given
 * stripEmptyTags settings, then compares the outcome with the expectation.
 *
 * @param mixed $stripOn Value stored under the 'stripEmptyTags' key
 * @param string|null $tagList Value stored under 'stripEmptyTags.' => 'tags'
 * @param bool $treatNonBreakingSpaceAsEmpty Value stored under 'stripEmptyTags.' => 'treatNonBreakingSpaceAsEmpty'
 * @param string $content Markup to clean
 * @param string $expectedResult Markup expected after cleaning
 */
public function stripEmptyTags($stripOn, $tagList, $treatNonBreakingSpaceAsEmpty, $content, $expectedResult)
{
    $stripOptions = [
        'tags' => $tagList,
        'treatNonBreakingSpaceAsEmpty' => $treatNonBreakingSpaceAsEmpty,
    ];
    $configuration = [
        'keepNonMatchedTags' => 1,
        'stripEmptyTags' => $stripOn,
        'stripEmptyTags.' => $stripOptions,
    ];

    $this->assertEquals($expectedResult, $this->parseConfigAndCleanHtml($configuration, $content));
}
340 
/**
 * Converts the configuration array via HTMLparserConfig() and passes the
 * first four entries of its result, in order, to HTMLcleaner() together
 * with the content.
 *
 * @param array $tsConfig Configuration handed to HTMLparserConfig()
 * @param string $content Markup handed to HTMLcleaner()
 * @return mixed Whatever HTMLcleaner() returns (presumably the cleaned markup string)
 */
protected function parseConfigAndCleanHtml(array $tsConfig, $content)
{
    $parser = $this->subject;
    $settings = $parser->HTMLparserConfig($tsConfig);

    return $parser->HTMLcleaner($content, $settings[0], $settings[1], $settings[2], $settings[3]);
}
353 
/**
 * Data sets for the first-tag lookup: [input string, expected result].
 *
 * @return array
 */
public function getFirstTagDataProvider()
{
    $rows = [];

    $rows[] = ['<body><span></span></body>', '<body>'];
    $rows[] = ['<span>Wrapper<div>Some content</div></span>', '<span>'];
    // Text in front of the first tag is expected to be part of the result.
    $rows[] = ['Something before<span>Wrapper<div>Some content</div></span>Something after', 'Something before<span>'];
    // Input without any tag is expected to give an empty string.
    $rows[] = ['Something without tag', ''];

    return $rows;
}
368 
/**
 * Checks the parser's getFirstTag() result for the given string.
 *
 * @param string $str Input passed to getFirstTag()
 * @param string $expected Value the parser is expected to return
 */
public function getFirstTag($str, $expected)
{
    $actual = $this->subject->getFirstTag($str);

    $this->assertEquals($expected, $actual);
}
383 
/**
 * Data sets for the first-tag-name lookup:
 * [input string, preserveCase flag, expected tag name].
 *
 * @return array
 */
public function getFirstTagNameDataProvider()
{
    $rows = [];

    // Same input with the flag off and on: upper-cased versus original case.
    $rows[] = ['<body><span></span></body>', false, 'BODY'];
    $rows[] = ['<body><span></span></body>', true, 'body'];
    // Attributes on the first or on a nested tag
    $rows[] = ['<div class="test"><span></span></div>', false, 'DIV'];
    $rows[] = ['<div><span class="test"></span></div>', false, 'DIV'];
    // Self-closing tags
    $rows[] = ['<br /><span class="test"></span>', false, 'BR'];
    $rows[] = ['<img src="test.jpg" />', false, 'IMG'];

    return $rows;
}
412 
/**
 * Checks the parser's getFirstTagName() result for the given string and flag.
 *
 * @param string $str Input passed to getFirstTagName()
 * @param bool $preserveCase Passed through as the second argument
 * @param string $expected Tag name the parser is expected to return
 */
public function getFirstTagName($str, $preserveCase, $expected)
{
    $actual = $this->subject->getFirstTagName($str, $preserveCase);

    $this->assertEquals($expected, $actual);
}
427 
432  {
433  return [
434  ['<span>Wrapper<div>Some content</div></span>', 'Wrapper<div>Some content</div>'],
435  ['<td><tr>Some content</tr></td>', '<tr>Some content</tr>'],
436  ['Something before<span>Wrapper<div>Some content</div></span>Something after', 'Wrapper<div>Some content</div>'],
437  ['<span class="hidden">Wrapper<div>Some content</div></span>', 'Wrapper<div>Some content</div>'],
438  ['<span>Wrapper<div class="hidden">Some content</div></span>', 'Wrapper<div class="hidden">Some content</div>'],
439  ];
440  }
441 
/**
 * Checks the parser's removeFirstAndLastTag() result for the given string.
 *
 * @param string $str Input passed to removeFirstAndLastTag()
 * @param string $expectedResult Value the parser is expected to return
 */
public function removeFirstAndLastTag($str, $expectedResult)
{
    $actual = $this->subject->removeFirstAndLastTag($str);

    $this->assertEquals($expectedResult, $actual);
}
455 }
tagCorrectlyRemovedWhenRmTagIfNoAttribIsConfigured($content, $expectedResult)
stripEmptyTags($stripOn, $tagList, $treatNonBreakingSpaceAsEmpty, $content, $expectedResult)
globalNestingCorrectlyRemovesInvalidTags($content, $expectedResult)
getFirstTagName($str, $preserveCase, $expected)
parseConfigAndCleanHtml(array $tsConfig, $content)
splitIntoBlock($tag, $content, $eliminateExtraEndTags, $expected)
localNestingCorrectlyRemovesInvalidTags($content, $expectedResult)