TYPO3 CMS  TYPO3_8-7
HtmlParser.php
Go to the documentation of this file.
1 <?php
3 
4 /*
5  * This file is part of the TYPO3 CMS project.
6  *
7  * It is free software; you can redistribute it and/or modify it under
8  * the terms of the GNU General Public License, either version 2
9  * of the License, or any later version.
10  *
11  * For the full copyright and license information, please read the
12  * LICENSE.txt file that was distributed with this source code.
13  *
14  * The TYPO3 project - inspiring people to share!
15  */
16 
20 
26 {
30  protected $caseShift_cache = [];
31 
32  // Void elements that do not have closing tags, as defined by HTML5, except link element
33  const VOID_ELEMENTS = 'area|base|br|col|command|embed|hr|img|input|keygen|meta|param|source|track|wbr';
34 
35  /************************************
36  *
37  * Parsing HTML code
38  *
39  ************************************/
51  public function splitIntoBlock($tag, $content, $eliminateExtraEndTags = false)
52  {
53  $tags = array_unique(GeneralUtility::trimExplode(',', $tag, true));
54  array_walk($tags, function (&$tag) {
55  $tag = preg_quote($tag, '/');
56  });
57  $regexStr = '/\\<\\/?(' . implode('|', $tags) . ')(\\s*\\>|\\s[^\\>]*\\>)/si';
58  $parts = preg_split($regexStr, $content);
59  $newParts = [];
60  $pointer = strlen($parts[0]);
61  $buffer = $parts[0];
62  $nested = 0;
63  reset($parts);
64  // We skip the first element in foreach loop
65  $partsSliced = array_slice($parts, 1, null, true);
66  foreach ($partsSliced as $v) {
67  $isEndTag = substr($content, $pointer, 2) === '</' ? 1 : 0;
68  $tagLen = strcspn(substr($content, $pointer), '>') + 1;
69  // We meet a start-tag:
70  if (!$isEndTag) {
71  // Ground level:
72  if (!$nested) {
73  // Previous buffer stored
74  $newParts[] = $buffer;
75  $buffer = '';
76  }
77  // We are inside now!
78  $nested++;
79  // New buffer set and pointer increased
80  $mbuffer = substr($content, $pointer, strlen($v) + $tagLen);
81  $pointer += strlen($mbuffer);
82  $buffer .= $mbuffer;
83  } else {
84  // If we meet an endtag:
85  // Decrease nested-level
86  $nested--;
87  $eliminated = 0;
88  if ($eliminateExtraEndTags && $nested < 0) {
89  $nested = 0;
90  $eliminated = 1;
91  } else {
92  // In any case, add the endtag to current buffer and increase pointer
93  $buffer .= substr($content, $pointer, $tagLen);
94  }
95  $pointer += $tagLen;
96  // if we're back on ground level, (and not by eliminating tags...
97  if (!$nested && !$eliminated) {
98  $newParts[] = $buffer;
99  $buffer = '';
100  }
101  // New buffer set and pointer increased
102  $mbuffer = substr($content, $pointer, strlen($v));
103  $pointer += strlen($mbuffer);
104  $buffer .= $mbuffer;
105  }
106  }
107  $newParts[] = $buffer;
108  return $newParts;
109  }
110 
123  public function splitIntoBlockRecursiveProc($tag, $content, &$procObj, $callBackContent, $callBackTags, $level = 0)
124  {
125  $parts = $this->splitIntoBlock($tag, $content, true);
126  foreach ($parts as $k => $v) {
127  if ($k % 2) {
128  $firstTagName = $this->getFirstTagName($v, true);
129  $tagsArray = [];
130  $tagsArray['tag_start'] = $this->getFirstTag($v);
131  $tagsArray['tag_end'] = '</' . $firstTagName . '>';
132  $tagsArray['tag_name'] = strtolower($firstTagName);
133  $tagsArray['content'] = $this->splitIntoBlockRecursiveProc($tag, $this->removeFirstAndLastTag($v), $procObj, $callBackContent, $callBackTags, $level + 1);
134  if ($callBackTags) {
135  $tagsArray = $procObj->{$callBackTags}($tagsArray, $level);
136  }
137  $parts[$k] = $tagsArray['tag_start'] . $tagsArray['content'] . $tagsArray['tag_end'];
138  } else {
139  if ($callBackContent) {
140  $parts[$k] = $procObj->{$callBackContent}($parts[$k], $level);
141  }
142  }
143  }
144  return implode('', $parts);
145  }
146 
157  public function splitTags($tag, $content)
158  {
159  $tags = GeneralUtility::trimExplode(',', $tag, true);
160  array_walk($tags, function (&$tag) {
161  $tag = preg_quote($tag, '/');
162  });
163  $regexStr = '/\\<(' . implode('|', $tags) . ')(\\s[^>]*)?\\/?>/si';
164  $parts = preg_split($regexStr, $content);
165  $pointer = strlen($parts[0]);
166  $newParts = [];
167  $newParts[] = $parts[0];
168  reset($parts);
169  // We skip the first element in foreach loop
170  $partsSliced = array_slice($parts, 1, null, true);
171  foreach ($partsSliced as $v) {
172  $tagLen = strcspn(substr($content, $pointer), '>') + 1;
173  // Set tag:
174  // New buffer set and pointer increased
175  $tag = substr($content, $pointer, $tagLen);
176  $newParts[] = $tag;
177  $pointer += strlen($tag);
178  // Set content:
179  $newParts[] = $v;
180  $pointer += strlen($v);
181  }
182  return $newParts;
183  }
184 
192  public function removeFirstAndLastTag($str)
193  {
194  // End of first tag:
195  $start = strpos($str, '>');
196  // Begin of last tag:
197  $end = strrpos($str, '<');
198  // Return
199  return substr($str, $start + 1, $end - $start - 1);
200  }
201 
209  public function getFirstTag($str)
210  {
211  // First:
212  $endLen = strpos($str, '>');
213  return $endLen !== false ? substr($str, 0, $endLen + 1) : '';
214  }
215 
224  public function getFirstTagName($str, $preserveCase = false)
225  {
226  $matches = [];
227  if (preg_match('/^\\s*\\<([^\\s\\>]+)(\\s|\\>)/', $str, $matches) === 1) {
228  if (!$preserveCase) {
229  return strtoupper($matches[1]);
230  }
231  return $matches[1];
232  }
233  return '';
234  }
235 
248  public function get_tag_attributes($tag, $deHSC = false)
249  {
250  list($components, $metaC) = $this->split_tag_attributes($tag);
251  // Attribute name is stored here
252  $name = '';
253  $valuemode = false;
254  $attributes = [];
255  $attributesMeta = [];
256  if (is_array($components)) {
257  foreach ($components as $key => $val) {
258  // Only if $name is set (if there is an attribute, that waits for a value), that valuemode is enabled. This ensures that the attribute is assigned it's value
259  if ($val !== '=') {
260  if ($valuemode) {
261  if ($name) {
262  $attributes[$name] = $deHSC ? htmlspecialchars_decode($val) : $val;
263  $attributesMeta[$name]['dashType'] = $metaC[$key];
264  $name = '';
265  }
266  } else {
267  if ($namekey = preg_replace('/[^[:alnum:]_\\:\\-]/', '', $val)) {
268  $name = strtolower($namekey);
269  $attributesMeta[$name] = [];
270  $attributesMeta[$name]['origTag'] = $namekey;
271  $attributes[$name] = '';
272  }
273  }
274  $valuemode = false;
275  } else {
276  $valuemode = true;
277  }
278  }
279  return [$attributes, $attributesMeta];
280  }
281  }
282 
297  public function split_tag_attributes($tag)
298  {
299  $matches = [];
300  if (preg_match('/(\\<[^\\s]+\\s+)?(.*?)\\s*(\\>)?$/s', $tag, $matches) !== 1) {
301  return [[], []];
302  }
303  $tag_tmp = $matches[2];
304  $metaValue = [];
305  $value = [];
306  $matches = [];
307  if (preg_match_all('/("[^"]*"|\'[^\']*\'|[^\\s"\'\\=]+|\\=)/s', $tag_tmp, $matches) > 0) {
308  foreach ($matches[1] as $part) {
309  $firstChar = $part[0];
310  if ($firstChar === '"' || $firstChar === '\'') {
311  $metaValue[] = $firstChar;
312  $value[] = substr($part, 1, -1);
313  } else {
314  $metaValue[] = '';
315  $value[] = $part;
316  }
317  }
318  }
319  return [$value, $metaValue];
320  }
321 
322  /*********************************
323  *
324  * Clean HTML code
325  *
326  *********************************/
363  public function HTMLcleaner($content, $tags = [], $keepAll = 0, $hSC = 0, $addConfig = [])
364  {
365  $newContent = [];
366  $tokArr = explode('<', $content);
367  $newContent[] = $this->bidir_htmlspecialchars(current($tokArr), $hSC);
368  // We skip the first element in foreach loop
369  $tokArrSliced = array_slice($tokArr, 1, null, true);
370  $c = 1;
371  $tagRegister = [];
372  $tagStack = [];
373  $inComment = false;
374  $inCdata = false;
375  $skipTag = false;
376  foreach ($tokArrSliced as $tok) {
377  if ($inComment) {
378  if (($eocPos = strpos($tok, '-->')) === false) {
379  // End of comment is not found in the token. Go further until end of comment is found in other tokens.
380  $newContent[$c++] = '<' . $tok;
381  continue;
382  }
383  // Comment ends in the middle of the token: add comment and proceed with rest of the token
384  $newContent[$c++] = '<' . substr($tok, 0, ($eocPos + 3));
385  $tok = substr($tok, $eocPos + 3);
386  $inComment = false;
387  $skipTag = true;
388  } elseif ($inCdata) {
389  if (($eocPos = strpos($tok, '/*]]>*/')) === false) {
390  // End of comment is not found in the token. Go further until end of comment is found in other tokens.
391  $newContent[$c++] = '<' . $tok;
392  continue;
393  }
394  // Comment ends in the middle of the token: add comment and proceed with rest of the token
395  $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 10);
396  $tok = substr($tok, $eocPos + 10);
397  $inCdata = false;
398  $skipTag = true;
399  } elseif (substr($tok, 0, 3) === '!--') {
400  if (($eocPos = strpos($tok, '-->')) === false) {
401  // Comment started in this token but it does end in the same token. Set a flag to skip till the end of comment
402  $newContent[$c++] = '<' . $tok;
403  $inComment = true;
404  continue;
405  }
406  // Start and end of comment are both in the current token. Add comment and proceed with rest of the token
407  $newContent[$c++] = '<' . substr($tok, 0, ($eocPos + 3));
408  $tok = substr($tok, $eocPos + 3);
409  $skipTag = true;
410  } elseif (substr($tok, 0, 10) === '![CDATA[*/') {
411  if (($eocPos = strpos($tok, '/*]]>*/')) === false) {
412  // Comment started in this token but it does end in the same token. Set a flag to skip till the end of comment
413  $newContent[$c++] = '<' . $tok;
414  $inCdata = true;
415  continue;
416  }
417  // Start and end of comment are both in the current token. Add comment and proceed with rest of the token
418  $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 10);
419  $tok = substr($tok, $eocPos + 10);
420  $skipTag = true;
421  }
422  $firstChar = $tok[0] ?? null;
423  // It is a tag... (first char is a-z0-9 or /) (fixed 19/01 2004). This also avoids triggering on <?xml..> and <!DOCTYPE..>
424  if (!$skipTag && preg_match('/[[:alnum:]\\/]/', $firstChar) == 1) {
425  $tagEnd = strpos($tok, '>');
426  // If there is and end-bracket... tagEnd can't be 0 as the first character can't be a >
427  if ($tagEnd) {
428  $endTag = $firstChar === '/' ? 1 : 0;
429  $tagContent = substr($tok, $endTag, $tagEnd - $endTag);
430  $tagParts = preg_split('/\\s+/s', $tagContent, 2);
431  $tagName = strtolower($tagParts[0]);
432  $emptyTag = 0;
433  if (isset($tags[$tagName])) {
434  // If there is processing to do for the tag:
435  if (is_array($tags[$tagName])) {
436  if (preg_match('/^(' . self::VOID_ELEMENTS . ' )$/i', $tagName)) {
437  $emptyTag = 1;
438  }
439  // If NOT an endtag, do attribute processing (added dec. 2003)
440  if (!$endTag) {
441  // Override attributes
442  if ((string)$tags[$tagName]['overrideAttribs'] !== '') {
443  $tagParts[1] = $tags[$tagName]['overrideAttribs'];
444  }
445  // Allowed tags
446  if ((string)$tags[$tagName]['allowedAttribs'] !== '') {
447  // No attribs allowed
448  if ((string)$tags[$tagName]['allowedAttribs'] === '0') {
449  $tagParts[1] = '';
450  } elseif (trim($tagParts[1])) {
451  $tagAttrib = $this->get_tag_attributes($tagParts[1]);
452  $tagParts[1] = '';
453  $newTagAttrib = [];
454  if (!($tList = $tags[$tagName]['_allowedAttribs'])) {
455  // Just explode attribts for tag once
456  $tList = ($tags[$tagName]['_allowedAttribs'] = GeneralUtility::trimExplode(',', strtolower($tags[$tagName]['allowedAttribs']), true));
457  }
458  foreach ($tList as $allowTag) {
459  if (isset($tagAttrib[0][$allowTag])) {
460  $newTagAttrib[$allowTag] = $tagAttrib[0][$allowTag];
461  }
462  }
463  $tagParts[1] = $this->compileTagAttribs($newTagAttrib, $tagAttrib[1]);
464  }
465  }
466  // Fixed attrib values
467  if (is_array($tags[$tagName]['fixAttrib'])) {
468  $tagAttrib = $this->get_tag_attributes($tagParts[1]);
469  $tagParts[1] = '';
470  foreach ($tags[$tagName]['fixAttrib'] as $attr => $params) {
471  if (isset($params['set']) && $params['set'] !== '') {
472  $tagAttrib[0][$attr] = $params['set'];
473  }
474  if (!empty($params['unset'])) {
475  unset($tagAttrib[0][$attr]);
476  }
477  if (!isset($tagAttrib[0][$attr]) && (string)$params['default'] !== '') {
478  $tagAttrib[0][$attr] = $params['default'];
479  }
480  if ($params['always'] || isset($tagAttrib[0][$attr])) {
481  if ($params['trim']) {
482  $tagAttrib[0][$attr] = trim($tagAttrib[0][$attr]);
483  }
484  if ($params['intval']) {
485  $tagAttrib[0][$attr] = (int)$tagAttrib[0][$attr];
486  }
487  if ($params['lower']) {
488  $tagAttrib[0][$attr] = strtolower($tagAttrib[0][$attr]);
489  }
490  if ($params['upper']) {
491  $tagAttrib[0][$attr] = strtoupper($tagAttrib[0][$attr]);
492  }
493  if ($params['range']) {
494  if (isset($params['range'][1])) {
495  $tagAttrib[0][$attr] = MathUtility::forceIntegerInRange($tagAttrib[0][$attr], (int)$params['range'][0], (int)$params['range'][1]);
496  } else {
497  $tagAttrib[0][$attr] = MathUtility::forceIntegerInRange($tagAttrib[0][$attr], (int)$params['range'][0]);
498  }
499  }
500  if (is_array($params['list'])) {
501  // For the class attribute, remove from the attribute value any class not in the list
502  // Classes are case sensitive
503  if ($attr === 'class') {
504  $newClasses = [];
505  $classes = GeneralUtility::trimExplode(' ', $tagAttrib[0][$attr], true);
506  foreach ($classes as $class) {
507  if (in_array($class, $params['list'])) {
508  $newClasses[] = $class;
509  }
510  }
511  if (!empty($newClasses)) {
512  $tagAttrib[0][$attr] = implode(' ', $newClasses);
513  } else {
514  $tagAttrib[0][$attr] = $params['list'][0];
515  }
516  } else {
517  if (!in_array($this->caseShift($tagAttrib[0][$attr], $params['casesensitiveComp']), $this->caseShift($params['list'], $params['casesensitiveComp'], $tagName))) {
518  $tagAttrib[0][$attr] = $params['list'][0];
519  }
520  }
521  }
522  if ($params['removeIfFalse'] && $params['removeIfFalse'] !== 'blank' && !$tagAttrib[0][$attr] || $params['removeIfFalse'] === 'blank' && (string)$tagAttrib[0][$attr] === '') {
523  unset($tagAttrib[0][$attr]);
524  }
525  if ((string)$params['removeIfEquals'] !== '' && $this->caseShift($tagAttrib[0][$attr], $params['casesensitiveComp']) === $this->caseShift($params['removeIfEquals'], $params['casesensitiveComp'])) {
526  unset($tagAttrib[0][$attr]);
527  }
528  if ($params['prefixLocalAnchors']) {
529  if ($tagAttrib[0][$attr][0] === '#') {
530  if ($params['prefixLocalAnchors'] == 2) {
532  $contentObjectRenderer = GeneralUtility::makeInstance(ContentObjectRenderer::class);
533  $prefix = $contentObjectRenderer->getUrlToCurrentLocation();
534  } else {
535  $prefix = GeneralUtility::getIndpEnv('TYPO3_REQUEST_URL');
536  }
537  $tagAttrib[0][$attr] = $prefix . $tagAttrib[0][$attr];
538  }
539  }
540  if ($params['prefixRelPathWith']) {
541  $urlParts = parse_url($tagAttrib[0][$attr]);
542  if (!$urlParts['scheme'] && $urlParts['path'][0] !== '/') {
543  // If it is NOT an absolute URL (by http: or starting "/")
544  $tagAttrib[0][$attr] = $params['prefixRelPathWith'] . $tagAttrib[0][$attr];
545  }
546  }
547  if ($params['userFunc']) {
548  if (is_array($params['userFunc.'])) {
549  $params['userFunc.']['attributeValue'] = $tagAttrib[0][$attr];
550  } else {
551  $params['userFunc.'] = $tagAttrib[0][$attr];
552  }
553  $tagAttrib[0][$attr] = GeneralUtility::callUserFunction($params['userFunc'], $params['userFunc.'], $this);
554  }
555  }
556  }
557  $tagParts[1] = $this->compileTagAttribs($tagAttrib[0], $tagAttrib[1]);
558  }
559  } else {
560  // If endTag, remove any possible attributes:
561  $tagParts[1] = '';
562  }
563  // Protecting the tag by converting < and > to &lt; and &gt; ??
564  if ($tags[$tagName]['protect']) {
565  $lt = '&lt;';
566  $gt = '&gt;';
567  } else {
568  $lt = '<';
569  $gt = '>';
570  }
571  // Remapping tag name?
572  if ($tags[$tagName]['remap']) {
573  $tagParts[0] = $tags[$tagName]['remap'];
574  }
575  // rmTagIfNoAttrib
576  if ($endTag || trim($tagParts[1]) || !$tags[$tagName]['rmTagIfNoAttrib']) {
577  $setTag = 1;
578  // Remove this closing tag if $tagName was among $TSconfig['removeTags']
579  if ($endTag && $tags[$tagName]['allowedAttribs'] === 0 && $tags[$tagName]['rmTagIfNoAttrib'] === 1) {
580  $setTag = 0;
581  }
582  if ($tags[$tagName]['nesting']) {
583  if (!is_array($tagRegister[$tagName])) {
584  $tagRegister[$tagName] = [];
585  }
586  if ($endTag) {
587  $correctTag = 1;
588  if ($tags[$tagName]['nesting'] === 'global') {
589  $lastEl = end($tagStack);
590  if ($tagName !== $lastEl) {
591  if (in_array($tagName, $tagStack)) {
592  while (!empty($tagStack) && $tagName !== $lastEl) {
593  $elPos = end($tagRegister[$lastEl]);
594  unset($newContent[$elPos]);
595  array_pop($tagRegister[$lastEl]);
596  array_pop($tagStack);
597  $lastEl = end($tagStack);
598  }
599  } else {
600  // In this case the
601  $correctTag = 0;
602  }
603  }
604  }
605  if (empty($tagRegister[$tagName]) || !$correctTag) {
606  $setTag = 0;
607  } else {
608  array_pop($tagRegister[$tagName]);
609  if ($tags[$tagName]['nesting'] === 'global') {
610  array_pop($tagStack);
611  }
612  }
613  } else {
614  $tagRegister[$tagName][] = $c;
615  if ($tags[$tagName]['nesting'] === 'global') {
616  $tagStack[] = $tagName;
617  }
618  }
619  }
620  if ($setTag) {
621  // Setting the tag
622  $newContent[$c++] = $lt . ($endTag ? '/' : '') . trim($tagParts[0] . ' ' . $tagParts[1]) . ($emptyTag ? ' /' : '') . $gt;
623  }
624  }
625  } else {
626  $newContent[$c++] = '<' . ($endTag ? '/' : '') . $tagContent . '>';
627  }
628  } elseif ($keepAll) {
629  // This is if the tag was not defined in the array for processing:
630  if ($keepAll === 'protect') {
631  $lt = '&lt;';
632  $gt = '&gt;';
633  } else {
634  $lt = '<';
635  $gt = '>';
636  }
637  $newContent[$c++] = $lt . ($endTag ? '/' : '') . $tagContent . $gt;
638  }
639  $newContent[$c++] = $this->bidir_htmlspecialchars(substr($tok, $tagEnd + 1), $hSC);
640  } else {
641  $newContent[$c++] = $this->bidir_htmlspecialchars('<' . $tok, $hSC);
642  }
643  } else {
644  $newContent[$c++] = $this->bidir_htmlspecialchars(($skipTag ? '' : '<') . $tok, $hSC);
645  // It was not a tag anyways
646  $skipTag = false;
647  }
648  }
649  // Unsetting tags:
650  foreach ($tagRegister as $tag => $positions) {
651  foreach ($positions as $pKey) {
652  unset($newContent[$pKey]);
653  }
654  }
655  $newContent = implode('', $newContent);
656  $newContent = $this->stripEmptyTagsIfConfigured($newContent, $addConfig);
657  return $newContent;
658  }
659 
667  public function bidir_htmlspecialchars($value, $dir)
668  {
669  switch ((int)$dir) {
670  case 1:
671  return htmlspecialchars($value);
672  case 2:
673  return htmlspecialchars($value, ENT_COMPAT, 'UTF-8', false);
674  case -1:
675  return htmlspecialchars_decode($value);
676  default:
677  return $value;
678  }
679  }
680 
690  public function prefixResourcePath($main_prefix, $content, $alternatives = [], $suffix = '')
691  {
692  $parts = $this->splitTags('embed,td,table,body,img,input,form,link,script,a,param', $content);
693  foreach ($parts as $k => $v) {
694  if ($k % 2) {
695  $params = $this->get_tag_attributes($v);
696  // Detect tag-ending so that it is re-applied correctly.
697  $tagEnd = substr($v, -2) === '/>' ? ' />' : '>';
698  // The 'name' of the first tag
699  $firstTagName = $this->getFirstTagName($v);
700  $somethingDone = 0;
701  $prefix = isset($alternatives[strtoupper($firstTagName)]) ? $alternatives[strtoupper($firstTagName)] : $main_prefix;
702  switch (strtolower($firstTagName)) {
703  case 'td':
704 
705  case 'body':
706 
707  case 'table':
708  $src = $params[0]['background'];
709  if ($src) {
710  $params[0]['background'] = $this->prefixRelPath($prefix, $params[0]['background'], $suffix);
711  $somethingDone = 1;
712  }
713  break;
714  case 'img':
715 
716  case 'input':
717 
718  case 'script':
719 
720  case 'embed':
721  $src = $params[0]['src'];
722  if ($src) {
723  $params[0]['src'] = $this->prefixRelPath($prefix, $params[0]['src'], $suffix);
724  $somethingDone = 1;
725  }
726  break;
727  case 'link':
728 
729  case 'a':
730  $src = $params[0]['href'];
731  if ($src) {
732  $params[0]['href'] = $this->prefixRelPath($prefix, $params[0]['href'], $suffix);
733  $somethingDone = 1;
734  }
735  break;
736  case 'form':
737  $src = $params[0]['action'];
738  if ($src) {
739  $params[0]['action'] = $this->prefixRelPath($prefix, $params[0]['action'], $suffix);
740  $somethingDone = 1;
741  }
742  break;
743  case 'param':
744  $test = $params[0]['name'];
745  if ($test && $test === 'movie') {
746  if ($params[0]['value']) {
747  $params[0]['value'] = $this->prefixRelPath($prefix, $params[0]['value'], $suffix);
748  $somethingDone = 1;
749  }
750  }
751  break;
752  }
753  if ($somethingDone) {
754  $tagParts = preg_split('/\\s+/s', $v, 2);
755  $tagParts[1] = $this->compileTagAttribs($params[0], $params[1]);
756  $parts[$k] = '<' . trim(strtolower($firstTagName) . ' ' . $tagParts[1]) . $tagEnd;
757  }
758  }
759  }
760  $content = implode('', $parts);
761  // Fix <style> section:
762  $prefix = isset($alternatives['style']) ? $alternatives['style'] : $main_prefix;
763  if ((string)$prefix !== '') {
764  $parts = $this->splitIntoBlock('style', $content);
765  foreach ($parts as $k => &$part) {
766  if ($k % 2) {
767  $part = preg_replace('/(url[[:space:]]*\\([[:space:]]*["\']?)([^"\')]*)(["\']?[[:space:]]*\\))/i', '\\1' . $prefix . '\\2' . $suffix . '\\3', $part);
768  }
769  }
770  unset($part);
771  $content = implode('', $parts);
772  }
773  return $content;
774  }
775 
785  public function prefixRelPath($prefix, $srcVal, $suffix = '')
786  {
787  // Only prefix if it's not an absolute URL or
788  // only a link to a section within the page.
789  if ($srcVal[0] !== '/' && $srcVal[0] !== '#') {
790  $urlParts = parse_url($srcVal);
791  // Only prefix URLs without a scheme
792  if (!$urlParts['scheme']) {
793  $srcVal = $prefix . $srcVal . $suffix;
794  }
795  }
796  return $srcVal;
797  }
798 
808  public function caseShift($str, $caseSensitiveComparison, $cacheKey = '')
809  {
810  if ($caseSensitiveComparison) {
811  return $str;
812  }
813  if (is_array($str)) {
814  // Fetch from runlevel cache
815  if ($cacheKey && isset($this->caseShift_cache[$cacheKey])) {
816  $str = $this->caseShift_cache[$cacheKey];
817  } else {
818  array_walk($str, function (&$value) {
819  $value = strtoupper($value);
820  });
821  if ($cacheKey) {
822  $this->caseShift_cache[$cacheKey] = $str;
823  }
824  }
825  } else {
826  $str = strtoupper($str);
827  }
828  return $str;
829  }
830 
839  public function compileTagAttribs($tagAttrib, $meta = [])
840  {
841  $accu = [];
842  foreach ($tagAttrib as $k => $v) {
843  $attr = $meta[$k]['origTag'] ?: $k;
844  if (strcmp($v, '') || isset($meta[$k]['dashType'])) {
845  $dash = $meta[$k]['dashType'] ?: (MathUtility::canBeInterpretedAsInteger($v) ? '' : '"');
846  $attr .= '=' . $dash . $v . $dash;
847  }
848  $accu[] = $attr;
849  }
850  return implode(' ', $accu);
851  }
852 
861  public function HTMLparserConfig($TSconfig, $keepTags = [])
862  {
863  // Allow tags (base list, merged with incoming array)
864  $alTags = array_flip(GeneralUtility::trimExplode(',', strtolower($TSconfig['allowTags']), true));
865  $keepTags = array_merge($alTags, $keepTags);
866  // Set config properties.
867  if (is_array($TSconfig['tags.'])) {
868  foreach ($TSconfig['tags.'] as $key => $tagC) {
869  if (!is_array($tagC) && $key == strtolower($key)) {
870  if ((string)$tagC === '0') {
871  unset($keepTags[$key]);
872  }
873  if ((string)$tagC === '1' && !isset($keepTags[$key])) {
874  $keepTags[$key] = 1;
875  }
876  }
877  }
878  foreach ($TSconfig['tags.'] as $key => $tagC) {
879  if (is_array($tagC) && $key == strtolower($key)) {
880  $key = substr($key, 0, -1);
881  if (!is_array($keepTags[$key])) {
882  $keepTags[$key] = [];
883  }
884  if (is_array($tagC['fixAttrib.'])) {
885  foreach ($tagC['fixAttrib.'] as $atName => $atConfig) {
886  if (is_array($atConfig)) {
887  $atName = substr($atName, 0, -1);
888  if (!is_array($keepTags[$key]['fixAttrib'][$atName])) {
889  $keepTags[$key]['fixAttrib'][$atName] = [];
890  }
891  $keepTags[$key]['fixAttrib'][$atName] = array_merge($keepTags[$key]['fixAttrib'][$atName], $atConfig);
892  if ((string)$keepTags[$key]['fixAttrib'][$atName]['range'] !== '') {
893  $keepTags[$key]['fixAttrib'][$atName]['range'] = GeneralUtility::trimExplode(',', $keepTags[$key]['fixAttrib'][$atName]['range']);
894  }
895  if ((string)$keepTags[$key]['fixAttrib'][$atName]['list'] !== '') {
896  $keepTags[$key]['fixAttrib'][$atName]['list'] = GeneralUtility::trimExplode(',', $keepTags[$key]['fixAttrib'][$atName]['list']);
897  }
898  }
899  }
900  }
901  unset($tagC['fixAttrib.']);
902  unset($tagC['fixAttrib']);
903  if (isset($tagC['rmTagIfNoAttrib']) && $tagC['rmTagIfNoAttrib'] && empty($tagC['nesting'])) {
904  $tagC['nesting'] = 1;
905  }
906  $keepTags[$key] = array_merge($keepTags[$key], $tagC);
907  }
908  }
909  }
910  // LocalNesting
911  if ($TSconfig['localNesting']) {
912  $lN = GeneralUtility::trimExplode(',', strtolower($TSconfig['localNesting']), true);
913  foreach ($lN as $tn) {
914  if (isset($keepTags[$tn])) {
915  if (!is_array($keepTags[$tn])) {
916  $keepTags[$tn] = [];
917  }
918  $keepTags[$tn]['nesting'] = 1;
919  }
920  }
921  }
922  if ($TSconfig['globalNesting']) {
923  $lN = GeneralUtility::trimExplode(',', strtolower($TSconfig['globalNesting']), true);
924  foreach ($lN as $tn) {
925  if (isset($keepTags[$tn])) {
926  if (!is_array($keepTags[$tn])) {
927  $keepTags[$tn] = [];
928  }
929  $keepTags[$tn]['nesting'] = 'global';
930  }
931  }
932  }
933  if ($TSconfig['rmTagIfNoAttrib']) {
934  $lN = GeneralUtility::trimExplode(',', strtolower($TSconfig['rmTagIfNoAttrib']), true);
935  foreach ($lN as $tn) {
936  if (isset($keepTags[$tn])) {
937  if (!is_array($keepTags[$tn])) {
938  $keepTags[$tn] = [];
939  }
940  $keepTags[$tn]['rmTagIfNoAttrib'] = 1;
941  if (empty($keepTags[$tn]['nesting'])) {
942  $keepTags[$tn]['nesting'] = 1;
943  }
944  }
945  }
946  }
947  if ($TSconfig['noAttrib']) {
948  $lN = GeneralUtility::trimExplode(',', strtolower($TSconfig['noAttrib']), true);
949  foreach ($lN as $tn) {
950  if (isset($keepTags[$tn])) {
951  if (!is_array($keepTags[$tn])) {
952  $keepTags[$tn] = [];
953  }
954  $keepTags[$tn]['allowedAttribs'] = 0;
955  }
956  }
957  }
958  if ($TSconfig['removeTags']) {
959  $lN = GeneralUtility::trimExplode(',', strtolower($TSconfig['removeTags']), true);
960  foreach ($lN as $tn) {
961  $keepTags[$tn] = [];
962  $keepTags[$tn]['allowedAttribs'] = 0;
963  $keepTags[$tn]['rmTagIfNoAttrib'] = 1;
964  }
965  }
966  // Create additional configuration:
967  $addConfig = [];
968  if (isset($TSconfig['stripEmptyTags'])) {
969  $addConfig['stripEmptyTags'] = $TSconfig['stripEmptyTags'];
970  if (isset($TSconfig['stripEmptyTags.'])) {
971  $addConfig['stripEmptyTags.'] = $TSconfig['stripEmptyTags.'];
972  }
973  }
974  return [
975  $keepTags,
976  '' . $TSconfig['keepNonMatchedTags'],
977  (int)$TSconfig['htmlSpecialChars'],
978  $addConfig
979  ];
980  }
981 
992  public function stripEmptyTags($content, $tagList = '', $treatNonBreakingSpaceAsEmpty = false, $keepTags = false)
993  {
994  if (!empty($tagList)) {
995  $tagRegEx = implode('|', GeneralUtility::trimExplode(',', $tagList, true));
996  if ($keepTags) {
997  $tagRegEx = '(?!' . $tagRegEx . ')[^ >]+';
998  }
999  } else {
1000  $tagRegEx = '[^ >]+'; // all characters until you reach a > or space;
1001  }
1002  $count = 1;
1003  $nbspRegex = $treatNonBreakingSpaceAsEmpty ? '|(&nbsp;)' : '';
1004  $finalRegex = sprintf('/<(%s)[^>]*>( %s)*<\/\\1[^>]*>/i', $tagRegEx, $nbspRegex);
1005  while ($count !== 0) {
1006  $content = preg_replace($finalRegex, '', $content, -1, $count);
1007  }
1008  return $content;
1009  }
1010 
1018  protected function stripEmptyTagsIfConfigured($value, $configuration)
1019  {
1020  if (empty($configuration['stripEmptyTags'])) {
1021  return $value;
1022  }
1023 
1024  $tags = null;
1025  $keepTags = false;
1026  if (!empty($configuration['stripEmptyTags.']['keepTags'])) {
1027  $tags = $configuration['stripEmptyTags.']['keepTags'];
1028  $keepTags = true;
1029  } elseif (!empty($configuration['stripEmptyTags.']['tags'])) {
1030  $tags = $configuration['stripEmptyTags.']['tags'];
1031  }
1032 
1033  $treatNonBreakingSpaceAsEmpty = !empty($configuration['stripEmptyTags.']['treatNonBreakingSpaceAsEmpty']);
1034 
1035  return $this->stripEmptyTags($value, $tags, $treatNonBreakingSpaceAsEmpty, $keepTags);
1036  }
1037 }
compileTagAttribs($tagAttrib, $meta=[])
Definition: HtmlParser.php:839
static forceIntegerInRange($theInt, $min, $max=2000000000, $defaultValue=0)
Definition: MathUtility.php:31
static callUserFunction($funcName, &$params, &$ref, $_='', $errorMode=0)
stripEmptyTagsIfConfigured($value, $configuration)
caseShift($str, $caseSensitiveComparison, $cacheKey='')
Definition: HtmlParser.php:808
static trimExplode($delim, $string, $removeEmptyValues=false, $limit=0)
prefixRelPath($prefix, $srcVal, $suffix='')
Definition: HtmlParser.php:785
static makeInstance($className,... $constructorArguments)
get_tag_attributes($tag, $deHSC=false)
Definition: HtmlParser.php:248
prefixResourcePath($main_prefix, $content, $alternatives=[], $suffix='')
Definition: HtmlParser.php:690
splitIntoBlockRecursiveProc($tag, $content, &$procObj, $callBackContent, $callBackTags, $level=0)
Definition: HtmlParser.php:123
HTMLparserConfig($TSconfig, $keepTags=[])
Definition: HtmlParser.php:861
bidir_htmlspecialchars($value, $dir)
Definition: HtmlParser.php:667
stripEmptyTags($content, $tagList='', $treatNonBreakingSpaceAsEmpty=false, $keepTags=false)
Definition: HtmlParser.php:992
splitIntoBlock($tag, $content, $eliminateExtraEndTags=false)
Definition: HtmlParser.php:51
getFirstTagName($str, $preserveCase=false)
Definition: HtmlParser.php:224