TYPO3 CMS  TYPO3_7-6
RteHtmlParser.php
Go to the documentation of this file.
1 <?php
2 namespace TYPO3\CMS\Core\Html;
3 
4 /*
5  * This file is part of the TYPO3 CMS project.
6  *
7  * It is free software; you can redistribute it and/or modify it under
8  * the terms of the GNU General Public License, either version 2
9  * of the License, or any later version.
10  *
11  * For the full copyright and license information, please read the
12  * LICENSE.txt file that was distributed with this source code.
13  *
14  * The TYPO3 project - inspiring people to share!
15  */
16 
22 
27 {
/**
 * Comma-separated list of block element tag names. TS_transform_db() splits
 * content by these tags (in addition to TABLE, BLOCKQUOTE and, optionally, DIV).
 * RTE_transform() overwrites it with proc.blockElementList when that option is set.
 *
 * @var string
 */
31  public $blockElementList = 'PRE,UL,OL,H1,H2,H3,H4,H5,H6,ADDRESS,DL,DD,HEADER,SECTION,FOOTER,NAV,ARTICLE,ASIDE';
32 
/**
 * Page id of the record being processed; set by init(). Used as the fallback
 * page id in TS_links_rte() and to resolve the upload folder in TS_images_db().
 *
 * @var int
 */
38  public $recPid = 0;
39 
/**
 * Element reference; set by init(). TS_images_db() splits it on ':' into
 * table and field name.
 *
 * @var string
 */
45  public $elRef = '';
46 
/**
 * Relative path; set by setRelPath() without a leading slash and with a
 * trailing slash.
 *
 * @var string
 */
52  public $relPath = '';
53 
/**
 * One '../' per segment of $relPath; set by setRelPath(). Prepended to
 * site-local URLs in the db direction and stripped again in TS_images_rte().
 *
 * @var string
 */
59  public $relBackPath = '';
60 
/**
 * RTE configuration as passed to RTE_transform() in $thisConfig.
 *
 * @var array
 */
66  public $tsConfig = [];
67 
/**
 * The 'proc.' sub-array of $tsConfig; set by RTE_transform().
 *
 * @var array
 */
73  public $procOptions = [];
74 
// NOTE(review): TS_transform_db() reads $this->TS_transform_db_safecounter, but no
// declaration of that property is visible in this listing - confirm it is declared.
81 
/**
 * Result of BackendUtility::getSpecConfParametersFromArray() for the
 * 'rte_transform' parameters; set by RTE_transform().
 *
 * @var mixed
 */
87  public $rte_p = '';
88 
/**
 * Not referenced in the visible part of this file; presumably a cache used
 * by a getKeepTags() method - TODO confirm.
 *
 * @var array
 */
94  public $getKeepTags_cache = [];
95 
/**
 * List of class names from proc.allowedClasses; filled by RTE_transform()
 * for the ts_transform / css_transform modes in the db direction.
 *
 * @var array
 */
101  public $allowedClasses = [];
102 
/**
 * Upper-cased, comma-separated list of tags from proc.preserveTags; set by
 * RTE_transform() and used by TS_preserve_db() / TS_preserve_rte().
 *
 * @var string
 */
108  public $preserveTags = '';
109 
/**
 * Stores the record context the following transformations operate on.
 *
 * @param string $elRef Element reference; TS_images_db() expects the form "table:field"
 * @param int $recPid Page id of the record
 * @return void
 */
public function init($elRef = '', $recPid = 0)
{
    $this->elRef = $elRef;
    $this->recPid = $recPid;
}
122 
/**
 * Sets $this->relPath (with trailing slash) and the matching $this->relBackPath
 * (one '../' per path segment) from the given relative path.
 *
 * At most one leading and one trailing slash are removed from the trimmed input.
 * If nothing remains, both properties are left untouched.
 *
 * @param string $path Relative path, e.g. "typo3/ext/"
 * @return void
 */
public function setRelPath($path)
{
    $path = trim($path);
    $path = preg_replace('/^\\//', '', $path);
    $path = preg_replace('/\\/$/', '', $path);
    // Compare against the empty string explicitly: a plain truthiness check
    // would also reject the valid directory name "0".
    if ((string)$path === '') {
        return;
    }
    $this->relBackPath = str_repeat('../', count(explode('/', $path)));
    $this->relPath = $path . '/';
}
146 
/**
 * Static method whose body is empty in this listing.
 *
 * NOTE(review): source line 159 is absent from this extract, so the original
 * body may have contained a statement - confirm against the original file.
 * Neither parameter is used by the visible code.
 *
 * @param mixed $pArr Not used in the visible code
 * @param mixed $currentRecord Not used in the visible code
 */
157  public static function evalWriteFile($pArr, $currentRecord)
158  {
160  }
161 
162  /**********************************************
163  *
164  * Main function
165  *
166  **********************************************/
/**
 * Transforms RTE content between its database form and its editor form.
 *
 * The list of transformation modes is taken from proc.overruleMode or, if that is
 * empty, from the "mode" parameter of the rte_transform special configuration.
 * The meta modes "ts" and "ts_css" are expanded into their individual modes. For
 * direction "rte" the modes are applied in reverse order. A mode registered in
 * $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation']
 * is delegated to that user object; otherwise the built-in TS_* handler is used.
 *
 * @param string $value Content to transform
 * @param array $specConf Special configuration; only ['rte_transform']['parameters'] is read
 * @param string $direction 'db' (editor to database) or 'rte' (database to editor)
 * @param array $thisConfig RTE configuration; its 'proc.' key becomes $this->procOptions
 * @return string Transformed content
 */
public function RTE_transform($value, $specConf, $direction = 'rte', $thisConfig = [])
{
    // Init:
    $this->tsConfig = $thisConfig;
    $this->procOptions = (array)$thisConfig['proc.'];
    $this->preserveTags = strtoupper(implode(',', GeneralUtility::trimExplode(',', $this->procOptions['preserveTags'])));
    // Dynamic configuration of blockElementList
    if ($this->procOptions['blockElementList']) {
        $this->blockElementList = $this->procOptions['blockElementList'];
    }
    // Get parameters for rte_transformation:
    $p = ($this->rte_p = BackendUtility::getSpecConfParametersFromArray($specConf['rte_transform']['parameters']));
    // Setting modes: overruleMode is comma-separated, the "mode" parameter is dash-separated
    if ((string)$this->procOptions['overruleMode'] !== '') {
        $modes = array_unique(GeneralUtility::trimExplode(',', $this->procOptions['overruleMode']));
    } else {
        $modes = array_unique(GeneralUtility::trimExplode('-', $p['mode']));
    }
    $revmodes = array_flip($modes);
    // Expand the meta modes in place so that their position in the list is kept
    if (isset($revmodes['ts'])) {
        $modes[$revmodes['ts']] = 'ts_transform,ts_preserve,ts_images,ts_links';
    }
    if (isset($revmodes['ts_css'])) {
        $modes[$revmodes['ts_css']] = 'css_transform,ts_images,ts_links';
    }
    // Flatten the expanded entries and make the list unique
    $modes = array_unique(GeneralUtility::trimExplode(',', implode(',', $modes), true));
    // Reverse order if direction is "rte"
    if ($direction == 'rte') {
        $modes = array_reverse($modes);
    }
    // Additional HTML cleaner configuration, applied before (entry) and after (exit)
    // the main transformation and independent of it:
    $entry_HTMLparser = $this->procOptions['entryHTMLparser_' . $direction] ? $this->HTMLparserConfig($this->procOptions['entryHTMLparser_' . $direction . '.']) : '';
    $exit_HTMLparser = $this->procOptions['exitHTMLparser_' . $direction] ? $this->HTMLparserConfig($this->procOptions['exitHTMLparser_' . $direction . '.']) : '';
    // Line breaks are unified: CRLF sequences become a single LF
    if (!$this->procOptions['disableUnifyLineBreaks']) {
        $value = str_replace(CRLF, LF, $value);
    }
    // If an entry-cleaner was configured, pass value through the HTMLcleaner with that:
    if (is_array($entry_HTMLparser)) {
        $value = $this->HTMLcleaner($value, $entry_HTMLparser[0], $entry_HTMLparser[1], $entry_HTMLparser[2], $entry_HTMLparser[3]);
    }
    // Traverse modes:
    foreach ($modes as $cmd) {
        // ->DB
        if ($direction == 'db') {
            // Checking for user defined transformation:
            if ($_classRef = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd]) {
                $_procObj = GeneralUtility::getUserObj($_classRef);
                $_procObj->pObj = $this;
                $_procObj->transformationKey = $cmd;
                $value = $_procObj->transform_db($value, $this);
            } else {
                // ... else use defaults:
                switch ($cmd) {
                    case 'ts_images':
                        $value = $this->TS_images_db($value);
                        break;
                    case 'ts_reglinks':
                        $value = $this->TS_reglinks($value, 'db');
                        break;
                    case 'ts_links':
                        $value = $this->TS_links_db($value);
                        break;
                    case 'ts_preserve':
                        $value = $this->TS_preserve_db($value);
                        break;
                    case 'ts_transform':
                        // Intentional fall-through: both modes share the code below
                    case 'css_transform':
                        $this->allowedClasses = GeneralUtility::trimExplode(',', $this->procOptions['allowedClasses'], true);
                        // CR has a very disturbing effect, so just remove all CR and rely on LF
                        $value = str_replace(CR, '', $value);
                        // Transform empty paragraphs into spacing paragraphs
                        $value = str_replace('<p></p>', '<p>&nbsp;</p>', $value);
                        // Double any trailing spacing paragraph so that it does not get removed by divideIntoLines()
                        $value = preg_replace('/<p>&nbsp;<\/p>$/', '<p>&nbsp;</p>' . '<p>&nbsp;</p>', $value);
                        $value = $this->TS_transform_db($value, $cmd == 'css_transform');
                        break;
                    case 'ts_strip':
                        $value = $this->TS_strip_db($value);
                        break;
                    default:
                        // Do nothing
                }
            }
        }
        // ->RTE
        if ($direction == 'rte') {
            // Checking for user defined transformation:
            if ($_classRef = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd]) {
                $_procObj = GeneralUtility::getUserObj($_classRef);
                $_procObj->pObj = $this;
                // Tell the processor which mode it is called for, exactly as in the
                // db direction, so one class can serve several transformation keys.
                $_procObj->transformationKey = $cmd;
                $value = $_procObj->transform_rte($value, $this);
            } else {
                // ... else use defaults:
                switch ($cmd) {
                    case 'ts_images':
                        $value = $this->TS_images_rte($value);
                        break;
                    case 'ts_reglinks':
                        $value = $this->TS_reglinks($value, 'rte');
                        break;
                    case 'ts_links':
                        $value = $this->TS_links_rte($value);
                        break;
                    case 'ts_preserve':
                        $value = $this->TS_preserve_rte($value);
                        break;
                    case 'ts_transform':
                        // Intentional fall-through: both modes share the code below
                    case 'css_transform':
                        // CR has a very disturbing effect, so just remove all CR and rely on LF
                        $value = str_replace(CR, '', $value);
                        $value = $this->TS_transform_rte($value, $cmd == 'css_transform');
                        break;
                    default:
                        // Do nothing
                }
            }
        }
    }
    // If an exit-cleaner was configured, pass value through the HTMLcleaner with that:
    if (is_array($exit_HTMLparser)) {
        $value = $this->HTMLcleaner($value, $exit_HTMLparser[0], $exit_HTMLparser[1], $exit_HTMLparser[2], $exit_HTMLparser[3]);
    }
    // Final clean up of linebreaks:
    if (!$this->procOptions['disableUnifyLineBreaks']) {
        // Make sure no \r\n sequences have entered in the meantime...
        $value = str_replace(CRLF, LF, $value);
        // ... and then change all \n into \r\n
        $value = str_replace(LF, CRLF, $value);
    }
    return $value;
}
315 
316  /************************************
317  *
318  * Specific RTE TRANSFORMATION functions
319  *
320  *************************************/
/**
 * Transformation handler "ts_images", direction "db": processes every <img> tag
 * of the content before it is stored.
 *
 * For each image the method, depending on what the src points to:
 * - FAL file referenced by data-htmlarea-file-uid: applies the plain image mode
 *   settings when the src is the original file, otherwise creates a "magic"
 *   (processed) image with the dimensions found in the tag;
 * - external URL (only in BE mode and unless proc.dontFetchExtPictures is set):
 *   fetches jpg/jpeg/gif/png files into the backend user's default upload folder
 *   and replaces the image by a magic image of that file;
 * - local file without file uid: applies plain image mode settings and tries to
 *   add a data-htmlarea-file-uid attribute.
 * Finally width/height are removed from the style attribute, an alt attribute is
 * ensured and a src below the site URL is made relative using $this->relBackPath.
 *
 * @param string $value Content with <img> tags as delivered by the RTE
 * @return string Processed content
 */
public function TS_images_db($value)
{
    // Split content by <img> tags; odd keys hold the tags
    $imgSplit = $this->splitTags('img', $value);
    if (count($imgSplit) <= 1) {
        return implode('', $imgSplit);
    }
    $siteUrl = $this->siteUrl();
    $sitePath = str_replace(GeneralUtility::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
    $resourceFactory = Resource\ResourceFactory::getInstance();
    $magicImageService = GeneralUtility::makeInstance(\TYPO3\CMS\Core\Resource\Service\MagicImageService::class);
    $magicImageService->setMagicImageMaximumDimensions($this->tsConfig);
    foreach ($imgSplit as $k => $v) {
        if (!($k % 2)) {
            // Text between images stays untouched
            continue;
        }
        // Get attributes
        $attribArray = $this->get_tag_attributes_classic($v, 1);
        // It's always an absolute URL coming from the RTE into the Database.
        $absoluteUrl = trim($attribArray['src']);
        // Make path absolute if it has no scheme and starts with the site path.
        // parse_url() is required here: pathinfo() never reports a scheme.
        $urlParts = parse_url($absoluteUrl);
        $hasScheme = is_array($urlParts) && !empty($urlParts['scheme']);
        if ($sitePath && !$hasScheme && GeneralUtility::isFirstPartOfStr($absoluteUrl, $sitePath)) {
            // If site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl
            $absoluteUrl = substr($absoluteUrl, strlen($sitePath));
            $absoluteUrl = $siteUrl . $absoluteUrl;
        }
        // Image dimensions set in the img tag, if any
        $imgTagDimensions = $this->getWHFromAttribs($attribArray);
        if ($imgTagDimensions[0]) {
            $attribArray['width'] = $imgTagDimensions[0];
        }
        if ($imgTagDimensions[1]) {
            $attribArray['height'] = $imgTagDimensions[1];
        }
        $originalImageFile = null;
        if ($attribArray['data-htmlarea-file-uid']) {
            // An original image file uid is available
            try {
                $originalImageFile = $resourceFactory->getFileObject(intval($attribArray['data-htmlarea-file-uid']));
            } catch (Resource\Exception\FileDoesNotExistException $fileDoesNotExistException) {
                // Log the fact the file could not be retrieved.
                $message = sprintf('Could not find file with uid "%s"', $attribArray['data-htmlarea-file-uid']);
                $this->getLogger()->error($message);
            }
        }
        if ($originalImageFile instanceof Resource\File) {
            // Public url of local file is relative to the site url, absolute otherwise
            if ($absoluteUrl == $originalImageFile->getPublicUrl() || $absoluteUrl == $siteUrl . $originalImageFile->getPublicUrl()) {
                // This is a plain image, i.e. reference to the original image
                if ($this->procOptions['plainImageMode']) {
                    // Find the dimensions of the original image
                    $imageInfo = [
                        $originalImageFile->getProperty('width'),
                        $originalImageFile->getProperty('height')
                    ];
                    if (!$imageInfo[0] || !$imageInfo[1]) {
                        // Fall back to reading the dimensions from the file itself
                        $filePath = $originalImageFile->getForLocalProcessing(false);
                        $imageInfo = @getimagesize($filePath);
                    }
                    $attribArray = $this->applyPlainImageModeSettings($imageInfo, $attribArray);
                }
            } else {
                // Magic image case: get a processed file with the requested configuration
                $imageConfiguration = [
                    'width' => $imgTagDimensions[0],
                    'height' => $imgTagDimensions[1]
                ];
                $magicImage = $magicImageService->createMagicImage($originalImageFile, $imageConfiguration);
                $attribArray['width'] = $magicImage->getProperty('width');
                $attribArray['height'] = $magicImage->getProperty('height');
                $attribArray['src'] = $magicImage->getPublicUrl();
            }
        } elseif (!GeneralUtility::isFirstPartOfStr($absoluteUrl, $siteUrl) && !$this->procOptions['dontFetchExtPictures'] && TYPO3_MODE === 'BE') {
            // External image from another URL: fetch it, unless the feature is disabled or we are not in backend mode
            $externalFile = $this->getUrl($absoluteUrl);
            if ($externalFile) {
                $pU = parse_url($absoluteUrl);
                $pI = pathinfo($pU['path']);
                $extension = strtolower($pI['extension']);
                if ($extension === 'jpg' || $extension === 'jpeg' || $extension === 'gif' || $extension === 'png') {
                    $fileName = GeneralUtility::shortMD5($absoluteUrl) . '.' . $pI['extension'];
                    // We insert this image into the user default upload folder
                    list($table, $field) = explode(':', $this->elRef);
                    $folder = $GLOBALS['BE_USER']->getDefaultUploadFolder($this->recPid, $table, $field);
                    $fileObject = $folder->createFile($fileName)->setContents($externalFile);
                    $imageConfiguration = [
                        'width' => $attribArray['width'],
                        'height' => $attribArray['height']
                    ];
                    $magicImage = $magicImageService->createMagicImage($fileObject, $imageConfiguration);
                    $attribArray['width'] = $magicImage->getProperty('width');
                    $attribArray['height'] = $magicImage->getProperty('height');
                    $attribArray['data-htmlarea-file-uid'] = $fileObject->getUid();
                    $attribArray['src'] = $magicImage->getPublicUrl();
                }
            }
        } elseif (GeneralUtility::isFirstPartOfStr($absoluteUrl, $siteUrl)) {
            // Finally, check image as local file (siteURL equals the one of the image)
            // Image has no data-htmlarea-file-uid attribute
            // Relative path, rawurldecoded for special characters.
            $path = rawurldecode(substr($absoluteUrl, strlen($siteUrl)));
            // Absolute filepath, locked to relative path of this project
            $filepath = GeneralUtility::getFileAbsFileName($path);
            // Check file existence (in relative directory to this installation!)
            if ($filepath && @is_file($filepath)) {
                // Treat it as a plain image
                if ($this->procOptions['plainImageMode']) {
                    // Find the original dimensions of the image
                    $imageInfo = @getimagesize($filepath);
                    $attribArray = $this->applyPlainImageModeSettings($imageInfo, $attribArray);
                }
                // Let's try to find a file uid for this image
                try {
                    $fileOrFolderObject = $resourceFactory->retrieveFileOrFolderObject($path);
                    if ($fileOrFolderObject instanceof Resource\FileInterface) {
                        $fileIdentifier = $fileOrFolderObject->getIdentifier();
                        $fileObject = $fileOrFolderObject->getStorage()->getFile($fileIdentifier);
                        // @todo if the retrieved file is a processed file, get the original file...
                        $attribArray['data-htmlarea-file-uid'] = $fileObject->getUid();
                    }
                } catch (Resource\Exception\ResourceDoesNotExistException $resourceDoesNotExistException) {
                    // Nothing to be done if file/folder not found
                }
            }
        }
        // Remove width and height from style attribute
        $attribArray['style'] = preg_replace('/(?:^|[^-])(\\s*(?:width|height)\\s*:[^;]*(?:$|;))/si', '', $attribArray['style']);
        // Must have alt attribute
        if (!isset($attribArray['alt'])) {
            $attribArray['alt'] = '';
        }
        // Convert absolute to relative url
        if (GeneralUtility::isFirstPartOfStr($attribArray['src'], $siteUrl)) {
            $attribArray['src'] = $this->relBackPath . substr($attribArray['src'], strlen($siteUrl));
        }
        $imgSplit[$k] = '<img ' . GeneralUtility::implodeAttributes($attribArray, 1, 1) . ' />';
    }
    return implode('', $imgSplit);
}
477 
/**
 * Transformation handler "ts_images", direction "rte": makes the src of every
 * <img> tag an absolute URL and ensures an alt attribute.
 *
 * A src that does not start with "http" has the $this->relBackPath prefix and the
 * site path removed (when present) and is then prefixed with the site URL.
 *
 * @param string $value Content from the database
 * @return string Content for the RTE
 */
public function TS_images_rte($value)
{
    // Split content by <img> tags; odd keys hold the tags
    $imgSplit = $this->splitTags('img', $value);
    if (count($imgSplit) <= 1) {
        return implode('', $imgSplit);
    }
    $siteUrl = $this->siteUrl();
    $sitePath = str_replace(GeneralUtility::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
    $backPath = (string)$this->relBackPath;
    foreach ($imgSplit as $k => $v) {
        if (!($k % 2)) {
            continue;
        }
        // Get the attributes of the img tag
        $attribArray = $this->get_tag_attributes_classic($v, 1);
        $src = trim($attribArray['src']);
        // Transform the src attribute into an absolute url, if it is not one already
        if (strtolower(substr($src, 0, 4)) !== 'http') {
            // Only strip the back path when it really is the prefix; cutting a fixed
            // number of characters would corrupt a src stored without that prefix.
            if ($backPath !== '' && strpos($src, $backPath) === 0) {
                $src = substr($src, strlen($backPath));
            }
            // If site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl
            $src = preg_replace('#^' . preg_quote($sitePath, '#') . '#', '', $src);
            $attribArray['src'] = $siteUrl . $src;
        }
        // Must have alt attribute
        if (!isset($attribArray['alt'])) {
            $attribArray['alt'] = '';
        }
        $imgSplit[$k] = '<img ' . GeneralUtility::implodeAttributes($attribArray, 1, 1) . ' />';
    }
    // Return processed content:
    return implode('', $imgSplit);
}
517 
/**
 * Transformation handler "ts_reglinks": converts the href of regular <a> tags.
 *
 * Direction "rte" delegates to TS_AtagToAbs(). Direction "db" removes the site URL
 * prefix from local hrefs (replacing it with $this->relBackPath) and processes the
 * content of each <a> block recursively. Any other direction yields an empty string.
 *
 * @param string $value Content to process
 * @param string $direction 'rte' or 'db'
 * @return string Processed content
 */
public function TS_reglinks($value, $direction)
{
    if ($direction == 'rte') {
        return $this->TS_AtagToAbs($value, 1);
    }
    if ($direction != 'db') {
        return '';
    }
    $siteURL = $this->siteUrl();
    $parts = $this->splitIntoBlock('A', $value);
    foreach ($parts as $index => $part) {
        // Only odd keys hold <a> blocks
        if (!($index % 2)) {
            continue;
        }
        $attributes = $this->get_tag_attributes_classic($this->getFirstTag($part), 1);
        // If the url is local, remove url-prefix
        $isLocal = $siteURL && substr($attributes['href'], 0, strlen($siteURL)) == $siteURL;
        if ($isLocal) {
            $attributes['href'] = $this->relBackPath . substr($attributes['href'], strlen($siteURL));
        }
        $openingTag = '<a ' . GeneralUtility::implodeAttributes($attributes, 1) . '>';
        $innerContent = $this->TS_reglinks($this->removeFirstAndLastTag($part), $direction);
        $parts[$index] = $openingTag . $innerContent . '</a>';
    }
    return implode('', $parts);
}
554 
/**
 * Transformation handler "ts_links", direction "db": converts <a> tags into the
 * TYPO3 pseudo tag <link> where possible.
 *
 * An <a> tag is converted when, apart from href, target, class, title,
 * data-htmlarea-external (and rteerror/style if rteerror is set), it carries no
 * other attributes - after the "removeParams_PostProc" hooks had the chance to
 * drop further ones. All other <a> tags are kept as <a> tags with the site URL
 * prefix removed from local hrefs and "?file:" hrefs resolved to public URLs.
 * The content inside each tag is processed recursively.
 *
 * @param string $value Content from the RTE
 * @return string Content for the database
 */
563  public function TS_links_db($value)
564  {
565  $conf = [];
566  // Split content into <a> tag blocks and process:
567  $blockSplit = $this->splitIntoBlock('A', $value);
568  foreach ($blockSplit as $k => $v) {
569  // If an A-tag was found:
570  if ($k % 2) {
571  $attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v), 1);
572  $info = $this->urlInfoForLinkTags($attribArray['href']);
573  // Check options: work on a copy from which all attributes a <link> tag can express are removed
574  $attribArray_copy = $attribArray;
575  unset($attribArray_copy['href']);
576  unset($attribArray_copy['target']);
577  unset($attribArray_copy['class']);
578  unset($attribArray_copy['title']);
579  unset($attribArray_copy['data-htmlarea-external']);
580  // Unset "rteerror" and "style" attributes if "rteerror" is set!
581  if ($attribArray_copy['rteerror']) {
582  unset($attribArray_copy['style']);
583  unset($attribArray_copy['rteerror']);
584  }
585  // Remove additional parameters
586  if (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['removeParams_PostProc']) && is_array($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['removeParams_PostProc'])) {
// Both entries are passed by reference, so a hook may modify them in place
587  $parameters = [
588  'conf' => &$conf,
589  'aTagParams' => &$attribArray_copy
590  ];
591  foreach ($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['removeParams_PostProc'] as $objRef) {
592  $processor = GeneralUtility::getUserObj($objRef);
593  $attribArray_copy = $processor->removeParams($parameters, $this);
594  }
595  }
596  // Only if href, target, class and title are the only attributes, we can alter the link!
597  if (empty($attribArray_copy)) {
598  // Quoting class and title attributes if they contain spaces
599  $attribArray['class'] = preg_match('/ /', $attribArray['class']) ? '"' . $attribArray['class'] . '"' : $attribArray['class'];
600  $attribArray['title'] = preg_match('/ /', $attribArray['title']) ? '"' . $attribArray['title'] . '"' : $attribArray['title'];
601  // Creating the TYPO3 pseudo-tag "<LINK>" for the link (includes href/url, target and class attributes):
602  // If data-htmlarea-external attribute is set, keep the href unchanged
603  if ($attribArray['data-htmlarea-external']) {
604  $href = $attribArray['href'];
605  } else {
606  $href = $info['url'] . ($info['query'] ? ',0,' . $info['query'] : '');
607  }
// The quotes added above are trimmed again here; the quoted values are still what the hook below receives in 'attributes'
608  $typoLink = GeneralUtility::makeInstance(TypoLinkCodecService::class)->encode(['url' => $href, 'target' => $attribArray['target'], 'class' => trim($attribArray['class'], '"'), 'title' => trim($attribArray['title'], '"'), 'additionalParams' => '']);
609  $bTag = '<link ' . $typoLink . '>';
610  $eTag = '</link>';
611  // Modify parameters
612  if (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc']) && is_array($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc'])) {
613  $parameters = [
614  'conf' => &$conf,
615  'currentBlock' => $v,
616  'url' => $href,
617  'attributes' => $attribArray
618  ];
// Each hook replaces the whole block; the result of the last hook wins
619  foreach ($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc'] as $objRef) {
620  $processor = GeneralUtility::getUserObj($objRef);
621  $blockSplit[$k] = $processor->modifyParamsLinksDb($parameters, $this);
622  }
623  } else {
624  $blockSplit[$k] = $bTag . $this->TS_links_db($this->removeFirstAndLastTag($blockSplit[$k])) . $eTag;
625  }
626  } else {
627  // ... otherwise store the link as a-tag.
628  // Unsetting 'rtekeep' attribute if that had been set.
629  unset($attribArray['rtekeep']);
630  if (!$attribArray['data-htmlarea-external']) {
631  $siteURL = $this->siteUrl();
632  // If the url is local, remove url-prefix
633  if ($siteURL && substr($attribArray['href'], 0, strlen($siteURL)) == $siteURL) {
634  $attribArray['href'] = $this->relBackPath . substr($attribArray['href'], strlen($siteURL));
635  }
636  // Check for FAL link-handler keyword
// NOTE(review): with a non-empty relBackPath the href starts with '../', so the keyword can never equal '?file' - confirm this is intended
637  list($linkHandlerKeyword, $linkHandlerValue) = explode(':', $attribArray['href'], 2);
638  if ($linkHandlerKeyword === '?file') {
639  try {
640  $fileOrFolderObject = \TYPO3\CMS\Core\Resource\ResourceFactory::getInstance()->retrieveFileOrFolderObject(rawurldecode($linkHandlerValue));
641  if ($fileOrFolderObject instanceof \TYPO3\CMS\Core\Resource\FileInterface || $fileOrFolderObject instanceof \TYPO3\CMS\Core\Resource\Folder) {
642  $attribArray['href'] = $fileOrFolderObject->getPublicUrl();
643  }
644  } catch (\TYPO3\CMS\Core\Resource\Exception\ResourceDoesNotExistException $resourceDoesNotExistException) {
645  // The identifier inserted in the RTE is already gone...
646  }
647  }
648  }
649  unset($attribArray['data-htmlarea-external']);
650  $bTag = '<a ' . GeneralUtility::implodeAttributes($attribArray, 1) . '>';
651  $eTag = '</a>';
652  $blockSplit[$k] = $bTag . $this->TS_links_db($this->removeFirstAndLastTag($blockSplit[$k])) . $eTag;
653  }
654  }
655  }
656  return implode('', $blockSplit);
657  }
658 
/**
 * Transformation handler "ts_links", direction "rte": converts the TYPO3 pseudo
 * tag <link> into <a> tags the RTE can edit.
 *
 * The typolink parameter of each <link> tag is decoded and its url part is
 * classified as mail address, anchor, FAL "file:" reference, file in the site
 * root, external URL, internal file/folder, or page id/alias; the href is built
 * accordingly. External links get data-htmlarea-external="1". An unresolvable page
 * gets an "rteerror" attribute with a highlighting style. If hooks are registered
 * under "modifyParams_LinksRte_PostProc", they build the block instead; otherwise
 * the inner content is processed recursively.
 *
 * @param string $value Content from the database
 * @return string Content for the RTE
 */
public function TS_links_rte($value)
{
    $conf = [];
    $value = $this->TS_AtagToAbs($value);
    // Split content by the TYPO3 pseudo tag "<link>":
    $blockSplit = $this->splitIntoBlock('link', $value, 1);
    $siteUrl = $this->siteUrl();
    foreach ($blockSplit as $k => $v) {
        // Only odd keys hold <link> blocks
        if (!($k % 2)) {
            continue;
        }
        $error = '';
        $external = false;
        // Split away the first "<link" part; a tag without parameters yields an empty typolink
        $tagParts = explode(' ', substr($this->getFirstTag($v), 0, -1), 2);
        $typolink = isset($tagParts[1]) ? $tagParts[1] : '';
        $tagCode = GeneralUtility::makeInstance(TypoLinkCodecService::class)->decode($typolink);

        $link_param = $tagCode['url'];
        // Parsing the typolink data. This parsing is roughly done like in \TYPO3\CMS\Frontend\ContentObject->typoLink()
        // parse_url() returns false for seriously malformed input
        $pU = parse_url($link_param);
        $scheme = is_array($pU) && isset($pU['scheme']) ? $pU['scheme'] : '';
        if (strstr($link_param, '@') && (!$scheme || $scheme == 'mailto')) {
            // Mail address
            $href = 'mailto:' . preg_replace('/^mailto:/i', '', $link_param);
        } elseif (substr($link_param, 0, 1) === '#') {
            // Anchor; substr() is used so that an empty parameter does not raise an offset notice
            $href = $siteUrl . $link_param;
        } else {
            // Check for FAL link-handler keyword:
            $handlerParts = explode(':', trim($link_param), 2);
            $linkHandlerKeyword = $handlerParts[0];
            $linkHandlerValue = isset($handlerParts[1]) ? $handlerParts[1] : null;
            if ($linkHandlerKeyword === 'file' && !StringUtility::beginsWith($link_param, 'file://')) {
                $href = $siteUrl . '?' . $linkHandlerKeyword . ':' . rawurlencode($linkHandlerValue);
            } else {
                $fileChar = (int)strpos($link_param, '/');
                $urlChar = (int)strpos($link_param, '.');
                // Detects if a file is found in site-root.
                list($rootFileDat) = explode('?', $link_param);
                $rFD_fI = pathinfo($rootFileDat);
                $fileExtension = isset($rFD_fI['extension']) ? strtolower($rFD_fI['extension']) : '';
                if (strpos($link_param, '/') === false && trim($rootFileDat) && (@is_file(PATH_site . $rootFileDat) || $fileExtension === 'php' || $fileExtension === 'html' || $fileExtension === 'htm')) {
                    $href = $siteUrl . $link_param;
                } elseif (
                    (
                        $scheme
                        && !isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['tslib/class.tslib_content.php']['typolinkLinkHandler'][$scheme])
                    )
                    || ($urlChar && (!$fileChar || $urlChar < $fileChar))
                ) {
                    // url (external): if has scheme or if a '.' comes before a '/'.
                    $href = $link_param;
                    if (!$scheme) {
                        $href = 'http://' . $href;
                    }
                    $external = true;
                } elseif ($fileChar) {
                    // It is an internal file or folder
                    // Try to transform the href into a FAL reference
                    try {
                        $fileOrFolderObject = \TYPO3\CMS\Core\Resource\ResourceFactory::getInstance()->retrieveFileOrFolderObject($link_param);
                    } catch (\TYPO3\CMS\Core\Resource\Exception $exception) {
                        // Nothing to be done if file/folder not found or path invalid
                        $fileOrFolderObject = null;
                    }
                    if ($fileOrFolderObject instanceof \TYPO3\CMS\Core\Resource\Folder) {
                        // It's a folder
                        $folderIdentifier = $fileOrFolderObject->getIdentifier();
                        $href = $siteUrl . '?file:' . rawurlencode($folderIdentifier);
                    } elseif ($fileOrFolderObject instanceof \TYPO3\CMS\Core\Resource\FileInterface) {
                        // It's a file
                        $fileIdentifier = $fileOrFolderObject->getIdentifier();
                        $fileObject = $fileOrFolderObject->getStorage()->getFile($fileIdentifier);
                        $href = $siteUrl . '?file:' . $fileObject->getUid();
                    } else {
                        $href = $siteUrl . $link_param;
                    }
                } else {
                    // integer or alias (alias is without slashes or periods or commas, that is 'nospace,alphanum_x,lower,unique' according to tables.php!!)
                    // Splitting the parameter by ',' and if the array counts more than 1 element it's an id/type/parameters triplet
                    $pairParts = GeneralUtility::trimExplode(',', $link_param, true);
                    $idPart = $pairParts[0];
                    $link_params_parts = explode('#', $idPart);
                    $idPart = trim($link_params_parts[0]);
                    $sectionMark = isset($link_params_parts[1]) ? trim($link_params_parts[1]) : '';
                    // If no id or alias is given, set it to class record pid
                    if ((string)$idPart === '') {
                        $idPart = $this->recPid;
                    }
                    // Checking if the id-parameter is an alias.
                    if (!\TYPO3\CMS\Core\Utility\MathUtility::canBeInterpretedAsInteger($idPart)) {
                        list($idPartR) = BackendUtility::getRecordsByField('pages', 'alias', $idPart);
                        $idPart = (int)$idPartR['uid'];
                    }
                    $page = BackendUtility::getRecord('pages', $idPart);
                    if (is_array($page)) {
                        // Page must exist...
                        $href = $siteUrl . '?id=' . $idPart . (!empty($pairParts[2]) ? $pairParts[2] : '') . ($sectionMark ? '#' . $sectionMark : '');
                    } elseif (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['tslib/class.tslib_content.php']['typolinkLinkHandler'][explode(':', $link_param, 2)[0]])) {
                        // The part before the first ':' names a registered link handler.
                        // (array_shift() must not be used on the explode() result: it takes its argument by reference.)
                        $href = $link_param;
                    } else {
                        $href = $siteUrl . '?id=' . $link_param;
                        $error = 'No page found: ' . $idPart;
                    }
                }
            }
        }
        // Setting the A-tag:
        $bTag = '<a href="' . htmlspecialchars($href) . '"'
            . ($tagCode['target'] ? ' target="' . htmlspecialchars($tagCode['target']) . '"' : '')
            . ($tagCode['class'] ? ' class="' . htmlspecialchars($tagCode['class']) . '"' : '')
            . ($tagCode['title'] ? ' title="' . htmlspecialchars($tagCode['title']) . '"' : '')
            . ($external ? ' data-htmlarea-external="1"' : '')
            . ($error ? ' rteerror="' . htmlspecialchars($error) . '" style="background-color: yellow; border:2px red solid; color: black;"' : '') . '>';
        $eTag = '</a>';
        // Modify parameters
        if (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc']) && is_array($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc'])) {
            $parameters = [
                'conf' => &$conf,
                'currentBlock' => $v,
                'url' => $href,
                'tagCode' => $tagCode,
                'external' => $external,
                'error' => $error
            ];
            // Each hook replaces the whole block; the result of the last hook wins
            foreach ($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc'] as $objRef) {
                $processor = GeneralUtility::getUserObj($objRef);
                $blockSplit[$k] = $processor->modifyParamsLinksRte($parameters, $this);
            }
        } else {
            $blockSplit[$k] = $bTag . $this->TS_links_rte($this->removeFirstAndLastTag($blockSplit[$k])) . $eTag;
        }
    }
    // Return content:
    return implode('', $blockSplit);
}
801 
/**
 * Transformation handler "ts_preserve", direction "db": turns the
 * <span specialtag="..."> wrappers back into the tags they stand for.
 *
 * The specialtag attribute holds the rawurlencoded opening tag. Spans without
 * that attribute are left alone. Nothing is done when no preserve tags are
 * configured.
 *
 * @param string $value Content from the RTE
 * @return string Content for the database
 */
public function TS_preserve_db($value)
{
    if (!$this->preserveTags) {
        return $value;
    }
    // Span tags are used as carriers for the preserved tags; odd keys hold the span blocks
    $segments = $this->splitIntoBlock('span', $value);
    foreach ($segments as $index => $segment) {
        if (!($index % 2)) {
            continue;
        }
        $spanAttributes = $this->get_tag_attributes_classic($this->getFirstTag($segment));
        if (!$spanAttributes['specialtag']) {
            continue;
        }
        $restoredTag = rawurldecode($spanAttributes['specialtag']);
        $restoredTagName = $this->getFirstTagName($restoredTag);
        $innerContent = $this->removeFirstAndLastTag($segment);
        $segments[$index] = $restoredTag . $innerContent . '</' . $restoredTagName . '>';
    }
    return implode('', $segments);
}
828 
/**
 * Transformation handler "ts_preserve", direction "rte": wraps the content of
 * every tag listed in $this->preserveTags in a <span> whose specialtag attribute
 * carries the rawurlencoded opening tag.
 *
 * Nothing is done when no preserve tags are configured.
 *
 * @param string $value Content from the database
 * @return string Content for the RTE
 */
public function TS_preserve_rte($value)
{
    if (!$this->preserveTags) {
        return $value;
    }
    $segments = $this->splitIntoBlock($this->preserveTags, $value);
    foreach ($segments as $index => $segment) {
        // Only odd keys hold blocks of preserved tags
        if (!($index % 2)) {
            continue;
        }
        $encodedTag = rawurlencode($this->getFirstTag($segment));
        $innerContent = $this->removeFirstAndLastTag($segment);
        $segments[$index] = '<span specialtag="' . $encodedTag . '">' . $innerContent . '</span>';
    }
    return implode('', $segments);
}
849 
/**
 * Transformation handler: prepares content coming from the RTE for storage in the database.
 *
 * The content is split into block sections (TABLE, BLOCKQUOTE, optionally DIV and
 * everything listed in $this->blockElementList) and the text in between:
 * - container blocks (blockquote, dd, div, header, section, footer, nav, article, aside)
 *   are processed recursively,
 * - ol/ul are only touched in CSS mode (line breaks collapsed to a space),
 * - tables are flattened through removeTables() unless "preserveTables" is set or CSS mode is on,
 * - h1-h6 are rebuilt with only their align/class attributes unless CSS mode is on,
 * - pre blocks are left untouched,
 * - text outside blocks is passed through divideIntoLines().
 *
 * @param string $value Content from the RTE
 * @param bool $css If set, lists, tables and headers are kept and only get their line breaks collapsed
 * @return string Processed content
 */
public function TS_transform_db($value, $css = false)
{
    // Recursion guard, so endless loops are avoided (they should not occur, but an error could cause one)
    $this->TS_transform_db_safecounter--;
    if ($this->TS_transform_db_safecounter < 0) {
        // Restore the counter before bailing out; otherwise every overflow permanently
        // lowers the recursion budget of all later calls on this object.
        $this->TS_transform_db_safecounter++;
        return $value;
    }
    $lineBreakPattern = '/[' . LF . CR . ']+/';
    // Split the content from RTE by the occurrence of these blocks:
    $blockSplit = $this->splitIntoBlock('TABLE,BLOCKQUOTE,' . (!empty($this->procOptions['preserveDIVSections']) ? 'DIV,' : '') . $this->blockElementList, $value);
    $cc = 0;
    $aC = count($blockSplit);
    // Drop blank trailing parts so no superfluous line break is added after the last block
    while ($aC && trim($blockSplit[$aC - 1]) === '') {
        unset($blockSplit[$aC - 1]);
        $aC = count($blockSplit);
    }
    // Traverse the blocks
    foreach ($blockSplit as $k => $v) {
        $cc++;
        // Every part except the last one is terminated with a line feed
        $lastBR = $cc == $aC ? '' : LF;
        if ($k % 2) {
            // Inside block:
            $tag = $this->getFirstTag($v);
            $tagName = strtolower($this->getFirstTagName($v));
            switch ($tagName) {
                case 'blockquote':
                case 'dd':
                case 'div':
                case 'header':
                case 'section':
                case 'footer':
                case 'nav':
                case 'article':
                case 'aside':
                    // Container elements: transform the inner content recursively
                    $blockSplit[$k] = $tag . $this->TS_transform_db($this->removeFirstAndLastTag($blockSplit[$k]), $css) . '</' . $tagName . '>' . $lastBR;
                    break;
                case 'ol':
                case 'ul':
                    // Lists are only processed in CSS mode; otherwise they are left as they are
                    if ($css) {
                        $blockSplit[$k] = preg_replace($lineBreakPattern, ' ', $this->transformStyledATags($blockSplit[$k])) . $lastBR;
                    }
                    break;
                case 'table':
                    // Tables are NOT allowed in any form (unless preserveTables is set or CSS is the mode)
                    if (empty($this->procOptions['preserveTables']) && !$css) {
                        $blockSplit[$k] = $this->TS_transform_db($this->removeTables($blockSplit[$k]));
                    } else {
                        $blockSplit[$k] = preg_replace($lineBreakPattern, ' ', $this->transformStyledATags($blockSplit[$k])) . $lastBR;
                    }
                    break;
                case 'h1':
                case 'h2':
                case 'h3':
                case 'h4':
                case 'h5':
                case 'h6':
                    if (!$css) {
                        // Rebuild the header tag keeping only the align and class attributes
                        $attribArray = $this->get_tag_attributes_classic($tag);
                        $innerContent = $this->HTMLcleaner_db($this->removeFirstAndLastTag($blockSplit[$k]));
                        $alignAttribute = !empty($attribArray['align']) ? ' align="' . htmlspecialchars($attribArray['align']) . '"' : '';
                        $classAttribute = !empty($attribArray['class']) ? ' class="' . htmlspecialchars($attribArray['class']) . '"' : '';
                        $blockSplit[$k] = '<' . $tagName . $alignAttribute . $classAttribute . '>' . $innerContent . '</' . $tagName . '>' . $lastBR;
                    } else {
                        // Eliminate true linebreaks inside Hx tags
                        $blockSplit[$k] = preg_replace($lineBreakPattern, ' ', $this->transformStyledATags($blockSplit[$k])) . $lastBR;
                    }
                    break;
                case 'pre':
                    // Preformatted content is stored untouched
                    break;
                default:
                    // Eliminate true linebreaks inside other headlist tags
                    $blockSplit[$k] = preg_replace($lineBreakPattern, ' ', $this->transformStyledATags($blockSplit[$k])) . $lastBR;
            }
        } else {
            // NON-block:
            if (trim($blockSplit[$k]) !== '') {
                $blockSplit[$k] = preg_replace('/<hr\\/>/', '<hr />', $blockSplit[$k]);
                // Remove linebreaks preceding hr tags
                $blockSplit[$k] = preg_replace('/[' . LF . CR . ']+<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/', '<$1$2/>', $blockSplit[$k]);
                // Remove linebreaks following hr tags
                $blockSplit[$k] = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>[' . LF . CR . ']+/', '<$1$2/>', $blockSplit[$k]);
                // Replace other linebreaks with space
                $blockSplit[$k] = preg_replace($lineBreakPattern, ' ', $blockSplit[$k]);
                $blockSplit[$k] = $this->divideIntoLines($blockSplit[$k]) . $lastBR;
                $blockSplit[$k] = $this->transformStyledATags($blockSplit[$k]);
            } else {
                unset($blockSplit[$k]);
            }
        }
    }
    $this->TS_transform_db_safecounter++;
    return implode('', $blockSplit);
}
967 
/**
 * Wraps every <a> tag that carries a "style" attribute into a <span> holding that style.
 *
 * The style attribute is moved from the <a> tag to the surrounding <span>. Tags that
 * have an "rteerror" attribute set are left untouched.
 *
 * @param string $value Content to process
 * @return string Content with styled A-tags wrapped in span tags
 */
public function transformStyledATags($value)
{
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        // Only odd keys hold an A-tag block
        if (!($k % 2)) {
            continue;
        }
        $attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v), 1);
        // Act only if "style" is set and "rteerror" is not; empty() avoids reading keys
        // that do not exist on tags without these attributes.
        if (empty($attribArray['style']) || !empty($attribArray['rteerror'])) {
            continue;
        }
        // Fresh array per tag instead of writing into a never-initialised variable
        $spanAttributes = ['style' => $attribArray['style']];
        unset($attribArray['style']);
        $bTag = '<span ' . GeneralUtility::implodeAttributes($spanAttributes, 1) . '><a ' . GeneralUtility::implodeAttributes($attribArray, 1) . '>';
        $eTag = '</a></span>';
        $blockSplit[$k] = $bTag . $this->removeFirstAndLastTag($blockSplit[$k]) . $eTag;
    }
    return implode('', $blockSplit);
}
993 
/**
 * Transformation handler: prepares content coming from the database for the RTE.
 *
 * The content is split by the block elements (TABLE, BLOCKQUOTE, optionally DIV and
 * $this->blockElementList). Container blocks are processed recursively, other blocks
 * are passed through unchanged, and the text between blocks is wrapped line by line
 * through setDivTags() using <p> (or <div> if "useDIVasParagraphTagForRTE" is set).
 *
 * @param string $value Content from the database
 * @param bool|int $css Passed on unchanged to the recursive calls
 * @return string Processed content, parts joined by LF
 */
public function TS_transform_rte($value, $css = 0)
{
    // Split the content from database by the occurrence of the block elements
    $blockElementList = 'TABLE,BLOCKQUOTE,' . (!empty($this->procOptions['preserveDIVSections']) ? 'DIV,' : '') . $this->blockElementList;
    $blockSplit = $this->splitIntoBlock($blockElementList, $value);
    $paragraphTag = !empty($this->procOptions['useDIVasParagraphTagForRTE']) ? 'div' : 'p';
    $containerTags = ['blockquote', 'dd', 'div', 'header', 'section', 'footer', 'nav', 'article', 'aside'];
    // Traverse the blocks
    foreach ($blockSplit as $k => $v) {
        if ($k % 2) {
            // Inside one of the blocks:
            $tagName = strtolower($this->getFirstTagName($v));
            if (in_array($tagName, $containerTags, true)) {
                // Container elements: transform the inner content recursively
                $blockSplit[$k] = $this->getFirstTag($v) . $this->TS_transform_rte($this->removeFirstAndLastTag($blockSplit[$k]), $css) . '</' . $tagName . '>';
            }
            // Remove the first line break of the part following the block. If the block is the
            // last part, an empty part is still appended (as before), but without reading an
            // offset that does not exist.
            $followingPart = isset($blockSplit[$k + 1]) ? $blockSplit[$k + 1] : '';
            $blockSplit[$k + 1] = preg_replace('/^[ ]*' . LF . '/', '', $followingPart);
        } else {
            // NON-block:
            $hasNextPart = isset($blockSplit[$k + 1]);
            $nextFTN = $hasNextPart ? $this->getFirstTagName($blockSplit[$k + 1]) : '';
            $onlyLineBreaks = (preg_match('/^[ ]*' . LF . '+[ ]*$/', $blockSplit[$k]) == 1);
            // If the line is followed by a block or is the last line:
            if (!$hasNextPart || GeneralUtility::inList($blockElementList, $nextFTN)) {
                if (!$onlyLineBreaks) {
                    // More than just linebreaks: reduce the number of trailing linebreaks by 1
                    $blockSplit[$k] = preg_replace('/(' . LF . '*)' . LF . '[ ]*$/', '$1', $blockSplit[$k]);
                } else {
                    // Only linebreaks: remove the leading linebreak
                    $blockSplit[$k] = preg_replace('/^[ ]*' . LF . '/', '', $blockSplit[$k]);
                }
            }
            // If the part is blank then drop it, unless it only contained linebreaks
            if ((string)$blockSplit[$k] === '' && !$onlyLineBreaks) {
                unset($blockSplit[$k]);
            } else {
                $blockSplit[$k] = $this->setDivTags($blockSplit[$k], $paragraphTag);
            }
        }
    }
    return implode(LF, $blockSplit);
}
1063 
/**
 * Strips all tags from the content except a fixed list of allowed ones.
 *
 * @param string $value Content to strip
 * @return string Content containing only the allowed tags
 */
public function TS_strip_db($value)
{
    $allowedTagNames = ['b', 'i', 'u', 'a', 'img', 'br', 'div', 'center', 'pre', 'font', 'hr', 'sub', 'sup', 'p', 'strong', 'em', 'li', 'ul', 'ol', 'blockquote'];
    // strip_tags() expects the allowed tags as one string: "<b><i><u>..."
    $allowableTags = '';
    foreach ($allowedTagNames as $tagName) {
        $allowableTags .= '<' . $tagName . '>';
    }
    return strip_tags($value, $allowableTags);
}
1076 
1077  /***************************************************************
1078  *
1079  * Generic RTE transformation, analysis and helper functions
1080  *
1081  **************************************************************/
/**
 * Fetches the content of a URL by delegating to GeneralUtility::getUrl().
 *
 * @param string $url URL passed on to GeneralUtility::getUrl()
 * @return mixed Whatever GeneralUtility::getUrl() returns for the URL
 */
public function getUrl($url)
{
    $fetchedContent = GeneralUtility::getUrl($url);
    return $fetchedContent;
}
1093 
/**
 * Runs HTMLcleaner() on content headed for the database, configured from the processing options.
 *
 * @param string $content Content to clean
 * @param string $tagList Comma list of tags to keep; if empty the default "db" keep-tags are used
 * @return string Result of HTMLcleaner()
 */
public function HTMLcleaner_db($content, $tagList = '')
{
    $keepTags = $tagList ? $this->getKeepTags('db', $tagList) : $this->getKeepTags('db');
    // Unknown tags are removed (0) unless "dontRemoveUnknownTags_db" is set (1)
    $keepUnknownTags = $this->procOptions['dontRemoveUnknownTags_db'] ? 1 : 0;
    // htmlspecialchars mode for HTMLcleaner(): -1 by default, 0 if "dontUndoHSC_db" is set
    $hscMode = $this->procOptions['dontUndoHSC_db'] ? 0 : -1;
    // Honor "xhtml_cleaning" if it is enabled in any of the three db parser configurations
    $additionalConfig = [];
    foreach (['HTMLparser_db.', 'entryHTMLparser_db.', 'exitHTMLparser_db.'] as $parserKey) {
        if (is_array($this->procOptions[$parserKey]) && $this->procOptions[$parserKey]['xhtml_cleaning']) {
            $additionalConfig['xhtml'] = 1;
            break;
        }
    }
    return $this->HTMLcleaner($content, $keepTags, $keepUnknownTags, $hscMode, $additionalConfig);
}
1122 
/**
 * Builds the "keep tags" array that is handed to HTMLcleaner().
 *
 * Without $tagList the result is built from the processing options ("allowTags",
 * "denyTags", "HTMLparser_rte." / "HTMLparser_db.") and cached per direction in
 * $this->getKeepTags_cache. With $tagList the given tags take precedence and the
 * result is returned without being cached.
 *
 * @param string $direction The direction of the content being processed: "rte" or "db"
 * @param string $tagList Comma list of tags to keep (overrides the processing options)
 * @return array Keep-tags configuration
 */
public function getKeepTags($direction = 'rte', $tagList = '')
{
    // A cached result can only be used when no explicit tag list was passed
    if (is_array($this->getKeepTags_cache[$direction]) && !$tagList) {
        return $this->getKeepTags_cache[$direction];
    }
    if ((string)$tagList !== '') {
        // An explicit tag list takes precedence
        $keepTags = array_flip(GeneralUtility::trimExplode(',', $tagList, true));
    } else {
        // Default list of tags, extended by "allowTags" and reduced by "denyTags"
        $defaultTags = 'b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote,strike,span';
        $keepTags = array_flip(GeneralUtility::trimExplode(',', $defaultTags . ',' . strtolower($this->procOptions['allowTags']), true));
        foreach (GeneralUtility::trimExplode(',', $this->procOptions['denyTags'], true) as $deniedTag) {
            unset($keepTags[$deniedTag]);
        }
    }
    // Bold/italic remapping is active unless "transformBoldAndItalicTags" is explicitly set to a false value
    $remapBoldItalic = !isset($this->procOptions['transformBoldAndItalicTags']) || $this->procOptions['transformBoldAndItalicTags'];
    if ($direction == 'rte') {
        if ($remapBoldItalic) {
            // Towards the RTE: b/i become strong/em
            if (isset($keepTags['b'])) {
                $keepTags['b'] = ['remap' => 'STRONG'];
            }
            if (isset($keepTags['i'])) {
                $keepTags['i'] = ['remap' => 'EM'];
            }
        }
        // Let HTMLparserConfig() merge the TypoScript style configuration into the keep-tags array
        list($keepTags) = $this->HTMLparserConfig($this->procOptions['HTMLparser_rte.'], $keepTags);
    } elseif ($direction == 'db') {
        if ($remapBoldItalic) {
            // Towards the database: strong/em become b/i
            if (isset($keepTags['strong'])) {
                $keepTags['strong'] = ['remap' => 'b'];
            }
            if (isset($keepTags['em'])) {
                $keepTags['em'] = ['remap' => 'i'];
            }
        }
        // Span tags, if allowed: the class list is only enforced when "allowedClasses" is configured
        if (isset($keepTags['span'])) {
            $classFix = [
                'list' => array_merge([''], $this->allowedClasses),
                'removeIfFalse' => 1
            ];
            if (!$this->procOptions['allowedClasses']) {
                unset($classFix['list']);
            }
            $keepTags['span'] = [
                'allowedAttribs' => 'id,class,style,title,lang,xml:lang,dir,itemscope,itemtype,itemprop',
                'fixAttrib' => [
                    'class' => $classFix
                ],
                'rmTagIfNoAttrib' => 1
            ];
        }
        // Font tags, if allowed: the color list is only enforced when "allowedFontColors" is configured
        if (isset($keepTags['font'])) {
            $colorFix = [
                'removeIfFalse' => 1,
                'list' => array_merge([''], GeneralUtility::trimExplode(',', $this->procOptions['allowedFontColors'], true))
            ];
            if (!$this->procOptions['allowedFontColors']) {
                unset($colorFix['list']);
            }
            $keepTags['font'] = [
                'allowedAttribs' => 'face,color,size',
                'fixAttrib' => [
                    'face' => [
                        'removeIfFalse' => 1
                    ],
                    'color' => $colorFix,
                    'size' => [
                        'removeIfFalse' => 1
                    ]
                ],
                'rmTagIfNoAttrib' => 1
            ];
        }
        // Parser options from the processing options, with defaults for nesting and attribute-less tags
        $parserConfig = $this->procOptions['HTMLparser_db.'];
        if (!$parserConfig['globalNesting']) {
            $parserConfig['globalNesting'] = 'b,i,u,a,center,font,sub,sup,strong,em,strike,span';
        }
        if (!$parserConfig['noAttrib']) {
            $parserConfig['noAttrib'] = 'b,i,u,br,center,hr,sub,sup,strong,em,li,ul,ol,blockquote,strike';
        }
        list($keepTags) = $this->HTMLparserConfig($parserConfig, $keepTags);
    }
    // Results built from an explicit tag list are never cached
    if ($tagList) {
        return $keepTags;
    }
    $this->getKeepTags_cache[$direction] = $keepTags;
    return $this->getKeepTags_cache[$direction];
}
1237 
/**
 * Converts content made of <div>/<p> sections into lines separated by LF.
 *
 * The value is split into div/p sections with splitIntoBlock(). Content inside a
 * section is processed recursively ($count is reduced by one per nesting level), cleaned
 * with HTMLcleaner_db() and, unless "dontConvBRtoParagraph" is set, split into several
 * lines at <br> tags. A line is only wrapped in a tag again if attributes were kept for
 * it (keepPDIVattribs, align, class) and "remapParagraphTag" is not "1". Content outside
 * the sections is stripped of all tags except those in "allowTagsOutside".
 *
 * If no div/p section is found or $count has reached 0, the value is returned as a
 * string with <hr> tags placed on lines of their own. This happens regardless of
 * $returnArray; the recursive call relies on it to tell both cases apart via is_array().
 *
 * @param string $value Content to divide
 * @param int $count Remaining recursion depth; processing stops when it is 0 or lower
 * @param bool $returnArray If set, the processed sections are returned as an array instead of an LF-joined string
 * @return string|array Processed content
 */
1250  public function divideIntoLines($value, $count = 5, $returnArray = false)
1251  {
1252  // Setting configuration for processing:
1253  $allowTagsOutside = GeneralUtility::trimExplode(',', strtolower($this->procOptions['allowTagsOutside'] ? 'hr,' . $this->procOptions['allowTagsOutside'] : 'hr,img'), true);
1254  $remapParagraphTag = strtoupper($this->procOptions['remapParagraphTag']);
1255  $divSplit = $this->splitIntoBlock('div,p', $value, 1);
1256  // Setting the third param to 1 will eliminate false end-tags. Maybe this is a good thing to do...?
1257  if ($this->procOptions['keepPDIVattribs']) {
1258  $keepAttribListArr = GeneralUtility::trimExplode(',', strtolower($this->procOptions['keepPDIVattribs']), true);
1259  } else {
1260  $keepAttribListArr = [];
1261  }
1262  // Returns plainly the value if there was no div/p sections in it
1263  if (count($divSplit) <= 1 || $count <= 0) {
1264  // Wrap hr tags with LF's
1265  $newValue = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', LF . '<$1$2/>' . LF, $value);
      // Collapse double line feeds (single pass) and strip one leading/trailing line feed
1266  $newValue = preg_replace('/' . LF . LF . '/i', LF, $newValue);
1267  $newValue = preg_replace('/(^' . LF . ')|(' . LF . '$)/i', '', $newValue);
1268  return $newValue;
1269  }
1270  // Traverse the split sections:
1271  foreach ($divSplit as $k => $v) {
1272  if ($k % 2) {
1273  // Inside
1274  $v = $this->removeFirstAndLastTag($v);
1275  // Fetching 'sub-lines' - which will explode any further p/div nesting...
1276  $subLines = $this->divideIntoLines($v, $count - 1, 1);
1277  // So, if there happened to be sub-nesting of p/div, this is written directly as the new content of THIS section. (This would be considered 'an error')
1278  if (is_array($subLines)) {
1279  } else {
1280  //... but if NO subsection was found, we process it as a TRUE line without erroneous content:
1281  $subLines = [$subLines];
1282  // process break-tags, if configured for. Simply, the breaktags will here be treated like if each was a line of content...
      // Note that the split is done on $v, so the hr handling applied by the recursive call is discarded here.
1283  if (!$this->procOptions['dontConvBRtoParagraph']) {
1284  $subLines = preg_split('/<br[[:space:]]*[\\/]?>/i', $v);
1285  }
1286  // Traverse sublines (there is typically one, except if <br/> has been converted to lines as well!)
      // NOTE(review): the loop variable reuses the name of the $value parameter; this is harmless
      // because $value is not read again after this point.
1287  foreach ($subLines as $sk => $value) {
1288  // Clear up the subline for DB.
1289  $subLines[$sk] = $this->HTMLcleaner_db($subLines[$sk]);
1290  // Get first tag, attributes etc:
1291  $fTag = $this->getFirstTag($divSplit[$k]);
1292  $tagName = strtolower($this->getFirstTagName($divSplit[$k]));
1293  $attribs = $this->get_tag_attributes($fTag);
1294  // Keep attributes (lowercase)
1295  $newAttribs = [];
1296  if (!empty($keepAttribListArr)) {
1297  foreach ($keepAttribListArr as $keepA) {
1298  if (isset($attribs[0][$keepA])) {
1299  $newAttribs[$keepA] = $attribs[0][$keepA];
1300  }
1301  }
1302  }
1303  // ALIGN attribute:
1304  if (!$this->procOptions['skipAlign'] && trim($attribs[0]['align']) !== '' && strtolower($attribs[0]['align']) != 'left') {
1305  // Set to value, but not 'left'
1306  $newAttribs['align'] = strtolower($attribs[0]['align']);
1307  }
1308  // CLASS attribute:
1309  // Set to whatever value
      // If allowed classes are configured, only the allowed ones out of a space separated class list are kept.
1310  if (!$this->procOptions['skipClass'] && trim($attribs[0]['class']) !== '') {
1311  if (empty($this->allowedClasses) || in_array($attribs[0]['class'], $this->allowedClasses)) {
1312  $newAttribs['class'] = $attribs[0]['class'];
1313  } else {
1314  $classes = GeneralUtility::trimExplode(' ', $attribs[0]['class'], true);
1315  $newClasses = [];
1316  foreach ($classes as $class) {
1317  if (in_array($class, $this->allowedClasses)) {
1318  $newClasses[] = $class;
1319  }
1320  }
1321  if (!empty($newClasses)) {
1322  $newAttribs['class'] = implode(' ', $newClasses);
1323  }
1324  }
1325  }
1326  // Remove any line break char (10 or 13)
1327  $subLines[$sk] = preg_replace('/' . LF . '|' . CR . '/', '', $subLines[$sk]);
1328  // If there are any attributes or if we are supposed to remap the tag, then do so:
      // The line is only wrapped when at least one attribute was kept; "P" / "DIV" force the tag name.
1329  if (!empty($newAttribs) && $remapParagraphTag !== '1') {
1330  if ($remapParagraphTag === 'P') {
1331  $tagName = 'p';
1332  }
1333  if ($remapParagraphTag === 'DIV') {
1334  $tagName = 'div';
1335  }
1336  $subLines[$sk] = '<' . trim($tagName . ' ' . $this->compileTagAttribs($newAttribs)) . '>' . $subLines[$sk] . '</' . $tagName . '>';
1337  }
1338  }
1339  }
1340  // Add the processed line(s)
1341  $divSplit[$k] = implode(LF, $subLines);
1342  // If it turns out the line is just blank (containing a &nbsp; possibly) then just make it pure blank.
1343  // But, prevent filtering of lines that are blank in sense above, but whose tags contain attributes.
1344  // Those attributes should have been filtered before; if they are still there they must be considered as possible content.
1345  if (trim(strip_tags($divSplit[$k])) == '&nbsp;' && !preg_match('/\\<(img)(\\s[^>]*)?\\/?>/si', $divSplit[$k]) && !preg_match('/\\<([^>]*)?( align| class| style| id| title| dir| lang| xml:lang)([^>]*)?>/si', trim($divSplit[$k]))) {
1346  $divSplit[$k] = '';
1347  }
1348  } else {
1349  // outside div:
1350  // Remove positions which are outside div/p tags and without content
1351  $divSplit[$k] = trim(strip_tags($divSplit[$k], '<' . implode('><', $allowTagsOutside) . '>'));
1352  // Wrap hr tags with LF's
1353  $divSplit[$k] = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', LF . '<$1$2/>' . LF, $divSplit[$k]);
1354  $divSplit[$k] = preg_replace('/' . LF . LF . '/i', LF, $divSplit[$k]);
1355  $divSplit[$k] = preg_replace('/(^' . LF . ')|(' . LF . '$)/i', '', $divSplit[$k]);
1356  if ((string)$divSplit[$k] === '') {
1357  unset($divSplit[$k]);
1358  }
1359  }
1360  }
1361  // Return value:
1362  return $returnArray ? $divSplit : implode(LF, $divSplit);
1363  }
1364 
/**
 * Converts LF separated lines into paragraph-wrapped lines for the RTE.
 *
 * Every line is cleaned with HTMLcleaner() using the "rte" keep-tags; blank lines
 * become "&nbsp;". A line is then wrapped in <$dT> unless it contains an <hr> tag or
 * is already enclosed in <div>...</div> or <p...>...</p>.
 *
 * @param string $value Content with lines separated by LF
 * @param string $dT Tag name used for wrapping, "p" by default
 * @return string Processed lines joined by LF
 */
public function setDivTags($value, $dT = 'p')
{
    // Configuration for HTMLcleaner(), taken from the processing options
    $keepTags = $this->getKeepTags('rte');
    // Unknown tags are protected unless "dontProtectUnknownTags_rte" is set
    $unknownTagMode = $this->procOptions['dontProtectUnknownTags_rte'] ? 0 : 'protect';
    // htmlspecialchars mode for HTMLcleaner(): 1 by default, 0 if "dontHSC_rte" is set
    $hscMode = $this->procOptions['dontHSC_rte'] ? 0 : 1;
    $convertNbsp = !$this->procOptions['dontConvAmpInNBSP_rte'] ? 1 : 0;
    $lines = [];
    foreach (explode(LF, $value) as $line) {
        if (trim($line) === '') {
            // Blank lines are represented by a non-breaking space
            $line = '&nbsp;';
        } else {
            $line = $this->HTMLcleaner($line, $keepTags, $unknownTagMode, $hscMode);
            if ($convertNbsp) {
                $line = str_replace('&amp;nbsp;', '&nbsp;', $line);
            }
        }
        // Lines containing an hr tag are never wrapped
        if (!preg_match('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', $line)) {
            $probe = strtolower(trim($line));
            $isDivWrapped = substr($probe, 0, 4) == '<div' && substr($probe, -6) == '</div>';
            $isParagraphWrapped = substr($probe, 0, 2) == '<p' && substr($probe, -4) == '</p>';
            // Only wrap if the line is not already enclosed in div or p tags
            if (!$isDivWrapped && !$isParagraphWrapped) {
                $line = '<' . $dT . '>' . $line . '</' . $dT . '>';
            }
        }
        $lines[] = $line;
    }
    return implode(LF, $lines);
}
1411 
/**
 * Returns the "TYPO3_SITE_URL" environment value from GeneralUtility::getIndpEnv().
 *
 * @return string The site URL
 */
public function siteUrl()
{
    $siteUrl = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
    return $siteUrl;
}
1422 
/**
 * Replaces every table in the content by the plain content of its cells.
 *
 * For each table, the content of all <td> cells of all <tr> rows is concatenated, each
 * cell followed by $breakChar. The parts of the content (tables and the text between
 * them) are joined with $breakChar as well.
 *
 * @param string $value Content possibly containing tables
 * @param string $breakChar String appended after each cell and used to join the parts
 * @return string Content without table markup
 */
public function removeTables($value, $breakChar = '<br />')
{
    $tableSplit = $this->splitIntoBlock('table', $value);
    foreach ($tableSplit as $k => $tableBlock) {
        // Even keys hold the content between tables
        if (!($k % 2)) {
            continue;
        }
        $flattened = '';
        foreach ($this->splitIntoBlock('tr', $tableBlock) as $rowKey => $rowBlock) {
            // Even keys hold the content between rows
            if (!($rowKey % 2)) {
                continue;
            }
            $cellContents = $this->getAllParts($this->splitIntoBlock('td', $rowBlock), 1, 0);
            foreach ($cellContents as $cellContent) {
                $flattened .= $cellContent . $breakChar;
            }
        }
        $tableSplit[$k] = $flattened;
    }
    return implode($breakChar, $tableSplit);
}
1453 
/**
 * Maps strong/em to b/i (direction "db") or b/i to strong/em (direction "rte") through mapTags().
 *
 * @param string $code Content to process
 * @param string $direction "db" or "rte"
 * @return string Content with mapped tags
 */
public function defaultTStagMapping($code, $direction = 'rte')
{
    // Tag maps per direction; they are checked in this order
    $tagMaps = [
        'db' => [
            'strong' => 'b',
            'em' => 'i'
        ],
        'rte' => [
            'b' => 'strong',
            'i' => 'em'
        ]
    ];
    foreach ($tagMaps as $mapDirection => $tagMap) {
        if ($direction == $mapDirection) {
            $code = $this->mapTags($code, $tagMap);
        }
    }
    return $code;
}
1479 
/**
 * Determines width and height from a tag's attributes.
 *
 * Pixel values of the "width" and "height" properties in the "style" attribute take
 * precedence; for each dimension not found there (or found as 0) the "width" /
 * "height" attribute is used instead.
 *
 * @param array $attribArray Tag attributes (attribute name => value)
 * @return int[] Array with width at index 0 and height at index 1; 0 for a dimension that could not be determined
 */
public function getWHFromAttribs($attribArray)
{
    // Defined up front so both values exist even when there is no style attribute
    $w = 0;
    $h = 0;
    $style = isset($attribArray['style']) ? trim($attribArray['style']) : '';
    if ($style) {
        $valuePattern = '[[:space:]]*:[[:space:]]*([0-9]*)[[:space:]]*px';
        $reg = [];
        // The lookbehind keeps properties that merely end in "width" / "height"
        // (border-width, max-width, line-height, ...) from being taken as the dimension.
        if (preg_match('/(?<![\\w-])width' . $valuePattern . '/i', $style, $reg)) {
            $w = (int)$reg[1];
        }
        if (preg_match('/(?<![\\w-])height' . $valuePattern . '/i', $style, $reg)) {
            $h = (int)$reg[1];
        }
    }
    // Fall back to the plain attributes
    if (!$w && isset($attribArray['width'])) {
        $w = $attribArray['width'];
    }
    if (!$h && isset($attribArray['height'])) {
        $h = $attribArray['height'];
    }
    return [(int)$w, (int)$h];
}
1508 
/**
 * Analyses a URL and returns an array describing what kind of link it is.
 *
 * The returned array always gets a "type" for non-empty classification paths:
 * - "email": URL starts with "mailto:"; "url" holds the trimmed address
 * - "file": URL contains "?file:" ("url" is the raw-url-decoded part from "file:" on), or the
 *   URL points below the site URL and is neither an anchor nor a page link
 * - "anchor": URL relative to the site URL is just "#" plus the fragment
 * - "page": URL relative to the site URL has no path or "index.php" and a query starting with
 *   "id="; "pageid", "cElement" (fragment) and "query" (remaining parameters) are set as well
 * - "ext": everything else
 *
 * @param string $url URL to analyse
 * @return array Information about the URL ("url", "type" and, depending on the type, further keys)
 */
1515  public function urlInfoForLinkTags($url)
1516  {
1517  $info = [];
1518  $url = trim($url);
1519  if (substr(strtolower($url), 0, 7) == 'mailto:') {
1520  $info['url'] = trim(substr($url, 7));
1521  $info['type'] = 'email';
1522  } elseif (strpos($url, '?file:') !== false) {
1523  $info['type'] = 'file';
      // "+ 1" skips the question mark, so the value starts with "file:"
1524  $info['url'] = rawurldecode(substr($url, strpos($url, '?file:') + 1));
1525  } else {
1526  $curURL = $this->siteUrl();
1527  $urlLength = strlen($url);
      // Find the length of the common prefix of the URL and the site URL.
      // NOTE(review): if $url is longer than the site URL and starts with it, $curURL[$a] is read
      // past the end of the string - confirm this is acceptable for the supported PHP versions.
1528  for ($a = 0; $a < $urlLength; $a++) {
1529  if ($url[$a] != $curURL[$a]) {
1530  break;
1531  }
1532  }
1533  $info['relScriptPath'] = substr($curURL, $a);
1534  $info['relUrl'] = substr($url, $a);
1535  $info['url'] = $url;
1536  $info['type'] = 'ext';
      // Despite its name, $siteUrl_parts holds the parts of the given URL; $curUrl_parts those of the site URL
1537  $siteUrl_parts = parse_url($url);
1538  $curUrl_parts = parse_url($curURL);
1539  // Hosts should match
1540  if ($siteUrl_parts['host'] == $curUrl_parts['host'] && (!$info['relScriptPath'] || defined('TYPO3_mainDir') && substr($info['relScriptPath'], 0, strlen(TYPO3_mainDir)) == TYPO3_mainDir)) {
1541  // If the script path seems to match or is empty (FE-EDIT)
1542  // New processing order 100502
1543  $uP = parse_url($info['relUrl']);
1544  if ($info['relUrl'] === '#' . $siteUrl_parts['fragment']) {
1545  $info['url'] = $info['relUrl'];
1546  $info['type'] = 'anchor';
1547  } elseif (!trim($uP['path']) || $uP['path'] === 'index.php') {
1548  // URL is a page (id parameter)
      // The query is split at a leading "id="; further "&id=" parameters are dropped and the
      // first remaining "&" separated value is taken as the page id
1549  $pp = preg_split('/^id=/', $uP['query']);
1550  $pp[1] = preg_replace('/&id=[^&]*/', '', $pp[1]);
1551  $parameters = explode('&', $pp[1]);
1552  $id = array_shift($parameters);
1553  if ($id) {
1554  $info['pageid'] = $id;
1555  $info['cElement'] = $uP['fragment'];
1556  $info['url'] = $id . ($info['cElement'] ? '#' . $info['cElement'] : '');
1557  $info['type'] = 'page';
1558  $info['query'] = $parameters[0] ? '&' . implode('&', $parameters) : '';
1559  }
1560  } else {
1561  $info['url'] = $info['relUrl'];
1562  $info['type'] = 'file';
1563  }
1564  } else {
      // Not a link into this site: the relative parts are meaningless and are removed again
1565  unset($info['relScriptPath']);
1566  unset($info['relUrl']);
1567  }
1568  }
1569  return $info;
1570  }
1571 
/**
 * Converts the href of <a> tags without a scheme to absolute URLs based on the site URL.
 *
 * For every <a> tag in the content:
 * - a non-empty href without scheme is prefixed with siteUrl() (after cutting off as many
 *   leading characters as $this->relBackPath is long),
 * - a non-empty href with a scheme other than "mailto" gets the attribute "data-htmlarea-external",
 * - a tag without href content (attribute missing or empty) is treated as an anchor: it is
 *   marked with "rtekeep" and its href is not touched.
 * Unless $dontSetRTEKEEP is set, "rtekeep" is added to every <a> tag.
 *
 * @param string $value Content to process
 * @param bool $dontSetRTEKEEP If set, "rtekeep" is only added to tags without href content
 * @return string Processed content
 */
public function TS_AtagToAbs($value, $dontSetRTEKEEP = false)
{
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        // Only odd keys hold an A-tag block
        if (!($k % 2)) {
            continue;
        }
        $attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v), 1);
        // A missing href must count as "no content" too; comparing the absent (null) value
        // against '' would let pure anchors through and give them the site URL as href.
        $href = isset($attribArray['href']) ? (string)$attribArray['href'] : '';
        if ($href !== '') {
            // Checking if there is a scheme, and if not, prepend the current url.
            $uP = parse_url(strtolower($href));
            if (empty($uP['scheme'])) {
                $attribArray['href'] = $this->siteUrl() . substr($href, strlen($this->relBackPath));
            } elseif ($uP['scheme'] != 'mailto') {
                $attribArray['data-htmlarea-external'] = 1;
            }
        } else {
            // The <a> tag could be an anchor and is preserved as it is
            $attribArray['rtekeep'] = 1;
        }
        if (!$dontSetRTEKEEP) {
            $attribArray['rtekeep'] = 1;
        }
        $bTag = '<a ' . GeneralUtility::implodeAttributes($attribArray, 1) . '>';
        $eTag = '</a>';
        // Nested content is processed with the same setting as the outer call
        $blockSplit[$k] = $bTag . $this->TS_AtagToAbs($this->removeFirstAndLastTag($blockSplit[$k]), $dontSetRTEKEEP) . $eTag;
    }
    return implode('', $blockSplit);
}
1608 
/**
 * Adjusts the width/height attributes of an image according to the "plainImageMode" processing option.
 *
 * - "lockDimensions": width and height are set to the dimensions in $imageInfo
 * - "lockRatioWhenSmaller": the width is capped at the width in $imageInfo, then the height
 *   is derived from the width using the ratio in $imageInfo
 * - "lockRatio": the height is derived from the width using the ratio in $imageInfo
 * Any other value leaves the attributes unchanged.
 *
 * @param array $imageInfo Image dimensions: width at index 0, height at index 1
 * @param array $attribArray Attributes of the image tag
 * @return array Attributes with adjusted width/height
 */
protected function applyPlainImageModeSettings($imageInfo, $attribArray)
{
    if (!$this->procOptions['plainImageMode']) {
        return $attribArray;
    }
    $mode = (string)$this->procOptions['plainImageMode'];
    if ($mode === 'lockDimensions') {
        $attribArray['width'] = $imageInfo[0];
        $attribArray['height'] = $imageInfo[1];
        return $attribArray;
    }
    // "lockRatioWhenSmaller" first caps the width and then continues like "lockRatio"
    if ($mode === 'lockRatioWhenSmaller' && $attribArray['width'] > $imageInfo[0]) {
        $attribArray['width'] = $imageInfo[0];
    }
    $keepsRatio = $mode === 'lockRatioWhenSmaller' || $mode === 'lockRatio';
    if ($keepsRatio && $imageInfo[0] > 0) {
        $attribArray['height'] = round($attribArray['width'] * ($imageInfo[1] / $imageInfo[0]));
    }
    return $attribArray;
}
1639 
/**
 * Returns the logger that the LogManager provides for the class of this object.
 *
 * @return mixed The logger returned by LogManager::getLogger() for get_class($this)
 */
protected function getLogger()
{
    $loggerName = get_class($this);
    return GeneralUtility::makeInstance(\TYPO3\CMS\Core\Log\LogManager::class)->getLogger($loggerName);
}
1650 }
RTE_transform($value, $specConf, $direction='rte', $thisConfig=[])
compileTagAttribs($tagAttrib, $meta=[], $xhtmlClean=0)
static implodeAttributes(array $arr, $xhtmlSafe=false, $dontOmitBlankAttribs=false)
HTMLcleaner_db($content, $tagList='')
static isFirstPartOfStr($str, $partStr)
removeTables($value, $breakChar='<br />')
static getRecordsByField($theTable, $theField, $theValue, $whereClause='', $groupBy='', $orderBy='', $limit='', $useDeleteClause=true)
applyPlainImageModeSettings($imageInfo, $attribArray)
TS_AtagToAbs($value, $dontSetRTEKEEP=false)
getAllParts($parts, $tag_parts=true, $include_tag=true)
Definition: HtmlParser.php:335
static trimExplode($delim, $string, $removeEmptyValues=false, $limit=0)
get_tag_attributes($tag, $deHSC=0)
Definition: HtmlParser.php:408
getKeepTags($direction='rte', $tagList='')
defaultTStagMapping($code, $direction='rte')
HTMLparserConfig($TSconfig, $keepTags=[])
TS_transform_db($value, $css=false)
static getUrl($url, $includeHeader=0, $requestHeaders=false, &$report=null)
static getFileAbsFileName($filename, $onlyRelative=true, $relToTYPO3_mainDir=false)
mapTags($value, $tags=[], $ltChar='<', $ltChar2='<')
divideIntoLines($value, $count=5, $returnArray=false)
static beginsWith($haystack, $needle)
static getRecord($table, $uid, $fields=' *', $where='', $useDeleteClause=true)
if(TYPO3_MODE==='BE') $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_tsfebeuserauth.php']['frontendEditingController']['default']
static evalWriteFile($pArr, $currentRecord)
get_tag_attributes_classic($tag, $deHSC=0)
splitIntoBlock($tag, $content, $eliminateExtraEndTags=false)
Definition: HtmlParser.php:191
getFirstTagName($str, $preserveCase=false)
Definition: HtmlParser.php:388