// NOTE(review): this is an excerpt. The enclosing method header, the case labels of the
// switch, and the else/closing-brace lines between the numbered statements are not visible.
//
// Load the settings of the "indexed_search" extension. The ignore list and all external
// tool paths used below are read from this array.
87 $indexerConfig = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(
'indexed_search');
// Lower-case the comma-separated 'ignoreExtensions' setting and split it into a list.
// The third argument (true) asks trimExplode() to drop empty entries.
94 $ignoreExtensions = GeneralUtility::trimExplode(
',', strtolower($indexerConfig[
'ignoreExtensions']),
true);
// NOTE(review): in_array() is called without the strict flag, so the comparison is loose.
95 if (in_array($extension, $ignoreExtensions)) {
// Log that the extension is ignored. The trailing integer (1 here, 3 for the "not found"
// messages below) is presumably a severity level — confirm against log_setTSlogMessage().
96 $this->pObj->log_setTSlogMessage(sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:ignoreExtensions'), $extension), 1);
// Per-extension tool lookup. Every section below follows the same pattern:
//   1. only act when the tool's path setting is non-empty (truthy),
//   2. normalise the path so it ends in exactly one '/',
//   3. check that the binary (name + $exe suffix, $exe being defined outside this excerpt)
//      exists as a file, and if so remember its full path in $this->app,
//   4. otherwise log a "...NotFound" message (with the path) or a "...Disabled" message.
100 switch ($extension) {
// pdftools: both "pdftotext" and "pdfinfo" must exist in the configured directory.
103 if ($indexerConfig[
'pdftools']) {
104 $pdfPath = rtrim($indexerConfig[
'pdftools'],
'/') .
'/';
// '@' suppresses warnings from is_file() (e.g. for unreadable paths).
105 if (@is_file($pdfPath .
'pdftotext' . $exe) && @is_file($pdfPath .
'pdfinfo' . $exe)) {
106 $this->app[
'pdfinfo'] = $pdfPath .
'pdfinfo' . $exe;
107 $this->app[
'pdftotext'] = $pdfPath .
'pdftotext' . $exe;
// Presumably the else branch of the is_file() check above (the else line is not visible).
112 $this->pObj->log_setTSlogMessage(sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:pdfToolsNotFound'), $pdfPath), 3);
// Presumably the else branch of the "pdftools configured" check.
115 $this->pObj->log_setTSlogMessage($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:pdfToolsDisabled'), 1);
// catdoc: single binary "catdoc".
120 if ($indexerConfig[
'catdoc']) {
121 $catdocPath = rtrim($indexerConfig[
'catdoc'],
'/') .
'/';
122 if (@is_file($catdocPath .
'catdoc' . $exe)) {
123 $this->app[
'catdoc'] = $catdocPath .
'catdoc' . $exe;
126 $this->pObj->log_setTSlogMessage(sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:catdocNotFound'), $catdocPath), 3);
129 $this->pObj->log_setTSlogMessage($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:catdocDisabled'), 1);
// ppthtml: single binary "ppthtml".
136 if ($indexerConfig[
'ppthtml']) {
137 $ppthtmlPath = rtrim($indexerConfig[
'ppthtml'],
'/') .
'/';
138 if (@is_file($ppthtmlPath .
'ppthtml' . $exe)) {
139 $this->app[
'ppthtml'] = $ppthtmlPath .
'ppthtml' . $exe;
142 $this->pObj->log_setTSlogMessage(sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:ppthtmlNotFound'), $ppthtmlPath), 3);
145 $this->pObj->log_setTSlogMessage($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:ppthtmlDisabled'), 1);
// xlhtml: single binary "xlhtml".
151 if ($indexerConfig[
'xlhtml']) {
152 $xlhtmlPath = rtrim($indexerConfig[
'xlhtml'],
'/') .
'/';
153 if (@is_file($xlhtmlPath .
'xlhtml' . $exe)) {
154 $this->app[
'xlhtml'] = $xlhtmlPath .
'xlhtml' . $exe;
157 $this->pObj->log_setTSlogMessage(sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:xlhtmlNotFound'), $xlhtmlPath), 3);
160 $this->pObj->log_setTSlogMessage($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:xlhtmlDisabled'), 1);
// unzip (first occurrence): single binary "unzip". Which extensions reach this section is
// decided by case labels that are not visible here.
170 if ($indexerConfig[
'unzip']) {
171 $unzipPath = rtrim($indexerConfig[
'unzip'],
'/') .
'/';
172 if (@is_file($unzipPath .
'unzip' . $exe)) {
173 $this->app[
'unzip'] = $unzipPath .
'unzip' . $exe;
176 $this->pObj->log_setTSlogMessage(sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:unzipNotFound'), $unzipPath), 3);
179 $this->pObj->log_setTSlogMessage($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:unzipDisabled'), 1);
// unzip (second occurrence): same statements as the section above, repeated for a
// different group of case labels (not visible here).
189 if ($indexerConfig[
'unzip']) {
190 $unzipPath = rtrim($indexerConfig[
'unzip'],
'/') .
'/';
191 if (@is_file($unzipPath .
'unzip' . $exe)) {
192 $this->app[
'unzip'] = $unzipPath .
'unzip' . $exe;
195 $this->pObj->log_setTSlogMessage(sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:unzipNotFound'), $unzipPath), 3);
198 $this->pObj->log_setTSlogMessage($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:unzipDisabled'), 1);
// unrtf: single binary "unrtf".
203 if ($indexerConfig[
'unrtf']) {
204 $unrtfPath = rtrim($indexerConfig[
'unrtf'],
'/') .
'/';
205 if (@is_file($unrtfPath .
'unrtf' . $exe)) {
206 $this->app[
'unrtf'] = $unrtfPath .
'unrtf' . $exe;
209 $this->pObj->log_setTSlogMessage(sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:unrtfNotFound'), $unrtfPath), 3);
212 $this->pObj->log_setTSlogMessage($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:unrtfDisabled'), 1);
// Some extensions are mapped onto a "main" extension ('html' or 'jpeg'). Which ones is
// decided by case labels not visible here.
226 $mainExtension =
'html';
233 $mainExtension =
'jpeg';
// Register the extension as supported and record its item type: the main extension when
// one was set (truthy), otherwise the extension itself.
239 $this->supportedExtensions[$extension] =
true;
240 $this->ext2itemtype_map[$extension] = $mainExtension ?: $extension;
// NOTE(review): only the opening of this switch over $extension is part of the excerpt;
// its case labels, bodies and the enclosing method are not visible.
255 switch ($extension) {
// NOTE(review): excerpt only. The enclosing method header, the case labels and the closing
// braces are not visible. Every visible return yields a localized label for the file type,
// formatted through sprintf() with $extension as the argument.
//
// Load the "indexed_search" extension settings and build the lower-cased ignore list.
298 $indexerConfig = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(
'indexed_search');
300 $ignoreExtensions = GeneralUtility::trimExplode(
',', strtolower($indexerConfig[
'ignoreExtensions']),
true);
// NOTE(review): loose in_array() comparison (no strict flag). The body of this branch is
// not visible.
301 if (in_array($extension, $ignoreExtensions)) {
305 switch ($extension) {
// A label is returned only when the setting of the external tool for that file type is
// non-empty (truthy).
308 if ($indexerConfig[
'pdftools']) {
309 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.PDF'), $extension);
314 if ($indexerConfig[
'catdoc']) {
315 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.DOC'), $extension);
322 if ($indexerConfig[
'ppthtml']) {
323 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.PP'), $extension);
329 if ($indexerConfig[
'xlhtml']) {
330 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.XLS'), $extension);
// The next three sections depend on 'unzip' but reuse the DOC / PP / XLS labels;
// presumably these are the zip-based variants of those formats — confirm against the
// case labels in the full file.
336 if ($indexerConfig[
'unzip']) {
337 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.DOC'), $extension);
343 if ($indexerConfig[
'unzip']) {
344 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.PP'), $extension);
349 if ($indexerConfig[
'unzip']) {
350 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.XLS'), $extension);
// Further 'unzip'-dependent types, each with its own label (SXC, SXI, SXW, ODS, ODP, ODT).
355 if ($indexerConfig[
'unzip']) {
356 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.SXC'), $extension);
361 if ($indexerConfig[
'unzip']) {
362 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.SXI'), $extension);
367 if ($indexerConfig[
'unzip']) {
368 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.SXW'), $extension);
373 if ($indexerConfig[
'unzip']) {
374 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.ODS'), $extension);
379 if ($indexerConfig[
'unzip']) {
380 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.ODP'), $extension);
385 if ($indexerConfig[
'unzip']) {
386 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.ODT'), $extension);
391 if ($indexerConfig[
'unrtf']) {
392 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.RTF'), $extension);
// The remaining labels (images, HTML, TXT, CSV, XML) are returned without any visible
// tool-setting check.
399 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.images'), $extension);
403 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.HTML'), $extension);
406 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.TXT'), $extension);
409 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.CSV'), $extension);
412 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.XML'), $extension);
// NOTE(review): only the opening of this switch is part of the excerpt. $extension is cast
// to string before being switched on; the case labels and bodies are not visible.
428 switch ((
string)$extension) {
/**
 * Resolve a language label by delegating to the language object held by this parser.
 *
 * @param string $reference Label reference; callers in this file pass "LLL:EXT:..." paths
 * @return mixed Whatever $this->langObject->sL() returns for the reference (used as a
 *               string by the callers in this file)
 */
protected function sL($reference)
{
    $translatedLabel = $this->langObject->sL($reference);

    return $translatedLabel;
}
// NOTE(review): excerpt only. The enclosing method header, the case labels, the calls that
// actually run $cmd and fill $res / $pdfInfo, and the closing braces are not visible.
// Each section builds a shell command for one external tool (paths come from $this->app),
// turns the tool output into text and splits it into $contentArr via $this->pObj.
//
// Guard: body (not visible) runs when the extension is not registered as supported.
463 if (!$this->supportedExtensions[$ext]) {
// --- PDF: needs the pdfinfo binary ---
469 if ($this->app[
'pdfinfo']) {
// The file path is passed through escapeshellarg() before being appended to the command.
472 $cmd = $this->app[
'pdfinfo'] .
' ' . escapeshellarg($absFile);
// Continue only when pdfinfo reported a non-zero page count.
476 if ((
int)$pdfInfo[
'pages']) {
// $cPKey is expected to hold a "low-high" page range.
// NOTE(review): if $cPKey contains no '-', $high is not set by this list() assignment.
477 list($low, $high) = explode(
'-', $cPKey);
// Obtain a temp file name, then delete the file again — presumably so that pdftotext
// creates it itself; confirm.
479 $tempFileName = GeneralUtility::tempnam(
'Typo3_indexer');
481 @unlink($tempFileName);
// NOTE(review): $low, $high and $tempFileName are concatenated into the shell command
// without escaping or integer casting; only $absFile goes through escapeshellarg().
// Verify that $cPKey cannot carry attacker-controlled content.
483 $cmd = $this->app[
'pdftotext'] .
' -f ' . $low .
' -l ' . $high .
' -enc UTF-8 -q ' . escapeshellarg($absFile) .
' ' . $tempFileName;
// Read the extracted text back and remove the temp file when it exists.
485 if (@is_file($tempFileName)) {
486 $content = file_get_contents($tempFileName);
487 unlink($tempFileName);
// Presumably the else branch: the output file was not produced, log a failure.
490 $this->pObj->log_setTSlogMessage(sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:pdfToolsFailed'), $absFile), 2);
492 if ((
string)$content !==
'') {
493 $contentArr = $this->pObj->splitRegularContent($this->
removeEndJunk($content));
// A non-empty title reported by pdfinfo overrides the title in $contentArr.
496 if (!empty($pdfInfo[
'title'])) {
497 $contentArr[
'title'] = $pdfInfo[
'title'];
// --- catdoc: output requested as UTF-8 (-d utf-8), treated as regular text ---
503 if ($this->app[
'catdoc']) {
505 $cmd = $this->app[
'catdoc'] .
' -d utf-8 ' . escapeshellarg($absFile);
// $res is joined with the LF constant (defined outside this excerpt).
507 $content = implode(LF, $res);
509 $contentArr = $this->pObj->splitRegularContent($this->
removeEndJunk($content));
// --- ppthtml: output is HTML, converted to UTF-8 and split as HTML ---
515 if ($this->app[
'ppthtml']) {
517 $cmd = $this->app[
'ppthtml'] .
' ' . escapeshellarg($absFile);
519 $content = implode(LF, $res);
521 $content = $this->pObj->convertHTMLToUtf8($content);
522 $contentArr = $this->pObj->splitHTMLContent($this->
removeEndJunk($content));
// --- xlhtml: called with "-nc -te", output handled like the ppthtml output ---
528 if ($this->app[
'xlhtml']) {
530 $cmd = $this->app[
'xlhtml'] .
' -nc -te ' . escapeshellarg($absFile);
532 $content = implode(LF, $res);
534 $content = $this->pObj->convertHTMLToUtf8($content);
535 $contentArr = $this->pObj->splitHTMLContent($this->
removeEndJunk($content));
// --- unzip -p on archives containing word/, ppt/ or xl/ entries ---
547 if ($this->app[
'unzip']) {
// One of the three commands below is chosen by logic not visible here.
553 $cmd = $this->app[
'unzip'] .
' -p ' . escapeshellarg($absFile) .
' word/document.xml';
// NOTE(review): only the entry "slide1.xml" is requested, so text on further slides is
// not extracted by this command.
559 $cmd = $this->app[
'unzip'] .
' -p ' . escapeshellarg($absFile) .
' ppt/slides/slide1.xml';
// NOTE(review): likewise only "sheet1.xml" is requested.
564 $cmd = $this->app[
'unzip'] .
' -p ' . escapeshellarg($absFile) .
' xl/worksheets/sheet1.xml';
568 $content_xml = implode(LF, $res);
// Put a space before every '<' so that strip_tags() does not glue adjacent text nodes
// together, then strip the markup.
570 $utf8_content = trim(strip_tags(str_replace(
'<',
' <', $content_xml)));
571 $contentArr = $this->pObj->splitRegularContent($utf8_content);
// Second unzip call: read the metadata entry docProps/core.xml.
575 $cmd = $this->app[
'unzip'] .
' -p ' . escapeshellarg($absFile) .
' docProps/core.xml';
577 $meta_xml = implode(LF, $res);
// The is_array() check below guards against xml2tree() not returning an array.
579 $metaContent = GeneralUtility::xml2tree($meta_xml);
580 if (is_array($metaContent)) {
// dc:title is appended to the existing title; subject + description form the
// description; cp:keywords becomes the keywords.
581 $contentArr[
'title'] .=
' ' . $metaContent[
'cp:coreProperties'][0][
'ch'][
'dc:title'][0][
'values'][0];
582 $contentArr[
'description'] = $metaContent[
'cp:coreProperties'][0][
'ch'][
'dc:subject'][0][
'values'][0];
583 $contentArr[
'description'] .=
' ' . $metaContent[
'cp:coreProperties'][0][
'ch'][
'dc:description'][0][
'values'][0];
584 $contentArr[
'keywords'] = $metaContent[
'cp:coreProperties'][0][
'ch'][
'cp:keywords'][0][
'values'][0];
// --- unzip -p on archives containing content.xml and meta.xml ---
595 if ($this->app[
'unzip']) {
598 $cmd = $this->app[
'unzip'] .
' -p ' . escapeshellarg($absFile) .
' content.xml';
600 $content_xml = implode(LF, $res);
603 $cmd = $this->app[
'unzip'] .
' -p ' . escapeshellarg($absFile) .
' meta.xml';
605 $meta_xml = implode(LF, $res);
// Same tag-stripping approach as for the archives above.
607 $utf8_content = trim(strip_tags(str_replace(
'<',
' <', $content_xml)));
608 $contentArr = $this->pObj->splitRegularContent($utf8_content);
// Descend to the children of office:document-meta > office:meta.
612 $metaContent = GeneralUtility::xml2tree($meta_xml);
613 $metaContent = $metaContent[
'office:document-meta'][0][
'ch'][
'office:meta'][0][
'ch'];
614 if (is_array($metaContent)) {
// Use dc:title when it is non-empty (truthy), otherwise keep the title found in the content.
615 $contentArr[
'title'] = $metaContent[
'dc:title'][0][
'values'][0] ? $metaContent[
'dc:title'][0][
'values'][0] : $contentArr[
'title'];
616 $contentArr[
'description'] = $metaContent[
'dc:subject'][0][
'values'][0] .
' ' . $metaContent[
'dc:description'][0][
'values'][0];
// Append every meta:keyword value, each followed by a space.
618 if (is_array($metaContent[
'meta:keywords'][0][
'ch'][
'meta:keyword'])) {
619 foreach ($metaContent[
'meta:keywords'][0][
'ch'][
'meta:keyword'] as $kwDat) {
620 $contentArr[
'keywords'] .= $kwDat[
'values'][0] .
' ';
// --- unrtf: output is converted to UTF-8 and split as HTML ---
628 if ($this->app[
'unrtf']) {
630 $cmd = $this->app[
'unrtf'] .
' ' . escapeshellarg($absFile);
632 $fileContent = implode(LF, $res);
634 $fileContent = $this->pObj->convertHTMLToUtf8($fileContent);
635 $contentArr = $this->pObj->splitHTMLContent($fileContent);
// --- File read directly and split as regular text; charset is fixed to 'utf-8' ---
643 $content = GeneralUtility::getUrl($absFile);
645 $contentCharset =
'utf-8';
646 $content = $this->pObj->convertHTMLToUtf8($content, $contentCharset);
647 $contentArr = $this->pObj->splitRegularContent($content);
// --- File read directly and split as HTML ---
654 $fileContent = GeneralUtility::getUrl($absFile);
655 $fileContent = $this->pObj->convertHTMLToUtf8($fileContent);
656 $contentArr = $this->pObj->splitHTMLContent($fileContent);
// --- XML file: detect the charset from the XML declaration, then strip the tags ---
661 $fileContent = GeneralUtility::getUrl($absFile);
// Look for encoding="..." in an XML declaration within the first 200 bytes.
663 preg_match(
'/^[[:space:]]*<\\?xml[^>]+encoding[[:space:]]*=[[:space:]]*["\'][[:space:]]*([[:alnum:]_-]+)[[:space:]]*["\']/i', substr($fileContent, 0, 200), $reg);
// Fall back to 'utf-8' when no encoding was captured.
664 $charset = $reg[1] ? trim(strtolower($reg[1])) :
'utf-8';
666 $fileContent = $this->pObj->convertHTMLToUtf8(strip_tags(str_replace(
'<',
' <', $fileContent)), $charset);
667 $contentArr = $this->pObj->splitRegularContent($fileContent);
// --- Image: index EXIF text when the exif extension is available ---
677 if (function_exists(
'exif_read_data')) {
// '@' suppresses warnings from exif_read_data(); 'IFD0' is passed as its second argument.
678 $exif = @exif_read_data($absFile,
'IFD0');
// Indexed text = first COMMENT entry + ImageDescription, trimmed.
683 $comment = trim($exif[
'COMMENT'][0] .
' ' . $exif[
'ImageDescription']);
687 $contentArr = $this->pObj->splitRegularContent($comment);
// Applies when content was extracted but no title was found; the body is not visible here.
696 if (is_array($contentArr) && !$contentArr[
'title']) {
// NOTE(review): excerpt only. The enclosing method header, the body of the first if, and
// the condition that selects between the "reset" and "set" parts below are not visible.
//
// Branch taken when the SYS/UTF8filesystem setting is falsy.
715 if (!
$GLOBALS[
'TYPO3_CONF_VARS'][
'SYS'][
'UTF8filesystem']) {
// Reset part: restoring requires a previously saved locale.
// NOTE(review): this uses loose '== null', while the check at 726 uses strict '!== null'.
720 if ($this->lastLocale ==
null) {
721 throw new \RuntimeException(
'Cannot reset locale to NULL.', 1357064326);
// Restore the saved LC_CTYPE locale and clear the saved value.
723 setlocale(LC_CTYPE, $this->lastLocale);
724 $this->lastLocale =
null;
// Set part: refuse to switch while an earlier switch has not been reset.
726 if ($this->lastLocale !==
null) {
727 throw new \RuntimeException(
'Cannot set new locale as locale has already been changed before.', 1357064437);
// setlocale() with 0 as locale only returns the current setting. It is saved here before
// LC_CTYPE is switched to the configured SYS/systemLocale.
729 $this->lastLocale = setlocale(LC_CTYPE, 0);
730 setlocale(LC_CTYPE,
$GLOBALS[
'TYPO3_CONF_VARS'][
'SYS'][
'systemLocale']);
// NOTE(review): excerpt only. The enclosing method header, the call that runs $cmd and
// fills $pdfInfo, and the lines between 759 and 764 are not visible.
//
// Ask pdfinfo about the file; the path is shell-escaped.
751 $cmd = $this->app[
'pdfinfo'] .
' ' . escapeshellarg($absFile);
// Continue only when a non-zero page count was reported.
755 if ((
int)$pdfInfo[
'pages']) {
// With a positive pdf_mode, the number of parts is the page count divided by pdf_mode,
// rounded up.
758 if ($this->pdf_mode > 0) {
759 $iter = ceil($pdfInfo[
'pages'] / $this->pdf_mode);
// Build $iter page ranges of the form "low-high" (1-based): part $a starts at
// floor(a * pages / iter) + 1 and ends at floor((a + 1) * pages / iter).
764 for ($a = 0; $a < $iter; $a++) {
765 $low = floor($a * ($pdfInfo[
'pages'] / $iter)) + 1;
766 $high = floor(($a + 1) * ($pdfInfo[
'pages'] / $iter));
767 $cParts[] = $low .
'-' . $high;
// NOTE(review): excerpt only. The enclosing method header, the initialisation of $res and
// the return statement are not visible.
//
// Turn "Key: value" lines into an array of lower-cased key => value.
788 if (is_array($pdfInfoArray)) {
789 foreach ($pdfInfoArray as $line) {
// Split at the first ':' only (limit 2), so values may themselves contain colons.
790 $parts = explode(
':', $line, 2);
// Skip lines without a ':' and lines whose trimmed key is falsy (empty, or the string '0').
791 if (count($parts) > 1 && trim($parts[0])) {
792 $res[strtolower(trim($parts[0]))] = trim($parts[1]);
// NOTE(review): the enclosing method header is not visible in this excerpt.
//
// Remove any trailing run of LF characters (LF constant, defined outside this excerpt) and
// form feeds (chr(12)) from $string, then trim whitespace from both ends.
807 return trim(preg_replace(
'/[' . LF . chr(12) .
']*$/',
'', $string));
/**
 * Build the EXT: path of the file-type icon for an extension.
 *
 * NOTE(review): the bodies of the 'htm' and 'jpeg' branches and the closing brace are not
 * part of this excerpt; presumably they map the extension onto an existing icon name
 * before the path is built — confirm in the full file.
 *
 * @param string $extension File extension; compared strictly against 'htm' and 'jpeg'
 * @return string "EXT:indexed_search/Resources/Public/Icons/FileTypes/<extension>.gif"
 */
821 public function getIcon($extension)
823 if ($extension ===
'htm') {
825 } elseif ($extension ===
'jpeg') {
// The icon is always looked up as a .gif file inside the extension's FileTypes folder.
828 return 'EXT:indexed_search/Resources/Public/Icons/FileTypes/' . $extension .
'.gif';