79 $indexerConfig = unserialize(
$GLOBALS[
'TYPO3_CONF_VARS'][
'EXT'][
'extConf'][
'indexed_search']);
81 $exe = TYPO3_OS ==
'WIN' ?
'.exe' :
'';
87 if (in_array($extension, $ignoreExtensions)) {
88 $this->pObj->log_setTSlogMessage(sprintf($this->
sL(
'LLL:EXT:indexed_search/locallang.xlf:ignoreExtensions'), $extension), 1);
95 if ($indexerConfig[
'pdftools']) {
96 $pdfPath = rtrim($indexerConfig[
'pdftools'],
'/') .
'/';
97 if (@is_file(($pdfPath .
'pdftotext' . $exe)) && @is_file(($pdfPath .
'pdfinfo' . $exe))) {
98 $this->app[
'pdfinfo'] = $pdfPath .
'pdfinfo' . $exe;
99 $this->app[
'pdftotext'] = $pdfPath .
'pdftotext' . $exe;
104 $this->pObj->log_setTSlogMessage(sprintf($this->
sL(
'LLL:EXT:indexed_search/locallang.xlf:pdfToolsNotFound'), $pdfPath), 3);
107 $this->pObj->log_setTSlogMessage($this->
sL(
'LLL:EXT:indexed_search/locallang.xlf:pdfToolsDisabled'), 1);
112 if ($indexerConfig[
'catdoc']) {
113 $catdocPath = rtrim($indexerConfig[
'catdoc'],
'/') .
'/';
114 if (@is_file(($catdocPath .
'catdoc' . $exe))) {
115 $this->app[
'catdoc'] = $catdocPath .
'catdoc' . $exe;
118 $this->pObj->log_setTSlogMessage(sprintf($this->
sL(
'LLL:EXT:indexed_search/locallang.xlf:catdocNotFound'), $catdocPath), 3);
121 $this->pObj->log_setTSlogMessage($this->
sL(
'LLL:EXT:indexed_search/locallang.xlf:catdocDisabled'), 1);
129 if ($indexerConfig[
'ppthtml']) {
130 $ppthtmlPath = rtrim($indexerConfig[
'ppthtml'],
'/') .
'/';
131 if (@is_file(($ppthtmlPath .
'ppthtml' . $exe))) {
132 $this->app[
'ppthtml'] = $ppthtmlPath .
'ppthtml' . $exe;
135 $this->pObj->log_setTSlogMessage(sprintf($this->
sL(
'LLL:EXT:indexed_search/locallang.xlf:ppthtmlNotFound'), $ppthtmlPath), 3);
138 $this->pObj->log_setTSlogMessage($this->
sL(
'LLL:EXT:indexed_search/locallang.xlf:ppthtmlDisabled'), 1);
144 if ($indexerConfig[
'xlhtml']) {
145 $xlhtmlPath = rtrim($indexerConfig[
'xlhtml'],
'/') .
'/';
146 if (@is_file(($xlhtmlPath .
'xlhtml' . $exe))) {
147 $this->app[
'xlhtml'] = $xlhtmlPath .
'xlhtml' . $exe;
150 $this->pObj->log_setTSlogMessage(sprintf($this->
sL(
'LLL:EXT:indexed_search/locallang.xlf:xlhtmlNotFound'), $xlhtmlPath), 3);
153 $this->pObj->log_setTSlogMessage($this->
sL(
'LLL:EXT:indexed_search/locallang.xlf:xlhtmlDisabled'), 1);
168 if ($indexerConfig[
'unzip']) {
169 $unzipPath = rtrim($indexerConfig[
'unzip'],
'/') .
'/';
170 if (@is_file(($unzipPath .
'unzip' . $exe))) {
171 $this->app[
'unzip'] = $unzipPath .
'unzip' . $exe;
174 $this->pObj->log_setTSlogMessage(sprintf($this->
sL(
'LLL:EXT:indexed_search/locallang.xlf:unzipNotFound'), $unzipPath), 3);
177 $this->pObj->log_setTSlogMessage($this->
sL(
'LLL:EXT:indexed_search/locallang.xlf:unzipDisabled'), 1);
182 if ($indexerConfig[
'unrtf']) {
183 $unrtfPath = rtrim($indexerConfig[
'unrtf'],
'/') .
'/';
184 if (@is_file(($unrtfPath .
'unrtf' . $exe))) {
185 $this->app[
'unrtf'] = $unrtfPath .
'unrtf' . $exe;
188 $this->pObj->log_setTSlogMessage(sprintf($this->
sL(
'LLL:EXT:indexed_search/locallang.xlf:unrtfNotFound'), $unrtfPath), 3);
191 $this->pObj->log_setTSlogMessage($this->
sL(
'LLL:EXT:indexed_search/locallang.xlf:unrtfDisabled'), 1);
209 $mainExtension =
'html';
217 $mainExtension =
'jpeg';
223 $this->supportedExtensions[$extension] = TRUE;
224 $this->ext2itemtype_map[$extension] = $mainExtension ?: $extension;
238 switch ($extension) {
293 $indexerConfig = unserialize(
$GLOBALS[
'TYPO3_CONF_VARS'][
'EXT'][
'extConf'][
'indexed_search']);
296 if (in_array($extension, $ignoreExtensions)) {
300 switch ($extension) {
303 if ($indexerConfig[
'pdftools']) {
304 return sprintf($this->
sL(
'LLL:EXT:indexed_search/locallang.xlf:extension.PDF'), $extension);
309 if ($indexerConfig[
'catdoc']) {
310 return sprintf($this->
sL(
'LLL:EXT:indexed_search/locallang.xlf:extension.DOC'), $extension);
318 if ($indexerConfig[
'ppthtml']) {
319 return sprintf($this->
sL(
'LLL:EXT:indexed_search/locallang.xlf:extension.PP'), $extension);
325 if ($indexerConfig[
'xlhtml']) {
326 return sprintf($this->
sL(
'LLL:EXT:indexed_search/locallang.xlf:extension.XLS'), $extension);
331 if ($indexerConfig[
'unzip']) {
332 return sprintf($this->
sL(
'LLL:EXT:indexed_search/locallang.xlf:extension.SXC'), $extension);
337 if ($indexerConfig[
'unzip']) {
338 return sprintf($this->
sL(
'LLL:EXT:indexed_search/locallang.xlf:extension.SXI'), $extension);
343 if ($indexerConfig[
'unzip']) {
344 return sprintf($this->
sL(
'LLL:EXT:indexed_search/locallang.xlf:extension.SXW'), $extension);
349 if ($indexerConfig[
'unzip']) {
350 return sprintf($this->
sL(
'LLL:EXT:indexed_search/locallang.xlf:extension.ODS'), $extension);
355 if ($indexerConfig[
'unzip']) {
356 return sprintf($this->
sL(
'LLL:EXT:indexed_search/locallang.xlf:extension.ODP'), $extension);
361 if ($indexerConfig[
'unzip']) {
362 return sprintf($this->
sL(
'LLL:EXT:indexed_search/locallang.xlf:extension.ODT'), $extension);
367 if ($indexerConfig[
'unrtf']) {
368 return sprintf($this->
sL(
'LLL:EXT:indexed_search/locallang.xlf:extension.RTF'), $extension);
377 return sprintf($this->
sL(
'LLL:EXT:indexed_search/locallang.xlf:extension.Images'), $extension);
383 return sprintf($this->
sL(
'LLL:EXT:indexed_search/locallang.xlf:extension.HTML'), $extension);
387 return sprintf($this->
sL(
'LLL:EXT:indexed_search/locallang.xlf:extension.TXT'), $extension);
391 return sprintf($this->
sL(
'LLL:EXT:indexed_search/locallang.xlf:extension.CSV'), $extension);
395 return sprintf($this->
sL(
'LLL:EXT:indexed_search/locallang.xlf:extension.XML'), $extension);
411 switch ((
string) $extension) {
425 protected function sL($reference, $useHtmlSpecialChar = FALSE) {
426 return $this->langObject->sL($reference, $useHtmlSpecialChar);
446 if (!$this->supportedExtensions[$ext]) {
452 if ($this->app[
'pdfinfo']) {
455 $cmd = $this->app[
'pdfinfo'] .
' ' . escapeshellarg($absFile);
459 if ((
int)$pdfInfo[
'pages']) {
460 list($low, $high) = explode(
'-', $cPKey);
464 @unlink($tempFileName);
466 $cmd = $this->app[
'pdftotext'] .
' -f ' . $low .
' -l ' . $high .
' -enc UTF-8 -q ' . escapeshellarg($absFile) .
' ' . $tempFileName;
468 if (@is_file($tempFileName)) {
470 unlink($tempFileName);
472 $this->pObj->log_setTSlogMessage(sprintf($this->
sL(
'LLL:EXT:indexed_search/locallang.xlf:pdfToolsFailed'), $absFile), 2);
474 if (strlen($content)) {
475 $contentArr = $this->pObj->splitRegularContent($this->
removeEndJunk($content));
482 if ($this->app[
'catdoc']) {
484 $cmd = $this->app[
'catdoc'] .
' -d utf-8 ' . escapeshellarg($absFile);
486 $content = implode(LF, $res);
488 $contentArr = $this->pObj->splitRegularContent($this->
removeEndJunk($content));
495 if ($this->app[
'ppthtml']) {
497 $cmd = $this->app[
'ppthtml'] .
' ' . escapeshellarg($absFile);
499 $content = implode(LF, $res);
501 $content = $this->pObj->convertHTMLToUtf8($content);
502 $contentArr = $this->pObj->splitHTMLContent($this->
removeEndJunk($content));
503 $contentArr[
'title'] = basename($absFile);
508 if ($this->app[
'xlhtml']) {
510 $cmd = $this->app[
'xlhtml'] .
' -nc -te ' . escapeshellarg($absFile);
512 $content = implode(LF, $res);
514 $content = $this->pObj->convertHTMLToUtf8($content);
515 $contentArr = $this->pObj->splitHTMLContent($this->
removeEndJunk($content));
516 $contentArr[
'title'] = basename($absFile);
531 if ($this->app[
'unzip']) {
534 $cmd = $this->app[
'unzip'] .
' -p ' . escapeshellarg($absFile) .
' content.xml';
536 $content_xml = implode(LF, $res);
539 $cmd = $this->app[
'unzip'] .
' -p ' . escapeshellarg($absFile) .
' meta.xml';
541 $meta_xml = implode(LF, $res);
543 $utf8_content = trim(strip_tags(str_replace(
'<',
' <', $content_xml)));
544 $contentArr = $this->pObj->splitRegularContent($utf8_content);
545 $contentArr[
'title'] = basename($absFile);
549 $metaContent = $metaContent[
'office:document-meta'][0][
'ch'][
'office:meta'][0][
'ch'];
550 if (is_array($metaContent)) {
551 $contentArr[
'title'] = $metaContent[
'dc:title'][0][
'values'][0] ? $metaContent[
'dc:title'][0][
'values'][0] : $contentArr[
'title'];
552 $contentArr[
'description'] = $metaContent[
'dc:subject'][0][
'values'][0] .
' ' . $metaContent[
'dc:description'][0][
'values'][0];
554 if (is_array($metaContent[
'meta:keywords'][0][
'ch'][
'meta:keyword'])) {
555 foreach ($metaContent[
'meta:keywords'][0][
'ch'][
'meta:keyword'] as $kwDat) {
556 $contentArr[
'keywords'] .= $kwDat[
'values'][0] .
' ';
564 if ($this->app[
'unrtf']) {
566 $cmd = $this->app[
'unrtf'] .
' ' . escapeshellarg($absFile);
568 $fileContent = implode(LF, $res);
570 $fileContent = $this->pObj->convertHTMLToUtf8($fileContent);
571 $contentArr = $this->pObj->splitHTMLContent($fileContent);
582 $contentCharset =
'utf-8';
583 $content = $this->pObj->convertHTMLToUtf8($content, $contentCharset);
584 $contentArr = $this->pObj->splitRegularContent($content);
585 $contentArr[
'title'] = basename($absFile);
593 $fileContent = $this->pObj->convertHTMLToUtf8($fileContent);
594 $contentArr = $this->pObj->splitHTMLContent($fileContent);
601 preg_match(
'/^[[:space:]]*<\\?xml[^>]+encoding[[:space:]]*=[[:space:]]*["\'][[:space:]]*([[:alnum:]_-]+)[[:space:]]*["\']/i', substr($fileContent, 0, 200), $reg);
602 $charset = $reg[1] ? $this->pObj->csObj->parse_charset($reg[1]) :
'utf-8';
604 $fileContent = $this->pObj->convertHTMLToUtf8(strip_tags(str_replace(
'<',
' <', $fileContent)), $charset);
605 $contentArr = $this->pObj->splitRegularContent($fileContent);
606 $contentArr[
'title'] = basename($absFile);
617 if (function_exists(
'exif_read_data')) {
618 $exif = @exif_read_data($absFile,
'IFD0');
623 $comment = trim($exif[
'COMMENT'][0] .
' ' . $exif[
'ImageDescription']);
627 $contentArr = $this->pObj->splitRegularContent($comment);
628 $contentArr[
'title'] = basename($absFile);
636 if (is_array($contentArr) && !$contentArr[
'title']) {
638 $contentArr[
'title'] = str_replace(
'_',
' ', basename($absFile));
655 static $lastLocale = NULL;
656 if (!
$GLOBALS[
'TYPO3_CONF_VARS'][
'SYS'][
'UTF8filesystem']) {
661 if ($lastLocale == NULL) {
662 throw new \RuntimeException(
'Cannot reset locale to NULL.', 1357064326);
664 setlocale(LC_CTYPE, $lastLocale);
667 if ($lastLocale !== NULL) {
668 throw new \RuntimeException(
'Cannot set new locale as locale has already been changed before.', 1357064437);
670 $lastLocale = setlocale(LC_CTYPE, 0);
671 setlocale(LC_CTYPE,
$GLOBALS[
'TYPO3_CONF_VARS'][
'SYS'][
'systemLocale']);
692 $cmd = $this->app[
'pdfinfo'] .
' ' . escapeshellarg($absFile);
696 if ((
int)$pdfInfo[
'pages']) {
699 if ($this->pdf_mode > 0) {
700 $iter = ceil($pdfInfo[
'pages'] / $this->pdf_mode);
705 for ($a = 0; $a < $iter; $a++) {
706 $low = floor($a * ($pdfInfo[
'pages'] / $iter)) + 1;
707 $high = floor(($a + 1) * ($pdfInfo[
'pages'] / $iter));
708 $cParts[] = $low .
'-' . $high;
729 if (is_array($pdfInfoArray)) {
730 foreach ($pdfInfoArray as $line) {
731 $parts = explode(
':', $line, 2);
732 if (count($parts) > 1 && trim($parts[0])) {
733 $res[strtolower(trim($parts[0]))] = trim($parts[1]);
748 return trim(preg_replace(
'/[' . LF . chr(12) .
']*$/',
'', $string));
764 if ($extension ==
'htm') {
767 if ($extension ==
'jpeg') {
770 return 'EXT:indexed_search/pi/res/' . $extension .
'.gif';
splitPdfInfo($pdfInfoArray)
static xml2tree($string, $depth=999)
static forceIntegerInRange($theInt, $min, $max=2000000000, $defaultValue=0)
setLocaleForServerFileSystem($resetLocale=FALSE)
searchTypeMediaTitle($extension)
static trimExplode($delim, $string, $removeEmptyValues=FALSE, $limit=0)
readFileContent($ext, $absFile, $cPKey)
sL($reference, $useHtmlSpecialChar=FALSE)
static getUrl($url, $includeHeader=0, $requestHeaders=FALSE, &$report=NULL)
fileContentParts($ext, $absFile)
static tempnam($filePrefix, $fileSuffix='')
isMultiplePageExtension($extension)
if(!defined('TYPO3_MODE')) $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_userauth.php']['logoff_pre_processing'][]
static exec($command, &$output=NULL, &$returnValue=0)