17 use Psr\Http\Message\ServerRequestInterface;
46 'csObj' =>
'Using $csObj within Indexing is discouraged, the property will be removed in TYPO3 v10.0 - if needed instantiate CharsetConverter yourself.',
53 -1 =>
'mtime matched the document, so no changes detected and no content updated',
54 -2 =>
'The minimum age was not exceeded',
55 1 =>
'The configured max-age was exceeded for the document and thus it\'s indexed.',
56 2 =>
'The minimum age was exceed and mtime was set and the mtime was different, so the page was indexed.',
57 3 =>
'The minimum age was exceed, but mtime was not set, so the page was indexed.',
58 4 =>
'Page has never been indexed (is not represented in the index_phash table).'
265 $this->timeTracker = GeneralUtility::makeInstance(TimeTracker::class);
276 $disableFrontendIndexing = (bool)GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(
'indexed_search',
'disableFrontendIndexing');
279 if (\
TYPO3\CMS\Core\Utility\ExtensionManagementUtility::isLoaded(
'crawler') && $pObj->applicationData[
'tx_crawler'][
'running'] && in_array(
'tx_indexedsearch_reindex', $pObj->applicationData[
'tx_crawler'][
'parameters'][
'procInstructions'])) {
281 $pObj->applicationData[
'tx_crawler'][
'log'][] =
'Forced Re-indexing enabled';
283 $this->crawlerActive =
true;
285 $this->forceIndexing =
true;
288 if ($pObj->config[
'config'][
'index_enable']) {
290 if (!$disableFrontendIndexing || $this->crawlerActive) {
291 if (!$pObj->page[
'no_search']) {
292 if (!$pObj->no_cache) {
294 $languageAspect = GeneralUtility::makeInstance(Context::class)->getAspect(
'language');
295 if ($languageAspect->getId() === $languageAspect->getContentId()) {
299 $this->conf[
'id'] = $pObj->id;
301 $this->conf[
'type'] = $pObj->type;
303 $this->conf[
'sys_language_uid'] = $languageAspect->getId();
305 $this->conf[
'MP'] = $pObj->MP;
308 $this->conf[
'gr_list'] = implode(
',', GeneralUtility::makeInstance(Context::class)->getPropertyFromAspect(
'frontend.user',
'groupIds', [0, -1]));
310 $this->conf[
'cHash'] = $pObj->cHash;
312 $this->conf[
'cHash_array'] = $pObj->cHash_array;
314 $this->conf[
'staticPageArguments'] = [];
316 if (
$GLOBALS[
'TYPO3_REQUEST'] instanceof ServerRequestInterface) {
317 $pageArguments =
$GLOBALS[
'TYPO3_REQUEST']->getAttribute(
'routing',
null);
318 if ($pageArguments instanceof PageArguments) {
319 $this->conf[
'staticPageArguments'] = $pageArguments->getStaticArguments();
323 $this->conf[
'crdate'] = $pObj->page[
'crdate'];
328 $this->conf[
'page_cache_reg1'] = $pObj->page_cache_reg1;
331 $this->conf[
'rootline_uids'] = [];
332 foreach ($pObj->config[
'rootLine'] as $rlkey => $rldat) {
333 $this->conf[
'rootline_uids'][$rlkey] = $rldat[
'uid'];
336 $this->conf[
'content'] = $pObj->content;
338 $this->conf[
'indexedDocTitle'] = $pObj->convOutputCharset($pObj->indexedDocTitle);
340 $this->conf[
'metaCharset'] = $pObj->metaCharset;
342 $this->conf[
'mtime'] = $pObj->register[
'SYS_LASTCHANGED'] ?? $pObj->page[
'SYS_LASTCHANGED'];
345 $this->conf[
'index_externals'] = $pObj->config[
'config'][
'index_externals'];
347 $this->conf[
'index_descrLgd'] = $pObj->config[
'config'][
'index_descrLgd'];
349 $this->conf[
'index_metatags'] = $pObj->config[
'config'][
'index_metatags'] ??
true;
351 $this->conf[
'recordUid'] = 0;
352 $this->conf[
'freeIndexUid'] = 0;
353 $this->conf[
'freeIndexSetId'] = 0;
358 $this->
log_setTSlogMessage(
'Index page? No, languageId was different from contentId which indicates that the page contains fall-back content and that would be falsely indexed as localized content.');
361 $this->
log_setTSlogMessage(
'Index page? No, page was set to "no_cache" and so cannot be indexed.');
364 $this->
log_setTSlogMessage(
'Index page? No, The "No Search" flag has been set in the page properties!');
367 $this->
log_setTSlogMessage(
'Index page? No, Ordinary Frontend indexing during rendering is disabled.');
389 public function backend_initIndexer($id, $type, $sys_language_uid, $MP, $uidRL, $cHash_array = [], $createCHash =
false)
394 $this->conf[
'id'] = $id;
396 $this->conf[
'type'] = $type;
398 $this->conf[
'sys_language_uid'] = $sys_language_uid;
400 $this->conf[
'MP'] = $MP;
402 $this->conf[
'gr_list'] =
'0,-1';
406 $cHash_array[
'id'] = $id;
408 $cacheHash = GeneralUtility::makeInstance(\
TYPO3\CMS\Frontend\Page\CacheHashCalculator::class);
411 $this->conf[
'cHash'] =
'';
414 $this->conf[
'cHash_array'] = $cHash_array;
417 $this->conf[
'freeIndexUid'] = 0;
418 $this->conf[
'freeIndexSetId'] = 0;
421 $this->conf[
'page_cache_reg1'] = 0;
424 $this->conf[
'rootline_uids'] = $uidRL;
426 $this->conf[
'index_externals'] = 1;
428 $this->conf[
'index_descrLgd'] = 200;
430 $this->conf[
'index_metatags'] =
true;
444 $this->conf[
'freeIndexUid'] = $freeIndexUid;
445 $this->conf[
'freeIndexSetId'] = $freeIndexSetId;
460 public function backend_indexAsTYPO3Page($title, $keywords, $description, $content, $charset, $mtime, $crdate = 0, $recordUid = 0)
463 $this->conf[
'mtime'] = $mtime;
465 $this->conf[
'crdate'] = $crdate;
467 $this->conf[
'recordUid'] = $recordUid;
470 $this->conf[
'content'] =
'
473 <title>' . htmlspecialchars($title) .
'</title>
474 <meta name="keywords" content="' . htmlspecialchars($keywords) .
'" />
475 <meta name="description" content="' . htmlspecialchars($description) .
'" />
478 ' . htmlspecialchars($content) .
'
483 $this->conf[
'metaCharset'] = $charset;
485 $this->conf[
'indexedDocTitle'] =
'';
499 public function init()
502 $this->cHashParams = $this->conf[
'cHash_array'];
503 if (is_array($this->cHashParams) && !empty($this->cHashParams)) {
504 if ($this->conf[
'cHash']) {
506 $this->cHashParams[
'cHash'] = $this->conf[
'cHash'];
508 unset($this->cHashParams[
'encryptionKey']);
513 $this->indexerConfig = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(
'indexed_search');
519 $this->enableMetaphoneSearch = !isset($this->indexerConfig[
'enableMetaphoneSearch']) || $this->indexerConfig[
'enableMetaphoneSearch'];
523 if ($this->conf[
'index_externals']) {
527 $lexerObjectClassName =
$GLOBALS[
'TYPO3_CONF_VARS'][
'EXTCONF'][
'indexed_search'][
'lexer'] ?: Lexer::class;
528 $this->lexerObj = GeneralUtility::makeInstance($lexerObjectClassName);
529 $this->lexerObj->debug = $this->indexerConfig[
'debugMode'];
532 if ($this->enableMetaphoneSearch &&
$GLOBALS[
'TYPO3_CONF_VARS'][
'EXTCONF'][
'indexed_search'][
'metaphone']) {
533 $this->metaphoneObj = GeneralUtility::makeInstance(
$GLOBALS[
'TYPO3_CONF_VARS'][
'EXTCONF'][
'indexed_search'][
'metaphone']);
534 $this->metaphoneObj->pObj = $this;
537 $this->csObj = GeneralUtility::makeInstance(\
TYPO3\CMS\Core\Charset\CharsetConverter::class);
548 foreach (
$GLOBALS[
'TYPO3_CONF_VARS'][
'EXTCONF'][
'indexed_search'][
'external_parsers'] ?? [] as $extension => $className) {
549 $this->external_parsers[$extension] = GeneralUtility::makeInstance($className);
550 $this->external_parsers[$extension]->pObj = $this;
552 if (!$this->external_parsers[$extension]->initParser($extension)) {
553 unset($this->external_parsers[$extension]);
570 if ($check > 0 || !$is_grlist || $this->forceIndexing) {
572 if ($this->forceIndexing) {
574 } elseif ($check > 0) {
582 if ($this->conf[
'indexedDocTitle']) {
583 $this->contentParts[
'title'] = $this->conf[
'indexedDocTitle'];
592 if (!is_array($checkCHash) || $check === 1) {
593 $Pstart = GeneralUtility::milliseconds();
594 $this->
log_push(
'Converting charset of content (' . $this->conf[
'metaCharset'] .
') to utf-8',
'');
598 $this->
log_push(
'Extract words from content',
'');
602 $this->
log_push(
'Analyze the extracted words',
'');
610 $this->
log_push(
'Check word list and submit words',
'');
617 $this->
updateParsetime($this->hash[
'phash'], GeneralUtility::milliseconds() - $Pstart);
619 $this->
log_push(
'Checking external files',
'');
620 if ($this->conf[
'index_externals']) {
626 $this->
updateTstamp($this->hash[
'phash'], $this->conf[
'mtime']);
629 $this->
update_grlist($checkCHash[
'phash'], $this->hash[
'phash']);
631 $this->
log_setTSlogMessage(
'Indexing not needed, the contentHash, ' . $this->content_md5h .
', has not changed. Timestamp, grlist and rootline updated if necessary.');
649 $contentArr[
'body'] = stristr($content,
'<body');
650 $headPart = substr($content, 0, -strlen($contentArr[
'body']));
652 $this->
embracingTags($headPart,
'TITLE', $contentArr[
'title'], $dummy2, $dummy);
653 $titleParts = explode(
':', $contentArr[
'title'], 2);
654 $contentArr[
'title'] = trim($titleParts[1] ?? $titleParts[0]);
656 if ($this->conf[
'index_metatags']) {
659 while ($this->
embracingTags($headPart,
'meta', $dummy, $headPart, $meta[$i])) {
663 for ($i = 0; isset($meta[$i]); $i++) {
665 $meta[$i] = GeneralUtility::get_tag_attributes($meta[$i],
true);
666 if (stristr($meta[$i][
'name'],
'keywords')) {
669 if (stristr($meta[$i][
'name'],
'description')) {
670 $contentArr[
'description'] .=
',' . $meta[$i][
'content'];
677 $tagList = explode(
',', $this->excludeSections);
678 foreach ($tagList as $tag) {
679 while ($this->
embracingTags($contentArr[
'body'], $tag, $dummy, $contentArr[
'body'], $dummy2)) {
683 $contentArr[
'body'] = str_replace(
'<',
' <', $contentArr[
'body']);
684 $contentArr[
'body'] = trim(strip_tags($contentArr[
'body']));
685 $contentArr[
'keywords'] = trim($contentArr[
'keywords']);
686 $contentArr[
'description'] = trim($contentArr[
'description']);
699 if (preg_match(
'/<meta[[:space:]]+[^>]*http-equiv[[:space:]]*=[[:space:]]*["\']CONTENT-TYPE["\'][^>]*>/i', $content, $reg)) {
700 if (preg_match(
'/charset[[:space:]]*=[[:space:]]*([[:alnum:]-]+)/i', $reg[0], $reg2)) {
717 $charset = trim(strtolower($charset));
719 if ($charset && $charset !==
'utf-8') {
720 $content = mb_convert_encoding($content,
'utf-8', $charset);
723 return html_entity_decode($content);
738 public function embracingTags($string, $tagName, &$tagContent, &$stringAfter, &$paramList)
740 $endTag =
'</' . $tagName .
'>';
741 $startTag =
'<' . $tagName;
743 $isTagInText = stristr($string, $startTag);
748 list($paramList, $isTagInText) = explode(
'>', substr($isTagInText, strlen($startTag)), 2);
749 $afterTagInText = stristr($isTagInText, $endTag);
750 if ($afterTagInText) {
751 $stringBefore = substr($string, 0, strpos(strtolower($string), strtolower($startTag)));
752 $tagContent = substr($isTagInText, 0, strlen($isTagInText) - strlen($afterTagInText));
753 $stringAfter = $stringBefore . substr($afterTagInText, strlen($endTag));
756 $stringAfter = $isTagInText;
769 $expBody = preg_split(
'/\\<\\!\\-\\-[\\s]?TYPO3SEARCH_/', $body);
770 if (count($expBody) > 1) {
772 foreach ($expBody as $val) {
773 $part = explode(
'-->', $val, 2);
774 if (trim($part[0]) ===
'begin') {
777 } elseif (trim($part[0]) ===
'end') {
797 if ($this->indexerConfig[
'useCrawlerForExternalFiles'] && \
TYPO3\CMS\Core\Utility\ExtensionManagementUtility::isLoaded(
'crawler')) {
798 $crawler = GeneralUtility::makeInstance(\tx_crawler_lib::class);
801 foreach ($list as $linkInfo) {
803 if ($linkInfo[
'localPath']) {
805 $linkSource = htmlspecialchars_decode($linkInfo[
'localPath']);
807 $linkSource = htmlspecialchars_decode($linkInfo[
'href']);
810 $qParts = parse_url($linkSource);
812 if ($qParts[
'query'] && strstr($qParts[
'query'],
'jumpurl=')) {
813 parse_str($qParts[
'query'], $getP);
814 $linkSource = $getP[
'jumpurl'];
815 $qParts = parse_url($linkSource);
817 if (!$linkInfo[
'localPath'] && $qParts[
'scheme']) {
818 if ($this->indexerConfig[
'indexExternalURLs']) {
822 } elseif (!$qParts[
'query']) {
823 $linkSource = urldecode($linkSource);
824 if (GeneralUtility::isAllowedAbsPath($linkSource)) {
825 $localFile = $linkSource;
829 if ($localFile && @is_file($localFile)) {
831 if ($linkInfo[
'localPath']) {
832 $fI = pathinfo($linkSource);
833 $ext = strtolower($fI[
'extension']);
834 if (is_object($crawler)) {
836 'document' => $linkSource,
837 'alturl' => $linkInfo[
'href'],
840 unset($params[
'conf'][
'content']);
841 $crawler->addQueueEntry_callBack(0, $params, Hook\CrawlerFilesHook::class, $this->conf[
'id']);
842 $this->
log_setTSlogMessage(
'media "' . $params[
'document'] .
'" added to "crawler" queue.', 1);
847 if (is_object($crawler)) {
849 'document' => $linkSource,
852 unset($params[
'conf'][
'content']);
853 $crawler->addQueueEntry_callBack(0, $params, Hook\CrawlerFilesHook::class, $this->conf[
'id']);
854 $this->
log_setTSlogMessage(
'media "' . $params[
'document'] .
'" added to "crawler" queue.', 1);
873 $htmlParser = GeneralUtility::makeInstance(\
TYPO3\CMS\Core\Html\HtmlParser::class);
874 $htmlParts = $htmlParser->splitTags(
'a', $html);
875 $hyperLinksData = [];
876 foreach ($htmlParts as $index => $tagData) {
877 if ($index % 2 !== 0) {
878 $tagAttributes = $htmlParser->get_tag_attributes($tagData,
true);
879 $firstTagName = $htmlParser->getFirstTagName($tagData);
880 if (strtolower($firstTagName) ===
'a') {
881 if ($tagAttributes[0][
'href'] && $tagAttributes[0][
'href'][0] !==
'#') {
882 $hyperLinksData[] = [
884 'href' => $tagAttributes[0][
'href'],
885 'localPath' => $this->
createLocalPath(urldecode($tagAttributes[0][
'href']))
891 return $hyperLinksData;
903 $htmlParser = GeneralUtility::makeInstance(\
TYPO3\CMS\Core\Html\HtmlParser::class);
904 $htmlParts = $htmlParser->splitTags(
'base', $html);
905 foreach ($htmlParts as $index => $tagData) {
906 if ($index % 2 !== 0) {
907 $tagAttributes = $htmlParser->get_tag_attributes($tagData,
true);
908 $firstTagName = $htmlParser->getFirstTagName($tagData);
909 if (strtolower($firstTagName) ===
'base') {
910 $href = $tagAttributes[0][
'href'];
935 if (stristr($urlHeaders[
'Content-Type'],
'text/html')) {
936 $content = ($this->indexExternalUrl_content = GeneralUtility::getUrl($externalUrl));
937 if ((
string)$content !==
'') {
939 $tmpFile = GeneralUtility::tempnam(
'EXTERNAL_URL');
941 GeneralUtility::writeFile($tmpFile, $content);
960 $content = GeneralUtility::getUrl($url, 2);
961 if ((
string)$content !==
'') {
963 $headers = GeneralUtility::trimExplode(LF, $content,
true);
965 foreach ($headers as $line) {
966 if (trim($line) ===
'') {
969 list($headKey, $headValue) = explode(
':', $line, 2);
970 $retVal[$headKey] = $headValue;
986 'createLocalPathFromT3vars',
987 'createLocalPathUsingAbsRefPrefix',
988 'createLocalPathUsingDomainURL',
989 'createLocalPathFromAbsoluteURL',
990 'createLocalPathFromRelativeURL'
992 foreach ($pathFunctions as $functionName) {
993 $localPath = $this->{$functionName}($sourcePath);
994 if ($localPath !=
'') {
1012 $indexLocalFiles =
$GLOBALS[
'T3_VAR'][
'ext'][
'indexed_search'][
'indexLocalFiles'] ??
null;
1013 if (is_array($indexLocalFiles)) {
1014 $md5 = GeneralUtility::shortMD5($sourcePath);
1018 if (isset($indexLocalFiles[$md5]) && is_file($indexLocalFiles[$md5])) {
1019 $localPath = $indexLocalFiles[$md5];
1034 $baseURL = GeneralUtility::getIndpEnv(
'TYPO3_SITE_URL');
1035 $baseURLLength = strlen($baseURL);
1036 if (strpos($sourcePath, $baseURL) === 0) {
1037 $sourcePath = substr($sourcePath, $baseURLLength);
1039 if (!self::isAllowedLocalFile($localPath)) {
1057 $absRefPrefix =
$GLOBALS[
'TSFE']->config[
'config'][
'absRefPrefix'];
1058 $absRefPrefixLength = strlen($absRefPrefix);
1059 if ($absRefPrefixLength > 0 && strpos($sourcePath, $absRefPrefix) === 0) {
1060 $sourcePath = substr($sourcePath, $absRefPrefixLength);
1062 if (!self::isAllowedLocalFile($localPath)) {
1080 if ($sourcePath[0] ===
'/') {
1081 $sourcePath = substr($sourcePath, 1);
1083 if (!self::isAllowedLocalFile($localPath)) {
1099 if (self::isRelativeURL($sourcePath)) {
1101 if (!self::isAllowedLocalFile($localPath)) {
1116 $urlParts = @parse_url($url);
1117 return (!isset($urlParts[
'scheme']) || $urlParts[
'scheme'] ===
'') && $urlParts[
'path'][0] !==
'/';
1128 $filePath = GeneralUtility::resolveBackPath($filePath);
1130 $isFile = is_file($filePath);
1131 return $insideWebPath && $isFile;
1147 public function indexRegularDocument($file, $force =
false, $contentTmpFile =
'', $altExtension =
'')
1150 $fI = pathinfo($file);
1151 $ext = $altExtension ?: strtolower($fI[
'extension']);
1153 if (!$contentTmpFile) {
1154 if (!GeneralUtility::isAbsPath($file)) {
1161 $absFile = GeneralUtility::isAllowedAbsPath($absFile) ? $absFile :
'';
1163 $absFile = $contentTmpFile;
1166 if ($absFile && @is_file($absFile)) {
1167 if ($this->external_parsers[$ext]) {
1168 $fileInfo = stat($absFile);
1170 foreach ($cParts as $cPKey) {
1171 $this->internal_log = [];
1173 $Pstart = GeneralUtility::milliseconds();
1174 $subinfo = [
'key' => $cPKey];
1176 $phash_arr = ($this->file_phash_arr = $this->
setExtHashes($file, $subinfo));
1178 if ($check > 0 || $force) {
1185 if ($this->externalFileCounter < $this->maxExternalFiles || $force) {
1195 $this->externalFileCounter++;
1197 $this->
log_push(
'Extract words from content',
'');
1201 $this->
log_push(
'Analyze the extracted words',
'');
1205 $this->
log_push(
'Submitting page',
'');
1210 $this->
log_push(
'Check word list and submit words',
'');
1217 $this->
updateParsetime($phash_arr[
'phash'], GeneralUtility::milliseconds() - $Pstart);
1220 $this->
updateTstamp($phash_arr[
'phash'], $fileInfo[
'mtime']);
1227 $this->
log_setTSlogMessage(
'The limit of ' . $this->maxExternalFiles .
' has already been exceeded, so no indexing will take place this time.');
1241 $this->
log_setTSlogMessage(
'Indexing not possible; File "' . $absFile .
'" not found or valid.');
1254 public function readFileContent($fileExtension, $absoluteFileName, $sectionPointer)
1256 $contentArray =
null;
1258 if (is_object($this->external_parsers[$fileExtension])) {
1259 $contentArray = $this->external_parsers[$fileExtension]->readFileContent($fileExtension, $absoluteFileName, $sectionPointer);
1261 return $contentArray;
1275 if (is_object($this->external_parsers[$ext])) {
1276 $cParts = $this->external_parsers[$ext]->fileContentParts($ext, $absFile);
1291 $contentArr[
'body'] = $content;
1309 foreach ($contentArr as $key => $value) {
1310 if ((
string)$contentArr[$key] !==
'') {
1311 if ($charset !==
'utf-8') {
1312 $contentArr[$key] = mb_convert_encoding($contentArr[$key],
'utf-8', $charset);
1315 $contentArr[$key] = html_entity_decode($contentArr[$key]);
1329 foreach ($contentArr as $key => $value) {
1330 $contentArr[$key] = $this->lexerObj->split2Words($contentArr[$key]);
1333 $contentArr[
'title'] = array_unique($contentArr[
'title']);
1334 $contentArr[
'keywords'] = array_unique($contentArr[
'keywords']);
1335 $contentArr[
'description'] = array_unique($contentArr[
'description']);
1351 $bodyDescription = preg_replace(
'/\s+/u',
' ', $contentArr[
'body']);
1353 $bodyDescription = mb_strcut($bodyDescription, 0, $maxL,
'utf-8');
1355 return $bodyDescription;
1384 foreach ($content[$key] as $val) {
1385 $val = substr($val, 0, 60);
1387 if (!isset($retArr[$val])) {
1391 $metaphone = $this->enableMetaphoneSearch ? substr($this->
metaphone($val, $this->storeMetaphoneInfoAsWords), 0, 60) :
'';
1392 $retArr[$val][
'metaphone'] = $metaphone;
1395 if ($this->storeMetaphoneInfoAsWords) {
1396 $this->metaphoneContent .=
' ' . $retArr[$val][
'metaphone'];
1399 $retArr[$val][
'cmp'] = $retArr[$val][
'cmp'] | pow(2, $offset);
1401 $retArr[$val][
'count']++;
1414 foreach ($content[
'body'] as $key => $val) {
1415 $val = substr($val, 0, 60);
1417 if (!isset($retArr[$val])) {
1419 $retArr[$val][
'first'] = $key;
1423 $metaphone = $this->enableMetaphoneSearch ? substr($this->
metaphone($val, $this->storeMetaphoneInfoAsWords), 0, 60) :
'';
1424 $retArr[$val][
'metaphone'] = $metaphone;
1427 if ($this->storeMetaphoneInfoAsWords) {
1428 $this->metaphoneContent .=
' ' . $retArr[$val][
'metaphone'];
1431 $retArr[$val][
'count']++;
1443 public function metaphone($word, $returnRawMetaphoneValue =
false)
1445 if (is_object($this->metaphoneObj)) {
1446 $metaphoneRawValue = $this->metaphoneObj->metaphone($word, $this->conf[
'sys_language_uid']);
1451 if ($returnRawMetaphoneValue) {
1452 $result = $metaphoneRawValue;
1453 } elseif ($metaphoneRawValue !==
'') {
1476 'phash' => $this->hash[
'phash'],
1477 'phash_grouping' => $this->hash[
'phash_grouping'],
1478 'cHashParams' => serialize($this->cHashParams),
1479 'static_page_arguments' => is_array($this->conf[
'staticPageArguments']) ? json_encode($this->conf[
'staticPageArguments']) : null,
1480 'contentHash' => $this->content_md5h,
1481 'data_page_id' => $this->conf[
'id'],
1483 'data_page_reg1' => $this->conf[
'page_cache_reg1'],
1484 'data_page_type' => $this->conf[
'type'],
1485 'data_page_mp' => $this->conf[
'MP'],
1486 'gr_list' => $this->conf[
'gr_list'],
1489 'item_title' => $this->contentParts[
'title'],
1491 'item_mtime' => (int)$this->conf[
'mtime'],
1492 'item_size' => strlen($this->conf[
'content']),
1495 'item_crdate' => $this->conf[
'crdate'],
1497 'sys_language_uid' => $this->conf[
'sys_language_uid'],
1500 'recordUid' => (int)$this->conf[
'recordUid'],
1501 'freeIndexUid' => (int)$this->conf[
'freeIndexUid'],
1502 'freeIndexSetId' => (int)$this->conf[
'freeIndexSetId']
1505 $connection = GeneralUtility::makeInstance(ConnectionPool::class)
1506 ->getConnectionForTable(
'index_phash');
1507 $connection->insert(
1516 $this->
submit_grlist($this->hash[
'phash'], $this->hash[
'phash']);
1519 'phash' => $this->hash[
'phash'],
1520 'fulltextdata' => implode(
' ', $this->contentParts),
1523 if ($this->indexerConfig[
'fullTextDataLength'] > 0) {
1524 $fields[
'fulltextdata'] = substr(
$fields[
'fulltextdata'], 0, $this->indexerConfig[
'fullTextDataLength']);
1527 $connection = GeneralUtility::makeInstance(ConnectionPool::class)
1528 ->getConnectionForTable(
'index_fulltext');
1529 $connection->insert(
'index_fulltext',
$fields);
1532 if ($this->indexerConfig[
'debugMode']) {
1534 'phash' => $this->hash[
'phash'],
1535 'debuginfo' => serialize([
1536 'cHashParams' => $this->cHashParams,
1537 'external_parsers initialized' => array_keys($this->external_parsers),
1538 'conf' => array_merge($this->conf, [
'content' => substr($this->conf[
'content'], 0, 1000)]),
1539 'contentParts' => array_merge($this->contentParts, [
'body' => substr($this->contentParts[
'body'], 0, 1000)]),
1540 'logs' => $this->internal_log,
1541 'lexer' => $this->lexerObj->debugString
1545 $connection = GeneralUtility::makeInstance(ConnectionPool::class)
1546 ->getConnectionForTable(
'index_debug');
1547 $connection->insert(
'index_debug',
$fields);
1564 'phash_x' => $phash_x,
1566 'gr_list' => $this->conf[
'gr_list']
1569 $connection = GeneralUtility::makeInstance(ConnectionPool::class)
1570 ->getConnectionForTable(
'index_grlist');
1571 $connection->insert(
'index_grlist',
$fields);
1586 'phash_t3' => $hash_t3,
1587 'page_id' => (int)$this->conf[
'id']
1591 $connection = GeneralUtility::makeInstance(ConnectionPool::class)
1592 ->getConnectionForTable(
'index_section');
1593 $connection->insert(
'index_section',
$fields);
1606 $connectionPool = GeneralUtility::makeInstance(ConnectionPool::class);
1607 $tableArray = [
'index_phash',
'index_section',
'index_grlist',
'index_fulltext',
'index_debug'];
1608 foreach ($tableArray as $table) {
1610 $connectionPool->getConnectionForTable($table)->delete($table, [
'phash' => (
int)$phash]);
1618 $connectionPool->getConnectionForTable(
'index_section')
1619 ->delete(
'index_section', [
'phash_t3' => (
int)$phash]);
1644 $storeItemType = $this->external_parsers[$ext]->ext2itemtype_map[$ext];
1645 $storeItemType = $storeItemType ?: $ext;
1649 $fileParts = parse_url($file);
1652 'phash' =>
$hash[
'phash'],
1653 'phash_grouping' =>
$hash[
'phash_grouping'],
1654 'cHashParams' => serialize($subinfo),
1656 'data_filename' => $file,
1657 'item_type' => $storeItemType,
1660 'item_mtime' => $mtime,
1661 'item_size' => $size,
1662 'item_crdate' => $ctime,
1665 'gr_list' => $this->conf[
'gr_list'],
1666 'externalUrl' => $fileParts[
'scheme'] ? 1 : 0,
1667 'recordUid' => (int)$this->conf[
'recordUid'],
1668 'freeIndexUid' => (
int)$this->conf[
'freeIndexUid'],
1669 'freeIndexSetId' => (int)$this->conf[
'freeIndexSetId'],
1670 'sys_language_uid' => (
int)$this->conf[
'sys_language_uid']
1673 $connection = GeneralUtility::makeInstance(ConnectionPool::class)
1674 ->getConnectionForTable(
'index_phash');
1675 $connection->insert(
1683 'phash' =>
$hash[
'phash'],
1687 if ($this->indexerConfig[
'fullTextDataLength'] > 0) {
1688 $fields[
'fulltextdata'] = substr(
$fields[
'fulltextdata'], 0, $this->indexerConfig[
'fullTextDataLength']);
1691 $connection = GeneralUtility::makeInstance(ConnectionPool::class)
1692 ->getConnectionForTable(
'index_fulltext');
1693 $connection->insert(
'index_fulltext',
$fields);
1696 if ($this->indexerConfig[
'debugMode']) {
1698 'phash' =>
$hash[
'phash'],
1699 'debuginfo' => serialize([
1700 'cHashParams' => $subinfo,
1702 'logs' => $this->internal_log,
1703 'lexer' => $this->lexerObj->debugString
1707 $connection = GeneralUtility::makeInstance(ConnectionPool::class)
1708 ->getConnectionForTable(
'index_debug');
1709 $connection->insert(
'index_debug',
$fields);
1726 $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
1727 ->getQueryBuilderForTable(
'index_grlist');
1728 $count = (int)$queryBuilder->count(
'*')
1729 ->from(
'index_grlist')
1731 $queryBuilder->expr()->eq(
1733 $queryBuilder->createNamedParameter(
$hash, \PDO::PARAM_INT)
1735 $queryBuilder->expr()->orX(
1736 $queryBuilder->expr()->eq(
1738 $queryBuilder->createNamedParameter(
1743 $queryBuilder->expr()->eq(
1745 $queryBuilder->createNamedParameter(
1772 $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
1773 ->getQueryBuilderForTable(
'index_section');
1774 $count = (int)$queryBuilder->count(
'phash')
1775 ->from(
'index_section')
1777 $queryBuilder->expr()->eq(
1779 $queryBuilder->createNamedParameter(
$hash, \PDO::PARAM_INT)
1781 $queryBuilder->expr()->eq(
1783 $queryBuilder->createNamedParameter($this->conf[
'id'], \PDO::PARAM_INT)
1801 $connectionPool = GeneralUtility::makeInstance(ConnectionPool::class);
1803 $tableArray = [
'index_phash',
'index_grlist',
'index_fulltext',
'index_debug'];
1804 foreach ($tableArray as $table) {
1808 $connectionPool->getConnectionForTable($table)->delete($table, [
'phash' => (
int)$phash]);
1831 $row = GeneralUtility::makeInstance(ConnectionPool::class)->getConnectionForTable(
'index_phash')
1833 [
'item_mtime',
'tstamp'],
1835 [
'phash' => (
int)$phash],
1843 if ($this->tstamp_maxAge && $row[
'tstamp'] + $this->tstamp_maxAge <
$GLOBALS[
'EXEC_TIME']) {
1848 if (!$this->tstamp_minAge || $row[
'tstamp'] + $this->tstamp_minAge <
$GLOBALS[
'EXEC_TIME']) {
1852 if ($row[
'item_mtime'] != $mtime) {
1859 if ($this->tstamp_maxAge) {
1860 $this->
log_setTSlogMessage(
'mtime matched, timestamp NOT updated because a maxAge is set (' . ($row[
'tstamp'] + $this->tstamp_maxAge -
$GLOBALS[
'EXEC_TIME']) .
' seconds to expire time).', 1);
1893 $row = GeneralUtility::makeInstance(ConnectionPool::class)->getConnectionForTable(
'index_phash')
1898 'phash_grouping' => (
int)$this->hash[
'phash_grouping'],
1899 'contentHash' => (
int)$this->content_md5h
1926 $count = (int)GeneralUtility::makeInstance(ConnectionPool::class)
1927 ->getConnectionForTable(
'index_phash')
1932 'phash_grouping' => (
int)$hashGr,
1937 $result = $count === 0;
1952 $count = (int)GeneralUtility::makeInstance(ConnectionPool::class)
1953 ->getConnectionForTable(
'index_grlist')
1957 [
'phash_x' => (
int)$phash_x]
1960 $result = $count > 0;
1975 $count = (int)GeneralUtility::makeInstance(ConnectionPool::class)
1976 ->getConnectionForTable(
'index_grlist')
1981 'phash' => (
int)$phash,
1988 $this->
log_setTSlogMessage(
'Inserted gr_list \'' . $this->conf[
'gr_list'] .
'\' for phash \
'' . $phash .
'\'', 1);
2010 $updateFields[
'item_mtime'] = (int)$mtime;
2013 GeneralUtility::makeInstance(ConnectionPool::class)
2014 ->getConnectionForTable(
'index_phash')
2019 'phash' => (
int)$phash
2035 GeneralUtility::makeInstance(ConnectionPool::class)
2036 ->getConnectionForTable(
'index_phash')
2040 'freeIndexSetId' => (
int)$this->conf[
'freeIndexSetId']
2043 'phash' => (
int)$phash
2060 GeneralUtility::makeInstance(ConnectionPool::class)
2061 ->getConnectionForTable(
'index_phash')
2065 'parsetime' => (
int)$parsetime
2068 'phash' => (
int)$phash
2085 GeneralUtility::makeInstance(ConnectionPool::class)
2086 ->getConnectionForTable(
'index_section')
2091 'page_id' => (
int)$this->conf[
'id']
2104 $fieldArray[
'rl0'] = (int)$this->conf[
'rootline_uids'][0];
2105 $fieldArray[
'rl1'] = (int)$this->conf[
'rootline_uids'][1];
2106 $fieldArray[
'rl2'] = (int)$this->conf[
'rootline_uids'][2];
2107 foreach (
$GLOBALS[
'TYPO3_CONF_VARS'][
'EXTCONF'][
'indexed_search'][
'addRootLineFields'] ?? [] as $fieldName => $rootLineLevel) {
2108 $fieldArray[$fieldName] = (int)$this->conf[
'rootline_uids'][$rootLineLevel];
2128 $wordListArrayCount = count($wordListArray);
2129 $phashArray = array_map(
'intval', array_column($wordListArray,
'hash'));
2131 $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable(
'index_words');
2132 $count = (int)$queryBuilder->count(
'baseword')
2133 ->from(
'index_words')
2135 $queryBuilder->expr()->in(
2137 $queryBuilder->createNamedParameter($phashArray, Connection::PARAM_INT_ARRAY)
2143 if ($count !== $wordListArrayCount) {
2144 $connection = GeneralUtility::makeInstance(ConnectionPool::class)->getConnectionForTable(
'index_words');
2145 $queryBuilder = $connection->createQueryBuilder();
2147 $result = $queryBuilder->select(
'baseword')
2148 ->from(
'index_words')
2150 $queryBuilder->expr()->in(
2152 $queryBuilder->createNamedParameter($phashArray, Connection::PARAM_INT_ARRAY)
2158 while ($row = $result->fetch()) {
2159 unset($wordListArray[$row[
'baseword']]);
2162 foreach ($wordListArray as $key => $val) {
2166 $connection->insert(
2169 'wid' => $val[
'hash'],
2171 'metaphone' => $val[
'metaphone']
2189 $connectionPool = GeneralUtility::makeInstance(ConnectionPool::class);
2190 $queryBuilder = $connectionPool->getQueryBuilderForTable(
'index_words');
2191 $result = $queryBuilder->select(
'wid')
2192 ->from(
'index_words')
2194 $queryBuilder->expr()->neq(
'is_stopword', $queryBuilder->createNamedParameter(0, \PDO::PARAM_INT))
2200 while ($row = $result->fetch()) {
2201 $stopWords[$row[
'wid']] = $row;
2204 $connectionPool->getConnectionForTable(
'index_rel')->delete(
'index_rel', [
'phash' => (
int)$phash]);
2206 $fields = [
'phash',
'wid',
'count',
'first',
'freq',
'flags'];
2208 foreach ($wordList as $val) {
2209 if (isset($stopWords[$val[
'hash']])) {
2217 $this->
freqMap($val[
'count'] / $this->wordcount),
2222 if (!empty($rows)) {
2223 $connectionPool->getConnectionForTable(
'index_rel')->bulkInsert(
'index_rel', $rows,
$fields);
2238 $newFreq = $freq * $mapFactor;
2239 $newFreq = $newFreq > $this->freqRange ? $this->freqRange : $newFreq;
2241 $newFreq = $freq / $mapFactor;
2243 return (
int)$newFreq;
2258 'id' => (int)$this->conf[
'id'],
2259 'type' => (
int)$this->conf[
'type'],
2260 'sys_lang' => (int)$this->conf[
'sys_language_uid'],
2261 'MP' => (
string)$this->conf[
'MP'],
2263 'staticPageArguments' => is_array($this->conf[
'staticPageArguments']) ? json_encode($this->conf[
'staticPageArguments']) :
null,
2268 $hArray[
'gr_list'] = (string)$this->conf[
'gr_list'];
2289 $hArray[
'subinfo'] = $subinfo;
2307 $this->timeTracker->push($msg, $key);
2315 $this->timeTracker->pull();
2326 $this->timeTracker->setTSlogMessage($msg, $errorNum);
2327 $this->internal_log[] = $msg;
2340 $keywords = GeneralUtility::trimExplode(
',', $keywordList);
2341 return ' ' . implode(
', ', $keywords) .
' ';