36 -1 =>
'mtime matched the document, so no changes detected and no content updated',
37 -2 =>
'The minimum age was not exceeded',
38 1 =>
'The configured max-age was exceeded for the document and thus it\'s indexed.',
39 2 =>
'The minimum age was exceed and mtime was set and the mtime was different, so the page was indexed.',
40 3 =>
'The minimum age was exceed, but mtime was not set, so the page was indexed.',
41 4 =>
'Page has never been indexed (is not represented in the index_phash table).' 234 if (\
TYPO3\CMS\Core\Utility\ExtensionManagementUtility::isLoaded(
'crawler') && $pObj->applicationData[
'tx_crawler'][
'running'] && in_array(
'tx_indexedsearch_reindex', $pObj->applicationData[
'tx_crawler'][
'parameters'][
'procInstructions'])) {
236 $pObj->applicationData[
'tx_crawler'][
'log'][] =
'Forced Re-indexing enabled';
238 $this->crawlerActive = TRUE;
240 $this->forceIndexing = TRUE;
243 if ($pObj->config[
'config'][
'index_enable']) {
245 if (!
$indexerConfig[
'disableFrontendIndexing'] || $this->crawlerActive) {
246 if (!$pObj->page[
'no_search']) {
247 if (!$pObj->no_cache) {
248 if ((
int)$pObj->sys_language_uid === (
int)$pObj->sys_language_content) {
250 $this->conf = array();
252 $this->conf[
'id'] = $pObj->id;
254 $this->conf[
'type'] = $pObj->type;
256 $this->conf[
'sys_language_uid'] = $pObj->sys_language_uid;
258 $this->conf[
'MP'] = $pObj->MP;
260 $this->conf[
'gr_list'] = $pObj->gr_list;
262 $this->conf[
'cHash'] = $pObj->cHash;
264 $this->conf[
'cHash_array'] = $pObj->cHash_array;
266 $this->conf[
'crdate'] = $pObj->page[
'crdate'];
268 $this->conf[
'page_cache_reg1'] = $pObj->page_cache_reg1;
271 $this->conf[
'rootline_uids'] = array();
272 foreach ($pObj->config[
'rootLine'] as $rlkey => $rldat) {
273 $this->conf[
'rootline_uids'][$rlkey] = $rldat[
'uid'];
276 $this->conf[
'content'] = $pObj->content;
278 $this->conf[
'indexedDocTitle'] = $pObj->convOutputCharset($pObj->indexedDocTitle);
280 $this->conf[
'metaCharset'] = $pObj->metaCharset;
282 $this->conf[
'mtime'] = isset($pObj->register[
'SYS_LASTCHANGED']) ? $pObj->register[
'SYS_LASTCHANGED'] : $pObj->page[
'SYS_LASTCHANGED'];
285 $this->conf[
'index_externals'] = $pObj->config[
'config'][
'index_externals'];
287 $this->conf[
'index_descrLgd'] = $pObj->config[
'config'][
'index_descrLgd'];
289 $this->conf[
'index_metatags'] = isset($pObj->config[
'config'][
'index_metatags']) ? $pObj->config[
'config'][
'index_metatags'] : TRUE;
291 $this->conf[
'recordUid'] = 0;
292 $this->conf[
'freeIndexUid'] = 0;
293 $this->conf[
'freeIndexSetId'] = 0;
298 $this->
log_setTSlogMessage(
'Index page? No, ->sys_language_uid was different from sys_language_content which indicates that the page contains fall-back content and that would be falsely indexed as localized content.');
301 $this->
log_setTSlogMessage(
'Index page? No, page was set to "no_cache" and so cannot be indexed.');
304 $this->
log_setTSlogMessage(
'Index page? No, The "No Search" flag has been set in the page properties!');
307 $this->
log_setTSlogMessage(
'Index page? No, Ordinary Frontend indexing during rendering is disabled.');
331 public function backend_initIndexer($id, $type, $sys_language_uid, $MP, $uidRL, $cHash_array = array(), $createCHash = FALSE) {
333 $this->conf = array();
335 $this->conf[
'id'] = $id;
337 $this->conf[
'type'] = $type;
339 $this->conf[
'sys_language_uid'] = $sys_language_uid;
341 $this->conf[
'MP'] = $MP;
343 $this->conf[
'gr_list'] =
'0,-1';
351 $this->conf[
'cHash'] =
'';
354 $this->conf[
'cHash_array'] = $cHash_array;
357 $this->conf[
'freeIndexUid'] = 0;
358 $this->conf[
'freeIndexSetId'] = 0;
359 $this->conf[
'page_cache_reg1'] =
'';
361 $this->conf[
'rootline_uids'] = $uidRL;
363 $this->conf[
'index_externals'] = 1;
365 $this->conf[
'index_descrLgd'] = 200;
367 $this->conf[
'index_metatags'] = TRUE;
382 $this->conf[
'freeIndexUid'] = $freeIndexUid;
383 $this->conf[
'freeIndexSetId'] = $freeIndexSetId;
400 public function backend_indexAsTYPO3Page($title, $keywords, $description, $content, $charset, $mtime, $crdate = 0, $recordUid = 0) {
402 $this->conf[
'mtime'] = $mtime;
404 $this->conf[
'crdate'] = $crdate;
406 $this->conf[
'recordUid'] = $recordUid;
409 $this->conf[
'content'] =
' 412 <title>' . htmlspecialchars($title) .
'</title> 413 <meta name="keywords" content="' . htmlspecialchars($keywords) .
'" /> 414 <meta name="description" content="' . htmlspecialchars($description) .
'" /> 417 ' . htmlspecialchars($content) .
' 422 $this->conf[
'metaCharset'] = $charset;
424 $this->conf[
'indexedDocTitle'] =
'';
444 $this->cHashParams = $this->conf[
'cHash_array'];
445 if (is_array($this->cHashParams) && count($this->cHashParams)) {
446 if ($this->conf[
'cHash']) {
448 $this->cHashParams[
'cHash'] = $this->conf[
'cHash'];
450 unset($this->cHashParams[
'encryptionKey']);
455 $this->indexerConfig = unserialize(
$GLOBALS[
'TYPO3_CONF_VARS'][
'EXT'][
'extConf'][
'indexed_search']);
461 $this->enableMetaphoneSearch = isset($this->indexerConfig[
'enableMetaphoneSearch']) ? ($this->indexerConfig[
'enableMetaphoneSearch'] ? TRUE : FALSE) : TRUE;
465 if ($this->conf[
'index_externals']) {
469 $lexerObjRef = $TYPO3_CONF_VARS[
'EXTCONF'][
'indexed_search'][
'lexer'] ? $TYPO3_CONF_VARS[
'EXTCONF'][
'indexed_search'][
'lexer'] :
'TYPO3\\CMS\\IndexedSearch\\Lexer';
471 $this->lexerObj->debug = $this->indexerConfig[
'debugMode'];
474 if ($this->enableMetaphoneSearch && $TYPO3_CONF_VARS[
'EXTCONF'][
'indexed_search'][
'metaphone']) {
476 $this->metaphoneObj->pObj = $this;
492 if (is_array($TYPO3_CONF_VARS[
'EXTCONF'][
'indexed_search'][
'external_parsers'])) {
493 foreach ($TYPO3_CONF_VARS[
'EXTCONF'][
'indexed_search'][
'external_parsers'] as $extension => $_objRef) {
495 $this->external_parsers[$extension]->pObj = $this;
497 if (!$this->external_parsers[$extension]->initParser($extension)) {
498 unset($this->external_parsers[$extension]);
516 $check = $this->
checkMtimeTstamp($this->conf[
'mtime'], $this->hash[
'phash']);
518 if ($check > 0 || !$is_grlist || $this->forceIndexing) {
520 if ($this->forceIndexing) {
522 } elseif ($check > 0) {
528 $this->
log_push(
'Split content',
'');
530 if ($this->conf[
'indexedDocTitle']) {
531 $this->contentParts[
'title'] = $this->conf[
'indexedDocTitle'];
540 if (!is_array($checkCHash) || $check === 1) {
542 $this->
log_push(
'Converting charset of content (' . $this->conf[
'metaCharset'] .
') to utf-8',
'');
546 $this->
log_push(
'Extract words from content',
'');
550 $this->
log_push(
'Analyse the extracted words',
'');
554 $this->
log_push(
'Submitting page',
'');
558 $this->
log_push(
'Check word list and submit words',
'');
559 if (\
TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::isTableUsed(
'index_words')) {
561 $this->
submitWords($indexArr, $this->hash[
'phash']);
567 $this->
log_push(
'Checking external files',
'');
568 if ($this->conf[
'index_externals']) {
574 $this->
updateTstamp($this->hash[
'phash'], $this->conf[
'mtime']);
577 $this->
update_grlist($checkCHash[
'phash'], $this->hash[
'phash']);
579 $this->
log_setTSlogMessage(
'Indexing not needed, the contentHash, ' . $this->content_md5h .
', has not changed. Timestamp, grlist and rootline updated if necessary.');
597 $contentArr[
'body'] = stristr($content,
'<body');
598 $headPart = substr($content, 0, -strlen($contentArr[
'body']));
600 $this->
embracingTags($headPart,
'TITLE', $contentArr[
'title'], $dummy2, $dummy);
601 $titleParts = explode(
':', $contentArr[
'title'], 2);
602 $contentArr[
'title'] = trim(isset($titleParts[1]) ? $titleParts[1] : $titleParts[0]);
604 if ($this->conf[
'index_metatags']) {
607 while ($this->
embracingTags($headPart,
'meta', $dummy, $headPart, $meta[$i])) {
611 for ($i = 0; isset($meta[$i]); $i++) {
613 if (stristr($meta[$i][
'name'],
'keywords')) {
616 if (stristr($meta[$i][
'name'],
'description')) {
617 $contentArr[
'description'] .=
',' . $meta[$i][
'content'];
624 $tagList = explode(
',', $this->excludeSections);
625 foreach ($tagList as $tag) {
626 while ($this->
embracingTags($contentArr[
'body'], $tag, $dummy, $contentArr[
'body'], $dummy2)) {
631 $contentArr[
'body'] = str_replace(
'<',
' <', $contentArr[
'body']);
632 $contentArr[
'body'] = trim(strip_tags($contentArr[
'body']));
633 $contentArr[
'keywords'] = trim($contentArr[
'keywords']);
634 $contentArr[
'description'] = trim($contentArr[
'description']);
647 if (preg_match(
'/<meta[[:space:]]+[^>]*http-equiv[[:space:]]*=[[:space:]]*["\']CONTENT-TYPE["\'][^>]*>/i', $content, $reg)) {
648 if (preg_match(
'/charset[[:space:]]*=[[:space:]]*([[:alnum:]-]+)/i', $reg[0], $reg2)) {
665 $charset = $this->csObj->parse_charset($charset);
667 if ($charset && $charset !==
'utf-8') {
668 $content = $this->csObj->utf8_encode($content, $charset);
671 $content = $this->csObj->entities_to_utf8($content, TRUE);
688 public function embracingTags($string, $tagName, &$tagContent, &$stringAfter, &$paramList) {
689 $endTag =
'</' . $tagName .
'>';
690 $startTag =
'<' . $tagName;
692 $isTagInText = stristr($string, $startTag);
697 list($paramList, $isTagInText) = explode(
'>', substr($isTagInText, strlen($startTag)), 2);
698 $afterTagInText = stristr($isTagInText, $endTag);
699 if ($afterTagInText) {
700 $stringBefore = substr($string, 0, strpos(strtolower($string), strtolower($startTag)));
701 $tagContent = substr($isTagInText, 0, strlen($isTagInText) - strlen($afterTagInText));
702 $stringAfter = $stringBefore . substr($afterTagInText, strlen($endTag));
705 $stringAfter = $isTagInText;
718 $expBody = preg_split(
'/\\<\\!\\-\\-[\\s]?TYPO3SEARCH_/', $body);
719 if (count($expBody) > 1) {
721 foreach ($expBody as $val) {
722 $part = explode(
'-->', $val, 2);
723 if (trim($part[0]) ==
'begin') {
726 } elseif (trim($part[0]) ==
'end') {
748 if ($this->indexerConfig[
'useCrawlerForExternalFiles'] && \
TYPO3\CMS\Core\Utility\ExtensionManagementUtility::isLoaded(
'crawler')) {
753 foreach ($list as $linkInfo) {
755 if ($linkInfo[
'localPath']) {
757 $linkSource = htmlspecialchars_decode($linkInfo[
'localPath']);
759 $linkSource = htmlspecialchars_decode($linkInfo[
'href']);
762 $qParts = parse_url($linkSource);
764 if ($qParts[
'query'] && strstr($qParts[
'query'],
'jumpurl=')) {
765 parse_str($qParts[
'query'], $getP);
766 $linkSource = $getP[
'jumpurl'];
767 $qParts = parse_url($linkSource);
769 if (!$linkInfo[
'localPath'] && $qParts[
'scheme']) {
770 if ($this->indexerConfig[
'indexExternalURLs']) {
774 } elseif (!$qParts[
'query']) {
775 $linkSource = urldecode($linkSource);
777 $localFile = $linkSource;
781 if ($localFile && @is_file($localFile)) {
783 if ($linkInfo[
'localPath']) {
784 $fI = pathinfo($linkSource);
785 $ext = strtolower($fI[
'extension']);
786 if (is_object($crawler)) {
788 'document' => $linkSource,
789 'alturl' => $linkInfo[
'href'],
790 'conf' => $this->conf
792 unset($params[
'conf'][
'content']);
793 $crawler->addQueueEntry_callBack(0, $params,
'&TYPO3\\CMS\\IndexedSearch\\Hook\\CrawlerFilesHook', $this->conf[
'id']);
794 $this->
log_setTSlogMessage(
'media "' . $params[
'document'] .
'" added to "crawler" queue.', 1);
799 if (is_object($crawler)) {
801 'document' => $linkSource,
802 'conf' => $this->conf
804 unset($params[
'conf'][
'content']);
805 $crawler->addQueueEntry_callBack(0, $params,
'&TYPO3\\CMS\\IndexedSearch\\Hook\\CrawlerFilesHook', $this->conf[
'id']);
806 $this->
log_setTSlogMessage(
'media "' . $params[
'document'] .
'" added to "crawler" queue.', 1);
826 $htmlParts = $htmlParser->splitTags(
'a', $html);
827 $hyperLinksData = array();
828 foreach ($htmlParts as $index => $tagData) {
829 if ($index % 2 !== 0) {
830 $tagAttributes = $htmlParser->get_tag_attributes($tagData, TRUE);
831 $firstTagName = $htmlParser->getFirstTagName($tagData);
832 if (strtolower($firstTagName) ==
'a') {
833 if ($tagAttributes[0][
'href'] && $tagAttributes[0][
'href'][0] !=
'#') {
834 $hyperLinksData[] = array(
836 'href' => $tagAttributes[0][
'href'],
843 return $hyperLinksData;
855 $htmlParts = $htmlParser->splitTags(
'base', $html);
856 foreach ($htmlParts as $index => $tagData) {
857 if ($index % 2 !== 0) {
858 $tagAttributes = $htmlParser->get_tag_attributes($tagData, TRUE);
859 $firstTagName = $htmlParser->getFirstTagName($tagData);
860 if (strtolower($firstTagName) ==
'base') {
861 $href = $tagAttributes[0][
'href'];
886 $qParts = parse_url($externalUrl);
887 $fI = pathinfo($qParts[
'path']);
888 $ext = strtolower($fI[
'extension']);
891 if (stristr($urlHeaders[
'Content-Type'],
'text/html')) {
893 if (strlen($content)) {
918 if (strlen($content)) {
922 foreach ($headers as $line) {
923 if (!strlen(trim($line))) {
926 list($headKey, $headValue) = explode(
':', $line, 2);
927 $retVal[$headKey] = $headValue;
941 static $pathFunctions = array(
942 'createLocalPathFromT3vars',
943 'createLocalPathUsingAbsRefPrefix',
944 'createLocalPathUsingDomainURL',
945 'createLocalPathFromAbsoluteURL',
946 'createLocalPathFromRelativeURL' 948 foreach ($pathFunctions as $functionName) {
949 $localPath = $this->{$functionName}($sourcePath);
950 if ($localPath !=
'') {
967 $indexLocalFiles =
$GLOBALS[
'T3_VAR'][
'ext'][
'indexed_search'][
'indexLocalFiles'];
968 if (is_array($indexLocalFiles)) {
973 if (isset($indexLocalFiles[$md5]) && is_file($indexLocalFiles[$md5])) {
974 $localPath = $indexLocalFiles[$md5];
989 $baseURLLength = strlen($baseURL);
990 if (substr($sourcePath, 0, $baseURLLength) == $baseURL) {
991 $sourcePath = substr($sourcePath, $baseURLLength);
992 $localPath = PATH_site . $sourcePath;
993 if (!self::isAllowedLocalFile($localPath)) {
1009 if (
$GLOBALS[
'TSFE'] instanceof \
TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController) {
1010 $absRefPrefix =
$GLOBALS[
'TSFE']->config[
'config'][
'absRefPrefix'];
1011 $absRefPrefixLength = strlen($absRefPrefix);
1012 if ($absRefPrefixLength > 0 && substr($sourcePath, 0, $absRefPrefixLength) == $absRefPrefix) {
1013 $sourcePath = substr($sourcePath, $absRefPrefixLength);
1014 $localPath = PATH_site . $sourcePath;
1015 if (!self::isAllowedLocalFile($localPath)) {
1032 if ($sourcePath[0] ==
'/') {
1033 $sourcePath = substr($sourcePath, 1);
1034 $localPath = PATH_site . $sourcePath;
1035 if (!self::isAllowedLocalFile($localPath)) {
1050 if (self::isRelativeURL($sourcePath)) {
1051 $localPath = PATH_site . $sourcePath;
1052 if (!self::isAllowedLocalFile($localPath)) {
1066 $urlParts = @parse_url($url);
1067 return $urlParts[
'scheme'] ==
'' && $urlParts[
'path'][0] !=
'/';
1078 $insideWebPath = substr($filePath, 0, strlen(PATH_site)) == PATH_site;
1079 $isFile = is_file($filePath);
1080 return $insideWebPath && $isFile;
1100 $fI = pathinfo($file);
1101 $ext = $altExtension ?: strtolower($fI[
'extension']);
1103 if (!$contentTmpFile) {
1113 $absFile = $contentTmpFile;
1116 if ($absFile && @is_file($absFile)) {
1117 if ($this->external_parsers[$ext]) {
1118 $fileInfo = stat($absFile);
1120 foreach ($cParts as $cPKey) {
1121 $this->internal_log = array();
1122 $this->
log_push(
'Index: ' . str_replace(
'.',
'_', basename($file)) . ($cPKey ?
'#' . $cPKey :
''),
'');
1124 $subinfo = array(
'key' => $cPKey);
1126 $phash_arr = ($this->file_phash_arr = $this->
setExtHashes($file, $subinfo));
1128 if ($check > 0 || $force) {
1135 if ($this->externalFileCounter < $this->maxExternalFiles || $force) {
1137 $this->
log_push(
'Split content',
'');
1145 $this->externalFileCounter++;
1147 $this->
log_push(
'Extract words from content',
'');
1151 $this->
log_push(
'Analyse the extracted words',
'');
1155 $this->
log_push(
'Submitting page',
'');
1160 $this->
log_push(
'Check word list and submit words',
'');
1161 if (\
TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::isTableUsed(
'index_words')) {
1163 $this->
submitWords($indexArr, $phash_arr[
'phash']);
1170 $this->
updateTstamp($phash_arr[
'phash'], $fileInfo[
'mtime']);
1177 $this->
log_setTSlogMessage(
'The limit of ' . $this->maxExternalFiles .
' has already been exceeded, so no indexing will take place this time.');
1188 $this->
log_setTSlogMessage(
'Indexing not possible; The extension "' . $ext .
'" was not supported.');
1191 $this->
log_setTSlogMessage(
'Indexing not possible; File "' . $absFile .
'" not found or valid.');
1206 $contentArray = NULL;
1208 if (is_object($this->external_parsers[$fileExtension])) {
1209 $contentArray = $this->external_parsers[$fileExtension]->readFileContent($fileExtension, $absoluteFileName, $sectionPointer);
1211 return $contentArray;
1225 if (is_object($this->external_parsers[$ext])) {
1226 $cParts = $this->external_parsers[$ext]->fileContentParts($ext, $absFile);
1241 $contentArr[
'body'] = $content;
1260 foreach ($contentArr as $key => $value) {
1261 if (strlen($contentArr[$key])) {
1262 if ($charset !==
'utf-8') {
1263 $contentArr[$key] = $this->csObj->utf8_encode($contentArr[$key], $charset);
1266 $contentArr[$key] = $this->csObj->entities_to_utf8($contentArr[$key], TRUE);
1280 foreach ($contentArr as $key => $value) {
1281 $contentArr[$key] = $this->lexerObj->split2Words($contentArr[$key]);
1284 $contentArr[
'title'] = array_unique($contentArr[
'title']);
1285 $contentArr[
'keywords'] = array_unique($contentArr[
'keywords']);
1286 $contentArr[
'description'] = array_unique($contentArr[
'description']);
1302 $bodyDescription = str_replace(array(
' ', TAB, CR, LF),
' ', $contentArr[
'body']);
1304 $bodyDescription = $this->csObj->strtrunc(
'utf-8', $bodyDescription, $maxL);
1306 return $bodyDescription;
1317 $indexArr = array();
1337 foreach ($content[$key] as $val) {
1338 $val = substr($val, 0, 60);
1340 if (!isset($retArr[$val])) {
1344 $metaphone = $this->enableMetaphoneSearch ? substr($this->
metaphone($val, $this->storeMetaphoneInfoAsWords), 0, 60) :
'';
1345 $retArr[$val][
'metaphone'] = $metaphone;
1348 if ($this->storeMetaphoneInfoAsWords) {
1349 $this->metaphoneContent .=
' ' . $retArr[$val][
'metaphone'];
1352 $retArr[$val][
'cmp'] = $retArr[$val][
'cmp'] | pow(2, $offset);
1354 $retArr[$val][
'count']++;
1368 foreach ($content[
'body'] as $key => $val) {
1369 $val = substr($val, 0, 60);
1371 if (!isset($retArr[$val])) {
1373 $retArr[$val][
'first'] = $key;
1377 $metaphone = $this->enableMetaphoneSearch ? substr($this->
metaphone($val, $this->storeMetaphoneInfoAsWords), 0, 60) :
'';
1378 $retArr[$val][
'metaphone'] = $metaphone;
1381 if ($this->storeMetaphoneInfoAsWords) {
1382 $this->metaphoneContent .=
' ' . $retArr[$val][
'metaphone'];
1385 $retArr[$val][
'count']++;
1398 public function metaphone($word, $returnRawMetaphoneValue = FALSE) {
1399 if (is_object($this->metaphoneObj)) {
1400 $metaphoneRawValue = $this->metaphoneObj->metaphone($word, $this->conf[
'sys_language_uid']);
1405 if ($returnRawMetaphoneValue) {
1407 } elseif (strlen($metaphoneRawValue)) {
1432 'phash' => $this->hash[
'phash'],
1433 'phash_grouping' => $this->hash[
'phash_grouping'],
1434 'cHashParams' => serialize($this->cHashParams),
1435 'contentHash' => $this->content_md5h,
1436 'data_page_id' => $this->conf[
'id'],
1437 'data_page_reg1' => $this->conf[
'page_cache_reg1'],
1438 'data_page_type' => $this->conf[
'type'],
1439 'data_page_mp' => $this->conf[
'MP'],
1440 'gr_list' => $this->conf[
'gr_list'],
1443 'item_title' => $this->contentParts[
'title'],
1445 'item_mtime' => (
int) $this->conf[
'mtime'],
1446 'item_size' => strlen($this->conf[
'content']),
1449 'item_crdate' => $this->conf[
'crdate'],
1451 'sys_language_uid' => $this->conf[
'sys_language_uid'],
1454 'recordUid' => (
int)$this->conf[
'recordUid'],
1455 'freeIndexUid' => (
int)$this->conf[
'freeIndexUid'],
1456 'freeIndexSetId' => (
int)$this->conf[
'freeIndexSetId']
1458 if (\
TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::isTableUsed(
'index_phash')) {
1459 $GLOBALS[
'TYPO3_DB']->exec_INSERTquery(
'index_phash', $fields);
1462 $this->
submit_section($this->hash[
'phash'], $this->hash[
'phash']);
1464 $this->
submit_grlist($this->hash[
'phash'], $this->hash[
'phash']);
1467 'phash' => $this->hash[
'phash'],
1468 'fulltextdata' => implode(
' ', $this->contentParts),
1469 'metaphonedata' => $this->metaphoneContent
1471 if ($this->indexerConfig[
'fullTextDataLength'] > 0) {
1472 $fields[
'fulltextdata'] = substr($fields[
'fulltextdata'], 0, $this->indexerConfig[
'fullTextDataLength']);
1474 if (\
TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::isTableUsed(
'index_fulltext')) {
1475 $GLOBALS[
'TYPO3_DB']->exec_INSERTquery(
'index_fulltext', $fields);
1478 if ($this->indexerConfig[
'debugMode']) {
1480 'phash' => $this->hash[
'phash'],
1481 'debuginfo' => serialize(array(
1482 'cHashParams' => $this->cHashParams,
1483 'external_parsers initialized' => array_keys($this->external_parsers),
1484 'conf' => array_merge($this->conf, array(
'content' => substr($this->conf[
'content'], 0, 1000))),
1485 'contentParts' => array_merge($this->contentParts, array(
'body' => substr($this->contentParts[
'body'], 0, 1000))),
1486 'logs' => $this->internal_log,
1487 'lexer' => $this->lexerObj->debugString
1490 if (\
TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::isTableUsed(
'index_debug')) {
1491 $GLOBALS[
'TYPO3_DB']->exec_INSERTquery(
'index_debug', $fields);
1509 'phash_x' => $phash_x,
1510 'hash_gr_list' => \
TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::md5inthash($this->conf[
'gr_list']),
1511 'gr_list' => $this->conf[
'gr_list']
1513 if (\
TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::isTableUsed(
'index_grlist')) {
1514 $GLOBALS[
'TYPO3_DB']->exec_INSERTquery(
'index_grlist', $fields);
1530 'phash_t3' => $hash_t3,
1531 'page_id' => (
int)$this->conf[
'id']
1534 if (\
TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::isTableUsed(
'index_section')) {
1535 $GLOBALS[
'TYPO3_DB']->exec_INSERTquery(
'index_section', $fields);
1548 $tableArray = explode(
',',
'index_phash,index_section,index_grlist,index_fulltext,index_debug');
1549 foreach ($tableArray as $table) {
1550 if (\
TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::isTableUsed($table)) {
1551 $GLOBALS[
'TYPO3_DB']->exec_DELETEquery($table,
'phash=' . (
int)$phash);
1555 if (\
TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::isTableUsed(
'index_section')) {
1556 $GLOBALS[
'TYPO3_DB']->exec_DELETEquery(
'index_section',
'phash_t3=' . (
int)$phash);
1582 $storeItemType = $this->external_parsers[$ext]->ext2itemtype_map[$ext];
1583 $storeItemType = $storeItemType ?: $ext;
1587 $fileParts = parse_url($file);
1590 'phash' =>
$hash[
'phash'],
1591 'phash_grouping' =>
$hash[
'phash_grouping'],
1592 'cHashParams' => serialize($subinfo),
1594 'data_filename' => $file,
1595 'item_type' => $storeItemType,
1596 'item_title' => trim(
$contentParts[
'title']) ?: basename($file),
1598 'item_mtime' => $mtime,
1599 'item_size' => $size,
1600 'item_crdate' => $ctime,
1603 'gr_list' => $this->conf[
'gr_list'],
1604 'externalUrl' => $fileParts[
'scheme'] ? 1 : 0,
1605 'recordUid' => (
int)$this->conf[
'recordUid'],
1606 'freeIndexUid' => (
int)$this->conf[
'freeIndexUid'],
1607 'freeIndexSetId' => (
int)$this->conf[
'freeIndexSetId'],
1608 'sys_language_uid' => (
int)$this->conf[
'sys_language_uid']
1610 if (\
TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::isTableUsed(
'index_phash')) {
1611 $GLOBALS[
'TYPO3_DB']->exec_INSERTquery(
'index_phash', $fields);
1615 'phash' =>
$hash[
'phash'],
1617 'metaphonedata' => $this->metaphoneContent
1619 if ($this->indexerConfig[
'fullTextDataLength'] > 0) {
1620 $fields[
'fulltextdata'] = substr($fields[
'fulltextdata'], 0, $this->indexerConfig[
'fullTextDataLength']);
1622 if (\
TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::isTableUsed(
'index_fulltext')) {
1623 $GLOBALS[
'TYPO3_DB']->exec_INSERTquery(
'index_fulltext', $fields);
1626 if ($this->indexerConfig[
'debugMode']) {
1628 'phash' =>
$hash[
'phash'],
1629 'debuginfo' => serialize(array(
1630 'cHashParams' => $subinfo,
1632 'logs' => $this->internal_log,
1633 'lexer' => $this->lexerObj->debugString
1636 if (\
TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::isTableUsed(
'index_debug')) {
1637 $GLOBALS[
'TYPO3_DB']->exec_INSERTquery(
'index_debug', $fields);
1651 if (\
TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::isTableUsed(
'index_grlist')) {
1652 $count =
$GLOBALS[
'TYPO3_DB']->exec_SELECTcountRows(
'phash',
'index_grlist',
'phash=' . (
int)
$hash .
' AND (hash_gr_list=' . \
TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::md5inthash($this->defaultGrList) .
' OR hash_gr_list=' . \
TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::md5inthash($this->conf[
'gr_list']) .
')');
1668 if (\
TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::isTableUsed(
'index_section')) {
1669 $count =
$GLOBALS[
'TYPO3_DB']->exec_SELECTcountRows(
'phash',
'index_section',
'phash=' . (
int)
$hash .
' AND page_id=' . (
int)$this->conf[
'id']);
1685 $tableArray = explode(
',',
'index_phash,index_grlist,index_fulltext,index_debug');
1686 foreach ($tableArray as $table) {
1687 if (\
TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::isTableUsed($table)) {
1688 $GLOBALS[
'TYPO3_DB']->exec_DELETEquery($table,
'phash=' . (
int)$phash);
1708 if (!\
TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::isTableUsed(
'index_phash')) {
1712 $row =
$GLOBALS[
'TYPO3_DB']->exec_SELECTgetSingleRow(
'item_mtime,tstamp',
'index_phash',
'phash=' . (
int)$phash);
1715 if ($this->tstamp_maxAge && $row[
'tstamp'] + $this->tstamp_maxAge <
$GLOBALS[
'EXEC_TIME']) {
1720 if (!$this->tstamp_minAge || $row[
'tstamp'] + $this->tstamp_minAge <
$GLOBALS[
'EXEC_TIME']) {
1724 if ($row[
'item_mtime'] != $mtime) {
1731 if ($this->tstamp_maxAge) {
1732 $this->
log_setTSlogMessage(
'mtime matched, timestamp NOT updated because a maxAge is set (' . ($row[
'tstamp'] + $this->tstamp_maxAge -
$GLOBALS[
'EXEC_TIME']) .
' seconds to expire time).', 1);
1764 if (\
TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::isTableUsed(
'index_phash')) {
1765 $row =
$GLOBALS[
'TYPO3_DB']->exec_SELECTgetSingleRow(
'phash',
'index_phash',
'phash_grouping=' . (
int)$this->hash[
'phash_grouping'] .
' AND contentHash=' . (
int)$this->content_md5h);
1784 if (\
TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::isTableUsed(
'index_phash')) {
1785 $count =
$GLOBALS[
'TYPO3_DB']->exec_SELECTcountRows(
'*',
'index_phash',
'phash_grouping=' . (
int)$hashGr .
' AND contentHash=' . (
int)
$content_md5h);
1800 if (\
TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::isTableUsed(
'index_grlist')) {
1801 $count =
$GLOBALS[
'TYPO3_DB']->exec_SELECTcountRows(
'phash_x',
'index_grlist',
'phash_x=' . (
int)$phash_x);
1817 if (\
TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::isTableUsed(
'index_grlist')) {
1818 $count =
$GLOBALS[
'TYPO3_DB']->exec_SELECTcountRows(
'phash',
'index_grlist',
'phash=' . (
int)$phash .
' AND hash_gr_list=' . \
TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::md5inthash($this->conf[
'gr_list']));
1821 $this->
log_setTSlogMessage(
'Inserted gr_list \'' . $this->conf[
'gr_list'] .
'\' for phash \
'' . $phash .
'\'', 1);
1835 if (\
TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::isTableUsed(
'index_phash')) {
1836 $updateFields = array(
1840 $updateFields[
'item_mtime'] = (int)$mtime;
1842 $GLOBALS[
'TYPO3_DB']->exec_UPDATEquery(
'index_phash',
'phash=' . (
int)$phash, $updateFields);
1854 if (\
TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::isTableUsed(
'index_phash')) {
1855 $updateFields = array(
1856 'freeIndexSetId' => (
int)$this->conf[
'freeIndexSetId']
1858 $GLOBALS[
'TYPO3_DB']->exec_UPDATEquery(
'index_phash',
'phash=' . (
int)$phash, $updateFields);
1871 if (\
TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::isTableUsed(
'index_phash')) {
1872 $updateFields = array(
1873 'parsetime' => (
int)$parsetime
1875 $GLOBALS[
'TYPO3_DB']->exec_UPDATEquery(
'index_phash',
'phash=' . (
int)$phash, $updateFields);
1886 if (\
TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::isTableUsed(
'index_section')) {
1887 $updateFields = array();
1889 $GLOBALS[
'TYPO3_DB']->exec_UPDATEquery(
'index_section',
'page_id=' . (
int)$this->conf[
'id'], $updateFields);
1902 $fieldArray[
'rl0'] = (int)$this->conf[
'rootline_uids'][0];
1903 $fieldArray[
'rl1'] = (int)$this->conf[
'rootline_uids'][1];
1904 $fieldArray[
'rl2'] = (int)$this->conf[
'rootline_uids'][2];
1905 if (is_array(
$GLOBALS[
'TYPO3_CONF_VARS'][
'EXTCONF'][
'indexed_search'][
'addRootLineFields'])) {
1906 foreach (
$GLOBALS[
'TYPO3_CONF_VARS'][
'EXTCONF'][
'indexed_search'][
'addRootLineFields'] as $fieldName => $rootLineLevel) {
1907 $fieldArray[$fieldName] = (int)$this->conf[
'rootline_uids'][$rootLineLevel];
1920 if (\
TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::isTableUsed(
'index_phash') && \
TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::isTableUsed(
'index_grlist')) {
1921 $res =
$GLOBALS[
'TYPO3_DB']->exec_SELECTquery(
'A.phash',
'index_phash A,index_grlist B',
' 1923 AND A.phash_grouping=' . (
int)$this->hash[
'phash_grouping'] .
' 1924 AND B.hash_gr_list<>' . \
TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::md5inthash($this->defaultGrList) .
' 1925 AND A.contentHash=' . (
int)$this->content_md5h);
1926 while ($res && FALSE !== ($row =
$GLOBALS[
'TYPO3_DB']->sql_fetch_assoc($res))) {
1927 $this->
log_setTSlogMessage(
'The currently indexed page was indexed under no user-login and apparently this page has been indexed under login conditions earlier, but with the SAME content. Therefore the old similar page with phash=\'' . $row[
'phash'] .
'\' are now removed.
', 1); 1928 $this->removeOldIndexedPages($row['phash
']); 1930 $GLOBALS['TYPO3_DB
']->sql_free_result($res); 1940 public function includeCrawlerClass() { 1941 GeneralUtility::requireOnce(\TYPO3\CMS\Core\Utility\ExtensionManagementUtility::extPath('crawler
') . 'class.tx_crawler_lib.php
'); 1944 /******************************** 1946 * SQL; Submitting words 1948 *******************************/ 1956 public function checkWordList($wordListArray) { 1957 if (\TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::isTableUsed('index_words
')) { 1958 if (count($wordListArray)) { 1959 $phashArray = array(); 1960 foreach ($wordListArray as $value) { 1961 $phashArray[] = (int)$value['hash
']; 1963 $cwl = implode(',
', $phashArray); 1964 $count = $GLOBALS['TYPO3_DB
']->exec_SELECTcountRows('baseword
', 'index_words
', 'wid IN (
' . $cwl . ')
'); 1965 if ($count != count($wordListArray)) { 1966 $res = $GLOBALS['TYPO3_DB
']->exec_SELECTquery('baseword
', 'index_words
', 'wid IN (
' . $cwl . ')
'); 1967 $this->log_setTSlogMessage('Inserting words:
' . (count($wordListArray) - $count), 1); 1968 while (FALSE != ($row = $GLOBALS['TYPO3_DB
']->sql_fetch_assoc($res))) { 1969 unset($wordListArray[$row['baseword
']]); 1971 $GLOBALS['TYPO3_DB
']->sql_free_result($res); 1972 foreach ($wordListArray as $key => $val) { 1973 $insertFields = array( 1974 'wid
' => $val['hash
'], 1978 // A duplicate-key error will occur here if a word is NOT unset in the unset() line. However as long as the words in $wl are NOT longer as 60 chars (the baseword varchar is 60 characters...) this is not a problem. 1979 $GLOBALS['TYPO3_DB
']->exec_INSERTquery('index_words
', $insertFields); 1994 public function submitWords($wordList, $phash) { 1995 if (\TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::isTableUsed('index_rel
')) { 1996 $GLOBALS['TYPO3_DB
']->exec_DELETEquery('index_rel
', 'phash=
' . (int)$phash); 1997 foreach ($wordList as $val) { 1998 $insertFields = array( 1999 'phash
' => (int)$phash, 2000 'wid
' => (int)$val['hash
'], 2001 'count
' => (int)$val['count
'], 2002 'first
' => (int)$val['first
'], 2003 'freq
' => $this->freqMap($val['count
'] / $this->wordcount), 2004 'flags
' => $val['cmp
'] & $this->flagBitMask 2006 $GLOBALS['TYPO3_DB
']->exec_INSERTquery('index_rel
', $insertFields); 2019 public function freqMap($freq) { 2020 $mapFactor = $this->freqMax * 100 * $this->freqRange; 2022 $newFreq = $freq * $mapFactor; 2023 $newFreq = $newFreq > $this->freqRange ? $this->freqRange : $newFreq; 2025 $newFreq = $freq / $mapFactor; 2030 /******************************** 2034 *******************************/ 2041 public function setT3Hashes() { 2044 'id' => (int)$this->conf['id'], 2045 'type
' => (int)$this->conf['type
'], 2046 'sys_lang
' => (int)$this->conf['sys_language_uid
'], 2047 'MP
' => (string) $this->conf['MP
'], 2048 'cHash
' => $this->cHashParams 2050 // Set grouping hash (Identifies a "page" combined of id, type, language, mountpoint and cHash parameters): 2051 $this->hash['phash_grouping
'] = \TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::md5inthash(serialize($hArray)); 2052 // Add gr_list and set plain phash (Subdivision where special page composition based on login is taken into account as well. It is expected that such pages are normally similar regardless of the login.) 2053 $hArray['gr_list
'] = (string) $this->conf['gr_list
']; 2054 $this->hash['phash
'] = \TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::md5inthash(serialize($hArray)); 2065 public function setExtHashes($file, $subinfo = array()) { 2071 // Set grouping hash: 2072 $hash['phash_grouping
'] = \TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::md5inthash(serialize($hArray)); 2074 $hArray['subinfo
'] = $subinfo; 2075 $hash['phash
'] = \TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::md5inthash(serialize($hArray)); 2079 /********************************* 2081 * Internal logging functions 2083 *********************************/ 2092 public function log_push($msg, $key) { 2093 if (is_object($GLOBALS['TT
'])) { 2094 $GLOBALS['TT
']->push($msg, $key); 2104 public function log_pull() { 2105 if (is_object($GLOBALS['TT
'])) { 2106 $GLOBALS['TT
']->pull(); 2118 public function log_setTSlogMessage($msg, $errorNum = 0) { 2119 if (is_object($GLOBALS['TT
'])) { 2120 $GLOBALS['TT
']->setTSlogMessage($msg, $errorNum); 2122 $this->internal_log[] = $msg; 2125 /************************** 2127 * \TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController hooks: 2129 **************************/ 2138 protected function addSpacesToKeywordList($keywordList) { 2139 $keywords = GeneralUtility::trimExplode(',
', $keywordList); 2140 return ' ' . implode(',
', $keywords) . ' '; backend_initIndexer($id, $type, $sys_language_uid, $MP, $uidRL, $cHash_array=array(), $createCHash=FALSE)
analyzeHeaderinfo(&$retArr, $content, $key, $offset)
hook_indexContent(&$pObj)
submitFilePage($hash, $file, $subinfo, $ext, $mtime, $ctime, $size, $content_md5h, $contentParts)
submit_grlist($hash, $phash_x)
$TYPO3_CONF_VARS['SYS']['contentTable']
splitRegularContent($content)
static isAllowedAbsPath($path)
backend_indexAsTYPO3Page($title, $keywords, $description, $content, $charset, $mtime, $crdate=0, $recordUid=0)
static writeFile($file, $content, $changePermissions=FALSE)
bodyDescription($contentArr)
metaphone($word, $returnRawMetaphoneValue=FALSE)
static isAllowedLocalFile($filePath)
checkExternalDocContentHash($hashGr, $content_md5h)
convertHTMLToUtf8($content, $charset='')
static forceIntegerInRange($theInt, $min, $max=2000000000, $defaultValue=0)
static md5inthash($stringToHash)
update_grlist($phash, $phash_x)
static isRelativeURL($url)
$storeMetaphoneInfoAsWords
static getIndpEnv($getEnvName)
static isTableUsed($tableName)
embracingTags($string, $tagName, &$tagContent, &$stringAfter, &$paramList)
indexRegularDocument($file, $force=FALSE, $contentTmpFile='', $altExtension='')
static getUserObj($classRef, $checkPrefix='', $silent=FALSE)
createLocalPathFromRelativeURL($sourcePath)
createLocalPathFromT3vars($sourcePath)
removeLoginpagesWithContentHash()
updateTstamp($phash, $mtime=0)
getRootLineFields(array &$fieldArray)
static makeInstance($className)
static shortMD5($input, $len=10)
static trimExplode($delim, $string, $removeEmptyValues=FALSE, $limit=0)
addSpacesToKeywordList($keywordList)
submitWords($wordList, $phash)
fileContentParts($ext, $absFile)
removeOldIndexedPages($phash)
setExtHashes($file, $subinfo=array())
submitFile_section($hash)
static get_tag_attributes($tag)
updateParsetime($phash, $parsetime)
readFileContent($fileExtension, $absoluteFileName, $sectionPointer)
if($list_of_literals) if(!empty($literals)) if(!empty($literals)) $result
Analyse literals to prepend the N char to them if their contents aren't numeric.
$indexExternalUrl_content
static getUrl($url, $includeHeader=0, $requestHeaders=FALSE, &$report=NULL)
static tempnam($filePrefix, $fileSuffix='')
analyzeBody(&$retArr, $content)
createLocalPathFromAbsoluteURL($sourcePath)
static implodeArrayForUrl($name, array $theArray, $str='', $skipBlank=FALSE, $rawurlencodeParamName=FALSE)
processWordsInArrays($contentArr)
createLocalPathUsingDomainURL($sourcePath)
charsetEntity2utf8(&$contentArr, $charset)
initializeExternalParsers()
if(!defined('TYPO3_MODE')) $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_userauth.php']['logoff_pre_processing'][]
createLocalPath($sourcePath)
backend_setFreeIndexUid($freeIndexUid, $freeIndexSetId=0)
static getFileAbsFileName($filename, $onlyRelative=TRUE, $relToTYPO3_mainDir=FALSE)
checkMtimeTstamp($mtime, $phash)
submit_section($hash, $hash_t3)
static resolveBackPath($pathStr)
splitHTMLContent($content)
removeOldIndexedFiles($phash)
log_setTSlogMessage($msg, $errorNum=0)
indexExternalUrl($externalUrl)
checkWordList($wordListArray)
createLocalPathUsingAbsRefPrefix($sourcePath)