60 $GLOBALS[
'LANG'] = GeneralUtility::makeInstance(\
TYPO3\CMS\Core\Localization\LanguageService::class);
75 $connection = GeneralUtility::makeInstance(ConnectionPool::class)->getConnectionForTable(
'index_config');
76 $queryBuilder = $connection->createQueryBuilder();
78 $result = $queryBuilder->select(
'*')
79 ->from(
'index_config')
81 $queryBuilder->expr()->lt(
82 'timer_next_indexing',
83 $queryBuilder->createNamedParameter(
$GLOBALS[
'EXEC_TIME'], \PDO::PARAM_INT)
85 $queryBuilder->expr()->eq(
'set_id', $queryBuilder->createNamedParameter(0, \PDO::PARAM_INT))
90 while ($cfgRec = $result->fetch()) {
92 $setId = GeneralUtility::md5int(microtime());
100 'timer_next_indexing' => $nextTime,
104 'uid' => (
int)$cfgRec[
'uid']
108 switch ($cfgRec[
'type']) {
113 'indexConfigUid' => $cfgRec[
'uid'],
114 'procInstructions' => [
'[Index Cfg UID#' . $cfgRec[
'uid'] .
']'],
115 'url' =>
'Records (start)'
118 $pObj->addQueueEntry_callBack($setId, $params, $this->callBack, $cfgRec[
'pid']);
124 'indexConfigUid' => $cfgRec[
'uid'],
126 'procInstructions' => [
'[Index Cfg UID#' . $cfgRec[
'uid'] .
']'],
128 'url' => $cfgRec[
'filepath'],
132 $pObj->addQueueEntry_callBack($setId, $params, $this->callBack, $cfgRec[
'pid']);
138 'indexConfigUid' => $cfgRec[
'uid'],
140 'procInstructions' => [
'[Index Cfg UID#' . $cfgRec[
'uid'] .
']'],
142 'url' => $cfgRec[
'externalUrl'],
146 $pObj->addQueueEntry_callBack($setId, $params, $this->callBack, $cfgRec[
'pid']);
152 'indexConfigUid' => $cfgRec[
'uid'],
154 'procInstructions' => [
'[Index Cfg UID#' . $cfgRec[
'uid'] .
']'],
156 'url' => (int)$cfgRec[
'alternative_source_pid'],
160 $pObj->addQueueEntry_callBack($setId, $params, $this->callBack, $cfgRec[
'pid']);
167 if (
$GLOBALS[
'TYPO3_CONF_VARS'][
'EXTCONF'][
'indexed_search'][
'crawler'][$cfgRec[
'type']]) {
168 $hookObj = GeneralUtility::makeInstance(
$GLOBALS[
'TYPO3_CONF_VARS'][
'EXTCONF'][
'indexed_search'][
'crawler'][$cfgRec[
'type']]);
171 'indexConfigUid' => $cfgRec[
'uid'],
173 'procInstructions' => [
'[Index Cfg UID#' . $cfgRec[
'uid'] .
'/CUSTOM]'],
175 'url' => $hookObj->initMessage($message)
177 $pObj->addQueueEntry_callBack($setId, $params, $this->callBack, $cfgRec[
'pid']);
195 if ($params[
'indexConfigUid']) {
196 $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
197 ->getQueryBuilderForTable(
'index_config');
198 $queryBuilder->getRestrictions()->removeAll();
200 $cfgRec = $queryBuilder
202 ->from(
'index_config')
204 $queryBuilder->expr()->eq(
206 $queryBuilder->createNamedParameter($params[
'indexConfigUid'], \PDO::PARAM_INT)
211 if (is_array($cfgRec)) {
213 $session_data = unserialize($cfgRec[
'session_data']);
215 switch ($cfgRec[
'type']) {
237 if (
$GLOBALS[
'TYPO3_CONF_VARS'][
'EXTCONF'][
'indexed_search'][
'crawler'][$cfgRec[
'type']]) {
238 $hookObj = GeneralUtility::makeInstance(
$GLOBALS[
'TYPO3_CONF_VARS'][
'EXTCONF'][
'indexed_search'][
'crawler'][$cfgRec[
'type']]);
241 $hookObj->indexOperation($cfgRec, $session_data, $params, $this);
245 GeneralUtility::makeInstance(ConnectionPool::class)
246 ->getConnectionForTable(
'index_config')
249 [
'session_data' => serialize($session_data)],
250 [
'uid' => (
int)$cfgRec[
'uid']]
254 return [
'log' => $params];
267 if ($cfgRec[
'table2index'] && isset(
$GLOBALS[
'TCA'][$cfgRec[
'table2index']])) {
269 if (!is_array($session_data)) {
275 $pid = (int)$cfgRec[
'alternative_source_pid'] ?: $cfgRec[
'pid'];
276 $numberOfRecords = $cfgRec[
'recordsbatch']
283 $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
284 ->getQueryBuilderForTable($cfgRec[
'table2index']);
286 $baseQueryBuilder = $queryBuilder->select(
'*')
287 ->from($cfgRec[
'table2index'])
289 $queryBuilder->expr()->eq(
291 $queryBuilder->createNamedParameter($pid, \PDO::PARAM_INT)
293 $queryBuilder->expr()->gt(
295 $queryBuilder->createNamedParameter($session_data[
'uid'], \PDO::PARAM_INT)
298 $result = $baseQueryBuilder
299 ->setMaxResults($numberOfRecords)
304 while ($row = $result->fetch()) {
308 $session_data[
'uid'] = $row[
'uid'];
311 $rowCount = $baseQueryBuilder->count(
'uid')->execute()->fetchColumn(0);
315 'indexConfigUid' => $cfgRec[
'uid'],
316 'url' =>
'Records from UID#' . ($session_data[
'uid'] + 1) .
'-?',
317 'procInstructions' => [
'[Index Cfg UID#' . $cfgRec[
'uid'] .
']']
319 $pObj->addQueueEntry_callBack($cfgRec[
'set_id'], $nparams, $this->callBack, $cfgRec[
'pid']);
335 $readpath = $params[
'url'];
336 if (!GeneralUtility::isAbsPath($readpath)) {
337 $readpath = GeneralUtility::getFileAbsFileName($readpath);
339 if (GeneralUtility::isAllowedAbsPath($readpath)) {
340 if (@is_file($readpath)) {
345 $indexerObj = GeneralUtility::makeInstance(\
TYPO3\CMS\IndexedSearch\Indexer::class);
346 $indexerObj->backend_initIndexer($cfgRec[
'pid'], 0, 0,
'', $rl);
347 $indexerObj->backend_setFreeIndexUid($cfgRec[
'uid'], $cfgRec[
'set_id']);
348 $indexerObj->hash[
'phash'] = -1;
352 } elseif (@is_dir($readpath)) {
355 $extList = implode(
',', GeneralUtility::trimExplode(
',', $cfgRec[
'extensions'],
true));
357 $files = GeneralUtility::getAllFilesAndFoldersInPath($fileArr, $readpath, $extList, 0, 0);
358 $directoryList = GeneralUtility::get_dirs($readpath);
359 if (is_array($directoryList) && $params[
'depth'] < $cfgRec[
'depth']) {
360 foreach ($directoryList as $subdir) {
361 if ((
string)$subdir !=
'') {
362 $files[] = $readpath . $subdir .
'/';
368 foreach ($files as $path) {
369 $this->instanceCounter++;
370 if ($path !== $params[
'url']) {
373 'indexConfigUid' => $cfgRec[
'uid'],
375 'procInstructions' => [
'[Index Cfg UID#' . $cfgRec[
'uid'] .
']'],
376 'depth' => $params[
'depth'] + 1
378 $pObj->addQueueEntry_callBack($cfgRec[
'set_id'], $nparams, $this->callBack, $cfgRec[
'pid'],
$GLOBALS[
'EXEC_TIME'] + $this->instanceCounter * $this->secondsPerExternalUrl);
396 if (!is_array($session_data)) {
398 'urlLog' => [$params[
'url']]
403 $subUrls = $this->
indexExtUrl($params[
'url'], $cfgRec[
'pid'], $rl, $cfgRec[
'uid'], $cfgRec[
'set_id']);
405 if ($params[
'depth'] < $cfgRec[
'depth']) {
406 foreach ($subUrls as $url) {
407 if ($url = $this->
checkUrl($url, $session_data[
'urlLog'], $cfgRec[
'externalUrl'])) {
409 $this->instanceCounter++;
410 $session_data[
'urlLog'][] = $url;
413 'indexConfigUid' => $cfgRec[
'uid'],
415 'procInstructions' => [
'[Index Cfg UID#' . $cfgRec[
'uid'] .
']'],
416 'depth' => $params[
'depth'] + 1
418 $pObj->addQueueEntry_callBack($cfgRec[
'set_id'], $nparams, $this->callBack, $cfgRec[
'pid'],
$GLOBALS[
'EXEC_TIME'] + $this->instanceCounter * $this->secondsPerExternalUrl);
436 $pageUid = (int)$params[
'url'];
439 $res = $pObj->getUrlsForPageRow($pageRow);
440 $duplicateTrack = [];
446 foreach ($res as $paramSetKey => $vv) {
447 $pObj->urlListFromUrlArray($vv, $pageRow,
$GLOBALS[
'EXEC_TIME'], 30, 1, 0, $duplicateTrack, $downloadUrls, [
'tx_indexedsearch_reindex']);
451 if ($params[
'depth'] < $cfgRec[
'depth']) {
453 $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable(
'pages');
454 $queryBuilder->getRestrictions()
456 ->add(GeneralUtility::makeInstance(DeletedRestriction::class));
457 $result = $queryBuilder->select(
'uid',
'title')
460 $queryBuilder->expr()->eq(
462 $queryBuilder->createNamedParameter($pageUid, \PDO::PARAM_INT)
467 while ($row = $result->fetch()) {
468 $this->instanceCounter++;
469 $url =
'pages:' . $row[
'uid'] .
': ' . $row[
'title'];
470 $session_data[
'urlLog'][] = $url;
473 'indexConfigUid' => $cfgRec[
'uid'],
474 'url' => $row[
'uid'],
475 'procInstructions' => [
'[Index Cfg UID#' . $cfgRec[
'uid'] .
']'],
476 'depth' => $params[
'depth'] + 1
478 $pObj->addQueueEntry_callBack(
483 $GLOBALS[
'EXEC_TIME'] + $this->instanceCounter * $this->secondsPerExternalUrl
494 $connectionPool = GeneralUtility::makeInstance(ConnectionPool::class);
505 $queryBuilder = $connectionPool->getQueryBuilderForTable(
'index_config');
506 $queryBuilder->getRestrictions()
508 ->add(GeneralUtility::makeInstance(DeletedRestriction::class));
511 $runningIndexingConfigurations = $queryBuilder->select(
'*')
512 ->from(
'index_config')
513 ->where($queryBuilder->expr()->neq(
'set_id', $queryBuilder->createNamedParameter(0, \PDO::PARAM_INT)))
518 foreach ($runningIndexingConfigurations as $cfgRec) {
520 $queued_items = $connectionPool->getConnectionForTable(
'tx_crawler_queue')
525 'set_id' => (
int)$cfgRec[
'set_id'],
529 if (!$queued_items) {
531 $queryBuilder = $connectionPool->getQueryBuilderForTable(
'index_phash');
532 $oldPhashRows = $queryBuilder
534 ->from(
'index_phash')
536 $queryBuilder->expr()->eq(
538 $queryBuilder->createNamedParameter($cfgRec[
'uid'], \PDO::PARAM_INT)
540 $queryBuilder->expr()->neq(
542 $queryBuilder->createNamedParameter($cfgRec[
'set_id'], \PDO::PARAM_INT)
549 foreach ($tablesToClean as $table) {
550 $queryBuilder = $connectionPool->getQueryBuilderForTable($table);
551 $queryBuilder->delete($table)
553 $queryBuilder->expr()->in(
555 $queryBuilder->createNamedParameter(
556 array_column($oldPhashRows,
'phash'),
557 Connection::PARAM_INT_ARRAY
565 $connectionPool->getConnectionForTable(
'index_config')
572 [
'uid' => (
int)$cfgRec[
'uid']]
591 public function checkUrl($url, $urlLog, $baseUrl)
593 $url = preg_replace(
'/\\/\\/$/',
'/', $url);
594 list($url) = explode(
'#', $url);
595 if (!strstr($url,
'../')) {
596 if (GeneralUtility::isFirstPartOfStr($url, $baseUrl)) {
597 if (!in_array($url, $urlLog)) {
614 public function indexExtUrl($url, $pageId, $rl, $cfgUid, $setId)
617 $indexerObj = GeneralUtility::makeInstance(\
TYPO3\CMS\IndexedSearch\Indexer::class);
618 $indexerObj->backend_initIndexer($pageId, 0, 0,
'', $rl);
619 $indexerObj->backend_setFreeIndexUid($cfgUid, $setId);
620 $indexerObj->hash[
'phash'] = -1;
622 $indexerObj->indexExternalUrl($url);
623 $url_qParts = parse_url($url);
624 $baseAbsoluteHref = $url_qParts[
'scheme'] .
'://' . $url_qParts[
'host'];
625 $baseHref = $indexerObj->extractBaseHref($indexerObj->indexExternalUrl_content);
628 $baseHref = $baseAbsoluteHref;
629 $baseHref .= substr($url_qParts[
'path'], 0, strrpos($url_qParts[
'path'],
'/'));
631 $baseHref = rtrim($baseHref,
'/');
634 $list = $indexerObj->extractHyperLinks($indexerObj->indexExternalUrl_content);
636 foreach ($list as $count => $linkInfo) {
638 $subUrl = htmlspecialchars_decode($linkInfo[
'href']);
639 $qParts = parse_url($subUrl);
640 if (!$qParts[
'scheme']) {
641 $relativeUrl = GeneralUtility::resolveBackPath($subUrl);
642 if ($relativeUrl[0] ===
'/') {
643 $subUrl = $baseAbsoluteHref . $relativeUrl;
645 $subUrl = $baseHref .
'/' . $relativeUrl;
648 $subUrls[] = $subUrl;
664 $fieldList = GeneralUtility::trimExplode(
',', $cfgRec[
'fieldlist'],
true);
665 $languageField =
$GLOBALS[
'TCA'][$cfgRec[
'table2index']][
'ctrl'][
'languageField'];
666 $sys_language_uid = $languageField ? $r[$languageField] : 0;
668 $indexerObj = GeneralUtility::makeInstance(\
TYPO3\CMS\IndexedSearch\Indexer::class);
669 parse_str(str_replace(
'###UID###', $r[
'uid'], $cfgRec[
'get_params']), $GETparams);
670 $indexerObj->backend_initIndexer($cfgRec[
'pid'], 0, $sys_language_uid,
'', $rl, $GETparams, (
bool)$cfgRec[
'chashcalc']);
671 $indexerObj->backend_setFreeIndexUid($cfgRec[
'uid'], $cfgRec[
'set_id']);
672 $indexerObj->forceIndexing =
true;
674 foreach ($fieldList as $k => $v) {
678 $theContent .= $r[$v] .
' ';
682 $indexerObj->backend_indexAsTYPO3Page(strip_tags(str_replace(
'<',
' <', $theTitle)),
'',
'', strip_tags(str_replace(
'<',
' <', $theContent)),
'utf-8', $r[
$GLOBALS[
'TCA'][$cfgRec[
'table2index']][
'ctrl'][
'tstamp']], $r[
$GLOBALS[
'TCA'][$cfgRec[
'table2index']][
'ctrl'][
'crdate']], $r[
'uid']);
697 $rootLine = GeneralUtility::makeInstance(RootlineUtility::class, $id)->get();
699 $tmpl = GeneralUtility::makeInstance(\
TYPO3\CMS\Core\TypoScript\ExtendedTemplateService::class);
700 $tmpl->runThroughTemplates($rootLine);
702 foreach ($tmpl->rootLine as $rlkey => $rldat) {
703 $rootLineUids[$rlkey] = $rldat[
'uid'];
708 return $rootLineUids;
721 if ($cfgRec[
'timer_frequency'] <= 24 * 3600) {
722 $aMidNight = mktime(0, 0, 0) - 1 * 24 * 3600;
724 $lastTime = $cfgRec[
'timer_next_indexing'] ?:
$GLOBALS[
'EXEC_TIME'];
725 $aMidNight = mktime(0, 0, 0, date(
'm', $lastTime), date(
'd', $lastTime), date(
'y', $lastTime));
731 $frequencyBlocksUntilNextTime = ceil(($currentTime - $lastSureOffset) / $frequencySeconds);
733 return $lastSureOffset + $frequencyBlocksUntilNextTime * $frequencySeconds;
745 if (trim($url_deny)) {
746 $url_denyArray = GeneralUtility::trimExplode(LF, $url_deny,
true);
747 foreach ($url_denyArray as $testurl) {
748 if (GeneralUtility::isFirstPartOfStr($url, $testurl)) {
765 'indexConfigUid' => $cfgRec[
'uid'],
768 'procInstructions' => [
'[Index Cfg UID#' . $cfgRec[
'uid'] .
']']
770 $this->pObj->addQueueEntry_callBack($cfgRec[
'set_id'], $nparams, $this->callBack, $cfgRec[
'pid']);
780 $connectionPool = GeneralUtility::makeInstance(ConnectionPool::class);
784 $queryBuilder = $connectionPool->getQueryBuilderForTable(
'index_section');
785 $oldPhashRows = $queryBuilder->select(
'phash')
786 ->from(
'index_section')
788 $queryBuilder->expr()->eq(
790 $queryBuilder->createNamedParameter($id, \PDO::PARAM_INT)
796 if (empty($oldPhashRows)) {
808 foreach ($tables as $table) {
809 $queryBuilder = $connectionPool->getQueryBuilderForTable($table);
810 $queryBuilder->delete($table)
812 $queryBuilder->expr()->in(
814 $queryBuilder->createNamedParameter(
815 array_column($oldPhashRows,
'phash'),
816 Connection::PARAM_INT_ARRAY
841 if ($command ===
'delete' && $table ===
'pages') {
858 if (empty($fieldArray)) {
862 if ($status ===
'new') {
863 $id = $pObj->substNEWwithIDs[$id];
864 } elseif ($table ===
'pages' && $status ===
'update' && (array_key_exists(
'hidden', $fieldArray) && $fieldArray[
'hidden'] == 1 || array_key_exists(
'no_search', $fieldArray) && $fieldArray[
'no_search'] == 1)) {
870 if (is_array($currentRecord)) {
874 $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
875 ->getQueryBuilderForTable(
'index_config');
876 $result = $queryBuilder->select(
'*')
877 ->from(
'index_config')
879 $queryBuilder->expr()->eq(
'set_id', $queryBuilder->createNamedParameter(0, \PDO::PARAM_INT)),
880 $queryBuilder->expr()->eq(
'type', $queryBuilder->createNamedParameter(1, \PDO::PARAM_INT)),
881 $queryBuilder->expr()->eq(
883 $queryBuilder->createNamedParameter($table, \PDO::PARAM_STR)
885 $queryBuilder->expr()->orX(
886 $queryBuilder->expr()->andX(
887 $queryBuilder->expr()->eq(
888 'alternative_source_pid',
889 $queryBuilder->createNamedParameter(0, \PDO::PARAM_INT)
891 $queryBuilder->expr()->eq(
893 $queryBuilder->createNamedParameter($currentRecord[
'pid'], \PDO::PARAM_INT)
896 $queryBuilder->expr()->eq(
897 'alternative_source_pid',
898 $queryBuilder->createNamedParameter($currentRecord[
'pid'], \PDO::PARAM_INT)
901 $queryBuilder->expr()->eq(
902 'records_indexonchange',
903 $queryBuilder->createNamedParameter(1, \PDO::PARAM_INT)
908 while ($cfgRec = $result->fetch()) {