‪TYPO3CMS  ‪main
IndexSearchRepository.php
Go to the documentation of this file.
1 <?php
2 
3 declare(strict_types=1);
4 
5 /*
6  * This file is part of the TYPO3 CMS project.
7  *
8  * It is free software; you can redistribute it and/or modify it under
9  * the terms of the GNU General Public License, either version 2
10  * of the License, or any later version.
11  *
12  * For the full copyright and license information, please read the
13  * LICENSE.txt file that was distributed with this source code.
14  *
15  * The TYPO3 project - inspiring people to share!
16  */
17 
19 
20 use Doctrine\DBAL\Platforms\MariaDBPlatform as DoctrineMariaDBPlatform;
21 use Doctrine\DBAL\Platforms\MySQLPlatform as DoctrineMySQLPlatform;
22 use Doctrine\DBAL\Result;
37 
43 {
/**
 * External content parsers as handed to initialize(); looked up by item type
 * in multiplePagesType().
 */
protected array $externalParsers = [];

/**
 * Comma-separated group ids of the current frontend user, built in
 * initialize() from the "frontend.user" context aspect.
 */
protected string $frontendUserGroupList = '';

/**
 * Section filter taken from the search data: either an "rl1_" / "rl2_"
 * prefixed id list or a SectionType value (evaluated in sectionTableWhere()).
 */
protected string $sections = '';

/**
 * Word matching mode applied to the search words.
 */
protected SearchType $searchType = SearchType::DISTINCT;

/**
 * Language uid to restrict results to; a negative value disables the
 * language filter (see languageWhere()).
 */
protected int $languageUid = 0;

/**
 * Media type filter applied on IP.item_type (see mediaTypeWhere()).
 */
protected MediaType $mediaType = MediaType::INTERNAL_PAGES;

/**
 * Requested sort order, e.g. "rank_flag", "title", "crdate" or "mtime"
 * (evaluated in execFinalQuery()).
 */
protected string $sortOrder = '';

/**
 * Sort direction toggle, taken from the "desc" search data value.
 */
protected bool $descendingSortOrderFlag = false;

/**
 * Zero-based index of the result page to show.
 */
protected int $resultpagePointer = 0;

/**
 * Number of results per result page.
 */
protected int $numberOfResults = 10;

/**
 * Comma-separated list of root page ids to search below; a negative value
 * disables the restriction.
 */
protected string $searchRootPageIdList = '';

/**
 * Word selection clauses collected while the phash list is built; reused
 * for rank based sorting in execFinalQuery().
 */
protected array $wSelClauses = [];

/**
 * When true, doSearch() walks the complete result set instead of stopping
 * after the current result page, so the count accounts for all filtered rows.
 */
protected bool $useExactCount = false;

/**
 * When true, rows whose resume may not be shown are still listed.
 */
protected bool $displayForbiddenRecords = false;
132 
/**
 * All collaborators are injected and kept as read-only promoted properties.
 */
public function __construct(
    private readonly Context $context,
    private readonly ExtensionConfiguration $extensionConfiguration,
    private readonly TimeTracker $timeTracker,
    private readonly ConnectionPool $connectionPool,
) {
}
139 
/**
 * Prepares the repository state for a search request.
 *
 * @param array $settings Plugin settings; "exactCount" and "displayForbiddenRecords" are evaluated
 * @param array $searchData Search form data (sections, searchType, languageUid, mediaType, sortOrder, desc, pointer, numberOfResults)
 * @param array $externalParsers External parser objects keyed by item type
 * @param int|string $searchRootPageIdList Root page id(s) to search below
 */
public function initialize(array $settings, array $searchData, array $externalParsers, int|string $searchRootPageIdList): void
{
    // Values handed in by the caller / taken from the current context
    $this->externalParsers = $externalParsers;
    $this->searchRootPageIdList = (string)$searchRootPageIdList;
    $frontendGroupIds = $this->context->getPropertyFromAspect('frontend.user', 'groupIds', [0, -1]);
    $this->frontendUserGroupList = implode(',', $frontendGroupIds);

    // Both flags can only be switched on here, never reset to false
    if (!empty($settings['exactCount'])) {
        $this->useExactCount = true;
    }
    if (!empty($settings['displayForbiddenRecords'])) {
        $this->displayForbiddenRecords = true;
    }

    // Search form values, each with a safe fallback
    $this->sections = (string)($searchData['sections'] ?? '');
    $requestedSearchType = SearchType::tryFrom((int)($searchData['searchType'] ?? 0));
    $this->searchType = $requestedSearchType ?? SearchType::DISTINCT;
    $this->languageUid = (int)($searchData['languageUid'] ?? 0);
    $requestedMediaType = MediaType::tryFrom((int)($searchData['mediaType'] ?? 0));
    $this->mediaType = $requestedMediaType ?? MediaType::INTERNAL_PAGES;
    $this->sortOrder = (string)($searchData['sortOrder'] ?? '');
    $this->descendingSortOrderFlag = (bool)($searchData['desc'] ?? false);
    $this->resultpagePointer = (int)($searchData['pointer'] ?? 0);

    // Keep the default page size unless a numeric value was submitted
    $requestedNumberOfResults = $searchData['numberOfResults'] ?? null;
    if (is_numeric($requestedNumberOfResults)) {
        $this->numberOfResults = (int)$requestedNumberOfResults;
    }
}
168 
/**
 * Executes the search and returns the rows of the current result page.
 *
 * Duplicates (same phash grouping or same content hash on the same page) and
 * rows the user may not see are filtered out and subtracted from the count.
 *
 * @param array $searchWords Search word entries, each with "sword" and "oper"
 * @param int $freeIndexUid Indexing configuration uid, passed on to the query methods
 * @return array{resultRows: array, firstRow: array, count: int}|false False if no query result is available
 */
public function doSearch(array $searchWords, int $freeIndexUid): array|false
{
    $useMysqlFulltext = (bool)$this->extensionConfiguration->get('indexed_search', 'useMysqlFulltext');
    // Getting SQL result pointer:
    $this->timeTracker->push('Searching result');
    try {
        // @todo Change method signatures to return the QueryBuilder instead of the Result.
        $result = $useMysqlFulltext
            ? $this->getResultRows_SQLpointerMysqlFulltext($searchWords, $freeIndexUid)
            : $this->getResultRows_SQLpointer($searchWords, $freeIndexUid);
    } finally {
        // Keep the time tracker stack balanced even if the search throws
        $this->timeTracker->pull();
    }
    if (!$result) {
        // No results found
        return false;
    }

    // We need the result row count beforehand for the pointer calculation. Using $result->rowCount() for
    // select queries is not reliable across DBMS, and in case of sqlite it returns 0 even if there is a
    // result set. So all rows are fetched and counted.
    // @todo Change this to a second count() query call, after getResultRows_SQLpointer() signatures/hook
    //       have been changed to return QueryBuilder instead of the Result.
    $rows = $result->fetchAllAssociative();
    // Total search-result count
    $count = count($rows);
    // A page size below 1 would lead to a division by zero and an empty page window
    $resultsPerPage = max(1, $this->numberOfResults);
    // The pointer is set to the result page that is currently being viewed
    $pointer = MathUtility::forceIntegerInRange($this->resultpagePointer, 0, (int)floor($count / $resultsPerPage));
    // Result numbers in ($pageStart, $pageEnd] belong to the current page
    $pageStart = $pointer * $resultsPerPage;
    $pageEnd = $pageStart + $resultsPerPage;

    // Result pointer: counts up the position in the current search-result
    $c = 0;
    // Used to filter out duplicates
    $grouping_phashes = [];
    // Used to filter out duplicates based on content hash and page id
    $grouping_chashes = [];
    // Will hold the first row in result - used to calculate relative hit-ratings
    $firstRow = [];
    // Will hold the result rows for display
    $resultRows = [];

    // Each row contains the fields from 'ISEC.*, IP.*' combined
    // + artificial fields "show_resume" (bool) and "result_number" (counter)
    // @todo Change this back to while($row = $result->fetchAssociative()) after changing
    //       getResultRows_SQLpointer() to return QueryBuilder instead of a Result.
    foreach ($rows as $row) {
        // Set first row (overwritten until the first row has been accepted)
        if ($c === 0) {
            $firstRow = $row;
        }
        // Tells whether we can link directly to a document or not (depends on possible right problems)
        $row['show_resume'] = $this->checkResume($row);
        $contentKey = $row['contentHash'] . '.' . $row['data_page_id'];
        // Strict comparison: hash values may be numeric-looking strings (e.g. "0e…") which
        // would wrongly be treated as equal by a loose in_array().
        $isDuplicate = in_array($row['phash_grouping'], $grouping_phashes, true)
            || in_array($contentKey, $grouping_chashes, true);
        if ($isDuplicate) {
            // For each time a grouped document is found (which is thus not displayed)
            // the search-result count is reduced, so that it matches the number of rows displayed.
            $count--;
            continue;
        }
        if (!$row['show_resume'] && !$this->displayForbiddenRecords) {
            // Skip this row if the user cannot view it (missing permission)
            $count--;
            continue;
        }
        // Only documents which are not multiple pages documents are grouped by phash
        if (!$this->multiplePagesType((string)($row['item_type'] ?? ''))) {
            $grouping_phashes[] = $row['phash_grouping'];
        }
        $grouping_chashes[] = $contentKey;
        // Increase the result pointer
        $c++;
        // All rows for display are put into resultRows[]
        if ($c > $pageStart && $c <= $pageEnd) {
            $row['result_number'] = $c;
            $resultRows[] = $row;
            // Stopping here keeps the search fast, but rows that were not processed yet are not
            // checked for grouping, so the count may be too high. "settings.exactCount" changes this.
            if (!$this->useExactCount && $c + 1 > $pageEnd) {
                break;
            }
        }
    }

    return [
        'resultRows' => $resultRows,
        'firstRow' => $firstRow,
        'count' => $count,
    ];
}
269 
/**
 * Writes one statistics row per search word into "index_stat_word".
 *
 * @param int $pageId Page id the search was executed on
 * @param array $searchWords Search word entries; the "sword" value of each entry is stored
 */
public function writeSearchStat(int $pageId, array $searchWords): void
{
    if ($searchWords === []) {
        return;
    }
    $entries = [];
    foreach ($searchWords as $val) {
        $entries[] = [
            // Stored word is limited to 50 characters
            mb_substr((string)($val['sword'] ?? ''), 0, 50),
            $GLOBALS['EXEC_TIME'],
            $pageId,
        ];
    }
    $this->connectionPool->getConnectionForTable('index_stat_word')
        ->bulkInsert(
            'index_stat_word',
            $entries,
            ['word', 'tstamp', 'pageid'],
            // Bind every column with its explicit type
            [Connection::PARAM_STR, Connection::PARAM_INT, Connection::PARAM_INT]
        );
}
296 
/**
 * Fetches the "index_fulltext" row for the given phash.
 *
 * @return array|null The row, or null if nothing was found
 */
public function getFullTextRowByPhash(string $phash): ?array
{
    $queryBuilder = $this->connectionPool->getQueryBuilderForTable('index_fulltext');
    $phashMatches = $queryBuilder->expr()->eq(
        'phash',
        $queryBuilder->createNamedParameter($phash)
    );
    $row = $queryBuilder
        ->select('*')
        ->from('index_fulltext')
        ->where($phashMatches)
        ->setMaxResults(1)
        ->executeQuery()
        ->fetchAssociative();

    return $row ?: null;
}
313 
/**
 * Fetches uid and title of the "index_config" record with the given uid.
 *
 * @return array|null The record, or null if nothing was found
 */
public function getIndexConfigurationById(int $id): ?array
{
    $queryBuilder = $this->connectionPool->getQueryBuilderForTable('index_config');
    $uidMatches = $queryBuilder->expr()->eq(
        'uid',
        $queryBuilder->createNamedParameter($id, Connection::PARAM_INT)
    );
    $record = $queryBuilder
        ->select('uid', 'title')
        ->from('index_config')
        ->where($uidMatches)
        ->setMaxResults(1)
        ->executeQuery()
        ->fetchAssociative();

    return $record ?: null;
}
330 
/**
 * Collects the phash list for the search words and runs the final query on it.
 *
 * @param array $searchWords Search word entries
 * @param int $freeIndexUid Indexing configuration uid, passed on to execFinalQuery()
 * @return Result|false False if the search words did not yield any phash
 */
protected function getResultRows_SQLpointer(array $searchWords, int $freeIndexUid): Result|false
{
    // Comma-separated list of all phash values matching the search words
    $phashList = $this->getPhashList($searchWords);
    if (!$phashList) {
        return false;
    }

    // Perform SQL search / collection of result rows
    $this->timeTracker->push('execFinalQuery');
    $result = $this->execFinalQuery($phashList, $freeIndexUid);
    $this->timeTracker->pull();

    return $result;
}
352 
/**
 * Runs the search through the fulltext index of "index_fulltext".
 *
 * @param array $searchWordsArray Search word entries
 * @param int $freeIndexUid Indexing configuration uid, passed on to execFinalQuery_fulltext()
 * @return Result|false False if no search string could be built
 * @throws \RuntimeException If "index_fulltext" is not stored on a MySQL or MariaDB platform
 */
protected function getResultRows_SQLpointerMysqlFulltext(array $searchWordsArray, int $freeIndexUid): Result|false
{
    $platform = $this->connectionPool
        ->getConnectionForTable('index_fulltext')
        ->getDatabasePlatform();
    $isMysqlCompatible = $platform instanceof DoctrineMariaDBPlatform
        || $platform instanceof DoctrineMySQLPlatform;
    if (!$isMysqlCompatible) {
        throw new \RuntimeException(
            'Extension indexed_search is configured to use mysql fulltext, but table \'index_fulltext\''
            . ' is running on a different DBMS.',
            1472585525
        );
    }

    // Search string, fulltext index to use and whether boolean mode is required
    $searchData = $this->getSearchString($searchWordsArray);
    if (!$searchData) {
        return false;
    }

    // Perform SQL search / collection of result rows
    $this->timeTracker->push('execFinalQuery');
    $result = $this->execFinalQuery_fulltext($searchData, $freeIndexUid);
    $this->timeTracker->pull();

    return $result;
}
384 
/**
 * Builds the search string for a fulltext MATCH ... AGAINST query and decides
 * whether boolean mode is needed.
 *
 * @param array $searchWordArray Search word entries, each with "sword" and "oper"
 * @return array{searchBoolean: bool, searchString: string, fulltextIndex: string}
 */
protected function getSearchString(array $searchWordArray): array
{
    // Change this to TRUE to force BOOLEAN SEARCH MODE (useful if fulltext index is still empty)
    $searchBoolean = false;
    // This holds the result if the search is natural (doesn't contain any boolean operators)
    $naturalSearchString = '';
    // This holds the result if the search is boolean (contains +/-/| operators)
    $booleanSearchString = '';
    // Starts with the configured type; switches to (and stays at) SENTENCE
    // as soon as one search word contains a space.
    $searchType = $this->searchType;

    // Traverse search words and prefix them with the corresponding operator
    foreach ($searchWordArray as $searchWordData) {
        $searchWord = (string)($searchWordData['sword'] ?? '');
        $wildcard = '';
        // If there are spaces in the search word, search for the sentence instead
        if (str_contains($searchWord, ' ')) {
            $searchType = SearchType::SENTENCE;
        }
        switch ($searchType) {
            case SearchType::PART_OF_WORD:
            case SearchType::FIRST_PART_OF_WORD:
            case SearchType::LAST_PART_OF_WORD:
                // All part-of-word types are searched as "first part of word"
                $wildcard = '*';
                // Part-of-word search requires boolean mode!
                $searchBoolean = true;
                break;
            case SearchType::SENTENCE:
                $searchBoolean = true;
                // Remove existing quotes and fix misplaced quotes.
                $searchWord = trim(str_replace('"', ' ', $searchWord));
                break;
            default:
                // SearchType::DISTINCT and anything else: word is used as is
                break;
        }
        // Append the word to the search strings, depending on its operator
        switch ($searchWordData['oper'] ?? '') {
            case 'AND NOT':
                $booleanSearchString .= ' -' . $searchWord . $wildcard;
                $searchBoolean = true;
                break;
            case 'OR':
                $booleanSearchString .= ' ' . $searchWord . $wildcard;
                $searchBoolean = true;
                break;
            default:
                $booleanSearchString .= ' +' . $searchWord . $wildcard;
                $naturalSearchString .= ' ' . $searchWord;
        }
    }

    if ($searchType === SearchType::SENTENCE) {
        // A sentence is searched as quoted phrase
        $searchString = '"' . trim($naturalSearchString) . '"';
    } elseif ($searchBoolean) {
        $searchString = trim($booleanSearchString);
    } else {
        $searchString = trim($naturalSearchString);
    }

    return [
        'searchBoolean' => $searchBoolean,
        'searchString' => $searchString,
        'fulltextIndex' => 'index_fulltext.fulltextdata',
    ];
}
457 
/**
 * Executes the final search query using a fulltext MATCH ... AGAINST condition.
 *
 * @param array $searchData Result of getSearchString(): "searchBoolean", "searchString" and "fulltextIndex"
 * @param int $freeIndexUid Indexing configuration uid to restrict the result to (see freeIndexUidWhere())
 */
protected function execFinalQuery_fulltext(array $searchData, int $freeIndexUid): Result
{
    $queryBuilder = $this->connectionPool->getQueryBuilderForTable('index_fulltext');
    $queryBuilder->getRestrictions()->removeAll();
    $queryBuilder->select('index_fulltext.*', 'ISEC.*', 'IP.*')
        ->from('index_fulltext')
        ->join(
            'index_fulltext',
            'index_phash',
            'IP',
            $queryBuilder->expr()->eq('index_fulltext.phash', $queryBuilder->quoteIdentifier('IP.phash'))
        )
        ->join(
            'IP',
            'index_section',
            'ISEC',
            $queryBuilder->expr()->eq('IP.phash', $queryBuilder->quoteIdentifier('ISEC.phash'))
        );

    // Restrict to pages below the search root pages, unless the first id is negative
    $searchRootPageIdList = $this->getSearchRootPageIdList();
    if (($searchRootPageIdList[0] ?? 0) >= 0) {
        // Collecting all page ids in which to search, filtering out ALL pages that are not
        // accessible due to restriction containers. Does NOT look for "no_search" field!
        $pageRepository = GeneralUtility::makeInstance(PageRepository::class);
        $idList = $pageRepository->getPageIdsRecursive($searchRootPageIdList, 9999);
        $queryBuilder->andWhere(
            $queryBuilder->expr()->in(
                'ISEC.page_id',
                $queryBuilder->quoteArrayBasedValueListToIntegerList($idList)
            )
        );
    }

    $searchBoolean = '';
    if ($searchData['searchBoolean']) {
        $searchBoolean = ' IN BOOLEAN MODE';
    }
    $queryBuilder->andWhere(
        'MATCH (' . $queryBuilder->quoteIdentifier($searchData['fulltextIndex']) . ')'
        . ' AGAINST (' . $queryBuilder->createNamedParameter($searchData['searchString'])
        . $searchBoolean
        . ')'
    );

    // Same media type, language and indexing configuration filters as execFinalQuery(),
    // plus the section filter which the word based search applies in its phash queries.
    $queryBuilder->andWhere(
        QueryHelper::stripLogicalOperatorPrefix($this->mediaTypeWhere()),
        QueryHelper::stripLogicalOperatorPrefix($this->languageWhere()),
        QueryHelper::stripLogicalOperatorPrefix($this->freeIndexUidWhere($freeIndexUid)),
        QueryHelper::stripLogicalOperatorPrefix($this->sectionTableWhere())
    );

    $queryBuilder->groupBy(
        'IP.phash',
        'ISEC.phash',
        'ISEC.phash_t3',
        'ISEC.rl0',
        'ISEC.rl1',
        'ISEC.rl2',
        'ISEC.page_id',
        'ISEC.uniqid',
        'IP.phash_grouping',
        'IP.data_filename',
        'IP.data_page_id',
        'IP.data_page_type',
        'IP.data_page_mp',
        'IP.gr_list',
        'IP.item_type',
        'IP.item_title',
        'IP.item_description',
        'IP.item_mtime',
        'IP.tstamp',
        'IP.item_size',
        'IP.contentHash',
        'IP.crdate',
        'IP.parsetime',
        'IP.sys_language_uid',
        'IP.item_crdate',
        'IP.externalUrl',
        'IP.recordUid',
        'IP.freeIndexUid',
        'IP.freeIndexSetId'
    );

    return $queryBuilder->executeQuery();
}
551 
552  /***********************************
553  *
554  * Helper functions on searching (SQL)
555  *
556  ***********************************/
/**
 * Searches for every search word and combines the resulting phash values
 * according to the operator of each word.
 *
 * Side effects: resets and refills $this->wSelClauses (through the search
 * methods) and switches $this->sortOrder to "mtime" for sentence searches.
 *
 * @param array $searchWords Search word entries, each with "sword" and "oper"
 * @return string Comma-separated list of matching phash values
 */
protected function getPhashList(array $searchWords): string
{
    // Number of search words processed so far
    $c = 0;
    // This array accumulates the phash values
    $totalHashList = [];
    $this->wSelClauses = [];
    // Traverse search words; for each, select all phash values and
    // merge/diff/intersect them with the previous result (based on operator)
    foreach ($searchWords as $v) {
        $sWord = (string)($v['sword'] ?? '');
        $theType = $this->searchType;
        // If there are spaces in the search word, make a full text search instead.
        if (str_contains($sWord, ' ')) {
            $theType = SearchType::SENTENCE;
        }
        $this->timeTracker->push('SearchWord "' . $sWord . '" - $theType=' . $theType->value);
        // Perform search for word:
        switch ($theType) {
            case SearchType::PART_OF_WORD:
                $res = $this->searchWord($sWord, LikeWildcard::BOTH);
                break;
            case SearchType::FIRST_PART_OF_WORD:
                $res = $this->searchWord($sWord, LikeWildcard::RIGHT);
                break;
            case SearchType::LAST_PART_OF_WORD:
                $res = $this->searchWord($sWord, LikeWildcard::LEFT);
                break;
            case SearchType::SENTENCE:
                $res = $this->searchSentence($sWord);
                // If there is a fulltext search for a sentence there is a likeliness that sorting
                // cannot be done by the rankings from the rel-table (because no relations will
                // exist for the sentence in the word-table). So mtime is used instead.
                // It is not required, but otherwise some hits may be left out.
                $this->sortOrder = 'mtime';
                break;
            default:
                // Distinct word
                $res = $this->searchDistinct($sWord);
        }
        // Collect all phash values found for this word
        $phashList = [];
        while ($row = $res->fetchAssociative()) {
            $phashList[] = $row['phash'];
        }
        // Merge with the existing result, depending on OR, AND NOT or AND (default)
        if ($c) {
            $totalHashList = match ($v['oper'] ?? '') {
                'OR' => array_unique(array_merge($phashList, $totalHashList)),
                'AND NOT' => array_diff($totalHashList, $phashList),
                default => array_intersect($totalHashList, $phashList),
            };
        } else {
            // First search word: its operator is not evaluated
            $totalHashList = $phashList;
        }
        $this->timeTracker->pull();
        $c++;
    }

    return implode(',', $totalHashList);
}
628 
/**
 * Selects the phash values of all indexed documents matching a word condition.
 *
 * @param string $wordSel SQL condition selecting the word(s) in "index_words" (alias IW)
 * @param string $additionalWhereClause Additional SQL condition; a leading logical operator is stripped
 */
protected function execPHashListQuery(string $wordSel, string $additionalWhereClause): Result
{
    $queryBuilder = $this->connectionPool->getQueryBuilderForTable('index_words');
    $queryBuilder->select('IR.phash')
        ->from('index_words', 'IW')
        ->from('index_rel', 'IR')
        ->from('index_section', 'ISEC')
        ->where(
            // Without the word condition every indexed document would match
            $wordSel,
            $queryBuilder->expr()->eq('IW.wid', $queryBuilder->quoteIdentifier('IR.wid')),
            $queryBuilder->expr()->eq('ISEC.phash', $queryBuilder->quoteIdentifier('IR.phash')),
            QueryHelper::stripLogicalOperatorPrefix($this->sectionTableWhere()),
            QueryHelper::stripLogicalOperatorPrefix($additionalWhereClause)
        )
        ->groupBy('IR.phash');

    return $queryBuilder->executeQuery();
}
653 
/**
 * Searches "index_words" for base words matching the search word with a LIKE
 * condition and remembers that condition in $this->wSelClauses.
 *
 * @param string $sWord The search word
 * @param LikeWildcard $likeWildcard Where the wildcard is placed
 */
protected function searchWord(string $sWord, LikeWildcard $likeWildcard): Result
{
    $wordCondition = $likeWildcard->getLikeQueryPart('index_words', 'IW.baseword', $sWord);
    $this->wSelClauses[] = $wordCondition;

    return $this->execPHashListQuery($wordCondition, ' AND is_stopword=0');
}
669 
/**
 * Searches for the exact word by comparing IW.wid with the md5 hash of the
 * search word and remembers that condition in $this->wSelClauses.
 *
 * @param string $sWord The search word
 */
protected function searchDistinct(string $sWord): Result
{
    $expr = $this->connectionPool->getQueryBuilderForTable('index_words')->expr();
    $wordHash = md5($sWord);
    $wordCondition = $expr->eq('IW.wid', $expr->literal($wordHash));
    $this->wSelClauses[] = $wordCondition;
    $noStopWords = $expr->eq('is_stopword', 0);

    return $this->execPHashListQuery($wordCondition, $noStopWords);
}
682 
/**
 * Searches for a sentence with a LIKE condition on "index_fulltext".
 *
 * A neutral "1=1" entry is added to $this->wSelClauses since no word
 * condition exists for a sentence.
 *
 * @param string $sWord The sentence to search for
 */
protected function searchSentence(string $sWord): Result
{
    $this->wSelClauses[] = '1=1';
    $likeWildcard = LikeWildcard::BOTH;
    $likePart = $likeWildcard->getLikeQueryPart(
        'index_fulltext',
        'IFT.fulltextdata',
        $sWord
    );

    $queryBuilder = $this->connectionPool->getQueryBuilderForTable('index_section');
    return $queryBuilder->select('ISEC.phash')
        ->from('index_section', 'ISEC')
        ->from('index_fulltext', 'IFT')
        ->where(
            // Without the LIKE condition every document with fulltext data would match
            $likePart,
            $queryBuilder->expr()->eq('ISEC.phash', $queryBuilder->quoteIdentifier('IFT.phash')),
            QueryHelper::stripLogicalOperatorPrefix($this->sectionTableWhere())
        )
        ->groupBy('ISEC.phash')
        ->executeQuery();
}
710 
/**
 * Builds the WHERE part restricting "index_section" (alias ISEC) to the
 * search root pages and the selected section.
 *
 * @return string Conditions prefixed with " AND ", or an empty string if there are none
 */
protected function sectionTableWhere(): string
{
    $expr = $this->connectionPool->getQueryBuilderForTable('index_section')->expr();
    $conditions = $expr->and();

    // Root line level 0 restriction, skipped for a negative root page value
    if (!($this->searchRootPageIdList < 0)) {
        $rootPageIds = GeneralUtility::intExplode(',', $this->searchRootPageIdList, true);
        $conditions = $conditions->with($expr->in('ISEC.rl0', $rootPageIds));
    }

    // "rl1_<ids>" / "rl2_<ids>" select pages below the given level 1 / level 2 pages
    $levelColumn = null;
    if (str_starts_with($this->sections, 'rl1_')) {
        $levelColumn = 'ISEC.rl1';
    } elseif (str_starts_with($this->sections, 'rl2_')) {
        $levelColumn = 'ISEC.rl2';
    }

    if ($levelColumn !== null) {
        $levelPageIds = GeneralUtility::intExplode(',', substr($this->sections, 4));
        $conditions = $conditions->with($expr->in($levelColumn, $levelPageIds));
    } elseif ($this->sections == (string)SectionType::ONLY_THIS_PAGE->value) {
        // Loose comparison on purpose, same semantics as a switch statement.
        // @todo: This repository either needs to retrieve the request or page uid.
        $pageId = $GLOBALS['TYPO3_REQUEST']->getAttribute('frontend.page.information')->getId();
        $conditions = $conditions->with($expr->eq('ISEC.page_id', $pageId));
    } elseif ($this->sections == (string)SectionType::TOP_AND_CHILDREN->value) {
        $conditions = $conditions->with($expr->eq('ISEC.rl2', 0));
    } elseif ($this->sections == (string)SectionType::LEVEL_TWO_AND_OUT->value) {
        $conditions = $conditions->with($expr->gt('ISEC.rl2', 0));
    }

    if (!$conditions->count()) {
        return '';
    }
    return ' AND ' . $conditions;
}
759 
/**
 * Builds the WHERE part filtering IP.item_type by the selected media type.
 *
 * @return string Condition prefixed with " AND ", or an empty string for MediaType::ALL_MEDIA
 */
protected function mediaTypeWhere(): string
{
    $expr = $this->connectionPool->getQueryBuilderForTable('index_phash')->expr();

    if ($this->mediaType === MediaType::ALL_MEDIA) {
        // Include TYPO3 pages and external media
        $condition = '';
    } elseif ($this->mediaType === MediaType::ALL_EXTERNAL) {
        // Everything except internal pages
        $internalPages = $expr->literal((string)MediaType::INTERNAL_PAGES->value);
        $condition = $expr->neq('IP.item_type', $internalPages);
    } else {
        // Exactly the selected type
        $selectedType = $expr->literal((string)$this->mediaType->value);
        $condition = $expr->eq('IP.item_type', $selectedType);
    }

    if (!$condition) {
        return '';
    }
    return ' AND ' . $condition;
}
775 
/**
 * Builds the WHERE part filtering IP.sys_language_uid by the selected language.
 *
 * @return string Condition prefixed with " AND ", or an empty string if all languages are searched
 */
protected function languageWhere(): string
{
    // A negative language uid (-1) means: all languages
    $searchAllLanguages = $this->languageUid < 0;
    if ($searchAllLanguages) {
        return '';
    }

    $expr = $this->connectionPool->getQueryBuilderForTable('index_phash')->expr();
    $languageCondition = $expr->eq('IP.sys_language_uid', $this->languageUid);

    return ' AND ' . $languageCondition;
}
792 
/**
 * Builds the WHERE part filtering IP.freeIndexUid.
 *
 * If the given uid belongs to an "index_config" record of type 5, its
 * "indexcfgs" references ("index_config_<uid>" / "pages_<uid>") are resolved
 * to the uids of the referenced indexing configurations.
 *
 * @param int $freeIndexUid Indexing configuration uid; a negative value disables the filter
 * @return string Condition prefixed with " AND ", or an empty string
 */
protected function freeIndexUidWhere(int $freeIndexUid): string
{
    if ($freeIndexUid < 0) {
        return '';
    }
    // First, look if the freeIndexUid is a meta configuration:
    $queryBuilder = $this->connectionPool->getQueryBuilderForTable('index_config');
    $indexCfgRec = $queryBuilder->select('indexcfgs')
        ->from('index_config')
        ->where(
            $queryBuilder->expr()->eq('type', $queryBuilder->createNamedParameter(5, Connection::PARAM_INT)),
            $queryBuilder->expr()->eq(
                'uid',
                $queryBuilder->createNamedParameter($freeIndexUid, Connection::PARAM_INT)
            )
        )
        ->executeQuery()
        ->fetchAssociative();

    if (is_array($indexCfgRec)) {
        // Default value to protect against empty array.
        $list = [-99];
        // Empty entries are dropped so they cannot reach the destructuring below
        $refs = GeneralUtility::trimExplode(',', (string)($indexCfgRec['indexcfgs'] ?? ''), true);
        foreach ($refs as $ref) {
            $parts = GeneralUtility::revExplode('_', $ref, 2);
            if (count($parts) !== 2) {
                // Not of the form "<table>_<uid>"
                continue;
            }
            [$table, $uid] = $parts;
            $uid = (int)$uid;
            if ($table !== 'index_config' && $table !== 'pages') {
                continue;
            }
            // "index_config_<uid>" references one record, "pages_<uid>" all records on that page
            $lookupField = $table === 'index_config' ? 'uid' : 'pid';
            $queryBuilder = $this->connectionPool->getQueryBuilderForTable('index_config');
            $result = $queryBuilder
                ->select('uid')
                ->from('index_config')
                ->where(
                    $queryBuilder->expr()->eq(
                        $lookupField,
                        $queryBuilder->createNamedParameter($uid, Connection::PARAM_INT)
                    )
                )
                ->executeQuery();
            if ($table === 'index_config') {
                if ($result->fetchAssociative()) {
                    $list[] = $uid;
                }
            } else {
                while ($idxRec = $result->fetchAssociative()) {
                    $list[] = $idxRec['uid'];
                }
            }
        }
        $list = array_unique($list);
    } else {
        $list = [$freeIndexUid];
    }

    $expressionBuilder = $this->connectionPool->getQueryBuilderForTable('index_phash')->expr();
    return ' AND ' . $expressionBuilder->in('IP.freeIndexUid', array_map(intval(...), $list));
}
865 
/**
 * Executes the final search query for a list of phash values, including
 * filtering by media type, language, indexing configuration and root pages
 * as well as sorting.
 *
 * @param string $list Comma-separated list of phash values
 * @param int $freeIndexUid Indexing configuration uid (see freeIndexUidWhere())
 */
protected function execFinalQuery(string $list, int $freeIndexUid): Result
{
    $queryBuilder = $this->connectionPool->getQueryBuilderForTable('index_words');
    $phashValues = GeneralUtility::trimExplode(',', $list, true);
    // All selected columns take part in the grouping
    $groupColumns = [
        'IP.phash',
        'ISEC.phash',
        'ISEC.phash_t3',
        'ISEC.rl0',
        'ISEC.rl1',
        'ISEC.rl2',
        'ISEC.page_id',
        'ISEC.uniqid',
        'IP.phash_grouping',
        'IP.data_filename',
        'IP.data_page_id',
        'IP.data_page_type',
        'IP.data_page_mp',
        'IP.gr_list',
        'IP.item_type',
        'IP.item_title',
        'IP.item_description',
        'IP.item_mtime',
        'IP.tstamp',
        'IP.item_size',
        'IP.contentHash',
        'IP.crdate',
        'IP.parsetime',
        'IP.sys_language_uid',
        'IP.item_crdate',
        'IP.externalUrl',
        'IP.recordUid',
        'IP.freeIndexUid',
        'IP.freeIndexSetId',
        'IP.static_page_arguments',
    ];

    $queryBuilder
        ->select('ISEC.*', 'IP.*')
        ->from('index_phash', 'IP')
        ->from('index_section', 'ISEC')
        ->where(
            $queryBuilder->expr()->in(
                'IP.phash',
                $queryBuilder->quoteArrayBasedValueListToStringList($phashValues)
            ),
            QueryHelper::stripLogicalOperatorPrefix($this->mediaTypeWhere()),
            QueryHelper::stripLogicalOperatorPrefix($this->languageWhere()),
            QueryHelper::stripLogicalOperatorPrefix($this->freeIndexUidWhere($freeIndexUid)),
            $queryBuilder->expr()->eq('ISEC.phash', $queryBuilder->quoteIdentifier('IP.phash'))
        )
        ->groupBy(...$groupColumns);

    // Restrict to pages below the search root pages; a negative value selects everything
    if ($this->searchRootPageIdList >= 0) {
        // Collecting all page ids in which to search, filtering out ALL pages that are not
        // accessible due to restriction containers. Does NOT look for "no_search" field!
        $rootPageIds = GeneralUtility::intExplode(',', $this->searchRootPageIdList);
        $pageRepository = GeneralUtility::makeInstance(PageRepository::class);
        $searchablePageIds = $pageRepository->getPageIdsRecursive($rootPageIds, 9999);
        $queryBuilder->andWhere(
            $queryBuilder->expr()->in(
                'ISEC.page_id',
                $queryBuilder->quoteArrayBasedValueListToIntegerList($searchablePageIds)
            )
        );
    }

    $direction = $this->getDescendingSortOrderFlag();

    if (!str_starts_with($this->sortOrder, 'rank_')) {
        // Sorting by fields of the phash table: no need to join the rel/word tables
        $sortColumn = match ($this->sortOrder) {
            'title' => 'IP.item_title',
            'crdate' => 'IP.item_crdate',
            'mtime' => 'IP.item_mtime',
            default => null,
        };
        if ($sortColumn !== null) {
            $queryBuilder->orderBy($sortColumn, $direction);
        }

        return $queryBuilder->executeQuery();
    }

    // Ranking requires the word/rel tables again, because the ranking
    // is calculated based on all search words found.
    $queryBuilder
        ->from('index_words', 'IW')
        ->from('index_rel', 'IR')
        ->andWhere(
            $queryBuilder->expr()->eq('IW.wid', $queryBuilder->quoteIdentifier('IR.wid')),
            $queryBuilder->expr()->eq('ISEC.phash', $queryBuilder->quoteIdentifier('IR.phash'))
        );

    if ($this->sortOrder === 'rank_flag') {
        // Gives priority to word position (max value) so that words in title, keywords and
        // description count more than in content. Refined with the frequency sum.
        $queryBuilder
            ->addSelectLiteral(
                $queryBuilder->expr()->max('IR.flags', 'order_val1'),
                $queryBuilder->expr()->sum('IR.freq', 'order_val2')
            )
            ->orderBy('order_val1', $direction)
            ->addOrderBy('order_val2', $direction);
    } elseif ($this->sortOrder === 'rank_first') {
        // Average position of search words on the page.
        // Must be inversely sorted (low numbers are closer to top)
        $queryBuilder
            ->addSelectLiteral($queryBuilder->expr()->avg('IR.first', 'order_val'))
            ->orderBy('order_val', $this->getDescendingSortOrderFlag(true));
    } elseif ($this->sortOrder === 'rank_count') {
        // Number of words found
        $queryBuilder
            ->addSelectLiteral($queryBuilder->expr()->sum('IR.count', 'order_val'))
            ->orderBy('order_val', $direction);
    } else {
        // Any other "rank_*" value: frequency sum
        $queryBuilder
            ->addSelectLiteral($queryBuilder->expr()->sum('IR.freq', 'order_val'))
            ->orderBy('order_val', $direction);
    }

    if (!empty($this->wSelClauses)) {
        // Words are combined in an OR statement
        // (no "sentence search" should be done here - may deselect results)
        $anyWordMatches = $queryBuilder->expr()->or();
        foreach ($this->wSelClauses as $wordClause) {
            $anyWordMatches = $anyWordMatches->with(QueryHelper::stripLogicalOperatorPrefix($wordClause));
        }
        $queryBuilder->andWhere($anyWordMatches);
    }

    return $queryBuilder->executeQuery();
}
1011 
/**
 * Decides whether the resume of a result row may be shown to the current user.
 *
 * @param array $row Result row; reads freeIndexUid, item_type, phash_t3, gr_list and phash
 * @return bool True if the resume may be shown
 */
protected function checkResume(array $row): bool
{
    // Records indexed by an indexing configuration are always shown.
    // At least this is needed for external URLs and files.
    // For records this might need to be extended, e.g. to block access restricted records.
    if ($row['freeIndexUid']) {
        return true;
    }

    $connection = $this->connectionPool->getConnectionForTable('index_grlist');

    if ($row['item_type']) {
        // External media: check access of the parent page the media was linked from.
        // "phash_t3" is the phash of the parent TYPO3 page row which initiated the indexing.
        // If no grlist record is found, another accessible page could theoretically still link
        // the document, so the resume may be hidden unjustified. But better safe than sorry.
        $phashToCheck = $row['phash_t3'];
    } elseif ((string)$row['gr_list'] !== $this->frontendUserGroupList) {
        // Ordinary TYPO3 page indexed with a different group list: a grlist record for the
        // current group list proves that this user has direct access to the content as well.
        $phashToCheck = $row['phash'];
    } else {
        // Ordinary TYPO3 page indexed with exactly the current group list
        return true;
    }

    $matchingGroupLists = $connection->count(
        'phash',
        'index_grlist',
        [
            'phash' => $phashToCheck,
            'gr_list' => $this->frontendUserGroupList,
        ]
    );

    return (bool)$matchingGroupLists;
}
1063 
/**
 * Returns the SQL sort direction suffix derived from the descending sort order flag.
 *
 * NOTE(review): a FALSE effective flag yields ' DESC' and a TRUE one yields an
 * empty string. This mapping is kept exactly as found - confirm it is intended.
 *
 * @param bool $inverse If TRUE, the flag is negated before it is evaluated
 * @return string ' DESC' (with leading space) or an empty string
 */
protected function getDescendingSortOrderFlag(bool $inverse = false): string
{
    // Start from the configured flag; without this initialisation $desc would be
    // an undefined variable and the configured sort direction would be ignored.
    $desc = $this->descendingSortOrderFlag;
    if ($inverse) {
        $desc = !$desc;
    }
    return $desc ? '' : ' DESC';
}
1079 
/**
 * Tells whether the external parser registered for the given item type reports
 * that type as a multiple page extension.
 *
 * @param string $itemType Key used to look up the parser in $this->externalParsers
 * @return bool FALSE if no parser object is registered for the item type, otherwise the parser's answer
 */
protected function multiplePagesType(string $itemType): bool
{
    // presumably a FileContentParser instance - verify against the caller of initialize()
    $parser = $this->externalParsers[$itemType] ?? null;
    if (!is_object($parser)) {
        return false;
    }
    return (bool)$parser->isMultiplePageExtension($itemType);
}
1092 
/**
 * Returns the comma separated search root page id list as an array of integers.
 *
 * @return int[]
 */
protected function getSearchRootPageIdList(): array
{
    $rootPageIds = GeneralUtility::intExplode(',', $this->searchRootPageIdList);

    return $rootPageIds;
}
1102 }
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\searchDistinct
‪searchDistinct(string $sWord)
Definition: IndexSearchRepository.php:675
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\searchWord
‪searchWord(string $sWord, LikeWildcard $likeWildcard)
Definition: IndexSearchRepository.php:659
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\getResultRows_SQLpointerMysqlFulltext
‪getResultRows_SQLpointerMysqlFulltext(array $searchWordsArray, int $freeIndexUid)
Definition: IndexSearchRepository.php:361
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\$mediaType
‪MediaType $mediaType
Definition: IndexSearchRepository.php:76
‪TYPO3\CMS\IndexedSearch\Type\SENTENCE
‪@ SENTENCE
Definition: SearchType.php:29
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\$sortOrder
‪string $sortOrder
Definition: IndexSearchRepository.php:82
‪TYPO3\CMS\Core\Database\Connection\PARAM_INT
‪const PARAM_INT
Definition: Connection.php:52
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\$languageUid
‪int $languageUid
Definition: IndexSearchRepository.php:70
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\getSearchString
‪array getSearchString(array $searchWordArray)
Definition: IndexSearchRepository.php:393
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\$numberOfResults
‪int $numberOfResults
Definition: IndexSearchRepository.php:100
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\writeSearchStat
‪writeSearchStat(int $pageId, array $searchWords)
Definition: IndexSearchRepository.php:275
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\searchSentence
‪searchSentence(string $sWord)
Definition: IndexSearchRepository.php:688
‪TYPO3\CMS\IndexedSearch\Type\MediaType
‪MediaType
Definition: MediaType.php:24
‪TYPO3\CMS\IndexedSearch\Type\SearchType
‪SearchType
Definition: SearchType.php:24
‪TYPO3\CMS\Core\Configuration\ExtensionConfiguration
Definition: ExtensionConfiguration.php:47
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\freeIndexUidWhere
‪string freeIndexUidWhere(int $freeIndexUid)
Definition: IndexSearchRepository.php:799
‪TYPO3\CMS\IndexedSearch\Domain\Repository
Definition: AdministrationRepository.php:16
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository
Definition: IndexSearchRepository.php:43
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\getIndexConfigurationById
‪getIndexConfigurationById(int $id)
Definition: IndexSearchRepository.php:314
‪TYPO3\CMS\IndexedSearch\Type\SectionType
‪SectionType
Definition: SectionType.php:24
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\getSearchRootPageIdList
‪int[] getSearchRootPageIdList()
Definition: IndexSearchRepository.php:1098
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\mediaTypeWhere
‪string mediaTypeWhere()
Definition: IndexSearchRepository.php:765
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\$resultpagePointer
‪int $resultpagePointer
Definition: IndexSearchRepository.php:94
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\checkResume
‪bool checkResume(array $row)
Definition: IndexSearchRepository.php:1020
‪TYPO3\CMS\Core\Database\Connection\PARAM_STR
‪const PARAM_STR
Definition: Connection.php:57
‪TYPO3\CMS\Core\Context\Context
Definition: Context.php:54
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\$displayForbiddenRecords
‪bool $displayForbiddenRecords
Definition: IndexSearchRepository.php:131
‪TYPO3\CMS\IndexedSearch\Type\LEVEL_TWO_AND_OUT
‪@ LEVEL_TWO_AND_OUT
Definition: SectionType.php:28
‪TYPO3\CMS\IndexedSearch\Utility\LikeWildcard
‪LikeWildcard
Definition: LikeWildcard.php:28
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\doSearch
‪array false doSearch(array $searchWords, int $freeIndexUid)
Definition: IndexSearchRepository.php:176
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\execPHashListQuery
‪execPHashListQuery(string $wordSel, string $additionalWhereClause)
Definition: IndexSearchRepository.php:635
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\getPhashList
‪string getPhashList(array $searchWords)
Definition: IndexSearchRepository.php:564
‪TYPO3\CMS\IndexedSearch\Type\ALL_EXTERNAL
‪@ ALL_EXTERNAL
Definition: MediaType.php:27
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\initialize
‪initialize(array $settings, array $searchData, array $externalParsers, int|string $searchRootPageIdList)
Definition: IndexSearchRepository.php:146
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\$wSelClauses
‪array $wSelClauses
Definition: IndexSearchRepository.php:112
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\__construct
‪__construct(private readonly Context $context, private readonly ExtensionConfiguration $extensionConfiguration, private readonly TimeTracker $timeTracker, private readonly ConnectionPool $connectionPool,)
Definition: IndexSearchRepository.php:133
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\getDescendingSortOrderFlag
‪string getDescendingSortOrderFlag(bool $inverse=false)
Definition: IndexSearchRepository.php:1071
‪TYPO3\CMS\Core\Database\Query\QueryHelper
Definition: QueryHelper.php:32
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\$descendingSortOrderFlag
‪bool $descendingSortOrderFlag
Definition: IndexSearchRepository.php:88
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\$searchRootPageIdList
‪string $searchRootPageIdList
Definition: IndexSearchRepository.php:107
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\sectionTableWhere
‪string sectionTableWhere()
Definition: IndexSearchRepository.php:716
‪TYPO3\CMS\IndexedSearch\FileContentParser
Definition: FileContentParser.php:36
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\$sections
‪string $sections
Definition: IndexSearchRepository.php:58
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\$useExactCount
‪bool $useExactCount
Definition: IndexSearchRepository.php:123
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\$externalParsers
‪array $externalParsers
Definition: IndexSearchRepository.php:47
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\execFinalQuery_fulltext
‪execFinalQuery_fulltext(array $searchData, int $freeIndexUid)
Definition: IndexSearchRepository.php:466
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\getFullTextRowByPhash
‪getFullTextRowByPhash(string $phash)
Definition: IndexSearchRepository.php:297
‪TYPO3\CMS\Core\Database\Connection
Definition: Connection.php:41
‪TYPO3\CMS\Webhooks\Message\$uid
‪identifier readonly int $uid
Definition: PageModificationMessage.php:35
‪TYPO3\CMS\Core\Database\Query\QueryHelper\stripLogicalOperatorPrefix
‪static string stripLogicalOperatorPrefix(string $constraint)
Definition: QueryHelper.php:171
‪$GLOBALS
‪$GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['adminpanel']['modules']
Definition: ext_localconf.php:25
‪TYPO3\CMS\Core\Utility\GeneralUtility\revExplode
‪static list< string > revExplode(string $delimiter, string $string, int $limit=0)
Definition: GeneralUtility.php:787
‪TYPO3\CMS\Core\Utility\MathUtility
Definition: MathUtility.php:24
‪TYPO3\CMS\Core\Domain\Repository\PageRepository
Definition: PageRepository.php:69
‪TYPO3\CMS\Core\Database\ConnectionPool
Definition: ConnectionPool.php:46
‪TYPO3\CMS\Core\Utility\MathUtility\forceIntegerInRange
‪static int forceIntegerInRange(mixed $theInt, int $min, int $max=2000000000, int $defaultValue=0)
Definition: MathUtility.php:34
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\$searchType
‪SearchType $searchType
Definition: IndexSearchRepository.php:64
‪TYPO3\CMS\Core\Utility\GeneralUtility
Definition: GeneralUtility.php:52
‪TYPO3\CMS\Core\Utility\GeneralUtility\intExplode
‪static list< int > intExplode(string $delimiter, string $string, bool $removeEmptyValues=false)
Definition: GeneralUtility.php:756
‪TYPO3\CMS\Core\TimeTracker\TimeTracker
Definition: TimeTracker.php:34
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\languageWhere
‪string languageWhere()
Definition: IndexSearchRepository.php:781
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\execFinalQuery
‪execFinalQuery(string $list, int $freeIndexUid)
Definition: IndexSearchRepository.php:872
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\multiplePagesType
‪bool multiplePagesType(string $itemType)
Definition: IndexSearchRepository.php:1086
‪TYPO3\CMS\Core\Utility\GeneralUtility\trimExplode
‪static list< string > trimExplode(string $delim, string $string, bool $removeEmptyValues=false, int $limit=0)
Definition: GeneralUtility.php:822
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\$frontendUserGroupList
‪string $frontendUserGroupList
Definition: IndexSearchRepository.php:52
‪TYPO3\CMS\IndexedSearch\Domain\Repository\IndexSearchRepository\getResultRows_SQLpointer
‪getResultRows_SQLpointer(array $searchWords, int $freeIndexUid)
Definition: IndexSearchRepository.php:337