TYPO3 CMS  TYPO3_8-7
IndexSearchRepository.php
Go to the documentation of this file.
1 <?php
3 
4 /*
5  * This file is part of the TYPO3 CMS project.
6  *
7  * It is free software; you can redistribute it and/or modify it under
8  * the terms of the GNU General Public License, either version 2
9  * of the License, or any later version.
10  *
11  * For the full copyright and license information, please read the
12  * LICENSE.txt file that was distributed with this source code.
13  *
14  * The TYPO3 project - inspiring people to share!
15  */
16 
27 
32 {
// Indexer instance; assigned in initialize() via GeneralUtility::makeInstance(Indexer::class).
38  protected $indexerObj;
39 
// External parsers handed to initialize(); multiplePagesType() looks them up by item type.
45  protected $externalParsers = [];
46 
// Group list of the current frontend user (TSFE->gr_list); compared against index rows in checkResume().
52  protected $frontendUserGroupList = '';
53 
// Copied from $searchData['sections']; evaluated by sectionTableWhere().
60  protected $sections = null;
61 
// Copied from $searchData['searchType']; selects the per-word search strategy in getPhashList().
68  protected $searchType = null;
69 
// Copied from $searchData['languageUid']; languageWhere() treats negative values as "all languages".
76  protected $languageUid = null;
77 
// Copied from $searchData['mediaType'] (false when absent); evaluated by mediaTypeWhere().
84  protected $mediaType = null;
85 
// Copied from $searchData['sortOrder']; getPhashList() overrides it with 'mtime' for sentence searches.
92  protected $sortOrder = null;
93 
// Copied from $searchData['desc'].
100  protected $descendingSortOrderFlag = null;
101 
// Copied from $searchData['pointer']; doSearch() clamps it to the available result pages.
108  protected $resultpagePointer = 0;
109 
// Results per page; overridden by a numeric $searchData['numberOfResults'].
116  protected $numberOfResults = 10;
117 
// NOTE(review): initialize() writes $this->searchRootPageIdList, but no declaration is visible in this view.
126 
// Switched on by $settings['searchSkipExtendToSubpagesChecking']; makes the final queries join the pages table.
133  protected $joinPagesForQuery = false;
134 
// Word conditions collected by the search*() methods; OR-combined in execFinalQuery() for rank sorting.
140  protected $wSelClauses = [];
141 
// Switched on by $settings['exactCount']; when false, doSearch() stops reading rows after the current page.
153  protected $useExactCount = false;
154 
// Switched on by $settings['displayForbiddenRecords']; doSearch() then keeps rows whose resume may not be shown.
163  protected $displayForbiddenRecords = false;
164 
/**
 * Prepares the repository for a search run.
 *
 * Stores the external parsers, the root page id list and the group list of
 * the current frontend user, then copies flags from $settings and the search
 * parameters from $searchData onto the instance.
 *
 * @param array $settings Keys read here: searchSkipExtendToSubpagesChecking, exactCount, displayForbiddenRecords
 * @param array $searchData Keys read here: sections, searchType, languageUid, mediaType, sortOrder, desc,
 *                          pointer, numberOfResults
 * @param array $externalParsers Parser objects, looked up by item type in multiplePagesType()
 * @param string|int $searchRootPageIdList Comma separated list of root page ids
 */
public function initialize($settings, $searchData, $externalParsers, $searchRootPageIdList)
{
    // Initialize the indexer-class - just to use a few function (for making hashes)
    $this->indexerObj = GeneralUtility::makeInstance(Indexer::class);
    $this->externalParsers = $externalParsers;
    $this->searchRootPageIdList = $searchRootPageIdList;
    $this->frontendUserGroupList = $this->getTypoScriptFrontendController()->gr_list;

    // The flags are only ever switched on here; !empty() keeps absent keys
    // from raising undefined-index notices.
    // Should we use joinPagesForQuery instead of long lists of uids?
    if (!empty($settings['searchSkipExtendToSubpagesChecking'])) {
        // Boolean, matching the property default (was the integer 1).
        $this->joinPagesForQuery = true;
    }
    if (!empty($settings['exactCount'])) {
        $this->useExactCount = true;
    }
    if (!empty($settings['displayForbiddenRecords'])) {
        $this->displayForbiddenRecords = true;
    }

    $this->sections = $searchData['sections'] ?? null;
    $this->searchType = $searchData['searchType'] ?? null;
    $this->languageUid = $searchData['languageUid'] ?? null;
    $this->mediaType = $searchData['mediaType'] ?? false;
    $this->sortOrder = $searchData['sortOrder'] ?? null;
    $this->descendingSortOrderFlag = $searchData['desc'] ?? null;
    $this->resultpagePointer = $searchData['pointer'] ?? 0;

    // Only a positive page size is usable: doSearch() divides by this value.
    if (isset($searchData['numberOfResults']) && is_numeric($searchData['numberOfResults'])) {
        $numberOfResults = (int)$searchData['numberOfResults'];
        if ($numberOfResults > 0) {
            $this->numberOfResults = $numberOfResults;
        }
    }
}
201 
/**
 * Runs the search and returns the rows of the currently viewed result page.
 *
 * The query itself is delegated to a registered "getResultRows_SQLpointer"
 * hook, to the MySQL fulltext variant (extension setting useMysqlFulltext
 * equal to '1') or to the default word-index variant, in that order.
 *
 * @param array $searchWords Search words, handed on to the query method
 * @param int $freeIndexUid Handed on to the query method
 * @return array|bool FALSE if the query method returned nothing usable, otherwise an array with the keys
 *                    "resultRows" (rows of the current page), "firstRow" and "count"
 */
public function doSearch($searchWords, $freeIndexUid = -1)
{
    // unserializing the configuration so we can use it here:
    $extConf = [];
    if (isset($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search'])) {
        $extConf = unserialize(
            $GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search'],
            ['allowed_classes' => false]
        );
    }

    // Getting SQL result pointer:
    $this->getTimeTracker()->push('Searching result');
    // Plain assignment: hookRequest() does not return by reference, so "=&" only raised a notice.
    $hookObj = $this->hookRequest('getResultRows_SQLpointer');
    if ($hookObj) {
        $result = $hookObj->getResultRows_SQLpointer($searchWords, $freeIndexUid);
    } elseif (isset($extConf['useMysqlFulltext']) && $extConf['useMysqlFulltext'] === '1') {
        $result = $this->getResultRows_SQLpointerMysqlFulltext($searchWords, $freeIndexUid);
    } else {
        $result = $this->getResultRows_SQLpointer($searchWords, $freeIndexUid);
    }
    $this->getTimeTracker()->pull();

    if (!$result) {
        // No results found
        return false;
    }

    // Guard against a page size of zero or less, which would divide by zero below.
    $resultsPerPage = max(1, (int)$this->numberOfResults);
    // Total search-result count
    $count = $result->rowCount();
    // The pointer is set to the result page that is currently being viewed
    $pointer = MathUtility::forceIntegerInRange($this->resultpagePointer, 0, floor($count / $resultsPerPage));
    // Rows numbered ($firstOnPage, $lastOnPage] belong to the current page.
    $firstOnPage = $pointer * $resultsPerPage;
    $lastOnPage = $firstOnPage + $resultsPerPage;

    // Result pointer: Counts up the position in the current search-result
    $c = 0;
    // Used to filter out duplicates (keyed by phash_grouping for constant-time lookup).
    $seenPhashGroups = [];
    // Used to filter out duplicates BASED ON cHash (keyed by "contentHash.data_page_id").
    $seenContentHashes = [];
    // Will hold the first row in result - used to calculate relative hit-ratings.
    $firstRow = [];
    // Will hold the results rows for display.
    $resultRows = [];

    // Each row should contain the fields from 'ISEC.*, IP.*' combined
    // + artificial fields "show_resume" (bool) and "result_number" (counter)
    while ($row = $result->fetch()) {
        // Set first row (re-assigned until the first row has been accepted)
        if (!$c) {
            $firstRow = $row;
        }

        $phashGroupKey = (string)$row['phash_grouping'];
        $contentHashKey = $row['contentHash'] . '.' . $row['data_page_id'];
        if (isset($seenPhashGroups[$phashGroupKey]) || isset($seenContentHashes[$contentHashKey])) {
            // For each duplicate (which is thus not displayed) the search-result
            // count is reduced, so that it matches the number of rows displayed.
            $count--;
            continue;
        }

        // Tells whether we can link directly to a document
        // or not (depends on possible right problems).
        // Evaluated after the duplicate check so discarded rows cost no lookup.
        $row['show_resume'] = $this->checkResume($row);
        if (!$row['show_resume'] && !$this->displayForbiddenRecords) {
            // Skip this row if the user cannot view it (missing permission)
            $count--;
            continue;
        }

        // Only on documents which are not multiple pages documents
        if (!$this->multiplePagesType($row['item_type'])) {
            $seenPhashGroups[$phashGroupKey] = true;
        }
        $seenContentHashes[$contentHashKey] = true;
        // Increase the result pointer
        $c++;

        // All rows for display is put into resultRows[]
        if ($c > $firstOnPage && $c <= $lastOnPage) {
            $row['result_number'] = $c;
            $resultRows[] = $row;
            // This may lead to a problem: If the result check is not stopped here, the search will take longer.
            // However the result counter will not filter out grouped cHashes/pHashes that were not processed yet.
            // You can change this behavior using the "settings.exactCount" property.
            if (!$this->useExactCount && $c + 1 > $lastOnPage) {
                break;
            }
        }
    }

    $result->closeCursor();

    return [
        'resultRows' => $resultRows,
        'firstRow' => $firstRow,
        'count' => $count
    ];
}
305 
/**
 * Default (word index based) query: resolves the search words to a list of
 * phash values and, if there are any, runs the final query on that list.
 *
 * @param array $searchWords Search words
 * @param int $freeIndexUid Handed on to execFinalQuery()
 * @return mixed Result of execFinalQuery(), or FALSE if no phash matched
 */
protected function getResultRows_SQLpointer($searchWords, $freeIndexUid = -1)
{
    // Comma separated phash integers of ALL matches for the given words.
    $phashList = $this->getPhashList($searchWords);
    if (!$phashList) {
        return false;
    }

    // Run the final query, wrapped in a time tracker section.
    $this->getTimeTracker()->push('execFinalQuery');
    $queryResult = $this->execFinalQuery($phashList, $freeIndexUid);
    $this->getTimeTracker()->pull();

    return $queryResult;
}
328 
/**
 * MySQL fulltext variant of the result query.
 *
 * @param array $searchWordsArray Search words
 * @param int $freeIndexUid Handed on to execFinalQuery_fulltext()
 * @return mixed Result of execFinalQuery_fulltext(), or FALSE if no search data could be built
 * @throws \RuntimeException If the server version of the connection for index_fulltext does not start with "MySQL"
 */
protected function getResultRows_SQLpointerMysqlFulltext($searchWordsArray, $freeIndexUid = -1)
{
    $fulltextConnection = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getConnectionForTable('index_fulltext');
    $runsOnMysql = strpos($fulltextConnection->getServerVersion(), 'MySQL') === 0;
    if (!$runsOnMysql) {
        throw new \RuntimeException(
            'Extension indexed_search is configured to use mysql fulltext, but table \'index_fulltext\''
            . ' is running on a different DBMS.',
            1472585525
        );
    }

    // Search string, fulltext index to use, and whether boolean mode is needed.
    $searchData = $this->getSearchString($searchWordsArray);
    if (!$searchData) {
        return false;
    }

    // Run the final query, wrapped in a time tracker section.
    $tracker = GeneralUtility::makeInstance(TimeTracker::class);
    $tracker->push('execFinalQuery');
    $queryResult = $this->execFinalQuery_fulltext($searchData, $freeIndexUid);
    $tracker->pull();

    return $queryResult;
}
362 
/**
 * Builds the search string for a MATCH ... AGAINST query from the search words.
 *
 * @param array $searchWordArray Search words; each entry is read via its "sword" and "oper" keys
 * @return array Keys: "searchBoolean" (whether boolean mode is needed), "searchString" and
 *               "fulltextIndex" (the column to match against)
 */
protected function getSearchString($searchWordArray)
{
    // Change this to TRUE to force BOOLEAN SEARCH MODE (useful if fulltext index is still empty)
    $searchBoolean = false;
    $fulltextIndex = 'index_fulltext.fulltextdata';
    // Filled for words without an OR / AND NOT operator; used when no boolean mode is required
    $naturalSearchString = '';
    // Every word prefixed with its boolean operator (+, - or none)
    $booleanSearchString = '';

    $searchType = (string)$this->getSearchType();

    foreach ($searchWordArray as $searchWordData) {
        $searchWord = $searchWordData['sword'];
        $wildcard = '';
        // A word containing a space switches to sentence search; this sticks for all following words.
        if (strpos($searchWord, ' ') !== false) {
            $searchType = '20';
        }

        switch ($searchType) {
            case '1':
            case '2':
            case '3':
                // Part-of-word search: trailing wildcard, which requires boolean mode!
                $wildcard = '*';
                $searchBoolean = true;
                break;
            case '10':
                // Sounds like: search the metaphone column with the metaphone value of the word
                $indexer = GeneralUtility::makeInstance(Indexer::class);
                $searchWord = $indexer->metaphone($searchWord, $indexer->storeMetaphoneInfoAsWords);
                $fulltextIndex = 'index_fulltext.metaphonedata';
                break;
            case '20':
                $searchBoolean = true;
                // Remove existing quotes and fix misplaced quotes.
                $searchWord = trim(str_replace('"', ' ', $searchWord));
                break;
        }

        // Prefix the word according to its operator:
        $operator = $searchWordData['oper'];
        if ($operator === 'AND NOT') {
            $booleanSearchString .= ' -' . $searchWord . $wildcard;
            $searchBoolean = true;
        } elseif ($operator === 'OR') {
            $booleanSearchString .= ' ' . $searchWord . $wildcard;
            $searchBoolean = true;
        } else {
            $booleanSearchString .= ' +' . $searchWord . $wildcard;
            $naturalSearchString .= ' ' . $searchWord;
        }
    }

    if ($searchType === '20') {
        $searchString = '"' . trim($naturalSearchString) . '"';
    } elseif ($searchBoolean) {
        $searchString = trim($booleanSearchString);
    } else {
        $searchString = trim($naturalSearchString);
    }

    return [
        'searchBoolean' => $searchBoolean,
        'searchString' => $searchString,
        'fulltextIndex' => $fulltextIndex
    ];
}
445 
/**
 * Runs the final MATCH ... AGAINST query on index_fulltext joined with
 * index_phash (IP) and index_section (ISEC) and returns the executed statement.
 *
 * $searchData is read via its keys "searchBoolean", "searchString" and "fulltextIndex".
 *
 * NOTE(review): several lines of this method are missing from this listing
 * (before the hook call, inside the page id loop and the arguments of the
 * second andWhere() call). In particular the assignment of the local
 * $searchRootPageIdList and the use of $freeIndexUid are not visible here -
 * confirm against the full source before changing this method.
 */
455  protected function execFinalQuery_fulltext($searchData, $freeIndexUid = -1)
456  {
457  $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('index_fulltext');
458  $queryBuilder->getRestrictions()->removeAll();
459  $queryBuilder->select('index_fulltext.*', 'ISEC.*', 'IP.*')
460  ->from('index_fulltext')
461  ->join(
462  'index_fulltext',
463  'index_phash',
464  'IP',
465  $queryBuilder->expr()->eq('index_fulltext.phash', $queryBuilder->quoteIdentifier('IP.phash'))
466  )
467  ->join(
468  'IP',
469  'index_section',
470  'ISEC',
471  $queryBuilder->expr()->eq('IP.phash', $queryBuilder->quoteIdentifier('ISEC.phash'))
472  );
473 
474  // Calling hook for alternative creation of page ID list
476  if ($hookObj = &$this->hookRequest('execFinalQuery_idList')) {
477  $pageWhere = $hookObj->execFinalQuery_idList('');
478  $queryBuilder->andWhere(QueryHelper::stripLogicalOperatorPrefix($pageWhere));
479  } elseif ($this->joinPagesForQuery) {
480  // Alternative to getting all page ids by ->getTreeList() where "excludeSubpages" is NOT respected.
481  $queryBuilder
482  ->join(
483  'ISEC',
484  'pages',
485  'pages',
486  $queryBuilder->expr()->eq('ISEC.page_id', $queryBuilder->quoteIdentifier('pages.uid'))
487  )
488  ->andWhere(
489  $queryBuilder->expr()->eq(
490  'pages.no_search',
491  $queryBuilder->createNamedParameter(0, \PDO::PARAM_INT)
492  )
493  )
494  ->andWhere(
495  $queryBuilder->expr()->lt(
496  'pages.doktype',
497  $queryBuilder->createNamedParameter(200, \PDO::PARAM_INT)
498  )
499  );
// The restrictions removed at the top are replaced by the frontend restrictions for this branch.
500  $queryBuilder->setRestrictions(GeneralUtility::makeInstance(FrontendRestrictionContainer::class));
501  } elseif ($searchRootPageIdList[0] >= 0) {
502  // Collecting all pages IDs in which to search;
503  // filtering out ALL pages that are not accessible due to restriction containers. Does NOT look for "no_search" field!
504  $idList = [];
505  foreach ($searchRootPageIdList as $rootId) {
507  $cObj = GeneralUtility::makeInstance(\TYPO3\CMS\Frontend\ContentObject\ContentObjectRenderer::class);
508  $idList[] = $cObj->getTreeList(-1 * $rootId, 9999);
509  }
// Each getTreeList() call contributes a comma separated list; flatten all of them into one integer array.
510  $idList = GeneralUtility::intExplode(',', implode(',', $idList));
511  $queryBuilder->andWhere(
512  $queryBuilder->expr()->in(
513  'ISEC.page_id',
514  $queryBuilder->createNamedParameter($idList, Connection::PARAM_INT_ARRAY)
515  )
516  );
517  }
518 
519  $searchBoolean = '';
520  if ($searchData['searchBoolean']) {
521  $searchBoolean = ' IN BOOLEAN MODE';
522  }
// The search string is bound as a named parameter; only the column identifier and the mode keyword are concatenated.
523  $queryBuilder->andWhere(
524  'MATCH (' . $queryBuilder->quoteIdentifier($searchData['fulltextIndex']) . ')'
525  . ' AGAINST (' . $queryBuilder->createNamedParameter($searchData['searchString'])
526  . $searchBoolean
527  . ')'
528  );
529 
// NOTE(review): the arguments of this andWhere() call are missing from this listing.
530  $queryBuilder->andWhere(
535  );
536 
537  $queryBuilder->groupBy(
538  'IP.phash',
539  'ISEC.phash',
540  'ISEC.phash_t3',
541  'ISEC.rl0',
542  'ISEC.rl1',
543  'ISEC.rl2',
544  'ISEC.page_id',
545  'ISEC.uniqid',
546  'IP.phash_grouping',
547  'IP.data_filename',
548  'IP.data_page_id',
549  'IP.data_page_reg1',
550  'IP.data_page_type',
551  'IP.data_page_mp',
552  'IP.gr_list',
553  'IP.item_type',
554  'IP.item_title',
555  'IP.item_description',
556  'IP.item_mtime',
557  'IP.tstamp',
558  'IP.item_size',
559  'IP.contentHash',
560  'IP.crdate',
561  'IP.parsetime',
562  'IP.sys_language_uid',
563  'IP.item_crdate',
564  'IP.cHashParams',
565  'IP.externalUrl',
566  'IP.recordUid',
567  'IP.freeIndexUid',
568  'IP.freeIndexSetId'
569  );
570 
571  return $queryBuilder->execute();
572  }
573 
574  /***********************************
575  *
576  * Helper functions on searching (SQL)
577  *
578  ***********************************/
/**
 * Resolves the search words to the phash values of all matching documents.
 *
 * Each word is searched on its own; the per-word lists are then combined
 * according to the word's operator (OR: union, AND NOT: difference,
 * anything else: intersection). Also resets and refills $this->wSelClauses
 * through the search*() methods it calls.
 *
 * @param array $searchWords Search words; each entry is read via its "sword" and "oper" keys
 * @return string Comma separated list of phash values (empty string if nothing matched)
 */
protected function getPhashList($searchWords)
{
    $isFirstWord = true;
    // This array accumulates the phash-values
    $totalHashList = [];
    $this->wSelClauses = [];

    foreach ($searchWords as $searchWordData) {
        $sWord = $searchWordData['sword'];
        // If there are spaces in the search-word, make a full text search instead.
        $theType = strpos($sWord, ' ') !== false ? '20' : (string)$this->searchType;
        $this->getTimeTracker()->push('SearchWord "' . $sWord . '" - $theType=' . $theType);

        // Perform search for word:
        switch ($theType) {
            case '1':
                // Part of word
                $res = $this->searchWord($sWord, Utility\LikeWildcard::BOTH);
                break;
            case '2':
                // First part of word
                $res = $this->searchWord($sWord, Utility\LikeWildcard::RIGHT);
                break;
            case '3':
                // Last part of word
                $res = $this->searchWord($sWord, Utility\LikeWildcard::LEFT);
                break;
            case '10':
                // Sounds like: perform metaphone search
                $indexer = GeneralUtility::makeInstance(Indexer::class);
                $storeMetaphoneInfoAsWords = !$this->isTableUsed('index_words');
                $res = $this->searchMetaphone($indexer->metaphone($sWord, $storeMetaphoneInfoAsWords));
                break;
            case '20':
                // Sentence
                $res = $this->searchSentence($sWord);
                // If there is a fulltext search for a sentence there is
                // a likeliness that sorting cannot be done by the rankings
                // from the rel-table (because no relations will exist for the
                // sentence in the word-table). So therefore mtime is used instead.
                // It is not required, but otherwise some hits may be left out.
                $this->sortOrder = 'mtime';
                break;
            default:
                // Distinct word
                $res = $this->searchDistinct($sWord);
        }

        // If there was a query to do, then select all phash-integers which resulted from this.
        if ($res) {
            $phashList = [];
            while ($row = $res->fetch()) {
                $phashList[] = $row['phash'];
            }

            if ($isFirstWord) {
                // First search
                $totalHashList = $phashList;
            } elseif ($searchWordData['oper'] === 'OR') {
                $totalHashList = array_unique(array_merge($phashList, $totalHashList));
            } elseif ($searchWordData['oper'] === 'AND NOT') {
                $totalHashList = array_diff($totalHashList, $phashList);
            } else {
                // AND...
                $totalHashList = array_intersect($totalHashList, $phashList);
            }
        }

        $this->getTimeTracker()->pull();
        $isFirstWord = false;
    }

    return implode(',', $totalHashList);
}
674 
/**
 * Selects the phash values (grouped) from index_words (IW), index_rel (IR)
 * and index_section (ISEC) and returns the executed statement.
 *
 * $additionalWhereClause is added after stripping a leading logical operator.
 *
 * NOTE(review): two lines of the where() argument list are missing from
 * this listing; $wordSel is not referenced on any visible line, so it is
 * presumably applied on one of the missing lines - confirm against the full source.
 */
682  protected function execPHashListQuery($wordSel, $additionalWhereClause = '')
683  {
684  $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('index_words');
685  $queryBuilder->select('IR.phash')
686  ->from('index_words', 'IW')
687  ->from('index_rel', 'IR')
688  ->from('index_section', 'ISEC')
689  ->where(
// Join conditions: word -> relation -> section.
691  $queryBuilder->expr()->eq('IW.wid', $queryBuilder->quoteIdentifier('IR.wid')),
692  $queryBuilder->expr()->eq('ISEC.phash', $queryBuilder->quoteIdentifier('IR.phash')),
694  QueryHelper::stripLogicalOperatorPrefix($additionalWhereClause)
695  )
696  ->groupBy('IR.phash');
697 
698  return $queryBuilder->execute();
699  }
700 
/**
 * Searches index_words for a (partial) word using a LIKE condition.
 *
 * The condition is also remembered in $this->wSelClauses.
 *
 * @param string $sWord Word to search for
 * @param int $wildcard One of the Utility\LikeWildcard values
 * @return mixed Result of execPHashListQuery()
 */
protected function searchWord($sWord, $wildcard)
{
    $wordCondition = Utility\LikeWildcard::cast($wildcard)
        ->getLikeQueryPart('index_words', 'IW.baseword', $sWord);
    $this->wSelClauses[] = $wordCondition;

    return $this->execPHashListQuery($wordCondition, ' AND is_stopword=0');
}
719 
/**
 * Searches index_words for an exact word by comparing IW.wid with the
 * integer hash of the word (see md5inthash()).
 *
 * The condition is also remembered in $this->wSelClauses.
 *
 * @param string $sWord Word to search for
 * @return mixed Result of execPHashListQuery()
 */
protected function searchDistinct($sWord)
{
    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getQueryBuilderForTable('index_words');
    $expr = $queryBuilder->expr();

    $wordCondition = $expr->eq('IW.wid', $this->md5inthash($sWord));
    $this->wSelClauses[] = $wordCondition;
    $noStopWordCondition = $expr->eq('is_stopword', 0);

    return $this->execPHashListQuery($wordCondition, $noStopWordCondition);
}
735 
/**
 * Searches for a sentence: selects the phash values (grouped) from
 * index_section (ISEC) joined with index_fulltext (IFT) and returns the
 * executed statement. Registers a neutral '1=1' entry in $this->wSelClauses.
 *
 * NOTE(review): two lines of the where() argument list are missing from
 * this listing; $likePart (a LIKE condition on IFT.fulltextdata with
 * wildcards on both sides) is not referenced on any visible line, so it is
 * presumably applied on one of the missing lines - confirm against the full source.
 */
742  protected function searchSentence($sWord)
743  {
744  $this->wSelClauses[] = '1=1';
745  $likeWildcard = Utility\LikeWildcard::cast(Utility\LikeWildcard::BOTH);
746  $likePart = $likeWildcard->getLikeQueryPart(
747  'index_fulltext',
748  'IFT.fulltextdata',
749  $sWord
750  );
751 
752  $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('index_section');
753  return $queryBuilder->select('ISEC.phash')
754  ->from('index_section', 'ISEC')
755  ->from('index_fulltext', 'IFT')
756  ->where(
// Join condition between section and fulltext rows.
758  $queryBuilder->expr()->eq('ISEC.phash', $queryBuilder->quoteIdentifier(('IFT.phash'))),
760  )
761  ->groupBy('ISEC.phash')
762  ->execute();
763  }
764 
/**
 * Searches index_words by metaphone value ("sounds like" search).
 *
 * The condition is also remembered in $this->wSelClauses.
 *
 * @param string $sWord Metaphone value to compare IW.metaphone with
 * @return mixed Result of execPHashListQuery()
 */
protected function searchMetaphone($sWord)
{
    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getQueryBuilderForTable('index_words');
    $expr = $queryBuilder->expr();

    $metaphoneCondition = $expr->eq('IW.metaphone', $expr->literal($sWord));
    $this->wSelClauses[] = $metaphoneCondition;
    $noStopWordCondition = $expr->eq('is_stopword', 0);

    return $this->execPHashListQuery($metaphoneCondition, $noStopWordCondition);
}
780 
/**
 * Builds the WHERE part that limits the search to sections (index_section, alias ISEC).
 *
 * Evaluates $this->searchRootPageIdList (rl0) and $this->sections, which may be
 * "rl1_<ids>", "rl2_<ids>", "<configured field>_<ids>" or one of the static
 * values "-1" (current page only), "-2" (rl2 = 0) and "-3" (rl2 > 0).
 *
 * @return string Empty string, or the conditions prefixed with " AND "
 */
public function sectionTableWhere()
{
    $expressionBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getQueryBuilderForTable('index_section')
        ->expr();

    $whereClause = $expressionBuilder->andX();
    // Cast once: the property is null until initialize() ran, and the string functions below expect a string.
    $sections = (string)$this->sections;
    $match = false;

    if (!($this->searchRootPageIdList < 0)) {
        $whereClause->add(
            $expressionBuilder->in('ISEC.rl0', GeneralUtility::intExplode(',', $this->searchRootPageIdList, true))
        );
    }

    if (substr($sections, 0, 4) === 'rl1_') {
        $whereClause->add(
            $expressionBuilder->in('ISEC.rl1', GeneralUtility::intExplode(',', substr($sections, 4)))
        );
        $match = true;
    } elseif (substr($sections, 0, 4) === 'rl2_') {
        $whereClause->add(
            $expressionBuilder->in('ISEC.rl2', GeneralUtility::intExplode(',', substr($sections, 4)))
        );
        $match = true;
    } else {
        // The option is frequently not configured at all; "??" avoids an undefined-index notice in that case.
        $rootLineFields = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields'] ?? null;
        if (is_array($rootLineFields)) {
            // Traversing user configured fields to see if any of those are used to limit search to a section:
            foreach ($rootLineFields as $fieldName => $rootLineLevel) {
                $prefix = $fieldName . '_';
                if (substr($sections, 0, strlen($prefix)) === $prefix) {
                    $whereClause->add(
                        $expressionBuilder->in(
                            'ISEC.' . $fieldName,
                            GeneralUtility::intExplode(',', substr($sections, strlen($prefix)))
                        )
                    );
                    $match = true;
                    break;
                }
            }
        }
    }

    // If no match above, test the static types:
    if (!$match) {
        switch ($sections) {
            case '-1':
                $whereClause->add(
                    $expressionBuilder->eq('ISEC.page_id', (int)$this->getTypoScriptFrontendController()->id)
                );
                break;
            case '-2':
                $whereClause->add($expressionBuilder->eq('ISEC.rl2', 0));
                break;
            case '-3':
                $whereClause->add($expressionBuilder->gt('ISEC.rl2', 0));
                break;
        }
    }

    return $whereClause->count() ? ' AND ' . $whereClause : '';
}
843 
/**
 * Builds the WHERE part that limits the search to a media type (index_phash, alias IP).
 *
 * - not set (false, null or empty string) or "-1": all content, no condition
 * - "0": only TYPO3 pages (item_type = '0')
 * - "-2": all external documents (item_type != '0')
 * - anything else: item_type equal to the given media type
 *
 * @return string Empty string, or the condition prefixed with " AND "
 */
public function mediaTypeWhere()
{
    // "Not set" must be tested with strict comparisons and before anything else:
    // initialize() stores FALSE for a missing media type, and a switch compares
    // loosely (false == '0'), which used to send that case into the
    // "only TYPO3 pages" branch instead of "all content".
    if ($this->mediaType === false || $this->mediaType === null || $this->mediaType === '') {
        return '';
    }

    $mediaType = (string)$this->mediaType;
    if ($mediaType === '-1') {
        // All content
        return '';
    }

    $expressionBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getQueryBuilderForTable('index_phash')
        ->expr();

    if ($mediaType === '0') {
        // '0' => 'only TYPO3 pages',
        $whereClause = $expressionBuilder->eq('IP.item_type', $expressionBuilder->literal('0'));
    } elseif ($mediaType === '-2') {
        // All external documents
        $whereClause = $expressionBuilder->neq('IP.item_type', $expressionBuilder->literal('0'));
    } else {
        $whereClause = $expressionBuilder->eq('IP.item_type', $expressionBuilder->literal($mediaType));
    }

    return ' AND ' . $whereClause;
}
874 
/**
 * Builds the WHERE part that limits the search to one language (index_phash, alias IP).
 *
 * @return string Empty string for a negative language uid (-1 is the same as ALL languages),
 *                otherwise the condition prefixed with " AND "
 */
public function languageWhere()
{
    $searchAllLanguages = $this->languageUid < 0;
    if ($searchAllLanguages) {
        return '';
    }

    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getQueryBuilderForTable('index_phash');
    $languageCondition = $queryBuilder->expr()->eq('IP.sys_language_uid', (int)$this->languageUid);

    return ' AND ' . $languageCondition;
}
893 
/**
 * Builds the WHERE part that limits the search to an indexing configuration
 * (IP.freeIndexUid).
 *
 * If the given uid belongs to an index_config record of type 5, its
 * "indexcfgs" field is resolved: "index_config_<uid>" entries contribute
 * that uid if the record exists, "pages_<uid>" entries contribute all
 * index_config records stored on that page. Otherwise the uid itself is used.
 *
 * @param int $freeIndexUid Uid of the indexing configuration; negative values disable the filter
 * @return string Empty string, or the condition prefixed with " AND "
 */
public function freeIndexUidWhere($freeIndexUid)
{
    $freeIndexUid = (int)$freeIndexUid;
    if ($freeIndexUid < 0) {
        return '';
    }

    // First, look if the freeIndexUid is a meta configuration:
    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getQueryBuilderForTable('index_config');
    $indexCfgRec = $queryBuilder->select('indexcfgs')
        ->from('index_config')
        ->where(
            $queryBuilder->expr()->eq('type', $queryBuilder->createNamedParameter(5, \PDO::PARAM_INT)),
            $queryBuilder->expr()->eq(
                'uid',
                $queryBuilder->createNamedParameter($freeIndexUid, \PDO::PARAM_INT)
            )
        )
        ->execute()
        ->fetch();

    if (is_array($indexCfgRec)) {
        // Default value to protect against empty array.
        $list = [-99];
        // Empty entries are dropped up front; they can never name a table.
        $refs = GeneralUtility::trimExplode(',', $indexCfgRec['indexcfgs'], true);
        foreach ($refs as $ref) {
            // Pad so that a reference without "_" does not raise an undefined-offset notice.
            list($table, $uid) = array_pad(GeneralUtility::revExplode('_', $ref, 2), 2, 0);
            $uid = (int)$uid;
            if ($table !== 'index_config' && $table !== 'pages') {
                // Unknown reference type: nothing to look up.
                continue;
            }

            $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
                ->getQueryBuilderForTable('index_config');
            $queryBuilder->select('uid')
                ->from('index_config');

            if ($table === 'index_config') {
                $idxRec = $queryBuilder
                    ->where(
                        $queryBuilder->expr()->eq(
                            'uid',
                            $queryBuilder->createNamedParameter($uid, \PDO::PARAM_INT)
                        )
                    )
                    ->execute()
                    ->fetch();
                if ($idxRec) {
                    $list[] = $uid;
                }
            } else {
                $indexCfgRecordsFromPid = $queryBuilder
                    ->where(
                        $queryBuilder->expr()->eq(
                            'pid',
                            $queryBuilder->createNamedParameter($uid, \PDO::PARAM_INT)
                        )
                    )
                    ->execute();
                while ($idxRec = $indexCfgRecordsFromPid->fetch()) {
                    $list[] = $idxRec['uid'];
                }
            }
        }
        $list = array_unique($list);
    } else {
        $list = [$freeIndexUid];
    }

    $expressionBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getQueryBuilderForTable('index_phash')
        ->expr();
    return ' AND ' . $expressionBuilder->in('IP.freeIndexUid', array_map('intval', $list));
}
972 
/**
 * Runs the final query for the word index based search: selects the
 * index_section (ISEC) and index_phash (IP) rows for the given phash list,
 * applies the page filter (hook, pages join or page id list) and the
 * ordering selected by $this->sortOrder, and returns the executed statement.
 *
 * $list is the comma separated phash list as produced by getPhashList().
 *
 * NOTE(review): three lines of the where() argument list are missing from
 * this listing, and $freeIndexUid is not referenced on any visible line -
 * confirm against the full source before changing this method.
 */
980  protected function execFinalQuery($list, $freeIndexUid = -1)
981  {
982  $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('index_words');
983  $queryBuilder->select('ISEC.*', 'IP.*')
984  ->from('index_phash', 'IP')
985  ->from('index_section', 'ISEC')
986  ->where(
987  $queryBuilder->expr()->in(
988  'IP.phash',
989  $queryBuilder->createNamedParameter(
990  GeneralUtility::intExplode(',', $list, true),
991  Connection::PARAM_INT_ARRAY
992  )
993  ),
997  $queryBuilder->expr()->eq('ISEC.phash', $queryBuilder->quoteIdentifier('IP.phash'))
998  )
999  ->groupBy(
1000  'IP.phash',
1001  'ISEC.phash',
1002  'ISEC.phash_t3',
1003  'ISEC.rl0',
1004  'ISEC.rl1',
1005  'ISEC.rl2',
1006  'ISEC.page_id',
1007  'ISEC.uniqid',
1008  'IP.phash_grouping',
1009  'IP.data_filename',
1010  'IP.data_page_id',
1011  'IP.data_page_reg1',
1012  'IP.data_page_type',
1013  'IP.data_page_mp',
1014  'IP.gr_list',
1015  'IP.item_type',
1016  'IP.item_title',
1017  'IP.item_description',
1018  'IP.item_mtime',
1019  'IP.tstamp',
1020  'IP.item_size',
1021  'IP.contentHash',
1022  'IP.crdate',
1023  'IP.parsetime',
1024  'IP.sys_language_uid',
1025  'IP.item_crdate',
1026  'IP.cHashParams',
1027  'IP.externalUrl',
1028  'IP.recordUid',
1029  'IP.freeIndexUid',
1030  'IP.freeIndexSetId'
1031  );
1032 
1033  // Setting up methods of filtering results
1034  // based on page types, access, etc.
1035  if ($hookObj = $this->hookRequest('execFinalQuery_idList')) {
1036  // Calling hook for alternative creation of page ID list
1037  $hookWhere = QueryHelper::stripLogicalOperatorPrefix($hookObj->execFinalQuery_idList($list));
1038  if (!empty($hookWhere)) {
1039  $queryBuilder->andWhere($hookWhere);
1040  }
1041  } elseif ($this->joinPagesForQuery) {
1042  // Alternative to getting all page ids by ->getTreeList() where
1043  // "excludeSubpages" is NOT respected.
1044  $queryBuilder->setRestrictions(GeneralUtility::makeInstance(FrontendRestrictionContainer::class));
1045  $queryBuilder->from('pages');
1046  $queryBuilder->andWhere(
1047  $queryBuilder->expr()->eq('pages.uid', $queryBuilder->quoteIdentifier('ISEC.page_id')),
1048  $queryBuilder->expr()->eq(
1049  'pages.no_search',
1050  $queryBuilder->createNamedParameter(0, \PDO::PARAM_INT)
1051  ),
1052  $queryBuilder->expr()->lt(
1053  'pages.doktype',
1054  $queryBuilder->createNamedParameter(200, \PDO::PARAM_INT)
1055  )
1056  );
1057  } elseif ($this->searchRootPageIdList >= 0) {
1058  // Collecting all pages IDs in which to search;
1059  // filtering out ALL pages that are not accessible due to restriction containers.
1060  // Does NOT look for "no_search" field!
1061  $siteIdNumbers = GeneralUtility::intExplode(',', $this->searchRootPageIdList);
1062  $pageIdList = [];
1063  foreach ($siteIdNumbers as $rootId) {
1064  $pageIdList[] = $this->getTypoScriptFrontendController()->cObj->getTreeList(-1 * $rootId, 9999);
1065  }
// Each getTreeList() call contributes a comma separated list; flatten and de-duplicate before binding.
1066  $queryBuilder->andWhere(
1067  $queryBuilder->expr()->in(
1068  'ISEC.page_id',
1069  $queryBuilder->createNamedParameter(
1070  array_unique(GeneralUtility::intExplode(',', implode(',', $pageIdList), true)),
1071  Connection::PARAM_INT_ARRAY
1072  )
1073  )
1074  );
1075  }
1076  // otherwise select all / disable everything
1077  // If any of the ranking sortings are selected, we must make a
1078  // join with the word/rel-table again, because we need to
1079  // calculate ranking based on all search-words found.
1080  if (substr($this->sortOrder, 0, 5) === 'rank_') {
1081  $queryBuilder
1082  ->from('index_words', 'IW')
1083  ->from('index_rel', 'IR')
1084  ->andWhere(
1085  $queryBuilder->expr()->eq('IW.wid', $queryBuilder->quoteIdentifier('IR.wid')),
1086  $queryBuilder->expr()->eq('ISEC.phash', $queryBuilder->quoteIdentifier('IR.phash'))
1087  );
1088  switch ($this->sortOrder) {
1089  case 'rank_flag':
1090  // This gives priority to word-position (max-value) so that words in title, keywords, description counts more than in content.
1091  // The ordering is refined with the frequency sum as well.
1092  $queryBuilder
1093  ->addSelectLiteral(
1094  $queryBuilder->expr()->max('IR.flags', 'order_val1'),
1095  $queryBuilder->expr()->sum('IR.freq', 'order_val2')
1096  )
1097  ->orderBy('order_val1', $this->getDescendingSortOrderFlag())
1098  ->addOrderBy('order_val2', $this->getDescendingSortOrderFlag());
1099  break;
1100  case 'rank_first':
1101  // Results in average position of search words on page.
1102  // Must be inversely sorted (low numbers are closer to top)
1103  $queryBuilder
1104  ->addSelectLiteral($queryBuilder->expr()->avg('IR.first', 'order_val'))
1105  ->orderBy('order_val', $this->getDescendingSortOrderFlag(true));
1106  break;
1107  case 'rank_count':
1108  // Number of words found
1109  $queryBuilder
1110  ->addSelectLiteral($queryBuilder->expr()->sum('IR.count', 'order_val'))
1111  ->orderBy('order_val', $this->getDescendingSortOrderFlag());
1112  break;
1113  default:
1114  // Frequency sum. I'm not sure if this is the best way to do
1115  // it (make a sum...). Or should it be the average?
1116  $queryBuilder
1117  ->addSelectLiteral($queryBuilder->expr()->sum('IR.freq', 'order_val'))
1118  ->orderBy('order_val', $this->getDescendingSortOrderFlag());
1119  }
1120 
1121  if (!empty($this->wSelClauses)) {
1122  // So, words are combined in an OR statement
1123  // (no "sentence search" should be done here - may deselect results)
1124  $wordSel = $queryBuilder->expr()->orX();
1125  foreach ($this->wSelClauses as $wSelClause) {
1126  $wordSel->add(QueryHelper::stripLogicalOperatorPrefix($wSelClause));
1127  }
1128  $queryBuilder->andWhere($wordSel);
1129  }
1130  } else {
1131  // Otherwise, if sorting are done with the pages table or other fields,
1132  // there is no need for joining with the rel/word tables:
1133  switch ((string)$this->sortOrder) {
1134  case 'title':
1135  $queryBuilder->orderBy('IP.item_title', $this->getDescendingSortOrderFlag());
1136  break;
1137  case 'crdate':
1138  $queryBuilder->orderBy('IP.item_crdate', $this->getDescendingSortOrderFlag());
1139  break;
1140  case 'mtime':
1141  $queryBuilder->orderBy('IP.item_mtime', $this->getDescendingSortOrderFlag());
1142  break;
1143  }
1144  }
1145 
1146  return $queryBuilder->execute();
1147  }
1148 
/**
 * Decides whether the resume of a result row may be shown to the current user.
 *
 * - Rows indexed by an indexing configuration (freeIndexUid set) are always shown.
 * - External media (item_type set) are checked through the parent page's phash ("phash_t3").
 * - Ordinary TYPO3 pages are shown directly if their gr_list equals the user's
 *   group list, otherwise they are checked through their own phash.
 *
 * A check looks for an index_grlist row with that phash and the user's group
 * list; if the index_grlist table is not in use, the answer is FALSE.
 *
 * @param array $row Result row; keys read here: freeIndexUid, item_type, phash_t3, gr_list, phash
 * @return bool TRUE if the resume can safely be shown
 */
protected function checkResume($row)
{
    // If the record is indexed by an indexing configuration, just show it.
    // At least this is needed for external URLs and files.
    // For records we might need to extend this - for instance block display if record is access restricted.
    if ($row['freeIndexUid']) {
        return true;
    }

    if ($row['item_type']) {
        // For external media we will check the access of the parent page on which the media was linked from.
        // "phash_t3" is the phash of the parent TYPO3 page row which initiated the indexing of the documents
        // in this section. If no grlist record is found for it, there is still a theoretical possibility
        // that another user accessible page would display a link, so maybe the resume of such a document
        // may be unjustified hidden. But better safe than sorry.
        $phashToCheck = (int)$row['phash_t3'];
    } elseif ((string)$row['gr_list'] !== (string)$this->frontendUserGroupList) {
        // Ordinary TYPO3 page indexed for another group list: a grlist record for the page's own phash
        // is proof that this user has direct access to the content although
        // he did not himself initiate the indexing...
        $phashToCheck = (int)$row['phash'];
    } else {
        // Ordinary TYPO3 page indexed for exactly this group list.
        return true;
    }

    if (!$this->isTableUsed('index_grlist')) {
        return false;
    }

    // The connection is only needed (and therefore only fetched) on this path.
    $connection = GeneralUtility::makeInstance(ConnectionPool::class)->getConnectionForTable('index_grlist');
    return (bool)$connection->count(
        'phash',
        'index_grlist',
        [
            'phash' => $phashToCheck,
            'gr_list' => $this->frontendUserGroupList
        ]
    );
}
1208 
/**
 * Returns the sort direction suffix for ORDER BY: ' DESC' while $desc is
 * falsy, an empty string otherwise. With $inverse set, $desc is negated first.
 *
 * NOTE(review): the line that assigns $desc is missing from this listing;
 * presumably it is taken from $this->descendingSortOrderFlag - confirm
 * against the full source.
 */
1217  protected function getDescendingSortOrderFlag($inverse = false)
1218  {
1220  if ($inverse) {
1221  $desc = !$desc;
1222  }
1223  return !$desc ? ' DESC' : '';
1224  }
1225 
/**
 * Tells whether the given item type is handled by an external parser that
 * reports it as a multiple-pages type.
 *
 * @param string $itemType Item type of an index row
 * @return bool FALSE if no parser object is registered for the item type, otherwise the parser's answer
 */
protected function multiplePagesType($itemType)
{
    // Many item types have no parser at all; isset() avoids an undefined-index notice for those.
    if (!isset($this->externalParsers[$itemType])) {
        return false;
    }
    $fileContentParser = $this->externalParsers[$itemType];
    return is_object($fileContentParser) && $fileContentParser->isMultiplePageExtension($itemType);
}
1238 
/**
 * Integer hash of a string; searchDistinct() compares its result with IW.wid.
 *
 * NOTE(review): the body of this method is missing from this listing -
 * confirm the implementation against the full source.
 */
1248  protected function md5inthash($str)
1249  {
1251  }
1252 
/**
 * Thin wrapper around Utility\IndexedSearchUtility::isTableUsed().
 *
 * @param string $table_list Handed on unchanged
 * @return mixed Whatever the utility method returns
 */
protected function isTableUsed($table_list)
{
    $tableIsUsed = Utility\IndexedSearchUtility::isTableUsed($table_list);

    return $tableIsUsed;
}
1265 
/**
 * Returns the hook object registered for the given function name, if any.
 *
 * Hooks are looked up in
 * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['pi1_hooks'][$functionName].
 * The object is only returned if it actually has a method of that name; its
 * "pObj" property is set to this repository.
 *
 * @param string $functionName Name of the hook method
 * @return object|null Hook object, or NULL if none is registered or the method is missing
 */
public function hookRequest($functionName)
{
    // Hook: pi1_hooks[$functionName]. Most installations register none, so read defensively
    // instead of raising an undefined-index notice.
    $hooks = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['pi1_hooks'] ?? [];
    if (empty($hooks[$functionName])) {
        return null;
    }

    $hookObj = GeneralUtility::getUserObj($hooks[$functionName]);
    if (is_object($hookObj) && method_exists($hookObj, $functionName)) {
        $hookObj->pObj = $this;
        return $hookObj;
    }

    return null;
}
1284 
/**
 * Returns the search type as an integer.
 *
 * @return int
 */
public function getSearchType()
{
    $searchType = (int)$this->searchType;

    return $searchType;
}
1295 
/**
 * Returns the root page ids of the search as an array of integers
 * (the comma separated list stored by initialize(), split up).
 *
 * @return int[]
 */
public function getSearchRootPageIdList()
{
    $rootPageIds = GeneralUtility::intExplode(',', $this->searchRootPageIdList);

    return $rootPageIds;
}
1305 
/**
 * Returns the joinPagesForQuery flag: whether the final queries join the
 * pages table instead of using a list of page ids.
 *
 * @return bool|int The stored flag, unchanged
 */
public function getJoinPagesForQuery()
{
    $joinPages = $this->joinPagesForQuery;

    return $joinPages;
}
1316 
/**
 * Returns the global frontend controller stored in $GLOBALS['TSFE'].
 *
 * @return mixed The value of $GLOBALS['TSFE']
 */
protected function getTypoScriptFrontendController()
{
    $frontendController = $GLOBALS['TSFE'];

    return $frontendController;
}
1324 
/**
 * Returns the TimeTracker instance provided by GeneralUtility::makeInstance().
 *
 * @return TimeTracker
 */
protected function getTimeTracker()
{
    $timeTracker = GeneralUtility::makeInstance(TimeTracker::class);

    return $timeTracker;
}
1332 }
initialize($settings, $searchData, $externalParsers, $searchRootPageIdList)
static intExplode($delimiter, $string, $removeEmptyValues=false, $limit=0)
static forceIntegerInRange($theInt, $min, $max=2000000000, $defaultValue=0)
Definition: MathUtility.php:31
static trimExplode($delim, $string, $removeEmptyValues=false, $limit=0)
static makeInstance($className,... $constructorArguments)
$extConf
static stripLogicalOperatorPrefix(string $constraint)
static revExplode($delimiter, $string, $count=0)
if(TYPO3_MODE==='BE') $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_tsfebeuserauth.php']['frontendEditingController']['default']