TYPO3 CMS  TYPO3_7-6
CrawlerHook.php
Go to the documentation of this file.
1 <?php
3 
4 /*
5  * This file is part of the TYPO3 CMS project.
6  *
7  * It is free software; you can redistribute it and/or modify it under
8  * the terms of the GNU General Public License, either version 2
9  * of the License, or any later version.
10  *
11  * For the full copyright and license information, please read the
12  * LICENSE.txt file that was distributed with this source code.
13  *
14  * The TYPO3 project - inspiring people to share!
15  */
16 
20 
25 {
32 
// Incremented once for every queue entry added by the type 2, 3 and 4 handlers; the handlers multiply it with $this->secondsPerExternalUrl to offset the scheduled time of each new entry.
38  public $instanceCounter = 0;
39 
// Passed as the callback argument to every addQueueEntry_callBack() call made by this class; defaults to this class' own name.
43  public $callBack = self::class;
44 
/**
 * Makes sure a language service object exists in $GLOBALS['LANG'].
 *
 * When none is present, a LanguageService instance is created and
 * initialised with the language from the current backend user's settings.
 */
public function __construct()
{
    // The backend charset is read from $GLOBALS['LANG'] when records are indexed.
    if (is_object($GLOBALS['LANG'])) {
        return;
    }
    $languageService = GeneralUtility::makeInstance(\TYPO3\CMS\Lang\LanguageService::class);
    $GLOBALS['LANG'] = $languageService;
    $languageService->init($GLOBALS['BE_USER']->uc['lang']);
}
56 
/**
 * Activates all indexing configurations that are due and queues their first entry.
 *
 * A configuration is due when it is not hidden, its start time has passed,
 * its next indexing time lies in the past and it is not already running
 * (set_id=0). Each due configuration gets a fresh set id and a new "next
 * indexing" time before one initial queue entry is added through $pObj.
 *
 * @param object $pObj Calling object; must provide addQueueEntry_callBack()
 * @return void
 */
public function crawler_init(&$pObj)
{
    $now = $GLOBALS['EXEC_TIME'];
    // All indexing configurations which are waiting to be activated:
    $dueConfigurations = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows('*', 'index_config', 'hidden=0
 AND (starttime=0 OR starttime<=' . $now . ')
 AND timer_next_indexing<' . $now . '
 AND set_id=0
 ' . BackendUtility::deleteClause('index_config'));

    foreach ($dueConfigurations as $cfgRec) {
        // A new set id marks the configuration as running.
        $setId = GeneralUtility::md5int(microtime());
        $nextTime = $this->generateNextIndexingTime($cfgRec);
        $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_config', 'uid=' . (int)$cfgRec['uid'], [
            'set_id' => $setId,
            'timer_next_indexing' => $nextTime,
            'session_data' => ''
        ]);

        // Parameters shared by the built-in types; "url" (and "depth") are added per type.
        $params = [
            'indexConfigUid' => $cfgRec['uid'],
            'procInstructions' => ['[Index Cfg UID#' . $cfgRec['uid'] . ']']
        ];
        switch ($cfgRec['type']) {
            case 1:
                // Records
                $params['url'] = 'Records (start)';
                break;
            case 2:
                // Files
                $params['url'] = $cfgRec['filepath'];
                $params['depth'] = 0;
                break;
            case 3:
                // External URL
                $params['url'] = $cfgRec['externalUrl'];
                $params['depth'] = 0;
                break;
            case 4:
                // Page tree; the "url" carries the uid of the root page
                $params['url'] = (int)$cfgRec['alternative_source_pid'];
                $params['depth'] = 0;
                break;
            case 5:
                // Meta configuration: nothing is queued
                $params = null;
                break;
            default:
                // Custom type: only queued when a hook object is registered for it
                $params = null;
                $hookReference = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['crawler'][$cfgRec['type']];
                if ($hookReference) {
                    $hookObj = GeneralUtility::getUserObj($hookReference);
                    if (is_object($hookObj)) {
                        // NOTE(review): $message is never assigned in this method before being passed on.
                        $params = [
                            'indexConfigUid' => $cfgRec['uid'],
                            'procInstructions' => ['[Index Cfg UID#' . $cfgRec['uid'] . '/CUSTOM]'],
                            'url' => $hookObj->initMessage($message)
                        ];
                    }
                }
        }

        if ($params !== null) {
            $pObj->addQueueEntry_callBack($setId, $params, $this->callBack, $cfgRec['pid']);
        }
    }
    // Finally, look up all old index configurations which are finished and need to be reset and done.
    // NOTE(review): no statement follows this remark in the visible listing - confirm against the original file.
}
165 
/**
 * Processes one queue entry by dispatching to the handler of the configuration's type.
 *
 * The configuration's session data is unserialized before the handler runs
 * and written back afterwards, so handlers can keep state between entries.
 *
 * @param array $params Queue entry parameters; "indexConfigUid" selects the index_config record
 * @param object $pObj Calling object, handed on to the type handlers
 * @return array Array with the key "log" holding $params
 */
public function crawler_execute($params, &$pObj)
{
    // Without a configuration uid there is nothing to do.
    if (!$params['indexConfigUid']) {
        return ['log' => $params];
    }
    $cfgRec = $GLOBALS['TYPO3_DB']->exec_SELECTgetSingleRow('*', 'index_config', 'uid=' . (int)$params['indexConfigUid']);
    if (!is_array($cfgRec)) {
        return ['log' => $params];
    }

    $session_data = unserialize($cfgRec['session_data']);
    switch ($cfgRec['type']) {
        case 1:
            // Records
            $this->crawler_execute_type1($cfgRec, $session_data, $params, $pObj);
            break;
        case 2:
            // Files
            $this->crawler_execute_type2($cfgRec, $session_data, $params, $pObj);
            break;
        case 3:
            // External URL
            $this->crawler_execute_type3($cfgRec, $session_data, $params, $pObj);
            break;
        case 4:
            // Page tree
            $this->crawler_execute_type4($cfgRec, $session_data, $params, $pObj);
            break;
        case 5:
            // Meta configuration: never queued, so this should not be reached
            break;
        default:
            $hookReference = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['crawler'][$cfgRec['type']];
            if ($hookReference) {
                $hookObj = GeneralUtility::getUserObj($hookReference);
                if (is_object($hookObj)) {
                    // Kept on the instance because addQueueEntryForHook() reads $this->pObj
                    $this->pObj = $pObj;
                    $hookObj->indexOperation($cfgRec, $session_data, $params, $this);
                }
            }
    }

    // Persist the session data, which the handler may have changed.
    $GLOBALS['TYPO3_DB']->exec_UPDATEquery(
        'index_config',
        'uid=' . (int)$cfgRec['uid'],
        ['session_data' => serialize($session_data)]
    );
    return ['log' => $params];
}
223 
/**
 * Indexes one batch of records from a table (indexing configuration type 1).
 *
 * Records are fetched in uid order, starting after the uid stored in the
 * session data. When a batch contained records, a follow-up queue entry is
 * added so the next batch gets processed.
 *
 * @param array $cfgRec Indexing configuration record
 * @param array $session_data Session data, passed by reference; "uid" holds the last processed uid
 * @param array $params Queue entry parameters (not read here)
 * @param object $pObj Calling object; must provide addQueueEntry_callBack()
 * @return void
 */
public function crawler_execute_type1($cfgRec, &$session_data, $params, &$pObj)
{
    $table = $cfgRec['table2index'];
    if (!$table || !isset($GLOBALS['TCA'][$table])) {
        return;
    }
    if (!is_array($session_data)) {
        $session_data = ['uid' => 0];
    }

    // The alternative source page wins over the configuration's own page.
    $pid = (int)$cfgRec['alternative_source_pid'] ?: $cfgRec['pid'];
    $numberOfRecords = 100;
    if ($cfgRec['recordsbatch']) {
        $numberOfRecords = \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cfgRec['recordsbatch'], 1);
    }
    $rl = $this->getUidRootLineForClosestTemplate($cfgRec['pid']);

    $where = 'pid = ' . $pid . '
 AND uid > ' . (int)$session_data['uid'] . BackendUtility::deleteClause($table) . BackendUtility::BEenableFields($table);
    $recs = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows('*', $table, $where, '', 'uid', $numberOfRecords);
    if (empty($recs)) {
        return;
    }

    $lastUid = null;
    foreach ($recs as $record) {
        $this->indexSingleRecord($record, $cfgRec, $rl);
        // Remember how far we got.
        $lastUid = $record['uid'];
        $session_data['uid'] = $lastUid;
    }

    // Queue the next batch.
    $nparams = [
        'indexConfigUid' => $cfgRec['uid'],
        'url' => 'Records from UID#' . ($lastUid + 1) . '-?',
        'procInstructions' => ['[Index Cfg UID#' . $cfgRec['uid'] . ']']
    ];
    $pObj->addQueueEntry_callBack($cfgRec['set_id'], $nparams, $this->callBack, $cfgRec['pid']);
}
268 
/**
 * Indexes a file or queues the content of a directory (indexing configuration type 2).
 *
 * The path in $params['url'] is made absolute and checked with
 * GeneralUtility::isAllowedAbsPath(). A file is indexed directly. For a
 * directory, its files (filtered by the configured extensions) are queued,
 * and its sub-directories too while the configured depth is not reached.
 *
 * @param array $cfgRec Indexing configuration record
 * @param array $session_data Session data, passed by reference (not used here)
 * @param array $params Queue entry parameters; "url" is the path, "depth" the current depth
 * @param object $pObj Calling object; must provide addQueueEntry_callBack()
 * @return void
 */
public function crawler_execute_type2($cfgRec, &$session_data, $params, &$pObj)
{
    // Prepare path, making it absolute and checking:
    $readpath = $params['url'];
    if (!GeneralUtility::isAbsPath($readpath)) {
        $readpath = GeneralUtility::getFileAbsFileName($readpath);
    }
    if (!GeneralUtility::isAllowedAbsPath($readpath)) {
        return;
    }

    if (@is_file($readpath)) {
        // Root line has to be provided when indexing external files.
        $rl = $this->getUidRootLineForClosestTemplate($cfgRec['pid']);
        $indexerObj = GeneralUtility::makeInstance(\TYPO3\CMS\IndexedSearch\Indexer::class);
        $indexerObj->backend_initIndexer($cfgRec['pid'], 0, 0, '', $rl);
        $indexerObj->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);
        // EXPERIMENT - but to avoid phash_t3 being written to file sections
        // (otherwise they are removed when page is reindexed!!!)
        $indexerObj->hash['phash'] = -1;
        $indexerObj->indexRegularDocument(\TYPO3\CMS\Core\Utility\PathUtility::stripPathSitePrefix($readpath), true);
        return;
    }
    if (!@is_dir($readpath)) {
        return;
    }

    // Directory: collect files and, depth permitting, sub-directories.
    $extList = implode(',', GeneralUtility::trimExplode(',', $cfgRec['extensions'], true));
    $fileArr = [];
    $files = GeneralUtility::getAllFilesAndFoldersInPath($fileArr, $readpath, $extList, 0, 0);
    $directoryList = GeneralUtility::get_dirs($readpath);
    if (is_array($directoryList) && $params['depth'] < $cfgRec['depth']) {
        // Normalise the trailing slash so that a configured path without one
        // does not get glued to the sub-directory name.
        $directoryPrefix = rtrim($readpath, '/') . '/';
        foreach ($directoryList as $subdir) {
            if ((string)$subdir !== '') {
                $files[] = $directoryPrefix . $subdir . '/';
            }
        }
    }
    $files = GeneralUtility::removePrefixPathFromList($files, PATH_site);

    // Create one queue entry per item, each scheduled a bit later than the previous one.
    $nextDepth = $params['depth'] + 1;
    foreach ($files as $path) {
        $this->instanceCounter++;
        if ($path === $params['url']) {
            continue;
        }
        $nparams = [
            'indexConfigUid' => $cfgRec['uid'],
            'url' => $path,
            'procInstructions' => ['[Index Cfg UID#' . $cfgRec['uid'] . ']'],
            'depth' => $nextDepth
        ];
        $pObj->addQueueEntry_callBack(
            $cfgRec['set_id'],
            $nparams,
            $this->callBack,
            $cfgRec['pid'],
            $GLOBALS['EXEC_TIME'] + $this->instanceCounter * $this->secondsPerExternalUrl
        );
    }
}
330 
/**
 * Indexes an external URL and queues the links found on it (indexing configuration type 3).
 *
 * Links are only queued while the configured depth is not reached, and only
 * when they pass checkUrl() and are not rejected by checkDeniedSuburls().
 * Queued URLs are recorded in the session data's "urlLog".
 *
 * @param array $cfgRec Indexing configuration record
 * @param array $session_data Session data, passed by reference; "urlLog" lists URLs already seen
 * @param array $params Queue entry parameters; "url" is the URL, "depth" the current depth
 * @param object $pObj Calling object; must provide addQueueEntry_callBack()
 * @return void
 */
public function crawler_execute_type3($cfgRec, &$session_data, $params, &$pObj)
{
    // First call for this configuration: start the log with the current URL.
    if (!is_array($session_data)) {
        $session_data = ['urlLog' => [$params['url']]];
    }

    $rl = $this->getUidRootLineForClosestTemplate($cfgRec['pid']);
    $subUrls = $this->indexExtUrl($params['url'], $cfgRec['pid'], $rl, $cfgRec['uid'], $cfgRec['set_id']);

    if (!($params['depth'] < $cfgRec['depth'])) {
        return;
    }

    foreach ($subUrls as $candidate) {
        $url = $this->checkUrl($candidate, $session_data['urlLog'], $cfgRec['externalUrl']);
        if (!$url) {
            continue;
        }
        if ($this->checkDeniedSuburls($url, $cfgRec['url_deny'])) {
            continue;
        }
        $this->instanceCounter++;
        $session_data['urlLog'][] = $url;
        $nparams = [
            'indexConfigUid' => $cfgRec['uid'],
            'url' => $url,
            'procInstructions' => ['[Index Cfg UID#' . $cfgRec['uid'] . ']'],
            'depth' => $params['depth'] + 1
        ];
        // Each entry is scheduled a bit later than the previous one.
        $scheduledTime = $GLOBALS['EXEC_TIME'] + $this->instanceCounter * $this->secondsPerExternalUrl;
        $pObj->addQueueEntry_callBack($cfgRec['set_id'], $nparams, $this->callBack, $cfgRec['pid'], $scheduledTime);
    }
}
371 
/**
 * Submits the URLs of a page and queues its sub-pages (indexing configuration type 4).
 *
 * The page uid is taken from $params['url']. Sub-pages are only queued
 * while the configured depth is not reached.
 *
 * @param array $cfgRec Indexing configuration record
 * @param array $session_data Session data, passed by reference; queued sub-pages are appended to "urlLog"
 * @param array $params Queue entry parameters; "url" is the page uid, "depth" the current depth
 * @param object $pObj Calling object; must provide getUrlsForPageRow(), urlListFromUrlArray() and addQueueEntry_callBack()
 * @return void
 */
public function crawler_execute_type4($cfgRec, &$session_data, $params, &$pObj)
{
    $pageUid = (int)$params['url'];
    $pageRow = BackendUtility::getRecord('pages', $pageUid);
    $res = $pObj->getUrlsForPageRow($pageRow);

    // Registry for duplicates, and a dummy for the download URLs argument.
    $duplicateTrack = [];
    $downloadUrls = [];
    if (!empty($res)) {
        foreach ($res as $urlSet) {
            $pObj->urlListFromUrlArray($urlSet, $pageRow, $GLOBALS['EXEC_TIME'], 30, 1, 0, $duplicateTrack, $downloadUrls, ['tx_indexedsearch_reindex']);
        }
    }

    if (!($params['depth'] < $cfgRec['depth'])) {
        return;
    }
    $subPages = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows('uid,title', 'pages', 'pid = ' . $pageUid . BackendUtility::deleteClause('pages'));
    if (empty($subPages)) {
        return;
    }

    foreach ($subPages as $subPage) {
        $this->instanceCounter++;
        $session_data['urlLog'][] = 'pages:' . $subPage['uid'] . ': ' . $subPage['title'];
        $nparams = [
            'indexConfigUid' => $cfgRec['uid'],
            'url' => $subPage['uid'],
            'procInstructions' => ['[Index Cfg UID#' . $cfgRec['uid'] . ']'],
            'depth' => $params['depth'] + 1
        ];
        // Each entry is scheduled a bit later than the previous one.
        $scheduledTime = $GLOBALS['EXEC_TIME'] + $this->instanceCounter * $this->secondsPerExternalUrl;
        $pObj->addQueueEntry_callBack($cfgRec['set_id'], $nparams, $this->callBack, $cfgRec['pid'], $scheduledTime);
    }
}
420 
427  {
428  // Lookup running index configurations:
429  $runningIndexingConfigurations = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows('uid,set_id', 'index_config', 'set_id<>0' . BackendUtility::deleteClause('index_config'));
430  // For each running configuration, look up how many log entries there are which are scheduled for execution and if none, clear the "set_id" (means; Processing was DONE)
431  foreach ($runningIndexingConfigurations as $cfgRec) {
432  // Look for ended processes:
433  $queued_items = $GLOBALS['TYPO3_DB']->exec_SELECTcountRows('*', 'tx_crawler_queue', 'set_id=' . (int)$cfgRec['set_id'] . ' AND exec_time=0');
434  if (!$queued_items) {
435  // Lookup old phash rows:
436  $oldPhashRows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows('phash', 'index_phash', 'freeIndexUid=' . (int)$cfgRec['uid'] . ' AND freeIndexSetId<>' . (int)$cfgRec['set_id']);
437  foreach ($oldPhashRows as $pHashRow) {
438  // Removing old registrations for all tables (code copied from \TYPO3\CMS\IndexedSearch\Domain\Repository\IndexedPagesController\AdministrationRepository)
439  $tableArr = ['index_phash', 'index_rel', 'index_section', 'index_grlist', 'index_fulltext', 'index_debug'];
440  foreach ($tableArr as $table) {
441  $GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash=' . (int)$pHashRow['phash']);
442  }
443  }
444  // End process by updating index-config record:
445  $field_array = [
446  'set_id' => 0,
447  'session_data' => ''
448  ];
449  $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_config', 'uid=' . (int)$cfgRec['uid'], $field_array);
450  }
451  }
452  }
453 
454  /*****************************************
455  *
456  * Helper functions
457  *
458  *****************************************/
/**
 * Checks whether a URL qualifies for being queued.
 *
 * A trailing double slash is reduced to one and any fragment ("#...") is
 * removed. The URL is accepted when it contains no "../", starts with
 * $baseUrl and is not yet listed in $urlLog.
 *
 * @param string $url URL to check
 * @param array $urlLog URLs seen so far; a non-array value is treated as an empty log
 * @param string $baseUrl Prefix the URL has to start with
 * @return string|null The cleaned URL when accepted, otherwise NULL
 */
public function checkUrl($url, $urlLog, $baseUrl)
{
    $url = preg_replace('/\\/\\/$/', '/', $url);
    list($url) = explode('#', $url);

    if (strstr($url, '../') !== false) {
        return null;
    }
    if (!GeneralUtility::isFirstPartOfStr($url, $baseUrl)) {
        return null;
    }
    // Strict comparison: URLs are strings and must not be matched through type juggling.
    if (in_array($url, (array)$urlLog, true)) {
        return null;
    }
    return $url;
}
479 
/**
 * Indexes an external URL and returns the links found in its content.
 *
 * Links without a scheme are made absolute: links starting with "/" are
 * resolved against scheme and host (and port, if any) of $url, all others
 * against the document's base href or, when none is given, the directory
 * part of $url.
 *
 * @param string $url URL to index
 * @param int $pageId Page id handed to the indexer
 * @param array $rl Root line uids handed to the indexer
 * @param int $cfgUid Uid of the indexing configuration
 * @param int $setId Set id of the current indexing run
 * @return array List of URLs found on the indexed page
 */
public function indexExtUrl($url, $pageId, $rl, $cfgUid, $setId)
{
    // Index external URL:
    $indexerObj = GeneralUtility::makeInstance(\TYPO3\CMS\IndexedSearch\Indexer::class);
    $indexerObj->backend_initIndexer($pageId, 0, 0, '', $rl);
    $indexerObj->backend_setFreeIndexUid($cfgUid, $setId);
    // To avoid phash_t3 being written to file sections (otherwise they are removed when page is reindexed!!!)
    $indexerObj->hash['phash'] = -1;
    $indexerObj->indexExternalUrl($url);

    // parse_url() returns FALSE for seriously malformed URLs and omits components that are absent.
    $url_qParts = parse_url($url);
    if (!is_array($url_qParts)) {
        $url_qParts = [];
    }
    $scheme = isset($url_qParts['scheme']) ? $url_qParts['scheme'] : '';
    $host = isset($url_qParts['host']) ? $url_qParts['host'] : '';
    $path = isset($url_qParts['path']) ? $url_qParts['path'] : '';
    $baseAbsoluteHref = $scheme . '://' . $host;
    if (isset($url_qParts['port'])) {
        // Keep a non-default port, otherwise relative links would point to a different origin.
        $baseAbsoluteHref .= ':' . $url_qParts['port'];
    }

    $baseHref = $indexerObj->extractBaseHref($indexerObj->indexExternalUrl_content);
    if (!$baseHref) {
        // Extract base href from current URL: everything up to the last slash of the path.
        $baseHref = $baseAbsoluteHref;
        $lastSlash = strrpos($path, '/');
        if ($lastSlash !== false) {
            $baseHref .= substr($path, 0, $lastSlash);
        }
    }
    $baseHref = rtrim($baseHref, '/');

    // Get URLs on this page:
    $subUrls = [];
    $list = $indexerObj->extractHyperLinks($indexerObj->indexExternalUrl_content);
    if (!is_array($list)) {
        return $subUrls;
    }
    foreach ($list as $linkInfo) {
        // Decode entities:
        $subUrl = htmlspecialchars_decode($linkInfo['href']);
        $qParts = parse_url($subUrl);
        if (!is_array($qParts) || empty($qParts['scheme'])) {
            $relativeUrl = GeneralUtility::resolveBackPath($subUrl);
            if (isset($relativeUrl[0]) && $relativeUrl[0] === '/') {
                $subUrl = $baseAbsoluteHref . $relativeUrl;
            } else {
                $subUrl = $baseHref . '/' . $relativeUrl;
            }
        }
        $subUrls[] = $subUrl;
    }
    return $subUrls;
}
528 
/**
 * Indexes a single record according to an indexing configuration.
 *
 * The first field of the configuration's field list provides the title,
 * all further fields are concatenated into the content. The record is
 * indexed as a page, using the configuration's GET parameters with
 * "###UID###" replaced by the record uid.
 *
 * @param array $r Record to index
 * @param array $cfgRec Indexing configuration record
 * @param array|null $rl Root line uids; looked up from the configuration's pid when not an array
 * @return void
 */
public function indexSingleRecord($r, $cfgRec, $rl = null)
{
    // Init:
    $rl = is_array($rl) ? $rl : $this->getUidRootLineForClosestTemplate($cfgRec['pid']);
    $fieldList = GeneralUtility::trimExplode(',', $cfgRec['fieldlist'], true);
    $ctrl = isset($GLOBALS['TCA'][$cfgRec['table2index']]['ctrl'])
        ? $GLOBALS['TCA'][$cfgRec['table2index']]['ctrl']
        : [];
    $languageField = isset($ctrl['languageField']) ? $ctrl['languageField'] : '';
    $sys_language_uid = $languageField ? $r[$languageField] : 0;

    // (Re)-Indexing a row from a table:
    $indexerObj = GeneralUtility::makeInstance(\TYPO3\CMS\IndexedSearch\Indexer::class);
    parse_str(str_replace('###UID###', $r['uid'], $cfgRec['get_params']), $GETparams);
    $indexerObj->backend_initIndexer($cfgRec['pid'], 0, $sys_language_uid, '', $rl, $GETparams, (bool)$cfgRec['chashcalc']);
    $indexerObj->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);
    $indexerObj->forceIndexing = true;

    // Both start out empty so an empty field list still yields defined values.
    $theTitle = '';
    $theContent = '';
    foreach ($fieldList as $position => $fieldName) {
        $fieldValue = isset($r[$fieldName]) ? $r[$fieldName] : '';
        if (!$position) {
            $theTitle = $fieldValue;
        } else {
            $theContent .= $fieldValue . ' ';
        }
    }

    // Timestamps are only read when the table declares the respective ctrl field.
    $tstamp = isset($ctrl['tstamp'], $r[$ctrl['tstamp']]) ? $r[$ctrl['tstamp']] : null;
    $crdate = isset($ctrl['crdate'], $r[$ctrl['crdate']]) ? $r[$ctrl['crdate']] : null;

    // Indexing the record as a page (but with parameters set, see ->backend_setFreeIndexUid()).
    // A space is put before every "<" so words do not get glued together by strip_tags().
    $indexerObj->backend_indexAsTYPO3Page(
        strip_tags(str_replace('<', ' <', $theTitle)),
        '',
        '',
        strip_tags(str_replace('<', ' <', $theContent)),
        $GLOBALS['LANG']->charSet,
        $tstamp,
        $crdate,
        $r['uid']
    );
}
561 
/**
 * Returns the page uids of the root line that the template service
 * determines for the given page.
 *
 * @param int $id Page uid
 * @return array Page uids, keyed like the template service's root line
 */
public function getUidRootLineForClosestTemplate($id)
{
    $templateService = GeneralUtility::makeInstance(\TYPO3\CMS\Core\TypoScript\ExtendedTemplateService::class);
    // Do not log time-performance information
    $templateService->tt_track = 0;
    $templateService->init();

    $pageRepository = GeneralUtility::makeInstance(\TYPO3\CMS\Frontend\Page\PageRepository::class);
    // Running through the templates fills $templateService->rootLine.
    $templateService->runThroughTemplates($pageRepository->getRootLine($id), 0);

    $uids = [];
    foreach ($templateService->rootLine as $level => $page) {
        $uids[$level] = $page['uid'];
    }
    return $uids;
}
587 
/**
 * Calculates the next time an indexing configuration should run.
 *
 * Starting from a midnight plus the configured offset, whole frequency
 * intervals are added until the current execution time is reached.
 *
 * @param array $cfgRec Indexing configuration record; reads timer_frequency, timer_offset and timer_next_indexing
 * @return int Unix timestamp of the next indexing run
 */
public function generateNextIndexingTime($cfgRec)
{
    $currentTime = $GLOBALS['EXEC_TIME'];
    // Find a midnight to use for the offset calculation. For frequencies of up
    // to a day it does not matter which day is used, so yesterday's midnight is
    // taken. For longer frequencies the day of the previously scheduled run is
    // used, so that e.g. the weekday of a weekly schedule is respected
    // regardless of when the script runs.
    if ($cfgRec['timer_frequency'] <= 24 * 3600) {
        $aMidNight = mktime(0, 0, 0) - 1 * 24 * 3600;
    } else {
        $lastTime = (int)($cfgRec['timer_next_indexing'] ?: $GLOBALS['EXEC_TIME']);
        // Four-digit year: mktime() would have to guess the century for a two-digit one.
        $aMidNight = mktime(
            0,
            0,
            0,
            (int)date('n', $lastTime),
            (int)date('j', $lastTime),
            (int)date('Y', $lastTime)
        );
    }
    // Find last offset time plus frequency in seconds:
    $lastSureOffset = $aMidNight + \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cfgRec['timer_offset'], 0, 86400);
    $frequencySeconds = \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cfgRec['timer_frequency'], 1);
    // Number of frequency intervals needed to get from the offset to the current time:
    $frequencyBlocksUntilNextTime = ceil(($currentTime - $lastSureOffset) / $frequencySeconds);
    // ceil() returns a float; the result is a timestamp, so hand back an integer.
    return (int)($lastSureOffset + $frequencyBlocksUntilNextTime * $frequencySeconds);
}
612 
/**
 * Tells whether a URL starts with one of the denied URL prefixes.
 *
 * @param string $url URL to test
 * @param string $url_deny Denied URL prefixes, one per line
 * @return bool TRUE when the URL is denied, otherwise FALSE
 */
public function checkDeniedSuburls($url, $url_deny)
{
    if (!trim($url_deny)) {
        return false;
    }
    foreach (GeneralUtility::trimExplode(LF, $url_deny, true) as $deniedPrefix) {
        if (GeneralUtility::isFirstPartOfStr($url, $deniedPrefix)) {
            return true;
        }
    }
    return false;
}
632 
/**
 * Adds a queue entry for an indexing configuration on behalf of a hook object.
 *
 * Uses $this->pObj, which crawler_execute() sets before calling the hook.
 *
 * @param array $cfgRec Indexing configuration record
 * @param string $title Value stored as the entry's "url"
 * @return void
 */
public function addQueueEntryForHook($cfgRec, $title)
{
    // "indexConfigUid" must ALWAYS be the uid of the configuration record.
    $configurationUid = $cfgRec['uid'];
    $entry = [
        'indexConfigUid' => $configurationUid,
        'url' => $title,
        'procInstructions' => ['[Index Cfg UID#' . $configurationUid . ']']
    ];
    $this->pObj->addQueueEntry_callBack($cfgRec['set_id'], $entry, $this->callBack, $cfgRec['pid']);
}
650 
/**
 * Removes all index entries that belong to a page.
 *
 * The phash values registered for the page in index_section are collected
 * and every row carrying one of them is deleted from the index tables.
 *
 * @param int $id Page uid
 * @return void
 */
public function deleteFromIndex($id)
{
    $sectionRows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows('phash', 'index_section', 'page_id=' . (int)$id);
    if (empty($sectionRows)) {
        return;
    }

    $phashList = [];
    foreach ($sectionRows as $sectionRow) {
        $phashList[] = $sectionRow['phash'];
    }
    $phashCondition = 'phash IN (' . implode(',', $GLOBALS['TYPO3_DB']->cleanIntArray($phashList)) . ')';

    $indexTables = ['index_debug', 'index_fulltext', 'index_grlist', 'index_phash', 'index_rel', 'index_section'];
    foreach ($indexTables as $indexTable) {
        $GLOBALS['TYPO3_DB']->exec_DELETEquery($indexTable, $phashCondition);
    }
}
680 
681  /*************************
682  *
683  * Hook functions for TCEmain (indexing of records)
684  *
685  *************************/
/**
 * Command map hook: removes a page from the index when the page gets deleted.
 *
 * @param string $command Command being processed
 * @param string $table Table the command is applied to
 * @param int $id Uid of the affected record
 * @param mixed $value Command value (not used here)
 * @param object $pObj Calling object (not used here)
 * @return void
 */
public function processCmdmap_preProcess($command, $table, $id, $value, $pObj)
{
    $isPageDeletion = $command === 'delete' && $table === 'pages';
    if ($isPageDeletion) {
        // Clean up the index
        $this->deleteFromIndex($id);
    }
}
703 
/**
 * Data map hook: keeps the index in sync after a record was written.
 *
 * A page that was updated to be hidden or excluded from search is removed
 * from the index. Afterwards the record is re-indexed for every idle
 * "records" indexing configuration (type 1) that targets its table and
 * page and has "index on change" enabled.
 *
 * @param string $status "new" or "update"
 * @param string $table Table of the record
 * @param mixed $id Record uid, or the placeholder id of a new record
 * @param array $fieldArray Fields that were written
 * @param object $pObj Calling object; provides substNEWwithIDs
 * @return void
 */
public function processDatamap_afterDatabaseOperations($status, $table, $id, $fieldArray, $pObj)
{
    // Nothing was written, nothing to do.
    if (empty($fieldArray)) {
        return;
    }

    if ($status === 'new') {
        // Translate the placeholder into the real uid.
        $id = $pObj->substNEWwithIDs[$id];
    } elseif ($table === 'pages' && $status === 'update') {
        $nowHidden = array_key_exists('hidden', $fieldArray) && $fieldArray['hidden'] == 1;
        $nowExcluded = array_key_exists('no_search', $fieldArray) && $fieldArray['no_search'] == 1;
        if ($nowHidden || $nowExcluded) {
            // The page should be hidden or not indexed after the update: drop its index.
            $this->deleteFromIndex($id);
        }
    }

    $currentRecord = BackendUtility::getRecord($table, $id);
    if (!is_array($currentRecord)) {
        return;
    }

    // Idle configurations of type "record" (1) which point to this table and sit on the
    // record's page or name that page as alternative source:
    $recordPid = (int)$currentRecord['pid'];
    $indexingConfigurations = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows('*', 'index_config', 'hidden=0
 AND (starttime=0 OR starttime<=' . $GLOBALS['EXEC_TIME'] . ')
 AND set_id=0
 AND type=1
 AND table2index=' . $GLOBALS['TYPO3_DB']->fullQuoteStr($table, 'index_config') . '
 AND (
 (alternative_source_pid=0 AND pid=' . $recordPid . ')
 OR (alternative_source_pid=' . $recordPid . ')
 )
 AND records_indexonchange=1
 ' . BackendUtility::deleteClause('index_config'));
    foreach ($indexingConfigurations as $cfgRec) {
        $this->indexSingleRecord($currentRecord, $cfgRec);
    }
}
746 }
indexExtUrl($url, $pageId, $rl, $cfgUid, $setId)
static isFirstPartOfStr($str, $partStr)
static forceIntegerInRange($theInt, $min, $max=2000000000, $defaultValue=0)
Definition: MathUtility.php:31
static BEenableFields($table, $inv=false)
crawler_execute_type2($cfgRec, &$session_data, $params, &$pObj)
static trimExplode($delim, $string, $removeEmptyValues=false, $limit=0)
crawler_execute_type4($cfgRec, &$session_data, $params, &$pObj)
static getAllFilesAndFoldersInPath(array $fileArr, $path, $extList='', $regDirs=false, $recursivityLevels=99, $excludePattern='')
processCmdmap_preProcess($command, $table, $id, $value, $pObj)
processDatamap_afterDatabaseOperations($status, $table, $id, $fieldArray, $pObj)
static getFileAbsFileName($filename, $onlyRelative=true, $relToTYPO3_mainDir=false)
indexSingleRecord($r, $cfgRec, $rl=null)
crawler_execute_type1($cfgRec, &$session_data, $params, &$pObj)
static getRecord($table, $uid, $fields=' *', $where='', $useDeleteClause=true)
if(TYPO3_MODE==='BE') $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_tsfebeuserauth.php']['frontendEditingController']['default']
static removePrefixPathFromList(array $fileArr, $prefixToRemove)
static deleteClause($table, $tableAlias='')
crawler_execute_type3($cfgRec, &$session_data, $params, &$pObj)