TYPO3 CMS  TYPO3_6-2
CrawlerHook.php
Go to the documentation of this file.
1 <?php
3 
19 
25 class CrawlerHook {
26 
27  // Static:
32 
33  // Number of seconds to use as interval between queued indexing operations of URLs / files (types 2 & 3)
// NOTE(review): the comment above describes a property whose declaration is not present in this listing.
// Methods of this class read $this->secondsPerExternalUrl, so confirm the declaration exists in the real file.
34  // Internal, dynamic:
// Running counter: incremented once per queue entry added by the file, external-URL and page-tree handlers
// and multiplied with the per-entry interval to stagger the scheduled execution times.
38  public $instanceCounter = 0;
39 
40  // Counts up for each added URL (type 3)
41  // Internal, static:
// Callback reference passed to addQueueEntry_callBack() for every queue entry this class creates.
45  public $callBack = '&TYPO3\\CMS\\IndexedSearch\\Hook\\CrawlerHook';
46 
47  // The object reference to this class.
/**
 * Starts every indexing configuration that is due.
 *
 * Selects all visible, non-running index_config records whose next indexing
 * time has passed, marks each one as running (fresh set_id, next indexing
 * time, cleared session data) and adds the first callback entry for it to
 * the crawler queue.
 *
 * @param object $pObj Parent object; only addQueueEntry_callBack() is called on it
 * @return void
 */
public function crawler_init(&$pObj) {
    $execTime = (int)$GLOBALS['EXEC_TIME'];
    // Select all indexing configurations which are waiting to be activated:
    $indexingConfigurations = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
        '*',
        'index_config',
        'hidden=0'
            . ' AND (starttime=0 OR starttime<=' . $execTime . ')'
            . ' AND timer_next_indexing<' . $execTime
            . ' AND set_id=0'
            . BackendUtility::deleteClause('index_config')
    );
    if (!is_array($indexingConfigurations)) {
        // No row list came back, so there is nothing to start.
        $indexingConfigurations = array();
    }
    foreach ($indexingConfigurations as $cfgRec) {
        // Generate a unique set-ID and the next point in time for this configuration:
        $setId = GeneralUtility::md5int(microtime());
        $nextTime = $this->generateNextIndexingTime($cfgRec);
        // Start the process by updating the index-config record. This happens for
        // every selected record, including those that end up adding no queue entry.
        $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_config', 'uid=' . (int)$cfgRec['uid'], array(
            'set_id' => $setId,
            'timer_next_indexing' => $nextTime,
            'session_data' => ''
        ));
        $params = $this->buildInitialQueueParameters($cfgRec);
        if ($params !== NULL) {
            $pObj->addQueueEntry_callBack($setId, $params, $this->callBack, $cfgRec['pid']);
        }
    }
    // Finally, look up all old index configurations which are finished and need to be reset and done.
    // NOTE(review): no statement follows this comment in the listing this code was taken from;
    // presumably a clean-up call was lost here - confirm against the upstream file before restoring it.
}

/**
 * Builds the parameter set of the first queue entry for one indexing configuration.
 *
 * @param array $cfgRec Indexing configuration record
 * @return array|NULL Queue entry parameters, or NULL when no entry is to be added
 *                    (meta configurations and custom types without a usable hook object)
 */
protected function buildInitialQueueParameters(array $cfgRec) {
    $params = array(
        'indexConfigUid' => $cfgRec['uid'],
        'procInstructions' => array('[Index Cfg UID#' . $cfgRec['uid'] . ']')
    );
    switch ($cfgRec['type']) {
        case 1:
            // Records: no depth is tracked for this type.
            $params['url'] = 'Records (start)';
            return $params;
        case 2:
            // Files:
            $params['url'] = $cfgRec['filepath'];
            break;
        case 3:
            // External URL:
            $params['url'] = $cfgRec['externalUrl'];
            break;
        case 4:
            // Page tree (root page uid):
            $params['url'] = (int)$cfgRec['alternative_source_pid'];
            break;
        case 5:
            // Meta configuration, nothing to do.
            return NULL;
        default:
            // Custom type: delegate to the hook object registered for it, if any.
            if (empty($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['crawler'][$cfgRec['type']])) {
                return NULL;
            }
            $hookObj = GeneralUtility::getUserObj($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['crawler'][$cfgRec['type']]);
            if (!is_object($hookObj)) {
                return NULL;
            }
            // The hook was previously handed an undefined variable; pass an explicit NULL instead.
            $message = NULL;
            $params['procInstructions'] = array('[Index Cfg UID#' . $cfgRec['uid'] . '/CUSTOM]');
            $params['url'] = $hookObj->initMessage($message);
            return $params;
    }
    // File, URL and page tree entries start at depth 0.
    $params['depth'] = 0;
    return $params;
}
156 
/**
 * Processes one queue entry created by this class.
 *
 * Loads the indexing configuration named in the parameters, dispatches to the
 * handler for its type and stores the (possibly modified) session data back
 * into the configuration record.
 *
 * @param array $params Queue entry parameters; 'indexConfigUid' must be set for anything to happen
 * @param object $pObj Parent object, handed on to the type handlers
 * @return array Array with the parameters under the key 'log'
 */
public function crawler_execute($params, &$pObj) {
    // Indexer configuration ID must exist:
    if (!$params['indexConfigUid']) {
        return array('log' => $params);
    }
    // Load the indexing configuration record:
    $cfgRec = $GLOBALS['TYPO3_DB']->exec_SELECTgetSingleRow('*', 'index_config', 'uid=' . (int)$params['indexConfigUid']);
    if (!is_array($cfgRec)) {
        return array('log' => $params);
    }
    // Unpack session data:
    $session_data = unserialize($cfgRec['session_data']);
    // Dispatch on the configuration type:
    $type = $cfgRec['type'];
    if ($type == 1) {
        // Records
        $this->crawler_execute_type1($cfgRec, $session_data, $params, $pObj);
    } elseif ($type == 2) {
        // Files
        $this->crawler_execute_type2($cfgRec, $session_data, $params, $pObj);
    } elseif ($type == 3) {
        // External URL
        $this->crawler_execute_type3($cfgRec, $session_data, $params, $pObj);
    } elseif ($type == 4) {
        // Page tree
        $this->crawler_execute_type4($cfgRec, $session_data, $params, $pObj);
    } elseif ($type == 5) {
        // Meta configuration: nothing to do (should never get here).
    } elseif ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['crawler'][$cfgRec['type']]) {
        // Custom type handled by a registered hook object:
        $hookObj = GeneralUtility::getUserObj($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['crawler'][$cfgRec['type']]);
        if (is_object($hookObj)) {
            // Kept on the instance because addQueueEntryForHook() reads it.
            $this->pObj = $pObj;
            $hookObj->indexOperation($cfgRec, $session_data, $params, $this);
        }
    }
    // Save process data which might have been modified by the handler:
    $GLOBALS['TYPO3_DB']->exec_UPDATEquery(
        'index_config',
        'uid=' . (int)$cfgRec['uid'],
        array('session_data' => serialize($session_data))
    );
    return array('log' => $params);
}
214 
/**
 * Indexes one batch of records from the configured table.
 *
 * Records are read in uid order, starting after the uid remembered in the
 * session data. When the batch was not empty, a follow-up queue entry is added.
 *
 * @param array $cfgRec Indexing configuration record
 * @param array $session_data Session data, passed by reference; holds the last processed uid under 'uid'
 * @param array $params Queue entry parameters (not read here)
 * @param object $pObj Parent object; addQueueEntry_callBack() is called on it
 * @return void
 */
public function crawler_execute_type1($cfgRec, &$session_data, $params, &$pObj) {
    $table = $cfgRec['table2index'];
    if (!$table || !isset($GLOBALS['TCA'][$table])) {
        return;
    }
    // Start from uid 0 on the first run:
    if (!is_array($session_data)) {
        $session_data = array('uid' => 0);
    }
    // Source page: the alternative source pid if set, the configuration's own pid otherwise.
    $sourcePid = (int)$cfgRec['alternative_source_pid'];
    if (!$sourcePid) {
        $sourcePid = $cfgRec['pid'];
    }
    // Batch size defaults to 100 records:
    $batchSize = 100;
    if ($cfgRec['recordsbatch']) {
        $batchSize = \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cfgRec['recordsbatch'], 1);
    }
    $rootLine = $this->getUidRootLineForClosestTemplate($cfgRec['pid']);
    $whereClause = 'pid = ' . $sourcePid . '
 AND uid > ' . (int)$session_data['uid'] . BackendUtility::deleteClause($table) . BackendUtility::BEenableFields($table);
    $rows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows('*', $table, $whereClause, '', 'uid', $batchSize);
    if (!count($rows)) {
        return;
    }
    $lastUid = NULL;
    foreach ($rows as $row) {
        $this->indexSingleRecord($row, $cfgRec, $rootLine);
        // Remember how far we got:
        $lastUid = $row['uid'];
        $session_data['uid'] = $lastUid;
    }
    // Queue the next batch:
    $nparams = array(
        'indexConfigUid' => $cfgRec['uid'],
        'url' => 'Records from UID#' . ($lastUid + 1) . '-?',
        'procInstructions' => array('[Index Cfg UID#' . $cfgRec['uid'] . ']')
    );
    $pObj->addQueueEntry_callBack($cfgRec['set_id'], $nparams, $this->callBack, $cfgRec['pid']);
}
259 
/**
 * Indexes a file, or queues the content of a directory.
 *
 * If the queue entry points to a file, the file is indexed. If it points to
 * a directory, one queue entry is added per matching file and, while the
 * configured depth is not reached, per sub-directory.
 *
 * @param array $cfgRec Indexing configuration record
 * @param array $session_data Session data, passed by reference (not used here)
 * @param array $params Queue entry parameters; 'url' holds the path, 'depth' the current depth
 * @param object $pObj Parent object; addQueueEntry_callBack() is called on it
 * @return void
 */
public function crawler_execute_type2($cfgRec, &$session_data, $params, &$pObj) {
    // Prepare path, making it absolute and checking:
    $readpath = $params['url'];
    if (!GeneralUtility::isAbsPath($readpath)) {
        $readpath = GeneralUtility::getFileAbsFileName($readpath);
    }
    if (!GeneralUtility::isAllowedAbsPath($readpath)) {
        return;
    }
    if (@is_file($readpath)) {
        // Root line has to be provided when indexing external files:
        $rl = $this->getUidRootLineForClosestTemplate($cfgRec['pid']);
        // (Re)-Indexing file on page.
        $indexerObj = GeneralUtility::makeInstance('TYPO3\\CMS\\IndexedSearch\\Indexer');
        $indexerObj->backend_initIndexer($cfgRec['pid'], 0, 0, '', $rl);
        $indexerObj->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);
        // EXPERIMENT - but to avoid phash_t3 being written to file sections
        // (otherwise they are removed when page is reindexed!!!)
        $indexerObj->hash['phash'] = -1;
        $indexerObj->indexRegularDocument(\TYPO3\CMS\Core\Utility\PathUtility::stripPathSitePrefix($readpath), TRUE);
        return;
    }
    if (!@is_dir($readpath)) {
        return;
    }
    // Directory: collect files (filtered by extension) and sub-directories.
    $extList = implode(',', GeneralUtility::trimExplode(',', $cfgRec['extensions'], TRUE));
    $fileArr = array();
    $files = GeneralUtility::getAllFilesAndFoldersInPath($fileArr, $readpath, $extList, 0, 0);
    $directoryList = GeneralUtility::get_dirs($readpath);
    if (is_array($directoryList) && $params['depth'] < $cfgRec['depth']) {
        // Make sure exactly one slash separates the directory from the sub-directory name,
        // also when the configured path was entered without a trailing slash.
        $directoryPrefix = rtrim($readpath, '/') . '/';
        foreach ($directoryList as $subdir) {
            if ((string)$subdir != '') {
                $files[] = $directoryPrefix . $subdir . '/';
            }
        }
    }
    $files = GeneralUtility::removePrefixPathFromList($files, PATH_site);
    // Create one queue entry per item, spread out over time:
    foreach ($files as $path) {
        $this->instanceCounter++;
        if ($path === $params['url']) {
            // Never queue the entry that is being processed right now.
            continue;
        }
        $nparams = array(
            'indexConfigUid' => $cfgRec['uid'],
            'url' => $path,
            'procInstructions' => array('[Index Cfg UID#' . $cfgRec['uid'] . ']'),
            'depth' => $params['depth'] + 1
        );
        $pObj->addQueueEntry_callBack(
            $cfgRec['set_id'],
            $nparams,
            $this->callBack,
            $cfgRec['pid'],
            $GLOBALS['EXEC_TIME'] + $this->instanceCounter * $this->secondsPerExternalUrl
        );
    }
}
321 
/**
 * Indexes an external URL and queues the links found on it.
 *
 * Links are only queued while the configured depth is not reached, and only
 * if checkUrl() accepts them and checkDeniedSuburls() does not reject them.
 *
 * @param array $cfgRec Indexing configuration record
 * @param array $session_data Session data, passed by reference; 'urlLog' lists the URLs already queued
 * @param array $params Queue entry parameters; 'url' is the URL to index, 'depth' the current depth
 * @param object $pObj Parent object; addQueueEntry_callBack() is called on it
 * @return void
 */
public function crawler_execute_type3($cfgRec, &$session_data, $params, &$pObj) {
    // First call for this run: start the URL log with the entry URL.
    if (!is_array($session_data)) {
        $session_data = array(
            'urlLog' => array($params['url'])
        );
    }
    // Index the URL itself and collect its links:
    $rootLine = $this->getUidRootLineForClosestTemplate($cfgRec['pid']);
    $linkedUrls = $this->indexExtUrl($params['url'], $cfgRec['pid'], $rootLine, $cfgRec['uid'], $cfgRec['set_id']);
    if (!($params['depth'] < $cfgRec['depth'])) {
        return;
    }
    foreach ($linkedUrls as $candidate) {
        $acceptedUrl = $this->checkUrl($candidate, $session_data['urlLog'], $cfgRec['externalUrl']);
        if (!$acceptedUrl) {
            continue;
        }
        if ($this->checkDeniedSuburls($acceptedUrl, $cfgRec['url_deny'])) {
            continue;
        }
        $this->instanceCounter++;
        $session_data['urlLog'][] = $acceptedUrl;
        $nparams = array(
            'indexConfigUid' => $cfgRec['uid'],
            'url' => $acceptedUrl,
            'procInstructions' => array('[Index Cfg UID#' . $cfgRec['uid'] . ']'),
            'depth' => $params['depth'] + 1
        );
        $scheduledTime = $GLOBALS['EXEC_TIME'] + $this->instanceCounter * $this->secondsPerExternalUrl;
        $pObj->addQueueEntry_callBack($cfgRec['set_id'], $nparams, $this->callBack, $cfgRec['pid'], $scheduledTime);
    }
}
362 
/**
 * Submits the URLs of one page for re-indexing and queues its subpages.
 *
 * @param array $cfgRec Indexing configuration record
 * @param array $session_data Session data, passed by reference; queued subpages are logged under 'urlLog'
 * @param array $params Queue entry parameters; 'url' holds the page uid, 'depth' the current depth
 * @param object $pObj Parent object; getUrlsForPageRow(), urlListFromUrlArray() and addQueueEntry_callBack() are called on it
 * @return void
 */
public function crawler_execute_type4($cfgRec, &$session_data, $params, &$pObj) {
    // Base page uid and its record:
    $pageUid = (int)$params['url'];
    $pageRow = BackendUtility::getRecord('pages', $pageUid);
    // URL sets for the page:
    $urlSets = $pObj->getUrlsForPageRow($pageRow);
    // Registry for duplicates:
    $duplicateTrack = array();
    // Dummy:
    $downloadUrls = array();
    // Submit URLs; the returned list is not needed here.
    if (count($urlSets)) {
        foreach ($urlSets as $urlSet) {
            $pObj->urlListFromUrlArray($urlSet, $pageRow, $GLOBALS['EXEC_TIME'], 30, 1, 0, $duplicateTrack, $downloadUrls, array('tx_indexedsearch_reindex'));
        }
    }
    // Subpages are only queued while the configured depth is not reached:
    if (!($params['depth'] < $cfgRec['depth'])) {
        return;
    }
    $subpages = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows('uid,title', 'pages', 'pid = ' . $pageUid . BackendUtility::deleteClause('pages'));
    if (!count($subpages)) {
        return;
    }
    foreach ($subpages as $subpage) {
        $this->instanceCounter++;
        $session_data['urlLog'][] = 'pages:' . $subpage['uid'] . ': ' . $subpage['title'];
        $nparams = array(
            'indexConfigUid' => $cfgRec['uid'],
            'url' => $subpage['uid'],
            'procInstructions' => array('[Index Cfg UID#' . $cfgRec['uid'] . ']'),
            'depth' => $params['depth'] + 1
        );
        $scheduledTime = $GLOBALS['EXEC_TIME'] + $this->instanceCounter * $this->secondsPerExternalUrl;
        $pObj->addQueueEntry_callBack($cfgRec['set_id'], $nparams, $this->callBack, $cfgRec['pid'], $scheduledTime);
    }
}
411 
/**
 * Ends indexing runs that have no pending queue entries left.
 *
 * For every index configuration with a non-zero set_id, counts the crawler
 * queue entries of that set which have not been executed. If there are none,
 * index data left over from earlier sets of that configuration is removed
 * and the configuration is reset (set_id 0, empty session data).
 *
 * NOTE(review): the signature line of this method was missing from the listing
 * this code was taken from; it is restored here as a parameterless public
 * method named cleanUpOldRunningConfigurations - confirm against upstream.
 *
 * @return void
 */
public function cleanUpOldRunningConfigurations() {
    // Tables holding data per phash (list copied from class.tx_indexedsearch_modfunc1.php):
    $tableArr = explode(',', 'index_phash,index_rel,index_section,index_grlist,index_fulltext,index_debug');
    // Lookup running index configurations:
    $runningIndexingConfigurations = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows('uid,set_id', 'index_config', 'set_id<>0' . BackendUtility::deleteClause('index_config'));
    if (!is_array($runningIndexingConfigurations)) {
        return;
    }
    foreach ($runningIndexingConfigurations as $cfgRec) {
        // A set is finished when no queue entry of it is waiting for execution:
        $queued_items = $GLOBALS['TYPO3_DB']->exec_SELECTcountRows('*', 'tx_crawler_queue', 'set_id=' . (int)$cfgRec['set_id'] . ' AND exec_time=0');
        if ($queued_items) {
            continue;
        }
        // Remove index data of this configuration that belongs to other sets:
        $oldPhashRows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows('phash', 'index_phash', 'freeIndexUid=' . (int)$cfgRec['uid'] . ' AND freeIndexSetId<>' . (int)$cfgRec['set_id']);
        if (is_array($oldPhashRows)) {
            foreach ($oldPhashRows as $pHashRow) {
                foreach ($tableArr as $table) {
                    $GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash=' . (int)$pHashRow['phash']);
                }
            }
        }
        // End the process by resetting the index-config record:
        $field_array = array(
            'set_id' => 0,
            'session_data' => ''
        );
        $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_config', 'uid=' . (int)$cfgRec['uid'], $field_array);
    }
}
444 
445  /*****************************************
446  *
447  * Helper functions
448  *
449  *****************************************/
/**
 * Decides whether a URL found on an indexed page should be followed.
 *
 * A trailing double slash is reduced to a single one and any fragment is cut
 * off. The URL is accepted when it contains no '../', starts with the base
 * URL and is not in the log yet.
 *
 * @param string $url URL to check
 * @param array $urlLog URLs already registered
 * @param string $baseUrl URL every accepted URL must start with
 * @return string|NULL The cleaned URL if accepted, otherwise NULL
 */
public function checkUrl($url, $urlLog, $baseUrl) {
    $url = preg_replace('/\\/\\/$/', '/', $url);
    // Drop the fragment part:
    $urlParts = explode('#', $url);
    $url = $urlParts[0];
    if (strpos($url, '../') !== FALSE) {
        return NULL;
    }
    if (!GeneralUtility::isFirstPartOfStr($url, $baseUrl)) {
        return NULL;
    }
    if (in_array($url, $urlLog)) {
        return NULL;
    }
    return $url;
}
470 
/**
 * Indexes an external URL and returns the links found in its content.
 *
 * Links without a scheme are made absolute: links starting with '/' are
 * prefixed with scheme and host of the indexed URL, all others with the base
 * href of the document (or, if the document declares none, the directory of
 * the indexed URL).
 *
 * @param string $url URL to index
 * @param int $pageId Page id the indexed document is related to
 * @param array $rl Root line uids
 * @param int $cfgUid Uid of the indexing configuration
 * @param int $setId Set id of the current indexing run
 * @return array List of URLs linked from the document
 */
public function indexExtUrl($url, $pageId, $rl, $cfgUid, $setId) {
    // Index external URL:
    $indexerObj = GeneralUtility::makeInstance('TYPO3\\CMS\\IndexedSearch\\Indexer');
    $indexerObj->backend_initIndexer($pageId, 0, 0, '', $rl);
    $indexerObj->backend_setFreeIndexUid($cfgUid, $setId);
    // To avoid phash_t3 being written to file sections (otherwise they are removed when page is reindexed!!!)
    $indexerObj->hash['phash'] = -1;
    $indexerObj->indexExternalUrl($url);
    // parse_url() omits components that are not in the URL and returns FALSE for
    // malformed URLs, so every component is read with a fallback.
    $url_qParts = parse_url($url);
    $scheme = isset($url_qParts['scheme']) ? $url_qParts['scheme'] : '';
    $host = isset($url_qParts['host']) ? $url_qParts['host'] : '';
    $path = isset($url_qParts['path']) ? $url_qParts['path'] : '';
    $baseAbsoluteHref = $scheme . '://' . $host;
    $baseHref = $indexerObj->extractBaseHref($indexerObj->indexExternalUrl_content);
    if (!$baseHref) {
        // Derive the base href from the directory part of the current URL:
        $lastSlashPosition = strrpos($path, '/');
        $baseHref = $baseAbsoluteHref;
        if ($lastSlashPosition !== FALSE) {
            $baseHref .= substr($path, 0, $lastSlashPosition);
        }
    }
    $baseHref = rtrim($baseHref, '/');
    // Get URLs on this page:
    $subUrls = array();
    $list = $indexerObj->extractHyperLinks($indexerObj->indexExternalUrl_content);
    foreach ($list as $linkInfo) {
        // Decode entities:
        $subUrl = htmlspecialchars_decode($linkInfo['href']);
        $qParts = parse_url($subUrl);
        if (empty($qParts['scheme'])) {
            $relativeUrl = GeneralUtility::resolveBackPath($subUrl);
            // isset() guards against an empty href, which has no first character.
            if (isset($relativeUrl[0]) && $relativeUrl[0] === '/') {
                $subUrl = $baseAbsoluteHref . $relativeUrl;
            } else {
                $subUrl = $baseHref . '/' . $relativeUrl;
            }
        }
        $subUrls[] = $subUrl;
    }
    return $subUrls;
}
519 
/**
 * Indexes a single record as if it were a page.
 *
 * The first field of the configured field list provides the title, all
 * further fields are concatenated into the content. Tags are stripped from
 * both before they are handed to the indexer.
 *
 * @param array $r Record to index
 * @param array $cfgRec Indexing configuration record
 * @param array|NULL $rl Root line uids; looked up from the configuration's pid when not an array
 * @return void
 */
public function indexSingleRecord($r, $cfgRec, $rl = NULL) {
    // Init:
    $rl = is_array($rl) ? $rl : $this->getUidRootLineForClosestTemplate($cfgRec['pid']);
    $fieldList = GeneralUtility::trimExplode(',', $cfgRec['fieldlist'], TRUE);
    // Not every table declares all of these ctrl keys, so read them with fallbacks:
    $tableCtrl = isset($GLOBALS['TCA'][$cfgRec['table2index']]['ctrl']) ? $GLOBALS['TCA'][$cfgRec['table2index']]['ctrl'] : array();
    $languageField = isset($tableCtrl['languageField']) ? $tableCtrl['languageField'] : '';
    $sys_language_uid = $languageField ? $r[$languageField] : 0;
    $tstamp = isset($tableCtrl['tstamp'], $r[$tableCtrl['tstamp']]) ? $r[$tableCtrl['tstamp']] : NULL;
    $crdate = isset($tableCtrl['crdate'], $r[$tableCtrl['crdate']]) ? $r[$tableCtrl['crdate']] : NULL;
    // (Re)-Indexing a row from a table:
    $indexerObj = GeneralUtility::makeInstance('TYPO3\\CMS\\IndexedSearch\\Indexer');
    parse_str(str_replace('###UID###', $r['uid'], $cfgRec['get_params']), $GETparams);
    $indexerObj->backend_initIndexer($cfgRec['pid'], 0, $sys_language_uid, '', $rl, $GETparams, $cfgRec['chashcalc'] ? TRUE : FALSE);
    $indexerObj->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);
    $indexerObj->forceIndexing = TRUE;
    // The title stays empty when the field list is empty.
    $theTitle = '';
    $theContent = '';
    foreach ($fieldList as $k => $v) {
        if (!$k) {
            // First field of the list is the title:
            $theTitle = $r[$v];
        } else {
            $theContent .= $r[$v] . ' ';
        }
    }
    // Indexing the record as a page (but with parameters set, see ->backend_setFreeIndexUid()).
    // A space is put in front of every tag so that words do not run together once tags are stripped.
    $indexerObj->backend_indexAsTYPO3Page(
        strip_tags(str_replace('<', ' <', $theTitle)),
        '',
        '',
        strip_tags(str_replace('<', ' <', $theContent)),
        $GLOBALS['LANG']->charSet,
        $tstamp,
        $crdate,
        $r['uid']
    );
}
552 
// Intended, going by its name, to load the indexer class.
// NOTE(review): the body is empty in this listing and the embedded line numbering skips a line
// (560 -> 562), so a statement may have been lost - confirm against the upstream file.
560  public function loadIndexerClass() {
562  }
563 
/**
 * Returns the uids of the root line the template service determines for a page.
 *
 * @param int $id Page uid the root line is looked up for
 * @return array Page uids, keyed like the root line of the template service
 */
public function getUidRootLineForClosestTemplate($id) {
    $tmpl = GeneralUtility::makeInstance('TYPO3\\CMS\\Core\\TypoScript\\ExtendedTemplateService');
    // Do not log time-performance information
    $tmpl->tt_track = 0;
    $tmpl->init();
    // Gets the rootLine
    $sys_page = GeneralUtility::makeInstance('TYPO3\\CMS\\Frontend\\Page\\PageRepository');
    $rootLine = $sys_page->getRootLine($id);
    // This generates the constants/config + hierarchy info for the template.
    $tmpl->runThroughTemplates($rootLine, 0);
    // Reduce the root line of the template service to its uids:
    $rootline_uids = array();
    foreach ($tmpl->rootLine as $rlkey => $rldat) {
        $rootline_uids[$rlkey] = $rldat['uid'];
    }
    return $rootline_uids;
}
590 
/**
 * Calculates the next point in time an indexing configuration is due.
 *
 * The result is the configured offset after a reference midnight plus as many
 * whole frequency intervals as are needed to reach the current time.
 *
 * @param array $cfgRec Indexing configuration record; 'timer_frequency', 'timer_offset' and 'timer_next_indexing' are read
 * @return int Unix timestamp of the next indexing run
 */
public function generateNextIndexingTime($cfgRec) {
    $currentTime = $GLOBALS['EXEC_TIME'];
    // Find a midnight to use for the offset calculation. For frequencies of up to a day it does not
    // matter which day is used, so yesterday is taken. For longer frequencies the day of the last
    // scheduled time is used, so that e.g. the weekday of a weekly schedule is kept regardless of
    // when the script runs.
    if ($cfgRec['timer_frequency'] <= 24 * 3600) {
        $aMidNight = mktime(0, 0, 0) - 1 * 24 * 3600;
    } else {
        $lastTime = $cfgRec['timer_next_indexing'] ?: $GLOBALS['EXEC_TIME'];
        // Four-digit year: mktime() re-maps two-digit years to a fixed window.
        $aMidNight = mktime(0, 0, 0, (int)date('m', $lastTime), (int)date('d', $lastTime), (int)date('Y', $lastTime));
    }
    // Find last offset time plus frequency in seconds:
    $lastSureOffset = $aMidNight + \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cfgRec['timer_offset'], 0, 86400);
    $frequencySeconds = \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cfgRec['timer_frequency'], 1);
    // Number of whole frequency blocks until the next time; ceil() returns a float.
    $frequencyBlocksUntilNextTime = ceil(($currentTime - $lastSureOffset) / $frequencySeconds);
    // Next time is the offset plus the frequency blocks multiplied with the frequency length.
    // Cast back to int so that callers receive a plain timestamp.
    return (int)($lastSureOffset + $frequencyBlocksUntilNextTime * $frequencySeconds);
}
616 
/**
 * Tells whether a URL starts with one of the denied URL prefixes.
 *
 * @param string $url URL to test
 * @param string $url_deny Denied URL prefixes, one per line
 * @return bool TRUE if the URL starts with one of the prefixes, otherwise FALSE
 */
public function checkDeniedSuburls($url, $url_deny) {
    if (!trim($url_deny)) {
        // Nothing is denied.
        return FALSE;
    }
    $deniedPrefixes = GeneralUtility::trimExplode(LF, $url_deny, TRUE);
    foreach ($deniedPrefixes as $deniedPrefix) {
        if (GeneralUtility::isFirstPartOfStr($url, $deniedPrefix)) {
            return TRUE;
        }
    }
    return FALSE;
}
636 
/**
 * Adds a queue entry on behalf of a hook object.
 *
 * Uses the parent object stored in $this->pObj, which crawler_execute() sets
 * before it calls a hook.
 *
 * @param array $cfgRec Indexing configuration record
 * @param string $title Value stored as 'url' of the queue entry
 * @return void
 */
public function addQueueEntryForHook($cfgRec, $title) {
    $configurationUid = $cfgRec['uid'];
    $nparams = array();
    // This must ALWAYS be the cfgRec uid!
    $nparams['indexConfigUid'] = $configurationUid;
    $nparams['url'] = $title;
    $nparams['procInstructions'] = array('[Index Cfg UID#' . $configurationUid . ']');
    $this->pObj->addQueueEntry_callBack($cfgRec['set_id'], $nparams, $this->callBack, $cfgRec['pid']);
}
654 
/**
 * Removes all index data registered for a page.
 *
 * Looks up the phash values stored for the page in index_section and deletes
 * the rows with these values from all index tables listed below.
 *
 * @param int $id Page uid
 * @return void
 */
public function deleteFromIndex($id) {
    // Lookup old phash rows:
    $sectionRows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows('phash', 'index_section', 'page_id=' . (int)$id);
    if (!count($sectionRows)) {
        return;
    }
    $pHashes = array();
    foreach ($sectionRows as $sectionRow) {
        $pHashes[] = $sectionRow['phash'];
    }
    $whereClause = 'phash IN (' . implode(',', $GLOBALS['TYPO3_DB']->cleanIntArray($pHashes)) . ')';
    $indexTables = array('index_debug', 'index_fulltext', 'index_grlist', 'index_phash', 'index_rel', 'index_section');
    foreach ($indexTables as $indexTable) {
        $GLOBALS['TYPO3_DB']->exec_DELETEquery($indexTable, $whereClause);
    }
}
677 
678  /*************************
679  *
680  * Hook functions for TCEmain (indexing of records)
681  *
682  *************************/
/**
 * Command map hook: removes the index data of a page that is being deleted.
 *
 * @param string $command Command name; only 'delete' is acted upon
 * @param string $table Table name; only 'pages' is acted upon
 * @param int $id Uid of the record
 * @param mixed $value Command value (not used)
 * @param object $pObj Calling object (not used)
 * @return void
 */
public function processCmdmap_preProcess($command, $table, $id, $value, $pObj) {
    $isPageDeletion = $command == 'delete' && $table == 'pages';
    if (!$isPageDeletion) {
        return;
    }
    // Clean up the index
    $this->deleteFromIndex($id);
}
700 
/**
 * Data map hook: keeps the index in sync with a record that was just saved.
 *
 * Removes the index data of a page that was updated to be hidden or excluded
 * from search. Afterwards the saved record is re-indexed with every matching,
 * currently idle record indexing configuration that has indexing on change enabled.
 *
 * @param string $status 'new' or 'update'
 * @param string $table Table name of the record
 * @param mixed $id Uid of the record, or the NEW... placeholder for new records
 * @param array $fieldArray Fields that were written
 * @param object $pObj Calling object; substNEWwithIDs is read to resolve placeholders
 * @return void
 */
public function processDatamap_afterDatabaseOperations($status, $table, $id, $fieldArray, $pObj) {
    // Nothing to do if no field was actually updated:
    if (!count($fieldArray)) {
        return;
    }
    if ($status == 'new') {
        // Translate new ids.
        $id = $pObj->substNEWwithIDs[$id];
    } elseif ($table == 'pages' && $status == 'update') {
        $hiddenNow = array_key_exists('hidden', $fieldArray) && $fieldArray['hidden'] == 1;
        if ($hiddenNow || array_key_exists('no_search', $fieldArray) && $fieldArray['no_search'] == 1) {
            // If the page should be hidden or not indexed after update, delete index for this page
            $this->deleteFromIndex($id);
        }
    }
    // Get full record and if exists, search for indexing configurations:
    $currentRecord = BackendUtility::getRecord($table, $id);
    if (!is_array($currentRecord)) {
        return;
    }
    $recordPid = (int)$currentRecord['pid'];
    // Select all (not running) indexing configurations of type "record" (1) which point to this table
    // and are located on the same page as the record or point to the right source PID:
    $whereClause = 'hidden=0
 AND (starttime=0 OR starttime<=' . $GLOBALS['EXEC_TIME'] . ')
 AND set_id=0
 AND type=1
 AND table2index=' . $GLOBALS['TYPO3_DB']->fullQuoteStr($table, 'index_config') . '
 AND (
 (alternative_source_pid=0 AND pid=' . $recordPid . ')
 OR (alternative_source_pid=' . $recordPid . ')
 )
 AND records_indexonchange=1
 ' . BackendUtility::deleteClause('index_config');
    $indexingConfigurations = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows('*', 'index_config', $whereClause);
    foreach ($indexingConfigurations as $cfgRec) {
        $this->indexSingleRecord($currentRecord, $cfgRec);
    }
}
743 
744 }
indexSingleRecord($r, $cfgRec, $rl=NULL)
indexExtUrl($url, $pageId, $rl, $cfgUid, $setId)
$TYPO3_CONF_VARS['SYS']['contentTable']
static getAllFilesAndFoldersInPath(array $fileArr, $path, $extList='', $regDirs=FALSE, $recursivityLevels=99, $excludePattern='')
static isFirstPartOfStr($str, $partStr)
static forceIntegerInRange($theInt, $min, $max=2000000000, $defaultValue=0)
Definition: MathUtility.php:32
crawler_execute_type2($cfgRec, &$session_data, $params, &$pObj)
static getUserObj($classRef, $checkPrefix='', $silent=FALSE)
static trimExplode($delim, $string, $removeEmptyValues=FALSE, $limit=0)
crawler_execute_type4($cfgRec, &$session_data, $params, &$pObj)
processCmdmap_preProcess($command, $table, $id, $value, $pObj)
processDatamap_afterDatabaseOperations($status, $table, $id, $fieldArray, $pObj)
if(!defined('TYPO3_MODE')) $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_userauth.php']['logoff_pre_processing'][]
crawler_execute_type1($cfgRec, &$session_data, $params, &$pObj)
static getFileAbsFileName($filename, $onlyRelative=TRUE, $relToTYPO3_mainDir=FALSE)
static removePrefixPathFromList(array $fileArr, $prefixToRemove)
static deleteClause($table, $tableAlias='')
crawler_execute_type3($cfgRec, &$session_data, $params, &$pObj)