TYPO3 CMS  TYPO3_8-7
CrawlerHook.php
Go to the documentation of this file.
1 <?php
3 
4 /*
5  * This file is part of the TYPO3 CMS project.
6  *
7  * It is free software; you can redistribute it and/or modify it under
8  * the terms of the GNU General Public License, either version 2
9  * of the License, or any later version.
10  *
11  * For the full copyright and license information, please read the
12  * LICENSE.txt file that was distributed with this source code.
13  *
14  * The TYPO3 project - inspiring people to share!
15  */
16 
24 
29 {
36 
// Counter incremented for every follow-up queue entry this instance creates; the
// crawler_execute_type* methods multiply it with the per-URL delay to stagger the
// scheduled execution time of those entries.
42  public $instanceCounter = 0;
43 
// Class name passed to the crawler's addQueueEntry_callBack() as the callback for
// all queue entries created by this hook (this class itself).
47  public $callBack = self::class;
48 
/**
 * Makes sure a language service object is registered in $GLOBALS['LANG'].
 *
 * If none is present, a LanguageService is instantiated and initialised with the
 * language key found in the backend user's "uc" array, so that the backend charset
 * is available.
 */
public function __construct()
{
    // A language object is already in place - nothing to set up.
    if (is_object($GLOBALS['LANG'])) {
        return;
    }
    $languageService = GeneralUtility::makeInstance(\TYPO3\CMS\Lang\LanguageService::class);
    $GLOBALS['LANG'] = $languageService;
    $languageService->init($GLOBALS['BE_USER']->uc['lang']);
}
60 
/**
 * Activates indexing configurations that are due and seeds the crawler queue for them.
 *
 * Selects all index_config rows whose "timer_next_indexing" lies before
 * $GLOBALS['EXEC_TIME'] and whose "set_id" is 0. Each of them gets a freshly generated
 * set id, a new "timer_next_indexing" value and an emptied "session_data", after which
 * one initial queue entry is added through $pObj->addQueueEntry_callBack(), with
 * parameters depending on the record's "type" (1 = records, 2 = files,
 * 3 = external URL, 4 = page tree, 5 = meta/no-op, anything else = custom hook object
 * registered in TYPO3_CONF_VARS/EXTCONF/indexed_search/crawler).
 *
 * @param object $pObj Object offering addQueueEntry_callBack(); presumably the crawler library instance - TODO confirm
 */
68  public function crawler_init(&$pObj)
69  {
70  // Select all indexing configuration which are waiting to be activated:
71  $connection = GeneralUtility::makeInstance(ConnectionPool::class)->getConnectionForTable('index_config');
72  $queryBuilder = $connection->createQueryBuilder();
73 
74  $result = $queryBuilder->select('*')
75  ->from('index_config')
76  ->where(
77  $queryBuilder->expr()->lt(
78  'timer_next_indexing',
79  $queryBuilder->createNamedParameter($GLOBALS['EXEC_TIME'], \PDO::PARAM_INT)
80  ),
81  $queryBuilder->expr()->eq('set_id', $queryBuilder->createNamedParameter(0, \PDO::PARAM_INT))
82  )
83  ->execute();
84 
85  // For each configuration, check if it should be executed and if so, start:
86  while ($cfgRec = $result->fetch()) {
87  // Generate a unique set-ID:
88  $setId = GeneralUtility::md5int(microtime());
89  // Get next time:
90  $nextTime = $this->generateNextIndexingTime($cfgRec);
91  // Start process by updating index-config record:
92  $connection->update(
93  'index_config',
94  [
95  'set_id' => $setId,
96  'timer_next_indexing' => $nextTime,
97  'session_data' => ''
98  ],
99  [
100  'uid' => (int)$cfgRec['uid']
101  ]
102  );
103  // Based on configuration type:
104  switch ($cfgRec['type']) {
105  case 1:
106  // RECORDS:
107  // Parameters:
108  $params = [
109  'indexConfigUid' => $cfgRec['uid'],
110  'procInstructions' => ['[Index Cfg UID#' . $cfgRec['uid'] . ']'],
111  'url' => 'Records (start)'
112  ];
113  //
114  $pObj->addQueueEntry_callBack($setId, $params, $this->callBack, $cfgRec['pid']);
115  break;
116  case 2:
117  // FILES:
118  // Parameters:
119  $params = [
120  'indexConfigUid' => $cfgRec['uid'],
121  // General
122  'procInstructions' => ['[Index Cfg UID#' . $cfgRec['uid'] . ']'],
123  // General
124  'url' => $cfgRec['filepath'],
125  // Partly general... (for URL and file types)
126  'depth' => 0
127  ];
128  $pObj->addQueueEntry_callBack($setId, $params, $this->callBack, $cfgRec['pid']);
129  break;
130  case 3:
131  // External URL:
132  // Parameters:
133  $params = [
134  'indexConfigUid' => $cfgRec['uid'],
135  // General
136  'procInstructions' => ['[Index Cfg UID#' . $cfgRec['uid'] . ']'],
137  // General
138  'url' => $cfgRec['externalUrl'],
139  // Partly general... (for URL and file types)
140  'depth' => 0
141  ];
142  $pObj->addQueueEntry_callBack($setId, $params, $this->callBack, $cfgRec['pid']);
143  break;
144  case 4:
145  // Page tree
146  // Parameters:
147  $params = [
148  'indexConfigUid' => $cfgRec['uid'],
149  // General
150  'procInstructions' => ['[Index Cfg UID#' . $cfgRec['uid'] . ']'],
151  // General
152  'url' => (int)$cfgRec['alternative_source_pid'],
153  // Partly general... (for URL and file types and page tree (root))
154  'depth' => 0
155  ];
156  $pObj->addQueueEntry_callBack($setId, $params, $this->callBack, $cfgRec['pid']);
157  break;
158  case 5:
159  // Meta configuration, nothing to do:
160  // NOOP
161  break;
162  default:
// Any other type value is delegated to a hook object registered for that type.
163  if ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['crawler'][$cfgRec['type']]) {
164  $hookObj = GeneralUtility::getUserObj($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['crawler'][$cfgRec['type']]);
165  if (is_object($hookObj)) {
166  // Parameters:
167  $params = [
168  'indexConfigUid' => $cfgRec['uid'],
169  // General
170  'procInstructions' => ['[Index Cfg UID#' . $cfgRec['uid'] . '/CUSTOM]'],
171  // General
// NOTE(review): $message is never assigned in this method, so the hook receives an
// undefined variable (unless initMessage() takes it by reference) - confirm intent.
172  'url' => $hookObj->initMessage($message)
173  ];
174  $pObj->addQueueEntry_callBack($setId, $params, $this->callBack, $cfgRec['pid']);
175  }
176  }
177  }
178  }
179  // Finally, look up all old index configurations which are finished and needs to be reset and done.
// NOTE(review): no statement follows the comment above in this listing (its line 180
// is absent); presumably a clean-up call belongs here - verify against the original file.
181  }
182 
/**
 * Processes one crawler queue entry that belongs to an indexing configuration.
 *
 * Loads the index_config record named by $params['indexConfigUid'] (all query
 * restrictions removed), unserializes its "session_data", dispatches to the handler
 * matching the record's "type" and finally writes the (possibly modified) session
 * data back to the record.
 *
 * @param array $params Queue entry parameters; 'indexConfigUid' must be set for anything to happen
 * @param object $pObj Parent object, forwarded to the type handlers
 * @return array Array with the single key 'log' holding $params
 */
public function crawler_execute($params, &$pObj)
{
    // Indexer configuration ID must exist:
    if (!$params['indexConfigUid']) {
        return ['log' => $params];
    }

    // Load the indexing configuration record:
    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getQueryBuilderForTable('index_config');
    $queryBuilder->getRestrictions()->removeAll();
    $queryBuilder
        ->select('*')
        ->from('index_config')
        ->where(
            $queryBuilder->expr()->eq(
                'uid',
                $queryBuilder->createNamedParameter($params['indexConfigUid'], \PDO::PARAM_INT)
            )
        );
    $cfgRec = $queryBuilder->execute()->fetch();
    if (!is_array($cfgRec)) {
        return ['log' => $params];
    }

    // Unpack session data:
    $session_data = unserialize($cfgRec['session_data']);

    // Select handler by type (loose comparison, as the database delivers the value):
    $type = $cfgRec['type'];
    if ($type == 1) {
        // Records
        $this->crawler_execute_type1($cfgRec, $session_data, $params, $pObj);
    } elseif ($type == 2) {
        // Files
        $this->crawler_execute_type2($cfgRec, $session_data, $params, $pObj);
    } elseif ($type == 3) {
        // External URL
        $this->crawler_execute_type3($cfgRec, $session_data, $params, $pObj);
    } elseif ($type == 4) {
        // Page tree
        $this->crawler_execute_type4($cfgRec, $session_data, $params, $pObj);
    } elseif ($type == 5) {
        // Meta: NOOP (should never enter here!)
    } else {
        $hookReference = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['crawler'][$cfgRec['type']];
        if ($hookReference) {
            $hookObj = GeneralUtility::getUserObj($hookReference);
            if (is_object($hookObj)) {
                // Kept on the instance for addQueueEntryForHook()
                $this->pObj = $pObj;
                $hookObj->indexOperation($cfgRec, $session_data, $params, $this);
            }
        }
    }

    // Save process data which might be modified:
    $connection = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getConnectionForTable('index_config');
    $connection->update(
        'index_config',
        ['session_data' => serialize($session_data)],
        ['uid' => (int)$cfgRec['uid']]
    );

    return ['log' => $params];
}
256 
/**
 * Indexes the next batch of records from the table named in the configuration.
 *
 * Rows are read from $cfgRec['table2index'] on the source page (the
 * "alternative_source_pid" if set, otherwise the configuration's own pid) in ascending
 * uid order, starting after the uid remembered in $session_data['uid']. Each row is
 * handed to indexSingleRecord(). If at least one row was processed, a follow-up queue
 * entry is added so that the next batch gets indexed as well.
 *
 * The number of processed rows is counted while fetching instead of asking the result
 * for rowCount(): Doctrine DBAL does not guarantee a meaningful row count for SELECT
 * statements on every database driver.
 *
 * @param array $cfgRec Indexing configuration record
 * @param array $session_data Session data of the indexing run; 'uid' holds the last processed uid (passed by reference)
 * @param array $params Parameters of the queue entry being executed (not read by this method)
 * @param object $pObj Parent object providing addQueueEntry_callBack()
 */
public function crawler_execute_type1($cfgRec, &$session_data, $params, &$pObj)
{
    // Only tables known to TCA can be indexed.
    if (!$cfgRec['table2index'] || !isset($GLOBALS['TCA'][$cfgRec['table2index']])) {
        return;
    }
    // Init session data array if not already:
    if (!is_array($session_data)) {
        $session_data = [
            'uid' => 0
        ];
    }
    // Init:
    $pid = (int)$cfgRec['alternative_source_pid'] ?: $cfgRec['pid'];
    $numberOfRecords = $cfgRec['recordsbatch']
        ? MathUtility::forceIntegerInRange($cfgRec['recordsbatch'], 1)
        : 100;

    // Get root line:
    $rootLine = $this->getUidRootLineForClosestTemplate($cfgRec['pid']);
    // Select
    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getQueryBuilderForTable($cfgRec['table2index']);

    $result = $queryBuilder->select('*')
        ->from($cfgRec['table2index'])
        ->where(
            $queryBuilder->expr()->eq(
                'pid',
                $queryBuilder->createNamedParameter($pid, \PDO::PARAM_INT)
            ),
            $queryBuilder->expr()->gt(
                'uid',
                $queryBuilder->createNamedParameter($session_data['uid'], \PDO::PARAM_INT)
            )
        )
        ->setMaxResults($numberOfRecords)
        ->orderBy('uid')
        ->execute();

    // Traverse, keeping track of how many rows were actually delivered:
    $processedRecords = 0;
    while ($row = $result->fetch()) {
        // Index single record:
        $this->indexSingleRecord($row, $cfgRec, $rootLine);
        // Update the UID we last processed:
        $session_data['uid'] = $row['uid'];
        $processedRecords++;
    }

    // Finally, set entry for next indexing of batch of records:
    if ($processedRecords > 0) {
        $nparams = [
            'indexConfigUid' => $cfgRec['uid'],
            'url' => 'Records from UID#' . ($session_data['uid'] + 1) . '-?',
            'procInstructions' => ['[Index Cfg UID#' . $cfgRec['uid'] . ']']
        ];
        $pObj->addQueueEntry_callBack($cfgRec['set_id'], $nparams, $this->callBack, $cfgRec['pid']);
    }
}
321 
/**
 * Indexes a file, or queues the content of a directory for indexing.
 *
 * The path in $params['url'] is made absolute and checked with
 * GeneralUtility::isAllowedAbsPath(). A file is indexed directly. For a directory, the
 * contained files (filtered by the configured extensions) and - as long as the
 * configured depth is not reached - its sub-directories are added to the crawler queue.
 *
 * Sub-directory paths are built from the directory path with exactly one trailing
 * slash, so a configured path without a trailing slash no longer yields
 * "<dir><subdir>/" instead of "<dir>/<subdir>/".
 *
 * @param array $cfgRec Indexing configuration record
 * @param array $session_data Session data of the indexing run (not used by this method; passed by reference)
 * @param array $params Parameters of the queue entry; 'url' is the path, 'depth' the current depth
 * @param object $pObj Parent object providing addQueueEntry_callBack()
 */
public function crawler_execute_type2($cfgRec, &$session_data, $params, &$pObj)
{
    // Prepare path, making it absolute and checking:
    $readpath = $params['url'];
    if (!GeneralUtility::isAbsPath($readpath)) {
        $readpath = GeneralUtility::getFileAbsFileName($readpath);
    }
    if (!GeneralUtility::isAllowedAbsPath($readpath)) {
        return;
    }

    if (@is_file($readpath)) {
        // If file, index it!
        // Get root line (need to provide this when indexing external files)
        $rl = $this->getUidRootLineForClosestTemplate($cfgRec['pid']);
        // (Re)-Indexing file on page.
        $indexerObj = GeneralUtility::makeInstance(\TYPO3\CMS\IndexedSearch\Indexer::class);
        $indexerObj->backend_initIndexer($cfgRec['pid'], 0, 0, '', $rl);
        $indexerObj->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);
        // EXPERIMENT - but to avoid phash_t3 being written to file sections (otherwise they are removed when page is reindexed!!!)
        $indexerObj->hash['phash'] = -1;
        // Index document:
        $indexerObj->indexRegularDocument(\TYPO3\CMS\Core\Utility\PathUtility::stripPathSitePrefix($readpath), true);
        return;
    }

    if (!@is_dir($readpath)) {
        return;
    }

    // If dir, read content and create new pending items for log:
    // Select files and directories in path:
    $extList = implode(',', GeneralUtility::trimExplode(',', $cfgRec['extensions'], true));
    $fileArr = [];
    $files = GeneralUtility::getAllFilesAndFoldersInPath($fileArr, $readpath, $extList, 0, 0);
    $directoryList = GeneralUtility::get_dirs($readpath);
    if (is_array($directoryList) && $params['depth'] < $cfgRec['depth']) {
        // Make sure exactly one slash separates the directory from its sub-directory.
        $directoryPath = rtrim($readpath, '/') . '/';
        foreach ($directoryList as $subdir) {
            if ((string)$subdir !== '') {
                $files[] = $directoryPath . $subdir . '/';
            }
        }
    }
    $files = GeneralUtility::removePrefixPathFromList($files, PATH_site);
    // traverse the items and create log entries:
    foreach ($files as $path) {
        $this->instanceCounter++;
        if ($path !== $params['url']) {
            // Parameters:
            $nparams = [
                'indexConfigUid' => $cfgRec['uid'],
                'url' => $path,
                'procInstructions' => ['[Index Cfg UID#' . $cfgRec['uid'] . ']'],
                'depth' => $params['depth'] + 1
            ];
            $pObj->addQueueEntry_callBack(
                $cfgRec['set_id'],
                $nparams,
                $this->callBack,
                $cfgRec['pid'],
                $GLOBALS['EXEC_TIME'] + $this->instanceCounter * $this->secondsPerExternalUrl
            );
        }
    }
}
382 
/**
 * Indexes an external URL and queues the links found on it.
 *
 * The URL in $params['url'] is indexed via indexExtUrl(). While the configured depth is
 * not reached, every returned sub URL that passes checkUrl() and is not rejected by
 * checkDeniedSuburls() is recorded in $session_data['urlLog'] and added to the crawler
 * queue with a staggered execution time.
 *
 * @param array $cfgRec Indexing configuration record
 * @param array $session_data Session data of the indexing run; 'urlLog' lists URLs already seen (passed by reference)
 * @param array $params Parameters of the queue entry; 'url' is the URL, 'depth' the current depth
 * @param object $pObj Parent object providing addQueueEntry_callBack()
 */
public function crawler_execute_type3($cfgRec, &$session_data, $params, &$pObj)
{
    // Start the URL log with the current URL on the first call of a run.
    if (!is_array($session_data)) {
        $session_data = [
            'urlLog' => [$params['url']]
        ];
    }

    // Index the URL itself and collect the links it contains.
    $rootLine = $this->getUidRootLineForClosestTemplate($cfgRec['pid']);
    $linkedUrls = $this->indexExtUrl($params['url'], $cfgRec['pid'], $rootLine, $cfgRec['uid'], $cfgRec['set_id']);

    // Maximum depth reached: do not queue anything further.
    if (!($params['depth'] < $cfgRec['depth'])) {
        return;
    }

    foreach ($linkedUrls as $linkedUrl) {
        $url = $this->checkUrl($linkedUrl, $session_data['urlLog'], $cfgRec['externalUrl']);
        if (!$url) {
            continue;
        }
        if ($this->checkDeniedSuburls($url, $cfgRec['url_deny'])) {
            continue;
        }
        $this->instanceCounter++;
        $session_data['urlLog'][] = $url;
        $queueParameters = [
            'indexConfigUid' => $cfgRec['uid'],
            'url' => $url,
            'procInstructions' => ['[Index Cfg UID#' . $cfgRec['uid'] . ']'],
            'depth' => $params['depth'] + 1
        ];
        $scheduledTime = $GLOBALS['EXEC_TIME'] + $this->instanceCounter * $this->secondsPerExternalUrl;
        $pObj->addQueueEntry_callBack($cfgRec['set_id'], $queueParameters, $this->callBack, $cfgRec['pid'], $scheduledTime);
    }
}
422 
/**
 * Submits the URLs of one page to the crawler and queues its subpages.
 *
 * The page uid is taken from $params['url']. The URLs the parent object computes for
 * that page are handed to $pObj->urlListFromUrlArray() with the processing instruction
 * 'tx_indexedsearch_reindex'. While the configured depth is not reached, every
 * non-deleted subpage is logged in $session_data['urlLog'] and added to the queue with
 * a staggered execution time.
 *
 * @param array $cfgRec Indexing configuration record
 * @param array $session_data Session data of the indexing run (passed by reference)
 * @param array $params Parameters of the queue entry; 'url' is the page uid, 'depth' the current depth
 * @param object $pObj Parent object providing getUrlsForPageRow(), urlListFromUrlArray() and addQueueEntry_callBack()
 */
public function crawler_execute_type4($cfgRec, &$session_data, $params, &$pObj)
{
    // Base page uid:
    $pageUid = (int)$params['url'];

    // Get array of URLs from page:
    $pageRow = BackendUtility::getRecord('pages', $pageUid);
    $urlSets = $pObj->getUrlsForPageRow($pageRow);
    // Registry for duplicates
    $duplicateTrack = [];
    // Dummy.
    $downloadUrls = [];

    // Submit URLs:
    if (!empty($urlSets)) {
        foreach ($urlSets as $urlSet) {
            $pObj->urlListFromUrlArray(
                $urlSet,
                $pageRow,
                $GLOBALS['EXEC_TIME'],
                30,
                1,
                0,
                $duplicateTrack,
                $downloadUrls,
                ['tx_indexedsearch_reindex']
            );
        }
    }

    // Maximum depth reached: subpages are not followed.
    if (!($params['depth'] < $cfgRec['depth'])) {
        return;
    }

    // Select the subpages, ignoring all restrictions except "deleted":
    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('pages');
    $queryBuilder->getRestrictions()
        ->removeAll()
        ->add(GeneralUtility::makeInstance(DeletedRestriction::class));
    $queryBuilder->select('uid', 'title')
        ->from('pages')
        ->where(
            $queryBuilder->expr()->eq(
                'pid',
                $queryBuilder->createNamedParameter($pageUid, \PDO::PARAM_INT)
            )
        );
    $subpages = $queryBuilder->execute();

    // Traverse subpages and add to queue:
    while ($subpage = $subpages->fetch()) {
        $this->instanceCounter++;
        $session_data['urlLog'][] = 'pages:' . $subpage['uid'] . ': ' . $subpage['title'];
        $queueParameters = [
            'indexConfigUid' => $cfgRec['uid'],
            'url' => $subpage['uid'],
            'procInstructions' => ['[Index Cfg UID#' . $cfgRec['uid'] . ']'],
            'depth' => $params['depth'] + 1
        ];
        $scheduledTime = $GLOBALS['EXEC_TIME'] + $this->instanceCounter * $this->secondsPerExternalUrl;
        $pObj->addQueueEntry_callBack($cfgRec['set_id'], $queueParameters, $this->callBack, $cfgRec['pid'], $scheduledTime);
    }
}
486 
491  {
492  $connectionPool = GeneralUtility::makeInstance(ConnectionPool::class);
493  // List of tables that store information related to the phash value
494  $tablesToClean = [
495  'index_phash',
496  'index_rel',
497  'index_section',
498  'index_grlist',
499  'index_fulltext',
500  'index_debug'
501  ];
502 
503  $queryBuilder = $connectionPool->getQueryBuilderForTable('index_config');
504  $queryBuilder->getRestrictions()
505  ->removeAll()
506  ->add(GeneralUtility::makeInstance(DeletedRestriction::class));
507 
508  // Lookup running index configurations:
509  $runningIndexingConfigurations = $queryBuilder->select('*')
510  ->from('index_config')
511  ->where($queryBuilder->expr()->neq('set_id', $queryBuilder->createNamedParameter(0, \PDO::PARAM_INT)))
512  ->execute()
513  ->fetchAll();
514  // For each running configuration, look up how many log entries there are which are scheduled
515  // for execution and if none, clear the "set_id" (means; Processing was DONE)
516  foreach ($runningIndexingConfigurations as $cfgRec) {
517  // Look for ended processes:
518  $queued_items = $connectionPool->getConnectionForTable('tx_crawler_queue')
519  ->count(
520  '*',
521  'tx_crawler_queue',
522  [
523  'set_id' => (int)$cfgRec['set_id'],
524  'exec_time' => 0
525  ]
526  );
527  if (!$queued_items) {
528  // Lookup old phash rows:
529  $queryBuilder = $connectionPool->getQueryBuilderForTable('index_phash');
530  $oldPhashRows = $queryBuilder
531  ->select('phash')
532  ->from('index_phash')
533  ->where(
534  $queryBuilder->expr()->eq(
535  'freeIndexUid',
536  $queryBuilder->createNamedParameter($cfgRec['uid'], \PDO::PARAM_INT)
537  ),
538  $queryBuilder->expr()->neq(
539  'freeIndexSetId',
540  $queryBuilder->createNamedParameter($cfgRec['set_id'], \PDO::PARAM_INT)
541  )
542  )
543  ->execute()
544  ->fetchAll();
545 
546  // Removing old registrations for all tables
547  foreach ($tablesToClean as $table) {
548  $queryBuilder = $connectionPool->getQueryBuilderForTable($table);
549  $queryBuilder->delete($table)
550  ->where(
551  $queryBuilder->expr()->in(
552  'phash',
553  $queryBuilder->createNamedParameter(
554  array_column($oldPhashRows, 'phash'),
555  Connection::PARAM_INT_ARRAY
556  )
557  )
558  )
559  ->execute();
560  }
561 
562  // End process by updating index-config record:
563  $connectionPool->getConnectionForTable('index_config')
564  ->update(
565  'index_config',
566  [
567  'set_id' => 0,
568  'session_data' => ''
569  ],
570  ['uid' => (int)$cfgRec['uid']]
571  );
572  }
573  }
574  }
575 
576  /*****************************************
577  *
578  * Helper functions
579  *
580  *****************************************/
/**
 * Decides whether a URL found on an indexed page should be followed.
 *
 * The URL is normalised first: a double slash at the very end is collapsed into a
 * single one and everything from the first '#' on is cut off. It is accepted when it
 * contains no '../', starts with $baseUrl and is not yet listed in $urlLog.
 *
 * @param string $url URL to check
 * @param array $urlLog URLs that were already registered
 * @param string $baseUrl Prefix the URL has to start with
 * @return string|null The normalised URL if accepted, otherwise null
 */
public function checkUrl($url, $urlLog, $baseUrl)
{
    $normalizedUrl = preg_replace('/\\/\\/$/', '/', $url);
    $normalizedUrl = explode('#', $normalizedUrl)[0];

    $accepted = strpos($normalizedUrl, '../') === false
        && GeneralUtility::isFirstPartOfStr($normalizedUrl, $baseUrl)
        && !in_array($normalizedUrl, $urlLog);

    return $accepted ? $normalizedUrl : null;
}
601 
/**
 * Indexes an external URL and returns the absolute URLs of the hyperlinks found in it.
 *
 * Links without a scheme are resolved against the document: links starting with '/'
 * against scheme, host and (if given) port of $url, all others against the base href
 * announced by the document or, failing that, the directory part of $url.
 *
 * Compared to a naive resolution this method
 * - keeps an explicit port of $url in the resolved links,
 * - copes with URLs that parse_url() cannot parse or that carry no path/scheme
 *   without raising notices,
 * - handles empty link targets without accessing a string offset that does not exist.
 *
 * @param string $url URL to index
 * @param int $pageId Page id the indexed content is related to
 * @param array $rl Root line uids
 * @param int $cfgUid Uid of the indexing configuration
 * @param int $setId Set id of the current indexing run
 * @return array Absolute URLs of all hyperlinks found in the document
 */
public function indexExtUrl($url, $pageId, $rl, $cfgUid, $setId)
{
    // Index external URL:
    $indexerObj = GeneralUtility::makeInstance(\TYPO3\CMS\IndexedSearch\Indexer::class);
    $indexerObj->backend_initIndexer($pageId, 0, 0, '', $rl);
    $indexerObj->backend_setFreeIndexUid($cfgUid, $setId);
    // To avoid phash_t3 being written to file sections (otherwise they are removed when page is reindexed!!!)
    $indexerObj->hash['phash'] = -1;
    $indexerObj->indexExternalUrl($url);

    // parse_url() returns false for seriously malformed URLs.
    $url_qParts = parse_url($url);
    if (!is_array($url_qParts)) {
        $url_qParts = [];
    }
    $baseAbsoluteHref = ($url_qParts['scheme'] ?? '') . '://' . ($url_qParts['host'] ?? '');
    if (isset($url_qParts['port'])) {
        // Relative links must stay on the same port as the indexed document.
        $baseAbsoluteHref .= ':' . $url_qParts['port'];
    }

    $baseHref = $indexerObj->extractBaseHref($indexerObj->indexExternalUrl_content);
    if (!$baseHref) {
        // Extract base href from current URL: everything up to the last slash of the path
        $baseHref = $baseAbsoluteHref;
        $urlPath = $url_qParts['path'] ?? '';
        $lastSlashPosition = strrpos($urlPath, '/');
        if ($lastSlashPosition !== false) {
            $baseHref .= substr($urlPath, 0, $lastSlashPosition);
        }
    }
    $baseHref = rtrim($baseHref, '/');

    // Get URLs on this page:
    $subUrls = [];
    $list = $indexerObj->extractHyperLinks($indexerObj->indexExternalUrl_content);
    // Traverse links:
    foreach ($list as $linkInfo) {
        // Decode entities:
        $subUrl = htmlspecialchars_decode($linkInfo['href']);
        $qParts = parse_url($subUrl);
        if (!is_array($qParts) || empty($qParts['scheme'])) {
            $relativeUrl = GeneralUtility::resolveBackPath($subUrl);
            if (strpos((string)$relativeUrl, '/') === 0) {
                $subUrl = $baseAbsoluteHref . $relativeUrl;
            } else {
                $subUrl = $baseHref . '/' . $relativeUrl;
            }
        }
        $subUrls[] = $subUrl;
    }
    return $subUrls;
}
650 
/**
 * Indexes a single database record according to an indexing configuration.
 *
 * The first field of the configuration's "fieldlist" supplies the title, all further
 * fields are concatenated (separated by blanks) into the content. Tags are stripped
 * from both before the record is indexed like a TYPO3 page through the Indexer.
 *
 * Title, language field, timestamp fields and row values are read defensively, so an
 * empty field list, a table without the respective TCA "ctrl" settings or a row
 * lacking a configured field no longer raises notices; the values handed to the
 * indexer are the same as before in those cases (empty string / null).
 *
 * @param array $r Record to index
 * @param array $cfgRec Indexing configuration record
 * @param array|null $rl Root line uids; determined from the configuration's pid when not an array
 */
public function indexSingleRecord($r, $cfgRec, $rl = null)
{
    // Init:
    $rl = is_array($rl) ? $rl : $this->getUidRootLineForClosestTemplate($cfgRec['pid']);
    $fieldList = GeneralUtility::trimExplode(',', $cfgRec['fieldlist'], true);
    $ctrl = $GLOBALS['TCA'][$cfgRec['table2index']]['ctrl'] ?? [];
    $languageField = $ctrl['languageField'] ?? '';
    $sys_language_uid = $languageField ? $r[$languageField] : 0;

    // (Re)-Indexing a row from a table:
    $indexerObj = GeneralUtility::makeInstance(\TYPO3\CMS\IndexedSearch\Indexer::class);
    parse_str(str_replace('###UID###', $r['uid'], $cfgRec['get_params']), $GETparams);
    $indexerObj->backend_initIndexer($cfgRec['pid'], 0, $sys_language_uid, '', $rl, $GETparams, (bool)$cfgRec['chashcalc']);
    $indexerObj->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);
    $indexerObj->forceIndexing = true;

    // First configured field is the title, the remaining ones form the content.
    $theTitle = '';
    $theContent = '';
    foreach ($fieldList as $k => $v) {
        $fieldValue = $r[$v] ?? '';
        if (!$k) {
            $theTitle = $fieldValue;
        } else {
            $theContent .= $fieldValue . ' ';
        }
    }

    // Modification and creation time, if the table declares such fields.
    $modificationTime = isset($ctrl['tstamp']) ? ($r[$ctrl['tstamp']] ?? null) : null;
    $creationTime = isset($ctrl['crdate']) ? ($r[$ctrl['crdate']] ?? null) : null;

    // Indexing the record as a page (but with parameters set, see ->backend_setFreeIndexUid())
    $indexerObj->backend_indexAsTYPO3Page(
        strip_tags(str_replace('<', ' <', $theTitle)),
        '',
        '',
        strip_tags(str_replace('<', ' <', $theContent)),
        'utf-8',
        $modificationTime,
        $creationTime,
        $r['uid']
    );
}
682 
/**
 * Returns the page uids of the root line as seen by the template service.
 *
 * The root line of page $id is fetched from the page repository and run through the
 * TypoScript template service; the uids of the template service's resulting root line
 * are returned under their original keys.
 *
 * @param int $id Page id to start from
 * @return array Page uids, keyed like the template service's root line
 */
public function getUidRootLineForClosestTemplate($id)
{
    $templateService = GeneralUtility::makeInstance(\TYPO3\CMS\Core\TypoScript\ExtendedTemplateService::class);
    $templateService->init();

    // Fetch the root line of the page ...
    $pageRepository = GeneralUtility::makeInstance(\TYPO3\CMS\Frontend\Page\PageRepository::class);
    $pageRootLine = $pageRepository->getRootLine($id);

    // ... and let the template service build constants/config + hierarchy info from it.
    $templateService->runThroughTemplates($pageRootLine, 0);

    // Reduce every root line entry to its uid.
    $uids = [];
    foreach ($templateService->rootLine as $level => $page) {
        $uids[$level] = $page['uid'];
    }
    return $uids;
}
706 
/**
 * Calculates the timestamp at which an indexing configuration is due the next time.
 *
 * A midnight is chosen as reference first: for frequencies of up to one day, the
 * midnight before today's is used (the actual day is irrelevant then); for longer
 * frequencies, the midnight of the day stored in "timer_next_indexing" (or of the
 * current execution time if that is empty), so that e.g. the weekday of a weekly
 * schedule is respected regardless of when the script runs. "timer_offset" is added
 * to that midnight, and from there the result advances in whole "timer_frequency"
 * steps until the current execution time is reached.
 *
 * The reference midnight is built from the four-digit year and integer date parts;
 * passing a two-digit year to mktime() relies on its implicit century mapping, which
 * only covers 1970-2069.
 *
 * @param array $cfgRec Indexing configuration record; reads timer_frequency, timer_offset and timer_next_indexing
 * @return int|float Timestamp of the next indexing (a float, as it is derived from ceil())
 */
public function generateNextIndexingTime($cfgRec)
{
    $currentTime = $GLOBALS['EXEC_TIME'];
    // Find a midnight time to use for the offset calculation:
    if ($cfgRec['timer_frequency'] <= 24 * 3600) {
        $aMidNight = mktime(0, 0, 0) - 1 * 24 * 3600;
    } else {
        $lastTime = (int)($cfgRec['timer_next_indexing'] ?: $GLOBALS['EXEC_TIME']);
        $aMidNight = mktime(
            0,
            0,
            0,
            (int)date('n', $lastTime),
            (int)date('j', $lastTime),
            (int)date('Y', $lastTime)
        );
    }
    // Find last offset time plus frequency in seconds:
    $lastSureOffset = $aMidNight + MathUtility::forceIntegerInRange($cfgRec['timer_offset'], 0, 86400);
    $frequencySeconds = MathUtility::forceIntegerInRange($cfgRec['timer_frequency'], 1);
    // Now, find out how many blocks of the length of frequency there is until the next time:
    $frequencyBlocksUntilNextTime = ceil(($currentTime - $lastSureOffset) / $frequencySeconds);
    // Set next time to the offset + the frequencyblocks multiplied with the frequency length in seconds.
    return $lastSureOffset + $frequencyBlocksUntilNextTime * $frequencySeconds;
}
731 
/**
 * Tells whether a URL starts with one of the denied URL prefixes.
 *
 * @param string $url URL to test
 * @param string $url_deny Denied URL prefixes, one per line
 * @return bool True if $url begins with any of the prefixes, false otherwise (also when no prefixes are given)
 */
public function checkDeniedSuburls($url, $url_deny)
{
    // No deny list configured.
    if (!trim($url_deny)) {
        return false;
    }
    $deniedPrefixes = GeneralUtility::trimExplode(LF, $url_deny, true);
    foreach ($deniedPrefixes as $deniedPrefix) {
        if (GeneralUtility::isFirstPartOfStr($url, $deniedPrefix)) {
            return true;
        }
    }
    return false;
}
751 
/**
 * Adds a crawler queue entry on behalf of a custom indexing hook.
 *
 * Uses the parent object stored in $this->pObj, which crawler_execute() sets before
 * calling a hook object.
 *
 * @param array $cfgRec Indexing configuration record
 * @param string $title Value stored as 'url' of the queue entry
 */
public function addQueueEntryForHook($cfgRec, $title)
{
    $configurationUid = $cfgRec['uid'];
    $queueParameters = [
        // This must ALWAYS be the cfgRec uid!
        'indexConfigUid' => $configurationUid,
        'url' => $title,
        'procInstructions' => ['[Index Cfg UID#' . $configurationUid . ']']
    ];
    $this->pObj->addQueueEntry_callBack($cfgRec['set_id'], $queueParameters, $this->callBack, $cfgRec['pid']);
}
768 
/**
 * Removes all index entries that belong to a page.
 *
 * The phash values registered for the page in "index_section" are looked up; rows
 * carrying one of these phash values are then deleted from index_debug,
 * index_fulltext, index_grlist, index_phash, index_rel and index_section. Nothing is
 * deleted when no phash is registered for the page.
 *
 * @param int $id Uid of the page
 */
public function deleteFromIndex($id)
{
    $connectionPool = GeneralUtility::makeInstance(ConnectionPool::class);

    // Lookup old phash rows:
    $sectionQuery = $connectionPool->getQueryBuilderForTable('index_section');
    $sectionQuery->select('phash')
        ->from('index_section')
        ->where(
            $sectionQuery->expr()->eq(
                'page_id',
                $sectionQuery->createNamedParameter($id, \PDO::PARAM_INT)
            )
        );
    $oldPhashRows = $sectionQuery->execute()->fetchAll();
    if (empty($oldPhashRows)) {
        return;
    }

    $phashValues = array_column($oldPhashRows, 'phash');
    $tablesToPurge = [
        'index_debug',
        'index_fulltext',
        'index_grlist',
        'index_phash',
        'index_rel',
        'index_section',
    ];
    foreach ($tablesToPurge as $tableName) {
        $deleteQuery = $connectionPool->getQueryBuilderForTable($tableName);
        $phashMatches = $deleteQuery->expr()->in(
            'phash',
            $deleteQuery->createNamedParameter($phashValues, Connection::PARAM_INT_ARRAY)
        );
        $deleteQuery->delete($tableName)->where($phashMatches)->execute();
    }
}
819 
820  /*************************
821  *
822  * Hook functions for DataHandler (indexing of records)
823  *
824  *************************/
/**
 * DataHandler hook: removes a page from the index before the page gets deleted.
 *
 * @param string $command DataHandler command
 * @param string $table Table the command is applied to
 * @param int $id Uid of the affected record
 * @param mixed $value Command value (not used)
 * @param object $pObj Calling object (not used)
 */
public function processCmdmap_preProcess($command, $table, $id, $value, $pObj)
{
    // Only the deletion of pages is of interest here.
    if ($command !== 'delete' || $table !== 'pages') {
        return;
    }
    // Clean up the index
    $this->deleteFromIndex($id);
}
841 
/**
 * DataHandler hook: keeps the index in sync after a record was written.
 *
 * For new records the placeholder id is replaced by the real uid. When an existing
 * page is updated to hidden=1 or no_search=1, its index entries are deleted. The
 * written record is then re-indexed for every idle (set_id = 0) indexing configuration
 * of type "records" (1) that targets the record's table, has "records_indexonchange"
 * enabled and points to the record's page - either through its own pid (with no
 * alternative source pid set) or through its alternative source pid.
 *
 * @param string $status DataHandler status ('new' or 'update')
 * @param string $table Table of the written record
 * @param string|int $id Uid of the record, or the NEW... placeholder for new records
 * @param array $fieldArray Fields that were written
 * @param object $pObj Calling object; its substNEWwithIDs property maps placeholders to uids
 */
public function processDatamap_afterDatabaseOperations($status, $table, $id, $fieldArray, $pObj)
{
    // Check if any fields are actually updated:
    if (empty($fieldArray)) {
        return;
    }

    if ($status === 'new') {
        // Translate new ids.
        $id = $pObj->substNEWwithIDs[$id];
    } elseif ($table === 'pages' && $status === 'update') {
        $becameHidden = array_key_exists('hidden', $fieldArray) && $fieldArray['hidden'] == 1;
        $excludedFromSearch = array_key_exists('no_search', $fieldArray) && $fieldArray['no_search'] == 1;
        if ($becameHidden || $excludedFromSearch) {
            // If the page should be hidden or not indexed after update, delete index for this page
            $this->deleteFromIndex($id);
        }
    }

    // Get full record; without it there is nothing to index:
    $currentRecord = BackendUtility::getRecord($table, $id);
    if (!is_array($currentRecord)) {
        return;
    }

    // Select all (not running) indexing configurations of type "record" (1) and
    // which points to this table and is located on the same page as the record
    // or pointing to the right source PID
    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getQueryBuilderForTable('index_config');
    $expr = $queryBuilder->expr();

    $notRunning = $expr->eq('set_id', $queryBuilder->createNamedParameter(0, \PDO::PARAM_INT));
    $isRecordType = $expr->eq('type', $queryBuilder->createNamedParameter(1, \PDO::PARAM_INT));
    $targetsTable = $expr->eq('table2index', $queryBuilder->createNamedParameter($table, \PDO::PARAM_STR));
    $locatedOnRecordPage = $expr->andX(
        $expr->eq('alternative_source_pid', $queryBuilder->createNamedParameter(0, \PDO::PARAM_INT)),
        $expr->eq('pid', $queryBuilder->createNamedParameter($currentRecord['pid'], \PDO::PARAM_INT))
    );
    $pointsToRecordPage = $expr->eq(
        'alternative_source_pid',
        $queryBuilder->createNamedParameter($currentRecord['pid'], \PDO::PARAM_INT)
    );
    $indexOnChange = $expr->eq('records_indexonchange', $queryBuilder->createNamedParameter(1, \PDO::PARAM_INT));

    $configurations = $queryBuilder->select('*')
        ->from('index_config')
        ->where(
            $notRunning,
            $isRecordType,
            $targetsTable,
            $expr->orX($locatedOnRecordPage, $pointsToRecordPage),
            $indexOnChange
        )
        ->execute();

    while ($cfgRec = $configurations->fetch()) {
        $this->indexSingleRecord($currentRecord, $cfgRec);
    }
}
909 }
indexExtUrl($url, $pageId, $rl, $cfgUid, $setId)
static isFirstPartOfStr($str, $partStr)
static forceIntegerInRange($theInt, $min, $max=2000000000, $defaultValue=0)
Definition: MathUtility.php:31
crawler_execute_type2($cfgRec, &$session_data, $params, &$pObj)
static getFileAbsFileName($filename, $_=null, $_2=null)
static trimExplode($delim, $string, $removeEmptyValues=false, $limit=0)
static makeInstance($className,... $constructorArguments)
crawler_execute_type4($cfgRec, &$session_data, $params, &$pObj)
static getAllFilesAndFoldersInPath(array $fileArr, $path, $extList='', $regDirs=false, $recursivityLevels=99, $excludePattern='')
processCmdmap_preProcess($command, $table, $id, $value, $pObj)
processDatamap_afterDatabaseOperations($status, $table, $id, $fieldArray, $pObj)
indexSingleRecord($r, $cfgRec, $rl=null)
crawler_execute_type1($cfgRec, &$session_data, $params, &$pObj)
static getRecord($table, $uid, $fields=' *', $where='', $useDeleteClause=true)
if(TYPO3_MODE==='BE') $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_tsfebeuserauth.php']['frontendEditingController']['default']
static removePrefixPathFromList(array $fileArr, $prefixToRemove)
crawler_execute_type3($cfgRec, &$session_data, $params, &$pObj)