45 public $callBack =
'&TYPO3\\CMS\\IndexedSearch\\Hook\\CrawlerHook';
59 $indexingConfigurations =
$GLOBALS[
'TYPO3_DB']->exec_SELECTgetRows(
'*',
'index_config',
'hidden=0 60 AND (starttime=0 OR starttime<=' .
$GLOBALS[
'EXEC_TIME'] .
') 61 AND timer_next_indexing<' .
$GLOBALS[
'EXEC_TIME'] .
' 65 foreach ($indexingConfigurations as $cfgRec) {
73 'timer_next_indexing' => $nextTime,
76 $GLOBALS[
'TYPO3_DB']->exec_UPDATEquery(
'index_config',
'uid=' . (
int)$cfgRec[
'uid'], $field_array);
78 switch ($cfgRec[
'type']) {
83 'indexConfigUid' => $cfgRec[
'uid'],
84 'procInstructions' => array(
'[Index Cfg UID#' . $cfgRec[
'uid'] .
']'),
85 'url' =>
'Records (start)' 88 $pObj->addQueueEntry_callBack($setId, $params, $this->callBack, $cfgRec[
'pid']);
94 'indexConfigUid' => $cfgRec[
'uid'],
96 'procInstructions' => array(
'[Index Cfg UID#' . $cfgRec[
'uid'] .
']'),
98 'url' => $cfgRec[
'filepath'],
102 $pObj->addQueueEntry_callBack($setId, $params, $this->callBack, $cfgRec[
'pid']);
108 'indexConfigUid' => $cfgRec[
'uid'],
110 'procInstructions' => array(
'[Index Cfg UID#' . $cfgRec[
'uid'] .
']'),
112 'url' => $cfgRec[
'externalUrl'],
116 $pObj->addQueueEntry_callBack($setId, $params, $this->callBack, $cfgRec[
'pid']);
122 'indexConfigUid' => $cfgRec[
'uid'],
124 'procInstructions' => array(
'[Index Cfg UID#' . $cfgRec[
'uid'] .
']'),
126 'url' => (
int)$cfgRec[
'alternative_source_pid'],
130 $pObj->addQueueEntry_callBack($setId, $params, $this->callBack, $cfgRec[
'pid']);
137 if (
$GLOBALS[
'TYPO3_CONF_VARS'][
'EXTCONF'][
'indexed_search'][
'crawler'][$cfgRec[
'type']]) {
139 if (is_object($hookObj)) {
142 'indexConfigUid' => $cfgRec[
'uid'],
144 'procInstructions' => array(
'[Index Cfg UID#' . $cfgRec[
'uid'] .
'/CUSTOM]'),
146 'url' => $hookObj->initMessage($message)
148 $pObj->addQueueEntry_callBack($setId, $params, $this->callBack, $cfgRec[
'pid']);
167 if ($params[
'indexConfigUid']) {
169 $cfgRec =
$GLOBALS[
'TYPO3_DB']->exec_SELECTgetSingleRow(
'*',
'index_config',
'uid=' . (
int)$params[
'indexConfigUid']);
170 if (is_array($cfgRec)) {
172 $session_data = unserialize($cfgRec[
'session_data']);
174 switch ($cfgRec[
'type']) {
196 if (
$GLOBALS[
'TYPO3_CONF_VARS'][
'EXTCONF'][
'indexed_search'][
'crawler'][$cfgRec[
'type']]) {
198 if (is_object($hookObj)) {
201 $hookObj->indexOperation($cfgRec, $session_data, $params, $this);
206 $field_array = array(
207 'session_data' => serialize($session_data)
209 $GLOBALS[
'TYPO3_DB']->exec_UPDATEquery(
'index_config',
'uid=' . (
int)$cfgRec[
'uid'], $field_array);
212 return array(
'log' => $params);
226 if ($cfgRec[
'table2index'] && isset(
$GLOBALS[
'TCA'][$cfgRec[
'table2index']])) {
228 if (!is_array($session_data)) {
229 $session_data = array(
234 $pid = (int)$cfgRec[
'alternative_source_pid'] ?: $cfgRec[
'pid'];
239 $recs =
$GLOBALS[
'TYPO3_DB']->exec_SELECTgetRows(
'*', $cfgRec[
'table2index'],
'pid = ' . $pid .
' 243 foreach ($recs as $r) {
247 $session_data[
'uid'] = $r[
'uid'];
251 'indexConfigUid' => $cfgRec[
'uid'],
252 'url' =>
'Records from UID#' . ($r[
'uid'] + 1) .
'-?',
253 'procInstructions' => array(
'[Index Cfg UID#' . $cfgRec[
'uid'] .
']')
255 $pObj->addQueueEntry_callBack($cfgRec[
'set_id'], $nparams, $this->callBack, $cfgRec[
'pid']);
272 $readpath = $params[
'url'];
277 if (@is_file($readpath)) {
283 $indexerObj->backend_initIndexer($cfgRec[
'pid'], 0, 0,
'', $rl);
284 $indexerObj->backend_setFreeIndexUid($cfgRec[
'uid'], $cfgRec[
'set_id']);
285 $indexerObj->hash[
'phash'] = -1;
288 $indexerObj->indexRegularDocument(\
TYPO3\CMS\Core\Utility\PathUtility::stripPathSitePrefix($readpath), TRUE);
289 } elseif (@is_dir($readpath)) {
296 if (is_array($directoryList) && $params[
'depth'] < $cfgRec[
'depth']) {
297 foreach ($directoryList as $subdir) {
298 if ((
string) $subdir !=
'') {
299 $files[] = $readpath . $subdir .
'/';
305 foreach ($files as $path) {
306 $this->instanceCounter++;
307 if ($path !== $params[
'url']) {
310 'indexConfigUid' => $cfgRec[
'uid'],
312 'procInstructions' => array(
'[Index Cfg UID#' . $cfgRec[
'uid'] .
']'),
313 'depth' => $params[
'depth'] + 1
315 $pObj->addQueueEntry_callBack($cfgRec[
'set_id'], $nparams, $this->callBack, $cfgRec[
'pid'],
$GLOBALS[
'EXEC_TIME'] + $this->instanceCounter * $this->secondsPerExternalUrl);
334 if (!is_array($session_data)) {
335 $session_data = array(
336 'urlLog' => array($params[
'url'])
341 $subUrls = $this->
indexExtUrl($params[
'url'], $cfgRec[
'pid'], $rl, $cfgRec[
'uid'], $cfgRec[
'set_id']);
343 if ($params[
'depth'] < $cfgRec[
'depth']) {
344 foreach ($subUrls as $url) {
345 if ($url = $this->
checkUrl($url, $session_data[
'urlLog'], $cfgRec[
'externalUrl'])) {
347 $this->instanceCounter++;
348 $session_data[
'urlLog'][] = $url;
351 'indexConfigUid' => $cfgRec[
'uid'],
353 'procInstructions' => array(
'[Index Cfg UID#' . $cfgRec[
'uid'] .
']'),
354 'depth' => $params[
'depth'] + 1
356 $pObj->addQueueEntry_callBack($cfgRec[
'set_id'], $nparams, $this->callBack, $cfgRec[
'pid'],
$GLOBALS[
'EXEC_TIME'] + $this->instanceCounter * $this->secondsPerExternalUrl);
375 $pageUid = (int)$params[
'url'];
377 $pageRow = BackendUtility::getRecord(
'pages', $pageUid);
378 $res = $pObj->getUrlsForPageRow($pageRow);
379 $duplicateTrack = array();
381 $downloadUrls = array();
385 foreach ($res as $paramSetKey => $vv) {
386 $urlList = $pObj->urlListFromUrlArray($vv, $pageRow,
$GLOBALS[
'EXEC_TIME'], 30, 1, 0, $duplicateTrack, $downloadUrls, array(
'tx_indexedsearch_reindex'));
390 if ($params[
'depth'] < $cfgRec[
'depth']) {
395 foreach ($recs as $r) {
396 $this->instanceCounter++;
397 $url =
'pages:' . $r[
'uid'] .
': ' . $r[
'title'];
398 $session_data[
'urlLog'][] = $url;
401 'indexConfigUid' => $cfgRec[
'uid'],
403 'procInstructions' => array(
'[Index Cfg UID#' . $cfgRec[
'uid'] .
']'),
404 'depth' => $params[
'depth'] + 1
406 $pObj->addQueueEntry_callBack($cfgRec[
'set_id'], $nparams, $this->callBack, $cfgRec[
'pid'],
$GLOBALS[
'EXEC_TIME'] + $this->instanceCounter * $this->secondsPerExternalUrl);
422 foreach ($runningIndexingConfigurations as $cfgRec) {
424 $queued_items =
$GLOBALS[
'TYPO3_DB']->exec_SELECTcountRows(
'*',
'tx_crawler_queue',
'set_id=' . (
int)$cfgRec[
'set_id'] .
' AND exec_time=0');
425 if (!$queued_items) {
427 $oldPhashRows =
$GLOBALS[
'TYPO3_DB']->exec_SELECTgetRows(
'phash',
'index_phash',
'freeIndexUid=' . (
int)$cfgRec[
'uid'] .
' AND freeIndexSetId<>' . (
int)$cfgRec[
'set_id']);
428 foreach ($oldPhashRows as $pHashRow) {
430 $tableArr = explode(
',',
'index_phash,index_rel,index_section,index_grlist,index_fulltext,index_debug');
431 foreach ($tableArr as $table) {
432 $GLOBALS[
'TYPO3_DB']->exec_DELETEquery($table,
'phash=' . (
int)$pHashRow[
'phash']);
436 $field_array = array(
440 $GLOBALS[
'TYPO3_DB']->exec_UPDATEquery(
'index_config',
'uid=' . (
int)$cfgRec[
'uid'], $field_array);
459 public function checkUrl($url, $urlLog, $baseUrl) {
460 $url = preg_replace(
'/\\/\\/$/',
'/', $url);
461 list($url) = explode(
'#', $url);
462 if (!strstr($url,
'../')) {
464 if (!in_array($url, $urlLog)) {
482 public function indexExtUrl($url, $pageId, $rl, $cfgUid, $setId) {
485 $indexerObj->backend_initIndexer($pageId, 0, 0,
'', $rl);
486 $indexerObj->backend_setFreeIndexUid($cfgUid, $setId);
487 $indexerObj->hash[
'phash'] = -1;
489 $indexerObj->indexExternalUrl($url);
490 $url_qParts = parse_url($url);
491 $baseAbsoluteHref = $url_qParts[
'scheme'] .
'://' . $url_qParts[
'host'];
492 $baseHref = $indexerObj->extractBaseHref($indexerObj->indexExternalUrl_content);
495 $baseHref = $baseAbsoluteHref;
496 $baseHref .= substr($url_qParts[
'path'], 0, strrpos($url_qParts[
'path'],
'/'));
498 $baseHref = rtrim($baseHref,
'/');
501 $list = $indexerObj->extractHyperLinks($indexerObj->indexExternalUrl_content);
503 foreach ($list as $count => $linkInfo) {
505 $subUrl = htmlspecialchars_decode($linkInfo[
'href']);
506 $qParts = parse_url($subUrl);
507 if (!$qParts[
'scheme']) {
509 if ($relativeUrl[0] ===
'/') {
510 $subUrl = $baseAbsoluteHref . $relativeUrl;
512 $subUrl = $baseHref .
'/' . $relativeUrl;
515 $subUrls[] = $subUrl;
533 $languageField =
$GLOBALS[
'TCA'][$cfgRec[
'table2index']][
'ctrl'][
'languageField'];
534 $sys_language_uid = $languageField ? $r[$languageField] : 0;
537 parse_str(str_replace(
'###UID###', $r[
'uid'], $cfgRec[
'get_params']), $GETparams);
538 $indexerObj->backend_initIndexer($cfgRec[
'pid'], 0, $sys_language_uid,
'', $rl, $GETparams, $cfgRec[
'chashcalc'] ? TRUE : FALSE);
539 $indexerObj->backend_setFreeIndexUid($cfgRec[
'uid'], $cfgRec[
'set_id']);
540 $indexerObj->forceIndexing = TRUE;
542 foreach ($fieldList as $k => $v) {
546 $theContent .= $r[$v] .
' ';
550 $indexerObj->backend_indexAsTYPO3Page(strip_tags(str_replace(
'<',
' <', $theTitle)),
'',
'', strip_tags(str_replace(
'<',
' <', $theContent)),
$GLOBALS[
'LANG']->charSet, $r[
$GLOBALS[
'TCA'][$cfgRec[
'table2index']][
'ctrl'][
'tstamp']], $r[
$GLOBALS[
'TCA'][$cfgRec[
'table2index']][
'ctrl'][
'crdate']], $r[
'uid']);
580 $rootLine = $sys_page->getRootLine($id);
582 $tmpl->runThroughTemplates($rootLine, 0);
584 $rootline_uids = array();
585 foreach ($tmpl->rootLine as $rlkey => $rldat) {
586 $rootline_uids[$rlkey] = $rldat[
'uid'];
588 return $rootline_uids;
599 $currentTime =
$GLOBALS[
'EXEC_TIME'];
601 if ($cfgRec[
'timer_frequency'] <= 24 * 3600) {
602 $aMidNight = mktime(0, 0, 0) - 1 * 24 * 3600;
604 $lastTime = $cfgRec[
'timer_next_indexing'] ?:
$GLOBALS[
'EXEC_TIME'];
605 $aMidNight = mktime(0, 0, 0, date(
'm', $lastTime), date(
'd', $lastTime), date(
'y', $lastTime));
611 $frequencyBlocksUntilNextTime = ceil(($currentTime - $lastSureOffset) / $frequencySeconds);
613 $nextTime = $lastSureOffset + $frequencyBlocksUntilNextTime * $frequencySeconds;
626 if (trim($url_deny)) {
628 foreach ($url_denyArray as $testurl) {
647 'indexConfigUid' => $cfgRec[
'uid'],
650 'procInstructions' => array(
'[Index Cfg UID#' . $cfgRec[
'uid'] .
']')
652 $this->pObj->addQueueEntry_callBack($cfgRec[
'set_id'], $nparams, $this->callBack, $cfgRec[
'pid']);
664 $oldPhashRows =
$GLOBALS[
'TYPO3_DB']->exec_SELECTgetRows(
'phash',
'index_section',
'page_id=' . (
int)$id);
665 if (count($oldPhashRows)) {
666 $pHashesToDelete = array();
667 foreach ($oldPhashRows as $pHashRow) {
668 $pHashesToDelete[] = $pHashRow[
'phash'];
670 $where_clause =
'phash IN (' . implode(
',',
$GLOBALS[
'TYPO3_DB']->cleanIntArray($pHashesToDelete)) .
')';
671 $tables = explode(
',',
'index_debug,index_fulltext,index_grlist,index_phash,index_rel,index_section');
672 foreach ($tables as $table) {
673 $GLOBALS[
'TYPO3_DB']->exec_DELETEquery($table, $where_clause);
696 if ($command ==
'delete' && $table ==
'pages') {
714 if (count($fieldArray)) {
716 if ($status ==
'new') {
717 $id = $pObj->substNEWwithIDs[$id];
718 } elseif ($table ==
'pages' && $status ==
'update' && (array_key_exists(
'hidden', $fieldArray) && $fieldArray[
'hidden'] == 1 || array_key_exists(
'no_search', $fieldArray) && $fieldArray[
'no_search'] == 1)) {
723 $currentRecord = BackendUtility::getRecord($table, $id);
724 if (is_array($currentRecord)) {
726 $indexingConfigurations =
$GLOBALS[
'TYPO3_DB']->exec_SELECTgetRows(
'*',
'index_config',
'hidden=0 727 AND (starttime=0 OR starttime<=' .
$GLOBALS[
'EXEC_TIME'] .
') 730 AND table2index=' .
$GLOBALS[
'TYPO3_DB']->fullQuoteStr($table,
'index_config') .
' 732 (alternative_source_pid=0 AND pid=' . (
int)$currentRecord[
'pid'] .
') 733 OR (alternative_source_pid=' . (
int)$currentRecord[
'pid'] .
') 735 AND records_indexonchange=1 737 foreach ($indexingConfigurations as $cfgRec) {
checkDeniedSuburls($url, $url_deny)
indexSingleRecord($r, $cfgRec, $rl=NULL)
indexExtUrl($url, $pageId, $rl, $cfgUid, $setId)
$TYPO3_CONF_VARS['SYS']['contentTable']
static isAllowedAbsPath($path)
crawler_execute($params, &$pObj)
static getAllFilesAndFoldersInPath(array $fileArr, $path, $extList='', $regDirs=FALSE, $recursivityLevels=99, $excludePattern='')
checkUrl($url, $urlLog, $baseUrl)
addQueueEntryForHook($cfgRec, $title)
static isFirstPartOfStr($str, $partStr)
static forceIntegerInRange($theInt, $min, $max=2000000000, $defaultValue=0)
crawler_execute_type2($cfgRec, &$session_data, $params, &$pObj)
static getUserObj($classRef, $checkPrefix='', $silent=FALSE)
static logDeprecatedFunction()
cleanUpOldRunningConfigurations()
static makeInstance($className)
static trimExplode($delim, $string, $removeEmptyValues=FALSE, $limit=0)
crawler_execute_type4($cfgRec, &$session_data, $params, &$pObj)
processCmdmap_preProcess($command, $table, $id, $value, $pObj)
processDatamap_afterDatabaseOperations($status, $table, $id, $fieldArray, $pObj)
generateNextIndexingTime($cfgRec)
if(!defined('TYPO3_MODE')) $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_userauth.php']['logoff_pre_processing'][]
crawler_execute_type1($cfgRec, &$session_data, $params, &$pObj)
static getFileAbsFileName($filename, $onlyRelative=TRUE, $relToTYPO3_mainDir=FALSE)
static removePrefixPathFromList(array $fileArr, $prefixToRemove)
getUidRootLineForClosestTemplate($id)
static resolveBackPath($pathStr)
static BEenableFields($table, $inv=0)
static deleteClause($table, $tableAlias='')
crawler_execute_type3($cfgRec, &$session_data, $params, &$pObj)