TYPO3 CMS  TYPO3_6-2
CrawlerHook.php
Go to the documentation of this file.
1 <?php
3 
/**
 * Example hook for custom indexing through an indexing configuration.
 *
 * Demonstrates three ways of feeding content to the Indexed Search indexer,
 * one per call of indexOperation():
 *  - step 1: items reachable through GET parameters added to a page URL,
 *  - step 2: a document read directly from the file system,
 *  - step 3: an external URL.
 */
class CrawlerHook {

	/**
	 * Returns the message announcing the start of an indexing session.
	 *
	 * @return string
	 */
	public function initMessage() {
		return 'Start of Custom Example Indexing session!';
	}

	/**
	 * Performs one indexing step and, while steps remain, queues the next one.
	 *
	 * Each call increments $session_data['step'] and executes the example
	 * belonging to the new step number (1, 2 or 3). Any other step number
	 * performs no indexing.
	 *
	 * @param array $cfgRec Indexing configuration record; the keys 'pid', 'uid' and 'set_id' are read
	 * @param array $session_data Session data, passed by reference; initialised to array('step' => 0) when it is not an array
	 * @param array $params Not used by this example
	 * @param object $pObj Parent object; must provide getUidRootLineForClosestTemplate() and addQueueEntryForHook()
	 * @return void
	 */
	public function indexOperation($cfgRec, &$session_data, $params, &$pObj) {
		// Init session data array if not already:
		if (!is_array($session_data)) {
			$session_data = array(
				'step' => 0
			);
		}
		// Increase step counter (this is just an example of how the session data can be used - to track how many instances of indexing is left)
		$session_data['step']++;
		switch ((int)$session_data['step']) {
			case 1:
				// Indexing Example: Content accessed with GET parameters added to URL:
				// Get rootline from the Indexing Record (needed because the indexer relates all search results to a position in the page tree!) [DON'T CHANGE]:
				$rl = $pObj->getUidRootLineForClosestTemplate($cfgRec['pid']);
				// Set up language uid, if any:
				$sys_language_uid = 0;
				// Set up 2 example items to index:
				$exampleItems = array(
					array(
						'ID' => '123',
						'title' => 'Title of Example 1',
						'content' => 'Vestibulum leo turpis, fringilla sit amet, semper eget, vestibulum ut, arcu. Vestibulum mauris orci, vulputate quis, congue eget, nonummy'
					),
					array(
						'ID' => 'example2',
						'title' => 'Title of Example 2',
						'content' => 'Cras tortor turpis, vulputate non, accumsan a, pretium in, magna. Cras turpis turpis, pretium pulvinar, pretium vel, nonummy eu.'
					)
				);
				// For each item, index it (this is what you might like to do in batches of like 100 items if all your content spans thousands of items!)
				foreach ($exampleItems as $item) {
					// Prepare the GET variables array that must be added to the page URL in order to view result:
					$GETparams = array();
					parse_str('&itemID=' . rawurlencode($item['ID']), $GETparams);
					// The example items carry no timestamps; fall back to 0 instead of reading undefined array keys:
					$mtime = isset($item['tstamp']) ? $item['tstamp'] : 0;
					$crdate = isset($item['create_date']) ? $item['create_date'] : 0;
					// Prepare indexer (make instance, initialize it, set special features for indexing parameterized content - probably none of this should be changed by you) [DON'T CHANGE]:
					$indexerObj = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\\CMS\\IndexedSearch\\Indexer');
					$indexerObj->backend_initIndexer($cfgRec['pid'], 0, $sys_language_uid, '', $rl, $GETparams, FALSE);
					$indexerObj->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);
					$indexerObj->forceIndexing = TRUE;
					// Indexing the content of the item (see \TYPO3\CMS\IndexedSearch\Indexer::backend_indexAsTYPO3Page() for options)
					$indexerObj->backend_indexAsTYPO3Page($item['title'], '', '', $item['content'], $GLOBALS['LANG']->charSet, $mtime, $crdate, $item['ID']);
				}
				break;
			case 2:
				// Indexing Example: Content accessed directly in file system:
				$indexerObj = $this->createFreeIndexer($cfgRec, $pObj);
				// Index document:
				$indexerObj->indexRegularDocument('fileadmin/templates/index.html', TRUE);
				break;
			case 3:
				// Indexing Example: Content accessed on External URLs:
				// The rootline and language uid are resolved inside the helper for this call;
				// values assigned during steps 1 and 2 belong to earlier calls and are not available here.
				$indexerObj = $this->createFreeIndexer($cfgRec, $pObj);
				// Index external URL (HTML only):
				$indexerObj->indexExternalUrl('http://www.google.com/');
				break;
		}
		// Finally, set entry for next indexing instance (if all steps are not completed)
		// NOTE(review): the condition is also true right after step 3 has run, so one more entry is queued;
		// presumably that leads to a fourth call which does no indexing - confirm whether "< 3" was intended.
		if ($session_data['step'] <= 3) {
			$title = 'Step #' . $session_data['step'] . ' of 3';
			// Just information field. Never mind that the field is called "url" - this is what will be shown in the "crawler" log. Could be a URL - or whatever else tells what that indexing instance will do.
			$pObj->addQueueEntryForHook($cfgRec, $title);
		}
	}

	/**
	 * Creates and initialises an indexer for content that is not a TYPO3 page (files, external URLs).
	 *
	 * @param array $cfgRec Indexing configuration record; the keys 'pid', 'uid' and 'set_id' are read
	 * @param object $pObj Parent object providing getUidRootLineForClosestTemplate()
	 * @return object Indexer instance with hash['phash'] set to -1
	 */
	private function createFreeIndexer($cfgRec, $pObj) {
		// Get rootline from the Indexing Record (needed because the indexer relates all search results to a position in the page tree!) [DON'T CHANGE]:
		$rl = $pObj->getUidRootLineForClosestTemplate($cfgRec['pid']);
		// Set up language uid, if any:
		$sys_language_uid = 0;
		// Prepare indexer (make instance, initialize it - probably none of this should be changed by you) [DON'T CHANGE]:
		$indexerObj = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\\CMS\\IndexedSearch\\Indexer');
		$indexerObj->backend_initIndexer($cfgRec['pid'], 0, $sys_language_uid, '', $rl);
		$indexerObj->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);
		// To avoid phash_t3 being written to file sections (otherwise they are removed when page is reindexed!!!)
		$indexerObj->hash['phash'] = -1;
		return $indexerObj;
	}

}
indexOperation($cfgRec, &$session_data, $params, &$pObj)
Definition: CrawlerHook.php:51
if(!defined('TYPO3_MODE')) $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_userauth.php']['logoff_pre_processing'][]