TYPO3 CMS  TYPO3_7-6
CrawlerHook.php
Go to the documentation of this file.
1 <?php
3 
4 /*
5  * This file is part of the TYPO3 CMS project.
6  *
7  * It is free software; you can redistribute it and/or modify it under
8  * the terms of the GNU General Public License, either version 2
9  * of the License, or any later version.
10  *
11  * For the full copyright and license information, please read the
12  * LICENSE.txt file that was distributed with this source code.
13  *
14  * The TYPO3 project - inspiring people to share!
15  */
16 
19 
24 {
/**
 * Supplies the notice announcing that the example indexing session has begun.
 *
 * @return string The fixed start-of-session message.
 */
public function initMessage()
{
    $startNotice = 'Start of Custom Example Indexing session!';

    return $startNotice;
}
34 
/**
 * Runs one step of the three-step example indexing session and registers the follow-up queue entry.
 *
 * Every call advances the 'step' counter kept in $session_data and then performs the work for that step:
 *  1. indexes two hard-coded example items as parameterized TYPO3 pages,
 *  2. indexes the file 'fileadmin/templates/index.html',
 *  3. indexes the external URL 'http://www.google.com/'.
 * As long as the counter is 3 or lower afterwards, a queue entry is added through $pObj.
 *
 * @param array $cfgRec Indexing configuration record; the keys 'pid', 'uid' and 'set_id' are read here.
 * @param mixed $session_data Session state (by reference); normalised to an array carrying a 'step' counter.
 * @param mixed $params Not used by this example implementation.
 * @param object $pObj Calling object (by reference); must offer getUidRootLineForClosestTemplate() and addQueueEntryForHook().
 * @return void
 */
public function indexOperation($cfgRec, &$session_data, $params, &$pObj)
{
    // Set up language uid, if any:
    $sys_language_uid = 0;

    // Init session data if not already done. The 'step' key is checked separately so that an
    // array handed in without it does not trigger an undefined-index notice on the increment below.
    if (!is_array($session_data)) {
        $session_data = [];
    }
    if (!isset($session_data['step'])) {
        $session_data['step'] = 0;
    }

    // Increase step counter (this is just an example of how the session data can be used -
    // to track how many instances of indexing are left)
    $session_data['step']++;

    switch ((int)$session_data['step']) {
        case 1:
            // Indexing Example: Content accessed with GET parameters added to URL:
            // Get rootline from the Indexing Record (needed because the indexer relates all search
            // results to a position in the page tree!) [DON'T CHANGE]:
            $rl = $pObj->getUidRootLineForClosestTemplate($cfgRec['pid']);
            // Set up 2 example items to index:
            $exampleItems = [
                [
                    'ID' => '123',
                    'title' => 'Title of Example 1',
                    'content' => 'Vestibulum leo turpis, fringilla sit amet, semper eget, vestibulum ut, arcu. Vestibulum mauris orci, vulputate quis, congue eget, nonummy'
                ],
                [
                    'ID' => 'example2',
                    'title' => 'Title of Example 2',
                    'content' => 'Cras tortor turpis, vulputate non, accumsan a, pretium in, magna. Cras turpis turpis, pretium pulvinar, pretium vel, nonummy eu.'
                ]
            ];
            // For each item, index it (this is what you might like to do in batches of like 100 items
            // if all your content spans thousands of items!)
            foreach ($exampleItems as $item) {
                // Prepare the GET variables array that must be added to the page URL in order to view result:
                parse_str('&itemID=' . rawurlencode($item['ID']), $GETparams);
                // The example items above carry neither 'tstamp' nor 'create_date'; fall back to 0
                // instead of reading undefined indexes (which raised a notice and passed null).
                $mtime = isset($item['tstamp']) ? $item['tstamp'] : 0;
                $crdate = isset($item['create_date']) ? $item['create_date'] : 0;
                // Prepare indexer (make instance, initialize it, set special features for indexing
                // parameterized content - probably none of this should be changed by you) [DON'T CHANGE]:
                $indexerObj = GeneralUtility::makeInstance(Indexer::class);
                $indexerObj->backend_initIndexer($cfgRec['pid'], 0, $sys_language_uid, '', $rl, $GETparams, false);
                $indexerObj->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);
                $indexerObj->forceIndexing = true;
                // Indexing the content of the item (see \TYPO3\CMS\IndexedSearch\Indexer::backend_indexAsTYPO3Page() for options)
                $indexerObj->backend_indexAsTYPO3Page($item['title'], '', '', $item['content'], $GLOBALS['LANG']->charSet, $mtime, $crdate, $item['ID']);
            }
            break;
        case 2:
            // Indexing Example: Content accessed directly in file system:
            // Get rootline from the Indexing Record (needed because the indexer relates all search
            // results to a position in the page tree!) [DON'T CHANGE]:
            $rl = $pObj->getUidRootLineForClosestTemplate($cfgRec['pid']);
            // Prepare indexer (make instance, initialize it, set special features for indexing
            // parameterized content - probably none of this should be changed by you) [DON'T CHANGE]:
            $indexerObj = GeneralUtility::makeInstance(Indexer::class);
            $indexerObj->backend_initIndexer($cfgRec['pid'], 0, $sys_language_uid, '', $rl);
            $indexerObj->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);
            // To avoid phash_t3 being written to file sections (otherwise they are removed when page is reindexed!!!)
            $indexerObj->hash['phash'] = -1;
            // Index document:
            $indexerObj->indexRegularDocument('fileadmin/templates/index.html', true);
            break;
        case 3:
            // Indexing Example: Content accessed on External URLs:
            $indexerObj = GeneralUtility::makeInstance(Indexer::class);
            $indexerObj->backend_initIndexer($cfgRec['pid'], 0, $sys_language_uid, '', null);
            $indexerObj->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);
            // To avoid phash_t3 being written to file sections (otherwise they are removed when page is reindexed!!!)
            $indexerObj->hash['phash'] = -1;
            // Index external URL (HTML only):
            $indexerObj->indexExternalUrl('http://www.google.com/');
            break;
    }

    // Finally, set entry for next indexing instance (if all steps are not completed).
    // NOTE(review): with "<= 3" an entry is also queued after step 3 has run, so one further call
    // arrives that matches no case above - kept as-is to preserve behavior; confirm whether intended.
    if ($session_data['step'] <= 3) {
        $title = 'Step #' . $session_data['step'] . ' of 3';
        // Just information field. Never mind that the field is called "url" - this is what will be
        // shown in the "crawler" log. Could be a URL - or whatever else tells what that indexing
        // instance will do.
        $pObj->addQueueEntryForHook($cfgRec, $title);
    }
}
125 }
if(TYPO3_MODE==='BE') $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_tsfebeuserauth.php']['frontendEditingController']['default']