‪TYPO3CMS  ‪main
LinkAnalyzer.php
Go to the documentation of this file.
1 <?php
2 
3 /*
4  * This file is part of the TYPO3 CMS project.
5  *
6  * It is free software; you can redistribute it and/or modify it under
7  * the terms of the GNU General Public License, either version 2
8  * of the License, or any later version.
9  *
10  * For the full copyright and license information, please read the
11  * LICENSE.txt file that was distributed with this source code.
12  *
13  * The TYPO3 project - inspiring people to share!
14  */
15 
17 
18 use Psr\EventDispatcher\EventDispatcherInterface;
19 use TYPO3\CMS\Backend\Utility\BackendUtility;
33 
39 {
/**
 * Fields to analyze, keyed by table name; each value is the list of
 * field names handed to analyzeRecord() for rows of that table.
 */
protected array $searchFields = [];

/**
 * Page uids the analysis is restricted to. For the "pages" table the
 * row uid is matched against this list, for every other table the pid.
 */
protected array $pids = [];

/**
 * Number of links checked so far, keyed by table name.
 */
protected array $linkCounts = [];

/**
 * Number of broken links found so far, keyed by table name.
 */
protected array $brokenLinkCounts = [];

/**
 * Configuration handed to init(). Its 'linktypesConfig.' section is
 * forwarded to the registered link types.
 *
 * @var array presumably always an array; init() accepts it untyped
 */
protected $tsConfig = [];
70 
/**
 * Receives all collaborators and stores them as promoted, readonly properties.
 */
public function __construct(
    protected readonly EventDispatcherInterface $eventDispatcher,
    protected readonly BrokenLinkRepository $brokenLinkRepository,
    protected readonly SoftReferenceParserFactory $softReferenceParserFactory,
    protected readonly LinktypeRegistry $linktypeRegistry,
) {
}
77 
/**
 * Stores the analysis scope and forwards per-linktype configuration.
 *
 * @param array $searchFields Fields to analyze, keyed by table name
 * @param array $pidList Page uids to restrict the analysis to
 * @param mixed $tsConfig Configuration; only its 'linktypesConfig.' section is read here
 */
public function init(array $searchFields, array $pidList, $tsConfig)
{
    $this->searchFields = $searchFields;
    $this->pids = $pidList;
    $this->tsConfig = $tsConfig;

    foreach ($this->linktypeRegistry->getLinktypes() as $identifier => $linktype) {
        $linktypeConfiguration = $tsConfig['linktypesConfig.'][$identifier . '.'] ?? false;
        if (!is_array($linktypeConfiguration)) {
            // No configuration section for this link type: nothing to forward
            continue;
        }
        // An empty array is forwarded as well, as setAdditionalConfig might use global configuration
        $linktype->setAdditionalConfig($linktypeConfiguration);
    }
}
98 
/**
 * Finds all links in the configured tables/fields for the configured pages
 * and validates them. Existing broken link entries for these pages and
 * link types are removed first, then re-created by checkLinks().
 *
 * Does nothing if no link types are given or no page uids are configured.
 *
 * @param array $linkTypes Link type identifiers to check
 * @param bool $considerHidden If truthy, all default query restrictions except the deleted restriction are dropped
 */
public function getLinkStatistics(array $linkTypes = [], $considerHidden = false)
{
    if (empty($linkTypes) || empty($this->pids)) {
        return;
    }

    $this->brokenLinkRepository->removeAllBrokenLinksOfRecordsOnPageIds(
        $this->pids,
        $linkTypes
    );

    // Traverse all configured tables
    foreach ($this->searchFields as $table => $fields) {
        // If table is not configured, assume the extension is not installed
        // and therefore no need to check it
        if (!is_array($GLOBALS['TCA'][$table] ?? null)) {
            continue;
        }
        $ctrl = $GLOBALS['TCA'][$table]['ctrl'];

        // Re-init selectFields for table
        $selectFields = array_merge(['uid', 'pid', $ctrl['label']], $fields);
        if ($ctrl['languageField'] ?? false) {
            $selectFields[] = $ctrl['languageField'];
        }
        // The type field is only selected when it is a real column of the table
        if (($ctrl['type'] ?? false) && isset($GLOBALS['TCA'][$table]['columns'][$ctrl['type']])) {
            $selectFields[] = $ctrl['type'];
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable($table);

        if ($considerHidden) {
            $queryBuilder->getRestrictions()
                ->removeAll()
                ->add(GeneralUtility::makeInstance(DeletedRestriction::class));
        }
        $queryBuilder->select(...$selectFields)->from($table);

        // We need to do the work in chunks, as it may be quite huge and would hit the one
        // or other limit depending on the used dbms - and we also avoid placeholder usage
        // as they are hard to calculate beforehand because of some magic handling of dbal.
        $maxChunk = \TYPO3\CMS\Core\Database\Platform\PlatformInformation::getMaxBindParameters(
            GeneralUtility::makeInstance(ConnectionPool::class)
                ->getConnectionForTable($table)
                ->getDatabasePlatform()
        );
        // array_chunk() throws for a length below 1, so never go lower than one uid per chunk
        $chunkSize = max(1, (int)floor($maxChunk / 2));
        // Records of "pages" are matched by their own uid, all other records by their pid
        $pageIdField = $table === 'pages' ? 'uid' : 'pid';

        foreach (array_chunk($this->pids, $chunkSize) as $pageIdsChunk) {
            $statement = clone $queryBuilder;
            $statement->where(
                $statement->expr()->in(
                    $pageIdField,
                    $statement->quoteArrayBasedValueListToIntegerList($pageIdsChunk)
                )
            );
            $result = $statement->executeQuery();

            // @todo #64091: only select rows that have content in at least one of the relevant fields (via OR)
            while ($row = $result->fetchAssociative()) {
                $results = [];
                $this->analyzeRecord($results, $table, $fields, $row);
                $this->checkLinks($results, $linkTypes);
            }
        }
    }
}
172 
/**
 * Validates the collected links with the registered link types and stores
 * every link that fails its check as a broken link.
 *
 * @param array $links Collected links, keyed by link type identifier, as filled by analyzeRecord()
 * @param array $linkTypes Link type identifiers that should be checked; others are skipped
 */
protected function checkLinks(array $links, array $linkTypes)
{
    foreach ($this->linktypeRegistry->getLinktypes() as $key => $hookObj) {
        if (!is_array($links[$key] ?? false) || !in_array($key, $linkTypes, true)) {
            continue;
        }

        // Check them
        foreach ($links[$key] as $entryValue) {
            $table = $entryValue['table'];
            $row = $entryValue['row'];

            $record = [
                'headline' => BackendUtility::getRecordTitle($table, $row),
                'record_pid' => $row['pid'],
                'record_uid' => $entryValue['uid'],
                'table_name' => $table,
                'link_type' => $key,
                'link_title' => $entryValue['link_title'] ?? '',
                'field' => $entryValue['field'],
                'last_check' => time(),
            ];

            // Only look up the type value if the table actually defines a type field;
            // otherwise the fallback `false` would be used as an array offset.
            $typeField = $GLOBALS['TCA'][$table]['ctrl']['type'] ?? false;
            if ($typeField && isset($row[$typeField])) {
                $record['element_type'] = (string)$row[$typeField];
            }

            // -1 is stored when the table has no language field or the row carries no value for it
            $languageField = $GLOBALS['TCA'][$table]['ctrl']['languageField'] ?? false;
            $record['language'] = $languageField && isset($row[$languageField])
                ? $row[$languageField]
                : -1;

            // Page with anchor, e.g. 18#1580, takes precedence over the plain token value
            $url = !empty($entryValue['pageAndAnchor'])
                ? $entryValue['pageAndAnchor']
                : $entryValue['substr']['tokenValue'];
            $record['url'] = $url;

            $this->linkCounts[$table] ??= 0;
            $this->brokenLinkCounts[$table] ??= 0;

            $this->linkCounts[$table]++;
            $checkUrl = $hookObj->checkLink($url, $entryValue, $this);

            // Broken link found
            if (!$checkUrl) {
                $this->brokenLinkRepository->addBrokenLink($record, false, $hookObj->getErrorParams() ?: []);
                $this->brokenLinkCounts[$table]++;
            }
        }
    }
}
232 
/**
 * Re-analyzes one field of one record and refreshes its broken link entries.
 *
 * - Unknown table (no TCA): nothing happens.
 * - Record not found: its existing broken link entries are removed.
 * - Record's tstamp is set, $timestamp is non-zero and tstamp is lower than $timestamp: nothing happens.
 * - Otherwise the field is analyzed; only if links were found, the record's
 *   broken link entries are removed and the found links are checked again.
 *
 * @param array $checkOptions Link type identifiers to check, passed on to checkLinks()
 * @param string|int $recordUid uid of the record to recheck
 * @param string $table Table name of the record
 * @param string $field Field of the record to analyze
 * @param int $timestamp Reference time the record's tstamp is compared against; 0 disables the comparison
 * @param bool $considerHidden If true, the hidden restriction is removed from the record lookup
 */
public function recheckLinks(
    array $checkOptions,
    string|int $recordUid,
    string $table,
    string $field,
    int $timestamp,
    bool $considerHidden = true
): void {
    // If table is not configured, assume the extension is not installed
    // and therefore no need to check it. The null-coalesce keeps this
    // lookup free of "undefined array key" warnings for unknown tables.
    if (!is_array($GLOBALS['TCA'][$table] ?? null)) {
        return;
    }

    // get all links for $record / $table / $field combination
    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getQueryBuilderForTable($table);
    if ($considerHidden) {
        $queryBuilder->getRestrictions()->removeByType(HiddenRestriction::class);
    }

    $row = $queryBuilder->select('uid', 'pid', $GLOBALS['TCA'][$table]['ctrl']['label'], $field, 'tstamp')
        ->from($table)
        ->where(
            $queryBuilder->expr()->eq(
                'uid',
                $queryBuilder->createNamedParameter($recordUid, Connection::PARAM_INT)
            )
        )
        ->executeQuery()
        ->fetchAssociative();

    if (!$row) {
        // missing record: remove existing links
        $this->brokenLinkRepository->removeBrokenLinksForRecord($table, (int)$recordUid);
        return;
    }
    if (($row['tstamp'] ?? 0) && $timestamp && ((int)($row['tstamp']) < $timestamp)) {
        // timestamp has not changed: no need to recheck
        return;
    }
    $resultsLinks = [];
    $this->analyzeRecord($resultsLinks, $table, [$field], $row);
    if ($resultsLinks) {
        // remove existing broken links from table
        $this->brokenLinkRepository->removeBrokenLinksForRecord($table, (int)$recordUid);
        // find all broken links for list of links
        $this->checkLinks($resultsLinks, $checkOptions);
    }
}
289 
/**
 * Collects all links found in the given fields of one record.
 *
 * A BeforeRecordIsAnalyzedEvent is dispatched first; the results and the
 * record returned by the event are the ones used afterwards. Each field
 * with a soft reference configuration and a non-empty value is then run
 * through the matching soft reference parsers.
 *
 * @param array $results Collected links, keyed by link type; filled by reference
 * @param string $table Table name of the record
 * @param array $fields Field names to analyze
 * @param array $record Record row; must contain 'uid'
 */
public function analyzeRecord(array &$results, $table, array $fields, array $record)
{
    $event = new BeforeRecordIsAnalyzedEvent($table, $record, $fields, $this, $results);
    $this->eventDispatcher->dispatch($event);
    $results = $event->getResults();
    $record = $event->getRecord();

    // Put together content of all relevant fields
    $htmlParser = GeneralUtility::makeInstance(HtmlParser::class);
    $idRecord = $record['uid'];
    // Get all references
    foreach ($fields as $field) {
        // Fields without TCA configuration or without a value in the record are
        // skipped below; the fallbacks avoid "undefined array key" warnings for them.
        $conf = $GLOBALS['TCA'][$table]['columns'][$field]['config'] ?? [];
        $valueField = $record[$field] ?? null;

        // Add a softref definition for link fields if the TCA does not specify one already
        if (($conf['type'] ?? '') === 'link' && empty($conf['softref'])) {
            $conf['softref'] = 'typolink';
        }

        // Check if a TCA configured field has soft references defined (see TYPO3 Core API document)
        if (!($conf['softref'] ?? false) || (string)$valueField === '') {
            continue;
        }
        // Traverse soft references
        // set subst such that findRef will return substitutes for urls, emails etc
        $softRefParams = ['subst'];
        foreach ($this->softReferenceParserFactory->getParsersBySoftRefParserList($conf['softref'], $softRefParams) as $softReferenceParser) {
            $parserResult = $softReferenceParser->parse($table, $field, $idRecord, $valueField);
            if (!$parserResult->hasMatched()) {
                continue;
            }

            // Results of the 'typolink_tag' parser need the link tags of the content to be inspected
            if ($softReferenceParser->getParserKey() === 'typolink_tag') {
                $this->analyzeTypoLinks($parserResult, $results, $htmlParser, $record, $field, $table);
            } else {
                $this->analyzeLinks($parserResult, $results, $record, $field, $table);
            }
        }
    }
}
339 
/**
 * Adds every matched element of a soft reference parser result that carries
 * a 'subst' entry to the collected links.
 *
 * Entries are stored as $results[<type>]['<table>:<field>:<uid>:<tokenID>'],
 * where <type> is whatever the registered link types' fetchType() calls
 * yield for the substitute.
 *
 * @param SoftReferenceParserResult $parserResult Result of a soft reference parser
 * @param array $results Collected links; filled by reference
 * @param array $record Record row the links were found in; must contain 'uid'
 * @param string $field Field the links were found in
 * @param string $table Table of the record
 */
protected function analyzeLinks(SoftReferenceParserResult $parserResult, array &$results, array $record, $field, $table)
{
    foreach ($parserResult->getMatchedElements() as $matchedElement) {
        $substitute = $matchedElement['subst'] ?? [];
        $recordUid = $record['uid'];
        $linkType = '';
        if (empty($substitute)) {
            continue;
        }

        foreach ($this->linktypeRegistry->getLinktypes() as $linktypeIdentifier => $linktype) {
            $linkType = $linktype->fetchType($substitute, $linkType, $linktypeIdentifier);
            // Store the type that was found
            // This prevents overriding by internal validator
            if (!empty($linkType)) {
                $substitute['type'] = $linkType;
            }
        }

        $resultKey = $table . ':' . $field . ':' . $recordUid . ':' . $substitute['tokenID'];
        $entry = [
            'substr' => $substitute,
            'row' => $record,
            'table' => $table,
            'field' => $field,
            'uid' => $recordUid,
        ];
        // Written key by key so that other keys of an already existing entry are kept
        foreach ($entry as $name => $value) {
            $results[$linkType][$resultKey][$name] = $value;
        }
    }
}
374 
/**
 * Adds the links found inside <a> / <link> tags of a 'typolink_tag' parser
 * result to the collected links, including the link title and, for page
 * links, the referenced page optionally extended by '#c<content uid>'.
 *
 * @param SoftReferenceParserResult $parserResult Result of the soft reference parser
 * @param array $results Collected links; filled by reference
 * @param HtmlParser $htmlParser Used to split the parsed content into link tag blocks
 * @param array $record Record row the links were found in; must contain 'uid'
 * @param string $field Field the links were found in
 * @param string $table Table of the record
 */
protected function analyzeTypoLinks(SoftReferenceParserResult $parserResult, array &$results, $htmlParser, array $record, $field, $table)
{
    $linkTags = $htmlParser->splitIntoBlock('a,link', $parserResult->getContent());
    $recordUid = $record['uid'];
    $type = '';
    $title = '';
    $numberOfBlocks = count($linkTags);
    // Only every second block, starting at index 1, is inspected as a link tag
    for ($tagIndex = 1; $tagIndex < $numberOfBlocks; $tagIndex += 2) {
        $linkTag = $linkTags[$tagIndex];
        $currentR = [];
        $pageAndAnchor = '';
        foreach ($parserResult->getMatchedElements() as $matchedElement) {
            $type = '';
            $substitute = $matchedElement['subst'];
            // Skip elements that do not belong to the current link tag
            if (empty($substitute['tokenID']) || !str_contains($linkTag, $substitute['tokenID'])) {
                continue;
            }

            // Type of referenced record
            $recordReference = $substitute['recordRef'] ?? '';
            if (str_contains($recordReference, 'pages')) {
                $currentR = $substitute;
                // Contains number of the page
                $pageAndAnchor = $substitute['tokenValue'];
                $wasPage = true;
            } elseif (str_contains($recordReference, 'tt_content') && ($wasPage ?? null) === true) {
                // NOTE(review): $wasPage is never reset between link tags, so a page
                // match of an earlier tag is still visible here - confirm this is intended.
                $pageAndAnchor .= '#c' . $substitute['tokenValue'];
                $wasPage = false;
            } else {
                $currentR = $substitute;
            }
            $title = strip_tags($linkTag);
        }
        // @todo Should be checked why it could be that $currentR stays empty which breaks further processing with
        //       chained PHP array access errors in hooks fetchType() and the $result[] build lines below. Further
        //       $currentR could be overwritten in the inner loop, thus not checking all elements.
        if (empty($currentR)) {
            continue;
        }
        foreach ($this->linktypeRegistry->getLinktypes() as $linktypeIdentifier => $linktype) {
            $type = $linktype->fetchType($currentR, $type, $linktypeIdentifier);
            // Store the type that was found
            // This prevents overriding by internal validator
            if (!empty($type)) {
                $currentR['type'] = $type;
            }
        }

        $resultKey = $table . ':' . $field . ':' . $recordUid . ':' . $currentR['tokenID'];
        $entry = [
            'substr' => $currentR,
            'row' => $record,
            'table' => $table,
            'field' => $field,
            'uid' => $recordUid,
            'link_title' => $title,
            'pageAndAnchor' => $pageAndAnchor,
        ];
        // Written key by key so that other keys of an already existing entry are kept
        foreach ($entry as $name => $value) {
            $results[$type][$resultKey][$name] = $value;
        }
    }
}
439 
/**
 * Asks the broken link repository for the number of broken links of the
 * configured search fields on the configured pages.
 *
 * @return array Result of BrokenLinkRepository::getNumberOfBrokenLinksForRecordsOnPages()
 */
public function getLinkCounts(): array
{
    $pageIds = $this->pids;
    $fieldsPerTable = $this->searchFields;

    return $this->brokenLinkRepository->getNumberOfBrokenLinksForRecordsOnPages($pageIds, $fieldsPerTable);
}
449 
/**
 * Returns the language service stored in $GLOBALS['LANG'].
 */
protected function getLanguageService(): LanguageService
{
    $languageService = $GLOBALS['LANG'];

    return $languageService;
}
454 }
‪TYPO3\CMS\Core\Database\Query\Restriction\HiddenRestriction
Definition: HiddenRestriction.php:27
‪TYPO3\CMS\Core\DataHandling\SoftReference\SoftReferenceParserResult\getContent
‪getContent()
Definition: SoftReferenceParserResult.php:77
‪TYPO3\CMS\Core\Database\Connection\PARAM_INT
‪const PARAM_INT
Definition: Connection.php:52
‪TYPO3\CMS\Core\DataHandling\SoftReference\SoftReferenceParserFactory
Definition: SoftReferenceParserFactory.php:28
‪TYPO3\CMS\Core\Database\Platform\PlatformInformation\getMaxBindParameters
‪static getMaxBindParameters(DoctrineAbstractPlatform $platform)
Definition: PlatformInformation.php:106
‪TYPO3\CMS\Core\Html\HtmlParser
Definition: HtmlParser.php:26
‪$fields
‪$fields
Definition: pages.php:5
‪TYPO3\CMS\Webhooks\Message\$record
‪identifier readonly int readonly array $record
Definition: PageModificationMessage.php:36
‪TYPO3\CMS\Linkvalidator
‪TYPO3\CMS\Linkvalidator\Event\BeforeRecordIsAnalyzedEvent
Definition: BeforeRecordIsAnalyzedEvent.php:27
‪TYPO3\CMS\Core\Database\Connection
Definition: Connection.php:41
‪TYPO3\CMS\Webhooks\Message\$url
‪identifier readonly UriInterface $url
Definition: LoginErrorOccurredMessage.php:36
‪TYPO3\CMS\Core\DataHandling\SoftReference\SoftReferenceParserResult
Definition: SoftReferenceParserResult.php:43
‪TYPO3\CMS\Core\DataHandling\SoftReference\SoftReferenceParserResult\getMatchedElements
‪getMatchedElements()
Definition: SoftReferenceParserResult.php:82
‪$GLOBALS
‪$GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['adminpanel']['modules']
Definition: ext_localconf.php:25
‪TYPO3\CMS\Core\Database\Query\Restriction\DeletedRestriction
Definition: DeletedRestriction.php:28
‪TYPO3\CMS\Core\Database\Platform\PlatformInformation
Definition: PlatformInformation.php:33
‪TYPO3\CMS\Core\Localization\LanguageService
Definition: LanguageService.php:46
‪TYPO3\CMS\Core\Database\ConnectionPool
Definition: ConnectionPool.php:46
‪TYPO3\CMS\Core\Utility\GeneralUtility
Definition: GeneralUtility.php:52
‪TYPO3\CMS\Linkvalidator\Linktype\LinktypeRegistry
Definition: LinktypeRegistry.php:27
‪TYPO3\CMS\Webhooks\Message\$identifier
‪identifier readonly string $identifier
Definition: FileAddedMessage.php:37