‪TYPO3CMS  ‪main
ExternalLinktype.php
Go to the documentation of this file.
1 <?php
2 
3 declare(strict_types=1);
4 
5 /*
6  * This file is part of the TYPO3 CMS project.
7  *
8  * It is free software; you can redistribute it and/or modify it under
9  * the terms of the GNU General Public License, either version 2
10  * of the License, or any later version.
11  *
12  * For the full copyright and license information, please read the
13  * LICENSE.txt file that was distributed with this source code.
14  *
15  * The TYPO3 project - inspiring people to share!
16  */
17 
19 
20 use GuzzleHttp\Cookie\CookieJar;
21 use GuzzleHttp\Exception\ClientException;
22 use GuzzleHttp\Exception\ConnectException;
23 use GuzzleHttp\Exception\RequestException;
24 use GuzzleHttp\Exception\ServerException;
25 use GuzzleHttp\Exception\TooManyRedirectsException;
30 
40 {
41  // HTTP status code was delivered (and can be found in $errorParams['errno'])
42  protected const ‪ERROR_TYPE_HTTP_STATUS_CODE = 'httpStatusCode';
43  // An error occurred in lowlevel handler and a cURL error code can be found in $errorParams['errno']
44  protected const ‪ERROR_TYPE_LOWLEVEL_LIBCURL_ERRNO = 'libcurlErrno';
// Any other exception was caught by requestUrl(); its message is stored in $errorParams['exception']
45  protected const ‪ERROR_TYPE_GENERIC_EXCEPTION = 'exception';
// A client/server exception was caught that carried no response, so no status code is available
46  protected const ‪ERROR_TYPE_UNKNOWN = 'unknown';
47 
// Per-object cache filled by checkLink(): original URL => result of the check (true = valid)
53  protected ‪$urlReports = [];
54 
// Per-object cache filled by checkLink(): original URL => error params recorded for that check
60  protected ‪$urlErrorParams = [];
61 
// Headers sent with every request; setAdditionalConfig() can merge in additional headers
// and replaces/extends the User-Agent value
67  protected ‪$headers = [
68  'User-Agent' => 'TYPO3 linkvalidator',
69  'Accept' => '*/*',
70  'Accept-Language' => '*',
71  'Accept-Encoding' => '*',
72  ];
73 
// Preferred request method: with 'HEAD', checkLink() tries HEAD first and falls back to GET
// if that fails; with any other value only the GET request is sent
80  protected ‪$method = 'HEAD';
81 
// Value of the "Range: bytes=..." header added to GET requests; a falsy value disables the header
89  protected ‪$range = '0-4048';
90 
// Passed as the 'timeout' request option, but only when greater than 0
95  protected int ‪$timeout = 0;
96 
// Identifier of this link type; same value that fetchType() returns for http/https URLs
97  protected string ‪$identifier = 'external';
98 
/**
 * @param RequestFactory $requestFactory Factory that requestUrl() uses to send the HTTP requests
 */
99  public function ‪__construct(
100  protected readonly ‪RequestFactory $requestFactory,
101  ) {}
102 
/**
 * Applies link type specific configuration to this checker.
 *
 * Evaluated keys (all optional, empty values are ignored unless noted):
 * - 'headers.':       array of headers merged over the default headers
 * - 'httpAgentName':  replaces the User-Agent header
 * - 'httpAgentUrl':   appended to the User-Agent header, separated by a space
 * - 'httpAgentEmail': appended to the User-Agent header after a ';'; if empty,
 *                     $GLOBALS['TYPO3_CONF_VARS']['MAIL']['defaultMailFromAddress'] is used instead
 * - 'method':         request method, see checkLink()
 * - 'range':          byte range sent with GET requests
 * - 'timeout':        request timeout, cast to int (applied whenever the key is set)
 *
 * @param array $config Configuration as described above
 */
public function setAdditionalConfig(array $config): void
{
    if (!empty($config['headers.'])) {
        $this->headers = array_merge($this->headers, $config['headers.']);
    }

    // Build the User-Agent step by step: name, then URL, then contact address.
    if (!empty($config['httpAgentName'])) {
        $this->headers['User-Agent'] = $config['httpAgentName'];
    }
    if (!empty($config['httpAgentUrl'])) {
        $this->headers['User-Agent'] .= ' ' . $config['httpAgentUrl'];
    }

    // An explicitly configured address wins over the installation wide default sender.
    $contactEmail = ($config['httpAgentEmail'] ?? '')
        ?: ($GLOBALS['TYPO3_CONF_VARS']['MAIL']['defaultMailFromAddress'] ?? '');
    if ($contactEmail) {
        $this->headers['User-Agent'] .= ';' . $contactEmail;
    }

    if (!empty($config['method'])) {
        $this->method = $config['method'];
    }
    if (!empty($config['range'])) {
        $this->range = $config['range'];
    }
    if (isset($config['timeout'])) {
        $this->timeout = (int)$config['timeout'];
    }
}
137 
/**
 * Checks whether an external URL can be requested successfully.
 *
 * The result (and the error params belonging to it) is cached per original URL for the
 * lifetime of this object, so a URL that is checked several times is requested only once.
 *
 * If the configured method is 'HEAD', a HEAD request is tried first; when that does not
 * succeed (or another method is configured) a GET request is sent, limited by a Range
 * header if a range is configured.
 *
 * @param string $origUrl The URL to check
 * @param array $softRefEntry Not used by this implementation
 * @param LinkAnalyzer $reference Not used by this implementation
 * @return bool true if one of the requests succeeded, false otherwise; details of a failure
 *              are available in the error params of this object
 */
public function checkLink(string $origUrl, array $softRefEntry, LinkAnalyzer $reference): bool
{
    // use URL from cache, if available
    if (isset($this->urlReports[$origUrl])) {
        $this->setErrorParams($this->urlErrorParams[$origUrl] ?? []);
        return $this->urlReports[$origUrl];
    }

    // Drop the error details of the previously checked URL. requestUrl() only resets them
    // when a request is actually sent, which does not happen if preprocessing yields an
    // empty URL - without this reset the stale details would be cached for this URL.
    $this->errorParams = [];
    $isValidUrl = false;

    $options = [
        'cookies' => GeneralUtility::makeInstance(CookieJar::class),
        'allow_redirects' => ['strict' => true],
        'headers' => $this->headers,
    ];
    if ($this->timeout > 0) {
        $options['timeout'] = $this->timeout;
    }

    $url = $this->preprocessUrl($origUrl);
    if (!empty($url)) {
        if ($this->method === 'HEAD') {
            $isValidUrl = $this->requestUrl($url, 'HEAD', $options);
        }
        if (!$isValidUrl) {
            // HEAD was not allowed or threw an error, now trying GET
            if ($this->range) {
                $options['headers']['Range'] = 'bytes=' . $this->range;
            }
            $isValidUrl = $this->requestUrl($url, 'GET', $options);
        }
    }

    $this->urlReports[$origUrl] = $isValidUrl;
    $this->urlErrorParams[$origUrl] = $this->errorParams;
    return $isValidUrl;
}
180 
/**
 * Sends one request for the given URL and records why it failed, if it did.
 *
 * On failure $this->errorParams receives 'errorType', 'message' and - depending on the
 * kind of failure - 'errno' (HTTP status code or cURL error number) and 'exception'
 * (message of the caught exception).
 *
 * @param string $url The (preprocessed) URL to request
 * @param string $method HTTP method to use
 * @param array $options Request options handed to the request factory
 * @return bool true if a response with a status code below 300 was received, false otherwise
 */
protected function requestUrl(string $url, string $method, array $options): bool
{
    // Every request starts without error details of an earlier one.
    $this->errorParams = [];
    try {
        $statusCode = $this->requestFactory->request($url, $method, $options)->getStatusCode();
        if ($statusCode < 300) {
            return true;
        }
        $this->errorParams['errorType'] = self::ERROR_TYPE_HTTP_STATUS_CODE;
        $this->errorParams['errno'] = $statusCode;
        $this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
    } catch (TooManyRedirectsException $redirectFailure) {
        // Must be handled before RequestException, which would match this exception as well.
        $this->errorParams['errorType'] = 'tooManyRedirects';
        $this->errorParams['exception'] = $redirectFailure->getMessage();
        $this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
    } catch (ClientException | ServerException $badResponse) {
        // Thrown for 4xx (ClientException) and 5xx (ServerException) responses
        // if the http_errors request option is enabled.
        $hasResponse = $badResponse->hasResponse();
        $this->errorParams['errorType'] = $hasResponse
            ? self::ERROR_TYPE_HTTP_STATUS_CODE
            : self::ERROR_TYPE_UNKNOWN;
        if ($hasResponse) {
            $this->errorParams['errno'] = $badResponse->getResponse()->getStatusCode();
        }
        $this->errorParams['exception'] = $badResponse->getMessage();
        $this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
    } catch (RequestException | ConnectException $transferFailure) {
        // Networking problems (connection timeout, DNS errors, ...). The handler context
        // may carry the error number of the low level handler.
        $this->errorParams['errorType'] = self::ERROR_TYPE_LOWLEVEL_LIBCURL_ERRNO;
        $this->errorParams['exception'] = $transferFailure->getMessage();
        $lowLevelErrno = $transferFailure->getHandlerContext()['errno'] ?? 0;
        if ($lowLevelErrno) {
            $this->errorParams['errno'] = (int)$lowLevelErrno;
        }
        $this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
    } catch (\Exception $unexpected) {
        // Generic catch for anything else that may go wrong
        $this->errorParams['errorType'] = self::ERROR_TYPE_GENERIC_EXCEPTION;
        $this->errorParams['exception'] = $unexpected->getMessage();
        $this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
    }
    return false;
}
242 
/**
 * Builds the localized error message for the given error params.
 *
 * @param array $errorParams Error params as recorded by requestUrl(). Evaluated keys, all
 *                           optional: 'errorType', 'errno' (HTTP status code or cURL error
 *                           number) and 'exception' (exception message)
 * @return string The localized message; a generic message is used if no specific one applies
 */
public function getErrorMessage(array $errorParams): string
{
    $labelPrefix = 'LLL:EXT:linkvalidator/Resources/Private/Language/Module/locallang.xlf:';
    $lang = $this->getLanguageService();
    $errorType = $errorParams['errorType'] ?? '';
    if ($errorType === '') {
        return $lang->sL($labelPrefix . 'list.report.noinformation');
    }

    $errno = (int)($errorParams['errno'] ?? 0);
    $exception = $errorParams['exception'] ?? '';
    $message = '';

    switch ($errorType) {
        case self::ERROR_TYPE_HTTP_STATUS_CODE:
            // in this case error is HTTP status code
            $message = match ($errno) {
                300 => $lang->sL($labelPrefix . 'list.report.error.httpstatuscode.300'),
                305 => $lang->sL($labelPrefix . 'list.report.error.httpstatuscode.305'),
                403 => $lang->sL($labelPrefix . 'list.report.pageforbidden403'),
                404 => $lang->sL($labelPrefix . 'list.report.pagenotfound404'),
                500 => $lang->sL($labelPrefix . 'list.report.internalerror500'),
                // show generic error message with HTTP status code, if there is one
                default => $errno
                    ? sprintf($lang->sL($labelPrefix . 'list.report.externalerror'), $errno)
                    : '',
            };
            break;

        case self::ERROR_TYPE_LOWLEVEL_LIBCURL_ERRNO:
            if ($errno) {
                // get localized error message
                $message = $lang->sL($labelPrefix . 'list.report.error.libcurl.' . $errno);
            } else {
                // fallback to generic error message and show exception
                $message = $lang->sL($labelPrefix . 'list.report.networkexception');
                if ($exception !== '') {
                    $message .= ' (' . $exception . ')';
                }
            }
            break;

        case 'loop':
            $message = sprintf(
                $lang->sL($labelPrefix . 'list.report.redirectloop'),
                $exception,
                ''
            );
            break;

        case 'tooManyRedirects':
            $message = $lang->sL($labelPrefix . 'list.report.tooManyRedirects');
            break;

        case self::ERROR_TYPE_GENERIC_EXCEPTION:
            if ($exception) {
                $message = sprintf($lang->sL($labelPrefix . 'list.report.httpexception'), $exception);
            }
            break;
    }

    if (!$message) {
        // use generic error message as fallback and show the exception, if available
        $message = $exception
            ? sprintf($lang->sL($labelPrefix . 'list.report.httpexception'), $exception)
            : $lang->sL($labelPrefix . 'linkcheck.error.external.generic');
    }
    return $message;
}
332 
/**
 * Determines whether the given reference is an external link.
 *
 * @param array $value Reference properties; only the 'tokenValue' entry is evaluated
 *                     (a missing entry is treated as an empty string)
 * @param string $type Type determined so far
 * @param string $key Not used by this implementation
 * @return string 'external' if the token value contains an http:// or https:// URL,
 *                otherwise the unchanged $type
 */
public function fetchType(array $value, string $type, string $key): string
{
    $tokenValue = (string)($value['tokenValue'] ?? '');
    // A single match is enough, so there is no need to collect every URL in the token.
    if (preg_match('~https?://[^\s<>]+~i', $tokenValue) === 1) {
        return 'external';
    }
    return $type;
}
349 
/**
 * Prepares a URL for the request: HTML entities are decoded and an internationalized
 * host name is converted to its ASCII (punycode) form.
 *
 * The conversion is best effort. If the URL has no host, the conversion is not possible
 * or fails, the entity-decoded URL is returned with its host untouched.
 *
 * @param string $url The URL as found in the content
 * @return string The URL to use for the request
 */
protected function preprocessUrl(string $url): string
{
    $url = html_entity_decode($url);
    $parts = parse_url($url);
    $host = is_array($parts) ? ($parts['host'] ?? '') : '';
    if (!$host) {
        return $url;
    }
    try {
        $asciiHost = idn_to_ascii($host);
        // idn_to_ascii() returns false on failure - keep the original host in that case
        // instead of rebuilding the URL with an empty one.
        if (is_string($asciiHost) && $asciiHost !== '' && $asciiHost !== $host) {
            $parts['host'] = $asciiHost;
            $url = \TYPO3\CMS\Core\Utility\HttpUtility::buildUrl($parts);
        }
    } catch (\Throwable) {
        // ignore error and proceed with link checking
    }
    return $url;
}
370 }
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\ERROR_TYPE_LOWLEVEL_LIBCURL_ERRNO
‪const ERROR_TYPE_LOWLEVEL_LIBCURL_ERRNO
Definition: ExternalLinktype.php:44
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\$timeout
‪int $timeout
Definition: ExternalLinktype.php:90
‪TYPO3\CMS\Core\Utility\HttpUtility\buildUrl
‪static buildUrl(array $urlParts)
Definition: HttpUtility.php:102
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\checkLink
‪bool checkLink(string $origUrl, array $softRefEntry, LinkAnalyzer $reference)
Definition: ExternalLinktype.php:142
‪TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype\$errorParams
‪array $errorParams
Definition: AbstractLinktype.php:32
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\$range
‪string $range
Definition: ExternalLinktype.php:84
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\$urlErrorParams
‪array $urlErrorParams
Definition: ExternalLinktype.php:58
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\fetchType
‪string fetchType(array $value, string $type, string $key)
Definition: ExternalLinktype.php:336
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\ERROR_TYPE_UNKNOWN
‪const ERROR_TYPE_UNKNOWN
Definition: ExternalLinktype.php:46
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\$identifier
‪string $identifier
Definition: ExternalLinktype.php:92
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\preprocessUrl
‪preprocessUrl(string $url)
Definition: ExternalLinktype.php:348
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\__construct
‪__construct(protected readonly RequestFactory $requestFactory,)
Definition: ExternalLinktype.php:94
‪TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype
Definition: AbstractLinktype.php:26
‪TYPO3\CMS\Linkvalidator\Linktype
Definition: AbstractLinktype.php:18
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\getErrorMessage
‪string getErrorMessage(array $errorParams)
Definition: ExternalLinktype.php:244
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\ERROR_TYPE_GENERIC_EXCEPTION
‪const ERROR_TYPE_GENERIC_EXCEPTION
Definition: ExternalLinktype.php:45
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\setAdditionalConfig
‪setAdditionalConfig(array $config)
Definition: ExternalLinktype.php:98
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype
Definition: ExternalLinktype.php:40
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\$urlReports
‪array $urlReports
Definition: ExternalLinktype.php:52
‪TYPO3\CMS\Core\Http\RequestFactory
Definition: RequestFactory.php:30
‪TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype\setErrorParams
‪setErrorParams($value)
Definition: AbstractLinktype.php:71
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\requestUrl
‪requestUrl(string $url, string $method, array $options)
Definition: ExternalLinktype.php:179
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\$headers
‪array $headers
Definition: ExternalLinktype.php:64
‪TYPO3\CMS\Webhooks\Message\$url
‪identifier readonly UriInterface $url
Definition: LoginErrorOccurredMessage.php:36
‪$GLOBALS
‪$GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['adminpanel']['modules']
Definition: ext_localconf.php:25
‪TYPO3\CMS\Core\Utility\HttpUtility
Definition: HttpUtility.php:24
‪TYPO3\CMS\Core\Utility\GeneralUtility
Definition: GeneralUtility.php:52
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\ERROR_TYPE_HTTP_STATUS_CODE
‪const ERROR_TYPE_HTTP_STATUS_CODE
Definition: ExternalLinktype.php:42
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\$method
‪string $method
Definition: ExternalLinktype.php:76
‪TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype\getLanguageService
‪getLanguageService()
Definition: AbstractLinktype.php:97