‪TYPO3CMS  11.5
ExternalLinktype.php
Go to the documentation of this file.
1 <?php
2 
3 /*
4  * This file is part of the TYPO3 CMS project.
5  *
6  * It is free software; you can redistribute it and/or modify it under
7  * the terms of the GNU General Public License, either version 2
8  * of the License, or any later version.
9  *
10  * For the full copyright and license information, please read the
11  * LICENSE.txt file that was distributed with this source code.
12  *
13  * The TYPO3 project - inspiring people to share!
14  */
15 
17 
18 use GuzzleHttp\Cookie\CookieJar;
19 use GuzzleHttp\Exception\ClientException;
20 use GuzzleHttp\Exception\ConnectException;
21 use GuzzleHttp\Exception\RequestException;
22 use GuzzleHttp\Exception\ServerException;
23 use GuzzleHttp\Exception\TooManyRedirectsException;
27 
32 {
/** Error type: an HTTP status code was delivered (and can be found in $errorParams['errno']). */
protected const ERROR_TYPE_HTTP_STATUS_CODE = 'httpStatusCode';

/** Error type: an error occurred in the lowlevel handler and a cURL error code can be found in $errorParams['errno']. */
protected const ERROR_TYPE_LOWLEVEL_LIBCURL_ERRNO = 'libcurlErrno';

/** Error type: some other exception was thrown while requesting the URL. */
protected const ERROR_TYPE_GENERIC_EXCEPTION = 'exception';

/** Error type: an HTTP client/server exception that carried no response. */
protected const ERROR_TYPE_UNKNOWN = 'unknown';

/**
 * Cached check results (true = valid), keyed by the original URL.
 *
 * @var array
 */
protected $urlReports = [];

/**
 * Cached error details belonging to the entries of $urlReports, keyed by the original URL.
 *
 * @var array
 */
protected $urlErrorParams = [];

/**
 * Headers sent with every check request; can be extended via setAdditionalConfig().
 *
 * @var array
 */
protected $headers = [
    'User-Agent' => 'TYPO3 linkvalidator',
    'Accept' => '*/*',
    'Accept-Language' => '*',
    'Accept-Encoding' => '*',
];

/**
 * Preferred request method. Only the exact value 'HEAD' makes checkLink() try a HEAD
 * request before falling back to GET.
 *
 * @var string
 */
protected $method = 'HEAD';

/**
 * Byte range sent as "Range: bytes=<range>" with GET requests; a falsy value disables the header.
 *
 * @var string
 */
protected $range = '0-4048';

/**
 * Value for the 'timeout' request option; only passed on when greater than 0.
 */
protected int $timeout = 0;

/**
 * Factory used to send the HTTP requests.
 *
 * @var RequestFactory
 */
protected $requestFactory;

/**
 * Error details of the most recent request (keys used here: errorType, errno, exception, message).
 *
 * @var array
 */
protected $errorParams = [];
100 
/**
 * @param RequestFactory|null $requestFactory Factory used to send the check requests. When
 *        omitted, an instance is obtained through GeneralUtility::makeInstance().
 */
public function __construct(?RequestFactory $requestFactory = null)
{
    // Explicit nullable type: relying on the "= null" default to imply nullability is deprecated.
    $this->requestFactory = $requestFactory ?? GeneralUtility::makeInstance(RequestFactory::class);
}
105 
/**
 * Apply the link type configuration to the request settings of this instance.
 *
 * Evaluated keys:
 *  - 'headers.'        array merged over the current headers
 *  - 'httpAgentName'   replaces the User-Agent header
 *  - 'httpAgentUrl'    appended to the User-Agent, separated by a space
 *  - 'httpAgentEmail'  appended to the User-Agent after ';'; falls back to
 *                      $GLOBALS['TYPO3_CONF_VARS']['MAIL']['defaultMailFromAddress']
 *  - 'method'          request method tried first
 *  - 'range'           byte range for GET requests
 *  - 'timeout'         cast to int; applied whenever the key is set
 *
 * Empty/falsy values are ignored for every key except 'timeout'.
 *
 * @param array $config
 */
public function setAdditionalConfig(array $config): void
{
    // Guard against scalar values: array_merge() would throw a TypeError on a non-array.
    $extraHeaders = $config['headers.'] ?? null;
    if (is_array($extraHeaders) && $extraHeaders !== []) {
        $this->headers = array_merge($this->headers, $extraHeaders);
    }

    if (!empty($config['httpAgentName'])) {
        $this->headers['User-Agent'] = $config['httpAgentName'];
    }

    if (!empty($config['httpAgentUrl'])) {
        $this->headers['User-Agent'] .= ' ' . $config['httpAgentUrl'];
    }

    // The explicitly configured contact address wins over the global default sender address.
    $email = ($config['httpAgentEmail'] ?? false)
        ?: ($GLOBALS['TYPO3_CONF_VARS']['MAIL']['defaultMailFromAddress'] ?? '');
    if ($email) {
        $this->headers['User-Agent'] .= ';' . $email;
    }

    if (!empty($config['method'])) {
        $this->method = $config['method'];
    }
    if (!empty($config['range'])) {
        $this->range = $config['range'];
    }
    // isset() instead of a truthiness test so that an explicit 0 is applied as well.
    if (isset($config['timeout'])) {
        $this->timeout = (int)$config['timeout'];
    }
}
140 
/**
 * Check an external URL, trying HEAD first (if configured) and GET as fallback.
 *
 * Results and their error details are cached per original URL for the lifetime of this
 * instance, so a URL that occurs several times is requested only once.
 *
 * @param string $origUrl The URL as found in the content
 * @param array $softRefEntry Not used by this implementation
 * @param mixed $reference Not used by this implementation
 * @return bool true if a request for the URL succeeded, false otherwise
 */
public function checkLink($origUrl, $softRefEntry, $reference)
{
    // Use the cached result, if available, and restore the error details that belong to it.
    if (isset($this->urlReports[$origUrl])) {
        $this->setErrorParams($this->urlErrorParams[$origUrl]);
        return $this->urlReports[$origUrl];
    }

    // Start from a clean state: if no request is sent below (empty URL), the error details
    // of the previously checked link must not be cached and reported for this one.
    $this->errorParams = [];

    $options = [
        'cookies' => GeneralUtility::makeInstance(CookieJar::class),
        'allow_redirects' => ['strict' => true],
        'headers' => $this->headers,
    ];
    if ($this->timeout > 0) {
        $options['timeout'] = $this->timeout;
    }

    $isValidUrl = false;
    $url = $this->preprocessUrl($origUrl);
    if (!empty($url)) {
        if ($this->method === 'HEAD') {
            $isValidUrl = $this->requestUrl($url, 'HEAD', $options);
        }
        if (!$isValidUrl) {
            // HEAD was not requested, not allowed or failed: try GET, limited to a byte range
            // (when one is configured).
            if ($this->range) {
                $options['headers']['Range'] = 'bytes=' . $this->range;
            }
            $isValidUrl = $this->requestUrl($url, 'GET', $options);
        }
    }

    $this->urlReports[$origUrl] = $isValidUrl;
    $this->urlErrorParams[$origUrl] = $this->errorParams;
    return $isValidUrl;
}
183 
/**
 * Send a single request and record the outcome in $this->errorParams.
 *
 * $this->errorParams is reset on every call. On failure it receives 'errorType', where
 * available 'errno' and 'exception', and the message built by getErrorMessage().
 *
 * @param string $url The (preprocessed) URL to request
 * @param string $method HTTP method handed to the request factory
 * @param array $options Request options handed to the request factory
 * @return bool true if a response with a status code below 300 was received
 */
protected function requestUrl(string $url, string $method, array $options): bool
{
    $this->errorParams = [];
    try {
        $statusCode = $this->requestFactory->request($url, $method, $options)->getStatusCode();
        if ($statusCode < 300) {
            return true;
        }
        // NOTE(review): this path stores the raw status code as errorType, whereas the
        // exception path below uses ERROR_TYPE_HTTP_STATUS_CODE plus 'errno'. Kept as is,
        // getErrorMessage() handles it in its default branch - confirm whether intended.
        $this->errorParams = ['errorType' => $statusCode];
        $this->errorParams['message'] = $this->getErrorMessage($this->errorParams);

        /* Guzzle Exceptions:
         * . \RuntimeException
         * ├── SeekException (implements GuzzleException)
         * └── TransferException (implements GuzzleException)
         *     └── RequestException
         *         ├── BadResponseException
         *         │   ├── ServerException
         *         │   └── ClientException
         *         ├── ConnectException
         *         └── TooManyRedirectsException
         *
         * The catch blocks go from the most specific to the most generic type.
         */
    } catch (TooManyRedirectsException $e) {
        $this->errorParams = [
            'errorType' => 'tooManyRedirects',
            'exception' => $e->getMessage(),
        ];
        $this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
    } catch (ClientException | ServerException $e) {
        // ClientException - thrown for 400 level errors if the http_errors request option is set to true.
        // ServerException - thrown for 500 level errors if the http_errors request option is set to true.
        $this->errorParams = $e->hasResponse()
            ? [
                'errorType' => self::ERROR_TYPE_HTTP_STATUS_CODE,
                'errno' => $e->getResponse()->getStatusCode(),
            ]
            : ['errorType' => self::ERROR_TYPE_UNKNOWN];
        $this->errorParams['exception'] = $e->getMessage();
        $this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
    } catch (RequestException | ConnectException $e) {
        // RequestException - thrown in the event of a networking error (connection timeout, DNS errors, etc.);
        // catching it catches any exception that can be thrown while transferring requests.
        // ConnectException - thrown in the event of a networking error.
        $this->errorParams = [
            'errorType' => self::ERROR_TYPE_LOWLEVEL_LIBCURL_ERRNO,
            'exception' => $e->getMessage(),
        ];
        // The handler context carries the cURL error number, if there is one.
        $curlErrno = $e->getHandlerContext()['errno'] ?? 0;
        if ($curlErrno) {
            $this->errorParams['errno'] = (int)$curlErrno;
        }
        $this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
    } catch (\Exception $e) {
        // Generic catch for anything else that may go wrong
        $this->errorParams = [
            'errorType' => self::ERROR_TYPE_GENERIC_EXCEPTION,
            'exception' => $e->getMessage(),
        ];
        $this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
    }
    return false;
}
249 
/**
 * Build the localized error message for the given error details.
 *
 * @param array $errorParams Error details; evaluated keys are 'errorType', 'errno' and 'exception'
 * @return string The message, or the "no information" label when 'errorType' is missing or ''
 */
public function getErrorMessage($errorParams)
{
    $lang = $this->getLanguageService();
    $errorType = $errorParams['errorType'] ?? '';
    if ($errorType === '') {
        return $lang->getLL('list.report.noinformation');
    }
    $errno = $errorParams['errno'] ?? 0;
    $exception = $errorParams['exception'] ?? '';

    switch ($errorType) {
        case self::ERROR_TYPE_HTTP_STATUS_CODE:
            switch ($errno) {
                case 403:
                    $message = $lang->getLL('list.report.pageforbidden403');
                    break;
                case 404:
                    $message = $lang->getLL('list.report.pagenotfound404');
                    break;
                case 500:
                    $message = $lang->getLL('list.report.internalerror500');
                    break;
                default:
                    // fall back to other error messages
                    $message = $lang->getLL('list.report.error.httpstatuscode.' . $errno);
                    if (!$message) {
                        // fall back to generic error message
                        // NOTE(review): the placeholder receives the error type, not the status code - confirm intended.
                        $message = sprintf($lang->getLL('list.report.externalerror'), $errorType);
                    }
            }
            break;

        case self::ERROR_TYPE_LOWLEVEL_LIBCURL_ERRNO:
            $message = '';
            if ($errno) {
                // get localized error message
                $message = $lang->getLL('list.report.error.libcurl.' . $errno);
            }
            if (!$message) {
                // fallback to generic error message and show exception
                $message = $lang->getLL('list.report.networkexception');
                if ($exception != '') {
                    $message .= ' (' . $exception . ')';
                }
            }
            break;

        case 'loop':
            $message = sprintf(
                $lang->getLL('list.report.redirectloop'),
                $exception,
                ''
            );
            break;

        case 'tooManyRedirects':
            $message = $lang->getLL('list.report.tooManyRedirects');
            break;

        case 'exception':
            $message = sprintf($lang->getLL('list.report.httpexception'), $exception);
            break;

        default:
            // Any other error type, e.g. a raw HTTP status code stored as 'errorType'.
            $message = sprintf($lang->getLL('list.report.otherhttpcode'), $errorType, $exception);
    }
    return $message;
}
324 
/**
 * Decide whether a soft reference entry is an external link.
 *
 * @param array $value Soft reference entry; only 'tokenValue' is evaluated
 * @param string $type The type determined so far
 * @param string $key Not used by this implementation
 * @return string 'external' if the token value contains an http(s) URL, otherwise the given $type
 */
public function fetchType($value, $type, $key)
{
    // A single match decides the type, so there is no need to collect every URL.
    $pattern = '/((?:http|https))(?::\\/\\/)(?:[^\\s<>]+)/i';
    if (preg_match($pattern, $value['tokenValue'] ?? '') === 1) {
        return 'external';
    }
    return $type;
}
341 
/**
 * Prepare a URL for the request: decode HTML entities and convert an internationalized
 * host name to its ASCII form.
 *
 * The conversion is best effort. If it fails, the entity-decoded URL is returned with its
 * host untouched, so that the link is still checked.
 *
 * @param string $url The URL as found in the content
 * @return string The URL to request
 */
protected function preprocessUrl(string $url): string
{
    $url = html_entity_decode($url);
    $parts = parse_url($url);
    $host = $parts['host'] ?? false;
    if (!$host) {
        return $url;
    }
    try {
        $asciiHost = idn_to_ascii($host);
        // idn_to_ascii() returns false on failure. Casting that to '' would replace the host
        // by an empty string and produce a broken URL, so only a real result is applied.
        if (is_string($asciiHost) && $asciiHost !== '' && $asciiHost !== $host) {
            $parts['host'] = $asciiHost;
            $url = HttpUtility::buildUrl($parts);
        }
    } catch (\Throwable $e) {
        // ignore error and proceed with link checking
    }
    return $url;
}
365 }
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\ERROR_TYPE_LOWLEVEL_LIBCURL_ERRNO
‪const ERROR_TYPE_LOWLEVEL_LIBCURL_ERRNO
Definition: ExternalLinktype.php:36
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\$timeout
‪int $timeout
Definition: ExternalLinktype.php:84
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\$range
‪string $range
Definition: ExternalLinktype.php:76
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\getErrorMessage
‪string getErrorMessage($errorParams)
Definition: ExternalLinktype.php:250
‪TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype\getLanguageService
‪LanguageService getLanguageService()
Definition: AbstractLinktype.php:93
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\$urlErrorParams
‪array $urlErrorParams
Definition: ExternalLinktype.php:50
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\preprocessUrl
‪string preprocessUrl(string $url)
Definition: ExternalLinktype.php:341
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\ERROR_TYPE_UNKNOWN
‪const ERROR_TYPE_UNKNOWN
Definition: ExternalLinktype.php:38
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\fetchType
‪string fetchType($value, $type, $key)
Definition: ExternalLinktype.php:326
‪TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype
Definition: AbstractLinktype.php:24
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\__construct
‪__construct(RequestFactory $requestFactory=null)
Definition: ExternalLinktype.php:94
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\$requestFactory
‪RequestFactory $requestFactory
Definition: ExternalLinktype.php:88
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\requestUrl
‪bool requestUrl(string $url, string $method, array $options)
Definition: ExternalLinktype.php:185
‪TYPO3\CMS\Linkvalidator\Linktype
Definition: AbstractLinktype.php:16
‪TYPO3\CMS\Core\Utility\HttpUtility\buildUrl
‪static string buildUrl(array $urlParts)
Definition: HttpUtility.php:149
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\ERROR_TYPE_GENERIC_EXCEPTION
‪const ERROR_TYPE_GENERIC_EXCEPTION
Definition: ExternalLinktype.php:37
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\setAdditionalConfig
‪setAdditionalConfig(array $config)
Definition: ExternalLinktype.php:99
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype
Definition: ExternalLinktype.php:32
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\$urlReports
‪array $urlReports
Definition: ExternalLinktype.php:44
‪TYPO3\CMS\Core\Http\RequestFactory
Definition: RequestFactory.php:31
‪TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype\setErrorParams
‪setErrorParams($value)
Definition: AbstractLinktype.php:63
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\$headers
‪array $headers
Definition: ExternalLinktype.php:56
‪$GLOBALS
‪$GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['adminpanel']['modules']
Definition: ext_localconf.php:25
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\$errorParams
‪array $errorParams
Definition: ExternalLinktype.php:92
‪TYPO3\CMS\Core\Utility\HttpUtility
Definition: HttpUtility.php:22
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\checkLink
‪bool checkLink($origUrl, $softRefEntry, $reference)
Definition: ExternalLinktype.php:143
‪TYPO3\CMS\Core\Utility\GeneralUtility
Definition: GeneralUtility.php:50
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\ERROR_TYPE_HTTP_STATUS_CODE
‪const ERROR_TYPE_HTTP_STATUS_CODE
Definition: ExternalLinktype.php:34
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\$method
‪string $method
Definition: ExternalLinktype.php:68