‪TYPO3CMS  10.4
ExternalLinktype.php
Go to the documentation of this file.
1 <?php
2 
3 /*
4  * This file is part of the TYPO3 CMS project.
5  *
6  * It is free software; you can redistribute it and/or modify it under
7  * the terms of the GNU General Public License, either version 2
8  * of the License, or any later version.
9  *
10  * For the full copyright and license information, please read the
11  * LICENSE.txt file that was distributed with this source code.
12  *
13  * The TYPO3 project - inspiring people to share!
14  */
15 
17 
18 use GuzzleHttp\Cookie\CookieJar;
19 use GuzzleHttp\Exception\ClientException;
20 use GuzzleHttp\Exception\RequestException;
21 use GuzzleHttp\Exception\TooManyRedirectsException;
25 
30 {
/**
 * Cache of link check results, keyed by the original (unprocessed) URL.
 * Filled and read by checkLink().
 */
36  protected ‪$urlReports = [];
37 
/**
 * Cache of the error parameters recorded for each checked URL,
 * keyed by the original (unprocessed) URL like $urlReports.
 */
43  protected ‪$urlErrorParams = [];
44 
/**
 * Default headers sent with every request. setAdditionalConfig() can merge
 * further headers into this list and extend the User-Agent value.
 */
50  protected ‪$headers = [
51  'User-Agent' => 'TYPO3 linkvalidator',
52  'Accept' => '*/*',
53  'Accept-Language' => '*',
54  'Accept-Encoding' => '*',
55  ];
56 
/**
 * Preferred request method. With 'HEAD', checkLink() tries a HEAD request
 * first and falls back to GET if it fails; any other value goes straight to GET.
 */
63  protected ‪$method = 'HEAD';
64 
/**
 * Byte range requested on GET requests, sent as "Range: bytes=<range>".
 * An empty value suppresses the Range header.
 */
72  protected ‪$range = '0-4048';
73 
/**
 * Request timeout. Only passed on as the 'timeout' request option when
 * greater than 0; with 0 no timeout option is set by this class.
 */
80  protected ‪$timeout = 0;
81 
/**
 * Factory used by requestUrl() to send the HTTP requests.
 */
85  protected ‪$requestFactory;
86 
/**
 * Error details of the most recent request; reset at the start of
 * requestUrl() and copied into $urlErrorParams by checkLink().
 */
90  protected ‪$errorParams = [];
91 
93  {
94  $this->requestFactory = ‪$requestFactory ?: GeneralUtility::makeInstance(RequestFactory::class);
95  }
96 
/**
 * Applies the configuration of this link type on top of the built-in defaults.
 *
 * Recognised keys of $config: 'headers.' (merged into the default request
 * headers), 'httpAgentName', 'httpAgentUrl' and 'httpAgentEmail' (used to
 * assemble the User-Agent header), 'method', 'range' and 'timeout'.
 * Empty values are ignored, except for 'timeout', which is applied whenever
 * the key is set.
 *
 * @param array $config Configuration for this link type
 */
public function setAdditionalConfig(array $config): void
{
    if (!empty($config['headers.'])) {
        $this->headers = array_merge($this->headers, $config['headers.']);
    }

    // Assemble the User-Agent as "<name> <url>;<email>"; every part is optional.
    if (!empty($config['httpAgentName'])) {
        $this->headers['User-Agent'] = $config['httpAgentName'];
    }
    if (!empty($config['httpAgentUrl'])) {
        $this->headers['User-Agent'] .= ' ' . $config['httpAgentUrl'];
    }

    // A configured address wins; otherwise fall back to the globally configured sender address.
    $contactAddress = ($config['httpAgentEmail'] ?? '')
        ?: ($GLOBALS['TYPO3_CONF_VARS']['MAIL']['defaultMailFromAddress'] ?? '');
    if ($contactAddress) {
        $this->headers['User-Agent'] .= ';' . $contactAddress;
    }

    if (!empty($config['method'])) {
        $this->method = $config['method'];
    }
    if (!empty($config['range'])) {
        $this->range = $config['range'];
    }
    // Unlike the options above, an explicit 0 is accepted here.
    if (isset($config['timeout'])) {
        $this->timeout = (int)$config['timeout'];
    }
}
131 
/**
 * Checks a given URL for validity.
 *
 * Results are cached per original URL, so each URL is requested at most
 * once per instance. When the preferred method is HEAD, a HEAD request is
 * tried first; if it does not succeed, a GET request follows.
 *
 * @param string $origUrl The URL to check
 * @param array $softRefEntry Not used by this implementation
 * @param mixed $reference Not used by this implementation
 * @return bool TRUE on success or FALSE on error
 */
public function checkLink($origUrl, $softRefEntry, $reference)
{
    // Serve repeated URLs from the cache and restore the matching error details.
    if (isset($this->urlReports[$origUrl])) {
        $this->setErrorParams($this->urlErrorParams[$origUrl]);
        return $this->urlReports[$origUrl];
    }

    $requestOptions = [
        'cookies' => GeneralUtility::makeInstance(CookieJar::class),
        'allow_redirects' => ['strict' => true],
        'headers' => $this->headers
    ];
    if ($this->timeout > 0) {
        $requestOptions['timeout'] = $this->timeout;
    }

    $linkIsValid = false;
    $targetUrl = $this->preprocessUrl($origUrl);
    if (!empty($targetUrl)) {
        $linkIsValid = $this->method === 'HEAD'
            && $this->requestUrl($targetUrl, 'HEAD', $requestOptions);
        if (!$linkIsValid) {
            // HEAD was skipped, not allowed or failed: retry with GET,
            // limited to the configured byte range if there is one.
            if ($this->range) {
                $requestOptions['headers']['Range'] = 'bytes=' . $this->range;
            }
            $linkIsValid = $this->requestUrl($targetUrl, 'GET', $requestOptions);
        }
    }

    $this->urlReports[$origUrl] = $linkIsValid;
    $this->urlErrorParams[$origUrl] = $this->errorParams;

    return $linkIsValid;
}
174 
/**
 * Sends a single HTTP request and reports whether the URL counts as reachable.
 *
 * On failure, $this->errorParams is filled with 'errorType' and 'message'
 * and, for failures signalled by an exception, 'exception'.
 *
 * @param string $url URL to request
 * @param string $method HTTP method passed to the request factory
 * @param array $options Request options passed to the request factory
 * @return bool TRUE if a response with a status code below 300 was received
 */
protected function requestUrl(string $url, string $method, array $options): bool
{
    $this->errorParams = [];
    $isValidUrl = false;
    try {
        $response = $this->requestFactory->request($url, $method, $options);
        if ($response->getStatusCode() >= 300) {
            $this->errorParams['errorType'] = $response->getStatusCode();
            $this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
        } else {
            $isValidUrl = true;
        }
    } catch (TooManyRedirectsException $e) {
        // redirect loop or too many redirects
        // todo: change errorType to 'redirect' (breaking change)
        $this->errorParams['errorType'] = 'loop';
        $this->errorParams['exception'] = $e->getMessage();
        $this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
    } catch (ClientException $e) {
        if ($e->hasResponse()) {
            $this->errorParams['errorType'] = $e->getResponse()->getStatusCode();
        } else {
            $this->errorParams['errorType'] = 'unknown';
        }
        $this->errorParams['exception'] = $e->getMessage();
        $this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
    } catch (RequestException $e) {
        // This branch also receives HTTP error responses that are not client
        // errors (e.g. 5xx). Those are not network problems: report the
        // status code whenever the server actually answered.
        if ($e->hasResponse()) {
            $this->errorParams['errorType'] = $e->getResponse()->getStatusCode();
        } else {
            $this->errorParams['errorType'] = 'network';
        }
        $this->errorParams['exception'] = $e->getMessage();
        $this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
    } catch (\Exception $e) {
        // Generic catch for anything else that may go wrong
        $this->errorParams['errorType'] = 'exception';
        $this->errorParams['exception'] = $e->getMessage();
        $this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
    }
    return $isValidUrl;
}
221 
/**
 * Generates the localized error message for the given error parameters.
 *
 * @param array $errorParams Error details; 'errorType' holds an HTTP status
 *                           code or one of 'loop', 'exception', 'network',
 *                           and the optional 'exception' holds the
 *                           exception message
 * @return string Localized error message
 */
public function getErrorMessage($errorParams)
{
    $lang = $this->getLanguageService();
    // Both keys may be absent: failures detected from a plain response
    // status carry no 'exception' entry, so never read them unguarded.
    $errorType = $errorParams['errorType'] ?? '';
    $exception = $errorParams['exception'] ?? '';
    switch ($errorType) {
        case 300:
            $message = sprintf($lang->getLL('list.report.externalerror'), $errorType);
            break;
        case 403:
            $message = $lang->getLL('list.report.pageforbidden403');
            break;
        case 404:
            $message = $lang->getLL('list.report.pagenotfound404');
            break;
        case 500:
            $message = $lang->getLL('list.report.internalerror500');
            break;
        case 'loop':
            $message = sprintf(
                $lang->getLL('list.report.redirectloop'),
                $exception,
                ''
            );
            break;
        case 'exception':
            $message = sprintf($lang->getLL('list.report.httpexception'), $exception);
            break;
        case 'network':
            $message = $lang->getLL('list.report.networkexception');
            if ($exception) {
                $message .= ':' . $exception;
            }
            break;
        default:
            $message = sprintf($lang->getLL('list.report.otherhttpcode'), $errorType, $exception);
    }

    return $message;
}
267 
/**
 * Determines the link type of a soft reference entry.
 *
 * @param array $value Soft reference entry; its 'tokenValue' is searched for an http(s) URL
 * @param string $type Link type detected so far
 * @param string $key Not used by this implementation
 * @return string 'external' if the token value contains an http(s) URL, otherwise the given $type
 */
public function fetchType($value, $type, $key)
{
    // A single match is enough to classify the entry as an external link.
    $containsExternalUrl = preg_match(
        '/((?:http|https))(?::\\/\\/)(?:[^\\s<>]+)/i',
        $value['tokenValue']
    ) === 1;

    return $containsExternalUrl ? 'external' : $type;
}
284 
/**
 * Prepares a URL for the HTTP request.
 *
 * HTML entities are decoded and, if the URL has a host, the host is
 * converted to its ASCII form via HttpUtility::idn_to_ascii(). Whenever
 * the conversion fails, the decoded URL is used with its host unchanged.
 *
 * @param string $url URL as found in the content
 * @return string URL to request
 */
protected function preprocessUrl(string $url): string
{
    $url = html_entity_decode($url);
    $parts = parse_url($url);
    if ($parts['host'] ?? false) {
        try {
            $asciiHost = HttpUtility::idn_to_ascii($parts['host']);
            // idn_to_ascii() may return a non-string on failure; casting that
            // to '' and writing it back would strip the host from the URL.
            if (is_string($asciiHost) && $asciiHost !== '' && $asciiHost !== $parts['host']) {
                $parts['host'] = $asciiHost;
                $url = HttpUtility::buildUrl($parts);
            }
        } catch (\Throwable $e) {
            // ignore error and proceed with link checking
        }
    }
    return $url;
}
308 }
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\$timeout
‪int $timeout
Definition: ExternalLinktype.php:74
‪TYPO3\CMS\Core\Utility\HttpUtility\idn_to_ascii
static string|bool idn_to_ascii(string $domain)
Definition: HttpUtility.php:195
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\$range
‪string $range
Definition: ExternalLinktype.php:67
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\getErrorMessage
‪string getErrorMessage($errorParams)
Definition: ExternalLinktype.php:220
‪TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype\getLanguageService
‪LanguageService getLanguageService()
Definition: AbstractLinktype.php:92
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\$urlErrorParams
‪array $urlErrorParams
Definition: ExternalLinktype.php:41
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\preprocessUrl
‪string preprocessUrl(string $url)
Definition: ExternalLinktype.php:283
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\fetchType
‪string fetchType($value, $type, $key)
Definition: ExternalLinktype.php:268
‪TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype
Definition: AbstractLinktype.php:24
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\__construct
‪__construct(RequestFactory $requestFactory=null)
Definition: ExternalLinktype.php:84
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\$requestFactory
‪RequestFactory $requestFactory
Definition: ExternalLinktype.php:78
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\requestUrl
‪bool requestUrl(string $url, string $method, array $options)
Definition: ExternalLinktype.php:175
‪TYPO3\CMS\Linkvalidator\Linktype
Definition: AbstractLinktype.php:16
‪TYPO3\CMS\Core\Utility\HttpUtility\buildUrl
‪static string buildUrl(array $urlParts)
Definition: HttpUtility.php:141
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\setAdditionalConfig
‪setAdditionalConfig(array $config)
Definition: ExternalLinktype.php:89
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype
Definition: ExternalLinktype.php:30
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\$urlReports
‪array $urlReports
Definition: ExternalLinktype.php:35
‪TYPO3\CMS\Core\Http\RequestFactory
Definition: RequestFactory.php:31
‪TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype\setErrorParams
‪setErrorParams($value)
Definition: AbstractLinktype.php:63
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\$headers
‪array $headers
Definition: ExternalLinktype.php:47
‪$GLOBALS
‪$GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['adminpanel']['modules']
Definition: ext_localconf.php:5
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\$errorParams
‪array $errorParams
Definition: ExternalLinktype.php:82
‪TYPO3\CMS\Core\Utility\HttpUtility
Definition: HttpUtility.php:24
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\checkLink
‪bool checkLink($origUrl, $softRefEntry, $reference)
Definition: ExternalLinktype.php:133
‪TYPO3\CMS\Core\Utility\GeneralUtility
Definition: GeneralUtility.php:46
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\$method
‪string $method
Definition: ExternalLinktype.php:59