‪TYPO3CMS  9.5
ExternalLinktype.php
Go to the documentation of this file.
1 <?php
3 
4 /*
5  * This file is part of the TYPO3 CMS project.
6  *
7  * It is free software; you can redistribute it and/or modify it under
8  * the terms of the GNU General Public License, either version 2
9  * of the License, or any later version.
10  *
11  * For the full copyright and license information, please read the
12  * LICENSE.txt file that was distributed with this source code.
13  *
14  * The TYPO3 project - inspiring people to share!
15  */
16 
17 use GuzzleHttp\Cookie\CookieJar;
21 
26 {
/**
 * Results of URLs that were already checked, keyed by the original URL.
 * Filled and read by checkLink() so each URL is only requested once per instance.
 *
 * @var array
 */
32  protected ‪$urlReports = [];
33 
/**
 * Error parameters recorded for already checked URLs, keyed by the original URL.
 * Stored alongside $urlReports so a cached result can restore its error details.
 *
 * @var array
 */
39  protected ‪$urlErrorParams = [];
40 
/**
 * HTTP headers passed as the 'headers' request option by checkLink().
 *
 * @var array
 */
46  protected ‪$headers = [
47  'User-Agent' => 'TYPO3 linkvalidator',
48  'Accept' => '*/*',
49  'Accept-Language' => '*',
50  'Accept-Encoding' => '*',
51  ];
52 
/**
 * Factory used by requestUrl() to send the HTTP requests.
 *
 * @var RequestFactory
 */
56  protected ‪$requestFactory;
57 
/**
 * Error details of the most recent request; reset at the start of every requestUrl() call.
 *
 * @var array
 */
61  protected ‪$errorParams = [];
62 
64  {
65  $this->requestFactory = ‪$requestFactory ?: GeneralUtility::makeInstance(RequestFactory::class);
66  }
67 
/**
 * Checks whether the given external URL can be requested successfully.
 *
 * Results are remembered per original URL: a repeated call for the same URL
 * restores the stored error parameters and returns the stored result without
 * sending another request. Otherwise a HEAD request is sent first and, if that
 * does not succeed, a GET request with a Range header is sent as a fallback.
 *
 * @param string $origUrl The URL to check; also used as the cache key
 * @param mixed $softRefEntry Not read by this method
 * @param mixed $reference Not read by this method
 * @return bool TRUE if one of the requests succeeded, FALSE otherwise
 */
public function checkLink($origUrl, $softRefEntry, $reference)
{
    // Serve repeated checks of the same URL from the per-instance cache
    if (isset($this->urlReports[$origUrl])) {
        $this->setErrorParams($this->urlErrorParams[$origUrl]);
        return $this->urlReports[$origUrl];
    }

    $requestOptions = [
        'cookies' => GeneralUtility::makeInstance(CookieJar::class),
        'allow_redirects' => ['strict' => true],
        'headers' => $this->headers
    ];
    $targetUrl = $this->preprocessUrl($origUrl);

    $reachable = false;
    if (!empty($targetUrl)) {
        $reachable = $this->requestUrl($targetUrl, 'HEAD', $requestOptions);
        if ($reachable === false) {
            // HEAD was not allowed or failed: retry with GET, asking only
            // for the first bytes of the document via a Range header
            $requestOptions['headers']['Range'] = 'bytes=0-4048';
            $reachable = $this->requestUrl($targetUrl, 'GET', $requestOptions);
        }
    }

    // Remember both the result and the error details under the original URL
    $this->urlReports[$origUrl] = $reachable;
    $this->urlErrorParams[$origUrl] = $this->errorParams;

    return $reachable;
}
103 
/**
 * Sends a single HTTP request and records the outcome in $this->errorParams.
 *
 * The error parameters are reset on every call. On failure they contain
 * 'errorType' (HTTP status code or one of 'loop', 'unknown', 'network',
 * 'exception'), 'message' and, for exceptions, the exception text in 'exception'.
 *
 * @param string $url URL to request
 * @param string $method HTTP method to use (checkLink() passes 'HEAD' or 'GET')
 * @param array $options Request options handed to the request factory
 * @return bool TRUE if a response with a status code below 300 was received
 */
protected function requestUrl(string $url, string $method, array $options): bool
{
    $this->errorParams = [];
    $isValidUrl = false;
    try {
        $response = $this->requestFactory->request($url, $method, $options);
        $statusCode = $response->getStatusCode();
        if ($statusCode >= 300) {
            $this->errorParams['errorType'] = $statusCode;
            $this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
        } else {
            $isValidUrl = true;
        }
    } catch (\GuzzleHttp\Exception\TooManyRedirectsException $e) {
        // redirect loop or too many redirects
        // todo: change errorType to 'redirect' (breaking change)
        $this->errorParams['errorType'] = 'loop';
        $this->errorParams['exception'] = $e->getMessage();
        $this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
    } catch (\GuzzleHttp\Exception\BadResponseException $e) {
        // BadResponseException is the common parent of ClientException (4xx)
        // and ServerException (5xx). Catching only ClientException let 5xx
        // responses fall through to the RequestException branch below, where
        // they were reported as 'network' errors instead of by status code.
        if ($e->hasResponse()) {
            $this->errorParams['errorType'] = $e->getResponse()->getStatusCode();
        } else {
            $this->errorParams['errorType'] = 'unknown';
        }
        $this->errorParams['exception'] = $e->getMessage();
        $this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
    } catch (\GuzzleHttp\Exception\RequestException $e) {
        // Remaining request failures (no usable HTTP response)
        $this->errorParams['errorType'] = 'network';
        $this->errorParams['exception'] = $e->getMessage();
        $this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
    } catch (\Exception $e) {
        // Generic catch for anything else that may go wrong
        $this->errorParams['errorType'] = 'exception';
        $this->errorParams['exception'] = $e->getMessage();
        $this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
    }
    return $isValidUrl;
}
150 
/**
 * Builds the localized error message for a set of error parameters.
 *
 * @param array $errorParams Error parameters; 'errorType' selects the message,
 *                           'exception' (optional) carries the exception text
 * @return string The error message
 */
public function getErrorMessage($errorParams)
{
    $lang = $this->getLanguageService();
    // Both keys may be absent: requestUrl() calls this for responses with a
    // status code >= 300 without setting 'exception'. Reading them through ??
    // avoids undefined-index notices while producing the same message text.
    $errorType = $errorParams['errorType'] ?? '';
    $exception = $errorParams['exception'] ?? '';
    switch ($errorType) {
        case 300:
            $message = sprintf($lang->getLL('list.report.externalerror'), $errorType);
            break;
        case 403:
            $message = $lang->getLL('list.report.pageforbidden403');
            break;
        case 404:
            $message = $lang->getLL('list.report.pagenotfound404');
            break;
        case 500:
            $message = $lang->getLL('list.report.internalerror500');
            break;
        case 'loop':
            $message = sprintf(
                $lang->getLL('list.report.redirectloop'),
                $exception,
                ''
            );
            break;
        case 'exception':
            $message = sprintf($lang->getLL('list.report.httpexception'), $exception);
            break;
        case 'network':
            $message = $lang->getLL('list.report.networkexception');
            if ($exception) {
                $message .= ':' . $exception;
            }
            break;
        default:
            // Any other status code or error type
            $message = sprintf($lang->getLL('list.report.otherhttpcode'), $errorType, $exception);
    }

    return $message;
}
196 
/**
 * Reports the link type "external" when the token value contains an
 * http:// or https:// URL; otherwise the given type is passed through.
 *
 * @param array $value Link data; only the 'tokenValue' entry is inspected
 * @param string $type Type determined so far, returned unchanged if no URL is found
 * @param string $key Not read by this method
 * @return string 'external' or the given $type
 */
public function fetchType($value, $type, $key)
{
    // A single match is enough to classify the value
    $containsHttpUrl = preg_match('/((?:http|https))(?::\\/\\/)(?:[^\\s<>]+)/i', $value['tokenValue']) === 1;

    return $containsHttpUrl ? 'external' : $type;
}
213 
/**
 * Prepares a URL for checking: decodes HTML entities and converts an
 * internationalized host name to its ASCII form.
 *
 * If the host cannot be converted, the decoded URL is returned as is so
 * that link checking can still proceed.
 *
 * @param string $url The URL as found in the content
 * @return string The URL to request
 */
protected function preprocessUrl(string $url): string
{
    $url = html_entity_decode($url);
    $parts = parse_url($url);
    $host = (string)($parts['host'] ?? '');
    if ($host === '') {
        // No host component (or unparsable URL): nothing to convert
        return $url;
    }
    try {
        $asciiHost = HttpUtility::idn_to_ascii($host);
        // idn_to_ascii() may return FALSE on failure. Casting that to ''
        // and writing it back would strip the host and rebuild a broken
        // URL, so the URL is only rewritten for a usable, changed result.
        if (is_string($asciiHost) && $asciiHost !== '' && $asciiHost !== $host) {
            $parts['host'] = $asciiHost;
            $url = HttpUtility::buildUrl($parts);
        }
    } catch (\Throwable $e) {
        // ignore error and proceed with link checking
    }
    return $url;
}
238 }
‪TYPO3\CMS\Core\Utility\HttpUtility\idn_to_ascii
‪static string bool idn_to_ascii(string $domain)
Definition: HttpUtility.php:193
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\getErrorMessage
‪string getErrorMessage($errorParams)
Definition: ExternalLinktype.php:152
‪TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype\getLanguageService
‪LanguageService getLanguageService()
Definition: AbstractLinktype.php:79
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\$urlErrorParams
‪array $urlErrorParams
Definition: ExternalLinktype.php:37
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\preprocessUrl
‪string preprocessUrl(string $url)
Definition: ExternalLinktype.php:215
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\fetchType
‪string fetchType($value, $type, $key)
Definition: ExternalLinktype.php:200
‪TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype
Definition: AbstractLinktype.php:22
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\__construct
‪__construct(RequestFactory $requestFactory=null)
Definition: ExternalLinktype.php:58
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\$requestFactory
‪RequestFactory $requestFactory
Definition: ExternalLinktype.php:52
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\requestUrl
‪bool requestUrl(string $url, string $method, array $options)
Definition: ExternalLinktype.php:107
‪TYPO3\CMS\Linkvalidator\Linktype
Definition: AbstractLinktype.php:2
‪TYPO3\CMS\Core\Utility\HttpUtility\buildUrl
‪static string buildUrl(array $urlParts)
Definition: HttpUtility.php:138
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype
Definition: ExternalLinktype.php:26
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\$urlReports
‪array $urlReports
Definition: ExternalLinktype.php:31
‪TYPO3\CMS\Core\Http\RequestFactory
Definition: RequestFactory.php:27
‪TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype\setErrorParams
‪setErrorParams($value)
Definition: AbstractLinktype.php:50
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\$headers
‪array $headers
Definition: ExternalLinktype.php:43
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\$errorParams
‪array $errorParams
Definition: ExternalLinktype.php:56
‪TYPO3\CMS\Core\Utility\HttpUtility
Definition: HttpUtility.php:21
‪TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype\checkLink
‪bool checkLink($origUrl, $softRefEntry, $reference)
Definition: ExternalLinktype.php:72
‪TYPO3\CMS\Core\Utility\GeneralUtility
Definition: GeneralUtility.php:45