‪TYPO3CMS  ‪main
Indexer.php
Go to the documentation of this file.
1 <?php
2 
3 /*
4  * This file is part of the TYPO3 CMS project.
5  *
6  * It is free software; you can redistribute it and/or modify it under
7  * the terms of the GNU General Public License, either version 2
8  * of the License, or any later version.
9  *
10  * For the full copyright and license information, please read the
11  * LICENSE.txt file that was distributed with this source code.
12  *
13  * The TYPO3 project - inspiring people to share!
14  */
15 
17 
use Psr\Log\LoggerAwareInterface;
use Psr\Log\LoggerAwareTrait;
use TYPO3\CMS\Core\Resource\Exception\IllegalFileExtensionException;
use TYPO3\CMS\Core\Resource\Exception\InsufficientFileAccessPermissionsException;
use TYPO3\CMS\Core\Resource\Exception\InvalidHashException;
use TYPO3\CMS\Core\Resource\File;
use TYPO3\CMS\Core\Resource\FileType;
use TYPO3\CMS\Core\Resource\ResourceFactory;
use TYPO3\CMS\Core\Resource\ResourceStorage;
use TYPO3\CMS\Core\Resource\Service\ExtractorService;
use TYPO3\CMS\Core\Type\File\ImageInfo;
use TYPO3\CMS\Core\Utility\GeneralUtility;
30 
34 class ‪Indexer implements LoggerAwareInterface
35 {
36  use LoggerAwareTrait;
37 
/**
 * Index records that have to be created or refreshed, keyed by file identifier.
 * A null value marks an identifier for which no index record was found.
 *
 * @var array
 */
protected $filesToUpdate = [];

/**
 * UIDs of index records that were matched to a file during the current run.
 *
 * @var int[]
 */
protected $identifiedFileUids = [];

/**
 * @var ResourceStorage
 */
protected $storage;

/**
 * Created on first use by getExtractorService().
 *
 * @var ExtractorService|null
 */
protected $extractorService;
57 
/**
 * @param ResourceStorage $storage The storage this indexer operates on
 */
public function __construct(ResourceStorage $storage)
{
    $this->storage = $storage;
}
62 
/**
 * Creates an index record for the file with the given identifier, stores its
 * metadata and returns the resulting file object.
 *
 * @param string $identifier Non-empty file identifier within the storage
 * @throws \InvalidArgumentException if $identifier is not a non-empty string
 */
public function createIndexEntry($identifier): File
{
    if (!is_string($identifier) || $identifier === '') {
        throw new \InvalidArgumentException(
            'Invalid file identifier given. It must be of type string and not empty. "' . gettype($identifier) . '" given.',
            1401732565
        );
    }

    $fileProperties = $this->gatherFileInformationArray($identifier);
    $fileIndexRepository = $this->getFileIndexRepository();

    // Persist the raw record first; the file object is built from the stored row.
    $record = $fileIndexRepository->addRaw($fileProperties);
    $fileObject = $this->getResourceFactory()->getFileObject($record['uid'], $record);
    $fileIndexRepository->updateIndexingTime($fileObject->getUid());

    // Image dimensions are always collected; extractor output is merged on top
    // only when the storage has automatic extraction enabled.
    $metaData = $this->extractRequiredMetaData($fileObject);
    if ($this->storage->autoExtractMetadataEnabled()) {
        $metaData = array_merge($metaData, $this->getExtractorService()->extractMetaData($fileObject));
    }
    $fileObject->getMetaData()->add($metaData)->save();

    return $fileObject;
}
93 
/**
 * Refreshes the index record and the metadata of an already indexed file
 * and returns the same file object.
 */
public function updateIndexEntry(File $fileObject): File
{
    $fileObject->updateProperties(
        $this->gatherFileInformationArray($fileObject->getIdentifier())
    );

    $indexRepository = $this->getFileIndexRepository();
    $indexRepository->update($fileObject);
    $indexRepository->updateIndexingTime($fileObject->getUid());

    // Start with the mandatory values, then let extractor results override them
    // when the storage has automatic extraction enabled.
    $collected = $this->extractRequiredMetaData($fileObject);
    if ($this->storage->autoExtractMetadataEnabled()) {
        $collected = array_merge($collected, $this->getExtractorService()->extractMetaData($fileObject));
    }

    $fileObject->getMetaData()->add($collected)->save();

    return $fileObject;
}
114 
/**
 * Synchronises the index with the storage: detects new and changed files,
 * indexes them, and finally flags index records whose file no longer exists.
 */
public function processChangesInStorages()
{
    // Get all file identifiers from the storage
    $rootFolderIdentifier = $this->storage->getRootLevelFolder(false)->getIdentifier();
    $availableFiles = $this->storage->getFileIdentifiersInFolder($rootFolderIdentifier, true, true);

    $this->detectChangedFilesInStorage($availableFiles);

    // Must run before detectMissingFiles(): it consumes $this->filesToUpdate and
    // records moved/new files in $this->identifiedFileUids, which the missing-file
    // check relies on.
    $this->processChangedAndNewFiles();

    $this->detectMissingFiles();
}
124 
/**
 * Runs metadata extraction for index records of this storage that the
 * repository reports as outstanding.
 *
 * @param int $maximumFileCount Limit passed on to the repository query (-1 by default)
 */
public function runMetaDataExtraction($maximumFileCount = -1)
{
    $fileIndexRepository = $this->getFileIndexRepository();
    $fileIndexRecords = $fileIndexRepository->findInStorageWithIndexOutstanding($this->storage, $maximumFileCount);

    foreach ($fileIndexRecords as $indexRecord) {
        $fileObject = $this->getResourceFactory()->getFileObject($indexRecord['uid'], $indexRecord);

        // Check for existence of file before extraction
        if (!$fileObject->exists()) {
            // Mark file as missing and continue with next record
            $fileIndexRepository->markFileAsMissing($indexRecord['uid']);
            continue;
        }

        try {
            $this->extractMetaData($fileObject);
        } catch (InsufficientFileAccessPermissionsException $e) {
            // We skip files that are not accessible
        } catch (IllegalFileExtensionException $e) {
            // We skip files that have an extension that we don't allow
        }
    }
}
148 
/**
 * Merges extractor results into the file's existing metadata and saves it.
 */
public function extractMetaData(File $fileObject)
{
    // Merge by column name. Wrapping the existing metadata in a list would put
    // the whole record under numeric key 0 instead of merging its fields.
    $metaData = array_merge(
        $fileObject->getMetaData()->get(),
        $this->getExtractorService()->extractMetaData($fileObject)
    );

    $fileObject->getMetaData()->add($metaData)->save();

    // Record that indexing ran for this file, as createIndexEntry() and
    // updateIndexEntry() do. NOTE(review): presumably this is what removes the
    // record from the "index outstanding" result set - confirm against the repository.
    $this->getFileIndexRepository()->updateIndexingTime($fileObject->getUid());
}
162 
/**
 * Flags index records of this storage as missing when they were not matched
 * to a file during the current run and the storage no longer has the file.
 */
protected function detectMissingFiles()
{
    $indexedRecords = $this->getFileIndexRepository()->findInStorageAndNotInUidList(
        $this->storage,
        []
    );

    foreach ($indexedRecords as $record) {
        // Records already matched to an existing file need no further check.
        $alreadyIdentified = in_array($record['uid'], $this->identifiedFileUids, true);

        // Extra check against the storage before flagging the record.
        if (!$alreadyIdentified && !$this->storage->hasFile($record['identifier'])) {
            $this->getFileIndexRepository()->markFileAsMissing($record['uid']);
        }
    }
}
188 
/**
 * Tells whether the extractor accepts the type of the given file.
 * An extractor without file type restrictions accepts every file.
 *
 * @return bool
 */
protected function isFileTypeSupportedByExtractor(File $file, ExtractorInterface $extractor)
{
    $restrictions = $extractor->getFileTypeRestrictions();
    if (empty($restrictions)) {
        return true;
    }

    // Loose comparison is kept on purpose to match the existing behaviour.
    return in_array($file->getType(), $restrictions);
}
203 
/**
 * Compares the given storage file identifiers with the index and collects
 * everything that needs (re-)indexing in $this->filesToUpdate.
 *
 * @param array $fileIdentifierArray File identifiers found in the storage
 */
protected function detectChangedFilesInStorage(array $fileIdentifierArray)
{
    foreach ($fileIdentifierArray as $fileIdentifier) {
        // Skip processed files
        if ($this->storage->isWithinProcessingFolder($fileIdentifier)) {
            continue;
        }

        // Modification time as reported by the storage
        $storageInfo = $this->storage->getFileInfoByIdentifier($fileIdentifier, ['mtime']);
        $indexRecord = $this->getFileIndexRepository()->findOneByStorageAndIdentifier($this->storage, $fileIdentifier);

        if ($indexRecord === false) {
            // Not indexed yet: null marks the identifier as new
            $this->filesToUpdate[$fileIdentifier] = null;
            continue;
        }

        $this->identifiedFileUids[] = $indexRecord['uid'];

        // Re-index when the modification time differs from the indexed one
        // or when the record is currently flagged as missing.
        $timestampDiffers = (int)$indexRecord['modification_date'] !== $storageInfo['mtime'];
        if ($timestampDiffers || $indexRecord['missing']) {
            $this->filesToUpdate[$fileIdentifier] = $indexRecord;
        }
    }
}
230 
/**
 * Works through $this->filesToUpdate: refreshes existing index records,
 * re-attaches records of files that appear to have been moved or renamed,
 * and creates index records for new files.
 *
 * Failures for a single file are logged and do not stop the loop.
 */
protected function processChangedAndNewFiles()
{
    foreach ($this->filesToUpdate as $identifier => $data) {
        try {
            if ($data !== null) {
                // Update existing file
                $fileObject = $this->getResourceFactory()->getFileObject($data['uid'], $data);
                $this->updateIndexEntry($fileObject);
                continue;
            }

            // Search for files with same content hash in indexed storage
            $fileHash = $this->storage->hashFileByIdentifier($identifier, 'sha1');
            $files = $this->getFileIndexRepository()->findByContentHash($fileHash);
            $fileObject = null;
            if (!empty($files)) {
                foreach ($files as $fileIndexEntry) {
                    if ($this->storage->hasFile($fileIndexEntry['identifier'])) {
                        continue;
                    }
                    // The indexed file is gone, so we assume it was moved/renamed
                    // to the new identifier and reuse its index record.
                    $fileObject = $this->getResourceFactory()->getFileObject(
                        $fileIndexEntry['uid'],
                        $fileIndexEntry
                    );
                    $fileObject->updateProperties(
                        [
                            'identifier' => $identifier,
                        ]
                    );
                    $this->updateIndexEntry($fileObject);
                    $this->identifiedFileUids[] = $fileObject->getUid();
                    break;
                }
            }

            // Create new index when no missing file with same content hash is found
            if ($fileObject === null) {
                $fileObject = $this->createIndexEntry($identifier);
                $this->identifiedFileUids[] = $fileObject->getUid();
            }
        } catch (InvalidHashException $e) {
            // The logger comes from LoggerAwareTrait and is null until one is set.
            $this->logger?->error('Unable to create hash for file: {identifier}', [
                'identifier' => $identifier,
                'exception' => $e,
            ]);
        } catch (\Exception $e) {
            $this->logger?->error('Unable to index / update file with identifier {identifier}', [
                'identifier' => $identifier,
                'exception' => $e,
            ]);
        }
    }
}
283 
/**
 * Collects the metadata the core always needs: width and height of images
 * that live in a storage using the "Local" driver. Returns an empty array
 * for everything else.
 */
protected function extractRequiredMetaData(File $fileObject): array
{
    // Remote storages must provide image sizes through extractors instead.
    if (!$fileObject->isImage() || $this->storage->getDriverType() !== 'Local') {
        return [];
    }

    $imageInfo = GeneralUtility::makeInstance(
        ImageInfo::class,
        $fileObject->getForLocalProcessing(false)
    );

    return [
        'width' => $imageInfo->getWidth(),
        'height' => $imageInfo->getHeight(),
    ];
}
305 
306  /****************************
307  * UTILITY
308  ****************************/
/**
 * Builds the property array for an index record from what the storage
 * reports about the file, adding file type, SHA1 hash and missing = 0.
 *
 * @param string $identifier File identifier within the storage
 */
protected function gatherFileInformationArray($identifier): array
{
    $properties = $this->transformFromDriverFileInfoArrayToFileObjectFormat(
        $this->storage->getFileInfoByIdentifier($identifier)
    );

    $properties['type'] = $this->getFileType($properties['mime_type'])->value;
    $properties['sha1'] = $this->storage->hashFileByIdentifier($identifier, 'sha1');
    $properties['missing'] = 0;

    return $properties;
}
325 
/**
 * Resolves the file type for a MIME type via FileType::tryFromMimeType().
 */
protected function getFileType(string $mimeType): FileType
{
    $fileType = FileType::tryFromMimeType($mimeType);

    return $fileType;
}
333 
/**
 * Converts a driver file info array into the key names used for file properties.
 *
 * Known driver keys are renamed, "atime" is dropped, and every other key is
 * passed through unchanged.
 *
 * @param array $fileInfo File info as returned by the driver
 * @return array
 */
protected function transformFromDriverFileInfoArrayToFileObjectFormat(array $fileInfo)
{
    // Driver key => file property name
    $renamedKeys = [
        'size' => 'size',
        'mtime' => 'modification_date',
        'ctime' => 'creation_date',
        'mimetype' => 'mime_type',
    ];
    // Driver keys that have no counterpart in the file properties
    $droppedKeys = ['atime'];

    $properties = [];
    foreach ($fileInfo as $driverKey => $value) {
        if (in_array($driverKey, $droppedKeys, true)) {
            continue;
        }
        $properties[$renamedKeys[$driverKey] ?? $driverKey] = $value;
    }

    return $properties;
}
364 
/**
 * Obtains the file index repository through GeneralUtility::makeInstance().
 *
 * @return FileIndexRepository
 */
protected function getFileIndexRepository()
{
    $repository = GeneralUtility::makeInstance(FileIndexRepository::class);

    return $repository;
}
374 
/**
 * Obtains the metadata repository through GeneralUtility::makeInstance().
 *
 * @return MetaDataRepository
 */
protected function getMetaDataRepository()
{
    $repository = GeneralUtility::makeInstance(MetaDataRepository::class);

    return $repository;
}
384 
/**
 * Obtains the resource factory through GeneralUtility::makeInstance().
 *
 * @return ResourceFactory
 */
protected function getResourceFactory()
{
    $factory = GeneralUtility::makeInstance(ResourceFactory::class);

    return $factory;
}
394 
/**
 * Returns the extractor service, creating it on first use and caching it
 * in $this->extractorService.
 */
protected function getExtractorService(): ExtractorService
{
    return $this->extractorService ??= GeneralUtility::makeInstance(ExtractorService::class);
}
402 }
‪TYPO3\CMS\Core\Resource\Index\MetaDataRepository
Definition: MetaDataRepository.php:40
‪TYPO3\CMS\Core\Resource\Index\ExtractorInterface\getFileTypeRestrictions
‪array getFileTypeRestrictions()
‪TYPO3\CMS\Core\Resource\Index\FileIndexRepository\findInStorageAndNotInUidList
‪array findInStorageAndNotInUidList(ResourceStorage $storage, array $uidList)
Definition: FileIndexRepository.php:422
‪TYPO3\CMS\Core\Resource\Index\FileIndexRepository
Definition: FileIndexRepository.php:44
‪TYPO3\CMS\Core\Resource\AbstractFile\getType
‪int getType()
Definition: AbstractFile.php:281
‪TYPO3\CMS\Core\Resource\Index\Indexer\$filesToUpdate
‪array $filesToUpdate
Definition: Indexer.php:40
‪TYPO3\CMS\Core\Resource\Index\Indexer\getExtractorService
‪getExtractorService()
Definition: Indexer.php:391
‪TYPO3\CMS\Core\Resource\AbstractFile\getForLocalProcessing
non-empty-string getForLocalProcessing(bool $writable=true)
Definition: AbstractFile.php:576
‪TYPO3\CMS\Core\Resource\Index\Indexer\transformFromDriverFileInfoArrayToFileObjectFormat
‪array transformFromDriverFileInfoArrayToFileObjectFormat(array $fileInfo)
Definition: Indexer.php:338
‪TYPO3\CMS\Core\Resource\Index\Indexer
Definition: Indexer.php:35
‪TYPO3\CMS\Core\Resource\Index\Indexer\getMetaDataRepository
‪MetaDataRepository getMetaDataRepository()
Definition: Indexer.php:376
‪TYPO3\CMS\Core\Resource\Index\ExtractorInterface
Definition: ExtractorInterface.php:25
‪TYPO3\CMS\Core\Resource\Index\Indexer\isFileTypeSupportedByExtractor
‪bool isFileTypeSupportedByExtractor(File $file, ExtractorInterface $extractor)
Definition: Indexer.php:190
‪TYPO3\CMS\Core\Resource\Index\Indexer\detectChangedFilesInStorage
‪detectChangedFilesInStorage(array $fileIdentifierArray)
Definition: Indexer.php:203
‪TYPO3\CMS\Core\Resource\AbstractFile\isImage
‪bool isImage()
Definition: AbstractFile.php:301
‪TYPO3\CMS\Core\Resource\Index\Indexer\updateIndexEntry
‪updateIndexEntry(File $fileObject)
Definition: Indexer.php:93
‪TYPO3\CMS\Core\Resource\Index\Indexer\detectMissingFiles
‪detectMissingFiles()
Definition: Indexer.php:163
‪TYPO3\CMS\Core\Resource\Index\Indexer\$storage
‪ResourceStorage $storage
Definition: Indexer.php:48
‪TYPO3\CMS\Core\Resource\Exception\IllegalFileExtensionException
Definition: IllegalFileExtensionException.php:23
‪TYPO3\CMS\Core\Resource\Exception\InsufficientFileAccessPermissionsException
Definition: InsufficientFileAccessPermissionsException.php:23
‪TYPO3\CMS\Core\Resource\tryFromMimeType
‪@ tryFromMimeType
Definition: FileType.php:57
‪TYPO3\CMS\Core\Resource\Index\Indexer\getFileIndexRepository
‪FileIndexRepository getFileIndexRepository()
Definition: Indexer.php:366
‪TYPO3\CMS\Core\Resource\Index\Indexer\extractRequiredMetaData
‪extractRequiredMetaData(File $fileObject)
Definition: Indexer.php:284
‪TYPO3\CMS\Core\Resource\Index\Indexer\gatherFileInformationArray
‪gatherFileInformationArray($identifier)
Definition: Indexer.php:311
‪TYPO3\CMS\Core\Resource\Index\Indexer\__construct
‪__construct(ResourceStorage $storage)
Definition: Indexer.php:54
‪TYPO3\CMS\Core\Resource\Index\FileIndexRepository\updateIndexingTime
‪updateIndexingTime($fileUid)
Definition: FileIndexRepository.php:455
‪TYPO3\CMS\Core\Resource\Index\FileIndexRepository\findByContentHash
‪mixed findByContentHash($hash)
Definition: FileIndexRepository.php:148
‪TYPO3\CMS\Core\Resource\Index\FileIndexRepository\findInStorageWithIndexOutstanding
‪array findInStorageWithIndexOutstanding(ResourceStorage $storage, $limit=-1)
Definition: FileIndexRepository.php:394
‪TYPO3\CMS\Core\Resource\ResourceFactory
Definition: ResourceFactory.php:42
‪TYPO3\CMS\Core\Resource\File
Definition: File.php:26
‪TYPO3\CMS\Webhooks\Message\$record
‪identifier readonly int readonly array $record
Definition: PageModificationMessage.php:36
‪TYPO3\CMS\Core\Resource\Index\Indexer\extractMetaData
‪extractMetaData(File $fileObject)
Definition: Indexer.php:148
‪TYPO3\CMS\Core\Resource\Service\ExtractorService
Definition: ExtractorService.php:29
‪TYPO3\CMS\Core\Resource\Index\Indexer\processChangesInStorages
‪processChangesInStorages()
Definition: Indexer.php:111
‪TYPO3\CMS\Core\Resource\Index\Indexer\createIndexEntry
‪createIndexEntry($identifier)
Definition: Indexer.php:65
‪TYPO3\CMS\Core\Resource\Index
Definition: ExtractorInterface.php:16
‪TYPO3\CMS\Core\Resource\AbstractFile\getUid
int getUid()
Definition: AbstractFile.php:195
‪TYPO3\CMS\Core\Resource\Exception
Definition: Exception.php:21
‪TYPO3\CMS\Core\Resource\Index\Indexer\processChangedAndNewFiles
‪processChangedAndNewFiles()
Definition: Indexer.php:231
‪TYPO3\CMS\Core\Resource\File\getMetaData
‪getMetaData()
Definition: File.php:322
‪TYPO3\CMS\Core\Resource\ResourceStorage
Definition: ResourceStorage.php:129
‪TYPO3\CMS\Core\Resource\Index\FileIndexRepository\markFileAsMissing
‪markFileAsMissing($fileUid)
Definition: FileIndexRepository.php:474
‪TYPO3\CMS\Core\Resource\File\updateProperties
‪updateProperties(array $properties)
Definition: File.php:155
‪TYPO3\CMS\Core\Resource\ResourceFactory\getFileObject
‪File getFileObject($uid, array $fileData=[])
Definition: ResourceFactory.php:193
‪TYPO3\CMS\Core\Resource\Index\Indexer\$identifiedFileUids
‪int[] $identifiedFileUids
Definition: Indexer.php:44
‪TYPO3\CMS\Core\Resource\FileType
‪FileType
Definition: FileType.php:21
‪TYPO3\CMS\Core\Resource\Index\Indexer\$extractorService
‪ExtractorService $extractorService
Definition: Indexer.php:52
‪TYPO3\CMS\Core\Utility\GeneralUtility
Definition: GeneralUtility.php:52
‪TYPO3\CMS\Core\Resource\AbstractFile\getIdentifier
‪getIdentifier()
Definition: AbstractFile.php:144
‪TYPO3\CMS\Core\Resource\Exception\InvalidHashException
Definition: InvalidHashException.php:26
‪TYPO3\CMS\Core\Resource\Index\Indexer\runMetaDataExtraction
‪runMetaDataExtraction($maximumFileCount=-1)
Definition: Indexer.php:124
‪TYPO3\CMS\Webhooks\Message\$identifier
‪identifier readonly string $identifier
Definition: FileAddedMessage.php:37
‪TYPO3\CMS\Core\Resource\Index\Indexer\getFileType
‪getFileType(string $mimeType)
Definition: Indexer.php:325
‪TYPO3\CMS\Core\Resource\Index\Indexer\getResourceFactory
‪ResourceFactory getResourceFactory()
Definition: Indexer.php:386
‪TYPO3\CMS\Core\Resource\Index\FileIndexRepository\findOneByStorageAndIdentifier
array|bool findOneByStorageAndIdentifier(ResourceStorage $storage, $identifier)
Definition: FileIndexRepository.php:124