‪TYPO3CMS  ‪main
Indexer.php
Go to the documentation of this file.
1 <?php
2 
3 /*
4  * This file is part of the TYPO3 CMS project.
5  *
6  * It is free software; you can redistribute it and/or modify it under
7  * the terms of the GNU General Public License, either version 2
8  * of the License, or any later version.
9  *
10  * For the full copyright and license information, please read the
11  * LICENSE.txt file that was distributed with this source code.
12  *
13  * The TYPO3 project - inspiring people to share!
14  */
15 
17 
18 use Psr\Log\LoggerAwareInterface;
19 use Psr\Log\LoggerAwareTrait;
29 
/**
 * Indexer for a single resource storage.
 *
 * Keeps the file index records (and their metadata records) in sync with the
 * files that are actually present in the storage handed to the constructor.
 */
class Indexer implements LoggerAwareInterface
{
    use LoggerAwareTrait;

    /**
     * Files detected as new or changed, keyed by file identifier.
     * The value is the existing index record, or null when the file is not indexed yet.
     *
     * @var array
     */
    protected $filesToUpdate = [];

    /**
     * Uids of index records for which a file was found in the storage.
     *
     * @var int[]
     */
    protected $identifiedFileUids = [];

    /**
     * @var ResourceStorage
     */
    protected $storage;

    /**
     * Lazily created, see getExtractorService().
     *
     * @var ExtractorService
     */
    protected $extractorService;

    /**
     * @param ResourceStorage $storage The storage this indexer works on
     */
    public function __construct(\TYPO3\CMS\Core\Resource\ResourceStorage $storage)
    {
        $this->storage = $storage;
    }

    /**
     * Creates a new index record for the given file identifier and stores the
     * initially extracted metadata for it.
     *
     * @param string $identifier Identifier of the file within the storage
     * @return File The file object built from the new index record
     * @throws \InvalidArgumentException If the identifier is not a non-empty string
     */
    public function createIndexEntry($identifier): File
    {
        if (!is_string($identifier) || $identifier === '') {
            throw new \InvalidArgumentException(
                'Invalid file identifier given. It must be of type string and not empty. "' . gettype($identifier) . '" given.',
                1401732565
            );
        }

        $fileProperties = $this->gatherFileInformationArray($identifier);
        $fileIndexRepository = $this->getFileIndexRepository();

        $record = $fileIndexRepository->addRaw($fileProperties);
        $fileObject = $this->getResourceFactory()->getFileObject($record['uid'], $record);
        $fileIndexRepository->updateIndexingTime($fileObject->getUid());

        $metaData = $this->extractRequiredMetaData($fileObject);
        if ($this->storage->autoExtractMetadataEnabled()) {
            $metaData = array_merge($metaData, $this->getExtractorService()->extractMetaData($fileObject));
        }
        $fileObject->getMetaData()->add($metaData)->save();

        return $fileObject;
    }

    /**
     * Refreshes the index record and the metadata of an already indexed file
     * with the information currently reported by the storage.
     *
     * @param File $fileObject The file to refresh
     * @return File The same file object, with updated properties
     */
    public function updateIndexEntry(File $fileObject): File
    {
        $updatedInformation = $this->gatherFileInformationArray($fileObject->getIdentifier());
        $fileObject->updateProperties($updatedInformation);

        $fileIndexRepository = $this->getFileIndexRepository();
        $fileIndexRepository->update($fileObject);
        $fileIndexRepository->updateIndexingTime($fileObject->getUid());

        $metaData = $this->extractRequiredMetaData($fileObject);
        if ($this->storage->autoExtractMetadataEnabled()) {
            $metaData = array_merge($metaData, $this->getExtractorService()->extractMetaData($fileObject));
        }
        $fileObject->getMetaData()->add($metaData)->save();

        return $fileObject;
    }

    /**
     * Synchronizes the index with the storage: detects new and changed files,
     * indexes them, and finally flags index records whose file has vanished.
     */
    public function processChangesInStorages()
    {
        // get all file-identifiers from the storage
        $availableFiles = $this->storage->getFileIdentifiersInFolder(
            $this->storage->getRootLevelFolder(false)->getIdentifier(),
            true,
            true
        );
        $this->detectChangedFilesInStorage($availableFiles);
        // Must run before detectMissingFiles(): it indexes the collected files and
        // registers the uids of new and moved files in $this->identifiedFileUids.
        $this->processChangedAndNewFiles();

        $this->detectMissingFiles();
    }

    /**
     * Runs the metadata extraction for index records of this storage whose
     * indexing is still outstanding.
     *
     * @param int $maximumFileCount Limit passed on to the repository lookup
     */
    public function runMetaDataExtraction($maximumFileCount = -1)
    {
        $fileIndexRepository = $this->getFileIndexRepository();
        $fileIndexRecords = $fileIndexRepository->findInStorageWithIndexOutstanding($this->storage, $maximumFileCount);
        foreach ($fileIndexRecords as $indexRecord) {
            $fileObject = $this->getResourceFactory()->getFileObject($indexRecord['uid'], $indexRecord);
            // Check for existence of file before extraction
            if (!$fileObject->exists()) {
                // Mark file as missing and continue with next record
                $this->getFileIndexRepository()->markFileAsMissing($indexRecord['uid']);
                continue;
            }
            try {
                $this->extractMetaData($fileObject);
            } catch (\TYPO3\CMS\Core\Resource\Exception\InsufficientFileAccessPermissionsException $e) {
                // We skip files that are not accessible
            } catch (IllegalFileExtensionException $e) {
                // We skip files that have an extension that we don't allow
            }
        }
    }

    /**
     * Runs the extractor service on the file, stores the result in the file's
     * metadata and records the indexing time.
     *
     * @param File $fileObject
     */
    public function extractMetaData(File $fileObject)
    {
        // NOTE(review): the existing metadata array is passed as element 0 of the
        // first argument instead of being merged key by key - confirm this is intended.
        $metaData = array_merge([
            $fileObject->getMetaData()->get(),
        ], $this->getExtractorService()->extractMetaData($fileObject));

        $fileObject->getMetaData()->add($metaData)->save();

        // Record the indexing time like createIndexEntry()/updateIndexEntry() do;
        // presumably this is what takes the file off the "index outstanding" list.
        $this->getFileIndexRepository()->updateIndexingTime($fileObject->getUid());
    }

    /**
     * Flags index records as missing when they were not matched to a file
     * during detection and the storage confirms that the file does not exist.
     */
    protected function detectMissingFiles()
    {
        $fileIndexRepository = $this->getFileIndexRepository();
        $indexedNotExistentFiles = $fileIndexRepository->findInStorageAndNotInUidList(
            $this->storage,
            $this->identifiedFileUids
        );

        foreach ($indexedNotExistentFiles as $record) {
            if (!$this->storage->hasFile($record['identifier'])) {
                $this->getFileIndexRepository()->markFileAsMissing($record['uid']);
            }
        }
    }

    /**
     * Tells whether the extractor accepts the type of the given file.
     * An extractor without file type restrictions accepts every file.
     *
     * @param File $file
     * @param ExtractorInterface $extractor
     * @return bool
     */
    protected function isFileTypeSupportedByExtractor(File $file, ExtractorInterface $extractor)
    {
        $fileTypeRestrictions = $extractor->getFileTypeRestrictions();

        return empty($fileTypeRestrictions) || in_array($file->getType(), $fileTypeRestrictions);
    }

    /**
     * Compares the given file identifiers with the index and collects new and
     * changed files in $this->filesToUpdate. Uids of index records that have a
     * matching file are remembered in $this->identifiedFileUids.
     *
     * @param array $fileIdentifierArray File identifiers found in the storage
     */
    protected function detectChangedFilesInStorage(array $fileIdentifierArray)
    {
        foreach ($fileIdentifierArray as $fileIdentifier) {
            // skip processed files
            if ($this->storage->isWithinProcessingFolder($fileIdentifier)) {
                continue;
            }
            // Get the modification time for file-identifier from the storage
            $modificationTime = $this->storage->getFileInfoByIdentifier($fileIdentifier, ['mtime']);
            // Look if the modification time in FS differs from the one in database (key needed on timestamps)
            $indexRecord = $this->getFileIndexRepository()->findOneByStorageAndIdentifier($this->storage, $fileIdentifier);

            if ($indexRecord === false) {
                // Not indexed yet: null marks the file as "new" for processChangedAndNewFiles()
                $this->filesToUpdate[$fileIdentifier] = null;
                continue;
            }

            $this->identifiedFileUids[] = $indexRecord['uid'];

            if ((int)$indexRecord['modification_date'] !== $modificationTime['mtime'] || $indexRecord['missing']) {
                $this->filesToUpdate[$fileIdentifier] = $indexRecord;
            }
        }
    }

    /**
     * Indexes everything collected in $this->filesToUpdate.
     *
     * A new file whose content hash matches an index record whose own file is
     * gone is treated as moved/renamed and re-uses that record. Failures are
     * logged per file and do not abort the loop.
     */
    protected function processChangedAndNewFiles()
    {
        foreach ($this->filesToUpdate as $identifier => $data) {
            try {
                if ($data === null) {
                    // search for files with same content hash in indexed storage
                    $fileHash = $this->storage->hashFileByIdentifier($identifier, 'sha1');
                    $files = $this->getFileIndexRepository()->findByContentHash($fileHash);
                    $fileObject = null;
                    if (!empty($files)) {
                        foreach ($files as $fileIndexEntry) {
                            // check if file is missing then we assume it's moved/renamed
                            if (!$this->storage->hasFile($fileIndexEntry['identifier'])) {
                                $fileObject = $this->getResourceFactory()->getFileObject(
                                    $fileIndexEntry['uid'],
                                    $fileIndexEntry
                                );
                                $fileObject->updateProperties(
                                    [
                                        'identifier' => $identifier,
                                    ]
                                );
                                $this->updateIndexEntry($fileObject);
                                $this->identifiedFileUids[] = $fileObject->getUid();
                                break;
                            }
                        }
                    }
                    // create new index when no missing file with same content hash is found
                    if ($fileObject === null) {
                        $fileObject = $this->createIndexEntry($identifier);
                        $this->identifiedFileUids[] = $fileObject->getUid();
                    }
                } else {
                    // update existing file
                    $fileObject = $this->getResourceFactory()->getFileObject($data['uid'], $data);
                    $this->updateIndexEntry($fileObject);
                }
            } catch (InvalidHashException $e) {
                $this->logger->error('Unable to create hash for file: {identifier}', ['identifier' => $identifier]);
            } catch (\Exception $e) {
                $this->logger->error('Unable to index / update file with identifier {identifier}', [
                    'identifier' => $identifier,
                    'exception' => $e,
                ]);
            }
        }
    }

    /**
     * Collects the metadata that is gathered without any extractor:
     * width and height of images in storages that use the "Local" driver.
     *
     * @param File $fileObject
     * @return array Either empty or containing the keys "width" and "height"
     */
    protected function extractRequiredMetaData(File $fileObject): array
    {
        $metaData = [];

        // since the core desperately needs image sizes in metadata table do this manually
        // prevent doing this for remote storages, remote storages must provide the data with extractors
        if ($fileObject->isImage() && $this->storage->getDriverType() === 'Local') {
            $rawFileLocation = $fileObject->getForLocalProcessing(false);
            $imageInfo = GeneralUtility::makeInstance(ImageInfo::class, $rawFileLocation);
            $metaData = [
                'width' => $imageInfo->getWidth(),
                'height' => $imageInfo->getHeight(),
            ];
        }

        return $metaData;
    }

    /****************************
     *         UTILITY
     ****************************/

    /**
     * Builds the property array for an index record from the file information
     * reported by the storage, adding the file type, the sha1 hash and missing = 0.
     *
     * @param string $identifier Identifier of the file within the storage
     * @return array
     */
    protected function gatherFileInformationArray($identifier): array
    {
        $fileInfo = $this->storage->getFileInfoByIdentifier($identifier);
        $fileInfo = $this->transformFromDriverFileInfoArrayToFileObjectFormat($fileInfo);
        $fileInfo['type'] = $this->getFileType($fileInfo['mime_type']);
        $fileInfo['sha1'] = $this->storage->hashFileByIdentifier($identifier, 'sha1');
        $fileInfo['missing'] = 0;

        return $fileInfo;
    }

    /**
     * Maps the part of a MIME type before the slash to one of the File::FILETYPE_* constants.
     *
     * @param string $mimeType MIME type such as "image/png"
     * @return mixed One of the File::FILETYPE_* constants; FILETYPE_UNKNOWN if nothing matches
     */
    protected function getFileType($mimeType)
    {
        [$fileType] = explode('/', $mimeType);
        switch (strtolower($fileType)) {
            case 'text':
                $type = File::FILETYPE_TEXT;
                break;
            case 'image':
                $type = File::FILETYPE_IMAGE;
                break;
            case 'audio':
                $type = File::FILETYPE_AUDIO;
                break;
            case 'video':
                $type = File::FILETYPE_VIDEO;
                break;
            case 'application':
            case 'software':
                $type = File::FILETYPE_APPLICATION;
                break;
            default:
                // Always assign a value so the return below never reads an undefined variable
                $type = File::FILETYPE_UNKNOWN;
        }
        return $type;
    }

    /**
     * Renames the keys of a driver file info array to the property names used
     * by file objects. Driver keys mapped to null are dropped; keys without a
     * mapping are passed through unchanged.
     *
     * @param array $fileInfo File information as delivered by the storage
     * @return array
     */
    protected function transformFromDriverFileInfoArrayToFileObjectFormat(array $fileInfo)
    {
        $mappingInfo = [
            // 'driverKey' => 'fileProperty' Key is from the driver, value is for the property in the file
            'size' => 'size',
            'atime' => null,
            'mtime' => 'modification_date',
            'ctime' => 'creation_date',
            'mimetype' => 'mime_type',
        ];
        $mappedFileInfo = [];
        foreach ($fileInfo as $key => $value) {
            if (!array_key_exists($key, $mappingInfo)) {
                $mappedFileInfo[$key] = $value;
                continue;
            }
            if ($mappingInfo[$key] !== null) {
                $mappedFileInfo[$mappingInfo[$key]] = $value;
            }
        }
        return $mappedFileInfo;
    }

    /**
     * @return FileIndexRepository
     */
    protected function getFileIndexRepository()
    {
        return GeneralUtility::makeInstance(FileIndexRepository::class);
    }

    /**
     * @return MetaDataRepository
     */
    protected function getMetaDataRepository()
    {
        return GeneralUtility::makeInstance(MetaDataRepository::class);
    }

    /**
     * @return ResourceFactory
     */
    protected function getResourceFactory()
    {
        return GeneralUtility::makeInstance(ResourceFactory::class);
    }

    /**
     * Returns the extractor service, creating it on first use.
     */
    protected function getExtractorService(): ExtractorService
    {
        if ($this->extractorService === null) {
            $this->extractorService = GeneralUtility::makeInstance(ExtractorService::class);
        }
        return $this->extractorService;
    }
}
‪TYPO3\CMS\Core\Resource\Index\MetaDataRepository
Definition: MetaDataRepository.php:39
‪TYPO3\CMS\Core\Resource\Index\ExtractorInterface\getFileTypeRestrictions
‪array getFileTypeRestrictions()
‪TYPO3\CMS\Core\Resource\AbstractFile\FILETYPE_UNKNOWN
‪const FILETYPE_UNKNOWN
Definition: AbstractFile.php:69
‪TYPO3\CMS\Core\Resource\Index\FileIndexRepository\findInStorageAndNotInUidList
‪array findInStorageAndNotInUidList(ResourceStorage $storage, array $uidList)
Definition: FileIndexRepository.php:425
‪TYPO3\CMS\Core\Resource\Index\FileIndexRepository
Definition: FileIndexRepository.php:44
‪TYPO3\CMS\Core\Resource\AbstractFile\getType
‪int getType()
Definition: AbstractFile.php:272
‪TYPO3\CMS\Core\Resource\Index\Indexer\$filesToUpdate
‪array $filesToUpdate
Definition: Indexer.php:39
‪TYPO3\CMS\Core\Resource\Index\Indexer\getExtractorService
‪getExtractorService()
Definition: Indexer.php:406
‪TYPO3\CMS\Core\Resource\AbstractFile\getForLocalProcessing
‪non empty string getForLocalProcessing(bool $writable=true)
Definition: AbstractFile.php:551
‪TYPO3\CMS\Core\Resource\AbstractFile\FILETYPE_VIDEO
‪const FILETYPE_VIDEO
Definition: AbstractFile.php:93
‪TYPO3\CMS\Core\Resource\Index\Indexer\getFileType
‪string getFileType($mimeType)
Definition: Indexer.php:319
‪TYPO3\CMS\Core\Resource\Index\Indexer\transformFromDriverFileInfoArrayToFileObjectFormat
‪array transformFromDriverFileInfoArrayToFileObjectFormat(array $fileInfo)
Definition: Indexer.php:353
‪TYPO3\CMS\Core\Resource\Index\Indexer
Definition: Indexer.php:34
‪TYPO3\CMS\Core\Resource\Index\Indexer\getMetaDataRepository
‪MetaDataRepository getMetaDataRepository()
Definition: Indexer.php:391
‪TYPO3\CMS\Core\Resource\Index\ExtractorInterface
Definition: ExtractorInterface.php:25
‪TYPO3\CMS\Core\Resource\Index\Indexer\isFileTypeSupportedByExtractor
‪bool isFileTypeSupportedByExtractor(File $file, ExtractorInterface $extractor)
Definition: Indexer.php:181
‪TYPO3\CMS\Core\Resource\Index\Indexer\detectChangedFilesInStorage
‪detectChangedFilesInStorage(array $fileIdentifierArray)
Definition: Indexer.php:194
‪TYPO3\CMS\Core\Resource\AbstractFile\isImage
‪bool isImage()
Definition: AbstractFile.php:309
‪TYPO3\CMS\Core\Resource\Index\Indexer\updateIndexEntry
‪updateIndexEntry(File $fileObject)
Definition: Indexer.php:92
‪TYPO3\CMS\Core\Resource\Index\Indexer\detectMissingFiles
‪detectMissingFiles()
Definition: Indexer.php:162
‪TYPO3\CMS\Core\Resource\Index\Indexer\$storage
‪ResourceStorage $storage
Definition: Indexer.php:47
‪TYPO3\CMS\Core\Resource\Exception\IllegalFileExtensionException
Definition: IllegalFileExtensionException.php:24
‪TYPO3\CMS\Core\Resource\AbstractFile\FILETYPE_IMAGE
‪const FILETYPE_IMAGE
Definition: AbstractFile.php:81
‪TYPO3\CMS\Core\Resource\Exception\InsufficientFileAccessPermissionsException
Definition: InsufficientFileAccessPermissionsException.php:24
‪TYPO3\CMS\Core\Resource\Index\Indexer\getFileIndexRepository
‪FileIndexRepository getFileIndexRepository()
Definition: Indexer.php:381
‪TYPO3\CMS\Core\Resource\Index\Indexer\extractRequiredMetaData
‪extractRequiredMetaData(File $fileObject)
Definition: Indexer.php:275
‪TYPO3\CMS\Core\Resource\Index\Indexer\gatherFileInformationArray
‪gatherFileInformationArray($identifier)
Definition: Indexer.php:302
‪TYPO3\CMS\Core\Resource\Index\Indexer\__construct
‪__construct(ResourceStorage $storage)
Definition: Indexer.php:53
‪TYPO3\CMS\Core\Resource\Index\FileIndexRepository\updateIndexingTime
‪updateIndexingTime($fileUid)
Definition: FileIndexRepository.php:458
‪TYPO3\CMS\Core\Resource\Index\FileIndexRepository\findByContentHash
‪mixed findByContentHash($hash)
Definition: FileIndexRepository.php:148
‪TYPO3\CMS\Core\Resource\Index\FileIndexRepository\findInStorageWithIndexOutstanding
‪array findInStorageWithIndexOutstanding(ResourceStorage $storage, $limit=-1)
Definition: FileIndexRepository.php:397
‪TYPO3\CMS\Core\Resource\ResourceFactory
Definition: ResourceFactory.php:41
‪TYPO3\CMS\Core\Resource\File
Definition: File.php:26
‪TYPO3\CMS\Webhooks\Message\$record
‪identifier readonly int readonly array $record
Definition: PageModificationMessage.php:36
‪TYPO3\CMS\Core\Resource\Index\Indexer\extractMetaData
‪extractMetaData(File $fileObject)
Definition: Indexer.php:147
‪TYPO3\CMS\Core\Resource\Service\ExtractorService
Definition: ExtractorService.php:29
‪TYPO3\CMS\Core\Resource\Index\Indexer\processChangesInStorages
‪processChangesInStorages()
Definition: Indexer.php:110
‪TYPO3\CMS\Core\Resource\Index\Indexer\createIndexEntry
‪createIndexEntry($identifier)
Definition: Indexer.php:64
‪TYPO3\CMS\Core\Resource\AbstractFile\FILETYPE_AUDIO
‪const FILETYPE_AUDIO
Definition: AbstractFile.php:87
‪TYPO3\CMS\Core\Resource\Index
Definition: ExtractorInterface.php:16
‪TYPO3\CMS\Core\Type\File\ImageInfo
Definition: ImageInfo.php:28
‪TYPO3\CMS\Core\Resource\AbstractFile\FILETYPE_TEXT
‪const FILETYPE_TEXT
Definition: AbstractFile.php:75
‪TYPO3\CMS\Core\Resource\AbstractFile\getUid
int getUid()
Definition: AbstractFile.php:188
‪TYPO3\CMS\Core\Resource\Exception
Definition: Exception.php:22
‪TYPO3\CMS\Core\Resource\Index\Indexer\processChangedAndNewFiles
‪processChangedAndNewFiles()
Definition: Indexer.php:222
‪TYPO3\CMS\Core\Resource\File\getMetaData
‪getMetaData()
Definition: File.php:322
‪TYPO3\CMS\Core\Resource\ResourceStorage
Definition: ResourceStorage.php:127
‪TYPO3\CMS\Core\Resource\AbstractFile\FILETYPE_APPLICATION
‪const FILETYPE_APPLICATION
Definition: AbstractFile.php:99
‪TYPO3\CMS\Core\Resource\Index\FileIndexRepository\markFileAsMissing
‪markFileAsMissing($fileUid)
Definition: FileIndexRepository.php:477
‪TYPO3\CMS\Core\Resource\File\updateProperties
‪updateProperties(array $properties)
Definition: File.php:155
‪TYPO3\CMS\Core\Resource\ResourceFactory\getFileObject
‪File getFileObject($uid, array $fileData=[])
Definition: ResourceFactory.php:192
‪TYPO3\CMS\Core\Resource\Index\Indexer\$identifiedFileUids
‪int[] $identifiedFileUids
Definition: Indexer.php:43
‪TYPO3\CMS\Core\Resource\Index\Indexer\$extractorService
‪ExtractorService $extractorService
Definition: Indexer.php:51
‪TYPO3\CMS\Core\Utility\GeneralUtility
Definition: GeneralUtility.php:51
‪TYPO3\CMS\Core\Resource\AbstractFile\getIdentifier
‪getIdentifier()
Definition: AbstractFile.php:137
‪TYPO3\CMS\Core\Resource\Exception\InvalidHashException
Definition: InvalidHashException.php:27
‪TYPO3\CMS\Core\Resource\Index\Indexer\runMetaDataExtraction
‪runMetaDataExtraction($maximumFileCount=-1)
Definition: Indexer.php:123
‪TYPO3\CMS\Webhooks\Message\$identifier
‪identifier readonly string $identifier
Definition: FileAddedMessage.php:37
‪TYPO3\CMS\Core\Resource\Index\Indexer\getResourceFactory
‪ResourceFactory getResourceFactory()
Definition: Indexer.php:401
‪TYPO3\CMS\Core\Resource\Index\FileIndexRepository\findOneByStorageAndIdentifier
array|bool findOneByStorageAndIdentifier(ResourceStorage $storage, $identifier)
Definition: FileIndexRepository.php:124