‪TYPO3CMS  10.4
Indexer.php
Go to the documentation of this file.
1 <?php
2 
/*
 * This file is part of the TYPO3 CMS project.
 *
 * It is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License, either version 2
 * of the License, or any later version.
 *
 * For the full copyright and license information, please read the
 * LICENSE.txt file that was distributed with this source code.
 *
 * The TYPO3 project - inspiring people to share!
 */
15 
namespace TYPO3\CMS\Core\Resource\Index;

use Psr\Log\LoggerAwareInterface;
use Psr\Log\LoggerAwareTrait;
use TYPO3\CMS\Core\Resource\Exception\IllegalFileExtensionException;
use TYPO3\CMS\Core\Resource\Exception\InsufficientFileAccessPermissionsException;
use TYPO3\CMS\Core\Resource\Exception\InvalidHashException;
use TYPO3\CMS\Core\Resource\File;
use TYPO3\CMS\Core\Resource\ResourceFactory;
use TYPO3\CMS\Core\Resource\ResourceStorage;
use TYPO3\CMS\Core\Resource\Service\ExtractorService;
use TYPO3\CMS\Core\Type\File\ImageInfo;
use TYPO3\CMS\Core\Utility\GeneralUtility;
29 
/**
 * Keeps the file index of one storage in sync with the files that storage reports.
 *
 * The indexer creates and updates index records, flags records whose file can
 * no longer be found as missing, and feeds extracted metadata into the
 * metadata aspect of each file.
 */
class Indexer implements LoggerAwareInterface
{
    use LoggerAwareTrait;

    /**
     * Files that need a new or refreshed index record, keyed by file identifier.
     * The value is the existing index record, or null when no record exists yet.
     *
     * @var array
     */
    protected $filesToUpdate = [];

    /**
     * Uids of all index records that were matched to a file during the current run.
     *
     * @var int[]
     */
    protected $identifiedFileUids = [];

    /**
     * The storage this indexer works on.
     *
     * @var ResourceStorage
     */
    protected $storage;

    /**
     * Lazily created by getExtractorService().
     *
     * @var ExtractorService
     */
    protected $extractorService;

    /**
     * @param ResourceStorage $storage The storage whose files are indexed
     */
    public function __construct(ResourceStorage $storage)
    {
        $this->storage = $storage;
    }

    /**
     * Creates an index record for the given file identifier and stores its metadata.
     *
     * @param string $identifier Non-empty file identifier inside the storage
     * @return File The file object built from the new index record
     * @throws \InvalidArgumentException If $identifier is not a non-empty string
     */
    public function createIndexEntry($identifier): File
    {
        if (!is_string($identifier) || $identifier === '') {
            throw new \InvalidArgumentException(
                'Invalid file identifier given. It must be of type string and not empty. "' . gettype($identifier) . '" given.',
                1401732565
            );
        }

        $fileProperties = $this->gatherFileInformationArray($identifier);
        $record = $this->getFileIndexRepository()->addRaw($fileProperties);

        $fileObject = $this->getResourceFactory()->getFileObject($record['uid'], $record);
        $metaData = $this->extractRequiredMetaData($fileObject);

        // Extractor results take precedence over the required metadata on key collisions.
        if ($this->storage->autoExtractMetadataEnabled()) {
            $metaData = array_merge($metaData, $this->getExtractorService()->extractMetaData($fileObject));
        }
        $fileObject->getMetaData()->add($metaData)->save();

        return $fileObject;
    }

    /**
     * Refreshes the index record and the metadata of an already indexed file.
     *
     * @param File $fileObject The file whose index record is refreshed
     * @return File The same file object, with updated properties
     */
    public function updateIndexEntry(File $fileObject): File
    {
        $updatedInformation = $this->gatherFileInformationArray($fileObject->getIdentifier());
        $fileObject->updateProperties($updatedInformation);

        $this->getFileIndexRepository()->update($fileObject);
        $metaData = $this->extractRequiredMetaData($fileObject);

        // Extractor results take precedence over the required metadata on key collisions.
        if ($this->storage->autoExtractMetadataEnabled()) {
            $metaData = array_merge($metaData, $this->getExtractorService()->extractMetaData($fileObject));
        }
        $fileObject->getMetaData()->add($metaData)->save();

        return $fileObject;
    }

    /**
     * Synchronises the index with the storage: detects changed and new files,
     * indexes them, and finally flags vanished files as missing.
     */
    public function processChangesInStorages()
    {
        // get all file-identifiers from the storage
        $availableFiles = $this->storage->getFileIdentifiersInFolder(
            $this->storage->getRootLevelFolder(false)->getIdentifier(),
            true,
            true
        );
        $this->detectChangedFilesInStorage($availableFiles);
        // Must run before detectMissingFiles(): it adds the uids of new and
        // moved files to $identifiedFileUids, which the missing check relies on.
        $this->processChangedAndNewFiles();

        $this->detectMissingFiles();
    }

    /**
     * Runs metadata extraction for index records of this storage that still
     * have their indexing outstanding.
     *
     * @param int $maximumFileCount Passed to the repository as limit; the default is -1
     */
    public function runMetaDataExtraction($maximumFileCount = -1)
    {
        $fileIndexRecords = $this->getFileIndexRepository()->findInStorageWithIndexOutstanding($this->storage, $maximumFileCount);
        foreach ($fileIndexRecords as $indexRecord) {
            $fileObject = $this->getResourceFactory()->getFileObject($indexRecord['uid'], $indexRecord);
            // Check for existence of file before extraction
            if ($fileObject->exists()) {
                try {
                    $this->extractMetaData($fileObject);
                } catch (InsufficientFileAccessPermissionsException $e) {
                    // We skip files that are not accessible
                } catch (IllegalFileExtensionException $e) {
                    // We skip files that have an extension that we don't allow
                }
            } else {
                // Mark file as missing and continue with next record
                $this->getFileIndexRepository()->markFileAsMissing($indexRecord['uid']);
            }
        }
    }

    /**
     * Merges the extractor results into the file's existing metadata, saves
     * them, and records the indexing time for the file.
     *
     * @param File $fileObject The file to extract metadata for
     */
    public function extractMetaData(File $fileObject)
    {
        // Merge the existing metadata as a flat field map; wrapping it in a
        // list would only add a stray numeric key holding the whole array.
        $metaData = array_merge(
            $fileObject->getMetaData()->get(),
            $this->getExtractorService()->extractMetaData($fileObject)
        );

        $fileObject->getMetaData()->add($metaData)->save();

        $this->getFileIndexRepository()->updateIndexingTime($fileObject->getUid());
    }

    /**
     * Flags index records as missing when they were not matched to a file in
     * this run and the storage confirms the file is gone.
     */
    protected function detectMissingFiles()
    {
        $indexedNotExistentFiles = $this->getFileIndexRepository()->findInStorageAndNotInUidList(
            $this->storage,
            $this->identifiedFileUids
        );

        foreach ($indexedNotExistentFiles as $record) {
            // Double-check with the storage before flagging the record.
            if (!$this->storage->hasFile($record['identifier'])) {
                $this->getFileIndexRepository()->markFileAsMissing($record['uid']);
            }
        }
    }

    /**
     * Tells whether the extractor accepts the type of the given file.
     * An extractor without file type restrictions accepts every file.
     *
     * @param File $file
     * @param ExtractorInterface $extractor
     * @return bool
     */
    protected function isFileTypeSupportedByExtractor(File $file, ExtractorInterface $extractor)
    {
        $fileTypeRestrictions = $extractor->getFileTypeRestrictions();
        if (empty($fileTypeRestrictions)) {
            return true;
        }
        // Loose comparison kept on purpose: extractors may declare their
        // restrictions as numeric strings. NOTE(review): confirm before making this strict.
        return in_array($file->getType(), $fileTypeRestrictions);
    }

    /**
     * Compares the given files against the index and queues new, modified or
     * previously missing files in $filesToUpdate.
     *
     * @param array $fileIdentifierArray File identifiers reported by the storage
     */
    protected function detectChangedFilesInStorage(array $fileIdentifierArray)
    {
        foreach ($fileIdentifierArray as $fileIdentifier) {
            // skip processed files
            if ($this->storage->isWithinProcessingFolder($fileIdentifier)) {
                continue;
            }
            // Get the modification time for file-identifier from the storage
            $modificationTime = $this->storage->getFileInfoByIdentifier($fileIdentifier, ['mtime']);
            // Look up the index record to compare its modification time with the one reported by the storage
            $indexRecord = $this->getFileIndexRepository()->findOneByStorageUidAndIdentifier($this->storage->getUid(), $fileIdentifier);

            if ($indexRecord === false) {
                // Not indexed yet: null marks the file as new.
                $this->filesToUpdate[$fileIdentifier] = null;
                continue;
            }

            $this->identifiedFileUids[] = $indexRecord['uid'];

            if ((int)$indexRecord['modification_date'] !== $modificationTime['mtime'] || $indexRecord['missing']) {
                $this->filesToUpdate[$fileIdentifier] = $indexRecord;
            }
        }
    }

    /**
     * Indexes every file queued in $filesToUpdate.
     *
     * A new file whose content hash matches an index record of a file that no
     * longer exists is treated as moved/renamed and reuses that record.
     * Failures are logged per file and do not stop the loop.
     */
    protected function processChangedAndNewFiles()
    {
        foreach ($this->filesToUpdate as $identifier => $data) {
            try {
                if ($data === null) {
                    // search for files with same content hash in indexed storage
                    $fileHash = $this->storage->hashFileByIdentifier($identifier, 'sha1');
                    $files = $this->getFileIndexRepository()->findByContentHash($fileHash);
                    $fileObject = null;
                    if (!empty($files)) {
                        foreach ($files as $fileIndexEntry) {
                            // check if file is missing then we assume it's moved/renamed
                            if (!$this->storage->hasFile($fileIndexEntry['identifier'])) {
                                $fileObject = $this->getResourceFactory()->getFileObject(
                                    $fileIndexEntry['uid'],
                                    $fileIndexEntry
                                );
                                $fileObject->updateProperties(
                                    [
                                        'identifier' => $identifier,
                                    ]
                                );
                                $this->updateIndexEntry($fileObject);
                                $this->identifiedFileUids[] = $fileObject->getUid();
                                break;
                            }
                        }
                    }
                    // create new index when no missing file with same content hash is found
                    if ($fileObject === null) {
                        $fileObject = $this->createIndexEntry($identifier);
                        $this->identifiedFileUids[] = $fileObject->getUid();
                    }
                } else {
                    // update existing file
                    $fileObject = $this->getResourceFactory()->getFileObject($data['uid'], $data);
                    $this->updateIndexEntry($fileObject);
                }
            } catch (InvalidHashException $e) {
                $this->logger->error('Unable to create hash for file ' . $identifier);
            } catch (\Exception $e) {
                $this->logger->error('Unable to index / update file with identifier ' . $identifier . ' (Error: ' . $e->getMessage() . ')');
            }
        }
    }

    /**
     * Collects the metadata that is gathered regardless of the extractor
     * configuration: width and height for images on "Local" driver storages.
     *
     * @param File $fileObject
     * @return array Either ['width' => ..., 'height' => ...] or an empty array
     */
    protected function extractRequiredMetaData(File $fileObject): array
    {
        $metaData = [];

        // since the core desperately needs image sizes in metadata table do this manually
        // prevent doing this for remote storages, remote storages must provide the data with extractors
        if ($fileObject->isImage() && $this->storage->getDriverType() === 'Local') {
            $rawFileLocation = $fileObject->getForLocalProcessing(false);
            $imageInfo = GeneralUtility::makeInstance(ImageInfo::class, $rawFileLocation);
            $metaData = [
                'width' => $imageInfo->getWidth(),
                'height' => $imageInfo->getHeight(),
            ];
        }

        return $metaData;
    }

    /****************************
     * UTILITY
     ****************************/

    /**
     * Builds the property array of an index record from the storage's file
     * information, adding the file type, the sha1 hash and missing = 0.
     *
     * @param string $identifier File identifier inside the storage
     * @return array
     */
    protected function gatherFileInformationArray($identifier): array
    {
        $fileInfo = $this->storage->getFileInfoByIdentifier($identifier);
        $fileInfo = $this->transformFromDriverFileInfoArrayToFileObjectFormat($fileInfo);
        $fileInfo['type'] = $this->getFileType($fileInfo['mime_type']);
        $fileInfo['sha1'] = $this->storage->hashFileByIdentifier($identifier, 'sha1');
        $fileInfo['missing'] = 0;

        return $fileInfo;
    }

    /**
     * Maps the part of a MIME type before the slash to a File::FILETYPE_* constant.
     *
     * @param string $mimeType For example "image/png"
     * @return mixed One of the File::FILETYPE_* constants; FILETYPE_UNKNOWN for any other prefix
     */
    protected function getFileType($mimeType)
    {
        [$fileType] = explode('/', $mimeType);
        switch (strtolower($fileType)) {
            case 'text':
                $type = File::FILETYPE_TEXT;
                break;
            case 'image':
                $type = File::FILETYPE_IMAGE;
                break;
            case 'audio':
                $type = File::FILETYPE_AUDIO;
                break;
            case 'video':
                $type = File::FILETYPE_VIDEO;
                break;
            case 'application':
            case 'software':
                $type = File::FILETYPE_APPLICATION;
                break;
            default:
                // Every branch must assign $type, otherwise the return below
                // would read an undefined variable.
                $type = File::FILETYPE_UNKNOWN;
        }
        return $type;
    }

    /**
     * Renames the keys of a driver file info array to the property names used
     * by file objects. Keys mapped to null are dropped; keys without a mapping
     * are copied unchanged.
     *
     * @param array $fileInfo File information as delivered by the driver
     * @return array
     */
    protected function transformFromDriverFileInfoArrayToFileObjectFormat(array $fileInfo)
    {
        $mappingInfo = [
            // 'driverKey' => 'fileProperty' Key is from the driver, value is for the property in the file
            'size' => 'size',
            'atime' => null,
            'mtime' => 'modification_date',
            'ctime' => 'creation_date',
            'mimetype' => 'mime_type'
        ];
        $mappedFileInfo = [];
        foreach ($fileInfo as $key => $value) {
            if (!array_key_exists($key, $mappingInfo)) {
                // Unknown driver keys pass through untouched.
                $mappedFileInfo[$key] = $value;
                continue;
            }
            if ($mappingInfo[$key] !== null) {
                $mappedFileInfo[$mappingInfo[$key]] = $value;
            }
        }
        return $mappedFileInfo;
    }

    /**
     * @return FileIndexRepository
     */
    protected function getFileIndexRepository()
    {
        return FileIndexRepository::getInstance();
    }

    /**
     * @return MetaDataRepository
     */
    protected function getMetaDataRepository()
    {
        return MetaDataRepository::getInstance();
    }

    /**
     * @return ResourceFactory
     */
    protected function getResourceFactory()
    {
        return GeneralUtility::makeInstance(ResourceFactory::class);
    }

    /**
     * Returns the extractor service, creating it on first use.
     *
     * @return ExtractorService
     */
    protected function getExtractorService(): ExtractorService
    {
        if ($this->extractorService === null) {
            $this->extractorService = GeneralUtility::makeInstance(ExtractorService::class);
        }
        return $this->extractorService;
    }
}
‪TYPO3\CMS\Core\Resource\Index\MetaDataRepository
Definition: MetaDataRepository.php:40
‪TYPO3\CMS\Core\Resource\Index\ExtractorInterface\getFileTypeRestrictions
‪array getFileTypeRestrictions()
‪TYPO3\CMS\Core\Resource\AbstractFile\FILETYPE_UNKNOWN
‪const FILETYPE_UNKNOWN
Definition: AbstractFile.php:66
‪TYPO3\CMS\Core\Resource\Index\FileIndexRepository\findInStorageAndNotInUidList
‪array findInStorageAndNotInUidList(ResourceStorage $storage, array $uidList)
Definition: FileIndexRepository.php:475
‪TYPO3\CMS\Core\Resource\File\getMetaData
‪MetaDataAspect getMetaData()
Definition: File.php:356
‪TYPO3\CMS\Core\Resource\Index\FileIndexRepository
Definition: FileIndexRepository.php:45
‪TYPO3\CMS\Core\Resource\AbstractFile\getType
‪int getType()
Definition: AbstractFile.php:286
‪TYPO3\CMS\Core\Resource\Index\Indexer\$filesToUpdate
‪array $filesToUpdate
Definition: Indexer.php:39
‪TYPO3\CMS\Core\Resource\Index\FileIndexRepository\addRaw
‪array addRaw(array $data)
Definition: FileIndexRepository.php:338
‪TYPO3\CMS\Core\Resource\AbstractFile\getIdentifier
‪string getIdentifier()
Definition: AbstractFile.php:141
‪TYPO3\CMS\Core\Resource\AbstractFile\FILETYPE_VIDEO
‪const FILETYPE_VIDEO
Definition: AbstractFile.php:90
‪TYPO3\CMS\Core\Resource\AbstractFile\getForLocalProcessing
‪string getForLocalProcessing($writable=true)
Definition: AbstractFile.php:577
‪TYPO3\CMS\Core\Resource\Index\Indexer\getFileType
‪string getFileType($mimeType)
Definition: Indexer.php:329
‪TYPO3\CMS\Core\Resource\Index\Indexer\transformFromDriverFileInfoArrayToFileObjectFormat
‪array transformFromDriverFileInfoArrayToFileObjectFormat(array $fileInfo)
Definition: Indexer.php:364
‪TYPO3\CMS\Core\Resource\Index\Indexer\updateIndexEntry
‪File updateIndexEntry(File $fileObject)
Definition: Indexer.php:97
‪TYPO3\CMS\Core\Resource\Index\Indexer
Definition: Indexer.php:34
‪TYPO3\CMS\Core\Resource\Index\Indexer\createIndexEntry
‪File createIndexEntry($identifier)
Definition: Indexer.php:68
‪TYPO3\CMS\Core\Resource\Index\Indexer\getMetaDataRepository
‪MetaDataRepository getMetaDataRepository()
Definition: Indexer.php:402
‪TYPO3\CMS\Core\Resource\Index\ExtractorInterface
Definition: ExtractorInterface.php:25
‪TYPO3\CMS\Core\Resource\Index\Indexer\isFileTypeSupportedByExtractor
‪bool isFileTypeSupportedByExtractor(File $file, ExtractorInterface $extractor)
Definition: Indexer.php:187
‪TYPO3\CMS\Core\Resource\Index\Indexer\detectChangedFilesInStorage
‪detectChangedFilesInStorage(array $fileIdentifierArray)
Definition: Indexer.php:202
‪TYPO3\CMS\Core\Resource\AbstractFile\isImage
‪bool isImage()
Definition: AbstractFile.php:323
‪TYPO3\CMS\Core\Resource\Index\FileIndexRepository\findOneByStorageUidAndIdentifier
‪array bool findOneByStorageUidAndIdentifier($storageUid, $identifier)
Definition: FileIndexRepository.php:132
‪TYPO3\CMS\Core\Resource\Index\FileIndexRepository\update
‪update(File $file)
Definition: FileIndexRepository.php:408
‪TYPO3\CMS\Core\Resource\Index\Indexer\detectMissingFiles
‪detectMissingFiles()
Definition: Indexer.php:166
‪TYPO3\CMS\Core\Resource\Index\Indexer\$storage
‪ResourceStorage $storage
Definition: Indexer.php:47
‪TYPO3\CMS\Core\Resource\Exception\IllegalFileExtensionException
Definition: IllegalFileExtensionException.php:24
‪TYPO3\CMS\Core\Resource\AbstractFile\FILETYPE_IMAGE
‪const FILETYPE_IMAGE
Definition: AbstractFile.php:78
‪TYPO3\CMS\Core\Resource\Exception\InsufficientFileAccessPermissionsException
Definition: InsufficientFileAccessPermissionsException.php:24
‪TYPO3\CMS\Core\Resource\Index\Indexer\getFileIndexRepository
‪FileIndexRepository getFileIndexRepository()
Definition: Indexer.php:392
‪TYPO3\CMS\Core\Resource\Index\Indexer\__construct
‪__construct(ResourceStorage $storage)
Definition: Indexer.php:56
‪TYPO3\CMS\Core\Resource\Index\Indexer\getExtractorService
‪ExtractorService getExtractorService()
Definition: Indexer.php:420
‪TYPO3\CMS\Core\Resource\Index\Indexer\extractRequiredMetaData
‪array extractRequiredMetaData(File $fileObject)
Definition: Indexer.php:283
‪TYPO3\CMS\Core\Resource\MetaDataAspect\add
‪self add(array $metaData)
Definition: MetaDataAspect.php:63
‪TYPO3\CMS\Core\Resource\Index\MetaDataRepository\getInstance
‪static MetaDataRepository getInstance()
Definition: MetaDataRepository.php:240
‪TYPO3\CMS\Core\Resource\Index\FileIndexRepository\updateIndexingTime
‪updateIndexingTime($fileUid)
Definition: FileIndexRepository.php:508
‪TYPO3\CMS\Core\Resource\Index\FileIndexRepository\findByContentHash
‪mixed findByContentHash($hash)
Definition: FileIndexRepository.php:187
‪TYPO3\CMS\Core\Resource\Index\FileIndexRepository\findInStorageWithIndexOutstanding
‪array findInStorageWithIndexOutstanding(ResourceStorage $storage, $limit=-1)
Definition: FileIndexRepository.php:446
‪TYPO3\CMS\Core\Resource\ResourceFactory
Definition: ResourceFactory.php:41
‪TYPO3\CMS\Core\Resource\Index\FileIndexRepository\getInstance
‪static FileIndexRepository getInstance()
Definition: FileIndexRepository.php:78
‪TYPO3\CMS\Core\Resource\File
Definition: File.php:24
‪TYPO3\CMS\Core\Resource\Index\Indexer\extractMetaData
‪extractMetaData(File $fileObject)
Definition: Indexer.php:151
‪TYPO3\CMS\Core\Resource\MetaDataAspect\get
‪array get()
Definition: MetaDataAspect.php:76
‪TYPO3\CMS\Core\Resource\Service\ExtractorService
Definition: ExtractorService.php:28
‪TYPO3\CMS\Core\Resource\Index\Indexer\processChangesInStorages
‪processChangesInStorages()
Definition: Indexer.php:112
‪TYPO3\CMS\Core\Resource\AbstractFile\getUid
‪int getUid()
Definition: AbstractFile.php:202
‪TYPO3\CMS\Core\Resource\AbstractFile\FILETYPE_AUDIO
‪const FILETYPE_AUDIO
Definition: AbstractFile.php:84
‪TYPO3\CMS\Core\Resource\Index
Definition: ExtractorInterface.php:16
‪TYPO3\CMS\Core\Type\File\ImageInfo
Definition: ImageInfo.php:27
‪TYPO3\CMS\Core\Resource\AbstractFile\FILETYPE_TEXT
‪const FILETYPE_TEXT
Definition: AbstractFile.php:72
‪TYPO3\CMS\Core\Resource\Exception
Definition: Exception.php:22
‪TYPO3\CMS\Core\Resource\Index\Indexer\processChangedAndNewFiles
‪processChangedAndNewFiles()
Definition: Indexer.php:230
‪TYPO3\CMS\Core\Resource\ResourceStorage
Definition: ResourceStorage.php:122
‪TYPO3\CMS\Core\Resource\AbstractFile\FILETYPE_APPLICATION
‪const FILETYPE_APPLICATION
Definition: AbstractFile.php:96
‪TYPO3\CMS\Core\Resource\Index\FileIndexRepository\markFileAsMissing
‪markFileAsMissing($fileUid)
Definition: FileIndexRepository.php:527
‪TYPO3\CMS\Core\Resource\File\updateProperties
‪updateProperties(array $properties)
Definition: File.php:182
‪TYPO3\CMS\Core\Resource\ResourceFactory\getFileObject
‪File getFileObject($uid, array $fileData=[])
Definition: ResourceFactory.php:390
‪TYPO3\CMS\Core\Resource\Index\Indexer\$identifiedFileUids
‪int[] $identifiedFileUids
Definition: Indexer.php:43
‪TYPO3\CMS\Core\Resource\Index\Indexer\$extractorService
‪ExtractorService $extractorService
Definition: Indexer.php:51
‪TYPO3\CMS\Core\Utility\GeneralUtility
Definition: GeneralUtility.php:46
‪TYPO3\CMS\Core\Resource\Index\Indexer\gatherFileInformationArray
‪array gatherFileInformationArray($identifier)
Definition: Indexer.php:312
‪TYPO3\CMS\Core\Resource\Exception\InvalidHashException
Definition: InvalidHashException.php:27
‪TYPO3\CMS\Core\Resource\Index\Indexer\runMetaDataExtraction
‪runMetaDataExtraction($maximumFileCount=-1)
Definition: Indexer.php:125
‪TYPO3\CMS\Core\Resource\Index\Indexer\getResourceFactory
‪ResourceFactory getResourceFactory()
Definition: Indexer.php:412