‪TYPO3CMS  11.5
Indexer.php
Go to the documentation of this file.
1 <?php
2 
3 /*
4  * This file is part of the TYPO3 CMS project.
5  *
6  * It is free software; you can redistribute it and/or modify it under
7  * the terms of the GNU General Public License, either version 2
8  * of the License, or any later version.
9  *
10  * For the full copyright and license information, please read the
11  * LICENSE.txt file that was distributed with this source code.
12  *
13  * The TYPO3 project - inspiring people to share!
14  */
15 
namespace TYPO3\CMS\Core\Resource\Index;

use Psr\Log\LoggerAwareInterface;
use Psr\Log\LoggerAwareTrait;
use TYPO3\CMS\Core\Resource\Exception\IllegalFileExtensionException;
use TYPO3\CMS\Core\Resource\Exception\InsufficientFileAccessPermissionsException;
use TYPO3\CMS\Core\Resource\Exception\InvalidHashException;
use TYPO3\CMS\Core\Resource\File;
use TYPO3\CMS\Core\Resource\ResourceFactory;
use TYPO3\CMS\Core\Resource\ResourceStorage;
use TYPO3\CMS\Core\Resource\Service\ExtractorService;
use TYPO3\CMS\Core\Type\File\ImageInfo;
use TYPO3\CMS\Core\Utility\GeneralUtility;

/**
 * Keeps the file index of a single storage in sync with the files the storage reports.
 *
 * The indexer creates and updates index records, marks records whose file has vanished
 * as missing, and stores metadata delivered by the extractor service.
 */
class Indexer implements LoggerAwareInterface
{
    use LoggerAwareTrait;

    /**
     * Files that need a new or refreshed index record, keyed by file identifier.
     * The value is the existing index record, or null if the file has no record yet.
     *
     * @var array
     */
    protected $filesToUpdate = [];

    /**
     * UIDs of all index records that were matched to a file during the current run.
     *
     * @var int[]
     */
    protected $identifiedFileUids = [];

    /**
     * The storage this indexer works on.
     *
     * @var ResourceStorage
     */
    protected $storage;

    /**
     * Lazily created by getExtractorService().
     *
     * @var ExtractorService
     */
    protected $extractorService;

    /**
     * @param ResourceStorage $storage The storage whose files are indexed
     */
    public function __construct(ResourceStorage $storage)
    {
        $this->storage = $storage;
    }

    /**
     * Creates an index record for the file with the given identifier and stores its metadata.
     *
     * @param string $identifier Non-empty file identifier inside the storage
     * @return File The file object built from the new index record
     * @throws \InvalidArgumentException If the identifier is not a non-empty string
     */
    public function createIndexEntry($identifier): File
    {
        if (!is_string($identifier) || $identifier === '') {
            throw new \InvalidArgumentException(
                'Invalid file identifier given. It must be of type string and not empty. "' . gettype($identifier) . '" given.',
                1401732565
            );
        }

        $fileProperties = $this->gatherFileInformationArray($identifier);
        $fileIndexRepository = $this->getFileIndexRepository();

        $record = $fileIndexRepository->addRaw($fileProperties);
        $fileObject = $this->getResourceFactory()->getFileObject($record['uid'], $record);
        $fileIndexRepository->updateIndexingTime($fileObject->getUid());

        $metaData = $this->extractRequiredMetaData($fileObject);
        if ($this->storage->autoExtractMetadataEnabled()) {
            // Values from the extractor service override the required metadata on key collision.
            $metaData = array_merge($metaData, $this->getExtractorService()->extractMetaData($fileObject));
        }
        $fileObject->getMetaData()->add($metaData)->save();

        return $fileObject;
    }

    /**
     * Refreshes the index record and the metadata of an already indexed file.
     *
     * @param File $fileObject The file whose index record is refreshed
     * @return File The same file object, carrying the updated properties
     */
    public function updateIndexEntry(File $fileObject): File
    {
        $updatedInformation = $this->gatherFileInformationArray($fileObject->getIdentifier());
        $fileObject->updateProperties($updatedInformation);

        $fileIndexRepository = $this->getFileIndexRepository();
        $fileIndexRepository->update($fileObject);
        $fileIndexRepository->updateIndexingTime($fileObject->getUid());

        $metaData = $this->extractRequiredMetaData($fileObject);
        if ($this->storage->autoExtractMetadataEnabled()) {
            // Values from the extractor service override the required metadata on key collision.
            $metaData = array_merge($metaData, $this->getExtractorService()->extractMetaData($fileObject));
        }
        $fileObject->getMetaData()->add($metaData)->save();

        return $fileObject;
    }

    /**
     * Synchronises the index with the storage: detects changed and new files,
     * indexes them, and finally marks index records without a file as missing.
     */
    public function processChangesInStorages()
    {
        // get all file-identifiers from the storage
        $availableFiles = $this->storage->getFileIdentifiersInFolder(
            $this->storage->getRootLevelFolder(false)->getIdentifier(),
            true,
            true
        );
        $this->detectChangedFilesInStorage($availableFiles);
        // Must run before detectMissingFiles(): it registers the UIDs of moved and
        // newly created files, which would otherwise be reported as missing.
        $this->processChangedAndNewFiles();

        $this->detectMissingFiles();
    }

    /**
     * Runs metadata extraction for index records of this storage whose indexing is outstanding.
     *
     * @param int $maximumFileCount Limit handed to the repository query; defaults to -1
     */
    public function runMetaDataExtraction($maximumFileCount = -1)
    {
        $fileIndexRecords = $this->getFileIndexRepository()->findInStorageWithIndexOutstanding(
            $this->storage,
            $maximumFileCount
        );
        foreach ($fileIndexRecords as $indexRecord) {
            $fileObject = $this->getResourceFactory()->getFileObject($indexRecord['uid'], $indexRecord);
            // Check for existence of file before extraction
            if ($fileObject->exists()) {
                try {
                    $this->extractMetaData($fileObject);
                } catch (InsufficientFileAccessPermissionsException $e) {
                    // We skip files that are not accessible
                } catch (IllegalFileExtensionException $e) {
                    // We skip files that have an extension that we don't allow
                }
            } else {
                // Mark file as missing and continue with next record
                $this->getFileIndexRepository()->markFileAsMissing($indexRecord['uid']);
            }
        }
    }

    /**
     * Extracts metadata for the given file, stores it, and records the indexing time.
     *
     * @param File $fileObject The file to extract metadata for
     */
    public function extractMetaData(File $fileObject)
    {
        // Start from the metadata the file already carries; extractor results override
        // it on key collision. The existing metadata must be merged flat - wrapping it
        // in a list would store it under key 0 instead of merging its fields.
        $metaData = array_merge(
            $fileObject->getMetaData()->get(),
            $this->getExtractorService()->extractMetaData($fileObject)
        );

        $fileObject->getMetaData()->add($metaData)->save();

        // Record the indexing time, as createIndexEntry() and updateIndexEntry() do.
        $this->getFileIndexRepository()->updateIndexingTime($fileObject->getUid());
    }

    /**
     * Marks index records as missing when they were not matched to a file during
     * this run and the storage no longer has a file for their identifier.
     */
    protected function detectMissingFiles()
    {
        $indexedNotExistentFiles = $this->getFileIndexRepository()->findInStorageAndNotInUidList(
            $this->storage,
            $this->identifiedFileUids
        );

        foreach ($indexedNotExistentFiles as $record) {
            if (!$this->storage->hasFile($record['identifier'])) {
                $this->getFileIndexRepository()->markFileAsMissing($record['uid']);
            }
        }
    }

    /**
     * Tells whether the extractor accepts the type of the given file.
     * An extractor without file type restrictions accepts every file.
     *
     * @param File $file
     * @param ExtractorInterface $extractor
     * @return bool
     */
    protected function isFileTypeSupportedByExtractor(File $file, ExtractorInterface $extractor)
    {
        $fileTypeRestrictions = $extractor->getFileTypeRestrictions();

        return empty($fileTypeRestrictions) || in_array($file->getType(), $fileTypeRestrictions);
    }

    /**
     * Compares the given files with the index and collects everything that needs work
     * in $this->filesToUpdate; UIDs of matched index records go to $this->identifiedFileUids.
     *
     * @param array $fileIdentifierArray File identifiers reported by the storage
     */
    protected function detectChangedFilesInStorage(array $fileIdentifierArray)
    {
        foreach ($fileIdentifierArray as $fileIdentifier) {
            // skip processed files
            if ($this->storage->isWithinProcessingFolder($fileIdentifier)) {
                continue;
            }
            // Get the modification time for file-identifier from the storage
            $modificationTime = $this->storage->getFileInfoByIdentifier($fileIdentifier, ['mtime']);
            // Look up the index record to compare its modification time with the one reported by the storage
            $indexRecord = $this->getFileIndexRepository()->findOneByStorageAndIdentifier($this->storage, $fileIdentifier);

            if ($indexRecord !== false) {
                $this->identifiedFileUids[] = $indexRecord['uid'];

                // Re-index when the modification time differs or the record is flagged as missing
                if ((int)$indexRecord['modification_date'] !== $modificationTime['mtime'] || $indexRecord['missing']) {
                    $this->filesToUpdate[$fileIdentifier] = $indexRecord;
                }
            } else {
                // No index record yet: null marks the file as new
                $this->filesToUpdate[$fileIdentifier] = null;
            }
        }
    }

    /**
     * Works through $this->filesToUpdate: updates changed files, re-attaches moved or
     * renamed files to their existing index record, and creates records for new files.
     * Failures are logged per file and do not stop the run.
     */
    protected function processChangedAndNewFiles()
    {
        foreach ($this->filesToUpdate as $identifier => $data) {
            try {
                if ($data === null) {
                    // search for files with same content hash in indexed storage
                    $fileHash = $this->storage->hashFileByIdentifier($identifier, 'sha1');
                    $files = $this->getFileIndexRepository()->findByContentHash($fileHash);
                    $fileObject = null;
                    if (!empty($files)) {
                        foreach ($files as $fileIndexEntry) {
                            // check if file is missing then we assume it's moved/renamed
                            if (!$this->storage->hasFile($fileIndexEntry['identifier'])) {
                                $fileObject = $this->getResourceFactory()->getFileObject(
                                    $fileIndexEntry['uid'],
                                    $fileIndexEntry
                                );
                                $fileObject->updateProperties(
                                    [
                                        'identifier' => $identifier,
                                    ]
                                );
                                $this->updateIndexEntry($fileObject);
                                $this->identifiedFileUids[] = $fileObject->getUid();
                                break;
                            }
                        }
                    }
                    // create new index when no missing file with same content hash is found
                    if ($fileObject === null) {
                        $fileObject = $this->createIndexEntry($identifier);
                        $this->identifiedFileUids[] = $fileObject->getUid();
                    }
                } else {
                    // update existing file
                    $fileObject = $this->getResourceFactory()->getFileObject($data['uid'], $data);
                    $this->updateIndexEntry($fileObject);
                }
            } catch (InvalidHashException $e) {
                $this->logger->error('Unable to create hash for file: {identifier}', ['identifier' => $identifier]);
            } catch (\Exception $e) {
                $this->logger->error('Unable to index / update file with identifier {identifier}', [
                    'identifier' => $identifier,
                    'exception' => $e,
                ]);
            }
        }
    }

    /**
     * Collects the metadata that is gathered without any extractor: the pixel
     * dimensions of images that live in a storage using the "Local" driver.
     *
     * @param File $fileObject
     * @return array Either empty or an array with the keys "width" and "height"
     */
    protected function extractRequiredMetaData(File $fileObject): array
    {
        $metaData = [];

        // since the core desperately needs image sizes in metadata table do this manually
        // prevent doing this for remote storages, remote storages must provide the data with extractors
        if ($fileObject->isImage() && $this->storage->getDriverType() === 'Local') {
            $rawFileLocation = $fileObject->getForLocalProcessing(false);
            $imageInfo = GeneralUtility::makeInstance(ImageInfo::class, $rawFileLocation);
            $metaData = [
                'width' => $imageInfo->getWidth(),
                'height' => $imageInfo->getHeight(),
            ];
        }

        return $metaData;
    }

    /****************************
     *         UTILITY
     ****************************/

    /**
     * Builds the property array of an index record from the file information the
     * storage reports, adding the file type, the SHA1 content hash and missing = 0.
     *
     * @param string $identifier File identifier inside the storage
     * @return array
     */
    protected function gatherFileInformationArray($identifier): array
    {
        $fileInfo = $this->storage->getFileInfoByIdentifier($identifier);
        $fileInfo = $this->transformFromDriverFileInfoArrayToFileObjectFormat($fileInfo);
        $fileInfo['type'] = $this->getFileType($fileInfo['mime_type']);
        $fileInfo['sha1'] = $this->storage->hashFileByIdentifier($identifier, 'sha1');
        $fileInfo['missing'] = 0;

        return $fileInfo;
    }

    /**
     * Maps a MIME type to a file type, based on the part before the slash.
     *
     * @param string $mimeType For example "image/png"
     * @return int|string One of the File::FILETYPE_* constants
     */
    protected function getFileType($mimeType)
    {
        [$fileType] = explode('/', $mimeType);
        switch (strtolower($fileType)) {
            case 'text':
                $type = File::FILETYPE_TEXT;
                break;
            case 'image':
                $type = File::FILETYPE_IMAGE;
                break;
            case 'audio':
                $type = File::FILETYPE_AUDIO;
                break;
            case 'video':
                $type = File::FILETYPE_VIDEO;
                break;
            case 'application':
            case 'software':
                $type = File::FILETYPE_APPLICATION;
                break;
            default:
                // Anything else must still yield a value; $type would be undefined otherwise.
                $type = File::FILETYPE_UNKNOWN;
        }
        return $type;
    }

    /**
     * Renames the keys of a driver file-info array to the property names used by file objects.
     * "atime" is dropped; keys without a mapping are passed through unchanged.
     *
     * @param array $fileInfo File information as delivered by the storage
     * @return array
     */
    protected function transformFromDriverFileInfoArrayToFileObjectFormat(array $fileInfo)
    {
        $mappingInfo = [
            // 'driverKey' => 'fileProperty' Key is from the driver, value is for the property in the file
            'size' => 'size',
            'atime' => null,
            'mtime' => 'modification_date',
            'ctime' => 'creation_date',
            'mimetype' => 'mime_type',
        ];
        $mappedFileInfo = [];
        foreach ($fileInfo as $key => $value) {
            if (array_key_exists($key, $mappingInfo)) {
                // A null mapping means the driver key has no counterpart and is skipped
                if ($mappingInfo[$key] !== null) {
                    $mappedFileInfo[$mappingInfo[$key]] = $value;
                }
            } else {
                $mappedFileInfo[$key] = $value;
            }
        }
        return $mappedFileInfo;
    }

    /**
     * @return FileIndexRepository
     */
    protected function getFileIndexRepository()
    {
        return GeneralUtility::makeInstance(FileIndexRepository::class);
    }

    /**
     * @return MetaDataRepository
     */
    protected function getMetaDataRepository()
    {
        return GeneralUtility::makeInstance(MetaDataRepository::class);
    }

    /**
     * @return ResourceFactory
     */
    protected function getResourceFactory()
    {
        return GeneralUtility::makeInstance(ResourceFactory::class);
    }

    /**
     * Returns the extractor service, creating it on first use.
     *
     * @return ExtractorService
     */
    protected function getExtractorService(): ExtractorService
    {
        if ($this->extractorService === null) {
            $this->extractorService = GeneralUtility::makeInstance(ExtractorService::class);
        }
        return $this->extractorService;
    }
}
‪TYPO3\CMS\Core\Resource\Index\MetaDataRepository
Definition: MetaDataRepository.php:41
‪TYPO3\CMS\Core\Resource\Index\ExtractorInterface\getFileTypeRestrictions
‪array getFileTypeRestrictions()
‪TYPO3\CMS\Core\Resource\AbstractFile\FILETYPE_UNKNOWN
‪const FILETYPE_UNKNOWN
Definition: AbstractFile.php:66
‪TYPO3\CMS\Core\Resource\Index\FileIndexRepository\findInStorageAndNotInUidList
‪array findInStorageAndNotInUidList(ResourceStorage $storage, array $uidList)
Definition: FileIndexRepository.php:450
‪TYPO3\CMS\Core\Resource\File\getMetaData
‪MetaDataAspect getMetaData()
Definition: File.php:341
‪TYPO3\CMS\Core\Resource\Index\FileIndexRepository
Definition: FileIndexRepository.php:42
‪TYPO3\CMS\Core\Resource\AbstractFile\getType
‪int getType()
Definition: AbstractFile.php:291
‪TYPO3\CMS\Core\Resource\Index\Indexer\$filesToUpdate
‪array $filesToUpdate
Definition: Indexer.php:39
‪TYPO3\CMS\Core\Resource\AbstractFile\getIdentifier
‪string getIdentifier()
Definition: AbstractFile.php:141
‪TYPO3\CMS\Core\Resource\AbstractFile\FILETYPE_VIDEO
‪const FILETYPE_VIDEO
Definition: AbstractFile.php:90
‪TYPO3\CMS\Core\Resource\AbstractFile\getForLocalProcessing
‪string getForLocalProcessing($writable=true)
Definition: AbstractFile.php:583
‪TYPO3\CMS\Core\Resource\Index\Indexer\getFileType
‪string getFileType($mimeType)
Definition: Indexer.php:337
‪TYPO3\CMS\Core\Resource\Index\Indexer\transformFromDriverFileInfoArrayToFileObjectFormat
‪array transformFromDriverFileInfoArrayToFileObjectFormat(array $fileInfo)
Definition: Indexer.php:372
‪TYPO3\CMS\Core\Resource\Index\Indexer\updateIndexEntry
‪File updateIndexEntry(File $fileObject)
Definition: Indexer.php:99
‪TYPO3\CMS\Core\Resource\Index\Indexer
Definition: Indexer.php:34
‪TYPO3\CMS\Core\Resource\Index\Indexer\createIndexEntry
‪File createIndexEntry($identifier)
Definition: Indexer.php:68
‪TYPO3\CMS\Core\Resource\Index\Indexer\getMetaDataRepository
‪MetaDataRepository getMetaDataRepository()
Definition: Indexer.php:410
‪TYPO3\CMS\Core\Resource\Index\ExtractorInterface
Definition: ExtractorInterface.php:25
‪TYPO3\CMS\Core\Resource\Index\Indexer\isFileTypeSupportedByExtractor
‪bool isFileTypeSupportedByExtractor(File $file, ExtractorInterface $extractor)
Definition: Indexer.php:192
‪TYPO3\CMS\Core\Resource\Index\Indexer\detectChangedFilesInStorage
‪detectChangedFilesInStorage(array $fileIdentifierArray)
Definition: Indexer.php:207
‪TYPO3\CMS\Core\Resource\AbstractFile\isImage
‪bool isImage()
Definition: AbstractFile.php:328
‪TYPO3\CMS\Core\Resource\Index\Indexer\detectMissingFiles
‪detectMissingFiles()
Definition: Indexer.php:171
‪TYPO3\CMS\Core\Resource\Index\Indexer\$storage
‪ResourceStorage $storage
Definition: Indexer.php:47
‪TYPO3\CMS\Core\Resource\Exception\IllegalFileExtensionException
Definition: IllegalFileExtensionException.php:23
‪TYPO3\CMS\Core\Resource\AbstractFile\FILETYPE_IMAGE
‪const FILETYPE_IMAGE
Definition: AbstractFile.php:78
‪TYPO3\CMS\Core\Resource\Exception\InsufficientFileAccessPermissionsException
Definition: InsufficientFileAccessPermissionsException.php:23
‪TYPO3\CMS\Core\Resource\Index\Indexer\getFileIndexRepository
‪FileIndexRepository getFileIndexRepository()
Definition: Indexer.php:400
‪TYPO3\CMS\Core\Resource\Index\Indexer\__construct
‪__construct(ResourceStorage $storage)
Definition: Indexer.php:56
‪TYPO3\CMS\Core\Resource\Index\Indexer\getExtractorService
‪ExtractorService getExtractorService()
Definition: Indexer.php:428
‪TYPO3\CMS\Core\Resource\Index\Indexer\extractRequiredMetaData
‪array extractRequiredMetaData(File $fileObject)
Definition: Indexer.php:291
‪TYPO3\CMS\Core\Resource\MetaDataAspect\add
‪self add(array $metaData)
Definition: MetaDataAspect.php:63
‪TYPO3\CMS\Core\Resource\Index\FileIndexRepository\updateIndexingTime
‪updateIndexingTime($fileUid)
Definition: FileIndexRepository.php:483
‪TYPO3\CMS\Core\Resource\Index\FileIndexRepository\findByContentHash
‪mixed findByContentHash($hash)
Definition: FileIndexRepository.php:162
‪TYPO3\CMS\Core\Resource\Index\FileIndexRepository\findInStorageWithIndexOutstanding
‪array findInStorageWithIndexOutstanding(ResourceStorage $storage, $limit=-1)
Definition: FileIndexRepository.php:421
‪TYPO3\CMS\Core\Resource\ResourceFactory
Definition: ResourceFactory.php:41
‪TYPO3\CMS\Core\Resource\File
Definition: File.php:24
‪TYPO3\CMS\Core\Resource\Index\Indexer\extractMetaData
‪extractMetaData(File $fileObject)
Definition: Indexer.php:156
‪TYPO3\CMS\Core\Resource\MetaDataAspect\get
‪array get()
Definition: MetaDataAspect.php:76
‪TYPO3\CMS\Core\Resource\Service\ExtractorService
Definition: ExtractorService.php:29
‪TYPO3\CMS\Core\Resource\Index\Indexer\processChangesInStorages
‪processChangesInStorages()
Definition: Indexer.php:117
‪TYPO3\CMS\Core\Resource\AbstractFile\getUid
‪int getUid()
Definition: AbstractFile.php:203
‪TYPO3\CMS\Core\Resource\AbstractFile\FILETYPE_AUDIO
‪const FILETYPE_AUDIO
Definition: AbstractFile.php:84
‪TYPO3\CMS\Core\Resource\Index
Definition: ExtractorInterface.php:16
‪TYPO3\CMS\Core\Type\File\ImageInfo
Definition: ImageInfo.php:28
‪TYPO3\CMS\Core\Resource\AbstractFile\FILETYPE_TEXT
‪const FILETYPE_TEXT
Definition: AbstractFile.php:72
‪TYPO3\CMS\Core\Resource\Exception
Definition: Exception.php:21
‪TYPO3\CMS\Core\Resource\Index\Indexer\processChangedAndNewFiles
‪processChangedAndNewFiles()
Definition: Indexer.php:235
‪TYPO3\CMS\Core\Resource\ResourceStorage
Definition: ResourceStorage.php:125
‪TYPO3\CMS\Core\Resource\AbstractFile\FILETYPE_APPLICATION
‪const FILETYPE_APPLICATION
Definition: AbstractFile.php:96
‪TYPO3\CMS\Core\Resource\Index\FileIndexRepository\markFileAsMissing
‪markFileAsMissing($fileUid)
Definition: FileIndexRepository.php:502
‪TYPO3\CMS\Core\Resource\File\updateProperties
‪updateProperties(array $properties)
Definition: File.php:166
‪TYPO3\CMS\Core\Resource\ResourceFactory\getFileObject
‪File getFileObject($uid, array $fileData=[])
Definition: ResourceFactory.php:209
‪TYPO3\CMS\Core\Resource\Index\Indexer\$identifiedFileUids
‪int[] $identifiedFileUids
Definition: Indexer.php:43
‪TYPO3\CMS\Core\Resource\Index\Indexer\$extractorService
‪ExtractorService $extractorService
Definition: Indexer.php:51
‪TYPO3\CMS\Core\Utility\GeneralUtility
Definition: GeneralUtility.php:50
‪TYPO3\CMS\Core\Resource\Index\Indexer\gatherFileInformationArray
‪array gatherFileInformationArray($identifier)
Definition: Indexer.php:320
‪TYPO3\CMS\Core\Resource\Exception\InvalidHashException
Definition: InvalidHashException.php:26
‪TYPO3\CMS\Core\Resource\Index\Indexer\runMetaDataExtraction
‪runMetaDataExtraction($maximumFileCount=-1)
Definition: Indexer.php:130
‪TYPO3\CMS\Core\Resource\Index\Indexer\getResourceFactory
‪ResourceFactory getResourceFactory()
Definition: Indexer.php:420
‪TYPO3\CMS\Core\Resource\Index\FileIndexRepository\findOneByStorageAndIdentifier
‪array|bool findOneByStorageAndIdentifier(ResourceStorage $storage, $identifier)
Definition: FileIndexRepository.php:136