TYPO3 CMS  TYPO3_8-7
Indexer.php
Go to the documentation of this file.
1 <?php
3 
4 /*
5  * This file is part of the TYPO3 CMS project.
6  *
7  * It is free software; you can redistribute it and/or modify it under
8  * the terms of the GNU General Public License, either version 2
9  * of the License, or any later version.
10  *
11  * For the full copyright and license information, please read the
12  * LICENSE.txt file that was distributed with this source code.
13  *
14  * The TYPO3 project - inspiring people to share!
15  */
16 
25 
/**
 * Keeps the file index of one resource storage in sync with the files the
 * storage actually contains and triggers metadata extraction for indexed files.
 *
 * One instance works on exactly one storage (the one passed to the constructor).
 */
class Indexer
{
    /**
     * Files collected by detectChangedFilesInStorage() that need work.
     * Keyed by file identifier; the value is the existing index record (array)
     * or null when the file has no index record yet.
     *
     * @var array
     */
    protected $filesToUpdate = [];

    /**
     * Uids of index records that were matched to (or created for) a file
     * during the current run. Used by detectMissingFiles() as exclusion list.
     *
     * @var int[]
     */
    protected $identifiedFileUids = [];

    /**
     * The storage this indexer operates on.
     *
     * @var ResourceStorage
     */
    protected $storage = null;

    /**
     * Lazily populated cache of the extractors usable with the storage's
     * driver type; null until getExtractionServices() is called first.
     *
     * @var ExtractorInterface[]|null
     */
    protected $extractionServices = null;

    /**
     * @param ResourceStorage $storage The storage whose files are indexed
     */
    public function __construct(ResourceStorage $storage)
    {
        $this->storage = $storage;
    }

    /**
     * Creates a new index record for the file with the given identifier and
     * returns the file object built from that record.
     *
     * Required metadata is always extracted; full metadata extraction only
     * runs when the storage has automatic extraction enabled.
     *
     * @param string $identifier Non-empty identifier of the file inside the storage
     * @return File
     * @throws \InvalidArgumentException If $identifier is not a non-empty string
     */
    public function createIndexEntry($identifier)
    {
        if (!isset($identifier) || !is_string($identifier) || $identifier === '') {
            throw new \InvalidArgumentException('Invalid file identifier given. It must be of type string and not empty. "' . gettype($identifier) . '" given.', 1401732565);
        }
        $fileProperties = $this->gatherFileInformationArray($identifier);
        $record = $this->getFileIndexRepository()->addRaw($fileProperties);
        $fileObject = $this->getResourceFactory()->getFileObject($record['uid'], $record);
        $this->extractRequiredMetaData($fileObject);

        if ($this->storage->autoExtractMetadataEnabled()) {
            $this->extractMetaData($fileObject);
        }

        return $fileObject;
    }

    /**
     * Re-reads the file information from the storage, writes it to the given
     * file object and its index record, and refreshes the required metadata.
     *
     * @param File $fileObject The file whose index record is refreshed
     */
    public function updateIndexEntry(File $fileObject)
    {
        $updatedInformation = $this->gatherFileInformationArray($fileObject->getIdentifier());
        $fileObject->updateProperties($updatedInformation);
        $this->getFileIndexRepository()->update($fileObject);
        $this->extractRequiredMetaData($fileObject);
    }

    /**
     * Synchronises the index with the storage: detects new and changed files,
     * processes them, and finally marks indexed files that no longer exist
     * as missing.
     */
    public function processChangesInStorages()
    {
        // get all file-identifiers from the storage
        $availableFiles = $this->storage->getFileIdentifiersInFolder($this->storage->getRootLevelFolder(false)->getIdentifier(), true, true);
        $this->detectChangedFilesInStorage($availableFiles);
        $this->processChangedAndNewFiles();

        // Must run last: it relies on $this->identifiedFileUids collected by the two steps above.
        $this->detectMissingFiles();
    }

    /**
     * Runs metadata extraction for index records of this storage whose
     * indexing is still outstanding.
     *
     * Records whose file no longer exists are marked as missing instead.
     *
     * @param int $maximumFileCount Limit handed to the repository query (default -1)
     */
    public function runMetaDataExtraction($maximumFileCount = -1)
    {
        $fileIndexRecords = $this->getFileIndexRepository()->findInStorageWithIndexOutstanding($this->storage, $maximumFileCount);
        foreach ($fileIndexRecords as $indexRecord) {
            $fileObject = $this->getResourceFactory()->getFileObject($indexRecord['uid'], $indexRecord);
            // Check for existence of file before extraction
            if ($fileObject->exists()) {
                try {
                    $this->extractMetaData($fileObject);
                } catch (\TYPO3\CMS\Core\Resource\Exception\InsufficientFileAccessPermissionsException $e) {
                    // We skip files that are not accessible
                } catch (IllegalFileExtensionException $e) {
                    // We skip files that have an extension that we don't allow
                }
            } else {
                // Mark file as missing and continue with next record
                $this->getFileIndexRepository()->markFileAsMissing($indexRecord['uid']);
            }
        }
    }

    /**
     * Collects metadata from every applicable extractor, merges it with the
     * file's current metadata and persists the result.
     *
     * The current metadata is stored under key 0 and each extractor's result
     * under its priority; after sorting by key, later entries override
     * earlier ones on merge.
     *
     * @param File $fileObject The file to extract metadata for
     */
    public function extractMetaData(File $fileObject)
    {
        $newMetaData = [
            0 => $fileObject->_getMetaData()
        ];

        // Loop through available extractors and fetch metadata for the given file.
        foreach ($this->getExtractionServices() as $service) {
            if ($this->isFileTypeSupportedByExtractor($fileObject, $service) && $service->canProcess($fileObject)) {
                // NOTE: results are keyed by priority, so two extractors reporting
                // the same priority overwrite each other here.
                $newMetaData[$service->getPriority()] = $service->extractMetaData($fileObject, $newMetaData);
            }
        }

        // Sort metadata by priority so that merging happens in order of precedence.
        ksort($newMetaData);

        // Merge the collected metadata.
        $metaData = [];
        foreach ($newMetaData as $data) {
            $metaData = array_merge($metaData, $data);
        }
        $fileObject->_updateMetaDataProperties($metaData);
        $this->getMetaDataRepository()->update($fileObject->getUid(), $metaData);
        $this->getFileIndexRepository()->updateIndexingTime($fileObject->getUid());
    }

    /**
     * Returns the extractors registered for the driver type of the storage.
     * The list is fetched from the registry once and cached afterwards.
     *
     * @return ExtractorInterface[]
     */
    protected function getExtractionServices()
    {
        if ($this->extractionServices === null) {
            $this->extractionServices = $this->getExtractorRegistry()->getExtractorsWithDriverSupport($this->storage->getDriverType());
        }
        return $this->extractionServices;
    }

    /**
     * Marks index records as missing when their file was not identified
     * during this run and the storage confirms that the file does not exist.
     */
    protected function detectMissingFiles()
    {
        $indexedNotExistentFiles = $this->getFileIndexRepository()->findInStorageAndNotInUidList(
            $this->storage,
            $this->identifiedFileUids
        );

        foreach ($indexedNotExistentFiles as $record) {
            // Double-check against the storage before flagging the record.
            if (!$this->storage->hasFile($record['identifier'])) {
                $this->getFileIndexRepository()->markFileAsMissing($record['uid']);
            }
        }
    }

    /**
     * Checks the file's type against the extractor's file type restrictions.
     * An extractor without restrictions supports every file type.
     *
     * @param File $file
     * @param ExtractorInterface $extractor
     * @return bool
     */
    protected function isFileTypeSupportedByExtractor(File $file, ExtractorInterface $extractor)
    {
        $isSupported = true;
        $fileTypeRestrictions = $extractor->getFileTypeRestrictions();
        if (!empty($fileTypeRestrictions) && !in_array($file->getType(), $fileTypeRestrictions)) {
            $isSupported = false;
        }
        return $isSupported;
    }

    /**
     * Compares the given files with their index records and queues every file
     * that is new, modified or currently flagged as missing in
     * $this->filesToUpdate. Files inside the processing folder are ignored.
     *
     * @param array $fileIdentifierArray Identifiers of the files found in the storage
     */
    protected function detectChangedFilesInStorage(array $fileIdentifierArray)
    {
        foreach ($fileIdentifierArray as $fileIdentifier) {
            // skip processed files
            if ($this->storage->isWithinProcessingFolder($fileIdentifier)) {
                continue;
            }
            // Get the modification time for file-identifier from the storage
            $modificationTime = $this->storage->getFileInfoByIdentifier($fileIdentifier, ['mtime']);
            // Look if the modification time in FS differs from the one in database (key needed on timestamps)
            $indexRecord = $this->getFileIndexRepository()->findOneByStorageUidAndIdentifier($this->storage->getUid(), $fileIdentifier);

            if ($indexRecord !== false) {
                $this->identifiedFileUids[] = $indexRecord['uid'];

                if ((int)$indexRecord['modification_date'] !== $modificationTime['mtime'] || $indexRecord['missing']) {
                    $this->filesToUpdate[$fileIdentifier] = $indexRecord;
                }
            } else {
                // No index record yet: null tells processChangedAndNewFiles() to treat the file as new.
                $this->filesToUpdate[$fileIdentifier] = null;
            }
        }
    }

    /**
     * Works through $this->filesToUpdate.
     *
     * Entries with an index record are simply updated. For entries without
     * one, an index record with the same content hash whose file no longer
     * exists is taken over (treated as a move/rename); otherwise a new index
     * record is created.
     */
    protected function processChangedAndNewFiles()
    {
        foreach ($this->filesToUpdate as $identifier => $data) {
            if ($data == null) {
                // search for files with same content hash in indexed storage
                $fileHash = $this->storage->hashFileByIdentifier($identifier, 'sha1');
                $files = $this->getFileIndexRepository()->findByContentHash($fileHash);
                $fileObject = null;
                if (!empty($files)) {
                    foreach ($files as $fileIndexEntry) {
                        // check if file is missing then we assume it's moved/renamed
                        if (!$this->storage->hasFile($fileIndexEntry['identifier'])) {
                            $fileObject = $this->getResourceFactory()->getFileObject($fileIndexEntry['uid'], $fileIndexEntry);
                            $fileObject->updateProperties([
                                'identifier' => $identifier
                            ]);
                            $this->updateIndexEntry($fileObject);
                            $this->identifiedFileUids[] = $fileObject->getUid();
                            break;
                        }
                    }
                }
                // create new index when no missing file with same content hash is found
                if ($fileObject === null) {
                    $fileObject = $this->createIndexEntry($identifier);
                    $this->identifiedFileUids[] = $fileObject->getUid();
                }
            } else {
                // update existing file
                $fileObject = $this->getResourceFactory()->getFileObject($data['uid'], $data);
                $this->updateIndexEntry($fileObject);
            }
        }
    }

    /**
     * Stores width and height for image files on storages with the "Local"
     * driver. Does nothing for any other file type or driver type.
     *
     * @param File $fileObject
     */
    protected function extractRequiredMetaData(File $fileObject)
    {
        // since the core desperately needs image sizes in metadata table do this manually
        // prevent doing this for remote storages, remote storages must provide the data with extractors
        if ($fileObject->getType() == File::FILETYPE_IMAGE && $this->storage->getDriverType() === 'Local') {
            $rawFileLocation = $fileObject->getForLocalProcessing(false);
            $imageInfo = GeneralUtility::makeInstance(ImageInfo::class, $rawFileLocation);
            $metaData = [
                'width' => $imageInfo->getWidth(),
                'height' => $imageInfo->getHeight(),
            ];
            $this->getMetaDataRepository()->update($fileObject->getUid(), $metaData);
            $fileObject->_updateMetaDataProperties($metaData);
        }
    }

    /****************************
     *
     *         UTILITY
     *
     ****************************/

    /**
     * Builds the property array for an index record from the information the
     * storage reports for the file: mapped driver info plus 'type', 'sha1',
     * 'extension' (derived from 'name' if the driver did not supply one) and
     * 'missing' set to 0.
     *
     * @param string $identifier Identifier of the file inside the storage
     * @return array
     */
    protected function gatherFileInformationArray($identifier)
    {
        $fileInfo = $this->storage->getFileInfoByIdentifier($identifier);
        $fileInfo = $this->transformFromDriverFileInfoArrayToFileObjectFormat($fileInfo);
        $fileInfo['type'] = $this->getFileType($fileInfo['mime_type']);
        $fileInfo['sha1'] = $this->storage->hashFileByIdentifier($identifier, 'sha1');
        if (!isset($fileInfo['extension'])) {
            $fileInfo['extension'] = PathUtility::pathinfo($fileInfo['name'], PATHINFO_EXTENSION);
        }
        $fileInfo['missing'] = 0;

        return $fileInfo;
    }

    /**
     * Maps the top-level part of a MIME type (the text before the first "/")
     * to one of the File::FILETYPE_* constants. Matching is case-insensitive;
     * anything unrecognised yields File::FILETYPE_UNKNOWN.
     *
     * @param string $mimeType MIME type such as "image/png"
     * @return int One of the File::FILETYPE_* constants
     */
    protected function getFileType($mimeType)
    {
        list($fileType) = explode('/', $mimeType);
        switch (strtolower($fileType)) {
            case 'text':
                $type = File::FILETYPE_TEXT;
                break;
            case 'image':
                $type = File::FILETYPE_IMAGE;
                break;
            case 'audio':
                $type = File::FILETYPE_AUDIO;
                break;
            case 'video':
                $type = File::FILETYPE_VIDEO;
                break;
            case 'application':
            case 'software':
                $type = File::FILETYPE_APPLICATION;
                break;
            default:
                $type = File::FILETYPE_UNKNOWN;
        }
        return $type;
    }

    /**
     * Renames the keys of a driver file info array to the property names used
     * for file objects. Keys mapped to null are dropped; keys that are not
     * part of the mapping are passed through unchanged.
     *
     * @param array $fileInfo File information as delivered by the storage
     * @return array
     */
    protected function transformFromDriverFileInfoArrayToFileObjectFormat(array $fileInfo)
    {
        $mappingInfo = [
            // 'driverKey' => 'fileProperty' Key is from the driver, value is for the property in the file
            'size' => 'size',
            'atime' => null,
            'mtime' => 'modification_date',
            'ctime' => 'creation_date',
            'mimetype' => 'mime_type'
        ];
        $mappedFileInfo = [];
        foreach ($fileInfo as $key => $value) {
            if (array_key_exists($key, $mappingInfo)) {
                if ($mappingInfo[$key] !== null) {
                    $mappedFileInfo[$mappingInfo[$key]] = $value;
                }
            } else {
                $mappedFileInfo[$key] = $value;
            }
        }
        return $mappedFileInfo;
    }

    /**
     * Returns the repository used to read and write file index records.
     *
     * @return \TYPO3\CMS\Core\Resource\Index\FileIndexRepository
     */
    protected function getFileIndexRepository()
    {
        return \TYPO3\CMS\Core\Resource\Index\FileIndexRepository::getInstance();
    }

    /**
     * Returns the repository used to persist file metadata.
     *
     * @return \TYPO3\CMS\Core\Resource\Index\MetaDataRepository
     */
    protected function getMetaDataRepository()
    {
        return \TYPO3\CMS\Core\Resource\Index\MetaDataRepository::getInstance();
    }

    /**
     * Returns the factory used to build file objects from index records.
     *
     * @return \TYPO3\CMS\Core\Resource\ResourceFactory
     */
    protected function getResourceFactory()
    {
        return \TYPO3\CMS\Core\Resource\ResourceFactory::getInstance();
    }

    /**
     * Returns the registry that knows the available metadata extractors.
     *
     * @return \TYPO3\CMS\Core\Resource\Index\ExtractorRegistry
     */
    protected function getExtractorRegistry()
    {
        return \TYPO3\CMS\Core\Resource\Index\ExtractorRegistry::getInstance();
    }
}
isFileTypeSupportedByExtractor(File $file, ExtractorInterface $extractor)
Definition: Indexer.php:201
__construct(ResourceStorage $storage)
Definition: Indexer.php:54
transformFromDriverFileInfoArrayToFileObjectFormat(array $fileInfo)
Definition: Indexer.php:369
_updateMetaDataProperties(array $properties)
Definition: File.php:242
updateProperties(array $properties)
Definition: File.php:201
static makeInstance($className,... $constructorArguments)
static pathinfo($path, $options=null)
detectChangedFilesInStorage(array $fileIdentifierArray)
Definition: Indexer.php:216
runMetaDataExtraction($maximumFileCount=-1)
Definition: Indexer.php:111
updateIndexEntry(File $fileObject)
Definition: Indexer.php:88
extractRequiredMetaData(File $fileObject)
Definition: Indexer.php:285