TYPO3CMS  8
 All Classes Namespaces Files Functions Variables Pages
core/Classes/Resource/Index/Indexer.php
Go to the documentation of this file.
1 <?php
2 namespace TYPO3\CMS\Core\Resource\Index;
3 
4 /*
5  * This file is part of the TYPO3 CMS project.
6  *
7  * It is free software; you can redistribute it and/or modify it under
8  * the terms of the GNU General Public License, either version 2
9  * of the License, or any later version.
10  *
11  * For the full copyright and license information, please read the
12  * LICENSE.txt file that was distributed with this source code.
13  *
14  * The TYPO3 project - inspiring people to share!
15  */
16 
23 
27 class Indexer
28 {
/**
 * Files queued for (re-)indexing, keyed by file identifier. The value is the
 * existing index record, or null when no index record exists yet.
 *
 * @var array
 */
protected $filesToUpdate = [];

/**
 * Uids of the index records that were matched to a file of the storage.
 *
 * @var array
 */
protected $identifiedFileUids = [];

/**
 * The storage this indexer works on.
 */
protected $storage;

/**
 * Metadata extractors for the storage's driver type; null until they are
 * requested for the first time.
 *
 * @var array|null
 */
protected $extractionServices;
48 
/**
 * Binds the indexer to the storage it operates on.
 *
 * @param \TYPO3\CMS\Core\Resource\ResourceStorage $storage
 */
public function __construct(\TYPO3\CMS\Core\Resource\ResourceStorage $storage)
{
    $this->storage = $storage;
}
56 
/**
 * Creates the index record for a file of the storage and returns the
 * resulting file object.
 *
 * @param string $identifier Non-empty file identifier
 * @return File
 * @throws \InvalidArgumentException if $identifier is not a non-empty string
 */
public function createIndexEntry($identifier)
{
    $isUsableIdentifier = is_string($identifier) && $identifier !== '';
    if (!$isUsableIdentifier) {
        throw new \InvalidArgumentException(
            'Invalid file identifier given. It must be of type string and not empty. "' . gettype($identifier) . '" given.',
            1401732565
        );
    }

    $properties = $this->gatherFileInformationArray($identifier);
    $indexRecord = $this->getFileIndexRepository()->addRaw($properties);
    $indexedFile = $this->getResourceFactory()->getFileObject($indexRecord['uid'], $indexRecord);
    // Image dimensions are written right away, see extractRequiredMetaData().
    $this->extractRequiredMetaData($indexedFile);

    return $indexedFile;
}
75 
/**
 * Re-reads the file information from the storage, applies it to the file
 * object and persists the refreshed index record.
 *
 * @param File $fileObject
 */
public function updateIndexEntry(File $fileObject)
{
    $fileObject->updateProperties(
        $this->gatherFileInformationArray($fileObject->getIdentifier())
    );
    $this->getFileIndexRepository()->update($fileObject);
    $this->extractRequiredMetaData($fileObject);
}
89 
/**
 * Synchronises the index with the files found in the storage.
 *
 * Runs in three steps: detect new/changed files, index them, and finally
 * flag index records whose file is gone as missing.
 */
public function processChangesInStorages()
{
    // Get all file identifiers below the storage's root level folder.
    // NOTE(review): the two boolean flags presumably mean "use filters" and "recursive" - confirm.
    $rootFolderIdentifier = $this->storage->getRootLevelFolder(false)->getIdentifier();
    $availableFiles = $this->storage->getFileIdentifiersInFolder($rootFolderIdentifier, true, true);

    $this->detectChangedFilesInStorage($availableFiles);
    // Work off the queue filled by the detection step. This must happen before
    // the missing-file check, because it registers the uids of newly indexed
    // and moved files in $this->identifiedFileUids.
    $this->processChangedAndNewFiles();

    $this->detectMissingFiles();
}
102 
/**
 * Runs the metadata extraction for the files of the storage whose indexing
 * is reported as outstanding by the file index repository.
 *
 * @param int $maximumFileCount Passed on to the repository lookup as the limit
 */
public function runMetaDataExtraction($maximumFileCount = -1)
{
    $outstandingRecords = $this->getFileIndexRepository()->findInStorageWithIndexOutstanding(
        $this->storage,
        $maximumFileCount
    );
    foreach ($outstandingRecords as $record) {
        $this->extractMetaData(
            $this->getResourceFactory()->getFileObject($record['uid'], $record)
        );
    }
}
115 
/**
 * Collects metadata for a file from all applicable extractors, merges it in
 * order of extractor priority and stores the result.
 *
 * @param File $fileObject
 */
public function extractMetaData(File $fileObject)
{
    // Slot 0 holds the metadata the file already has, so it has the lowest precedence.
    $newMetaData = [
        0 => $fileObject->_getMetaData()
    ];

    // Loop through available extractors and fetch metadata for the given file.
    foreach ($this->getExtractionServices() as $service) {
        if (!$this->isFileTypeSupportedByExtractor($fileObject, $service) || !$service->canProcess($fileObject)) {
            continue;
        }
        $priority = $service->getPriority();
        $extractedData = $service->extractMetaData($fileObject, $newMetaData);
        // Several extractors may share a priority (and priority 0 is already
        // taken by the existing metadata): merge into the slot instead of
        // replacing it, so no extractor result is silently dropped.
        $newMetaData[$priority] = isset($newMetaData[$priority])
            ? array_merge($newMetaData[$priority], $extractedData)
            : $extractedData;
    }

    // Sort metadata by priority so that merging happens in order of precedence.
    ksort($newMetaData);

    // Merge the collected metadata; values of later (higher priority) slots win.
    $metaData = [];
    foreach ($newMetaData as $data) {
        $metaData = array_merge($metaData, $data);
    }

    $fileObject->_updateMetaDataProperties($metaData);
    $this->getMetaDataRepository()->update($fileObject->getUid(), $metaData);
    $this->getFileIndexRepository()->updateIndexingTime($fileObject->getUid());
}
146 
/**
 * Returns the metadata extractors that support the driver type of the
 * storage. The list is fetched from the extractor registry on first use and
 * cached afterwards.
 *
 * @return array
 */
protected function getExtractionServices()
{
    if ($this->extractionServices === null) {
        $this->extractionServices = $this->getExtractorRegistry()->getExtractorsWithDriverSupport($this->storage->getDriverType());
    }
    // Callers iterate over the result, so the cached list has to be handed back.
    return $this->extractionServices;
}
159 
/**
 * Flags index records as missing when they were not matched to a file during
 * this run and the storage no longer has a file under their identifier.
 */
protected function detectMissingFiles()
{
    $fileIndexRepository = $this->getFileIndexRepository();
    $unmatchedRecords = $fileIndexRepository->findInStorageAndNotInUidList(
        $this->storage,
        $this->identifiedFileUids
    );

    foreach ($unmatchedRecords as $record) {
        // Double-check against the storage before flagging the record.
        if ($this->storage->hasFile($record['identifier'])) {
            continue;
        }
        $this->getFileIndexRepository()->markFileAsMissing($record['uid']);
    }
}
177 
/**
 * Tells whether the extractor accepts the type of the given file.
 *
 * An extractor without file type restrictions accepts every file.
 *
 * @param File $file
 * @param ExtractorInterface $extractor
 * @return bool
 */
protected function isFileTypeSupportedByExtractor(File $file, ExtractorInterface $extractor)
{
    $allowedFileTypes = $extractor->getFileTypeRestrictions();

    return empty($allowedFileTypes) || in_array($file->getType(), $allowedFileTypes);
}
194 
/**
 * Compares the given files with their index records and queues every new or
 * changed file in $this->filesToUpdate.
 *
 * The uid of each index record that is found is added to
 * $this->identifiedFileUids.
 *
 * @param array $fileIdentifierArray File identifiers to check
 */
protected function detectChangedFilesInStorage(array $fileIdentifierArray)
{
    foreach ($fileIdentifierArray as $fileIdentifier) {
        // Files inside the processing folder are skipped.
        if ($this->storage->isWithinProcessingFolder($fileIdentifier)) {
            continue;
        }

        // Modification time of the file as reported by the storage.
        $storageFileInfo = $this->storage->getFileInfoByIdentifier($fileIdentifier, ['mtime']);
        $indexRecord = $this->getFileIndexRepository()->findOneByStorageUidAndIdentifier($this->storage->getUid(), $fileIdentifier);

        if ($indexRecord === false) {
            // No index record yet: queue the file as new.
            $this->filesToUpdate[$fileIdentifier] = null;
            continue;
        }

        $this->identifiedFileUids[] = $indexRecord['uid'];

        // Queue the file when the modification time in the storage differs from
        // the indexed one, or when the record is currently flagged as missing.
        $isModified = (int)$indexRecord['modification_date'] !== $storageFileInfo['mtime'];
        if ($isModified || $indexRecord['missing']) {
            $this->filesToUpdate[$fileIdentifier] = $indexRecord;
        }
    }
}
224 
/**
 * Works off $this->filesToUpdate: refreshes the index record of changed
 * files and indexes files that have no index record yet.
 *
 * A file without index record is treated as moved/renamed when an index
 * record with the same content hash exists whose file is gone from the
 * storage; that record is then re-pointed to the new identifier. Otherwise
 * a new index record is created.
 */
protected function processChangedAndNewFiles()
{
    foreach ($this->filesToUpdate as $identifier => $data) {
        if ($data != null) {
            // An index record exists: update it.
            $indexedFile = $this->getResourceFactory()->getFileObject($data['uid'], $data);
            $this->updateIndexEntry($indexedFile);
            continue;
        }

        // Look for index records with the same content hash.
        $contentHash = $this->storage->hashFileByIdentifier($identifier, 'sha1');
        $candidates = $this->getFileIndexRepository()->findByContentHash($contentHash);
        $fileObject = null;
        if (!empty($candidates)) {
            foreach ($candidates as $candidate) {
                // Only a record whose file no longer exists can be the moved/renamed one.
                if ($this->storage->hasFile($candidate['identifier'])) {
                    continue;
                }
                $fileObject = $this->getResourceFactory()->getFileObject($candidate['uid'], $candidate);
                $fileObject->updateProperties([
                    'identifier' => $identifier
                ]);
                $this->updateIndexEntry($fileObject);
                $this->identifiedFileUids[] = $fileObject->getUid();
                break;
            }
        }

        // Nothing to re-use: create a fresh index record.
        if ($fileObject === null) {
            $fileObject = $this->createIndexEntry($identifier);
            $this->identifiedFileUids[] = $fileObject->getUid();
        }
    }
}
265 
/**
 * Stores width and height of an image file as metadata.
 *
 * Only done for image files on storages with the "Local" driver; other
 * storages have to supply the data via extractors.
 *
 * @param File $fileObject
 */
protected function extractRequiredMetaData(File $fileObject)
{
    if ($fileObject->getType() != File::FILETYPE_IMAGE || $this->storage->getDriverType() !== 'Local') {
        return;
    }

    // The core needs the image dimensions in the metadata table, so they are read here directly.
    $imageInfo = GeneralUtility::makeInstance(ImageInfo::class, $fileObject->getForLocalProcessing(false));
    $dimensions = [
        'width' => $imageInfo->getWidth(),
        'height' => $imageInfo->getHeight(),
    ];
    $this->getMetaDataRepository()->update($fileObject->getUid(), $dimensions);
    $fileObject->_updateMetaDataProperties($dimensions);
}
287 
288  /****************************
289  *
290  * UTILITY
291  *
292  ****************************/
293 
/**
 * Builds the property array of a file from the information the storage
 * provides, extended by file type, sha1 hash and file extension.
 *
 * @param string $identifier
 * @return array
 */
protected function gatherFileInformationArray($identifier)
{
    $properties = $this->transformFromDriverFileInfoArrayToFileObjectFormat(
        $this->storage->getFileInfoByIdentifier($identifier)
    );

    $properties['type'] = $this->getFileType($properties['mime_type']);
    $properties['sha1'] = $this->storage->hashFileByIdentifier($identifier, 'sha1');
    $properties['extension'] = PathUtility::pathinfo($properties['name'], PATHINFO_EXTENSION);
    // The information was just read from the storage, so the file is not missing.
    $properties['missing'] = 0;

    return $properties;
}
311 
/**
 * Maps a mime type to one of the File::FILETYPE_* constants, based on the
 * part of the mime type before the slash (compared case-insensitively).
 *
 * @param string $mimeType
 * @return int One of the File::FILETYPE_* constants
 */
protected function getFileType($mimeType)
{
    list($fileType) = explode('/', $mimeType);
    switch (strtolower($fileType)) {
        case 'text':
            $type = File::FILETYPE_TEXT;
            break;
        case 'image':
            $type = File::FILETYPE_IMAGE;
            break;
        case 'audio':
            $type = File::FILETYPE_AUDIO;
            break;
        case 'video':
            $type = File::FILETYPE_VIDEO;
            break;
        case 'application':
        case 'software':
            // Without this assignment $type would be undefined for these mime types.
            $type = File::FILETYPE_APPLICATION;
            break;
        default:
            $type = File::FILETYPE_UNKNOWN;
    }
    return $type;
}
343 
/**
 * Converts a file information array as delivered by the driver into the
 * property naming used for file objects.
 *
 * Known driver keys are renamed, "atime" is dropped, and every other key is
 * passed through unchanged.
 *
 * @param array $fileInfo
 * @return array
 */
protected function transformFromDriverFileInfoArrayToFileObjectFormat(array $fileInfo)
{
    // Driver key => file property name; null means the value is not taken over.
    $propertyNameByDriverKey = [
        'size' => 'size',
        'atime' => null,
        'mtime' => 'modification_date',
        'ctime' => 'creation_date',
        'mimetype' => 'mime_type'
    ];

    $fileProperties = [];
    foreach ($fileInfo as $driverKey => $value) {
        if (!array_key_exists($driverKey, $propertyNameByDriverKey)) {
            // Unknown keys are kept as they are.
            $fileProperties[$driverKey] = $value;
            continue;
        }
        $propertyName = $propertyNameByDriverKey[$driverKey];
        if ($propertyName === null) {
            continue;
        }
        $fileProperties[$propertyName] = $value;
    }

    return $fileProperties;
}
376 
/**
 * Returns the repository for file index records.
 *
 * @return FileIndexRepository
 */
protected function getFileIndexRepository()
{
    return FileIndexRepository::getInstance();
}
386 
/**
 * Returns the repository for file metadata records.
 *
 * @return MetaDataRepository
 */
protected function getMetaDataRepository()
{
    return MetaDataRepository::getInstance();
}
396 
/**
 * Returns the factory used to build file objects from index records.
 *
 * @return \TYPO3\CMS\Core\Resource\ResourceFactory
 */
protected function getResourceFactory()
{
    return \TYPO3\CMS\Core\Resource\ResourceFactory::getInstance();
}
406 
/**
 * Returns the registry of metadata extractors.
 *
 * @return ExtractorRegistry
 */
protected function getExtractorRegistry()
{
    return ExtractorRegistry::getInstance();
}
416 }
isFileTypeSupportedByExtractor(File $file, ExtractorInterface $extractor)
static pathinfo($path, $options=null)
static makeInstance($className,...$constructorArguments)