TYPO3 CMS  TYPO3_7-6
Indexer.php
Go to the documentation of this file.
1 <?php
3 
4 /*
5  * This file is part of the TYPO3 CMS project.
6  *
7  * It is free software; you can redistribute it and/or modify it under
8  * the terms of the GNU General Public License, either version 2
9  * of the License, or any later version.
10  *
11  * For the full copyright and license information, please read the
12  * LICENSE.txt file that was distributed with this source code.
13  *
14  * The TYPO3 project - inspiring people to share!
15  */
16 
23 
/**
 * Keeps the file index of a single storage in sync with the files that
 * actually exist in that storage, and triggers metadata extraction for
 * indexed files.
 */
class Indexer
{
    /**
     * Files that need to be (re-)indexed, keyed by file identifier.
     * The value is the existing index record, or null if the file has no index record yet.
     *
     * @var array
     */
    protected $filesToUpdate = [];

    /**
     * Uids of index records that could be matched to a file found in the storage.
     *
     * @var int[]
     */
    protected $identifiedFileUids = [];

    /**
     * The storage this indexer works on.
     *
     * @var \TYPO3\CMS\Core\Resource\ResourceStorage
     */
    protected $storage = null;

    /**
     * Extractors for the driver type of the storage; resolved on first use
     * by getExtractionServices().
     *
     * @var ExtractorInterface[]
     */
    protected $extractionServices = null;

    /**
     * @param \TYPO3\CMS\Core\Resource\ResourceStorage $storage The storage to index
     */
    public function __construct(\TYPO3\CMS\Core\Resource\ResourceStorage $storage)
    {
        $this->storage = $storage;
    }

    /**
     * Creates a new index record for the file with the given identifier and
     * extracts the metadata that is always required (see extractRequiredMetaData()).
     *
     * @param string $identifier Non-empty identifier of the file inside the storage
     * @return File The file object built from the newly added index record
     * @throws \InvalidArgumentException If the identifier is not a non-empty string
     */
    public function createIndexEntry($identifier)
    {
        if (!isset($identifier) || !is_string($identifier) || $identifier === '') {
            throw new \InvalidArgumentException('Invalid file identifier given. It must be of type string and not empty. "' . gettype($identifier) . '" given.', 1401732565);
        }
        $fileProperties = $this->gatherFileInformationArray($identifier);
        $record = $this->getFileIndexRepository()->addRaw($fileProperties);
        $fileObject = $this->getResourceFactory()->getFileObject($record['uid'], $record);
        $this->extractRequiredMetaData($fileObject);
        return $fileObject;
    }

    /**
     * Re-reads the file information from the storage, writes it to the given
     * file object and its index record, and refreshes the required metadata.
     *
     * @param File $fileObject The file whose index record should be updated
     * @return void
     */
    public function updateIndexEntry(File $fileObject)
    {
        $updatedInformation = $this->gatherFileInformationArray($fileObject->getIdentifier());
        $fileObject->updateProperties($updatedInformation);
        $this->getFileIndexRepository()->update($fileObject);
        $this->extractRequiredMetaData($fileObject);
    }

    /**
     * Synchronizes the index with the storage: detects changed and new files,
     * (re-)indexes them, and finally marks index records without a file as missing.
     *
     * @return void
     */
    public function processChangesInStorages()
    {
        // get all file-identifiers from the storage
        $availableFiles = $this->storage->getFileIdentifiersInFolder($this->storage->getRootLevelFolder(false)->getIdentifier(), true, true);
        $this->detectChangedFilesInStorage($availableFiles);
        // Work off the queue built above. This must run before detectMissingFiles(),
        // because it adds the uids of moved and newly indexed files to $identifiedFileUids.
        $this->processChangedAndNewFiles();

        $this->detectMissingFiles();
    }

    /**
     * Runs the metadata extraction for index records of this storage whose
     * indexing is still outstanding. Records whose file no longer exists are
     * marked as missing instead.
     *
     * @param int $maximumFileCount Passed on to the repository query as the record limit
     *                              (default -1; presumably "no limit" - confirm against
     *                              findInStorageWithIndexOutstanding())
     * @return void
     */
    public function runMetaDataExtraction($maximumFileCount = -1)
    {
        $fileIndexRecords = $this->getFileIndexRepository()->findInStorageWithIndexOutstanding($this->storage, $maximumFileCount);
        foreach ($fileIndexRecords as $indexRecord) {
            $fileObject = $this->getResourceFactory()->getFileObject($indexRecord['uid'], $indexRecord);

            // Check for existence of file before extraction
            if ($fileObject->exists()) {
                try {
                    $this->extractMetaData($fileObject);
                } catch (\TYPO3\CMS\Core\Resource\Exception\InsufficientFileAccessPermissionsException $e) {
                    // We skip files that are not accessible
                } catch (IllegalFileExtensionException $e) {
                    // We skip files that have an extension that we don't allow
                }
            } else {
                // Mark file as missing and continue with next record
                $this->getFileIndexRepository()->markFileAsMissing($indexRecord['uid']);
            }
        }
    }

    /**
     * Collects metadata for the given file from all applicable extractors,
     * merges it with the metadata the file already has, stores the result and
     * updates the indexing time of the file.
     *
     * @param File $fileObject The file to extract metadata for
     * @return void
     */
    public function extractMetaData(File $fileObject)
    {
        // Key 0 holds the metadata already known for the file.
        $newMetaData = [
            0 => $fileObject->_getMetaData()
        ];

        // Loop through available extractors and fetch metadata for the given file.
        // Results are keyed by extractor priority, so a later extractor with the
        // same priority replaces the result of an earlier one.
        foreach ($this->getExtractionServices() as $service) {
            if ($this->isFileTypeSupportedByExtractor($fileObject, $service) && $service->canProcess($fileObject)) {
                $newMetaData[$service->getPriority()] = $service->extractMetaData($fileObject, $newMetaData);
            }
        }

        // Sort metadata by priority so that merging happens in order of precedence.
        ksort($newMetaData);

        // Merge the collected metadata; entries with a higher key are merged
        // later and therefore override equally named fields of earlier entries.
        $metaData = [];
        foreach ($newMetaData as $data) {
            $metaData = array_merge($metaData, $data);
        }
        $fileObject->_updateMetaDataProperties($metaData);
        $this->getMetaDataRepository()->update($fileObject->getUid(), $metaData);
        $this->getFileIndexRepository()->updateIndexingTime($fileObject->getUid());
    }

    /**
     * Returns the extractors that support the driver type of the storage.
     * The list is fetched from the extractor registry once and then cached.
     *
     * @return ExtractorInterface[]
     */
    protected function getExtractionServices()
    {
        if ($this->extractionServices === null) {
            $this->extractionServices = $this->getExtractorRegistry()->getExtractorsWithDriverSupport($this->storage->getDriverType());
        }
        return $this->extractionServices;
    }

    /**
     * Marks index records as missing when they were not matched to a file
     * during this run and the storage confirms that the file does not exist.
     *
     * @return void
     */
    protected function detectMissingFiles()
    {
        $indexedNotExistentFiles = $this->getFileIndexRepository()->findInStorageAndNotInUidList(
            $this->storage,
            $this->identifiedFileUids
        );

        foreach ($indexedNotExistentFiles as $record) {
            // Double-check against the storage before flagging the record.
            if (!$this->storage->hasFile($record['identifier'])) {
                $this->getFileIndexRepository()->markFileAsMissing($record['uid']);
            }
        }
    }

    /**
     * Checks whether the extractor accepts the type of the given file.
     * An extractor without file type restrictions accepts every file type.
     *
     * @param File $file
     * @param ExtractorInterface $extractor
     * @return bool
     */
    protected function isFileTypeSupportedByExtractor(File $file, ExtractorInterface $extractor)
    {
        $isSupported = true;
        $fileTypeRestrictions = $extractor->getFileTypeRestrictions();
        if (!empty($fileTypeRestrictions) && !in_array($file->getType(), $fileTypeRestrictions)) {
            $isSupported = false;
        }
        return $isSupported;
    }

    /**
     * Compares the given files with their index records and queues every file
     * in $filesToUpdate that is new, modified, or flagged as missing in the index.
     * Uids of index records that match a file are collected in $identifiedFileUids.
     *
     * @param array $fileIdentifierArray Identifiers of the files found in the storage
     * @return void
     */
    protected function detectChangedFilesInStorage(array $fileIdentifierArray)
    {
        foreach ($fileIdentifierArray as $fileIdentifier) {
            // skip processed files
            if ($this->storage->isWithinProcessingFolder($fileIdentifier)) {
                continue;
            }
            // Get the modification time for file-identifier from the storage
            $modificationTime = $this->storage->getFileInfoByIdentifier($fileIdentifier, ['mtime']);
            // Look if the modification time in FS is higher than the one in database (key needed on timestamps)
            $indexRecord = $this->getFileIndexRepository()->findOneByStorageUidAndIdentifier($this->storage->getUid(), $fileIdentifier);

            if ($indexRecord !== false) {
                $this->identifiedFileUids[] = $indexRecord['uid'];

                // Queue for re-indexing when the timestamp differs or the record is flagged as missing.
                if ((int)$indexRecord['modification_date'] !== $modificationTime['mtime'] || $indexRecord['missing']) {
                    $this->filesToUpdate[$fileIdentifier] = $indexRecord;
                }
            } else {
                // No index record yet: queue as new file.
                $this->filesToUpdate[$fileIdentifier] = null;
            }
        }
    }

    /**
     * Works off the queue in $filesToUpdate. Files with an index record are
     * updated. For files without one, an index record with the same content
     * hash whose file is gone is re-used (treated as move/rename); otherwise
     * a new index record is created.
     *
     * @return void
     */
    protected function processChangedAndNewFiles()
    {
        foreach ($this->filesToUpdate as $identifier => $data) {
            if ($data == null) {
                // search for files with same content hash in indexed storage
                $fileHash = $this->storage->hashFileByIdentifier($identifier, 'sha1');
                $files = $this->getFileIndexRepository()->findByContentHash($fileHash);
                $fileObject = null;
                if (!empty($files)) {
                    foreach ($files as $fileIndexEntry) {
                        // check if file is missing then we assume it's moved/renamed
                        if (!$this->storage->hasFile($fileIndexEntry['identifier'])) {
                            $fileObject = $this->getResourceFactory()->getFileObject($fileIndexEntry['uid'], $fileIndexEntry);
                            $fileObject->updateProperties([
                                'identifier' => $identifier
                            ]);
                            $this->updateIndexEntry($fileObject);
                            $this->identifiedFileUids[] = $fileObject->getUid();
                            break;
                        }
                    }
                }
                // create new index when no missing file with same content hash is found
                if ($fileObject === null) {
                    $fileObject = $this->createIndexEntry($identifier);
                    $this->identifiedFileUids[] = $fileObject->getUid();
                }
            } else {
                // update existing file
                $fileObject = $this->getResourceFactory()->getFileObject($data['uid'], $data);
                $this->updateIndexEntry($fileObject);
            }
        }
    }

    /**
     * Stores width and height in the metadata of image files that live in a
     * storage with driver type "Local". Does nothing for all other files.
     *
     * @param File $fileObject
     * @return void
     */
    protected function extractRequiredMetaData(File $fileObject)
    {
        // since the core desperately needs image sizes in metadata table do this manually
        // prevent doing this for remote storages, remote storages must provide the data with extractors
        if ($fileObject->getType() == File::FILETYPE_IMAGE && $this->storage->getDriverType() === 'Local') {
            $rawFileLocation = $fileObject->getForLocalProcessing(false);
            $imageInfo = GeneralUtility::makeInstance(ImageInfo::class, $rawFileLocation);
            $metaData = [
                'width' => $imageInfo->getWidth(),
                'height' => $imageInfo->getHeight(),
            ];
            $this->getMetaDataRepository()->update($fileObject->getUid(), $metaData);
            $fileObject->_updateMetaDataProperties($metaData);
        }
    }

    /****************************
     *
     *         UTILITY
     *
     ****************************/

    /**
     * Builds the property array for an index record from the file information
     * reported by the storage, adding file type, sha1 hash and extension and
     * resetting the "missing" flag.
     *
     * @param string $identifier Identifier of the file inside the storage
     * @return array
     */
    protected function gatherFileInformationArray($identifier)
    {
        $fileInfo = $this->storage->getFileInfoByIdentifier($identifier);
        $fileInfo = $this->transformFromDriverFileInfoArrayToFileObjectFormat($fileInfo);
        $fileInfo['type'] = $this->getFileType($fileInfo['mime_type']);
        $fileInfo['sha1'] = $this->storage->hashFileByIdentifier($identifier, 'sha1');
        $fileInfo['extension'] = \TYPO3\CMS\Core\Utility\PathUtility::pathinfo($fileInfo['name'], PATHINFO_EXTENSION);
        $fileInfo['missing'] = 0;

        return $fileInfo;
    }

    /**
     * Maps the top-level part of a MIME type (the part before the slash,
     * compared case-insensitively) to one of the File::FILETYPE_* constants.
     *
     * @param string $mimeType MIME type such as "image/png"
     * @return int One of the File::FILETYPE_* constants; File::FILETYPE_UNKNOWN for unrecognized types
     */
    protected function getFileType($mimeType)
    {
        list($fileType) = explode('/', $mimeType);
        switch (strtolower($fileType)) {
            case 'text':
                $type = File::FILETYPE_TEXT;
                break;
            case 'image':
                $type = File::FILETYPE_IMAGE;
                break;
            case 'audio':
                $type = File::FILETYPE_AUDIO;
                break;
            case 'video':
                $type = File::FILETYPE_VIDEO;
                break;
            case 'application':
            case 'software':
                $type = File::FILETYPE_APPLICATION;
                break;
            default:
                $type = File::FILETYPE_UNKNOWN;
        }
        return $type;
    }

    /**
     * Renames the keys of a driver file info array to the property names used
     * for file objects. Keys mapped to null are dropped; keys without a
     * mapping are copied unchanged.
     *
     * @param array $fileInfo File information as delivered by the driver
     * @return array The same values under file object property names
     */
    protected function transformFromDriverFileInfoArrayToFileObjectFormat(array $fileInfo)
    {
        $mappingInfo = [
            // 'driverKey' => 'fileProperty' Key is from the driver, value is for the property in the file
            'size' => 'size',
            'atime' => null,
            'mtime' => 'modification_date',
            'ctime' => 'creation_date',
            'mimetype' => 'mime_type'
        ];
        $mappedFileInfo = [];
        foreach ($fileInfo as $key => $value) {
            if (array_key_exists($key, $mappingInfo)) {
                if ($mappingInfo[$key] !== null) {
                    $mappedFileInfo[$mappingInfo[$key]] = $value;
                }
            } else {
                $mappedFileInfo[$key] = $value;
            }
        }
        return $mappedFileInfo;
    }

    /**
     * Returns the repository used to read and write file index records.
     *
     * @return \TYPO3\CMS\Core\Resource\Index\FileIndexRepository
     */
    protected function getFileIndexRepository()
    {
        return \TYPO3\CMS\Core\Resource\Index\FileIndexRepository::getInstance();
    }

    /**
     * Returns the repository used to store file metadata.
     *
     * @return \TYPO3\CMS\Core\Resource\Index\MetaDataRepository
     */
    protected function getMetaDataRepository()
    {
        return \TYPO3\CMS\Core\Resource\Index\MetaDataRepository::getInstance();
    }

    /**
     * Returns the factory used to build file objects from index records.
     *
     * @return \TYPO3\CMS\Core\Resource\ResourceFactory
     */
    protected function getResourceFactory()
    {
        return \TYPO3\CMS\Core\Resource\ResourceFactory::getInstance();
    }

    /**
     * Returns the registry that knows the available metadata extractors.
     *
     * @return \TYPO3\CMS\Core\Resource\Index\ExtractorRegistry
     */
    protected function getExtractorRegistry()
    {
        return \TYPO3\CMS\Core\Resource\Index\ExtractorRegistry::getInstance();
    }
}
isFileTypeSupportedByExtractor(File $file, ExtractorInterface $extractor)
Definition: Indexer.php:198
__construct(ResourceStorage $storage)
Definition: Indexer.php:52
transformFromDriverFileInfoArrayToFileObjectFormat(array $fileInfo)
Definition: Indexer.php:367
_updateMetaDataProperties(array $properties)
Definition: File.php:247
updateProperties(array $properties)
Definition: File.php:205
static pathinfo($path, $options=null)
detectChangedFilesInStorage(array $fileIdentifierArray)
Definition: Indexer.php:214
runMetaDataExtraction($maximumFileCount=-1)
Definition: Indexer.php:107
updateIndexEntry(File $fileObject)
Definition: Indexer.php:82
extractRequiredMetaData(File $fileObject)
Definition: Indexer.php:285