TYPO3 CMS  TYPO3_6-2
Indexer.php
Go to the documentation of this file.
1 <?php
3 
20 
/**
 * Indexer for the files of one resource storage.
 *
 * Creates and updates file index records, detects changed, new, moved and
 * missing files inside the storage and runs the registered metadata
 * extractors on indexed files.
 */
class Indexer {

	/**
	 * Files that have to be (re-)indexed, keyed by file identifier.
	 * The value is the existing index record, or NULL when no index record
	 * exists for the identifier yet.
	 *
	 * @var array
	 */
	protected $filesToUpdate = array();

	/**
	 * Uids of all index records that were matched to a file found in the
	 * storage during the current run.
	 *
	 * @var array
	 */
	protected $identifiedFileUids = array();

	/**
	 * The storage this indexer works on.
	 *
	 * @var ResourceStorage
	 */
	protected $storage = NULL;

	/**
	 * @param ResourceStorage $storage The storage whose files are indexed
	 */
	public function __construct(ResourceStorage $storage) {
		$this->storage = $storage;
	}

	/**
	 * Creates a new index record for the given file identifier and returns
	 * the file object built from it.
	 *
	 * @param string $identifier Non-empty identifier of the file inside the storage
	 * @return File
	 * @throws \InvalidArgumentException If the identifier is not a non-empty string
	 */
	public function createIndexEntry($identifier) {
		// is_string() already rejects NULL, so no separate isset() check is needed
		if (!is_string($identifier) || $identifier === '') {
			throw new \InvalidArgumentException('Invalid file identifier given. It must be of type string and not empty. "' . gettype($identifier) . '" given.', 1401732565);
		}
		$fileProperties = $this->gatherFileInformationArray($identifier);
		$record = $this->getFileIndexRepository()->addRaw($fileProperties);
		$fileObject = $this->getResourceFactory()->getFileObject($record['uid'], $record);
		$this->extractRequiredMetaData($fileObject);
		return $fileObject;
	}

	/**
	 * Refreshes the index record of an already indexed file with the current
	 * file information from the storage.
	 *
	 * @param File $fileObject
	 * @return void
	 */
	public function updateIndexEntry(File $fileObject) {
		$updatedInformation = $this->gatherFileInformationArray($fileObject->getIdentifier());
		$fileObject->updateProperties($updatedInformation);
		$this->getFileIndexRepository()->update($fileObject);
		$this->extractRequiredMetaData($fileObject);
	}

	/**
	 * Synchronises the index with the storage: collects changed and new
	 * files, (re-)indexes them and finally flags index records whose file
	 * no longer exists.
	 *
	 * @return void
	 */
	public function processChangesInStorages() {
		// get all file-identifiers from the storage
		$availableFiles = $this->storage->getFileIdentifiersInFolder($this->storage->getRootLevelFolder(FALSE)->getIdentifier(), TRUE, TRUE);
		$this->detectChangedFilesInStorage($availableFiles);
		// Must run before detectMissingFiles(): it indexes the collected files and
		// registers the uids of new and moved files in $this->identifiedFileUids.
		$this->processChangedAndNewFiles();

		$this->detectMissingFiles();
	}

	/**
	 * Runs all applicable metadata extractors on the files returned by
	 * FileIndexRepository::findInStorageWithIndexOutstanding().
	 *
	 * @param integer $maximumFileCount Passed through to the repository query; the default of -1 presumably means "no limit" (confirm against FileIndexRepository)
	 * @return void
	 */
	public function runMetaDataExtraction($maximumFileCount = -1) {
		$fileIndexRecords = $this->getFileIndexRepository()->findInStorageWithIndexOutstanding($this->storage, $maximumFileCount);

		$extractionServices = $this->getExtractorRegistry()->getExtractorsWithDriverSupport($this->storage->getDriverType());
		foreach ($fileIndexRecords as $indexRecord) {
			$fileObject = $this->getResourceFactory()->getFileObject($indexRecord['uid'], $indexRecord);

			// Key 0 holds the metadata already known for the file.
			$newMetaData = array(
				0 => $fileObject->_getMetaData()
			);
			// NOTE(review): results are keyed by extractor priority, so two extractors
			// reporting the same priority (or a priority of 0) overwrite each other.
			foreach ($extractionServices as $service) {
				if ($service->canProcess($fileObject)) {
					$newMetaData[$service->getPriority()] = $service->extractMetaData($fileObject, $newMetaData);
				}
			}
			// Merge in ascending key order so that data stored under a higher
			// priority overrides data stored under a lower one.
			ksort($newMetaData);
			$metaData = array();
			foreach ($newMetaData as $data) {
				$metaData = array_merge($metaData, $data);
			}
			$fileObject->_updateMetaDataProperties($metaData);
			$this->getMetaDataRepository()->update($fileObject->getUid(), $metaData);
			$this->getFileIndexRepository()->updateIndexingTime($fileObject->getUid());
		}
	}

	/**
	 * Marks index records as missing when they were not matched to a file
	 * during this run and the storage confirms the file does not exist.
	 *
	 * @return void
	 */
	protected function detectMissingFiles() {
		$indexedNotExistentFiles = $this->getFileIndexRepository()->findInStorageAndNotInUidList(
			$this->storage,
			$this->identifiedFileUids
		);

		foreach ($indexedNotExistentFiles as $record) {
			if (!$this->storage->hasFile($record['identifier'])) {
				$this->getFileIndexRepository()->markFileAsMissing($record['uid']);
			}
		}
	}

	/**
	 * Compares the given file identifiers with the index and collects every
	 * file that is new, modified or currently flagged as missing in
	 * $this->filesToUpdate.
	 *
	 * @param array $fileIdentifierArray File identifiers found in the storage
	 * @return void
	 */
	protected function detectChangedFilesInStorage(array $fileIdentifierArray) {
		foreach ($fileIdentifierArray as $fileIdentifier) {
			// skip processed files
			if ($this->storage->isWithinProcessingFolder($fileIdentifier)) {
				continue;
			}
			// Get the modification time for file-identifier from the storage
			$modificationTime = $this->storage->getFileInfoByIdentifier($fileIdentifier, array('mtime'));
			// Look if the modification time in FS is higher than the one in database (key needed on timestamps)
			$indexRecord = $this->getFileIndexRepository()->findOneByStorageUidAndIdentifier($this->storage->getUid(), $fileIdentifier);

			if ($indexRecord !== FALSE) {
				$this->identifiedFileUids[] = $indexRecord['uid'];

				if ($indexRecord['modification_date'] < $modificationTime['mtime'] || $indexRecord['missing']) {
					$this->filesToUpdate[$fileIdentifier] = $indexRecord;
				}
			} else {
				// NULL marks a file without any index record
				$this->filesToUpdate[$fileIdentifier] = NULL;
			}
		}
	}

	/**
	 * Indexes all files collected in $this->filesToUpdate.
	 *
	 * Files without an index record are first matched by content hash
	 * against index records whose file no longer exists (treated as moved or
	 * renamed); only if no such record is found a new index record is
	 * created. Files with an index record are simply updated.
	 *
	 * @return void
	 */
	protected function processChangedAndNewFiles() {
		foreach ($this->filesToUpdate as $identifier => $data) {
			// Strict comparison: only the explicit NULL marker means "not indexed yet"
			if ($data === NULL) {
				// search for files with same content hash in indexed storage
				$fileHash = $this->storage->hashFileByIdentifier($identifier, 'sha1');
				$files = $this->getFileIndexRepository()->findByContentHash($fileHash);
				$fileObject = NULL;
				if (!empty($files)) {
					foreach ($files as $fileIndexEntry) {
						// check if file is missing then we assume it's moved/renamed
						if (!$this->storage->hasFile($fileIndexEntry['identifier'])) {
							$fileObject = $this->getResourceFactory()->getFileObject($fileIndexEntry['uid'], $fileIndexEntry);
							$fileObject->updateProperties(array(
								'identifier' => $identifier
							));
							$this->updateIndexEntry($fileObject);
							$this->identifiedFileUids[] = $fileObject->getUid();
							break;
						}
					}
				}
				// create new index when no missing file with same content hash is found
				if ($fileObject === NULL) {
					$fileObject = $this->createIndexEntry($identifier);
					$this->identifiedFileUids[] = $fileObject->getUid();
				}
			} else {
				// update existing file
				$fileObject = $this->getResourceFactory()->getFileObject($data['uid'], $data);
				$this->updateIndexEntry($fileObject);
			}
		}
	}

	/**
	 * Writes width and height of image files on "Local" driver storages into
	 * the metadata of the file.
	 *
	 * @param File $fileObject
	 * @return void
	 */
	protected function extractRequiredMetaData(File $fileObject) {
		// since the core desperately needs image sizes in metadata table do this manually
		// prevent doing this for remote storages, remote storages must provide the data with extractors
		if ($fileObject->getType() == File::FILETYPE_IMAGE && $this->storage->getDriverType() === 'Local') {
			$rawFileLocation = $fileObject->getForLocalProcessing(FALSE);
			$imageInfo = GeneralUtility::makeInstance('TYPO3\\CMS\\Core\\Type\\File\\ImageInfo', $rawFileLocation);
			$metaData = array(
				'width' => $imageInfo->getWidth(),
				'height' => $imageInfo->getHeight(),
			);
			$this->getMetaDataRepository()->update($fileObject->getUid(), $metaData);
			$fileObject->_updateMetaDataProperties($metaData);
		}
	}

	/****************************
	 *
	 *         UTILITY
	 *
	 ****************************/

	/**
	 * Collects the properties of a file as they are stored in the index:
	 * the mapped driver information plus file type, sha1 hash, extension and
	 * a cleared "missing" flag.
	 *
	 * @param string $identifier Identifier of the file inside the storage
	 * @return array
	 */
	protected function gatherFileInformationArray($identifier) {
		$fileInfo = $this->storage->getFileInfoByIdentifier($identifier);
		$fileInfo = $this->transformFromDriverFileInfoArrayToFileObjectFormat($fileInfo);
		$fileInfo['type'] = $this->getFileType($fileInfo['mime_type']);
		$fileInfo['sha1'] = $this->storage->hashFileByIdentifier($identifier, 'sha1');
		$fileInfo['extension'] = \TYPO3\CMS\Core\Utility\PathUtility::pathinfo($fileInfo['name'], PATHINFO_EXTENSION);
		$fileInfo['missing'] = 0;

		return $fileInfo;
	}

	/**
	 * Maps the part of a MIME type before the first slash (compared
	 * case-insensitively) to one of the File::FILETYPE_* constants.
	 *
	 * @param string $mimeType MIME type such as "image/png"
	 * @return integer File::FILETYPE_UNKNOWN for unrecognised types
	 */
	protected function getFileType($mimeType) {
		list($fileType) = explode('/', $mimeType);
		switch (strtolower($fileType)) {
			case 'text':
				$type = File::FILETYPE_TEXT;
				break;
			case 'image':
				$type = File::FILETYPE_IMAGE;
				break;
			case 'audio':
				$type = File::FILETYPE_AUDIO;
				break;
			case 'video':
				$type = File::FILETYPE_VIDEO;
				break;
			case 'application':
			case 'software':
				// Without this assignment $type would be undefined at the return below
				$type = File::FILETYPE_APPLICATION;
				break;
			default:
				$type = File::FILETYPE_UNKNOWN;
		}
		return $type;
	}

	/**
	 * Renames the keys of a driver file info array to the property names
	 * used by file objects.
	 *
	 * Keys mapped to NULL are dropped, keys without a mapping are copied
	 * unchanged. The order of the input array is preserved.
	 *
	 * @param array $fileInfo File information as delivered by the driver
	 * @return array
	 */
	protected function transformFromDriverFileInfoArrayToFileObjectFormat(array $fileInfo) {
		$mappingInfo = array(
			// 'driverKey' => 'fileProperty' Key is from the driver, value is for the property in the file
			'size' => 'size',
			'atime' => NULL,
			'mtime' => 'modification_date',
			'ctime' => 'creation_date',
			'mimetype' => 'mime_type'
		);
		$mappedFileInfo = array();
		foreach ($fileInfo as $key => $value) {
			if (array_key_exists($key, $mappingInfo)) {
				if ($mappingInfo[$key] !== NULL) {
					$mappedFileInfo[$mappingInfo[$key]] = $value;
				}
			} else {
				$mappedFileInfo[$key] = $value;
			}
		}
		return $mappedFileInfo;
	}

	/**
	 * Returns the repository for file index records.
	 *
	 * @return \TYPO3\CMS\Core\Resource\Index\FileIndexRepository
	 */
	protected function getFileIndexRepository() {
		return \TYPO3\CMS\Core\Resource\Index\FileIndexRepository::getInstance();
	}

	/**
	 * Returns the repository for file metadata records.
	 *
	 * @return \TYPO3\CMS\Core\Resource\Index\MetaDataRepository
	 */
	protected function getMetaDataRepository() {
		return \TYPO3\CMS\Core\Resource\Index\MetaDataRepository::getInstance();
	}

	/**
	 * Returns the resource factory used to build file objects.
	 *
	 * @return \TYPO3\CMS\Core\Resource\ResourceFactory
	 */
	protected function getResourceFactory() {
		return \TYPO3\CMS\Core\Resource\ResourceFactory::getInstance();
	}

	/**
	 * Returns the registry of metadata extractors.
	 *
	 * @return \TYPO3\CMS\Core\Resource\Index\ExtractorRegistry
	 */
	protected function getExtractorRegistry() {
		return \TYPO3\CMS\Core\Resource\Index\ExtractorRegistry::getInstance();
	}
}
__construct(ResourceStorage $storage)
Definition: Indexer.php:44
transformFromDriverFileInfoArrayToFileObjectFormat(array $fileInfo)
Definition: Indexer.php:292
_updateMetaDataProperties(array $properties)
Definition: File.php:251
updateProperties(array $properties)
Definition: File.php:210
static pathinfo($path, $options=NULL)
detectChangedFilesInStorage(array $fileIdentifierArray)
Definition: Indexer.php:144
runMetaDataExtraction($maximumFileCount=-1)
Definition: Indexer.php:95
updateIndexEntry(File $fileObject)
Definition: Indexer.php:72
extractRequiredMetaData(File $fileObject)
Definition: Indexer.php:213