TYPO3 CMS  TYPO3_8-7
LostFilesCommand.php
Go to the documentation of this file.
1 <?php
2 declare(strict_types = 1);
4 
5 /*
6  * This file is part of the TYPO3 CMS project.
7  *
8  * It is free software; you can redistribute it and/or modify it under
9  * the terms of the GNU General Public License, either version 2
10  * of the License, or any later version.
11  *
12  * For the full copyright and license information, please read the
13  * LICENSE.txt file that was distributed with this source code.
14  *
15  * The TYPO3 project - inspiring people to share!
16  */
17 
27 
/**
 * CLI command that finds files below uploads/ (or custom paths) which have no
 * entry in the reference index (sys_refindex) and deletes them, unless the
 * "--dry-run" option is given.
 */
class LostFilesCommand extends Command
{
    /**
     * Sets the description, the help text and the options
     * ("exclude", "dry-run", "update-refindex", "custom-path") of this command.
     */
    public function configure()
    {
        $this
            ->setDescription('Looking for files in the uploads/ folder which does not have a reference in TYPO3 managed records.')
            ->setHelp('
Assumptions:
- a perfect integrity of the reference index table (always update the reference index table before using this tool!)
- that all contents in the uploads folder are files attached to TCA records and exclusively managed by DataHandler through "group" type fields
- index.html, .htaccess files and RTEmagic* image files (ignored)
- Files found in deleted records are included (otherwise you would see a false list of lost files)

The assumptions are not requirements by the TYPO3 API but reflects the de facto implementation of most TYPO3 installations and therefore a practical approach to cleaning up the uploads/ or custom folder.
Therefore, if all "group" type fields in TCA and flexforms are positioned inside the uploads/ folder and if no files inside are managed manually it should be safe to clean out files with no relations found in the system.
Under such circumstances there should theoretically be no lost files in the uploads/ or custom folder since DataHandler should have managed relations automatically including adding and deleting files.
However, there is at least one reason known to why files might be found lost and that is when FlexForms are used. In such a case a change of/in the Data Structure XML (or the ability of the system to find the Data Structure definition!) used for the flexform could leave lost files behind. This is not unlikely to happen when records are deleted. More details can be found in a note to the function FlexFormTools->getDataStructureIdentifier()
Another scenario could of course be de-installation of extensions which managed files in the uploads/ or custom folders.

If the option "--dry-run" is not set, the files are then deleted automatically.
Warning: First, make sure those files are not used somewhere TYPO3 does not know about! See the assumptions above.

If you want to get more detailed information, use the --verbose option.')
            ->addOption(
                'exclude',
                null,
                InputOption::VALUE_REQUIRED,
                'Comma-separated list of paths that should be excluded, e.g. "uploads/pics,uploads/media"'
            )
            ->addOption(
                'dry-run',
                null,
                InputOption::VALUE_NONE,
                'If this option is set, the files will not actually be deleted, but just the output which files would be deleted are shown'
            )
            ->addOption(
                'update-refindex',
                null,
                InputOption::VALUE_NONE,
                'Setting this option automatically updates the reference index and does not ask on command line. Alternatively, use -n to avoid the interactive mode'
            )
            ->addOption(
                'custom-path',
                null,
                InputOption::VALUE_REQUIRED,
                'Comma separated list of paths to process. Example: "fileadmin/[path1],fileadmin/[path2],...", if not passed, uploads/ will be used by default.'
            );
    }

    /**
     * Runs the command: optionally refreshes the reference index, collects the
     * lost files and hands them over for deletion.
     *
     * The closing message reflects what really happened: a dry run is reported
     * as such, and a partially failed deletion is reported as a warning with
     * the number of files that were actually removed.
     *
     * @param InputInterface $input
     * @param OutputInterface $output
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        // Make sure the _cli_ user is loaded
        Bootstrap::getInstance()->initializeBackendAuthentication();

        $io = new SymfonyStyle($input, $output);
        $io->title($this->getDescription());

        $dryRun = $input->hasOption('dry-run') && (bool)$input->getOption('dry-run');

        $this->updateReferenceIndex($input, $io);

        // Paths the user wants to keep untouched
        $excludedPaths = [];
        if ($input->hasOption('exclude') && !empty($input->getOption('exclude'))) {
            $excludedPaths = GeneralUtility::trimExplode(',', $input->getOption('exclude'), true);
        }

        // Paths to scan instead of the default uploads/ folder
        $customPaths = '';
        if ($input->hasOption('custom-path') && !empty($input->getOption('custom-path'))) {
            $customPaths = $input->getOption('custom-path');
        }

        $lostFiles = $this->findLostFiles($excludedPaths, $customPaths);
        $lostFileCount = count($lostFiles);

        if ($lostFileCount === 0) {
            $io->success('Nothing to do, no lost files found');
            return;
        }

        if (!$io->isQuiet()) {
            $io->note('Found ' . $lostFileCount . ' lost files, ready to be deleted.');
            if ($io->isVerbose()) {
                $io->listing($lostFiles);
            }
        }

        $deletedFileCount = (int)$this->deleteLostFiles($lostFiles, $dryRun, $io);

        if ($dryRun) {
            // Nothing was touched, so do not claim a deletion
            $io->success('Dry run: ' . $lostFileCount . ' lost files would have been deleted, no file was removed.');
        } elseif ($deletedFileCount === $lostFileCount) {
            $io->success('Deleted ' . $deletedFileCount . ' lost files.');
        } else {
            $io->warning('Deleted ' . $deletedFileCount . ' of ' . $lostFileCount . ' lost files, see the errors above.');
        }
    }

    /**
     * Updates the reference index if the "--update-refindex" option is set or,
     * in interactive mode, if the user confirms the question (default answer
     * is "no"). Otherwise only a hint is printed.
     *
     * @param InputInterface $input holds information about entered parameters
     * @param SymfonyStyle $io necessary for outputting information
     */
    protected function updateReferenceIndex(InputInterface $input, SymfonyStyle $io)
    {
        $io->note('Finding lost files managed by TYPO3 requires a clean reference index (sys_refindex)');

        $updateReferenceIndex = false;
        if ($input->hasOption('update-refindex') && $input->getOption('update-refindex')) {
            $updateReferenceIndex = true;
        } elseif ($input->isInteractive()) {
            $updateReferenceIndex = $io->confirm('Should the reference index be updated right now?', false);
        }

        if (!$updateReferenceIndex) {
            $io->writeln('Reference index is assumed to be up to date, continuing.');
            return;
        }

        $referenceIndex = GeneralUtility::makeInstance(ReferenceIndex::class);
        $referenceIndex->updateIndex(false, !$io->isQuiet());
    }

    /**
     * Collects all files below the given custom paths (or below uploads/ if no
     * custom path is given) that have no non-soft-reference "_FILE" entry in
     * sys_refindex.
     *
     * Skipped without a lookup: "index.html" and ".htaccess" files, files
     * named "RTEmagicP_*" / "RTEmagicC_*" and files whose site-relative path
     * starts with one of the excluded paths.
     *
     * @param array $excludedPaths list of site-relative path prefixes to skip
     * @param string $customPaths comma-separated list of site-relative paths to scan
     * @return array site-relative paths of the files considered lost
     * @throws \Exception if a custom path does not exist or lies outside of PATH_site
     */
    protected function findLostFiles($excludedPaths = [], $customPaths = ''): array
    {
        $lostFiles = [];

        // Get all files
        $files = [];
        if (!empty($customPaths)) {
            // Compare separator-terminated paths, otherwise a sibling such as
            // "<site>-backup" would pass a plain string-prefix check.
            $siteRoot = realpath(PATH_site);
            $siteRootPrefix = $siteRoot === false ? '' : rtrim($siteRoot, '/\\') . DIRECTORY_SEPARATOR;

            foreach (GeneralUtility::trimExplode(',', $customPaths, true) as $customPath) {
                $resolvedPath = realpath(PATH_site . $customPath);
                if ($resolvedPath === false
                    || $siteRootPrefix === ''
                    || strpos(rtrim($resolvedPath, '/\\') . DIRECTORY_SEPARATOR, $siteRootPrefix) !== 0
                ) {
                    throw new \Exception('The path: "' . $customPath . '" is invalid', 1450086736);
                }
                // Hand the folder over with a trailing slash, like the default "uploads/" call below
                $files = GeneralUtility::getAllFilesAndFoldersInPath(
                    $files,
                    PATH_site . rtrim($customPath, '/') . '/'
                );
            }
        } else {
            $files = GeneralUtility::getAllFilesAndFoldersInPath($files, PATH_site . 'uploads/');
        }

        $files = GeneralUtility::removePrefixPathFromList($files, PATH_site);

        $connectionPool = GeneralUtility::makeInstance(ConnectionPool::class);

        // Traverse files and for each, look up if it is found in the reference index.
        foreach ($files as $value) {
            // First, allow "index.html", ".htaccess" files since they are often used for good reasons
            if (substr($value, -11) === '/index.html' || substr($value, -10) === '/.htaccess') {
                continue;
            }

            // RTEmagic images (RTEmagicP_* / RTEmagicC_*) are allowed as well
            if (preg_match('/^RTEmagic[PC]_/', basename($value))) {
                continue;
            }

            $fileIsInExcludedPath = false;
            foreach ($excludedPaths as $exclPath) {
                if (GeneralUtility::isFirstPartOfStr($value, $exclPath)) {
                    $fileIsInExcludedPath = true;
                    break;
                }
            }
            if ($fileIsInExcludedPath) {
                continue;
            }

            // A fresh query builder per file keeps the bound parameters from piling up.
            $queryBuilder = $connectionPool->getQueryBuilderForTable('sys_refindex');

            // Looking for a reference from a field which is NOT a soft reference
            // (thus, only fields with a proper TCA/Flexform configuration).
            // One row is enough to know the file is referenced; fetching it avoids
            // relying on rowCount() for a SELECT statement.
            $referenceHash = $queryBuilder
                ->select('hash')
                ->from('sys_refindex')
                ->where(
                    $queryBuilder->expr()->eq(
                        'ref_table',
                        $queryBuilder->createNamedParameter('_FILE', \PDO::PARAM_STR)
                    ),
                    $queryBuilder->expr()->eq(
                        'ref_string',
                        $queryBuilder->createNamedParameter($value, \PDO::PARAM_STR)
                    ),
                    $queryBuilder->expr()->eq(
                        'softref_key',
                        $queryBuilder->createNamedParameter('', \PDO::PARAM_STR)
                    )
                )
                ->setMaxResults(1)
                ->execute()
                ->fetchColumn(0);

            // No reference at all: we conclude that the file is lost
            if ($referenceHash === false) {
                $lostFiles[] = $value;
            }
        }

        return $lostFiles;
    }

    /**
     * Deletes the given files from the file system unless $dryRun is set.
     *
     * A file that cannot be resolved, does not exist or cannot be removed is
     * reported as an error and does not count as deleted.
     *
     * @param array $lostFiles site-relative paths of the files to remove
     * @param bool $dryRun if set, nothing is deleted; the candidates are only listed in very verbose mode
     * @param SymfonyStyle $io the IO object for output
     * @return int number of files that were actually deleted (always 0 for a dry run)
     */
    protected function deleteLostFiles(array $lostFiles, bool $dryRun, SymfonyStyle $io)
    {
        $deletedFileCount = 0;

        foreach ($lostFiles as $lostFile) {
            $absoluteFileName = (string)GeneralUtility::getFileAbsFileName($lostFile);
            // Fall back to the relative name so messages never show an empty path
            $displayName = $absoluteFileName !== '' ? $absoluteFileName : $lostFile;

            if ($dryRun) {
                if ($io->isVeryVerbose()) {
                    $io->writeln('Would delete file "' . $displayName . '"');
                }
                continue;
            }

            if ($io->isVeryVerbose()) {
                $io->writeln('Deleting file "' . $displayName . '"');
            }

            if ($absoluteFileName === '' || !@is_file($absoluteFileName)) {
                $io->error('File "' . $displayName . '" was not found!');
                continue;
            }

            if (!unlink($absoluteFileName)) {
                $io->error('File "' . $absoluteFileName . '" could not be deleted!');
                continue;
            }

            $deletedFileCount++;
            if (!$io->isQuiet()) {
                $io->writeln('Permanently deleted file "' . $absoluteFileName . '".');
            }
        }

        return $deletedFileCount;
    }
}
updateReferenceIndex(InputInterface $input, SymfonyStyle $io)
static isFirstPartOfStr($str, $partStr)
deleteLostFiles(array $lostFiles, bool $dryRun, SymfonyStyle $io)
static getFileAbsFileName($filename, $_=null, $_2=null)
execute(InputInterface $input, OutputInterface $output)
static trimExplode($delim, $string, $removeEmptyValues=false, $limit=0)
static makeInstance($className,... $constructorArguments)
static getAllFilesAndFoldersInPath(array $fileArr, $path, $extList='', $regDirs=false, $recursivityLevels=99, $excludePattern='')
findLostFiles($excludedPaths=[], $customPaths='')
static removePrefixPathFromList(array $fileArr, $prefixToRemove)