xref: /OpenGrok/opengrok-indexer/src/main/java/org/opengrok/indexer/index/PendingFileCompleter.java (revision 7d63a44f8b0d8fed25b0d6b3b3476d9eeaa744f1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2017, 2020, Chris Fraire <cfraire@me.com>.
22  * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
23  */
24 package org.opengrok.indexer.index;
25 
26 import java.io.File;
27 import java.io.IOException;
28 import java.nio.file.DirectoryStream;
29 import java.nio.file.FileAlreadyExistsException;
30 import java.nio.file.FileVisitResult;
31 import java.nio.file.Files;
32 import java.nio.file.Path;
33 import java.nio.file.Paths;
34 import java.nio.file.SimpleFileVisitor;
35 import java.nio.file.StandardCopyOption;
36 import java.nio.file.attribute.BasicFileAttributes;
37 import java.time.Duration;
38 import java.time.Instant;
39 import java.util.Comparator;
40 import java.util.List;
41 import java.util.Map;
42 import java.util.Set;
43 import java.util.TreeSet;
44 import java.util.concurrent.ConcurrentHashMap;
45 import java.util.logging.Level;
46 import java.util.logging.Logger;
47 import java.util.stream.Collectors;
48 
49 import org.opengrok.indexer.logger.LoggerFactory;
50 import org.opengrok.indexer.util.Progress;
51 import org.opengrok.indexer.util.StringUtils;
52 import org.opengrok.indexer.util.TandemPath;
53 
54 /**
55  * Represents a tracker of pending file deletions and renamings that can later
56  * be executed.
57  * <p>
58  * {@link PendingFileCompleter} is not generally thread-safe, as only
59  * {@link #add(org.opengrok.indexer.index.PendingFileRenaming)},
60  * {@link #add(org.opengrok.indexer.index.PendingSymlinkage)} and
61  * {@link #add(org.opengrok.indexer.index.PendingFileDeletion)} are expected
62  * to be run in parallel; these methods are thread-safe w.r.t. underlying data structures.
63  * <p>
64  * No other methods are thread-safe between each other. E.g.,
65  * {@link #complete()} should only be called by a single thread after all
66  * additions of {@link PendingSymlinkage}s, {@link PendingFileDeletion}s, and
67  * {@link PendingFileRenaming}s are indicated.
68  */
69 class PendingFileCompleter {
70 
71     /**
72      * An extension that should be used as the suffix of files for
73      * {@link PendingFileRenaming} actions.
74      * <p>Value is {@code ".org_opengrok"}.
75      */
76     public static final String PENDING_EXTENSION = ".org_opengrok";
77 
78     private static final Logger LOGGER = LoggerFactory.getLogger(PendingFileCompleter.class);
79 
80     private volatile boolean completing;
81 
82     /**
83      * Descending path segment length comparator.
84      */
85     private static final Comparator<File> DESC_PATHLEN_COMPARATOR =
86         (File f1, File f2) -> {
87             String s1 = f1.getAbsolutePath();
88             String s2 = f2.getAbsolutePath();
89             int n1 = countPathSegments(s1);
90             int n2 = countPathSegments(s2);
91             // DESC: s2 no. of segments <=> s1 no. of segments
92             int cmp = Integer.compare(n2, n1);
93             if (cmp != 0) {
94                 return cmp;
95             }
96 
97             // the Comparator must also be "consistent with equals", so check
98             // string contents too when (length)cmp == 0. (ASC: s1 <=> s2.)
99             cmp = s1.compareTo(s2);
100             return cmp;
101     };
102 
103     private final Set<PendingFileDeletion> deletions = ConcurrentHashMap.newKeySet();
104 
105     private final Set<PendingFileRenaming> renames = ConcurrentHashMap.newKeySet();
106 
107     private final Set<PendingSymlinkage> linkages = ConcurrentHashMap.newKeySet();
108 
109     /**
110      * Adds the specified element to this instance's set if it is not already
111      * present.
112      * @param e element to be added to this set
113      * @return {@code true} if this instance's set did not already contain the
114      * specified element
115      * @throws IllegalStateException if {@link #complete()} is running
116      */
add(PendingFileDeletion e)117     public boolean add(PendingFileDeletion e) {
118         if (completing) {
119             throw new IllegalStateException("complete() is running");
120         }
121         return deletions.add(e);
122     }
123 
124     /**
125      * Adds the specified element to this instance's set if it is not already
126      * present.
127      * @param e element to be added to this set
128      * @return {@code true} if this instance's set did not already contain the
129      * specified element
130      * @throws IllegalStateException if {@link #complete()} is running
131      */
add(PendingSymlinkage e)132     public boolean add(PendingSymlinkage e) {
133         if (completing) {
134             throw new IllegalStateException("complete() is running");
135         }
136         return linkages.add(e);
137     }
138 
139     /**
140      * Adds the specified element to this instance's set if it is not already
141      * present, and also remove any pending deletion for the same absolute path.
142      * @param e element to be added to this set
143      * @return {@code true} if this instance's set did not already contain the
144      * specified element
145      * @throws IllegalStateException if {@link #complete()} is running
146      */
add(PendingFileRenaming e)147     public boolean add(PendingFileRenaming e) {
148         if (completing) {
149             throw new IllegalStateException("complete() is running");
150         }
151         boolean rc = renames.add(e);
152         deletions.remove(new PendingFileDeletion(e.getAbsolutePath()));
153         return rc;
154     }
155 
156     /**
157      * Complete all the tracked file operations: first in a stage for pending
158      * deletions, next in a stage for pending renamings, and finally in a stage
159      * for pending symbolic linkages.
160      * <p>
161      * All operations in each stage are tried in parallel, and any failure is
162      * caught and raises an exception (after all items in the stage have been
163      * tried).
164      * <p>
165      * Deletions are tried for each
166      * {@link PendingFileDeletion#getAbsolutePath()}; for a version of the path
167      * with {@link #PENDING_EXTENSION} appended; and also for the path's parent
168      * directory, which does nothing if the directory is not empty.
169      * @return the number of successful operations
170      * @throws java.io.IOException if an I/O error occurs
171      */
complete()172     public int complete() throws IOException {
173         completing = true;
174         try {
175             return completeInner();
176         } finally {
177             completing = false;
178         }
179     }
180 
completeInner()181     private int completeInner() throws IOException {
182         Instant start = Instant.now();
183         int numDeletions = completeDeletions();
184         if (LOGGER.isLoggable(Level.FINE)) {
185             LOGGER.log(Level.FINE, "deleted {0} file(s) (took {1})",
186                     new Object[] {numDeletions, StringUtils.getReadableTime(
187                             Duration.between(start, Instant.now()).toMillis())});
188         }
189 
190         start = Instant.now();
191         int numRenamings = completeRenamings();
192         if (LOGGER.isLoggable(Level.FINE)) {
193             LOGGER.log(Level.FINE, "renamed {0} file(s) (took {1})",
194                     new Object[] {numRenamings, StringUtils.getReadableTime(
195                             Duration.between(start, Instant.now()).toMillis())});
196         }
197 
198         start = Instant.now();
199         int numLinkages = completeLinkages();
200         if (LOGGER.isLoggable(Level.FINE)) {
201             LOGGER.log(Level.FINE, "affirmed links for {0} path(s) (took {1})",
202                     new Object[] {numLinkages, StringUtils.getReadableTime(
203                             Duration.between(start, Instant.now()).toMillis())});
204         }
205 
206         return numDeletions + numRenamings + numLinkages;
207     }
208 
209     /**
210      * Attempts to rename all the tracked elements, catching any failures, and
211      * throwing an exception if any failed.
212      * @return the number of successful renamings
213      */
completeRenamings()214     private int completeRenamings() throws IOException {
215         int numPending = renames.size();
216         int numFailures = 0;
217 
218         if (numPending < 1) {
219             return 0;
220         }
221 
222         List<PendingFileRenamingExec> pendingExecs = renames.
223             parallelStream().map(f ->
224             new PendingFileRenamingExec(f.getTransientPath(),
225                 f.getAbsolutePath())).collect(
226             Collectors.toList());
227         Map<Boolean, List<PendingFileRenamingExec>> bySuccess;
228 
229         try (Progress progress = new Progress(LOGGER, "pending renames", numPending)) {
230             bySuccess = pendingExecs.parallelStream().collect(
231                             Collectors.groupingByConcurrent((x) -> {
232                                 progress.increment();
233                                 try {
234                                     doRename(x);
235                                     return true;
236                                 } catch (IOException e) {
237                                     x.exception = e;
238                                     return false;
239                                 }
240                             }));
241         }
242         renames.clear();
243 
244         List<PendingFileRenamingExec> failures = bySuccess.getOrDefault(
245             Boolean.FALSE, null);
246         if (failures != null && failures.size() > 0) {
247             numFailures = failures.size();
248             double pctFailed = 100.0 * numFailures / numPending;
249             String exmsg = String.format(
250                 "%d failures (%.1f%%) while renaming pending files",
251                 numFailures, pctFailed);
252             throw new IOException(exmsg, failures.get(0).exception);
253         }
254 
255         return numPending - numFailures;
256     }
257 
258     /**
259      * Attempts to delete all the tracked elements, catching any failures, and
260      * throwing an exception if any failed.
261      * @return the number of successful deletions
262      */
completeDeletions()263     private int completeDeletions() throws IOException {
264         int numPending = deletions.size();
265         int numFailures = 0;
266 
267         if (numPending < 1) {
268             return 0;
269         }
270 
271         List<PendingFileDeletionExec> pendingExecs = deletions.
272             parallelStream().map(f ->
273             new PendingFileDeletionExec(f.getAbsolutePath())).collect(
274             Collectors.toList());
275         Map<Boolean, List<PendingFileDeletionExec>> bySuccess;
276 
277         try (Progress progress = new Progress(LOGGER, "pending deletions", numPending)) {
278             bySuccess = pendingExecs.parallelStream().collect(
279                             Collectors.groupingByConcurrent((x) -> {
280                                 progress.increment();
281                                 doDelete(x);
282                                 return true;
283                             }));
284         }
285         deletions.clear();
286 
287         List<PendingFileDeletionExec> successes = bySuccess.getOrDefault(
288             Boolean.TRUE, null);
289         if (successes != null) {
290             tryDeleteParents(successes);
291         }
292 
293         List<PendingFileDeletionExec> failures = bySuccess.getOrDefault(
294             Boolean.FALSE, null);
295         if (failures != null && failures.size() > 0) {
296             numFailures = failures.size();
297             double pctFailed = 100.0 * numFailures / numPending;
298             String exmsg = String.format(
299                 "%d failures (%.1f%%) while deleting pending files",
300                 numFailures, pctFailed);
301             throw new IOException(exmsg, failures.get(0).exception);
302         }
303 
304         return numPending - numFailures;
305     }
306 
307     /**
308      * Attempts to link the tracked elements, catching any failures, and
309      * throwing an exception if any failed.
310      * @return the number of successful linkages
311      */
completeLinkages()312     private int completeLinkages() throws IOException {
313         int numPending = linkages.size();
314         int numFailures = 0;
315 
316         if (numPending < 1) {
317             return 0;
318         }
319 
320         List<PendingSymlinkageExec> pendingExecs =
321             linkages.parallelStream().map(f ->
322                 new PendingSymlinkageExec(f.getSourcePath(),
323                         f.getTargetRelPath())).collect(Collectors.toList());
324 
325         Map<Boolean, List<PendingSymlinkageExec>> bySuccess;
326         try (Progress progress = new Progress(LOGGER, "pending linkages", numPending)) {
327             bySuccess = pendingExecs.parallelStream().collect(
328                             Collectors.groupingByConcurrent((x) -> {
329                                 progress.increment();
330                                 try {
331                                     doLink(x);
332                                     return true;
333                                 } catch (IOException e) {
334                                     x.exception = e;
335                                     return false;
336                                 }
337                             }));
338         }
339         linkages.clear();
340 
341         List<PendingSymlinkageExec> failures = bySuccess.getOrDefault(
342                 Boolean.FALSE, null);
343         if (failures != null && failures.size() > 0) {
344             numFailures = failures.size();
345             double pctFailed = 100.0 * numFailures / numPending;
346             String exmsg = String.format(
347                     "%d failures (%.1f%%) while linking pending paths",
348                     numFailures, pctFailed);
349             throw new IOException(exmsg, failures.get(0).exception);
350         }
351 
352         return numPending - numFailures;
353     }
354 
doDelete(PendingFileDeletionExec del)355     private void doDelete(PendingFileDeletionExec del) {
356         File f = new File(TandemPath.join(del.absolutePath, PENDING_EXTENSION));
357         del.absoluteParent = f.getParentFile();
358 
359         doDelete(f);
360         f = new File(del.absolutePath);
361         doDelete(f);
362     }
363 
doDelete(File f)364     private void doDelete(File f) {
365         if (f.delete()) {
366             LOGGER.log(Level.FINER, "Deleted obsolete file: {0}", f.getPath());
367         } else if (f.exists()) {
368             LOGGER.log(Level.WARNING, "Failed to delete obsolete file: {0}",
369                     f.getPath());
370         }
371     }
372 
doRename(PendingFileRenamingExec ren)373     private void doRename(PendingFileRenamingExec ren) throws IOException {
374         try {
375             Files.move(Paths.get(ren.source), Paths.get(ren.target),
376                 StandardCopyOption.REPLACE_EXISTING);
377         } catch (IOException e) {
378             LOGGER.log(Level.WARNING, "Failed to move file: {0} -> {1}",
379                 new Object[]{ren.source, ren.target});
380             throw e;
381         }
382         if (LOGGER.isLoggable(Level.FINEST)) {
383             LOGGER.log(Level.FINEST, "Moved pending as file: {0}",
384                 ren.target);
385         }
386     }
387 
doLink(PendingSymlinkageExec lnk)388     private void doLink(PendingSymlinkageExec lnk) throws IOException {
389         try {
390             if (!needLink(lnk)) {
391                 return;
392             }
393             Path sourcePath = Paths.get(lnk.source);
394             deleteFileOrDirectory(sourcePath);
395 
396             File sourceParentFile = sourcePath.getParent().toFile();
397             /*
398              * The double check-exists in the following conditional is necessary
399              * because during a race when two threads are simultaneously linking
400              * for a not-yet-existent `sourceParentFile`, the first check-exists
401              * will be false for both threads, but then only one will see true
402              * from mkdirs -- so the other needs a fallback again to
403              * check-exists.
404              */
405             if (sourceParentFile.exists() || sourceParentFile.mkdirs() ||
406                     sourceParentFile.exists()) {
407                 Files.createSymbolicLink(sourcePath, Paths.get(lnk.targetRel));
408             }
409         } catch (FileAlreadyExistsException e) {
410             // Another case of racing threads. Given that each of them works with the same path,
411             // there is no need to worry.
412             return;
413         } catch (IOException e) {
414             LOGGER.log(Level.WARNING, "Failed to link: {0} -> {1}",
415                     new Object[]{lnk.source, lnk.targetRel});
416             throw e;
417         }
418 
419         if (LOGGER.isLoggable(Level.FINEST)) {
420             LOGGER.log(Level.FINEST, "Linked pending: {0} -> {1}",
421                     new Object[]{lnk.source, lnk.targetRel});
422         }
423     }
424 
needLink(PendingSymlinkageExec lnk)425     private boolean needLink(PendingSymlinkageExec lnk) {
426         File src = new File(lnk.source);
427         Path srcpth = src.toPath();
428         // needed if source doesn't exist or isn't a symlink
429         if (!src.exists() || !Files.isSymbolicLink(srcpth)) {
430             return true;
431         }
432 
433         // Re-resolve target.
434         Path tgtpth = srcpth.getParent().resolve(Paths.get(lnk.targetRel));
435 
436         // Re-canonicalize source and target.
437         String srcCanonical;
438         String tgtCanonical;
439         try {
440             srcCanonical = src.getCanonicalPath();
441             tgtCanonical = tgtpth.toFile().getCanonicalPath();
442         } catch (IOException ex) {
443             return true;
444         }
445         // not needed if source's canonical matches re-resolved target canonical
446         return !tgtCanonical.equals(srcCanonical);
447     }
448 
449     /**
450      * Deletes file or directory recursively.
451      * <a href="https://stackoverflow.com/questions/779519/delete-directories-recursively-in-java">
452      * Q: "Delete directories recursively in Java"
453      * </a>,
454      * <a href="https://stackoverflow.com/a/779529/933163">
455      * A: "In Java 7+ you can use {@code Files} class."</a>,
456      * <a href="https://stackoverflow.com/users/1679995/tomasz-dzi%C4%99cielewski">
457      * Tomasz Dzięcielewski
458      * </a>
459      * @param start the starting file
460      */
deleteFileOrDirectory(Path start)461     private void deleteFileOrDirectory(Path start) throws IOException {
462         if (!start.toFile().exists()) {
463             return;
464         }
465         Files.walkFileTree(start, new SimpleFileVisitor<>() {
466             @Override
467             public FileVisitResult visitFile(Path file,
468                     BasicFileAttributes attrs) throws IOException {
469                 Files.delete(file);
470                 return FileVisitResult.CONTINUE;
471             }
472 
473             @Override
474             public FileVisitResult postVisitDirectory(Path dir, IOException exc)
475                     throws IOException {
476                 Files.delete(dir);
477                 return FileVisitResult.CONTINUE;
478             }
479         });
480     }
481 
482     /**
483      * For the unique set of parent directories among
484      * {@link PendingFileDeletionExec#absoluteParent}, traverse in descending
485      * order of path-length, and attempt to clean any empty directories.
486      */
tryDeleteParents(List<PendingFileDeletionExec> dels)487     private void tryDeleteParents(List<PendingFileDeletionExec> dels) {
488         Set<File> parents = new TreeSet<>(DESC_PATHLEN_COMPARATOR);
489         dels.forEach((del) -> parents.add(del.absoluteParent));
490 
491         SkeletonDirs skels = new SkeletonDirs();
492         for (File dir : parents) {
493             skels.reset();
494             findFilelessChildren(skels, dir);
495             skels.childDirs.forEach(this::tryDeleteDirectory);
496             tryDeleteDirectory(dir);
497         }
498     }
499 
tryDeleteDirectory(File dir)500     private void tryDeleteDirectory(File dir) {
501         if (dir.delete()) {
502             LOGGER.log(Level.FINE, "Removed empty parent dir: {0}",
503                 dir.getAbsolutePath());
504         }
505     }
506 
507     /**
508      * Recursively determines eligible, file-less child directories for cleaning
509      * up, and writes them to {@code skels}.
510      */
findFilelessChildren(SkeletonDirs skels, File directory)511     private void findFilelessChildren(SkeletonDirs skels, File directory) {
512         if (!directory.exists()) {
513             return;
514         }
515         String dirPath = directory.getAbsolutePath();
516         boolean topLevelIneligible = false;
517         boolean didLogFileTopLevelIneligible = false;
518 
519         try (DirectoryStream<Path> directoryStream = Files.newDirectoryStream(
520             Paths.get(dirPath))) {
521             for (Path path : directoryStream) {
522                 File f = path.toFile();
523                 if (f.isFile()) {
524                     topLevelIneligible = true;
525                     if (!didLogFileTopLevelIneligible && LOGGER.isLoggable(
526                         Level.FINEST)) {
527                         didLogFileTopLevelIneligible = true; // just once is OK
528                         LOGGER.log(Level.FINEST, "not file-less due to: {0}",
529                             f.getAbsolutePath());
530                     }
531                 } else {
532                     findFilelessChildren(skels, f);
533                     if (!skels.ineligible) {
534                         skels.childDirs.add(f);
535                     } else {
536                         topLevelIneligible = true;
537                         if (LOGGER.isLoggable(Level.FINEST)) {
538                             LOGGER.log(Level.FINEST,
539                                 "its children prevent delete: {0}",
540                                 f.getAbsolutePath());
541                         }
542                     }
543 
544                     // Reset this flag so that other potential, eligible
545                     // children are evaluated.
546                     skels.ineligible = false;
547                 }
548             }
549         } catch (IOException ex) {
550             topLevelIneligible = true;
551             if (LOGGER.isLoggable(Level.FINEST)) {
552                 LOGGER.log(Level.FINEST, "Failed to stream directory:" +
553                     directory, ex);
554             }
555         }
556 
557         skels.ineligible = topLevelIneligible;
558     }
559 
560     /**
561      * Counts segments arising from {@code File.separatorChar} or '\\'.
562      * @param path a defined instance
563      * @return a natural number
564      */
countPathSegments(String path)565     private static int countPathSegments(String path) {
566         int n = 1;
567         for (int i = 0; i < path.length(); ++i) {
568             char c = path.charAt(i);
569             if (c == File.separatorChar || c == '\\') {
570                 ++n;
571             }
572         }
573         return n;
574     }
575 
576     private static class PendingFileDeletionExec {
577         final String absolutePath;
578         File absoluteParent;
579         IOException exception;
PendingFileDeletionExec(String absolutePath)580         PendingFileDeletionExec(String absolutePath) {
581             this.absolutePath = absolutePath;
582         }
583     }
584 
585     private static class PendingFileRenamingExec {
586         final String source;
587         final String target;
588         IOException exception;
PendingFileRenamingExec(String source, String target)589         PendingFileRenamingExec(String source, String target) {
590             this.source = source;
591             this.target = target;
592         }
593     }
594 
595     private static class PendingSymlinkageExec {
596         final String source;
597         final String targetRel;
598         IOException exception;
PendingSymlinkageExec(String source, String relTarget)599         PendingSymlinkageExec(String source, String relTarget) {
600             this.source = source;
601             this.targetRel = relTarget;
602         }
603     }
604 
605     /**
606      * Represents a collection of file-less directories which should also be
607      * deleted for cleanliness.
608      */
609     private static class SkeletonDirs {
610         boolean ineligible; // a flag used during recursion
611         final Set<File> childDirs = new TreeSet<>(DESC_PATHLEN_COMPARATOR);
612 
reset()613         void reset() {
614             ineligible = false;
615             childDirs.clear();
616         }
617     }
618 }
619