1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * See LICENSE.txt included in this distribution for the specific 9 * language governing permissions and limitations under the License. 10 * 11 * When distributing Covered Code, include this CDDL HEADER in each 12 * file and include the License file at LICENSE.txt. 13 * If applicable, add the following below this CDDL HEADER, with the 14 * fields enclosed by brackets "[]" replaced with your own identifying 15 * information: Portions Copyright [yyyy] [name of copyright owner] 16 * 17 * CDDL HEADER END 18 */ 19 20 /* 21 * Copyright (c) 2017, 2020, Chris Fraire <cfraire@me.com>. 22 * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. 23 */ 24 package org.opengrok.indexer.index; 25 26 import java.io.File; 27 import java.io.IOException; 28 import java.nio.file.DirectoryStream; 29 import java.nio.file.FileAlreadyExistsException; 30 import java.nio.file.FileVisitResult; 31 import java.nio.file.Files; 32 import java.nio.file.Path; 33 import java.nio.file.Paths; 34 import java.nio.file.SimpleFileVisitor; 35 import java.nio.file.StandardCopyOption; 36 import java.nio.file.attribute.BasicFileAttributes; 37 import java.time.Duration; 38 import java.time.Instant; 39 import java.util.Comparator; 40 import java.util.List; 41 import java.util.Map; 42 import java.util.Set; 43 import java.util.TreeSet; 44 import java.util.concurrent.ConcurrentHashMap; 45 import java.util.logging.Level; 46 import java.util.logging.Logger; 47 import java.util.stream.Collectors; 48 49 import org.opengrok.indexer.logger.LoggerFactory; 50 import org.opengrok.indexer.util.Progress; 51 import org.opengrok.indexer.util.StringUtils; 52 import org.opengrok.indexer.util.TandemPath; 53 54 /** 55 * Represents a tracker of pending file deletions and renamings that can later 56 * be executed. 57 * <p> 58 * {@link PendingFileCompleter} is not generally thread-safe, as only 59 * {@link #add(org.opengrok.indexer.index.PendingFileRenaming)}, 60 * {@link #add(org.opengrok.indexer.index.PendingSymlinkage)} and 61 * {@link #add(org.opengrok.indexer.index.PendingFileDeletion)} are expected 62 * to be run in parallel; these methods are thread-safe w.r.t. underlying data structures. 63 * <p> 64 * No other methods are thread-safe between each other. E.g., 65 * {@link #complete()} should only be called by a single thread after all 66 * additions of {@link PendingSymlinkage}s, {@link PendingFileDeletion}s, and 67 * {@link PendingFileRenaming}s are indicated. 68 */ 69 class PendingFileCompleter { 70 71 /** 72 * An extension that should be used as the suffix of files for 73 * {@link PendingFileRenaming} actions. 74 * <p>Value is {@code ".org_opengrok"}. 75 */ 76 public static final String PENDING_EXTENSION = ".org_opengrok"; 77 78 private static final Logger LOGGER = LoggerFactory.getLogger(PendingFileCompleter.class); 79 80 private volatile boolean completing; 81 82 /** 83 * Descending path segment length comparator. 84 */ 85 private static final Comparator<File> DESC_PATHLEN_COMPARATOR = 86 (File f1, File f2) -> { 87 String s1 = f1.getAbsolutePath(); 88 String s2 = f2.getAbsolutePath(); 89 int n1 = countPathSegments(s1); 90 int n2 = countPathSegments(s2); 91 // DESC: s2 no. of segments <=> s1 no. of segments 92 int cmp = Integer.compare(n2, n1); 93 if (cmp != 0) { 94 return cmp; 95 } 96 97 // the Comparator must also be "consistent with equals", so check 98 // string contents too when (length)cmp == 0. (ASC: s1 <=> s2.) 99 cmp = s1.compareTo(s2); 100 return cmp; 101 }; 102 103 private final Set<PendingFileDeletion> deletions = ConcurrentHashMap.newKeySet(); 104 105 private final Set<PendingFileRenaming> renames = ConcurrentHashMap.newKeySet(); 106 107 private final Set<PendingSymlinkage> linkages = ConcurrentHashMap.newKeySet(); 108 109 /** 110 * Adds the specified element to this instance's set if it is not already 111 * present. 112 * @param e element to be added to this set 113 * @return {@code true} if this instance's set did not already contain the 114 * specified element 115 * @throws IllegalStateException if {@link #complete()} is running 116 */ add(PendingFileDeletion e)117 public boolean add(PendingFileDeletion e) { 118 if (completing) { 119 throw new IllegalStateException("complete() is running"); 120 } 121 return deletions.add(e); 122 } 123 124 /** 125 * Adds the specified element to this instance's set if it is not already 126 * present. 127 * @param e element to be added to this set 128 * @return {@code true} if this instance's set did not already contain the 129 * specified element 130 * @throws IllegalStateException if {@link #complete()} is running 131 */ add(PendingSymlinkage e)132 public boolean add(PendingSymlinkage e) { 133 if (completing) { 134 throw new IllegalStateException("complete() is running"); 135 } 136 return linkages.add(e); 137 } 138 139 /** 140 * Adds the specified element to this instance's set if it is not already 141 * present, and also remove any pending deletion for the same absolute path. 142 * @param e element to be added to this set 143 * @return {@code true} if this instance's set did not already contain the 144 * specified element 145 * @throws IllegalStateException if {@link #complete()} is running 146 */ add(PendingFileRenaming e)147 public boolean add(PendingFileRenaming e) { 148 if (completing) { 149 throw new IllegalStateException("complete() is running"); 150 } 151 boolean rc = renames.add(e); 152 deletions.remove(new PendingFileDeletion(e.getAbsolutePath())); 153 return rc; 154 } 155 156 /** 157 * Complete all the tracked file operations: first in a stage for pending 158 * deletions, next in a stage for pending renamings, and finally in a stage 159 * for pending symbolic linkages. 160 * <p> 161 * All operations in each stage are tried in parallel, and any failure is 162 * caught and raises an exception (after all items in the stage have been 163 * tried). 164 * <p> 165 * Deletions are tried for each 166 * {@link PendingFileDeletion#getAbsolutePath()}; for a version of the path 167 * with {@link #PENDING_EXTENSION} appended; and also for the path's parent 168 * directory, which does nothing if the directory is not empty. 169 * @return the number of successful operations 170 * @throws java.io.IOException if an I/O error occurs 171 */ complete()172 public int complete() throws IOException { 173 completing = true; 174 try { 175 return completeInner(); 176 } finally { 177 completing = false; 178 } 179 } 180 completeInner()181 private int completeInner() throws IOException { 182 Instant start = Instant.now(); 183 int numDeletions = completeDeletions(); 184 if (LOGGER.isLoggable(Level.FINE)) { 185 LOGGER.log(Level.FINE, "deleted {0} file(s) (took {1})", 186 new Object[] {numDeletions, StringUtils.getReadableTime( 187 Duration.between(start, Instant.now()).toMillis())}); 188 } 189 190 start = Instant.now(); 191 int numRenamings = completeRenamings(); 192 if (LOGGER.isLoggable(Level.FINE)) { 193 LOGGER.log(Level.FINE, "renamed {0} file(s) (took {1})", 194 new Object[] {numRenamings, StringUtils.getReadableTime( 195 Duration.between(start, Instant.now()).toMillis())}); 196 } 197 198 start = Instant.now(); 199 int numLinkages = completeLinkages(); 200 if (LOGGER.isLoggable(Level.FINE)) { 201 LOGGER.log(Level.FINE, "affirmed links for {0} path(s) (took {1})", 202 new Object[] {numLinkages, StringUtils.getReadableTime( 203 Duration.between(start, Instant.now()).toMillis())}); 204 } 205 206 return numDeletions + numRenamings + numLinkages; 207 } 208 209 /** 210 * Attempts to rename all the tracked elements, catching any failures, and 211 * throwing an exception if any failed. 212 * @return the number of successful renamings 213 */ completeRenamings()214 private int completeRenamings() throws IOException { 215 int numPending = renames.size(); 216 int numFailures = 0; 217 218 if (numPending < 1) { 219 return 0; 220 } 221 222 List<PendingFileRenamingExec> pendingExecs = renames. 223 parallelStream().map(f -> 224 new PendingFileRenamingExec(f.getTransientPath(), 225 f.getAbsolutePath())).collect( 226 Collectors.toList()); 227 Map<Boolean, List<PendingFileRenamingExec>> bySuccess; 228 229 try (Progress progress = new Progress(LOGGER, "pending renames", numPending)) { 230 bySuccess = pendingExecs.parallelStream().collect( 231 Collectors.groupingByConcurrent((x) -> { 232 progress.increment(); 233 try { 234 doRename(x); 235 return true; 236 } catch (IOException e) { 237 x.exception = e; 238 return false; 239 } 240 })); 241 } 242 renames.clear(); 243 244 List<PendingFileRenamingExec> failures = bySuccess.getOrDefault( 245 Boolean.FALSE, null); 246 if (failures != null && failures.size() > 0) { 247 numFailures = failures.size(); 248 double pctFailed = 100.0 * numFailures / numPending; 249 String exmsg = String.format( 250 "%d failures (%.1f%%) while renaming pending files", 251 numFailures, pctFailed); 252 throw new IOException(exmsg, failures.get(0).exception); 253 } 254 255 return numPending - numFailures; 256 } 257 258 /** 259 * Attempts to delete all the tracked elements, catching any failures, and 260 * throwing an exception if any failed. 261 * @return the number of successful deletions 262 */ completeDeletions()263 private int completeDeletions() throws IOException { 264 int numPending = deletions.size(); 265 int numFailures = 0; 266 267 if (numPending < 1) { 268 return 0; 269 } 270 271 List<PendingFileDeletionExec> pendingExecs = deletions. 272 parallelStream().map(f -> 273 new PendingFileDeletionExec(f.getAbsolutePath())).collect( 274 Collectors.toList()); 275 Map<Boolean, List<PendingFileDeletionExec>> bySuccess; 276 277 try (Progress progress = new Progress(LOGGER, "pending deletions", numPending)) { 278 bySuccess = pendingExecs.parallelStream().collect( 279 Collectors.groupingByConcurrent((x) -> { 280 progress.increment(); 281 doDelete(x); 282 return true; 283 })); 284 } 285 deletions.clear(); 286 287 List<PendingFileDeletionExec> successes = bySuccess.getOrDefault( 288 Boolean.TRUE, null); 289 if (successes != null) { 290 tryDeleteParents(successes); 291 } 292 293 List<PendingFileDeletionExec> failures = bySuccess.getOrDefault( 294 Boolean.FALSE, null); 295 if (failures != null && failures.size() > 0) { 296 numFailures = failures.size(); 297 double pctFailed = 100.0 * numFailures / numPending; 298 String exmsg = String.format( 299 "%d failures (%.1f%%) while deleting pending files", 300 numFailures, pctFailed); 301 throw new IOException(exmsg, failures.get(0).exception); 302 } 303 304 return numPending - numFailures; 305 } 306 307 /** 308 * Attempts to link the tracked elements, catching any failures, and 309 * throwing an exception if any failed. 310 * @return the number of successful linkages 311 */ completeLinkages()312 private int completeLinkages() throws IOException { 313 int numPending = linkages.size(); 314 int numFailures = 0; 315 316 if (numPending < 1) { 317 return 0; 318 } 319 320 List<PendingSymlinkageExec> pendingExecs = 321 linkages.parallelStream().map(f -> 322 new PendingSymlinkageExec(f.getSourcePath(), 323 f.getTargetRelPath())).collect(Collectors.toList()); 324 325 Map<Boolean, List<PendingSymlinkageExec>> bySuccess; 326 try (Progress progress = new Progress(LOGGER, "pending linkages", numPending)) { 327 bySuccess = pendingExecs.parallelStream().collect( 328 Collectors.groupingByConcurrent((x) -> { 329 progress.increment(); 330 try { 331 doLink(x); 332 return true; 333 } catch (IOException e) { 334 x.exception = e; 335 return false; 336 } 337 })); 338 } 339 linkages.clear(); 340 341 List<PendingSymlinkageExec> failures = bySuccess.getOrDefault( 342 Boolean.FALSE, null); 343 if (failures != null && failures.size() > 0) { 344 numFailures = failures.size(); 345 double pctFailed = 100.0 * numFailures / numPending; 346 String exmsg = String.format( 347 "%d failures (%.1f%%) while linking pending paths", 348 numFailures, pctFailed); 349 throw new IOException(exmsg, failures.get(0).exception); 350 } 351 352 return numPending - numFailures; 353 } 354 doDelete(PendingFileDeletionExec del)355 private void doDelete(PendingFileDeletionExec del) { 356 File f = new File(TandemPath.join(del.absolutePath, PENDING_EXTENSION)); 357 del.absoluteParent = f.getParentFile(); 358 359 doDelete(f); 360 f = new File(del.absolutePath); 361 doDelete(f); 362 } 363 doDelete(File f)364 private void doDelete(File f) { 365 if (f.delete()) { 366 LOGGER.log(Level.FINER, "Deleted obsolete file: {0}", f.getPath()); 367 } else if (f.exists()) { 368 LOGGER.log(Level.WARNING, "Failed to delete obsolete file: {0}", 369 f.getPath()); 370 } 371 } 372 doRename(PendingFileRenamingExec ren)373 private void doRename(PendingFileRenamingExec ren) throws IOException { 374 try { 375 Files.move(Paths.get(ren.source), Paths.get(ren.target), 376 StandardCopyOption.REPLACE_EXISTING); 377 } catch (IOException e) { 378 LOGGER.log(Level.WARNING, "Failed to move file: {0} -> {1}", 379 new Object[]{ren.source, ren.target}); 380 throw e; 381 } 382 if (LOGGER.isLoggable(Level.FINEST)) { 383 LOGGER.log(Level.FINEST, "Moved pending as file: {0}", 384 ren.target); 385 } 386 } 387 doLink(PendingSymlinkageExec lnk)388 private void doLink(PendingSymlinkageExec lnk) throws IOException { 389 try { 390 if (!needLink(lnk)) { 391 return; 392 } 393 Path sourcePath = Paths.get(lnk.source); 394 deleteFileOrDirectory(sourcePath); 395 396 File sourceParentFile = sourcePath.getParent().toFile(); 397 /* 398 * The double check-exists in the following conditional is necessary 399 * because during a race when two threads are simultaneously linking 400 * for a not-yet-existent `sourceParentFile`, the first check-exists 401 * will be false for both threads, but then only one will see true 402 * from mkdirs -- so the other needs a fallback again to 403 * check-exists. 404 */ 405 if (sourceParentFile.exists() || sourceParentFile.mkdirs() || 406 sourceParentFile.exists()) { 407 Files.createSymbolicLink(sourcePath, Paths.get(lnk.targetRel)); 408 } 409 } catch (FileAlreadyExistsException e) { 410 // Another case of racing threads. Given that each of them works with the same path, 411 // there is no need to worry. 412 return; 413 } catch (IOException e) { 414 LOGGER.log(Level.WARNING, "Failed to link: {0} -> {1}", 415 new Object[]{lnk.source, lnk.targetRel}); 416 throw e; 417 } 418 419 if (LOGGER.isLoggable(Level.FINEST)) { 420 LOGGER.log(Level.FINEST, "Linked pending: {0} -> {1}", 421 new Object[]{lnk.source, lnk.targetRel}); 422 } 423 } 424 needLink(PendingSymlinkageExec lnk)425 private boolean needLink(PendingSymlinkageExec lnk) { 426 File src = new File(lnk.source); 427 Path srcpth = src.toPath(); 428 // needed if source doesn't exist or isn't a symlink 429 if (!src.exists() || !Files.isSymbolicLink(srcpth)) { 430 return true; 431 } 432 433 // Re-resolve target. 434 Path tgtpth = srcpth.getParent().resolve(Paths.get(lnk.targetRel)); 435 436 // Re-canonicalize source and target. 437 String srcCanonical; 438 String tgtCanonical; 439 try { 440 srcCanonical = src.getCanonicalPath(); 441 tgtCanonical = tgtpth.toFile().getCanonicalPath(); 442 } catch (IOException ex) { 443 return true; 444 } 445 // not needed if source's canonical matches re-resolved target canonical 446 return !tgtCanonical.equals(srcCanonical); 447 } 448 449 /** 450 * Deletes file or directory recursively. 451 * <a href="https://stackoverflow.com/questions/779519/delete-directories-recursively-in-java"> 452 * Q: "Delete directories recursively in Java" 453 * </a>, 454 * <a href="https://stackoverflow.com/a/779529/933163"> 455 * A: "In Java 7+ you can use {@code Files} class."</a>, 456 * <a href="https://stackoverflow.com/users/1679995/tomasz-dzi%C4%99cielewski"> 457 * Tomasz Dzięcielewski 458 * </a> 459 * @param start the starting file 460 */ deleteFileOrDirectory(Path start)461 private void deleteFileOrDirectory(Path start) throws IOException { 462 if (!start.toFile().exists()) { 463 return; 464 } 465 Files.walkFileTree(start, new SimpleFileVisitor<>() { 466 @Override 467 public FileVisitResult visitFile(Path file, 468 BasicFileAttributes attrs) throws IOException { 469 Files.delete(file); 470 return FileVisitResult.CONTINUE; 471 } 472 473 @Override 474 public FileVisitResult postVisitDirectory(Path dir, IOException exc) 475 throws IOException { 476 Files.delete(dir); 477 return FileVisitResult.CONTINUE; 478 } 479 }); 480 } 481 482 /** 483 * For the unique set of parent directories among 484 * {@link PendingFileDeletionExec#absoluteParent}, traverse in descending 485 * order of path-length, and attempt to clean any empty directories. 486 */ tryDeleteParents(List<PendingFileDeletionExec> dels)487 private void tryDeleteParents(List<PendingFileDeletionExec> dels) { 488 Set<File> parents = new TreeSet<>(DESC_PATHLEN_COMPARATOR); 489 dels.forEach((del) -> parents.add(del.absoluteParent)); 490 491 SkeletonDirs skels = new SkeletonDirs(); 492 for (File dir : parents) { 493 skels.reset(); 494 findFilelessChildren(skels, dir); 495 skels.childDirs.forEach(this::tryDeleteDirectory); 496 tryDeleteDirectory(dir); 497 } 498 } 499 tryDeleteDirectory(File dir)500 private void tryDeleteDirectory(File dir) { 501 if (dir.delete()) { 502 LOGGER.log(Level.FINE, "Removed empty parent dir: {0}", 503 dir.getAbsolutePath()); 504 } 505 } 506 507 /** 508 * Recursively determines eligible, file-less child directories for cleaning 509 * up, and writes them to {@code skels}. 510 */ findFilelessChildren(SkeletonDirs skels, File directory)511 private void findFilelessChildren(SkeletonDirs skels, File directory) { 512 if (!directory.exists()) { 513 return; 514 } 515 String dirPath = directory.getAbsolutePath(); 516 boolean topLevelIneligible = false; 517 boolean didLogFileTopLevelIneligible = false; 518 519 try (DirectoryStream<Path> directoryStream = Files.newDirectoryStream( 520 Paths.get(dirPath))) { 521 for (Path path : directoryStream) { 522 File f = path.toFile(); 523 if (f.isFile()) { 524 topLevelIneligible = true; 525 if (!didLogFileTopLevelIneligible && LOGGER.isLoggable( 526 Level.FINEST)) { 527 didLogFileTopLevelIneligible = true; // just once is OK 528 LOGGER.log(Level.FINEST, "not file-less due to: {0}", 529 f.getAbsolutePath()); 530 } 531 } else { 532 findFilelessChildren(skels, f); 533 if (!skels.ineligible) { 534 skels.childDirs.add(f); 535 } else { 536 topLevelIneligible = true; 537 if (LOGGER.isLoggable(Level.FINEST)) { 538 LOGGER.log(Level.FINEST, 539 "its children prevent delete: {0}", 540 f.getAbsolutePath()); 541 } 542 } 543 544 // Reset this flag so that other potential, eligible 545 // children are evaluated. 546 skels.ineligible = false; 547 } 548 } 549 } catch (IOException ex) { 550 topLevelIneligible = true; 551 if (LOGGER.isLoggable(Level.FINEST)) { 552 LOGGER.log(Level.FINEST, "Failed to stream directory:" + 553 directory, ex); 554 } 555 } 556 557 skels.ineligible = topLevelIneligible; 558 } 559 560 /** 561 * Counts segments arising from {@code File.separatorChar} or '\\'. 562 * @param path a defined instance 563 * @return a natural number 564 */ countPathSegments(String path)565 private static int countPathSegments(String path) { 566 int n = 1; 567 for (int i = 0; i < path.length(); ++i) { 568 char c = path.charAt(i); 569 if (c == File.separatorChar || c == '\\') { 570 ++n; 571 } 572 } 573 return n; 574 } 575 576 private static class PendingFileDeletionExec { 577 final String absolutePath; 578 File absoluteParent; 579 IOException exception; PendingFileDeletionExec(String absolutePath)580 PendingFileDeletionExec(String absolutePath) { 581 this.absolutePath = absolutePath; 582 } 583 } 584 585 private static class PendingFileRenamingExec { 586 final String source; 587 final String target; 588 IOException exception; PendingFileRenamingExec(String source, String target)589 PendingFileRenamingExec(String source, String target) { 590 this.source = source; 591 this.target = target; 592 } 593 } 594 595 private static class PendingSymlinkageExec { 596 final String source; 597 final String targetRel; 598 IOException exception; PendingSymlinkageExec(String source, String relTarget)599 PendingSymlinkageExec(String source, String relTarget) { 600 this.source = source; 601 this.targetRel = relTarget; 602 } 603 } 604 605 /** 606 * Represents a collection of file-less directories which should also be 607 * deleted for cleanliness. 608 */ 609 private static class SkeletonDirs { 610 boolean ineligible; // a flag used during recursion 611 final Set<File> childDirs = new TreeSet<>(DESC_PATHLEN_COMPARATOR); 612 reset()613 void reset() { 614 ineligible = false; 615 childDirs.clear(); 616 } 617 } 618 } 619