1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * See LICENSE.txt included in this distribution for the specific 9 * language governing permissions and limitations under the License. 10 * 11 * When distributing Covered Code, include this CDDL HEADER in each 12 * file and include the License file at LICENSE.txt. 13 * If applicable, add the following below this CDDL HEADER, with the 14 * fields enclosed by brackets "[]" replaced with your own identifying 15 * information: Portions Copyright [yyyy] [name of copyright owner] 16 * 17 * CDDL HEADER END 18 */ 19 20 /* 21 * Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved. 22 * Portions Copyright (c) 2017, 2020, Chris Fraire <cfraire@me.com>. 23 */ 24 package org.opengrok.indexer.history; 25 26 import java.io.File; 27 import java.io.IOException; 28 import java.io.InputStream; 29 import java.lang.reflect.InvocationTargetException; 30 import java.nio.file.Path; 31 import java.util.ArrayList; 32 import java.util.Collection; 33 import java.util.Collections; 34 import java.util.Date; 35 import java.util.HashMap; 36 import java.util.List; 37 import java.util.Map; 38 import java.util.Objects; 39 import java.util.Set; 40 import java.util.concurrent.ConcurrentHashMap; 41 import java.util.concurrent.CountDownLatch; 42 import java.util.concurrent.ExecutorService; 43 import java.util.concurrent.Executors; 44 import java.util.concurrent.Future; 45 import java.util.logging.Level; 46 import java.util.logging.Logger; 47 import java.util.stream.Collectors; 48 49 import org.jetbrains.annotations.Nullable; 50 import org.jetbrains.annotations.VisibleForTesting; 51 import org.opengrok.indexer.configuration.CommandTimeoutType; 52 import org.opengrok.indexer.configuration.Configuration.RemoteSCM; 53 import org.opengrok.indexer.configuration.PathAccepter; 54 import org.opengrok.indexer.configuration.RuntimeEnvironment; 55 import org.opengrok.indexer.logger.LoggerFactory; 56 import org.opengrok.indexer.util.ForbiddenSymlinkException; 57 import org.opengrok.indexer.util.PathUtils; 58 import org.opengrok.indexer.util.Statistics; 59 60 /** 61 * The HistoryGuru is used to implement an transparent layer to the various 62 * source control systems. 63 * 64 * @author Chandan 65 */ 66 public final class HistoryGuru { 67 68 private static final Logger LOGGER = LoggerFactory.getLogger(HistoryGuru.class); 69 70 /** 71 * The one and only instance of the HistoryGuru. 72 */ 73 private static final HistoryGuru INSTANCE = new HistoryGuru(); 74 75 private final RuntimeEnvironment env; 76 77 /** 78 * The history cache to use. 79 */ 80 private final HistoryCache historyCache; 81 82 /** 83 * Map of repositories, with {@code DirectoryName} as key. 84 */ 85 private final Map<String, Repository> repositories = new ConcurrentHashMap<>(); 86 87 /** 88 * Set of repository roots (using ConcurrentHashMap but a throwaway value) 89 * with parent of {@code DirectoryName} as key. 90 */ 91 private final Map<String, String> repositoryRoots = new ConcurrentHashMap<>(); 92 93 /** 94 * Interface to perform repository lookup for a given file path and HistoryGuru state. 95 */ 96 private final RepositoryLookup repositoryLookup; 97 98 private boolean historyIndexDone = false; 99 setHistoryIndexDone()100 public void setHistoryIndexDone() { 101 historyIndexDone = true; 102 } 103 isHistoryIndexDone()104 public boolean isHistoryIndexDone() { 105 return historyIndexDone; 106 } 107 108 /** 109 * Creates a new instance of HistoryGuru, and try to set the default source 110 * control system. 111 */ HistoryGuru()112 private HistoryGuru() { 113 env = RuntimeEnvironment.getInstance(); 114 115 HistoryCache cache = null; 116 if (env.useHistoryCache()) { 117 cache = new FileHistoryCache(); 118 119 try { 120 cache.initialize(); 121 } catch (HistoryException he) { 122 LOGGER.log(Level.WARNING, 123 "Failed to initialize the history cache", he); 124 // Failed to initialize, run without a history cache 125 cache = null; 126 } 127 } 128 historyCache = cache; 129 repositoryLookup = RepositoryLookup.cached(); 130 } 131 132 /** 133 * Get the one and only instance of the HistoryGuru. 134 * 135 * @return the one and only HistoryGuru instance 136 */ getInstance()137 public static HistoryGuru getInstance() { 138 return INSTANCE; 139 } 140 141 /** 142 * Return whether cache should be used for the history log. 143 * 144 * @return {@code true} if the history cache has been enabled and initialized, {@code false} otherwise 145 */ useCache()146 private boolean useCache() { 147 return historyCache != null; 148 } 149 150 /** 151 * Get a string with information about the history cache. 152 * 153 * @return a free form text string describing the history cache instance 154 * @throws HistoryException if an error occurred while getting the info 155 */ getCacheInfo()156 public String getCacheInfo() throws HistoryException { 157 return historyCache == null ? "No cache" : historyCache.getInfo(); 158 } 159 160 /** 161 * Annotate the specified revision of a file. 162 * 163 * @param file the file to annotate 164 * @param rev the revision to annotate (<code>null</code> means BASE) 165 * @return file annotation, or <code>null</code> if the 166 * <code>HistoryParser</code> does not support annotation 167 * @throws IOException if I/O exception occurs 168 */ 169 @Nullable annotate(File file, String rev)170 public Annotation annotate(File file, String rev) throws IOException { 171 Annotation annotation = null; 172 173 Repository repo = getRepository(file); 174 if (repo != null) { 175 annotation = repo.annotate(file, rev); 176 History hist = null; 177 try { 178 hist = getHistory(file); 179 } catch (HistoryException ex) { 180 LOGGER.log(Level.FINEST, "Cannot get messages for tooltip: ", ex); 181 } 182 if (hist != null && annotation != null) { 183 Set<String> revs = annotation.getRevisions(); 184 int revsMatched = 0; 185 for (HistoryEntry he : hist.getHistoryEntries()) { 186 String histRev = he.getRevision(); 187 String shortRev = repo.getRevisionForAnnotate(histRev); 188 if (revs.contains(shortRev)) { 189 annotation.addDesc(shortRev, "changeset: " + he.getRevision() 190 + "\nsummary: " + he.getMessage() + "\nuser: " 191 + he.getAuthor() + "\ndate: " + he.getDate()); 192 // History entries are coming from recent to older, 193 // file version should be from oldest to newer. 194 annotation.addFileVersion(shortRev, revs.size() - revsMatched); 195 revsMatched++; 196 } 197 } 198 } 199 } 200 201 return annotation; 202 } 203 204 /** 205 * Get the appropriate history reader for given file. 206 * 207 * @param file The file to get the history reader for 208 * @throws HistoryException If an error occurs while getting the history 209 * @return A HistorReader that may be used to read out history data for a 210 * named file 211 */ getHistoryReader(File file)212 public HistoryReader getHistoryReader(File file) throws HistoryException { 213 History history = getHistory(file, false); 214 return history == null ? null : new HistoryReader(history); 215 } 216 217 /** 218 * Get the history for the specified file. 219 * 220 * @param file the file to get the history for 221 * @return history for the file 222 * @throws HistoryException on error when accessing the history 223 */ getHistory(File file)224 public History getHistory(File file) throws HistoryException { 225 return getHistory(file, true, false); 226 } 227 getHistory(File file, boolean withFiles)228 public History getHistory(File file, boolean withFiles) throws HistoryException { 229 return getHistory(file, withFiles, false); 230 } 231 232 /** 233 * Get history for the specified file (called from the web app). 234 * 235 * @param file the file to get the history for 236 * @return history for the file 237 * @throws HistoryException on error when accessing the history 238 */ getHistoryUI(File file)239 public History getHistoryUI(File file) throws HistoryException { 240 return getHistory(file, true, true); 241 } 242 243 /** 244 * The idea is that some repositories require reaching out to remote server whenever 245 * a history operation is done. Sometimes this is unwanted and this method decides that. 246 * This should be consulted before the actual repository operation, i.e. not when fetching 247 * history from a cache since that is inherently local operation. 248 * @param repo repository 249 * @param file file to decide the operation for 250 * @param ui whether coming from UI 251 * @return whether to perform the history operation 252 */ isRepoHistoryEligible(Repository repo, File file, boolean ui)253 boolean isRepoHistoryEligible(Repository repo, File file, boolean ui) { 254 RemoteSCM rscm = env.getRemoteScmSupported(); 255 boolean doRemote = (ui && (rscm == RemoteSCM.UIONLY)) 256 || (rscm == RemoteSCM.ON) 257 || (ui || ((rscm == RemoteSCM.DIRBASED) && (repo != null) && repo.hasHistoryForDirectories())); 258 259 return (repo != null && repo.isHistoryEnabled() && repo.isWorking() && repo.fileHasHistory(file) 260 && (!repo.isRemote() || doRemote)); 261 } 262 263 @Nullable getHistoryFromCache(File file, Repository repository, boolean withFiles)264 private History getHistoryFromCache(File file, Repository repository, boolean withFiles) 265 throws HistoryException, ForbiddenSymlinkException { 266 267 if (useCache() && historyCache.supportsRepository(repository)) { 268 return historyCache.get(file, repository, withFiles); 269 } 270 271 return null; 272 } 273 274 /** 275 * Get last {@link HistoryEntry} for a file. First, try to retrieve it from the cache. 276 * If that fails, fallback to the repository method. 277 * @param file file to get the history entry for 278 * @param ui is the request coming from the UI 279 * @return last (newest) history entry for given file or {@code null} 280 * @throws HistoryException if history retrieval failed 281 */ 282 @Nullable getLastHistoryEntry(File file, boolean ui)283 public HistoryEntry getLastHistoryEntry(File file, boolean ui) throws HistoryException { 284 Statistics statistics = new Statistics(); 285 LOGGER.log(Level.FINEST, "started retrieval of last history entry for ''{0}''", file); 286 final File dir = file.isDirectory() ? file : file.getParentFile(); 287 final Repository repository = getRepository(dir); 288 289 History history; 290 try { 291 history = getHistoryFromCache(file, repository, false); 292 if (history != null) { 293 HistoryEntry lastHistoryEntry = history.getLastHistoryEntry(); 294 if (lastHistoryEntry != null) { 295 LOGGER.log(Level.FINEST, "got latest history entry {0} for ''{1}'' from history cache", 296 new Object[]{lastHistoryEntry, file}); 297 return lastHistoryEntry; 298 } 299 } 300 } catch (ForbiddenSymlinkException e) { 301 LOGGER.log(Level.FINER, e.getMessage()); 302 return null; 303 } 304 305 if (!isRepoHistoryEligible(repository, file, ui)) { 306 LOGGER.log(Level.FINER, "cannot retrieve the last history entry for ''{0}'' in {1} because of settings", 307 new Object[]{file, repository}); 308 return null; 309 } 310 311 // Fallback to the repository method. 312 HistoryEntry lastHistoryEntry = repository.getLastHistoryEntry(file, ui); 313 if (lastHistoryEntry != null) { 314 LOGGER.log(Level.FINEST, "got latest history entry {0} for ''{1}'' using repository {2}", 315 new Object[]{lastHistoryEntry, file, repository}); 316 } 317 statistics.report(LOGGER, Level.FINEST, 318 String.format("finished retrieval of last history entry for '%s' (%s)", 319 file, lastHistoryEntry != null ? "success" : "fail"), "history.entry.latest"); 320 return lastHistoryEntry; 321 } 322 getHistory(File file, boolean withFiles, boolean ui)323 public History getHistory(File file, boolean withFiles, boolean ui) throws HistoryException { 324 return getHistory(file, withFiles, ui, true); 325 } 326 327 /** 328 * Get the history for the specified file. The history cache is tried first, then the repository. 329 * 330 * @param file the file to get the history for 331 * @param withFiles whether the returned history should contain a 332 * list of files touched by each changeset (the file list may be skipped if false, but it doesn't have to) 333 * @param ui called from the webapp 334 * @param fallback fall back to fetching the history from the repository 335 * if it cannot be retrieved from history cache 336 * @return history for the file 337 * @throws HistoryException on error when accessing the history 338 */ getHistory(File file, boolean withFiles, boolean ui, boolean fallback)339 public History getHistory(File file, boolean withFiles, boolean ui, boolean fallback) throws HistoryException { 340 341 final File dir = file.isDirectory() ? file : file.getParentFile(); 342 final Repository repository = getRepository(dir); 343 344 History history; 345 try { 346 history = getHistoryFromCache(file, repository, withFiles); 347 if (history != null) { 348 return history; 349 } 350 351 return getHistoryFromRepository(file, repository, ui); 352 } catch (ForbiddenSymlinkException e) { 353 LOGGER.log(Level.FINER, e.getMessage()); 354 return null; 355 } 356 } 357 358 @Nullable getHistoryFromRepository(File file, Repository repository, boolean ui)359 private History getHistoryFromRepository(File file, Repository repository, boolean ui) throws HistoryException { 360 History history; 361 362 if (!isRepoHistoryEligible(repository, file, ui)) { 363 return null; 364 } 365 366 /* 367 * Some mirrors of repositories which are capable of fetching history 368 * for directories may contain lots of files untracked by given SCM. 369 * For these it would be waste of time to get their history 370 * since the history of all files in this repository should have been 371 * fetched in the first phase of indexing. 372 */ 373 if (env.isIndexer() && isHistoryIndexDone() && 374 repository.isHistoryEnabled() && repository.hasHistoryForDirectories()) { 375 LOGGER.log(Level.FINE, "not getting the history for ''{0}'' in repository {1} as the it supports " 376 + "history for directories", 377 new Object[]{file, repository}); 378 return null; 379 } 380 381 if (!env.getPathAccepter().accept(file)) { 382 return null; 383 } 384 385 try { 386 history = repository.getHistory(file); 387 } catch (UnsupportedOperationException e) { 388 // In this case, we've found a file for which the SCM has no history 389 // An example is a non-SCCS file somewhere in an SCCS-controlled workspace. 390 return null; 391 } 392 393 return history; 394 } 395 396 /** 397 * Gets a named revision of the specified file into the specified target file. 398 * 399 * @param target a require target file 400 * @param parent The directory containing the file 401 * @param basename The name of the file 402 * @param rev The revision to get 403 * @return {@code true} if content was found 404 * @throws java.io.IOException if an I/O error occurs 405 */ getRevision(File target, String parent, String basename, String rev)406 public boolean getRevision(File target, String parent, String basename, String rev) throws IOException { 407 Repository repo = getRepository(new File(parent)); 408 return repo != null && repo.getHistoryGet(target, parent, basename, rev); 409 } 410 411 /** 412 * Get a named revision of the specified file. 413 * 414 * @param parent The directory containing the file 415 * @param basename The name of the file 416 * @param rev The revision to get 417 * @return An InputStream containing the named revision of the file. 418 */ getRevision(String parent, String basename, String rev)419 public InputStream getRevision(String parent, String basename, String rev) { 420 InputStream ret = null; 421 422 Repository repo = getRepository(new File(parent)); 423 if (repo != null) { 424 ret = repo.getHistoryGet(parent, basename, rev); 425 } 426 return ret; 427 } 428 429 /** 430 * Does this directory contain files with source control information? 431 * 432 * @param file The name of the directory 433 * @return true if the files in this directory have associated revision 434 * history 435 */ hasHistory(File file)436 public boolean hasHistory(File file) { 437 Repository repo = getRepository(file); 438 439 if (repo == null) { 440 return false; 441 } 442 443 // This should return true for Annotate view. 444 return repo.isWorking() && repo.fileHasHistory(file) 445 && ((env.getRemoteScmSupported() == RemoteSCM.ON) 446 || (env.getRemoteScmSupported() == RemoteSCM.UIONLY) 447 || (env.getRemoteScmSupported() == RemoteSCM.DIRBASED) 448 || !repo.isRemote()); 449 } 450 451 /** 452 * Does the history cache contain entry for this directory ? 453 * @param file file object 454 * @return true if there is cache, false otherwise 455 */ hasCacheForFile(File file)456 public boolean hasCacheForFile(File file) { 457 if (!useCache()) { 458 return false; 459 } 460 461 try { 462 return historyCache.hasCacheForFile(file); 463 } catch (HistoryException ex) { 464 return false; 465 } 466 } 467 468 /** 469 * Check if we can annotate the specified file. 470 * 471 * @param file the file to check 472 * @return <code>true</code> if the file is under version control and the 473 * version control system supports annotation 474 */ hasAnnotation(File file)475 public boolean hasAnnotation(File file) { 476 if (!file.isDirectory()) { 477 Repository repo = getRepository(file); 478 if (repo != null && repo.isWorking()) { 479 return repo.fileHasAnnotation(file); 480 } 481 } 482 483 return false; 484 } 485 486 /** 487 * Get the last modified times for all files and subdirectories in the 488 * specified directory. 489 * 490 * @param directory the directory whose files to check 491 * @return a map from file names to modification times for the files that 492 * the history cache has information about 493 * @throws org.opengrok.indexer.history.HistoryException if history cannot be retrieved 494 */ getLastModifiedTimes(File directory)495 public Map<String, Date> getLastModifiedTimes(File directory) 496 throws HistoryException { 497 498 Repository repository = getRepository(directory); 499 500 if (repository != null && useCache()) { 501 return historyCache.getLastModifiedTimes(directory, repository); 502 } 503 504 return Collections.emptyMap(); 505 } 506 507 /** 508 * recursively search for repositories with a depth limit, add those found 509 * to the internally used map. 510 * 511 * @param files list of files to check if they contain a repository 512 * @param allowedNesting number of levels of nested repos to allow 513 * @param depth current depth - using global scanningDepth - one can limit 514 * this to improve scanning performance 515 * @param isNested a value indicating if a parent {@link Repository} was 516 * already found above the {@code files} 517 * @return collection of added repositories 518 */ addRepositories(File[] files, int allowedNesting, int depth, boolean isNested)519 private Collection<RepositoryInfo> addRepositories(File[] files, 520 int allowedNesting, int depth, boolean isNested) { 521 522 List<RepositoryInfo> repoList = new ArrayList<>(); 523 PathAccepter pathAccepter = env.getPathAccepter(); 524 525 for (File file : files) { 526 if (!file.isDirectory()) { 527 continue; 528 } 529 530 String path; 531 try { 532 path = file.getCanonicalPath(); 533 534 Repository repository = null; 535 try { 536 repository = RepositoryFactory.getRepository(file, CommandTimeoutType.INDEXER, isNested); 537 } catch (InstantiationException | NoSuchMethodException | InvocationTargetException e) { 538 LOGGER.log(Level.WARNING, "Could not create repository for '" 539 + file + "', could not instantiate the repository.", e); 540 } catch (IllegalAccessException iae) { 541 LOGGER.log(Level.WARNING, "Could not create repository for '" 542 + file + "', missing access rights.", iae); 543 continue; 544 } catch (ForbiddenSymlinkException e) { 545 LOGGER.log(Level.WARNING, "Could not create repository for ''{0}'': {1}", 546 new Object[] {file, e.getMessage()}); 547 continue; 548 } 549 if (repository == null) { 550 if (depth > env.getScanningDepth()) { 551 // we reached our search max depth, skip looking through the children 552 continue; 553 } 554 // Not a repository, search its sub-dirs. 555 if (pathAccepter.accept(file)) { 556 File[] subFiles = file.listFiles(); 557 if (subFiles == null) { 558 LOGGER.log(Level.WARNING, 559 "Failed to get sub directories for ''{0}'', " + 560 "check access permissions.", 561 file.getAbsolutePath()); 562 } else { 563 // Recursive call to scan next depth 564 repoList.addAll(addRepositories(subFiles, 565 allowedNesting, depth + 1, isNested)); 566 } 567 } 568 } else { 569 LOGGER.log(Level.CONFIG, "Adding <{0}> repository: <{1}>", 570 new Object[]{repository.getClass().getName(), path}); 571 572 repoList.add(new RepositoryInfo(repository)); 573 putRepository(repository); 574 575 if (allowedNesting > 0 && repository.supportsSubRepositories()) { 576 File[] subFiles = file.listFiles(); 577 if (subFiles == null) { 578 LOGGER.log(Level.WARNING, 579 "Failed to get sub directories for ''{0}'', check access permissions.", 580 file.getAbsolutePath()); 581 } else if (depth <= env.getScanningDepth()) { 582 // Search down to a limit -- if not: too much 583 // stat'ing for huge Mercurial repositories 584 repoList.addAll(addRepositories(subFiles, 585 allowedNesting - 1, depth + 1, true)); 586 } 587 } 588 } 589 } catch (IOException exp) { 590 LOGGER.log(Level.WARNING, 591 "Failed to get canonical path for {0}: {1}", 592 new Object[]{file.getAbsolutePath(), exp.getMessage()}); 593 LOGGER.log(Level.WARNING, "Repository will be ignored...", exp); 594 } 595 } 596 597 return repoList; 598 } 599 600 /** 601 * Recursively search for repositories in given directories, add those found 602 * to the internally used repository map. 603 * 604 * @param files list of directories to check if they contain a repository 605 * @return collection of added repositories 606 */ addRepositories(File[] files)607 public Collection<RepositoryInfo> addRepositories(File[] files) { 608 ExecutorService executor = env.getIndexerParallelizer().getFixedExecutor(); 609 List<Future<Collection<RepositoryInfo>>> futures = new ArrayList<>(); 610 for (File file: files) { 611 futures.add(executor.submit(() -> addRepositories(new File[]{file}, 612 env.getNestingMaximum(), 0, false))); 613 } 614 615 List<RepositoryInfo> repoList = new ArrayList<>(); 616 futures.forEach(future -> { 617 try { 618 repoList.addAll(future.get()); 619 } catch (Exception e) { 620 LOGGER.log(Level.WARNING, "failed to get results of repository scan", e); 621 } 622 }); 623 624 LOGGER.log(Level.FINER, "Discovered repositories: {0}", repoList); 625 626 return repoList; 627 } 628 629 /** 630 * Recursively search for repositories in given directories, add those found 631 * to the internally used repository map. 632 * 633 * @param repos collection of repository paths 634 * @return collection of added repositories 635 */ addRepositories(Collection<String> repos)636 public Collection<RepositoryInfo> addRepositories(Collection<String> repos) { 637 return addRepositories(repos.stream().map(File::new).toArray(File[]::new)); 638 } 639 640 /** 641 * Get collection of repositories used internally by HistoryGuru. 642 * @return collection of repositories 643 */ getRepositories()644 public Collection<RepositoryInfo> getRepositories() { 645 return repositories.values().stream(). 646 map(RepositoryInfo::new).collect(Collectors.toSet()); 647 } 648 createCache(Repository repository, String sinceRevision)649 private void createCache(Repository repository, String sinceRevision) { 650 String path = repository.getDirectoryName(); 651 String type = repository.getClass().getSimpleName(); 652 653 if (!repository.isHistoryEnabled()) { 654 LOGGER.log(Level.INFO, 655 "Skipping history cache creation of {0} repository in ''{1}'' and its subdirectories", 656 new Object[]{type, path}); 657 return; 658 } 659 660 if (repository.isWorking()) { 661 Statistics elapsed = new Statistics(); 662 663 LOGGER.log(Level.INFO, "Creating history cache for {0} ({1}) {2} renamed file handling", 664 new Object[]{path, type, repository.isHandleRenamedFiles() ? "with" : "without"}); 665 666 try { 667 repository.createCache(historyCache, sinceRevision); 668 } catch (Exception e) { 669 LOGGER.log(Level.WARNING, 670 "An error occurred while creating cache for " + path + " (" + type + ")", e); 671 } 672 673 elapsed.report(LOGGER, "Done history cache for " + path); 674 } else { 675 LOGGER.log(Level.WARNING, 676 "Skipping creation of history cache of {0} repository in {1}: Missing SCM dependencies?", 677 new Object[]{type, path}); 678 } 679 } 680 createCacheReal(Collection<Repository> repositories)681 private void createCacheReal(Collection<Repository> repositories) { 682 if (repositories.isEmpty()) { 683 LOGGER.log(Level.WARNING, "History cache is enabled however the list of repositories is empty. " + 684 "Either specify the repositories in configuration or let the indexer scan them."); 685 return; 686 } 687 688 Statistics elapsed = new Statistics(); 689 ExecutorService executor = env.getIndexerParallelizer().getHistoryExecutor(); 690 // Since we know each repository object from the repositories 691 // collection is unique, we can abuse HashMap to create a list of 692 // repository,revision tuples with repository as key (as the revision 693 // string does not have to be unique - surely it is not unique 694 // for the initial index case). 695 HashMap<Repository, String> repos2process = new HashMap<>(); 696 697 // Collect the list of <latestRev,repo> pairs first so that we 698 // do not have to deal with latch decrementing in the cycle below. 699 for (final Repository repo : repositories) { 700 final String latestRev; 701 702 try { 703 latestRev = historyCache.getLatestCachedRevision(repo); 704 repos2process.put(repo, latestRev); 705 } catch (HistoryException he) { 706 LOGGER.log(Level.WARNING, 707 String.format( 708 "Failed to retrieve latest cached revision for %s", 709 repo.getDirectoryName()), he); 710 } 711 } 712 713 LOGGER.log(Level.INFO, "Creating history cache for {0} repositories", 714 repos2process.size()); 715 final CountDownLatch latch = new CountDownLatch(repos2process.size()); 716 for (final Map.Entry<Repository, String> entry : repos2process.entrySet()) { 717 executor.submit(() -> { 718 try { 719 createCache(entry.getKey(), entry.getValue()); 720 } catch (Exception ex) { 721 // We want to catch any exception since we are in thread. 722 LOGGER.log(Level.WARNING, "createCacheReal() got exception", ex); 723 } finally { 724 latch.countDown(); 725 } 726 }); 727 } 728 729 /* 730 * Wait until the history of all repositories is done. This is necessary 731 * since the next phase of generating index will need the history to 732 * be ready as it is recorded in Lucene index. 733 */ 734 try { 735 latch.await(); 736 } catch (InterruptedException ex) { 737 LOGGER.log(Level.SEVERE, "latch exception", ex); 738 return; 739 } 740 741 // The cache has been populated. Now, optimize how it is stored on 742 // disk to enhance performance and save space. 743 try { 744 historyCache.optimize(); 745 } catch (HistoryException he) { 746 LOGGER.log(Level.WARNING, 747 "Failed optimizing the history cache database", he); 748 } 749 elapsed.report(LOGGER, "Done history cache for all repositories", "indexer.history.cache"); 750 setHistoryIndexDone(); 751 } 752 753 /** 754 * Create history cache for selected repositories. 755 * For this to work the repositories have to be already present in the 756 * internal map, e.g. via {@code setRepositories()} or {@code addRepositories()}. 757 * 758 * @param repositories list of repository paths 759 */ createCache(Collection<String> repositories)760 public void createCache(Collection<String> repositories) { 761 if (!useCache()) { 762 return; 763 } 764 createCacheReal(getReposFromString(repositories)); 765 } 766 767 /** 768 * Remove history data for a list of repositories. 769 * Note that this just deals with the data, the map used by HistoryGuru 770 * will be left intact. 771 * 772 * @param repositories list of repository paths relative to source root 773 * @return list of repository paths that were found and their history data removed 774 */ clearCache(Collection<String> repositories)775 public List<String> clearCache(Collection<String> repositories) { 776 List<String> clearedRepos = new ArrayList<>(); 777 778 if (!useCache()) { 779 return clearedRepos; 780 } 781 782 for (Repository r : getReposFromString(repositories)) { 783 try { 784 historyCache.clear(r); 785 clearedRepos.add(r.getDirectoryName()); 786 LOGGER.log(Level.INFO, 787 "History cache for {0} cleared.", r.getDirectoryName()); 788 } catch (HistoryException e) { 789 LOGGER.log(Level.WARNING, 790 "Clearing history cache for repository {0} failed: {1}", 791 new Object[]{r.getDirectoryName(), e.getLocalizedMessage()}); 792 } 793 } 794 795 return clearedRepos; 796 } 797 798 /** 799 * Clear entry for single file from history cache. 800 * @param path path to the file relative to the source root 801 */ clearCacheFile(String path)802 public void clearCacheFile(String path) { 803 if (!useCache()) { 804 return; 805 } 806 807 historyCache.clearFile(path); 808 } 809 810 /** 811 * Remove history data for a list of repositories. Those that are 812 * successfully cleared may be removed from the internal list of repositories, 813 * depending on the {@code removeRepositories} parameter. 814 * 815 * @param repositories list of repository paths relative to source root 816 * @param removeRepositories set true to also remove the repositories from internal structures 817 */ removeCache(Collection<String> repositories, boolean removeRepositories)818 public void removeCache(Collection<String> repositories, boolean removeRepositories) { 819 if (!useCache()) { 820 return; 821 } 822 823 List<String> repos = clearCache(repositories); 824 if (removeRepositories) { 825 removeRepositories(repos); 826 } 827 } 828 829 /** 830 * Create the history cache for all of the repositories. 831 */ createCache()832 public void createCache() { 833 if (!useCache()) { 834 return; 835 } 836 837 createCacheReal(repositories.values()); 838 } 839 840 /** 841 * Lookup repositories from list of repository paths. 842 * @param repositories paths to repositories relative to source root 843 * @return list of repositories 844 */ getReposFromString(Collection<String> repositories)845 private List<Repository> getReposFromString(Collection<String> repositories) { 846 ArrayList<Repository> repos = new ArrayList<>(); 847 File srcRoot = env.getSourceRootFile(); 848 849 for (String file : repositories) { 850 File f = new File(srcRoot, file); 851 Repository r = getRepository(f); 852 if (r == null) { 853 LOGGER.log(Level.WARNING, "Could not locate a repository for {0}", 854 f.getAbsolutePath()); 855 } else if (!repos.contains(r)) { 856 repos.add(r); 857 } 858 } 859 860 return repos; 861 } 862 getRepository(File file)863 public Repository getRepository(File file) { 864 return repositoryLookup.getRepository(file.toPath(), repositoryRoots.keySet(), repositories, 865 PathUtils::getRelativeToCanonical); 866 } 867 868 /** 869 * Remove list of repositories from the list maintained in the HistoryGuru. 870 * This is much less heavyweight than {@code invalidateRepositories()} 871 * since it just removes items from the map. 872 * @param repos repository paths 873 */ removeRepositories(Collection<String> repos)874 public void removeRepositories(Collection<String> repos) { 875 Set<Repository> removedRepos = repos.stream().map(repositories::remove) 876 .filter(Objects::nonNull).collect(Collectors.toSet()); 877 repositoryLookup.repositoriesRemoved(removedRepos); 878 // Re-map the repository roots. 879 repositoryRoots.clear(); 880 List<Repository> ccopy = new ArrayList<>(repositories.values()); 881 ccopy.forEach(this::putRepository); 882 } 883 884 /** 885 * Set list of known repositories which match the list of directories. 886 * @param repos list of repositories 887 * @param dirs collection of directories that might correspond to the repositories 888 * @param cmdType command timeout type 889 */ invalidateRepositories(Collection<? extends RepositoryInfo> repos, Collection<String> dirs, CommandTimeoutType cmdType)890 public void invalidateRepositories(Collection<? extends RepositoryInfo> repos, Collection<String> dirs, CommandTimeoutType cmdType) { 891 if (repos != null && !repos.isEmpty() && dirs != null && !dirs.isEmpty()) { 892 List<RepositoryInfo> newrepos = new ArrayList<>(); 893 for (RepositoryInfo i : repos) { 894 for (String dir : dirs) { 895 Path dirPath = new File(dir).toPath(); 896 Path iPath = new File(i.getDirectoryName()).toPath(); 897 if (iPath.startsWith(dirPath)) { 898 newrepos.add(i); 899 } 900 } 901 } 902 repos = newrepos; 903 } 904 905 invalidateRepositories(repos, cmdType); 906 } 907 908 /** 909 * Go through the list of specified repositories and determine if they 910 * are valid. Those that make it through will form the new HistoryGuru 911 * internal map. This means this method should be used only if dealing 912 * with whole collection of repositories. 913 * <br> 914 * The caller is expected to reflect the new list via {@code getRepositories()}. 915 * <br> 916 * The processing is done via thread pool since the operation 917 * is expensive (see {@code RepositoryFactory.getRepository()}). 918 * 919 * @param repos collection of repositories to invalidate. 920 * If null or empty, the internal map of repositories will be cleared. 921 * @param cmdType command timeout type 922 */ invalidateRepositories(Collection<? extends RepositoryInfo> repos, CommandTimeoutType cmdType)923 public void invalidateRepositories(Collection<? extends RepositoryInfo> repos, CommandTimeoutType cmdType) { 924 if (repos == null || repos.isEmpty()) { 925 clear(); 926 return; 927 } 928 929 Map<String, Repository> newrepos = 930 Collections.synchronizedMap(new HashMap<>(repos.size())); 931 Statistics elapsed = new Statistics(); 932 933 LOGGER.log(Level.FINE, "invalidating {0} repositories", repos.size()); 934 935 /* 936 * getRepository() below does various checks of the repository 937 * which involves executing commands and I/O so make the checks 938 * run in parallel to speed up the process. 939 */ 940 final CountDownLatch latch = new CountDownLatch(repos.size()); 941 int parallelismLevel; 942 // Both indexer and web app startup should be as quick as possible. 943 if (cmdType == CommandTimeoutType.INDEXER || cmdType == CommandTimeoutType.WEBAPP_START) { 944 parallelismLevel = env.getIndexingParallelism(); 945 } else { 946 parallelismLevel = env.getRepositoryInvalidationParallelism(); 947 } 948 final ExecutorService executor = Executors.newFixedThreadPool(parallelismLevel, 949 runnable -> { 950 Thread thread = Executors.defaultThreadFactory().newThread(runnable); 951 thread.setName("invalidate-repos-" + thread.getId()); 952 return thread; 953 }); 954 955 for (RepositoryInfo rinfo : repos) { 956 executor.submit(() -> { 957 try { 958 Repository r = RepositoryFactory.getRepository(rinfo, cmdType); 959 if (r == null) { 960 LOGGER.log(Level.WARNING, 961 "Failed to instantiate internal repository data for {0} in {1}", 962 new Object[]{rinfo.getType(), rinfo.getDirectoryName()}); 963 } else { 964 newrepos.put(r.getDirectoryName(), r); 965 } 966 } catch (Exception ex) { 967 // We want to catch any exception since we are in thread. 968 LOGGER.log(Level.WARNING, "Could not create " + rinfo.getType() 969 + " for '" + rinfo.getDirectoryName(), ex); 970 } finally { 971 latch.countDown(); 972 } 973 }); 974 } 975 976 // Wait until all repositories are validated. 977 try { 978 latch.await(); 979 } catch (InterruptedException ex) { 980 LOGGER.log(Level.SEVERE, "latch exception", ex); 981 } 982 executor.shutdown(); 983 984 clear(); 985 newrepos.forEach((_key, repo) -> putRepository(repo)); 986 987 elapsed.report(LOGGER, String.format("Done invalidating %d repositories", newrepos.size()), 988 "history.repositories.invalidate"); 989 } 990 991 @VisibleForTesting clear()992 public void clear() { 993 repositoryRoots.clear(); 994 repositories.clear(); 995 repositoryLookup.clear(); 996 } 997 998 /** 999 * Adds the specified {@code repository} to this instance's repository map 1000 * and repository-root map (if not already there). 1001 * @param repository a defined instance 1002 */ putRepository(Repository repository)1003 private void putRepository(Repository repository) { 1004 String repoDirectoryName = repository.getDirectoryName(); 1005 File repoDirectoryFile = new File(repoDirectoryName); 1006 String repoDirParent = repoDirectoryFile.getParent(); 1007 repositoryRoots.put(repoDirParent, ""); 1008 repositories.put(repoDirectoryName, repository); 1009 } 1010 } 1011