xref: /OpenGrok/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java (revision 4d55022fee5f09d91f5bbd05969d661f037940f3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
22  * Portions Copyright (c) 2017, 2020, Chris Fraire <cfraire@me.com>.
23  */
24 package org.opengrok.indexer.history;
25 
26 import java.io.File;
27 import java.io.IOException;
28 import java.io.InputStream;
29 import java.lang.reflect.InvocationTargetException;
30 import java.nio.file.Path;
31 import java.util.ArrayList;
32 import java.util.Collection;
33 import java.util.Collections;
34 import java.util.Date;
35 import java.util.HashMap;
36 import java.util.List;
37 import java.util.Map;
38 import java.util.Objects;
39 import java.util.Set;
40 import java.util.concurrent.ConcurrentHashMap;
41 import java.util.concurrent.CountDownLatch;
42 import java.util.concurrent.ExecutorService;
43 import java.util.concurrent.Executors;
44 import java.util.concurrent.Future;
45 import java.util.logging.Level;
46 import java.util.logging.Logger;
47 import java.util.stream.Collectors;
48 
49 import org.jetbrains.annotations.Nullable;
50 import org.jetbrains.annotations.VisibleForTesting;
51 import org.opengrok.indexer.configuration.CommandTimeoutType;
52 import org.opengrok.indexer.configuration.Configuration.RemoteSCM;
53 import org.opengrok.indexer.configuration.PathAccepter;
54 import org.opengrok.indexer.configuration.RuntimeEnvironment;
55 import org.opengrok.indexer.logger.LoggerFactory;
56 import org.opengrok.indexer.util.ForbiddenSymlinkException;
57 import org.opengrok.indexer.util.PathUtils;
58 import org.opengrok.indexer.util.Statistics;
59 
60 /**
61  * The HistoryGuru is used to implement a transparent layer to the various
62  * source control systems.
63  *
64  * @author Chandan
65  */
66 public final class HistoryGuru {
67 
68     private static final Logger LOGGER = LoggerFactory.getLogger(HistoryGuru.class);
69 
70     /**
71      * The one and only instance of the HistoryGuru.
72      */
73     private static final HistoryGuru INSTANCE = new HistoryGuru();
74 
75     private final RuntimeEnvironment env;
76 
77     /**
78      * The history cache to use.
79      */
80     private final HistoryCache historyCache;
81 
82     /**
83      * Map of repositories, with {@code DirectoryName} as key.
84      */
85     private final Map<String, Repository> repositories = new ConcurrentHashMap<>();
86 
87     /**
88      * Set of repository roots (using ConcurrentHashMap but a throwaway value)
89      * with parent of {@code DirectoryName} as key.
90      */
91     private final Map<String, String> repositoryRoots = new ConcurrentHashMap<>();
92 
93     /**
94      * Interface to perform repository lookup for a given file path and HistoryGuru state.
95      */
96     private final RepositoryLookup repositoryLookup;
97 
98     private boolean historyIndexDone = false;
99 
setHistoryIndexDone()100     public void setHistoryIndexDone() {
101         historyIndexDone = true;
102     }
103 
isHistoryIndexDone()104     public boolean isHistoryIndexDone() {
105         return historyIndexDone;
106     }
107 
108     /**
109      * Creates a new instance of HistoryGuru, and try to set the default source
110      * control system.
111      */
HistoryGuru()112     private HistoryGuru() {
113         env = RuntimeEnvironment.getInstance();
114 
115         HistoryCache cache = null;
116         if (env.useHistoryCache()) {
117             cache = new FileHistoryCache();
118 
119             try {
120                 cache.initialize();
121             } catch (HistoryException he) {
122                 LOGGER.log(Level.WARNING,
123                         "Failed to initialize the history cache", he);
124                 // Failed to initialize, run without a history cache
125                 cache = null;
126             }
127         }
128         historyCache = cache;
129         repositoryLookup = RepositoryLookup.cached();
130     }
131 
132     /**
133      * Get the one and only instance of the HistoryGuru.
134      *
135      * @return the one and only HistoryGuru instance
136      */
getInstance()137     public static HistoryGuru getInstance() {
138         return INSTANCE;
139     }
140 
141     /**
142      * Return whether cache should be used for the history log.
143      *
144      * @return {@code true} if the history cache has been enabled and initialized, {@code false} otherwise
145      */
useCache()146     private boolean useCache() {
147         return historyCache != null;
148     }
149 
150     /**
151      * Get a string with information about the history cache.
152      *
153      * @return a free form text string describing the history cache instance
154      * @throws HistoryException if an error occurred while getting the info
155      */
getCacheInfo()156     public String getCacheInfo() throws HistoryException {
157         return historyCache == null ? "No cache" : historyCache.getInfo();
158     }
159 
160     /**
161      * Annotate the specified revision of a file.
162      *
163      * @param file the file to annotate
164      * @param rev the revision to annotate (<code>null</code> means BASE)
165      * @return file annotation, or <code>null</code> if the
166      * <code>HistoryParser</code> does not support annotation
167      * @throws IOException if I/O exception occurs
168      */
169     @Nullable
annotate(File file, String rev)170     public Annotation annotate(File file, String rev) throws IOException {
171         Annotation annotation = null;
172 
173         Repository repo = getRepository(file);
174         if (repo != null) {
175             annotation = repo.annotate(file, rev);
176             History hist = null;
177             try {
178                 hist = getHistory(file);
179             } catch (HistoryException ex) {
180                 LOGGER.log(Level.FINEST, "Cannot get messages for tooltip: ", ex);
181             }
182             if (hist != null && annotation != null) {
183                 Set<String> revs = annotation.getRevisions();
184                 int revsMatched = 0;
185                 for (HistoryEntry he : hist.getHistoryEntries()) {
186                     String histRev = he.getRevision();
187                     String shortRev = repo.getRevisionForAnnotate(histRev);
188                     if (revs.contains(shortRev)) {
189                         annotation.addDesc(shortRev, "changeset: " + he.getRevision()
190                                 + "\nsummary: " + he.getMessage() + "\nuser: "
191                                 + he.getAuthor() + "\ndate: " + he.getDate());
192                         // History entries are coming from recent to older,
193                         // file version should be from oldest to newer.
194                         annotation.addFileVersion(shortRev, revs.size() - revsMatched);
195                         revsMatched++;
196                     }
197                 }
198             }
199         }
200 
201         return annotation;
202     }
203 
204     /**
205      * Get the appropriate history reader for given file.
206      *
207      * @param file The file to get the history reader for
208      * @throws HistoryException If an error occurs while getting the history
209      * @return A HistorReader that may be used to read out history data for a
210      * named file
211      */
getHistoryReader(File file)212     public HistoryReader getHistoryReader(File file) throws HistoryException {
213         History history = getHistory(file, false);
214         return history == null ? null : new HistoryReader(history);
215     }
216 
217     /**
218      * Get the history for the specified file.
219      *
220      * @param file the file to get the history for
221      * @return history for the file
222      * @throws HistoryException on error when accessing the history
223      */
getHistory(File file)224     public History getHistory(File file) throws HistoryException {
225         return getHistory(file, true, false);
226     }
227 
getHistory(File file, boolean withFiles)228     public History getHistory(File file, boolean withFiles) throws HistoryException {
229         return getHistory(file, withFiles, false);
230     }
231 
232     /**
233      * Get history for the specified file (called from the web app).
234      *
235      * @param file the file to get the history for
236      * @return history for the file
237      * @throws HistoryException on error when accessing the history
238      */
getHistoryUI(File file)239     public History getHistoryUI(File file) throws HistoryException {
240         return getHistory(file, true, true);
241     }
242 
243     /**
244      * The idea is that some repositories require reaching out to remote server whenever
245      * a history operation is done. Sometimes this is unwanted and this method decides that.
246      * This should be consulted before the actual repository operation, i.e. not when fetching
247      * history from a cache since that is inherently local operation.
248      * @param repo repository
249      * @param file file to decide the operation for
250      * @param ui whether coming from UI
251      * @return whether to perform the history operation
252      */
isRepoHistoryEligible(Repository repo, File file, boolean ui)253     boolean isRepoHistoryEligible(Repository repo, File file, boolean ui) {
254         RemoteSCM rscm = env.getRemoteScmSupported();
255         boolean doRemote = (ui && (rscm == RemoteSCM.UIONLY))
256                 || (rscm == RemoteSCM.ON)
257                 || (ui || ((rscm == RemoteSCM.DIRBASED) && (repo != null) && repo.hasHistoryForDirectories()));
258 
259         return (repo != null && repo.isHistoryEnabled() && repo.isWorking() && repo.fileHasHistory(file)
260                 && (!repo.isRemote() || doRemote));
261     }
262 
263     @Nullable
getHistoryFromCache(File file, Repository repository, boolean withFiles)264     private History getHistoryFromCache(File file, Repository repository, boolean withFiles)
265             throws HistoryException, ForbiddenSymlinkException {
266 
267         if (useCache() && historyCache.supportsRepository(repository)) {
268             return historyCache.get(file, repository, withFiles);
269         }
270 
271         return null;
272     }
273 
274     /**
275      * Get last {@link HistoryEntry} for a file. First, try to retrieve it from the cache.
276      * If that fails, fallback to the repository method.
277      * @param file file to get the history entry for
278      * @param ui is the request coming from the UI
279      * @return last (newest) history entry for given file or {@code null}
280      * @throws HistoryException if history retrieval failed
281      */
282     @Nullable
getLastHistoryEntry(File file, boolean ui)283     public HistoryEntry getLastHistoryEntry(File file, boolean ui) throws HistoryException {
284         Statistics statistics = new Statistics();
285         LOGGER.log(Level.FINEST, "started retrieval of last history entry for ''{0}''", file);
286         final File dir = file.isDirectory() ? file : file.getParentFile();
287         final Repository repository = getRepository(dir);
288 
289         History history;
290         try {
291             history = getHistoryFromCache(file, repository, false);
292             if (history != null) {
293                 HistoryEntry lastHistoryEntry = history.getLastHistoryEntry();
294                 if (lastHistoryEntry != null) {
295                     LOGGER.log(Level.FINEST, "got latest history entry {0} for ''{1}'' from history cache",
296                             new Object[]{lastHistoryEntry, file});
297                     return lastHistoryEntry;
298                 }
299             }
300         } catch (ForbiddenSymlinkException e) {
301             LOGGER.log(Level.FINER, e.getMessage());
302             return null;
303         }
304 
305         if (!isRepoHistoryEligible(repository, file, ui)) {
306             LOGGER.log(Level.FINER, "cannot retrieve the last history entry for ''{0}'' in {1} because of settings",
307                     new Object[]{file, repository});
308             return null;
309         }
310 
311         // Fallback to the repository method.
312         HistoryEntry lastHistoryEntry = repository.getLastHistoryEntry(file, ui);
313         if (lastHistoryEntry != null) {
314             LOGGER.log(Level.FINEST, "got latest history entry {0} for ''{1}'' using repository {2}",
315                     new Object[]{lastHistoryEntry, file, repository});
316         }
317         statistics.report(LOGGER, Level.FINEST,
318                 String.format("finished retrieval of last history entry for '%s' (%s)",
319                         file, lastHistoryEntry != null ? "success" : "fail"), "history.entry.latest");
320         return lastHistoryEntry;
321     }
322 
getHistory(File file, boolean withFiles, boolean ui)323     public History getHistory(File file, boolean withFiles, boolean ui) throws HistoryException {
324         return getHistory(file, withFiles, ui, true);
325     }
326 
327     /**
328      * Get the history for the specified file. The history cache is tried first, then the repository.
329      *
330      * @param file the file to get the history for
331      * @param withFiles whether the returned history should contain a
332      * list of files touched by each changeset (the file list may be skipped if false, but it doesn't have to)
333      * @param ui called from the webapp
334      * @param fallback fall back to fetching the history from the repository
335      *                 if it cannot be retrieved from history cache
336      * @return history for the file
337      * @throws HistoryException on error when accessing the history
338      */
getHistory(File file, boolean withFiles, boolean ui, boolean fallback)339     public History getHistory(File file, boolean withFiles, boolean ui, boolean fallback) throws HistoryException {
340 
341         final File dir = file.isDirectory() ? file : file.getParentFile();
342         final Repository repository = getRepository(dir);
343 
344         History history;
345         try {
346             history = getHistoryFromCache(file, repository, withFiles);
347             if (history != null) {
348                 return history;
349             }
350 
351             return getHistoryFromRepository(file, repository, ui);
352         } catch (ForbiddenSymlinkException e) {
353             LOGGER.log(Level.FINER, e.getMessage());
354             return null;
355         }
356     }
357 
358     @Nullable
getHistoryFromRepository(File file, Repository repository, boolean ui)359     private History getHistoryFromRepository(File file, Repository repository, boolean ui) throws HistoryException {
360         History history;
361 
362         if (!isRepoHistoryEligible(repository, file, ui)) {
363             return null;
364         }
365 
366         /*
367          * Some mirrors of repositories which are capable of fetching history
368          * for directories may contain lots of files untracked by given SCM.
369          * For these it would be waste of time to get their history
370          * since the history of all files in this repository should have been
371          * fetched in the first phase of indexing.
372          */
373         if (env.isIndexer() && isHistoryIndexDone() &&
374                 repository.isHistoryEnabled() && repository.hasHistoryForDirectories()) {
375             LOGGER.log(Level.FINE, "not getting the history for ''{0}'' in repository {1} as the it supports "
376                     + "history for directories",
377                     new Object[]{file, repository});
378             return null;
379         }
380 
381         if (!env.getPathAccepter().accept(file)) {
382             return null;
383         }
384 
385         try {
386             history = repository.getHistory(file);
387         } catch (UnsupportedOperationException e) {
388             // In this case, we've found a file for which the SCM has no history
389             // An example is a non-SCCS file somewhere in an SCCS-controlled workspace.
390             return null;
391         }
392 
393         return history;
394     }
395 
396     /**
397      * Gets a named revision of the specified file into the specified target file.
398      *
399      * @param target a require target file
400      * @param parent The directory containing the file
401      * @param basename The name of the file
402      * @param rev The revision to get
403      * @return {@code true} if content was found
404      * @throws java.io.IOException if an I/O error occurs
405      */
getRevision(File target, String parent, String basename, String rev)406     public boolean getRevision(File target, String parent, String basename, String rev) throws IOException {
407         Repository repo = getRepository(new File(parent));
408         return repo != null && repo.getHistoryGet(target, parent, basename, rev);
409     }
410 
411     /**
412      * Get a named revision of the specified file.
413      *
414      * @param parent The directory containing the file
415      * @param basename The name of the file
416      * @param rev The revision to get
417      * @return An InputStream containing the named revision of the file.
418      */
getRevision(String parent, String basename, String rev)419     public InputStream getRevision(String parent, String basename, String rev) {
420         InputStream ret = null;
421 
422         Repository repo = getRepository(new File(parent));
423         if (repo != null) {
424             ret = repo.getHistoryGet(parent, basename, rev);
425         }
426         return ret;
427     }
428 
429     /**
430      * Does this directory contain files with source control information?
431      *
432      * @param file The name of the directory
433      * @return true if the files in this directory have associated revision
434      * history
435      */
hasHistory(File file)436     public boolean hasHistory(File file) {
437         Repository repo = getRepository(file);
438 
439         if (repo == null) {
440             return false;
441         }
442 
443         // This should return true for Annotate view.
444         return repo.isWorking() && repo.fileHasHistory(file)
445                 && ((env.getRemoteScmSupported() == RemoteSCM.ON)
446                 || (env.getRemoteScmSupported() == RemoteSCM.UIONLY)
447                 || (env.getRemoteScmSupported() == RemoteSCM.DIRBASED)
448                 || !repo.isRemote());
449     }
450 
451     /**
452      * Does the history cache contain entry for this directory ?
453      * @param file file object
454      * @return true if there is cache, false otherwise
455      */
hasCacheForFile(File file)456     public boolean hasCacheForFile(File file) {
457         if (!useCache()) {
458             return false;
459         }
460 
461         try {
462             return historyCache.hasCacheForFile(file);
463         } catch (HistoryException ex) {
464             return false;
465         }
466     }
467 
468     /**
469      * Check if we can annotate the specified file.
470      *
471      * @param file the file to check
472      * @return <code>true</code> if the file is under version control and the
473      * version control system supports annotation
474      */
hasAnnotation(File file)475     public boolean hasAnnotation(File file) {
476         if (!file.isDirectory()) {
477             Repository repo = getRepository(file);
478             if (repo != null && repo.isWorking()) {
479                 return repo.fileHasAnnotation(file);
480             }
481         }
482 
483         return false;
484     }
485 
486     /**
487      * Get the last modified times for all files and subdirectories in the
488      * specified directory.
489      *
490      * @param directory the directory whose files to check
491      * @return a map from file names to modification times for the files that
492      * the history cache has information about
493      * @throws org.opengrok.indexer.history.HistoryException if history cannot be retrieved
494      */
getLastModifiedTimes(File directory)495     public Map<String, Date> getLastModifiedTimes(File directory)
496             throws HistoryException {
497 
498         Repository repository = getRepository(directory);
499 
500         if (repository != null && useCache()) {
501             return historyCache.getLastModifiedTimes(directory, repository);
502         }
503 
504         return Collections.emptyMap();
505     }
506 
507     /**
508      * recursively search for repositories with a depth limit, add those found
509      * to the internally used map.
510      *
511      * @param files list of files to check if they contain a repository
512      * @param allowedNesting number of levels of nested repos to allow
513      * @param depth current depth - using global scanningDepth - one can limit
514      * this to improve scanning performance
515      * @param isNested a value indicating if a parent {@link Repository} was
516      * already found above the {@code files}
517      * @return collection of added repositories
518      */
addRepositories(File[] files, int allowedNesting, int depth, boolean isNested)519     private Collection<RepositoryInfo> addRepositories(File[] files,
520             int allowedNesting, int depth, boolean isNested) {
521 
522         List<RepositoryInfo> repoList = new ArrayList<>();
523         PathAccepter pathAccepter = env.getPathAccepter();
524 
525         for (File file : files) {
526             if (!file.isDirectory()) {
527                 continue;
528             }
529 
530             String path;
531             try {
532                 path = file.getCanonicalPath();
533 
534                 Repository repository = null;
535                 try {
536                     repository = RepositoryFactory.getRepository(file, CommandTimeoutType.INDEXER, isNested);
537                 } catch (InstantiationException | NoSuchMethodException | InvocationTargetException e) {
538                     LOGGER.log(Level.WARNING, "Could not create repository for '"
539                             + file + "', could not instantiate the repository.", e);
540                 } catch (IllegalAccessException iae) {
541                     LOGGER.log(Level.WARNING, "Could not create repository for '"
542                             + file + "', missing access rights.", iae);
543                     continue;
544                 } catch (ForbiddenSymlinkException e) {
545                     LOGGER.log(Level.WARNING, "Could not create repository for ''{0}'': {1}",
546                             new Object[] {file, e.getMessage()});
547                     continue;
548                 }
549                 if (repository == null) {
550                     if (depth > env.getScanningDepth()) {
551                         // we reached our search max depth, skip looking through the children
552                         continue;
553                     }
554                     // Not a repository, search its sub-dirs.
555                     if (pathAccepter.accept(file)) {
556                         File[] subFiles = file.listFiles();
557                         if (subFiles == null) {
558                             LOGGER.log(Level.WARNING,
559                                     "Failed to get sub directories for ''{0}'', " +
560                                     "check access permissions.",
561                                     file.getAbsolutePath());
562                         } else {
563                             // Recursive call to scan next depth
564                             repoList.addAll(addRepositories(subFiles,
565                                     allowedNesting, depth + 1, isNested));
566                         }
567                     }
568                 } else {
569                     LOGGER.log(Level.CONFIG, "Adding <{0}> repository: <{1}>",
570                             new Object[]{repository.getClass().getName(), path});
571 
572                     repoList.add(new RepositoryInfo(repository));
573                     putRepository(repository);
574 
575                     if (allowedNesting > 0 && repository.supportsSubRepositories()) {
576                         File[] subFiles = file.listFiles();
577                         if (subFiles == null) {
578                             LOGGER.log(Level.WARNING,
579                                     "Failed to get sub directories for ''{0}'', check access permissions.",
580                                     file.getAbsolutePath());
581                         } else if (depth <= env.getScanningDepth()) {
582                             // Search down to a limit -- if not: too much
583                             // stat'ing for huge Mercurial repositories
584                             repoList.addAll(addRepositories(subFiles,
585                                     allowedNesting - 1, depth + 1, true));
586                         }
587                     }
588                 }
589             } catch (IOException exp) {
590                 LOGGER.log(Level.WARNING,
591                         "Failed to get canonical path for {0}: {1}",
592                         new Object[]{file.getAbsolutePath(), exp.getMessage()});
593                 LOGGER.log(Level.WARNING, "Repository will be ignored...", exp);
594             }
595         }
596 
597         return repoList;
598     }
599 
600     /**
601      * Recursively search for repositories in given directories, add those found
602      * to the internally used repository map.
603      *
604      * @param files list of directories to check if they contain a repository
605      * @return collection of added repositories
606      */
addRepositories(File[] files)607     public Collection<RepositoryInfo> addRepositories(File[] files) {
608         ExecutorService executor = env.getIndexerParallelizer().getFixedExecutor();
609         List<Future<Collection<RepositoryInfo>>> futures = new ArrayList<>();
610         for (File file: files) {
611             futures.add(executor.submit(() -> addRepositories(new File[]{file},
612                     env.getNestingMaximum(), 0, false)));
613         }
614 
615         List<RepositoryInfo> repoList = new ArrayList<>();
616         futures.forEach(future -> {
617             try {
618                 repoList.addAll(future.get());
619             } catch (Exception e) {
620                 LOGGER.log(Level.WARNING, "failed to get results of repository scan", e);
621             }
622         });
623 
624         LOGGER.log(Level.FINER, "Discovered repositories: {0}", repoList);
625 
626         return repoList;
627     }
628 
629     /**
630      * Recursively search for repositories in given directories, add those found
631      * to the internally used repository map.
632      *
633      * @param repos collection of repository paths
634      * @return collection of added repositories
635      */
addRepositories(Collection<String> repos)636     public Collection<RepositoryInfo> addRepositories(Collection<String> repos) {
637         return addRepositories(repos.stream().map(File::new).toArray(File[]::new));
638     }
639 
640     /**
641      * Get collection of repositories used internally by HistoryGuru.
642      * @return collection of repositories
643      */
getRepositories()644     public Collection<RepositoryInfo> getRepositories() {
645         return repositories.values().stream().
646                 map(RepositoryInfo::new).collect(Collectors.toSet());
647     }
648 
createCache(Repository repository, String sinceRevision)649     private void createCache(Repository repository, String sinceRevision) {
650         String path = repository.getDirectoryName();
651         String type = repository.getClass().getSimpleName();
652 
653         if (!repository.isHistoryEnabled()) {
654             LOGGER.log(Level.INFO,
655                     "Skipping history cache creation of {0} repository in ''{1}'' and its subdirectories",
656                     new Object[]{type, path});
657             return;
658         }
659 
660         if (repository.isWorking()) {
661             Statistics elapsed = new Statistics();
662 
663             LOGGER.log(Level.INFO, "Creating history cache for {0} ({1}) {2} renamed file handling",
664                     new Object[]{path, type, repository.isHandleRenamedFiles() ? "with" : "without"});
665 
666             try {
667                 repository.createCache(historyCache, sinceRevision);
668             } catch (Exception e) {
669                 LOGGER.log(Level.WARNING,
670                         "An error occurred while creating cache for " + path + " (" + type + ")", e);
671             }
672 
673             elapsed.report(LOGGER, "Done history cache for " + path);
674         } else {
675             LOGGER.log(Level.WARNING,
676                     "Skipping creation of history cache of {0} repository in {1}: Missing SCM dependencies?",
677                     new Object[]{type, path});
678         }
679     }
680 
createCacheReal(Collection<Repository> repositories)681     private void createCacheReal(Collection<Repository> repositories) {
682         if (repositories.isEmpty()) {
683             LOGGER.log(Level.WARNING, "History cache is enabled however the list of repositories is empty. " +
684                     "Either specify the repositories in configuration or let the indexer scan them.");
685             return;
686         }
687 
688         Statistics elapsed = new Statistics();
689         ExecutorService executor = env.getIndexerParallelizer().getHistoryExecutor();
690         // Since we know each repository object from the repositories
691         // collection is unique, we can abuse HashMap to create a list of
692         // repository,revision tuples with repository as key (as the revision
693         // string does not have to be unique - surely it is not unique
694         // for the initial index case).
695         HashMap<Repository, String> repos2process = new HashMap<>();
696 
697         // Collect the list of <latestRev,repo> pairs first so that we
698         // do not have to deal with latch decrementing in the cycle below.
699         for (final Repository repo : repositories) {
700             final String latestRev;
701 
702             try {
703                 latestRev = historyCache.getLatestCachedRevision(repo);
704                 repos2process.put(repo, latestRev);
705             } catch (HistoryException he) {
706                 LOGGER.log(Level.WARNING,
707                         String.format(
708                                 "Failed to retrieve latest cached revision for %s",
709                                 repo.getDirectoryName()), he);
710             }
711         }
712 
713         LOGGER.log(Level.INFO, "Creating history cache for {0} repositories",
714                 repos2process.size());
715         final CountDownLatch latch = new CountDownLatch(repos2process.size());
716         for (final Map.Entry<Repository, String> entry : repos2process.entrySet()) {
717             executor.submit(() -> {
718                 try {
719                     createCache(entry.getKey(), entry.getValue());
720                 } catch (Exception ex) {
721                     // We want to catch any exception since we are in thread.
722                     LOGGER.log(Level.WARNING, "createCacheReal() got exception", ex);
723                 } finally {
724                     latch.countDown();
725                 }
726             });
727         }
728 
729         /*
730          * Wait until the history of all repositories is done. This is necessary
731          * since the next phase of generating index will need the history to
732          * be ready as it is recorded in Lucene index.
733          */
734         try {
735             latch.await();
736         } catch (InterruptedException ex) {
737             LOGGER.log(Level.SEVERE, "latch exception", ex);
738             return;
739         }
740 
741         // The cache has been populated. Now, optimize how it is stored on
742         // disk to enhance performance and save space.
743         try {
744             historyCache.optimize();
745         } catch (HistoryException he) {
746             LOGGER.log(Level.WARNING,
747                     "Failed optimizing the history cache database", he);
748         }
749         elapsed.report(LOGGER, "Done history cache for all repositories", "indexer.history.cache");
750         setHistoryIndexDone();
751     }
752 
753     /**
754      * Create history cache for selected repositories.
755      * For this to work the repositories have to be already present in the
756      * internal map, e.g. via {@code setRepositories()} or {@code addRepositories()}.
757      *
758      * @param repositories list of repository paths
759      */
createCache(Collection<String> repositories)760     public void createCache(Collection<String> repositories) {
761         if (!useCache()) {
762             return;
763         }
764         createCacheReal(getReposFromString(repositories));
765     }
766 
767     /**
768      * Remove history data for a list of repositories.
769      * Note that this just deals with the data, the map used by HistoryGuru
770      * will be left intact.
771      *
772      * @param repositories list of repository paths relative to source root
773      * @return list of repository paths that were found and their history data removed
774      */
clearCache(Collection<String> repositories)775     public List<String> clearCache(Collection<String> repositories) {
776         List<String> clearedRepos = new ArrayList<>();
777 
778         if (!useCache()) {
779             return clearedRepos;
780         }
781 
782         for (Repository r : getReposFromString(repositories)) {
783             try {
784                 historyCache.clear(r);
785                 clearedRepos.add(r.getDirectoryName());
786                 LOGGER.log(Level.INFO,
787                         "History cache for {0} cleared.", r.getDirectoryName());
788             } catch (HistoryException e) {
789                 LOGGER.log(Level.WARNING,
790                         "Clearing history cache for repository {0} failed: {1}",
791                         new Object[]{r.getDirectoryName(), e.getLocalizedMessage()});
792             }
793         }
794 
795         return clearedRepos;
796     }
797 
798     /**
799      * Clear entry for single file from history cache.
800      * @param path path to the file relative to the source root
801      */
clearCacheFile(String path)802     public void clearCacheFile(String path) {
803         if (!useCache()) {
804             return;
805         }
806 
807         historyCache.clearFile(path);
808     }
809 
810     /**
811      * Remove history data for a list of repositories. Those that are
812      * successfully cleared may be removed from the internal list of repositories,
813      * depending on the {@code removeRepositories} parameter.
814      *
815      * @param repositories list of repository paths relative to source root
816      * @param removeRepositories set true to also remove the repositories from internal structures
817      */
removeCache(Collection<String> repositories, boolean removeRepositories)818     public void removeCache(Collection<String> repositories, boolean removeRepositories) {
819         if (!useCache()) {
820             return;
821         }
822 
823         List<String> repos = clearCache(repositories);
824         if (removeRepositories) {
825             removeRepositories(repos);
826         }
827     }
828 
829     /**
830      * Create the history cache for all of the repositories.
831      */
createCache()832     public void createCache() {
833         if (!useCache()) {
834             return;
835         }
836 
837         createCacheReal(repositories.values());
838     }
839 
840     /**
841      * Lookup repositories from list of repository paths.
842      * @param repositories paths to repositories relative to source root
843      * @return list of repositories
844      */
getReposFromString(Collection<String> repositories)845     private List<Repository> getReposFromString(Collection<String> repositories) {
846         ArrayList<Repository> repos = new ArrayList<>();
847         File srcRoot = env.getSourceRootFile();
848 
849         for (String file : repositories) {
850             File f = new File(srcRoot, file);
851             Repository r = getRepository(f);
852             if (r == null) {
853                 LOGGER.log(Level.WARNING, "Could not locate a repository for {0}",
854                         f.getAbsolutePath());
855             } else if (!repos.contains(r)) {
856                 repos.add(r);
857             }
858         }
859 
860         return repos;
861     }
862 
getRepository(File file)863     public Repository getRepository(File file) {
864         return repositoryLookup.getRepository(file.toPath(), repositoryRoots.keySet(), repositories,
865                 PathUtils::getRelativeToCanonical);
866     }
867 
868     /**
869      * Remove list of repositories from the list maintained in the HistoryGuru.
870      * This is much less heavyweight than {@code invalidateRepositories()}
871      * since it just removes items from the map.
872      * @param repos repository paths
873      */
removeRepositories(Collection<String> repos)874     public void removeRepositories(Collection<String> repos) {
875         Set<Repository> removedRepos = repos.stream().map(repositories::remove)
876             .filter(Objects::nonNull).collect(Collectors.toSet());
877         repositoryLookup.repositoriesRemoved(removedRepos);
878         // Re-map the repository roots.
879         repositoryRoots.clear();
880         List<Repository> ccopy = new ArrayList<>(repositories.values());
881         ccopy.forEach(this::putRepository);
882     }
883 
884     /**
885      * Set list of known repositories which match the list of directories.
886      * @param repos list of repositories
887      * @param dirs collection of directories that might correspond to the repositories
888      * @param cmdType command timeout type
889      */
invalidateRepositories(Collection<? extends RepositoryInfo> repos, Collection<String> dirs, CommandTimeoutType cmdType)890     public void invalidateRepositories(Collection<? extends RepositoryInfo> repos, Collection<String> dirs, CommandTimeoutType cmdType) {
891         if (repos != null && !repos.isEmpty() && dirs != null && !dirs.isEmpty()) {
892             List<RepositoryInfo> newrepos = new ArrayList<>();
893             for (RepositoryInfo i : repos) {
894                 for (String dir : dirs) {
895                     Path dirPath = new File(dir).toPath();
896                     Path iPath = new File(i.getDirectoryName()).toPath();
897                     if (iPath.startsWith(dirPath)) {
898                         newrepos.add(i);
899                     }
900                 }
901             }
902             repos = newrepos;
903         }
904 
905         invalidateRepositories(repos, cmdType);
906     }
907 
908     /**
909      * Go through the list of specified repositories and determine if they
910      * are valid. Those that make it through will form the new HistoryGuru
911      * internal map. This means this method should be used only if dealing
912      * with whole collection of repositories.
913      * <br>
914      * The caller is expected to reflect the new list via {@code getRepositories()}.
915      * <br>
916      * The processing is done via thread pool since the operation
917      * is expensive (see {@code RepositoryFactory.getRepository()}).
918      *
919      * @param repos collection of repositories to invalidate.
920      * If null or empty, the internal map of repositories will be cleared.
921      * @param cmdType command timeout type
922      */
invalidateRepositories(Collection<? extends RepositoryInfo> repos, CommandTimeoutType cmdType)923     public void invalidateRepositories(Collection<? extends RepositoryInfo> repos, CommandTimeoutType cmdType) {
924         if (repos == null || repos.isEmpty()) {
925             clear();
926             return;
927         }
928 
929         Map<String, Repository> newrepos =
930             Collections.synchronizedMap(new HashMap<>(repos.size()));
931         Statistics elapsed = new Statistics();
932 
933         LOGGER.log(Level.FINE, "invalidating {0} repositories", repos.size());
934 
935         /*
936          * getRepository() below does various checks of the repository
937          * which involves executing commands and I/O so make the checks
938          * run in parallel to speed up the process.
939          */
940         final CountDownLatch latch = new CountDownLatch(repos.size());
941         int parallelismLevel;
942         // Both indexer and web app startup should be as quick as possible.
943         if (cmdType == CommandTimeoutType.INDEXER || cmdType == CommandTimeoutType.WEBAPP_START) {
944             parallelismLevel = env.getIndexingParallelism();
945         } else {
946             parallelismLevel = env.getRepositoryInvalidationParallelism();
947         }
948         final ExecutorService executor = Executors.newFixedThreadPool(parallelismLevel,
949                 runnable -> {
950                     Thread thread = Executors.defaultThreadFactory().newThread(runnable);
951                     thread.setName("invalidate-repos-" + thread.getId());
952                     return thread;
953                 });
954 
955         for (RepositoryInfo rinfo : repos) {
956             executor.submit(() -> {
957                 try {
958                     Repository r = RepositoryFactory.getRepository(rinfo, cmdType);
959                     if (r == null) {
960                         LOGGER.log(Level.WARNING,
961                                 "Failed to instantiate internal repository data for {0} in {1}",
962                                 new Object[]{rinfo.getType(), rinfo.getDirectoryName()});
963                     } else {
964                         newrepos.put(r.getDirectoryName(), r);
965                     }
966                 } catch (Exception ex) {
967                     // We want to catch any exception since we are in thread.
968                     LOGGER.log(Level.WARNING, "Could not create " + rinfo.getType()
969                         + " for '" + rinfo.getDirectoryName(), ex);
970                 } finally {
971                     latch.countDown();
972                 }
973             });
974         }
975 
976         // Wait until all repositories are validated.
977         try {
978             latch.await();
979         } catch (InterruptedException ex) {
980             LOGGER.log(Level.SEVERE, "latch exception", ex);
981         }
982         executor.shutdown();
983 
984         clear();
985         newrepos.forEach((_key, repo) -> putRepository(repo));
986 
987         elapsed.report(LOGGER, String.format("Done invalidating %d repositories", newrepos.size()),
988                 "history.repositories.invalidate");
989     }
990 
991     @VisibleForTesting
clear()992     public void clear() {
993         repositoryRoots.clear();
994         repositories.clear();
995         repositoryLookup.clear();
996     }
997 
998     /**
999      * Adds the specified {@code repository} to this instance's repository map
1000      * and repository-root map (if not already there).
1001      * @param repository a defined instance
1002      */
putRepository(Repository repository)1003     private void putRepository(Repository repository) {
1004         String repoDirectoryName = repository.getDirectoryName();
1005         File repoDirectoryFile = new File(repoDirectoryName);
1006         String repoDirParent = repoDirectoryFile.getParent();
1007         repositoryRoots.put(repoDirParent, "");
1008         repositories.put(repoDirectoryName, repository);
1009     }
1010 }
1011