xref: /OpenGrok/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java (revision 855e7d602b9c6c61751617c0dec23c5a6efd14e9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2010, 2022, Oracle and/or its affiliates. All rights reserved.
22  * Portions Copyright (c) 2018, 2020, Chris Fraire <cfraire@me.com>.
23  */
24 package org.opengrok.indexer.index;
25 
26 import java.io.File;
27 import java.io.FileOutputStream;
28 import java.io.IOException;
29 import java.nio.charset.StandardCharsets;
30 import java.nio.file.Files;
31 import java.nio.file.Path;
32 import java.nio.file.Paths;
33 import java.util.ArrayList;
34 import java.util.Arrays;
35 import java.util.Collections;
36 import java.util.HashMap;
37 import java.util.HashSet;
38 import java.util.List;
39 import java.util.Set;
40 import java.util.TreeSet;
41 import java.util.stream.Collectors;
42 import java.util.stream.Stream;
43 
44 import org.apache.lucene.document.Document;
45 import org.apache.lucene.queryparser.classic.ParseException;
46 import org.apache.lucene.search.ScoreDoc;
47 
48 import org.eclipse.jgit.api.Git;
49 import org.eclipse.jgit.api.MergeCommand;
50 import org.eclipse.jgit.lib.ObjectId;
51 import org.junit.jupiter.api.AfterEach;
52 import org.junit.jupiter.api.BeforeEach;
53 import org.junit.jupiter.api.Test;
54 import org.junit.jupiter.params.ParameterizedTest;
55 import org.junit.jupiter.params.provider.Arguments;
56 import org.junit.jupiter.params.provider.MethodSource;
57 import org.junit.jupiter.params.provider.ValueSource;
58 import org.opengrok.indexer.analysis.Definitions;
59 import org.opengrok.indexer.condition.EnabledForRepository;
60 import org.opengrok.indexer.configuration.CommandTimeoutType;
61 import org.opengrok.indexer.configuration.Project;
62 import org.opengrok.indexer.configuration.RuntimeEnvironment;
63 import org.opengrok.indexer.history.FileCollector;
64 import org.opengrok.indexer.history.History;
65 import org.opengrok.indexer.history.HistoryEntry;
66 import org.opengrok.indexer.history.HistoryGuru;
67 import org.opengrok.indexer.history.Repository;
68 import org.opengrok.indexer.history.RepositoryFactory;
69 import org.opengrok.indexer.history.RepositoryInfo;
70 import org.opengrok.indexer.history.RepositoryWithHistoryTraversal;
71 import org.opengrok.indexer.search.QueryBuilder;
72 import org.opengrok.indexer.search.SearchEngine;
73 import org.opengrok.indexer.util.ForbiddenSymlinkException;
74 import org.opengrok.indexer.util.IOUtils;
75 import org.opengrok.indexer.util.TandemPath;
76 import org.opengrok.indexer.util.TestRepository;
77 
78 import static org.junit.jupiter.api.Assertions.assertEquals;
79 import static org.junit.jupiter.api.Assertions.assertFalse;
80 import static org.junit.jupiter.api.Assertions.assertNotEquals;
81 import static org.junit.jupiter.api.Assertions.assertNotNull;
82 import static org.junit.jupiter.api.Assertions.assertNull;
83 import static org.junit.jupiter.api.Assertions.assertTrue;
84 import static org.mockito.ArgumentMatchers.any;
85 import static org.mockito.Mockito.atLeast;
86 import static org.mockito.Mockito.doReturn;
87 import static org.mockito.Mockito.spy;
88 import static org.mockito.Mockito.times;
89 import static org.mockito.Mockito.verify;
90 import static org.mockito.Mockito.when;
91 import static org.opengrok.indexer.condition.RepositoryInstalled.Type.CVS;
92 
93 /**
94  * Unit tests for the {@code IndexDatabase} class.
95  *
96  * This is quite a heavy test class - it runs the indexer before each (parametrized) test,
97  * so it might contribute significantly to the overall test run time.
98  */
99 class IndexDatabaseTest {
100 
    // Test repository fixture; it is re-created in setUpClass() and destroyed in tearDownClass().
    private static TestRepository repository;

    // Indexer instance obtained in setUpClass() and used by the tests to re-run indexing phases.
    private Indexer indexer;

    // Runtime environment singleton; configured in setUpClass() and further tuned by the tests.
    private RuntimeEnvironment env;
106 
107     @BeforeEach
setUpClass()108     public void setUpClass() throws Exception {
109         env = RuntimeEnvironment.getInstance();
110 
111         repository = new TestRepository();
112         repository.create(HistoryGuru.class.getResource("/repositories"));
113 
114         // After copying the files from the archive, Git will consider the files to be changed,
115         // at least on Windows. This causes some tests, particularly testGetIndexDownArgs() to fail.
116         // To avoid this, clone the Git repository.
117         Path gitRepositoryRootPath = Path.of(repository.getSourceRoot(), "git");
118         Path gitCheckoutPath = Path.of(repository.getSourceRoot(), "gitcheckout");
119         Git git = Git.cloneRepository()
120                 .setURI(gitRepositoryRootPath.toFile().toURI().toString())
121                 .setDirectory(gitCheckoutPath.toFile())
122                 .call();
123         // The Git object has to be closed, otherwise the move below would fail on Windows with
124         // AccessDeniedException due to the file handle still being open.
125         git.close();
126         IOUtils.removeRecursive(gitRepositoryRootPath);
127         Files.move(gitCheckoutPath, gitRepositoryRootPath);
128 
129         env.setSourceRoot(repository.getSourceRoot());
130         env.setDataRoot(repository.getDataRoot());
131         env.setHistoryEnabled(true);
132         env.setProjectsEnabled(true);
133         RepositoryFactory.initializeIgnoredNames(env);
134 
135         // Restore the project and repository information.
136         env.setProjects(new HashMap<>());
137         HistoryGuru.getInstance().removeRepositories(List.of("/git"));
138         env.setRepositories(repository.getSourceRoot());
139         HistoryGuru.getInstance().invalidateRepositories(env.getRepositories(), CommandTimeoutType.INDEXER);
140         env.generateProjectRepositoriesMap();
141 
142         indexer = Indexer.getInstance();
143         indexer.prepareIndexer(
144                 env, true, true,
145                 false, null, null);
146 
147         // Reset the state of the git project w.r.t. history based reindex.
148         // It is the responsibility of each test that relies on the per project tunable
149         // to call gitProject.completeWithDefaults().
150         Project gitProject = env.getProjects().get("git");
151         gitProject.clearProperties();
152 
153         env.setDefaultProjectsFromNames(new TreeSet<>(Arrays.asList("/c")));
154 
155         indexer.doIndexerExecution(true, null, null);
156 
157         env.clearFileCollector();
158     }
159 
160     @AfterEach
tearDownClass()161     public void tearDownClass() throws Exception {
162         repository.destroy();
163     }
164 
165     @Test
testGetDefinitions()166     void testGetDefinitions() throws Exception {
167         // Test that we can get definitions for one of the files in the
168         // repository.
169         File f1 = new File(repository.getSourceRoot() + "/git/main.c");
170         Definitions defs1 = IndexDatabase.getDefinitions(f1);
171         assertNotNull(defs1);
172         assertTrue(defs1.hasSymbol("main"));
173         assertTrue(defs1.hasSymbol("argv"));
174         assertFalse(defs1.hasSymbol("b"));
175         assertTrue(defs1.hasDefinitionAt("main", 3, new String[1]));
176 
177         //same for windows delimiters
178         f1 = new File(repository.getSourceRoot() + "\\git\\main.c");
179         defs1 = IndexDatabase.getDefinitions(f1);
180         assertNotNull(defs1);
181         assertTrue(defs1.hasSymbol("main"));
182         assertTrue(defs1.hasSymbol("argv"));
183         assertFalse(defs1.hasSymbol("b"));
184         assertTrue(defs1.hasDefinitionAt("main", 3, new String[1]));
185 
186         // Test that we get null back if we request definitions for a file
187         // that's not in the repository.
188         File f2 = new File(repository.getSourceRoot() + "/git/foobar.d");
189         Definitions defs2 = IndexDatabase.getDefinitions(f2);
190         assertNull(defs2);
191     }
192 
checkDataExistence(String fileName, boolean shouldExist)193     private void checkDataExistence(String fileName, boolean shouldExist) {
194         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
195 
196         for (String dirName : new String[] {"historycache", IndexDatabase.XREF_DIR}) {
197             File dataDir = new File(env.getDataRootFile(), dirName);
198             File dataFile = new File(dataDir, TandemPath.join(fileName, ".gz"));
199 
200             if (shouldExist) {
201                 assertTrue(dataFile.exists(), "file " + fileName + " not found in " + dirName);
202             } else {
203                 assertFalse(dataFile.exists(), "file " + fileName + " found in " + dirName);
204             }
205         }
206     }
207 
208     /**
209      * Test removal of IndexDatabase. xrefs and history index entries after
210      * file has been removed from a repository.
211      */
212     @ParameterizedTest
213     @ValueSource(booleans = {true, false})
testCleanupAfterIndexRemoval(boolean historyBasedReindex)214     void testCleanupAfterIndexRemoval(boolean historyBasedReindex) throws Exception {
215         final int origNumFiles;
216 
217         env.setHistoryBasedReindex(historyBasedReindex);
218 
219         String projectName = "git";
220         Project project = env.getProjects().get(projectName);
221         assertNotNull(project);
222         IndexDatabase idb = new IndexDatabase(project);
223         assertNotNull(idb);
224 
225         String fileName = "header.h";
226         File gitRoot = new File(repository.getSourceRoot(), projectName);
227         assertTrue(new File(gitRoot, fileName).exists());
228 
229         // Check that the file was indexed successfully in terms of generated data.
230         checkDataExistence(projectName + File.separator + fileName, true);
231         origNumFiles = idb.getNumFiles();
232 
233         /*
234          * Initially was 6, then IndexAnalysisSettings added 1, then
235          * NumLinesLOCAggregator added 3.
236          */
237         assertEquals(10, origNumFiles, "Lucene number of documents");
238 
239         // Remove the file and reindex using IndexDatabase directly.
240         File file = new File(repository.getSourceRoot(), projectName + File.separator + fileName);
241         assertTrue(file.delete());
242         assertFalse(file.exists(), "file " + fileName + " not removed");
243         idb.update();
244 
245         // Check that the data for the file has been removed.
246         checkDataExistence(projectName + File.separator + fileName, false);
247         assertEquals(origNumFiles - 1, idb.getNumFiles());
248     }
249 
250     /**
251      * This is a test of {@code populateDocument} so it should be rather in {@code AnalyzerGuruTest}
252      * however it lacks the pre-requisite indexing phase.
253      */
254     @Test
testIndexPath()255     void testIndexPath() throws IOException {
256         SearchEngine instance = new SearchEngine();
257         // Use as broad search as possible.
258         instance.setFile("c");
259         instance.search();
260         ScoreDoc[] scoredocs = instance.scoreDocs();
261         assertTrue(scoredocs.length > 0, "need some search hits to perform the check");
262         for (ScoreDoc sd : scoredocs) {
263             Document doc = instance.doc(sd.doc);
264             assertFalse(doc.getField(QueryBuilder.PATH).stringValue().contains("\\"),
265                     "PATH field should not contain backslash characters");
266         }
267     }
268 
269     @Test
testGetLastRev()270     void testGetLastRev() throws IOException, ParseException {
271         Document doc = IndexDatabase.getDocument(Paths.get(repository.getSourceRoot(),
272                 "git", "main.c").toFile());
273         assertNotNull(doc);
274         assertEquals("aa35c258", doc.get(QueryBuilder.LASTREV));
275     }
276 
changeFileAndCommit(Git git, File file, String comment)277     static void changeFileAndCommit(Git git, File file, String comment) throws Exception {
278         String authorName = "Foo Bar";
279         String authorEmail = "foobar@example.com";
280 
281         try (FileOutputStream fos = new FileOutputStream(file, true)) {
282             fos.write(comment.getBytes(StandardCharsets.UTF_8));
283         }
284 
285         git.commit().setMessage(comment).setAuthor(authorName, authorEmail).setAll(true).call();
286     }
287 
addFileAndCommit(Git git, String newFileName, File repositoryRoot, String message)288     private void addFileAndCommit(Git git, String newFileName, File repositoryRoot, String message) throws Exception {
289         File newFile = new File(repositoryRoot, newFileName);
290         if (!newFile.createNewFile()) {
291             throw new IOException("Could not create file " + newFile);
292         }
293         try (FileOutputStream fos = new FileOutputStream(newFile)) {
294             fos.write("foo bar foo bar foo bar".getBytes(StandardCharsets.UTF_8));
295         }
296         git.add().addFilepattern(newFileName).call();
297         git.commit().setMessage(message).setAuthor("foo bar", "foobar@example.com").setAll(true).call();
298     }
299 
addMergeCommit(Git git, File repositoryRoot)300     private void addMergeCommit(Git git, File repositoryRoot) throws Exception {
301         // Create and checkout a branch.
302         final String branchName = "mybranch";
303         git.branchCreate().setName(branchName).call();
304         git.checkout().setName(branchName).call();
305 
306         // Change a file on the branch.
307         addFileAndCommit(git, "new.txt", repositoryRoot, "new file on a branch");
308 
309         // Checkout the master branch again.
310         git.checkout().setName("master").call();
311 
312         // Retrieve the objectId of the latest commit on the branch.
313         ObjectId mergeBase = git.getRepository().resolve(branchName);
314 
315         // Perform the actual merge without FastForward to see the
316         // actual merge-commit even though the merge is trivial.
317         git.merge().
318                 include(mergeBase).
319                 setCommit(false).
320                 setFastForward(MergeCommand.FastForwardMode.NO_FF).
321                 setMessage("merge commit").
322                 call();
323 
324         // Commit the merge separately so that the author can be set.
325         // (MergeCommand - a result of git.merge() - does not have the setAuthor() method)
326         git.commit().setAuthor("foo bar", "foobar@example.com").call();
327     }
328 
329     /**
330      * Add some commits to the Git repository - change/remove/add/rename a file in separate commits,
331      * also add a merge commit.
332      * @param repositoryRoot Git repository root
333      */
changeGitRepository(File repositoryRoot)334     private void changeGitRepository(File repositoryRoot) throws Exception {
335         try (Git git = Git.init().setDirectory(repositoryRoot).call()) {
336             // This name is specifically picked to add file that would exercise the end of term traversal
337             // in processFileIncremental(), that is (uidIter == null).
338             String newFileName = "zzz.txt";
339             addFileAndCommit(git, newFileName, repositoryRoot, "another new file");
340 
341             // Add another file that is sorted behind to exercise another code path in processFileIncremental().
342             // These 'z'-files are added first so their commits are not the last. This exercises the sorting
343             // of the files in FileCollector and the simultaneous traverse of the index and file list
344             // in processFileIncremental().
345             newFileName = "zzzzzz.txt";
346             addFileAndCommit(git, newFileName, repositoryRoot, "another new file");
347 
348             // Change one of the pre-existing files.
349             File mainFile = new File(repositoryRoot, "main.c");
350             assertTrue(mainFile.exists());
351             changeFileAndCommit(git, mainFile, "new commit");
352 
353             // Delete a file.
354             final String deletedFileName = "header.h";
355             File rmFile = new File(repositoryRoot, deletedFileName);
356             assertTrue(rmFile.exists());
357             git.rm().addFilepattern(deletedFileName).call();
358             git.commit().setMessage("delete").setAuthor("foo", "foobar@example.com").setAll(true).call();
359             assertFalse(rmFile.exists());
360 
361             // Rename some file.
362             final String fooFileName = "Makefile";
363             final String barFileName = "Makefile.renamed";
364             File fooFile = new File(repositoryRoot, fooFileName);
365             assertTrue(fooFile.exists());
366             File barFile = new File(repositoryRoot, barFileName);
367             assertTrue(fooFile.renameTo(barFile));
368             git.add().addFilepattern(barFileName).call();
369             git.rm().addFilepattern(fooFileName).call();
370             git.commit().setMessage("rename").setAuthor("foo", "foobar@example.com").setAll(true).call();
371             assertTrue(barFile.exists());
372             assertFalse(fooFile.exists());
373 
374             addMergeCommit(git, repositoryRoot);
375         }
376     }
377 
provideParamsFortestGetIndexDownArgs()378     private static Stream<Arguments> provideParamsFortestGetIndexDownArgs() {
379         return Stream.of(
380             Arguments.of(false, false, false, false),
381             Arguments.of(false, false, false, true),
382             Arguments.of(false, false, true, false),
383             Arguments.of(false, false, true, true),
384             Arguments.of(false, true, false, false),
385             Arguments.of(false, true, false, true),
386             Arguments.of(false, true, true, false),
387             Arguments.of(false, true, true, true),
388             Arguments.of(true, false, false, false),
389             Arguments.of(true, false, false, true),
390             Arguments.of(true, false, true, false),
391             Arguments.of(true, false, true, true),
392             Arguments.of(true, true, false, false),
393             Arguments.of(true, true, false, true),
394             Arguments.of(true, true, true, false),
395             Arguments.of(true, true, true, true)
396         );
397     }
398 
399     static class AddRemoveFilesListener implements IndexChangedListener {
400         // The file sets need to be thread safe because the methods that modify them can be called in parallel.
401         private final Set<String> removedFiles = Collections.synchronizedSet(new HashSet<>());
402 
403         private final Set<String> addedFiles = Collections.synchronizedSet(new HashSet<>());
404 
405         @Override
fileAdd(String path, String analyzer)406         public void fileAdd(String path, String analyzer) {
407             addedFiles.add(path);
408         }
409 
410         @Override
fileAdded(String path, String analyzer)411         public void fileAdded(String path, String analyzer) {
412         }
413 
414         @Override
fileRemove(String path)415         public void fileRemove(String path) {
416             removedFiles.add(path);
417         }
418 
419         @Override
fileRemoved(String path)420         public void fileRemoved(String path) {
421         }
422 
423         @Override
fileUpdate(String path)424         public void fileUpdate(String path) {
425         }
426 
getRemovedFiles()427         public Set<String> getRemovedFiles() {
428             return removedFiles;
429         }
430 
getAddedFiles()431         public Set<String> getAddedFiles() {
432             return addedFiles;
433         }
434     }
435 
436     /**
437      * Test specifically getIndexDownArgs() with IndexDatabase instance.
438      * This test ensures that correct set of files is discovered.
439      */
440     @ParameterizedTest
441     @MethodSource("provideParamsFortestGetIndexDownArgs")
testGetIndexDownArgs(boolean mergeCommits, boolean renamedFiles, boolean historyBased, boolean perPartes)442     void testGetIndexDownArgs(boolean mergeCommits, boolean renamedFiles, boolean historyBased, boolean perPartes)
443             throws Exception {
444 
445         assertTrue(env.isHistoryEnabled());
446 
447         env.setHistoryBasedReindex(historyBased);
448         env.setHandleHistoryOfRenamedFiles(renamedFiles);
449         env.setMergeCommitsEnabled(mergeCommits);
450         env.setHistoryCachePerPartesEnabled(perPartes);
451 
452         IndexDownArgsFactory factory = new IndexDownArgsFactory();
453         IndexDownArgsFactory spyFactory = spy(factory);
454         IndexDownArgs args = new IndexDownArgs();
455         // In this case the getIndexDownArgs() should be called from update() just once so this will suffice.
456         when(spyFactory.getIndexDownArgs()).thenReturn(args);
457 
458         Project gitProject = env.getProjects().get("git");
459         assertNotNull(gitProject);
460         gitProject.completeWithDefaults();
461         IndexDatabase idbOrig = new IndexDatabase(gitProject, spyFactory);
462         assertNotNull(idbOrig);
463         IndexDatabase idb = spy(idbOrig);
464 
465         File repositoryRoot = new File(repository.getSourceRoot(), "git");
466         assertTrue(repositoryRoot.isDirectory());
467         changeGitRepository(repositoryRoot);
468 
469         // Re-generate the history cache so that the data is ready for history based re-index.
470         HistoryGuru.getInstance().clear();
471         indexer.prepareIndexer(
472                 env, true, true,
473                 false, List.of("/git"), null);
474         env.generateProjectRepositoriesMap();
475 
476         // Check history cache w.r.t. the merge changeset.
477         File mergeFile = new File(repositoryRoot, "new.txt");
478         History history = HistoryGuru.getInstance().getHistory(mergeFile, false, false, false);
479         assertNotNull(history);
480         assertNotNull(history.getHistoryEntries());
481         boolean containsMergeCommitMessage = history.getHistoryEntries().stream().
482                 map(HistoryEntry::getMessage).collect(Collectors.toSet()).contains("merge commit");
483         if (mergeCommits) {
484             assertTrue(containsMergeCommitMessage);
485         } else {
486             assertFalse(containsMergeCommitMessage);
487         }
488 
489         // Setup and use listener for the "removed" files.
490         AddRemoveFilesListener listener = new AddRemoveFilesListener();
491         idb.addIndexChangedListener(listener);
492         idb.update();
493 
494         verify(spyFactory).getIndexDownArgs();
495         // Cannot use args.curCount to compare against because it gets reset in indexParallel()
496         // as it is reused in that stage of indexing.
497         assertNotEquals(0, args.works.size());
498         // The expected data has to match the work done in changeGitRepository().
499         Set<Path> expectedFileSet = new HashSet<>();
500         expectedFileSet.add(Path.of("/git/Makefile.renamed"));
501         expectedFileSet.add(Path.of("/git/main.c"));
502         expectedFileSet.add(Path.of("/git/zzz.txt"));
503         expectedFileSet.add(Path.of("/git/zzzzzz.txt"));
504         expectedFileSet.add(Path.of("/git/new.txt"));
505         assertEquals(expectedFileSet, args.works.stream().map(v -> Path.of(v.path)).collect(Collectors.toSet()));
506 
507         assertEquals(Set.of(
508                 Path.of("/git/header.h"),
509                 Path.of("/git/main.c"),
510                 Path.of("/git/Makefile")
511         ), listener.getRemovedFiles().stream().map(Path::of).collect(Collectors.toSet()));
512 
513         // Verify the assumption made above.
514         verify(idb, times(1)).getIndexDownArgs(any(), any(), any());
515 
516         checkIndexDown(historyBased, idb);
517     }
518 
checkIndexDown(boolean historyBased, IndexDatabase idb)519     private void checkIndexDown(boolean historyBased, IndexDatabase idb) throws IOException {
520         // The initial index (done in setUpClass()) should use file based IndexWorkArgs discovery.
521         // Only the update() done in the actual test should lead to indexDownUsingHistory(),
522         // hence it should be called just once.
523         if (historyBased) {
524             verify(idb, times(1)).indexDownUsingHistory(any(), any());
525             verify(idb, times(0)).indexDown(any(), any(), any());
526         } else {
527             // indexDown() is recursive, so it will be called more than once.
528             verify(idb, times(0)).indexDownUsingHistory(any(), any());
529             verify(idb, atLeast(1)).indexDown(any(), any(), any());
530         }
531     }
532 
533     /**
534      * Make sure that history based reindex is not performed for projects
535      * where some repositories are not instances of {@code RepositoryWithHistoryTraversal}
536      * or have the history based reindex explicitly disabled.
537      *
538      * Instead of checking the result of the functions that make the decision, check the actual indexing.
539      */
540     @EnabledForRepository(CVS)
541     @ParameterizedTest
542     @ValueSource(booleans = {true, false})
testHistoryBasedReindexVsProjectWithDiverseRepos(boolean useCvs)543     void testHistoryBasedReindexVsProjectWithDiverseRepos(boolean useCvs) throws Exception {
544         env.setHistoryBasedReindex(true);
545 
546         // Create a new project with two repositories.
547         String projectName = "new";
548         Path projectPath = Path.of(repository.getSourceRoot(), projectName);
549         assertTrue(projectPath.toFile().mkdirs());
550         assertTrue(projectPath.toFile().isDirectory());
551 
552         String disabledGitRepoName = "git1";
553 
554         if (useCvs) {
555             // Copy CVS repository underneath the project.
556             String subrepoName = "cvssubrepo";
557             Path destinationPath = Path.of(repository.getSourceRoot(), projectName, subrepoName);
558             Path sourcePath = Path.of(repository.getSourceRoot(), "cvs_test", "cvsrepo");
559             assertTrue(sourcePath.toFile().exists());
560             assertTrue(destinationPath.toFile().mkdirs());
561             repository.copyDirectory(sourcePath, destinationPath);
562             assertTrue(destinationPath.toFile().exists());
563 
564             Repository subRepo = RepositoryFactory.getRepository(destinationPath.toFile());
565             assertFalse(subRepo instanceof RepositoryWithHistoryTraversal);
566         } else {
567             // Clone Git repository underneath the project.
568             String cloneUrl = Path.of(repository.getSourceRoot(), "git").toFile().toURI().toString();
569             Path repositoryRootPath = Path.of(repository.getSourceRoot(), projectName, disabledGitRepoName);
570             Git git = Git.cloneRepository()
571                     .setURI(cloneUrl)
572                     .setDirectory(repositoryRootPath.toFile())
573                     .call();
574             git.close();
575             assertTrue(repositoryRootPath.toFile().isDirectory());
576         }
577 
578         // Clone Git repository underneath the project and make a change there.
579         String cloneUrl = Path.of(repository.getSourceRoot(), "git").toFile().toURI().toString();
580         Path repositoryRootPath = Path.of(repository.getSourceRoot(), projectName, "git");
581         Git git = Git.cloneRepository()
582                 .setURI(cloneUrl)
583                 .setDirectory(repositoryRootPath.toFile())
584                 .call();
585         git.close();
586         assertTrue(repositoryRootPath.toFile().isDirectory());
587         changeGitRepository(repositoryRootPath.toFile());
588 
589         // Rescan the repositories.
590         HistoryGuru.getInstance().clear();
591         indexer.prepareIndexer(
592                 env, true, true,
593                 false, List.of("/git"), null);
594         env.setRepositories(new ArrayList<>(HistoryGuru.getInstance().getRepositories()));
595         env.generateProjectRepositoriesMap();
596 
597         // Assert the repositories were detected.
598         Project project = env.getProjects().get(projectName);
599         assertNotNull(project);
600         List<RepositoryInfo> projectRepos = env.getProjectRepositoriesMap().get(project);
601         assertNotNull(projectRepos);
602         assertEquals(2, projectRepos.size());
603 
604         if (!useCvs) {
605             for (RepositoryInfo repo : projectRepos) {
606                 if (repo.getDirectoryNameRelative().equals(disabledGitRepoName)) {
607                     repo.setHistoryBasedReindex(false);
608                 }
609             }
610         }
611 
612         verifyIndexDown(project, false);
613     }
614 
615     /**
616      * Make sure the files detected for a sub-repository are correctly stored in the appropriate
617      * {@code FileCollector} instance.
618      */
619     @Test
testHistoryBasedReindexWithEligibleSubRepo()620     void testHistoryBasedReindexWithEligibleSubRepo() throws Exception {
621         env.setHistoryBasedReindex(true);
622 
623         assertNull(env.getFileCollector("git"));
624 
625         Project gitProject = env.getProjects().get("git");
626         assertNotNull(gitProject);
627         gitProject.completeWithDefaults();
628 
629         // Create a Git repository underneath the existing git repository and make a change there.
630         File repositoryRoot = new File(repository.getSourceRoot(), "git");
631         assertTrue(repositoryRoot.isDirectory());
632         changeGitRepository(repositoryRoot);
633         String subRepoName = "subrepo";
634         File subRepositoryRoot = new File(repositoryRoot, subRepoName);
635         String changedFileName = "subfile.txt";
636         try (Git git = Git.init().setDirectory(subRepositoryRoot).call()) {
637             addFileAndCommit(git, changedFileName, subRepositoryRoot, "new file in subrepo");
638         }
639         assertTrue(new File(subRepositoryRoot, changedFileName).exists());
640 
641         HistoryGuru.getInstance().clear();
642 
643         // Rescan the repositories and refresh the history cache which should also collect the files
644         // for the 2nd stage of indexing.
645         indexer.prepareIndexer(
646                 env, true, true,
647                 false, List.of("/git"), null);
648 
649         // Verify the collected files.
650         FileCollector fileCollector = env.getFileCollector("git");
651         assertNotNull(fileCollector);
652         assertTrue(fileCollector.getFiles().size() > 1);
653         assertTrue(fileCollector.getFiles().
654                 contains(File.separator + gitProject.getName() +
655                         File.separator + subRepoName +
656                         File.separator + changedFileName));
657     }
658 
659     /**
660      * Verify project specific tunable has effect on how the indexing will be performed.
661      * The global history based tunable is tested in testGetIndexDownArgs().
662      */
663     @ParameterizedTest
664     @ValueSource(booleans = {true, false})
testHistoryBasedReindexProjectTunable(boolean historyBased)665     void testHistoryBasedReindexProjectTunable(boolean historyBased) throws Exception {
666         env.setHistoryBasedReindex(!historyBased);
667 
668         // Make a change in the git repository.
669         File repositoryRoot = new File(repository.getSourceRoot(), "git");
670         assertTrue(repositoryRoot.isDirectory());
671         changeGitRepository(repositoryRoot);
672 
673         // The per project tunable should override the global tunable.
674         Project gitProject = env.getProjects().get("git");
675         gitProject.setHistoryBasedReindex(historyBased);
676         gitProject.completeWithDefaults();
677 
678         HistoryGuru.getInstance().clear();
679         indexer.prepareIndexer(
680                 env, true, true,
681                 false, List.of("/git"), null);
682         env.generateProjectRepositoriesMap();
683 
684         verifyIndexDown(gitProject, historyBased);
685 
686         gitProject.setHistoryBasedReindex(true);
687     }
688 
689     /**
690      * Test history based reindex if there was no change to the repository.
691      */
692     @Test
testHistoryBasedReindexWithNoChange()693     void testHistoryBasedReindexWithNoChange() throws Exception {
694         env.setHistoryBasedReindex(true);
695 
696         Project gitProject = env.getProjects().get("git");
697         gitProject.completeWithDefaults();
698 
699         HistoryGuru.getInstance().clear();
700         indexer.prepareIndexer(
701                 env, true, true,
702                 false, List.of("/git"), null);
703         env.generateProjectRepositoriesMap();
704 
705         verifyIndexDown(gitProject, true);
706     }
707 
verifyIndexDown(Project gitProject, boolean historyBased)708     private void verifyIndexDown(Project gitProject, boolean historyBased) throws Exception {
709         // verify that indexer did not use history based reindex.
710         IndexDatabase idbOrig = new IndexDatabase(gitProject);
711         assertNotNull(idbOrig);
712         IndexDatabase idb = spy(idbOrig);
713         idb.update();
714         checkIndexDown(historyBased, idb);
715     }
716 
717     /**
718      * Test forced reindex - see if removeFile() was called for all files in the repository
719      * even though there was no change.
720      */
721     @ParameterizedTest
722     @ValueSource(booleans = {true, false})
testForcedReindex(boolean historyBased)723     void testForcedReindex(boolean historyBased) throws Exception {
724 
725         env.setHistoryBasedReindex(historyBased);
726 
727         Project gitProject = env.getProjects().get("git");
728         assertNotNull(gitProject);
729         gitProject.completeWithDefaults();
730         IndexDatabase idbOrig = new IndexDatabase(gitProject);
731         assertNotNull(idbOrig);
732         IndexDatabase idb = spy(idbOrig);
733 
734         // Re-generate the history cache so that the git repository is ready for history based re-index.
735         indexer.prepareIndexer(
736                 env, true, true,
737                 false, List.of("/git"), null);
738         env.generateProjectRepositoriesMap();
739 
740         // Emulate forcing reindex from scratch.
741         doReturn(false).when(idb).checkSettings(any(), any());
742 
743         // Setup and use listener for the "removed" files.
744         AddRemoveFilesListener listener = new AddRemoveFilesListener();
745         idb.addIndexChangedListener(listener);
746         idb.update();
747 
748         checkIndexDown(historyBased, idb);
749 
750         // List the files in the /git directory tree and compare that to the IndexDatabase file sets.
751         Path repoRoot = Path.of(repository.getSourceRoot(), "git");
752         Set<Path> result;
753         try (Stream<Path> walk = Files.walk(repoRoot)) {
754             result = walk.filter(Files::isRegularFile).
755                     filter(p -> !p.toString().contains(".git")).
756                     collect(Collectors.toSet());
757         }
758         Set<Path> expectedFileSet = result.stream().map(f -> {
759                 try {
760                     return Path.of(RuntimeEnvironment.getInstance().getPathRelativeToSourceRoot(f.toFile()));
761                 } catch (IOException | ForbiddenSymlinkException e) {
762                     return null;
763                 }
764             }).collect(Collectors.toSet());
765         assertEquals(expectedFileSet, listener.getRemovedFiles().stream().map(Path::of).collect(Collectors.toSet()));
766         assertEquals(expectedFileSet, listener.getAddedFiles().stream().map(Path::of).collect(Collectors.toSet()));
767     }
768 
769     /**
770      * make sure the initial indexing is made using indexDown() even though history based reindex is possible.
771      */
772     @Test
testInitialReindexWithHistoryBased()773     void testInitialReindexWithHistoryBased() throws Exception {
774         env.setHistoryBasedReindex(true);
775 
776         // Delete the index (and all data in fact).
777         assertFalse(repository.getDataRoot().isEmpty());
778         IOUtils.removeRecursive(Path.of(repository.getDataRoot()));
779         assertFalse(Path.of(repository.getDataRoot()).toFile().exists());
780 
781         // Update the index of the project.
782         Project gitProject = env.getProjects().get("git");
783         assertNotNull(gitProject);
784         IndexDatabase idbOrig = new IndexDatabase(gitProject);
785         assertNotNull(idbOrig);
786         IndexDatabase idb = spy(idbOrig);
787         idb.update();
788 
789         // Check that the index for the git project was created.
790         Document doc = IndexDatabase.getDocument(Path.of(repository.getSourceRoot(), "git", "main.c").toFile());
791         assertNotNull(doc);
792 
793         checkIndexDown(false, idb);
794     }
795 
796     /**
797      * project-less configuration should lead to file-system based reindex.
798      */
799     @Test
testProjectLessReindexVsHistoryBased()800     void testProjectLessReindexVsHistoryBased() throws Exception {
801         env.setProjectsEnabled(false);
802 
803         // Make a change in the git repository.
804         File repositoryRoot = new File(repository.getSourceRoot(), "git");
805         assertTrue(repositoryRoot.isDirectory());
806         changeGitRepository(repositoryRoot);
807 
808         IndexDatabase idbOrig = new IndexDatabase();
809         assertNotNull(idbOrig);
810         IndexDatabase idb = spy(idbOrig);
811         idb.update();
812 
813         checkIndexDown(false, idb);
814     }
815 }
816