xref: /OpenGrok/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java (revision 794d0b7601051ece0392a8b2dc98950bb17570e1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2008, 2022, Oracle and/or its affiliates. All rights reserved.
22  * Portions Copyright (c) 2017, 2020, Chris Fraire <cfraire@me.com>.
23  * Portions Copyright (c) 2019, Krystof Tulinger <k.tulinger@seznam.cz>.
24  */
25 package org.opengrok.indexer.history;
26 
27 import java.io.File;
28 import java.io.IOException;
29 import java.io.OutputStream;
30 import java.nio.charset.StandardCharsets;
31 import java.nio.file.Path;
32 import java.nio.file.Paths;
33 import java.util.Date;
34 import java.util.HashSet;
35 import java.util.List;
36 import java.util.HashMap;
37 import java.util.Map;
38 import java.util.Scanner;
39 import java.util.Set;
40 import java.util.SortedSet;
41 import java.util.TreeSet;
42 import java.util.concurrent.ExecutionException;
43 import java.util.concurrent.ExecutorService;
44 import java.util.concurrent.Executors;
45 import java.util.concurrent.Future;
46 import java.util.concurrent.TimeUnit;
47 import java.util.concurrent.TimeoutException;
48 import java.util.function.Consumer;
49 import java.util.logging.Level;
50 import java.util.logging.Logger;
51 
52 import org.eclipse.jgit.api.BlameCommand;
53 import org.eclipse.jgit.api.Git;
54 import org.eclipse.jgit.api.errors.GitAPIException;
55 import org.eclipse.jgit.blame.BlameResult;
56 import org.eclipse.jgit.diff.DiffEntry;
57 import org.eclipse.jgit.diff.DiffFormatter;
58 import org.eclipse.jgit.diff.RawText;
59 import org.eclipse.jgit.diff.RawTextComparator;
60 import org.eclipse.jgit.lib.Config;
61 import org.eclipse.jgit.lib.Constants;
62 import org.eclipse.jgit.lib.ObjectId;
63 import org.eclipse.jgit.lib.ObjectLoader;
64 import org.eclipse.jgit.lib.ObjectReader;
65 import org.eclipse.jgit.lib.PersonIdent;
66 import org.eclipse.jgit.lib.Ref;
67 import org.eclipse.jgit.lib.Repository;
68 import org.eclipse.jgit.revwalk.FollowFilter;
69 import org.eclipse.jgit.revwalk.RevCommit;
70 import org.eclipse.jgit.revwalk.RevTree;
71 import org.eclipse.jgit.revwalk.RevWalk;
72 import org.eclipse.jgit.storage.file.FileRepositoryBuilder;
73 import org.eclipse.jgit.treewalk.AbstractTreeIterator;
74 import org.eclipse.jgit.treewalk.CanonicalTreeParser;
75 import org.eclipse.jgit.treewalk.TreeWalk;
76 import org.eclipse.jgit.treewalk.filter.AndTreeFilter;
77 import org.eclipse.jgit.treewalk.filter.PathFilter;
78 import org.eclipse.jgit.treewalk.filter.TreeFilter;
79 import org.eclipse.jgit.util.io.CountingOutputStream;
80 import org.eclipse.jgit.util.io.NullOutputStream;
81 import org.jetbrains.annotations.NotNull;
82 import org.jetbrains.annotations.Nullable;
83 import org.opengrok.indexer.configuration.CommandTimeoutType;
84 import org.opengrok.indexer.configuration.RuntimeEnvironment;
85 import org.opengrok.indexer.logger.LoggerFactory;
86 import org.opengrok.indexer.util.ForbiddenSymlinkException;
87 
88 import static org.opengrok.indexer.history.History.TAGS_SEPARATOR;
89 
90 /**
91  * Access to a Git repository.
92  *
93  */
94 public class GitRepository extends RepositoryWithHistoryTraversal {
95 
96     private static final Logger LOGGER = LoggerFactory.getLogger(GitRepository.class);
97 
98     private static final long serialVersionUID = -6126297612958508386L;
99 
100     public static final int GIT_ABBREV_LEN = 8;
101     public static final int MAX_CHANGESETS = 65536;
102 
GitRepository()103     public GitRepository() {
104         type = "git";
105 
106         ignoredDirs.add(".git");
107         ignoredFiles.add(".git");
108     }
109 
110     /**
111      * Be careful, git uses only forward slashes in its command and output (not in file path).
112      * Using backslashes together with git show will get empty output and 0 status code.
113      * @return string with separator characters replaced with forward slash
114      */
getGitFilePath(String filePath)115     private static String getGitFilePath(String filePath) {
116         return filePath.replace(File.separatorChar, '/');
117     }
118 
119     /**
120      * Try to get file contents for given revision.
121      *
122      * @param out a required OutputStream
123      * @param fullpath full pathname of the file
124      * @param rev revision string
125      * @return a defined instance with {@code success} == {@code true} if no
126      * error occurred and with non-zero {@code iterations} if some data was transferred
127      */
getHistoryRev(OutputStream out, String fullpath, String rev)128     private HistoryRevResult getHistoryRev(OutputStream out, String fullpath, String rev) {
129 
130         HistoryRevResult result = new HistoryRevResult();
131         File directory = new File(getDirectoryName());
132 
133         String filename;
134         result.success = false;
135         try {
136             filename = getGitFilePath(Paths.get(getCanonicalDirectoryName()).relativize(Paths.get(fullpath)).toString());
137         } catch (IOException e) {
138             LOGGER.log(Level.WARNING, String.format("Failed to relativize '%s' in for repository '%s'",
139                     fullpath, directory), e);
140             return result;
141         }
142 
143         try (org.eclipse.jgit.lib.Repository repository = getJGitRepository(directory.getAbsolutePath())) {
144             ObjectId commitId = repository.resolve(rev);
145 
146             // A RevWalk allows walking over commits based on some filtering that is defined.
147             try (RevWalk revWalk = new RevWalk(repository)) {
148                 RevCommit commit = revWalk.parseCommit(commitId);
149                 // and using commit's tree find the path
150                 RevTree tree = commit.getTree();
151 
152                 // Now try to find a specific file.
153                 try (TreeWalk treeWalk = new TreeWalk(repository)) {
154                     treeWalk.addTree(tree);
155                     treeWalk.setRecursive(true);
156                     treeWalk.setFilter(PathFilter.create(filename));
157                     if (!treeWalk.next()) {
158                         LOGGER.log(Level.FINEST, "Did not find expected file ''{0}'' in revision {1} " +
159                                 "for repository ''{2}''", new Object[] {filename, rev, directory});
160                         return result;
161                     }
162 
163                     ObjectId objectId = treeWalk.getObjectId(0);
164                     ObjectLoader loader = repository.open(objectId);
165 
166                     CountingOutputStream countingOutputStream = new CountingOutputStream(out);
167                     loader.copyTo(countingOutputStream);
168                     result.iterations = countingOutputStream.getCount();
169                     result.success = true;
170                 }
171 
172                 revWalk.dispose();
173             }
174         } catch (IOException e) {
175             LOGGER.log(Level.WARNING, String.format("Failed to get file '%s' in revision %s for repository '%s'",
176                     filename, rev, directory), e);
177         }
178 
179         return result;
180     }
181 
182     @Override
getHistoryGet(OutputStream out, String parent, String basename, String rev)183     boolean getHistoryGet(OutputStream out, String parent, String basename, String rev) {
184 
185         String fullPath;
186         try {
187             fullPath = new File(parent, basename).getCanonicalPath();
188         } catch (IOException e) {
189             LOGGER.log(Level.WARNING, e, () -> String.format(
190                     "Failed to get canonical path: %s/%s", parent, basename));
191             return false;
192         }
193 
194         HistoryRevResult result = getHistoryRev(out, fullPath, rev);
195         if (!result.success && result.iterations < 1) {
196             /*
197              * If we failed to get the contents it might be that the file was
198              * renamed, so we need to find its original name in that revision
199              * and retry with the original name.
200              */
201             String origPath;
202             try {
203                 origPath = findOriginalName(fullPath, rev);
204             } catch (IOException exp) {
205                 LOGGER.log(Level.SEVERE, exp, () -> String.format(
206                         "Failed to get original revision: %s/%s (revision %s)",
207                         parent, basename, rev));
208                 return false;
209             }
210 
211             if (origPath != null) {
212                 String fullRenamedPath;
213                 try {
214                     fullRenamedPath = Paths.get(getCanonicalDirectoryName(), origPath).toString();
215                 } catch (IOException e) {
216                     LOGGER.log(Level.WARNING, e, () -> String.format(
217                             "Failed to get canonical path: .../%s", origPath));
218                     return false;
219                 }
220                 if (!fullRenamedPath.equals(fullPath)) {
221                     result = getHistoryRev(out, fullRenamedPath, rev);
222                 }
223             }
224         }
225 
226         return result.success;
227     }
228 
getPathRelativeToCanonicalRepositoryRoot(String fullPath)229     private String getPathRelativeToCanonicalRepositoryRoot(String fullPath) throws IOException {
230         String repoPath = getCanonicalDirectoryName() + File.separator;
231         if (fullPath.startsWith(repoPath)) {
232             return fullPath.substring(repoPath.length());
233         }
234         return fullPath;
235     }
236 
237     /**
238      * Get the name of file in given revision. The returned file name is relative to the repository root.
239      * Assumes renamed file hanndling is on.
240      *
241      * @param fullpath full file path
242      * @param changeset revision ID (could be short)
243      * @return original filename relative to the repository root
244      * @throws java.io.IOException if I/O exception occurred
245      * @see #getPathRelativeToCanonicalRepositoryRoot(String)
246      */
findOriginalName(String fullpath, String changeset)247     String findOriginalName(String fullpath, String changeset) throws IOException {
248 
249         if (fullpath == null || fullpath.isEmpty()) {
250             throw new IOException(String.format("Invalid file path string: %s", fullpath));
251         }
252 
253         if (changeset == null || changeset.isEmpty()) {
254             throw new IOException(String.format("Invalid changeset string for path %s: %s",
255                     fullpath, changeset));
256         }
257 
258         String fileInRepo = getGitFilePath(getPathRelativeToCanonicalRepositoryRoot(fullpath));
259 
260         String originalFile = fileInRepo;
261         try (org.eclipse.jgit.lib.Repository repository = getJGitRepository(getDirectoryName());
262              RevWalk walk = new RevWalk(repository)) {
263 
264             walk.markStart(walk.parseCommit(repository.resolve(Constants.HEAD)));
265             walk.markUninteresting(walk.lookupCommit(repository.resolve(changeset)));
266 
267             Config config = repository.getConfig();
268             config.setBoolean("diff", null, "renames", true);
269             org.eclipse.jgit.diff.DiffConfig dc = config.get(org.eclipse.jgit.diff.DiffConfig.KEY);
270             FollowFilter followFilter = FollowFilter.create(getGitFilePath(fileInRepo), dc);
271             walk.setTreeFilter(followFilter);
272 
273             for (RevCommit commit : walk) {
274                 if (commit.getParentCount() > 1 && !isMergeCommitsEnabled()) {
275                     continue;
276                 }
277 
278                 if (commit.getId().getName().startsWith(changeset)) {
279                     break;
280                 }
281 
282                 if (commit.getParentCount() >= 1) {
283                     OutputStream outputStream = NullOutputStream.INSTANCE;
284                     try (DiffFormatter formatter = new DiffFormatter(outputStream)) {
285                         formatter.setRepository(repository);
286                         formatter.setDetectRenames(true);
287 
288                         List<DiffEntry> diffs = formatter.scan(prepareTreeParser(repository, commit.getParent(0)),
289                                 prepareTreeParser(repository, commit));
290 
291                         for (DiffEntry diff : diffs) {
292                             if (diff.getChangeType() == DiffEntry.ChangeType.RENAME &&
293                                     originalFile.equals(diff.getNewPath())) {
294                                 originalFile = diff.getOldPath();
295                             }
296                         }
297                     }
298                 }
299             }
300         }
301 
302         if (originalFile == null) {
303             LOGGER.log(Level.WARNING, "Failed to get original name in revision {0} for: \"{1}\"",
304                     new Object[]{changeset, fullpath});
305             return null;
306         }
307 
308         return getNativePath(originalFile);
309     }
310 
311     /**
312      * Annotate the specified file/revision.
313      *
314      * @param file file to annotate
315      * @param revision revision to annotate
316      * @return file annotation or {@code null}
317      * @throws java.io.IOException if I/O exception occurred
318      */
319     @Override
annotate(File file, String revision)320     public Annotation annotate(File file, String revision) throws IOException {
321         String filePath = getPathRelativeToCanonicalRepositoryRoot(file.getCanonicalPath());
322 
323         if (revision == null) {
324             revision = getFirstRevision(filePath);
325         }
326         String fileName = Path.of(filePath).getFileName().toString();
327         Annotation annotation = getAnnotation(revision, filePath, fileName);
328 
329         if (annotation.getRevisions().isEmpty() && isHandleRenamedFiles()) {
330             // The file might have changed its location if it was renamed.
331             // Try looking up its original name and get the annotation again.
332             String origName = findOriginalName(file.getCanonicalPath(), revision);
333             if (origName != null) {
334                 annotation = getAnnotation(revision, origName, fileName);
335             }
336         }
337 
338         return annotation;
339     }
340 
getFirstRevision(String filePath)341     private String getFirstRevision(String filePath) {
342         String revision = null;
343         try (org.eclipse.jgit.lib.Repository repository = getJGitRepository(getDirectoryName())) {
344             Iterable<RevCommit> commits = new Git(repository).log().
345                     addPath(getGitFilePath(filePath)).
346                     setMaxCount(1).
347                     call();
348             RevCommit commit = commits.iterator().next();
349             if (commit != null) {
350                 revision = commit.getId().getName();
351             } else {
352                 LOGGER.log(Level.WARNING, "cannot get first revision of ''{0}'' in repository ''{1}''",
353                         new Object[] {filePath, getDirectoryName()});
354             }
355         } catch (IOException | GitAPIException e) {
356             LOGGER.log(Level.WARNING,
357                     String.format("cannot get first revision of '%s' in repository '%s'",
358                             filePath, getDirectoryName()), e);
359         }
360         return revision;
361     }
362 
363     @NotNull
getAnnotation(String revision, String filePath, String fileName)364     private Annotation getAnnotation(String revision, String filePath, String fileName) throws IOException {
365         Annotation annotation = new Annotation(fileName);
366 
367         try (org.eclipse.jgit.lib.Repository repository = getJGitRepository(getDirectoryName())) {
368             BlameCommand blameCommand = new Git(repository).blame().setFilePath(getGitFilePath(filePath));
369             ObjectId commitId = repository.resolve(revision);
370             blameCommand.setStartCommit(commitId);
371             blameCommand.setFollowFileRenames(isHandleRenamedFiles());
372             final BlameResult result = blameCommand.setTextComparator(RawTextComparator.WS_IGNORE_ALL).call();
373             if (result != null) {
374                 final RawText rawText = result.getResultContents();
375                 for (int i = 0; i < rawText.size(); i++) {
376                     final PersonIdent sourceAuthor = result.getSourceAuthor(i);
377                     final RevCommit sourceCommit = result.getSourceCommit(i);
378                     annotation.addLine(sourceCommit.getId().abbreviate(GIT_ABBREV_LEN).
379                             name(), sourceAuthor.getName(), true);
380                 }
381             }
382         } catch (GitAPIException e) {
383             LOGGER.log(Level.FINER,
384                     String.format("failed to get annotation for file '%s' in repository '%s' in revision '%s'",
385                             filePath, getDirectoryName(), revision));
386         }
387         return annotation;
388     }
389 
390     @Override
fileHasAnnotation(File file)391     public boolean fileHasAnnotation(File file) {
392         return true;
393     }
394 
395     @Override
fileHasHistory(File file)396     public boolean fileHasHistory(File file) {
397         return true;
398     }
399 
400     @Override
isRepositoryFor(File file, CommandTimeoutType cmdType)401     boolean isRepositoryFor(File file, CommandTimeoutType cmdType) {
402         if (file.isDirectory()) {
403             File f = new File(file, Constants.DOT_GIT);
404             // No check for directory or file as submodules contain '.git' file.
405             return f.exists();
406         }
407         return false;
408     }
409 
410     @Override
supportsSubRepositories()411     boolean supportsSubRepositories() {
412         return true;
413     }
414 
415     /**
416      * Gets a value indicating the instance is nestable.
417      * @return {@code true}
418      */
419     @Override
isNestable()420     boolean isNestable() {
421         return true;
422     }
423 
424     @Override
isWorking()425     public boolean isWorking() {
426         // TODO: check isBare() in JGit ?
427         return true;
428     }
429 
430     @Override
hasHistoryForDirectories()431     boolean hasHistoryForDirectories() {
432         return true;
433     }
434 
435     @Override
getHistory(File file)436     History getHistory(File file) throws HistoryException {
437         return getHistory(file, null);
438     }
439 
440     @Override
getHistory(File file, String sinceRevision)441     History getHistory(File file, String sinceRevision) throws HistoryException {
442         return getHistory(file, sinceRevision, null);
443     }
444 
getPerPartesCount()445     public int getPerPartesCount() {
446         return MAX_CHANGESETS;
447     }
448 
accept(String sinceRevision, Consumer<String> visitor)449     public void accept(String sinceRevision, Consumer<String> visitor) throws HistoryException {
450         try (org.eclipse.jgit.lib.Repository repository = getJGitRepository(getDirectoryName());
451              RevWalk walk = new RevWalk(repository)) {
452 
453             if (sinceRevision != null) {
454                 walk.markUninteresting(walk.lookupCommit(repository.resolve(sinceRevision)));
455             }
456             walk.markStart(walk.parseCommit(repository.resolve(Constants.HEAD)));
457 
458             for (RevCommit commit : walk) {
459                 // Do not abbreviate the Id as this could cause AmbiguousObjectException in getHistory().
460                 visitor.accept(commit.getId().name());
461             }
462         } catch (IOException e) {
463             throw new HistoryException(e);
464         }
465     }
466 
467     @Nullable
468     @Override
getLastHistoryEntry(File file, boolean ui)469     public HistoryEntry getLastHistoryEntry(File file, boolean ui) throws HistoryException {
470         History hist = getHistory(file, null, null, 1);
471         return hist.getLastHistoryEntry();
472     }
473 
getHistory(File file, String sinceRevision, String tillRevision)474     public History getHistory(File file, String sinceRevision, String tillRevision) throws HistoryException {
475         return getHistory(file, sinceRevision, tillRevision, null);
476     }
477 
traverseHistory(File file, String sinceRevision, String tillRevision, Integer numCommits, List<ChangesetVisitor> visitors)478     public void traverseHistory(File file, String sinceRevision, String tillRevision,
479                               Integer numCommits, List<ChangesetVisitor> visitors) throws HistoryException {
480 
481         if (numCommits != null && numCommits <= 0) {
482             throw new HistoryException("invalid number of commits to retrieve");
483         }
484 
485         boolean isDirectory = file.isDirectory();
486 
487         try (org.eclipse.jgit.lib.Repository repository = getJGitRepository(getDirectoryName());
488              RevWalk walk = new RevWalk(repository)) {
489 
490             setupWalk(file, sinceRevision, tillRevision, repository, walk);
491 
492             int num = 0;
493             for (RevCommit commit : walk) {
494                 CommitInfo commitInfo = new CommitInfo(commit.getId().abbreviate(GIT_ABBREV_LEN).name(),
495                         commit.getAuthorIdent().getWhen(), commit.getAuthorIdent().getName(),
496                         commit.getAuthorIdent().getEmailAddress(), commit.getFullMessage());
497 
498                 for (ChangesetVisitor visitor : visitors) {
499                     // Even though the repository itself is set (not) to consume the merge changesets,
500                     // it should be up to the visitor to have the say. This is because of the history based reindex.
501                     if (commit.getParentCount() > 1 && !visitor.consumeMergeChangesets) {
502                         continue;
503                     }
504 
505                     if (isDirectory) {
506                         SortedSet<String> files = new TreeSet<>();
507                         final Set<String> renamedFiles = new HashSet<>();
508                         final Set<String> deletedFiles = new HashSet<>();
509                         getFilesForCommit(renamedFiles, files, deletedFiles, commit, repository);
510                         visitor.accept(new ChangesetInfo(commitInfo, files, renamedFiles, deletedFiles));
511                     } else {
512                         visitor.accept(new ChangesetInfo(commitInfo));
513                     }
514                 }
515 
516                 if (numCommits != null && ++num >= numCommits) {
517                     break;
518                 }
519             }
520         } catch (IOException | ForbiddenSymlinkException e) {
521             throw new HistoryException(String.format("failed to get history for ''%s''", file), e);
522         }
523     }
524 
setupWalk(File file, String sinceRevision, String tillRevision, Repository repository, RevWalk walk)525     private void setupWalk(File file, String sinceRevision, String tillRevision, Repository repository, RevWalk walk)
526             throws IOException, ForbiddenSymlinkException {
527 
528         if (sinceRevision != null) {
529             walk.markUninteresting(walk.lookupCommit(repository.resolve(sinceRevision)));
530         }
531 
532         if (tillRevision != null) {
533             walk.markStart(walk.lookupCommit(repository.resolve(tillRevision)));
534         } else {
535             walk.markStart(walk.parseCommit(repository.resolve(Constants.HEAD)));
536         }
537 
538         String relativePath = RuntimeEnvironment.getInstance().getPathRelativeToSourceRoot(file);
539         if (!getDirectoryNameRelative().equals(relativePath)) {
540             if (isHandleRenamedFiles()) {
541                 Config config = repository.getConfig();
542                 config.setBoolean("diff", null, "renames", true);
543                 org.eclipse.jgit.diff.DiffConfig dc = config.get(org.eclipse.jgit.diff.DiffConfig.KEY);
544                 FollowFilter followFilter = FollowFilter.create(getGitFilePath(getRepoRelativePath(file)), dc);
545                 walk.setTreeFilter(followFilter);
546             } else {
547                 walk.setTreeFilter(AndTreeFilter.create(
548                         PathFilter.create(getGitFilePath(getRepoRelativePath(file))),
549                         TreeFilter.ANY_DIFF));
550             }
551         }
552     }
553 
554     /**
555      * Accumulate list of changed/deleted/renamed files for given commit.
556      * @param renamedFiles output: renamed files in this commit (if renamed file handling is enabled)
557      * @param changedFiles output: changed files in this commit
558      * @param deletedFiles output: deleted files in this commit
559      * @param commit RevCommit object
560      * @param repository repository object
561      * @throws IOException on error traversing the commit tree
562      */
getFilesForCommit(Set<String> renamedFiles, SortedSet<String> changedFiles, Set<String> deletedFiles, RevCommit commit, Repository repository)563     private void getFilesForCommit(Set<String> renamedFiles, SortedSet<String> changedFiles, Set<String> deletedFiles,
564                                    RevCommit commit,
565                                    Repository repository) throws IOException {
566 
567         if (commit.getParentCount() == 0) { // first commit - add all files
568             try (TreeWalk treeWalk = new TreeWalk(repository)) {
569                 treeWalk.addTree(commit.getTree());
570                 treeWalk.setRecursive(true);
571 
572                 while (treeWalk.next()) {
573                     changedFiles.add(getNativePath(getDirectoryNameRelative()) + File.separator +
574                             getNativePath(treeWalk.getPathString()));
575                 }
576             }
577         } else {
578             getFilesBetweenCommits(repository, commit.getParent(0), commit, changedFiles, renamedFiles, deletedFiles);
579         }
580     }
581 
getNativePath(String path)582     private static String getNativePath(String path) {
583         if (!File.separator.equals("/")) {
584             return path.replace("/", File.separator);
585         }
586 
587         return path;
588     }
589 
590     /**
591      * Assemble list of changed/deleted/renamed files between a commit and its parent.
592      * @param repository repository object
593      * @param oldCommit parent commit
594      * @param newCommit new commit (the method assumes oldCommit is its parent)
595      * @param changedFiles output: set of changedFiles that changed (excludes renamed changedFiles)
596      * @param renamedFiles output: set of renamed files (if renamed handling is enabled)
597      * @param deletedFiles output: set of deleted files
598      * @throws IOException on I/O problem
599      */
getFilesBetweenCommits(org.eclipse.jgit.lib.Repository repository, RevCommit oldCommit, RevCommit newCommit, Set<String> changedFiles, Set<String> renamedFiles, Set<String> deletedFiles)600     private void getFilesBetweenCommits(org.eclipse.jgit.lib.Repository repository,
601                                         RevCommit oldCommit, RevCommit newCommit,
602                                         Set<String> changedFiles, Set<String> renamedFiles, Set<String> deletedFiles)
603             throws IOException {
604 
605         OutputStream outputStream = NullOutputStream.INSTANCE;
606         try (DiffFormatter formatter = new DiffFormatter(outputStream)) {
607             formatter.setRepository(repository);
608             if (isHandleRenamedFiles()) {
609                 formatter.setDetectRenames(true);
610             }
611 
612             List<DiffEntry> diffs = formatter.scan(prepareTreeParser(repository, oldCommit),
613                     prepareTreeParser(repository, newCommit));
614 
615             for (DiffEntry diff : diffs) {
616                 String newPath = getNativePath(getDirectoryNameRelative()) + File.separator +
617                         getNativePath(diff.getNewPath());
618 
619                 handleDiff(changedFiles, renamedFiles, deletedFiles, diff, newPath);
620             }
621         }
622     }
623 
handleDiff(Set<String> changedFiles, Set<String> renamedFiles, Set<String> deletedFiles, DiffEntry diff, String newPath)624     private void handleDiff(Set<String> changedFiles, Set<String> renamedFiles, Set<String> deletedFiles,
625                             DiffEntry diff, String newPath) {
626 
627         switch (diff.getChangeType()) {
628             case DELETE:
629                 if (deletedFiles != null) {
630                     // newPath would be "/dev/null"
631                     String oldPath = getNativePath(getDirectoryNameRelative()) + File.separator +
632                             getNativePath(diff.getOldPath());
633                     deletedFiles.add(oldPath);
634                 }
635                 break;
636             case RENAME:
637                 if (isHandleRenamedFiles()) {
638                     renamedFiles.add(newPath);
639                     if (deletedFiles != null) {
640                         String oldPath = getNativePath(getDirectoryNameRelative()) + File.separator +
641                                 getNativePath(diff.getOldPath());
642                         deletedFiles.add(oldPath);
643                     }
644                 }
645                 break;
646             default:
647                 if (changedFiles != null) {
648                     // Added files (ChangeType.ADD) are treated as changed.
649                     changedFiles.add(newPath);
650                 }
651                 break;
652         }
653     }
654 
prepareTreeParser(org.eclipse.jgit.lib.Repository repository, RevCommit commit)655     private static AbstractTreeIterator prepareTreeParser(org.eclipse.jgit.lib.Repository repository,
656                                                           RevCommit commit) throws IOException {
657         // from the commit we can build the tree which allows us to construct the TreeParser
658         try (RevWalk walk = new RevWalk(repository)) {
659             RevTree tree = walk.parseTree(commit.getTree().getId());
660 
661             CanonicalTreeParser treeParser = new CanonicalTreeParser();
662             try (ObjectReader reader = repository.newObjectReader()) {
663                 treeParser.reset(reader, tree.getId());
664             }
665 
666             walk.dispose();
667 
668             return treeParser;
669         }
670     }
671 
672     @Override
hasFileBasedTags()673     boolean hasFileBasedTags() {
674         return true;
675     }
676 
677     /**
678      * @param dotGit {@code .git} file
679      * @return value of the {@code gitdir} property from the file
680      */
getGitDirValue(File dotGit)681     private String getGitDirValue(File dotGit) {
682         try (Scanner scanner = new Scanner(dotGit, StandardCharsets.UTF_8)) {
683             while (scanner.hasNextLine()) {
684                 String line = scanner.nextLine();
685                 if (line.startsWith(Constants.GITDIR)) {
686                     return line.substring(Constants.GITDIR.length());
687                 }
688             }
689         } catch (IOException e) {
690             LOGGER.log(Level.WARNING, "failed to scan the contents of file ''{0}''", dotGit);
691         }
692 
693         return null;
694     }
695 
getJGitRepository(String directory)696     private org.eclipse.jgit.lib.Repository getJGitRepository(String directory) throws IOException {
697         File dotGitFile = Paths.get(directory, Constants.DOT_GIT).toFile();
698         if (dotGitFile.isDirectory()) {
699             return FileRepositoryBuilder.create(dotGitFile);
700         }
701 
702         // Assume this is a sub-module so dotGitFile is a file.
703         String gitDirValue = getGitDirValue(dotGitFile);
704         if (gitDirValue == null) {
705             throw new IOException("cannot get gitDir value from " + dotGitFile);
706         }
707 
708         // If the gitDir value is relative path, make it absolute.
709         // This is necessary for the JGit Repository construction.
710         File gitDirFile = new File(gitDirValue);
711         if (!gitDirFile.isAbsolute()) {
712             gitDirFile = new File(directory, gitDirValue);
713         }
714 
715         return new FileRepositoryBuilder().setWorkTree(new File(directory)).setGitDir(gitDirFile).build();
716     }
717 
rebuildTagList(File directory)718     private void rebuildTagList(File directory) {
719         this.tagList = new TreeSet<>();
720         try (org.eclipse.jgit.lib.Repository repository = getJGitRepository(directory.getAbsolutePath())) {
721             try (Git git = new Git(repository)) {
722                 List<Ref> refList = git.tagList().call(); // refs sorted according to tag names
723                 Map<RevCommit, String> commit2Tags = new HashMap<>();
724                 for (Ref ref : refList) {
725                     try {
726                         RevCommit commit = getCommit(repository, ref);
727                         String tagName = ref.getName().replace("refs/tags/", "");
728                         commit2Tags.merge(commit, tagName, (oldValue, newValue) -> oldValue + TAGS_SEPARATOR + newValue);
729                     } catch (IOException e) {
730                         LOGGER.log(Level.FINEST,
731                                 String.format("cannot get tags for \"%s\"", directory.getAbsolutePath()), e);
732                     }
733                 }
734 
735                 for (Map.Entry<RevCommit, String> entry : commit2Tags.entrySet()) {
736                     int commitTime = entry.getKey().getCommitTime();
737                     Date date = new Date((long) (commitTime) * 1000);
738                     GitTagEntry tagEntry = new GitTagEntry(entry.getKey().getName(),
739                             date, entry.getValue());
740                     this.tagList.add(tagEntry);
741                 }
742             }
743         } catch (IOException | GitAPIException e) {
744             LOGGER.log(Level.WARNING, String.format("cannot get tags for \"%s\"", directory.getAbsolutePath()), e);
745             // In case of partial success, do not null-out tagList here.
746         }
747 
748         if (LOGGER.isLoggable(Level.FINER)) {
749             LOGGER.log(Level.FINER, "Read tags count={0} for {1}",
750                     new Object[] {tagList.size(), directory});
751         }
752     }
753 
754     /**
755      * Builds a Git tag list by querying Git commit hash, commit time, and tag
756      * names.
757      * <p>Repository technically relies on the tag list to be ancestor ordered.
758      * <p>For a version control system that uses "linear revision numbering"
759      * (e.g. Subversion or Mercurial), the natural ordering in the
760      * {@link TreeSet} is by ancestor order and so
761      * {@link TagEntry#compareTo(HistoryEntry)} always determines the correct
762      * tag.
763      * <p>For {@link GitTagEntry} that does not use linear revision numbering,
764      * the {@link TreeSet} will be ordered by date. That does not necessarily
765      * align with ancestor order. In that case,
766      * {@link GitTagEntry#compareTo(HistoryEntry)} that compares by date can
767      * find the wrong tag.
768      * <p>Linus Torvalds: [When asking] "'can commit X be an ancestor of commit
769      * Y' (as a way to basically limit certain algorithms from having to walk
770      * all the way down). We've used commit dates for it, and realistically it
771      * really has worked very well. But it was always a broken heuristic."
772      * <p>"I think the lack of [generation numbers] is literally the only real
773      * design mistake we have [in Git]."
774      * <p>"We discussed adding generation numbers about 6 years ago [in 2005].
775      * We clearly *should* have done it. Instead, we went with the hacky `let's
776      * use commit time', that everybody really knew was technically wrong, and
777      * was a hack, but avoided the need."
778      * <p>If Git ever gets standard generation numbers,
779      * {@link GitTagEntry#compareTo(HistoryEntry)} should be revised to work
780      * reliably in all cases akin to a version control system that uses "linear
781      * revision numbering."
782      * @param directory a defined directory of the repository
783      * @param cmdType command timeout type
784      */
785     @Override
buildTagList(File directory, CommandTimeoutType cmdType)786     protected void buildTagList(File directory, CommandTimeoutType cmdType) {
787         final ExecutorService executor = Executors.newSingleThreadExecutor();
788         final Future<?> future = executor.submit(() -> rebuildTagList(directory));
789         executor.shutdown();
790 
791         try {
792             future.get(RuntimeEnvironment.getInstance().getCommandTimeout(cmdType), TimeUnit.SECONDS);
793         } catch (InterruptedException | ExecutionException e) {
794             LOGGER.log(Level.WARNING, "failed tag rebuild for directory " + directory, e);
795         } catch (TimeoutException e) {
796             LOGGER.log(Level.WARNING, "timed out tag rebuild for directory " + directory, e);
797         }
798 
799         if (!executor.isTerminated()) {
800             executor.shutdownNow();
801         }
802     }
803 
804     @NotNull
getCommit(org.eclipse.jgit.lib.Repository repository, Ref ref)805     private RevCommit getCommit(org.eclipse.jgit.lib.Repository repository, Ref ref) throws IOException {
806         try (RevWalk walk = new RevWalk(repository)) {
807             return walk.parseCommit(ref.getObjectId());
808         }
809     }
810 
811     @Override
812     @Nullable
determineParent(CommandTimeoutType cmdType)813     String determineParent(CommandTimeoutType cmdType) throws IOException {
814         try (org.eclipse.jgit.lib.Repository repository = getJGitRepository(getDirectoryName())) {
815             if (repository.getConfig() != null) {
816                 return repository.getConfig().getString("remote", Constants.DEFAULT_REMOTE_NAME, "url");
817             } else {
818                 return null;
819             }
820         }
821     }
822 
823     @Override
determineBranch(CommandTimeoutType cmdType)824     String determineBranch(CommandTimeoutType cmdType) throws IOException {
825         try (org.eclipse.jgit.lib.Repository repository = getJGitRepository(getDirectoryName())) {
826             return repository.getBranch();
827         }
828     }
829 
830     @Override
determineCurrentVersion(CommandTimeoutType cmdType)831     public String determineCurrentVersion(CommandTimeoutType cmdType) throws IOException {
832         try (org.eclipse.jgit.lib.Repository repository = getJGitRepository(getDirectoryName())) {
833             Ref head = repository.exactRef(Constants.HEAD);
834             if (head != null && head.getObjectId() != null) {
835                 try (RevWalk walk = new RevWalk(repository); ObjectReader reader = repository.newObjectReader()) {
836                     RevCommit commit = walk.parseCommit(head.getObjectId());
837                     int commitTime = commit.getCommitTime();
838                     Date date = new Date((long) (commitTime) * 1000);
839                     return String.format("%s %s %s %s",
840                             format(date),
841                             reader.abbreviate(head.getObjectId()).name(),
842                             commit.getAuthorIdent().getName(),
843                             commit.getShortMessage());
844                 }
845             }
846         }
847 
848         return null;
849     }
850 }
851