xref: /OpenGrok/opengrok-indexer/src/main/java/org/opengrok/indexer/history/MercurialHistoryParser.java (revision 794d0b7601051ece0392a8b2dc98950bb17570e1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2006, 2022, Oracle and/or its affiliates. All rights reserved.
22  * Portions Copyright (c) 2017, Chris Fraire <cfraire@me.com>.
23  */
24 package org.opengrok.indexer.history;
25 
26 import java.io.BufferedReader;
27 import java.io.File;
28 import java.io.FileNotFoundException;
29 import java.io.IOException;
30 import java.io.InputStream;
31 import java.io.InputStreamReader;
32 import java.nio.file.InvalidPathException;
33 import java.text.ParseException;
34 import java.util.ArrayList;
35 import java.util.Date;
36 import java.util.Iterator;
37 import java.util.List;
38 import java.util.logging.Level;
39 import java.util.logging.Logger;
40 import org.opengrok.indexer.configuration.RuntimeEnvironment;
41 import org.opengrok.indexer.logger.LoggerFactory;
42 import org.opengrok.indexer.util.Executor;
43 import org.opengrok.indexer.util.ForbiddenSymlinkException;
44 
45 /**
46  * Parse a stream of mercurial log comments.
47  */
48 class MercurialHistoryParser implements Executor.StreamHandler {
49 
50     private static final Logger LOGGER = LoggerFactory.getLogger(MercurialHistoryParser.class);
51 
52     /** Prefix which identifies lines with the description of a commit. */
53     private static final String DESC_PREFIX = "description: ";
54 
55     private List<RepositoryWithHistoryTraversal.ChangesetInfo> entries = new ArrayList<>();
56     private final MercurialRepository repository;
57     private final String mydir;
58     private boolean isDir;
59     private final List<ChangesetVisitor> visitors;
60 
MercurialHistoryParser(MercurialRepository repository, List<ChangesetVisitor> visitors)61     MercurialHistoryParser(MercurialRepository repository, List<ChangesetVisitor> visitors) {
62         this.repository = repository;
63         this.visitors = visitors;
64         mydir = repository.getDirectoryName() + File.separator;
65     }
66 
67     /**
68      * Parse the history for the specified file or directory. If a changeset is
69      * specified, only return the history from the changeset right after the specified one.
70      *
71      * @param file the file or directory to get history for
72      * @param sinceRevision the changeset right before the first one to fetch, or
73      * {@code null} if all changesets should be fetched
74      * @param tillRevision end revision or {@code null}
75      * @param numCommits number of revisions to get
76      * @throws HistoryException if an error happens when parsing the history
77      */
parse(File file, String sinceRevision, String tillRevision, Integer numCommits)78     void parse(File file, String sinceRevision, String tillRevision, Integer numCommits) throws HistoryException {
79         isDir = file.isDirectory();
80         try {
81             Executor executor = repository.getHistoryLogExecutor(file, sinceRevision, tillRevision, false,
82                     numCommits);
83             int status = executor.exec(true, this);
84 
85             if (status != 0) {
86                 throw new HistoryException("Failed to get history for: \"" + file.getAbsolutePath() +
87                                            "\" Exit code: " + status);
88             }
89         } catch (IOException e) {
90             throw new HistoryException("Failed to get history for: \"" + file.getAbsolutePath() + "\"", e);
91         }
92 
93         // If a changeset to start from is specified, remove that changeset from the list,
94         // since only the ones following it should be returned.
95         // Also check that the specified changeset was found, otherwise throw an exception.
96         if (sinceRevision != null) {
97             removeAndVerifyOldestChangeset(entries, sinceRevision);
98         }
99 
100         // See getHistoryLogExecutor() for explanation.
101         if (repository.isHandleRenamedFiles() && file.isFile() && tillRevision != null) {
102             removeChangesets(entries, tillRevision);
103         }
104 
105         // The visitors are fed with the ChangesetInfo instances here (as opposed to in parse()),
106         // because of the above manipulations with the entries.
107         for (RepositoryWithHistoryTraversal.ChangesetInfo info : entries) {
108             for (ChangesetVisitor visitor : visitors) {
109                 visitor.accept(info);
110             }
111         }
112     }
113 
114     /**
115      * Remove the oldest changeset from a list (assuming sorted with most recent
116      * changeset first) and verify that it is the changeset expected to find there.
117      *
118      * @param entries a list of {@code HistoryEntry} objects
119      * @param revision the revision we expect the oldest entry to have
120      * @throws HistoryException if the oldest entry was not the one we expected
121      */
removeAndVerifyOldestChangeset(List<RepositoryWithHistoryTraversal.ChangesetInfo> entries, String revision)122     private void removeAndVerifyOldestChangeset(List<RepositoryWithHistoryTraversal.ChangesetInfo> entries, String revision)
123             throws HistoryException {
124 
125         RepositoryWithHistoryTraversal.ChangesetInfo entry = entries.isEmpty() ? null : entries.remove(entries.size() - 1);
126 
127         // TODO We should check more thoroughly that the changeset is the one
128         // we expected it to be, since some SCMs may change the revision
129         // numbers so that identical revision numbers does not always mean
130         // identical changesets. We could for example get the cached changeset
131         // and compare more fields, like author and date.
132         if (entry == null || !revision.equals(entry.commit.revision)) {
133             throw new HistoryException("Cached revision '" + revision
134                     + "' not found in the repository "
135                     + repository.getDirectoryName());
136         }
137     }
138 
removeChangesets(List<RepositoryWithHistoryTraversal.ChangesetInfo> entries, String tillRevision)139     private void removeChangesets(List<RepositoryWithHistoryTraversal.ChangesetInfo> entries, String tillRevision) {
140         for (Iterator<RepositoryWithHistoryTraversal.ChangesetInfo> iter = entries.listIterator(); iter.hasNext(); ) {
141             RepositoryWithHistoryTraversal.ChangesetInfo entry = iter.next();
142             if (entry.commit.revision.equals(tillRevision)) {
143                 break;
144             }
145             iter.remove();
146         }
147     }
148 
149     /**
150      * Process the output from the {@code hg log} command and collect
151      * {@link org.opengrok.indexer.history.RepositoryWithHistoryTraversal.ChangesetInfo} elements.
152      *
153      * @param input The output from the process
154      * @throws java.io.IOException If an error occurs while reading the stream
155      */
156     @Override
processStream(InputStream input)157     public void processStream(InputStream input) throws IOException {
158         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
159         BufferedReader in = new BufferedReader(new InputStreamReader(input));
160         entries = new ArrayList<>();
161         String s;
162         RepositoryWithHistoryTraversal.ChangesetInfo entry = null;
163         while ((s = in.readLine()) != null) {
164             if (s.startsWith(MercurialRepository.CHANGESET)) {
165                 entry = new RepositoryWithHistoryTraversal.ChangesetInfo(new RepositoryWithHistoryTraversal.CommitInfo());
166                 entries.add(entry);
167                 entry.commit.revision = s.substring(MercurialRepository.CHANGESET.length()).trim();
168             } else if (s.startsWith(MercurialRepository.USER) && entry != null) {
169                 entry.commit.authorName = s.substring(MercurialRepository.USER.length()).trim();
170             } else if (s.startsWith(MercurialRepository.DATE) && entry != null) {
171                 Date date;
172                 try {
173                     date = repository.parse(s.substring(MercurialRepository.DATE.length()).trim());
174                 } catch (ParseException pe) {
175                     //
176                     // Overriding processStream() thus need to comply with the
177                     // set of exceptions it can throw.
178                     //
179                     throw new IOException("Could not parse date: " + s, pe);
180                 }
181                 entry.commit.date = date;
182             } else if (s.startsWith(MercurialRepository.FILES) && entry != null) {
183                 String[] strings = s.split(" ");
184                 for (int ii = 1; ii < strings.length; ++ii) {
185                     if (strings[ii].length() > 0) {
186                         File f = new File(mydir, strings[ii]);
187                         try {
188                             String path = env.getPathRelativeToSourceRoot(f);
189                             entry.files.add(path.intern());
190                         } catch (ForbiddenSymlinkException e) {
191                             LOGGER.log(Level.FINER, e.getMessage());
192                             // ignore
193                         } catch (FileNotFoundException e) { // NOPMD
194                             // If the file is not located under the source root,
195                             // ignore it (bug #11664).
196                         } catch (InvalidPathException e) {
197                             LOGGER.log(Level.WARNING, e.getMessage());
198                         }
199                     }
200                 }
201             } else if (repository.isHandleRenamedFiles() && s.startsWith(MercurialRepository.FILE_COPIES) &&
202                 entry != null && isDir) {
203                 /*
204                  * 'file_copies:' should be present only for directories but
205                  * we use isDir to be on the safe side.
206                  */
207                 s = s.replaceFirst(MercurialRepository.FILE_COPIES, "");
208                 String[] splitArray = s.split("\\)");
209                 for (String part: splitArray) {
210                      /*
211                       * This will fail for file names containing ' ('.
212                       */
213                      String[] move = part.split(" \\(");
214                      File f = new File(mydir + move[0]);
215                      if (!move[0].isEmpty() && f.exists()) {
216                              entry.renamedFiles.add(repository.getDirectoryNameRelative() + File.separator + move[0]);
217                      }
218                 }
219             } else if (s.startsWith(DESC_PREFIX) && entry != null) {
220                 entry.commit.message = decodeDescription(s);
221             } else if (s.equals(MercurialRepository.END_OF_ENTRY)
222                 && entry != null) {
223                     entry = null;
224             } else if (s.length() > 0) {
225                 LOGGER.log(Level.WARNING,
226                     "Invalid/unexpected output {0} from hg log for repo {1}",
227                     new Object[]{s, repository.getDirectoryName()});
228             }
229         }
230     }
231 
232     /**
233      * Decode a line with a description of a commit. The line is a sequence of
234      * XML character entities that need to be converted to single characters.
235      * This is to prevent problems if the log message contains one of the
236      * prefixes that {@link #processStream(InputStream)} is looking for (bug
237      * #405).
238      *
239      * This method is way too tolerant, and won't complain if the line has
240      * a different format than expected. It will return weird results, though.
241      *
242      * @param line the XML encoded line
243      * @return the decoded description
244      */
decodeDescription(String line)245     private String decodeDescription(String line) {
246         StringBuilder out = new StringBuilder();
247         int value = 0;
248 
249         // fetch the char values from the &#ddd; sequences
250         for (int i = DESC_PREFIX.length(); i < line.length(); i++) {
251             char ch = line.charAt(i);
252             if (Character.isDigit(ch)) {
253                 value = value * 10 + Character.getNumericValue(ch);
254             } else if (ch == ';') {
255                 out.append((char) value);
256                 value = 0;
257             }
258         }
259 
260         assert value == 0 : "description did not end with a semi-colon";
261 
262         return out.toString();
263     }
264 }
265