1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * See LICENSE.txt included in this distribution for the specific 9 * language governing permissions and limitations under the License. 10 * 11 * When distributing Covered Code, include this CDDL HEADER in each 12 * file and include the License file at LICENSE.txt. 13 * If applicable, add the following below this CDDL HEADER, with the 14 * fields enclosed by brackets "[]" replaced with your own identifying 15 * information: Portions Copyright [yyyy] [name of copyright owner] 16 * 17 * CDDL HEADER END 18 */ 19 20 /* 21 * Copyright (c) 2006, 2022, Oracle and/or its affiliates. All rights reserved. 22 * Portions Copyright (c) 2017, Chris Fraire <cfraire@me.com>. 23 */ 24 package org.opengrok.indexer.history; 25 26 import java.io.BufferedReader; 27 import java.io.File; 28 import java.io.FileNotFoundException; 29 import java.io.IOException; 30 import java.io.InputStream; 31 import java.io.InputStreamReader; 32 import java.nio.file.InvalidPathException; 33 import java.text.ParseException; 34 import java.util.ArrayList; 35 import java.util.Date; 36 import java.util.Iterator; 37 import java.util.List; 38 import java.util.logging.Level; 39 import java.util.logging.Logger; 40 import org.opengrok.indexer.configuration.RuntimeEnvironment; 41 import org.opengrok.indexer.logger.LoggerFactory; 42 import org.opengrok.indexer.util.Executor; 43 import org.opengrok.indexer.util.ForbiddenSymlinkException; 44 45 /** 46 * Parse a stream of mercurial log comments. 47 */ 48 class MercurialHistoryParser implements Executor.StreamHandler { 49 50 private static final Logger LOGGER = LoggerFactory.getLogger(MercurialHistoryParser.class); 51 52 /** Prefix which identifies lines with the description of a commit. */ 53 private static final String DESC_PREFIX = "description: "; 54 55 private List<RepositoryWithHistoryTraversal.ChangesetInfo> entries = new ArrayList<>(); 56 private final MercurialRepository repository; 57 private final String mydir; 58 private boolean isDir; 59 private final List<ChangesetVisitor> visitors; 60 MercurialHistoryParser(MercurialRepository repository, List<ChangesetVisitor> visitors)61 MercurialHistoryParser(MercurialRepository repository, List<ChangesetVisitor> visitors) { 62 this.repository = repository; 63 this.visitors = visitors; 64 mydir = repository.getDirectoryName() + File.separator; 65 } 66 67 /** 68 * Parse the history for the specified file or directory. If a changeset is 69 * specified, only return the history from the changeset right after the specified one. 70 * 71 * @param file the file or directory to get history for 72 * @param sinceRevision the changeset right before the first one to fetch, or 73 * {@code null} if all changesets should be fetched 74 * @param tillRevision end revision or {@code null} 75 * @param numCommits number of revisions to get 76 * @throws HistoryException if an error happens when parsing the history 77 */ parse(File file, String sinceRevision, String tillRevision, Integer numCommits)78 void parse(File file, String sinceRevision, String tillRevision, Integer numCommits) throws HistoryException { 79 isDir = file.isDirectory(); 80 try { 81 Executor executor = repository.getHistoryLogExecutor(file, sinceRevision, tillRevision, false, 82 numCommits); 83 int status = executor.exec(true, this); 84 85 if (status != 0) { 86 throw new HistoryException("Failed to get history for: \"" + file.getAbsolutePath() + 87 "\" Exit code: " + status); 88 } 89 } catch (IOException e) { 90 throw new HistoryException("Failed to get history for: \"" + file.getAbsolutePath() + "\"", e); 91 } 92 93 // If a changeset to start from is specified, remove that changeset from the list, 94 // since only the ones following it should be returned. 95 // Also check that the specified changeset was found, otherwise throw an exception. 96 if (sinceRevision != null) { 97 removeAndVerifyOldestChangeset(entries, sinceRevision); 98 } 99 100 // See getHistoryLogExecutor() for explanation. 101 if (repository.isHandleRenamedFiles() && file.isFile() && tillRevision != null) { 102 removeChangesets(entries, tillRevision); 103 } 104 105 // The visitors are fed with the ChangesetInfo instances here (as opposed to in parse()), 106 // because of the above manipulations with the entries. 107 for (RepositoryWithHistoryTraversal.ChangesetInfo info : entries) { 108 for (ChangesetVisitor visitor : visitors) { 109 visitor.accept(info); 110 } 111 } 112 } 113 114 /** 115 * Remove the oldest changeset from a list (assuming sorted with most recent 116 * changeset first) and verify that it is the changeset expected to find there. 117 * 118 * @param entries a list of {@code HistoryEntry} objects 119 * @param revision the revision we expect the oldest entry to have 120 * @throws HistoryException if the oldest entry was not the one we expected 121 */ removeAndVerifyOldestChangeset(List<RepositoryWithHistoryTraversal.ChangesetInfo> entries, String revision)122 private void removeAndVerifyOldestChangeset(List<RepositoryWithHistoryTraversal.ChangesetInfo> entries, String revision) 123 throws HistoryException { 124 125 RepositoryWithHistoryTraversal.ChangesetInfo entry = entries.isEmpty() ? null : entries.remove(entries.size() - 1); 126 127 // TODO We should check more thoroughly that the changeset is the one 128 // we expected it to be, since some SCMs may change the revision 129 // numbers so that identical revision numbers does not always mean 130 // identical changesets. We could for example get the cached changeset 131 // and compare more fields, like author and date. 132 if (entry == null || !revision.equals(entry.commit.revision)) { 133 throw new HistoryException("Cached revision '" + revision 134 + "' not found in the repository " 135 + repository.getDirectoryName()); 136 } 137 } 138 removeChangesets(List<RepositoryWithHistoryTraversal.ChangesetInfo> entries, String tillRevision)139 private void removeChangesets(List<RepositoryWithHistoryTraversal.ChangesetInfo> entries, String tillRevision) { 140 for (Iterator<RepositoryWithHistoryTraversal.ChangesetInfo> iter = entries.listIterator(); iter.hasNext(); ) { 141 RepositoryWithHistoryTraversal.ChangesetInfo entry = iter.next(); 142 if (entry.commit.revision.equals(tillRevision)) { 143 break; 144 } 145 iter.remove(); 146 } 147 } 148 149 /** 150 * Process the output from the {@code hg log} command and collect 151 * {@link org.opengrok.indexer.history.RepositoryWithHistoryTraversal.ChangesetInfo} elements. 152 * 153 * @param input The output from the process 154 * @throws java.io.IOException If an error occurs while reading the stream 155 */ 156 @Override processStream(InputStream input)157 public void processStream(InputStream input) throws IOException { 158 RuntimeEnvironment env = RuntimeEnvironment.getInstance(); 159 BufferedReader in = new BufferedReader(new InputStreamReader(input)); 160 entries = new ArrayList<>(); 161 String s; 162 RepositoryWithHistoryTraversal.ChangesetInfo entry = null; 163 while ((s = in.readLine()) != null) { 164 if (s.startsWith(MercurialRepository.CHANGESET)) { 165 entry = new RepositoryWithHistoryTraversal.ChangesetInfo(new RepositoryWithHistoryTraversal.CommitInfo()); 166 entries.add(entry); 167 entry.commit.revision = s.substring(MercurialRepository.CHANGESET.length()).trim(); 168 } else if (s.startsWith(MercurialRepository.USER) && entry != null) { 169 entry.commit.authorName = s.substring(MercurialRepository.USER.length()).trim(); 170 } else if (s.startsWith(MercurialRepository.DATE) && entry != null) { 171 Date date; 172 try { 173 date = repository.parse(s.substring(MercurialRepository.DATE.length()).trim()); 174 } catch (ParseException pe) { 175 // 176 // Overriding processStream() thus need to comply with the 177 // set of exceptions it can throw. 178 // 179 throw new IOException("Could not parse date: " + s, pe); 180 } 181 entry.commit.date = date; 182 } else if (s.startsWith(MercurialRepository.FILES) && entry != null) { 183 String[] strings = s.split(" "); 184 for (int ii = 1; ii < strings.length; ++ii) { 185 if (strings[ii].length() > 0) { 186 File f = new File(mydir, strings[ii]); 187 try { 188 String path = env.getPathRelativeToSourceRoot(f); 189 entry.files.add(path.intern()); 190 } catch (ForbiddenSymlinkException e) { 191 LOGGER.log(Level.FINER, e.getMessage()); 192 // ignore 193 } catch (FileNotFoundException e) { // NOPMD 194 // If the file is not located under the source root, 195 // ignore it (bug #11664). 196 } catch (InvalidPathException e) { 197 LOGGER.log(Level.WARNING, e.getMessage()); 198 } 199 } 200 } 201 } else if (repository.isHandleRenamedFiles() && s.startsWith(MercurialRepository.FILE_COPIES) && 202 entry != null && isDir) { 203 /* 204 * 'file_copies:' should be present only for directories but 205 * we use isDir to be on the safe side. 206 */ 207 s = s.replaceFirst(MercurialRepository.FILE_COPIES, ""); 208 String[] splitArray = s.split("\\)"); 209 for (String part: splitArray) { 210 /* 211 * This will fail for file names containing ' ('. 212 */ 213 String[] move = part.split(" \\("); 214 File f = new File(mydir + move[0]); 215 if (!move[0].isEmpty() && f.exists()) { 216 entry.renamedFiles.add(repository.getDirectoryNameRelative() + File.separator + move[0]); 217 } 218 } 219 } else if (s.startsWith(DESC_PREFIX) && entry != null) { 220 entry.commit.message = decodeDescription(s); 221 } else if (s.equals(MercurialRepository.END_OF_ENTRY) 222 && entry != null) { 223 entry = null; 224 } else if (s.length() > 0) { 225 LOGGER.log(Level.WARNING, 226 "Invalid/unexpected output {0} from hg log for repo {1}", 227 new Object[]{s, repository.getDirectoryName()}); 228 } 229 } 230 } 231 232 /** 233 * Decode a line with a description of a commit. The line is a sequence of 234 * XML character entities that need to be converted to single characters. 235 * This is to prevent problems if the log message contains one of the 236 * prefixes that {@link #processStream(InputStream)} is looking for (bug 237 * #405). 238 * 239 * This method is way too tolerant, and won't complain if the line has 240 * a different format than expected. It will return weird results, though. 241 * 242 * @param line the XML encoded line 243 * @return the decoded description 244 */ decodeDescription(String line)245 private String decodeDescription(String line) { 246 StringBuilder out = new StringBuilder(); 247 int value = 0; 248 249 // fetch the char values from the &#ddd; sequences 250 for (int i = DESC_PREFIX.length(); i < line.length(); i++) { 251 char ch = line.charAt(i); 252 if (Character.isDigit(ch)) { 253 value = value * 10 + Character.getNumericValue(ch); 254 } else if (ch == ';') { 255 out.append((char) value); 256 value = 0; 257 } 258 } 259 260 assert value == 0 : "description did not end with a semi-colon"; 261 262 return out.toString(); 263 } 264 } 265