1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * See LICENSE.txt included in this distribution for the specific 9 * language governing permissions and limitations under the License. 10 * 11 * When distributing Covered Code, include this CDDL HEADER in each 12 * file and include the License file at LICENSE.txt. 13 * If applicable, add the following below this CDDL HEADER, with the 14 * fields enclosed by brackets "[]" replaced with your own identifying 15 * information: Portions Copyright [yyyy] [name of copyright owner] 16 * 17 * CDDL HEADER END 18 */ 19 20 /* 21 * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved. 22 * Portions Copyright (c) 2017, 2020, Chris Fraire <cfraire@me.com>. 23 * Portions Copyright (c) 2020, Ric Harris <harrisric@users.noreply.github.com>. 24 */ 25 package org.opengrok.indexer.history; 26 27 import java.io.BufferedInputStream; 28 import java.io.ByteArrayInputStream; 29 import java.io.File; 30 import java.io.IOException; 31 import java.io.InputStream; 32 import java.nio.charset.StandardCharsets; 33 import java.text.ParseException; 34 import java.util.ArrayList; 35 import java.util.HashSet; 36 import java.util.List; 37 import java.util.Set; 38 import java.util.logging.Level; 39 import java.util.logging.Logger; 40 41 import javax.xml.XMLConstants; 42 import javax.xml.parsers.SAXParser; 43 import javax.xml.parsers.SAXParserFactory; 44 45 import org.opengrok.indexer.configuration.CommandTimeoutType; 46 import org.opengrok.indexer.configuration.RuntimeEnvironment; 47 import org.opengrok.indexer.logger.LoggerFactory; 48 import org.opengrok.indexer.util.Executor; 49 import org.xml.sax.Attributes; 50 import org.xml.sax.SAXException; 51 import org.xml.sax.ext.DefaultHandler2; 52 53 /** 54 * Parse source history for a Subversion Repository. 55 * 56 * @author Trond Norbye 57 */ 58 class SubversionHistoryParser implements Executor.StreamHandler { 59 60 private static final Logger LOGGER = LoggerFactory.getLogger(SubversionHistoryParser.class); 61 62 private SAXParser saxParser = null; 63 private Handler handler; 64 65 private static class Handler extends DefaultHandler2 { 66 67 /** 68 * Example of the longest date format that we should accept - SimpleDateFormat cannot cope with micro/nano seconds. 69 */ 70 static final int SVN_MILLIS_DATE_LENGTH = "2020-03-26T15:38:55.999Z".length(); 71 72 final String prefix; 73 final String home; 74 final int length; 75 final List<HistoryEntry> entries = new ArrayList<>(); 76 final Set<String> renamedFiles = new HashSet<>(); 77 final SubversionRepository repository; 78 HistoryEntry entry; 79 StringBuilder sb; 80 boolean isRenamed; 81 Handler(String home, String prefix, int length, SubversionRepository repository)82 Handler(String home, String prefix, int length, SubversionRepository repository) { 83 this.home = home; 84 this.prefix = prefix; 85 this.length = length; 86 this.repository = repository; 87 sb = new StringBuilder(); 88 } 89 getRenamedFiles()90 List<String> getRenamedFiles() { 91 return new ArrayList<>(renamedFiles); 92 } 93 94 @Override startElement(String uri, String localName, String qname, Attributes attr)95 public void startElement(String uri, String localName, String qname, Attributes attr) { 96 isRenamed = false; 97 if ("logentry".equals(qname)) { 98 entry = new HistoryEntry(); 99 entry.setActive(true); 100 entry.setRevision(attr.getValue("revision")); 101 } else if ("path".equals(qname)) { 102 isRenamed = attr.getIndex("copyfrom-path") != -1; 103 } 104 sb.setLength(0); 105 } 106 107 @Override endElement(String uri, String localName, String qname)108 public void endElement(String uri, String localName, String qname) throws SAXException { 109 String s = sb.toString(); 110 if ("author".equals(qname)) { 111 entry.setAuthor(s); 112 } else if ("date".equals(qname)) { 113 try { 114 // need to strip microseconds off - assume final character is Z otherwise invalid anyway. 115 String dateString = s; 116 if (s.length() > SVN_MILLIS_DATE_LENGTH) { 117 dateString = dateString.substring(0, SVN_MILLIS_DATE_LENGTH - 1) + 118 dateString.charAt(dateString.length() - 1); 119 } 120 entry.setDate(repository.parse(dateString)); 121 } catch (ParseException ex) { 122 throw new SAXException("Failed to parse date: " + s, ex); 123 } 124 } else if ("path".equals(qname)) { 125 /* 126 * We only want valid files in the repository, not the 127 * top-level directory itself, hence the check for inequality. 128 */ 129 if (s.startsWith(prefix) && !s.equals(prefix)) { 130 File file = new File(home, s.substring(prefix.length())); 131 String path = file.getAbsolutePath().substring(length); 132 // The same file names may be repeated in many commits, 133 // so intern them to reduce the memory footprint. 134 entry.addFile(path.intern()); 135 if (isRenamed) { 136 renamedFiles.add(path.intern()); 137 } 138 } else { 139 LOGGER.log(Level.FINER, "Skipping file outside repository: " + s); 140 } 141 } else if ("msg".equals(qname)) { 142 entry.setMessage(s); 143 } 144 if ("logentry".equals(qname)) { 145 entries.add(entry); 146 } 147 sb.setLength(0); 148 } 149 150 @Override characters(char[] arg0, int arg1, int arg2)151 public void characters(char[] arg0, int arg1, int arg2) { 152 sb.append(arg0, arg1, arg2); 153 } 154 } 155 156 /** 157 * Initialize the SAX parser instance. 158 */ initSaxParser()159 private void initSaxParser() throws HistoryException { 160 SAXParserFactory factory = SAXParserFactory.newInstance(); 161 saxParser = null; 162 try { 163 saxParser = factory.newSAXParser(); 164 saxParser.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, ""); // Compliant 165 saxParser.setProperty(XMLConstants.ACCESS_EXTERNAL_SCHEMA, ""); // compliant 166 } catch (Exception ex) { 167 throw new HistoryException("Failed to create SAX parser", ex); 168 } 169 } 170 171 /** 172 * Parse the history for the specified file. 173 * 174 * @param file the file to parse history for 175 * @param repos Pointer to the SubversionRepository 176 * @param sinceRevision the revision number immediately preceding the first 177 * revision we want, or {@code null} to fetch the entire history 178 * @return object representing the file's history 179 */ parse(File file, SubversionRepository repos, String sinceRevision, int numEntries, CommandTimeoutType cmdType)180 History parse(File file, SubversionRepository repos, String sinceRevision, 181 int numEntries, CommandTimeoutType cmdType) 182 throws HistoryException { 183 184 initSaxParser(); 185 handler = new Handler(repos.getDirectoryName(), repos.reposPath, 186 RuntimeEnvironment.getInstance().getSourceRootPath().length(), 187 repos); 188 189 Executor executor; 190 try { 191 executor = repos.getHistoryLogExecutor(file, sinceRevision, 192 numEntries, cmdType); 193 } catch (IOException e) { 194 throw new HistoryException("Failed to get history for: \"" + 195 file.getAbsolutePath() + "\"", e); 196 } 197 198 int status = executor.exec(true, this); 199 if (status != 0) { 200 throw new HistoryException("Failed to get history for: \"" + 201 file.getAbsolutePath() + "\" Exit code: " + status); 202 } 203 204 List<HistoryEntry> entries = handler.entries; 205 206 // If we only fetch parts of the history, we're not interested in 207 // sinceRevision. Remove it. 208 if (sinceRevision != null) { 209 repos.removeAndVerifyOldestChangeset(entries, sinceRevision); 210 } 211 212 return new History(entries, handler.getRenamedFiles()); 213 } 214 215 /** 216 * Process the output from the log command and insert the HistoryEntries 217 * into the history field. 218 * 219 * @param input The output from the process 220 */ 221 @Override processStream(InputStream input)222 public void processStream(InputStream input) throws IOException { 223 try { 224 initSaxParser(); 225 saxParser.parse(new BufferedInputStream(input), handler); 226 } catch (Exception e) { 227 throw new IOException("An error occurred while parsing the xml output", e); 228 } 229 } 230 231 /** 232 * Parse the given string. 233 * 234 * @param buffer The string to be parsed 235 * @return The parsed history 236 * @throws IOException if we fail to parse the buffer 237 */ parse(String buffer)238 History parse(String buffer) throws IOException { 239 handler = new Handler("/", "", 0, new SubversionRepository()); 240 processStream(new ByteArrayInputStream(buffer.getBytes(StandardCharsets.UTF_8))); 241 return new History(handler.entries, handler.getRenamedFiles()); 242 } 243 } 244