xref: /OpenGrok/opengrok-indexer/src/main/java/org/opengrok/indexer/history/SubversionHistoryParser.java (revision 5794e3b70031079b0a2b58b5dc2c7675d99b3743)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved.
22  * Portions Copyright (c) 2017, 2020, Chris Fraire <cfraire@me.com>.
23  * Portions Copyright (c) 2020, Ric Harris <harrisric@users.noreply.github.com>.
24  */
25 package org.opengrok.indexer.history;
26 
27 import java.io.BufferedInputStream;
28 import java.io.ByteArrayInputStream;
29 import java.io.File;
30 import java.io.IOException;
31 import java.io.InputStream;
32 import java.nio.charset.StandardCharsets;
33 import java.text.ParseException;
34 import java.util.ArrayList;
35 import java.util.HashSet;
36 import java.util.List;
37 import java.util.Set;
38 import java.util.logging.Level;
39 import java.util.logging.Logger;
40 
41 import javax.xml.XMLConstants;
42 import javax.xml.parsers.SAXParser;
43 import javax.xml.parsers.SAXParserFactory;
44 
45 import org.opengrok.indexer.configuration.CommandTimeoutType;
46 import org.opengrok.indexer.configuration.RuntimeEnvironment;
47 import org.opengrok.indexer.logger.LoggerFactory;
48 import org.opengrok.indexer.util.Executor;
49 import org.xml.sax.Attributes;
50 import org.xml.sax.SAXException;
51 import org.xml.sax.ext.DefaultHandler2;
52 
53 /**
54  * Parse source history for a Subversion Repository.
55  *
56  * @author Trond Norbye
57  */
58 class SubversionHistoryParser implements Executor.StreamHandler {
59 
60     private static final Logger LOGGER = LoggerFactory.getLogger(SubversionHistoryParser.class);
61 
62     private SAXParser saxParser = null;
63     private Handler handler;
64 
65     private static class Handler extends DefaultHandler2 {
66 
67         /**
68          * Example of the longest date format that we should accept - SimpleDateFormat cannot cope with micro/nano seconds.
69          */
70         static final int SVN_MILLIS_DATE_LENGTH = "2020-03-26T15:38:55.999Z".length();
71 
72         final String prefix;
73         final String home;
74         final int length;
75         final List<HistoryEntry> entries = new ArrayList<>();
76         final Set<String> renamedFiles = new HashSet<>();
77         final SubversionRepository repository;
78         HistoryEntry entry;
79         StringBuilder sb;
80         boolean isRenamed;
81 
Handler(String home, String prefix, int length, SubversionRepository repository)82         Handler(String home, String prefix, int length, SubversionRepository repository) {
83             this.home = home;
84             this.prefix = prefix;
85             this.length = length;
86             this.repository = repository;
87             sb = new StringBuilder();
88         }
89 
getRenamedFiles()90         List<String> getRenamedFiles() {
91             return new ArrayList<>(renamedFiles);
92         }
93 
94         @Override
startElement(String uri, String localName, String qname, Attributes attr)95         public void startElement(String uri, String localName, String qname, Attributes attr) {
96             isRenamed = false;
97             if ("logentry".equals(qname)) {
98                 entry = new HistoryEntry();
99                 entry.setActive(true);
100                 entry.setRevision(attr.getValue("revision"));
101             } else if ("path".equals(qname)) {
102                 isRenamed = attr.getIndex("copyfrom-path") != -1;
103             }
104             sb.setLength(0);
105         }
106 
107         @Override
endElement(String uri, String localName, String qname)108         public void endElement(String uri, String localName, String qname) throws SAXException {
109             String s = sb.toString();
110             if ("author".equals(qname)) {
111                 entry.setAuthor(s);
112             } else if ("date".equals(qname)) {
113                 try {
114                     // need to strip microseconds off - assume final character is Z otherwise invalid anyway.
115                     String dateString = s;
116                     if (s.length() > SVN_MILLIS_DATE_LENGTH) {
117                       dateString = dateString.substring(0, SVN_MILLIS_DATE_LENGTH - 1) +
118                           dateString.charAt(dateString.length() - 1);
119                     }
120                     entry.setDate(repository.parse(dateString));
121                 } catch (ParseException ex) {
122                     throw new SAXException("Failed to parse date: " + s, ex);
123                 }
124             } else if ("path".equals(qname)) {
125                 /*
126                  * We only want valid files in the repository, not the
127                  * top-level directory itself, hence the check for inequality.
128                  */
129                 if (s.startsWith(prefix) && !s.equals(prefix)) {
130                     File file = new File(home, s.substring(prefix.length()));
131                     String path = file.getAbsolutePath().substring(length);
132                     // The same file names may be repeated in many commits,
133                     // so intern them to reduce the memory footprint.
134                     entry.addFile(path.intern());
135                     if (isRenamed) {
136                         renamedFiles.add(path.intern());
137                     }
138                 } else {
139                     LOGGER.log(Level.FINER, "Skipping file outside repository: " + s);
140                 }
141             } else if ("msg".equals(qname)) {
142                 entry.setMessage(s);
143             }
144             if ("logentry".equals(qname)) {
145                 entries.add(entry);
146             }
147             sb.setLength(0);
148         }
149 
150         @Override
characters(char[] arg0, int arg1, int arg2)151         public void characters(char[] arg0, int arg1, int arg2) {
152             sb.append(arg0, arg1, arg2);
153         }
154     }
155 
156     /**
157      * Initialize the SAX parser instance.
158      */
initSaxParser()159     private void initSaxParser() throws HistoryException {
160         SAXParserFactory factory = SAXParserFactory.newInstance();
161         saxParser = null;
162         try {
163             saxParser = factory.newSAXParser();
164             saxParser.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, ""); // Compliant
165             saxParser.setProperty(XMLConstants.ACCESS_EXTERNAL_SCHEMA, ""); // compliant
166         } catch (Exception ex) {
167             throw new HistoryException("Failed to create SAX parser", ex);
168         }
169     }
170 
171     /**
172      * Parse the history for the specified file.
173      *
174      * @param file the file to parse history for
175      * @param repos Pointer to the SubversionRepository
176      * @param sinceRevision the revision number immediately preceding the first
177      * revision we want, or {@code null} to fetch the entire history
178      * @return object representing the file's history
179      */
parse(File file, SubversionRepository repos, String sinceRevision, int numEntries, CommandTimeoutType cmdType)180     History parse(File file, SubversionRepository repos, String sinceRevision,
181             int numEntries, CommandTimeoutType cmdType)
182             throws HistoryException {
183 
184         initSaxParser();
185         handler = new Handler(repos.getDirectoryName(), repos.reposPath,
186                 RuntimeEnvironment.getInstance().getSourceRootPath().length(),
187                 repos);
188 
189         Executor executor;
190         try {
191             executor = repos.getHistoryLogExecutor(file, sinceRevision,
192                     numEntries, cmdType);
193         } catch (IOException e) {
194             throw new HistoryException("Failed to get history for: \"" +
195                     file.getAbsolutePath() + "\"", e);
196         }
197 
198         int status = executor.exec(true, this);
199         if (status != 0) {
200             throw new HistoryException("Failed to get history for: \"" +
201                     file.getAbsolutePath() + "\" Exit code: " + status);
202         }
203 
204         List<HistoryEntry> entries = handler.entries;
205 
206         // If we only fetch parts of the history, we're not interested in
207         // sinceRevision. Remove it.
208         if (sinceRevision != null) {
209             repos.removeAndVerifyOldestChangeset(entries, sinceRevision);
210         }
211 
212         return new History(entries, handler.getRenamedFiles());
213     }
214 
215    /**
216      * Process the output from the log command and insert the HistoryEntries
217      * into the history field.
218      *
219      * @param input The output from the process
220      */
221     @Override
processStream(InputStream input)222     public void processStream(InputStream input) throws IOException {
223         try {
224             initSaxParser();
225             saxParser.parse(new BufferedInputStream(input), handler);
226         } catch (Exception e) {
227             throw new IOException("An error occurred while parsing the xml output", e);
228         }
229     }
230 
231     /**
232      * Parse the given string.
233      *
234      * @param buffer The string to be parsed
235      * @return The parsed history
236      * @throws IOException if we fail to parse the buffer
237      */
parse(String buffer)238     History parse(String buffer) throws IOException {
239         handler = new Handler("/", "", 0, new SubversionRepository());
240         processStream(new ByteArrayInputStream(buffer.getBytes(StandardCharsets.UTF_8)));
241         return new History(handler.entries, handler.getRenamedFiles());
242     }
243 }
244