xref: /OpenGrok/opengrok-indexer/src/main/java/org/opengrok/indexer/history/Repository.java (revision 85e5fa688f272c5ec55c4b1bc4eccc6f533b3ee5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2008, 2022, Oracle and/or its affiliates. All rights reserved.
22  * Portions Copyright (c) 2017, 2020, Chris Fraire <cfraire@me.com>.
23  */
24 package org.opengrok.indexer.history;
25 
26 import java.io.ByteArrayInputStream;
27 import java.io.ByteArrayOutputStream;
28 import java.io.File;
29 import java.io.FileOutputStream;
30 import java.io.IOException;
31 import java.io.InputStream;
32 import java.io.OutputStream;
33 import java.text.DateFormat;
34 import java.text.FieldPosition;
35 import java.text.ParseException;
36 import java.text.ParsePosition;
37 import java.text.SimpleDateFormat;
38 import java.util.ArrayList;
39 import java.util.Date;
40 import java.util.Iterator;
41 import java.util.List;
42 import java.util.Locale;
43 import java.util.TreeSet;
44 import java.util.logging.Level;
45 import java.util.logging.Logger;
46 
47 import org.jetbrains.annotations.Nullable;
48 import org.opengrok.indexer.configuration.CommandTimeoutType;
49 import org.opengrok.indexer.configuration.RuntimeEnvironment;
50 import org.opengrok.indexer.logger.LoggerFactory;
51 import org.opengrok.indexer.util.BufferSink;
52 import org.opengrok.indexer.util.Executor;
53 
54 /**
55  * An interface for an external repository.
56  *
57  * @author Trond Norbye
58  */
59 public abstract class Repository extends RepositoryInfo {
60 
61     private static final long serialVersionUID = -203179700904894217L;
62 
63     private static final Logger LOGGER = LoggerFactory.getLogger(Repository.class);
64 
65     /**
66      * format used for printing the date in {@code currentVersion}.
67      * <p>
68      * NOTE: SimpleDateFormat is not thread-safe, lock must be held when formatting
69      */
70     protected static final SimpleDateFormat OUTPUT_DATE_FORMAT =
71             new SimpleDateFormat("yyyy-MM-dd HH:mm Z");
72 
73     /**
74      * The command with which to access the external repository. Can be
75      * {@code null} if the repository isn't accessed via a CLI, or if it hasn't
76      * been initialized by {@link #ensureCommand} yet.
77      */
78     protected String RepoCommand;
79 
80     protected final List<String> ignoredFiles;
81 
82     protected final List<String> ignoredDirs;
83 
84     /**
85      * List of &lt;revision, tags&gt; pairs for repositories which display tags
86      * only for files changed by the tagged commit.
87      */
88     protected TreeSet<TagEntry> tagList = null;
89 
fileHasHistory(File file)90     abstract boolean fileHasHistory(File file);
91 
92     /**
93      * Check if the repository supports {@code getHistory()} requests for whole
94      * directories at once.
95      *
96      * @return {@code true} if the repository can get history for directories
97      */
hasHistoryForDirectories()98     abstract boolean hasHistoryForDirectories();
99 
100     /**
101      * Get the history for the specified file or directory.
102      * It is expected that {@link History#getRenamedFiles()} and {@link HistoryEntry#getFiles()} are empty for files.
103      *
104      * @param file the file to get the history for
105      * @return history log for file
106      * @throws HistoryException on error accessing the history
107      */
getHistory(File file)108     abstract History getHistory(File file) throws HistoryException;
109 
110     /**
111      * This is generic implementation that retrieves the full history of given file
112      * and returns the latest history entry. This is obviously very inefficient, both in terms of memory and I/O.
113      * The extending classes are encouraged to implement their own version.
114      * @param file file
115      * @return last history entry or null
116      * @throws HistoryException on error
117      */
getLastHistoryEntry(File file, boolean ui)118     public HistoryEntry getLastHistoryEntry(File file, boolean ui) throws HistoryException {
119         History history;
120         try {
121             history = HistoryGuru.getInstance().getHistory(file, false, ui);
122         } catch (HistoryException ex) {
123             LOGGER.log(Level.WARNING, "failed to get history for {0}", file);
124             return null;
125         }
126 
127         if (history != null) {
128             return history.getLastHistoryEntry();
129         } else {
130             return null;
131         }
132     }
133 
Repository()134     protected Repository() {
135         super();
136         ignoredFiles = new ArrayList<>();
137         ignoredDirs = new ArrayList<>();
138     }
139 
140     /**
141      * Gets the instance's repository command, primarily for testing purposes.
142      * @return null if not {@link #isWorking()}, or otherwise a defined command
143      */
getRepoCommand()144     public String getRepoCommand() {
145         isWorking();
146         return RepoCommand;
147     }
148 
149     /**
150      * <p>
151      * Get the history after a specified revision.
152      * <p>
153      * <p>The default implementation first fetches the full history and then throws
154      * away the oldest revisions. This is not efficient, so subclasses should
155      * override it in order to get good performance. Once every subclass has
156      * implemented a more efficient method, the default implementation should be
157      * removed and made abstract.
158      *
159      * @param file the file to get the history for
160      * @param sinceRevision the revision right before the first one to return,
161      * or {@code null} to return the full history
162      * @return partial history for file
163      * @throws HistoryException on error accessing the history
164      */
getHistory(File file, String sinceRevision)165     History getHistory(File file, String sinceRevision) throws HistoryException {
166 
167         // If we want an incremental history update and get here, warn that
168         // it may be slow.
169         if (sinceRevision != null) {
170             LOGGER.log(Level.WARNING,
171                     "Incremental history retrieval is not implemented for {0}.",
172                     getClass().getSimpleName());
173             LOGGER.log(Level.WARNING,
174                     "Falling back to slower full history retrieval.");
175         }
176 
177         History history = getHistory(file);
178 
179         if (sinceRevision == null) {
180             return history;
181         }
182 
183         List<HistoryEntry> partial = new ArrayList<>();
184         for (HistoryEntry entry : history.getHistoryEntries()) {
185             partial.add(entry);
186             if (sinceRevision.equals(entry.getRevision())) {
187                 // Found revision right before the first one to return.
188                 break;
189             }
190         }
191 
192         removeAndVerifyOldestChangeset(partial, sinceRevision);
193         history.setHistoryEntries(partial);
194         return history;
195     }
196 
197     /**
198      * Remove the oldest changeset from a list (assuming sorted with most recent
199      * changeset first) and verify that it is the changeset we expected to find
200      * there.
201      *
202      * @param entries a list of {@code HistoryEntry} objects
203      * @param revision the revision we expect the oldest entry to have
204      * @throws HistoryException if the oldest entry was not the one we expected
205      */
removeAndVerifyOldestChangeset(List<HistoryEntry> entries, String revision)206     void removeAndVerifyOldestChangeset(List<HistoryEntry> entries, String revision) throws HistoryException {
207 
208         HistoryEntry entry = entries.isEmpty() ? null : entries.remove(entries.size() - 1);
209 
210         // TODO We should check more thoroughly that the changeset is the one
211         // we expected it to be, since some SCMs may change the revision
212         // numbers so that identical revision numbers does not always mean
213         // identical changesets. We could for example get the cached changeset
214         // and compare more fields, like author and date.
215         if (entry == null || !revision.equals(entry.getRevision())) {
216             throw new HistoryException("Cached revision '" + revision
217                     + "' not found in the repository "
218                     + getDirectoryName());
219         }
220     }
221 
222     /**
223      * Gets the contents of a specific version of a named file, and copies
224      * into the specified target file.
225      *
226      * @param target a required target file which will be overwritten
227      * @param parent the name of the directory containing the file
228      * @param basename the name of the file to get
229      * @param rev the revision to get
230      * @return {@code true} if contents were found
231      * @throws java.io.IOException if an I/O error occurs
232      */
getHistoryGet(File target, String parent, String basename, String rev)233     public boolean getHistoryGet(File target, String parent, String basename, String rev) throws IOException {
234         try (FileOutputStream out = new FileOutputStream(target)) {
235             return getHistoryGet(out, parent, basename, rev);
236         }
237     }
238 
239     /**
240      * Gets an {@link InputStream} of the contents of a specific version of a
241      * named file.
242      * @param parent the name of the directory containing the file
243      * @param basename the name of the file to get
244      * @param rev the revision to get
245      * @return a defined instance if contents were found; or else {@code null}
246      */
247     @Nullable
getHistoryGet(String parent, String basename, String rev)248     public InputStream getHistoryGet(String parent, String basename, String rev) {
249         ByteArrayOutputStream out = new ByteArrayOutputStream();
250         if (getHistoryGet(out, parent, basename, rev)) {
251             return new ByteArrayInputStream(out.toByteArray());
252         }
253         return null;
254     }
255 
256     /**
257      * Subclasses must override to get the contents of a specific version of a
258      * named file, and copy to the specified {@code sink}.
259      *
260      * @param out a defined instance of OutputStream
261      * @param parent the name of the directory containing the file
262      * @param basename the name of the file to get
263      * @param rev the revision to get
264      * @return a value indicating if the get was successful.
265      */
getHistoryGet(OutputStream out, String parent, String basename, String rev)266     abstract boolean getHistoryGet(OutputStream out, String parent, String basename, String rev);
267 
268     /**
269      * Checks whether this parser can annotate files.
270      *
271      * @param file file to check
272      * @return <code>true</code> if annotation is supported
273      */
fileHasAnnotation(File file)274     abstract boolean fileHasAnnotation(File file);
275 
276     /**
277      * Returns if this repository tags only files changed in last commit, i.e.
278      * if we need to prepare list of repository-wide tags prior to creation of file history entries.
279      *
280      * @return True if we need tag list creation prior to file parsing, false by default.
281      */
hasFileBasedTags()282     boolean hasFileBasedTags() {
283         return false;
284     }
285 
getTagList()286     TreeSet<TagEntry> getTagList() {
287         return this.tagList;
288     }
289 
290     /**
291      * Assign tags to changesets they represent. The complete list of tags must
292      * be pre-built using {@code getTagList()}. Then this function squeezes all
293      * tags to changesets which actually exist in the history of given file.
294      * Must be implemented repository-specific.
295      *
296      * @see #getTagList
297      * @param hist History object we want to assign tags to.
298      */
assignTagsInHistory(History hist)299     void assignTagsInHistory(History hist) {
300         if (hist == null) {
301             return;
302         }
303 
304         if (this.getTagList() == null) {
305             if (RuntimeEnvironment.getInstance().isIndexer()) {
306                 throw new IllegalStateException("getTagList() is null");
307             } else {
308                 return;
309             }
310         }
311 
312         Iterator<TagEntry> it = this.getTagList().descendingIterator();
313         TagEntry lastTagEntry = null;
314         for (HistoryEntry ent : hist.getHistoryEntries()) {
315             // Assign all tags created since the last revision
316             // TODO: is there better way to do this? We need to "repeat"
317             //   last element returned by call to next()
318             while (lastTagEntry != null || it.hasNext()) {
319                 if (lastTagEntry == null) {
320                     lastTagEntry = it.next();
321                 }
322                 if (lastTagEntry.compareTo(ent) >= 0) {
323                     hist.addTags(ent, lastTagEntry.getTags());
324                 } else {
325                     break;
326                 }
327                 if (it.hasNext()) {
328                     lastTagEntry = it.next();
329                 } else {
330                     lastTagEntry = null;
331                 }
332             }
333         }
334     }
335 
336     /**
337      * Create internal list of all tags in this repository.
338      *
339      * @param directory directory of the repository
340      * @param cmdType command timeout type
341      */
buildTagList(File directory, CommandTimeoutType cmdType)342     protected void buildTagList(File directory, CommandTimeoutType cmdType) {
343         this.tagList = null;
344     }
345 
346     /**
347      * Annotate the specified revision of a file.
348      *
349      * @param file the file to annotate
350      * @param revision revision of the file. Either {@code null} or a non-empty
351      * string.
352      * @return an <code>Annotation</code> object
353      * @throws java.io.IOException if an error occurs
354      */
annotate(File file, String revision)355     abstract Annotation annotate(File file, String revision) throws IOException;
356 
357     /**
358      * Return revision for annotate view.
359      *
360      * @param historyRevision full revision
361      * @return revision string suitable for matching into annotation
362      */
getRevisionForAnnotate(String historyRevision)363     protected String getRevisionForAnnotate(String historyRevision) {
364         return historyRevision;
365     }
366 
doCreateCache(HistoryCache cache, String sinceRevision, File directory)367     protected void doCreateCache(HistoryCache cache, String sinceRevision, File directory) throws HistoryException {
368         History history = getHistory(directory, sinceRevision);
369         finishCreateCache(cache, history, null);
370     }
371 
372     /**
373      * Create a history log cache for all files in this repository.
374      * {@code getHistory()} is used to fetch the history for the entire
375      * repository. If {@code hasHistoryForDirectories()} returns {@code false},
376      * this method is a no-op.
377      *
378      * @param cache the cache instance in which to store the history log
379      * @param sinceRevision if non-null, incrementally update the cache with all
380      * revisions after the specified revision; otherwise, create the full
381      * history starting with the initial revision
382      *
383      * @throws HistoryException on error
384      */
createCache(HistoryCache cache, String sinceRevision)385     final void createCache(HistoryCache cache, String sinceRevision) throws HistoryException {
386 
387         if (!isWorking()) {
388             return;
389         }
390 
391         // If it is not possible to get history for a directory, we can't create the cache
392         // this way. Just give up and return.
393         if (!hasHistoryForDirectories()) {
394             LOGGER.log(Level.INFO,
395                     "Skipping creation of history cache for {0}, since retrieval "
396                             + "of history for directories is not implemented for this "
397                             + "repository type.", getDirectoryName());
398             return;
399         }
400 
401         File directory = new File(getDirectoryName());
402 
403         doCreateCache(cache, sinceRevision, directory);
404 
405         LOGGER.log(Level.FINE, "Done storing history cache for repository {0}", getDirectoryName());
406     }
407 
408     /**
409      * Actually store the history in history cache.
410      * @param cache history cache object
411      * @param history history to store
412      * @param tillRevision end revision (matters only for renamed files), can be null
413      * @throws HistoryException on error
414      */
finishCreateCache(HistoryCache cache, History history, String tillRevision)415     void finishCreateCache(HistoryCache cache, History history, String tillRevision) throws HistoryException {
416         // We need to refresh list of tags for incremental reindex.
417         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
418         if (env.isTagsEnabled() && this.hasFileBasedTags()) {
419             this.buildTagList(new File(this.getDirectoryName()), CommandTimeoutType.INDEXER);
420         }
421 
422         if (history != null) {
423             cache.store(history, this, tillRevision);
424         }
425     }
426 
427     /**
428      * Check if this it the right repository type for the given file.
429      *
430      * @param file File to check if this is a repository for.
431      * @param cmdType command timeout type
432      * @return true if this is the correct repository for this file/directory.
433      */
isRepositoryFor(File file, CommandTimeoutType cmdType)434     abstract boolean isRepositoryFor(File file, CommandTimeoutType cmdType);
435 
isRepositoryFor(File file)436     public final boolean isRepositoryFor(File file) {
437         return isRepositoryFor(file, CommandTimeoutType.INDEXER);
438     }
439 
440     /**
441      * Determine parent of this repository.
442      */
determineParent(CommandTimeoutType cmdType)443     abstract String determineParent(CommandTimeoutType cmdType) throws IOException;
444 
445     /**
446      * Determine parent of this repository.
447      * @return parent
448      * @throws java.io.IOException I/O exception
449      */
determineParent()450     public final String determineParent() throws IOException {
451         return determineParent(CommandTimeoutType.INDEXER);
452     }
453 
454     /**
455      * Determine branch of this repository.
456      */
determineBranch(CommandTimeoutType cmdType)457     abstract String determineBranch(CommandTimeoutType cmdType) throws IOException;
458 
459     /**
460      * Determine branch of this repository.
461      * @return branch
462      * @throws java.io.IOException I/O exception
463      */
determineBranch()464     public final String determineBranch() throws IOException {
465         return determineBranch(CommandTimeoutType.INDEXER);
466     }
467 
468     /**
469      * Get list of ignored files for this repository.
470      * @return list of strings
471      */
getIgnoredFiles()472     public List<String> getIgnoredFiles() {
473         return ignoredFiles;
474     }
475 
476     /**
477      * Get list of ignored directories for this repository.
478      * @return list of strings
479      */
getIgnoredDirs()480     public List<String> getIgnoredDirs() {
481         return ignoredDirs;
482     }
483 
484     /**
485      * Determine and return the current version of the repository.
486      *
487      * This operation is considered "heavy" so this function should not be
488      * called on every web request.
489      *
490      * @param cmdType command timeout type
491      * @return the version
492      * @throws IOException if I/O exception occurred
493      */
determineCurrentVersion(CommandTimeoutType cmdType)494     abstract String determineCurrentVersion(CommandTimeoutType cmdType) throws IOException;
495 
determineCurrentVersion()496     public final String determineCurrentVersion() throws IOException {
497         return determineCurrentVersion(CommandTimeoutType.INDEXER);
498     }
499 
500     /**
501      * Returns true if this repository supports sub repositories (a.k.a.
502      * forests).
503      *
504      * @return true if this repository supports sub repositories
505      */
506     @SuppressWarnings("PMD.EmptyMethodInAbstractClassShouldBeAbstract")
supportsSubRepositories()507     boolean supportsSubRepositories() {
508         return false;
509     }
510 
511     /**
512      * Subclasses can override to get a value indicating that a repository implementation is nestable.
513      * @return {@code false}
514      */
isNestable()515     boolean isNestable() {
516         return false;
517     }
518 
getDateFormat()519     private DateFormat getDateFormat() {
520         return new RepositoryDateFormat();
521     }
522 
523     /**
524      * Format the given date according to the output format.
525      *
526      * @param date the date
527      * @return the string representing the formatted date
528      * @see #OUTPUT_DATE_FORMAT
529      */
format(Date date)530     public static String format(Date date) {
531         synchronized (OUTPUT_DATE_FORMAT) {
532             return OUTPUT_DATE_FORMAT.format(date);
533         }
534     }
535 
536     /**
537      * Parse the given string as a date object with the repository date formats.
538      *
539      * @param dateString the string representing the date
540      * @return the instance of a date
541      * @throws ParseException when the string can not be parsed correctly
542      */
parse(String dateString)543     public Date parse(String dateString) throws ParseException {
544         final DateFormat format = getDateFormat();
545         synchronized (format) {
546             return format.parse(dateString);
547         }
548     }
549 
checkCmd(String... args)550     static Boolean checkCmd(String... args) {
551         Executor exec = new Executor(args);
552         return exec.exec(false) == 0;
553     }
554 
getCommand(Class<? extends Repository> repoClass, String propertyKey, String fallbackCommand)555     protected static String getCommand(Class<? extends Repository> repoClass, String propertyKey, String fallbackCommand) {
556         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
557         String className = repoClass.getCanonicalName();
558         String command = env.getRepoCmd(className);
559         if (command == null) {
560             command = System.getProperty(propertyKey, fallbackCommand);
561             env.setRepoCmd(className, command);
562         }
563         return command;
564     }
565 
566     /**
567      * Set the name of the external client command that should be used to access
568      * the repository wrt. the given parameters. Does nothing, if this
569      * repository's <var>RepoCommand</var> has already been set (i.e. has a
570      * non-{@code null} value).
571      *
572      * @param propertyKey property key to lookup the corresponding system
573      * property.
574      * @param fallbackCommand the command to use, if lookup fails.
575      * @return the command to use.
576      * @see #RepoCommand
577      */
ensureCommand(String propertyKey, String fallbackCommand)578     protected String ensureCommand(String propertyKey, String fallbackCommand) {
579         if (RepoCommand == null) {
580             RepoCommand = getCommand(this.getClass(), propertyKey, fallbackCommand);
581         }
582 
583         return RepoCommand;
584     }
585 
getRepoRelativePath(final File file)586     protected String getRepoRelativePath(final File file)
587             throws IOException {
588 
589         String filename = file.getPath();
590         String repoDirName = getDirectoryName();
591 
592         String abs = file.getCanonicalPath();
593         if (abs.startsWith(repoDirName)) {
594             if (abs.length() > repoDirName.length()) {
595                 filename = abs.substring(repoDirName.length() + 1);
596             }
597         } else {
598             abs = file.getAbsolutePath();
599             if (abs.startsWith(repoDirName) && abs.length() >
600                 repoDirName.length()) {
601                 filename = abs.substring(repoDirName.length() + 1);
602             }
603         }
604         return filename;
605     }
606 
607     /**
608      * Copies all bytes from {@code in} to the {@code sink}.
609      * @return the number of writes to {@code sink}
610      */
copyBytes(BufferSink sink, InputStream in)611     static int copyBytes(BufferSink sink, InputStream in) throws IOException {
612         byte[] buffer = new byte[8 * 1024];
613         int iterations = 0;
614         int len;
615         while ((len = in.read(buffer)) != -1) {
616             if (len > 0) {
617                 ++iterations;
618                 sink.write(buffer, 0, len);
619             }
620         }
621         return iterations;
622     }
623 
624     static class HistoryRevResult {
625         boolean success;
626         long iterations;
627     }
628 
629     private class RepositoryDateFormat extends DateFormat {
630         private static final long serialVersionUID = -6951382723884436414L;
631 
632         private final Locale locale = Locale.ENGLISH;
633         // NOTE: SimpleDateFormat is not thread-safe, lock must be held when used
634         private final SimpleDateFormat[] formatters = new SimpleDateFormat[datePatterns.length];
635 
636         {
637             // initialize date formatters
638             for (int i = 0; i < datePatterns.length; i++) {
639                 formatters[i] = new SimpleDateFormat(datePatterns[i], locale);
640                 /*
641                  * TODO: the following would be nice - but currently it
642                  * could break the compatibility with some repository dates
643                  */
644                 // formatters[i].setLenient(false);
645             }
646         }
647 
648         @Override
format(Date date, StringBuffer toAppendTo, FieldPosition fieldPosition)649         public StringBuffer format(Date date, StringBuffer toAppendTo, FieldPosition fieldPosition) {
650             throw new UnsupportedOperationException("not implemented");
651         }
652 
653         @Override
parse(String source)654         public Date parse(String source) throws ParseException {
655             ParseException head = null, tail = null;
656             for (SimpleDateFormat formatter : formatters) {
657                 try {
658                     return formatter.parse(source);
659                 } catch (ParseException ex1) {
660                     /*
661                      * Adding all exceptions together to get some info in
662                      * the logs.
663                      */
664                     ex1 = new ParseException(
665                             String.format("%s with format \"%s\" and locale \"%s\"",
666                                     ex1.getMessage(),
667                                     formatter.toPattern(),
668                                     locale),
669                             ex1.getErrorOffset()
670                     );
671                     if (head == null) {
672                         head = tail = ex1;
673                     } else {
674                         tail.initCause(ex1);
675                         tail = ex1;
676                     }
677                 }
678             }
679             throw head != null ? head : new ParseException(String.format("Unparseable date: \"%s\"", source), 0);
680         }
681 
682         @Override
parse(String source, ParsePosition pos)683         public Date parse(String source, ParsePosition pos) {
684             throw new UnsupportedOperationException("not implemented");
685         }
686     }
687 }
688