/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2008, 2022, Oracle and/or its affiliates. All rights reserved.
 * Portions Copyright (c) 2017, 2020, Chris Fraire <cfraire@me.com>.
 */
package org.opengrok.indexer.history;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.text.DateFormat;
import java.text.FieldPosition;
import java.text.ParseException;
import java.text.ParsePosition;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.TreeSet;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.jetbrains.annotations.Nullable;
import org.opengrok.indexer.configuration.CommandTimeoutType;
import org.opengrok.indexer.configuration.RuntimeEnvironment;
import org.opengrok.indexer.logger.LoggerFactory;
import org.opengrok.indexer.util.BufferSink;
import org.opengrok.indexer.util.Executor;

/**
 * Abstract base class for an external repository.
 * <p>
 * Concrete repository types supply the history, annotation, parent/branch and
 * version primitives that are declared {@code abstract} here. This class adds
 * the shared plumbing built on top of them: fallback implementations for
 * incremental history and "last entry" lookups, history cache creation,
 * tag assignment, repository command resolution and date formatting/parsing.
 *
 * @author Trond Norbye
 */
public abstract class Repository extends RepositoryInfo {

    private static final long serialVersionUID = -203179700904894217L;

    private static final Logger LOGGER = LoggerFactory.getLogger(Repository.class);

    /**
     * Format used for printing the date in {@code currentVersion}.
     * <p>
     * NOTE: SimpleDateFormat is not thread-safe, the instance's lock must be
     * held while formatting (see {@link #format(Date)}).
     */
    protected static final SimpleDateFormat OUTPUT_DATE_FORMAT =
            new SimpleDateFormat("yyyy-MM-dd HH:mm Z");

    /**
     * The command with which to access the external repository. Can be
     * {@code null} if the repository isn't accessed via a CLI, or if it hasn't
     * been initialized by {@link #ensureCommand} yet.
     */
    protected String RepoCommand;

    /** File names ignored for this repository; exposed via {@link #getIgnoredFiles()}. */
    protected final List<String> ignoredFiles;

    /** Directory names ignored for this repository; exposed via {@link #getIgnoredDirs()}. */
    protected final List<String> ignoredDirs;

    /**
     * List of &lt;revision, tags&gt; pairs for repositories which display tags
     * only for files changed by the tagged commit.
     */
    protected TreeSet<TagEntry> tagList = null;

    /**
     * Tells whether history is available for the given file. The exact
     * criteria are left to the implementing repository type.
     *
     * @param file the file to check
     * @return {@code true} if the file has history
     */
    abstract boolean fileHasHistory(File file);

    /**
     * Check if the repository supports {@code getHistory()} requests for whole
     * directories at once.
     *
     * @return {@code true} if the repository can get history for directories
     */
    abstract boolean hasHistoryForDirectories();

    /**
     * Get the history for the specified file or directory.
     * It is expected that {@link History#getRenamedFiles()} and {@link HistoryEntry#getFiles()} are empty for files.
     *
     * @param file the file to get the history for
     * @return history log for file
     * @throws HistoryException on error accessing the history
     */
    abstract History getHistory(File file) throws HistoryException;

    /**
     * Generic implementation that retrieves the full history of the given file
     * and returns the latest history entry. This is obviously very inefficient,
     * both in terms of memory and I/O, so extending classes are encouraged to
     * implement their own version.
     * <p>
     * A failure to retrieve the history is logged (including its cause) and
     * reported to the caller as {@code null} rather than as an exception.
     *
     * @param file file
     * @param ui passed through to {@code HistoryGuru#getHistory}
     * @return last history entry or {@code null}
     * @throws HistoryException on error
     */
    public HistoryEntry getLastHistoryEntry(File file, boolean ui) throws HistoryException {
        History history;
        try {
            history = HistoryGuru.getInstance().getHistory(file, false, ui);
        } catch (HistoryException ex) {
            // Logger.log(Level, String, Throwable) does not expand "{0}" placeholders,
            // hence the message is formatted up front so that the cause can be attached.
            LOGGER.log(Level.WARNING, String.format("failed to get history for %s", file), ex);
            return null;
        }

        return history != null ? history.getLastHistoryEntry() : null;
    }

    /**
     * Initializes the lists of ignored files and directories to empty lists.
     */
    protected Repository() {
        super();
        ignoredFiles = new ArrayList<>();
        ignoredDirs = new ArrayList<>();
    }

    /**
     * Gets the instance's repository command, primarily for testing purposes.
     * @return null if not {@link #isWorking()}, or otherwise a defined command
     */
    public String getRepoCommand() {
        // The result is deliberately ignored; the call is made for its side effect
        // (presumably giving the implementation the chance to resolve RepoCommand).
        isWorking();
        return RepoCommand;
    }

    /**
     * Get the history after a specified revision.
     * <p>
     * The default implementation first fetches the full history and then throws
     * away the oldest revisions. This is not efficient, so subclasses should
     * override it in order to get good performance. Once every subclass has
     * implemented a more efficient method, the default implementation should be
     * removed and made abstract.
     *
     * @param file the file to get the history for
     * @param sinceRevision the revision right before the first one to return,
     * or {@code null} to return the full history
     * @return partial history for file
     * @throws HistoryException on error accessing the history, or if
     * {@code sinceRevision} is not found in the history
     */
    History getHistory(File file, String sinceRevision) throws HistoryException {

        // If we want an incremental history update and get here, warn that
        // it may be slow.
        if (sinceRevision != null) {
            LOGGER.log(Level.WARNING,
                    "Incremental history retrieval is not implemented for {0}.",
                    getClass().getSimpleName());
            LOGGER.log(Level.WARNING,
                    "Falling back to slower full history retrieval.");
        }

        History history = getHistory(file);

        if (sinceRevision == null) {
            return history;
        }

        // Collect entries (most recent first) up to and including sinceRevision.
        List<HistoryEntry> partial = new ArrayList<>();
        for (HistoryEntry entry : history.getHistoryEntries()) {
            partial.add(entry);
            if (sinceRevision.equals(entry.getRevision())) {
                // Found revision right before the first one to return.
                break;
            }
        }

        // The last collected entry must be sinceRevision itself; drop it.
        removeAndVerifyOldestChangeset(partial, sinceRevision);
        history.setHistoryEntries(partial);
        return history;
    }

    /**
     * Remove the oldest changeset from a list (assuming sorted with most recent
     * changeset first) and verify that it is the changeset we expected to find
     * there.
     *
     * @param entries a list of {@code HistoryEntry} objects
     * @param revision the revision we expect the oldest entry to have
     * @throws HistoryException if the list was empty or the oldest entry was
     * not the one we expected
     */
    void removeAndVerifyOldestChangeset(List<HistoryEntry> entries, String revision) throws HistoryException {

        HistoryEntry entry = entries.isEmpty() ? null : entries.remove(entries.size() - 1);

        // TODO We should check more thoroughly that the changeset is the one
        // we expected it to be, since some SCMs may change the revision
        // numbers so that identical revision numbers does not always mean
        // identical changesets. We could for example get the cached changeset
        // and compare more fields, like author and date.
        if (entry == null || !revision.equals(entry.getRevision())) {
            throw new HistoryException("Cached revision '" + revision
                    + "' not found in the repository "
                    + getDirectoryName());
        }
    }

    /**
     * Gets the contents of a specific version of a named file, and copies
     * into the specified target file.
     *
     * @param target a required target file which will be overwritten
     * @param parent the name of the directory containing the file
     * @param basename the name of the file to get
     * @param rev the revision to get
     * @return {@code true} if contents were found
     * @throws java.io.IOException if an I/O error occurs
     */
    public boolean getHistoryGet(File target, String parent, String basename, String rev) throws IOException {
        try (FileOutputStream out = new FileOutputStream(target)) {
            return getHistoryGet(out, parent, basename, rev);
        }
    }

    /**
     * Gets an {@link InputStream} of the contents of a specific version of a
     * named file. The contents are buffered completely in memory.
     *
     * @param parent the name of the directory containing the file
     * @param basename the name of the file to get
     * @param rev the revision to get
     * @return a defined instance if contents were found; or else {@code null}
     */
    @Nullable
    public InputStream getHistoryGet(String parent, String basename, String rev) {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        if (getHistoryGet(out, parent, basename, rev)) {
            return new ByteArrayInputStream(out.toByteArray());
        }
        return null;
    }

    /**
     * Subclasses must override to get the contents of a specific version of a
     * named file, and copy to the specified {@code out} stream.
     *
     * @param out a defined instance of OutputStream
     * @param parent the name of the directory containing the file
     * @param basename the name of the file to get
     * @param rev the revision to get
     * @return a value indicating if the get was successful.
     */
    abstract boolean getHistoryGet(OutputStream out, String parent, String basename, String rev);

    /**
     * Checks whether this parser can annotate files.
     *
     * @param file file to check
     * @return <code>true</code> if annotation is supported
     */
    abstract boolean fileHasAnnotation(File file);

    /**
     * Returns if this repository tags only files changed in last commit, i.e.
     * if we need to prepare list of repository-wide tags prior to creation of file history entries.
     *
     * @return True if we need tag list creation prior to file parsing, false by default.
     */
    boolean hasFileBasedTags() {
        return false;
    }

    /**
     * Gets the tag list as last set up by {@link #buildTagList}.
     *
     * @return the current tag list, possibly {@code null}
     */
    TreeSet<TagEntry> getTagList() {
        return this.tagList;
    }

    /**
     * Assign tags to changesets they represent. The complete list of tags must
     * be pre-built using {@code getTagList()}. Then this function squeezes all
     * tags to changesets which actually exist in the history of given file.
     * Must be implemented repository-specific.
     * <p>
     * Does nothing for a {@code null} history. A missing tag list is an error
     * when running in the indexer and is silently tolerated otherwise.
     *
     * @see #getTagList
     * @param hist History object we want to assign tags to.
     * @throws IllegalStateException if the tag list is {@code null} while
     * running in the indexer
     */
    void assignTagsInHistory(History hist) {
        if (hist == null) {
            return;
        }

        // Read the list once: buildTagList() may reset the field, and a second
        // read after the null check could then fail with a NullPointerException.
        final TreeSet<TagEntry> tags = this.getTagList();
        if (tags == null) {
            if (RuntimeEnvironment.getInstance().isIndexer()) {
                throw new IllegalStateException("getTagList() is null");
            } else {
                return;
            }
        }

        // Walk the tags from the greatest to the smallest in parallel with the
        // history entries.
        Iterator<TagEntry> it = tags.descendingIterator();
        // Tag that was fetched from the iterator but not consumed yet.
        TagEntry lastTagEntry = null;
        for (HistoryEntry ent : hist.getHistoryEntries()) {
            // Assign all tags created since the last revision
            // TODO: is there better way to do this? We need to "repeat"
            // last element returned by call to next()
            while (lastTagEntry != null || it.hasNext()) {
                if (lastTagEntry == null) {
                    lastTagEntry = it.next();
                }
                if (lastTagEntry.compareTo(ent) >= 0) {
                    hist.addTags(ent, lastTagEntry.getTags());
                } else {
                    // Keep the pending tag for one of the following entries.
                    break;
                }
                if (it.hasNext()) {
                    lastTagEntry = it.next();
                } else {
                    lastTagEntry = null;
                }
            }
        }
    }

    /**
     * Create internal list of all tags in this repository. The default
     * implementation merely resets the list to {@code null}.
     *
     * @param directory directory of the repository
     * @param cmdType command timeout type
     */
    protected void buildTagList(File directory, CommandTimeoutType cmdType) {
        this.tagList = null;
    }

    /**
     * Annotate the specified revision of a file.
     *
     * @param file the file to annotate
     * @param revision revision of the file. Either {@code null} or a non-empty
     * string.
     * @return an <code>Annotation</code> object
     * @throws java.io.IOException if an error occurs
     */
    abstract Annotation annotate(File file, String revision) throws IOException;

    /**
     * Return revision for annotate view. The default implementation returns
     * the argument unchanged.
     *
     * @param historyRevision full revision
     * @return revision string suitable for matching into annotation
     */
    protected String getRevisionForAnnotate(String historyRevision) {
        return historyRevision;
    }

    /**
     * Fetches the history of {@code directory} after {@code sinceRevision} and
     * hands it over to {@link #finishCreateCache} without an end revision.
     *
     * @param cache the cache instance in which to store the history log
     * @param sinceRevision revision to start after, or {@code null} for full history
     * @param directory the directory to get the history for
     * @throws HistoryException on error
     */
    protected void doCreateCache(HistoryCache cache, String sinceRevision, File directory) throws HistoryException {
        History history = getHistory(directory, sinceRevision);
        finishCreateCache(cache, history, null);
    }

    /**
     * Create a history log cache for all files in this repository.
     * {@code getHistory()} is used to fetch the history for the entire
     * repository. If {@code hasHistoryForDirectories()} returns {@code false},
     * this method is a no-op. The same holds if the repository is not
     * {@link #isWorking() working}.
     *
     * @param cache the cache instance in which to store the history log
     * @param sinceRevision if non-null, incrementally update the cache with all
     * revisions after the specified revision; otherwise, create the full
     * history starting with the initial revision
     *
     * @throws HistoryException on error
     */
    final void createCache(HistoryCache cache, String sinceRevision) throws HistoryException {

        if (!isWorking()) {
            return;
        }

        // If it is not possible to get history for a directory, we can't create the cache
        // this way. Just give up and return.
        if (!hasHistoryForDirectories()) {
            LOGGER.log(Level.INFO,
                    "Skipping creation of history cache for {0}, since retrieval "
                            + "of history for directories is not implemented for this "
                            + "repository type.", getDirectoryName());
            return;
        }

        File directory = new File(getDirectoryName());

        doCreateCache(cache, sinceRevision, directory);

        LOGGER.log(Level.FINE, "Done storing history cache for repository {0}", getDirectoryName());
    }

    /**
     * Actually store the history in history cache. Nothing is stored for a
     * {@code null} history.
     *
     * @param cache history cache object
     * @param history history to store
     * @param tillRevision end revision (matters only for renamed files), can be null
     * @throws HistoryException on error
     */
    void finishCreateCache(HistoryCache cache, History history, String tillRevision) throws HistoryException {
        // We need to refresh list of tags for incremental reindex.
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        if (env.isTagsEnabled() && this.hasFileBasedTags()) {
            this.buildTagList(new File(this.getDirectoryName()), CommandTimeoutType.INDEXER);
        }

        if (history != null) {
            cache.store(history, this, tillRevision);
        }
    }

    /**
     * Check if this is the right repository type for the given file.
     *
     * @param file File to check if this is a repository for.
     * @param cmdType command timeout type
     * @return true if this is the correct repository for this file/directory.
     */
    abstract boolean isRepositoryFor(File file, CommandTimeoutType cmdType);

    /**
     * Check if this is the right repository type for the given file, using
     * the {@link CommandTimeoutType#INDEXER} timeout type.
     *
     * @param file File to check if this is a repository for.
     * @return true if this is the correct repository for this file/directory.
     */
    public final boolean isRepositoryFor(File file) {
        return isRepositoryFor(file, CommandTimeoutType.INDEXER);
    }

    /**
     * Determine parent of this repository.
     *
     * @param cmdType command timeout type
     * @return parent
     * @throws java.io.IOException I/O exception
     */
    abstract String determineParent(CommandTimeoutType cmdType) throws IOException;

    /**
     * Determine parent of this repository, using the
     * {@link CommandTimeoutType#INDEXER} timeout type.
     *
     * @return parent
     * @throws java.io.IOException I/O exception
     */
    public final String determineParent() throws IOException {
        return determineParent(CommandTimeoutType.INDEXER);
    }

    /**
     * Determine branch of this repository.
     *
     * @param cmdType command timeout type
     * @return branch
     * @throws java.io.IOException I/O exception
     */
    abstract String determineBranch(CommandTimeoutType cmdType) throws IOException;

    /**
     * Determine branch of this repository, using the
     * {@link CommandTimeoutType#INDEXER} timeout type.
     *
     * @return branch
     * @throws java.io.IOException I/O exception
     */
    public final String determineBranch() throws IOException {
        return determineBranch(CommandTimeoutType.INDEXER);
    }

    /**
     * Get list of ignored files for this repository.
     * @return list of strings
     */
    public List<String> getIgnoredFiles() {
        return ignoredFiles;
    }

    /**
     * Get list of ignored directories for this repository.
     * @return list of strings
     */
    public List<String> getIgnoredDirs() {
        return ignoredDirs;
    }

    /**
     * Determine and return the current version of the repository.
     *
     * This operation is considered "heavy" so this function should not be
     * called on every web request.
     *
     * @param cmdType command timeout type
     * @return the version
     * @throws IOException if I/O exception occurred
     */
    abstract String determineCurrentVersion(CommandTimeoutType cmdType) throws IOException;

    /**
     * Determine and return the current version of the repository, using the
     * {@link CommandTimeoutType#INDEXER} timeout type.
     *
     * @return the version
     * @throws IOException if I/O exception occurred
     */
    public final String determineCurrentVersion() throws IOException {
        return determineCurrentVersion(CommandTimeoutType.INDEXER);
    }

    /**
     * Returns true if this repository supports sub repositories (a.k.a.
     * forests).
     *
     * @return true if this repository supports sub repositories
     */
    @SuppressWarnings("PMD.EmptyMethodInAbstractClassShouldBeAbstract")
    boolean supportsSubRepositories() {
        return false;
    }

    /**
     * Subclasses can override to get a value indicating that a repository implementation is nestable.
     * @return {@code false}
     */
    boolean isNestable() {
        return false;
    }

    /**
     * Creates a new parser covering this repository's {@code datePatterns}.
     *
     * @return a fresh, unshared instance
     */
    private DateFormat getDateFormat() {
        return new RepositoryDateFormat();
    }

    /**
     * Format the given date according to the output format.
     *
     * @param date the date
     * @return the string representing the formatted date
     * @see #OUTPUT_DATE_FORMAT
     */
    public static String format(Date date) {
        // The formatter is shared and not thread-safe, so serialize access to it.
        synchronized (OUTPUT_DATE_FORMAT) {
            return OUTPUT_DATE_FORMAT.format(date);
        }
    }

    /**
     * Parse the given string as a date object with the repository date formats.
     *
     * @param dateString the string representing the date
     * @return the instance of a date
     * @throws ParseException when the string can not be parsed correctly
     */
    public Date parse(String dateString) throws ParseException {
        // The formatter is created for this call only and never shared with
        // another thread, so no locking is needed around it.
        return getDateFormat().parse(dateString);
    }

    /**
     * Runs the given command line and reports whether it exited with status 0.
     *
     * @param args the command and its arguments
     * @return {@code true} if the exit status was zero
     */
    static Boolean checkCmd(String... args) {
        Executor exec = new Executor(args);
        return exec.exec(false) == 0;
    }

    /**
     * Looks up the command registered in the {@link RuntimeEnvironment} for
     * the given repository class. If there is none, the system property
     * {@code propertyKey} (or {@code fallbackCommand} if the property is not
     * set) is used and registered for subsequent lookups.
     *
     * @param repoClass repository class; its canonical name is the lookup key
     * @param propertyKey name of the system property to consult
     * @param fallbackCommand the command to use if the property is not set
     * @return the command to use
     */
    protected static String getCommand(Class<? extends Repository> repoClass, String propertyKey, String fallbackCommand) {
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        String className = repoClass.getCanonicalName();
        String command = env.getRepoCmd(className);
        if (command == null) {
            command = System.getProperty(propertyKey, fallbackCommand);
            env.setRepoCmd(className, command);
        }
        return command;
    }

    /**
     * Set the name of the external client command that should be used to access
     * the repository wrt. the given parameters. Does nothing, if this
     * repository's <var>RepoCommand</var> has already been set (i.e. has a
     * non-{@code null} value).
     *
     * @param propertyKey property key to lookup the corresponding system
     * property.
     * @param fallbackCommand the command to use, if lookup fails.
     * @return the command to use.
     * @see #RepoCommand
     */
    protected String ensureCommand(String propertyKey, String fallbackCommand) {
        if (RepoCommand == null) {
            RepoCommand = getCommand(this.getClass(), propertyKey, fallbackCommand);
        }

        return RepoCommand;
    }

    /**
     * Gets the path of {@code file} relative to the repository directory.
     * <p>
     * The canonical path is tried first. If it is not located in the
     * repository directory, the absolute path is tried next. When neither is
     * strictly below the repository directory, {@link File#getPath()} is
     * returned as is.
     * <p>
     * A path only counts as located in the repository directory when the
     * directory name is followed by a path separator (or is the whole path).
     * A sibling such as {@code /src/foobar} is therefore not mistaken for
     * content of {@code /src/foo}.
     *
     * @param file the file to get the relative path for
     * @return repository relative path, or {@code file.getPath()} as fallback
     * @throws IOException if the canonical path cannot be determined
     */
    protected String getRepoRelativePath(final File file)
            throws IOException {

        final String repoDirName = getDirectoryName();

        final String canonical = file.getCanonicalPath();
        if (isSameOrBelow(canonical, repoDirName)) {
            // The repository directory itself has no relative part to return.
            return canonical.length() > repoDirName.length()
                    ? canonical.substring(repoDirName.length() + 1)
                    : file.getPath();
        }

        final String absolute = file.getAbsolutePath();
        if (absolute.length() > repoDirName.length() && isSameOrBelow(absolute, repoDirName)) {
            return absolute.substring(repoDirName.length() + 1);
        }
        return file.getPath();
    }

    /**
     * Checks that {@code path} is {@code directory} itself or continues with a
     * path separator right after it.
     */
    private static boolean isSameOrBelow(String path, String directory) {
        return path.startsWith(directory)
                && (path.length() == directory.length()
                || path.charAt(directory.length()) == File.separatorChar);
    }

    /**
     * Copies all bytes from {@code in} to the {@code sink}.
     *
     * @param sink the destination of the bytes
     * @param in the stream to read until its end
     * @return the number of writes to {@code sink}
     * @throws IOException if reading or writing fails
     */
    static int copyBytes(BufferSink sink, InputStream in) throws IOException {
        byte[] buffer = new byte[8 * 1024];
        int iterations = 0;
        int len;
        while ((len = in.read(buffer)) != -1) {
            // Zero-length reads are neither forwarded nor counted.
            if (len > 0) {
                ++iterations;
                sink.write(buffer, 0, len);
            }
        }
        return iterations;
    }

    /**
     * Simple holder of a success flag and an iteration count.
     */
    static class HistoryRevResult {
        boolean success;
        long iterations;
    }

    /**
     * Parse-only {@link DateFormat} which tries the enclosing repository's
     * {@code datePatterns} one after another with the English locale.
     */
    private class RepositoryDateFormat extends DateFormat {
        private static final long serialVersionUID = -6951382723884436414L;

        private final Locale locale = Locale.ENGLISH;
        // NOTE: SimpleDateFormat is not thread-safe. That is fine here because
        // Repository.parse() creates a new RepositoryDateFormat for every call.
        private final SimpleDateFormat[] formatters = new SimpleDateFormat[datePatterns.length];

        {
            // initialize date formatters
            for (int i = 0; i < datePatterns.length; i++) {
                formatters[i] = new SimpleDateFormat(datePatterns[i], locale);
                /*
                 * TODO: the following would be nice - but currently it
                 * could break the compatibility with some repository dates
                 */
                // formatters[i].setLenient(false);
            }
        }

        /**
         * Not supported.
         *
         * @throws UnsupportedOperationException always
         */
        @Override
        public StringBuffer format(Date date, StringBuffer toAppendTo, FieldPosition fieldPosition) {
            throw new UnsupportedOperationException("not implemented");
        }

        /**
         * Returns the result of the first pattern that parses {@code source}.
         *
         * @param source the date string
         * @return the parsed date
         * @throws ParseException if no pattern matches; the failures of the
         * individual patterns are chained as causes, first pattern first
         */
        @Override
        public Date parse(String source) throws ParseException {
            ParseException head = null;
            ParseException tail = null;
            for (SimpleDateFormat formatter : formatters) {
                try {
                    return formatter.parse(source);
                } catch (ParseException ex1) {
                    /*
                     * Adding all exceptions together to get some info in
                     * the logs.
                     */
                    ParseException detailed = new ParseException(
                            String.format("%s with format \"%s\" and locale \"%s\"",
                                    ex1.getMessage(),
                                    formatter.toPattern(),
                                    locale),
                            ex1.getErrorOffset()
                    );
                    if (head == null) {
                        head = tail = detailed;
                    } else {
                        tail.initCause(detailed);
                        tail = detailed;
                    }
                }
            }
            throw head != null ? head : new ParseException(String.format("Unparseable date: \"%s\"", source), 0);
        }

        /**
         * Not supported.
         *
         * @throws UnsupportedOperationException always
         */
        @Override
        public Date parse(String source, ParsePosition pos) {
            throw new UnsupportedOperationException("not implemented");
        }
    }
}