/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2008, 2022, Oracle and/or its affiliates. All rights reserved.
 * Portions Copyright (c) 2017, 2020, Chris Fraire <cfraire@me.com>.
 */
package org.opengrok.indexer.index;

import java.io.BufferedInputStream;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
import java.util.zip.GZIPOutputStream;

import jakarta.ws.rs.client.ClientBuilder;
import jakarta.ws.rs.client.Entity;
import jakarta.ws.rs.core.Response;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiTerms;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockFactory;
import org.apache.lucene.store.NativeFSLockFactory;
import org.apache.lucene.store.NoLockFactory;
import org.apache.lucene.store.SimpleFSLockFactory;
import org.apache.lucene.util.BytesRef;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.jetbrains.annotations.VisibleForTesting;
import org.opengrok.indexer.analysis.AbstractAnalyzer;
import org.opengrok.indexer.analysis.AnalyzerFactory;
import org.opengrok.indexer.analysis.AnalyzerGuru;
import org.opengrok.indexer.analysis.Ctags;
import org.opengrok.indexer.analysis.Definitions;
import org.opengrok.indexer.analysis.NullableNumLinesLOC;
import org.opengrok.indexer.analysis.NumLinesLOC;
import org.opengrok.indexer.configuration.PathAccepter;
import org.opengrok.indexer.configuration.Project;
import org.opengrok.indexer.configuration.RuntimeEnvironment;
import org.opengrok.indexer.history.FileCollector;
import org.opengrok.indexer.history.HistoryGuru;
import org.opengrok.indexer.history.Repository;
import org.opengrok.indexer.history.RepositoryInfo;
import org.opengrok.indexer.history.RepositoryWithHistoryTraversal;
import org.opengrok.indexer.logger.LoggerFactory;
import org.opengrok.indexer.search.QueryBuilder;
import org.opengrok.indexer.util.ForbiddenSymlinkException;
import org.opengrok.indexer.util.IOUtils;
import org.opengrok.indexer.util.ObjectPool;
import org.opengrok.indexer.util.Progress;
import org.opengrok.indexer.util.Statistics;
import org.opengrok.indexer.util.TandemPath;
import org.opengrok.indexer.web.Util;

import static org.opengrok.indexer.index.IndexerUtil.getWebAppHeaders;
import static org.opengrok.indexer.web.ApiUtils.waitForAsyncApi;

/**
 * This class is used to create / update the index databases. Currently, we use
 * one index database per project.
 *
 * @author Trond Norbye
 * @author Lubos Kosco, update for lucene 4.x, 5.x
 */
public class IndexDatabase {

    private static final Logger LOGGER = LoggerFactory.getLogger(IndexDatabase.class);

    private static final Comparator<File> FILENAME_COMPARATOR = Comparator.comparing(File::getName);

    private static final Set<String> CHECK_FIELDS;

    private static final Set<String> REVERT_COUNTS_FIELDS;

    private static final Object INSTANCE_LOCK = new Object();

    /**
     * Key is canonical path; Value is the first accepted, absolute path. Map
     * is ordered by canonical length (ASC) and then canonical value (ASC).
     * The map is accessed by a single thread running indexDown().
     */
    private final Map<String, IndexedSymlink> indexedSymlinks = new TreeMap<>(
            Comparator.comparingInt(String::length).thenComparing(o -> o));

    private final Project project;
    private FSDirectory indexDirectory;
    private IndexReader reader;
    private IndexWriter writer;
    private IndexAnalysisSettings3 settings;
    private PendingFileCompleter completer;
    private NumLinesLOCAggregator countsAggregator;
    private TermsEnum uidIter;
    private PostingsEnum postsIter;
    private PathAccepter pathAccepter;
    private AnalyzerGuru analyzerGuru;
    private File xrefDir;
    private boolean interrupted;
    private CopyOnWriteArrayList<IndexChangedListener> listeners;
    private File dirtyFile;
    private final Object lock = new Object();
    private boolean dirty;
    private boolean running;
    private boolean isCountingDeltas;
    private boolean isWithDirectoryCounts;
    private List<String> directories;
    private LockFactory lockfact;
    private final BytesRef emptyBR = new BytesRef("");

    // Directory where we store indexes
    public static final String INDEX_DIR = "index";
    public static final String XREF_DIR = "xref";
    public static final String SUGGESTER_DIR = "suggester";

    private final IndexDownArgsFactory indexDownArgsFactory;

    /**
     * Create a new instance of the Index Database. Use this constructor if you
     * don't use any projects.
     *
     * @throws java.io.IOException if an error occurs while creating directories
     */
    public IndexDatabase() throws IOException {
        this(null);
    }

    /**
     * Create a new instance of an Index Database for a given project.
     *
     * @param project the project to create the database for
     * @param factory {@link IndexDownArgsFactory} instance
     * @throws java.io.IOException if an error occurs while creating directories
     */
    public IndexDatabase(Project project, IndexDownArgsFactory factory) throws IOException {
        indexDownArgsFactory = factory;
        this.project = project;
        lockfact = NoLockFactory.INSTANCE;
        initialize();
    }

    @VisibleForTesting
    IndexDatabase(Project project) throws IOException {
        this(project, new IndexDownArgsFactory());
    }

    static {
        CHECK_FIELDS = new HashSet<>();
        CHECK_FIELDS.add(QueryBuilder.TYPE);

        REVERT_COUNTS_FIELDS = new HashSet<>();
        REVERT_COUNTS_FIELDS.add(QueryBuilder.D);
        REVERT_COUNTS_FIELDS.add(QueryBuilder.PATH);
        REVERT_COUNTS_FIELDS.add(QueryBuilder.NUML);
        REVERT_COUNTS_FIELDS.add(QueryBuilder.LOC);
    }
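    /*
     * A minimal driver sketch for this class (hypothetical caller code, not
     * part of this file; assumes a DefaultIndexChangedListener is available):
     *
     *   IndexChangedListener listener = new DefaultIndexChangedListener();
     *   CountDownLatch latch = IndexDatabase.updateAll(listener);
     *   latch.await(); // block until every project database has been updated
     */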
    /**
     * Update the index database for all the projects.
     *
     * @param listener where to signal the changes to the database
     * @throws IOException if an error occurs
     */
    static CountDownLatch updateAll(IndexChangedListener listener) throws IOException {

        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        List<IndexDatabase> dbs = new ArrayList<>();

        if (env.hasProjects()) {
            for (Project project : env.getProjectList()) {
                dbs.add(new IndexDatabase(project));
            }
        } else {
            dbs.add(new IndexDatabase());
        }

        IndexerParallelizer parallelizer = RuntimeEnvironment.getInstance().getIndexerParallelizer();
        CountDownLatch latch = new CountDownLatch(dbs.size());
        for (IndexDatabase d : dbs) {
            final IndexDatabase db = d;
            if (listener != null) {
                db.addIndexChangedListener(listener);
            }

            parallelizer.getFixedExecutor().submit(() -> {
                try {
                    db.update();
                } catch (Throwable e) {
                    LOGGER.log(Level.SEVERE,
                            String.format("Problem updating index database in directory %s: ",
                                    db.indexDirectory.getDirectory()), e);
                } finally {
                    latch.countDown();
                }
            });
        }
        return latch;
    }

    /**
     * Update the index database for a number of sub-directories.
     *
     * @param listener where to signal the changes to the database
     * @param paths list of paths to be indexed
     */
    public static void update(IndexChangedListener listener, List<String> paths) {
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        IndexerParallelizer parallelizer = env.getIndexerParallelizer();
        List<IndexDatabase> dbs = new ArrayList<>();

        for (String path : paths) {
            Project project = Project.getProject(path);
            if (project == null && env.hasProjects()) {
                LOGGER.log(Level.WARNING, "Could not find a project for \"{0}\"", path);
            } else {
                IndexDatabase db;

                try {
                    if (project == null) {
                        db = new IndexDatabase();
                    } else {
                        db = new IndexDatabase(project);
                    }

                    int idx = dbs.indexOf(db);
                    if (idx != -1) {
                        db = dbs.get(idx);
                    }

                    if (db.addDirectory(path)) {
                        if (idx == -1) {
                            dbs.add(db);
                        }
                    } else {
                        LOGGER.log(Level.WARNING, "Directory does not exist \"{0}\".", path);
                    }
                } catch (IOException e) {
                    LOGGER.log(Level.WARNING, "An error occurred while updating index", e);
                }
            }
        }

        for (final IndexDatabase db : dbs) {
            db.addIndexChangedListener(listener);
            parallelizer.getFixedExecutor().submit(() -> {
                try {
                    db.update();
                } catch (Throwable e) {
                    LOGGER.log(Level.SEVERE, "An error occurred while updating index", e);
                }
            });
        }
    }
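    /*
     * Illustrative (hypothetical) use of update() above for a subtree reindex:
     *
     *   IndexDatabase.update(listener, List.of("/myproject/subdir"));
     *
     * The work is submitted to the fixed executor; unlike updateAll(), no
     * CountDownLatch is returned, so completion is observable only through
     * the IndexChangedListener callbacks.
     */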
    @SuppressWarnings("PMD.CollapsibleIfStatements")
    private void initialize() throws IOException {
        synchronized (INSTANCE_LOCK) {
            RuntimeEnvironment env = RuntimeEnvironment.getInstance();
            File indexDir = new File(env.getDataRootFile(), INDEX_DIR);
            if (project != null) {
                indexDir = new File(indexDir, project.getPath());
            }

            if (!indexDir.exists() && !indexDir.mkdirs()) {
                // to avoid race conditions, just recheck..
                if (!indexDir.exists()) {
                    throw new FileNotFoundException("Failed to create root directory [" + indexDir.getAbsolutePath() + "]");
                }
            }

            lockfact = pickLockFactory(env);
            indexDirectory = FSDirectory.open(indexDir.toPath(), lockfact);
            pathAccepter = env.getPathAccepter();
            analyzerGuru = new AnalyzerGuru();
            xrefDir = new File(env.getDataRootFile(), XREF_DIR);
            listeners = new CopyOnWriteArrayList<>();
            dirtyFile = new File(indexDir, "dirty");
            dirty = dirtyFile.exists();
            directories = new ArrayList<>();
        }
    }

    /**
     * By default the indexer will traverse all directories in the project. If
     * you add directories with this function, the update will just process the
     * specified directories.
     *
     * @param dir The directory to scan
     * @return <code>true</code> if the directory is added, false otherwise
     */
    @SuppressWarnings("PMD.UseStringBufferForStringAppends")
    public boolean addDirectory(String dir) {
        String directory = dir;
        if (directory.startsWith("\\")) {
            directory = directory.replace('\\', '/');
        } else if (directory.charAt(0) != '/') {
            directory = "/" + directory;
        }
        File file = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), directory);
        if (file.exists()) {
            directories.add(directory);
            return true;
        }
        return false;
    }
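    /*
     * Illustrative behavior of addDirectory() above: "foo/bar" is registered
     * as "/foo/bar", and a Windows-style "\foo\bar" is normalized to
     * "/foo/bar" -- in both cases only if <sourceRoot>/foo/bar exists.
     */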
    private void showFileCount(String dir, IndexDownArgs args) {
        if (RuntimeEnvironment.getInstance().isPrintProgress()) {
            LOGGER.log(Level.INFO, String.format("Need to process: %d files for %s", args.curCount, dir));
        }
    }

    private void markProjectIndexed(Project project) {
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();

        // Successfully indexed the project. The message is sent even if
        // the project's isIndexed() is true because it triggers RepositoryInfo
        // refresh.
        if (project == null) {
            return;
        }

        // Also need to store the correct value in configuration
        // when indexer writes it to a file.
        project.setIndexed(true);

        if (env.getConfigURI() == null) {
            return;
        }

        Response response;
        try {
            response = ClientBuilder.newBuilder().connectTimeout(env.getConnectTimeout(), TimeUnit.SECONDS).build()
                    .target(env.getConfigURI())
                    .path("api")
                    .path("v1")
                    .path("projects")
                    .path(Util.uriEncode(project.getName()))
                    .path("indexed")
                    .request()
                    .headers(getWebAppHeaders())
                    .put(Entity.text(""));
        } catch (RuntimeException e) {
            LOGGER.log(Level.WARNING, String.format("Could not notify the webapp that project %s was indexed",
                    project), e);
            return;
        }

        if (response.getStatus() == Response.Status.ACCEPTED.getStatusCode()) {
            try {
                response = waitForAsyncApi(response);
            } catch (InterruptedException e) {
                LOGGER.log(Level.WARNING, "interrupted while waiting for API response", e);
            }
        }

        if (response.getStatusInfo().getFamily() != Response.Status.Family.SUCCESSFUL) {
            LOGGER.log(Level.WARNING, "Could not notify the webapp that project {0} was indexed: {1}",
                    new Object[] {project, response});
        }
    }
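    /*
     * The notification above amounts to an HTTP request of the form
     * PUT {configURI}/api/v1/projects/{name}/indexed with an empty body;
     * a 202 Accepted response is then polled via waitForAsyncApi().
     */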
    private static List<Repository> getRepositoriesForProject(Project project) {
        List<Repository> repositoryList = new ArrayList<>();

        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        List<RepositoryInfo> repositoryInfoList = env.getProjectRepositoriesMap().get(project);

        if (repositoryInfoList != null) {
            for (RepositoryInfo repositoryInfo : repositoryInfoList) {
                Repository repository = HistoryGuru.getInstance().getRepository(new File(repositoryInfo.getDirectoryName()));
                if (repository != null) {
                    repositoryList.add(repository);
                }
            }
        }

        return repositoryList;
    }

    /**
     * @return whether the repositories of given project are ready for history based reindex
     */
    private boolean isReadyForHistoryBasedReindex() {
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();

        // So far the history based reindex does not work without projects.
        if (!env.hasProjects()) {
            LOGGER.log(Level.FINEST, "projects are disabled, will be indexed by directory traversal.");
            return false;
        }

        if (project == null) {
            LOGGER.log(Level.FINEST, "no project, will be indexed by directory traversal.");
            return false;
        }

        // History needs to be enabled for the history cache to work (see the comment below).
        if (!project.isHistoryEnabled()) {
            LOGGER.log(Level.FINEST, "history is disabled, will be indexed by directory traversal.");
            return false;
        }

        // History cache is necessary to get the last indexed revision for given repository.
        if (!env.isHistoryCache()) {
            LOGGER.log(Level.FINEST, "history cache is disabled, will be indexed by directory traversal.");
            return false;
        }

        // Per project tunable can override the global tunable, therefore env.isHistoryBasedReindex() is not checked.
        if (!project.isHistoryBasedReindex()) {
            LOGGER.log(Level.FINEST, "history-based reindex is disabled, will be indexed by directory traversal.");
            return false;
        }

        /*
         * Check that the index is present for this project.
         * In case of the initial indexing, the traversal of all changesets would most likely be counterproductive,
         * assuming traversal of directory tree is cheaper than getting the files from SCM history
         * in such case.
         */
        try {
            if (getNumFiles() == 0) {
                LOGGER.log(Level.FINEST, "zero number of documents for project {0}, " +
                        "will be indexed by directory traversal.", project);
                return false;
            }
        } catch (IOException e) {
            LOGGER.log(Level.FINEST, "failed to get number of documents for project {0}, " +
                    "will be indexed by directory traversal.", project);
            return false;
        }

        // If there was no change to any of the repositories of the project, a FileCollector instance will still
        // be returned, however the list of files therein will be empty, which is a legitimate situation (no
        // change of the project). Only in the case where getFileCollector() returns null (hinting that something
        // went wrong) should the file based traversal be done.
        if (env.getFileCollector(project.getName()) == null) {
            LOGGER.log(Level.FINEST, "no file collector for project {0}, will be indexed by directory traversal.",
                    project);
            return false;
        }

        List<Repository> repositories = getRepositoriesForProject(project);
        // Projects without repositories have to be indexed using indexDown().
        if (repositories.isEmpty()) {
            LOGGER.log(Level.FINEST, "project {0} has no repositories, will be indexed by directory traversal.",
                    project);
            return false;
        }

        for (Repository repository : repositories) {
            if (!isReadyForHistoryBasedReindex(repository)) {
                return false;
            }
        }

        // Here it is assumed there are no files untracked by the repositories of this project.
        return true;
    }

    /**
     * @param repository Repository instance
     * @return true if the repository can be used for history based reindex
     */
    @VisibleForTesting
    boolean isReadyForHistoryBasedReindex(Repository repository) {
        if (!repository.isHistoryEnabled()) {
            LOGGER.log(Level.FINE, "history is disabled for {0}, " +
                    "the associated project {1} will be indexed using directory traversal",
                    new Object[]{repository, project});
            return false;
        }

        if (!repository.isHistoryBasedReindex()) {
            LOGGER.log(Level.FINE, "history based reindex is disabled for {0}, " +
                    "the associated project {1} will be indexed using directory traversal",
                    new Object[]{repository, project});
            return false;
        }

        if (!(repository instanceof RepositoryWithHistoryTraversal)) {
            LOGGER.log(Level.FINE, "project {0} has a repository {1} that does not support history traversal, " +
                    "the project will be indexed using directory traversal.",
                    new Object[]{project, repository});
            return false;
        }

        return true;
    }
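    /*
     * To summarize the checks above, history-based reindex is attempted only
     * when all of the following hold: projects are enabled and this database
     * has one, history and the history cache are enabled, the per-project
     * historyBasedReindex tunable is on, the index already contains documents,
     * a FileCollector exists for the project, and every repository of the
     * project supports history traversal.
     */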
    /**
     * Update the content of this index database.
     *
     * @throws IOException if an error occurs
     */
    public void update() throws IOException {
        synchronized (lock) {
            if (running) {
                throw new IOException("Indexer already running!");
            }
            running = true;
            interrupted = false;
        }

        RuntimeEnvironment env = RuntimeEnvironment.getInstance();

        reader = null;
        writer = null;
        settings = null;
        uidIter = null;
        postsIter = null;
        indexedSymlinks.clear();

        IOException finishingException = null;
        try {
            Analyzer analyzer = AnalyzerGuru.getAnalyzer();
            IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
            iwc.setRAMBufferSizeMB(env.getRamBufferSize());
            writer = new IndexWriter(indexDirectory, iwc);
            writer.commit(); // to make sure index exists on the disk
            completer = new PendingFileCompleter();

            if (directories.isEmpty()) {
                if (project == null) {
                    directories.add("");
                } else {
                    directories.add(project.getPath());
                }
            }

            for (String dir : directories) {
                File sourceRoot;
                if ("".equals(dir)) {
                    sourceRoot = env.getSourceRootFile();
                } else {
                    sourceRoot = new File(env.getSourceRootFile(), dir);
                }

                dir = Util.fixPathIfWindows(dir);

                String startUid = Util.path2uid(dir, "");
                reader = DirectoryReader.open(indexDirectory); // open existing index
                countsAggregator = new NumLinesLOCAggregator();
                settings = readAnalysisSettings();
                if (settings == null) {
                    settings = new IndexAnalysisSettings3();
                }
                Terms terms = null;
                if (reader.numDocs() > 0) {
                    terms = MultiTerms.getTerms(reader, QueryBuilder.U);

                    NumLinesLOCAccessor countsAccessor = new NumLinesLOCAccessor();
                    if (countsAccessor.hasStored(reader)) {
                        isWithDirectoryCounts = true;
                        isCountingDeltas = true;
                    } else {
                        boolean foundCounts = countsAccessor.register(countsAggregator, reader);
                        isWithDirectoryCounts = false;
                        isCountingDeltas = foundCounts;
                        if (!isCountingDeltas) {
                            LOGGER.info("Forcing reindexing to fully compute directory counts");
                        }
                    }
                } else {
                    isWithDirectoryCounts = false;
                    isCountingDeltas = false;
                }

                try {
                    if (terms != null) {
                        uidIter = terms.iterator();
                        TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startUid)); //init uid
                        if (stat == TermsEnum.SeekStatus.END) {
                            uidIter = null;
                            LOGGER.log(Level.WARNING,
                                    "Couldn''t find a start term for {0}, empty u field?",
                                    startUid);
                        }
                    }

                    // The actual indexing happens in indexParallel(). Here we merely collect the files
                    // that need to be indexed and the files that should be removed.
                    IndexDownArgs args = indexDownArgsFactory.getIndexDownArgs();
                    boolean usedHistory = getIndexDownArgs(dir, sourceRoot, args);

                    // Traverse the trailing terms. This needs to be done before indexParallel() because
                    // in some cases it can add items to the args parameter.
                    processTrailingTerms(startUid, usedHistory, args);

                    args.curCount = 0;
                    Statistics elapsed = new Statistics();
                    LOGGER.log(Level.INFO, "Starting indexing of directory {0}", dir);
                    indexParallel(dir, args);
                    elapsed.report(LOGGER, String.format("Done indexing of directory %s", dir),
                            "indexer.db.directory.index");

                    /*
                     * As a signifier that #Lines/LOC are comprehensively
                     * stored so that later calculation is in deltas mode, we
                     * need at least one D-document saved. For a repo with only
                     * non-code files, however, no true #Lines/LOC will have
                     * been saved. Subsequent re-indexing will do more work
                     * than necessary (until a source code file is placed). We
                     * can record zeroes for a fake file under the root to get
                     * a D-document even for this special repo situation.
                     *
                     * Metrics are aggregated for directories up to the root,
                     * so it suffices to put the fake directly under the root.
                     */
                    if (!isWithDirectoryCounts) {
                        final String ROOT_FAKE_FILE = "/.OpenGrok_fake_file";
                        countsAggregator.register(new NumLinesLOC(ROOT_FAKE_FILE, 0, 0));
                    }
                    NumLinesLOCAccessor countsAccessor = new NumLinesLOCAccessor();
                    countsAccessor.store(writer, reader, countsAggregator,
                            isWithDirectoryCounts && isCountingDeltas);

                    markProjectIndexed(project);
                } finally {
                    reader.close();
                }
            }

            // The RuntimeException thrown from the block above can prevent the writing from completing.
            // This is deliberate.
            try {
                finishWriting();
            } catch (IOException e) {
                finishingException = e;
            }
        } catch (RuntimeException ex) {
            LOGGER.log(Level.SEVERE,
                    "Failed with unexpected RuntimeException", ex);
            throw ex;
        } finally {
            completer = null;
            try {
                if (writer != null) {
                    writer.close();
                }
            } catch (IOException e) {
                if (finishingException == null) {
                    finishingException = e;
                }
                LOGGER.log(Level.WARNING,
                        "An error occurred while closing writer", e);
            } finally {
                writer = null;
                synchronized (lock) {
                    running = false;
                }
            }
        }

        if (finishingException != null) {
            throw finishingException;
        }

        if (!isInterrupted() && isDirty()) {
            if (env.isOptimizeDatabase()) {
                optimize();
            }
            env.setIndexTimestamp();
        }
    }
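    /*
     * Note on the uid terms iterated above and below: each document carries a
     * uid (the QueryBuilder.U field) that encodes the file's path and
     * modification time, so walking the terms in order lines up with the
     * sorted directory traversal. Illustrative round trip (the exact encoding
     * is Util's concern):
     *
     *   String uid = Util.path2uid("/proj/Main.java",
     *           DateTools.timeToString(file.lastModified(), DateTools.Resolution.MILLISECOND));
     *   String path = Util.uid2url(uid); // back to "/proj/Main.java"
     */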
    private void processTrailingTerms(String startUid, boolean usedHistory, IndexDownArgs args) throws IOException {
        while (uidIter != null && uidIter.term() != null
                && uidIter.term().utf8ToString().startsWith(startUid)) {

            if (usedHistory) {
                // Allow for forced reindex. For history based reindex the trailing terms
                // correspond to the files that have not changed. Such files might need to be re-indexed
                // if the index format changed.
                String termPath = Util.uid2url(uidIter.term().utf8ToString());
                File termFile = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), termPath);
                boolean matchOK = (isWithDirectoryCounts || isCountingDeltas) &&
                        checkSettings(termFile, termPath);
                if (!matchOK) {
                    removeFile(false);

                    args.curCount++;
                    args.works.add(new IndexFileWork(termFile, termPath));
                }
            } else {
                // Remove data for the trailing terms that getIndexDownArgs()
                // did not traverse. These correspond to the files that have been
                // removed and have higher ordering than any present files.
                removeFile(true);
            }

            BytesRef next = uidIter.next();
            if (next == null) {
                uidIter = null;
            }
        }
    }

    /**
     * @param dir directory path
     * @param sourceRoot source root File object
     * @param args {@link IndexDownArgs} instance (output)
     * @return true if history was used to gather the {@code IndexDownArgs}
     * @throws IOException on error
     */
    @VisibleForTesting
    boolean getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args) throws IOException {
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        boolean historyBased = isReadyForHistoryBasedReindex();

        if (LOGGER.isLoggable(Level.INFO)) {
            LOGGER.log(Level.INFO, String.format("Starting file collection using %s traversal for directory '%s'",
                    historyBased ? "history" : "file-system", dir));
        }
        Statistics elapsed = new Statistics();
        if (historyBased) {
            indexDownUsingHistory(env.getSourceRootFile(), args);
        } else {
            indexDown(sourceRoot, dir, args);
        }

        elapsed.report(LOGGER, String.format("Done file collection for directory '%s'", dir),
                "indexer.db.collection");

        showFileCount(dir, args);

        return historyBased;
    }
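    /*
     * Sketch of the history-based collection performed below: a FileCollector,
     * populated while the history cache was being updated, yields paths
     * relative to the source root (e.g. "/proj/src/Main.java"); each of them
     * is handed to processFileIncremental() to become an IndexFileWork.
     */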
    /**
     * Executes the first, serial stage of indexing, by going through set of files assembled from history.
     * @param sourceRoot path to the source root (same as {@link RuntimeEnvironment#getSourceRootPath()})
     * @param args {@link IndexDownArgs} instance where the resulting files to be indexed will be stored
     * @throws IOException on error
     */
    @VisibleForTesting
    void indexDownUsingHistory(File sourceRoot, IndexDownArgs args) throws IOException {

        FileCollector fileCollector = RuntimeEnvironment.getInstance().getFileCollector(project.getName());

        for (String path : fileCollector.getFiles()) {
            File file = new File(sourceRoot, path);
            processFileIncremental(args, file, path);
        }
    }

    /**
     * Optimize all index databases.
     *
     * @throws IOException if an error occurs
     */
    static CountDownLatch optimizeAll() throws IOException {
        List<IndexDatabase> dbs = new ArrayList<>();
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        IndexerParallelizer parallelizer = env.getIndexerParallelizer();
        if (env.hasProjects()) {
            for (Project project : env.getProjectList()) {
                dbs.add(new IndexDatabase(project));
            }
        } else {
            dbs.add(new IndexDatabase());
        }

        CountDownLatch latch = new CountDownLatch(dbs.size());
        for (IndexDatabase d : dbs) {
            final IndexDatabase db = d;
            if (db.isDirty()) {
                parallelizer.getFixedExecutor().submit(() -> {
                    try {
                        db.update();
                    } catch (Throwable e) {
                        LOGGER.log(Level.SEVERE,
                                "Problem updating lucene index database: ", e);
                    } finally {
                        latch.countDown();
                    }
                });
            }
        }
        return latch;
    }
    /**
     * Optimize the index database.
     * @throws IOException I/O exception
     */
    public void optimize() throws IOException {
        synchronized (lock) {
            if (running) {
                LOGGER.warning("Optimize terminated... Someone else is updating / optimizing it!");
                return;
            }
            running = true;
        }

        IndexWriter wrt = null;
        IOException writerException = null;
        try {
            Statistics elapsed = new Statistics();
            String projectDetail = this.project != null ? " for project " + project.getName() : "";
            LOGGER.log(Level.INFO, "Optimizing the index{0}", projectDetail);
            Analyzer analyzer = new StandardAnalyzer();
            IndexWriterConfig conf = new IndexWriterConfig(analyzer);
            conf.setOpenMode(OpenMode.CREATE_OR_APPEND);

            wrt = new IndexWriter(indexDirectory, conf);
            wrt.forceMerge(1); // this is deprecated and not needed anymore
            elapsed.report(LOGGER, String.format("Done optimizing index%s", projectDetail),
                    "indexer.db.optimize");
            synchronized (lock) {
                if (dirtyFile.exists() && !dirtyFile.delete()) {
                    LOGGER.log(Level.FINE, "Failed to remove \"dirty-file\": {0}",
                            dirtyFile.getAbsolutePath());
                }
                dirty = false;
            }
        } catch (IOException e) {
            writerException = e;
            LOGGER.log(Level.SEVERE, "ERROR: optimizing index", e);
        } finally {
            if (wrt != null) {
                try {
                    wrt.close();
                } catch (IOException e) {
                    if (writerException == null) {
                        writerException = e;
                    }
                    LOGGER.log(Level.WARNING,
                            "An error occurred while closing writer", e);
                }
            }
            synchronized (lock) {
                running = false;
            }
        }

        if (writerException != null) {
            throw writerException;
        }
    }

    private boolean isDirty() {
        synchronized (lock) {
            return dirty;
        }
    }

    private void setDirty() {
        synchronized (lock) {
            try {
                if (!dirty) {
                    if (!dirtyFile.createNewFile() && !dirtyFile.exists()) {
                        LOGGER.log(Level.FINE,
                                "Failed to create \"dirty-file\": {0}",
                                dirtyFile.getAbsolutePath());
                    }
                    dirty = true;
                }
            } catch (IOException e) {
                LOGGER.log(Level.FINE, "When creating dirty file: ", e);
            }
        }
    }

    private File whatXrefFile(String path, boolean compress) {
        String xrefPath = compress ? TandemPath.join(path, ".gz") : path;
        return new File(xrefDir, xrefPath);
    }

    /**
     * Queue the removal of xref file for given path.
     * @param path path to file under source root
     */
    private void removeXrefFile(String path) {
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        File xrefFile = whatXrefFile(path, env.isCompressXref());
        PendingFileDeletion pending = new PendingFileDeletion(xrefFile.getAbsolutePath());
        completer.add(pending);
    }

    private void removeHistoryFile(String path) {
        HistoryGuru.getInstance().clearCacheFile(path);
    }
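    /*
     * Illustrative mapping done by whatXrefFile() above, assuming xref
     * compression is enabled: a source path "/proj/Main.java" maps to
     * <dataRoot>/xref/proj/Main.java.gz, with TandemPath.join() taking care
     * of attaching the ".gz" suffix.
     */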
    /**
     * Remove a stale file from the index database and potentially also from history cache,
     * and queue the removal of the associated xref file.
     *
     * @param removeHistory if false, do not remove history cache for this file
     * @throws java.io.IOException if an error occurs
     */
    private void removeFile(boolean removeHistory) throws IOException {
        String path = Util.uid2url(uidIter.term().utf8ToString());

        for (IndexChangedListener listener : listeners) {
            listener.fileRemove(path);
        }

        removeFileDocUid(path);

        removeXrefFile(path);

        if (removeHistory) {
            removeHistoryFile(path);
        }

        setDirty();

        for (IndexChangedListener listener : listeners) {
            listener.fileRemoved(path);
        }
    }

    private void removeFileDocUid(String path) throws IOException {

        // Determine if a reversal of counts is necessary, and execute if so.
        if (isCountingDeltas) {
            postsIter = uidIter.postings(postsIter);
            while (postsIter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                // Read a limited-fields version of the document.
                Document doc = reader.document(postsIter.docID(), REVERT_COUNTS_FIELDS);
                if (doc != null) {
                    decrementLOCforDoc(path, doc);
                    break;
                }
            }
        }

        writer.deleteDocuments(new Term(QueryBuilder.U, uidIter.term()));
    }

    private void decrementLOCforDoc(String path, Document doc) {
        NullableNumLinesLOC nullableCounts = NumLinesLOCUtil.read(doc);
        if (nullableCounts.getNumLines() != null && nullableCounts.getLOC() != null) {
            NumLinesLOC counts = new NumLinesLOC(path,
                    -nullableCounts.getNumLines(),
                    -nullableCounts.getLOC());
            countsAggregator.register(counts);
        }
    }
    /**
     * Add a file to the Lucene index (and generate an xref file).
     *
     * @param file The file to add
     * @param path The path to the file (from source root)
     * @param ctags a defined instance to use (only if its binary is not null)
     * @throws java.io.IOException if an error occurs
     * @throws InterruptedException if a timeout occurs
     */
    private void addFile(File file, String path, Ctags ctags) throws IOException, InterruptedException {

        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        AbstractAnalyzer fa = getAnalyzerFor(file, path);

        for (IndexChangedListener listener : listeners) {
            listener.fileAdd(path, fa.getClass().getSimpleName());
        }

        ctags.setTabSize(project != null ? project.getTabSize() : 0);
        if (env.getCtagsTimeout() != 0) {
            ctags.setTimeout(env.getCtagsTimeout());
        }
        fa.setCtags(ctags);
        fa.setCountsAggregator(countsAggregator);
        fa.setProject(Project.getProject(path));
        fa.setScopesEnabled(env.isScopesEnabled());
        fa.setFoldingEnabled(env.isFoldingEnabled());

        Document doc = new Document();
        CountingWriter xrefOut = null;
        try {
            String xrefAbs = null;
            File transientXref = null;
            if (env.isGenerateHtml()) {
                xrefAbs = getXrefPath(path);
                transientXref = new File(TandemPath.join(xrefAbs,
                        PendingFileCompleter.PENDING_EXTENSION));
                xrefOut = newXrefWriter(path, transientXref, env.isCompressXref());
            }

            analyzerGuru.populateDocument(doc, file, path, fa, xrefOut);

            // Avoid producing empty xref files.
            if (xrefOut != null && xrefOut.getCount() > 0) {
                PendingFileRenaming ren = new PendingFileRenaming(xrefAbs,
                        transientXref.getAbsolutePath());
                completer.add(ren);
            } else if (xrefOut != null) {
                LOGGER.log(Level.FINER, "xref for {0} would be empty, will remove", path);
                completer.add(new PendingFileDeletion(transientXref.toString()));
            }
        } catch (InterruptedException e) {
            LOGGER.log(Level.WARNING, "File ''{0}'' interrupted--{1}",
                    new Object[]{path, e.getMessage()});
            cleanupResources(doc);
            throw e;
        } catch (Exception e) {
            LOGGER.log(Level.INFO,
                    "Skipped file ''{0}'' because the analyzer didn''t "
                            + "understand it.",
                    path);
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.log(Level.FINE, "Exception from analyzer " +
                        fa.getClass().getName(), e);
            }
            cleanupResources(doc);
            return;
        } finally {
            fa.setCtags(null);
            fa.setCountsAggregator(null);
            if (xrefOut != null) {
                xrefOut.close();
            }
        }

        try {
            writer.addDocument(doc);
        } catch (Throwable t) {
            cleanupResources(doc);
            throw t;
        }

        setDirty();

        for (IndexChangedListener listener : listeners) {
            listener.fileAdded(path, fa.getClass().getSimpleName());
        }
    }

    private AbstractAnalyzer getAnalyzerFor(File file, String path)
            throws IOException {
        try (InputStream in = new BufferedInputStream(
                new FileInputStream(file))) {
            return AnalyzerGuru.getAnalyzer(in, path);
        }
    }

    /**
     * Do a best effort to clean up all resources allocated when populating
     * a Lucene document. On normal execution, these resources should be
     * closed automatically by the index writer once it's done with them, but
     * we may not get that far if something fails.
     *
     * @param doc the document whose resources to clean up
     */
    private static void cleanupResources(Document doc) {
        for (IndexableField f : doc) {
            // If the field takes input from a reader, close the reader.
            IOUtils.close(f.readerValue());

            // If the field takes input from a token stream, close the
            // token stream.
            if (f instanceof Field) {
                IOUtils.close(((Field) f).tokenStreamValue());
            }
        }
    }
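    /*
     * Note on the transient xref handling in addFile() above: the xref is
     * first written to the xref path suffixed with
     * PendingFileCompleter.PENDING_EXTENSION and only renamed into place by
     * the completer once indexing of the file succeeds; an empty output is
     * queued for deletion instead. This keeps partially written xref files
     * from ever being visible under the xref/ tree.
     */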
    /**
     * Check if I should accept this file into the index database.
     *
     * @param file the file to check
     * @param ret defined instance whose {@code localRelPath} property will be
     * non-null afterward if and only if {@code file} is a symlink that targets
     * either a {@link Repository}-local filesystem object or the same object
     * as a previously-detected and allowed symlink. N.b. method will return
     * {@code false} if {@code ret.localRelPath} is set non-null.
     * @return a value indicating if {@code file} should be included in index
     */
    private boolean accept(File file, AcceptSymlinkRet ret) {
        ret.localRelPath = null;
        String absolutePath = file.getAbsolutePath();

        if (!pathAccepter.accept(file)) {
            return false;
        }

        if (!file.canRead()) {
            LOGGER.log(Level.WARNING, "Could not read {0}", absolutePath);
            return false;
        }

        try {
            Path absolute = Paths.get(absolutePath);
            if (Files.isSymbolicLink(absolute)) {
                File canonical = file.getCanonicalFile();
                if (!absolutePath.equals(canonical.getPath()) &&
                        !acceptSymlink(absolute, canonical, ret)) {
                    if (ret.localRelPath == null) {
                        LOGGER.log(Level.FINE, "Skipped symlink ''{0}'' -> ''{1}''",
                                new Object[] {absolutePath, canonical});
                    }
                    return false;
                }
            }
            // Only files and directories are let through below; anything else is
            // considered special and is not added.
            if (!file.isFile() && !file.isDirectory()) {
                LOGGER.log(Level.WARNING, "Ignored special file {0}",
                        absolutePath);
                return false;
            }
        } catch (IOException exp) {
            LOGGER.log(Level.WARNING, "Failed to resolve name: {0}",
                    absolutePath);
            LOGGER.log(Level.FINE, "Stack Trace: ", exp);
        }

        if (file.isDirectory()) {
            // always accept directories so that their files can be examined
            return true;
        }

        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        // Lookup history if indexing versioned files only.
        // Skip the lookup entirely (which is expensive) if unversioned files are allowed.
        if (env.isIndexVersionedFilesOnly()) {
            if (HistoryGuru.getInstance().hasHistory(file)) {
                // versioned files should always be accepted
                return true;
            }
            LOGGER.log(Level.FINER, "not accepting unversioned {0}", absolutePath);
            return false;
        }
        // unversioned files are allowed
        return true;
    }
    /**
     * Determines if {@code file} should be accepted into the index database.
     * @param parent parent of {@code file}
     * @param file directory object under consideration
     * @param ret defined instance whose {@code localRelPath} property will be
     * non-null afterward if and only if {@code file} is a symlink that targets
     * either a {@link Repository}-local filesystem object or the same object
     * as a previously-detected and allowed symlink. N.b. method will return
     * {@code false} if {@code ret.localRelPath} is set non-null.
     * @return a value indicating if {@code file} should be included in index
     */
    private boolean accept(File parent, File file, AcceptSymlinkRet ret) {
        ret.localRelPath = null;

        try {
            File f1 = parent.getCanonicalFile();
            File f2 = file.getCanonicalFile();
            if (f1.equals(f2)) {
                LOGGER.log(Level.INFO, "Skipping link to itself...: {0} {1}",
                        new Object[]{parent.getAbsolutePath(), file.getAbsolutePath()});
                return false;
            }

            // Now, let's verify that it's not a link back up the chain...
            File t1 = f1;
            while ((t1 = t1.getParentFile()) != null) {
                if (f2.equals(t1)) {
                    LOGGER.log(Level.INFO, "Skipping link to parent...: {0} {1}",
                            new Object[]{parent.getAbsolutePath(), file.getAbsolutePath()});
                    return false;
                }
            }

            return accept(file, ret);
        } catch (IOException ex) {
            LOGGER.log(Level.WARNING, "Failed to resolve name: {0} {1}",
                    new Object[]{parent.getAbsolutePath(), file.getAbsolutePath()});
        }
        return false;
    }

    /**
     * Check if I should accept the path containing a symlink.
     *
     * @param absolute the path with a symlink to check
     * @param canonical the canonical file object
     * @param ret defined instance whose {@code localRelPath} property will be
     * non-null afterward if and only if {@code absolute} is a symlink that
     * targets either a {@link Repository}-local filesystem object or the same
     * object ({@code canonical}) as a previously-detected and allowed symlink.
     * N.b. method will return {@code false} if {@code ret.localRelPath} is set
     * non-null.
     * @return a value indicating if {@code file} should be included in index
     */
    private boolean acceptSymlink(Path absolute, File canonical, AcceptSymlinkRet ret) {
        ret.localRelPath = null;

        String absolute1 = absolute.toString();
        String canonical1 = canonical.getPath();
        boolean isCanonicalDir = canonical.isDirectory();
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        IndexedSymlink indexed1;
        String absolute0;

        if (isLocal(canonical1)) {
            if (!isCanonicalDir) {
                if (LOGGER.isLoggable(Level.FINEST)) {
                    LOGGER.log(Level.FINEST, "Local {0} has symlink from {1}",
                            new Object[] {canonical1, absolute1});
                }
                /*
                 * Always index symlinks to local files, but do not add to
                 * indexedSymlinks for a non-directory.
                 */
                return true;
            }

            /*
             * Do not index symlinks to local directories, because the
             * canonical target will be indexed on its own -- but relativize()
             * a path to be returned in ret so that a symlink can be replicated
             * in xref/.
             */
            ret.localRelPath = absolute.getParent().relativize(
                    canonical.toPath()).toString();

            // Try to put the prime absolute path into indexedSymlinks.
            try {
                String primeRelative = env.getPathRelativeToSourceRoot(canonical);
                absolute0 = env.getSourceRootPath() + primeRelative;
            } catch (ForbiddenSymlinkException | IOException e) {
                /*
                 * This is not expected, as indexDown() would have operated on
                 * the file already -- but we are forced to handle.
                 */
                LOGGER.log(Level.WARNING, String.format(
                        "Unexpected error getting relative for %s", canonical), e);
                absolute0 = absolute1;
            }
            indexed1 = new IndexedSymlink(absolute0, canonical1, true);
            indexedSymlinks.put(canonical1, indexed1);
            return false;
        }
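        /*
         * Beyond this point the symlink target is external to the current
         * project (or source root), so the decision falls through three
         * checks in turn: symlinks already recorded in indexedSymlinks, the
         * configured canonical roots, and the allowed-symlinks set.
         */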
        IndexedSymlink indexed0;
        if ((indexed0 = indexedSymlinks.get(canonical1)) != null) {
            if (absolute1.equals(indexed0.getAbsolute())) {
                return true;
            }

            /*
             * Do not index symlinks to external directories already indexed
             * as linked elsewhere, because the canonical target will be
             * indexed already -- but relativize() a path to be returned in ret
             * so that this second symlink can be redone as a local
             * (non-external) symlink in xref/.
             */
            ret.localRelPath = absolute.getParent().relativize(
                    Paths.get(indexed0.getAbsolute())).toString();

            if (LOGGER.isLoggable(Level.FINEST)) {
                LOGGER.log(Level.FINEST, "External dir {0} has symlink from {1} after first {2}",
                        new Object[] {canonical1, absolute1, indexed0.getAbsolute()});
            }
            return false;
        }

        /*
         * Iterate through indexedSymlinks, which is sorted so that shorter
         * canonical entries come first, to see if the new link is a child
         * canonically.
         */
        for (IndexedSymlink a0 : indexedSymlinks.values()) {
            indexed0 = a0;
            if (!indexed0.isLocal() && canonical1.startsWith(indexed0.getCanonicalSeparated())) {
                absolute0 = indexed0.getAbsolute();
                if (!isCanonicalDir) {
                    if (LOGGER.isLoggable(Level.FINEST)) {
                        LOGGER.log(Level.FINEST,
                                "External file {0} has symlink from {1} under previous {2}",
                                new Object[] {canonical1, absolute1, absolute0});
                    }
                    // Do not add to indexedSymlinks for a non-directory.
                    return true;
                }

                /*
                 * See above about redoing a sourceRoot symlink as a local
                 * (non-external) symlink in xref/.
                 */
                Path abs0 = Paths.get(absolute0, canonical1.substring(
                        indexed0.getCanonicalSeparated().length()));
                ret.localRelPath = absolute.getParent().relativize(abs0).toString();

                if (LOGGER.isLoggable(Level.FINEST)) {
                    LOGGER.log(Level.FINEST,
                            "External dir {0} has symlink from {1} under previous {2}",
                            new Object[] {canonical1, absolute1, absolute0});
                }
                return false;
            }
        }

        Set<String> canonicalRoots = env.getCanonicalRoots();
        for (String canonicalRoot : canonicalRoots) {
            if (canonical1.startsWith(canonicalRoot)) {
                if (LOGGER.isLoggable(Level.FINEST)) {
                    LOGGER.log(Level.FINEST, "Allowed symlink {0} per canonical root {1}",
                            new Object[] {absolute1, canonical1});
                }
                if (isCanonicalDir) {
                    indexed1 = new IndexedSymlink(absolute1, canonical1, false);
                    indexedSymlinks.put(canonical1, indexed1);
                }
                return true;
            }
        }

        Set<String> allowedSymlinks = env.getAllowedSymlinks();
        for (String allowedSymlink : allowedSymlinks) {
            String allowedTarget;
            try {
                allowedTarget = new File(allowedSymlink).getCanonicalPath();
            } catch (IOException e) {
                LOGGER.log(Level.FINE, "unresolvable symlink: {0}", allowedSymlink);
                continue;
            }
            /*
             * The following canonical check is sufficient because indexDown()
             * traverses top-down, and any intermediate symlinks would have
             * also been checked here for an allowed canonical match. This
             * technically means that if there is a set of redundant symlinks
             * with the same canonical target, then allowing one of the set
             * will allow all others in the set.
             */
            if (canonical1.equals(allowedTarget)) {
                if (isCanonicalDir) {
                    indexed1 = new IndexedSymlink(absolute1, canonical1, false);
                    indexedSymlinks.put(canonical1, indexed1);
                }
                return true;
            }
        }
        return false;
    }
    /**
     * Check if a file is local to the current project. If we don't have
     * projects, check if the file is in the source root.
     *
     * @param path the path to a file
     * @return true if the file is local to the current repository
     */
    private boolean isLocal(String path) {
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        String srcRoot = env.getSourceRootPath();

        if (path.startsWith(srcRoot + File.separator)) {
            if (env.hasProjects()) {
                String relPath = path.substring(srcRoot.length());
                // If file is under the current project, then it's local.
                return project.equals(Project.getProject(relPath));
            } else {
                // File is under source root, and we don't have projects, so
                // consider it local.
                return true;
            }
        }

        return false;
    }

    private void handleSymlink(String path, AcceptSymlinkRet ret) {
        /*
         * If ret.localRelPath is defined, then a symlink was detected but
         * not "accepted" to avoid redundancy with an already-accepted
         * canonical target. Set up for a deferred creation of a symlink
         * within xref/.
         */
        if (ret.localRelPath != null) {
            File xrefPath = new File(xrefDir, path);
            PendingSymlinkage psym = new PendingSymlinkage(xrefPath.getAbsolutePath(), ret.localRelPath);
            completer.add(psym);
        }
    }

    /**
     * Executes the first, serial stage of indexing, by recursively traversing the file system
     * and collecting files for indexing alongside.
     * <p>Files at least are counted, and any deleted or updated files (based on
     * comparison to the Lucene index) are passed to
     * {@link #removeFile(boolean)}. New or updated files are noted for indexing.
     * @param dir the root indexDirectory to generate indexes for
     * @param parent path to parent directory
     * @param args arguments to control execution and for collecting a list of
     * files for indexing
     */
    @VisibleForTesting
    void indexDown(File dir, String parent, IndexDownArgs args) throws IOException {

        if (isInterrupted()) {
            return;
        }

        AcceptSymlinkRet ret = new AcceptSymlinkRet();
        if (!accept(dir, ret)) {
            handleSymlink(parent, ret);
            return;
        }

        File[] files = dir.listFiles();
        if (files == null) {
            LOGGER.log(Level.SEVERE, "Failed to get file listing for: {0}",
                    dir.getPath());
            return;
        }
        Arrays.sort(files, FILENAME_COMPARATOR);

        for (File file : files) {
            String path = parent + File.separator + file.getName();
            if (!accept(dir, file, ret)) {
                handleSymlink(path, ret);
            } else {
                if (file.isDirectory()) {
                    indexDown(file, path, args);
                } else {
                    processFile(args, file, path);
                }
            }
        }
    }
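    /*
     * Illustrative shape of the recursion above: for a project "/proj" the
     * initial call is indexDown(new File(sourceRoot, "/proj"), "/proj", args),
     * and a child entry "src" recurses as indexDown(<dir>/src, "/proj/src",
     * args), keeping the File object and the relative path in lockstep.
     */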
    /**
     * Compared with {@link #processFile(IndexDownArgs, File, String)}, this method's file/path arguments
     * represent files that have actually changed in some way, while the other method's arguments represent
     * files present on disk.
     * @param args {@link IndexDownArgs} instance
     * @param file File object
     * @param path path of the file argument relative to source root (with leading slash)
     * @throws IOException on error
     */
    private void processFileIncremental(IndexDownArgs args, File file, String path) throws IOException {
        if (uidIter != null) {
            path = Util.fixPathIfWindows(path);
            // Traverse terms until reaching one that matches the path of given file.
            while (uidIter != null && uidIter.term() != null
                    && uidIter.term().compareTo(emptyBR) != 0
                    && Util.uid2url(uidIter.term().utf8ToString()).compareTo(path) < 0) {

                // A file that was not changed.
                /*
                 * Possibly short-circuit to force reindexing of prior-version indexes.
                 */
                String termPath = Util.uid2url(uidIter.term().utf8ToString());
                File termFile = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), termPath);
                boolean matchOK = (isWithDirectoryCounts || isCountingDeltas) &&
                        checkSettings(termFile, termPath);
                if (!matchOK) {
                    removeFile(false);

                    args.curCount++;
                    args.works.add(new IndexFileWork(termFile, termPath));
                }

                BytesRef next = uidIter.next();
                if (next == null) {
                    uidIter = null;
                }
            }

            if (uidIter != null && uidIter.term() != null
                    && Util.uid2url(uidIter.term().utf8ToString()).equals(path)) {
                /*
                 * At this point we know that the file has a corresponding term in the index
                 * and has changed in some way. Either it was deleted or it was changed.
                 */
                if (!file.exists()) {
                    removeFile(true);
                } else {
                    removeFile(false);

                    args.curCount++;
                    args.works.add(new IndexFileWork(file, path));
                }

                BytesRef next = uidIter.next();
                if (next == null) {
                    uidIter = null;
                }
            } else {
                // Potentially new file. A file might be added and then deleted,
                // so it is necessary to check its existence.
                if (file.exists()) {
                    args.curCount++;
                    args.works.add(new IndexFileWork(file, path));
                }
            }
        } else {
            if (file.exists()) {
                args.curCount++;
                args.works.add(new IndexFileWork(file, path));
            }
        }
    }
    /**
     * Process a file on disk w.r.t. index.
     * @param args {@link IndexDownArgs} instance
     * @param file File object
     * @param path path corresponding to the file parameter, relative to source root (with leading slash)
     * @throws IOException on error
     */
    private void processFile(IndexDownArgs args, File file, String path) throws IOException {
        if (uidIter != null) {
            path = Util.fixPathIfWindows(path);
            String uid = Util.path2uid(path,
                    DateTools.timeToString(file.lastModified(),
                            DateTools.Resolution.MILLISECOND)); // construct uid for doc
            BytesRef buid = new BytesRef(uid);
            // Traverse terms that have smaller UID than the current file,
            // i.e. given the ordering they are positioned before the file,
            // or it is the file that has been modified.
            while (uidIter != null && uidIter.term() != null
                    && uidIter.term().compareTo(emptyBR) != 0
                    && uidIter.term().compareTo(buid) < 0) {

                // If the term's path matches the path of the currently processed file,
                // it is clear that the file has been modified and thus
                // removeFile() will be followed by a call to addFile() in indexParallel().
                // In such case, instruct removeFile() not to remove history
                // cache for the file so that incremental history cache
                // generation works.
                String termPath = Util.uid2url(uidIter.term().utf8ToString());
                removeFile(!termPath.equals(path));

                BytesRef next = uidIter.next();
                if (next == null) {
                    uidIter = null;
                }
            }

            // If the file was not modified, probably skip to the next one.
            if (uidIter != null && uidIter.term() != null && uidIter.term().bytesEquals(buid)) {

                /*
                 * Possibly short-circuit to force reindexing of prior-version indexes.
                 */
                boolean matchOK = (isWithDirectoryCounts || isCountingDeltas) &&
                        checkSettings(file, path);
                if (!matchOK) {
                    removeFile(false);
                }

                BytesRef next = uidIter.next();
                if (next == null) {
                    uidIter = null;
                }

                if (matchOK) {
                    return;
                }
            }
        }

        args.curCount++;
        args.works.add(new IndexFileWork(file, path));
    }
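    /*
     * Design note for the parallel stage below: the works list assembled by
     * the serial stage is fanned out over a ForkJoinPool via a parallel
     * stream, while Ctags processes are recycled through an ObjectPool so
     * that each worker borrows an instance rather than spawning its own
     * per file.
     */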
"indexed" : "failed indexing")); 1687 return ret; 1688 } 1689 }))).get(); 1690 } catch (InterruptedException | ExecutionException e) { 1691 int successCount = successCounter.intValue(); 1692 double successPct = 100.0 * successCount / worksCount; 1693 String exmsg = String.format("%d successes (%.1f%%) after aborting parallel-indexing", 1694 successCount, successPct); 1695 LOGGER.log(Level.SEVERE, exmsg, e); 1696 } 1697 1698 args.curCount = currentCounter.intValue(); 1699 1700 // Start with failureCount=worksCount, and then subtract successes. 1701 int failureCount = worksCount; 1702 if (bySuccess != null) { 1703 List<IndexFileWork> successes = bySuccess.getOrDefault(Boolean.TRUE, null); 1704 if (successes != null) { 1705 failureCount -= successes.size(); 1706 } 1707 } 1708 if (failureCount > 0) { 1709 double pctFailed = 100.0 * failureCount / worksCount; 1710 String exmsg = String.format("%d failures (%.1f%%) while parallel-indexing", failureCount, pctFailed); 1711 LOGGER.log(Level.WARNING, exmsg); 1712 } 1713 1714 /* 1715 * Encountering an AlreadyClosedException is severe enough to abort the 1716 * run, since it will fail anyway later upon trying to commit(). 1717 */ 1718 int numAlreadyClosed = alreadyClosedCounter.get(); 1719 if (numAlreadyClosed > 0) { 1720 throw new AlreadyClosedException(String.format("count=%d", numAlreadyClosed)); 1721 } 1722 } 1723 isInterrupted()1724 private boolean isInterrupted() { 1725 synchronized (lock) { 1726 return interrupted; 1727 } 1728 } 1729 1730 /** 1731 * Register an object to receive events when modifications is done to the 1732 * index database. 1733 * 1734 * @param listener the object to receive the events 1735 */ addIndexChangedListener(IndexChangedListener listener)1736 public void addIndexChangedListener(IndexChangedListener listener) { 1737 if (listener != null) { 1738 listeners.add(listener); 1739 } 1740 } 1741 1742 /** 1743 * Get all files in some of the index databases. 1744 * 1745 * @param subFiles Subdirectories of various projects or null or an empty list to get everything 1746 * @throws IOException if an error occurs 1747 * @return set of files in the index databases specified by the subFiles parameter 1748 */ getAllFiles(List<String> subFiles)1749 public static Set<String> getAllFiles(List<String> subFiles) throws IOException { 1750 Set<String> files = new HashSet<>(); 1751 RuntimeEnvironment env = RuntimeEnvironment.getInstance(); 1752 1753 if (env.hasProjects()) { 1754 if (subFiles == null || subFiles.isEmpty()) { 1755 for (Project project : env.getProjectList()) { 1756 IndexDatabase db = new IndexDatabase(project); 1757 files.addAll(db.getFiles()); 1758 } 1759 } else { 1760 for (String path : subFiles) { 1761 Project project = Project.getProject(path); 1762 if (project == null) { 1763 LOGGER.log(Level.WARNING, "Could not find a project for \"{0}\"", path); 1764 } else { 1765 IndexDatabase db = new IndexDatabase(project); 1766 files.addAll(db.getFiles()); 1767 } 1768 } 1769 } 1770 } else { 1771 IndexDatabase db = new IndexDatabase(); 1772 files = db.getFiles(); 1773 } 1774 1775 return files; 1776 } 1777 1778 /** 1779 * Get all files in this index database. 
    /**
     * Get all files in this index database.
     *
     * @throws IOException If an IO error occurs while reading from the database
     * @return set of files in this index database
     */
    public Set<String> getFiles() throws IOException {
        IndexReader ireader = null;
        TermsEnum iter = null;
        Terms terms;
        Set<String> files = new HashSet<>();

        try {
            ireader = DirectoryReader.open(indexDirectory); // open existing index
            if (ireader.numDocs() > 0) {
                terms = MultiTerms.getTerms(ireader, QueryBuilder.U);
                iter = terms.iterator(); // init uid iterator
            }
            while (iter != null && iter.term() != null) {
                String value = iter.term().utf8ToString();
                if (value.isEmpty()) {
                    iter.next();
                    continue;
                }

                files.add(Util.uid2url(value));
                BytesRef next = iter.next();
                if (next == null) {
                    iter = null;
                }
            }
        } finally {
            if (ireader != null) {
                try {
                    ireader.close();
                } catch (IOException e) {
                    LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e);
                }
            }
        }

        return files;
    }

    /**
     * Get number of documents in this index database.
     * @return number of documents
     * @throws IOException if I/O exception occurred
     */
    public int getNumFiles() throws IOException {
        IndexReader ireader = null;
        try {
            ireader = DirectoryReader.open(indexDirectory); // open existing index
            return ireader.numDocs();
        } finally {
            if (ireader != null) {
                try {
                    ireader.close();
                } catch (IOException e) {
                    LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e);
                }
            }
        }
    }

    static void listFrequentTokens(List<String> subFiles) throws IOException {
        final int limit = 4;

        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        if (env.hasProjects()) {
            if (subFiles == null || subFiles.isEmpty()) {
                for (Project project : env.getProjectList()) {
                    IndexDatabase db = new IndexDatabase(project);
                    db.listTokens(limit);
                }
            } else {
                for (String path : subFiles) {
                    Project project = Project.getProject(path);
                    if (project == null) {
                        LOGGER.log(Level.WARNING, "Could not find a project for \"{0}\"", path);
                    } else {
                        IndexDatabase db = new IndexDatabase(project);
                        db.listTokens(limit);
                    }
                }
            }
        } else {
            IndexDatabase db = new IndexDatabase();
            db.listTokens(limit);
        }
    }

    public void listTokens(int freq) throws IOException {
        IndexReader ireader = null;
        TermsEnum iter = null;
        Terms terms;

        try {
            ireader = DirectoryReader.open(indexDirectory);
            if (ireader.numDocs() > 0) {
                terms = MultiTerms.getTerms(ireader, QueryBuilder.DEFS);
                iter = terms.iterator(); // init terms iterator over definitions
            }
            while (iter != null && iter.term() != null) {
                if (iter.docFreq() > 16 && iter.term().utf8ToString().length() > freq) {
                    LOGGER.warning(iter.term().utf8ToString());
                }
                BytesRef next = iter.next();
                if (next == null) {
                    iter = null;
                }
            }
        } finally {
            if (ireader != null) {
                try {
                    ireader.close();
                } catch (IOException e) {
                    LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e);
                }
            }
        }
    }
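    /*
     * Note on listTokens() above: despite the parameter name, 'freq' acts as
     * a minimum term-length filter, while the document-frequency threshold is
     * the hard-coded value 16.
     */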
    /**
     * Get an indexReader for the index database where a given file is located.
     *
     * @param path the file to get the database for
     * @return The index database where the file should be located or null if it
     * cannot be located.
     */
    @SuppressWarnings("java:S2095")
    public static IndexReader getIndexReader(String path) {
        IndexReader ret = null;

        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        File indexDir = new File(env.getDataRootFile(), INDEX_DIR);

        if (env.hasProjects()) {
            Project p = Project.getProject(path);
            if (p == null) {
                return null;
            }
            indexDir = new File(indexDir, p.getPath());
        }
        try {
            FSDirectory fdir = FSDirectory.open(indexDir.toPath(), NoLockFactory.INSTANCE);
            if (indexDir.exists() && DirectoryReader.indexExists(fdir)) {
                ret = DirectoryReader.open(fdir);
            }
        } catch (Exception ex) {
            LOGGER.log(Level.SEVERE, "Failed to open index: {0}", indexDir.getAbsolutePath());
            LOGGER.log(Level.FINE, "Stack Trace: ", ex);
        }
        return ret;
    }

    /**
     * Get the latest definitions for a file from the index.
     *
     * @param file the file whose definitions to find
     * @return definitions for the file, or {@code null} if they could not be
     * found
     * @throws IOException if an error happens when accessing the index
     * @throws ParseException if an error happens when building the Lucene query
     * @throws ClassNotFoundException if the class for the stored definitions
     * instance cannot be found
     */
    public static Definitions getDefinitions(File file) throws ParseException, IOException, ClassNotFoundException {
        Document doc = getDocument(file);
        if (doc == null) {
            return null;
        }

        IndexableField tags = doc.getField(QueryBuilder.TAGS);
        if (tags != null) {
            return Definitions.deserialize(tags.binaryValue().bytes);
        }

        // Didn't find any definitions.
        return null;
    }

    /**
     * @param file File object for a file under source root
     * @return Document object for the file or {@code null}
     * @throws IOException on I/O error
     * @throws ParseException on problem with building Query
     */
    public static Document getDocument(File file) throws IOException, ParseException {
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        String path;
        try {
            path = env.getPathRelativeToSourceRoot(file);
        } catch (ForbiddenSymlinkException e) {
            LOGGER.log(Level.FINER, e.getMessage());
            return null;
        }
        // Sanitize Windows path delimiters in order not to conflict with the Lucene escape character.
        path = path.replace("\\", "/");

        try (IndexReader indexReader = getIndexReader(path)) {
            return getDocument(path, indexReader);
        }
    }
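    /*
     * Usage sketch for the lookup methods above (hypothetical caller; the
     * relative path "myproject/foo.c" is an assumption):
     *
     *   File f = new File(RuntimeEnvironment.getInstance().getSourceRootFile(),
     *           "myproject/foo.c");
     *   Definitions defs = IndexDatabase.getDefinitions(f);
     *   if (defs != null) {
     *       // inspect the deserialized ctags definitions
     *   }
     */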
"empty" : "success"}); 1999 if (top.totalHits.value == 0) { 2000 // No hits, no document... 2001 return null; 2002 } 2003 doc = searcher.doc(top.scoreDocs[0].doc); 2004 String foundPath = doc.get(QueryBuilder.PATH); 2005 2006 // Only use the document if we found an exact match. 2007 if (!path.equals(foundPath)) { 2008 return null; 2009 } 2010 2011 return doc; 2012 } 2013 2014 @Override equals(Object o)2015 public boolean equals(Object o) { 2016 if (this == o) { 2017 return true; 2018 } 2019 if (o == null || getClass() != o.getClass()) { 2020 return false; 2021 } 2022 IndexDatabase that = (IndexDatabase) o; 2023 return Objects.equals(project, that.project); 2024 } 2025 2026 @Override hashCode()2027 public int hashCode() { 2028 return Objects.hash(project); 2029 } 2030 2031 private static class CountingWriter extends Writer { 2032 private long count; 2033 private final Writer out; 2034 CountingWriter(Writer out)2035 CountingWriter(Writer out) { 2036 super(out); 2037 this.out = out; 2038 } 2039 2040 @Override write(@otNull char[] chars, int off, int len)2041 public void write(@NotNull char[] chars, int off, int len) throws IOException { 2042 out.write(chars, off, len); 2043 count += len; 2044 } 2045 2046 @Override flush()2047 public void flush() throws IOException { 2048 out.flush(); 2049 } 2050 2051 @Override close()2052 public void close() throws IOException { 2053 out.close(); 2054 } 2055 getCount()2056 public long getCount() { 2057 return count; 2058 } 2059 } 2060 getXrefPath(String path)2061 private String getXrefPath(String path) { 2062 boolean compressed = RuntimeEnvironment.getInstance().isCompressXref(); 2063 File xrefFile = whatXrefFile(path, compressed); 2064 File parentFile = xrefFile.getParentFile(); 2065 2066 // If mkdirs() returns false, the failure is most likely 2067 // because the file already exists. But to check for the 2068 // file first and only add it if it doesn't exists would 2069 // only increase the file IO... 2070 if (!parentFile.mkdirs()) { 2071 assert parentFile.exists(); 2072 } 2073 2074 // Write to a pending file for later renaming. 2075 String xrefAbs = xrefFile.getAbsolutePath(); 2076 return xrefAbs; 2077 } 2078 2079 /** 2080 * Get a writer to which the xref can be written, or null if no xref 2081 * should be produced for files of this type. 2082 */ newXrefWriter(String path, File transientXref, boolean compressed)2083 private CountingWriter newXrefWriter(String path, File transientXref, boolean compressed) throws IOException { 2084 return new CountingWriter(new BufferedWriter(new OutputStreamWriter(compressed ? 
    /**
     * Get a writer to which the xref can be written, or null if no xref
     * should be produced for files of this type.
     */
    private CountingWriter newXrefWriter(String path, File transientXref, boolean compressed) throws IOException {
        return new CountingWriter(new BufferedWriter(new OutputStreamWriter(compressed ?
                new GZIPOutputStream(new FileOutputStream(transientXref)) :
                new FileOutputStream(transientXref))));
    }

    LockFactory pickLockFactory(RuntimeEnvironment env) {
        switch (env.getLuceneLocking()) {
            case ON:
            case SIMPLE:
                return SimpleFSLockFactory.INSTANCE;
            case NATIVE:
                return NativeFSLockFactory.INSTANCE;
            case OFF:
            default:
                return NoLockFactory.INSTANCE;
        }
    }

    private void finishWriting() throws IOException {
        boolean hasPendingCommit = false;
        try {
            writeAnalysisSettings();

            LOGGER.log(Level.FINE, "preparing to commit changes to Lucene index"); // TODO add info about which database
            writer.prepareCommit();
            hasPendingCommit = true;

            int n = completer.complete();
            // TODO: add elapsed
            LOGGER.log(Level.FINE, "completed {0} object(s)", n);

            // Just before commit(), reset the `hasPendingCommit' flag,
            // since after commit() is called, there is no need for
            // rollback() regardless of success.
            hasPendingCommit = false;
            writer.commit();
        } catch (RuntimeException | IOException e) {
            if (hasPendingCommit) {
                writer.rollback();
            }
            LOGGER.log(Level.WARNING,
                    "An error occurred while finishing writer and completer", e);
            throw e;
        }
    }
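    /*
     * finishWriting() above follows a two-phase commit pattern:
     * prepareCommit() stages the Lucene changes, completer.complete() then
     * finishes the pending work (e.g. renaming transient xref files into
     * place), and commit() publishes the index only after both steps
     * succeeded; a failure in between rolls back the prepared state.
     */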
    /**
     * Verify TABSIZE, and evaluate AnalyzerGuru version together with ZVER --
     * or return a value to indicate mismatch.
     * @param file the source file object
     * @param path the source file path
     * @return {@code false} if a mismatch is detected
     */
    @VisibleForTesting
    boolean checkSettings(File file, String path) throws IOException {

        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        boolean outIsXrefWriter = false; // potential xref writer
        int reqTabSize = project != null && project.hasTabSizeSetting() ?
                project.getTabSize() : 0;
        Integer actTabSize = settings.getTabSize();
        if (actTabSize != null && !actTabSize.equals(reqTabSize)) {
            LOGGER.log(Level.FINE, "Tabsize mismatch: {0}", path);
            return false;
        }

        int n = 0;
        postsIter = uidIter.postings(postsIter);
        while (postsIter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            ++n;
            // Read a limited-fields version of the document.
            Document doc = reader.document(postsIter.docID(), CHECK_FIELDS);
            if (doc == null) {
                LOGGER.log(Level.FINER, "No Document: {0}", path);
                continue;
            }

            long reqGuruVersion = AnalyzerGuru.getVersionNo();
            Long actGuruVersion = settings.getAnalyzerGuruVersion();
            /*
             * For an older OpenGrok index that does not yet have a defined,
             * stored analyzerGuruVersion, break so that no extra work is done.
             * After a re-index, the guru version check will be active.
             */
            if (actGuruVersion == null) {
                break;
            }

            AbstractAnalyzer fa = null;
            String fileTypeName;
            if (actGuruVersion.equals(reqGuruVersion)) {
                fileTypeName = doc.get(QueryBuilder.TYPE);
                if (fileTypeName == null) {
                    // (Should not get here, but break just in case.)
                    LOGGER.log(Level.FINEST, "Missing TYPE field: {0}", path);
                    break;
                }

                AnalyzerFactory fac = AnalyzerGuru.findByFileTypeName(fileTypeName);
                if (fac != null) {
                    fa = fac.getAnalyzer();
                }
            } else {
                /*
                 * If the stored guru version does not match, re-verify the
                 * selection of analyzer or return a value to indicate the
                 * analyzer is now mis-matched.
                 */
                LOGGER.log(Level.FINER, "Guru version mismatch: {0}", path);

                fa = getAnalyzerFor(file, path);
                fileTypeName = fa.getFileTypeName();
                String oldTypeName = doc.get(QueryBuilder.TYPE);
                if (!fileTypeName.equals(oldTypeName)) {
                    if (LOGGER.isLoggable(Level.FINE)) {
                        LOGGER.log(Level.FINE, "Changed {0} to {1}: {2}",
                                new Object[]{oldTypeName, fileTypeName, path});
                    }
                    return false;
                }
            }

            // Verify the analyzer version, or return a value to indicate mismatch.
            long reqVersion = AnalyzerGuru.getAnalyzerVersionNo(fileTypeName);
            Long actVersion = settings.getAnalyzerVersion(fileTypeName);
            if (actVersion == null || !actVersion.equals(reqVersion)) {
                if (LOGGER.isLoggable(Level.FINE)) {
                    LOGGER.log(Level.FINE, "{0} version mismatch: {1}",
                            new Object[]{fileTypeName, path});
                }
                return false;
            }

            if (fa != null) {
                outIsXrefWriter = true;
            }

            // The version checks have passed.
            break;
        }
        if (n < 1) {
            LOGGER.log(Level.FINER, "Missing index Documents: {0}", path);
            return false;
        }

        // If the economy mode is on, this should be treated as a match.
        if (!env.isGenerateHtml()) {
            if (xrefExistsFor(path)) {
                LOGGER.log(Level.FINEST, "Extraneous {0}, removing its xref file", path);
                removeXrefFile(path);
            }
            return true;
        }

        return (!outIsXrefWriter || xrefExistsFor(path));
    }

    private void writeAnalysisSettings() throws IOException {
        settings = new IndexAnalysisSettings3();
        settings.setProjectName(project != null ? project.getName() : null);
        settings.setTabSize(project != null && project.hasTabSizeSetting() ?
                project.getTabSize() : 0);
        settings.setAnalyzerGuruVersion(AnalyzerGuru.getVersionNo());
        settings.setAnalyzersVersions(AnalyzerGuru.getAnalyzersVersionNos());
        settings.setIndexedSymlinks(indexedSymlinks);

        IndexAnalysisSettingsAccessor dao = new IndexAnalysisSettingsAccessor();
        dao.write(writer, settings);
    }

    private IndexAnalysisSettings3 readAnalysisSettings() throws IOException {
        IndexAnalysisSettingsAccessor dao = new IndexAnalysisSettingsAccessor();
        return dao.read(reader);
    }

    private boolean xrefExistsFor(String path) {
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        File xrefFile = whatXrefFile(path, env.isCompressXref());
        if (!xrefFile.exists()) {
            LOGGER.log(Level.FINEST, "Missing {0}", xrefFile);
            return false;
        }

        return true;
    }

    private static class AcceptSymlinkRet {
        String localRelPath;
    }
}