/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2008, 2022, Oracle and/or its affiliates. All rights reserved.
 * Portions Copyright (c) 2017, 2020, Chris Fraire <cfraire@me.com>.
 */
package org.opengrok.indexer.index;

import java.io.BufferedInputStream;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
import java.util.zip.GZIPOutputStream;

import jakarta.ws.rs.client.ClientBuilder;
import jakarta.ws.rs.client.Entity;
import jakarta.ws.rs.core.Response;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiTerms;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockFactory;
import org.apache.lucene.store.NativeFSLockFactory;
import org.apache.lucene.store.NoLockFactory;
import org.apache.lucene.store.SimpleFSLockFactory;
import org.apache.lucene.util.BytesRef;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.jetbrains.annotations.VisibleForTesting;
import org.opengrok.indexer.analysis.AbstractAnalyzer;
import org.opengrok.indexer.analysis.AnalyzerFactory;
import org.opengrok.indexer.analysis.AnalyzerGuru;
import org.opengrok.indexer.analysis.Ctags;
import org.opengrok.indexer.analysis.Definitions;
import org.opengrok.indexer.analysis.NullableNumLinesLOC;
import org.opengrok.indexer.analysis.NumLinesLOC;
import org.opengrok.indexer.configuration.PathAccepter;
import org.opengrok.indexer.configuration.Project;
import org.opengrok.indexer.configuration.RuntimeEnvironment;
import org.opengrok.indexer.history.FileCollector;
import org.opengrok.indexer.history.HistoryGuru;
import org.opengrok.indexer.history.Repository;
import org.opengrok.indexer.history.RepositoryInfo;
import org.opengrok.indexer.history.RepositoryWithHistoryTraversal;
import org.opengrok.indexer.logger.LoggerFactory;
import org.opengrok.indexer.search.QueryBuilder;
import org.opengrok.indexer.util.ForbiddenSymlinkException;
import org.opengrok.indexer.util.IOUtils;
import org.opengrok.indexer.util.ObjectPool;
import org.opengrok.indexer.util.Progress;
import org.opengrok.indexer.util.Statistics;
import org.opengrok.indexer.util.TandemPath;
import org.opengrok.indexer.web.Util;

import static org.opengrok.indexer.index.IndexerUtil.getWebAppHeaders;
import static org.opengrok.indexer.web.ApiUtils.waitForAsyncApi;

/**
 * This class is used to create / update the index databases. Currently, we use
 * one index database per project.
 *
 * @author Trond Norbye
 * @author Lubos Kosco, update for Lucene 4.x, 5.x
 */
public class IndexDatabase {

    private static final Logger LOGGER = LoggerFactory.getLogger(IndexDatabase.class);

    private static final Comparator<File> FILENAME_COMPARATOR = Comparator.comparing(File::getName);

    private static final Set<String> CHECK_FIELDS;

    private static final Set<String> REVERT_COUNTS_FIELDS;

    private static final Object INSTANCE_LOCK = new Object();

    /**
     * Key is canonical path; Value is the first accepted, absolute path. Map
     * is ordered by canonical length (ASC) and then canonical value (ASC).
     * The map is accessed by a single thread running indexDown().
     */
    private final Map<String, IndexedSymlink> indexedSymlinks = new TreeMap<>(
            Comparator.comparingInt(String::length).thenComparing(o -> o));

    private final Project project;
    private FSDirectory indexDirectory;
    private IndexReader reader;
    private IndexWriter writer;
    private IndexAnalysisSettings3 settings;
    private PendingFileCompleter completer;
    private NumLinesLOCAggregator countsAggregator;
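    // Iterator over the uid (u field) terms of the existing index, and its postings, used during update.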
    private TermsEnum uidIter;
    private PostingsEnum postsIter;
    private PathAccepter pathAccepter;
    private AnalyzerGuru analyzerGuru;
    private File xrefDir;
    private boolean interrupted;
    private CopyOnWriteArrayList<IndexChangedListener> listeners;
    private File dirtyFile;
    private final Object lock = new Object();
    private boolean dirty;
    private boolean running;
    private boolean isCountingDeltas;
    private boolean isWithDirectoryCounts;
    private List<String> directories;
    private LockFactory lockfact;
    private final BytesRef emptyBR = new BytesRef("");

    // Directory where we store indexes
    public static final String INDEX_DIR = "index";
    public static final String XREF_DIR = "xref";
    public static final String SUGGESTER_DIR = "suggester";

    private final IndexDownArgsFactory indexDownArgsFactory;

    /**
     * Create a new instance of the Index Database. Use this constructor if you
     * don't use any projects.
     *
     * @throws java.io.IOException if an error occurs while creating directories
     */
    public IndexDatabase() throws IOException {
        this(null);
    }

    /**
     * Create a new instance of an Index Database for a given project.
     *
     * @param project the project to create the database for
     * @param factory {@link IndexDownArgsFactory} instance
     * @throws java.io.IOException if an error occurs while creating directories
     */
    public IndexDatabase(Project project, IndexDownArgsFactory factory) throws IOException {
        indexDownArgsFactory = factory;
        this.project = project;
        lockfact = NoLockFactory.INSTANCE;
        initialize();
    }

    @VisibleForTesting
    IndexDatabase(Project project) throws IOException {
        this(project, new IndexDownArgsFactory());
    }

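    /*
     * Stored-field sets used when reading back limited-field versions of documents:
     * CHECK_FIELDS when checking per-file index settings (see checkSettings()),
     * REVERT_COUNTS_FIELDS when reverting the #Lines/LOC contribution of a
     * document that is being removed from the index.
     */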
    static {
        CHECK_FIELDS = new HashSet<>();
        CHECK_FIELDS.add(QueryBuilder.TYPE);

        REVERT_COUNTS_FIELDS = new HashSet<>();
        REVERT_COUNTS_FIELDS.add(QueryBuilder.D);
        REVERT_COUNTS_FIELDS.add(QueryBuilder.PATH);
        REVERT_COUNTS_FIELDS.add(QueryBuilder.NUML);
        REVERT_COUNTS_FIELDS.add(QueryBuilder.LOC);
    }

    /**
     * Update the index database for all the projects.
     *
     * @param listener where to signal the changes to the database
     * @throws IOException if an error occurs
     */
    static CountDownLatch updateAll(IndexChangedListener listener) throws IOException {

        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        List<IndexDatabase> dbs = new ArrayList<>();

        if (env.hasProjects()) {
            for (Project project : env.getProjectList()) {
                dbs.add(new IndexDatabase(project));
            }
        } else {
            dbs.add(new IndexDatabase());
        }

        IndexerParallelizer parallelizer = RuntimeEnvironment.getInstance().getIndexerParallelizer();
        CountDownLatch latch = new CountDownLatch(dbs.size());
        for (IndexDatabase d : dbs) {
            final IndexDatabase db = d;
            if (listener != null) {
                db.addIndexChangedListener(listener);
            }

            parallelizer.getFixedExecutor().submit(() -> {
                try {
                    db.update();
                } catch (Throwable e) {
                    LOGGER.log(Level.SEVERE,
                            String.format("Problem updating index database in directory %s: ",
                                    db.indexDirectory.getDirectory()), e);
                } finally {
                    latch.countDown();
                }
            });
        }
        return latch;
    }

    /**
     * Update the index database for a number of sub-directories.
     *
     * @param listener where to signal the changes to the database
     * @param paths list of paths to be indexed
     */
    public static void update(IndexChangedListener listener, List<String> paths) {
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        IndexerParallelizer parallelizer = env.getIndexerParallelizer();
        List<IndexDatabase> dbs = new ArrayList<>();

        for (String path : paths) {
            Project project = Project.getProject(path);
            if (project == null && env.hasProjects()) {
                LOGGER.log(Level.WARNING, "Could not find a project for \"{0}\"", path);
            } else {
                IndexDatabase db;

                try {
                    if (project == null) {
                        db = new IndexDatabase();
                    } else {
                        db = new IndexDatabase(project);
                    }

                    int idx = dbs.indexOf(db);
                    if (idx != -1) {
                        db = dbs.get(idx);
                    }

                    if (db.addDirectory(path)) {
                        if (idx == -1) {
                            dbs.add(db);
                        }
                    } else {
                        LOGGER.log(Level.WARNING, "Directory does not exist \"{0}\".", path);
                    }
                } catch (IOException e) {
                    LOGGER.log(Level.WARNING, "An error occurred while updating index", e);

                }
            }

            for (final IndexDatabase db : dbs) {
                db.addIndexChangedListener(listener);
                parallelizer.getFixedExecutor().submit(() -> {
                    try {
                        db.update();
                    } catch (Throwable e) {
                        LOGGER.log(Level.SEVERE, "An error occurred while updating index", e);
                    }
                });
            }
        }
    }

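    /**
     * Set up the index directory, lock factory, path accepter, xref directory and
     * related helpers for this database, creating the index directory if needed.
     * @throws IOException if the index directory cannot be created
     */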
    @SuppressWarnings("PMD.CollapsibleIfStatements")
    private void initialize() throws IOException {
        synchronized (INSTANCE_LOCK) {
            RuntimeEnvironment env = RuntimeEnvironment.getInstance();
            File indexDir = new File(env.getDataRootFile(), INDEX_DIR);
            if (project != null) {
                indexDir = new File(indexDir, project.getPath());
            }

            if (!indexDir.exists() && !indexDir.mkdirs()) {
                // to avoid race conditions, just recheck..
                if (!indexDir.exists()) {
                    throw new FileNotFoundException("Failed to create root directory [" + indexDir.getAbsolutePath() + "]");
                }
            }

            lockfact = pickLockFactory(env);
            indexDirectory = FSDirectory.open(indexDir.toPath(), lockfact);
            pathAccepter = env.getPathAccepter();
            analyzerGuru = new AnalyzerGuru();
            xrefDir = new File(env.getDataRootFile(), XREF_DIR);
            listeners = new CopyOnWriteArrayList<>();
            dirtyFile = new File(indexDir, "dirty");
            dirty = dirtyFile.exists();
            directories = new ArrayList<>();
        }
    }

    /**
     * By default the indexer will traverse all directories in the project. If
     * you add directories with this function, update() will only process the
     * specified directories.
     *
     * @param dir The directory to scan
     * @return <code>true</code> if the directory is added, false otherwise
     */
    @SuppressWarnings("PMD.UseStringBufferForStringAppends")
    public boolean addDirectory(String dir) {
        String directory = dir;
        if (directory.startsWith("\\")) {
            directory = directory.replace('\\', '/');
        } else if (directory.charAt(0) != '/') {
            directory = "/" + directory;
        }
        File file = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), directory);
        if (file.exists()) {
            directories.add(directory);
            return true;
        }
        return false;
    }

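    /**
     * Log the number of files that need to be processed for the given directory,
     * provided printing of progress is enabled.
     * @param dir directory path
     * @param args {@link IndexDownArgs} instance carrying the file count
     */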
    private void showFileCount(String dir, IndexDownArgs args) {
        if (RuntimeEnvironment.getInstance().isPrintProgress()) {
            LOGGER.log(Level.INFO, String.format("Need to process: %d files for %s", args.curCount, dir));
        }
    }

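    /**
     * Mark the project as indexed in the configuration and notify the webapp
     * (if its URI is configured) so that it refreshes its project information.
     * @param project project to mark as indexed, may be {@code null}
     */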
    private void markProjectIndexed(Project project) {
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();

        // Successfully indexed the project. The message is sent even if
        // the project's isIndexed() is true because it triggers RepositoryInfo
        // refresh.
        if (project == null) {
            return;
        }

        // Also need to store the correct value in configuration
        // when indexer writes it to a file.
        project.setIndexed(true);

        if (env.getConfigURI() == null) {
            return;
        }

        Response response;
        try {
            response = ClientBuilder.newBuilder().connectTimeout(env.getConnectTimeout(), TimeUnit.SECONDS).build()
                    .target(env.getConfigURI())
                    .path("api")
                    .path("v1")
                    .path("projects")
                    .path(Util.uriEncode(project.getName()))
                    .path("indexed")
                    .request()
                    .headers(getWebAppHeaders())
                    .put(Entity.text(""));
        } catch (RuntimeException e) {
            LOGGER.log(Level.WARNING, String.format("Could not notify the webapp that project %s was indexed",
                    project), e);
            return;
        }

        if (response.getStatus() == Response.Status.ACCEPTED.getStatusCode()) {
            try {
                response = waitForAsyncApi(response);
            } catch (InterruptedException e) {
                LOGGER.log(Level.WARNING, "interrupted while waiting for API response", e);
            }
        }

        if (response.getStatusInfo().getFamily() != Response.Status.Family.SUCCESSFUL) {
            LOGGER.log(Level.WARNING, "Could not notify the webapp that project {0} was indexed: {1}",
                    new Object[] {project, response});
        }
    }

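    /**
     * @param project project to get the repositories for
     * @return list of repositories of the project for which a {@link Repository}
     * instance is known to {@link HistoryGuru}
     */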
    private static List<Repository> getRepositoriesForProject(Project project) {
        List<Repository> repositoryList = new ArrayList<>();

        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        List<RepositoryInfo> repositoryInfoList = env.getProjectRepositoriesMap().get(project);

        if (repositoryInfoList != null) {
            for (RepositoryInfo repositoryInfo : repositoryInfoList) {
                Repository repository = HistoryGuru.getInstance().getRepository(new File(repositoryInfo.getDirectoryName()));
                if (repository != null) {
                    repositoryList.add(repository);
                }
            }
        }

        return repositoryList;
    }

    /**
     * @return whether the repositories of the given project are ready for history-based reindex
     */
    private boolean isReadyForHistoryBasedReindex() {
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();

        // So far the history based reindex does not work without projects.
        if (!env.hasProjects()) {
            LOGGER.log(Level.FINEST, "projects are disabled, will be indexed by directory traversal.");
            return false;
        }

        if (project == null) {
            LOGGER.log(Level.FINEST, "no project, will be indexed by directory traversal.");
            return false;
        }

        // History needs to be enabled for the history cache to work (see the comment below).
        if (!project.isHistoryEnabled()) {
            LOGGER.log(Level.FINEST, "history is disabled, will be indexed by directory traversal.");
            return false;
        }

        // History cache is necessary to get the last indexed revision for given repository.
        if (!env.isHistoryCache()) {
            LOGGER.log(Level.FINEST, "history cache is disabled, will be indexed by directory traversal.");
            return false;
        }

        // Per project tunable can override the global tunable, therefore env.isHistoryBasedReindex() is not checked.
        if (!project.isHistoryBasedReindex()) {
            LOGGER.log(Level.FINEST, "history-based reindex is disabled, will be indexed by directory traversal.");
            return false;
        }

        /*
         * Check that the index is present for this project.
         * In case of the initial indexing, the traversal of all changesets would most likely be counterproductive,
         * assuming traversal of directory tree is cheaper than getting the files from SCM history
         * in such case.
         */
        try {
            if (getNumFiles() == 0) {
                LOGGER.log(Level.FINEST, "zero number of documents for project {0}, " +
                        "will be indexed by directory traversal.", project);
                return false;
            }
        } catch (IOException e) {
            LOGGER.log(Level.FINEST, "failed to get number of documents for project {0}, " +
                    "will be indexed by directory traversal.", project);
            return false;
        }

        // If there was no change to any of the repositories of the project, a FileCollector instance will be
        // returned, however the list of files therein will be empty, which is a legitimate situation (no change
        // of the project). Only in the case where getFileCollector() returns null (hinting that something went
        // wrong) should the file based traversal be done.
        if (env.getFileCollector(project.getName()) == null) {
            LOGGER.log(Level.FINEST, "no file collector for project {0}, will be indexed by directory traversal.",
                    project);
            return false;
        }

        List<Repository> repositories = getRepositoriesForProject(project);
        // Projects without repositories have to be indexed using indexDown().
        if (repositories.isEmpty()) {
            LOGGER.log(Level.FINEST, "project {0} has no repositories, will be indexed by directory traversal.",
                    project);
            return false;
        }

        for (Repository repository : repositories) {
            if (!isReadyForHistoryBasedReindex(repository)) {
                return false;
            }
        }

        // Here it is assumed there are no files untracked by the repositories of this project.
        return true;
    }

    /**
     * @param repository Repository instance
     * @return true if the repository can be used for history-based reindex
     */
    @VisibleForTesting
    boolean isReadyForHistoryBasedReindex(Repository repository) {
        if (!repository.isHistoryEnabled()) {
            LOGGER.log(Level.FINE, "history is disabled for {0}, " +
                    "the associated project {1} will be indexed using directory traversal",
                    new Object[]{repository, project});
            return false;
        }

        if (!repository.isHistoryBasedReindex()) {
            LOGGER.log(Level.FINE, "history based reindex is disabled for {0}, " +
                            "the associated project {1} will be indexed using directory traversal",
                    new Object[]{repository, project});
            return false;
        }

        if (!(repository instanceof RepositoryWithHistoryTraversal)) {
            LOGGER.log(Level.FINE, "project {0} has a repository {1} that does not support history traversal, " +
                            "the project will be indexed using directory traversal.",
                    new Object[]{project, repository});
            return false;
        }

        return true;
    }

    /**
     * Update the content of this index database.
     *
     * @throws IOException if an error occurs
     */
    public void update() throws IOException {
        synchronized (lock) {
            if (running) {
                throw new IOException("Indexer already running!");
            }
            running = true;
            interrupted = false;
        }

        RuntimeEnvironment env = RuntimeEnvironment.getInstance();

        reader = null;
        writer = null;
        settings = null;
        uidIter = null;
        postsIter = null;
        indexedSymlinks.clear();

        IOException finishingException = null;
        try {
            Analyzer analyzer = AnalyzerGuru.getAnalyzer();
            IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
            iwc.setRAMBufferSizeMB(env.getRamBufferSize());
            writer = new IndexWriter(indexDirectory, iwc);
            writer.commit(); // to make sure index exists on the disk
            completer = new PendingFileCompleter();

            if (directories.isEmpty()) {
                if (project == null) {
                    directories.add("");
                } else {
                    directories.add(project.getPath());
                }
            }

            for (String dir : directories) {
                File sourceRoot;
                if ("".equals(dir)) {
                    sourceRoot = env.getSourceRootFile();
                } else {
                    sourceRoot = new File(env.getSourceRootFile(), dir);
                }

                dir = Util.fixPathIfWindows(dir);

                String startUid = Util.path2uid(dir, "");
                reader = DirectoryReader.open(indexDirectory); // open existing index
                countsAggregator = new NumLinesLOCAggregator();
                settings = readAnalysisSettings();
                if (settings == null) {
                    settings = new IndexAnalysisSettings3();
                }
                Terms terms = null;
                if (reader.numDocs() > 0) {
                    terms = MultiTerms.getTerms(reader, QueryBuilder.U);

                    NumLinesLOCAccessor countsAccessor = new NumLinesLOCAccessor();
                    if (countsAccessor.hasStored(reader)) {
                        isWithDirectoryCounts = true;
                        isCountingDeltas = true;
                    } else {
                        boolean foundCounts = countsAccessor.register(countsAggregator, reader);
                        isWithDirectoryCounts = false;
                        isCountingDeltas = foundCounts;
                        if (!isCountingDeltas) {
                            LOGGER.info("Forcing reindexing to fully compute directory counts");
                        }
                    }
                } else {
                    isWithDirectoryCounts = false;
                    isCountingDeltas = false;
                }

                try {
                    if (terms != null) {
                        uidIter = terms.iterator();
                        TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startUid)); //init uid
                        if (stat == TermsEnum.SeekStatus.END) {
                            uidIter = null;
                            LOGGER.log(Level.WARNING,
                                "Couldn''t find a start term for {0}, empty u field?",
                                startUid);
                        }
                    }

                    // The actual indexing happens in indexParallel(). Here we merely collect the files
                    // that need to be indexed and the files that should be removed.
                    IndexDownArgs args = indexDownArgsFactory.getIndexDownArgs();
                    boolean usedHistory = getIndexDownArgs(dir, sourceRoot, args);

                    // Traverse the trailing terms. This needs to be done before indexParallel() because
                    // in some cases it can add items to the args parameter.
                    processTrailingTerms(startUid, usedHistory, args);

                    args.curCount = 0;
                    Statistics elapsed = new Statistics();
                    LOGGER.log(Level.INFO, "Starting indexing of directory {0}", dir);
                    indexParallel(dir, args);
                    elapsed.report(LOGGER, String.format("Done indexing of directory %s", dir),
                            "indexer.db.directory.index");

                    /*
                     * As a signifier that #Lines/LOC are comprehensively
                     * stored so that later calculation is in deltas mode, we
                     * need at least one D-document saved. For a repo with only
                     * non-code files, however, no true #Lines/LOC will have
                     * been saved. Subsequent re-indexing will do more work
                     * than necessary (until a source code file is placed). We
                     * can record zeroes for a fake file under the root to get
                     * a D-document even for this special repo situation.
                     *
                     * Metrics are aggregated for directories up to the root,
                     * so it suffices to put the fake directly under the root.
                     */
                    if (!isWithDirectoryCounts) {
                        final String ROOT_FAKE_FILE = "/.OpenGrok_fake_file";
                        countsAggregator.register(new NumLinesLOC(ROOT_FAKE_FILE, 0, 0));
                    }
                    NumLinesLOCAccessor countsAccessor = new NumLinesLOCAccessor();
                    countsAccessor.store(writer, reader, countsAggregator,
                            isWithDirectoryCounts && isCountingDeltas);

                    markProjectIndexed(project);
                } finally {
                    reader.close();
                }
            }

            // The RuntimeException thrown from the block above can prevent the writing from completing.
            // This is deliberate.
            try {
                finishWriting();
            } catch (IOException e) {
                finishingException = e;
            }
        } catch (RuntimeException ex) {
            LOGGER.log(Level.SEVERE,
                "Failed with unexpected RuntimeException", ex);
            throw ex;
        } finally {
            completer = null;
            try {
                if (writer != null) {
                    writer.close();
                }
            } catch (IOException e) {
                if (finishingException == null) {
                    finishingException = e;
                }
                LOGGER.log(Level.WARNING,
                    "An error occurred while closing writer", e);
            } finally {
                writer = null;
                synchronized (lock) {
                    running = false;
                }
            }
        }

        if (finishingException != null) {
            throw finishingException;
        }

        if (!isInterrupted() && isDirty()) {
            if (env.isOptimizeDatabase()) {
                optimize();
            }
            env.setIndexTimestamp();
        }
    }

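    /**
     * Walk the remaining uid terms that start with {@code startUid} after the main pass.
     * For history-based reindex the trailing terms correspond to unchanged files, which are
     * re-indexed only if their index settings no longer match; otherwise the terms correspond
     * to removed files, whose documents (and history cache entries) are deleted.
     * @param startUid uid prefix of the directory being indexed
     * @param usedHistory whether history was used to gather the {@code IndexDownArgs}
     * @param args {@link IndexDownArgs} instance to which files to be re-indexed are added
     * @throws IOException on error
     */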
    private void processTrailingTerms(String startUid, boolean usedHistory, IndexDownArgs args) throws IOException {
        while (uidIter != null && uidIter.term() != null
                && uidIter.term().utf8ToString().startsWith(startUid)) {

            if (usedHistory) {
                // Allow for forced reindex. For history based reindex the trailing terms
                // correspond to the files that have not changed. Such files might need to be re-indexed
                // if the index format changed.
                String termPath = Util.uid2url(uidIter.term().utf8ToString());
                File termFile = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), termPath);
                boolean matchOK = (isWithDirectoryCounts || isCountingDeltas) &&
                        checkSettings(termFile, termPath);
                if (!matchOK) {
                    removeFile(false);

                    args.curCount++;
                    args.works.add(new IndexFileWork(termFile, termPath));
                }
            } else {
                // Remove data for the trailing terms that getIndexDownArgs()
                // did not traverse. These correspond to the files that have been
                // removed and have higher ordering than any present files.
                removeFile(true);
            }

            BytesRef next = uidIter.next();
            if (next == null) {
                uidIter = null;
            }
        }
    }

    /**
     * @param dir directory path
     * @param sourceRoot source root File object
     * @param args {@link IndexDownArgs} instance (output)
     * @return true if history was used to gather the {@code IndexDownArgs}
     * @throws IOException on error
     */
    @VisibleForTesting
    boolean getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args) throws IOException {
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        boolean historyBased = isReadyForHistoryBasedReindex();

        if (LOGGER.isLoggable(Level.INFO)) {
            LOGGER.log(Level.INFO, String.format("Starting file collection using %s traversal for directory '%s'",
                    historyBased ? "history" : "file-system", dir));
        }
        Statistics elapsed = new Statistics();
        if (historyBased) {
            indexDownUsingHistory(env.getSourceRootFile(), args);
        } else {
            indexDown(sourceRoot, dir, args);
        }

        elapsed.report(LOGGER, String.format("Done file collection for directory '%s'", dir),
                "indexer.db.collection");

        showFileCount(dir, args);

        return historyBased;
    }

    /**
     * Executes the first, serial stage of indexing, by going through the set of files assembled from history.
     * @param sourceRoot path to the source root (same as {@link RuntimeEnvironment#getSourceRootPath()})
     * @param args {@link IndexDownArgs} instance where the resulting files to be indexed will be stored
     * @throws IOException on error
     */
    @VisibleForTesting
    void indexDownUsingHistory(File sourceRoot, IndexDownArgs args) throws IOException {

        FileCollector fileCollector = RuntimeEnvironment.getInstance().getFileCollector(project.getName());

        for (String path : fileCollector.getFiles()) {
            File file = new File(sourceRoot, path);
            processFileIncremental(args, file, path);
        }
    }

    /**
     * Optimize all index databases.
     *
     * @throws IOException if an error occurs
     */
    static CountDownLatch optimizeAll() throws IOException {
        List<IndexDatabase> dbs = new ArrayList<>();
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        IndexerParallelizer parallelizer = env.getIndexerParallelizer();
        if (env.hasProjects()) {
            for (Project project : env.getProjectList()) {
                dbs.add(new IndexDatabase(project));
            }
        } else {
            dbs.add(new IndexDatabase());
        }

        CountDownLatch latch = new CountDownLatch(dbs.size());
        for (IndexDatabase d : dbs) {
            final IndexDatabase db = d;
            if (db.isDirty()) {
                parallelizer.getFixedExecutor().submit(() -> {
                    try {
                        db.update();
                    } catch (Throwable e) {
                        LOGGER.log(Level.SEVERE,
                            "Problem updating lucene index database: ", e);
                    } finally {
                        latch.countDown();
                    }
                });
            }
        }
        return latch;
    }

    /**
     * Optimize the index database.
     * @throws IOException I/O exception
     */
    public void optimize() throws IOException {
        synchronized (lock) {
            if (running) {
                LOGGER.warning("Optimize terminated... Someone else is updating / optimizing it!");
                return;
            }
            running = true;
        }

        IndexWriter wrt = null;
        IOException writerException = null;
        try {
            Statistics elapsed = new Statistics();
            String projectDetail = this.project != null ? " for project " + project.getName() : "";
            LOGGER.log(Level.INFO, "Optimizing the index{0}", projectDetail);
            Analyzer analyzer = new StandardAnalyzer();
            IndexWriterConfig conf = new IndexWriterConfig(analyzer);
            conf.setOpenMode(OpenMode.CREATE_OR_APPEND);

            wrt = new IndexWriter(indexDirectory, conf);
            wrt.forceMerge(1); // this is deprecated and not needed anymore
            elapsed.report(LOGGER, String.format("Done optimizing index%s", projectDetail),
                    "indexer.db.optimize");
            synchronized (lock) {
                if (dirtyFile.exists() && !dirtyFile.delete()) {
                    LOGGER.log(Level.FINE, "Failed to remove \"dirty-file\": {0}",
                        dirtyFile.getAbsolutePath());
                }
                dirty = false;
            }
        } catch (IOException e) {
            writerException = e;
            LOGGER.log(Level.SEVERE, "ERROR: optimizing index", e);
        } finally {
            if (wrt != null) {
                try {
                    wrt.close();
                } catch (IOException e) {
                    if (writerException == null) {
                        writerException = e;
                    }
                    LOGGER.log(Level.WARNING,
                        "An error occurred while closing writer", e);
                }
            }
            synchronized (lock) {
                running = false;
            }
        }

        if (writerException != null) {
            throw writerException;
        }
    }

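    /**
     * @return whether the index has been modified since the last optimize
     */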
    private boolean isDirty() {
        synchronized (lock) {
            return dirty;
        }
    }

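    /**
     * Mark the index as dirty, creating the on-disk "dirty" marker file if needed.
     */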
    private void setDirty() {
        synchronized (lock) {
            try {
                if (!dirty) {
                    if (!dirtyFile.createNewFile() && !dirtyFile.exists()) {
                        LOGGER.log(Level.FINE,
                                "Failed to create \"dirty-file\": {0}",
                                dirtyFile.getAbsolutePath());
                    }
                    dirty = true;
                }
            } catch (IOException e) {
                LOGGER.log(Level.FINE, "When creating dirty file: ", e);
            }
        }
    }

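    /**
     * @param path path to the file under source root
     * @param compress whether xref compression is in effect
     * @return the xref {@link File} for the path, with {@code .gz} suffix when compressed
     */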
    private File whatXrefFile(String path, boolean compress) {
        String xrefPath = compress ? TandemPath.join(path, ".gz") : path;
        return new File(xrefDir, xrefPath);
    }

    /**
     * Queue the removal of xref file for given path.
     * @param path path to file under source root
     */
    private void removeXrefFile(String path) {
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        File xrefFile = whatXrefFile(path, env.isCompressXref());
        PendingFileDeletion pending = new PendingFileDeletion(xrefFile.getAbsolutePath());
        completer.add(pending);
    }

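    /**
     * Clear the history cache entry for the given path.
     * @param path path to file under source root
     */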
    private void removeHistoryFile(String path) {
        HistoryGuru.getInstance().clearCacheFile(path);
    }

    /**
     * Remove a stale file from the index database and potentially also from history cache,
     * and queue the removal of the associated xref file.
     *
     * @param removeHistory if false, do not remove history cache for this file
     * @throws java.io.IOException if an error occurs
     */
    private void removeFile(boolean removeHistory) throws IOException {
        String path = Util.uid2url(uidIter.term().utf8ToString());

        for (IndexChangedListener listener : listeners) {
            listener.fileRemove(path);
        }

        removeFileDocUid(path);

        removeXrefFile(path);

        if (removeHistory) {
            removeHistoryFile(path);
        }

        setDirty();

        for (IndexChangedListener listener : listeners) {
            listener.fileRemoved(path);
        }
    }

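    /**
     * Delete the document matching the current uid term from the index, first reverting
     * its #Lines/LOC contribution if deltas are being counted.
     * @param path path to file under source root
     * @throws IOException on error
     */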
    private void removeFileDocUid(String path) throws IOException {

        // Determine if a reversal of counts is necessary, and execute if so.
        if (isCountingDeltas) {
            postsIter = uidIter.postings(postsIter);
            while (postsIter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                // Read a limited-fields version of the document.
                Document doc = reader.document(postsIter.docID(), REVERT_COUNTS_FIELDS);
                if (doc != null) {
                    decrementLOCforDoc(path, doc);
                    break;
                }
            }
        }

        writer.deleteDocuments(new Term(QueryBuilder.U, uidIter.term()));
    }

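    /**
     * Register negative #Lines/LOC for the document so that the aggregated
     * directory counts are decremented accordingly.
     * @param path path to file under source root
     * @param doc document holding the stored number-of-lines/LOC fields
     */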
    private void decrementLOCforDoc(String path, Document doc) {
        NullableNumLinesLOC nullableCounts = NumLinesLOCUtil.read(doc);
        if (nullableCounts.getNumLines() != null && nullableCounts.getLOC() != null) {
            NumLinesLOC counts = new NumLinesLOC(path,
                    -nullableCounts.getNumLines(),
                    -nullableCounts.getLOC());
            countsAggregator.register(counts);
        }
    }

    /**
     * Add a file to the Lucene index (and generate an xref file).
     *
     * @param file The file to add
     * @param path The path to the file (from source root)
     * @param ctags a defined instance to use (only if its binary is not null)
     * @throws java.io.IOException if an error occurs
     * @throws InterruptedException if a timeout occurs
     */
    private void addFile(File file, String path, Ctags ctags) throws IOException, InterruptedException {

        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        AbstractAnalyzer fa = getAnalyzerFor(file, path);

        for (IndexChangedListener listener : listeners) {
            listener.fileAdd(path, fa.getClass().getSimpleName());
        }

        ctags.setTabSize(project != null ? project.getTabSize() : 0);
        if (env.getCtagsTimeout() != 0) {
            ctags.setTimeout(env.getCtagsTimeout());
        }
        fa.setCtags(ctags);
        fa.setCountsAggregator(countsAggregator);
        fa.setProject(Project.getProject(path));
        fa.setScopesEnabled(env.isScopesEnabled());
        fa.setFoldingEnabled(env.isFoldingEnabled());

        Document doc = new Document();
        CountingWriter xrefOut = null;
        try {
            String xrefAbs = null;
            File transientXref = null;
            if (env.isGenerateHtml()) {
                xrefAbs = getXrefPath(path);
                transientXref = new File(TandemPath.join(xrefAbs,
                        PendingFileCompleter.PENDING_EXTENSION));
                xrefOut = newXrefWriter(path, transientXref, env.isCompressXref());
            }

            analyzerGuru.populateDocument(doc, file, path, fa, xrefOut);

            // Avoid producing empty xref files.
            if (xrefOut != null && xrefOut.getCount() > 0) {
                PendingFileRenaming ren = new PendingFileRenaming(xrefAbs,
                        transientXref.getAbsolutePath());
                completer.add(ren);
            } else if (xrefOut != null) {
                LOGGER.log(Level.FINER, "xref for {0} would be empty, will remove", path);
                completer.add(new PendingFileDeletion(transientXref.toString()));
            }
        } catch (InterruptedException e) {
            LOGGER.log(Level.WARNING, "File ''{0}'' interrupted--{1}",
                new Object[]{path, e.getMessage()});
            cleanupResources(doc);
            throw e;
        } catch (Exception e) {
            LOGGER.log(Level.INFO,
                    "Skipped file ''{0}'' because the analyzer didn''t "
                    + "understand it.",
                    path);
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.log(Level.FINE, "Exception from analyzer " +
                    fa.getClass().getName(), e);
            }
            cleanupResources(doc);
            return;
        } finally {
            fa.setCtags(null);
            fa.setCountsAggregator(null);
            if (xrefOut != null) {
                xrefOut.close();
            }
        }

        try {
            writer.addDocument(doc);
        } catch (Throwable t) {
            cleanupResources(doc);
            throw t;
        }

        setDirty();

        for (IndexChangedListener listener : listeners) {
            listener.fileAdded(path, fa.getClass().getSimpleName());
        }
    }

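    /**
     * @param file file to probe
     * @param path path to the file under source root
     * @return analyzer chosen based on the file content and its path
     * @throws IOException if the file cannot be read
     */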
    private AbstractAnalyzer getAnalyzerFor(File file, String path)
            throws IOException {
        try (InputStream in = new BufferedInputStream(
                new FileInputStream(file))) {
            return AnalyzerGuru.getAnalyzer(in, path);
        }
    }

    /**
     * Do a best effort to clean up all resources allocated when populating
     * a Lucene document. On normal execution, these resources should be
     * closed automatically by the index writer once it's done with them, but
     * we may not get that far if something fails.
     *
     * @param doc the document whose resources to clean up
     */
    private static void cleanupResources(Document doc) {
        for (IndexableField f : doc) {
            // If the field takes input from a reader, close the reader.
            IOUtils.close(f.readerValue());

            // If the field takes input from a token stream, close the
            // token stream.
            if (f instanceof Field) {
                IOUtils.close(((Field) f).tokenStreamValue());
            }
        }
    }

    /**
     * Check if I should accept this file into the index database.
     *
     * @param file the file to check
     * @param ret defined instance whose {@code localRelPath} property will be
     * non-null afterward if and only if {@code file} is a symlink that targets
     * either a {@link Repository}-local filesystem object or the same object
     * as a previously-detected and allowed symlink. N.b. method will return
     * {@code false} if {@code ret.localRelPath} is set non-null.
     * @return a value indicating if {@code file} should be included in index
     */
    private boolean accept(File file, AcceptSymlinkRet ret) {
        ret.localRelPath = null;
        String absolutePath = file.getAbsolutePath();

        if (!pathAccepter.accept(file)) {
            return false;
        }

        if (!file.canRead()) {
            LOGGER.log(Level.WARNING, "Could not read {0}", absolutePath);
            return false;
        }

        try {
            Path absolute = Paths.get(absolutePath);
            if (Files.isSymbolicLink(absolute)) {
                File canonical = file.getCanonicalFile();
                if (!absolutePath.equals(canonical.getPath()) &&
                        !acceptSymlink(absolute, canonical, ret)) {
                    if (ret.localRelPath == null) {
                        LOGGER.log(Level.FINE, "Skipped symlink ''{0}'' -> ''{1}''",
                                new Object[] {absolutePath, canonical});
                    }
                    return false;
                }
            }
            // Only files and directories are accepted below; anything else is considered special and is not added.
            if (!file.isFile() && !file.isDirectory()) {
                LOGGER.log(Level.WARNING, "Ignored special file {0}",
                    absolutePath);
                return false;
            }
        } catch (IOException exp) {
            LOGGER.log(Level.WARNING, "Failed to resolve name: {0}",
                absolutePath);
            LOGGER.log(Level.FINE, "Stack Trace: ", exp);
        }

        if (file.isDirectory()) {
            // always accept directories so that their files can be examined
            return true;
        }

        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        // Lookup history if indexing versioned files only.
        // Skip the lookup entirely (which is expensive) if unversioned files are allowed
        if (env.isIndexVersionedFilesOnly()) {
            if (HistoryGuru.getInstance().hasHistory(file)) {
                // versioned files should always be accepted
                return true;
            }
            LOGGER.log(Level.FINER, "not accepting unversioned {0}", absolutePath);
            return false;
        }
        // unversioned files are allowed
        return true;
    }

    /**
     * Determines if {@code file} should be accepted into the index database.
     * @param parent parent of {@code file}
     * @param file directory object under consideration
     * @param ret defined instance whose {@code localRelPath} property will be
     * non-null afterward if and only if {@code file} is a symlink that targets
     * either a {@link Repository}-local filesystem object or the same object
     * as a previously-detected and allowed symlink. N.b. method will return
     * {@code false} if {@code ret.localRelPath} is set non-null.
     * @return a value indicating if {@code file} should be included in index
     */
    private boolean accept(File parent, File file, AcceptSymlinkRet ret) {
        ret.localRelPath = null;

        try {
            File f1 = parent.getCanonicalFile();
            File f2 = file.getCanonicalFile();
            if (f1.equals(f2)) {
                LOGGER.log(Level.INFO, "Skipping links to itself...: {0} {1}",
                        new Object[]{parent.getAbsolutePath(), file.getAbsolutePath()});
                return false;
            }

            // Now, let's verify that it's not a link back up the chain...
            File t1 = f1;
            while ((t1 = t1.getParentFile()) != null) {
                if (f2.equals(t1)) {
                    LOGGER.log(Level.INFO, "Skipping links to parent...: {0} {1}",
                            new Object[]{parent.getAbsolutePath(), file.getAbsolutePath()});
                    return false;
                }
            }

            return accept(file, ret);
        } catch (IOException ex) {
            LOGGER.log(Level.WARNING, "Failed to resolve name: {0} {1}",
                    new Object[]{parent.getAbsolutePath(), file.getAbsolutePath()});
        }
        return false;
    }

1233 
1234     /**
1235      * Check if I should accept the path containing a symlink.
1236      *
1237      * @param absolute the path with a symlink to check
1238      * @param canonical the canonical file object
1239      * @param ret defined instance whose {@code localRelPath} property will be
1240      * non-null afterward if and only if {@code absolute} is a symlink that
1241      * targets either a {@link Repository}-local filesystem object or the same
1242      * object ({@code canonical}) as a previously-detected and allowed symlink.
1243      * N.b. method will return {@code false} if {@code ret.localRelPath} is set
1244      * non-null.
1245      * @return a value indicating if {@code file} should be included in index
1246      */
acceptSymlink(Path absolute, File canonical, AcceptSymlinkRet ret)1247     private boolean acceptSymlink(Path absolute, File canonical, AcceptSymlinkRet ret) {
1248         ret.localRelPath = null;
1249 
1250         String absolute1 = absolute.toString();
1251         String canonical1 = canonical.getPath();
1252         boolean isCanonicalDir = canonical.isDirectory();
1253         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
1254         IndexedSymlink indexed1;
1255         String absolute0;
1256 
1257         if (isLocal(canonical1)) {
1258             if (!isCanonicalDir) {
1259                 if (LOGGER.isLoggable(Level.FINEST)) {
1260                     LOGGER.log(Level.FINEST, "Local {0} has symlink from {1}",
1261                             new Object[] {canonical1, absolute1});
1262                 }
1263                 /*
1264                  * Always index symlinks to local files, but do not add to
1265                  * indexedSymlinks for a non-directory.
1266                  */
1267                 return true;
1268             }
1269 
1270             /*
1271              * Do not index symlinks to local directories, because the
1272              * canonical target will be indexed on its own -- but relativize()
1273              * a path to be returned in ret so that a symlink can be replicated
1274              * in xref/.
1275              */
1276             ret.localRelPath = absolute.getParent().relativize(
1277                     canonical.toPath()).toString();
1278 
1279             // Try to put the prime absolute path into indexedSymlinks.
1280             try {
1281                 String primeRelative = env.getPathRelativeToSourceRoot(canonical);
1282                 absolute0 = env.getSourceRootPath() + primeRelative;
1283             } catch (ForbiddenSymlinkException | IOException e) {
1284                 /*
1285                  * This is not expected, as indexDown() would have operated on
1286                  * the file already -- but we are forced to handle it.
1287                  */
1288                 LOGGER.log(Level.WARNING, String.format(
1289                         "Unexpected error getting relative for %s", canonical), e);
1290                 absolute0 = absolute1;
1291             }
1292             indexed1 = new IndexedSymlink(absolute0, canonical1, true);
1293             indexedSymlinks.put(canonical1, indexed1);
1294             return false;
1295         }
1296 
1297         IndexedSymlink indexed0;
1298         if ((indexed0 = indexedSymlinks.get(canonical1)) != null) {
1299             if (absolute1.equals(indexed0.getAbsolute())) {
1300                 return true;
1301             }
1302 
1303             /*
1304              * Do not index symlinks to external directories already indexed
1305              * as linked elsewhere, because the canonical target will be
1306              * indexed already -- but relativize() a path to be returned in ret
1307              * so that this second symlink can be redone as a local
1308              * (non-external) symlink in xref/.
1309              */
1310             ret.localRelPath = absolute.getParent().relativize(
1311                     Paths.get(indexed0.getAbsolute())).toString();
1312 
1313             if (LOGGER.isLoggable(Level.FINEST)) {
1314                 LOGGER.log(Level.FINEST, "External dir {0} has symlink from {1} after first {2}",
1315                         new Object[] {canonical1, absolute1, indexed0.getAbsolute()});
1316             }
1317             return false;
1318         }
1319 
1320         /*
1321          * Iterate through indexedSymlinks, which is sorted so that shorter
1322          * canonical entries come first, to see if the new link is a child
1323          * canonically.
1324          */
1325         for (IndexedSymlink a0 : indexedSymlinks.values()) {
1326             indexed0 = a0;
1327             if (!indexed0.isLocal() && canonical1.startsWith(indexed0.getCanonicalSeparated())) {
1328                 absolute0 = indexed0.getAbsolute();
1329                 if (!isCanonicalDir) {
1330                     if (LOGGER.isLoggable(Level.FINEST)) {
1331                         LOGGER.log(Level.FINEST,
1332                                 "External file {0} has symlink from {1} under previous {2}",
1333                                 new Object[] {canonical1, absolute1, absolute0});
1334                     }
1335                     // Do not add to indexedSymlinks for a non-directory.
1336                     return true;
1337                 }
1338 
1339                 /*
1340                  * See above about redoing a sourceRoot symlink as a local
1341                  * (non-external) symlink in xref/.
1342                  */
1343                 Path abs0 = Paths.get(absolute0, canonical1.substring(
1344                         indexed0.getCanonicalSeparated().length()));
1345                 ret.localRelPath = absolute.getParent().relativize(abs0).toString();
1346 
1347                 if (LOGGER.isLoggable(Level.FINEST)) {
1348                     LOGGER.log(Level.FINEST,
1349                             "External dir {0} has symlink from {1} under previous {2}",
1350                             new Object[] {canonical1, absolute1, absolute0});
1351                 }
1352                 return false;
1353             }
1354         }
1355 
1356         Set<String> canonicalRoots = env.getCanonicalRoots();
1357         for (String canonicalRoot : canonicalRoots) {
1358             if (canonical1.startsWith(canonicalRoot)) {
1359                 if (LOGGER.isLoggable(Level.FINEST)) {
1360                     LOGGER.log(Level.FINEST, "Allowed symlink {0} per canonical root {1}",
1361                             new Object[] {absolute1, canonical1});
1362                 }
1363                 if (isCanonicalDir) {
1364                     indexed1 = new IndexedSymlink(absolute1, canonical1, false);
1365                     indexedSymlinks.put(canonical1, indexed1);
1366                 }
1367                 return true;
1368             }
1369         }
1370 
1371         Set<String> allowedSymlinks = env.getAllowedSymlinks();
1372         for (String allowedSymlink : allowedSymlinks) {
1373             String allowedTarget;
1374             try {
1375                 allowedTarget = new File(allowedSymlink).getCanonicalPath();
1376             } catch (IOException e) {
1377                 LOGGER.log(Level.FINE, "unresolvable symlink: {0}", allowedSymlink);
1378                 continue;
1379             }
1380             /*
1381              * The following canonical check is sufficient because indexDown()
1382              * traverses top-down, and any intermediate symlinks would have
1383              * also been checked here for an allowed canonical match. This
1384              * technically means that if there is a set of redundant symlinks
1385              * with the same canonical target, then allowing one of the set
1386              * will allow all others in the set.
1387              */
1388             if (canonical1.equals(allowedTarget)) {
1389                 if (isCanonicalDir) {
1390                     indexed1 = new IndexedSymlink(absolute1, canonical1, false);
1391                     indexedSymlinks.put(canonical1, indexed1);
1392                 }
1393                 return true;
1394             }
1395         }
1396         return false;
1397     }
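    /*
     * Editor's sketch (not part of the original file): how the relativize()
     * calls above produce the localRelPath later used to recreate the symlink
     * in xref/. The paths are hypothetical:
     *
     *   Path absolute = Paths.get("/src/proj/link");   // the symlink itself
     *   File canonical = new File("/src/proj/real");   // its resolved target
     *   String rel = absolute.getParent().relativize(canonical.toPath()).toString();
     *   // rel == "real" -- a relative target, so xref/proj/link can later be
     *   // written as a local symlink instead of an absolute one.
     */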
1398 
1399     /**
1400      * Check if a file is local to the current project. If we don't have
1401      * projects, check if the file is in the source root.
1402      *
1403      * @param path the path to a file
1404      * @return true if the file is local to the current project (or under the source root when projects are not used)
1405      */
1406     private boolean isLocal(String path) {
1407         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
1408         String srcRoot = env.getSourceRootPath();
1409 
1410         if (path.startsWith(srcRoot + File.separator)) {
1411             if (env.hasProjects()) {
1412                 String relPath = path.substring(srcRoot.length());
1413                 // If file is under the current project, then it's local.
1414                 return project.equals(Project.getProject(relPath));
1415             } else {
1416                 // File is under source root, and we don't have projects, so
1417                 // consider it local.
1418                 return true;
1419             }
1420         }
1421 
1422         return false;
1423     }
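    /*
     * Editor's sketch (hypothetical values, not from the original file): with
     * source root "/src" and path "/src/foo/main.c", the prefix test above
     * passes, relPath becomes "/foo/main.c", and with projects enabled the
     * result reduces to:
     *
     *   project.equals(Project.getProject("/foo/main.c"));
     */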
1424 
1425     private void handleSymlink(String path, AcceptSymlinkRet ret) {
1426         /*
1427          * If ret.localRelPath is defined, then a symlink was detected but
1428          * not "accepted" to avoid redundancy with an already-accepted
1429          * canonical target. Set up for a deferred creation of a symlink
1430          * within xref/.
1431          */
1432         if (ret.localRelPath != null) {
1433             File xrefPath = new File(xrefDir, path);
1434             PendingSymlinkage psym = new PendingSymlinkage(xrefPath.getAbsolutePath(), ret.localRelPath);
1435             completer.add(psym);
1436         }
1437     }
1438 
1439     /**
1440      * Executes the first, serial stage of indexing: it recursively traverses the file
1441      * system and walks the index terms alongside.
1442      * <p>At a minimum, files are counted, and any deleted or updated files (based on
1443      * comparison to the Lucene index) are passed to
1444      * {@link #removeFile(boolean)}. New or updated files are noted for indexing.
1445      * @param dir the root indexDirectory to generate indexes for
1446      * @param parent path to parent directory
1447      * @param args arguments to control execution and for collecting a list of
1448      * files for indexing
1449      */
1450     @VisibleForTesting
1451     void indexDown(File dir, String parent, IndexDownArgs args) throws IOException {
1452 
1453         if (isInterrupted()) {
1454             return;
1455         }
1456 
1457         AcceptSymlinkRet ret = new AcceptSymlinkRet();
1458         if (!accept(dir, ret)) {
1459             handleSymlink(parent, ret);
1460             return;
1461         }
1462 
1463         File[] files = dir.listFiles();
1464         if (files == null) {
1465             LOGGER.log(Level.SEVERE, "Failed to get file listing for: {0}",
1466                 dir.getPath());
1467             return;
1468         }
1469         Arrays.sort(files, FILENAME_COMPARATOR);
1470 
1471         for (File file : files) {
1472             String path = parent + File.separator + file.getName();
1473             if (!accept(dir, file, ret)) {
1474                 handleSymlink(path, ret);
1475             } else {
1476                 if (file.isDirectory()) {
1477                     indexDown(file, path, args);
1478                 } else {
1479                     processFile(args, file, path);
1480                 }
1481             }
1482         }
1483     }
1484 
1485     /**
1486      * Compared with {@link #processFile(IndexDownArgs, File, String)}, this method's file/path arguments
1487      * represent files that have actually changed in some way, while the other method's arguments represent
1488      * files present on disk.
1489      * @param args {@link IndexDownArgs} instance
1490      * @param file File object
1491      * @param path path of the file argument relative to source root (with leading slash)
1492      * @throws IOException on error
1493      */
1494     private void processFileIncremental(IndexDownArgs args, File file, String path) throws IOException {
1495         if (uidIter != null) {
1496             path = Util.fixPathIfWindows(path);
1497             // Traverse terms until reaching one that matches the path of the given file.
1498             while (uidIter != null && uidIter.term() != null
1499                     && uidIter.term().compareTo(emptyBR) != 0
1500                     && Util.uid2url(uidIter.term().utf8ToString()).compareTo(path) < 0) {
1501 
1502                 // A file that was not changed.
1503                 /*
1504                  * Possibly short-circuit to force reindexing of prior-version indexes.
1505                  */
1506                 String termPath = Util.uid2url(uidIter.term().utf8ToString());
1507                 File termFile = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), termPath);
1508                 boolean matchOK = (isWithDirectoryCounts || isCountingDeltas) &&
1509                         checkSettings(termFile, termPath);
1510                 if (!matchOK) {
1511                     removeFile(false);
1512 
1513                     args.curCount++;
1514                     args.works.add(new IndexFileWork(termFile, termPath));
1515                 }
1516 
1517                 BytesRef next = uidIter.next();
1518                 if (next == null) {
1519                     uidIter = null;
1520                 }
1521             }
1522 
1523             if (uidIter != null && uidIter.term() != null
1524                     && Util.uid2url(uidIter.term().utf8ToString()).equals(path)) {
1525                 /*
1526                  * At this point we know that the file has a corresponding term in the index
1527                  * and has changed in some way: either it was deleted or it was modified.
1528                  */
1529                 if (!file.exists()) {
1530                     removeFile(true);
1531                 } else {
1532                     removeFile(false);
1533 
1534                     args.curCount++;
1535                     args.works.add(new IndexFileWork(file, path));
1536                 }
1537 
1538                 BytesRef next = uidIter.next();
1539                 if (next == null) {
1540                     uidIter = null;
1541                 }
1542             } else {
1543                 // Potentially new file. A file might be added and then deleted,
1544                 // so it is necessary to check its existence.
1545                 if (file.exists()) {
1546                     args.curCount++;
1547                     args.works.add(new IndexFileWork(file, path));
1548                 }
1549             }
1550         } else {
1551             if (file.exists()) {
1552                 args.curCount++;
1553                 args.works.add(new IndexFileWork(file, path));
1554             }
1555         }
1556     }
1557 
1558     /**
1559      * Process a file on disk w.r.t. index.
1560      * @param args {@link IndexDownArgs} instance
1561      * @param file File object
1562      * @param path path corresponding to the file parameter, relative to source root (with leading slash)
1563      * @throws IOException on error
1564      */
1565     private void processFile(IndexDownArgs args, File file, String path) throws IOException {
1566         if (uidIter != null) {
1567             path = Util.fixPathIfWindows(path);
1568             String uid = Util.path2uid(path,
1569                 DateTools.timeToString(file.lastModified(),
1570                 DateTools.Resolution.MILLISECOND)); // construct uid for doc
1571             BytesRef buid = new BytesRef(uid);
1572             // Traverse terms that have a smaller UID than the current file,
1573             // i.e. given the ordering they are positioned before the file,
1574             // or represent the file itself if it has been modified.
1575             while (uidIter != null && uidIter.term() != null
1576                     && uidIter.term().compareTo(emptyBR) != 0
1577                     && uidIter.term().compareTo(buid) < 0) {
1578 
1579                 // If the term's path matches the path of the currently processed file,
1580                 // it is clear that the file has been modified and thus
1581                 // removeFile() will be followed by a call to addFile() in indexParallel().
1582                 // In such a case, instruct removeFile() not to remove the history
1583                 // cache for the file so that incremental history cache
1584                 // generation works.
1585                 String termPath = Util.uid2url(uidIter.term().utf8ToString());
1586                 removeFile(!termPath.equals(path));
1587 
1588                 BytesRef next = uidIter.next();
1589                 if (next == null) {
1590                     uidIter = null;
1591                 }
1592             }
1593 
1594             // If the file was not modified, skip to the next one (unless the settings checks below force a reindex).
1595             if (uidIter != null && uidIter.term() != null && uidIter.term().bytesEquals(buid)) {
1596 
1597                 /*
1598                  * Possibly short-circuit to force reindexing of prior-version indexes.
1599                  */
1600                 boolean matchOK = (isWithDirectoryCounts || isCountingDeltas) &&
1601                         checkSettings(file, path);
1602                 if (!matchOK) {
1603                     removeFile(false);
1604                 }
1605 
1606                 BytesRef next = uidIter.next();
1607                 if (next == null) {
1608                     uidIter = null;
1609                 }
1610 
1611                 if (matchOK) {
1612                     return;
1613                 }
1614             }
1615         }
1616 
1617         args.curCount++;
1618         args.works.add(new IndexFileWork(file, path));
1619     }
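    /*
     * Editor's sketch of the uid ordering the while-loop above relies on; the
     * path and timestamps are hypothetical:
     *
     *   String older = Util.path2uid("/foo/main.c",
     *           DateTools.timeToString(1000L, DateTools.Resolution.MILLISECOND));
     *   String newer = Util.path2uid("/foo/main.c",
     *           DateTools.timeToString(2000L, DateTools.Resolution.MILLISECOND));
     *   // DateTools encodes timestamps so that lexicographic order follows
     *   // time order, hence new BytesRef(older).compareTo(new BytesRef(newer))
     *   // < 0, and a stale term for the same path is consumed by removeFile().
     */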
1620 
1621     /**
1622      * Executes the second, parallel stage of indexing.
1623      * @param dir the parent directory (when appended to SOURCE_ROOT)
1624      * @param args contains a list of files to index, found during the earlier stage
1625      */
1626     private void indexParallel(String dir, IndexDownArgs args) {
1627 
1628         int worksCount = args.works.size();
1629         if (worksCount < 1) {
1630             return;
1631         }
1632 
1633         AtomicInteger successCounter = new AtomicInteger();
1634         AtomicInteger currentCounter = new AtomicInteger();
1635         AtomicInteger alreadyClosedCounter = new AtomicInteger();
1636         IndexerParallelizer parallelizer = RuntimeEnvironment.getInstance().getIndexerParallelizer();
1637         ObjectPool<Ctags> ctagsPool = parallelizer.getCtagsPool();
1638 
1639         Map<Boolean, List<IndexFileWork>> bySuccess = null;
1640         try (Progress progress = new Progress(LOGGER, dir, worksCount)) {
1641             bySuccess = parallelizer.getForkJoinPool().submit(() ->
1642                 args.works.parallelStream().collect(
1643                 Collectors.groupingByConcurrent((x) -> {
1644                     int tries = 0;
1645                     Ctags pctags = null;
1646                     boolean ret;
1647                     Statistics stats = new Statistics();
1648                     while (true) {
1649                         try {
1650                             if (alreadyClosedCounter.get() > 0) {
1651                                 ret = false;
1652                             } else {
1653                                 pctags = ctagsPool.get();
1654                                 addFile(x.file, x.path, pctags);
1655                                 successCounter.incrementAndGet();
1656                                 ret = true;
1657                             }
1658                         } catch (AlreadyClosedException e) {
1659                             alreadyClosedCounter.incrementAndGet();
1660                             String errmsg = String.format("ERROR addFile(): %s", x.file);
1661                             LOGGER.log(Level.SEVERE, errmsg, e);
1662                             x.exception = e;
1663                             ret = false;
1664                         } catch (InterruptedException e) {
1665                             // Allow one retry if interrupted
1666                             if (++tries <= 1) {
1667                                 continue;
1668                             }
1669                             LOGGER.log(Level.WARNING, "No retry: {0}", x.file);
1670                             x.exception = e;
1671                             ret = false;
1672                         } catch (RuntimeException | IOException e) {
1673                             String errmsg = String.format("ERROR addFile(): %s", x.file);
1674                             LOGGER.log(Level.WARNING, errmsg, e);
1675                             x.exception = e;
1676                             ret = false;
1677                         } finally {
1678                             if (pctags != null) {
1679                                 pctags.reset();
1680                                 ctagsPool.release(pctags);
1681                             }
1682                         }
1683 
1684                         progress.increment();
1685                         stats.report(LOGGER, Level.FINEST,
1686                                 String.format("file ''%s'' %s", x.file, ret ? "indexed" : "failed indexing"));
1687                         return ret;
1688                     }
1689                 }))).get();
1690         } catch (InterruptedException | ExecutionException e) {
1691             int successCount = successCounter.intValue();
1692             double successPct = 100.0 * successCount / worksCount;
1693             String exmsg = String.format("%d successes (%.1f%%) after aborting parallel-indexing",
1694                 successCount, successPct);
1695             LOGGER.log(Level.SEVERE, exmsg, e);
1696         }
1697 
1698         args.curCount = currentCounter.intValue();
1699 
1700         // Start with failureCount=worksCount, and then subtract successes.
1701         int failureCount = worksCount;
1702         if (bySuccess != null) {
1703             List<IndexFileWork> successes = bySuccess.getOrDefault(Boolean.TRUE, null);
1704             if (successes != null) {
1705                 failureCount -= successes.size();
1706             }
1707         }
1708         if (failureCount > 0) {
1709             double pctFailed = 100.0 * failureCount / worksCount;
1710             String exmsg = String.format("%d failures (%.1f%%) while parallel-indexing", failureCount, pctFailed);
1711             LOGGER.log(Level.WARNING, exmsg);
1712         }
1713 
1714         /*
1715          * Encountering an AlreadyClosedException is severe enough to abort the
1716          * run, since it will fail anyway later upon trying to commit().
1717          */
1718         int numAlreadyClosed = alreadyClosedCounter.get();
1719         if (numAlreadyClosed > 0) {
1720             throw new AlreadyClosedException(String.format("count=%d", numAlreadyClosed));
1721         }
1722     }
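    /*
     * Editor's sketch: the parallel partitioning pattern used above, reduced
     * to a self-contained form (items and tryIndex() are hypothetical):
     *
     *   ForkJoinPool pool = new ForkJoinPool(4);
     *   Map<Boolean, List<String>> byOk = pool.submit(() ->
     *           items.parallelStream().collect(
     *                   Collectors.groupingByConcurrent(item -> tryIndex(item))))
     *           .get();
     *   // byOk.get(Boolean.TRUE) holds the successes; the failure count is
     *   // the total minus their size, as computed after the try-block above.
     */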
1723 
1724     private boolean isInterrupted() {
1725         synchronized (lock) {
1726             return interrupted;
1727         }
1728     }
1729 
1730     /**
1731      * Register an object to receive events when modifications are made to the
1732      * index database.
1733      *
1734      * @param listener the object to receive the events
1735      */
1736     public void addIndexChangedListener(IndexChangedListener listener) {
1737         if (listener != null) {
1738             listeners.add(listener);
1739         }
1740     }
1741 
1742     /**
1743      * Get all files in some of the index databases.
1744      *
1745      * @param subFiles subdirectories of various projects, or null or an empty list to get everything
1746      * @throws IOException if an error occurs
1747      * @return set of files in the index databases specified by the subFiles parameter
1748      */
1749     public static Set<String> getAllFiles(List<String> subFiles) throws IOException {
1750         Set<String> files = new HashSet<>();
1751         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
1752 
1753         if (env.hasProjects()) {
1754             if (subFiles == null || subFiles.isEmpty()) {
1755                 for (Project project : env.getProjectList()) {
1756                     IndexDatabase db = new IndexDatabase(project);
1757                     files.addAll(db.getFiles());
1758                 }
1759             } else {
1760                 for (String path : subFiles) {
1761                     Project project = Project.getProject(path);
1762                     if (project == null) {
1763                         LOGGER.log(Level.WARNING, "Could not find a project for \"{0}\"", path);
1764                     } else {
1765                         IndexDatabase db = new IndexDatabase(project);
1766                         files.addAll(db.getFiles());
1767                     }
1768                 }
1769             }
1770         } else {
1771             IndexDatabase db = new IndexDatabase();
1772             files = db.getFiles();
1773         }
1774 
1775         return files;
1776     }
1777 
1778     /**
1779      * Get all files in this index database.
1780      *
1781      * @throws IOException If an IO error occurs while reading from the database
1782      * @return set of files in this index database
1783      */
1784     public Set<String> getFiles() throws IOException {
1785         IndexReader ireader = null;
1786         TermsEnum iter = null;
1787         Terms terms;
1788         Set<String> files = new HashSet<>();
1789 
1790         try {
1791             ireader = DirectoryReader.open(indexDirectory); // open existing index
1792             if (ireader.numDocs() > 0) {
1793                 terms = MultiTerms.getTerms(ireader, QueryBuilder.U);
1794                 iter = terms.iterator(); // init uid iterator
1795             }
1796             while (iter != null && iter.term() != null) {
1797                 String value = iter.term().utf8ToString();
1798                 if (value.isEmpty()) {
1799                     iter.next();
1800                     continue;
1801                 }
1802 
1803                 files.add(Util.uid2url(value));
1804                 BytesRef next = iter.next();
1805                 if (next == null) {
1806                     iter = null;
1807                 }
1808             }
1809         } finally {
1810             if (ireader != null) {
1811                 try {
1812                     ireader.close();
1813                 } catch (IOException e) {
1814                     LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e);
1815                 }
1816             }
1817         }
1818 
1819         return files;
1820     }
1821 
1822     /**
1823      * Get number of documents in this index database.
1824      * @return number of documents
1825      * @throws IOException if I/O exception occurred
1826      */
1827     public int getNumFiles() throws IOException {
1828         IndexReader ireader = null;
1829         try {
1830             ireader = DirectoryReader.open(indexDirectory); // open existing index
1831             return ireader.numDocs();
1832         } finally {
1833             if (ireader != null) {
1834                 try {
1835                     ireader.close();
1836                 } catch (IOException e) {
1837                     LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e);
1838                 }
1839             }
1840         }
1841     }
1842 
1843     static void listFrequentTokens(List<String> subFiles) throws IOException {
1844         final int limit = 4;
1845 
1846         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
1847         if (env.hasProjects()) {
1848             if (subFiles == null || subFiles.isEmpty()) {
1849                 for (Project project : env.getProjectList()) {
1850                     IndexDatabase db = new IndexDatabase(project);
1851                     db.listTokens(limit);
1852                 }
1853             } else {
1854                 for (String path : subFiles) {
1855                     Project project = Project.getProject(path);
1856                     if (project == null) {
1857                         LOGGER.log(Level.WARNING, "Could not find a project for \"{0}\"", path);
1858                     } else {
1859                         IndexDatabase db = new IndexDatabase(project);
1860                         db.listTokens(limit);
1861                     }
1862                 }
1863             }
1864         } else {
1865             IndexDatabase db = new IndexDatabase();
1866             db.listTokens(limit);
1867         }
1868     }
1869 
1870     public void listTokens(int freq) throws IOException {
1871         IndexReader ireader = null;
1872         TermsEnum iter = null;
1873         Terms terms;
1874 
1875         try {
1876             ireader = DirectoryReader.open(indexDirectory);
1877             if (ireader.numDocs() > 0) {
1878                 terms = MultiTerms.getTerms(ireader, QueryBuilder.DEFS);
1879                 iter = terms.iterator(); // init term iterator over DEFS
1880             }
1881             while (iter != null && iter.term() != null) {
1882                 if (iter.docFreq() > 16 && iter.term().utf8ToString().length() > freq) {
1883                     LOGGER.warning(iter.term().utf8ToString());
1884                 }
1885                 BytesRef next = iter.next();
1886                 if (next == null) {
1887                     iter = null;
1888                 }
1889             }
1890         } finally {
1891 
1892             if (ireader != null) {
1893                 try {
1894                     ireader.close();
1895                 } catch (IOException e) {
1896                     LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e);
1897                 }
1898             }
1899         }
1900     }
1901 
1902     /**
1903      * Get an IndexReader for the index database in which a given file resides.
1904      *
1905      * @param path the file to get the database for
1906      * @return an IndexReader for the index database where the file should be
1907      * located, or null if it cannot be located.
1908      */
1909     @SuppressWarnings("java:S2095")
1910     public static IndexReader getIndexReader(String path) {
1911         IndexReader ret = null;
1912 
1913         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
1914         File indexDir = new File(env.getDataRootFile(), INDEX_DIR);
1915 
1916         if (env.hasProjects()) {
1917             Project p = Project.getProject(path);
1918             if (p == null) {
1919                 return null;
1920             }
1921             indexDir = new File(indexDir, p.getPath());
1922         }
1923         try {
1924             FSDirectory fdir = FSDirectory.open(indexDir.toPath(), NoLockFactory.INSTANCE);
1925             if (indexDir.exists() && DirectoryReader.indexExists(fdir)) {
1926                 ret = DirectoryReader.open(fdir);
1927             }
1928         } catch (Exception ex) {
1929             LOGGER.log(Level.SEVERE, "Failed to open index: {0}", indexDir.getAbsolutePath());
1930             LOGGER.log(Level.FINE, "Stack Trace: ", ex);
1931         }
1932         return ret;
1933     }
1934 
1935     /**
1936      * Get the latest definitions for a file from the index.
1937      *
1938      * @param file the file whose definitions to find
1939      * @return definitions for the file, or {@code null} if they could not be
1940      * found
1941      * @throws IOException if an error happens when accessing the index
1942      * @throws ParseException if an error happens when building the Lucene query
1943      * @throws ClassNotFoundException if the class for the stored definitions
1944      * instance cannot be found
1945      */
1946     public static Definitions getDefinitions(File file) throws ParseException, IOException, ClassNotFoundException {
1947         Document doc = getDocument(file);
1948         if (doc == null) {
1949             return null;
1950         }
1951 
1952         IndexableField tags = doc.getField(QueryBuilder.TAGS);
1953         if (tags != null) {
1954             return Definitions.deserialize(tags.binaryValue().bytes);
1955         }
1956 
1957         // Didn't find any definitions.
1958         return null;
1959     }
1960 
1961     /**
1962      * @param file File object for a file under source root
1963      * @return Document object for the file or {@code null}
1964      * @throws IOException on I/O error
1965      * @throws ParseException on problem with building Query
1966      */
1967     public static Document getDocument(File file) throws IOException, ParseException {
1968         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
1969         String path;
1970         try {
1971             path = env.getPathRelativeToSourceRoot(file);
1972         } catch (ForbiddenSymlinkException e) {
1973             LOGGER.log(Level.FINER, e.getMessage());
1974             return null;
1975         }
1976         // Sanitize Windows path delimiters in order not to conflict with Lucene escape character.
1977         path = path.replace("\\", "/");
1978 
1979         try (IndexReader indexReader = getIndexReader(path)) {
1980             return getDocument(path, indexReader);
1981         }
1982     }
1983 
1984     @Nullable
1985     private static Document getDocument(String path, IndexReader indexReader) throws ParseException, IOException {
1986         if (indexReader == null) {
1987             // No index, no document...
1988             return null;
1989         }
1990 
1991         Document doc;
1992         Query q = new QueryBuilder().setPath(path).build();
1993         IndexSearcher searcher = new IndexSearcher(indexReader);
1994         Statistics stat = new Statistics();
1995         TopDocs top = searcher.search(q, 1);
1996         stat.report(LOGGER, Level.FINEST, "search via getDocument() done",
1997                 "search.latency", new String[]{"category", "getdocument",
1998                         "outcome", top.totalHits.value == 0 ? "empty" : "success"});
1999         if (top.totalHits.value == 0) {
2000             // No hits, no document...
2001             return null;
2002         }
2003         doc = searcher.doc(top.scoreDocs[0].doc);
2004         String foundPath = doc.get(QueryBuilder.PATH);
2005 
2006         // Only use the document if we found an exact match.
2007         if (!path.equals(foundPath)) {
2008             return null;
2009         }
2010 
2011         return doc;
2012     }
2013 
2014     @Override
2015     public boolean equals(Object o) {
2016         if (this == o) {
2017             return true;
2018         }
2019         if (o == null || getClass() != o.getClass()) {
2020             return false;
2021         }
2022         IndexDatabase that = (IndexDatabase) o;
2023         return Objects.equals(project, that.project);
2024     }
2025 
2026     @Override
2027     public int hashCode() {
2028         return Objects.hash(project);
2029     }
2030 
2031     private static class CountingWriter extends Writer {
2032         private long count;
2033         private final Writer out;
2034 
2035         CountingWriter(Writer out) {
2036             super(out);
2037             this.out = out;
2038         }
2039 
2040         @Override
2041         public void write(@NotNull char[] chars, int off, int len) throws IOException {
2042             out.write(chars, off, len);
2043             count += len;
2044         }
2045 
2046         @Override
2047         public void flush() throws IOException {
2048             out.flush();
2049         }
2050 
2051         @Override
2052         public void close() throws IOException {
2053             out.close();
2054         }
2055 
2056         public long getCount() {
2057             return count;
2058         }
2059     }
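    /*
     * Editor's sketch of CountingWriter in use (hypothetical, not from the
     * original file): the wrapper transparently counts what it forwards.
     *
     *   try (CountingWriter w = new CountingWriter(new StringWriter())) {
     *       w.write("hello".toCharArray(), 0, 5);  // delegates and counts
     *       long n = w.getCount();                 // n == 5
     *   }
     */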
2060 
2061     private String getXrefPath(String path) {
2062         boolean compressed = RuntimeEnvironment.getInstance().isCompressXref();
2063         File xrefFile = whatXrefFile(path, compressed);
2064         File parentFile = xrefFile.getParentFile();
2065 
2066         // If mkdirs() returns false, the failure is most likely
2067         // because the directory already exists. But checking for the
2068         // directory first and creating it only if it doesn't exist would
2069         // only increase the file I/O...
2070         if (!parentFile.mkdirs()) {
2071             assert parentFile.exists();
2072         }
2073 
2074         // Write to a pending file for later renaming.
2075         String xrefAbs = xrefFile.getAbsolutePath();
2076         return xrefAbs;
2077     }
2078 
2079     /**
2080      * Get a writer to which the xref can be written, or null if no xref
2081      * should be produced for files of this type.
2082      */
2083     private CountingWriter newXrefWriter(String path, File transientXref, boolean compressed) throws IOException {
2084         return new CountingWriter(new BufferedWriter(new OutputStreamWriter(compressed ?
2085                 new GZIPOutputStream(new FileOutputStream(transientXref)) :
2086                 new FileOutputStream(transientXref))));
2087     }
2088 
2089     LockFactory pickLockFactory(RuntimeEnvironment env) {
2090         switch (env.getLuceneLocking()) {
2091             case ON:
2092             case SIMPLE:
2093                 return SimpleFSLockFactory.INSTANCE;
2094             case NATIVE:
2095                 return NativeFSLockFactory.INSTANCE;
2096             case OFF:
2097             default:
2098                 return NoLockFactory.INSTANCE;
2099         }
2100     }
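    /*
     * Editor's sketch of where the chosen LockFactory is consumed (the index
     * path is hypothetical):
     *
     *   LockFactory lf = pickLockFactory(RuntimeEnvironment.getInstance());
     *   FSDirectory dir = FSDirectory.open(Paths.get("/data/index"), lf);
     *   // With NoLockFactory (the OFF case) nothing prevents two concurrent
     *   // IndexWriters, so OFF is only safe with a single indexer instance.
     */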
2101 
2102     private void finishWriting() throws IOException {
2103         boolean hasPendingCommit = false;
2104         try {
2105             writeAnalysisSettings();
2106 
2107             LOGGER.log(Level.FINE, "preparing to commit changes to Lucene index"); // TODO add info about which database
2108             writer.prepareCommit();
2109             hasPendingCommit = true;
2110 
2111             int n = completer.complete();
2112             // TODO: add elapsed
2113             LOGGER.log(Level.FINE, "completed {0} object(s)", n);
2114 
2115             // Just before commit(), reset the `hasPendingCommit' flag,
2116             // since after commit() is called, there is no need for
2117             // rollback() regardless of success.
2118             hasPendingCommit = false;
2119             writer.commit();
2120         } catch (RuntimeException | IOException e) {
2121             if (hasPendingCommit) {
2122                 writer.rollback();
2123             }
2124             LOGGER.log(Level.WARNING,
2125                 "An error occurred while finishing writer and completer", e);
2126             throw e;
2127         }
2128     }
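    /*
     * Editor's sketch: the two-phase commit pattern implemented above,
     * stripped to its skeleton (pending-file completion is elided):
     *
     *   writer.prepareCommit();      // phase 1: flush and validate the commit
     *   try {
     *       completePendingWork();   // hypothetical inter-phase step
     *       writer.commit();         // phase 2: make the changes visible
     *   } catch (IOException e) {
     *       writer.rollback();       // discard the prepared, uncommitted state
     *       throw e;
     *   }
     */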
2129 
2130     /**
2131      * Verify TABSIZE, and evaluate AnalyzerGuru version together with ZVER --
2132      * or return a value to indicate mismatch.
2133      * @param file the source file object
2134      * @param path the source file path
2135      * @return {@code false} if a mismatch is detected
2136      */
2137     @VisibleForTesting
2138     boolean checkSettings(File file, String path) throws IOException {
2139 
2140         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
2141         boolean outIsXrefWriter = false; // potential xref writer
2142         int reqTabSize = project != null && project.hasTabSizeSetting() ?
2143             project.getTabSize() : 0;
2144         Integer actTabSize = settings.getTabSize();
2145         if (actTabSize != null && !actTabSize.equals(reqTabSize)) {
2146             LOGGER.log(Level.FINE, "Tabsize mismatch: {0}", path);
2147             return false;
2148         }
2149 
2150         int n = 0;
2151         postsIter = uidIter.postings(postsIter);
2152         while (postsIter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
2153             ++n;
2154             // Read a limited-fields version of the document.
2155             Document doc = reader.document(postsIter.docID(), CHECK_FIELDS);
2156             if (doc == null) {
2157                 LOGGER.log(Level.FINER, "No Document: {0}", path);
2158                 continue;
2159             }
2160 
2161             long reqGuruVersion = AnalyzerGuru.getVersionNo();
2162             Long actGuruVersion = settings.getAnalyzerGuruVersion();
2163             /*
2164              * For an older OpenGrok index that does not yet have a defined,
2165              * stored analyzerGuruVersion, break so that no extra work is done.
2166              * After a re-index, the guru version check will be active.
2167              */
2168             if (actGuruVersion == null) {
2169                 break;
2170             }
2171 
2172             AbstractAnalyzer fa = null;
2173             String fileTypeName;
2174             if (actGuruVersion.equals(reqGuruVersion)) {
2175                 fileTypeName = doc.get(QueryBuilder.TYPE);
2176                 if (fileTypeName == null) {
2177                     // (Should not get here, but break just in case.)
2178                     LOGGER.log(Level.FINEST, "Missing TYPE field: {0}", path);
2179                     break;
2180                 }
2181 
2182                 AnalyzerFactory fac = AnalyzerGuru.findByFileTypeName(fileTypeName);
2183                 if (fac != null) {
2184                     fa = fac.getAnalyzer();
2185                 }
2186             } else {
2187                 /*
2188                  * If the stored guru version does not match, re-verify the
2189                  * selection of analyzer or return a value to indicate the
2190                  * analyzer is now mis-matched.
2191                  */
2192                 LOGGER.log(Level.FINER, "Guru version mismatch: {0}", path);
2193 
2194                 fa = getAnalyzerFor(file, path);
2195                 fileTypeName = fa.getFileTypeName();
2196                 String oldTypeName = doc.get(QueryBuilder.TYPE);
2197                 if (!fileTypeName.equals(oldTypeName)) {
2198                     if (LOGGER.isLoggable(Level.FINE)) {
2199                         LOGGER.log(Level.FINE, "Changed {0} to {1}: {2}",
2200                             new Object[]{oldTypeName, fileTypeName, path});
2201                     }
2202                     return false;
2203                 }
2204             }
2205 
2206             // Verify Analyzer version, or return a value to indicate mismatch.
2207             long reqVersion = AnalyzerGuru.getAnalyzerVersionNo(fileTypeName);
2208             Long actVersion = settings.getAnalyzerVersion(fileTypeName);
2209             if (actVersion == null || !actVersion.equals(reqVersion)) {
2210                 if (LOGGER.isLoggable(Level.FINE)) {
2211                     LOGGER.log(Level.FINE, "{0} version mismatch: {1}",
2212                         new Object[]{fileTypeName, path});
2213                 }
2214                 return false;
2215             }
2216 
2217             if (fa != null) {
2218                 outIsXrefWriter = true;
2219             }
2220 
2221             // The versions checks have passed.
2222             break;
2223         }
2224         if (n < 1) {
2225             LOGGER.log(Level.FINER, "Missing index Documents: {0}", path);
2226             return false;
2227         }
2228 
2229         // If the economy mode is on, this should be treated as a match.
2230         if (!env.isGenerateHtml()) {
2231             if (xrefExistsFor(path)) {
2232                 LOGGER.log(Level.FINEST, "Extraneous {0} , removing its xref file", path);
2233                 removeXrefFile(path);
2234             }
2235             return true;
2236         }
2237 
2238         return (!outIsXrefWriter || xrefExistsFor(path));
2239     }
2240 
2241     private void writeAnalysisSettings() throws IOException {
2242         settings = new IndexAnalysisSettings3();
2243         settings.setProjectName(project != null ? project.getName() : null);
2244         settings.setTabSize(project != null && project.hasTabSizeSetting() ?
2245             project.getTabSize() : 0);
2246         settings.setAnalyzerGuruVersion(AnalyzerGuru.getVersionNo());
2247         settings.setAnalyzersVersions(AnalyzerGuru.getAnalyzersVersionNos());
2248         settings.setIndexedSymlinks(indexedSymlinks);
2249 
2250         IndexAnalysisSettingsAccessor dao = new IndexAnalysisSettingsAccessor();
2251         dao.write(writer, settings);
2252     }
2253 
2254     private IndexAnalysisSettings3 readAnalysisSettings() throws IOException {
2255         IndexAnalysisSettingsAccessor dao = new IndexAnalysisSettingsAccessor();
2256         return dao.read(reader);
2257     }
2258 
2259     private boolean xrefExistsFor(String path) {
2260         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
2261         File xrefFile = whatXrefFile(path, env.isCompressXref());
2262         if (!xrefFile.exists()) {
2263             LOGGER.log(Level.FINEST, "Missing {0}", xrefFile);
2264             return false;
2265         }
2266 
2267         return true;
2268     }
2269 
2270     private static class AcceptSymlinkRet {
2271         String localRelPath;
2272     }
2273 }
2274