xref: /OpenGrok/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java (revision b0a8246bf4accfb3a57771e80030265cb0eb1310)
1b5840353SAdam Hornáček /*
2b5840353SAdam Hornáček  * CDDL HEADER START
3b5840353SAdam Hornáček  *
4b5840353SAdam Hornáček  * The contents of this file are subject to the terms of the
5b5840353SAdam Hornáček  * Common Development and Distribution License (the "License").
6b5840353SAdam Hornáček  * You may not use this file except in compliance with the License.
7b5840353SAdam Hornáček  *
8b5840353SAdam Hornáček  * See LICENSE.txt included in this distribution for the specific
9b5840353SAdam Hornáček  * language governing permissions and limitations under the License.
10b5840353SAdam Hornáček  *
11b5840353SAdam Hornáček  * When distributing Covered Code, include this CDDL HEADER in each
12b5840353SAdam Hornáček  * file and include the License file at LICENSE.txt.
13b5840353SAdam Hornáček  * If applicable, add the following below this CDDL HEADER, with the
14b5840353SAdam Hornáček  * fields enclosed by brackets "[]" replaced with your own identifying
15b5840353SAdam Hornáček  * information: Portions Copyright [yyyy] [name of copyright owner]
16b5840353SAdam Hornáček  *
17b5840353SAdam Hornáček  * CDDL HEADER END
18b5840353SAdam Hornáček  */
19b5840353SAdam Hornáček 
20b5840353SAdam Hornáček /*
216bfa9427SVladimir Kotal  * Copyright (c) 2008, 2022, Oracle and/or its affiliates. All rights reserved.
2251e20d51SAdam Hornáček  * Portions Copyright (c) 2017, 2020, Chris Fraire <cfraire@me.com>.
23b5840353SAdam Hornáček  */
249805b761SAdam Hornáček package org.opengrok.indexer.index;
25b5840353SAdam Hornáček 
26b5840353SAdam Hornáček import java.io.BufferedInputStream;
27b5840353SAdam Hornáček import java.io.BufferedWriter;
28b5840353SAdam Hornáček import java.io.File;
29b5840353SAdam Hornáček import java.io.FileInputStream;
30b5840353SAdam Hornáček import java.io.FileNotFoundException;
31b5840353SAdam Hornáček import java.io.FileOutputStream;
32b5840353SAdam Hornáček import java.io.IOException;
33b5840353SAdam Hornáček import java.io.InputStream;
34b5840353SAdam Hornáček import java.io.OutputStreamWriter;
35b5840353SAdam Hornáček import java.io.Writer;
36b5840353SAdam Hornáček import java.nio.file.Files;
37b5840353SAdam Hornáček import java.nio.file.Path;
38b5840353SAdam Hornáček import java.nio.file.Paths;
39b5840353SAdam Hornáček import java.util.ArrayList;
40b5840353SAdam Hornáček import java.util.Arrays;
41b5840353SAdam Hornáček import java.util.Comparator;
42b5840353SAdam Hornáček import java.util.HashSet;
43b5840353SAdam Hornáček import java.util.List;
44b5840353SAdam Hornáček import java.util.Map;
45b13c5a0eSAdam Hornacek import java.util.Objects;
46b5840353SAdam Hornáček import java.util.Set;
475054bfafSChris Fraire import java.util.TreeMap;
48b5840353SAdam Hornáček import java.util.concurrent.CopyOnWriteArrayList;
49e829566cSChris Fraire import java.util.concurrent.CountDownLatch;
50b5840353SAdam Hornáček import java.util.concurrent.ExecutionException;
511c258122SVladimir Kotal import java.util.concurrent.TimeUnit;
52b5840353SAdam Hornáček import java.util.concurrent.atomic.AtomicInteger;
53b5840353SAdam Hornáček import java.util.logging.Level;
54b5840353SAdam Hornáček import java.util.logging.Logger;
55b5840353SAdam Hornáček import java.util.stream.Collectors;
56b5840353SAdam Hornáček import java.util.zip.GZIPOutputStream;
579a4e74f4SAdam Hornacek 
589a4e74f4SAdam Hornacek import jakarta.ws.rs.client.ClientBuilder;
599a4e74f4SAdam Hornacek import jakarta.ws.rs.client.Entity;
609a4e74f4SAdam Hornacek import jakarta.ws.rs.core.Response;
61b5840353SAdam Hornáček import org.apache.lucene.analysis.Analyzer;
62b5840353SAdam Hornáček import org.apache.lucene.analysis.standard.StandardAnalyzer;
63b5840353SAdam Hornáček import org.apache.lucene.document.DateTools;
64b5840353SAdam Hornáček import org.apache.lucene.document.Document;
65b5840353SAdam Hornáček import org.apache.lucene.document.Field;
66b5840353SAdam Hornáček import org.apache.lucene.index.DirectoryReader;
67b5840353SAdam Hornáček import org.apache.lucene.index.IndexReader;
68b5840353SAdam Hornáček import org.apache.lucene.index.IndexWriter;
69b5840353SAdam Hornáček import org.apache.lucene.index.IndexWriterConfig;
70b5840353SAdam Hornáček import org.apache.lucene.index.IndexWriterConfig.OpenMode;
71b5840353SAdam Hornáček import org.apache.lucene.index.IndexableField;
724cf88309SLubos Kosco import org.apache.lucene.index.MultiTerms;
73b5840353SAdam Hornáček import org.apache.lucene.index.PostingsEnum;
74b5840353SAdam Hornáček import org.apache.lucene.index.Term;
75b5840353SAdam Hornáček import org.apache.lucene.index.Terms;
76b5840353SAdam Hornáček import org.apache.lucene.index.TermsEnum;
77b5840353SAdam Hornáček import org.apache.lucene.queryparser.classic.ParseException;
78b5840353SAdam Hornáček import org.apache.lucene.search.DocIdSetIterator;
79b5840353SAdam Hornáček import org.apache.lucene.search.IndexSearcher;
80b5840353SAdam Hornáček import org.apache.lucene.search.Query;
81b5840353SAdam Hornáček import org.apache.lucene.search.TopDocs;
82b5840353SAdam Hornáček import org.apache.lucene.store.AlreadyClosedException;
83b5840353SAdam Hornáček import org.apache.lucene.store.FSDirectory;
84b5840353SAdam Hornáček import org.apache.lucene.store.LockFactory;
85b5840353SAdam Hornáček import org.apache.lucene.store.NativeFSLockFactory;
86b5840353SAdam Hornáček import org.apache.lucene.store.NoLockFactory;
87b5840353SAdam Hornáček import org.apache.lucene.store.SimpleFSLockFactory;
88b5840353SAdam Hornáček import org.apache.lucene.util.BytesRef;
8940c74b99SVladimir Kotal import org.jetbrains.annotations.NotNull;
901665873bSVladimir Kotal import org.jetbrains.annotations.Nullable;
912d8cba21SVladimir Kotal import org.jetbrains.annotations.VisibleForTesting;
9257eefa47SKryštof Tulinger import org.opengrok.indexer.analysis.AbstractAnalyzer;
9357eefa47SKryštof Tulinger import org.opengrok.indexer.analysis.AnalyzerFactory;
949805b761SAdam Hornáček import org.opengrok.indexer.analysis.AnalyzerGuru;
959805b761SAdam Hornáček import org.opengrok.indexer.analysis.Ctags;
969805b761SAdam Hornáček import org.opengrok.indexer.analysis.Definitions;
9741351de3SChris Fraire import org.opengrok.indexer.analysis.NullableNumLinesLOC;
9841351de3SChris Fraire import org.opengrok.indexer.analysis.NumLinesLOC;
9940669eceSChris Fraire import org.opengrok.indexer.configuration.PathAccepter;
1009805b761SAdam Hornáček import org.opengrok.indexer.configuration.Project;
1019805b761SAdam Hornáček import org.opengrok.indexer.configuration.RuntimeEnvironment;
102fadf9080SVladimir Kotal import org.opengrok.indexer.history.FileCollector;
1039805b761SAdam Hornáček import org.opengrok.indexer.history.HistoryGuru;
104fbe755ccSChris Fraire import org.opengrok.indexer.history.Repository;
105fc53bae7SVladimir Kotal import org.opengrok.indexer.history.RepositoryInfo;
106fc53bae7SVladimir Kotal import org.opengrok.indexer.history.RepositoryWithHistoryTraversal;
1079805b761SAdam Hornáček import org.opengrok.indexer.logger.LoggerFactory;
1089805b761SAdam Hornáček import org.opengrok.indexer.search.QueryBuilder;
1099805b761SAdam Hornáček import org.opengrok.indexer.util.ForbiddenSymlinkException;
1109805b761SAdam Hornáček import org.opengrok.indexer.util.IOUtils;
1119805b761SAdam Hornáček import org.opengrok.indexer.util.ObjectPool;
1121e75da15SVladimir Kotal import org.opengrok.indexer.util.Progress;
1139805b761SAdam Hornáček import org.opengrok.indexer.util.Statistics;
1144da26a1eSChris Fraire import org.opengrok.indexer.util.TandemPath;
1159805b761SAdam Hornáček import org.opengrok.indexer.web.Util;
116b5840353SAdam Hornáček 
11744387dc8SVladimir Kotal import static org.opengrok.indexer.index.IndexerUtil.getWebAppHeaders;
118cce4eb5fSVladimir Kotal import static org.opengrok.indexer.web.ApiUtils.waitForAsyncApi;
1193ed49cb6SVladimir Kotal 
120b5840353SAdam Hornáček /**
12129312e6dSVladimir Kotal  * This class is used to create / update the index databases. Currently, we use
122b5840353SAdam Hornáček  * one index database per project.
123b5840353SAdam Hornáček  *
124b5840353SAdam Hornáček  * @author Trond Norbye
125b5840353SAdam Hornáček  * @author Lubos Kosco , update for lucene 4.x , 5.x
126b5840353SAdam Hornáček  */
127b5840353SAdam Hornáček public class IndexDatabase {
128b5840353SAdam Hornáček 
129b5840353SAdam Hornáček     private static final Logger LOGGER = LoggerFactory.getLogger(IndexDatabase.class);
130b5840353SAdam Hornáček 
131b13c5a0eSAdam Hornacek     private static final Comparator<File> FILENAME_COMPARATOR = Comparator.comparing(File::getName);
132b5840353SAdam Hornáček 
133b5840353SAdam Hornáček     private static final Set<String> CHECK_FIELDS;
134b5840353SAdam Hornáček 
13541351de3SChris Fraire     private static final Set<String> REVERT_COUNTS_FIELDS;
13641351de3SChris Fraire 
13729312e6dSVladimir Kotal     private static final Object INSTANCE_LOCK = new Object();
138b5840353SAdam Hornáček 
1397d004396SChris Fraire     /**
1407d004396SChris Fraire      * Key is canonical path; Value is the first accepted, absolute path. Map
1417d004396SChris Fraire      * is ordered by canonical length (ASC) and then canonical value (ASC).
1427d004396SChris Fraire      * The map is accessed by a single-thread running indexDown().
1437d004396SChris Fraire      */
1447d004396SChris Fraire     private final Map<String, IndexedSymlink> indexedSymlinks = new TreeMap<>(
1457d004396SChris Fraire             Comparator.comparingInt(String::length).thenComparing(o -> o));
146b5840353SAdam Hornáček 
147b13c5a0eSAdam Hornacek     private final Project project;
148b5840353SAdam Hornáček     private FSDirectory indexDirectory;
149b5840353SAdam Hornáček     private IndexReader reader;
150b5840353SAdam Hornáček     private IndexWriter writer;
1517d004396SChris Fraire     private IndexAnalysisSettings3 settings;
152b5840353SAdam Hornáček     private PendingFileCompleter completer;
15341351de3SChris Fraire     private NumLinesLOCAggregator countsAggregator;
154b5840353SAdam Hornáček     private TermsEnum uidIter;
155b5840353SAdam Hornáček     private PostingsEnum postsIter;
15640669eceSChris Fraire     private PathAccepter pathAccepter;
157b5840353SAdam Hornáček     private AnalyzerGuru analyzerGuru;
158b5840353SAdam Hornáček     private File xrefDir;
159b5840353SAdam Hornáček     private boolean interrupted;
160b5840353SAdam Hornáček     private CopyOnWriteArrayList<IndexChangedListener> listeners;
161b5840353SAdam Hornáček     private File dirtyFile;
162b5840353SAdam Hornáček     private final Object lock = new Object();
163b5840353SAdam Hornáček     private boolean dirty;
164b5840353SAdam Hornáček     private boolean running;
16541351de3SChris Fraire     private boolean isCountingDeltas;
16641351de3SChris Fraire     private boolean isWithDirectoryCounts;
167b5840353SAdam Hornáček     private List<String> directories;
168b5840353SAdam Hornáček     private LockFactory lockfact;
169b5840353SAdam Hornáček     private final BytesRef emptyBR = new BytesRef("");
170b5840353SAdam Hornáček 
171b5840353SAdam Hornáček     // Directory where we store indexes
172b5840353SAdam Hornáček     public static final String INDEX_DIR = "index";
173b5840353SAdam Hornáček     public static final String XREF_DIR = "xref";
174911e8af0SAdam Hornáček     public static final String SUGGESTER_DIR = "suggester";
175b5840353SAdam Hornáček 
1762d8cba21SVladimir Kotal     private final IndexDownArgsFactory indexDownArgsFactory;
1772d8cba21SVladimir Kotal 
/**
 * Creates an Index Database that is not bound to any project. This is the
 * constructor to use when projects are not in use.
 *
 * @throws java.io.IOException if an error occurs while creating directories
 */
public IndexDatabase() throws IOException {
    // Delegates to the single-argument (Project) constructor.
    this((Project) null);
}
187b5840353SAdam Hornáček 
/**
 * Creates an Index Database for the given project.
 *
 * @param project the project to create the database for
 *                ({@code null} when no projects are used)
 * @param factory {@link IndexDownArgsFactory} instance
 * @throws java.io.IOException if an error occurs while creating directories
 */
public IndexDatabase(Project project, IndexDownArgsFactory factory) throws IOException {
    this.project = project;
    this.indexDownArgsFactory = factory;
    // Placeholder value; initialize() replaces it with the configured lock factory.
    this.lockfact = NoLockFactory.INSTANCE;
    initialize();
}
201b5840353SAdam Hornáček 
/**
 * Creates an Index Database for the given project using a freshly created
 * {@link IndexDownArgsFactory}.
 *
 * @param project the project to create the database for
 * @throws IOException if an error occurs while creating directories
 */
@VisibleForTesting
IndexDatabase(Project project) throws IOException {
    this(project, new IndexDownArgsFactory());
}
2062d8cba21SVladimir Kotal 
// Populate the constant field-name sets declared by this class.
static {
    CHECK_FIELDS = new HashSet<>(Arrays.asList(QueryBuilder.TYPE));

    REVERT_COUNTS_FIELDS = new HashSet<>(Arrays.asList(
            QueryBuilder.D,
            QueryBuilder.PATH,
            QueryBuilder.NUML,
            QueryBuilder.LOC));
}
217b5840353SAdam Hornáček 
218b5840353SAdam Hornáček     /**
2191665873bSVladimir Kotal      * Update the index database for all the projects.
220b5840353SAdam Hornáček      *
221b5840353SAdam Hornáček      * @param listener where to signal the changes to the database
222b5840353SAdam Hornáček      * @throws IOException if an error occurs
223b5840353SAdam Hornáček      */
updateAll(IndexChangedListener listener)2241665873bSVladimir Kotal     static CountDownLatch updateAll(IndexChangedListener listener) throws IOException {
2251665873bSVladimir Kotal 
226b5840353SAdam Hornáček         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
227b5840353SAdam Hornáček         List<IndexDatabase> dbs = new ArrayList<>();
228b5840353SAdam Hornáček 
229b5840353SAdam Hornáček         if (env.hasProjects()) {
230b5840353SAdam Hornáček             for (Project project : env.getProjectList()) {
231b5840353SAdam Hornáček                 dbs.add(new IndexDatabase(project));
232b5840353SAdam Hornáček             }
233b5840353SAdam Hornáček         } else {
234b5840353SAdam Hornáček             dbs.add(new IndexDatabase());
235b5840353SAdam Hornáček         }
236b5840353SAdam Hornáček 
237a96f0325SVladimir Kotal         IndexerParallelizer parallelizer = RuntimeEnvironment.getInstance().getIndexerParallelizer();
238e829566cSChris Fraire         CountDownLatch latch = new CountDownLatch(dbs.size());
239b5840353SAdam Hornáček         for (IndexDatabase d : dbs) {
240b5840353SAdam Hornáček             final IndexDatabase db = d;
241b5840353SAdam Hornáček             if (listener != null) {
242b5840353SAdam Hornáček                 db.addIndexChangedListener(listener);
243b5840353SAdam Hornáček             }
244b5840353SAdam Hornáček 
245b13c5a0eSAdam Hornacek             parallelizer.getFixedExecutor().submit(() -> {
246b5840353SAdam Hornáček                 try {
247e829566cSChris Fraire                     db.update();
248b5840353SAdam Hornáček                 } catch (Throwable e) {
249ccfc3b5bSVladimir Kotal                     LOGGER.log(Level.SEVERE,
250ccfc3b5bSVladimir Kotal                             String.format("Problem updating index database in directory %s: ",
251ccfc3b5bSVladimir Kotal                                     db.indexDirectory.getDirectory()), e);
252e829566cSChris Fraire                 } finally {
253e829566cSChris Fraire                     latch.countDown();
254b5840353SAdam Hornáček                 }
255b5840353SAdam Hornáček             });
256b5840353SAdam Hornáček         }
257e829566cSChris Fraire         return latch;
258b5840353SAdam Hornáček     }
259b5840353SAdam Hornáček 
260b5840353SAdam Hornáček     /**
261ff44f24aSAdam Hornáček      * Update the index database for a number of sub-directories.
262b5840353SAdam Hornáček      *
263b5840353SAdam Hornáček      * @param listener where to signal the changes to the database
264b5840353SAdam Hornáček      * @param paths list of paths to be indexed
265b5840353SAdam Hornáček      */
update(IndexChangedListener listener, List<String> paths)26630bba29fSChris Fraire     public static void update(IndexChangedListener listener, List<String> paths) {
267b5840353SAdam Hornáček         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
268e829566cSChris Fraire         IndexerParallelizer parallelizer = env.getIndexerParallelizer();
269b5840353SAdam Hornáček         List<IndexDatabase> dbs = new ArrayList<>();
270b5840353SAdam Hornáček 
271b5840353SAdam Hornáček         for (String path : paths) {
272b5840353SAdam Hornáček             Project project = Project.getProject(path);
273b5840353SAdam Hornáček             if (project == null && env.hasProjects()) {
274b5840353SAdam Hornáček                 LOGGER.log(Level.WARNING, "Could not find a project for \"{0}\"", path);
275b5840353SAdam Hornáček             } else {
276b5840353SAdam Hornáček                 IndexDatabase db;
277b5840353SAdam Hornáček 
278b5840353SAdam Hornáček                 try {
279b5840353SAdam Hornáček                     if (project == null) {
280b5840353SAdam Hornáček                         db = new IndexDatabase();
281b5840353SAdam Hornáček                     } else {
282b5840353SAdam Hornáček                         db = new IndexDatabase(project);
283b5840353SAdam Hornáček                     }
284b5840353SAdam Hornáček 
285b5840353SAdam Hornáček                     int idx = dbs.indexOf(db);
286b5840353SAdam Hornáček                     if (idx != -1) {
287b5840353SAdam Hornáček                         db = dbs.get(idx);
288b5840353SAdam Hornáček                     }
289b5840353SAdam Hornáček 
290b5840353SAdam Hornáček                     if (db.addDirectory(path)) {
291b5840353SAdam Hornáček                         if (idx == -1) {
292b5840353SAdam Hornáček                             dbs.add(db);
293b5840353SAdam Hornáček                         }
294b5840353SAdam Hornáček                     } else {
295b5840353SAdam Hornáček                         LOGGER.log(Level.WARNING, "Directory does not exist \"{0}\" .", path);
296b5840353SAdam Hornáček                     }
297b5840353SAdam Hornáček                 } catch (IOException e) {
298b5840353SAdam Hornáček                     LOGGER.log(Level.WARNING, "An error occurred while updating index", e);
299b5840353SAdam Hornáček 
300b5840353SAdam Hornáček                 }
301b5840353SAdam Hornáček             }
302b5840353SAdam Hornáček 
303b5840353SAdam Hornáček             for (final IndexDatabase db : dbs) {
304b5840353SAdam Hornáček                 db.addIndexChangedListener(listener);
305b13c5a0eSAdam Hornacek                 parallelizer.getFixedExecutor().submit(() -> {
306b5840353SAdam Hornáček                     try {
307e829566cSChris Fraire                         db.update();
308b5840353SAdam Hornáček                     } catch (Throwable e) {
309b5840353SAdam Hornáček                         LOGGER.log(Level.SEVERE, "An error occurred while updating index", e);
310b5840353SAdam Hornáček                     }
311b5840353SAdam Hornáček                 });
312b5840353SAdam Hornáček             }
313b5840353SAdam Hornáček         }
314b5840353SAdam Hornáček     }
315b5840353SAdam Hornáček 
316b5840353SAdam Hornáček     @SuppressWarnings("PMD.CollapsibleIfStatements")
initialize()317b5840353SAdam Hornáček     private void initialize() throws IOException {
318b5840353SAdam Hornáček         synchronized (INSTANCE_LOCK) {
319b5840353SAdam Hornáček             RuntimeEnvironment env = RuntimeEnvironment.getInstance();
320b5840353SAdam Hornáček             File indexDir = new File(env.getDataRootFile(), INDEX_DIR);
321b5840353SAdam Hornáček             if (project != null) {
322b5840353SAdam Hornáček                 indexDir = new File(indexDir, project.getPath());
323b5840353SAdam Hornáček             }
324b5840353SAdam Hornáček 
325b5840353SAdam Hornáček             if (!indexDir.exists() && !indexDir.mkdirs()) {
326b5840353SAdam Hornáček                 // to avoid race conditions, just recheck..
327b5840353SAdam Hornáček                 if (!indexDir.exists()) {
328b5840353SAdam Hornáček                     throw new FileNotFoundException("Failed to create root directory [" + indexDir.getAbsolutePath() + "]");
329b5840353SAdam Hornáček                 }
330b5840353SAdam Hornáček             }
331b5840353SAdam Hornáček 
332b5840353SAdam Hornáček             lockfact = pickLockFactory(env);
333b5840353SAdam Hornáček             indexDirectory = FSDirectory.open(indexDir.toPath(), lockfact);
33440669eceSChris Fraire             pathAccepter = env.getPathAccepter();
335b5840353SAdam Hornáček             analyzerGuru = new AnalyzerGuru();
336b5840353SAdam Hornáček             xrefDir = new File(env.getDataRootFile(), XREF_DIR);
337b5840353SAdam Hornáček             listeners = new CopyOnWriteArrayList<>();
338b5840353SAdam Hornáček             dirtyFile = new File(indexDir, "dirty");
339b5840353SAdam Hornáček             dirty = dirtyFile.exists();
340b5840353SAdam Hornáček             directories = new ArrayList<>();
341b5840353SAdam Hornáček         }
342b5840353SAdam Hornáček     }
343b5840353SAdam Hornáček 
344b5840353SAdam Hornáček     /**
345b5840353SAdam Hornáček      * By default the indexer will traverse all directories in the project. If
346b5840353SAdam Hornáček      * you add directories with this function update will just process the
347b5840353SAdam Hornáček      * specified directories.
348b5840353SAdam Hornáček      *
349b5840353SAdam Hornáček      * @param dir The directory to scan
350b5840353SAdam Hornáček      * @return <code>true</code> if the file is added, false otherwise
351b5840353SAdam Hornáček      */
352b5840353SAdam Hornáček     @SuppressWarnings("PMD.UseStringBufferForStringAppends")
addDirectory(String dir)353b5840353SAdam Hornáček     public boolean addDirectory(String dir) {
354b5840353SAdam Hornáček         String directory = dir;
355b5840353SAdam Hornáček         if (directory.startsWith("\\")) {
356b5840353SAdam Hornáček             directory = directory.replace('\\', '/');
357b5840353SAdam Hornáček         } else if (directory.charAt(0) != '/') {
358b5840353SAdam Hornáček             directory = "/" + directory;
359b5840353SAdam Hornáček         }
360b5840353SAdam Hornáček         File file = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), directory);
361b5840353SAdam Hornáček         if (file.exists()) {
362b5840353SAdam Hornáček             directories.add(directory);
363b5840353SAdam Hornáček             return true;
364b5840353SAdam Hornáček         }
365b5840353SAdam Hornáček         return false;
366b5840353SAdam Hornáček     }
367b5840353SAdam Hornáček 
showFileCount(String dir, IndexDownArgs args)3689e27fc85SVladimir Kotal     private void showFileCount(String dir, IndexDownArgs args) {
369b5840353SAdam Hornáček         if (RuntimeEnvironment.getInstance().isPrintProgress()) {
370b8ad1421SVladimir Kotal             LOGGER.log(Level.INFO, String.format("Need to process: %d files for %s", args.curCount, dir));
371b5840353SAdam Hornáček         }
372b5840353SAdam Hornáček     }
373b5840353SAdam Hornáček 
markProjectIndexed(Project project)3744ce4e2b9SAdam Hornáček     private void markProjectIndexed(Project project) {
375b5840353SAdam Hornáček         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
376b5840353SAdam Hornáček 
377b5840353SAdam Hornáček         // Successfully indexed the project. The message is sent even if
378b5840353SAdam Hornáček         // the project's isIndexed() is true because it triggers RepositoryInfo
379b5840353SAdam Hornáček         // refresh.
380d1d1e50bSVladimir Kotal         if (project == null) {
381d1d1e50bSVladimir Kotal             return;
382d1d1e50bSVladimir Kotal         }
383d1d1e50bSVladimir Kotal 
384a155cd9bSVladimir Kotal         // Also need to store the correct value in configuration
385a155cd9bSVladimir Kotal         // when indexer writes it to a file.
386a155cd9bSVladimir Kotal         project.setIndexed(true);
387a155cd9bSVladimir Kotal 
388d1d1e50bSVladimir Kotal         if (env.getConfigURI() == null) {
389d1d1e50bSVladimir Kotal             return;
390d1d1e50bSVladimir Kotal         }
391d1d1e50bSVladimir Kotal 
392cce4eb5fSVladimir Kotal         Response response;
393d1d1e50bSVladimir Kotal         try {
394cce4eb5fSVladimir Kotal             response = ClientBuilder.newBuilder().connectTimeout(env.getConnectTimeout(), TimeUnit.SECONDS).build()
39555402125SVladimir Kotal                     .target(env.getConfigURI())
3964ce4e2b9SAdam Hornáček                     .path("api")
3974ce4e2b9SAdam Hornáček                     .path("v1")
3984ce4e2b9SAdam Hornáček                     .path("projects")
399d6df19e1SAdam Hornacek                     .path(Util.uriEncode(project.getName()))
4004ce4e2b9SAdam Hornáček                     .path("indexed")
4014ce4e2b9SAdam Hornáček                     .request()
40244387dc8SVladimir Kotal                     .headers(getWebAppHeaders())
4034ce4e2b9SAdam Hornáček                     .put(Entity.text(""));
404d1d1e50bSVladimir Kotal         } catch (RuntimeException e) {
405cce4eb5fSVladimir Kotal             LOGGER.log(Level.WARNING, String.format("Could not notify the webapp that project %s was indexed",
406aaec17ecSVladimir Kotal                     project), e);
407d1d1e50bSVladimir Kotal             return;
408d1d1e50bSVladimir Kotal         }
4094ce4e2b9SAdam Hornáček 
410cce4eb5fSVladimir Kotal         if (response.getStatus() == Response.Status.ACCEPTED.getStatusCode()) {
411cce4eb5fSVladimir Kotal             try {
412cce4eb5fSVladimir Kotal                 response = waitForAsyncApi(response);
413cce4eb5fSVladimir Kotal             } catch (InterruptedException e) {
414cce4eb5fSVladimir Kotal                 LOGGER.log(Level.WARNING, "interrupted while waiting for API response", e);
415cce4eb5fSVladimir Kotal             }
416cce4eb5fSVladimir Kotal         }
417cce4eb5fSVladimir Kotal 
418cce4eb5fSVladimir Kotal         if (response.getStatusInfo().getFamily() != Response.Status.Family.SUCCESSFUL) {
419cce4eb5fSVladimir Kotal             LOGGER.log(Level.WARNING, "Could not notify the webapp that project {0} was indexed: {1}",
420cce4eb5fSVladimir Kotal                     new Object[] {project, response});
421b5840353SAdam Hornáček         }
422b5840353SAdam Hornáček     }
423b5840353SAdam Hornáček 
getRepositoriesForProject(Project project)424fc53bae7SVladimir Kotal     private static List<Repository> getRepositoriesForProject(Project project) {
425fc53bae7SVladimir Kotal         List<Repository> repositoryList = new ArrayList<>();
426fc53bae7SVladimir Kotal 
427fc53bae7SVladimir Kotal         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
428fc53bae7SVladimir Kotal         List<RepositoryInfo> repositoryInfoList = env.getProjectRepositoriesMap().get(project);
429fc53bae7SVladimir Kotal 
4307e740a87SVladimir Kotal         if (repositoryInfoList != null) {
431fc53bae7SVladimir Kotal             for (RepositoryInfo repositoryInfo : repositoryInfoList) {
432fc53bae7SVladimir Kotal                 Repository repository = HistoryGuru.getInstance().getRepository(new File(repositoryInfo.getDirectoryName()));
433fc53bae7SVladimir Kotal                 if (repository != null) {
434fc53bae7SVladimir Kotal                     repositoryList.add(repository);
435fc53bae7SVladimir Kotal                 }
436fc53bae7SVladimir Kotal             }
4377e740a87SVladimir Kotal         }
438fc53bae7SVladimir Kotal 
439fc53bae7SVladimir Kotal         return repositoryList;
440fc53bae7SVladimir Kotal     }
441fc53bae7SVladimir Kotal 
4421665873bSVladimir Kotal     /**
443f4571972SVladimir Kotal      * @return whether the repositories of given project are ready for history based reindex
4441665873bSVladimir Kotal      */
isReadyForHistoryBasedReindex()445f4571972SVladimir Kotal     private boolean isReadyForHistoryBasedReindex() {
446a6a884eaSVladimir Kotal         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
447a6a884eaSVladimir Kotal 
448a6a884eaSVladimir Kotal         // So far the history based reindex does not work without projects.
449a6a884eaSVladimir Kotal         if (!env.hasProjects()) {
450a6a884eaSVladimir Kotal             LOGGER.log(Level.FINEST, "projects are disabled, will be indexed by directory traversal.");
451a6a884eaSVladimir Kotal             return false;
452a6a884eaSVladimir Kotal         }
453a6a884eaSVladimir Kotal 
454fc53bae7SVladimir Kotal         if (project == null) {
455bd29b310SVladimir Kotal             LOGGER.log(Level.FINEST, "no project, will be indexed by directory traversal.");
456b2fc531dSVladimir Kotal             return false;
457fc53bae7SVladimir Kotal         }
45832b4cd63SVladimir Kotal 
45932b4cd63SVladimir Kotal         // History needs to be enabled for the history cache to work (see the comment below).
460fc53bae7SVladimir Kotal         if (!project.isHistoryEnabled()) {
461bd29b310SVladimir Kotal             LOGGER.log(Level.FINEST, "history is disabled, will be indexed by directory traversal.");
462fc53bae7SVladimir Kotal             return false;
463fc53bae7SVladimir Kotal         }
46432b4cd63SVladimir Kotal 
46532b4cd63SVladimir Kotal         // History cache is necessary to get the last indexed revision for given repository.
46632b4cd63SVladimir Kotal         if (!env.isHistoryCache()) {
467bd29b310SVladimir Kotal             LOGGER.log(Level.FINEST, "history cache is disabled, will be indexed by directory traversal.");
46832b4cd63SVladimir Kotal             return false;
46932b4cd63SVladimir Kotal         }
47032b4cd63SVladimir Kotal 
471a6a884eaSVladimir Kotal         // Per project tunable can override the global tunable, therefore env.isHistoryBasedReindex() is not checked.
472a6a884eaSVladimir Kotal         if (!project.isHistoryBasedReindex()) {
473bd29b310SVladimir Kotal             LOGGER.log(Level.FINEST, "history-based reindex is disabled, will be indexed by directory traversal.");
474fadf9080SVladimir Kotal             return false;
475fadf9080SVladimir Kotal         }
476fadf9080SVladimir Kotal 
477cd7eae86SVladimir Kotal         /*
478cd7eae86SVladimir Kotal          * Check that the index is present for this project.
479ac7d53afSVladimir Kotal          * In case of the initial indexing, the traversal of all changesets would most likely be counterproductive,
480cd7eae86SVladimir Kotal          * assuming traversal of directory tree is cheaper than getting the files from SCM history
481cd7eae86SVladimir Kotal          * in such case.
482cd7eae86SVladimir Kotal          */
483cd7eae86SVladimir Kotal         try {
484cd7eae86SVladimir Kotal             if (getNumFiles() == 0) {
485cd7eae86SVladimir Kotal                 LOGGER.log(Level.FINEST, "zero number of documents for project {0}, " +
486cd7eae86SVladimir Kotal                         "will be indexed by directory traversal.", project);
487cd7eae86SVladimir Kotal                 return false;
488cd7eae86SVladimir Kotal             }
489cd7eae86SVladimir Kotal         } catch (IOException e) {
490cd7eae86SVladimir Kotal             LOGGER.log(Level.FINEST, "failed to get number of documents for project {0}," +
491cd7eae86SVladimir Kotal                     "will be indexed by directory traversal.", project);
492cd7eae86SVladimir Kotal             return false;
493cd7eae86SVladimir Kotal         }
494cd7eae86SVladimir Kotal 
495a6a884eaSVladimir Kotal         // If there was no change to any of the repositories of the project, a FileCollector instance will be returned
496a6a884eaSVladimir Kotal         // however the list of files therein will be empty which is legitimate situation (no change of the project).
497a6a884eaSVladimir Kotal         // Only in a case where getFileCollector() returns null (hinting at something went wrong),
498a6a884eaSVladimir Kotal         // the file based traversal should be done.
499fadf9080SVladimir Kotal         if (env.getFileCollector(project.getName()) == null) {
500a6a884eaSVladimir Kotal             LOGGER.log(Level.FINEST, "no file collector for project {0}, will be indexed by directory traversal.",
501bd29b310SVladimir Kotal                     project);
502fadf9080SVladimir Kotal             return false;
503fadf9080SVladimir Kotal         }
5047a438acaSVladimir Kotal 
505fc53bae7SVladimir Kotal         List<Repository> repositories = getRepositoriesForProject(project);
506fc53bae7SVladimir Kotal         // Projects without repositories have to be indexed using indexDown().
507fc53bae7SVladimir Kotal         if (repositories.isEmpty()) {
5081665873bSVladimir Kotal             LOGGER.log(Level.FINEST, "project {0} has no repositories, will be indexed by directory traversal.",
5091665873bSVladimir Kotal                     project);
510fc53bae7SVladimir Kotal             return false;
511fc53bae7SVladimir Kotal         }
5121665873bSVladimir Kotal 
513fc53bae7SVladimir Kotal         for (Repository repository : repositories) {
514f4571972SVladimir Kotal             if (!isReadyForHistoryBasedReindex(repository)) {
5152cf46988SVladimir Kotal                 return false;
5162cf46988SVladimir Kotal             }
5172cf46988SVladimir Kotal         }
5182cf46988SVladimir Kotal 
5192cf46988SVladimir Kotal         // Here it is assumed there are no files untracked by the repositories of this project.
5202cf46988SVladimir Kotal         return true;
5212cf46988SVladimir Kotal     }
5222cf46988SVladimir Kotal 
5232cf46988SVladimir Kotal     /**
5242cf46988SVladimir Kotal      * @param repository Repository instance
5252cf46988SVladimir Kotal      * @return true if the repository can be used for history based reindex
5262cf46988SVladimir Kotal      */
5272d8cba21SVladimir Kotal     @VisibleForTesting
isReadyForHistoryBasedReindex(Repository repository)528f4571972SVladimir Kotal     boolean isReadyForHistoryBasedReindex(Repository repository) {
529ca2549f2SVladimir Kotal         if (!repository.isHistoryEnabled()) {
530ca2549f2SVladimir Kotal             LOGGER.log(Level.FINE, "history is disabled for {0}, " +
531ca2549f2SVladimir Kotal                     "the associated project {1} will be indexed using directory traversal",
532ca2549f2SVladimir Kotal                     new Object[]{repository, project});
533ca2549f2SVladimir Kotal             return false;
534ca2549f2SVladimir Kotal         }
535ca2549f2SVladimir Kotal 
5365650cf15SVladimir Kotal         if (!repository.isHistoryBasedReindex()) {
5375650cf15SVladimir Kotal             LOGGER.log(Level.FINE, "history based reindex is disabled for {0}, " +
5385650cf15SVladimir Kotal                             "the associated project {1} will be indexed using directory traversal",
5395650cf15SVladimir Kotal                     new Object[]{repository, project});
5405650cf15SVladimir Kotal             return false;
5415650cf15SVladimir Kotal         }
5425650cf15SVladimir Kotal 
543fc53bae7SVladimir Kotal         if (!(repository instanceof RepositoryWithHistoryTraversal)) {
5441665873bSVladimir Kotal             LOGGER.log(Level.FINE, "project {0} has a repository {1} that does not support history traversal," +
5451665873bSVladimir Kotal                             "the project will be indexed using directory traversal.",
5461665873bSVladimir Kotal                     new Object[]{project, repository});
5471665873bSVladimir Kotal             return false;
5481665873bSVladimir Kotal         }
5491665873bSVladimir Kotal 
550fc53bae7SVladimir Kotal         return true;
551fc53bae7SVladimir Kotal     }
552fc53bae7SVladimir Kotal 
5531665873bSVladimir Kotal     /**
554ff44f24aSAdam Hornáček      * Update the content of this index database.
555b5840353SAdam Hornáček      *
556b5840353SAdam Hornáček      * @throws IOException if an error occurs
557b5840353SAdam Hornáček      */
update()558e829566cSChris Fraire     public void update() throws IOException {
559b5840353SAdam Hornáček         synchronized (lock) {
560b5840353SAdam Hornáček             if (running) {
561b5840353SAdam Hornáček                 throw new IOException("Indexer already running!");
562b5840353SAdam Hornáček             }
563b5840353SAdam Hornáček             running = true;
564b5840353SAdam Hornáček             interrupted = false;
565b5840353SAdam Hornáček         }
566b5840353SAdam Hornáček 
567b5840353SAdam Hornáček         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
568b5840353SAdam Hornáček 
569b5840353SAdam Hornáček         reader = null;
570b5840353SAdam Hornáček         writer = null;
571b5840353SAdam Hornáček         settings = null;
572b5840353SAdam Hornáček         uidIter = null;
573b5840353SAdam Hornáček         postsIter = null;
5747d004396SChris Fraire         indexedSymlinks.clear();
575b5840353SAdam Hornáček 
576b5840353SAdam Hornáček         IOException finishingException = null;
577b5840353SAdam Hornáček         try {
578b5840353SAdam Hornáček             Analyzer analyzer = AnalyzerGuru.getAnalyzer();
579b5840353SAdam Hornáček             IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
580b5840353SAdam Hornáček             iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
581b5840353SAdam Hornáček             iwc.setRAMBufferSizeMB(env.getRamBufferSize());
582b5840353SAdam Hornáček             writer = new IndexWriter(indexDirectory, iwc);
583b5840353SAdam Hornáček             writer.commit(); // to make sure index exists on the disk
584b5840353SAdam Hornáček             completer = new PendingFileCompleter();
585b5840353SAdam Hornáček 
586b5840353SAdam Hornáček             if (directories.isEmpty()) {
587b5840353SAdam Hornáček                 if (project == null) {
588b5840353SAdam Hornáček                     directories.add("");
589b5840353SAdam Hornáček                 } else {
590b5840353SAdam Hornáček                     directories.add(project.getPath());
591b5840353SAdam Hornáček                 }
592b5840353SAdam Hornáček             }
593b5840353SAdam Hornáček 
594b5840353SAdam Hornáček             for (String dir : directories) {
595b5840353SAdam Hornáček                 File sourceRoot;
596b5840353SAdam Hornáček                 if ("".equals(dir)) {
597b5840353SAdam Hornáček                     sourceRoot = env.getSourceRootFile();
598b5840353SAdam Hornáček                 } else {
599b5840353SAdam Hornáček                     sourceRoot = new File(env.getSourceRootFile(), dir);
600b5840353SAdam Hornáček                 }
601b5840353SAdam Hornáček 
602807ead8fSLubos Kosco                 dir = Util.fixPathIfWindows(dir);
603807ead8fSLubos Kosco 
604af698321SVladimir Kotal                 String startUid = Util.path2uid(dir, "");
605b5840353SAdam Hornáček                 reader = DirectoryReader.open(indexDirectory); // open existing index
60641351de3SChris Fraire                 countsAggregator = new NumLinesLOCAggregator();
607b5840353SAdam Hornáček                 settings = readAnalysisSettings();
608b5840353SAdam Hornáček                 if (settings == null) {
6097d004396SChris Fraire                     settings = new IndexAnalysisSettings3();
610b5840353SAdam Hornáček                 }
611b5840353SAdam Hornáček                 Terms terms = null;
61241351de3SChris Fraire                 if (reader.numDocs() > 0) {
6134cf88309SLubos Kosco                     terms = MultiTerms.getTerms(reader, QueryBuilder.U);
61441351de3SChris Fraire 
61541351de3SChris Fraire                     NumLinesLOCAccessor countsAccessor = new NumLinesLOCAccessor();
61641351de3SChris Fraire                     if (countsAccessor.hasStored(reader)) {
61741351de3SChris Fraire                         isWithDirectoryCounts = true;
61841351de3SChris Fraire                         isCountingDeltas = true;
61941351de3SChris Fraire                     } else {
62041351de3SChris Fraire                         boolean foundCounts = countsAccessor.register(countsAggregator, reader);
62141351de3SChris Fraire                         isWithDirectoryCounts = false;
62241351de3SChris Fraire                         isCountingDeltas = foundCounts;
62341351de3SChris Fraire                         if (!isCountingDeltas) {
62441351de3SChris Fraire                             LOGGER.info("Forcing reindexing to fully compute directory counts");
62541351de3SChris Fraire                         }
62641351de3SChris Fraire                     }
6279474ffdfSChris Fraire                 } else {
6289474ffdfSChris Fraire                     isWithDirectoryCounts = false;
6299474ffdfSChris Fraire                     isCountingDeltas = false;
630b5840353SAdam Hornáček                 }
631b5840353SAdam Hornáček 
632b5840353SAdam Hornáček                 try {
633b5840353SAdam Hornáček                     if (terms != null) {
634b5840353SAdam Hornáček                         uidIter = terms.iterator();
635af698321SVladimir Kotal                         TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startUid)); //init uid
636b5840353SAdam Hornáček                         if (stat == TermsEnum.SeekStatus.END) {
637b5840353SAdam Hornáček                             uidIter = null;
638b5840353SAdam Hornáček                             LOGGER.log(Level.WARNING,
639911e8af0SAdam Hornáček                                 "Couldn''t find a start term for {0}, empty u field?",
640af698321SVladimir Kotal                                 startUid);
641b5840353SAdam Hornáček                         }
642b5840353SAdam Hornáček                     }
643b5840353SAdam Hornáček 
6441665873bSVladimir Kotal                     // The actual indexing happens in indexParallel(). Here we merely collect the files
645af698321SVladimir Kotal                     // that need to be indexed and the files that should be removed.
6462d8cba21SVladimir Kotal                     IndexDownArgs args = indexDownArgsFactory.getIndexDownArgs();
647f4192d95SVladimir Kotal                     boolean usedHistory = getIndexDownArgs(dir, sourceRoot, args);
648fc53bae7SVladimir Kotal 
649aa329234SVladimir Kotal                     // Traverse the trailing terms. This needs to be done before indexParallel() because
650aa329234SVladimir Kotal                     // in some cases it can add items to the args parameter.
651aa329234SVladimir Kotal                     processTrailingTerms(startUid, usedHistory, args);
652aa329234SVladimir Kotal 
6531665873bSVladimir Kotal                     args.curCount = 0;
654fc53bae7SVladimir Kotal                     Statistics elapsed = new Statistics();
655e04153a6SVladimir Kotal                     LOGGER.log(Level.INFO, "Starting indexing of directory {0}", dir);
65630bba29fSChris Fraire                     indexParallel(dir, args);
657277e83cdSVladimir Kotal                     elapsed.report(LOGGER, String.format("Done indexing of directory %s", dir),
658277e83cdSVladimir Kotal                             "indexer.db.directory.index");
659b5840353SAdam Hornáček 
660ea1e4d20SChris Fraire                     /*
661125d0ce7SChris Fraire                      * As a signifier that #Lines/LOC are comprehensively
662125d0ce7SChris Fraire                      * stored so that later calculation is in deltas mode, we
663125d0ce7SChris Fraire                      * need at least one D-document saved. For a repo with only
664125d0ce7SChris Fraire                      * non-code files, however, no true #Lines/LOC will have
665125d0ce7SChris Fraire                      * been saved. Subsequent re-indexing will do more work
666125d0ce7SChris Fraire                      * than necessary (until a source code file is placed). We
667125d0ce7SChris Fraire                      * can record zeroes for a fake file under the root to get
668125d0ce7SChris Fraire                      * a D-document even for this special repo situation.
669125d0ce7SChris Fraire                      *
670125d0ce7SChris Fraire                      * Metrics are aggregated for directories up to the root,
671125d0ce7SChris Fraire                      * so it suffices to put the fake directly under the root.
672ea1e4d20SChris Fraire                      */
6732bb04ff3SChris Fraire                     if (!isWithDirectoryCounts) {
6742825cf76SVladimir Kotal                         final String ROOT_FAKE_FILE = "/.OpenGrok_fake_file";
675125d0ce7SChris Fraire                         countsAggregator.register(new NumLinesLOC(ROOT_FAKE_FILE, 0, 0));
6762bb04ff3SChris Fraire                     }
67741351de3SChris Fraire                     NumLinesLOCAccessor countsAccessor = new NumLinesLOCAccessor();
67841351de3SChris Fraire                     countsAccessor.store(writer, reader, countsAggregator,
67941351de3SChris Fraire                             isWithDirectoryCounts && isCountingDeltas);
68041351de3SChris Fraire 
681b5840353SAdam Hornáček                     markProjectIndexed(project);
682b5840353SAdam Hornáček                 } finally {
683b5840353SAdam Hornáček                     reader.close();
684b5840353SAdam Hornáček                 }
685b5840353SAdam Hornáček             }
686d1cb2d98SVladimir Kotal 
6870bac597fSVladimir Kotal             // The RuntimeException thrown from the block above can prevent the writing from completing.
6880bac597fSVladimir Kotal             // This is deliberate.
689ddc390bcSVladimir Kotal             try {
690ddc390bcSVladimir Kotal                 finishWriting();
691ddc390bcSVladimir Kotal             } catch (IOException e) {
692ddc390bcSVladimir Kotal                 finishingException = e;
693ddc390bcSVladimir Kotal             }
694d1cb2d98SVladimir Kotal         } catch (RuntimeException ex) {
695d1cb2d98SVladimir Kotal             LOGGER.log(Level.SEVERE,
696d1cb2d98SVladimir Kotal                 "Failed with unexpected RuntimeException", ex);
697d1cb2d98SVladimir Kotal             throw ex;
698d1cb2d98SVladimir Kotal         } finally {
699b5840353SAdam Hornáček             completer = null;
700b5840353SAdam Hornáček             try {
701a72324b1SAdam Hornáček                 if (writer != null) {
702a72324b1SAdam Hornáček                     writer.close();
703a72324b1SAdam Hornáček                 }
704b5840353SAdam Hornáček             } catch (IOException e) {
705a72324b1SAdam Hornáček                 if (finishingException == null) {
706a72324b1SAdam Hornáček                     finishingException = e;
707a72324b1SAdam Hornáček                 }
708b5840353SAdam Hornáček                 LOGGER.log(Level.WARNING,
709b5840353SAdam Hornáček                     "An error occurred while closing writer", e);
710b5840353SAdam Hornáček             } finally {
711b5840353SAdam Hornáček                 writer = null;
712b5840353SAdam Hornáček                 synchronized (lock) {
713b5840353SAdam Hornáček                     running = false;
714b5840353SAdam Hornáček                 }
715b5840353SAdam Hornáček             }
716b5840353SAdam Hornáček         }
717b5840353SAdam Hornáček 
718a72324b1SAdam Hornáček         if (finishingException != null) {
719a72324b1SAdam Hornáček             throw finishingException;
720a72324b1SAdam Hornáček         }
721b5840353SAdam Hornáček 
722b5840353SAdam Hornáček         if (!isInterrupted() && isDirty()) {
723b5840353SAdam Hornáček             if (env.isOptimizeDatabase()) {
724b5840353SAdam Hornáček                 optimize();
725b5840353SAdam Hornáček             }
726b5840353SAdam Hornáček             env.setIndexTimestamp();
727b5840353SAdam Hornáček         }
728b5840353SAdam Hornáček     }
729b5840353SAdam Hornáček 
processTrailingTerms(String startUid, boolean usedHistory, IndexDownArgs args)730aa329234SVladimir Kotal     private void processTrailingTerms(String startUid, boolean usedHistory, IndexDownArgs args) throws IOException {
731f4192d95SVladimir Kotal         while (uidIter != null && uidIter.term() != null
732f4192d95SVladimir Kotal                 && uidIter.term().utf8ToString().startsWith(startUid)) {
733f4192d95SVladimir Kotal 
734f4192d95SVladimir Kotal             if (usedHistory) {
735f4192d95SVladimir Kotal                 // Allow for forced reindex. For history based reindex the trailing terms
736f4192d95SVladimir Kotal                 // correspond to the files that have not changed. Such files might need to be re-indexed
737f4192d95SVladimir Kotal                 // if the index format changed.
738f4192d95SVladimir Kotal                 String termPath = Util.uid2url(uidIter.term().utf8ToString());
739f4192d95SVladimir Kotal                 File termFile = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), termPath);
740f4192d95SVladimir Kotal                 boolean matchOK = (isWithDirectoryCounts || isCountingDeltas) &&
741f4192d95SVladimir Kotal                         checkSettings(termFile, termPath);
742f4192d95SVladimir Kotal                 if (!matchOK) {
743aa329234SVladimir Kotal                     removeFile(false);
744aa329234SVladimir Kotal 
745aa329234SVladimir Kotal                     args.curCount++;
746aa329234SVladimir Kotal                     args.works.add(new IndexFileWork(termFile, termPath));
747f4192d95SVladimir Kotal                 }
748f4192d95SVladimir Kotal             } else {
749f4192d95SVladimir Kotal                 // Remove data for the trailing terms that getIndexDownArgs()
750f4192d95SVladimir Kotal                 // did not traverse. These correspond to the files that have been
751f4192d95SVladimir Kotal                 // removed and have higher ordering than any present files.
752f4192d95SVladimir Kotal                 removeFile(true);
753f4192d95SVladimir Kotal             }
754f4192d95SVladimir Kotal 
755f4192d95SVladimir Kotal             BytesRef next = uidIter.next();
756f4192d95SVladimir Kotal             if (next == null) {
757f4192d95SVladimir Kotal                 uidIter = null;
758f4192d95SVladimir Kotal             }
759f4192d95SVladimir Kotal         }
760f4192d95SVladimir Kotal     }
761f4192d95SVladimir Kotal 
762f4192d95SVladimir Kotal     /**
763f4192d95SVladimir Kotal      * @param dir directory path
764f4192d95SVladimir Kotal      * @param sourceRoot source root File object
765f4192d95SVladimir Kotal      * @param args {@link IndexDownArgs} instance (output)
766f4192d95SVladimir Kotal      * @return true if history was used to gather the {@code IndexDownArgs}
767f4192d95SVladimir Kotal      * @throws IOException on error
768f4192d95SVladimir Kotal      */
7692d8cba21SVladimir Kotal     @VisibleForTesting
getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args)770fadf9080SVladimir Kotal     boolean getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args) throws IOException {
771b8ad1421SVladimir Kotal         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
772c54bd05dSVladimir Kotal         boolean historyBased = isReadyForHistoryBasedReindex();
773b8ad1421SVladimir Kotal 
774c54bd05dSVladimir Kotal         if (LOGGER.isLoggable(Level.INFO)) {
775c54bd05dSVladimir Kotal             LOGGER.log(Level.INFO, String.format("Starting file collection using %s traversal for directory '%s'",
776c54bd05dSVladimir Kotal                     historyBased ? "history" : "file-system", dir));
77732b4cd63SVladimir Kotal         }
778c54bd05dSVladimir Kotal         Statistics elapsed = new Statistics();
779c54bd05dSVladimir Kotal         if (historyBased) {
780c54bd05dSVladimir Kotal             indexDownUsingHistory(env.getSourceRootFile(), args);
781c54bd05dSVladimir Kotal         } else {
782c54bd05dSVladimir Kotal             indexDown(sourceRoot, dir, args);
783c54bd05dSVladimir Kotal         }
784c54bd05dSVladimir Kotal 
785c54bd05dSVladimir Kotal         elapsed.report(LOGGER, String.format("Done file collection for directory '%s'", dir),
786c54bd05dSVladimir Kotal                 "indexer.db.collection");
78732b4cd63SVladimir Kotal 
788af698321SVladimir Kotal         showFileCount(dir, args);
789f4192d95SVladimir Kotal 
790c54bd05dSVladimir Kotal         return historyBased;
791af698321SVladimir Kotal     }
79212365fb4SVladimir Kotal 
793af698321SVladimir Kotal     /**
794af698321SVladimir Kotal      * Executes the first, serial stage of indexing, by going through set of files assembled from history.
795af698321SVladimir Kotal      * @param sourceRoot path to the source root (same as {@link RuntimeEnvironment#getSourceRootPath()})
796af698321SVladimir Kotal      * @param args {@link IndexDownArgs} instance where the resulting files to be indexed will be stored
797af698321SVladimir Kotal      * @throws IOException on error
798af698321SVladimir Kotal      */
7992d8cba21SVladimir Kotal     @VisibleForTesting
indexDownUsingHistory(File sourceRoot, IndexDownArgs args)800fadf9080SVladimir Kotal     void indexDownUsingHistory(File sourceRoot, IndexDownArgs args) throws IOException {
801af698321SVladimir Kotal 
802fadf9080SVladimir Kotal         FileCollector fileCollector = RuntimeEnvironment.getInstance().getFileCollector(project.getName());
803af698321SVladimir Kotal 
804fadf9080SVladimir Kotal         for (String path : fileCollector.getFiles()) {
8051665873bSVladimir Kotal             File file = new File(sourceRoot, path);
806af698321SVladimir Kotal             processFileIncremental(args, file, path);
8071665873bSVladimir Kotal         }
8081665873bSVladimir Kotal     }
8091665873bSVladimir Kotal 
810b5840353SAdam Hornáček     /**
811ff44f24aSAdam Hornáček      * Optimize all index databases.
812b5840353SAdam Hornáček      *
813b5840353SAdam Hornáček      * @throws IOException if an error occurs
814b5840353SAdam Hornáček      */
optimizeAll()815e829566cSChris Fraire     static CountDownLatch optimizeAll() throws IOException {
816b5840353SAdam Hornáček         List<IndexDatabase> dbs = new ArrayList<>();
817b5840353SAdam Hornáček         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
818e829566cSChris Fraire         IndexerParallelizer parallelizer = env.getIndexerParallelizer();
819b5840353SAdam Hornáček         if (env.hasProjects()) {
820b5840353SAdam Hornáček             for (Project project : env.getProjectList()) {
821b5840353SAdam Hornáček                 dbs.add(new IndexDatabase(project));
822b5840353SAdam Hornáček             }
823b5840353SAdam Hornáček         } else {
824b5840353SAdam Hornáček             dbs.add(new IndexDatabase());
825b5840353SAdam Hornáček         }
826b5840353SAdam Hornáček 
827e829566cSChris Fraire         CountDownLatch latch = new CountDownLatch(dbs.size());
828b5840353SAdam Hornáček         for (IndexDatabase d : dbs) {
829b5840353SAdam Hornáček             final IndexDatabase db = d;
830b5840353SAdam Hornáček             if (db.isDirty()) {
831b13c5a0eSAdam Hornacek                 parallelizer.getFixedExecutor().submit(() -> {
832b5840353SAdam Hornáček                     try {
833e829566cSChris Fraire                         db.update();
834b5840353SAdam Hornáček                     } catch (Throwable e) {
835b5840353SAdam Hornáček                         LOGGER.log(Level.SEVERE,
836b5840353SAdam Hornáček                             "Problem updating lucene index database: ", e);
837e829566cSChris Fraire                     } finally {
838e829566cSChris Fraire                         latch.countDown();
839b5840353SAdam Hornáček                     }
840b5840353SAdam Hornáček                 });
841b5840353SAdam Hornáček             }
842b5840353SAdam Hornáček         }
843e829566cSChris Fraire         return latch;
844b5840353SAdam Hornáček     }
845b5840353SAdam Hornáček 
846b5840353SAdam Hornáček     /**
847ff44f24aSAdam Hornáček      * Optimize the index database.
84881b586e6SVladimir Kotal      * @throws IOException I/O exception
849b5840353SAdam Hornáček      */
optimize()850b5840353SAdam Hornáček     public void optimize() throws IOException {
851b5840353SAdam Hornáček         synchronized (lock) {
852b5840353SAdam Hornáček             if (running) {
853b5840353SAdam Hornáček                 LOGGER.warning("Optimize terminated... Someone else is updating / optimizing it!");
854b5840353SAdam Hornáček                 return;
855b5840353SAdam Hornáček             }
856b5840353SAdam Hornáček             running = true;
857b5840353SAdam Hornáček         }
858b5840353SAdam Hornáček 
859b5840353SAdam Hornáček         IndexWriter wrt = null;
860b5840353SAdam Hornáček         IOException writerException = null;
861b5840353SAdam Hornáček         try {
862b5840353SAdam Hornáček             Statistics elapsed = new Statistics();
863b5840353SAdam Hornáček             String projectDetail = this.project != null ? " for project " + project.getName() : "";
864b5840353SAdam Hornáček             LOGGER.log(Level.INFO, "Optimizing the index{0}", projectDetail);
865b5840353SAdam Hornáček             Analyzer analyzer = new StandardAnalyzer();
866b5840353SAdam Hornáček             IndexWriterConfig conf = new IndexWriterConfig(analyzer);
867b5840353SAdam Hornáček             conf.setOpenMode(OpenMode.CREATE_OR_APPEND);
868b5840353SAdam Hornáček 
869b5840353SAdam Hornáček             wrt = new IndexWriter(indexDirectory, conf);
870b5840353SAdam Hornáček             wrt.forceMerge(1); // this is deprecated and not needed anymore
871277e83cdSVladimir Kotal             elapsed.report(LOGGER, String.format("Done optimizing index%s", projectDetail),
872277e83cdSVladimir Kotal                     "indexer.db.optimize");
873b5840353SAdam Hornáček             synchronized (lock) {
874b5840353SAdam Hornáček                 if (dirtyFile.exists() && !dirtyFile.delete()) {
875b5840353SAdam Hornáček                     LOGGER.log(Level.FINE, "Failed to remove \"dirty-file\": {0}",
876b5840353SAdam Hornáček                         dirtyFile.getAbsolutePath());
877b5840353SAdam Hornáček                 }
878b5840353SAdam Hornáček                 dirty = false;
879b5840353SAdam Hornáček             }
880b5840353SAdam Hornáček         } catch (IOException e) {
881b5840353SAdam Hornáček             writerException = e;
8822173ed7bSChris Fraire             LOGGER.log(Level.SEVERE, "ERROR: optimizing index", e);
883b5840353SAdam Hornáček         } finally {
884b5840353SAdam Hornáček             if (wrt != null) {
885b5840353SAdam Hornáček                 try {
886b5840353SAdam Hornáček                     wrt.close();
887b5840353SAdam Hornáček                 } catch (IOException e) {
888a72324b1SAdam Hornáček                     if (writerException == null) {
889a72324b1SAdam Hornáček                         writerException = e;
890a72324b1SAdam Hornáček                     }
891b5840353SAdam Hornáček                     LOGGER.log(Level.WARNING,
892b5840353SAdam Hornáček                         "An error occurred while closing writer", e);
893b5840353SAdam Hornáček                 }
894b5840353SAdam Hornáček             }
895b5840353SAdam Hornáček             synchronized (lock) {
896b5840353SAdam Hornáček                 running = false;
897b5840353SAdam Hornáček             }
898b5840353SAdam Hornáček         }
899b5840353SAdam Hornáček 
900a72324b1SAdam Hornáček         if (writerException != null) {
901a72324b1SAdam Hornáček             throw writerException;
902a72324b1SAdam Hornáček         }
903b5840353SAdam Hornáček     }
904b5840353SAdam Hornáček 
isDirty()905b5840353SAdam Hornáček     private boolean isDirty() {
906b5840353SAdam Hornáček         synchronized (lock) {
907b5840353SAdam Hornáček             return dirty;
908b5840353SAdam Hornáček         }
909b5840353SAdam Hornáček     }
910b5840353SAdam Hornáček 
setDirty()911b5840353SAdam Hornáček     private void setDirty() {
912b5840353SAdam Hornáček         synchronized (lock) {
913b5840353SAdam Hornáček             try {
914b5840353SAdam Hornáček                 if (!dirty) {
915b5840353SAdam Hornáček                     if (!dirtyFile.createNewFile() && !dirtyFile.exists()) {
916b5840353SAdam Hornáček                         LOGGER.log(Level.FINE,
917b5840353SAdam Hornáček                                 "Failed to create \"dirty-file\": {0}",
918b5840353SAdam Hornáček                                 dirtyFile.getAbsolutePath());
919b5840353SAdam Hornáček                     }
920b5840353SAdam Hornáček                     dirty = true;
921b5840353SAdam Hornáček                 }
922b5840353SAdam Hornáček             } catch (IOException e) {
923b5840353SAdam Hornáček                 LOGGER.log(Level.FINE, "When creating dirty file: ", e);
924b5840353SAdam Hornáček             }
925b5840353SAdam Hornáček         }
926b5840353SAdam Hornáček     }
927b5840353SAdam Hornáček 
whatXrefFile(String path, boolean compress)928ee13dbaeSChris Fraire     private File whatXrefFile(String path, boolean compress) {
9294da26a1eSChris Fraire         String xrefPath = compress ? TandemPath.join(path, ".gz") : path;
9304da26a1eSChris Fraire         return new File(xrefDir, xrefPath);
931ee13dbaeSChris Fraire     }
932ee13dbaeSChris Fraire 
933b5840353SAdam Hornáček     /**
934ff44f24aSAdam Hornáček      * Queue the removal of xref file for given path.
935b5840353SAdam Hornáček      * @param path path to file under source root
936b5840353SAdam Hornáček      */
removeXrefFile(String path)937b5840353SAdam Hornáček     private void removeXrefFile(String path) {
938ee13dbaeSChris Fraire         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
939ee13dbaeSChris Fraire         File xrefFile = whatXrefFile(path, env.isCompressXref());
94012365fb4SVladimir Kotal         PendingFileDeletion pending = new PendingFileDeletion(xrefFile.getAbsolutePath());
941b5840353SAdam Hornáček         completer.add(pending);
942b5840353SAdam Hornáček     }
943b5840353SAdam Hornáček 
removeHistoryFile(String path)944b5840353SAdam Hornáček     private void removeHistoryFile(String path) {
945b5840353SAdam Hornáček         HistoryGuru.getInstance().clearCacheFile(path);
946b5840353SAdam Hornáček     }
947b5840353SAdam Hornáček 
948b5840353SAdam Hornáček     /**
9491665873bSVladimir Kotal      * Remove a stale file from the index database and potentially also from history cache,
9501665873bSVladimir Kotal      * and queue the removal of the associated xref file.
951b5840353SAdam Hornáček      *
952b5840353SAdam Hornáček      * @param removeHistory if false, do not remove history cache for this file
953b5840353SAdam Hornáček      * @throws java.io.IOException if an error occurs
954b5840353SAdam Hornáček      */
removeFile(boolean removeHistory)955af698321SVladimir Kotal     private void removeFile(boolean removeHistory) throws IOException {
956af698321SVladimir Kotal         String path = Util.uid2url(uidIter.term().utf8ToString());
957af698321SVladimir Kotal 
9581665873bSVladimir Kotal         for (IndexChangedListener listener : listeners) {
9591665873bSVladimir Kotal             listener.fileRemove(path);
9601665873bSVladimir Kotal         }
9611665873bSVladimir Kotal 
962af698321SVladimir Kotal         removeFileDocUid(path);
9631665873bSVladimir Kotal 
9641665873bSVladimir Kotal         removeXrefFile(path);
9651665873bSVladimir Kotal 
9661665873bSVladimir Kotal         if (removeHistory) {
9671665873bSVladimir Kotal             removeHistoryFile(path);
9681665873bSVladimir Kotal         }
9691665873bSVladimir Kotal 
9701665873bSVladimir Kotal         setDirty();
9711665873bSVladimir Kotal 
9721665873bSVladimir Kotal         for (IndexChangedListener listener : listeners) {
9731665873bSVladimir Kotal             listener.fileRemoved(path);
9741665873bSVladimir Kotal         }
9751665873bSVladimir Kotal     }
9761665873bSVladimir Kotal 
removeFileDocUid(String path)977af698321SVladimir Kotal     private void removeFileDocUid(String path) throws IOException {
978b5840353SAdam Hornáček 
97941351de3SChris Fraire         // Determine if a reversal of counts is necessary, and execute if so.
98041351de3SChris Fraire         if (isCountingDeltas) {
98141351de3SChris Fraire             postsIter = uidIter.postings(postsIter);
98241351de3SChris Fraire             while (postsIter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
98341351de3SChris Fraire                 // Read a limited-fields version of the document.
98441351de3SChris Fraire                 Document doc = reader.document(postsIter.docID(), REVERT_COUNTS_FIELDS);
98541351de3SChris Fraire                 if (doc != null) {
9861665873bSVladimir Kotal                     decrementLOCforDoc(path, doc);
98741351de3SChris Fraire                     break;
98841351de3SChris Fraire                 }
98941351de3SChris Fraire             }
99041351de3SChris Fraire         }
99141351de3SChris Fraire 
992b5840353SAdam Hornáček         writer.deleteDocuments(new Term(QueryBuilder.U, uidIter.term()));
993b5840353SAdam Hornáček     }
994b5840353SAdam Hornáček 
decrementLOCforDoc(String path, Document doc)9951665873bSVladimir Kotal     private void decrementLOCforDoc(String path, Document doc) {
9961665873bSVladimir Kotal         NullableNumLinesLOC nullableCounts = NumLinesLOCUtil.read(doc);
9971665873bSVladimir Kotal         if (nullableCounts.getNumLines() != null && nullableCounts.getLOC() != null) {
9981665873bSVladimir Kotal             NumLinesLOC counts = new NumLinesLOC(path,
9991665873bSVladimir Kotal                     -nullableCounts.getNumLines(),
10001665873bSVladimir Kotal                     -nullableCounts.getLOC());
10011665873bSVladimir Kotal             countsAggregator.register(counts);
1002b5840353SAdam Hornáček         }
1003b5840353SAdam Hornáček     }
1004b5840353SAdam Hornáček 
1005b5840353SAdam Hornáček     /**
1006ff44f24aSAdam Hornáček      * Add a file to the Lucene index (and generate a xref file).
1007b5840353SAdam Hornáček      *
1008b5840353SAdam Hornáček      * @param file The file to add
1009b5840353SAdam Hornáček      * @param path The path to the file (from source root)
1010b5840353SAdam Hornáček      * @param ctags a defined instance to use (only if its binary is not null)
1011b5840353SAdam Hornáček      * @throws java.io.IOException if an error occurs
1012b5840353SAdam Hornáček      * @throws InterruptedException if a timeout occurs
1013b5840353SAdam Hornáček      */
addFile(File file, String path, Ctags ctags)1014af698321SVladimir Kotal     private void addFile(File file, String path, Ctags ctags) throws IOException, InterruptedException {
1015b82c5e9dSVladimir Kotal 
1016b82c5e9dSVladimir Kotal         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
101757eefa47SKryštof Tulinger         AbstractAnalyzer fa = getAnalyzerFor(file, path);
1018b5840353SAdam Hornáček 
1019b5840353SAdam Hornáček         for (IndexChangedListener listener : listeners) {
1020b5840353SAdam Hornáček             listener.fileAdd(path, fa.getClass().getSimpleName());
1021b5840353SAdam Hornáček         }
1022b5840353SAdam Hornáček 
1023b5840353SAdam Hornáček         ctags.setTabSize(project != null ? project.getTabSize() : 0);
1024b82c5e9dSVladimir Kotal         if (env.getCtagsTimeout() != 0) {
1025b82c5e9dSVladimir Kotal             ctags.setTimeout(env.getCtagsTimeout());
1026b82c5e9dSVladimir Kotal         }
1027a72324b1SAdam Hornáček         fa.setCtags(ctags);
102841351de3SChris Fraire         fa.setCountsAggregator(countsAggregator);
1029b5840353SAdam Hornáček         fa.setProject(Project.getProject(path));
1030b82c5e9dSVladimir Kotal         fa.setScopesEnabled(env.isScopesEnabled());
1031b82c5e9dSVladimir Kotal         fa.setFoldingEnabled(env.isFoldingEnabled());
1032b5840353SAdam Hornáček 
1033b5840353SAdam Hornáček         Document doc = new Document();
103440c74b99SVladimir Kotal         CountingWriter xrefOut = null;
103540c74b99SVladimir Kotal         try {
103640c74b99SVladimir Kotal             String xrefAbs = null;
103740c74b99SVladimir Kotal             File transientXref = null;
103840c74b99SVladimir Kotal             if (env.isGenerateHtml()) {
103940c74b99SVladimir Kotal                 xrefAbs = getXrefPath(path);
104040c74b99SVladimir Kotal                 transientXref = new File(TandemPath.join(xrefAbs,
104140c74b99SVladimir Kotal                         PendingFileCompleter.PENDING_EXTENSION));
104240c74b99SVladimir Kotal                 xrefOut = newXrefWriter(path, transientXref, env.isCompressXref());
104340c74b99SVladimir Kotal             }
104440c74b99SVladimir Kotal 
1045b5840353SAdam Hornáček             analyzerGuru.populateDocument(doc, file, path, fa, xrefOut);
104640c74b99SVladimir Kotal 
104740c74b99SVladimir Kotal             // Avoid producing empty xref files.
104840c74b99SVladimir Kotal             if (xrefOut != null && xrefOut.getCount() > 0) {
104940c74b99SVladimir Kotal                 PendingFileRenaming ren = new PendingFileRenaming(xrefAbs,
105040c74b99SVladimir Kotal                         transientXref.getAbsolutePath());
105140c74b99SVladimir Kotal                 completer.add(ren);
105240c74b99SVladimir Kotal             } else if (xrefOut != null) {
1053589e205bSVladimir Kotal                 LOGGER.log(Level.FINER, "xref for {0} would be empty, will remove", path);
1054589e205bSVladimir Kotal                 completer.add(new PendingFileDeletion(transientXref.toString()));
105540c74b99SVladimir Kotal             }
1056b5840353SAdam Hornáček         } catch (InterruptedException e) {
1057b5840353SAdam Hornáček             LOGGER.log(Level.WARNING, "File ''{0}'' interrupted--{1}",
1058b5840353SAdam Hornáček                 new Object[]{path, e.getMessage()});
1059b5840353SAdam Hornáček             cleanupResources(doc);
1060b5840353SAdam Hornáček             throw e;
1061b5840353SAdam Hornáček         } catch (Exception e) {
1062b5840353SAdam Hornáček             LOGGER.log(Level.INFO,
1063b5840353SAdam Hornáček                     "Skipped file ''{0}'' because the analyzer didn''t "
1064b5840353SAdam Hornáček                     + "understand it.",
1065b5840353SAdam Hornáček                     path);
1066b5840353SAdam Hornáček             if (LOGGER.isLoggable(Level.FINE)) {
1067b5840353SAdam Hornáček                 LOGGER.log(Level.FINE, "Exception from analyzer " +
1068b5840353SAdam Hornáček                     fa.getClass().getName(), e);
1069b5840353SAdam Hornáček             }
1070b5840353SAdam Hornáček             cleanupResources(doc);
1071b5840353SAdam Hornáček             return;
1072b5840353SAdam Hornáček         } finally {
1073b5840353SAdam Hornáček             fa.setCtags(null);
107441351de3SChris Fraire             fa.setCountsAggregator(null);
107540c74b99SVladimir Kotal             if (xrefOut != null) {
107640c74b99SVladimir Kotal                 xrefOut.close();
107740c74b99SVladimir Kotal             }
1078b5840353SAdam Hornáček         }
1079b5840353SAdam Hornáček 
1080b5840353SAdam Hornáček         try {
1081b5840353SAdam Hornáček             writer.addDocument(doc);
1082b5840353SAdam Hornáček         } catch (Throwable t) {
1083b5840353SAdam Hornáček             cleanupResources(doc);
1084b5840353SAdam Hornáček             throw t;
1085b5840353SAdam Hornáček         }
1086b5840353SAdam Hornáček 
1087b5840353SAdam Hornáček         setDirty();
10881665873bSVladimir Kotal 
1089b5840353SAdam Hornáček         for (IndexChangedListener listener : listeners) {
1090b5840353SAdam Hornáček             listener.fileAdded(path, fa.getClass().getSimpleName());
1091b5840353SAdam Hornáček         }
1092b5840353SAdam Hornáček     }
1093b5840353SAdam Hornáček 
getAnalyzerFor(File file, String path)109457eefa47SKryštof Tulinger     private AbstractAnalyzer getAnalyzerFor(File file, String path)
1095b5840353SAdam Hornáček             throws IOException {
1096b5840353SAdam Hornáček         try (InputStream in = new BufferedInputStream(
1097b5840353SAdam Hornáček                 new FileInputStream(file))) {
1098b5840353SAdam Hornáček             return AnalyzerGuru.getAnalyzer(in, path);
1099b5840353SAdam Hornáček         }
1100b5840353SAdam Hornáček     }
1101b5840353SAdam Hornáček 
1102b5840353SAdam Hornáček     /**
1103b5840353SAdam Hornáček      * Do a best effort to clean up all resources allocated when populating
1104b5840353SAdam Hornáček      * a Lucene document. On normal execution, these resources should be
1105b5840353SAdam Hornáček      * closed automatically by the index writer once it's done with them, but
1106b5840353SAdam Hornáček      * we may not get that far if something fails.
1107b5840353SAdam Hornáček      *
1108b5840353SAdam Hornáček      * @param doc the document whose resources to clean up
1109b5840353SAdam Hornáček      */
cleanupResources(Document doc)1110b5840353SAdam Hornáček     private static void cleanupResources(Document doc) {
1111b5840353SAdam Hornáček         for (IndexableField f : doc) {
1112b5840353SAdam Hornáček             // If the field takes input from a reader, close the reader.
1113b5840353SAdam Hornáček             IOUtils.close(f.readerValue());
1114b5840353SAdam Hornáček 
1115b5840353SAdam Hornáček             // If the field takes input from a token stream, close the
1116b5840353SAdam Hornáček             // token stream.
1117b5840353SAdam Hornáček             if (f instanceof Field) {
1118b5840353SAdam Hornáček                 IOUtils.close(((Field) f).tokenStreamValue());
1119b5840353SAdam Hornáček             }
1120b5840353SAdam Hornáček         }
1121b5840353SAdam Hornáček     }
1122b5840353SAdam Hornáček 
1123b5840353SAdam Hornáček     /**
1124ff44f24aSAdam Hornáček      * Check if I should accept this file into the index database.
1125b5840353SAdam Hornáček      *
1126b5840353SAdam Hornáček      * @param file the file to check
1127fbe755ccSChris Fraire      * @param ret defined instance whose {@code localRelPath} property will be
1128fbe755ccSChris Fraire      * non-null afterward if and only if {@code file} is a symlink that targets
1129fbe755ccSChris Fraire      * either a {@link Repository}-local filesystem object or the same object
1130fbe755ccSChris Fraire      * as a previously-detected and allowed symlink. N.b. method will return
1131fbe755ccSChris Fraire      * {@code false} if {@code ret.localRelPath} is set non-null.
1132fbe755ccSChris Fraire      * @return a value indicating if {@code file} should be included in index
1133b5840353SAdam Hornáček      */
accept(File file, AcceptSymlinkRet ret)1134fbe755ccSChris Fraire     private boolean accept(File file, AcceptSymlinkRet ret) {
1135fbe755ccSChris Fraire         ret.localRelPath = null;
1136b5840353SAdam Hornáček         String absolutePath = file.getAbsolutePath();
1137b5840353SAdam Hornáček 
113840669eceSChris Fraire         if (!pathAccepter.accept(file)) {
1139b5840353SAdam Hornáček             return false;
1140b5840353SAdam Hornáček         }
1141b5840353SAdam Hornáček 
1142b5840353SAdam Hornáček         if (!file.canRead()) {
1143b5840353SAdam Hornáček             LOGGER.log(Level.WARNING, "Could not read {0}", absolutePath);
1144b5840353SAdam Hornáček             return false;
1145b5840353SAdam Hornáček         }
1146b5840353SAdam Hornáček 
1147b5840353SAdam Hornáček         try {
1148b5840353SAdam Hornáček             Path absolute = Paths.get(absolutePath);
1149b5840353SAdam Hornáček             if (Files.isSymbolicLink(absolute)) {
1150b5840353SAdam Hornáček                 File canonical = file.getCanonicalFile();
1151fbe755ccSChris Fraire                 if (!absolutePath.equals(canonical.getPath()) &&
1152fbe755ccSChris Fraire                         !acceptSymlink(absolute, canonical, ret)) {
11537d004396SChris Fraire                     if (ret.localRelPath == null) {
1154b5840353SAdam Hornáček                         LOGGER.log(Level.FINE, "Skipped symlink ''{0}'' -> ''{1}''",
1155b5840353SAdam Hornáček                                 new Object[] {absolutePath, canonical});
11567d004396SChris Fraire                     }
1157b5840353SAdam Hornáček                     return false;
1158b5840353SAdam Hornáček                 }
1159b5840353SAdam Hornáček             }
1160b5840353SAdam Hornáček             //below will only let go files and directories, anything else is considered special and is not added
1161b5840353SAdam Hornáček             if (!file.isFile() && !file.isDirectory()) {
1162b5840353SAdam Hornáček                 LOGGER.log(Level.WARNING, "Ignored special file {0}",
1163b5840353SAdam Hornáček                     absolutePath);
1164b5840353SAdam Hornáček                 return false;
1165b5840353SAdam Hornáček             }
1166b5840353SAdam Hornáček         } catch (IOException exp) {
1167b5840353SAdam Hornáček             LOGGER.log(Level.WARNING, "Failed to resolve name: {0}",
1168b5840353SAdam Hornáček                 absolutePath);
1169b5840353SAdam Hornáček             LOGGER.log(Level.FINE, "Stack Trace: ", exp);
1170b5840353SAdam Hornáček         }
1171b5840353SAdam Hornáček 
1172b5840353SAdam Hornáček         if (file.isDirectory()) {
1173b5840353SAdam Hornáček             // always accept directories so that their files can be examined
1174b5840353SAdam Hornáček             return true;
1175b5840353SAdam Hornáček         }
1176b5840353SAdam Hornáček 
11774dbece33Sanatoly 
11784dbece33Sanatoly         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
11794dbece33Sanatoly         // Lookup history if indexing versioned files only.
11804dbece33Sanatoly         // Skip the lookup entirely (which is expensive) if unversioned files are allowed
11814dbece33Sanatoly         if (env.isIndexVersionedFilesOnly()) {
1182b5840353SAdam Hornáček             if (HistoryGuru.getInstance().hasHistory(file)) {
1183b5840353SAdam Hornáček                 // versioned files should always be accepted
1184b5840353SAdam Hornáček                 return true;
1185b5840353SAdam Hornáček             }
11864dbece33Sanatoly             LOGGER.log(Level.FINER, "not accepting unversioned {0}", absolutePath);
11874dbece33Sanatoly             return false;
1188b5840353SAdam Hornáček         }
11894dbece33Sanatoly         // unversioned files are allowed
11904dbece33Sanatoly         return true;
1191b5840353SAdam Hornáček     }
1192b5840353SAdam Hornáček 
1193b5840353SAdam Hornáček     /**
1194b5840353SAdam Hornáček      * Determines if {@code file} should be accepted into the index database.
1195b5840353SAdam Hornáček      * @param parent parent of {@code file}
1196b5840353SAdam Hornáček      * @param file directory object under consideration
1197fbe755ccSChris Fraire      * @param ret defined instance whose {@code localRelPath} property will be
1198fbe755ccSChris Fraire      * non-null afterward if and only if {@code file} is a symlink that targets
1199fbe755ccSChris Fraire      * either a {@link Repository}-local filesystem object or the same object
1200fbe755ccSChris Fraire      * as a previously-detected and allowed symlink. N.b. method will return
1201fbe755ccSChris Fraire      * {@code false} if {@code ret.localRelPath} is set non-null.
1202fbe755ccSChris Fraire      * @return a value indicating if {@code file} should be included in index
1203b5840353SAdam Hornáček      */
accept(File parent, File file, AcceptSymlinkRet ret)1204fbe755ccSChris Fraire     private boolean accept(File parent, File file, AcceptSymlinkRet ret) {
1205fbe755ccSChris Fraire         ret.localRelPath = null;
1206b5840353SAdam Hornáček 
1207b5840353SAdam Hornáček         try {
1208b5840353SAdam Hornáček             File f1 = parent.getCanonicalFile();
1209b5840353SAdam Hornáček             File f2 = file.getCanonicalFile();
1210b5840353SAdam Hornáček             if (f1.equals(f2)) {
1211b5840353SAdam Hornáček                 LOGGER.log(Level.INFO, "Skipping links to itself...: {0} {1}",
1212b5840353SAdam Hornáček                         new Object[]{parent.getAbsolutePath(), file.getAbsolutePath()});
1213b5840353SAdam Hornáček                 return false;
1214b5840353SAdam Hornáček             }
1215b5840353SAdam Hornáček 
1216b5840353SAdam Hornáček             // Now, let's verify that it's not a link back up the chain...
1217b5840353SAdam Hornáček             File t1 = f1;
1218b5840353SAdam Hornáček             while ((t1 = t1.getParentFile()) != null) {
1219b5840353SAdam Hornáček                 if (f2.equals(t1)) {
1220b5840353SAdam Hornáček                     LOGGER.log(Level.INFO, "Skipping links to parent...: {0} {1}",
1221b5840353SAdam Hornáček                             new Object[]{parent.getAbsolutePath(), file.getAbsolutePath()});
1222b5840353SAdam Hornáček                     return false;
1223b5840353SAdam Hornáček                 }
1224b5840353SAdam Hornáček             }
1225b5840353SAdam Hornáček 
1226fbe755ccSChris Fraire             return accept(file, ret);
1227b5840353SAdam Hornáček         } catch (IOException ex) {
1228b5840353SAdam Hornáček             LOGGER.log(Level.WARNING, "Failed to resolve name: {0} {1}",
1229b5840353SAdam Hornáček                     new Object[]{parent.getAbsolutePath(), file.getAbsolutePath()});
1230b5840353SAdam Hornáček         }
1231b5840353SAdam Hornáček         return false;
1232b5840353SAdam Hornáček     }
1233b5840353SAdam Hornáček 
1234b5840353SAdam Hornáček     /**
1235ff44f24aSAdam Hornáček      * Check if I should accept the path containing a symlink.
1236b5840353SAdam Hornáček      *
1237b5840353SAdam Hornáček      * @param absolute the path with a symlink to check
1238b5840353SAdam Hornáček      * @param canonical the canonical file object
1239fbe755ccSChris Fraire      * @param ret defined instance whose {@code localRelPath} property will be
1240fbe755ccSChris Fraire      * non-null afterward if and only if {@code absolute} is a symlink that
1241fbe755ccSChris Fraire      * targets either a {@link Repository}-local filesystem object or the same
1242fbe755ccSChris Fraire      * object ({@code canonical}) as a previously-detected and allowed symlink.
1243fbe755ccSChris Fraire      * N.b. method will return {@code false} if {@code ret.localRelPath} is set
1244fbe755ccSChris Fraire      * non-null.
1245fbe755ccSChris Fraire      * @return a value indicating if {@code file} should be included in index
1246b5840353SAdam Hornáček      */
acceptSymlink(Path absolute, File canonical, AcceptSymlinkRet ret)1247fbe755ccSChris Fraire     private boolean acceptSymlink(Path absolute, File canonical, AcceptSymlinkRet ret) {
1248fbe755ccSChris Fraire         ret.localRelPath = null;
1249b5840353SAdam Hornáček 
1250fbe755ccSChris Fraire         String absolute1 = absolute.toString();
1251fbe755ccSChris Fraire         String canonical1 = canonical.getPath();
1252fbe755ccSChris Fraire         boolean isCanonicalDir = canonical.isDirectory();
12537d004396SChris Fraire         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
12547d004396SChris Fraire         IndexedSymlink indexed1;
12557d004396SChris Fraire         String absolute0;
1256fbe755ccSChris Fraire 
1257fbe755ccSChris Fraire         if (isLocal(canonical1)) {
1258fbe755ccSChris Fraire             if (!isCanonicalDir) {
1259fbe755ccSChris Fraire                 if (LOGGER.isLoggable(Level.FINEST)) {
1260fbe755ccSChris Fraire                     LOGGER.log(Level.FINEST, "Local {0} has symlink from {1}",
1261fbe755ccSChris Fraire                             new Object[] {canonical1, absolute1});
1262fbe755ccSChris Fraire                 }
12637d004396SChris Fraire                 /*
12647d004396SChris Fraire                  * Always index symlinks to local files, but do not add to
12657d004396SChris Fraire                  * indexedSymlinks for a non-directory.
12667d004396SChris Fraire                  */
1267b5840353SAdam Hornáček                 return true;
12685054bfafSChris Fraire             }
12695054bfafSChris Fraire 
1270fbe755ccSChris Fraire             /*
12717d004396SChris Fraire              * Do not index symlinks to local directories, because the
12725054bfafSChris Fraire              * canonical target will be indexed on its own -- but relativize()
12735054bfafSChris Fraire              * a path to be returned in ret so that a symlink can be replicated
12745054bfafSChris Fraire              * in xref/.
1275fbe755ccSChris Fraire              */
1276fbe755ccSChris Fraire             ret.localRelPath = absolute.getParent().relativize(
1277b5840353SAdam Hornáček                     canonical.toPath()).toString();
12787d004396SChris Fraire 
12797d004396SChris Fraire             // Try to put the prime absolute path into indexedSymlinks.
12807d004396SChris Fraire             try {
12817d004396SChris Fraire                 String primeRelative = env.getPathRelativeToSourceRoot(canonical);
12827d004396SChris Fraire                 absolute0 = env.getSourceRootPath() + primeRelative;
12837d004396SChris Fraire             } catch (ForbiddenSymlinkException | IOException e) {
12847d004396SChris Fraire                 /*
12857d004396SChris Fraire                  * This is not expected, as indexDown() would have operated on
12867d004396SChris Fraire                  * the file already -- but we are forced to handle.
12877d004396SChris Fraire                  */
12887d004396SChris Fraire                 LOGGER.log(Level.WARNING, String.format(
12897d004396SChris Fraire                         "Unexpected error getting relative for %s", canonical), e);
12907d004396SChris Fraire                 absolute0 = absolute1;
12917d004396SChris Fraire             }
12927d004396SChris Fraire             indexed1 = new IndexedSymlink(absolute0, canonical1, true);
12937d004396SChris Fraire             indexedSymlinks.put(canonical1, indexed1);
1294b5840353SAdam Hornáček             return false;
1295b5840353SAdam Hornáček         }
12965054bfafSChris Fraire 
12977d004396SChris Fraire         IndexedSymlink indexed0;
12987d004396SChris Fraire         if ((indexed0 = indexedSymlinks.get(canonical1)) != null) {
12997d004396SChris Fraire             if (absolute1.equals(indexed0.getAbsolute())) {
1300fbe755ccSChris Fraire                 return true;
13015054bfafSChris Fraire             }
13025054bfafSChris Fraire 
1303fbe755ccSChris Fraire             /*
13047d004396SChris Fraire              * Do not index symlinks to external directories already indexed
13055054bfafSChris Fraire              * as linked elsewhere, because the canonical target will be
13065054bfafSChris Fraire              * indexed already -- but relativize() a path to be returned in ret
13075054bfafSChris Fraire              * so that this second symlink can be redone as a local
13085054bfafSChris Fraire              * (non-external) symlink in xref/.
1309fbe755ccSChris Fraire              */
1310fbe755ccSChris Fraire             ret.localRelPath = absolute.getParent().relativize(
13117d004396SChris Fraire                     Paths.get(indexed0.getAbsolute())).toString();
1312fbe755ccSChris Fraire 
13135054bfafSChris Fraire             if (LOGGER.isLoggable(Level.FINEST)) {
13145054bfafSChris Fraire                 LOGGER.log(Level.FINEST, "External dir {0} has symlink from {1} after first {2}",
13157d004396SChris Fraire                         new Object[] {canonical1, absolute1, indexed0.getAbsolute()});
1316fbe755ccSChris Fraire             }
1317b5840353SAdam Hornáček             return false;
1318b5840353SAdam Hornáček         }
13195054bfafSChris Fraire 
13205054bfafSChris Fraire         /*
13217d004396SChris Fraire          * Iterate through indexedSymlinks, which is sorted so that shorter
13227d004396SChris Fraire          * canonical entries come first, to see if the new link is a child
13237d004396SChris Fraire          * canonically.
13245054bfafSChris Fraire          */
13257d004396SChris Fraire         for (IndexedSymlink a0 : indexedSymlinks.values()) {
13267d004396SChris Fraire             indexed0 = a0;
13277d004396SChris Fraire             if (!indexed0.isLocal() && canonical1.startsWith(indexed0.getCanonicalSeparated())) {
13287d004396SChris Fraire                 absolute0 = indexed0.getAbsolute();
13295054bfafSChris Fraire                 if (!isCanonicalDir) {
13305054bfafSChris Fraire                     if (LOGGER.isLoggable(Level.FINEST)) {
13315054bfafSChris Fraire                         LOGGER.log(Level.FINEST,
13325054bfafSChris Fraire                                 "External file {0} has symlink from {1} under previous {2}",
13337d004396SChris Fraire                                 new Object[] {canonical1, absolute1, absolute0});
13345054bfafSChris Fraire                     }
13357d004396SChris Fraire                     // Do not add to indexedSymlinks for a non-directory.
13365054bfafSChris Fraire                     return true;
13375054bfafSChris Fraire                 }
13385054bfafSChris Fraire 
13395054bfafSChris Fraire                 /*
13405054bfafSChris Fraire                  * See above about redoing a sourceRoot symlink as a local
13415054bfafSChris Fraire                  * (non-external) symlink in xref/.
13425054bfafSChris Fraire                  */
13437d004396SChris Fraire                 Path abs0 = Paths.get(absolute0, canonical1.substring(
13447d004396SChris Fraire                         indexed0.getCanonicalSeparated().length()));
13455054bfafSChris Fraire                 ret.localRelPath = absolute.getParent().relativize(abs0).toString();
13465054bfafSChris Fraire 
13475054bfafSChris Fraire                 if (LOGGER.isLoggable(Level.FINEST)) {
13485054bfafSChris Fraire                     LOGGER.log(Level.FINEST,
13495054bfafSChris Fraire                             "External dir {0} has symlink from {1} under previous {2}",
13507d004396SChris Fraire                             new Object[] {canonical1, absolute1, absolute0});
13515054bfafSChris Fraire                 }
13525054bfafSChris Fraire                 return false;
13535054bfafSChris Fraire             }
1354b5840353SAdam Hornáček         }
1355fbe755ccSChris Fraire 
13560e0ac58dSChris Fraire         Set<String> canonicalRoots = env.getCanonicalRoots();
13570e0ac58dSChris Fraire         for (String canonicalRoot : canonicalRoots) {
13580e0ac58dSChris Fraire             if (canonical1.startsWith(canonicalRoot)) {
13595054bfafSChris Fraire                 if (LOGGER.isLoggable(Level.FINEST)) {
13605054bfafSChris Fraire                     LOGGER.log(Level.FINEST, "Allowed symlink {0} per canonical root {1}",
13610e0ac58dSChris Fraire                             new Object[] {absolute1, canonical1});
13620e0ac58dSChris Fraire                 }
13637d004396SChris Fraire                 if (isCanonicalDir) {
13647d004396SChris Fraire                     indexed1 = new IndexedSymlink(absolute1, canonical1, false);
13657d004396SChris Fraire                     indexedSymlinks.put(canonical1, indexed1);
13667d004396SChris Fraire                 }
13670e0ac58dSChris Fraire                 return true;
13680e0ac58dSChris Fraire             }
13690e0ac58dSChris Fraire         }
13700e0ac58dSChris Fraire 
13710e0ac58dSChris Fraire         Set<String> allowedSymlinks = env.getAllowedSymlinks();
1372fbe755ccSChris Fraire         for (String allowedSymlink : allowedSymlinks) {
1373fbe755ccSChris Fraire             String allowedTarget;
1374fbe755ccSChris Fraire             try {
1375fbe755ccSChris Fraire                 allowedTarget = new File(allowedSymlink).getCanonicalPath();
1376fbe755ccSChris Fraire             } catch (IOException e) {
1377fbe755ccSChris Fraire                 LOGGER.log(Level.FINE, "unresolvable symlink: {0}", allowedSymlink);
1378fbe755ccSChris Fraire                 continue;
1379fbe755ccSChris Fraire             }
1380fbe755ccSChris Fraire             /*
1381fbe755ccSChris Fraire              * The following canonical check is sufficient because indexDown()
1382fbe755ccSChris Fraire              * traverses top-down, and any intermediate symlinks would have
1383fbe755ccSChris Fraire              * also been checked here for an allowed canonical match. This
1384fbe755ccSChris Fraire              * technically means that if there is a set of redundant symlinks
1385fbe755ccSChris Fraire              * with the same canonical target, then allowing one of the set
1386fbe755ccSChris Fraire              * will allow all others in the set.
1387fbe755ccSChris Fraire              */
1388fbe755ccSChris Fraire             if (canonical1.equals(allowedTarget)) {
13897d004396SChris Fraire                 if (isCanonicalDir) {
13907d004396SChris Fraire                     indexed1 = new IndexedSymlink(absolute1, canonical1, false);
13917d004396SChris Fraire                     indexedSymlinks.put(canonical1, indexed1);
13927d004396SChris Fraire                 }
1393fbe755ccSChris Fraire                 return true;
1394b5840353SAdam Hornáček             }
1395b5840353SAdam Hornáček         }
1396b5840353SAdam Hornáček         return false;
1397b5840353SAdam Hornáček     }
1398b5840353SAdam Hornáček 
1399b5840353SAdam Hornáček     /**
1400b5840353SAdam Hornáček      * Check if a file is local to the current project. If we don't have
1401b5840353SAdam Hornáček      * projects, check if the file is in the source root.
1402b5840353SAdam Hornáček      *
1403b5840353SAdam Hornáček      * @param path the path to a file
1404b5840353SAdam Hornáček      * @return true if the file is local to the current repository
1405b5840353SAdam Hornáček      */
isLocal(String path)1406b5840353SAdam Hornáček     private boolean isLocal(String path) {
1407b5840353SAdam Hornáček         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
1408b5840353SAdam Hornáček         String srcRoot = env.getSourceRootPath();
1409b5840353SAdam Hornáček 
14107d004396SChris Fraire         if (path.startsWith(srcRoot + File.separator)) {
1411b5840353SAdam Hornáček             if (env.hasProjects()) {
1412b5840353SAdam Hornáček                 String relPath = path.substring(srcRoot.length());
14137d004396SChris Fraire                 // If file is under the current project, then it's local.
14147d004396SChris Fraire                 return project.equals(Project.getProject(relPath));
1415b5840353SAdam Hornáček             } else {
1416b5840353SAdam Hornáček                 // File is under source root, and we don't have projects, so
1417b5840353SAdam Hornáček                 // consider it local.
14187d004396SChris Fraire                 return true;
1419b5840353SAdam Hornáček             }
1420b5840353SAdam Hornáček         }
1421b5840353SAdam Hornáček 
14227d004396SChris Fraire         return false;
1423b5840353SAdam Hornáček     }
1424b5840353SAdam Hornáček 
handleSymlink(String path, AcceptSymlinkRet ret)14251665873bSVladimir Kotal     private void handleSymlink(String path, AcceptSymlinkRet ret) {
1426fbe755ccSChris Fraire         /*
1427fbe755ccSChris Fraire          * If ret.localRelPath is defined, then a symlink was detected but
1428fbe755ccSChris Fraire          * not "accepted" to avoid redundancy with an already-accepted
1429fbe755ccSChris Fraire          * canonical target. Set up for a deferred creation of a symlink
1430fbe755ccSChris Fraire          * within xref/.
1431b5840353SAdam Hornáček          */
1432fbe755ccSChris Fraire         if (ret.localRelPath != null) {
14331665873bSVladimir Kotal             File xrefPath = new File(xrefDir, path);
14341665873bSVladimir Kotal             PendingSymlinkage psym = new PendingSymlinkage(xrefPath.getAbsolutePath(), ret.localRelPath);
1435b5840353SAdam Hornáček             completer.add(psym);
1436b5840353SAdam Hornáček         }
14371665873bSVladimir Kotal     }
14381665873bSVladimir Kotal 
14391665873bSVladimir Kotal     /**
1440af698321SVladimir Kotal      * Executes the first, serial stage of indexing, by recursively traversing the file system
1441af698321SVladimir Kotal      * and index alongside.
14421665873bSVladimir Kotal      * <p>Files at least are counted, and any deleted or updated files (based on
14431665873bSVladimir Kotal      * comparison to the Lucene index) are passed to
1444af698321SVladimir Kotal      * {@link #removeFile(boolean)}. New or updated files are noted for indexing.
14451665873bSVladimir Kotal      * @param dir the root indexDirectory to generate indexes for
14461665873bSVladimir Kotal      * @param parent path to parent directory
14471665873bSVladimir Kotal      * @param args arguments to control execution and for collecting a list of
14481665873bSVladimir Kotal      * files for indexing
14491665873bSVladimir Kotal      */
14502d8cba21SVladimir Kotal     @VisibleForTesting
indexDown(File dir, String parent, IndexDownArgs args)14512d8cba21SVladimir Kotal     void indexDown(File dir, String parent, IndexDownArgs args) throws IOException {
14521665873bSVladimir Kotal 
14531665873bSVladimir Kotal         if (isInterrupted()) {
14541665873bSVladimir Kotal             return;
14551665873bSVladimir Kotal         }
14561665873bSVladimir Kotal 
14571665873bSVladimir Kotal         AcceptSymlinkRet ret = new AcceptSymlinkRet();
14581665873bSVladimir Kotal         if (!accept(dir, ret)) {
14591665873bSVladimir Kotal             handleSymlink(parent, ret);
1460b5840353SAdam Hornáček             return;
1461b5840353SAdam Hornáček         }
1462b5840353SAdam Hornáček 
1463b5840353SAdam Hornáček         File[] files = dir.listFiles();
1464b5840353SAdam Hornáček         if (files == null) {
1465b5840353SAdam Hornáček             LOGGER.log(Level.SEVERE, "Failed to get file listing for: {0}",
1466b5840353SAdam Hornáček                 dir.getPath());
1467b5840353SAdam Hornáček             return;
1468b5840353SAdam Hornáček         }
1469b5840353SAdam Hornáček         Arrays.sort(files, FILENAME_COMPARATOR);
1470b5840353SAdam Hornáček 
1471b5840353SAdam Hornáček         for (File file : files) {
14729489792cSVladimir Kotal             String path = parent + File.separator + file.getName();
1473fbe755ccSChris Fraire             if (!accept(dir, file, ret)) {
14741665873bSVladimir Kotal                 handleSymlink(path, ret);
1475b5840353SAdam Hornáček             } else {
1476b5840353SAdam Hornáček                 if (file.isDirectory()) {
1477b5840353SAdam Hornáček                     indexDown(file, path, args);
1478b5840353SAdam Hornáček                 } else {
1479af698321SVladimir Kotal                     processFile(args, file, path);
1480af698321SVladimir Kotal                 }
1481af698321SVladimir Kotal             }
1482af698321SVladimir Kotal         }
1483af698321SVladimir Kotal     }
1484af698321SVladimir Kotal 
1485af698321SVladimir Kotal     /**
1486af698321SVladimir Kotal      * Compared with {@link #processFile(IndexDownArgs, File, String)}, this method's file/path arguments
1487af698321SVladimir Kotal      * represent files that have actually changed in some way, while the other method's argument represent
1488af698321SVladimir Kotal      * files present on disk.
1489af698321SVladimir Kotal      * @param args {@link IndexDownArgs} instance
1490af698321SVladimir Kotal      * @param file File object
1491af698321SVladimir Kotal      * @param path path of the file argument relative to source root (with leading slash)
1492af698321SVladimir Kotal      * @throws IOException on error
1493af698321SVladimir Kotal      */
processFileIncremental(IndexDownArgs args, File file, String path)1494af698321SVladimir Kotal     private void processFileIncremental(IndexDownArgs args, File file, String path) throws IOException {
1495af698321SVladimir Kotal         if (uidIter != null) {
1496*b0a8246bSVladimir Kotal             path = Util.fixPathIfWindows(path);
1497af698321SVladimir Kotal             // Traverse terms until reaching one that matches the path of given file.
1498af698321SVladimir Kotal             while (uidIter != null && uidIter.term() != null
1499af698321SVladimir Kotal                     && uidIter.term().compareTo(emptyBR) != 0
1500af698321SVladimir Kotal                     && Util.uid2url(uidIter.term().utf8ToString()).compareTo(path) < 0) {
1501af698321SVladimir Kotal 
1502af698321SVladimir Kotal                 // A file that was not changed.
1503af698321SVladimir Kotal                 /*
1504af698321SVladimir Kotal                  * Possibly short-circuit to force reindexing of prior-version indexes.
1505af698321SVladimir Kotal                  */
1506af698321SVladimir Kotal                 String termPath = Util.uid2url(uidIter.term().utf8ToString());
1507af698321SVladimir Kotal                 File termFile = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), termPath);
1508af698321SVladimir Kotal                 boolean matchOK = (isWithDirectoryCounts || isCountingDeltas) &&
1509af698321SVladimir Kotal                         checkSettings(termFile, termPath);
1510af698321SVladimir Kotal                 if (!matchOK) {
1511af698321SVladimir Kotal                     removeFile(false);
1512af698321SVladimir Kotal 
1513af698321SVladimir Kotal                     args.curCount++;
1514af698321SVladimir Kotal                     args.works.add(new IndexFileWork(termFile, termPath));
1515af698321SVladimir Kotal                 }
1516af698321SVladimir Kotal 
1517af698321SVladimir Kotal                 BytesRef next = uidIter.next();
1518af698321SVladimir Kotal                 if (next == null) {
1519af698321SVladimir Kotal                     uidIter = null;
1520af698321SVladimir Kotal                 }
1521af698321SVladimir Kotal             }
1522af698321SVladimir Kotal 
1523af698321SVladimir Kotal             if (uidIter != null && uidIter.term() != null
1524af698321SVladimir Kotal                     && Util.uid2url(uidIter.term().utf8ToString()).equals(path)) {
1525af698321SVladimir Kotal                 /*
1526af698321SVladimir Kotal                  * At this point we know that the file has corresponding term in the index
1527af698321SVladimir Kotal                  * and has changed in some way. Either it was deleted or it was changed.
1528af698321SVladimir Kotal                  */
1529af698321SVladimir Kotal                 if (!file.exists()) {
1530af698321SVladimir Kotal                     removeFile(true);
1531af698321SVladimir Kotal                 } else {
1532af698321SVladimir Kotal                     removeFile(false);
1533af698321SVladimir Kotal 
1534af698321SVladimir Kotal                     args.curCount++;
1535af698321SVladimir Kotal                     args.works.add(new IndexFileWork(file, path));
1536af698321SVladimir Kotal                 }
1537af698321SVladimir Kotal 
1538af698321SVladimir Kotal                 BytesRef next = uidIter.next();
1539af698321SVladimir Kotal                 if (next == null) {
1540af698321SVladimir Kotal                     uidIter = null;
1541af698321SVladimir Kotal                 }
1542af698321SVladimir Kotal             } else {
1543af698321SVladimir Kotal                 // Potentially new file. A file might be added and then deleted,
1544af698321SVladimir Kotal                 // so it is necessary to check its existence.
1545af698321SVladimir Kotal                 if (file.exists()) {
1546af698321SVladimir Kotal                     args.curCount++;
1547af698321SVladimir Kotal                     args.works.add(new IndexFileWork(file, path));
1548af698321SVladimir Kotal                 }
1549af698321SVladimir Kotal             }
15502d8cba21SVladimir Kotal         } else {
15512d8cba21SVladimir Kotal             if (file.exists()) {
15522d8cba21SVladimir Kotal                 args.curCount++;
15532d8cba21SVladimir Kotal                 args.works.add(new IndexFileWork(file, path));
1554af698321SVladimir Kotal             }
15552d8cba21SVladimir Kotal         }
1556af698321SVladimir Kotal     }
1557af698321SVladimir Kotal 
1558af698321SVladimir Kotal     /**
1559af698321SVladimir Kotal      * Process a file on disk w.r.t. index.
1560af698321SVladimir Kotal      * @param args {@link IndexDownArgs} instance
1561af698321SVladimir Kotal      * @param file File object
1562af698321SVladimir Kotal      * @param path path corresponding to the file parameter, relative to source root (with leading slash)
1563af698321SVladimir Kotal      * @throws IOException on error
1564af698321SVladimir Kotal      */
processFile(IndexDownArgs args, File file, String path)1565af698321SVladimir Kotal     private void processFile(IndexDownArgs args, File file, String path) throws IOException {
1566b5840353SAdam Hornáček         if (uidIter != null) {
1567807ead8fSLubos Kosco             path = Util.fixPathIfWindows(path);
1568b5840353SAdam Hornáček             String uid = Util.path2uid(path,
1569b5840353SAdam Hornáček                 DateTools.timeToString(file.lastModified(),
1570b5840353SAdam Hornáček                 DateTools.Resolution.MILLISECOND)); // construct uid for doc
1571b5840353SAdam Hornáček             BytesRef buid = new BytesRef(uid);
1572b8ad1421SVladimir Kotal             // Traverse terms that have smaller UID than the current file,
1573b8ad1421SVladimir Kotal             // i.e. given the ordering they positioned before the file,
1574b5840353SAdam Hornáček             // or it is the file that has been modified.
1575b5840353SAdam Hornáček             while (uidIter != null && uidIter.term() != null
1576b5840353SAdam Hornáček                     && uidIter.term().compareTo(emptyBR) != 0
1577b5840353SAdam Hornáček                     && uidIter.term().compareTo(buid) < 0) {
1578b5840353SAdam Hornáček 
1579b5840353SAdam Hornáček                 // If the term's path matches path of currently processed file,
1580b5840353SAdam Hornáček                 // it is clear that the file has been modified and thus
1581097be244SVladimir Kotal                 // removeFile() will be followed by call to addFile() in indexParallel().
1582b5840353SAdam Hornáček                 // In such case, instruct removeFile() not to remove history
1583b5840353SAdam Hornáček                 // cache for the file so that incremental history cache
1584b5840353SAdam Hornáček                 // generation works.
1585b5840353SAdam Hornáček                 String termPath = Util.uid2url(uidIter.term().utf8ToString());
1586af698321SVladimir Kotal                 removeFile(!termPath.equals(path));
1587b5840353SAdam Hornáček 
1588b5840353SAdam Hornáček                 BytesRef next = uidIter.next();
1589b5840353SAdam Hornáček                 if (next == null) {
1590b5840353SAdam Hornáček                     uidIter = null;
1591b5840353SAdam Hornáček                 }
1592b5840353SAdam Hornáček             }
1593b5840353SAdam Hornáček 
1594c9982635SVladimir Kotal             // If the file was not modified, probably skip to the next one.
1595b8ad1421SVladimir Kotal             if (uidIter != null && uidIter.term() != null && uidIter.term().bytesEquals(buid)) {
1596c9982635SVladimir Kotal 
159741351de3SChris Fraire                 /*
159841351de3SChris Fraire                  * Possibly short-circuit to force reindexing of prior-version indexes.
159941351de3SChris Fraire                  */
160041351de3SChris Fraire                 boolean matchOK = (isWithDirectoryCounts || isCountingDeltas) &&
160141351de3SChris Fraire                         checkSettings(file, path);
160241351de3SChris Fraire                 if (!matchOK) {
1603af698321SVladimir Kotal                     removeFile(false);
1604b5840353SAdam Hornáček                 }
1605b5840353SAdam Hornáček 
1606b5840353SAdam Hornáček                 BytesRef next = uidIter.next();
1607b5840353SAdam Hornáček                 if (next == null) {
1608b5840353SAdam Hornáček                     uidIter = null;
1609b5840353SAdam Hornáček                 }
1610b5840353SAdam Hornáček 
161141351de3SChris Fraire                 if (matchOK) {
1612af698321SVladimir Kotal                     return;
1613b5840353SAdam Hornáček                 }
1614b5840353SAdam Hornáček             }
1615b5840353SAdam Hornáček         }
1616b5840353SAdam Hornáček 
1617b8ad1421SVladimir Kotal         args.curCount++;
1618b5840353SAdam Hornáček         args.works.add(new IndexFileWork(file, path));
1619b5840353SAdam Hornáček     }
1620b5840353SAdam Hornáček 
1621b5840353SAdam Hornáček     /**
1622b5840353SAdam Hornáček      * Executes the second, parallel stage of indexing.
162330bba29fSChris Fraire      * @param dir the parent directory (when appended to SOURCE_ROOT)
1624b8ad1421SVladimir Kotal      * @param args contains a list of files to index, found during the earlier stage
1625b5840353SAdam Hornáček      */
indexParallel(String dir, IndexDownArgs args)162630bba29fSChris Fraire     private void indexParallel(String dir, IndexDownArgs args) {
1627b5840353SAdam Hornáček 
1628b5840353SAdam Hornáček         int worksCount = args.works.size();
1629a72324b1SAdam Hornáček         if (worksCount < 1) {
1630a72324b1SAdam Hornáček             return;
1631a72324b1SAdam Hornáček         }
1632b5840353SAdam Hornáček 
1633b5840353SAdam Hornáček         AtomicInteger successCounter = new AtomicInteger();
1634b5840353SAdam Hornáček         AtomicInteger currentCounter = new AtomicInteger();
1635b5840353SAdam Hornáček         AtomicInteger alreadyClosedCounter = new AtomicInteger();
1636e92cec67SVladimir Kotal         IndexerParallelizer parallelizer = RuntimeEnvironment.getInstance().getIndexerParallelizer();
1637b5840353SAdam Hornáček         ObjectPool<Ctags> ctagsPool = parallelizer.getCtagsPool();
1638b5840353SAdam Hornáček 
1639b5840353SAdam Hornáček         Map<Boolean, List<IndexFileWork>> bySuccess = null;
16401e75da15SVladimir Kotal         try (Progress progress = new Progress(LOGGER, dir, worksCount)) {
1641b5840353SAdam Hornáček             bySuccess = parallelizer.getForkJoinPool().submit(() ->
1642b5840353SAdam Hornáček                 args.works.parallelStream().collect(
1643b5840353SAdam Hornáček                 Collectors.groupingByConcurrent((x) -> {
1644b5840353SAdam Hornáček                     int tries = 0;
1645b5840353SAdam Hornáček                     Ctags pctags = null;
1646b5840353SAdam Hornáček                     boolean ret;
164757509b90SVladimir Kotal                     Statistics stats = new Statistics();
1648b5840353SAdam Hornáček                     while (true) {
1649b5840353SAdam Hornáček                         try {
1650b5840353SAdam Hornáček                             if (alreadyClosedCounter.get() > 0) {
1651b5840353SAdam Hornáček                                 ret = false;
1652b5840353SAdam Hornáček                             } else {
1653b5840353SAdam Hornáček                                 pctags = ctagsPool.get();
1654b5840353SAdam Hornáček                                 addFile(x.file, x.path, pctags);
1655b5840353SAdam Hornáček                                 successCounter.incrementAndGet();
1656b5840353SAdam Hornáček                                 ret = true;
1657b5840353SAdam Hornáček                             }
1658b5840353SAdam Hornáček                         } catch (AlreadyClosedException e) {
1659b5840353SAdam Hornáček                             alreadyClosedCounter.incrementAndGet();
1660e92cec67SVladimir Kotal                             String errmsg = String.format("ERROR addFile(): %s", x.file);
1661b5840353SAdam Hornáček                             LOGGER.log(Level.SEVERE, errmsg, e);
1662b5840353SAdam Hornáček                             x.exception = e;
1663b5840353SAdam Hornáček                             ret = false;
1664b5840353SAdam Hornáček                         } catch (InterruptedException e) {
1665b5840353SAdam Hornáček                             // Allow one retry if interrupted
1666a72324b1SAdam Hornáček                             if (++tries <= 1) {
1667a72324b1SAdam Hornáček                                 continue;
1668a72324b1SAdam Hornáček                             }
1669b5840353SAdam Hornáček                             LOGGER.log(Level.WARNING, "No retry: {0}", x.file);
1670b5840353SAdam Hornáček                             x.exception = e;
1671b5840353SAdam Hornáček                             ret = false;
1672b5840353SAdam Hornáček                         } catch (RuntimeException | IOException e) {
1673e92cec67SVladimir Kotal                             String errmsg = String.format("ERROR addFile(): %s", x.file);
1674b5840353SAdam Hornáček                             LOGGER.log(Level.WARNING, errmsg, e);
1675b5840353SAdam Hornáček                             x.exception = e;
1676b5840353SAdam Hornáček                             ret = false;
1677b5840353SAdam Hornáček                         } finally {
1678b5840353SAdam Hornáček                             if (pctags != null) {
1679b5840353SAdam Hornáček                                 pctags.reset();
1680b5840353SAdam Hornáček                                 ctagsPool.release(pctags);
1681b5840353SAdam Hornáček                             }
1682b5840353SAdam Hornáček                         }
1683b5840353SAdam Hornáček 
16841e75da15SVladimir Kotal                         progress.increment();
168557509b90SVladimir Kotal                         stats.report(LOGGER, Level.FINEST,
168657509b90SVladimir Kotal                                 String.format("file ''%s'' %s", x.file, ret ? "indexed" : "failed indexing"));
1687b5840353SAdam Hornáček                         return ret;
1688b5840353SAdam Hornáček                     }
1689b5840353SAdam Hornáček                 }))).get();
1690b5840353SAdam Hornáček         } catch (InterruptedException | ExecutionException e) {
1691b5840353SAdam Hornáček             int successCount = successCounter.intValue();
1692b5840353SAdam Hornáček             double successPct = 100.0 * successCount / worksCount;
1693e92cec67SVladimir Kotal             String exmsg = String.format("%d successes (%.1f%%) after aborting parallel-indexing",
1694b5840353SAdam Hornáček                 successCount, successPct);
1695b5840353SAdam Hornáček             LOGGER.log(Level.SEVERE, exmsg, e);
1696b5840353SAdam Hornáček         }
1697b5840353SAdam Hornáček 
16981665873bSVladimir Kotal         args.curCount = currentCounter.intValue();
1699b5840353SAdam Hornáček 
1700b5840353SAdam Hornáček         // Start with failureCount=worksCount, and then subtract successes.
1701b5840353SAdam Hornáček         int failureCount = worksCount;
1702b5840353SAdam Hornáček         if (bySuccess != null) {
1703e92cec67SVladimir Kotal             List<IndexFileWork> successes = bySuccess.getOrDefault(Boolean.TRUE, null);
1704a72324b1SAdam Hornáček             if (successes != null) {
1705a72324b1SAdam Hornáček                 failureCount -= successes.size();
1706a72324b1SAdam Hornáček             }
1707b5840353SAdam Hornáček         }
1708b5840353SAdam Hornáček         if (failureCount > 0) {
1709b5840353SAdam Hornáček             double pctFailed = 100.0 * failureCount / worksCount;
1710e92cec67SVladimir Kotal             String exmsg = String.format("%d failures (%.1f%%) while parallel-indexing", failureCount, pctFailed);
1711b5840353SAdam Hornáček             LOGGER.log(Level.WARNING, exmsg);
1712b5840353SAdam Hornáček         }
1713b5840353SAdam Hornáček 
1714e92cec67SVladimir Kotal         /*
1715b5840353SAdam Hornáček          * Encountering an AlreadyClosedException is severe enough to abort the
1716b5840353SAdam Hornáček          * run, since it will fail anyway later upon trying to commit().
1717b5840353SAdam Hornáček          */
1718b5840353SAdam Hornáček         int numAlreadyClosed = alreadyClosedCounter.get();
1719b5840353SAdam Hornáček         if (numAlreadyClosed > 0) {
1720e92cec67SVladimir Kotal             throw new AlreadyClosedException(String.format("count=%d", numAlreadyClosed));
1721b5840353SAdam Hornáček         }
1722b5840353SAdam Hornáček     }
1723b5840353SAdam Hornáček 
isInterrupted()1724b5840353SAdam Hornáček     private boolean isInterrupted() {
1725b5840353SAdam Hornáček         synchronized (lock) {
1726b5840353SAdam Hornáček             return interrupted;
1727b5840353SAdam Hornáček         }
1728b5840353SAdam Hornáček     }
1729b5840353SAdam Hornáček 
1730b5840353SAdam Hornáček     /**
1731b5840353SAdam Hornáček      * Register an object to receive events when modifications is done to the
1732b5840353SAdam Hornáček      * index database.
1733b5840353SAdam Hornáček      *
1734b5840353SAdam Hornáček      * @param listener the object to receive the events
1735b5840353SAdam Hornáček      */
addIndexChangedListener(IndexChangedListener listener)1736b5840353SAdam Hornáček     public void addIndexChangedListener(IndexChangedListener listener) {
1737d7648fccSVladimir Kotal         if (listener != null) {
1738b5840353SAdam Hornáček             listeners.add(listener);
1739b5840353SAdam Hornáček         }
1740d7648fccSVladimir Kotal     }
1741b5840353SAdam Hornáček 
1742b5840353SAdam Hornáček     /**
17437516a8e8SVladimir Kotal      * Get all files in some of the index databases.
1744b5840353SAdam Hornáček      *
17457516a8e8SVladimir Kotal      * @param subFiles Subdirectories of various projects or null or an empty list to get everything
1746b5840353SAdam Hornáček      * @throws IOException if an error occurs
1747b5840353SAdam Hornáček      * @return set of files in the index databases specified by the subFiles parameter
1748b5840353SAdam Hornáček      */
getAllFiles(List<String> subFiles)1749b5840353SAdam Hornáček     public static Set<String> getAllFiles(List<String> subFiles) throws IOException {
1750b5840353SAdam Hornáček         Set<String> files = new HashSet<>();
1751b5840353SAdam Hornáček         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
1752b5840353SAdam Hornáček 
1753b5840353SAdam Hornáček         if (env.hasProjects()) {
1754b5840353SAdam Hornáček             if (subFiles == null || subFiles.isEmpty()) {
1755b5840353SAdam Hornáček                 for (Project project : env.getProjectList()) {
1756b5840353SAdam Hornáček                     IndexDatabase db = new IndexDatabase(project);
1757b5840353SAdam Hornáček                     files.addAll(db.getFiles());
1758b5840353SAdam Hornáček                 }
1759b5840353SAdam Hornáček             } else {
1760b5840353SAdam Hornáček                 for (String path : subFiles) {
1761b5840353SAdam Hornáček                     Project project = Project.getProject(path);
1762b5840353SAdam Hornáček                     if (project == null) {
1763b5840353SAdam Hornáček                         LOGGER.log(Level.WARNING, "Could not find a project for \"{0}\"", path);
1764b5840353SAdam Hornáček                     } else {
1765b5840353SAdam Hornáček                         IndexDatabase db = new IndexDatabase(project);
1766b5840353SAdam Hornáček                         files.addAll(db.getFiles());
1767b5840353SAdam Hornáček                     }
1768b5840353SAdam Hornáček                 }
1769b5840353SAdam Hornáček             }
1770b5840353SAdam Hornáček         } else {
1771b5840353SAdam Hornáček             IndexDatabase db = new IndexDatabase();
1772b5840353SAdam Hornáček             files = db.getFiles();
1773b5840353SAdam Hornáček         }
1774b5840353SAdam Hornáček 
1775b5840353SAdam Hornáček         return files;
1776b5840353SAdam Hornáček     }
1777b5840353SAdam Hornáček 
1778b5840353SAdam Hornáček     /**
1779b5840353SAdam Hornáček      * Get all files in this index database.
1780b5840353SAdam Hornáček      *
1781b5840353SAdam Hornáček      * @throws IOException If an IO error occurs while reading from the database
1782b5840353SAdam Hornáček      * @return set of files in this index database
1783b5840353SAdam Hornáček      */
getFiles()1784b5840353SAdam Hornáček     public Set<String> getFiles() throws IOException {
1785b5840353SAdam Hornáček         IndexReader ireader = null;
1786b5840353SAdam Hornáček         TermsEnum iter = null;
1787b5840353SAdam Hornáček         Terms terms;
1788b5840353SAdam Hornáček         Set<String> files = new HashSet<>();
1789b5840353SAdam Hornáček 
1790b5840353SAdam Hornáček         try {
1791b5840353SAdam Hornáček             ireader = DirectoryReader.open(indexDirectory); // open existing index
179241351de3SChris Fraire             if (ireader.numDocs() > 0) {
17934cf88309SLubos Kosco                 terms = MultiTerms.getTerms(ireader, QueryBuilder.U);
1794b5840353SAdam Hornáček                 iter = terms.iterator(); // init uid iterator
1795b5840353SAdam Hornáček             }
1796b5840353SAdam Hornáček             while (iter != null && iter.term() != null) {
17977516a8e8SVladimir Kotal                 String value = iter.term().utf8ToString();
17987516a8e8SVladimir Kotal                 if (value.isEmpty()) {
17997516a8e8SVladimir Kotal                     iter.next();
18007516a8e8SVladimir Kotal                     continue;
18017516a8e8SVladimir Kotal                 }
18027516a8e8SVladimir Kotal 
18037516a8e8SVladimir Kotal                 files.add(Util.uid2url(value));
1804b5840353SAdam Hornáček                 BytesRef next = iter.next();
1805b5840353SAdam Hornáček                 if (next == null) {
1806b5840353SAdam Hornáček                     iter = null;
1807b5840353SAdam Hornáček                 }
1808b5840353SAdam Hornáček             }
1809b5840353SAdam Hornáček         } finally {
1810b5840353SAdam Hornáček             if (ireader != null) {
1811b5840353SAdam Hornáček                 try {
1812b5840353SAdam Hornáček                     ireader.close();
1813b5840353SAdam Hornáček                 } catch (IOException e) {
1814b5840353SAdam Hornáček                     LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e);
1815b5840353SAdam Hornáček                 }
1816b5840353SAdam Hornáček             }
1817b5840353SAdam Hornáček         }
1818b5840353SAdam Hornáček 
1819b5840353SAdam Hornáček         return files;
1820b5840353SAdam Hornáček     }
1821b5840353SAdam Hornáček 
1822b5840353SAdam Hornáček     /**
1823b5840353SAdam Hornáček      * Get number of documents in this index database.
1824b5840353SAdam Hornáček      * @return number of documents
1825b5840353SAdam Hornáček      * @throws IOException if I/O exception occurred
1826b5840353SAdam Hornáček      */
getNumFiles()1827b5840353SAdam Hornáček     public int getNumFiles() throws IOException {
1828b5840353SAdam Hornáček         IndexReader ireader = null;
1829b5840353SAdam Hornáček         try {
1830b5840353SAdam Hornáček             ireader = DirectoryReader.open(indexDirectory); // open existing index
183141351de3SChris Fraire             return ireader.numDocs();
1832b5840353SAdam Hornáček         } finally {
1833b5840353SAdam Hornáček             if (ireader != null) {
1834b5840353SAdam Hornáček                 try {
1835b5840353SAdam Hornáček                     ireader.close();
1836b5840353SAdam Hornáček                 } catch (IOException e) {
1837b5840353SAdam Hornáček                     LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e);
1838b5840353SAdam Hornáček                 }
1839b5840353SAdam Hornáček             }
1840b5840353SAdam Hornáček         }
1841b5840353SAdam Hornáček     }
1842b5840353SAdam Hornáček 
listFrequentTokens(List<String> subFiles)1843b5840353SAdam Hornáček     static void listFrequentTokens(List<String> subFiles) throws IOException {
1844b5840353SAdam Hornáček         final int limit = 4;
1845b5840353SAdam Hornáček 
1846b5840353SAdam Hornáček         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
1847b5840353SAdam Hornáček         if (env.hasProjects()) {
1848b5840353SAdam Hornáček             if (subFiles == null || subFiles.isEmpty()) {
1849b5840353SAdam Hornáček                 for (Project project : env.getProjectList()) {
1850b5840353SAdam Hornáček                     IndexDatabase db = new IndexDatabase(project);
1851b5840353SAdam Hornáček                     db.listTokens(limit);
1852b5840353SAdam Hornáček                 }
1853b5840353SAdam Hornáček             } else {
1854b5840353SAdam Hornáček                 for (String path : subFiles) {
1855b5840353SAdam Hornáček                     Project project = Project.getProject(path);
1856b5840353SAdam Hornáček                     if (project == null) {
1857b5840353SAdam Hornáček                         LOGGER.log(Level.WARNING, "Could not find a project for \"{0}\"", path);
1858b5840353SAdam Hornáček                     } else {
1859b5840353SAdam Hornáček                         IndexDatabase db = new IndexDatabase(project);
1860b5840353SAdam Hornáček                         db.listTokens(limit);
1861b5840353SAdam Hornáček                     }
1862b5840353SAdam Hornáček                 }
1863b5840353SAdam Hornáček             }
1864b5840353SAdam Hornáček         } else {
1865b5840353SAdam Hornáček             IndexDatabase db = new IndexDatabase();
1866b5840353SAdam Hornáček             db.listTokens(limit);
1867b5840353SAdam Hornáček         }
1868b5840353SAdam Hornáček     }
1869b5840353SAdam Hornáček 
listTokens(int freq)1870b5840353SAdam Hornáček     public void listTokens(int freq) throws IOException {
1871b5840353SAdam Hornáček         IndexReader ireader = null;
1872b5840353SAdam Hornáček         TermsEnum iter = null;
1873b5840353SAdam Hornáček         Terms terms;
1874b5840353SAdam Hornáček 
1875b5840353SAdam Hornáček         try {
1876b5840353SAdam Hornáček             ireader = DirectoryReader.open(indexDirectory);
187741351de3SChris Fraire             if (ireader.numDocs() > 0) {
18784cf88309SLubos Kosco                 terms = MultiTerms.getTerms(ireader, QueryBuilder.DEFS);
1879b5840353SAdam Hornáček                 iter = terms.iterator(); // init uid iterator
1880b5840353SAdam Hornáček             }
1881b5840353SAdam Hornáček             while (iter != null && iter.term() != null) {
1882b5840353SAdam Hornáček                 if (iter.docFreq() > 16 && iter.term().utf8ToString().length() > freq) {
1883b5840353SAdam Hornáček                     LOGGER.warning(iter.term().utf8ToString());
1884b5840353SAdam Hornáček                 }
1885b5840353SAdam Hornáček                 BytesRef next = iter.next();
1886ff44f24aSAdam Hornáček                 if (next == null) {
1887ff44f24aSAdam Hornáček                     iter = null;
1888ff44f24aSAdam Hornáček                 }
1889b5840353SAdam Hornáček             }
1890b5840353SAdam Hornáček         } finally {
1891b5840353SAdam Hornáček 
1892b5840353SAdam Hornáček             if (ireader != null) {
1893b5840353SAdam Hornáček                 try {
1894b5840353SAdam Hornáček                     ireader.close();
1895b5840353SAdam Hornáček                 } catch (IOException e) {
1896b5840353SAdam Hornáček                     LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e);
1897b5840353SAdam Hornáček                 }
1898b5840353SAdam Hornáček             }
1899b5840353SAdam Hornáček         }
1900b5840353SAdam Hornáček     }
1901b5840353SAdam Hornáček 
1902b5840353SAdam Hornáček     /**
1903ff44f24aSAdam Hornáček      * Get an indexReader for the Index database where a given file.
1904b5840353SAdam Hornáček      *
1905b5840353SAdam Hornáček      * @param path the file to get the database for
1906b5840353SAdam Hornáček      * @return The index database where the file should be located or null if it
1907b5840353SAdam Hornáček      * cannot be located.
1908b5840353SAdam Hornáček      */
1909523d6b7bSVladimir Kotal     @SuppressWarnings("java:S2095")
getIndexReader(String path)1910b5840353SAdam Hornáček     public static IndexReader getIndexReader(String path) {
1911b5840353SAdam Hornáček         IndexReader ret = null;
1912b5840353SAdam Hornáček 
1913b5840353SAdam Hornáček         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
1914b5840353SAdam Hornáček         File indexDir = new File(env.getDataRootFile(), INDEX_DIR);
1915b5840353SAdam Hornáček 
1916b5840353SAdam Hornáček         if (env.hasProjects()) {
1917b5840353SAdam Hornáček             Project p = Project.getProject(path);
1918b5840353SAdam Hornáček             if (p == null) {
1919b5840353SAdam Hornáček                 return null;
1920b5840353SAdam Hornáček             }
1921b5840353SAdam Hornáček             indexDir = new File(indexDir, p.getPath());
1922b5840353SAdam Hornáček         }
1923b5840353SAdam Hornáček         try {
1924b5840353SAdam Hornáček             FSDirectory fdir = FSDirectory.open(indexDir.toPath(), NoLockFactory.INSTANCE);
1925b5840353SAdam Hornáček             if (indexDir.exists() && DirectoryReader.indexExists(fdir)) {
1926b5840353SAdam Hornáček                 ret = DirectoryReader.open(fdir);
1927b5840353SAdam Hornáček             }
1928b5840353SAdam Hornáček         } catch (Exception ex) {
1929b5840353SAdam Hornáček             LOGGER.log(Level.SEVERE, "Failed to open index: {0}", indexDir.getAbsolutePath());
1930b5840353SAdam Hornáček             LOGGER.log(Level.FINE, "Stack Trace: ", ex);
1931b5840353SAdam Hornáček         }
1932b5840353SAdam Hornáček         return ret;
1933b5840353SAdam Hornáček     }
1934b5840353SAdam Hornáček 
1935b5840353SAdam Hornáček     /**
1936b5840353SAdam Hornáček      * Get the latest definitions for a file from the index.
1937b5840353SAdam Hornáček      *
1938b5840353SAdam Hornáček      * @param file the file whose definitions to find
1939b5840353SAdam Hornáček      * @return definitions for the file, or {@code null} if they could not be
1940b5840353SAdam Hornáček      * found
1941b5840353SAdam Hornáček      * @throws IOException if an error happens when accessing the index
1942b5840353SAdam Hornáček      * @throws ParseException if an error happens when building the Lucene query
1943b5840353SAdam Hornáček      * @throws ClassNotFoundException if the class for the stored definitions
1944b5840353SAdam Hornáček      * instance cannot be found
1945b5840353SAdam Hornáček      */
getDefinitions(File file)1946d7648fccSVladimir Kotal     public static Definitions getDefinitions(File file) throws ParseException, IOException, ClassNotFoundException {
1947d7648fccSVladimir Kotal         Document doc = getDocument(file);
1948d7648fccSVladimir Kotal         if (doc == null) {
1949d7648fccSVladimir Kotal             return null;
1950d7648fccSVladimir Kotal         }
1951d7648fccSVladimir Kotal 
1952d7648fccSVladimir Kotal         IndexableField tags = doc.getField(QueryBuilder.TAGS);
1953d7648fccSVladimir Kotal         if (tags != null) {
1954d7648fccSVladimir Kotal             return Definitions.deserialize(tags.binaryValue().bytes);
1955d7648fccSVladimir Kotal         }
1956d7648fccSVladimir Kotal 
1957d7648fccSVladimir Kotal         // Didn't find any definitions.
1958d7648fccSVladimir Kotal         return null;
1959d7648fccSVladimir Kotal     }
1960d7648fccSVladimir Kotal 
1961d7648fccSVladimir Kotal     /**
196232b4cd63SVladimir Kotal      * @param file File object for a file under source root
1963d7648fccSVladimir Kotal      * @return Document object for the file or {@code null}
1964f044bd7fSVladimir Kotal      * @throws IOException on I/O error
1965f044bd7fSVladimir Kotal      * @throws ParseException on problem with building Query
1966d7648fccSVladimir Kotal      */
getDocument(File file)1967f044bd7fSVladimir Kotal     public static Document getDocument(File file) throws IOException, ParseException {
1968b5840353SAdam Hornáček         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
1969b5840353SAdam Hornáček         String path;
1970b5840353SAdam Hornáček         try {
1971b5840353SAdam Hornáček             path = env.getPathRelativeToSourceRoot(file);
1972b5840353SAdam Hornáček         } catch (ForbiddenSymlinkException e) {
1973b5840353SAdam Hornáček             LOGGER.log(Level.FINER, e.getMessage());
1974b5840353SAdam Hornáček             return null;
1975b5840353SAdam Hornáček         }
1976b2315481SVladimir Kotal         // Sanitize Windows path delimiters in order not to conflict with Lucene escape character.
1977b5840353SAdam Hornáček         path = path.replace("\\", "/");
1978b5840353SAdam Hornáček 
19791665873bSVladimir Kotal         try (IndexReader indexReader = getIndexReader(path)) {
19801665873bSVladimir Kotal             return getDocument(path, indexReader);
19811665873bSVladimir Kotal         }
19821665873bSVladimir Kotal     }
19831665873bSVladimir Kotal 
19841665873bSVladimir Kotal     @Nullable
getDocument(String path, IndexReader indexReader)19851665873bSVladimir Kotal     private static Document getDocument(String path, IndexReader indexReader) throws ParseException, IOException {
19861665873bSVladimir Kotal         if (indexReader == null) {
1987d7648fccSVladimir Kotal             // No index, no document..
1988b5840353SAdam Hornáček             return null;
1989b5840353SAdam Hornáček         }
1990b5840353SAdam Hornáček 
1991d7648fccSVladimir Kotal         Document doc;
1992b5840353SAdam Hornáček         Query q = new QueryBuilder().setPath(path).build();
19931665873bSVladimir Kotal         IndexSearcher searcher = new IndexSearcher(indexReader);
199414a2bde4SVladimir Kotal         Statistics stat = new Statistics();
1995b5840353SAdam Hornáček         TopDocs top = searcher.search(q, 1);
19961665873bSVladimir Kotal         stat.report(LOGGER, Level.FINEST, "search via getDocument() done",
199714a2bde4SVladimir Kotal                 "search.latency", new String[]{"category", "getdocument",
199814a2bde4SVladimir Kotal                         "outcome", top.totalHits.value == 0 ? "empty" : "success"});
19994cf88309SLubos Kosco         if (top.totalHits.value == 0) {
2000d7648fccSVladimir Kotal             // No hits, no document...
2001b5840353SAdam Hornáček             return null;
2002b5840353SAdam Hornáček         }
2003d7648fccSVladimir Kotal         doc = searcher.doc(top.scoreDocs[0].doc);
2004b5840353SAdam Hornáček         String foundPath = doc.get(QueryBuilder.PATH);
2005b5840353SAdam Hornáček 
2006d7648fccSVladimir Kotal         // Only use the document if we found an exact match.
2007d7648fccSVladimir Kotal         if (!path.equals(foundPath)) {
2008d7648fccSVladimir Kotal             return null;
2009b5840353SAdam Hornáček         }
2010d7648fccSVladimir Kotal 
2011d7648fccSVladimir Kotal         return doc;
2012b5840353SAdam Hornáček     }
2013b5840353SAdam Hornáček 
2014b5840353SAdam Hornáček     @Override
equals(Object o)2015b13c5a0eSAdam Hornacek     public boolean equals(Object o) {
2016b13c5a0eSAdam Hornacek         if (this == o) {
2017b5840353SAdam Hornáček             return true;
2018b5840353SAdam Hornáček         }
2019b13c5a0eSAdam Hornacek         if (o == null || getClass() != o.getClass()) {
2020b13c5a0eSAdam Hornacek             return false;
2021b13c5a0eSAdam Hornacek         }
2022b13c5a0eSAdam Hornacek         IndexDatabase that = (IndexDatabase) o;
2023b13c5a0eSAdam Hornacek         return Objects.equals(project, that.project);
2024b13c5a0eSAdam Hornacek     }
2025b5840353SAdam Hornáček 
2026b5840353SAdam Hornáček     @Override
hashCode()2027b5840353SAdam Hornáček     public int hashCode() {
2028b13c5a0eSAdam Hornacek         return Objects.hash(project);
2029b5840353SAdam Hornáček     }
2030b5840353SAdam Hornáček 
20319b95c3c4SVladimir Kotal     private static class CountingWriter extends Writer {
203240c74b99SVladimir Kotal         private long count;
20339b95c3c4SVladimir Kotal         private final Writer out;
203440c74b99SVladimir Kotal 
CountingWriter(Writer out)203540c74b99SVladimir Kotal         CountingWriter(Writer out) {
203640c74b99SVladimir Kotal             super(out);
203740c74b99SVladimir Kotal             this.out = out;
203840c74b99SVladimir Kotal         }
203940c74b99SVladimir Kotal 
204040c74b99SVladimir Kotal         @Override
write(@otNull char[] chars, int off, int len)204140c74b99SVladimir Kotal         public void write(@NotNull char[] chars, int off, int len) throws IOException {
204240c74b99SVladimir Kotal             out.write(chars, off, len);
204340c74b99SVladimir Kotal             count += len;
204440c74b99SVladimir Kotal         }
204540c74b99SVladimir Kotal 
204640c74b99SVladimir Kotal         @Override
flush()204740c74b99SVladimir Kotal         public void flush() throws IOException {
204840c74b99SVladimir Kotal             out.flush();
204940c74b99SVladimir Kotal         }
205040c74b99SVladimir Kotal 
205140c74b99SVladimir Kotal         @Override
close()205240c74b99SVladimir Kotal         public void close() throws IOException {
205340c74b99SVladimir Kotal             out.close();
205440c74b99SVladimir Kotal         }
205540c74b99SVladimir Kotal 
getCount()205640c74b99SVladimir Kotal         public long getCount() {
205740c74b99SVladimir Kotal             return count;
205840c74b99SVladimir Kotal         }
205940c74b99SVladimir Kotal     }
206040c74b99SVladimir Kotal 
getXrefPath(String path)206140c74b99SVladimir Kotal     private String getXrefPath(String path) {
206240c74b99SVladimir Kotal         boolean compressed = RuntimeEnvironment.getInstance().isCompressXref();
2063ee13dbaeSChris Fraire         File xrefFile = whatXrefFile(path, compressed);
2064b5840353SAdam Hornáček         File parentFile = xrefFile.getParentFile();
2065b5840353SAdam Hornáček 
2066b5840353SAdam Hornáček         // If mkdirs() returns false, the failure is most likely
2067b5840353SAdam Hornáček         // because the file already exists. But to check for the
2068b5840353SAdam Hornáček         // file first and only add it if it doesn't exists would
2069b5840353SAdam Hornáček         // only increase the file IO...
2070b5840353SAdam Hornáček         if (!parentFile.mkdirs()) {
2071b5840353SAdam Hornáček             assert parentFile.exists();
2072b5840353SAdam Hornáček         }
2073b5840353SAdam Hornáček 
2074b5840353SAdam Hornáček         // Write to a pending file for later renaming.
2075b5840353SAdam Hornáček         String xrefAbs = xrefFile.getAbsolutePath();
207640c74b99SVladimir Kotal         return xrefAbs;
2077b5840353SAdam Hornáček     }
2078b5840353SAdam Hornáček 
207940c74b99SVladimir Kotal     /**
208040c74b99SVladimir Kotal      * Get a writer to which the xref can be written, or null if no xref
208140c74b99SVladimir Kotal      * should be produced for files of this type.
208240c74b99SVladimir Kotal      */
newXrefWriter(String path, File transientXref, boolean compressed)208340c74b99SVladimir Kotal     private CountingWriter newXrefWriter(String path, File transientXref, boolean compressed) throws IOException {
208440c74b99SVladimir Kotal         return new CountingWriter(new BufferedWriter(new OutputStreamWriter(compressed ?
208540c74b99SVladimir Kotal                 new GZIPOutputStream(new FileOutputStream(transientXref)) :
208640c74b99SVladimir Kotal                 new FileOutputStream(transientXref))));
2087b5840353SAdam Hornáček     }
2088b5840353SAdam Hornáček 
pickLockFactory(RuntimeEnvironment env)2089b5840353SAdam Hornáček     LockFactory pickLockFactory(RuntimeEnvironment env) {
2090b5840353SAdam Hornáček         switch (env.getLuceneLocking()) {
2091b5840353SAdam Hornáček             case ON:
2092b5840353SAdam Hornáček             case SIMPLE:
2093b5840353SAdam Hornáček                 return SimpleFSLockFactory.INSTANCE;
2094b5840353SAdam Hornáček             case NATIVE:
2095b5840353SAdam Hornáček                 return NativeFSLockFactory.INSTANCE;
2096b5840353SAdam Hornáček             case OFF:
2097b5840353SAdam Hornáček             default:
2098b5840353SAdam Hornáček                 return NoLockFactory.INSTANCE;
2099b5840353SAdam Hornáček         }
2100b5840353SAdam Hornáček     }
2101b5840353SAdam Hornáček 
finishWriting()2102b5840353SAdam Hornáček     private void finishWriting() throws IOException {
2103b5840353SAdam Hornáček         boolean hasPendingCommit = false;
2104b5840353SAdam Hornáček         try {
2105b5840353SAdam Hornáček             writeAnalysisSettings();
2106b5840353SAdam Hornáček 
2107b8ad1421SVladimir Kotal             LOGGER.log(Level.FINE, "preparing to commit changes to Lucene index"); // TODO add info about which database
2108b5840353SAdam Hornáček             writer.prepareCommit();
2109b5840353SAdam Hornáček             hasPendingCommit = true;
2110b5840353SAdam Hornáček 
2111b5840353SAdam Hornáček             int n = completer.complete();
2112b8ad1421SVladimir Kotal             // TODO: add elapsed
2113b5840353SAdam Hornáček             LOGGER.log(Level.FINE, "completed {0} object(s)", n);
2114b5840353SAdam Hornáček 
2115b5840353SAdam Hornáček             // Just before commit(), reset the `hasPendingCommit' flag,
2116b5840353SAdam Hornáček             // since after commit() is called, there is no need for
2117b5840353SAdam Hornáček             // rollback() regardless of success.
2118b5840353SAdam Hornáček             hasPendingCommit = false;
2119b5840353SAdam Hornáček             writer.commit();
2120b5840353SAdam Hornáček         } catch (RuntimeException | IOException e) {
2121a72324b1SAdam Hornáček             if (hasPendingCommit) {
2122a72324b1SAdam Hornáček                 writer.rollback();
2123a72324b1SAdam Hornáček             }
2124b5840353SAdam Hornáček             LOGGER.log(Level.WARNING,
2125b5840353SAdam Hornáček                 "An error occurred while finishing writer and completer", e);
2126b5840353SAdam Hornáček             throw e;
2127b5840353SAdam Hornáček         }
2128b5840353SAdam Hornáček     }
2129b5840353SAdam Hornáček 
2130b5840353SAdam Hornáček     /**
2131b5840353SAdam Hornáček      * Verify TABSIZE, and evaluate AnalyzerGuru version together with ZVER --
2132b5840353SAdam Hornáček      * or return a value to indicate mismatch.
2133b5840353SAdam Hornáček      * @param file the source file object
2134b5840353SAdam Hornáček      * @param path the source file path
2135b5840353SAdam Hornáček      * @return {@code false} if a mismatch is detected
2136b5840353SAdam Hornáček      */
2137aa329234SVladimir Kotal     @VisibleForTesting
checkSettings(File file, String path)2138aa329234SVladimir Kotal     boolean checkSettings(File file, String path) throws IOException {
2139ee13dbaeSChris Fraire 
2140c9982635SVladimir Kotal         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
214173f75cddSVladimir Kotal         boolean outIsXrefWriter = false; // potential xref writer
2142b5840353SAdam Hornáček         int reqTabSize = project != null && project.hasTabSizeSetting() ?
2143b5840353SAdam Hornáček             project.getTabSize() : 0;
2144b5840353SAdam Hornáček         Integer actTabSize = settings.getTabSize();
2145b5840353SAdam Hornáček         if (actTabSize != null && !actTabSize.equals(reqTabSize)) {
2146b5840353SAdam Hornáček             LOGGER.log(Level.FINE, "Tabsize mismatch: {0}", path);
2147b5840353SAdam Hornáček             return false;
2148b5840353SAdam Hornáček         }
2149b5840353SAdam Hornáček 
2150b5840353SAdam Hornáček         int n = 0;
2151b5840353SAdam Hornáček         postsIter = uidIter.postings(postsIter);
2152b5840353SAdam Hornáček         while (postsIter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
2153b5840353SAdam Hornáček             ++n;
2154b5840353SAdam Hornáček             // Read a limited-fields version of the document.
2155b5840353SAdam Hornáček             Document doc = reader.document(postsIter.docID(), CHECK_FIELDS);
2156b5840353SAdam Hornáček             if (doc == null) {
2157b5840353SAdam Hornáček                 LOGGER.log(Level.FINER, "No Document: {0}", path);
2158b5840353SAdam Hornáček                 continue;
2159b5840353SAdam Hornáček             }
2160b5840353SAdam Hornáček 
2161b5840353SAdam Hornáček             long reqGuruVersion = AnalyzerGuru.getVersionNo();
2162b5840353SAdam Hornáček             Long actGuruVersion = settings.getAnalyzerGuruVersion();
2163c9982635SVladimir Kotal             /*
2164b5840353SAdam Hornáček              * For an older OpenGrok index that does not yet have a defined,
2165b5840353SAdam Hornáček              * stored analyzerGuruVersion, break so that no extra work is done.
2166b5840353SAdam Hornáček              * After a re-index, the guru version check will be active.
2167b5840353SAdam Hornáček              */
2168b5840353SAdam Hornáček             if (actGuruVersion == null) {
2169b5840353SAdam Hornáček                 break;
2170b5840353SAdam Hornáček             }
2171b5840353SAdam Hornáček 
217257eefa47SKryštof Tulinger             AbstractAnalyzer fa = null;
2173b5840353SAdam Hornáček             String fileTypeName;
2174b5840353SAdam Hornáček             if (actGuruVersion.equals(reqGuruVersion)) {
2175b5840353SAdam Hornáček                 fileTypeName = doc.get(QueryBuilder.TYPE);
2176b5840353SAdam Hornáček                 if (fileTypeName == null) {
2177b5840353SAdam Hornáček                     // (Should not get here, but break just in case.)
2178b5840353SAdam Hornáček                     LOGGER.log(Level.FINEST, "Missing TYPE field: {0}", path);
2179b5840353SAdam Hornáček                     break;
2180b5840353SAdam Hornáček                 }
2181ee13dbaeSChris Fraire 
2182b8ad1421SVladimir Kotal                 AnalyzerFactory fac = AnalyzerGuru.findByFileTypeName(fileTypeName);
2183ee13dbaeSChris Fraire                 if (fac != null) {
2184ee13dbaeSChris Fraire                     fa = fac.getAnalyzer();
2185ee13dbaeSChris Fraire                 }
2186b5840353SAdam Hornáček             } else {
2187c9982635SVladimir Kotal                 /*
2188b5840353SAdam Hornáček                  * If the stored guru version does not match, re-verify the
2189b5840353SAdam Hornáček                  * selection of analyzer or return a value to indicate the
2190b5840353SAdam Hornáček                  * analyzer is now mis-matched.
2191b5840353SAdam Hornáček                  */
2192b5840353SAdam Hornáček                 LOGGER.log(Level.FINER, "Guru version mismatch: {0}", path);
2193b5840353SAdam Hornáček 
2194ee13dbaeSChris Fraire                 fa = getAnalyzerFor(file, path);
2195b5840353SAdam Hornáček                 fileTypeName = fa.getFileTypeName();
2196b5840353SAdam Hornáček                 String oldTypeName = doc.get(QueryBuilder.TYPE);
2197b5840353SAdam Hornáček                 if (!fileTypeName.equals(oldTypeName)) {
2198b5840353SAdam Hornáček                     if (LOGGER.isLoggable(Level.FINE)) {
2199b5840353SAdam Hornáček                         LOGGER.log(Level.FINE, "Changed {0} to {1}: {2}",
2200b5840353SAdam Hornáček                             new Object[]{oldTypeName, fileTypeName, path});
2201b5840353SAdam Hornáček                     }
2202b5840353SAdam Hornáček                     return false;
2203b5840353SAdam Hornáček                 }
2204b5840353SAdam Hornáček             }
2205b5840353SAdam Hornáček 
2206b5840353SAdam Hornáček             // Verify Analyzer version, or return a value to indicate mismatch.
2207b5840353SAdam Hornáček             long reqVersion = AnalyzerGuru.getAnalyzerVersionNo(fileTypeName);
2208b5840353SAdam Hornáček             Long actVersion = settings.getAnalyzerVersion(fileTypeName);
2209b5840353SAdam Hornáček             if (actVersion == null || !actVersion.equals(reqVersion)) {
2210b5840353SAdam Hornáček                 if (LOGGER.isLoggable(Level.FINE)) {
2211b5840353SAdam Hornáček                     LOGGER.log(Level.FINE, "{0} version mismatch: {1}",
2212b5840353SAdam Hornáček                         new Object[]{fileTypeName, path});
2213b5840353SAdam Hornáček                 }
2214b5840353SAdam Hornáček                 return false;
2215b5840353SAdam Hornáček             }
2216b5840353SAdam Hornáček 
2217ee13dbaeSChris Fraire             if (fa != null) {
221873f75cddSVladimir Kotal                 outIsXrefWriter = true;
2219ee13dbaeSChris Fraire             }
2220ee13dbaeSChris Fraire 
2221b5840353SAdam Hornáček             // The versions checks have passed.
2222b5840353SAdam Hornáček             break;
2223b5840353SAdam Hornáček         }
2224b5840353SAdam Hornáček         if (n < 1) {
2225b5840353SAdam Hornáček             LOGGER.log(Level.FINER, "Missing index Documents: {0}", path);
2226b5840353SAdam Hornáček             return false;
2227b5840353SAdam Hornáček         }
2228b5840353SAdam Hornáček 
2229c9982635SVladimir Kotal         // If the economy mode is on, this should be treated as a match.
2230c9982635SVladimir Kotal         if (!env.isGenerateHtml()) {
2231c9982635SVladimir Kotal             if (xrefExistsFor(path)) {
2232c9982635SVladimir Kotal                 LOGGER.log(Level.FINEST, "Extraneous {0} , removing its xref file", path);
2233c9982635SVladimir Kotal                 removeXrefFile(path);
2234c9982635SVladimir Kotal             }
2235b5840353SAdam Hornáček             return true;
2236b5840353SAdam Hornáček         }
2237b5840353SAdam Hornáček 
2238c9982635SVladimir Kotal         return (!outIsXrefWriter || xrefExistsFor(path));
2239c9982635SVladimir Kotal     }
2240c9982635SVladimir Kotal 
writeAnalysisSettings()2241b5840353SAdam Hornáček     private void writeAnalysisSettings() throws IOException {
22427d004396SChris Fraire         settings = new IndexAnalysisSettings3();
2243b5840353SAdam Hornáček         settings.setProjectName(project != null ? project.getName() : null);
2244b5840353SAdam Hornáček         settings.setTabSize(project != null && project.hasTabSizeSetting() ?
2245b5840353SAdam Hornáček             project.getTabSize() : 0);
2246b5840353SAdam Hornáček         settings.setAnalyzerGuruVersion(AnalyzerGuru.getVersionNo());
2247b5840353SAdam Hornáček         settings.setAnalyzersVersions(AnalyzerGuru.getAnalyzersVersionNos());
22487d004396SChris Fraire         settings.setIndexedSymlinks(indexedSymlinks);
2249b5840353SAdam Hornáček 
2250b5840353SAdam Hornáček         IndexAnalysisSettingsAccessor dao = new IndexAnalysisSettingsAccessor();
2251b5840353SAdam Hornáček         dao.write(writer, settings);
2252b5840353SAdam Hornáček     }
2253b5840353SAdam Hornáček 
readAnalysisSettings()22547d004396SChris Fraire     private IndexAnalysisSettings3 readAnalysisSettings() throws IOException {
2255b5840353SAdam Hornáček         IndexAnalysisSettingsAccessor dao = new IndexAnalysisSettingsAccessor();
2256b5840353SAdam Hornáček         return dao.read(reader);
2257b5840353SAdam Hornáček     }
2258b5840353SAdam Hornáček 
xrefExistsFor(String path)2259c9982635SVladimir Kotal     private boolean xrefExistsFor(String path) {
2260ee13dbaeSChris Fraire         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
2261293ff322SChris Fraire         File xrefFile = whatXrefFile(path, env.isCompressXref());
2262293ff322SChris Fraire         if (!xrefFile.exists()) {
2263293ff322SChris Fraire             LOGGER.log(Level.FINEST, "Missing {0}", xrefFile);
2264c9982635SVladimir Kotal             return false;
2265ee13dbaeSChris Fraire         }
2266c9982635SVladimir Kotal 
2267c9982635SVladimir Kotal         return true;
2268ee13dbaeSChris Fraire     }
2269ee13dbaeSChris Fraire 
/**
 * Mutable holder with a single {@code localRelPath} field.
 * NOTE(review): presumably used as an out-parameter by the symlink
 * acceptance logic elsewhere in this class -- confirm against its callers.
 */
private static class AcceptSymlinkRet {
    // NOTE(review): looks like a path relative to some local root -- confirm.
    String localRelPath;
}
2273b5840353SAdam Hornáček }
2274