/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2008, 2022, Oracle and/or its affiliates. All rights reserved.
 * Portions Copyright (c) 2017, 2020, Chris Fraire.
 */
package org.opengrok.indexer.index;

import java.io.BufferedInputStream;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
import java.util.zip.GZIPOutputStream;

import jakarta.ws.rs.client.ClientBuilder;
import jakarta.ws.rs.client.Entity;
import jakarta.ws.rs.core.Response;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiTerms;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockFactory;
import org.apache.lucene.store.NativeFSLockFactory;
import org.apache.lucene.store.NoLockFactory;
import org.apache.lucene.store.SimpleFSLockFactory;
import org.apache.lucene.util.BytesRef;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.jetbrains.annotations.VisibleForTesting;
import org.opengrok.indexer.analysis.AbstractAnalyzer;
import org.opengrok.indexer.analysis.AnalyzerFactory;
import org.opengrok.indexer.analysis.AnalyzerGuru;
import org.opengrok.indexer.analysis.Ctags;
import org.opengrok.indexer.analysis.Definitions;
import org.opengrok.indexer.analysis.NullableNumLinesLOC;
import org.opengrok.indexer.analysis.NumLinesLOC;
import org.opengrok.indexer.configuration.PathAccepter;
import org.opengrok.indexer.configuration.Project;
import org.opengrok.indexer.configuration.RuntimeEnvironment;
import org.opengrok.indexer.history.FileCollector;
import org.opengrok.indexer.history.HistoryGuru;
import org.opengrok.indexer.history.Repository;
import org.opengrok.indexer.history.RepositoryInfo;
import org.opengrok.indexer.history.RepositoryWithHistoryTraversal;
import org.opengrok.indexer.logger.LoggerFactory;
import org.opengrok.indexer.search.QueryBuilder;
import org.opengrok.indexer.util.ForbiddenSymlinkException;
import org.opengrok.indexer.util.IOUtils;
import org.opengrok.indexer.util.ObjectPool;
import org.opengrok.indexer.util.Progress;
import org.opengrok.indexer.util.Statistics;
import org.opengrok.indexer.util.TandemPath;
import org.opengrok.indexer.web.Util;

import static org.opengrok.indexer.index.IndexerUtil.getWebAppHeaders;
import static org.opengrok.indexer.web.ApiUtils.waitForAsyncApi;

/**
 * This class is used to create / update the index databases. Currently, we use
 * one index database per project.
 *
 * @author Trond Norbye
 * @author Lubos Kosco, update for Lucene 4.x, 5.x
 */
public class IndexDatabase {

    private static final Logger LOGGER = LoggerFactory.getLogger(IndexDatabase.class);

    private static final Comparator<File> FILENAME_COMPARATOR = Comparator.comparing(File::getName);

    private static final Set<String> CHECK_FIELDS;

    private static final Set<String> REVERT_COUNTS_FIELDS;

    private static final Object INSTANCE_LOCK = new Object();

    /**
     * Key is canonical path; Value is the first accepted, absolute path. Map
     * is ordered by canonical length (ASC) and then canonical value (ASC).
     * The map is accessed by a single thread running indexDown().
     */
    private final Map<String, IndexedSymlink> indexedSymlinks = new TreeMap<>(
            Comparator.comparingInt(String::length).thenComparing(o -> o));

    private final Project project;
    private FSDirectory indexDirectory;
    private IndexReader reader;
    private IndexWriter writer;
    private IndexAnalysisSettings3 settings;
    private PendingFileCompleter completer;
    private NumLinesLOCAggregator countsAggregator;
    private TermsEnum uidIter;
    private PostingsEnum postsIter;
    private PathAccepter pathAccepter;
    private AnalyzerGuru analyzerGuru;
    private File xrefDir;
    private boolean interrupted;
    private CopyOnWriteArrayList<IndexChangedListener> listeners;
    private File dirtyFile;
    private final Object lock = new Object();
    private boolean dirty;
    private boolean running;
    private boolean isCountingDeltas;
    private boolean isWithDirectoryCounts;
    private List<String> directories;
    private LockFactory lockfact;
    private final BytesRef emptyBR = new BytesRef("");

    // Directory where we store indexes
    public static final String INDEX_DIR = "index";
    public static final String XREF_DIR = "xref";
    public static final String SUGGESTER_DIR = "suggester";

    private final IndexDownArgsFactory indexDownArgsFactory;

    /**
     * Create a new instance of the Index Database. Use this constructor if you
     * don't use any projects.
     *
     * @throws java.io.IOException if an error occurs while creating directories
     */
    public IndexDatabase() throws IOException {
        this(null);
    }

    /**
     * Create a new instance of an Index Database for a given project.
     *
     * @param project the project to create the database for
     * @param factory {@link IndexDownArgsFactory} instance
     * @throws java.io.IOException if an error occurs while creating directories
     */
    public IndexDatabase(Project project, IndexDownArgsFactory factory) throws IOException {
        indexDownArgsFactory = factory;
        this.project = project;
        lockfact = NoLockFactory.INSTANCE;
        initialize();
    }

    @VisibleForTesting
    IndexDatabase(Project project) throws IOException {
        this(project, new IndexDownArgsFactory());
    }

    static {
        CHECK_FIELDS = new HashSet<>();
        CHECK_FIELDS.add(QueryBuilder.TYPE);

        REVERT_COUNTS_FIELDS = new HashSet<>();
        REVERT_COUNTS_FIELDS.add(QueryBuilder.D);
        REVERT_COUNTS_FIELDS.add(QueryBuilder.PATH);
        REVERT_COUNTS_FIELDS.add(QueryBuilder.NUML);
        REVERT_COUNTS_FIELDS.add(QueryBuilder.LOC);
    }

    /**
     * Update the index database for all the projects.
     *
     * @param listener where to signal the changes to the database
     * @throws IOException if an error occurs
     */
    static CountDownLatch updateAll(IndexChangedListener listener) throws IOException {
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        List<IndexDatabase> dbs = new ArrayList<>();

        if (env.hasProjects()) {
            for (Project project : env.getProjectList()) {
                dbs.add(new IndexDatabase(project));
            }
        } else {
            dbs.add(new IndexDatabase());
        }

        IndexerParallelizer parallelizer = RuntimeEnvironment.getInstance().getIndexerParallelizer();
        CountDownLatch latch = new CountDownLatch(dbs.size());
        for (IndexDatabase d : dbs) {
            final IndexDatabase db = d;
            if (listener != null) {
                db.addIndexChangedListener(listener);
            }

            parallelizer.getFixedExecutor().submit(() -> {
                try {
                    db.update();
                } catch (Throwable e) {
                    LOGGER.log(Level.SEVERE,
                            String.format("Problem updating index database in directory %s: ",
                                    db.indexDirectory.getDirectory()), e);
                } finally {
                    latch.countDown();
                }
            });
        }
        return latch;
    }

    /**
     * Update the index database for a number of sub-directories.
     *
     * @param listener where to signal the changes to the database
     * @param paths list of paths to be indexed
     */
    public static void update(IndexChangedListener listener, List<String> paths) {
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        IndexerParallelizer parallelizer = env.getIndexerParallelizer();
        List<IndexDatabase> dbs = new ArrayList<>();

        for (String path : paths) {
            Project project = Project.getProject(path);
            if (project == null && env.hasProjects()) {
                LOGGER.log(Level.WARNING, "Could not find a project for \"{0}\"", path);
            } else {
                IndexDatabase db;

                try {
                    if (project == null) {
                        db = new IndexDatabase();
                    } else {
                        db = new IndexDatabase(project);
                    }

                    int idx = dbs.indexOf(db);
                    if (idx != -1) {
                        db = dbs.get(idx);
                    }

                    if (db.addDirectory(path)) {
                        if (idx == -1) {
                            dbs.add(db);
                        }
                    } else {
                        LOGGER.log(Level.WARNING, "Directory does not exist \"{0}\" .", path);
                    }
                } catch (IOException e) {
                    LOGGER.log(Level.WARNING, "An error occurred while updating index", e);
                }
            }

            for (final IndexDatabase db : dbs) {
                db.addIndexChangedListener(listener);
                parallelizer.getFixedExecutor().submit(() -> {
                    try {
                        db.update();
                    } catch (Throwable e) {
                        LOGGER.log(Level.SEVERE, "An error occurred while updating index", e);
                    }
                });
            }
        }
    }
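    /*
     * Illustrative sketch (not part of the original source): a caller-side
     * view of driving a full reindex with updateAll(). The listener may be
     * null; the returned latch completes once every index database has been
     * processed. The "listener" variable below is a placeholder.
     *
     *   CountDownLatch latch = IndexDatabase.updateAll(listener);
     *   latch.await(); // block until all index databases have been updated
     */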
    @SuppressWarnings("PMD.CollapsibleIfStatements")
    private void initialize() throws IOException {
        synchronized (INSTANCE_LOCK) {
            RuntimeEnvironment env = RuntimeEnvironment.getInstance();
            File indexDir = new File(env.getDataRootFile(), INDEX_DIR);
            if (project != null) {
                indexDir = new File(indexDir, project.getPath());
            }

            if (!indexDir.exists() && !indexDir.mkdirs()) {
                // to avoid race conditions, just recheck..
                if (!indexDir.exists()) {
                    throw new FileNotFoundException("Failed to create root directory [" +
                            indexDir.getAbsolutePath() + "]");
                }
            }

            lockfact = pickLockFactory(env);
            indexDirectory = FSDirectory.open(indexDir.toPath(), lockfact);
            pathAccepter = env.getPathAccepter();
            analyzerGuru = new AnalyzerGuru();
            xrefDir = new File(env.getDataRootFile(), XREF_DIR);
            listeners = new CopyOnWriteArrayList<>();
            dirtyFile = new File(indexDir, "dirty");
            dirty = dirtyFile.exists();
            directories = new ArrayList<>();
        }
    }

    /**
     * By default, the indexer will traverse all directories in the project.
     * If you add directories with this function, update() will only process
     * the specified directories.
     *
     * @param dir The directory to scan
     * @return true if the directory was added, false otherwise
     */
    @SuppressWarnings("PMD.UseStringBufferForStringAppends")
    public boolean addDirectory(String dir) {
        String directory = dir;
        if (directory.startsWith("\\")) {
            directory = directory.replace('\\', '/');
        } else if (directory.charAt(0) != '/') {
            directory = "/" + directory;
        }
        File file = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), directory);
        if (file.exists()) {
            directories.add(directory);
            return true;
        }
        return false;
    }

    private void showFileCount(String dir, IndexDownArgs args) {
        if (RuntimeEnvironment.getInstance().isPrintProgress()) {
            LOGGER.log(Level.INFO, String.format("Need to process: %d files for %s", args.curCount, dir));
        }
    }

    private void markProjectIndexed(Project project) {
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();

        // Successfully indexed the project. The message is sent even if
        // the project's isIndexed() is true because it triggers RepositoryInfo
        // refresh.
        if (project == null) {
            return;
        }

        // Also need to store the correct value in configuration
        // when indexer writes it to a file.
        project.setIndexed(true);

        if (env.getConfigURI() == null) {
            return;
        }

        Response response;
        try {
            response = ClientBuilder.newBuilder().connectTimeout(env.getConnectTimeout(), TimeUnit.SECONDS).build()
                    .target(env.getConfigURI())
                    .path("api")
                    .path("v1")
                    .path("projects")
                    .path(Util.uriEncode(project.getName()))
                    .path("indexed")
                    .request()
                    .headers(getWebAppHeaders())
                    .put(Entity.text(""));
        } catch (RuntimeException e) {
            LOGGER.log(Level.WARNING, String.format("Could not notify the webapp that project %s was indexed",
                    project), e);
            return;
        }

        if (response.getStatus() == Response.Status.ACCEPTED.getStatusCode()) {
            try {
                response = waitForAsyncApi(response);
            } catch (InterruptedException e) {
                LOGGER.log(Level.WARNING, "interrupted while waiting for API response", e);
            }
        }

        if (response.getStatusInfo().getFamily() != Response.Status.Family.SUCCESSFUL) {
            LOGGER.log(Level.WARNING, "Could not notify the webapp that project {0} was indexed: {1}",
                    new Object[] {project, response});
        }
    }

    private static List<Repository> getRepositoriesForProject(Project project) {
        List<Repository> repositoryList = new ArrayList<>();

        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        List<RepositoryInfo> repositoryInfoList = env.getProjectRepositoriesMap().get(project);

        if (repositoryInfoList != null) {
            for (RepositoryInfo repositoryInfo : repositoryInfoList) {
                Repository repository = HistoryGuru.getInstance().getRepository(
                        new File(repositoryInfo.getDirectoryName()));
                if (repository != null) {
                    repositoryList.add(repository);
                }
            }
        }

        return repositoryList;
    }
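    /*
     * Illustrative sketch (not part of the original source): the webapp
     * notification performed by markProjectIndexed() is roughly equivalent to
     * the following HTTP request, with the path segments taken from the code
     * above. The host and project name are made-up placeholders:
     *
     *   PUT {configURI}/api/v1/projects/{projectName}/indexed
     *
     * A 202 (ACCEPTED) response is treated as asynchronous and awaited via
     * waitForAsyncApi(); any non-2xx family outcome is only logged.
     */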
    /**
     * @return whether the repositories of given project are ready for history based reindex
     */
    private boolean isReadyForHistoryBasedReindex() {
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();

        // So far the history based reindex does not work without projects.
        if (!env.hasProjects()) {
            LOGGER.log(Level.FINEST, "projects are disabled, will be indexed by directory traversal.");
            return false;
        }

        if (project == null) {
            LOGGER.log(Level.FINEST, "no project, will be indexed by directory traversal.");
            return false;
        }

        // History needs to be enabled for the history cache to work (see the comment below).
        if (!project.isHistoryEnabled()) {
            LOGGER.log(Level.FINEST, "history is disabled, will be indexed by directory traversal.");
            return false;
        }

        // History cache is necessary to get the last indexed revision for given repository.
        if (!env.isHistoryCache()) {
            LOGGER.log(Level.FINEST, "history cache is disabled, will be indexed by directory traversal.");
            return false;
        }

        // Per project tunable can override the global tunable, therefore env.isHistoryBasedReindex() is not checked.
        if (!project.isHistoryBasedReindex()) {
            LOGGER.log(Level.FINEST, "history-based reindex is disabled, will be indexed by directory traversal.");
            return false;
        }

        /*
         * Check that the index is present for this project.
         * In case of the initial indexing, the traversal of all changesets would most likely be counterproductive,
         * assuming traversal of directory tree is cheaper than getting the files from SCM history
         * in such case.
         */
        try {
            if (getNumFiles() == 0) {
                LOGGER.log(Level.FINEST, "zero number of documents for project {0}, "
                        + "will be indexed by directory traversal.", project);
                return false;
            }
        } catch (IOException e) {
            LOGGER.log(Level.FINEST, "failed to get number of documents for project {0}, "
                    + "will be indexed by directory traversal.", project);
            return false;
        }

        // If there was no change to any of the repositories of the project, a FileCollector instance
        // will be returned, however the list of files therein will be empty, which is a legitimate
        // situation (no change of the project). Only in the case where getFileCollector() returns null
        // (hinting that something went wrong) should the file based traversal be done.
        if (env.getFileCollector(project.getName()) == null) {
            LOGGER.log(Level.FINEST, "no file collector for project {0}, will be indexed by directory traversal.",
                    project);
            return false;
        }

        List<Repository> repositories = getRepositoriesForProject(project);
        // Projects without repositories have to be indexed using indexDown().
        if (repositories.isEmpty()) {
            LOGGER.log(Level.FINEST, "project {0} has no repositories, will be indexed by directory traversal.",
                    project);
            return false;
        }

        for (Repository repository : repositories) {
            if (!isReadyForHistoryBasedReindex(repository)) {
                return false;
            }
        }

        // Here it is assumed there are no files untracked by the repositories of this project.
        return true;
    }

    /**
     * @param repository Repository instance
     * @return true if the repository can be used for history based reindex
     */
    @VisibleForTesting
    boolean isReadyForHistoryBasedReindex(Repository repository) {
        if (!repository.isHistoryEnabled()) {
            LOGGER.log(Level.FINE, "history is disabled for {0}, "
                    + "the associated project {1} will be indexed using directory traversal",
                    new Object[]{repository, project});
            return false;
        }

        if (!repository.isHistoryBasedReindex()) {
            LOGGER.log(Level.FINE, "history based reindex is disabled for {0}, "
                    + "the associated project {1} will be indexed using directory traversal",
                    new Object[]{repository, project});
            return false;
        }

        if (!(repository instanceof RepositoryWithHistoryTraversal)) {
            LOGGER.log(Level.FINE, "project {0} has a repository {1} that does not support history traversal, "
                    + "the project will be indexed using directory traversal.",
                    new Object[]{project, repository});
            return false;
        }

        return true;
    }

    /**
     * Update the content of this index database.
     *
     * @throws IOException if an error occurs
     */
    public void update() throws IOException {
        synchronized (lock) {
            if (running) {
                throw new IOException("Indexer already running!");
            }
            running = true;
            interrupted = false;
        }

        RuntimeEnvironment env = RuntimeEnvironment.getInstance();

        reader = null;
        writer = null;
        settings = null;
        uidIter = null;
        postsIter = null;
        indexedSymlinks.clear();

        IOException finishingException = null;
        try {
            Analyzer analyzer = AnalyzerGuru.getAnalyzer();
            IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
            iwc.setRAMBufferSizeMB(env.getRamBufferSize());
            writer = new IndexWriter(indexDirectory, iwc);
            writer.commit(); // to make sure index exists on the disk
            completer = new PendingFileCompleter();

            if (directories.isEmpty()) {
                if (project == null) {
                    directories.add("");
                } else {
                    directories.add(project.getPath());
                }
            }

            for (String dir : directories) {
                File sourceRoot;
                if ("".equals(dir)) {
                    sourceRoot = env.getSourceRootFile();
                } else {
                    sourceRoot = new File(env.getSourceRootFile(), dir);
                }

                dir = Util.fixPathIfWindows(dir);

                String startUid = Util.path2uid(dir, "");
                reader = DirectoryReader.open(indexDirectory); // open existing index
                countsAggregator = new NumLinesLOCAggregator();
                settings = readAnalysisSettings();
                if (settings == null) {
                    settings = new IndexAnalysisSettings3();
                }
                Terms terms = null;
                if (reader.numDocs() > 0) {
                    terms = MultiTerms.getTerms(reader, QueryBuilder.U);

                    NumLinesLOCAccessor countsAccessor = new NumLinesLOCAccessor();
                    if (countsAccessor.hasStored(reader)) {
                        isWithDirectoryCounts = true;
                        isCountingDeltas = true;
                    } else {
                        boolean foundCounts = countsAccessor.register(countsAggregator, reader);
                        isWithDirectoryCounts = false;
                        isCountingDeltas = foundCounts;
                        if (!isCountingDeltas) {
                            LOGGER.info("Forcing reindexing to fully compute directory counts");
                        }
                    }
                } else {
                    isWithDirectoryCounts = false;
                    isCountingDeltas = false;
                }

                try {
                    if (terms != null) {
                        uidIter = terms.iterator();
                        TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startUid)); //init uid
                        if (stat == TermsEnum.SeekStatus.END) {
                            uidIter = null;
                            LOGGER.log(Level.WARNING, "Couldn''t find a start term for {0}, empty u field?",
                                    startUid);
                        }
                    }

                    // The actual indexing happens in indexParallel(). Here we merely collect the files
                    // that need to be indexed and the files that should be removed.
                    IndexDownArgs args = indexDownArgsFactory.getIndexDownArgs();
                    boolean usedHistory = getIndexDownArgs(dir, sourceRoot, args);

                    // Traverse the trailing terms. This needs to be done before indexParallel() because
                    // in some cases it can add items to the args parameter.
                    processTrailingTerms(startUid, usedHistory, args);

                    args.curCount = 0;
                    Statistics elapsed = new Statistics();
                    LOGGER.log(Level.INFO, "Starting indexing of directory {0}", dir);
                    indexParallel(dir, args);
                    elapsed.report(LOGGER, String.format("Done indexing of directory %s", dir),
                            "indexer.db.directory.index");

                    /*
                     * As a signifier that #Lines/LOC are comprehensively
                     * stored so that later calculation is in deltas mode, we
                     * need at least one D-document saved. For a repo with only
                     * non-code files, however, no true #Lines/LOC will have
                     * been saved. Subsequent re-indexing will do more work
                     * than necessary (until a source code file is placed). We
                     * can record zeroes for a fake file under the root to get
                     * a D-document even for this special repo situation.
                     *
                     * Metrics are aggregated for directories up to the root,
                     * so it suffices to put the fake directly under the root.
                     */
                    if (!isWithDirectoryCounts) {
                        final String ROOT_FAKE_FILE = "/.OpenGrok_fake_file";
                        countsAggregator.register(new NumLinesLOC(ROOT_FAKE_FILE, 0, 0));
                    }
                    NumLinesLOCAccessor countsAccessor = new NumLinesLOCAccessor();
                    countsAccessor.store(writer, reader, countsAggregator,
                            isWithDirectoryCounts && isCountingDeltas);

                    markProjectIndexed(project);
                } finally {
                    reader.close();
                }
            }

            // The RuntimeException thrown from the block above can prevent the writing from completing.
            // This is deliberate.
            try {
                finishWriting();
            } catch (IOException e) {
                finishingException = e;
            }
        } catch (RuntimeException ex) {
            LOGGER.log(Level.SEVERE, "Failed with unexpected RuntimeException", ex);
            throw ex;
        } finally {
            completer = null;
            try {
                if (writer != null) {
                    writer.close();
                }
            } catch (IOException e) {
                if (finishingException == null) {
                    finishingException = e;
                }
                LOGGER.log(Level.WARNING, "An error occurred while closing writer", e);
            } finally {
                writer = null;
                synchronized (lock) {
                    running = false;
                }
            }
        }

        if (finishingException != null) {
            throw finishingException;
        }

        if (!isInterrupted() && isDirty()) {
            if (env.isOptimizeDatabase()) {
                optimize();
            }
            env.setIndexTimestamp();
        }
    }

    private void processTrailingTerms(String startUid, boolean usedHistory, IndexDownArgs args) throws IOException {
        while (uidIter != null && uidIter.term() != null
                && uidIter.term().utf8ToString().startsWith(startUid)) {

            if (usedHistory) {
                // Allow for forced reindex. For history based reindex the trailing terms
                // correspond to the files that have not changed. Such files might need to be re-indexed
                // if the index format changed.
                String termPath = Util.uid2url(uidIter.term().utf8ToString());
                File termFile = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), termPath);
                boolean matchOK = (isWithDirectoryCounts || isCountingDeltas)
                        && checkSettings(termFile, termPath);
                if (!matchOK) {
                    removeFile(false);

                    args.curCount++;
                    args.works.add(new IndexFileWork(termFile, termPath));
                }
            } else {
                // Remove data for the trailing terms that getIndexDownArgs()
                // did not traverse. These correspond to the files that have been
                // removed and have higher ordering than any present files.
                removeFile(true);
            }

            BytesRef next = uidIter.next();
            if (next == null) {
                uidIter = null;
            }
        }
    }

    /**
     * @param dir directory path
     * @param sourceRoot source root File object
     * @param args {@link IndexDownArgs} instance (output)
     * @return true if history was used to gather the {@code IndexDownArgs}
     * @throws IOException on error
     */
    @VisibleForTesting
    boolean getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args) throws IOException {
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        boolean historyBased = isReadyForHistoryBasedReindex();

        if (LOGGER.isLoggable(Level.INFO)) {
            LOGGER.log(Level.INFO, String.format("Starting file collection using %s traversal for directory '%s'",
                    historyBased ? "history" : "file-system", dir));
        }
        Statistics elapsed = new Statistics();
        if (historyBased) {
            indexDownUsingHistory(env.getSourceRootFile(), args);
        } else {
            indexDown(sourceRoot, dir, args);
        }

        elapsed.report(LOGGER, String.format("Done file collection for directory '%s'", dir),
                "indexer.db.collection");

        showFileCount(dir, args);

        return historyBased;
    }
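    /*
     * Illustrative sketch (not part of the original source): the overall
     * shape of a single update() pass over one directory, with the names
     * taken verbatim from the methods above.
     *
     *   IndexDownArgs args = indexDownArgsFactory.getIndexDownArgs();
     *   boolean usedHistory = getIndexDownArgs(dir, sourceRoot, args); // serial stage: collect work items
     *   processTrailingTerms(startUid, usedHistory, args);             // handle leftover index terms
     *   indexParallel(dir, args);                                      // parallel stage: analyze and add files
     */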
    /**
     * Executes the first, serial stage of indexing, by going through the set of files assembled from history.
     *
     * @param sourceRoot path to the source root (same as {@link RuntimeEnvironment#getSourceRootPath()})
     * @param args {@link IndexDownArgs} instance where the resulting files to be indexed will be stored
     * @throws IOException on error
     */
    @VisibleForTesting
    void indexDownUsingHistory(File sourceRoot, IndexDownArgs args) throws IOException {
        FileCollector fileCollector = RuntimeEnvironment.getInstance().getFileCollector(project.getName());

        for (String path : fileCollector.getFiles()) {
            File file = new File(sourceRoot, path);
            processFileIncremental(args, file, path);
        }
    }

    /**
     * Optimize all index databases.
     *
     * @throws IOException if an error occurs
     */
    static CountDownLatch optimizeAll() throws IOException {
        List<IndexDatabase> dbs = new ArrayList<>();
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        IndexerParallelizer parallelizer = env.getIndexerParallelizer();
        if (env.hasProjects()) {
            for (Project project : env.getProjectList()) {
                dbs.add(new IndexDatabase(project));
            }
        } else {
            dbs.add(new IndexDatabase());
        }

        CountDownLatch latch = new CountDownLatch(dbs.size());
        for (IndexDatabase d : dbs) {
            final IndexDatabase db = d;
            if (db.isDirty()) {
                parallelizer.getFixedExecutor().submit(() -> {
                    try {
                        db.update();
                    } catch (Throwable e) {
                        LOGGER.log(Level.SEVERE, "Problem updating lucene index database: ", e);
                    } finally {
                        latch.countDown();
                    }
                });
            }
        }

        return latch;
    }

    /**
     * Optimize the index database.
     * @throws IOException I/O exception
     */
    public void optimize() throws IOException {
        synchronized (lock) {
            if (running) {
                LOGGER.warning("Optimize terminated... Someone else is updating / optimizing it!");
                return;
            }
            running = true;
        }

        IndexWriter wrt = null;
        IOException writerException = null;
        try {
            Statistics elapsed = new Statistics();
            String projectDetail = this.project != null ? " for project " + project.getName() : "";
            LOGGER.log(Level.INFO, "Optimizing the index{0}", projectDetail);
            Analyzer analyzer = new StandardAnalyzer();
            IndexWriterConfig conf = new IndexWriterConfig(analyzer);
            conf.setOpenMode(OpenMode.CREATE_OR_APPEND);

            wrt = new IndexWriter(indexDirectory, conf);
            wrt.forceMerge(1); // this is deprecated and not needed anymore
            elapsed.report(LOGGER, String.format("Done optimizing index%s", projectDetail),
                    "indexer.db.optimize");
            synchronized (lock) {
                if (dirtyFile.exists() && !dirtyFile.delete()) {
                    LOGGER.log(Level.FINE, "Failed to remove \"dirty-file\": {0}",
                            dirtyFile.getAbsolutePath());
                }
                dirty = false;
            }
        } catch (IOException e) {
            writerException = e;
            LOGGER.log(Level.SEVERE, "ERROR: optimizing index", e);
        } finally {
            if (wrt != null) {
                try {
                    wrt.close();
                } catch (IOException e) {
                    if (writerException == null) {
                        writerException = e;
                    }
                    LOGGER.log(Level.WARNING, "An error occurred while closing writer", e);
                }
            }
            synchronized (lock) {
                running = false;
            }
        }

        if (writerException != null) {
            throw writerException;
        }
    }

    private boolean isDirty() {
        synchronized (lock) {
            return dirty;
        }
    }

    private void setDirty() {
        synchronized (lock) {
            try {
                if (!dirty) {
                    if (!dirtyFile.createNewFile() && !dirtyFile.exists()) {
                        LOGGER.log(Level.FINE, "Failed to create \"dirty-file\": {0}",
                                dirtyFile.getAbsolutePath());
                    }
                    dirty = true;
                }
            } catch (IOException e) {
                LOGGER.log(Level.FINE, "When creating dirty file: ", e);
            }
        }
    }

    private File whatXrefFile(String path, boolean compress) {
        String xrefPath = compress ? TandemPath.join(path, ".gz") : path;
        return new File(xrefDir, xrefPath);
    }

    /**
     * Queue the removal of xref file for given path.
     * @param path path to file under source root
     */
    private void removeXrefFile(String path) {
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        File xrefFile = whatXrefFile(path, env.isCompressXref());
        PendingFileDeletion pending = new PendingFileDeletion(xrefFile.getAbsolutePath());
        completer.add(pending);
    }

    private void removeHistoryFile(String path) {
        HistoryGuru.getInstance().clearCacheFile(path);
    }

    /**
     * Remove a stale file from the index database and potentially also from history cache,
     * and queue the removal of the associated xref file.
     *
     * @param removeHistory if false, do not remove history cache for this file
     * @throws java.io.IOException if an error occurs
     */
    private void removeFile(boolean removeHistory) throws IOException {
        String path = Util.uid2url(uidIter.term().utf8ToString());

        for (IndexChangedListener listener : listeners) {
            listener.fileRemove(path);
        }

        removeFileDocUid(path);

        removeXrefFile(path);

        if (removeHistory) {
            removeHistoryFile(path);
        }

        setDirty();

        for (IndexChangedListener listener : listeners) {
            listener.fileRemoved(path);
        }
    }

    private void removeFileDocUid(String path) throws IOException {
        // Determine if a reversal of counts is necessary, and execute if so.
        if (isCountingDeltas) {
            postsIter = uidIter.postings(postsIter);
            while (postsIter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                // Read a limited-fields version of the document.
                Document doc = reader.document(postsIter.docID(), REVERT_COUNTS_FIELDS);
                if (doc != null) {
                    decrementLOCforDoc(path, doc);
                    break;
                }
            }
        }

        writer.deleteDocuments(new Term(QueryBuilder.U, uidIter.term()));
    }

    private void decrementLOCforDoc(String path, Document doc) {
        NullableNumLinesLOC nullableCounts = NumLinesLOCUtil.read(doc);
        if (nullableCounts.getNumLines() != null && nullableCounts.getLOC() != null) {
            NumLinesLOC counts = new NumLinesLOC(path,
                    -nullableCounts.getNumLines(),
                    -nullableCounts.getLOC());
            countsAggregator.register(counts);
        }
    }
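    /*
     * Worked example (illustrative, not part of the original source): if a
     * removed file was stored with numLines=100 and LOC=80, then
     * decrementLOCforDoc() registers (-100, -80) with the aggregator, so the
     * per-directory totals shrink by exactly that file's contribution while
     * counting in deltas mode.
     */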
    /**
     * Add a file to the Lucene index (and generate a xref file).
     *
     * @param file The file to add
     * @param path The path to the file (from source root)
     * @param ctags a defined instance to use (only if its binary is not null)
     * @throws java.io.IOException if an error occurs
     * @throws InterruptedException if a timeout occurs
     */
    private void addFile(File file, String path, Ctags ctags) throws IOException, InterruptedException {
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        AbstractAnalyzer fa = getAnalyzerFor(file, path);

        for (IndexChangedListener listener : listeners) {
            listener.fileAdd(path, fa.getClass().getSimpleName());
        }

        ctags.setTabSize(project != null ? project.getTabSize() : 0);
        if (env.getCtagsTimeout() != 0) {
            ctags.setTimeout(env.getCtagsTimeout());
        }
        fa.setCtags(ctags);
        fa.setCountsAggregator(countsAggregator);
        fa.setProject(Project.getProject(path));
        fa.setScopesEnabled(env.isScopesEnabled());
        fa.setFoldingEnabled(env.isFoldingEnabled());

        Document doc = new Document();
        CountingWriter xrefOut = null;
        try {
            String xrefAbs = null;
            File transientXref = null;
            if (env.isGenerateHtml()) {
                xrefAbs = getXrefPath(path);
                transientXref = new File(TandemPath.join(xrefAbs,
                        PendingFileCompleter.PENDING_EXTENSION));
                xrefOut = newXrefWriter(path, transientXref, env.isCompressXref());
            }

            analyzerGuru.populateDocument(doc, file, path, fa, xrefOut);

            // Avoid producing empty xref files.
            if (xrefOut != null && xrefOut.getCount() > 0) {
                PendingFileRenaming ren = new PendingFileRenaming(xrefAbs,
                        transientXref.getAbsolutePath());
                completer.add(ren);
            } else if (xrefOut != null) {
                LOGGER.log(Level.FINER, "xref for {0} would be empty, will remove", path);
                completer.add(new PendingFileDeletion(transientXref.toString()));
            }
        } catch (InterruptedException e) {
            LOGGER.log(Level.WARNING, "File ''{0}'' interrupted--{1}",
                    new Object[]{path, e.getMessage()});
            cleanupResources(doc);
            throw e;
        } catch (Exception e) {
            LOGGER.log(Level.INFO, "Skipped file ''{0}'' because the analyzer didn''t "
                    + "understand it.", path);
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.log(Level.FINE, "Exception from analyzer " + fa.getClass().getName(), e);
            }
            cleanupResources(doc);
            return;
        } finally {
            fa.setCtags(null);
            fa.setCountsAggregator(null);
            if (xrefOut != null) {
                xrefOut.close();
            }
        }

        try {
            writer.addDocument(doc);
        } catch (Throwable t) {
            cleanupResources(doc);
            throw t;
        }

        setDirty();

        for (IndexChangedListener listener : listeners) {
            listener.fileAdded(path, fa.getClass().getSimpleName());
        }
    }

    private AbstractAnalyzer getAnalyzerFor(File file, String path) throws IOException {
        try (InputStream in = new BufferedInputStream(new FileInputStream(file))) {
            return AnalyzerGuru.getAnalyzer(in, path);
        }
    }

    /**
     * Do a best effort to clean up all resources allocated when populating
     * a Lucene document. On normal execution, these resources should be
     * closed automatically by the index writer once it's done with them, but
     * we may not get that far if something fails.
     *
     * @param doc the document whose resources to clean up
     */
    private static void cleanupResources(Document doc) {
        for (IndexableField f : doc) {
            // If the field takes input from a reader, close the reader.
            IOUtils.close(f.readerValue());

            // If the field takes input from a token stream, close the
            // token stream.
            if (f instanceof Field) {
                IOUtils.close(((Field) f).tokenStreamValue());
            }
        }
    }

    /**
     * Check if I should accept this file into the index database.
     *
     * @param file the file to check
     * @param ret defined instance whose {@code localRelPath} property will be
     * non-null afterward if and only if {@code file} is a symlink that targets
     * either a {@link Repository}-local filesystem object or the same object
     * as a previously-detected and allowed symlink. N.b. method will return
     * {@code false} if {@code ret.localRelPath} is set non-null.
     * @return a value indicating if {@code file} should be included in index
     */
    private boolean accept(File file, AcceptSymlinkRet ret) {
        ret.localRelPath = null;
        String absolutePath = file.getAbsolutePath();

        if (!pathAccepter.accept(file)) {
            return false;
        }

        if (!file.canRead()) {
            LOGGER.log(Level.WARNING, "Could not read {0}", absolutePath);
            return false;
        }

        try {
            Path absolute = Paths.get(absolutePath);
            if (Files.isSymbolicLink(absolute)) {
                File canonical = file.getCanonicalFile();
                if (!absolutePath.equals(canonical.getPath())
                        && !acceptSymlink(absolute, canonical, ret)) {
                    if (ret.localRelPath == null) {
                        LOGGER.log(Level.FINE, "Skipped symlink ''{0}'' -> ''{1}''",
                                new Object[] {absolutePath, canonical});
                    }
                    return false;
                }
            }
            // Only files and directories pass the check below; anything else
            // is considered special and is not added.
            if (!file.isFile() && !file.isDirectory()) {
                LOGGER.log(Level.WARNING, "Ignored special file {0}", absolutePath);
                return false;
            }
        } catch (IOException exp) {
            LOGGER.log(Level.WARNING, "Failed to resolve name: {0}", absolutePath);
            LOGGER.log(Level.FINE, "Stack Trace: ", exp);
        }

        if (file.isDirectory()) {
            // always accept directories so that their files can be examined
            return true;
        }

        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        // Look up history if indexing versioned files only.
        // Skip the lookup entirely (which is expensive) if unversioned files are allowed.
        if (env.isIndexVersionedFilesOnly()) {
            if (HistoryGuru.getInstance().hasHistory(file)) {
                // versioned files should always be accepted
                return true;
            }
            LOGGER.log(Level.FINER, "not accepting unversioned {0}", absolutePath);
            return false;
        }
        // unversioned files are allowed
        return true;
    }

    /**
     * Determines if {@code file} should be accepted into the index database.
     *
     * @param parent parent of {@code file}
     * @param file directory object under consideration
     * @param ret defined instance whose {@code localRelPath} property will be
     * non-null afterward if and only if {@code file} is a symlink that targets
     * either a {@link Repository}-local filesystem object or the same object
     * as a previously-detected and allowed symlink. N.b. method will return
     * {@code false} if {@code ret.localRelPath} is set non-null.
     * @return a value indicating if {@code file} should be included in index
     */
    private boolean accept(File parent, File file, AcceptSymlinkRet ret) {
        ret.localRelPath = null;

        try {
            File f1 = parent.getCanonicalFile();
            File f2 = file.getCanonicalFile();
            if (f1.equals(f2)) {
                LOGGER.log(Level.INFO, "Skipping links to itself...: {0} {1}",
                        new Object[]{parent.getAbsolutePath(), file.getAbsolutePath()});
                return false;
            }

            // Now, let's verify that it's not a link back up the chain...
            File t1 = f1;
            while ((t1 = t1.getParentFile()) != null) {
                if (f2.equals(t1)) {
                    LOGGER.log(Level.INFO, "Skipping links to parent...: {0} {1}",
                            new Object[]{parent.getAbsolutePath(), file.getAbsolutePath()});
                    return false;
                }
            }

            return accept(file, ret);
        } catch (IOException ex) {
            LOGGER.log(Level.WARNING, "Failed to resolve name: {0} {1}",
                    new Object[]{parent.getAbsolutePath(), file.getAbsolutePath()});
        }
        return false;
    }
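    /*
     * Illustrative example (not part of the original source, paths made up):
     * given a symlink /src/proj/link -> /src/proj, the link's canonical file
     * equals its parent directory, so the equality and parent-chain checks
     * above reject it and the directory walk cannot recurse forever.
     */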
    /**
     * Check if I should accept the path containing a symlink.
     *
     * @param absolute the path with a symlink to check
     * @param canonical the canonical file object
     * @param ret defined instance whose {@code localRelPath} property will be
     * non-null afterward if and only if {@code absolute} is a symlink that
     * targets either a {@link Repository}-local filesystem object or the same
     * object ({@code canonical}) as a previously-detected and allowed symlink.
     * N.b. method will return {@code false} if {@code ret.localRelPath} is set
     * non-null.
     * @return a value indicating if {@code file} should be included in index
     */
    private boolean acceptSymlink(Path absolute, File canonical, AcceptSymlinkRet ret) {
        ret.localRelPath = null;

        String absolute1 = absolute.toString();
        String canonical1 = canonical.getPath();
        boolean isCanonicalDir = canonical.isDirectory();
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        IndexedSymlink indexed1;
        String absolute0;

        if (isLocal(canonical1)) {
            if (!isCanonicalDir) {
                if (LOGGER.isLoggable(Level.FINEST)) {
                    LOGGER.log(Level.FINEST, "Local {0} has symlink from {1}",
                            new Object[] {canonical1, absolute1});
                }
                /*
                 * Always index symlinks to local files, but do not add to
                 * indexedSymlinks for a non-directory.
                 */
                return true;
            }

            /*
             * Do not index symlinks to local directories, because the
             * canonical target will be indexed on its own -- but relativize()
             * a path to be returned in ret so that a symlink can be replicated
             * in xref/.
             */
            ret.localRelPath = absolute.getParent().relativize(
                    canonical.toPath()).toString();

            // Try to put the prime absolute path into indexedSymlinks.
            try {
                String primeRelative = env.getPathRelativeToSourceRoot(canonical);
                absolute0 = env.getSourceRootPath() + primeRelative;
            } catch (ForbiddenSymlinkException | IOException e) {
                /*
                 * This is not expected, as indexDown() would have operated on
                 * the file already -- but we are forced to handle.
                 */
                LOGGER.log(Level.WARNING, String.format(
                        "Unexpected error getting relative for %s", canonical), e);
                absolute0 = absolute1;
            }
            indexed1 = new IndexedSymlink(absolute0, canonical1, true);
            indexedSymlinks.put(canonical1, indexed1);
            return false;
        }

        IndexedSymlink indexed0;
        if ((indexed0 = indexedSymlinks.get(canonical1)) != null) {
            if (absolute1.equals(indexed0.getAbsolute())) {
                return true;
            }

            /*
             * Do not index symlinks to external directories already indexed
             * as linked elsewhere, because the canonical target will be
             * indexed already -- but relativize() a path to be returned in ret
             * so that this second symlink can be redone as a local
             * (non-external) symlink in xref/.
             */
            ret.localRelPath = absolute.getParent().relativize(
                    Paths.get(indexed0.getAbsolute())).toString();

            if (LOGGER.isLoggable(Level.FINEST)) {
                LOGGER.log(Level.FINEST, "External dir {0} has symlink from {1} after first {2}",
                        new Object[] {canonical1, absolute1, indexed0.getAbsolute()});
            }
            return false;
        }

        /*
         * Iterate through indexedSymlinks, which is sorted so that shorter
         * canonical entries come first, to see if the new link is a child
         * canonically.
         */
        for (IndexedSymlink a0 : indexedSymlinks.values()) {
            indexed0 = a0;
            if (!indexed0.isLocal()
                    && canonical1.startsWith(indexed0.getCanonicalSeparated())) {
                absolute0 = indexed0.getAbsolute();

                if (!isCanonicalDir) {
                    if (LOGGER.isLoggable(Level.FINEST)) {
                        LOGGER.log(Level.FINEST,
                                "External file {0} has symlink from {1} under previous {2}",
                                new Object[] {canonical1, absolute1, absolute0});
                    }
                    // Do not add to indexedSymlinks for a non-directory.
                    return true;
                }

                /*
                 * See above about redoing a sourceRoot symlink as a local
                 * (non-external) symlink in xref/.
                 */
                Path abs0 = Paths.get(absolute0, canonical1.substring(
                        indexed0.getCanonicalSeparated().length()));
                ret.localRelPath = absolute.getParent().relativize(abs0).toString();

                if (LOGGER.isLoggable(Level.FINEST)) {
                    LOGGER.log(Level.FINEST,
                            "External dir {0} has symlink from {1} under previous {2}",
                            new Object[] {canonical1, absolute1, absolute0});
                }
                return false;
            }
        }

        Set<String> canonicalRoots = env.getCanonicalRoots();
        for (String canonicalRoot : canonicalRoots) {
            if (canonical1.startsWith(canonicalRoot)) {
                if (LOGGER.isLoggable(Level.FINEST)) {
                    LOGGER.log(Level.FINEST, "Allowed symlink {0} per canonical root {1}",
                            new Object[] {absolute1, canonical1});
                }
                if (isCanonicalDir) {
                    indexed1 = new IndexedSymlink(absolute1, canonical1, false);
                    indexedSymlinks.put(canonical1, indexed1);
                }
                return true;
            }
        }

        Set<String> allowedSymlinks = env.getAllowedSymlinks();
        for (String allowedSymlink : allowedSymlinks) {
            String allowedTarget;
            try {
                allowedTarget = new File(allowedSymlink).getCanonicalPath();
            } catch (IOException e) {
                LOGGER.log(Level.FINE, "unresolvable symlink: {0}", allowedSymlink);
                continue;
            }
            /*
             * The following canonical check is sufficient because indexDown()
             * traverses top-down, and any intermediate symlinks would have
             * also been checked here for an allowed canonical match. This
             * technically means that if there is a set of redundant symlinks
             * with the same canonical target, then allowing one of the set
             * will allow all others in the set.
             */
            if (canonical1.equals(allowedTarget)) {
                if (isCanonicalDir) {
                    indexed1 = new IndexedSymlink(absolute1, canonical1, false);
                    indexedSymlinks.put(canonical1, indexed1);
                }
                return true;
            }
        }

        return false;
    }

    /**
     * Check if a file is local to the current project. If we don't have
     * projects, check if the file is in the source root.
     *
     * @param path the path to a file
     * @return true if the file is local to the current repository
     */
    private boolean isLocal(String path) {
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        String srcRoot = env.getSourceRootPath();

        if (path.startsWith(srcRoot + File.separator)) {
            if (env.hasProjects()) {
                String relPath = path.substring(srcRoot.length());
                // If file is under the current project, then it's local.
                return project.equals(Project.getProject(relPath));
            } else {
                // File is under source root, and we don't have projects, so
                // consider it local.
                return true;
            }
        }

        return false;
    }

    private void handleSymlink(String path, AcceptSymlinkRet ret) {
        /*
         * If ret.localRelPath is defined, then a symlink was detected but
         * not "accepted" to avoid redundancy with an already-accepted
         * canonical target. Set up for a deferred creation of a symlink
         * within xref/.
         */
        if (ret.localRelPath != null) {
            File xrefPath = new File(xrefDir, path);
            PendingSymlinkage psym = new PendingSymlinkage(xrefPath.getAbsolutePath(), ret.localRelPath);
            completer.add(psym);
        }
    }
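    /*
     * Illustrative sketch (not part of the original source, example paths
     * made up): the two environment sets consulted in acceptSymlink() come
     * from the runtime configuration:
     *
     *   env.getCanonicalRoots()  - prefixes of canonical targets that are always allowed
     *   env.getAllowedSymlinks() - explicit symlinks whose canonical targets are allowed
     *
     * So a symlink /src/proj/ext -> /opt/external would be accepted if either
     * "/opt/" is among the canonical roots or "/src/proj/ext" is listed as an
     * allowed symlink.
     */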

    /**
     * Executes the first, serial stage of indexing, by recursively traversing the file system
     * and walking the index alongside.
     * <p>
     * Files at least are counted, and any deleted or updated files (based on
     * comparison to the Lucene index) are passed to
     * {@link #removeFile(boolean)}. New or updated files are noted for indexing.
     *
     * @param dir the root indexDirectory to generate indexes for
     * @param parent path to parent directory
     * @param args arguments to control execution and for collecting a list of
     * files for indexing
     */
    @VisibleForTesting
    void indexDown(File dir, String parent, IndexDownArgs args) throws IOException {
        if (isInterrupted()) {
            return;
        }

        AcceptSymlinkRet ret = new AcceptSymlinkRet();
        if (!accept(dir, ret)) {
            handleSymlink(parent, ret);
            return;
        }

        File[] files = dir.listFiles();
        if (files == null) {
            LOGGER.log(Level.SEVERE, "Failed to get file listing for: {0}", dir.getPath());
            return;
        }
        Arrays.sort(files, FILENAME_COMPARATOR);

        for (File file : files) {
            String path = parent + File.separator + file.getName();
            if (!accept(dir, file, ret)) {
                handleSymlink(path, ret);
            } else {
                if (file.isDirectory()) {
                    indexDown(file, path, args);
                } else {
                    processFile(args, file, path);
                }
            }
        }
    }

    /**
     * Compared with {@link #processFile(IndexDownArgs, File, String)}, this method's file/path arguments
     * represent files that have actually changed in some way, while the other method's arguments represent
     * files present on disk.
     *
     * @param args {@link IndexDownArgs} instance
     * @param file File object
     * @param path path of the file argument relative to source root (with leading slash)
     * @throws IOException on error
     */
    private void processFileIncremental(IndexDownArgs args, File file, String path) throws IOException {
        if (uidIter != null) {
            path = Util.fixPathIfWindows(path);
            // Traverse terms until reaching one that matches the path of given file.
            while (uidIter != null && uidIter.term() != null
                    && uidIter.term().compareTo(emptyBR) != 0
                    && Util.uid2url(uidIter.term().utf8ToString()).compareTo(path) < 0) {

                // A file that was not changed.
                /*
                 * Possibly short-circuit to force reindexing of prior-version indexes.
                 */
                String termPath = Util.uid2url(uidIter.term().utf8ToString());
                File termFile = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), termPath);
                boolean matchOK = (isWithDirectoryCounts || isCountingDeltas)
                        && checkSettings(termFile, termPath);
                if (!matchOK) {
                    removeFile(false);

                    args.curCount++;
                    args.works.add(new IndexFileWork(termFile, termPath));
                }

                BytesRef next = uidIter.next();
                if (next == null) {
                    uidIter = null;
                }
            }

            if (uidIter != null && uidIter.term() != null
                    && Util.uid2url(uidIter.term().utf8ToString()).equals(path)) {
                /*
                 * At this point we know that the file has a corresponding term in the index
                 * and has changed in some way. Either it was deleted or it was changed.
                 */
                if (!file.exists()) {
                    removeFile(true);
                } else {
                    removeFile(false);

                    args.curCount++;
                    args.works.add(new IndexFileWork(file, path));
                }

                BytesRef next = uidIter.next();
                if (next == null) {
                    uidIter = null;
                }
            } else {
                // Potentially new file. A file might be added and then deleted,
                // so it is necessary to check its existence.
                if (file.exists()) {
                    args.curCount++;
                    args.works.add(new IndexFileWork(file, path));
                }
            }
        } else {
            if (file.exists()) {
                args.curCount++;
                args.works.add(new IndexFileWork(file, path));
            }
        }
    }

    /**
     * Process a file on disk w.r.t. the index.
     *
     * @param args {@link IndexDownArgs} instance
     * @param file File object
     * @param path path corresponding to the file parameter, relative to source root (with leading slash)
     * @throws IOException on error
     */
    private void processFile(IndexDownArgs args, File file, String path) throws IOException {
        if (uidIter != null) {
            path = Util.fixPathIfWindows(path);
            String uid = Util.path2uid(path,
                    DateTools.timeToString(file.lastModified(),
                            DateTools.Resolution.MILLISECOND)); // construct uid for doc
            BytesRef buid = new BytesRef(uid);
            // Traverse terms that have smaller UID than the current file,
            // i.e. given the ordering they are positioned before the file,
            // or it is the file that has been modified.
            while (uidIter != null && uidIter.term() != null
                    && uidIter.term().compareTo(emptyBR) != 0
                    && uidIter.term().compareTo(buid) < 0) {

                // If the term's path matches the path of the currently processed file,
                // it is clear that the file has been modified and thus
                // removeFile() will be followed by call to addFile() in indexParallel().
                // In such case, instruct removeFile() not to remove history
                // cache for the file so that incremental history cache
                // generation works.
                String termPath = Util.uid2url(uidIter.term().utf8ToString());
                removeFile(!termPath.equals(path));

                BytesRef next = uidIter.next();
                if (next == null) {
                    uidIter = null;
                }
            }

            // If the file was not modified, probably skip to the next one.
            if (uidIter != null && uidIter.term() != null && uidIter.term().bytesEquals(buid)) {
                /*
                 * Possibly short-circuit to force reindexing of prior-version indexes.
                 */
                boolean matchOK = (isWithDirectoryCounts || isCountingDeltas)
                        && checkSettings(file, path);
                if (!matchOK) {
                    removeFile(false);
                }

                BytesRef next = uidIter.next();
                if (next == null) {
                    uidIter = null;
                }

                if (matchOK) {
                    return;
                }
            }
        }

        args.curCount++;
        args.works.add(new IndexFileWork(file, path));
    }
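    /*
     * Illustrative note (not part of the original source): the U (uid) term
     * for a document is built by Util.path2uid(path, timestamp), where the
     * timestamp comes from DateTools.timeToString(file.lastModified(), ...),
     * and DateTools strings sort chronologically. Because the terms enumerate
     * in sorted order, a stale uid for the same path sorts before the freshly
     * computed uid, so the loop in processFile() deletes the old document
     * before the file is queued for re-indexing.
     */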
    /**
     * Executes the second, parallel stage of indexing.
     *
     * @param dir the parent directory (when appended to SOURCE_ROOT)
     * @param args contains a list of files to index, found during the earlier stage
     */
    private void indexParallel(String dir, IndexDownArgs args) {

        int worksCount = args.works.size();
        if (worksCount < 1) {
            return;
        }

        AtomicInteger successCounter = new AtomicInteger();
        AtomicInteger currentCounter = new AtomicInteger();
        AtomicInteger alreadyClosedCounter = new AtomicInteger();
        IndexerParallelizer parallelizer = RuntimeEnvironment.getInstance().getIndexerParallelizer();
        ObjectPool<Ctags> ctagsPool = parallelizer.getCtagsPool();

        Map<Boolean, List<IndexFileWork>> bySuccess = null;
        try (Progress progress = new Progress(LOGGER, dir, worksCount)) {
            bySuccess = parallelizer.getForkJoinPool().submit(() ->
                    args.works.parallelStream().collect(
                            Collectors.groupingByConcurrent((x) -> {
                                int tries = 0;
                                Ctags pctags = null;
                                boolean ret;
                                Statistics stats = new Statistics();
                                while (true) {
                                    try {
                                        if (alreadyClosedCounter.get() > 0) {
                                            ret = false;
                                        } else {
                                            pctags = ctagsPool.get();
                                            addFile(x.file, x.path, pctags);
                                            successCounter.incrementAndGet();
                                            ret = true;
                                        }
                                    } catch (AlreadyClosedException e) {
                                        alreadyClosedCounter.incrementAndGet();
                                        String errmsg = String.format("ERROR addFile(): %s", x.file);
                                        LOGGER.log(Level.SEVERE, errmsg, e);
                                        x.exception = e;
                                        ret = false;
                                    } catch (InterruptedException e) {
                                        // Allow one retry if interrupted
                                        if (++tries <= 1) {
                                            continue;
                                        }
                                        LOGGER.log(Level.WARNING, "No retry: {0}", x.file);
                                        x.exception = e;
                                        ret = false;
                                    } catch (RuntimeException | IOException e) {
                                        String errmsg = String.format("ERROR addFile(): %s", x.file);
                                        LOGGER.log(Level.WARNING, errmsg, e);
                                        x.exception = e;
                                        ret = false;
                                    } finally {
                                        if (pctags != null) {
                                            pctags.reset();
                                            ctagsPool.release(pctags);
                                        }
                                    }

                                    progress.increment();
                                    stats.report(LOGGER, Level.FINEST,
                                            String.format("file ''%s'' %s", x.file,
                                                    ret ? "indexed" : "failed indexing"));
                                    return ret;
                                }
                            }))).get();
        } catch (InterruptedException | ExecutionException e) {
            int successCount = successCounter.intValue();
            double successPct = 100.0 * successCount / worksCount;
            String exmsg = String.format("%d successes (%.1f%%) after aborting parallel-indexing",
                    successCount, successPct);
            LOGGER.log(Level.SEVERE, exmsg, e);
        }

        args.curCount = currentCounter.intValue();

        // Start with failureCount=worksCount, and then subtract successes.
        int failureCount = worksCount;
        if (bySuccess != null) {
            List<IndexFileWork> successes = bySuccess.getOrDefault(Boolean.TRUE, null);
            if (successes != null) {
                failureCount -= successes.size();
            }
        }
        if (failureCount > 0) {
            double pctFailed = 100.0 * failureCount / worksCount;
            String exmsg = String.format("%d failures (%.1f%%) while parallel-indexing",
                    failureCount, pctFailed);
            LOGGER.log(Level.WARNING, exmsg);
        }

        /*
         * Encountering an AlreadyClosedException is severe enough to abort the
         * run, since it will fail anyway later upon trying to commit().
         */
        int numAlreadyClosed = alreadyClosedCounter.get();
        if (numAlreadyClosed > 0) {
            throw new AlreadyClosedException(String.format("count=%d", numAlreadyClosed));
        }
    }

    private boolean isInterrupted() {
        synchronized (lock) {
            return interrupted;
        }
    }

    /**
     * Register an object to receive events when modifications are done to the
     * index database.
     *
     * @param listener the object to receive the events
     */
    public void addIndexChangedListener(IndexChangedListener listener) {
        if (listener != null) {
            listeners.add(listener);
        }
    }
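    /*
     * Illustrative sketch (not part of the original source): a minimal
     * listener, assuming only the IndexChangedListener callbacks invoked
     * elsewhere in this class (fileAdd/fileAdded with the path and analyzer
     * name, fileRemove/fileRemoved with the path):
     *
     *   db.addIndexChangedListener(new IndexChangedListener() {
     *       @Override public void fileAdd(String path, String analyzer) { ... }
     *       @Override public void fileAdded(String path, String analyzer) { ... }
     *       @Override public void fileRemove(String path) { ... }
     *       @Override public void fileRemoved(String path) { ... }
     *   });
     */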
    /**
     * Get all files in some of the index databases.
     *
     * @param subFiles Subdirectories of various projects or null or an empty list to get everything
     * @throws IOException if an error occurs
     * @return set of files in the index databases specified by the subFiles parameter
     */
    public static Set<String> getAllFiles(List<String> subFiles) throws IOException {
        Set<String> files = new HashSet<>();
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();

        if (env.hasProjects()) {
            if (subFiles == null || subFiles.isEmpty()) {
                for (Project project : env.getProjectList()) {
                    IndexDatabase db = new IndexDatabase(project);
                    files.addAll(db.getFiles());
                }
            } else {
                for (String path : subFiles) {
                    Project project = Project.getProject(path);
                    if (project == null) {
                        LOGGER.log(Level.WARNING, "Could not find a project for \"{0}\"", path);
                    } else {
                        IndexDatabase db = new IndexDatabase(project);
                        files.addAll(db.getFiles());
                    }
                }
            }
        } else {
            IndexDatabase db = new IndexDatabase();
            files = db.getFiles();
        }

        return files;
    }

    /**
     * Get all files in this index database.
     *
     * @throws IOException If an IO error occurs while reading from the database
     * @return set of files in this index database
     */
    public Set<String> getFiles() throws IOException {
        IndexReader ireader = null;
        TermsEnum iter = null;
        Terms terms;
        Set<String> files = new HashSet<>();

        try {
            ireader = DirectoryReader.open(indexDirectory); // open existing index
            if (ireader.numDocs() > 0) {
                terms = MultiTerms.getTerms(ireader, QueryBuilder.U);
                iter = terms.iterator(); // init uid iterator
            }
            while (iter != null && iter.term() != null) {
                String value = iter.term().utf8ToString();
                if (value.isEmpty()) {
                    iter.next();
                    continue;
                }

                files.add(Util.uid2url(value));
                BytesRef next = iter.next();
                if (next == null) {
                    iter = null;
                }
            }
        } finally {
            if (ireader != null) {
                try {
                    ireader.close();
                } catch (IOException e) {
                    LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e);
                }
            }
        }

        return files;
    }

    /**
     * Get number of documents in this index database.
     *
     * @return number of documents
     * @throws IOException if I/O exception occurred
     */
    public int getNumFiles() throws IOException {
        IndexReader ireader = null;
        try {
            ireader = DirectoryReader.open(indexDirectory); // open existing index
            return ireader.numDocs();
        } finally {
            if (ireader != null) {
                try {
                    ireader.close();
                } catch (IOException e) {
                    LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e);
                }
            }
        }
    }

    static void listFrequentTokens(List<String> subFiles) throws IOException {
        final int limit = 4;

        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        if (env.hasProjects()) {
            if (subFiles == null || subFiles.isEmpty()) {
                for (Project project : env.getProjectList()) {
                    IndexDatabase db = new IndexDatabase(project);
                    db.listTokens(limit);
                }
            } else {
                for (String path : subFiles) {
                    Project project = Project.getProject(path);
                    if (project == null) {
                        LOGGER.log(Level.WARNING, "Could not find a project for \"{0}\"", path);
                    } else {
                        IndexDatabase db = new IndexDatabase(project);
                        db.listTokens(limit);
                    }
                }
            }
        } else {
            IndexDatabase db = new IndexDatabase();
            db.listTokens(limit);
        }
    }

    public void listTokens(int freq) throws IOException {
        IndexReader ireader = null;
        TermsEnum iter = null;
        Terms terms;

        try {
            ireader = DirectoryReader.open(indexDirectory);
            if (ireader.numDocs() > 0) {
                terms = MultiTerms.getTerms(ireader, QueryBuilder.DEFS);
                iter = terms.iterator(); // init uid iterator
            }
            while (iter != null && iter.term() != null) {
                if (iter.docFreq() > 16 && iter.term().utf8ToString().length() > freq) {
                    LOGGER.warning(iter.term().utf8ToString());
                }
                BytesRef next = iter.next();
                if (next == null) {
                    iter = null;
                }
            }
        } finally {
            if (ireader != null) {
                try {
                    ireader.close();
                } catch (IOException e) {
                    LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e);
                }
            }
        }
    }

    /**
     * Get an IndexReader for the index database in which a given file is located.
     *
     * @param path the file to get the database for
     * @return the index database where the file should be located or null if it
     * cannot be located.
     */
    @SuppressWarnings("java:S2095")
    public static IndexReader getIndexReader(String path) {
        IndexReader ret = null;

        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        File indexDir = new File(env.getDataRootFile(), INDEX_DIR);

        if (env.hasProjects()) {
            Project p = Project.getProject(path);
            if (p == null) {
                return null;
            }
            indexDir = new File(indexDir, p.getPath());
        }
        try {
            FSDirectory fdir = FSDirectory.open(indexDir.toPath(), NoLockFactory.INSTANCE);
            if (indexDir.exists() && DirectoryReader.indexExists(fdir)) {
                ret = DirectoryReader.open(fdir);
            }
        } catch (Exception ex) {
            LOGGER.log(Level.SEVERE, "Failed to open index: {0}", indexDir.getAbsolutePath());
            LOGGER.log(Level.FINE, "Stack Trace: ", ex);
        }
        return ret;
    }
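    /*
     * Illustrative sketch (not part of the original source): getIndexReader()
     * returns an open reader (or null), and the caller is responsible for
     * closing it, e.g. with try-with-resources as getDocument(File) below
     * does. The path "/myproject/foo.c" is a made-up example:
     *
     *   try (IndexReader reader = IndexDatabase.getIndexReader("/myproject/foo.c")) {
     *       if (reader != null) {
     *           // search against the reader
     *       }
     *   }
     */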
    /**
     * Get the latest definitions for a file from the index.
     *
     * @param file the file whose definitions to find
     * @return definitions for the file, or {@code null} if they could not be found
     * @throws IOException if an error happens when accessing the index
     * @throws ParseException if an error happens when building the Lucene query
     * @throws ClassNotFoundException if the class for the stored definitions instance cannot be found
     */
    public static Definitions getDefinitions(File file) throws ParseException, IOException, ClassNotFoundException {
        Document doc = getDocument(file);
        if (doc == null) {
            return null;
        }

        IndexableField tags = doc.getField(QueryBuilder.TAGS);
        if (tags != null) {
            return Definitions.deserialize(tags.binaryValue().bytes);
        }

        // Didn't find any definitions.
        return null;
    }

    /**
     * @param file File object for a file under source root
     * @return Document object for the file or {@code null}
     * @throws IOException on I/O error
     * @throws ParseException on problem with building Query
     */
    public static Document getDocument(File file) throws IOException, ParseException {
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        String path;
        try {
            path = env.getPathRelativeToSourceRoot(file);
        } catch (ForbiddenSymlinkException e) {
            LOGGER.log(Level.FINER, e.getMessage());
            return null;
        }
        // Sanitize Windows path delimiters in order not to conflict with Lucene escape character.
        path = path.replace("\\", "/");

        try (IndexReader indexReader = getIndexReader(path)) {
            return getDocument(path, indexReader);
        }
    }

    @Nullable
    private static Document getDocument(String path, IndexReader indexReader) throws ParseException, IOException {
        if (indexReader == null) {
            // No index, no document..
            return null;
        }

        Document doc;
        Query q = new QueryBuilder().setPath(path).build();
        IndexSearcher searcher = new IndexSearcher(indexReader);
        Statistics stat = new Statistics();
        TopDocs top = searcher.search(q, 1);
        stat.report(LOGGER, Level.FINEST, "search via getDocument() done",
                "search.latency", new String[]{"category", "getdocument",
                        "outcome", top.totalHits.value == 0 ? "empty" : "success"});
        if (top.totalHits.value == 0) {
            // No hits, no document...
            return null;
        }
        doc = searcher.doc(top.scoreDocs[0].doc);
        String foundPath = doc.get(QueryBuilder.PATH);
        // Only use the document if we found an exact match.
        if (!path.equals(foundPath)) {
            return null;
        }

        return doc;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        IndexDatabase that = (IndexDatabase) o;
        return Objects.equals(project, that.project);
    }

    @Override
    public int hashCode() {
        return Objects.hash(project);
    }

    private static class CountingWriter extends Writer {
        private long count;
        private final Writer out;

        CountingWriter(Writer out) {
            super(out);
            this.out = out;
        }

        @Override
        public void write(@NotNull char[] chars, int off, int len) throws IOException {
            out.write(chars, off, len);
            count += len;
        }

        @Override
        public void flush() throws IOException {
            out.flush();
        }

        @Override
        public void close() throws IOException {
            out.close();
        }

        public long getCount() {
            return count;
        }
    }

    private String getXrefPath(String path) {
        boolean compressed = RuntimeEnvironment.getInstance().isCompressXref();
        File xrefFile = whatXrefFile(path, compressed);
        File parentFile = xrefFile.getParentFile();

        // If mkdirs() returns false, the failure is most likely
        // because the directory already exists. But to check for the
        // directory first and only create it if it doesn't exist would
        // only increase the file IO...
        if (!parentFile.mkdirs()) {
            assert parentFile.exists();
        }

        // Write to a pending file for later renaming.
        String xrefAbs = xrefFile.getAbsolutePath();
        return xrefAbs;
    }
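    /*
     * Illustrative sketch, not part of the original class: CountingWriter
     * forwards every write to the wrapped Writer while tallying characters,
     * so the size of a written xref falls out as a by-product. A minimal
     * demonstration, assuming java.io.StringWriter as the sink:
     *
     *     CountingWriter w = new CountingWriter(new StringWriter());
     *     w.write("hello".toCharArray(), 0, 5);
     *     assert w.getCount() == 5;
     *     w.close();
     */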
    /**
     * Get a writer to which the xref can be written, or null if no xref
     * should be produced for files of this type.
     */
    private CountingWriter newXrefWriter(String path, File transientXref, boolean compressed) throws IOException {
        return new CountingWriter(new BufferedWriter(new OutputStreamWriter(compressed ?
                new GZIPOutputStream(new FileOutputStream(transientXref)) :
                new FileOutputStream(transientXref))));
    }

    LockFactory pickLockFactory(RuntimeEnvironment env) {
        switch (env.getLuceneLocking()) {
            case ON:
            case SIMPLE:
                return SimpleFSLockFactory.INSTANCE;
            case NATIVE:
                return NativeFSLockFactory.INSTANCE;
            case OFF:
            default:
                return NoLockFactory.INSTANCE;
        }
    }

    private void finishWriting() throws IOException {
        boolean hasPendingCommit = false;
        try {
            writeAnalysisSettings();

            LOGGER.log(Level.FINE, "preparing to commit changes to Lucene index"); // TODO add info about which database
            writer.prepareCommit();
            hasPendingCommit = true;

            int n = completer.complete();
            // TODO: add elapsed
            LOGGER.log(Level.FINE, "completed {0} object(s)", n);

            // Just before commit(), reset the hasPendingCommit flag,
            // since after commit() is called, there is no need for
            // rollback() regardless of success.
            hasPendingCommit = false;
            writer.commit();
        } catch (RuntimeException | IOException e) {
            if (hasPendingCommit) {
                writer.rollback();
            }
            LOGGER.log(Level.WARNING, "An error occurred while finishing writer and completer", e);
            throw e;
        }
    }
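    /*
     * Illustrative sketch of the two-phase pattern finishWriting() uses above,
     * reduced to its skeleton (writer and doSideEffects() are stand-ins):
     *
     *     boolean pending = false;
     *     try {
     *         writer.prepareCommit();  // phase 1: flush and sync, still revocable
     *         pending = true;
     *         doSideEffects();         // e.g. renaming pending xref files
     *         pending = false;         // once commit() runs, rollback() is moot
     *         writer.commit();         // phase 2: publish the new index generation
     *     } catch (RuntimeException | IOException e) {
     *         if (pending) {
     *             writer.rollback();   // withdraw the prepared-but-unpublished commit
     *         }
     *         throw e;
     *     }
     */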
    /**
     * Verify TABSIZE, and evaluate AnalyzerGuru version together with ZVER --
     * or return a value to indicate mismatch.
     *
     * @param file the source file object
     * @param path the source file path
     * @return {@code false} if a mismatch is detected
     * @throws IOException if an error occurs while reading the index
     */
    @VisibleForTesting
    boolean checkSettings(File file, String path) throws IOException {
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        boolean outIsXrefWriter = false; // potential xref writer
        int reqTabSize = project != null && project.hasTabSizeSetting() ? project.getTabSize() : 0;
        Integer actTabSize = settings.getTabSize();
        if (actTabSize != null && !actTabSize.equals(reqTabSize)) {
            LOGGER.log(Level.FINE, "Tabsize mismatch: {0}", path);
            return false;
        }

        int n = 0;
        postsIter = uidIter.postings(postsIter);
        while (postsIter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            ++n;
            // Read a limited-fields version of the document.
            Document doc = reader.document(postsIter.docID(), CHECK_FIELDS);
            if (doc == null) {
                LOGGER.log(Level.FINER, "No Document: {0}", path);
                continue;
            }

            long reqGuruVersion = AnalyzerGuru.getVersionNo();
            Long actGuruVersion = settings.getAnalyzerGuruVersion();
            /*
             * For an older OpenGrok index that does not yet have a defined,
             * stored analyzerGuruVersion, break so that no extra work is done.
             * After a re-index, the guru version check will be active.
             */
            if (actGuruVersion == null) {
                break;
            }

            AbstractAnalyzer fa = null;
            String fileTypeName;
            if (actGuruVersion.equals(reqGuruVersion)) {
                fileTypeName = doc.get(QueryBuilder.TYPE);
                if (fileTypeName == null) {
                    // (Should not get here, but break just in case.)
                    LOGGER.log(Level.FINEST, "Missing TYPE field: {0}", path);
                    break;
                }

                AnalyzerFactory fac = AnalyzerGuru.findByFileTypeName(fileTypeName);
                if (fac != null) {
                    fa = fac.getAnalyzer();
                }
            } else {
                /*
                 * If the stored guru version does not match, re-verify the
                 * selection of analyzer or return a value to indicate the
                 * analyzer is now mis-matched.
                 */
                LOGGER.log(Level.FINER, "Guru version mismatch: {0}", path);

                fa = getAnalyzerFor(file, path);
                fileTypeName = fa.getFileTypeName();
                String oldTypeName = doc.get(QueryBuilder.TYPE);
                if (!fileTypeName.equals(oldTypeName)) {
                    if (LOGGER.isLoggable(Level.FINE)) {
                        LOGGER.log(Level.FINE, "Changed {0} to {1}: {2}",
                                new Object[]{oldTypeName, fileTypeName, path});
                    }
                    return false;
                }
            }

            // Verify Analyzer version, or return a value to indicate mismatch.
            long reqVersion = AnalyzerGuru.getAnalyzerVersionNo(fileTypeName);
            Long actVersion = settings.getAnalyzerVersion(fileTypeName);
            if (actVersion == null || !actVersion.equals(reqVersion)) {
                if (LOGGER.isLoggable(Level.FINE)) {
                    LOGGER.log(Level.FINE, "{0} version mismatch: {1}",
                            new Object[]{fileTypeName, path});
                }
                return false;
            }

            if (fa != null) {
                outIsXrefWriter = true;
            }

            // The version checks have passed.
            break;
        }
        if (n < 1) {
            LOGGER.log(Level.FINER, "Missing index Documents: {0}", path);
            return false;
        }

        // If the economy mode is on, this should be treated as a match.
        if (!env.isGenerateHtml()) {
            if (xrefExistsFor(path)) {
                LOGGER.log(Level.FINEST, "Extraneous {0}, removing its xref file", path);
                removeXrefFile(path);
            }
            return true;
        }

        return (!outIsXrefWriter || xrefExistsFor(path));
    }

    private void writeAnalysisSettings() throws IOException {
        settings = new IndexAnalysisSettings3();
        settings.setProjectName(project != null ? project.getName() : null);
        settings.setTabSize(project != null && project.hasTabSizeSetting() ? project.getTabSize() : 0);
        settings.setAnalyzerGuruVersion(AnalyzerGuru.getVersionNo());
        settings.setAnalyzersVersions(AnalyzerGuru.getAnalyzersVersionNos());
        settings.setIndexedSymlinks(indexedSymlinks);

        IndexAnalysisSettingsAccessor dao = new IndexAnalysisSettingsAccessor();
        dao.write(writer, settings);
    }

    private IndexAnalysisSettings3 readAnalysisSettings() throws IOException {
        IndexAnalysisSettingsAccessor dao = new IndexAnalysisSettingsAccessor();
        return dao.read(reader);
    }

    private boolean xrefExistsFor(String path) {
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        File xrefFile = whatXrefFile(path, env.isCompressXref());
        if (!xrefFile.exists()) {
            LOGGER.log(Level.FINEST, "Missing {0}", xrefFile);
            return false;
        }

        return true;
    }

    private static class AcceptSymlinkRet {
        String localRelPath;
    }
}
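/*
 * Illustrative end-to-end sketch, not part of the original class: fetching the
 * stored ctags definitions for one file. The project and file names are
 * hypothetical; a configured source root and an existing index are assumed,
 * as is the Definitions.numberOfSymbols() accessor.
 *
 *     File f = new File(RuntimeEnvironment.getInstance().getSourceRootPath(),
 *             "myproject/main.c");
 *     Definitions defs = IndexDatabase.getDefinitions(f);
 *     if (defs != null) {
 *         System.out.println("symbols: " + defs.numberOfSymbols());
 *     }
 */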