xref: /OpenGrok/opengrok-indexer/src/main/java/org/opengrok/indexer/search/SearchEngine.java (revision 70091cc094f5cc7eac3b7666324e952b7be992ab)
1b5840353SAdam Hornáček /*
2b5840353SAdam Hornáček  * CDDL HEADER START
3b5840353SAdam Hornáček  *
4b5840353SAdam Hornáček  * The contents of this file are subject to the terms of the
5b5840353SAdam Hornáček  * Common Development and Distribution License (the "License").
6b5840353SAdam Hornáček  * You may not use this file except in compliance with the License.
7b5840353SAdam Hornáček  *
8b5840353SAdam Hornáček  * See LICENSE.txt included in this distribution for the specific
9b5840353SAdam Hornáček  * language governing permissions and limitations under the License.
10b5840353SAdam Hornáček  *
11b5840353SAdam Hornáček  * When distributing Covered Code, include this CDDL HEADER in each
12b5840353SAdam Hornáček  * file and include the License file at LICENSE.txt.
13b5840353SAdam Hornáček  * If applicable, add the following below this CDDL HEADER, with the
14b5840353SAdam Hornáček  * fields enclosed by brackets "[]" replaced with your own identifying
15b5840353SAdam Hornáček  * information: Portions Copyright [yyyy] [name of copyright owner]
16b5840353SAdam Hornáček  *
17b5840353SAdam Hornáček  * CDDL HEADER END
18b5840353SAdam Hornáček  */
19b5840353SAdam Hornáček 
20b5840353SAdam Hornáček  /*
214b8c49e9SVladimir Kotal  * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
22b5840353SAdam Hornáček  * Portions Copyright (c) 2018, Chris Fraire <cfraire@me.com>.
23b5840353SAdam Hornáček  */
249805b761SAdam Hornáček package org.opengrok.indexer.search;
25b5840353SAdam Hornáček 
26b5840353SAdam Hornáček import java.io.BufferedReader;
27b5840353SAdam Hornáček import java.io.File;
28b5840353SAdam Hornáček import java.io.FileInputStream;
29b5840353SAdam Hornáček import java.io.FileNotFoundException;
30b5840353SAdam Hornáček import java.io.FileReader;
31b5840353SAdam Hornáček import java.io.IOException;
32b5840353SAdam Hornáček import java.io.InputStreamReader;
33b5840353SAdam Hornáček import java.io.Reader;
34b5840353SAdam Hornáček import java.nio.charset.StandardCharsets;
35b5840353SAdam Hornáček import java.util.ArrayList;
36b5840353SAdam Hornáček import java.util.List;
37b5840353SAdam Hornáček import java.util.SortedSet;
38b5840353SAdam Hornáček import java.util.TreeSet;
39b5840353SAdam Hornáček import java.util.logging.Level;
40b5840353SAdam Hornáček import java.util.logging.Logger;
41b5840353SAdam Hornáček import java.util.zip.GZIPInputStream;
42b5840353SAdam Hornáček import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
43b5840353SAdam Hornáček import org.apache.lucene.document.Document;
44b5840353SAdam Hornáček import org.apache.lucene.index.DirectoryReader;
45b5840353SAdam Hornáček import org.apache.lucene.index.IndexReader;
46b5840353SAdam Hornáček import org.apache.lucene.index.IndexableField;
47b5840353SAdam Hornáček import org.apache.lucene.index.MultiReader;
48b5840353SAdam Hornáček import org.apache.lucene.queryparser.classic.ParseException;
49b5840353SAdam Hornáček import org.apache.lucene.search.IndexSearcher;
50b5840353SAdam Hornáček import org.apache.lucene.search.Query;
51b5840353SAdam Hornáček import org.apache.lucene.search.ScoreDoc;
52b5840353SAdam Hornáček import org.apache.lucene.search.TopScoreDocCollector;
53b5840353SAdam Hornáček import org.apache.lucene.store.FSDirectory;
54b5840353SAdam Hornáček import org.apache.lucene.util.Version;
5557eefa47SKryštof Tulinger import org.opengrok.indexer.analysis.AbstractAnalyzer;
569805b761SAdam Hornáček import org.opengrok.indexer.analysis.CompatibleAnalyser;
579805b761SAdam Hornáček import org.opengrok.indexer.analysis.Definitions;
589805b761SAdam Hornáček import org.opengrok.indexer.analysis.Scopes;
599805b761SAdam Hornáček import org.opengrok.indexer.configuration.Project;
609805b761SAdam Hornáček import org.opengrok.indexer.configuration.RuntimeEnvironment;
619805b761SAdam Hornáček import org.opengrok.indexer.configuration.SuperIndexSearcher;
629805b761SAdam Hornáček import org.opengrok.indexer.history.HistoryException;
639805b761SAdam Hornáček import org.opengrok.indexer.index.IndexDatabase;
649805b761SAdam Hornáček import org.opengrok.indexer.logger.LoggerFactory;
659805b761SAdam Hornáček import org.opengrok.indexer.search.Summary.Fragment;
669805b761SAdam Hornáček import org.opengrok.indexer.search.context.Context;
679805b761SAdam Hornáček import org.opengrok.indexer.search.context.HistoryContext;
68ef6b5de2SVladimir Kotal import org.opengrok.indexer.util.Statistics;
694da26a1eSChris Fraire import org.opengrok.indexer.util.TandemPath;
709805b761SAdam Hornáček import org.opengrok.indexer.web.Prefix;
71b5840353SAdam Hornáček 
72b5840353SAdam Hornáček /**
734b8c49e9SVladimir Kotal  * This is an encapsulation of the details on how to search in the index database.
744b8c49e9SVladimir Kotal  * This is used for searching via the REST API.
75b5840353SAdam Hornáček  *
76b5840353SAdam Hornáček  * @author Trond Norbye 2005
77b5840353SAdam Hornáček  * @author Lubos Kosco - upgrade to lucene 3.x, 4.x, 5.x
78b5840353SAdam Hornáček  */
79b5840353SAdam Hornáček public class SearchEngine {
80b5840353SAdam Hornáček 
81b5840353SAdam Hornáček     private static final Logger LOGGER = LoggerFactory.getLogger(SearchEngine.class);
82b5840353SAdam Hornáček 
83b5840353SAdam Hornáček     /**
84b5840353SAdam Hornáček      * Message text used when logging exceptions thrown when searching.
85b5840353SAdam Hornáček      */
86b5840353SAdam Hornáček     private static final String SEARCH_EXCEPTION_MSG = "Exception searching {0}";
87b5840353SAdam Hornáček     //NOTE below will need to be changed after new lucene upgrade, if they
88b5840353SAdam Hornáček     //increase the version - every change of below makes us incompatible with the
89b5840353SAdam Hornáček     //old index and we need to ask for reindex
90b5840353SAdam Hornáček     /**
91ff44f24aSAdam Hornáček      * Version of Lucene index common for the whole application.
92b5840353SAdam Hornáček      */
93b5840353SAdam Hornáček     public static final Version LUCENE_VERSION = Version.LATEST;
94b5840353SAdam Hornáček     public static final String LUCENE_VERSION_HELP = LUCENE_VERSION.major + "_" + LUCENE_VERSION.minor + "_" + LUCENE_VERSION.bugfix;
95b5840353SAdam Hornáček     /**
96b5840353SAdam Hornáček      * Holds value of property definition.
97b5840353SAdam Hornáček      */
98b5840353SAdam Hornáček     private String definition;
99b5840353SAdam Hornáček     /**
100b5840353SAdam Hornáček      * Holds value of property file.
101b5840353SAdam Hornáček      */
102b5840353SAdam Hornáček     private String file;
103b5840353SAdam Hornáček     /**
104b5840353SAdam Hornáček      * Holds value of property freetext.
105b5840353SAdam Hornáček      */
106b5840353SAdam Hornáček     private String freetext;
107b5840353SAdam Hornáček     /**
108b5840353SAdam Hornáček      * Holds value of property history.
109b5840353SAdam Hornáček      */
110b5840353SAdam Hornáček     private String history;
111b5840353SAdam Hornáček     /**
112b5840353SAdam Hornáček      * Holds value of property symbol.
113b5840353SAdam Hornáček      */
114b5840353SAdam Hornáček     private String symbol;
115b5840353SAdam Hornáček     /**
116ff44f24aSAdam Hornáček      * Holds value of property type.
117b5840353SAdam Hornáček      */
118b5840353SAdam Hornáček     private String type;
119b5840353SAdam Hornáček     /**
120b5840353SAdam Hornáček      * Holds the Lucene query most recently built from the search criteria.
121b5840353SAdam Hornáček      */
122b5840353SAdam Hornáček     private Query query;
123b5840353SAdam Hornáček     private QueryBuilder queryBuilder;
124b5840353SAdam Hornáček     private final CompatibleAnalyser analyzer = new CompatibleAnalyser();
125b5840353SAdam Hornáček     private Context sourceContext;
126b5840353SAdam Hornáček     private HistoryContext historyContext;
127b5840353SAdam Hornáček     private Summarizer summarizer;
128b5840353SAdam Hornáček     // internal structure to hold the results from lucene
129b5840353SAdam Hornáček     private final List<Document> docs;
130b5840353SAdam Hornáček     private final char[] content = new char[1024 * 8];
131b5840353SAdam Hornáček     private String source;
132b5840353SAdam Hornáček     private String data;
133b5840353SAdam Hornáček     int hitsPerPage = RuntimeEnvironment.getInstance().getHitsPerPage();
134b5840353SAdam Hornáček     int cachePages = RuntimeEnvironment.getInstance().getCachePages();
135b5840353SAdam Hornáček     int totalHits = 0;
136b5840353SAdam Hornáček     private ScoreDoc[] hits;
137b5840353SAdam Hornáček     private TopScoreDocCollector collector;
138b5840353SAdam Hornáček     private IndexSearcher searcher;
139b5840353SAdam Hornáček     boolean allCollected;
140b5840353SAdam Hornáček     private final ArrayList<SuperIndexSearcher> searcherList = new ArrayList<>();
141b5840353SAdam Hornáček 
/**
 * Creates a new SearchEngine whose list of result documents starts out
 * empty.
 */
public SearchEngine() {
    this.docs = new ArrayList<>();
}
148b5840353SAdam Hornáček 
149b5840353SAdam Hornáček     /**
150b5840353SAdam Hornáček      * Create a QueryBuilder using the fields that have been set on this
151b5840353SAdam Hornáček      * SearchEngine.
152b5840353SAdam Hornáček      *
153b5840353SAdam Hornáček      * @return a query builder
154b5840353SAdam Hornáček      */
createQueryBuilder()155b5840353SAdam Hornáček     private QueryBuilder createQueryBuilder() {
156b5840353SAdam Hornáček         return new QueryBuilder()
157b5840353SAdam Hornáček                 .setFreetext(freetext)
158b5840353SAdam Hornáček                 .setDefs(definition)
159b5840353SAdam Hornáček                 .setRefs(symbol)
160b5840353SAdam Hornáček                 .setPath(file)
161b5840353SAdam Hornáček                 .setHist(history)
162b5840353SAdam Hornáček                 .setType(type);
163b5840353SAdam Hornáček     }
164b5840353SAdam Hornáček 
isValidQuery()165b5840353SAdam Hornáček     public boolean isValidQuery() {
166b5840353SAdam Hornáček         boolean ret;
167b5840353SAdam Hornáček         try {
168b5840353SAdam Hornáček             query = createQueryBuilder().build();
169b5840353SAdam Hornáček             ret = (query != null);
170b5840353SAdam Hornáček         } catch (ParseException e) {
171b5840353SAdam Hornáček             ret = false;
172b5840353SAdam Hornáček         }
173b5840353SAdam Hornáček 
174b5840353SAdam Hornáček         return ret;
175b5840353SAdam Hornáček     }
176b5840353SAdam Hornáček 
177b5840353SAdam Hornáček     /**
178b5840353SAdam Hornáček      * Search one index. This is used if no projects are set up.
179b5840353SAdam Hornáček      * @param paging whether to use paging (if yes, first X pages will load
180b5840353SAdam Hornáček      * faster)
181b5840353SAdam Hornáček      * @param root which db to search
182b5840353SAdam Hornáček      * @throws IOException
183b5840353SAdam Hornáček      */
searchSingleDatabase(File root, boolean paging)184b5840353SAdam Hornáček     private void searchSingleDatabase(File root, boolean paging) throws IOException {
185b5840353SAdam Hornáček         IndexReader ireader = DirectoryReader.open(FSDirectory.open(root.toPath()));
186b5840353SAdam Hornáček         searcher = new IndexSearcher(ireader);
1871204ce31SVladimir Kotal         searchIndex(searcher, paging);
188b5840353SAdam Hornáček     }
189b5840353SAdam Hornáček 
190b5840353SAdam Hornáček     /**
191b5840353SAdam Hornáček      * Perform search on multiple indexes in parallel.
192b5840353SAdam Hornáček      * @param paging whether to use paging (if yes, first X pages will load
193b5840353SAdam Hornáček      * faster)
194b5840353SAdam Hornáček      * @param root list of projects to search
195b5840353SAdam Hornáček      * @throws IOException
196b5840353SAdam Hornáček      */
searchMultiDatabase(List<Project> root, boolean paging)197b5840353SAdam Hornáček     private void searchMultiDatabase(List<Project> root, boolean paging) throws IOException {
198b5840353SAdam Hornáček         SortedSet<String> projects = new TreeSet<>();
199b5840353SAdam Hornáček         for (Project p : root) {
200b5840353SAdam Hornáček             projects.add(p.getName());
201b5840353SAdam Hornáček         }
202b5840353SAdam Hornáček 
203b5840353SAdam Hornáček         // We use MultiReader even for single project. This should
204b5840353SAdam Hornáček         // not matter given that MultiReader is just a cheap wrapper
205b5840353SAdam Hornáček         // around set of IndexReader objects.
206b5840353SAdam Hornáček         MultiReader searchables = RuntimeEnvironment.getInstance().
207b5840353SAdam Hornáček             getMultiReader(projects, searcherList);
208b5840353SAdam Hornáček         searcher = new IndexSearcher(searchables);
2091204ce31SVladimir Kotal         searchIndex(searcher, paging);
2101204ce31SVladimir Kotal     }
2111204ce31SVladimir Kotal 
searchIndex(IndexSearcher searcher, boolean paging)2121204ce31SVladimir Kotal     private void searchIndex(IndexSearcher searcher, boolean paging) throws IOException {
2134cf88309SLubos Kosco         collector = TopScoreDocCollector.create(hitsPerPage * cachePages, Short.MAX_VALUE);
214ef6b5de2SVladimir Kotal         Statistics stat = new Statistics();
215b5840353SAdam Hornáček         searcher.search(query, collector);
216b5840353SAdam Hornáček         totalHits = collector.getTotalHits();
217ef6b5de2SVladimir Kotal         stat.report(LOGGER, Level.FINEST, "search via SearchEngine done",
218ef6b5de2SVladimir Kotal                 "search.latency", new String[]{"category", "engine",
219ef6b5de2SVladimir Kotal                         "outcome", totalHits > 0 ? "success" : "empty"});
220b5840353SAdam Hornáček         if (!paging && totalHits > 0) {
2214cf88309SLubos Kosco             collector = TopScoreDocCollector.create(totalHits, Short.MAX_VALUE);
222b5840353SAdam Hornáček             searcher.search(query, collector);
223b5840353SAdam Hornáček         }
224b5840353SAdam Hornáček         hits = collector.topDocs().scoreDocs;
225b5840353SAdam Hornáček         for (ScoreDoc hit : hits) {
226b5840353SAdam Hornáček             int docId = hit.doc;
227b5840353SAdam Hornáček             Document d = searcher.doc(docId);
228b5840353SAdam Hornáček             docs.add(d);
229b5840353SAdam Hornáček         }
230b5840353SAdam Hornáček     }
231b5840353SAdam Hornáček 
232b5840353SAdam Hornáček     /**
233b5840353SAdam Hornáček      * Gets the instance from {@code search(...)} if it was called.
234b5840353SAdam Hornáček      * @return defined instance or {@code null}
235b5840353SAdam Hornáček      */
getQuery()236b5840353SAdam Hornáček     public String getQuery() {
237b5840353SAdam Hornáček         return query != null ? query.toString() : null;
238b5840353SAdam Hornáček     }
239b5840353SAdam Hornáček 
240b5840353SAdam Hornáček     /**
241b5840353SAdam Hornáček      * Gets the instance from {@code search(...)} if it was called.
242b5840353SAdam Hornáček      * @return defined instance or {@code null}
243b5840353SAdam Hornáček      */
getQueryObject()244b5840353SAdam Hornáček     public Query getQueryObject() {
245b5840353SAdam Hornáček         return query;
246b5840353SAdam Hornáček     }
247b5840353SAdam Hornáček 
248b5840353SAdam Hornáček     /**
249b5840353SAdam Hornáček      * Gets the builder from {@code search(...)} if it was called.
250b5840353SAdam Hornáček      * <p>
251b5840353SAdam Hornáček      * (Modifying the builder will have no effect on this
252b5840353SAdam Hornáček      * {@link SearchEngine}.)
253b5840353SAdam Hornáček      * @return defined instance or {@code null}
254b5840353SAdam Hornáček      */
getQueryBuilder()255b5840353SAdam Hornáček     public QueryBuilder getQueryBuilder() {
256b5840353SAdam Hornáček         return queryBuilder;
257b5840353SAdam Hornáček     }
258b5840353SAdam Hornáček 
259b5840353SAdam Hornáček     /**
260b5840353SAdam Hornáček      * Gets the searcher from {@code search(...)} if it was called.
261b5840353SAdam Hornáček      * @return defined instance or {@code null}
262b5840353SAdam Hornáček      */
getSearcher()263b5840353SAdam Hornáček     public IndexSearcher getSearcher() {
264b5840353SAdam Hornáček         return searcher;
265b5840353SAdam Hornáček     }
266b5840353SAdam Hornáček 
267b5840353SAdam Hornáček     /**
268b5840353SAdam Hornáček      * Execute a search aware of current request, limited to specific project names.
269b5840353SAdam Hornáček      *
270b5840353SAdam Hornáček      * This filters out all projects which are not allowed for the current request.
271b5840353SAdam Hornáček      *
272b5840353SAdam Hornáček      * Before calling this function,
273b5840353SAdam Hornáček      * you must set the appropriate search criteria with the set-functions. Note
274b5840353SAdam Hornáček      * that this search will return the first cachePages of hitsPerPage, for
275b5840353SAdam Hornáček      * more you need to call more.
276b5840353SAdam Hornáček      *
277b5840353SAdam Hornáček      * Call to search() must be eventually followed by call to destroy()
278b5840353SAdam Hornáček      * so that IndexSearcher objects are properly freed.
279b5840353SAdam Hornáček      *
280d8a7afe2SAdam Hornacek      * @param projects projects to search
281b5840353SAdam Hornáček      * @return The number of hits
282b5840353SAdam Hornáček      */
search(List<Project> projects)283d8a7afe2SAdam Hornacek     public int search(List<Project> projects) {
284d8a7afe2SAdam Hornacek         return search(projects, new File(RuntimeEnvironment.getInstance().getDataRootFile(), IndexDatabase.INDEX_DIR));
285b5840353SAdam Hornáček     }
286b5840353SAdam Hornáček 
287b5840353SAdam Hornáček     /**
288b5840353SAdam Hornáček      * Execute a search without authorization.
289b5840353SAdam Hornáček      *
290b5840353SAdam Hornáček      * Before calling this function, you must set the
291b5840353SAdam Hornáček      * appropriate search criteria with the set-functions. Note that this search
292b5840353SAdam Hornáček      * will return the first cachePages of hitsPerPage, for more you need to
293b5840353SAdam Hornáček      * call more.
294b5840353SAdam Hornáček      *
295b5840353SAdam Hornáček      * Call to search() must be eventually followed by call to destroy()
296b5840353SAdam Hornáček      * so that IndexSearcher objects are properly freed.
297b5840353SAdam Hornáček      *
298b5840353SAdam Hornáček      * @return The number of hits
299b5840353SAdam Hornáček      */
search()300b5840353SAdam Hornáček     public int search() {
301b5840353SAdam Hornáček         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
302b5840353SAdam Hornáček         return search(
303b5840353SAdam Hornáček                 env.hasProjects() ? env.getProjectList() : new ArrayList<>(),
304b5840353SAdam Hornáček                 new File(env.getDataRootFile(), IndexDatabase.INDEX_DIR));
305b5840353SAdam Hornáček     }
306b5840353SAdam Hornáček 
307b5840353SAdam Hornáček     /**
308b5840353SAdam Hornáček      * Execute a search on projects or root file.
309b5840353SAdam Hornáček      *
310b5840353SAdam Hornáček      * If @param projects is an empty list it tries to search in @code
311b5840353SAdam Hornáček      * searchSingleDatabase with root set to @param root
312b5840353SAdam Hornáček      *
313b5840353SAdam Hornáček      * Call to search() must be eventually followed by call to destroy()
314b5840353SAdam Hornáček      * so that IndexSearcher objects are properly freed.
315b5840353SAdam Hornáček      *
316b5840353SAdam Hornáček      * @return The number of hits
317b5840353SAdam Hornáček      */
search(List<Project> projects, File root)318b5840353SAdam Hornáček     private int search(List<Project> projects, File root) {
319b5840353SAdam Hornáček         source = RuntimeEnvironment.getInstance().getSourceRootPath();
320b5840353SAdam Hornáček         data = RuntimeEnvironment.getInstance().getDataRootPath();
321b5840353SAdam Hornáček         docs.clear();
322b5840353SAdam Hornáček 
323b5840353SAdam Hornáček         QueryBuilder newBuilder = createQueryBuilder();
324b5840353SAdam Hornáček         try {
325b5840353SAdam Hornáček             query = newBuilder.build();
326b5840353SAdam Hornáček             if (query != null) {
327b5840353SAdam Hornáček 
328b5840353SAdam Hornáček                 if (projects.isEmpty()) {
329b5840353SAdam Hornáček                     // search the index database
330b5840353SAdam Hornáček                     //NOTE this assumes that src does not contain any project, just
331b5840353SAdam Hornáček                     // data files - so no authorization can be enforced
332b5840353SAdam Hornáček                     searchSingleDatabase(root, true);
333b5840353SAdam Hornáček                 } else {
334b5840353SAdam Hornáček                     // search all projects
335b5840353SAdam Hornáček                     //TODO support paging per project (in search.java)
336b5840353SAdam Hornáček                     //TODO optimize if only one project by falling back to SingleDatabase ?
337b5840353SAdam Hornáček                     //NOTE projects are already filtered if we accessed through web page @see search(HttpServletRequest)
338b5840353SAdam Hornáček                     searchMultiDatabase(projects, false);
339b5840353SAdam Hornáček                 }
340b5840353SAdam Hornáček             }
341b5840353SAdam Hornáček         } catch (Exception e) {
342b5840353SAdam Hornáček             LOGGER.log(
343b5840353SAdam Hornáček                     Level.WARNING, SEARCH_EXCEPTION_MSG, e);
344b5840353SAdam Hornáček         }
345b5840353SAdam Hornáček 
346b5840353SAdam Hornáček         if (!docs.isEmpty()) {
347b5840353SAdam Hornáček             sourceContext = null;
348b5840353SAdam Hornáček             summarizer = null;
349b5840353SAdam Hornáček             try {
350b5840353SAdam Hornáček                 sourceContext = new Context(query, newBuilder);
351b5840353SAdam Hornáček                 if (sourceContext.isEmpty()) {
352b5840353SAdam Hornáček                     sourceContext = null;
353b5840353SAdam Hornáček                 }
354b5840353SAdam Hornáček                 summarizer = new Summarizer(query, analyzer);
355b5840353SAdam Hornáček             } catch (Exception e) {
356b5840353SAdam Hornáček                 LOGGER.log(Level.WARNING, "An error occurred while creating summary", e);
357b5840353SAdam Hornáček             }
358b5840353SAdam Hornáček 
359b5840353SAdam Hornáček             historyContext = null;
360b5840353SAdam Hornáček             try {
361b5840353SAdam Hornáček                 historyContext = new HistoryContext(query);
362b5840353SAdam Hornáček                 if (historyContext.isEmpty()) {
363b5840353SAdam Hornáček                     historyContext = null;
364b5840353SAdam Hornáček                 }
365b5840353SAdam Hornáček             } catch (Exception e) {
366b5840353SAdam Hornáček                 LOGGER.log(Level.WARNING, "An error occurred while getting history context", e);
367b5840353SAdam Hornáček             }
368b5840353SAdam Hornáček         }
369b5840353SAdam Hornáček         int count = hits == null ? 0 : hits.length;
370b5840353SAdam Hornáček         queryBuilder = newBuilder;
371b5840353SAdam Hornáček         return count;
372b5840353SAdam Hornáček     }
373b5840353SAdam Hornáček 
374b5840353SAdam Hornáček     /**
375b5840353SAdam Hornáček      * Gets the queried score docs from {@code search(...)} if it was called.
376b5840353SAdam Hornáček      * @return a defined instance if a query succeeded, or {@code null}
377b5840353SAdam Hornáček      */
scoreDocs()378b5840353SAdam Hornáček     public ScoreDoc[] scoreDocs() {
379b5840353SAdam Hornáček         return hits;
380b5840353SAdam Hornáček     }
381b5840353SAdam Hornáček 
382b5840353SAdam Hornáček     /**
383b5840353SAdam Hornáček      * Gets the document of the specified {@code docId} from
384b5840353SAdam Hornáček      * {@code search(...)} if it was called.
38581b586e6SVladimir Kotal      *
38681b586e6SVladimir Kotal      * @param docId document ID
387b5840353SAdam Hornáček      * @return a defined instance if a query succeeded
388b5840353SAdam Hornáček      * @throws java.io.IOException if an error occurs obtaining the Lucene
389b5840353SAdam Hornáček      * document by ID
390b5840353SAdam Hornáček      */
doc(int docId)391b5840353SAdam Hornáček     public Document doc(int docId) throws IOException {
392b5840353SAdam Hornáček         if (searcher == null) {
393b5840353SAdam Hornáček             throw new IllegalStateException("search(...) did not succeed");
394b5840353SAdam Hornáček         }
395b5840353SAdam Hornáček         return searcher.doc(docId);
396b5840353SAdam Hornáček     }
397b5840353SAdam Hornáček 
398b5840353SAdam Hornáček     /**
399ba599c91SVladimir Kotal      * Get results , if no search was started before, no results are returned.
400ba599c91SVladimir Kotal      * This method will requery if {@code end} is more than first query from search,
401b5840353SAdam Hornáček      * hence performance hit applies, if you want results in later pages than
402ba599c91SVladimir Kotal      * number of cachePages. {@code end} has to be bigger than {@code start} !
403b5840353SAdam Hornáček      *
404b5840353SAdam Hornáček      * @param start start of the hit list
405b5840353SAdam Hornáček      * @param end end of the hit list
406b5840353SAdam Hornáček      * @param ret list of results from start to end or null/empty if no search
407b5840353SAdam Hornáček      * was started
408b5840353SAdam Hornáček      */
results(int start, int end, List<Hit> ret)409b5840353SAdam Hornáček     public void results(int start, int end, List<Hit> ret) {
410b5840353SAdam Hornáček 
411b5840353SAdam Hornáček         //return if no start search() was done
412b5840353SAdam Hornáček         if (hits == null || (end < start)) {
413b5840353SAdam Hornáček             ret.clear();
414b5840353SAdam Hornáček             return;
415b5840353SAdam Hornáček         }
416b5840353SAdam Hornáček 
417b5840353SAdam Hornáček         ret.clear();
418b5840353SAdam Hornáček 
419b5840353SAdam Hornáček         // TODO check if below fits for if end=old hits.length, or it should include it
420ba599c91SVladimir Kotal         if (end > hits.length && !allCollected) {
421b5840353SAdam Hornáček             //do the requery, we want more than 5 pages
4224cf88309SLubos Kosco             collector = TopScoreDocCollector.create(totalHits, Short.MAX_VALUE);
423b5840353SAdam Hornáček             try {
424b5840353SAdam Hornáček                 searcher.search(query, collector);
425b5840353SAdam Hornáček             } catch (Exception e) { // this exception should never be hit, since search() will hit this before
426b5840353SAdam Hornáček                 LOGGER.log(
427b5840353SAdam Hornáček                         Level.WARNING, SEARCH_EXCEPTION_MSG, e);
428b5840353SAdam Hornáček             }
429b5840353SAdam Hornáček             hits = collector.topDocs().scoreDocs;
430b5840353SAdam Hornáček             Document d = null;
431b5840353SAdam Hornáček             for (int i = start; i < hits.length; i++) {
432b5840353SAdam Hornáček                 int docId = hits[i].doc;
433b5840353SAdam Hornáček                 try {
434b5840353SAdam Hornáček                     d = searcher.doc(docId);
435b5840353SAdam Hornáček                 } catch (Exception e) {
436b5840353SAdam Hornáček                     LOGGER.log(
437b5840353SAdam Hornáček                             Level.SEVERE, SEARCH_EXCEPTION_MSG, e);
438b5840353SAdam Hornáček                 }
439b5840353SAdam Hornáček                 docs.add(d);
440b5840353SAdam Hornáček             }
441b5840353SAdam Hornáček             allCollected = true;
442b5840353SAdam Hornáček         }
443b5840353SAdam Hornáček 
444ff44f24aSAdam Hornáček         //TODO generation of ret(results) could be cached and consumers of engine would just print them in whatever
445ff44f24aSAdam Hornáček         // form they need, this way we could get rid of docs
446b5840353SAdam Hornáček         // the only problem is that count of docs is usually smaller than number of results
447b5840353SAdam Hornáček         for (int ii = start; ii < end; ++ii) {
448b5840353SAdam Hornáček             boolean alt = (ii % 2 == 0);
449b5840353SAdam Hornáček             boolean hasContext = false;
450b5840353SAdam Hornáček             try {
451b5840353SAdam Hornáček                 Document doc = docs.get(ii);
452b5840353SAdam Hornáček                 String filename = doc.get(QueryBuilder.PATH);
453b5840353SAdam Hornáček 
45457eefa47SKryštof Tulinger                 AbstractAnalyzer.Genre genre = AbstractAnalyzer.Genre.get(doc.get(QueryBuilder.T));
455b5840353SAdam Hornáček                 Definitions tags = null;
456b5840353SAdam Hornáček                 IndexableField tagsField = doc.getField(QueryBuilder.TAGS);
457b5840353SAdam Hornáček                 if (tagsField != null) {
458b5840353SAdam Hornáček                     tags = Definitions.deserialize(tagsField.binaryValue().bytes);
459b5840353SAdam Hornáček                 }
460b5840353SAdam Hornáček                 Scopes scopes = null;
461b5840353SAdam Hornáček                 IndexableField scopesField = doc.getField(QueryBuilder.SCOPES);
462b5840353SAdam Hornáček                 if (scopesField != null) {
463b5840353SAdam Hornáček                     scopes = Scopes.deserialize(scopesField.binaryValue().bytes);
464b5840353SAdam Hornáček                 }
465b5840353SAdam Hornáček                 int nhits = docs.size();
466b5840353SAdam Hornáček 
467b5840353SAdam Hornáček                 if (sourceContext != null) {
468b5840353SAdam Hornáček                     sourceContext.toggleAlt();
469b5840353SAdam Hornáček                     try {
47057eefa47SKryštof Tulinger                         if (AbstractAnalyzer.Genre.PLAIN == genre && (source != null)) {
471b5840353SAdam Hornáček                             // SRCROOT is read with UTF-8 as a default.
472b5840353SAdam Hornáček                             hasContext = sourceContext.getContext(
473b5840353SAdam Hornáček                                 new InputStreamReader(new FileInputStream(
474b5840353SAdam Hornáček                                 source + filename), StandardCharsets.UTF_8),
475b5840353SAdam Hornáček                                 null, null, null, filename, tags, nhits > 100,
476*70091cc0SVladimir Kotal                                 getDefinition() != null, ret, scopes);
47757eefa47SKryštof Tulinger                         } else if (AbstractAnalyzer.Genre.XREFABLE == genre && data != null && summarizer != null) {
478b5840353SAdam Hornáček                             int l;
479b5840353SAdam Hornáček                             /**
480b5840353SAdam Hornáček                              * For backward compatibility, read the
481b5840353SAdam Hornáček                              * OpenGrok-produced document using the system
482b5840353SAdam Hornáček                              * default charset.
483b5840353SAdam Hornáček                              */
484b5840353SAdam Hornáček                             try (Reader r = RuntimeEnvironment.getInstance().isCompressXref()
4854da26a1eSChris Fraire                                     ? new HTMLStripCharFilter(new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(
4864da26a1eSChris Fraire                                             TandemPath.join(data + Prefix.XREF_P + filename, ".gz"))))))
487b5840353SAdam Hornáček                                     : new HTMLStripCharFilter(new BufferedReader(new FileReader(data + Prefix.XREF_P + filename)))) {
488b5840353SAdam Hornáček                                 l = r.read(content);
489b5840353SAdam Hornáček                             }
490ff44f24aSAdam Hornáček                             //TODO FIX below fragmenter according to either summarizer or context
491ff44f24aSAdam Hornáček                             // (to get line numbers, might be hard, since xref writers will need to be fixed too,
492ff44f24aSAdam Hornáček                             // they generate just one line of html code now :( )
493b5840353SAdam Hornáček                             Summary sum = summarizer.getSummary(new String(content, 0, l));
494d1e826faSAdam Hornáček                             Fragment[] fragments = sum.getFragments();
495d1e826faSAdam Hornáček                             for (Fragment fragment : fragments) {
496d1e826faSAdam Hornáček                                 String match = fragment.toString();
497b5840353SAdam Hornáček                                 if (match.length() > 0) {
498d1e826faSAdam Hornáček                                     if (!fragment.isEllipsis()) {
499d1e826faSAdam Hornáček                                         Hit hit = new Hit(filename, fragment.toString(), "", true, alt);
500b5840353SAdam Hornáček                                         ret.add(hit);
501b5840353SAdam Hornáček                                     }
502b5840353SAdam Hornáček                                     hasContext = true;
503b5840353SAdam Hornáček                                 }
504b5840353SAdam Hornáček                             }
505b5840353SAdam Hornáček                         } else {
506b5840353SAdam Hornáček                             LOGGER.log(Level.WARNING, "Unknown genre: {0} for {1}", new Object[]{genre, filename});
507b5840353SAdam Hornáček                             hasContext |= sourceContext.getContext(null, null, null, null, filename, tags, false, false, ret, scopes);
508b5840353SAdam Hornáček                         }
509b5840353SAdam Hornáček                     } catch (FileNotFoundException exp) {
510b5840353SAdam Hornáček                         LOGGER.log(Level.WARNING, "Couldn''t read summary from {0} ({1})", new Object[]{filename, exp.getMessage()});
511b5840353SAdam Hornáček                         hasContext |= sourceContext.getContext(null, null, null, null, filename, tags, false, false, ret, scopes);
512b5840353SAdam Hornáček                     }
513b5840353SAdam Hornáček                 }
514b5840353SAdam Hornáček                 if (historyContext != null) {
515b5840353SAdam Hornáček                     hasContext |= historyContext.getContext(source + filename, filename, ret);
516b5840353SAdam Hornáček                 }
517b5840353SAdam Hornáček                 if (!hasContext) {
518b5840353SAdam Hornáček                     ret.add(new Hit(filename, "...", "", false, alt));
519b5840353SAdam Hornáček                 }
520b5840353SAdam Hornáček             } catch (IOException | ClassNotFoundException | HistoryException e) {
521b5840353SAdam Hornáček                 LOGGER.log(
522b5840353SAdam Hornáček                         Level.WARNING, SEARCH_EXCEPTION_MSG, e);
523b5840353SAdam Hornáček             }
524b5840353SAdam Hornáček         }
525b5840353SAdam Hornáček     }
526b5840353SAdam Hornáček 
destroy()527b5840353SAdam Hornáček     public void destroy() {
528b5840353SAdam Hornáček         for (SuperIndexSearcher is : searcherList) {
529b5840353SAdam Hornáček             try {
530b5840353SAdam Hornáček                 is.getSearcherManager().release(is);
531b5840353SAdam Hornáček             } catch (IOException ex) {
532b5840353SAdam Hornáček                 LOGGER.log(Level.WARNING, "cannot release indexSearcher", ex);
533b5840353SAdam Hornáček             }
534b5840353SAdam Hornáček         }
535b5840353SAdam Hornáček     }
536b5840353SAdam Hornáček 
537b5840353SAdam Hornáček     /**
538b5840353SAdam Hornáček      * Getter for property definition.
539b5840353SAdam Hornáček      *
540b5840353SAdam Hornáček      * @return Value of property definition.
541b5840353SAdam Hornáček      */
getDefinition()542b5840353SAdam Hornáček     public String getDefinition() {
543b5840353SAdam Hornáček         return this.definition;
544b5840353SAdam Hornáček     }
545b5840353SAdam Hornáček 
546b5840353SAdam Hornáček     /**
547b5840353SAdam Hornáček      * Setter for property definition.
548b5840353SAdam Hornáček      *
549b5840353SAdam Hornáček      * @param definition New value of property definition.
550b5840353SAdam Hornáček      */
setDefinition(String definition)551b5840353SAdam Hornáček     public void setDefinition(String definition) {
552b5840353SAdam Hornáček         this.definition = definition;
553b5840353SAdam Hornáček     }
554b5840353SAdam Hornáček 
555b5840353SAdam Hornáček     /**
556b5840353SAdam Hornáček      * Getter for property file.
557b5840353SAdam Hornáček      *
558b5840353SAdam Hornáček      * @return Value of property file.
559b5840353SAdam Hornáček      */
getFile()560b5840353SAdam Hornáček     public String getFile() {
561b5840353SAdam Hornáček         return this.file;
562b5840353SAdam Hornáček     }
563b5840353SAdam Hornáček 
564b5840353SAdam Hornáček     /**
565b5840353SAdam Hornáček      * Setter for property file.
566b5840353SAdam Hornáček      *
567b5840353SAdam Hornáček      * @param file New value of property file.
568b5840353SAdam Hornáček      */
setFile(String file)569b5840353SAdam Hornáček     public void setFile(String file) {
570b5840353SAdam Hornáček         this.file = file;
571b5840353SAdam Hornáček     }
572b5840353SAdam Hornáček 
573b5840353SAdam Hornáček     /**
574b5840353SAdam Hornáček      * Getter for property freetext.
575b5840353SAdam Hornáček      *
576b5840353SAdam Hornáček      * @return Value of property freetext.
577b5840353SAdam Hornáček      */
getFreetext()578b5840353SAdam Hornáček     public String getFreetext() {
579b5840353SAdam Hornáček         return this.freetext;
580b5840353SAdam Hornáček     }
581b5840353SAdam Hornáček 
582b5840353SAdam Hornáček     /**
583b5840353SAdam Hornáček      * Setter for property freetext.
584b5840353SAdam Hornáček      *
585b5840353SAdam Hornáček      * @param freetext New value of property freetext.
586b5840353SAdam Hornáček      */
setFreetext(String freetext)587b5840353SAdam Hornáček     public void setFreetext(String freetext) {
588b5840353SAdam Hornáček         this.freetext = freetext;
589b5840353SAdam Hornáček     }
590b5840353SAdam Hornáček 
591b5840353SAdam Hornáček     /**
592b5840353SAdam Hornáček      * Getter for property history.
593b5840353SAdam Hornáček      *
594b5840353SAdam Hornáček      * @return Value of property history.
595b5840353SAdam Hornáček      */
getHistory()596b5840353SAdam Hornáček     public String getHistory() {
597b5840353SAdam Hornáček         return this.history;
598b5840353SAdam Hornáček     }
599b5840353SAdam Hornáček 
600b5840353SAdam Hornáček     /**
601b5840353SAdam Hornáček      * Setter for property history.
602b5840353SAdam Hornáček      *
603b5840353SAdam Hornáček      * @param history New value of property history.
604b5840353SAdam Hornáček      */
setHistory(String history)605b5840353SAdam Hornáček     public void setHistory(String history) {
606b5840353SAdam Hornáček         this.history = history;
607b5840353SAdam Hornáček     }
608b5840353SAdam Hornáček 
609b5840353SAdam Hornáček     /**
610b5840353SAdam Hornáček      * Getter for property symbol.
611b5840353SAdam Hornáček      *
612b5840353SAdam Hornáček      * @return Value of property symbol.
613b5840353SAdam Hornáček      */
getSymbol()614b5840353SAdam Hornáček     public String getSymbol() {
615b5840353SAdam Hornáček         return this.symbol;
616b5840353SAdam Hornáček     }
617b5840353SAdam Hornáček 
618b5840353SAdam Hornáček     /**
619b5840353SAdam Hornáček      * Setter for property symbol.
620b5840353SAdam Hornáček      *
621b5840353SAdam Hornáček      * @param symbol New value of property symbol.
622b5840353SAdam Hornáček      */
setSymbol(String symbol)623b5840353SAdam Hornáček     public void setSymbol(String symbol) {
624b5840353SAdam Hornáček         this.symbol = symbol;
625b5840353SAdam Hornáček     }
626b5840353SAdam Hornáček 
627b5840353SAdam Hornáček     /**
628b5840353SAdam Hornáček      * Getter for property type.
629b5840353SAdam Hornáček      *
630b5840353SAdam Hornáček      * @return Value of property type.
631b5840353SAdam Hornáček      */
getType()632b5840353SAdam Hornáček     public String getType() {
633b5840353SAdam Hornáček         return this.type;
634b5840353SAdam Hornáček     }
635b5840353SAdam Hornáček 
636b5840353SAdam Hornáček     /**
637b5840353SAdam Hornáček      * Setter for property type.
638b5840353SAdam Hornáček      *
639b5840353SAdam Hornáček      * @param fileType New value of property type.
640b5840353SAdam Hornáček      */
setType(String fileType)641b5840353SAdam Hornáček     public void setType(String fileType) {
642b5840353SAdam Hornáček         this.type = fileType;
643b5840353SAdam Hornáček     }
644b5840353SAdam Hornáček }
645