xref: /OpenGrok/opengrok-indexer/src/main/java/org/opengrok/indexer/search/context/Context.java (revision d6df19e1b22784c78f567cf74c42f18e3901b900)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved.
22  * Portions Copyright (c) 2011, Jens Elkner.
23  * Portions Copyright (c) 2018, 2020, Chris Fraire <cfraire@me.com>.
24  */
25 package org.opengrok.indexer.search.context;
26 
27 import java.io.IOException;
28 import java.io.Reader;
29 import java.io.Writer;
30 import java.util.List;
31 import java.util.Map;
32 import java.util.TreeMap;
33 import java.util.logging.Level;
34 import java.util.logging.Logger;
35 
36 import org.apache.lucene.document.Document;
37 import org.apache.lucene.index.IndexableField;
38 import org.apache.lucene.search.IndexSearcher;
39 import org.apache.lucene.search.Query;
40 import org.opengrok.indexer.analysis.AbstractAnalyzer;
41 import org.opengrok.indexer.analysis.Definitions;
42 import org.opengrok.indexer.analysis.Scopes;
43 import org.opengrok.indexer.analysis.Scopes.Scope;
44 import org.opengrok.indexer.analysis.plain.PlainAnalyzerFactory;
45 import org.opengrok.indexer.configuration.RuntimeEnvironment;
46 import org.opengrok.indexer.logger.LoggerFactory;
47 import org.opengrok.indexer.search.Hit;
48 import org.opengrok.indexer.search.QueryBuilder;
49 import org.opengrok.indexer.util.IOUtils;
50 import org.opengrok.indexer.web.Util;
51 
52 /**
53  * This is supposed to get the matching lines from sourcefile.
54  * since Lucene does not easily give the match context.
55  */
56 public class Context {
57 
58     static final int MAXFILEREAD = 1024 * 1024;
59 
60     private static final Logger LOGGER = LoggerFactory.getLogger(Context.class);
61 
62     private final Query query;
63     private final QueryBuilder qbuilder;
64     private final LineMatcher[] m;
65     private final String queryAsURI;
66 
67     /**
68      * Map whose keys tell which fields to look for in the source file, and
69      * whose values tell if the field is case insensitive (true for
70      * insensitivity, false for sensitivity).
71      */
72     private static final Map<String, Boolean> TOKEN_FIELDS = Map.of(
73             QueryBuilder.FULL, Boolean.TRUE,
74             QueryBuilder.REFS, Boolean.FALSE,
75             QueryBuilder.DEFS, Boolean.FALSE
76     );
77 
78     /**
79      * Initializes a context generator for matchers derived from the specified
80      * {@code query} -- which might be {@code null} and result in
81      * {@link #isEmpty()} returning {@code true}.
82      * @param query the query to generate the result for
83      * @param qbuilder required builder used to create {@code query}
84      */
Context(Query query, QueryBuilder qbuilder)85     public Context(Query query, QueryBuilder qbuilder) {
86         if (qbuilder == null) {
87             throw new IllegalArgumentException("qbuilder is null");
88         }
89 
90         this.query = query;
91         this.qbuilder = qbuilder;
92         QueryMatchers qm = new QueryMatchers();
93         m = qm.getMatchers(query, TOKEN_FIELDS);
94         if (m != null) {
95             queryAsURI = buildQueryAsURI(qbuilder.getQueries());
96         } else {
97             queryAsURI = "";
98         }
99     }
100 
101     /**
102      * Toggles the alternating value (initially {@code true}).
103      */
toggleAlt()104     public void toggleAlt() {
105         alt = !alt;
106     }
107 
isEmpty()108     public boolean isEmpty() {
109         return m == null;
110     }
111 
112     /**
113      * Look for context for this instance's initialized query in a search result
114      * {@link Document}, and output according to the parameters.
115      * @param env required environment
116      * @param searcher required search that produced the document
117      * @param docId document ID for producing context
118      * @param dest required target to write
119      * @param urlPrefix prefix for links
120      * @param morePrefix optional link to more... page
121      * @param limit a value indicating if the number of matching lines should be
122      * limited. N.b. unlike
123      * {@link #getContext(java.io.Reader, java.io.Writer, java.lang.String, java.lang.String, java.lang.String,
124      * org.opengrok.indexer.analysis.Definitions, boolean, boolean, java.util.List, org.opengrok.indexer.analysis.Scopes)},
125      * the {@code limit} argument will not be interpreted w.r.t.
126      * {@link RuntimeEnvironment#isQuickContextScan()}.
127      * @param tabSize optional positive tab size that must accord with the value
128      * used when indexing or else postings may be wrongly shifted until
129      * re-indexing
130      * @return Did it get any matching context?
131      */
getContext2(RuntimeEnvironment env, IndexSearcher searcher, int docId, Appendable dest, String urlPrefix, String morePrefix, boolean limit, int tabSize)132     public boolean getContext2(RuntimeEnvironment env, IndexSearcher searcher,
133         int docId, Appendable dest, String urlPrefix, String morePrefix,
134         boolean limit, int tabSize) {
135 
136         if (isEmpty()) {
137             return false;
138         }
139 
140         Document doc;
141         try {
142             doc = searcher.doc(docId);
143         } catch (IOException e) {
144             LOGGER.log(Level.WARNING, "ERROR getting searcher doc(int)", e);
145             return false;
146         }
147 
148         Definitions tags = null;
149         try {
150             IndexableField tagsField = doc.getField(QueryBuilder.TAGS);
151             if (tagsField != null) {
152                 tags = Definitions.deserialize(tagsField.binaryValue().bytes);
153             }
154         } catch (ClassNotFoundException | IOException e) {
155             LOGGER.log(Level.WARNING, "ERROR Definitions.deserialize(...)", e);
156             return false;
157         }
158 
159         Scopes scopes;
160         try {
161             IndexableField scopesField = doc.getField(QueryBuilder.SCOPES);
162             if (scopesField != null) {
163                 scopes = Scopes.deserialize(scopesField.binaryValue().bytes);
164             } else {
165                 scopes = new Scopes();
166             }
167         } catch (ClassNotFoundException | IOException e) {
168             LOGGER.log(Level.WARNING, "ERROR Scopes.deserialize(...)", e);
169             return false;
170         }
171 
172         /*
173          * UnifiedHighlighter demands an analyzer "even if in some
174          * circumstances it isn't used"; here it is not meant to be used.
175          */
176         PlainAnalyzerFactory fac = PlainAnalyzerFactory.DEFAULT_INSTANCE;
177         AbstractAnalyzer anz = fac.getAnalyzer();
178 
179         String path = doc.get(QueryBuilder.PATH);
180         String pathE = Util.uriEncodePath(path);
181         String urlPrefixE = urlPrefix == null ? "" : Util.uriEncodePath(urlPrefix);
182         String moreURL = morePrefix == null ? null : Util.uriEncodePath(morePrefix) + pathE + "?" + queryAsURI;
183 
184         ContextArgs args = new ContextArgs(env.getContextSurround(), env.getContextLimit());
185         /*
186          * Lucene adds to the following value in FieldHighlighter, so avoid
187          * integer overflow by not using Integer.MAX_VALUE -- Short is good
188          * enough.
189          */
190         int linelimit = limit ? args.getContextLimit() : Short.MAX_VALUE;
191 
192         ContextFormatter formatter = new ContextFormatter(args);
193         formatter.setUrl(urlPrefixE + pathE);
194         formatter.setDefs(tags);
195         formatter.setScopes(scopes);
196         formatter.setMoreUrl(moreURL);
197         formatter.setMoreLimit(linelimit);
198 
199         OGKUnifiedHighlighter uhi = new OGKUnifiedHighlighter(env, searcher, anz);
200         uhi.setBreakIterator(StrictLineBreakIterator::new);
201         uhi.setFormatter(formatter);
202         uhi.setTabSize(tabSize);
203 
204         try {
205             List<String> fieldList = qbuilder.getContextFields();
206             String[] fields = fieldList.toArray(new String[0]);
207 
208             String res = uhi.highlightFieldsUnion(fields, query, docId,
209                 linelimit);
210             if (res != null) {
211                 dest.append(res);
212                 return true;
213             }
214         } catch (IOException e) {
215             LOGGER.log(Level.WARNING, "ERROR highlightFieldsUnion(...)", e);
216             // Continue below.
217         } catch (Throwable e) {
218             LOGGER.log(Level.SEVERE, "ERROR highlightFieldsUnion(...)", e);
219             throw e;
220         }
221         return false;
222     }
223 
224     /**
225      * Build the {@code queryAsURI} string that holds the query in a form
226      * that's suitable for sending it as part of a URI.
227      *
228      * @param subqueries a map containing the query text for each field
229      */
buildQueryAsURI(Map<String, String> subqueries)230     private String buildQueryAsURI(Map<String, String> subqueries) {
231         if (subqueries.isEmpty()) {
232             return "";
233         }
234         StringBuilder sb = new StringBuilder();
235         for (Map.Entry<String, String> entry : subqueries.entrySet()) {
236             String field = entry.getKey();
237             String queryText = entry.getValue();
238             sb.append(field).append("=").append(Util.uriEncode(queryText)).append('&');
239         }
240         sb.setLength(sb.length() - 1);
241         return sb.toString();
242     }
243 
244     private boolean alt = true;
245 
getContext(Reader in, Writer out, String urlPrefix, String morePrefix, String path, Definitions tags, boolean limit, boolean isDefSearch, List<Hit> hits)246     public boolean getContext(Reader in, Writer out, String urlPrefix,
247         String morePrefix, String path, Definitions tags,
248         boolean limit, boolean isDefSearch, List<Hit> hits) {
249         return getContext(in, out, urlPrefix, morePrefix, path, tags, limit, isDefSearch, hits, null);
250     }
251     /**
252      * ???.
253      * Closes the given <var>in</var> reader on return.
254      *
255      * @param in File to be matched
256      * @param out to write the context
257      * @param urlPrefix URL prefix
258      * @param morePrefix to link to more... page
259      * @param path path of the file
260      * @param tags format to highlight defs.
261      * @param limit should the number of matching lines be limited?
262      * @param isDefSearch is definition search
263      * @param hits list of hits
264      * @param scopes scopes object
265      * @return Did it get any matching context?
266      */
getContext(Reader in, Writer out, String urlPrefix, String morePrefix, String path, Definitions tags, boolean limit, boolean isDefSearch, List<Hit> hits, Scopes scopes)267     public boolean getContext(Reader in, Writer out, String urlPrefix,
268             String morePrefix, String path, Definitions tags,
269             boolean limit, boolean isDefSearch, List<Hit> hits, Scopes scopes) {
270         if (m == null) {
271             IOUtils.close(in);
272             return false;
273         }
274         boolean anything = false;
275         TreeMap<Integer, String[]> matchingTags = null;
276         String urlPrefixE = (urlPrefix == null) ? "" : Util.uriEncodePath(urlPrefix);
277         String pathE = Util.uriEncodePath(path);
278         if (tags != null) {
279             matchingTags = new TreeMap<>();
280             try {
281                 for (Definitions.Tag tag : tags.getTags()) {
282                     for (LineMatcher lineMatcher : m) {
283                         if (lineMatcher.match(tag.symbol) == LineMatcher.MATCHED) {
284                             String scope = null;
285                             String scopeUrl = null;
286                             if (scopes != null) {
287                                 Scope scp = scopes.getScope(tag.line);
288                                 scope = scp.getName() + "()";
289                                 scopeUrl = "<a href=\"" + urlPrefixE + pathE + "#" +
290                                         scp.getLineFrom() + "\">" + scope + "</a>";
291                             }
292 
293                             /* desc[0] is matched symbol
294                              * desc[1] is line number
295                              * desc[2] is type
296                              * desc[3] is matching line;
297                              * desc[4] is scope
298                              */
299                             String[] desc = {
300                                     tag.symbol,
301                                     Integer.toString(tag.line),
302                                     tag.type,
303                                     tag.text,
304                                     scope,
305                             };
306                             if (in == null) {
307                                 if (out == null) {
308                                     Hit hit = new Hit(path,
309                                             Util.htmlize(desc[3]).replace(
310                                                     desc[0], "<b>" + desc[0] + "</b>"),
311                                             desc[1], false, alt);
312                                     hits.add(hit);
313                                 } else {
314                                     out.write("<a class=\"s\" href=\"");
315                                     out.write(urlPrefixE);
316                                     out.write(pathE);
317                                     out.write("#");
318                                     out.write(desc[1]);
319                                     out.write("\"><span class=\"l\">");
320                                     out.write(desc[1]);
321                                     out.write("</span> ");
322                                     out.write(Util.htmlize(desc[3]).replace(
323                                             desc[0], "<b>" + desc[0] + "</b>"));
324                                     out.write("</a> ");
325 
326                                     if (desc[4] != null) {
327                                         out.write("<span class=\"scope\"><a href\"");
328                                         out.write(scopeUrl);
329                                         out.write("\">in ");
330                                         out.write(desc[4]);
331                                         out.write("</a></span> ");
332                                     }
333                                     out.write("<i>");
334                                     out.write(desc[2]);
335                                     out.write("</i><br/>");
336                                 }
337                                 anything = true;
338                             } else {
339                                 matchingTags.put(tag.line, desc);
340                             }
341                             break;
342                         }
343                     }
344                 }
345             } catch (Exception e) {
346                 if (hits != null) {
347                     // @todo verify why we ignore all exceptions?
348                     LOGGER.log(Level.WARNING, "Could not get context for " + path, e);
349                 }
350             }
351         }
352 
353         // Just to get the matching tag send a null in
354         if (in == null) {
355             return anything;
356         }
357 
358         PlainLineTokenizer tokens = new PlainLineTokenizer(null);
359         boolean truncated = false;
360         boolean lim = limit;
361         RuntimeEnvironment env = RuntimeEnvironment.getInstance();
362         if (!env.isQuickContextScan()) {
363             lim = false;
364         }
365 
366         if (lim) {
367             char[] buffer = new char[MAXFILEREAD];
368             int charsRead;
369             try {
370                 charsRead = in.read(buffer);
371                 if (charsRead == MAXFILEREAD) {
372                     // we probably only read parts of the file, so set the
373                     // truncated flag to enable the [all...] link that
374                     // requests all matches
375                     truncated = true;
376                     // truncate to last line read (don't look more than 100
377                     // characters back)
378                     for (int i = charsRead - 1; i > charsRead - 100; i--) {
379                         if (buffer[i] == '\n') {
380                             charsRead = i;
381                             break;
382                         }
383                     }
384                 }
385             } catch (IOException e) {
386                 LOGGER.log(Level.WARNING, "An error occurred while reading data", e);
387                 return anything;
388             }
389             if (charsRead == 0) {
390                 return anything;
391             }
392 
393             tokens.reInit(buffer, charsRead, out, urlPrefixE + pathE + "#", matchingTags, scopes);
394         } else {
395             tokens.reInit(in, out, urlPrefixE + pathE + "#", matchingTags, scopes);
396         }
397 
398         if (hits != null) {
399             tokens.setAlt(alt);
400             tokens.setHitList(hits);
401             tokens.setFilename(path);
402         }
403 
404         int limit_max_lines = env.getContextLimit();
405         try {
406             String token;
407             int matchState;
408             int matchedLines = 0;
409             while ((token = tokens.yylex()) != null && (!lim ||
410                     matchedLines < limit_max_lines)) {
411                 for (LineMatcher lineMatcher : m) {
412                     matchState = lineMatcher.match(token);
413                     if (matchState == LineMatcher.MATCHED) {
414                         if (!isDefSearch) {
415                             tokens.printContext();
416                         } else if (tokens.tags.containsKey(tokens.markedLine)) {
417                             tokens.printContext();
418                         }
419                         matchedLines++;
420                         break;
421                     } else if (matchState == LineMatcher.WAIT) {
422                         tokens.holdOn();
423                     } else {
424                         tokens.neverMind();
425                     }
426                 }
427             }
428             anything = matchedLines > 0;
429             tokens.dumpRest();
430             if (lim && (truncated || matchedLines == limit_max_lines) && out != null) {
431                 out.write("<a href=\"" + Util.uriEncodePath(morePrefix) + pathE + "?" + queryAsURI + "\">[all...]</a>");
432             }
433         } catch (IOException e) {
434             LOGGER.log(Level.WARNING, "Could not get context for " + path, e);
435         } finally {
436             IOUtils.close(in);
437 
438             if (out != null) {
439                 try {
440                     out.flush();
441                 } catch (IOException e) {
442                     LOGGER.log(Level.WARNING, "Failed to flush stream: ", e);
443                 }
444             }
445         }
446         return anything;
447     }
448 }
449