/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved.
 * Portions Copyright (c) 2011, Jens Elkner.
 * Portions Copyright (c) 2018, 2020, Chris Fraire <cfraire@me.com>.
 */
package org.opengrok.indexer.search.context;

import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.opengrok.indexer.analysis.AbstractAnalyzer;
import org.opengrok.indexer.analysis.Definitions;
import org.opengrok.indexer.analysis.Scopes;
import org.opengrok.indexer.analysis.Scopes.Scope;
import org.opengrok.indexer.analysis.plain.PlainAnalyzerFactory;
import org.opengrok.indexer.configuration.RuntimeEnvironment;
import org.opengrok.indexer.logger.LoggerFactory;
import org.opengrok.indexer.search.Hit;
import org.opengrok.indexer.search.QueryBuilder;
import org.opengrok.indexer.util.IOUtils;
import org.opengrok.indexer.web.Util;

/**
 * Produces the matching lines ("context") of a source file for a query,
 * since Lucene does not easily give the match context.
 */
public class Context {

    /**
     * Size (in chars) of the buffer used when only the beginning of a file is
     * scanned for matches.
     */
    static final int MAXFILEREAD = 1024 * 1024;

    private static final Logger LOGGER = LoggerFactory.getLogger(Context.class);

    private final Query query;
    private final QueryBuilder qbuilder;
    private final LineMatcher[] m;
    private final String queryAsURI;

    /**
     * Alternating flag handed to created {@link Hit} objects and to the line
     * tokenizer; flipped by {@link #toggleAlt()}.
     */
    private boolean alt = true;

    /**
     * Map whose keys tell which fields to look for in the source file, and
     * whose values tell if the field is case insensitive (true for
     * insensitivity, false for sensitivity).
     */
    private static final Map<String, Boolean> TOKEN_FIELDS = Map.of(
            QueryBuilder.FULL, Boolean.TRUE,
            QueryBuilder.REFS, Boolean.FALSE,
            QueryBuilder.DEFS, Boolean.FALSE
    );

    /**
     * Initializes a context generator for matchers derived from the specified
     * {@code query} -- which might be {@code null} and result in
     * {@link #isEmpty()} returning {@code true}.
     * @param query the query to generate the result for
     * @param qbuilder required builder used to create {@code query}
     * @throws IllegalArgumentException if {@code qbuilder} is {@code null}
     */
    public Context(Query query, QueryBuilder qbuilder) {
        if (qbuilder == null) {
            throw new IllegalArgumentException("qbuilder is null");
        }

        this.query = query;
        this.qbuilder = qbuilder;
        QueryMatchers qm = new QueryMatchers();
        m = qm.getMatchers(query, TOKEN_FIELDS);
        if (m != null) {
            queryAsURI = buildQueryAsURI(qbuilder.getQueries());
        } else {
            queryAsURI = "";
        }
    }

    /**
     * Toggles the alternating value (initially {@code true}).
     */
    public void toggleAlt() {
        alt = !alt;
    }

    /**
     * Tells whether no line matchers could be derived from the query.
     * @return {@code true} if this instance has no matchers
     */
    public boolean isEmpty() {
        return m == null;
    }

    /**
     * Look for context for this instance's initialized query in a search result
     * {@link Document}, and output according to the parameters.
     * @param env required environment
     * @param searcher required search that produced the document
     * @param docId document ID for producing context
     * @param dest required target to write
     * @param urlPrefix prefix for links
     * @param morePrefix optional link to more... page
     * @param limit a value indicating if the number of matching lines should be
     * limited. N.b. unlike
     * {@link #getContext(java.io.Reader, java.io.Writer, java.lang.String, java.lang.String, java.lang.String,
     * org.opengrok.indexer.analysis.Definitions, boolean, boolean, java.util.List, org.opengrok.indexer.analysis.Scopes)},
     * the {@code limit} argument will not be interpreted w.r.t.
     * {@link RuntimeEnvironment#isQuickContextScan()}.
     * @param tabSize optional positive tab size that must accord with the value
     * used when indexing or else postings may be wrongly shifted until
     * re-indexing
     * @return Did it get any matching context?
     */
    public boolean getContext2(RuntimeEnvironment env, IndexSearcher searcher,
            int docId, Appendable dest, String urlPrefix, String morePrefix,
            boolean limit, int tabSize) {

        if (isEmpty()) {
            return false;
        }

        Document doc;
        try {
            doc = searcher.doc(docId);
        } catch (IOException e) {
            LOGGER.log(Level.WARNING, "ERROR getting searcher doc(int)", e);
            return false;
        }

        // Definitions are optional: a document without the field yields null tags.
        Definitions tags = null;
        try {
            IndexableField tagsField = doc.getField(QueryBuilder.TAGS);
            if (tagsField != null) {
                tags = Definitions.deserialize(tagsField.binaryValue().bytes);
            }
        } catch (ClassNotFoundException | IOException e) {
            LOGGER.log(Level.WARNING, "ERROR Definitions.deserialize(...)", e);
            return false;
        }

        // Scopes fall back to an empty instance when the field is absent.
        Scopes scopes;
        try {
            IndexableField scopesField = doc.getField(QueryBuilder.SCOPES);
            if (scopesField != null) {
                scopes = Scopes.deserialize(scopesField.binaryValue().bytes);
            } else {
                scopes = new Scopes();
            }
        } catch (ClassNotFoundException | IOException e) {
            LOGGER.log(Level.WARNING, "ERROR Scopes.deserialize(...)", e);
            return false;
        }

        /*
         * UnifiedHighlighter demands an analyzer "even if in some
         * circumstances it isn't used"; here it is not meant to be used.
         */
        PlainAnalyzerFactory fac = PlainAnalyzerFactory.DEFAULT_INSTANCE;
        AbstractAnalyzer anz = fac.getAnalyzer();

        String path = doc.get(QueryBuilder.PATH);
        String pathE = Util.uriEncodePath(path);
        String urlPrefixE = urlPrefix == null ? "" : Util.uriEncodePath(urlPrefix);
        String moreURL = morePrefix == null ? null : Util.uriEncodePath(morePrefix) + pathE + "?" + queryAsURI;

        ContextArgs args = new ContextArgs(env.getContextSurround(), env.getContextLimit());
        /*
         * Lucene adds to the following value in FieldHighlighter, so avoid
         * integer overflow by not using Integer.MAX_VALUE -- Short is good
         * enough.
         */
        int linelimit = limit ? args.getContextLimit() : Short.MAX_VALUE;

        ContextFormatter formatter = new ContextFormatter(args);
        formatter.setUrl(urlPrefixE + pathE);
        formatter.setDefs(tags);
        formatter.setScopes(scopes);
        formatter.setMoreUrl(moreURL);
        formatter.setMoreLimit(linelimit);

        OGKUnifiedHighlighter uhi = new OGKUnifiedHighlighter(env, searcher, anz);
        uhi.setBreakIterator(StrictLineBreakIterator::new);
        uhi.setFormatter(formatter);
        uhi.setTabSize(tabSize);

        try {
            List<String> fieldList = qbuilder.getContextFields();
            String[] fields = fieldList.toArray(new String[0]);

            String res = uhi.highlightFieldsUnion(fields, query, docId,
                    linelimit);
            if (res != null) {
                dest.append(res);
                return true;
            }
        } catch (IOException e) {
            LOGGER.log(Level.WARNING, "ERROR highlightFieldsUnion(...)", e);
            // Continue below.
        } catch (Throwable e) {
            LOGGER.log(Level.SEVERE, "ERROR highlightFieldsUnion(...)", e);
            throw e;
        }
        return false;
    }

    /**
     * Build the {@code queryAsURI} string that holds the query in a form
     * that's suitable for sending it as part of a URI.
     *
     * @param subqueries a map containing the query text for each field
     * @return {@code field=encodedText} pairs joined by {@code &}, or an empty
     * string if {@code subqueries} is empty
     */
    private String buildQueryAsURI(Map<String, String> subqueries) {
        if (subqueries.isEmpty()) {
            return "";
        }
        StringBuilder sb = new StringBuilder();
        for (Map.Entry<String, String> entry : subqueries.entrySet()) {
            String field = entry.getKey();
            String queryText = entry.getValue();
            sb.append(field).append("=").append(Util.uriEncode(queryText)).append('&');
        }
        // Drop the trailing '&' appended after the last pair.
        sb.setLength(sb.length() - 1);
        return sb.toString();
    }

    /**
     * Convenience overload of
     * {@link #getContext(Reader, Writer, String, String, String, Definitions, boolean, boolean, List, Scopes)}
     * that passes {@code null} for the scopes.
     *
     * @param in File to be matched
     * @param out to write the context
     * @param urlPrefix URL prefix
     * @param morePrefix to link to more... page
     * @param path path of the file
     * @param tags format to highlight defs.
     * @param limit should the number of matching lines be limited?
     * @param isDefSearch is definition search
     * @param hits list of hits
     * @return Did it get any matching context?
     */
    public boolean getContext(Reader in, Writer out, String urlPrefix,
            String morePrefix, String path, Definitions tags,
            boolean limit, boolean isDefSearch, List<Hit> hits) {
        return getContext(in, out, urlPrefix, morePrefix, path, tags, limit, isDefSearch, hits, null);
    }

    /**
     * Finds the lines of a file that match this instance's query and emits
     * them either as HTML to {@code out} or as {@link Hit} objects added to
     * {@code hits}.
     * <p>
     * When {@code in} is {@code null} only the definitions in {@code tags} are
     * matched (against each tag's symbol) and emitted; otherwise matching tags
     * are collected and handed to the line tokenizer which scans {@code in}.
     * <p>
     * Closes the given <var>in</var> reader on return.
     *
     * @param in File to be matched
     * @param out to write the context
     * @param urlPrefix URL prefix
     * @param morePrefix to link to more... page
     * @param path path of the file
     * @param tags format to highlight defs.
     * @param limit should the number of matching lines be limited? Only
     * honored if {@link RuntimeEnvironment#isQuickContextScan()} is
     * {@code true}.
     * @param isDefSearch is definition search
     * @param hits list of hits
     * @param scopes scopes object
     * @return Did it get any matching context?
     */
    public boolean getContext(Reader in, Writer out, String urlPrefix,
            String morePrefix, String path, Definitions tags,
            boolean limit, boolean isDefSearch, List<Hit> hits, Scopes scopes) {
        if (m == null) {
            IOUtils.close(in);
            return false;
        }
        boolean anything = false;
        TreeMap<Integer, String[]> matchingTags = null;
        String urlPrefixE = (urlPrefix == null) ? "" : Util.uriEncodePath(urlPrefix);
        String pathE = Util.uriEncodePath(path);
        if (tags != null) {
            matchingTags = new TreeMap<>();
            try {
                for (Definitions.Tag tag : tags.getTags()) {
                    for (LineMatcher lineMatcher : m) {
                        if (lineMatcher.match(tag.symbol) == LineMatcher.MATCHED) {
                            String scope = null;
                            String scopeUrl = null;
                            if (scopes != null) {
                                Scope scp = scopes.getScope(tag.line);
                                scope = scp.getName() + "()";
                                // Bare URL only: it is placed inside an href
                                // attribute when written out below.
                                scopeUrl = urlPrefixE + pathE + "#" + scp.getLineFrom();
                            }

                            /* desc[0] is matched symbol
                             * desc[1] is line number
                             * desc[2] is type
                             * desc[3] is matching line;
                             * desc[4] is scope
                             */
                            String[] desc = {
                                tag.symbol,
                                Integer.toString(tag.line),
                                tag.type,
                                tag.text,
                                scope,
                            };
                            if (in == null) {
                                if (out == null) {
                                    Hit hit = new Hit(path,
                                            Util.htmlize(desc[3]).replace(
                                                    desc[0], "<b>" + desc[0] + "</b>"),
                                            desc[1], false, alt);
                                    hits.add(hit);
                                } else {
                                    out.write("<a class=\"s\" href=\"");
                                    out.write(urlPrefixE);
                                    out.write(pathE);
                                    out.write("#");
                                    out.write(desc[1]);
                                    out.write("\"><span class=\"l\">");
                                    out.write(desc[1]);
                                    out.write("</span> ");
                                    out.write(Util.htmlize(desc[3]).replace(
                                            desc[0], "<b>" + desc[0] + "</b>"));
                                    out.write("</a> ");

                                    if (desc[4] != null) {
                                        out.write("<span class=\"scope\"><a href=\"");
                                        out.write(scopeUrl);
                                        out.write("\">in ");
                                        out.write(desc[4]);
                                        out.write("</a></span> ");
                                    }
                                    out.write("<i>");
                                    out.write(desc[2]);
                                    out.write("</i><br/>");
                                }
                                anything = true;
                            } else {
                                matchingTags.put(tag.line, desc);
                            }
                            // First matcher that matches the symbol wins.
                            break;
                        }
                    }
                }
            } catch (Exception e) {
                if (hits != null) {
                    // @todo verify why we ignore all exceptions?
                    LOGGER.log(Level.WARNING, "Could not get context for " + path, e);
                }
            }
        }

        // Just to get the matching tag send a null in
        if (in == null) {
            return anything;
        }

        PlainLineTokenizer tokens = new PlainLineTokenizer(null);
        boolean truncated = false;
        boolean lim = limit;
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        if (!env.isQuickContextScan()) {
            lim = false;
        }

        if (lim) {
            char[] buffer = new char[MAXFILEREAD];
            int charsRead;
            try {
                charsRead = in.read(buffer);
                if (charsRead == MAXFILEREAD) {
                    // we probably only read parts of the file, so set the
                    // truncated flag to enable the [all...] link that
                    // requests all matches
                    truncated = true;
                    // truncate to last line read (don't look more than 100
                    // characters back)
                    for (int i = charsRead - 1; i > charsRead - 100; i--) {
                        if (buffer[i] == '\n') {
                            charsRead = i;
                            break;
                        }
                    }
                }
            } catch (IOException e) {
                LOGGER.log(Level.WARNING, "An error occurred while reading data", e);
                // Honor the contract that the reader is closed on return.
                IOUtils.close(in);
                return anything;
            }
            // Reader.read(char[]) reports end of stream as -1, so treat any
            // non-positive count as "nothing to scan".
            if (charsRead <= 0) {
                IOUtils.close(in);
                return anything;
            }

            tokens.reInit(buffer, charsRead, out, urlPrefixE + pathE + "#", matchingTags, scopes);
        } else {
            tokens.reInit(in, out, urlPrefixE + pathE + "#", matchingTags, scopes);
        }

        if (hits != null) {
            tokens.setAlt(alt);
            tokens.setHitList(hits);
            tokens.setFilename(path);
        }

        int limitMaxLines = env.getContextLimit();
        try {
            String token;
            int matchState;
            int matchedLines = 0;
            while ((token = tokens.yylex()) != null && (!lim ||
                    matchedLines < limitMaxLines)) {
                for (LineMatcher lineMatcher : m) {
                    matchState = lineMatcher.match(token);
                    if (matchState == LineMatcher.MATCHED) {
                        // For a definition search only lines carrying a
                        // matching tag are printed, but every match counts.
                        if (!isDefSearch || tokens.tags.containsKey(tokens.markedLine)) {
                            tokens.printContext();
                        }
                        matchedLines++;
                        break;
                    } else if (matchState == LineMatcher.WAIT) {
                        tokens.holdOn();
                    } else {
                        tokens.neverMind();
                    }
                }
            }
            anything = matchedLines > 0;
            tokens.dumpRest();
            if (lim && (truncated || matchedLines == limitMaxLines) && out != null) {
                out.write("<a href=\"" + Util.uriEncodePath(morePrefix) + pathE + "?" + queryAsURI + "\">[all...]</a>");
            }
        } catch (IOException e) {
            LOGGER.log(Level.WARNING, "Could not get context for " + path, e);
        } finally {
            IOUtils.close(in);

            if (out != null) {
                try {
                    out.flush();
                } catch (IOException e) {
                    LOGGER.log(Level.WARNING, "Failed to flush stream: ", e);
                }
            }
        }
        return anything;
    }
}