1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * See LICENSE.txt included in this distribution for the specific 9 * language governing permissions and limitations under the License. 10 * 11 * When distributing Covered Code, include this CDDL HEADER in each 12 * file and include the License file at LICENSE.txt. 13 * If applicable, add the following below this CDDL HEADER, with the 14 * fields enclosed by brackets "[]" replaced with your own identifying 15 * information: Portions Copyright [yyyy] [name of copyright owner] 16 * 17 * CDDL HEADER END 18 */ 19 20 /* 21 * Copyright (c) 2018, 2020, Chris Fraire <cfraire@me.com>. 22 */ 23 package org.opengrok.indexer.search.context; 24 25 import java.util.SortedMap; 26 import java.util.TreeMap; 27 import java.util.regex.Matcher; 28 import org.apache.lucene.search.uhighlight.Passage; 29 import org.opengrok.indexer.util.SourceSplitter; 30 import org.opengrok.indexer.util.StringUtils; 31 32 /** 33 * Represents an object that can translate {@link Passage} instances into 34 * indexed {@link LineHighlight} instances, taking into account a configurable 35 * number of leading and trailing lines of context for each match. 36 */ 37 public class PassageConverter { 38 39 private final ContextArgs args; 40 41 /** 42 * Initializes a converter for the specified arguments. 43 * @param args required instance 44 */ PassageConverter(ContextArgs args)45 public PassageConverter(ContextArgs args) { 46 if (args == null) { 47 throw new IllegalArgumentException("args is null"); 48 } 49 this.args = args; 50 } 51 52 /** 53 * @return the initialized value 54 */ getArgs()55 public ContextArgs getArgs() { 56 return args; 57 } 58 59 /** 60 * Converts the specified passages into a sorted map of 61 * {@link LineHighlight} instances keyed by line offsets. 62 * @param passages a defined instance 63 * @param splitter a defined instance 64 * @return a defined instance 65 */ convert(Passage[] passages, SourceSplitter splitter)66 public SortedMap<Integer, LineHighlight> convert(Passage[] passages, 67 SourceSplitter splitter) { 68 69 SortedMap<Integer, LineHighlight> res = new TreeMap<>(); 70 for (Passage passage : passages) { 71 int start = passage.getStartOffset(); 72 int end = passage.getEndOffset(); 73 if (start >= end) { 74 continue; 75 } 76 77 int m = splitter.findLineIndex(start); 78 if (m < 0) { 79 continue; 80 } 81 int n = splitter.findLineIndex(end - 1); 82 if (n < 0) { 83 continue; 84 } 85 86 m = Math.max(0, m - args.getContextSurround()); 87 n = Math.min(splitter.count() - 1, n + args.getContextSurround()); 88 89 // Ensure an entry in `res' for every passage line. 90 for (int i = m; i <= n; ++i) { 91 if (!res.containsKey(i)) { 92 res.put(i, new LineHighlight(i)); 93 } 94 } 95 96 // Create LineHighlight entries for passage matches. 97 for (int i = 0; i < passage.getNumMatches(); ++i) { 98 int mstart = passage.getMatchStarts()[i]; 99 int mm = splitter.findLineIndex(mstart); 100 int mend = passage.getMatchEnds()[i]; 101 int nn = splitter.findLineIndex(mend - 1); 102 if (mstart < mend && mm >= m && mm <= n && nn >= m && nn <= n) { 103 if (mm == nn) { 104 int lbeg = splitter.getOffset(mm); 105 int lstart = mstart - lbeg; 106 int lend = mend - lbeg; 107 LineHighlight lhigh = res.get(mm); 108 lhigh.addMarkup(PhraseHighlight.create(lstart, lend)); 109 } else { 110 int lbeg = splitter.getOffset(mm); 111 int loff = mstart - lbeg; 112 LineHighlight lhigh = res.get(mm); 113 lhigh.addMarkup(PhraseHighlight.createStarter(loff)); 114 115 lbeg = splitter.getOffset(nn); 116 loff = mend - lbeg; 117 lhigh = res.get(nn); 118 lhigh.addMarkup(PhraseHighlight.createEnder(loff)); 119 120 /* 121 * Designate any intermediate lines as 122 * wholly-highlighted 123 */ 124 for (int j = mm + 1; j <= nn - 1; ++j) { 125 lhigh = res.get(j); 126 lhigh.addMarkup(PhraseHighlight.createEntire()); 127 } 128 } 129 } 130 } 131 } 132 133 /* 134 * Condense PhraseHighlight instances within lines, and elide as 135 * necessary to the reportable length. 136 */ 137 for (LineHighlight lhi : res.values()) { 138 lhi.condenseMarkups(); 139 String line = splitter.getLine(lhi.getLineno()); 140 Matcher eolMatcher = StringUtils.STANDARD_EOL.matcher(line); 141 if (eolMatcher.find()) { 142 line = line.substring(0, eolMatcher.start()); 143 } 144 elideLine(lhi, line); 145 } 146 147 return res; 148 } 149 elideLine(LineHighlight lhi, String line)150 private void elideLine(LineHighlight lhi, String line) { 151 int excess = line.length() - args.getContextWidth(); 152 if (excess <= 0) { 153 return; 154 } 155 156 /* 157 * The search/ view does not show leading whitespace anyway, so elide it 158 * straight away. 159 */ 160 int nwhsp0 = countStartingWhitespace(line); 161 if (nwhsp0 > 0) { 162 // Account for an ellipsis. 163 ++excess; 164 int leftAdj = Math.min(nwhsp0, excess); 165 lhi.setLelide(leftAdj); 166 excess -= leftAdj; 167 if (excess <= 0) { 168 return; 169 } 170 } 171 172 int nwhspz = countEndingWhitespace(line); 173 /* 174 * If the end of the line has enough whitespace to be elided (pre- 175 * accounting for another ellipsis), just truncate it. 176 */ 177 if (lhi.countMarkups() < 1 || 178 lhi.getMarkup(lhi.countMarkups() - 1).getLineEnd() < 179 args.getContextWidth() || 180 nwhspz >= excess + 1) { 181 // Account for an ellipsis. 182 ++excess; 183 lhi.setRelide(line.length() - excess); 184 return; 185 } 186 187 /* 188 * Find the width of bounds of markups. 189 */ 190 int lbound = -1, rbound = -1; 191 if (lhi.countMarkups() > 0) { 192 PhraseHighlight phi = lhi.getMarkup(0); 193 lbound = phi.getLineStart(); 194 if (lbound >= line.length()) { 195 lbound = line.length() - 1; 196 } 197 198 phi = lhi.getMarkup(lhi.countMarkups() - 1); 199 rbound = phi.getLineEnd(); 200 if (rbound > line.length()) { 201 rbound = line.length(); 202 } 203 } 204 205 /* 206 * If the markup bounds are separated from the left margin, calculate 207 * elision bounds that contain as much of the highlighted area as 208 * possible, favoring the leftward highlights if the highlighted area 209 * exceeds the context-width. 210 */ 211 if (lbound > 0 && rbound >= lbound) { 212 /* 213 * First use a rough estimate of three-quarters of a context-width 214 * before the midpoint of lbound and rbound. 215 */ 216 int calcLeft = Math.max(0, (lbound + rbound) / 2 - 217 args.getContextWidth() * 3 / 4 - 1); 218 // If past the lbound, then snap it left. 219 if (calcLeft > lbound) { 220 calcLeft = lbound; 221 } 222 if (calcLeft > lhi.getLelide()) { 223 // Possibly account for an ellipsis. 224 if (lhi.getLelide() < 1) { 225 ++excess; 226 } 227 int leftAdj = Math.min(calcLeft - lhi.getLelide(), excess); 228 excess -= leftAdj; 229 lhi.setLelide(lhi.getLelide() + leftAdj); 230 } 231 } 232 233 // Possibly truncate the line finally. 234 if (excess > 0) { 235 // Account for another ellipsis. 236 ++excess; 237 lhi.setRelide(line.length() - excess); 238 /* 239 * Possibly shift the left elision leftward in case the rough 240 * estimate above was too far rightward. 241 */ 242 if (lhi.getLelide() > 0) { 243 lhi.setLelide(lhi.getRelide() - args.getContextWidth() + 244 2 /* two ellipses */); 245 } 246 } 247 } 248 countStartingWhitespace(String line)249 private int countStartingWhitespace(String line) { 250 int n = 0; 251 for (int i = 0; i < line.length(); ++i) { 252 char c = line.charAt(i); 253 if (!Character.isWhitespace(c)) { 254 break; 255 } 256 ++n; 257 } 258 return n; 259 } 260 countEndingWhitespace(String line)261 private int countEndingWhitespace(String line) { 262 int n = 0; 263 for (int i = line.length() - 1; i >= 0; --i) { 264 char c = line.charAt(i); 265 if (!Character.isWhitespace(c)) { 266 break; 267 } 268 ++n; 269 } 270 return n; 271 } 272 } 273