xref: /OpenGrok/opengrok-indexer/src/main/java/org/opengrok/indexer/search/context/PassageConverter.java (revision 5d9f3aa0ca3da3a714233f987fa732f62c0965f6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2018, 2020, Chris Fraire <cfraire@me.com>.
22  */
23 package org.opengrok.indexer.search.context;
24 
25 import java.util.SortedMap;
26 import java.util.TreeMap;
27 import java.util.regex.Matcher;
28 import org.apache.lucene.search.uhighlight.Passage;
29 import org.opengrok.indexer.util.SourceSplitter;
30 import org.opengrok.indexer.util.StringUtils;
31 
32 /**
33  * Represents an object that can translate {@link Passage} instances into
34  * indexed {@link LineHighlight} instances, taking into account a configurable
35  * number of leading and trailing lines of context for each match.
36  */
37 public class PassageConverter {
38 
39     private final ContextArgs args;
40 
41     /**
42      * Initializes a converter for the specified arguments.
43      * @param args required instance
44      */
PassageConverter(ContextArgs args)45     public PassageConverter(ContextArgs args) {
46         if (args == null) {
47             throw new IllegalArgumentException("args is null");
48         }
49         this.args = args;
50     }
51 
52     /**
53      * @return the initialized value
54      */
getArgs()55     public ContextArgs getArgs() {
56         return args;
57     }
58 
59     /**
60      * Converts the specified passages into a sorted map of
61      * {@link LineHighlight} instances keyed by line offsets.
62      * @param passages a defined instance
63      * @param splitter a defined instance
64      * @return a defined instance
65      */
convert(Passage[] passages, SourceSplitter splitter)66     public SortedMap<Integer, LineHighlight> convert(Passage[] passages,
67         SourceSplitter splitter) {
68 
69         SortedMap<Integer, LineHighlight> res = new TreeMap<>();
70         for (Passage passage : passages) {
71             int start = passage.getStartOffset();
72             int end = passage.getEndOffset();
73             if (start >= end) {
74                 continue;
75             }
76 
77             int m = splitter.findLineIndex(start);
78             if (m < 0) {
79                 continue;
80             }
81             int n = splitter.findLineIndex(end - 1);
82             if (n < 0) {
83                 continue;
84             }
85 
86             m = Math.max(0, m - args.getContextSurround());
87             n = Math.min(splitter.count() - 1, n + args.getContextSurround());
88 
89             // Ensure an entry in `res' for every passage line.
90             for (int i = m; i <= n; ++i) {
91                 if (!res.containsKey(i)) {
92                     res.put(i, new LineHighlight(i));
93                 }
94             }
95 
96             // Create LineHighlight entries for passage matches.
97             for (int i = 0; i < passage.getNumMatches(); ++i) {
98                 int mstart = passage.getMatchStarts()[i];
99                 int mm = splitter.findLineIndex(mstart);
100                 int mend = passage.getMatchEnds()[i];
101                 int nn = splitter.findLineIndex(mend - 1);
102                 if (mstart < mend && mm >= m && mm <= n && nn >= m && nn <= n) {
103                     if (mm == nn) {
104                         int lbeg = splitter.getOffset(mm);
105                         int lstart = mstart - lbeg;
106                         int lend = mend - lbeg;
107                         LineHighlight lhigh = res.get(mm);
108                         lhigh.addMarkup(PhraseHighlight.create(lstart, lend));
109                     } else {
110                         int lbeg = splitter.getOffset(mm);
111                         int loff = mstart - lbeg;
112                         LineHighlight lhigh = res.get(mm);
113                         lhigh.addMarkup(PhraseHighlight.createStarter(loff));
114 
115                         lbeg = splitter.getOffset(nn);
116                         loff = mend - lbeg;
117                         lhigh = res.get(nn);
118                         lhigh.addMarkup(PhraseHighlight.createEnder(loff));
119 
120                         /*
121                          * Designate any intermediate lines as
122                          * wholly-highlighted
123                          */
124                         for (int j = mm + 1; j <= nn - 1; ++j) {
125                             lhigh = res.get(j);
126                             lhigh.addMarkup(PhraseHighlight.createEntire());
127                         }
128                     }
129                 }
130             }
131         }
132 
133         /*
134          * Condense PhraseHighlight instances within lines, and elide as
135          * necessary to the reportable length.
136          */
137         for (LineHighlight lhi : res.values()) {
138             lhi.condenseMarkups();
139             String line = splitter.getLine(lhi.getLineno());
140             Matcher eolMatcher = StringUtils.STANDARD_EOL.matcher(line);
141             if (eolMatcher.find()) {
142                 line = line.substring(0, eolMatcher.start());
143             }
144             elideLine(lhi, line);
145         }
146 
147         return res;
148     }
149 
elideLine(LineHighlight lhi, String line)150     private void elideLine(LineHighlight lhi, String line) {
151         int excess = line.length() - args.getContextWidth();
152         if (excess <= 0) {
153             return;
154         }
155 
156         /*
157          * The search/ view does not show leading whitespace anyway, so elide it
158          * straight away.
159          */
160         int nwhsp0 = countStartingWhitespace(line);
161         if (nwhsp0 > 0) {
162             // Account for an ellipsis.
163             ++excess;
164             int leftAdj = Math.min(nwhsp0, excess);
165             lhi.setLelide(leftAdj);
166             excess -= leftAdj;
167             if (excess <= 0) {
168                 return;
169             }
170         }
171 
172         int nwhspz = countEndingWhitespace(line);
173         /*
174          * If the end of the line has enough whitespace to be elided (pre-
175          * accounting for another ellipsis), just truncate it.
176          */
177         if (lhi.countMarkups() < 1 ||
178                 lhi.getMarkup(lhi.countMarkups() - 1).getLineEnd() <
179                         args.getContextWidth() ||
180                 nwhspz >= excess + 1) {
181             // Account for an ellipsis.
182             ++excess;
183             lhi.setRelide(line.length() - excess);
184             return;
185         }
186 
187         /*
188          * Find the width of bounds of markups.
189          */
190         int lbound = -1, rbound = -1;
191         if (lhi.countMarkups() > 0) {
192             PhraseHighlight phi = lhi.getMarkup(0);
193             lbound = phi.getLineStart();
194             if (lbound >= line.length()) {
195                 lbound = line.length() - 1;
196             }
197 
198             phi = lhi.getMarkup(lhi.countMarkups() - 1);
199             rbound = phi.getLineEnd();
200             if (rbound > line.length()) {
201                 rbound = line.length();
202             }
203         }
204 
205         /*
206          * If the markup bounds are separated from the left margin, calculate
207          * elision bounds that contain as much of the highlighted area as
208          * possible, favoring the leftward highlights if the highlighted area
209          * exceeds the context-width.
210          */
211         if (lbound > 0 && rbound >= lbound) {
212             /*
213              * First use a rough estimate of three-quarters of a context-width
214              * before the midpoint of lbound and rbound.
215              */
216             int calcLeft = Math.max(0, (lbound + rbound) / 2 -
217                 args.getContextWidth() * 3 / 4 - 1);
218             // If past the lbound, then snap it left.
219             if (calcLeft > lbound) {
220                 calcLeft = lbound;
221             }
222             if (calcLeft > lhi.getLelide()) {
223                 // Possibly account for an ellipsis.
224                 if (lhi.getLelide() < 1) {
225                     ++excess;
226                 }
227                 int leftAdj = Math.min(calcLeft - lhi.getLelide(), excess);
228                 excess -= leftAdj;
229                 lhi.setLelide(lhi.getLelide() + leftAdj);
230             }
231         }
232 
233         // Possibly truncate the line finally.
234         if (excess > 0) {
235             // Account for another ellipsis.
236             ++excess;
237             lhi.setRelide(line.length() - excess);
238             /*
239              * Possibly shift the left elision leftward in case the rough
240              * estimate above was too far rightward.
241              */
242             if (lhi.getLelide() > 0) {
243                 lhi.setLelide(lhi.getRelide() - args.getContextWidth() +
244                         2 /* two ellipses */);
245             }
246         }
247     }
248 
countStartingWhitespace(String line)249     private int countStartingWhitespace(String line) {
250         int n = 0;
251         for (int i = 0; i < line.length(); ++i) {
252             char c = line.charAt(i);
253             if (!Character.isWhitespace(c)) {
254                 break;
255             }
256             ++n;
257         }
258         return n;
259     }
260 
countEndingWhitespace(String line)261     private int countEndingWhitespace(String line) {
262         int n = 0;
263         for (int i = line.length() - 1; i >= 0; --i) {
264             char c = line.charAt(i);
265             if (!Character.isWhitespace(c)) {
266                 break;
267             }
268             ++n;
269         }
270         return n;
271     }
272 }
273