xref: /OpenGrok/opengrok-indexer/src/main/java/org/opengrok/indexer/search/context/ContextFormatter.java (revision 5d9f3aa0ca3da3a714233f987fa732f62c0965f6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2018, 2020, Chris Fraire <cfraire@me.com>.
22  */
23 package org.opengrok.indexer.search.context;
24 
25 import java.io.IOException;
26 import java.util.ArrayList;
27 import java.util.List;
28 import java.util.SortedMap;
29 import java.util.logging.Level;
30 import java.util.logging.Logger;
31 import java.util.regex.Matcher;
32 import java.util.regex.Pattern;
33 import org.apache.lucene.search.uhighlight.Passage;
34 import org.apache.lucene.search.uhighlight.PassageFormatter;
35 import org.opengrok.indexer.analysis.Definitions;
36 import org.opengrok.indexer.analysis.Definitions.Tag;
37 import org.opengrok.indexer.analysis.Scopes;
38 import org.opengrok.indexer.logger.LoggerFactory;
39 import org.opengrok.indexer.util.SourceSplitter;
40 import org.opengrok.indexer.util.StringUtils;
41 import org.opengrok.indexer.web.HtmlConsts;
42 import org.opengrok.indexer.web.Util;
43 
44 /**
45  * Represents a subclass of {@link PassageFormatter} that uses
46  * {@link PassageConverter}.
47  */
48 public class ContextFormatter extends PassageFormatter {
49 
50     private static final String MORE_LABEL = "[all " + HtmlConsts.HELLIP + "]";
51 
52     private static final Logger LOGGER = LoggerFactory.getLogger(
53         ContextFormatter.class);
54 
55     /**
56      * Matches a non-word character.
57      */
58     private static final Pattern NONWORD_CHAR = Pattern.compile("(?U)\\W");
59 
60     private final PassageConverter cvt;
61     private final List<String> marks = new ArrayList<>();
62     private String url;
63     private Definitions defs;
64     private Scopes scopes;
65 
66     /**
67      * An optional URL for linking when the {@link #moreLimit} (if positive) is
68      * reached.
69      */
70     private String moreUrl;
71     private int moreLimit;
72 
73     /**
74      * Cached splitter, keyed by {@link #originalText}.
75      */
76     private SourceSplitter splitter;
77     private String originalText;
78 
79     /**
80      * Initializes a formatter for the specified arguments.
81      * @param args required instance
82      */
ContextFormatter(ContextArgs args)83     public ContextFormatter(ContextArgs args) {
84         this.cvt = new PassageConverter(args);
85     }
86 
87     /**
88      * Gets the initialized value.
89      * @return a defined instance
90      */
getArgs()91     public ContextArgs getArgs() {
92         return cvt.getArgs();
93     }
94 
95     /**
96      * Gets the required URL to use for linking lines.
97      * @return the URL or {@code null}
98      */
getUrl()99     public String getUrl() {
100         return url;
101     }
102 
103     /**
104      * Sets the required URL to use for linking lines.
105      * @param value the URL to use
106      */
setUrl(String value)107     public void setUrl(String value) {
108         this.url = value;
109     }
110 
111     /**
112      * Gets the optional URL to use if {@link #getMoreLimit()} is reached.
113      * @return the URL or {@code null}
114      */
getMoreUrl()115     public String getMoreUrl() {
116         return moreUrl;
117     }
118 
119     /**
120      * Sets the optional URL to use if {@link #getMoreLimit()} is reached.
121      * @param value the URL to use
122      */
setMoreUrl(String value)123     public void setMoreUrl(String value) {
124         this.moreUrl = value;
125     }
126 
127     /**
128      * Gets the optional line limit to specify (if positive) a maximum number
129      * of lines to format and -- if {@link #getMoreUrl()} is defined -- a "more"
130      * link to display. Default is zero (i.e. inactive).
131      * @return the line limit value
132      */
getMoreLimit()133     public int getMoreLimit() {
134         return moreLimit;
135     }
136 
137     /**
138      * Sets the optional line limit to specify (if positive) a maximum number
139      * of lines to format and -- if {@link #getMoreUrl()} is defined -- a "more"
140      * link to display.
141      * @param value the line limit
142      */
setMoreLimit(int value)143     public void setMoreLimit(int value) {
144         if (value < 0) {
145             throw new IllegalArgumentException("value is negative");
146         }
147         this.moreLimit = value;
148     }
149 
150     /**
151      * Gets the optional definitions.
152      * @return the defs
153      */
getDefs()154     public Definitions getDefs() {
155         return defs;
156     }
157 
158     /**
159      * Sets the optional definitions.
160      * @param value definitions
161      */
setDefs(Definitions value)162     public void setDefs(Definitions value) {
163         this.defs = value;
164     }
165 
166     /**
167      * Gets the optional scopes to use.
168      * @return the scopes
169      */
getScopes()170     public Scopes getScopes() {
171         return scopes;
172     }
173 
174     /**
175      * Sets the optional scopes to use.
176      * @param value scopes
177      */
setScopes(Scopes value)178     public void setScopes(Scopes value) {
179         this.scopes = value;
180     }
181 
182     /**
183      * Splits {@code originalText} using {@link SourceSplitter}, converts
184      * passages using {@link PassageConverter}, and formats for presentation in
185      * OpenGrok UI using the instance's properties (e.g., {@link #getUrl()} and
186      * {@link #getDefs()}).
187      * @param passages a required instance
188      * @param originalText a required instance
189      * @return a defined {@link FormattedLines} instance, which might be empty
190      * @throws IllegalStateException if {@link #getUrl()} is null
191      */
192     @Override
format(Passage[] passages, String originalText)193     public Object format(Passage[] passages, String originalText) {
194         String lineUrl = url;
195         if (lineUrl == null) {
196             throw new IllegalStateException("Url property is null");
197         }
198 
199         if (this.originalText == null || !this.originalText.equals(
200                 originalText)) {
201             splitter = new SourceSplitter();
202             splitter.reset(originalText);
203             this.originalText = originalText;
204         }
205 
206         FormattedLines res = new FormattedLines();
207         StringBuilder bld = new StringBuilder();
208         SortedMap<Integer, LineHighlight> lines = cvt.convert(passages,
209             splitter);
210         int numl = 0;
211         boolean limited = false;
212         for (LineHighlight lhi : lines.values()) {
213             ++numl;
214             if (moreLimit > 0 && numl > moreLimit) {
215                 limited = true;
216                 break;
217             }
218 
219             String line = splitter.getLine(lhi.getLineno());
220             Matcher eolMatcher = StringUtils.STANDARD_EOL.matcher(line);
221             if (eolMatcher.find()) {
222                 line = line.substring(0, eolMatcher.start());
223             }
224 
225             try {
226                 marks.clear();
227                 startLine(bld, lineUrl, lhi.getLineno());
228                 int loff = 0;
229                 int hioff = 0;
230                 while (loff < line.length()) {
231                     // If there are no more markups, use all remaining text.
232                     if (hioff >= lhi.countMarkups() ||
233                             lhi.getMarkup(hioff).getLineStart() >=
234                             line.length()) {
235                         lhi.hsub(bld, line, loff);
236                         break;
237                     }
238 
239                     PhraseHighlight phi = lhi.getMarkup(hioff++);
240 
241                     /*
242                      * If the highlight is a sub-string wholly within the
243                      * line, add it to the `marks' list.
244                      */
245                     if (phi.getLineStart() >= 0 &&
246                             phi.getLineEnd() <= line.length()) {
247                         marks.add(line.substring(phi.getLineStart(),
248                                 phi.getLineEnd()));
249                     }
250 
251                     // Append any line text preceding the phrase highlight ...
252                     if (phi.getLineStart() >= 0) {
253                         lhi.hsub(bld, line, loff, phi.getLineStart());
254                         loff += phi.getLineStart() - loff;
255                     }
256                     // ... then start the BOLD.
257                     bld.append(HtmlConsts.B);
258 
259                     // Include the text of the highlight ...
260                     if (phi.getLineEnd() >= line.length()) {
261                         lhi.hsub(bld, line, loff);
262                         loff = line.length();
263                     } else {
264                         lhi.hsub(bld, line, loff, phi.getLineEnd());
265                         loff += phi.getLineEnd() - loff;
266                     }
267                     // ... then end the BOLD.
268                     bld.append(HtmlConsts.ZB);
269                 }
270 
271                 finishLine(bld, lhi.getLineno(), marks);
272                 // Regardless of true EOL, write a <br/>.
273                 bld.append(HtmlConsts.BR);
274                 /**
275                  * Appending a LF here would hurt the more.jsp view, while
276                  * search.jsp (where getContext() does it) is indifferent -- so
277                  * skip it.
278                  */
279                 res.put(lhi.getLineno(), bld.toString());
280                 bld.setLength(0);
281             } catch (IOException e) {
282                 LOGGER.log(Level.SEVERE, "Could not format()", e);
283                 return res;
284             }
285         }
286 
287         res.setLimited(limited);
288         if (moreUrl != null) {
289             bld.append("<a href=\"");
290             bld.append(moreUrl);
291             bld.append("\">");
292             bld.append(MORE_LABEL);
293             bld.append("</a>");
294             bld.append(HtmlConsts.BR);
295             bld.append("\n");
296             res.setFooter(bld.toString());
297             bld.setLength(0);
298         }
299         return res;
300     }
301 
startLine(Appendable dest, String lineUrl, int lineOffset)302     private void startLine(Appendable dest, String lineUrl, int lineOffset)
303             throws IOException {
304         dest.append("<a class=\"s\" href=\"");
305         dest.append(lineUrl);
306         String num = String.valueOf(lineOffset + 1);
307         dest.append("#");
308         dest.append(num);
309         dest.append("\"><span class=\"l\">");
310         dest.append(num);
311         dest.append("</span> ");
312     }
313 
finishLine(Appendable dest, int lineOffset, List<String> marks)314     private void finishLine(Appendable dest, int lineOffset, List<String> marks)
315             throws IOException {
316         dest.append("</a>");
317         writeScope(lineOffset, dest);
318         writeTag(lineOffset, dest, marks);
319     }
320 
writeScope(int lineOffset, Appendable dest)321     private void writeScope(int lineOffset, Appendable dest)
322             throws IOException {
323         Scopes.Scope scope = null;
324         if (scopes != null) {
325             // N.b. use ctags 1-based indexing vs 0-based.
326             scope = scopes.getScope(lineOffset + 1);
327         }
328         if (scope != null && scope != scopes.getScope(-1)) {
329             dest.append("  <a class=\"scope\" href=\"");
330             dest.append(url);
331             dest.append("#");
332             dest.append(String.valueOf(scope.getLineFrom()));
333             dest.append("\">in ");
334             Util.htmlize(scope.getName(), dest);
335             dest.append("()</a>");
336         }
337     }
338 
writeTag(int lineOffset, Appendable dest, List<String> marks)339     private void writeTag(int lineOffset, Appendable dest, List<String> marks)
340             throws IOException {
341         if (defs != null) {
342             // N.b. use ctags 1-based indexing vs 0-based.
343             List<Tag> linetags =  defs.getTags(lineOffset + 1);
344             if (linetags != null) {
345                 Tag pickedTag = findTagForMark(linetags, marks);
346                 if (pickedTag != null) {
347                     dest.append("  <i>");
348                     Util.htmlize(pickedTag.type, dest);
349                     dest.append("</i>");
350                 }
351             }
352         }
353     }
354 
355     /**
356      * Search the cross product of {@code linetags} and {@code marks} for any
357      * mark that starts with a {@link Tag#symbol} and where any subsequent
358      * character is a non-word ({@code (?U)\W}) character.
359      * @return a defined instance or {@code null}
360      */
findTagForMark(List<Tag> linetags, List<String> marks)361     private Tag findTagForMark(List<Tag> linetags, List<String> marks) {
362         for (Tag tag : linetags) {
363             if (tag.type != null) {
364                 for (String mark : marks) {
365                     if (mark.startsWith(tag.symbol) && (mark.length() ==
366                             tag.symbol.length() || isNonWord(
367                                 mark.charAt(tag.symbol.length())))) {
368                         return tag;
369                     }
370                 }
371             }
372         }
373         return null;
374     }
375 
isNonWord(char c)376     private static boolean isNonWord(char c) {
377         String cword = String.valueOf(c);
378         return NONWORD_CHAR.matcher(cword).matches();
379     }
380 }
381