xref: /OpenGrok/opengrok-indexer/src/main/java/org/opengrok/indexer/search/Results.java (revision d6df19e1b22784c78f567cf74c42f18e3901b900)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved.
 * Portions Copyright (c) 2011, Jens Elkner.
 * Portions Copyright (c) 2017, 2020, Chris Fraire <cfraire@me.com>.
 */
package org.opengrok.indexer.search;

import static org.opengrok.indexer.web.messages.MessagesContainer.MESSAGES_MAIN_PAGE_TAG;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.text.DateFormat;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.GZIPInputStream;
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.opengrok.indexer.analysis.AbstractAnalyzer;
import org.opengrok.indexer.analysis.Definitions;
import org.opengrok.indexer.analysis.Scopes;
import org.opengrok.indexer.configuration.Project;
import org.opengrok.indexer.configuration.RuntimeEnvironment;
import org.opengrok.indexer.history.HistoryException;
import org.opengrok.indexer.logger.LoggerFactory;
import org.opengrok.indexer.search.context.HistoryContext;
import org.opengrok.indexer.util.IOUtils;
import org.opengrok.indexer.util.TandemPath;
import org.opengrok.indexer.web.Prefix;
import org.opengrok.indexer.web.SearchHelper;
import org.opengrok.indexer.web.Util;
import org.opengrok.indexer.web.messages.MessagesUtils;

/**
 * Renders search results as HTML.
 *
 * @author Chandan, slightly rewritten by Lubos Kosco
 */
public final class Results {

    private static final Logger LOGGER = LoggerFactory.getLogger(Results.class);

    private Results() {
        // Util class, should not be constructed
    }

    /**
     * Create a hash map keyed by the directory of each document found.
     *
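     * <p>
     * A sketch of the resulting shape, using hypothetical hit paths:
     * </p>
     * <pre>{@code
     * // hits: /proj/a/x.c (doc 3), /proj/a/y.c (doc 7), /proj/b/z.c (doc 9)
     * // map : {"/proj/a" -> [3, 7], "/proj/b" -> [9]}
     * }</pre>
     *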
     * @param searcher searcher to use.
     * @param hits hits produced by the given searcher's search
     * @param startIdx the index of the first hit to check
     * @param stopIdx the index of the last hit to check
     * @return a (directory, hitDocument) hashmap
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException if an I/O error occurs
     */
    private static Map<String, ArrayList<Integer>> createMap(
        IndexSearcher searcher, ScoreDoc[] hits, int startIdx, long stopIdx)
            throws CorruptIndexException, IOException {

        LinkedHashMap<String, ArrayList<Integer>> dirHash =
                new LinkedHashMap<>();
        for (int i = startIdx; i < stopIdx; i++) {
            int docId = hits[i].doc;
            Document doc = searcher.doc(docId);

            String rpath = doc.get(QueryBuilder.PATH);
            if (rpath == null) {
                continue;
            }

            String parent = rpath.substring(0, rpath.lastIndexOf('/'));
            ArrayList<Integer> dirDocs = dirHash.computeIfAbsent(parent, k -> new ArrayList<>());
            dirDocs.add(docId);
        }
        return dirHash;
    }

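    /**
     * Reads up to the first 8192 characters of the xref or HTML document for
     * {@code path} under {@code basedir} (gunzipping it when {@code compressed}
     * is set), strips any HTML markup, and returns the result. Returns an
     * empty string if the document cannot be read.
     */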
    private static String getTags(File basedir, String path, boolean compressed) {
        char[] content = new char[1024 * 8];
        try (HTMLStripCharFilter r = new HTMLStripCharFilter(getXrefReader(basedir, path, compressed))) {
            int len = r.read(content);
            return new String(content, 0, len);
        } catch (Exception e) {
            String fnm = compressed ? TandemPath.join(basedir + path, ".gz") :
                    basedir + path;
            LOGGER.log(Level.WARNING, "An error reading tags from " + fnm, e);
        }
        return "";
    }

    /** Return a reader for the specified xref file. */
    private static Reader getXrefReader(
                    File basedir, String path, boolean compressed)
            throws IOException {
        /*
         * For backward compatibility, read the OpenGrok-produced document
         * using the system default charset.
         */
        if (compressed) {
            return new BufferedReader(IOUtils.createBOMStrippedReader(
                    new GZIPInputStream(new FileInputStream(new File(basedir,
                            TandemPath.join(path, ".gz"))))));
        } else {
            return new BufferedReader(IOUtils.createBOMStrippedReader(
                    new FileInputStream(new File(basedir, path))));
        }
    }

    /**
     * Prints out results in html form. The following search helper fields are
     * required to be properly initialized:
     * <ul>
     * <li>{@link SearchHelper#dataRoot}</li>
     * <li>{@link SearchHelper#contextPath}</li>
     * <li>{@link SearchHelper#searcher}</li>
     * <li>{@link SearchHelper#hits}</li>
     * <li>{@link SearchHelper#historyContext} (ignored if {@code null})</li>
     * <li>{@link SearchHelper#sourceContext} (ignored if {@code null})</li>
     * <li>{@link SearchHelper#summarizer} (if sourceContext is not {@code null})</li>
     * <li>{@link SearchHelper#sourceRoot} (if sourceContext or historyContext is not {@code null})</li>
     * </ul>
     *
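     * <p>
     * A minimal calling sketch (the {@code writer} and {@code searchHelper}
     * variables are hypothetical; the helper must already have run its query
     * and populated the fields listed above):
     * </p>
     * <pre>{@code
     * ScoreDoc[] hits = searchHelper.getHits();
     * Results.prettyPrint(writer, searchHelper, 0, Math.min(hits.length, 50));
     * }</pre>
     *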
     * @param out write destination
     * @param sh search helper which has all required fields set
     * @param start index of the first hit to print
     * @param end index of the last hit to print
     * @throws HistoryException history exception
     * @throws IOException I/O exception
     * @throws ClassNotFoundException class not found
     */
    public static void prettyPrint(Writer out, SearchHelper sh, int start,
            long end)
            throws HistoryException, IOException, ClassNotFoundException {
        Project p;
        String contextPath = sh.getContextPath();
        String ctxE = Util.uriEncodePath(contextPath);
        String xrefPrefix = contextPath + Prefix.XREF_P;
        String morePrefix = contextPath + Prefix.MORE_P;
        String xrefPrefixE = ctxE + Prefix.XREF_P;
        File xrefDataDir = new File(sh.getDataRoot(), Prefix.XREF_P.toString());

        RuntimeEnvironment env = RuntimeEnvironment.getInstance();

        boolean evenRow = true;
        out.write("<tbody class=\"search-result\">");
        for (Map.Entry<String, ArrayList<Integer>> entry :
                createMap(sh.getSearcher(), sh.getHits(), start, end).entrySet()) {
            String parent = entry.getKey();
            out.write("<tr class=\"dir\"><td colspan=\"3\"><a href=\"");
            out.write(xrefPrefixE);
            out.write(Util.uriEncodePath(parent));
            out.write("/\">");
            out.write(htmlize(parent));
            out.write("/</a>");
            if (sh.getDesc() != null) {
                out.write(" - <i>");
                out.write(sh.getDesc().get(parent));
                out.write("</i>");
            }

            p = Project.getProject(parent);
            String messages = MessagesUtils.messagesToJson(p, MESSAGES_MAIN_PAGE_TAG);
            if (p != null && !messages.isEmpty()) {
                out.write(" <a href=\"" + xrefPrefix + "/" + p.getName() + "\">");
                out.write("<span class=\"note-" + MessagesUtils.getMessageLevel(p.getName(), MESSAGES_MAIN_PAGE_TAG) +
                        " important-note important-note-rounded\" data-messages='" + messages + "'>!</span>");
                out.write("</a>");
            }

            int tabSize = sh.getTabSize(p);
            PrintPlainFinalArgs fargs = new PrintPlainFinalArgs(out, sh, env,
                xrefPrefix, tabSize, morePrefix);

            out.write("</td></tr>");
            for (int docId : entry.getValue()) {
                Document doc = sh.getSearcher().doc(docId);
                String rpath = doc.get(QueryBuilder.PATH);
                String rpathE = Util.uriEncodePath(rpath);
                if (evenRow) {
                    out.write("<tr class=\"search-result-even-row\">");
                } else {
                    out.write("<tr>");
                }
                evenRow = !evenRow;
                Util.writeHAD(out, sh.getContextPath(), rpathE, false);
                out.write("<td class=\"f\"><a href=\"");
                out.write(xrefPrefixE);
                out.write(rpathE);
                out.write("\"");
                if (env.isLastEditedDisplayMode()) {
                    printLastEditedDate(out, doc);
                }
                out.write(">");
                out.write(htmlize(rpath.substring(rpath.lastIndexOf('/') + 1)));
                out.write("</a>");
                out.write("</td><td><code class=\"con\">");
                if (sh.getSourceContext() != null) {
                    AbstractAnalyzer.Genre genre = AbstractAnalyzer.Genre.get(
                            doc.get(QueryBuilder.T));
                    Summarizer summarizer = sh.getSummarizer();
                    if (AbstractAnalyzer.Genre.XREFABLE == genre && summarizer != null) {
                        String xtags = getTags(xrefDataDir, rpath, env.isCompressXref());
                        // FIXME use Highlighter from lucene contrib here,
                        // instead of summarizer, we'd also get rid of
                        // apache lucene in whole source ...
                        out.write(summarizer.getSummary(xtags).toString());
                    } else if (AbstractAnalyzer.Genre.HTML == genre && summarizer != null) {
                        String htags = getTags(sh.getSourceRoot(), rpath, false);
                        out.write(summarizer.getSummary(htags).toString());
                    } else if (genre == AbstractAnalyzer.Genre.PLAIN) {
                        printPlain(fargs, doc, docId, rpath);
                    }
                }

                HistoryContext historyContext = sh.getHistoryContext();
                if (historyContext != null) {
                    historyContext.getContext(new File(sh.getSourceRoot(), rpath),
                            rpath, out, sh.getContextPath());
                }
                out.write("</code></td></tr>\n");
            }
        }
        out.write("</tbody>");
    }

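    /**
     * Writes a {@code class}/{@code title} attribute pair carrying the
     * document's last-modified date. The stored {@code date} field is assumed
     * to be in Lucene {@link DateTools} string form; a value that cannot be
     * parsed is logged and skipped.
     */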
    private static void printLastEditedDate(final Writer out, final Document doc) throws IOException {
        try {
            DateFormat df = DateFormat.getDateTimeInstance(DateFormat.SHORT, DateFormat.SHORT);
            String dd = df.format(DateTools.stringToDate(doc.get("date")));
            out.write(" class=\"result-annotate\" title=\"");
            out.write("Last modified: ");
            out.write(dd);
            out.write("\"");
        } catch (ParseException ex) {
            LOGGER.log(Level.WARNING, "An error parsing date information", ex);
        }
    }

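    /**
     * Prints source context for a plain-text hit. The newer xref-based
     * presentation is attempted first; if it is not available, this falls
     * back to re-analyzing the source file (read as UTF-8 by default).
     */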
    private static void printPlain(PrintPlainFinalArgs fargs, Document doc,
        int docId, String rpath) throws ClassNotFoundException, IOException {

        fargs.shelp.getSourceContext().toggleAlt();

        boolean didPresentNew = fargs.shelp.getSourceContext().getContext2(fargs.env,
            fargs.shelp.getSearcher(), docId, fargs.out, fargs.xrefPrefix,
            fargs.morePrefix, true, fargs.tabSize);

        if (!didPresentNew) {
            /*
             * Fall back to the old view, which re-analyzes text using
             * PlainLinetokenizer. E.g., when source code is updated (thus
             * affecting timestamps) but re-indexing is not yet complete.
             */
            Definitions tags = null;
            IndexableField tagsField = doc.getField(QueryBuilder.TAGS);
            if (tagsField != null) {
                tags = Definitions.deserialize(tagsField.binaryValue().bytes);
            }
            Scopes scopes;
            IndexableField scopesField = doc.getField(QueryBuilder.SCOPES);
            if (scopesField != null) {
                scopes = Scopes.deserialize(scopesField.binaryValue().bytes);
            } else {
                scopes = new Scopes();
            }
            boolean isDefSearch = fargs.shelp.getBuilder().isDefSearch();
            // SRCROOT is read with UTF-8 as a default.
            File sourceFile = new File(fargs.shelp.getSourceRoot(), rpath);
            try (FileInputStream fis = new FileInputStream(sourceFile);
                 Reader r = IOUtils.createBOMStrippedReader(fis, StandardCharsets.UTF_8.name())) {
                fargs.shelp.getSourceContext().getContext(r, fargs.out,
                    fargs.xrefPrefix, fargs.morePrefix, rpath, tags, true,
                    isDefSearch, null, scopes);
            } catch (IOException ex) {
                String errMsg = String.format("No context for %s", sourceFile);
                if (LOGGER.isLoggable(Level.FINE)) {
                    // WARNING but with FINE detail
                    LOGGER.log(Level.WARNING, errMsg, ex);
                } else {
                    LOGGER.log(Level.WARNING, errMsg);
                }
            }
        }
    }

    private static String htmlize(String raw) {
        return Util.htmlize(raw);
    }

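    /** Immutable holder for the per-directory arguments passed to {@code printPlain}. */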
    private static class PrintPlainFinalArgs {
        final Writer out;
        final SearchHelper shelp;
        final RuntimeEnvironment env;
        final String xrefPrefix;
        final String morePrefix;
        final int tabSize;

        PrintPlainFinalArgs(Writer out, SearchHelper shelp,
                RuntimeEnvironment env, String xrefPrefix, int tabSize,
                String morePrefix) {
            this.out = out;
            this.shelp = shelp;
            this.env = env;
            this.xrefPrefix = xrefPrefix;
            this.morePrefix = morePrefix;
            this.tabSize = tabSize;
        }
    }
}