xref: /OpenGrok/opengrok-indexer/src/main/jflex/search/context/PlainLineTokenizer.lex (revision d219b4cea555a12b602d2d5518daa22134ad4879)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2005, 2018, Oracle and/or its affiliates. All rights reserved.
22  */
23 
24 /**
25  * for plain text tokenizers
26  */
27 package org.opengrok.indexer.search.context;
28 
29 import java.io.CharArrayReader;
30 import java.io.IOException;
31 import java.io.Reader;
32 import java.io.Writer;
33 import java.util.List;
34 import java.util.TreeMap;
35 import org.opengrok.indexer.search.Hit;
36 import org.opengrok.indexer.web.Util;
37 import org.opengrok.indexer.analysis.Scopes;
38 import org.opengrok.indexer.analysis.Scopes.Scope;
39 
40 %%
41 
42 %public
43 %class PlainLineTokenizer
44 // This tokenizer relies on JFlex line-counting.
45 %line
46 %unicode
47 %type String
48 %ignorecase
49 %char
50 
51 %{
52   /**
53    * Buffer that holds all the text from the start of the current line, or,
54    * in the case of a match that spans multiple lines, from the start of the
55    * first line part of the matching region.
56    */
57   private final StringBuilder markedContents = new StringBuilder();
58   int markedPos = 0;
59   int curLinePos = 0;
60   int matchStart = -1;
61   int markedLine = 1; // lines are indexed from 1
62   int rest = 0;
63   boolean wait = false;
64   boolean dumpRest = false;
65   Writer out;
66   String url;
67   TreeMap<Integer, String[]> tags;
68   boolean prevHi = false;
69   Integer prevLn = null;
70   List<Hit> hits;
71   Hit hit;
72   StringBuilder sb;
73   boolean alt;
74   Scopes scopes = null;
75 
76   /**
77    * Set the writer that should receive all output
78    * @param out The new writer to write to
79    */
setWriter(Writer out)80   public void setWriter(Writer out) {
81         yyline = 1;
82         this.out = out;
83   }
84 
85   /**
86    * Set the name of the file we are working on (needed if we would like to
87    * generate a list of hits instead of generating html)
88    * @param filename the name of the file
89    */
setFilename(String filename)90   public void setFilename(String filename) {
91      this.url = filename;
92      hit = new Hit(filename, null, null, false, alt);
93   }
94 
95   /**
96    * Set the list we should create Hit objects for
97    * @param hits the hits we should add Hit objects
98    */
setHitList(List<Hit> hits)99   public void setHitList(List<Hit> hits) {
100      this.hits = hits;
101   }
102 
setAlt(boolean alt)103     public void setAlt(boolean alt) {
104         this.alt = alt;
105     }
106 
107 
reInit(char[]buf,int len,Writer out,String url,TreeMap<Integer,String[]> tags,Scopes scopes)108   public void reInit(char[] buf, int len, Writer out, String url, TreeMap<Integer, String[]> tags, Scopes scopes) {
109         reInit(new CharArrayReader(buf, 0, len), out, url, tags, scopes);
110   }
111 
reInit(Reader in,Writer out,String url,TreeMap<Integer,String[]> tags,Scopes scopes)112   public void reInit(Reader in, Writer out, String url, TreeMap<Integer, String[]> tags, Scopes scopes) {
113         yyreset(in);
114 
115         markedContents.setLength(0);
116         wait = false;
117         dumpRest = false;
118         rest = 0;
119         markedPos = 0;
120         curLinePos = 0;
121         matchStart = -1;
122         markedLine = 1;
123         yyline = 1;
124         this.out = out;
125         this.url = url;
126         this.tags = tags;
127         if(this.tags == null) {
128                 this.tags = new TreeMap<Integer, String[]>();
129         }
130         this.scopes = scopes;
131         prevHi = false;
132   }
133 
134   /** Current token could be part of a match. Hold on... */
holdOn()135   public void holdOn() {
136      if(!wait) {
137         wait = true;
138         matchStart = markedContents.length() - yylength();
139      }
140   }
141 
142   /** Not a match after all. */
neverMind()143   public void neverMind() {
144         wait = false;
145         if(!dumpRest) {
146                 markedPos = curLinePos;
147                 markedLine = yyline;
148         }
149         matchStart = -1;
150   }
151 
152 
printWithNum(int start,int end,int lineNo,boolean bold)153   private int printWithNum(int start, int end, int lineNo,
154                            boolean bold) throws IOException {
155         if (bold) {
156             out.write("<b>");
157         }
158 
159         for(int i=start;i<end; i++) {
160                 char ch = markedContents.charAt(i);
161                 switch(ch) {
162                 case '\n':
163                         ++lineNo;
164                         Integer ln = Integer.valueOf(lineNo);
165                         boolean hi = tags.containsKey(ln);
166 
167                         if (bold) {
168                             out.write("</b>");
169                         }
170 
171                         out.write("</a>");
172                         if (prevHi) {
173                                 out.write(" <i> ");
174                                 String[] desc = tags.remove(prevLn);
175                                 out.write(desc[2]);
176                                 out.write(" </i>");
177                         }
178                         out.write("<br/>");
179 
180                         prevHi = hi;
181                         prevLn = ln;
182                         if (hi) out.write("<span class=\"h\">"); //TODO: closing tag - where?
183                         out.write("<a class=\"s\" href=\"");
184                         out.write(url);
185                         String num = String.valueOf(lineNo);
186                         out.write(num);
187                         out.write("\"><span class=\"l\">");
188                         out.write(num);
189                         out.write("</span> ");
190                         if (bold) {
191                             out.write("<b>");
192                         }
193                         break;
194                 case '<':
195                         out.write("&lt;");
196                         break;
197                 case '>':
198                         out.write("&gt;");
199                         break;
200                 case '&':
201                         out.write("&amp;");
202                         break;
203                 default:
204                         out.write(ch);
205                 }
206         }
207 
208         if (bold) {
209             out.write("</b>");
210         }
211 
212         return lineNo;
213   }
214 
formatWithNum(int start,int end,int lineNo)215   private int formatWithNum(int start, int end, int lineNo) {
216         for(int i=start;i<end; i++) {
217                 char ch = markedContents.charAt(i);
218                 switch(ch) {
219                 case '\n':
220                         ++lineNo;
221                         Integer ln = Integer.valueOf(lineNo);
222                         boolean hi = tags.containsKey(ln);
223                         if (prevHi) {
224                            String[] desc = tags.remove(prevLn);
225                            hit.setTag(desc[2]);
226                         }
227                         prevHi = hi;
228                         prevLn = ln;
229                         sb.append(' ');
230                         break;
231                 case '<':
232                         sb.append("&lt;");
233                         break;
234                 case '>':
235                         sb.append("&gt;");
236                         break;
237                 case '&':
238                         sb.append("&amp;");
239                         break;
240                 default:
241                         sb.append(ch);
242                 }
243         }
244         return lineNo;
245   }
246 
247 
printContext()248   public void printContext() throws IOException {
249         if (sb == null) {
250             sb = new StringBuilder();
251         }
252 
253         if (hit == null) {
254            hit = new Hit(url, null, null, false, alt);
255         }
256 
257         wait = false;
258         if (matchStart == -1) {
259                 matchStart = markedContents.length() - yylength();
260         }
261 
262         if (curLinePos == markedPos) {
263                 Integer ln = Integer.valueOf(markedLine);
264                 prevHi = tags.containsKey(ln);
265                 prevLn = ln;
266                 if (prevHi) {
267                         prevLn = ln;
268                 }
269 
270                 if (out != null) {
271                     out.write("<a class=\"s\" href=\"");
272                     out.write(url);
273                     String num = String.valueOf(markedLine);
274                     out.write(num);
275                     out.write("\"><span class=\"l\">");
276                     out.write(num);
277                     out.write("</span> ");
278                 }
279         }
280 
281         if (out != null) {
282            // print first part of line without normal font
283            markedLine = printWithNum(
284                     markedPos, matchStart, markedLine, false);
285            // use bold font for the match
286            markedLine = printWithNum(
287                    matchStart, markedContents.length(), markedLine, true);
288         } else {
289            markedLine = formatWithNum(markedPos, matchStart, markedLine);
290            hit.setLineno(String.valueOf(markedLine));
291            sb.append("<b>");
292            markedLine = formatWithNum(
293                     matchStart, markedContents.length(), markedLine);
294            sb.append("</b>");
295         }
296 
297         // Remove everything up to the start of the current line in the
298         // buffered contents.
299         markedContents.delete(0, curLinePos);
300         curLinePos = 0;
301         markedPos = markedContents.length();
302         matchStart = -1;
303         dumpRest = true;
304         rest = markedPos;
305   }
dumpRest()306   public void dumpRest() throws IOException {
307         if (dumpRest) {
308         final int maxLooks = 100;
309         for (int i=0; ; i++) {
310             final boolean endOfBuffer = (i >= markedContents.length() - rest);
311             final boolean newline = !endOfBuffer && markedContents.charAt(rest+i) == '\n';
312             if (endOfBuffer || newline || i >= maxLooks) {
313                            if (out != null) {
314                                 printWithNum(rest, rest+i-1,
315                                              markedLine, false);
316 
317                 // Assume that this line has been truncated if we don't find
318                 // a newline after looking at maxLooks characters, or if we
319                 // reach the end of the buffer and the size of the buffer is
320                 // Context.MAXFILEREAD (which means that the file has probably
321                 // been truncated).
322                 if (!newline &&
323                       ((i >= maxLooks) ||
324                        (endOfBuffer && (yychar + yylength()) == Context.MAXFILEREAD))) {
325                     out.write(" (&hellip;)");
326                 }
327 
328                                 out.write("</a>");
329 
330                                 Scope scope = null;
331                                 if (scopes != null) {
332                                     scope = scopes.getScope(markedLine-1);
333                                 }
334                                 if (scope != null && scope != scopes.getScope(-1)) {
335                                     out.write(" <a class=\"scope\" href=\"");
336                                     out.write(url);
337                                     out.write(String.valueOf(scope.getLineFrom()));
338                                     out.write("\">in ");
339                                     out.write(scope.getName());
340                                     out.write("()</a> ");
341                                 }
342 
343                                 if (prevHi) {
344                                         out.write(" <i> ");
345                                         String[] desc = tags.remove(prevLn);
346                                         out.write(desc[2]);
347                                         out.write(" </i>");
348                                 }
349                                 out.write("<br/>");
350                            } else {
351                                formatWithNum(rest, rest+i-1, markedLine);
352                                hit.setLine(sb.toString());
353                                if (prevHi) {
354                                   String[] desc = tags.remove(prevLn);
355                                   hit.setTag(desc[2]);
356                                }
357                                hits.add(hit);
358                            }
359                            break;
360                         }
361                 }
362         }
363         if (tags.size() > 0) {
364         if (out != null) {
365            for(Integer rem : tags.keySet()) {
366                 String[] desc = tags.get(rem);
367                 out.write("<a class=\"s\" href=\"");
368                 out.write(url);
369                 out.write(desc[1]);
370                 out.write("\"><span class=\"l\">");
371                 out.write(desc[1]);
372                 out.write("</span> ");
373                 out.write(Util.htmlize(desc[3]).replace(desc[0], "<b>" + desc[0] + "</b>"));
374                 out.write("</a> ");
375                 if (desc[4] != null) {
376                     out.write("<i>");
377                     out.write(desc[4]);
378                     out.write("</i> ");
379                 }
380                 out.write("<i> ");
381                 out.write(desc[2]);
382                 out.write(" </i><br/>");
383            }
384         } else {
385            for(Integer rem : tags.keySet()) {
386                 String[] desc = tags.get(rem);
387                 hit = new Hit(url, "<html>" + Util.htmlize(desc[3]).replace(desc[0], "<b>" + desc[0] + "</b>"),
388                               desc[1], false, alt);
389                 hit.setTag(desc[2]);
390                 hits.add(hit);
391            }
392         }
393         }
394   }
395 %}
396 
// Macro definitions used by the token rule below.
//WhiteSpace     = [ \t\f\r]+|\n
// A letter or underscore followed by any number of letters, digits or underscores.
Identifier = [a-zA-Z\p{Letter}_] [a-zA-Z\p{Letter}0-9\p{Number}_]*
// Decimal integers, simple decimals, and a third alternative for hex literals.
// NOTE(review): "0[xX]" is a quoted JFlex string, so it presumably matches the
// literal characters 0[xX] rather than "0x"/"0X" - confirm against the
// tokenizers used at index time before changing it.
Number = [0-9]+|[0-9]+\.[0-9]+| "0[xX]" [0-9a-fA-F]+
// Single punctuation characters that are returned as tokens of their own.
Printable = [\@\$\%\^\&\-+=\?\.\:]
401 
402 
403 %%
{Identifier}|{Number}|{Printable}    {
    // Buffer the token text and return it to the caller.
    final String token = yytext();
    markedContents.append(token);
    return token;
}
// End of input is reported as a null token.
<<EOF>>   { return null; }
410 
\n      {
    markedContents.append(yycharat(0));

    // Outside a possible match the next line becomes the new print position.
    if (!wait) {
        markedPos = markedContents.length();
        curLinePos = markedPos;
        markedLine = yyline + 1;
        matchStart = -1;
    }

    // A match was printed on this line: emit what follows it and close the line.
    if (dumpRest) {
        final int lineEnd = markedContents.length() - yylength();
        if (out != null) {
            printWithNum(rest, lineEnd, markedLine, false);
            out.write("</a>");

            final Scope enclosing =
                    (scopes == null) ? null : scopes.getScope(markedLine - 1);
            // Skip the link when the lookup yields the same object as
            // getScope(-1).
            if (enclosing != null && enclosing != scopes.getScope(-1)) {
                out.write(" <a class=\"scope\" href=\"");
                out.write(url);
                out.write(String.valueOf(enclosing.getLineFrom()));
                out.write("\">in ");
                out.write(enclosing.getName());
                out.write("()</a> ");
            }

            if (prevHi) {
                out.write(" <i> ");
                final String[] tagDesc = tags.remove(prevLn);
                out.write(tagDesc[2]);
                out.write("</i> ");
            }
            out.write("<br/>");
        } else {
            formatWithNum(rest, lineEnd, markedLine);
            hit.setLine(sb.toString());
            if (prevHi) {
                final String[] tagDesc = tags.remove(prevLn);
                hit.setTag(tagDesc[2]);
            }
            hits.add(hit);
            // Start over with an empty line buffer and a fresh hit.
            sb.setLength(0);
            hit = new Hit(url, null, null, false, alt);
        }
        dumpRest = false;
    }

    if (!wait) {
        // We have dumped the rest of the line, begun a new line,
        // and we're not inside a possible match, so it's safe to
        // forget the buffered contents.
        markedContents.setLength(0);
        markedPos = 0;
        curLinePos = 0;
    }
}

// Any other character is only buffered; no token is returned for it.
[^\n]       { markedContents.append(yycharat(0)); }
470