xref: /OpenGrok/opengrok-indexer/src/main/jflex/analysis/sh/ShXref.lex (revision d219b4cea555a12b602d2d5518daa22134ad4879)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved.
22  * Portions Copyright (c) 2017, Chris Fraire <cfraire@me.com>.
23  */
24 
25 package org.opengrok.indexer.analysis.sh;
26 
27 import java.io.IOException;
28 import java.util.Stack;
29 import org.opengrok.indexer.analysis.JFlexSymbolMatcher;
30 import org.opengrok.indexer.util.StringUtils;
31 import org.opengrok.indexer.web.HtmlConsts;
32 %%
33 %public
34 %class ShXref
35 %extends JFlexSymbolMatcher
36 %unicode
37 %int
38 %char
39 %include ../CommonLexer.lexh
40 %include ../CommonXref.lexh
41 %{
  // Style class names of the spans opened through pushSpan(), innermost on
  // top. Entries may be null: several rules call pushSpan(state, null).
42   private final Stack<String> styleStack = new Stack<String>();
43 
44   // State variables for the HEREDOC state. They tell what the stop word is,
45   // and whether leading tabs should be removed from the input lines before
46   // comparing with the stop word.
  // Both are assigned by the "<<" rule and cleared again by reset().
47   private String heredocStopWord;
48   private boolean heredocStripLeadingTabs;
49 
50   /**
51    * Resets the sh tracked state; {@inheritDoc}
52    */
53   @Override
reset()54   public void reset() {
55       super.reset();
56       heredocStopWord = null;
57       heredocStripLeadingTabs = false;
58   }
59 
60   @Override
clearStack()61   protected void clearStack() {
62       super.clearStack();
63       styleStack.clear();
64   }
65 
pushSpan(int newState,String className)66   public void pushSpan(int newState, String className) throws IOException {
67       onDisjointSpanChanged(className, yychar);
68       yypush(newState);
69       styleStack.push(className);
70   }
71 
72   @Override
yypop()73   public void yypop() throws IOException {
74       onDisjointSpanChanged(null, yychar);
75       super.yypop();
76       styleStack.pop();
77 
78       if (!styleStack.empty()) {
79           String style = styleStack.peek();
80           onDisjointSpanChanged(style, yychar);
81       }
82   }
83 
84   /**
85    * Check the contents of a line to see if it matches the stop word for a
86    * here-document.
87    *
88    * @param line a line in the input file
89    * @return true if the line terminates a here-document, false otherwise
90    */
isHeredocStopWord(String line)91   private boolean isHeredocStopWord(String line) {
92     // Skip leading tabs if heredocStripLeadingTabs is true.
93     int i = 0;
94     while (heredocStripLeadingTabs &&
95               i < line.length() && line.charAt(i) == '\t') {
96       i++;
97     }
98 
99     // Compare remaining characters on the line with the stop word.
100     return line.substring(i).equals(heredocStopWord);
101   }
102 
chkLOC()103   protected void chkLOC() {
104       switch (yystate()) {
105           case SCOMMENT:
106               break;
107           default:
108               phLOC();
109               break;
110       }
111   }
112 %}
113 
/* A file-like token: one or more FNameChar, a dot, then an extension made
   of ASCII letters only. FNameChar is not defined in this file; presumably
   it comes from one of the included .lexh files. */
114 File = {FNameChar}+ "." ([a-zA-Z]+)
115 
116 /*
117  * States:
118  * STRING - double-quoted string, ex: "hello, world!"
119  * SCOMMENT - single-line comment, ex: # this is a comment
120  * QSTRING - single-quoted string, ex: 'hello, world!'
121  * SUBSHELL - commands executed in a sub-shell,
122  *               example 1: (echo $header; cat file.txt)
123  *               example 2 (command substitution): $(cat file.txt)
124  * BACKQUOTE - command substitution using back-quotes, ex: `cat file.txt`
125  * BRACEGROUP - group of commands in braces, possibly ksh command substitution
126  *              extension, ex: ${ cat file.txt; }
127  * HEREDOC - here-document, example: cat<<EOF ... EOF
128  */
129 %state STRING SCOMMENT QSTRING SUBSHELL BACKQUOTE BRACEGROUP HEREDOC
130 
131 %include ../Common.lexh
132 %include ../CommonURI.lexh
133 %include ../CommonPath.lexh
134 %include ../CommonLaxFPath.lexh
135 %include Sh.lexh
136 %%
137 <STRING>{
 /* A $name expansion inside a double-quoted string. The whole match,
    including the leading "$", is handed to onRefsTermMatched. */
138  "$" {Identifier}    {
139     chkLOC();
140     String id = yytext();
141     onRefsTermMatched(id, yychar);
142  }
143 
144   /* This rule matches associative arrays inside strings,
145      for instance "${array["string"]}". Push a new STRING
146      state on the stack to prevent premature exit from the
147      STRING state. */
  /* The matched prefix is emitted before the nested STRING is pushed. */
148   \$\{ {Identifier} \[\"    {
149     chkLOC();
150     onNonSymbolMatched(yytext(), yychar);
151     pushSpan(STRING, HtmlConsts.STRING_CLASS);
152   }
153 }
154 
155 <YYINITIAL, SUBSHELL, BACKQUOTE, BRACEGROUP> {
 /* An identifier, optionally prefixed by "$". It is passed together with
    Consts.shkwd to onFilteredSymbolMatched; Consts is declared outside this
    file, presumably shkwd is the set of shell keywords. */
156 \$ ? {Identifier}    {
157     chkLOC();
158     String id = yytext();
159     onFilteredSymbolMatched(id, yychar, Consts.shkwd);
160  }
161 
 /* A number is emitted under NUMBER_CLASS, after which the span class that
    was active before the match is put back. */
162 {Number}        {
163     chkLOC();
164     String lastClassName = getDisjointSpanClassName();
165     onDisjointSpanChanged(HtmlConsts.NUMBER_CLASS, yychar);
166     onNonSymbolMatched(yytext(), yychar);
167     onDisjointSpanChanged(lastClassName, yychar);
168  }
169 
 /* Opening double or single quote, optionally preceded by "$". The state is
    pushed before the quote is emitted, presumably so that the quote itself
    is styled as part of the string. */
170  \$ ? \"    {
171     chkLOC();
172     pushSpan(STRING, HtmlConsts.STRING_CLASS);
173     onNonSymbolMatched(yytext(), yychar);
174  }
175  \$ ? \'    {
176     chkLOC();
177     pushSpan(QSTRING, HtmlConsts.STRING_CLASS);
178     onNonSymbolMatched(yytext(), yychar);
179  }
 /* Start of a comment. Unlike the neighbouring rules this one does not
    call chkLOC(). */
180  "#"     {
181     pushSpan(SCOMMENT, HtmlConsts.COMMENT_CLASS);
182     onNonSymbolMatched(yytext(), yychar);
183  }
184 
185  // Recognize here-documents. At least a subset of them.
 // Only a bare identifier is accepted as stop word. The operator and the
 // stop word are emitted before the HEREDOC span is opened.
186  "<<" "-"? {WhspChar}* {Identifier} {WhspChar}*    {
187    chkLOC();
188    String text = yytext();
189    onNonSymbolMatched(text, yychar);
190 
   // Index 2 is the character right after the two less-than signs; a dash
   // there selects the tab-stripping form. The stop word is the rest of
   // the match with surrounding whitespace trimmed.
191    heredocStripLeadingTabs = (text.charAt(2) == '-');
192    heredocStopWord = text.substring(heredocStripLeadingTabs ? 3 : 2).trim();
193    pushSpan(HEREDOC, HtmlConsts.STRING_CLASS);
194  }
195 
196  // Any sequence of more than two < characters should not start HEREDOC. Use
197  // this rule to catch them before the HEREDOC rule.
198  "<<" "<" +    {
199    chkLOC();
200    onNonSymbolMatched(yytext(), yychar);
201  }
202 
 /* Operators are emitted as plain text so that they are not picked up by
    the identifier rule. The operator macros are not defined in this file;
    presumably they come from Sh.lexh. */
203  {Unary_op_req_lookahead} / \W    {
204     chkLOC();
205     onNonSymbolMatched(yytext(), yychar);
206  }
207  {Unary_op_req_lookahead} $    {
208     chkLOC();
209     onNonSymbolMatched(yytext(), yychar);
210  }
211  {WhspChar}+ {Unary_op_char} / ")"    {
212     chkLOC();
213     onNonSymbolMatched(yytext(), yychar);
214  }
215  {Binary_op}    {
216     chkLOC();
217     onNonSymbolMatched(yytext(), yychar);
218  }
219 }
220 
221 <STRING> {
 /* Escaped special characters, and a quote pair separated only by
    whitespace, are emitted without leaving STRING. The trailing "|" makes
    the first pattern share the action of the second. */
222  \\[\"\$\`\\] |
223  \" {WhspChar}* \"    { chkLOC(); onNonSymbolMatched(yytext(), yychar); }
 /* A lone double quote closes the string: emit it, then pop. */
224  \"     { chkLOC(); onNonSymbolMatched(yytext(), yychar); yypop(); }
 /* Command substitution inside a string. The nested state is pushed with a
    null style class, i.e. without a class of its own. */
225  \$\(    {
226     chkLOC();
227     pushSpan(SUBSHELL, null);
228     onNonSymbolMatched(yytext(), yychar);
229  }
230  [`]    {
231     chkLOC();
232     pushSpan(BACKQUOTE, null);
233     onNonSymbolMatched(yytext(), yychar);
234  }
235 
236  /* Bug #15661: Recognize ksh command substitution within strings. According
237   * to ksh man page http://www2.research.att.com/~gsf/man/man1/ksh-man.html#Command%20Substitution
238   * the opening brace must be followed by a blank.
239   */
240  "${" / {WhspChar} | {EOL}    {
241     chkLOC();
242     pushSpan(BRACEGROUP, null);
243     onNonSymbolMatched(yytext(), yychar);
244  }
245 }
246 
247 <QSTRING> {
 /* A backslash-quote pair, or two single quotes separated only by
    whitespace, are emitted without leaving QSTRING; the trailing "|" makes
    the first pattern share the action of the second. A lone single quote
    is emitted and then closes the string. */
248  \\[\'] |
249  \' {WhspChar}* \'    { chkLOC(); onNonSymbolMatched(yytext(), yychar); }
250  \'    { chkLOC(); onNonSymbolMatched(yytext(), yychar); yypop(); }
251 }
252 
253 <SCOMMENT> {
 /* The end of the line, with any whitespace before it, ends the comment.
    The state is popped before the line end is reported. */
254 {WhspChar}*{EOL}    {
255     yypop();
256     onEndOfLineMatched(yytext(), yychar);
257  }
258 }
259 
260 <SUBSHELL> {
  /* A closing parenthesis is emitted and then ends the sub-shell. */
261   \)   { chkLOC(); onNonSymbolMatched(yytext(), yychar); yypop(); }
262 }
263 
264 <BACKQUOTE> {
  /* A second back-quote is emitted and then ends the substitution. */
265   [`]    { chkLOC(); onNonSymbolMatched(yytext(), yychar); yypop(); }
266 }
267 
268 <BRACEGROUP> {
269  /* Bug #15661: Terminate a ksh brace group. According to ksh man page
270   * http://www2.research.att.com/~gsf/man/man1/ksh-man.html#Command%20Substitution
271   * the closing brace must be on beginning of line, or it must be preceded by
272   * a semi-colon and (optionally) whitespace.
273   */
  /* Both forms emit the matched text, including the whitespace or the
     semi-colon in front of the brace, and then pop the state. */
274   ^ {WhspChar}* \}    {
275     chkLOC();
276     onNonSymbolMatched(yytext(), yychar);
277     yypop();
278   }
279   ; {WhspChar}* \}    {
280     chkLOC();
281     onNonSymbolMatched(yytext(), yychar);
282     yypop();
283   }
284 }
285 
286 <HEREDOC> {
  /* Every run of non-whitespace characters is compared with the stop word.
     On a match the state is popped before the word itself is emitted.
     NOTE(review): the match is a single token, not a whole line, so the
     tab skipping in isHeredocStopWord() has nothing to skip here, and a
     stop word that appears in the middle of a line also ends the
     here-document. Confirm whether that is intended. */
287   [^\s]+    {
288     chkLOC();
289     String line = yytext();
290     if (isHeredocStopWord(line)) {
291       yypop();
292     }
293     onNonSymbolMatched(line, yychar);
294   }
295 
  /* Line ends and any other single whitespace character are passed through
     without calling chkLOC(). */
296   {EOL}    { onEndOfLineMatched(yytext(), yychar); }
297   \s    { onNonSymbolMatched(yytext(), yychar); }
298 }
299 
300 <YYINITIAL, SUBSHELL, BACKQUOTE, BRACEGROUP> {
301   /* Don't enter new state if special character is escaped. */
302   \\[`\)\(\{\"\'\$\#\\]    { chkLOC(); onNonSymbolMatched(yytext(), yychar); }
303 
304   /* $# should not start a comment. */
305   "$#"    { chkLOC(); onNonSymbolMatched(yytext(), yychar); }
306 
  /* An opening parenthesis, optionally preceded by "$", or a back-quote
     starts a nested command state. The state is pushed with a null style
     class, i.e. without a class of its own. */
307   \$ ? \(    {
308     chkLOC();
309     pushSpan(SUBSHELL, null);
310     onNonSymbolMatched(yytext(), yychar);
311   }
312   [`]    {
313     chkLOC();
314     pushSpan(BACKQUOTE, null);
315     onNonSymbolMatched(yytext(), yychar);
316   }
317 
318  /* Bug #15661: Recognize ksh command substitution outside of strings. According
319   * to ksh man page http://www2.research.att.com/~gsf/man/man1/ksh-man.html#Command%20Substitution
320   * the opening brace must be followed by a blank. Make the initial dollar sign
321   * optional so that we get the nesting right and don't terminate the brace
322   * group too early if the ${ cmd; } expression contains nested { cmd; } groups.
323   */
324   \$ ? \{ / {WhspChar} | {EOL}    {
325     chkLOC();
326     pushSpan(BRACEGROUP, null);
327     onNonSymbolMatched(yytext(), yychar);
328   }
329 }
330 
 /* Rules shared by every state except HEREDOC: file-like and path-like
    tokens, followed by the fallbacks for line ends, other whitespace and
    any remaining single character. */
331 <YYINITIAL, SUBSHELL, BACKQUOTE, BRACEGROUP, STRING, SCOMMENT, QSTRING> {
332 {File}    {
333     chkLOC();
334     String path = yytext();
335     onFilelikeMatched(path, yychar);
336 }
337 
 /* RelaxedMiddleFPath is not defined in this file; presumably it comes from
    CommonLaxFPath.lexh. The match is reported with "/" as separator. */
338 {RelaxedMiddleFPath}    {
339     chkLOC();
340     onPathlikeMatched(yytext(), '/', false, yychar);
341  }
342 
 /* Line ends and whitespace other than a newline are passed through without
    calling chkLOC(); every other single character goes through chkLOC(). */
343 {WhspChar}*{EOL}    { onEndOfLineMatched(yytext(), yychar); }
344 [[\s]--[\n]]    { onNonSymbolMatched(yytext(), yychar); }
345 [^\n]    { chkLOC(); onNonSymbolMatched(yytext(), yychar); }
346 }
347 
 /* E-mail-like text of the shape name@host.tld is recognized only inside
    strings and comments. */
348 <STRING, SCOMMENT, QSTRING> {
349 {FNameChar}+ "@" {FNameChar}+ "." {FNameChar}+    {
350           chkLOC();
351           onEmailAddressMatched(yytext(), yychar);
352         }
353 }
354 
 /* URIs inside double-quoted strings and comments. BrowseableURI is not
    defined in this file; presumably it comes from CommonURI.lexh. */
355 <STRING, SCOMMENT> {
356     {BrowseableURI}    {
357         chkLOC();
358         onUriMatched(yytext(), yychar);
359     }
360 }
361 
 /* URIs inside single-quoted strings. The extra StringUtils.APOS_NO_BSESC
    argument is defined outside this file; presumably it describes where a
    URI ends at an apostrophe. TODO confirm against StringUtils. */
362 <QSTRING> {
363     {BrowseableURI}    {
364         chkLOC();
365         onUriMatched(yytext(), yychar, StringUtils.APOS_NO_BSESC);
366     }
367 }
368