xref: /OpenGrok/opengrok-indexer/src/main/jflex/analysis/powershell/PowershellSymbolTokenizer.lex (revision d219b4cea555a12b602d2d5518daa22134ad4879)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved.
22  * Portions Copyright (c) 2017-2018, Chris Fraire <cfraire@me.com>.
23  */
24 
25 package org.opengrok.indexer.analysis.powershell;
26 
27 import java.util.Locale;
28 import java.util.regex.Matcher;
29 import org.opengrok.indexer.analysis.JFlexSymbolMatcher;
30 %%
31 %public
32 %class PoshSymbolTokenizer
33 %extends JFlexSymbolMatcher
34 %unicode
35 %ignorecase
36 %int
37 %include ../CommonLexer.lexh
38 %char
39 %{
onCertainlyPublish(String symbol,int yyoffset)40     private boolean onCertainlyPublish(String symbol, int yyoffset) {
41         return onPossiblyPublish(symbol, yyoffset, true);
42     }
43 
onPossiblyPublish(String symbol,int yyoffset)44     private boolean onPossiblyPublish(String symbol, int yyoffset) {
45         return onPossiblyPublish(symbol, yyoffset, false);
46     }
47 
onPossiblyPublish(String symbol,int yyoffset,boolean skipKeywordCheck)48     private boolean onPossiblyPublish(String symbol, int yyoffset,
49         boolean skipKeywordCheck) {
50         if (skipKeywordCheck || !Consts.poshkwd.contains(symbol.
51                 toLowerCase(Locale.ROOT))) {
52             onSymbolMatched(symbol, yychar + yyoffset);
53             return true;
54         }
55         return false;
56     }
57 %}
58 
/*
 * States:
 * STRING      - double-quoted string, ex: "hello, world!"
 * QSTRING     - single-quoted string, ex: 'hello, world!'
 * COMMENT     - multiple-line comment, ex: <# this is a comment #>
 * SCOMMENT    - single-line comment, ex: # this is a comment
 * SUBSHELL    - commands executed in a sub-shell,
 *               ex: (echo $header; cat file.txt)
 * HERESTRING  - double-quoted here-string, ex: cat @" ... "@
 * HEREQSTRING - single-quoted here-string, ex: cat @' ... '@
 * DATATYPE    - bracketed .NET datatype specification
 * DOTSYNTAX   - awaiting possible dot syntax, e.g. a property or method
 */
72 %state STRING COMMENT SCOMMENT QSTRING SUBSHELL HERESTRING HEREQSTRING
73 %state DATATYPE DOTSYNTAX
74 
75 %include ../Common.lexh
76 %include Powershell.lexh
77 %%
78 
<STRING> {
 /* Braced variable inside a double-quoted string: ${name} */
 {ComplexVariable}    {
    final int prefixLength = 2;     // skip the leading "${"
    // Stopping one character early leaves out the closing "}".
    final String name = yytext().substring(prefixLength, yylength() - 1);
    if (onPossiblyPublish(name, prefixLength)) {
        return yystate();
    }
 }
 /* Plain variable inside a double-quoted string: $name */
 {SimpleVariable}    {
    final int prefixLength = 1;     // skip the leading "$"
    final String name = yytext().substring(prefixLength);
    if (onPossiblyPublish(name, prefixLength)) {
        return yystate();
    }
 }
}
92 
<YYINITIAL, SUBSHELL> {
 /* {Label} anchored at the beginning of a line. */
 ^ {Label}    {
    if (onPossiblyPublish(yytext(), 0)) {
        return yystate();
    }
 }
 /*
  * {Break} or {Continue}: when PoshUtils.GOTO_LABEL finds a match in the
  * text, its third capture group is published without a keyword check.
  */
 {Break} |
 {Continue}    {
    final Matcher labelMatcher = PoshUtils.GOTO_LABEL.matcher(yytext());
    if (labelMatcher.find()) {
        final int labelGroup = 3;
        onCertainlyPublish(labelMatcher.group(labelGroup),
                labelMatcher.start(labelGroup));
        return yystate();
    }
 }

 /* Give the whole match back so that it is scanned again in DATATYPE. */
 {DataType}    {
    final int matchedLength = yylength();
    yypushback(matchedLength);
    yypush(DATATYPE);
 }
}
114 
<YYINITIAL, SUBSHELL, DOTSYNTAX> {
 /*
  * Variables outside of strings. If the name is not published and the
  * current state is not already DOTSYNTAX, DOTSYNTAX is pushed.
  */
 {ComplexVariable}    {
    final int prefixLength = 2;     // skip the leading "${"
    // Stopping one character early leaves out the closing "}".
    final String name = yytext().substring(prefixLength, yylength() - 1);
    if (onPossiblyPublish(name, prefixLength)) {
        return yystate();
    }
    if (yystate() != DOTSYNTAX) {
        yypush(DOTSYNTAX);
    }
 }
 {SimpleVariable}    {
    final int prefixLength = 1;     // skip the leading "$"
    final String name = yytext().substring(prefixLength);
    if (onPossiblyPublish(name, prefixLength)) {
        return yystate();
    }
    if (yystate() != DOTSYNTAX) {
        yypush(DOTSYNTAX);
    }
 }
}
129 
<YYINITIAL, SUBSHELL> {
 /*
  * The match is first looked up in the keyword list with its "-" still
  * attached; only if absent is the bare name offered for publishing.
  */
 {Operator}    {
    final String matched = yytext();
    final int prefixLength = 1;     // skip the leading "-"
    final boolean dashedKeyword = Consts.poshkwd.contains(
            matched.toLowerCase(Locale.ROOT));
    if (!dashedKeyword && onPossiblyPublish(
            matched.substring(prefixLength), prefixLength)) {
        return yystate();
    }
 }

 /* Numbers are consumed without any action. */
 {Number}    {}

 /* Openers for strings, comments and here-strings. */
 \"     {
    yypush(STRING);
 }
 \'     {
    yypush(QSTRING);
 }
 "#"    {
    yypush(SCOMMENT);
 }
 "<#"   {
    yypush(COMMENT);
 }
 \@\"   {
    yypush(HERESTRING);
 }
 \@\'   {
    yypush(HEREQSTRING);
 }
}
150 
<DOTSYNTAX> {
 /* A dot is consumed and the lexer stays in DOTSYNTAX. */
 "."    {
    // noop
 }

 /* Any other character is given back before the state is popped. */
 [^]    {
    final int matchedLength = yylength();
    yypushback(matchedLength);
    yypop();
 }
}
161 
<YYINITIAL, SUBSHELL, DATATYPE, DOTSYNTAX> {
 /* The whole match is offered for publishing, subject to the keyword check. */
 {Identifier}    {
    if (onPossiblyPublish(yytext(), 0)) {
        return yystate();
    }
 }
}
168 
<DATATYPE> {
 /* The closing bracket is given back before the state is popped. */
 "]"    {
    final int matchedLength = yylength();
    yypushback(matchedLength);
    yypop();
 }
}
175 
<STRING> {
 /*
  * A backtick followed by a double quote, dollar sign or backtick, and a
  * doubled double quote, are consumed without any action.
  */
 [`][\"\$`] |
 \"\"    {}

 /* A double quote, optionally preceded by a dollar sign, pops the state. */
 \$? \"     {
    yypop();
 }
}
182 
<STRING, HERESTRING> {
 /* "$(" inside a double-quoted string or here-string pushes SUBSHELL. */
 "$("    {
    yypush(SUBSHELL);
 }
}
186 
<QSTRING> {
 /* A doubled single quote is consumed without any action. */
 \'\'    {}
 /* A lone single quote pops the state. */
 \'      {
    yypop();
 }
}
191 
<COMMENT> {
 /* "#>" pops the state. */
 "#>"    {
    yypop();
 }
}
195 
<SCOMMENT> {
 /* {EOL} pops the state. */
 {EOL}   {
    yypop();
 }
}
199 
<SUBSHELL> {
  /* A closing parenthesis pops the state. */
  \)    {
    yypop();
  }
}
203 
<HERESTRING> {
  /* A backtick followed by a dollar sign is consumed without any action. */
  "`$"    {}

 /* Plain variable inside the here-string: $name */
 {SimpleVariable}    {
    final int prefixLength = 1;     // skip the leading "$"
    final String name = yytext().substring(prefixLength);
    if (onPossiblyPublish(name, prefixLength)) {
        return yystate();
    }
 }

 /* Braced variable inside the here-string: ${name} */
 {ComplexVariable}    {
    final int prefixLength = 2;     // skip the leading "${"
    // Stopping one character early leaves out the closing "}".
    final String name = yytext().substring(prefixLength, yylength() - 1);
    if (onPossiblyPublish(name, prefixLength)) {
        return yystate();
    }
 }

 /* The terminator at the beginning of a line pops the state. */
 ^ \"\@     {
    yypop();
 }
}
222 
<HEREQSTRING> {
 /* The terminator at the beginning of a line pops the state. */
 ^ "'@"     {
    yypop();
 }
}
226 
<YYINITIAL, SUBSHELL> {
  /* A backtick-escaped special character must not trigger a state change. */
  [`][`\(\)\{\}\"\'\$\#\\]    {}

  /* "$#" is consumed here so that its "#" does not start a comment. */
  "$#"    {}

  /* An opening parenthesis, optionally preceded by "$", pushes SUBSHELL. */
  \$ ? \(    {
    yypush(SUBSHELL);
  }
}
236 
<YYINITIAL, DATATYPE, SUBSHELL, STRING, COMMENT, SCOMMENT, QSTRING, HERESTRING,
    HEREQSTRING> {
/* Whitespace runs and any other single character are consumed silently. */
{WhspChar}+ |
[^]    {}
}
242