/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved.
 * Portions Copyright (c) 2017-2018, Chris Fraire <cfraire@me.com>.
 */

package org.opengrok.indexer.analysis.powershell;

import java.util.Locale;
import java.util.regex.Matcher;
import org.opengrok.indexer.analysis.JFlexSymbolMatcher;
%%
%public
%class PoshSymbolTokenizer
%extends JFlexSymbolMatcher
%unicode
%ignorecase
%int
%include ../CommonLexer.lexh
%char
%{
    /**
     * Publishes {@code symbol} unconditionally, i.e. without consulting the
     * PowerShell keyword set.
     *
     * @param symbol the symbol text to publish
     * @param yyoffset offset of the symbol relative to the start of the
     * current match
     * @return always {@code true}
     */
    private boolean onCertainlyPublish(String symbol, int yyoffset) {
        return onPossiblyPublish(symbol, yyoffset, true);
    }

    /**
     * Publishes {@code symbol} unless its lower-cased form is contained in
     * {@code Consts.poshkwd}.
     *
     * @param symbol the symbol text to publish
     * @param yyoffset offset of the symbol relative to the start of the
     * current match
     * @return {@code true} if the symbol was published; {@code false} if it
     * was filtered out as a keyword
     */
    private boolean onPossiblyPublish(String symbol, int yyoffset) {
        return onPossiblyPublish(symbol, yyoffset, false);
    }

    /**
     * Publishes {@code symbol} through {@code onSymbolMatched} at position
     * {@code yychar + yyoffset}, optionally filtering out keywords first.
     *
     * @param symbol the symbol text to publish
     * @param yyoffset offset of the symbol relative to the start of the
     * current match
     * @param skipKeywordCheck {@code true} to publish without looking the
     * symbol up in {@code Consts.poshkwd}
     * @return {@code true} if the symbol was published; {@code false}
     * otherwise
     */
    private boolean onPossiblyPublish(String symbol, int yyoffset,
            boolean skipKeywordCheck) {
        // Keywords are suppressed unless the caller asked to skip the check.
        if (!skipKeywordCheck &&
                Consts.poshkwd.contains(symbol.toLowerCase(Locale.ROOT))) {
            return false;
        }
        onSymbolMatched(symbol, yychar + yyoffset);
        return true;
    }
%}

/*
 * Lexical states used by this tokenizer:
 * STRING - double-quoted string, ex: "hello, world!"
 * QSTRING - single-quoted string, ex: 'hello, world!'
 * COMMENT - multiple-line comment.
 * SCOMMENT - single-line comment, ex: # this is a comment
 * SUBSHELL - commands executed in a sub-shell,
 *               example 1: (echo $header; cat file.txt)
 * HERESTRING  - here-string, example: cat @" ... "@
 * HEREQSTRING - here-string, example: cat @' ... '@
 * DATATYPE - bracketed .NET datatype specification
 * DOTSYNTAX - await possible dot syntax -- e.g. property or methods
 */
%state STRING COMMENT SCOMMENT QSTRING SUBSHELL HERESTRING HEREQSTRING
%state DATATYPE DOTSYNTAX

%include ../Common.lexh
%include Powershell.lexh
%%

<STRING> {
    {ComplexVariable}    {
        // The name sits between the "${" prefix and the "}" suffix.
        final int nameStart = 2;
        final int nameEnd = yylength() - 1;
        String name = yytext().substring(nameStart, nameEnd);
        if (onPossiblyPublish(name, nameStart)) {
            return yystate();
        }
    }
    {SimpleVariable}    {
        // The name follows the single "$" prefix.
        final int nameStart = 1;
        String name = yytext().substring(nameStart);
        if (onPossiblyPublish(name, nameStart)) {
            return yystate();
        }
    }
}

<YYINITIAL, SUBSHELL> {
    ^ {Label}    {
        if (onPossiblyPublish(yytext(), 0)) {
            return yystate();
        }
    }
    {Break} |
    {Continue}    {
        // Group 3 of GOTO_LABEL is published as the label, bypassing the
        // keyword filter. Nothing is returned when the pattern is not found.
        Matcher labelMatcher = PoshUtils.GOTO_LABEL.matcher(yytext());
        if (labelMatcher.find()) {
            onCertainlyPublish(labelMatcher.group(3), labelMatcher.start(3));
            return yystate();
        }
    }

    {DataType}    {
        // Give the whole match back so it is re-scanned in DATATYPE.
        yypushback(yylength());
        yypush(DATATYPE);
    }
}

<YYINITIAL, SUBSHELL, DOTSYNTAX> {
    {ComplexVariable}    {
        // The name sits between the "${" prefix and the "}" suffix.
        final int nameStart = 2;
        String name = yytext().substring(nameStart, yylength() - 1);
        if (onPossiblyPublish(name, nameStart)) {
            return yystate();
        }
        // Reached only when the name was filtered out as a keyword.
        if (yystate() != DOTSYNTAX) {
            yypush(DOTSYNTAX);
        }
    }
    {SimpleVariable}    {
        // The name follows the single "$" prefix.
        final int nameStart = 1;
        String name = yytext().substring(nameStart);
        if (onPossiblyPublish(name, nameStart)) {
            return yystate();
        }
        // Reached only when the name was filtered out as a keyword.
        if (yystate() != DOTSYNTAX) {
            yypush(DOTSYNTAX);
        }
    }
}

<YYINITIAL, SUBSHELL> {
    {Operator}    {
        // The full token (with its "-" prefix) is checked against the
        // keyword set first; only then is the text after the "-" offered
        // for publication, which applies the keyword check a second time.
        String operator = yytext();
        boolean isKeyword = Consts.poshkwd.contains(
                operator.toLowerCase(Locale.ROOT));
        if (!isKeyword) {
            final int nameStart = 1;
            if (onPossiblyPublish(operator.substring(nameStart), nameStart)) {
                return yystate();
            }
        }
    }

    {Number}    {}

    \"    { yypush(STRING); }
    \'    { yypush(QSTRING); }
    "#"    { yypush(SCOMMENT); }
    "<#"    { yypush(COMMENT); }
    \@\"    { yypush(HERESTRING); }
    \@\'    { yypush(HEREQSTRING); }
}

<DOTSYNTAX> {
    "."    {
        // noop
    }

    [^]    {
        // Anything else ends the dot syntax: give the character back and
        // leave this state.
        yypushback(yylength());
        yypop();
    }
}

<YYINITIAL, SUBSHELL, DATATYPE, DOTSYNTAX> {
    {Identifier}    {
        if (onPossiblyPublish(yytext(), 0)) {
            return yystate();
        }
    }
}

<DATATYPE> {
    "]"    {
        // Give the bracket back and leave DATATYPE.
        yypushback(yylength());
        yypop();
    }
}

<STRING> {
    [`][\"\$`] |
    \"\"    {}

    \$? \"    { yypop(); }
}

<STRING, HERESTRING> {
    "$("    { yypush(SUBSHELL); }
}

<QSTRING> {
    \'\'    {}
    \'    { yypop(); }
}

<COMMENT> {
    "#>"    { yypop();}
}

<SCOMMENT> {
    {EOL}    { yypop();}
}

<SUBSHELL> {
    \)    { yypop(); }
}

<HERESTRING> {
    "`$"    {}

    {SimpleVariable}    {
        // The name follows the single "$" prefix.
        final int nameStart = 1;
        String name = yytext().substring(nameStart);
        if (onPossiblyPublish(name, nameStart)) {
            return yystate();
        }
    }

    {ComplexVariable}    {
        // The name sits between the "${" prefix and the "}" suffix.
        final int nameStart = 2;
        final int nameEnd = yylength() - 1;
        String name = yytext().substring(nameStart, nameEnd);
        if (onPossiblyPublish(name, nameStart)) {
            return yystate();
        }
    }

    ^ \"\@    { yypop(); }
}

<HEREQSTRING> {
    ^ "'@"    { yypop(); }
}

<YYINITIAL, SUBSHELL> {
    /* Don't enter new state if special character is escaped. */
    [`][`\(\)\{\}\"\'\$\#\\]    {}

    /* $# should not start a comment. */
    "$#"    {}

    \$ ? \(    { yypush(SUBSHELL); }
}

<YYINITIAL, DATATYPE, SUBSHELL, STRING, COMMENT, SCOMMENT, QSTRING, HERESTRING,
    HEREQSTRING> {
    {WhspChar}+ |
    [^]    {}
}