/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved.
 * Portions Copyright (c) 2017, Chris Fraire <cfraire@me.com>.
 */

package org.opengrok.indexer.analysis.sh;

import java.io.IOException;
import java.util.Stack;
import org.opengrok.indexer.analysis.JFlexSymbolMatcher;
import org.opengrok.indexer.util.StringUtils;
import org.opengrok.indexer.web.HtmlConsts;
%%
%public
%class ShXref
%extends JFlexSymbolMatcher
%unicode
%int
%char
%include ../CommonLexer.lexh
%include ../CommonXref.lexh
%{
    /*
     * Span class names, pushed and popped in lock-step with the lexer state
     * (see pushSpan() and yypop()). java.util.Stack is kept on purpose:
     * null class names are pushed (e.g. for SUBSHELL), and ArrayDeque does
     * not accept null elements.
     */
    private final Stack<String> styleStack = new Stack<>();

    // State variables for the HEREDOC state. They tell what the stop word is,
    // and whether leading tabs should be removed from the input lines before
    // comparing with the stop word.
    private String heredocStopWord;
    private boolean heredocStripLeadingTabs;

    /**
     * Resets the sh tracked state; {@inheritDoc}
     */
    @Override
    public void reset() {
        super.reset();
        heredocStopWord = null;
        heredocStripLeadingTabs = false;
    }

    /**
     * Clears the span class stack in addition to whatever the superclass
     * clears.
     */
    @Override
    protected void clearStack() {
        super.clearStack();
        styleStack.clear();
    }

    /**
     * Announces a span change at the current offset, pushes the lexer into
     * {@code newState} and remembers {@code className} so that
     * {@link #yypop()} can re-announce the enclosing span later.
     *
     * @param newState the lexical state to enter
     * @param className the span class name to report; may be {@code null}
     * @throws IOException if thrown by the superclass callbacks
     */
    public void pushSpan(int newState, String className) throws IOException {
        onDisjointSpanChanged(className, yychar);
        yypush(newState);
        styleStack.push(className);
    }

    /**
     * Reports a {@code null} span at the current offset, pops the lexer
     * state and the matching span class, and, if an enclosing entry is left
     * on the stack, reports that entry's class name again (which may itself
     * be {@code null}).
     *
     * @throws IOException if thrown by the superclass callbacks
     */
    @Override
    public void yypop() throws IOException {
        onDisjointSpanChanged(null, yychar);
        super.yypop();
        styleStack.pop();

        if (!styleStack.empty()) {
            String style = styleStack.peek();
            onDisjointSpanChanged(style, yychar);
        }
    }

    /**
     * Check the contents of a line to see if it matches the stop word for a
     * here-document. The whole line (without its line terminator) has to be
     * passed; when {@code heredocStripLeadingTabs} is set, leading tab
     * characters are ignored before the comparison.
     *
     * @param line a line in the input file
     * @return true if the line terminates a here-document, false otherwise
     */
    private boolean isHeredocStopWord(String line) {
        // Skip leading tabs if heredocStripLeadingTabs is true.
        int i = 0;
        while (heredocStripLeadingTabs &&
                i < line.length() && line.charAt(i) == '\t') {
            i++;
        }

        // Compare remaining characters on the line with the stop word.
        return line.substring(i).equals(heredocStopWord);
    }

    /**
     * Calls {@code phLOC()} for the current match unless the lexer is in
     * the SCOMMENT state.
     */
    protected void chkLOC() {
        if (yystate() != SCOMMENT) {
            phLOC();
        }
    }
%}

File = {FNameChar}+ "." ([a-zA-Z]+)

/*
 * States:
 * STRING - double-quoted string, ex: "hello, world!"
 * SCOMMENT - single-line comment, ex: # this is a comment
 * QSTRING - single-quoted string, ex: 'hello, world!'
 * SUBSHELL - commands executed in a sub-shell,
 *               example 1: (echo $header; cat file.txt)
 *               example 2 (command substitution): $(cat file.txt)
 * BACKQUOTE - command substitution using back-quotes, ex: `cat file.txt`
 * BRACEGROUP - group of commands in braces, possibly ksh command substitution
 *              extension, ex: ${ cat file.txt; }
 * HEREDOC - here-document, example: cat<<EOF ... EOF
 */
%state STRING SCOMMENT QSTRING SUBSHELL BACKQUOTE BRACEGROUP HEREDOC

%include ../Common.lexh
%include ../CommonURI.lexh
%include ../CommonPath.lexh
%include ../CommonLaxFPath.lexh
%include Sh.lexh
%%
<STRING>{
 "$" {Identifier}    {
    chkLOC();
    String id = yytext();
    onRefsTermMatched(id, yychar);
 }

  /* This rule matches associative arrays inside strings,
     for instance "${array["string"]}". Push a new STRING
     state on the stack to prevent premature exit from the
     STRING state. */
  \$\{ {Identifier} \[\"    {
    chkLOC();
    onNonSymbolMatched(yytext(), yychar);
    pushSpan(STRING, HtmlConsts.STRING_CLASS);
  }
}

<YYINITIAL, SUBSHELL, BACKQUOTE, BRACEGROUP> {
\$ ? {Identifier}    {
    chkLOC();
    String id = yytext();
    onFilteredSymbolMatched(id, yychar, Consts.shkwd);
}

{Number}    {
    chkLOC();
    String lastClassName = getDisjointSpanClassName();
    onDisjointSpanChanged(HtmlConsts.NUMBER_CLASS, yychar);
    onNonSymbolMatched(yytext(), yychar);
    onDisjointSpanChanged(lastClassName, yychar);
}

 \$ ? \"    {
    chkLOC();
    pushSpan(STRING, HtmlConsts.STRING_CLASS);
    onNonSymbolMatched(yytext(), yychar);
 }
 \$ ? \'    {
    chkLOC();
    pushSpan(QSTRING, HtmlConsts.STRING_CLASS);
    onNonSymbolMatched(yytext(), yychar);
 }
 "#"    {
    pushSpan(SCOMMENT, HtmlConsts.COMMENT_CLASS);
    onNonSymbolMatched(yytext(), yychar);
 }

 // Recognize here-documents. At least a subset of them.
 "<<" "-"? {WhspChar}* {Identifier} {WhspChar}*    {
    chkLOC();
    String text = yytext();
    onNonSymbolMatched(text, yychar);

    // The match is at least "<<" plus one identifier character, so index 2
    // always exists; a '-' there selects the tab-stripping variant.
    heredocStripLeadingTabs = (text.charAt(2) == '-');
    heredocStopWord = text.substring(heredocStripLeadingTabs ? 3 : 2).trim();
    pushSpan(HEREDOC, HtmlConsts.STRING_CLASS);
 }

 // Any sequence of more than two < characters should not start HEREDOC. Use
 // this rule to catch them before the HEREDOC rule.
 "<<" "<" +    {
    chkLOC();
    onNonSymbolMatched(yytext(), yychar);
 }

 {Unary_op_req_lookahead} / \W    {
    chkLOC();
    onNonSymbolMatched(yytext(), yychar);
 }
 {Unary_op_req_lookahead} $    {
    chkLOC();
    onNonSymbolMatched(yytext(), yychar);
 }
 {WhspChar}+ {Unary_op_char} / ")"    {
    chkLOC();
    onNonSymbolMatched(yytext(), yychar);
 }
 {Binary_op}    {
    chkLOC();
    onNonSymbolMatched(yytext(), yychar);
 }
}

<STRING> {
 \\[\"\$\`\\] |
 \" {WhspChar}* \"    { chkLOC(); onNonSymbolMatched(yytext(), yychar); }
 \"    { chkLOC(); onNonSymbolMatched(yytext(), yychar); yypop(); }
 \$\(    {
    chkLOC();
    pushSpan(SUBSHELL, null);
    onNonSymbolMatched(yytext(), yychar);
 }
 [`]    {
    chkLOC();
    pushSpan(BACKQUOTE, null);
    onNonSymbolMatched(yytext(), yychar);
 }

 /* Bug #15661: Recognize ksh command substitution within strings. According
  * to ksh man page http://www2.research.att.com/~gsf/man/man1/ksh-man.html#Command%20Substitution
  * the opening brace must be followed by a blank.
  */
 "${" / {WhspChar} | {EOL}    {
    chkLOC();
    pushSpan(BRACEGROUP, null);
    onNonSymbolMatched(yytext(), yychar);
 }
}

<QSTRING> {
 \\[\'] |
 \' {WhspChar}* \'    { chkLOC(); onNonSymbolMatched(yytext(), yychar); }
 \'    { chkLOC(); onNonSymbolMatched(yytext(), yychar); yypop(); }
}

<SCOMMENT> {
{WhspChar}*{EOL}    {
    yypop();
    onEndOfLineMatched(yytext(), yychar);
 }
}

<SUBSHELL> {
  \)    { chkLOC(); onNonSymbolMatched(yytext(), yychar); yypop(); }
}

<BACKQUOTE> {
  [`]    { chkLOC(); onNonSymbolMatched(yytext(), yychar); yypop(); }
}

<BRACEGROUP> {
 /* Bug #15661: Terminate a ksh brace group. According to ksh man page
  * http://www2.research.att.com/~gsf/man/man1/ksh-man.html#Command%20Substitution
  * the closing brace must be on beginning of line, or it must be preceded by
  * a semi-colon and (optionally) whitespace.
  */
  ^ {WhspChar}* \}    {
    chkLOC();
    onNonSymbolMatched(yytext(), yychar);
    yypop();
  }
  ; {WhspChar}* \}    {
    chkLOC();
    onNonSymbolMatched(yytext(), yychar);
    yypop();
  }
}

<HEREDOC> {
  /*
   * Match one whole line (everything up to, but not including, the line
   * terminator). isHeredocStopWord() must see the complete line: the stop
   * word only ends the here-document when it is the entire line, and the
   * leading-tab handling for "<<-" needs the tabs to be part of the text.
   * Matching individual words here would end the document at any word that
   * happens to equal the stop word.
   */
  [^\n\r]+    {
    chkLOC();
    String line = yytext();
    // The state is popped before the text is passed on, so the stop word
    // itself is reported after the HEREDOC span has been left.
    if (isHeredocStopWord(line)) {
        yypop();
    }
    onNonSymbolMatched(line, yychar);
  }

  {EOL}    { onEndOfLineMatched(yytext(), yychar); }
  // Fallback for a lone line-terminator character that {EOL} might not
  // cover ({EOL} is defined in an included file -- TODO confirm and drop
  // this rule if it turns out to be unreachable).
  \s    { onNonSymbolMatched(yytext(), yychar); }
}

<YYINITIAL, SUBSHELL, BACKQUOTE, BRACEGROUP> {
  /* Don't enter new state if special character is escaped. */
  \\[`\)\(\{\"\'\$\#\\]    { chkLOC(); onNonSymbolMatched(yytext(), yychar); }

  /* $# should not start a comment. */
  "$#"    { chkLOC(); onNonSymbolMatched(yytext(), yychar); }

  \$ ? \(    {
    chkLOC();
    pushSpan(SUBSHELL, null);
    onNonSymbolMatched(yytext(), yychar);
  }
  [`]    {
    chkLOC();
    pushSpan(BACKQUOTE, null);
    onNonSymbolMatched(yytext(), yychar);
  }

 /* Bug #15661: Recognize ksh command substitution within strings. According
  * to ksh man page http://www2.research.att.com/~gsf/man/man1/ksh-man.html#Command%20Substitution
  * the opening brace must be followed by a blank. Make the initial dollar sign
  * optional so that we get the nesting right and don't terminate the brace
  * group too early if the ${ cmd; } expression contains nested { cmd; } groups.
  */
  \$ ? \{ / {WhspChar} | {EOL}    {
    chkLOC();
    pushSpan(BRACEGROUP, null);
    onNonSymbolMatched(yytext(), yychar);
  }
}

<YYINITIAL, SUBSHELL, BACKQUOTE, BRACEGROUP, STRING, SCOMMENT, QSTRING> {
{File}    {
    chkLOC();
    String path = yytext();
    onFilelikeMatched(path, yychar);
}

{RelaxedMiddleFPath}    {
    chkLOC();
    onPathlikeMatched(yytext(), '/', false, yychar);
}

{WhspChar}*{EOL}    { onEndOfLineMatched(yytext(), yychar); }
[[\s]--[\n]]    { onNonSymbolMatched(yytext(), yychar); }
[^\n]    { chkLOC(); onNonSymbolMatched(yytext(), yychar); }
}

<STRING, SCOMMENT, QSTRING> {
{FNameChar}+ "@" {FNameChar}+ "." {FNameChar}+    {
    chkLOC();
    onEmailAddressMatched(yytext(), yychar);
}
}

<STRING, SCOMMENT> {
    {BrowseableURI}    {
        chkLOC();
        onUriMatched(yytext(), yychar);
    }
}

<QSTRING> {
    {BrowseableURI}    {
        chkLOC();
        onUriMatched(yytext(), yychar, StringUtils.APOS_NO_BSESC);
    }
}