/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved.
 * Portions Copyright (c) 2017, Chris Fraire <cfraire@me.com>.
 */

/*
 * Gets Scala symbols - ignores comments, strings, keywords
 */

package org.opengrok.indexer.analysis.scala;

import org.opengrok.indexer.analysis.JFlexSymbolMatcher;
%%
%public
%class ScalaSymbolTokenizer
%extends JFlexSymbolMatcher
%unicode
%int
%include ../CommonLexer.lexh
%char
%{
    /**
     * Current depth of multi-line comment nesting; zero when the scanner is
     * not inside a multi-line comment.
     */
    private int nestedComment;

    /**
     * Resets the superclass state and clears the comment nesting depth so
     * that the instance can be reused for new input.
     */
    @Override
    public void reset() {
        super.reset();
        nestedComment = 0;
    }
%}

/*
 * STRING : string literal
 * ISTRING : string literal with interpolation
 * MSTRING : multi-line string literal
 * IMSTRING : multi-line string literal with interpolation
 * QSTRING : character literal
 * SCOMMENT : single-line comment
 * COMMENT : multi-line comment
 */
%state STRING ISTRING MSTRING IMSTRING QSTRING SCOMMENT COMMENT

%include ../Common.lexh
%include Scala.lexh
%%

<YYINITIAL> {
    /*
     * Plain identifier: publish it as a symbol unless it is a keyword.
     * When nothing is published the action falls through and scanning
     * simply continues with the next token.
     */
    {Identifier}    {
        String id = yytext();
        if (!Consts.kwd.contains(id)) {
            onSymbolMatched(id, yychar);
            return yystate();
        }
    }

    /*
     * Delimited identifier: the first and last characters of the match are
     * stripped, so the reported offset is one past the start of the match.
     */
    {BacktickIdentifier}    {
        String capture = yytext();
        String id = capture.substring(1, capture.length() - 1);
        if (!Consts.kwd.contains(id)) {
            onSymbolMatched(id, yychar + 1);
            return yystate();
        }
    }

    /*
     * Identifier with an operator suffix: only the part up to and including
     * the last underscore is published.
     */
    {OpSuffixIdentifier}    {
        String capture = yytext();
        int uoff = capture.lastIndexOf("_");
        // ctags include the "_" in the symbol, so follow that too.
        String id = capture.substring(0, uoff + 1);
        if (!Consts.kwd.contains(id)) {
            onSymbolMatched(id, yychar);
            return yystate();
        }
    }

    /* Numbers are consumed whole and never published. */
    {Number}    {}

    /*
     * String openers. Each interpolating form is listed before its generic
     * prefixed form; JFlex prefers the earlier rule when two rules match the
     * same length, so the interpolating state wins on such a tie.
     */
    ([fs] | "raw") \"    { yybegin(ISTRING); }
    {Identifier}? \"    { yybegin(STRING); }
    \'    { yybegin(QSTRING); }
    ([fs] | "raw") \"\"\"    { yybegin(IMSTRING); }
    {Identifier}? \"\"\"    { yybegin(MSTRING); }
    "//"    { yybegin(SCOMMENT); }
}

<STRING, ISTRING> {
    /* An escaped quote or backslash must not terminate the literal. */
    \\[\"\\]    {}
    \"    { yybegin(YYINITIAL); }
}

<ISTRING, IMSTRING> {
    /*
     * TODO : support "arbitrary expressions" inside curly brackets
     */
    /*
     * Dollar-prefixed identifier inside an interpolated string: publish the
     * identifier without the dollar sign, hence the offset of plus one.
     */
    \$ {Identifier}    {
        String capture = yytext();
        String id = capture.substring(1);
        if (!Consts.kwd.contains(id)) {
            onSymbolMatched(id, yychar + 1);
            return yystate();
        }
    }
}

<QSTRING> {
    /* An escaped apostrophe or backslash must not terminate the literal. */
    \\[\'\\]    {}
    \'    { yybegin(YYINITIAL); }
}

<MSTRING, IMSTRING> {
    /*
     * For multi-line string, "Unicode escapes work as everywhere else, but none
     * of the escape sequences [in 'Escape Sequences'] are interpreted."
     */
    \"\"\"    {
        yybegin(YYINITIAL);
    }
}

<YYINITIAL, COMMENT> {
    /*
     * A multi-line comment that opens and closes with nothing but asterisks
     * in between. It is consumed whole and leaves the nesting depth alone.
     * Without this rule the opener below would take the asterisks, leaving a
     * lone slash that cannot close the comment. The rule is also active in
     * COMMENT so that such an empty comment nested inside another comment
     * does not leave the depth permanently off by one.
     */
    "/*" "*"+ "/"    {
        // noop
    }

    /*
     * Comment opener. The state is switched only by the outermost opener;
     * nested openers just increase the depth.
     */
    "/*" "*"*    {
        if (nestedComment++ == 0) {
            yybegin(COMMENT);
        }
    }
}

<COMMENT> {
    /* Comment closer: leave COMMENT once the outermost level is closed. */
    "*/"    {
        if (--nestedComment == 0) {
            yybegin(YYINITIAL);
        }
    }
}

<SCOMMENT> {
    /* A single-line comment ends at the end of the line. */
    {EOL}    { yybegin(YYINITIAL);}
}

<YYINITIAL> {
    /*
     * Operator identifiers are consumed but never published. This group is
     * placed after the comment rules above so that those rules are preferred
     * whenever the match lengths are equal.
     */
    {OpIdentifier}    {
        // noop
    }
}

<YYINITIAL, STRING, ISTRING, MSTRING, IMSTRING, COMMENT, SCOMMENT, QSTRING> {
    /* Fallback: discard whitespace and any otherwise unmatched character. */
    {WhspChar}+ |
    [^]    {}
}