xref: /OpenGrok/opengrok-indexer/src/main/jflex/analysis/scala/ScalaSymbolTokenizer.lex (revision d219b4cea555a12b602d2d5518daa22134ad4879)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved.
22  * Portions Copyright (c) 2017, Chris Fraire <cfraire@me.com>.
23  */
24 
25 /*
26  * Gets Scala symbols - ignores comments, keywords and string contents (except $identifier references inside interpolated strings)
27  */
28 
29 package org.opengrok.indexer.analysis.scala;
30 
31 import org.opengrok.indexer.analysis.JFlexSymbolMatcher;
32 %%
33 %public
34 %class ScalaSymbolTokenizer
35 %extends JFlexSymbolMatcher
36 %unicode
37 %int
38 %include ../CommonLexer.lexh
39 %char
40 %{
41     private int nestedComment;  // nesting depth of the multi-line comments currently open
42     /** Resets the superclass state and clears the comment nesting depth. */
43     @Override
44     public void reset() {
45         super.reset();
46         nestedComment = 0;
47     }
48 %}
49 
50 /*
51  * STRING : string literal
52  * ISTRING : string literal with interpolation
53  * MSTRING : multi-line string literal
54  * IMSTRING : multi-line string literal with interpolation
55  * QSTRING : character literal
56  * SCOMMENT : single-line comment
57  * COMMENT : multi-line comment
58  */
59 %state STRING ISTRING MSTRING IMSTRING QSTRING SCOMMENT COMMENT
60 
61 %include ../Common.lexh
62 %include Scala.lexh
63 %%
64 
65 <YYINITIAL> {
66 {Identifier} {String id = yytext();  // plain identifier: reported unless it is a keyword
67                 if(!Consts.kwd.contains(id)){
68                         onSymbolMatched(id, yychar);
69                         return yystate(); }  // keywords take no return, so nothing is reported for them
70               }
71 
72  {BacktickIdentifier} {
73     String capture = yytext();
74     String id = capture.substring(1, capture.length() - 1);  // drop the first and last character (presumably the enclosing backticks)
75     if (!Consts.kwd.contains(id)) {
76         onSymbolMatched(id, yychar + 1);  // +1: the reported symbol starts one character into the match
77         return yystate();
78     }
79  }
80 
81  {OpSuffixIdentifier}    {
82     String capture = yytext();
83     int uoff = capture.lastIndexOf("_");  // NOTE(review): assumes the match always contains an underscore; otherwise uoff is -1 and id is empty
84     // ctags include the "_" in the symbol, so follow that too.
85     String id = capture.substring(0, uoff + 1);  // everything up to and including the last underscore
86     if (!Consts.kwd.contains(id)) {
87         onSymbolMatched(id, yychar);
88         return yystate();
89     }
90  }
91 
92  {Number}    { /* matched and discarded: no symbol is reported */ }
93  ([fs] | "raw") \"    { yybegin(ISTRING); /* prefix f, s or raw before a double quote: interpolated string; listed first so it wins a same-length tie with the next rule */ }
94  {Identifier}? \"    { yybegin(STRING); /* double quote with any other optional identifier prefix: plain string */ }
95  \'     { yybegin(QSTRING); /* single quote: character literal state */ }
96  ([fs] | "raw") \"\"\"    { yybegin(IMSTRING); /* prefix f, s or raw before three double quotes: interpolated multi-line string */ }
97  {Identifier}? \"\"\" { yybegin(MSTRING); /* three double quotes with any other optional prefix: multi-line string */ }
98  "/*" "*"+ "/"    {
99     // noop: a comment holding nothing but stars opens and closes within this single match, so no state change is needed
100  }
101  "//"   { yybegin(SCOMMENT); /* enter the single-line comment state */ }
102 }
103 
104 <STRING, ISTRING> {
105  \\[\"\\]    { /* backslash followed by a double quote or a backslash: consumed as a pair so the quote does not end the string */ }
106  \"     { yybegin(YYINITIAL); /* unescaped double quote: the string ends */ }
107 }
108 
109 <ISTRING, IMSTRING> {
110     /*
111      * TODO : support "arbitrary expressions" inside curly brackets
112      */
113     \$ {Identifier}    {
114         // The match is a dollar sign followed by a name; only the name is reported.
115         final String name = yytext().substring(1);
116         if (!Consts.kwd.contains(name)) {
117             onSymbolMatched(name, yychar + 1);
118             return yystate();
119         }
120     }
121 }
122 
123 <QSTRING> {
124  \\[\'\\]    { /* backslash followed by a single quote or a backslash: consumed as a pair so the quote does not end the literal */ }
125  \'     { yybegin(YYINITIAL); /* unescaped single quote: the literal ends */ }
126 }
127 
128 <MSTRING, IMSTRING> {
129  /*
130   * For multi-line string, "Unicode escapes work as everywhere else, but none
131   * of the escape sequences [in 'Escape Sequences'] are interpreted."
132   */
133  \"\"\" \"*    {
134     yybegin(YYINITIAL);  // a run of more than three quotes still ends the string here; the extra quotes are string content and must not reopen a string
135  }
136 }
137 <YYINITIAL, COMMENT> {
138     "/*" "*"*    {
139         final boolean outermost = (nestedComment == 0);  // true when no comment is open yet
140         nestedComment++;
141         if (outermost) { yybegin(COMMENT); }  // deeper openers are already in COMMENT
142     }
143 }
144 
145 <COMMENT> {
146  /* An empty nested comment (slash, stars, slash) is consumed whole so that nestedComment stays balanced. */
147  "/*" "*"+ "/"    {}
148  "*/"    {
149     if (--nestedComment == 0) { yybegin(YYINITIAL); }  // leave COMMENT only when the outermost comment closes
150  }
151 }
152 
153 <SCOMMENT> {
154 {EOL}      { yybegin(YYINITIAL); /* end of line: the single-line comment is over */ }
155 }
156 
157 <YYINITIAL> {
158  {OpIdentifier}    {
159     // noop: the match is consumed and no symbol is reported. NOTE(review): declared after the comment rules, presumably so those win same-length ties - confirm against Scala.lexh
160  }
161 }
162 
163 <YYINITIAL, STRING, ISTRING, MSTRING, IMSTRING, COMMENT, SCOMMENT, QSTRING> {
164 {WhspChar}+ |
165 [^]    { /* fallback for every state: whitespace runs and any single character are consumed without reporting anything */ }
166 }
167