xref: /OpenGrok/opengrok-indexer/src/main/jflex/analysis/kotlin/KotlinSymbolTokenizer.lex (revision d219b4cea555a12b602d2d5518daa22134ad4879)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved.
22  * Portions Copyright (c) 2017, Chris Fraire <cfraire@me.com>.
23  */
24 
25 /*
26  * Gets Kotlin symbols - ignores comments, strings, keywords
27  */
28 
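29 // Block comments can be nested in Kotlin; yybegin() alone would track only one level, so a depth counter (nestedComment) is kept alongside the COMMENT state.
30 // Strings get no such tracking: only one string state is active at a time, so nesting inside strings is not followed.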
31 
32 package org.opengrok.indexer.analysis.kotlin;
33 
34 import org.opengrok.indexer.analysis.JFlexSymbolMatcher;
35 %%
36 %public
37 %class KotlinSymbolTokenizer
38 %extends JFlexSymbolMatcher
39 %unicode
40 %buffer 32766
41 %int
42 %include ../CommonLexer.lexh
43 %char
44 %{
45     private int nestedComment; // count of currently open "/*"; the COMMENT state is left only when it drops back to 0
46     /** Delegates to {@code super.reset()} and then clears the nested-comment depth. */
47     @Override
48     public void reset() {
49         super.reset();
50         nestedComment = 0; // start again outside of any block comment
51     }
52 %}
53 
54 %state STRING COMMENT SCOMMENT QSTRING TSTRING
55 
56 %include ../Common.lexh
57 %include Kotlin.lexh
58 %%
59 
60 /* TODO : support identifiers escaped by ` `*/
61 <YYINITIAL> {
62  {Identifier}    {
63         final String symbol = yytext();
64         final boolean keyword = Consts.kwd.contains(symbol);
65         if (!keyword) { onSymbolMatched(symbol, yychar); return yystate(); } // keywords are not reported
66  }
67  {Number}    { /* numeric literal: nothing to report */ }
68  \"          { yybegin(STRING); /* ordinary string literal opens */ }
69  \'          { yybegin(QSTRING); /* single-quoted literal opens */ }
70  \"\"\"      { yybegin(TSTRING); /* triple-quoted (raw) string opens */ }
71  "//"        { yybegin(SCOMMENT); /* line comment opens */ }
72 
73 }
74 
75 <STRING> {
76  \\[\"\$\\]    { /* backslash-escaped quote, dollar or backslash: consumed as a pair, string stays open */ }
77  \"     { yybegin(YYINITIAL); /* unescaped quote closes the string */ }
78 }
79 
80 <QSTRING> {
81  \\[\'\\]    { /* backslash-escaped single quote or backslash: consumed as a pair, literal stays open */ }
82  \'     { yybegin(YYINITIAL); /* unescaped single quote closes the literal */ }
83 }
84 
85 <TSTRING> {
86  /*
87   * "raw string ... doesn't support backslash escaping", so no escape rule is declared here:
88   * the only thing matched in this group is the closing triple quote. */
89   \"\"\"     { yybegin(YYINITIAL); /* closing triple quote ends the raw string */ }
90 }
91 
92 <STRING, TSTRING> {
93     /*
94      * TODO : support template expressions inside curly brackets
95      */
96     \$ {Identifier}    {
97         String capture = yytext();
98         // capture is the "$" sigil followed by the identifier; only the identifier is a symbol
99         String id = capture.substring(1);
100         if (!Consts.kwd.contains(id)) {
101             onSymbolMatched(id, yychar + 1); // + 1 moves the offset past the "$"
102             return yystate();
103        }
104     }
105 }
106 
107 <YYINITIAL, COMMENT> {
108     "/*"    {
109         // every opener deepens the nesting; only the outermost one switches state
110         final boolean outermost = nestedComment == 0;
111         nestedComment += 1;
112         if (outermost) { yybegin(COMMENT); }
113     }
114 }
114 
115 <COMMENT> {
116     "*/"    {
117         nestedComment -= 1;
118         // leave COMMENT only once the outermost comment has been closed
119         if (nestedComment == 0) { yybegin(YYINITIAL); }
120     }
121 }
122 
123 <SCOMMENT> {
124 {EOL}      { yybegin(YYINITIAL); /* a line comment ends at the end of the line */ }
125 }
126 
127 <YYINITIAL, STRING, COMMENT, SCOMMENT, QSTRING, TSTRING> {
128 {WhspChar}+ |
129 [^]    { /* fallback for every state: whitespace runs and any otherwise unmatched character are discarded */ }
130 }
131