xref: /OpenGrok/opengrok-indexer/src/main/jflex/analysis/rust/RustSymbolTokenizer.lex (revision d219b4cea555a12b602d2d5518daa22134ad4879)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved.
 * Portions Copyright (c) 2016 Nikolay Denev.
 * Portions Copyright (c) 2017, Chris Fraire <cfraire@me.com>.
 */

/*
 * Gets Rust symbols - ignores comments, strings, keywords
 */
29 
package org.opengrok.indexer.analysis.rust;

import org.opengrok.indexer.analysis.JFlexSymbolMatcher;
%%
/*
 * The generated scanner is the public class RustSymbolTokenizer, which
 * extends JFlexSymbolMatcher.
 */
%public
%class RustSymbolTokenizer
%extends JFlexSymbolMatcher
/* Scan over the full Unicode character set. */
%unicode
/* The scanning method returns int. */
%int
%include ../CommonLexer.lexh
/* Enable character counting so that yychar is available to the actions. */
%char
%{
  /**
   * Stores the number of hashes beginning and ending a raw string or raw byte
   * string. E.g., r##"blah"## has rawHashCount == 2.
   */
  int rawHashCount;

  /**
   * Current nesting depth of block comments. It is incremented by every
   * block-comment opener and decremented by every closer; the scanner
   * leaves the COMMENT state when the depth returns to zero.
   */
  int nestedComment;

  /**
   * Resets the scanner through the superclass and then clears the
   * raw-string hash count and the block-comment nesting depth.
   */
  @Override
  public void reset() {
      super.reset();
      rawHashCount = 0;
      nestedComment = 0;
  }
%}
57 
/*
 * Lexical states used in addition to YYINITIAL:
 *   STRING   - inside an ordinary or byte string literal
 *   RSTRING  - inside a raw (byte) string literal
 *   COMMENT  - inside a block comment, which may nest
 *   SCOMMENT - inside a line comment
 */
%state STRING RSTRING COMMENT SCOMMENT

/*
 * The macros referenced by the rules (Identifier, Number, HEXDIG, WhspChar,
 * EOL) are not defined in this file; presumably they come from the includes
 * of this specification - confirm against the .lexh files.
 */
%include ../Common.lexh
%include Rust.lexh
%%
63 
<YYINITIAL> {
/*
 * An identifier that is not listed in Consts.kwd is reported through
 * onSymbolMatched together with its character offset, and the current
 * lexical state is returned to the caller. Keywords fall through silently.
 */
{Identifier} {
    String id = yytext();
    if (!Consts.kwd.contains(id)) {
        onSymbolMatched(id, yychar);
        return yystate();
    }
 }
 /* Numeric literals are consumed without action. */
 {Number}    {}
 /* An ordinary or byte string literal opens here. */
 [b]?\"     { yybegin(STRING); }
 /*
  * A raw (byte) string opens here. The hash count of the opener is stored
  * so that the RSTRING state can recognize the matching terminator.
  */
 [b]?[r][#]*\" {
    yybegin(RSTRING);
    rawHashCount = RustUtils.countRawHashes(yytext());
 }
 /*
  * Character and byte literals are consumed whole: a plain or
  * backslash-escaped character, a hex escape with two hex digits, or a
  * Unicode escape with one to six hex digits in braces.
  */
 [b]?\' ([^\n\r\'\\] | \\[^\n\r]) \' |
 [b]?\' \\[xX]{HEXDIG}{HEXDIG} \' |
 [b]?\' \\[uU]\{ {HEXDIG}{1,6} \}\'    {}
 /* A block comment opens; count it so that nested comments close correctly. */
 "/*"   {
    ++nestedComment;
    yybegin(COMMENT);
 }
 /* A line comment opens. */
 "//"   { yybegin(SCOMMENT); }
}
87 
<STRING> {
 /* An escaped quote or escaped backslash is skipped so it cannot end the string. */
 \\[\"\\]    {}
 /* An unescaped quote ends the string literal. */
 \"     { yybegin(YYINITIAL); }
}
92 
<RSTRING> {
    /*
     * A quote followed by any number of hashes is a candidate terminator.
     * RustUtils.isRawEnding decides, given the hash count stored when the
     * raw string was opened, whether the string really ends here.
     */
    \"[#]*    {
        String capture = yytext();
        if (RustUtils.isRawEnding(capture, rawHashCount)) {
            yybegin(YYINITIAL);
            // Anything matched beyond the quote plus rawHashCount hashes is
            // not part of the terminator, so it is returned to the input.
            int excess = capture.length() - 1 - rawHashCount;
            if (excess > 0) {
                yypushback(excess);
            }
        }
    }
}
103 
<STRING, RSTRING> {
    /*
     * A line ending inside either kind of string, together with any
     * whitespace directly before it, is consumed without action.
     */
    {WhspChar}*{EOL}    {
        // no-op
    }
}
109 
<COMMENT> {
    /*
     * A closer ends one nesting level; scanning returns to YYINITIAL only
     * when the outermost block comment has been closed.
     */
    "*/"    {
        if (--nestedComment == 0) {
            yybegin(YYINITIAL);
        }
    }
    /* An opener inside a block comment deepens the nesting. */
    "/*"    { ++nestedComment; }
}
114 
<SCOMMENT> {
/* Whitespace inside a line comment is consumed without action. */
{WhspChar}+    {}
/* The end of the line ends the comment. */
{EOL}      { yybegin(YYINITIAL);}
}
119 
<YYINITIAL, STRING, RSTRING, COMMENT, SCOMMENT> {
/* Any character not matched by another rule is consumed without action, in every state. */
[^]    {}
}
123