xref: /OpenGrok/opengrok-indexer/src/main/jflex/analysis/haskell/HaskellSymbolTokenizer.lex (revision d219b4cea555a12b602d2d5518daa22134ad4879)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2015, 2021, Oracle and/or its affiliates. All rights reserved.
22  * Portions Copyright (c) 2017, Chris Fraire <cfraire@me.com>.
23  */
24 
25 /*
26  * Tokenizes Haskell source and reports its non-keyword identifiers as symbols
27  */
28 
29 package org.opengrok.indexer.analysis.haskell;
30 
31 import java.io.IOException;
32 import org.opengrok.indexer.analysis.JFlexSymbolMatcher;
33 
34 /**
35  * @author Harry Pan
36  */
37 %%
38 %public
39 %class HaskellSymbolTokenizer
40 %extends JFlexSymbolMatcher
41 %unicode
42 %int
43 %include ../CommonLexer.lexh
44 %char
45 %{
    /**
     * Block-comment nesting depth: incremented for each "{-" and decremented
     * for each "-}" seen by the rules below.
     */
46     private int nestedComment;
47 
    /**
     * Resets the tokenizer: delegates to the superclass reset and then clears
     * the block-comment nesting depth so a new input starts outside any comment.
     */
    @Override
48     public void reset() {
49         super.reset();
50         nestedComment = 0;
51     }
52 %}
53 
54 %state STRING CHAR COMMENT BCOMMENT
55 
56 %include ../Common.lexh
57 %include Haskell.lexh
58 %%
59 
// Rules for the initial state, i.e. text that is not inside a string literal,
// a character literal or a comment.
60 <YYINITIAL> {
61     {Identifier} {
62         String id = yytext();
        // Only identifiers that are not in the keyword set are reported. For a
        // keyword the action ends without a return, so the scanner moves on to
        // the next match.
63         if (!Consts.kwd.contains(id)) {
            // yychar (enabled by %char) is passed along with the matched text.
64             onSymbolMatched(id, yychar);
65             return yystate();
66         }
67     }
    // Numeric literals are consumed and ignored.
68     {Number}    {}
    // Opening delimiters: switch to the state that skips the corresponding body.
69     \"   { yybegin(STRING);   }
70     \'   { yybegin(CHAR);     }
71     "--" { yybegin(COMMENT);  }
72 
    // NOTE(review): NotComments is defined outside this file (presumably in
    // Haskell.lexh); judging by its name it covers text that resembles a comment
    // opener but is not one -- confirm. Its matches are consumed and ignored.
73     {NotComments}    {}
74 }
75 
// Rules for the inside of a double-quoted string literal.
76 <STRING> {
    // An escaped double quote or escaped backslash is consumed as a unit, so
    // the escaped quote does not end the string.
77     \\[\"\\]    {}
    // An unescaped double quote ends the string.
78     \"   { yybegin(YYINITIAL); }
79 }
80 
// Rules for the inside of a single-quoted character literal.
81 <CHAR> {    // a prime that is part of an identifier needs no handling here: it is covered by the identifier rule above
    // An escaped single quote or escaped backslash is consumed as a unit, so
    // the escaped quote does not end the literal.
82     \\[\'\\]    {}
    // An unescaped single quote ends the literal.
83     \'   { yybegin(YYINITIAL); }
84 }
85 
// Rules for a "--" line comment: it lasts until the end of the line.
86 <COMMENT> {
    // The end-of-line match returns the scanner to the initial state.
87     {EOL}    { yybegin(YYINITIAL); }
88 }
89 
// Block-comment opener, recognized both in the initial state and inside an
// already open block comment.
90 <YYINITIAL, BCOMMENT> {
91     "{-"    {
        // The depth is always incremented; the state only changes when the
        // depth was zero before this opener (post-increment comparison).
92         if (nestedComment++ == 0) {
93             yybegin(BCOMMENT);
94         }
95     }
96 }
97 
// Block-comment closer, recognized only inside a block comment.
98 <BCOMMENT> {
99     "-}"    {
        // The depth is decremented first; the scanner returns to the initial
        // state only when the depth reaches zero.
100         if (--nestedComment == 0) {
101             yybegin(YYINITIAL);
102         }
103     }
104 }
105 
106 // fallback: this rule has no state list; a run of whitespace, or any other single character, is consumed and ignored
107 {WhspChar}+ |
108 [^] {}
109