xref: /OpenGrok/opengrok-indexer/src/main/jflex/analysis/haskell/HaskellXref.lex (revision d219b4cea555a12b602d2d5518daa22134ad4879)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2015, 2021, Oracle and/or its affiliates. All rights reserved.
 * Portions Copyright (c) 2017, Chris Fraire <cfraire@me.com>.
 */

/*
 * Cross reference a Haskell file
 */

package org.opengrok.indexer.analysis.haskell;

import java.io.IOException;
import org.opengrok.indexer.analysis.JFlexSymbolMatcher;
import org.opengrok.indexer.web.HtmlConsts;

/**
 * @author Harry Pan
 */
%%
/* Generate a public scanner class named HaskellXref that extends JFlexSymbolMatcher. */
%public
%class HaskellXref
%extends JFlexSymbolMatcher
/*
 * Scan Unicode input, make the scanning method return int, and keep the
 * character offset of the current match in yychar (the rules below pass
 * yychar to every on...() callback).
 */
%unicode
%int
%char
/* Shared declarations from the parent directory; their contents are not visible in this file. */
%include ../CommonLexer.lexh
%include ../CommonXref.lexh
%{
    /**
     * Current nesting depth of Haskell block comments. The lexer rules
     * increment it on each "{-" and decrement it on each "-}"; zero means
     * the scanner is not inside a block comment.
     */
    private int nestedComment;

    /**
     * Resets the scanner through the superclass and then clears the
     * block-comment nesting depth.
     */
    @Override
    public void reset() {
        super.reset();
        nestedComment = 0;
    }

    /**
     * Reports a disjoint-span change with a {@code null} class at the
     * current offset and then pops the lexer state through the superclass.
     * NOTE(review): the {@code null} class presumably closes whatever span
     * (string, comment) the pushed state had opened -- confirm against
     * JFlexSymbolMatcher.
     *
     * @throws IOException declared for the span-change callback and the
     * superclass pop
     */
    @Override
    public void yypop() throws IOException {
        onDisjointSpanChanged(null, yychar);
        super.yypop();
    }

    /**
     * Calls {@code phLOC()} unless the scanner is currently in the COMMENT
     * or BCOMMENT state, so text matched inside comments never triggers it.
     * NOTE(review): phLOC() presumably records a physical line of code --
     * confirm in the superclass.
     */
    protected void chkLOC() {
        switch (yystate()) {
            case COMMENT:
            case BCOMMENT:
                // Inside a line or block comment: deliberately do nothing.
                break;
            default:
                phLOC();
                break;
        }
    }
%}

/*
 * Lexical states used in addition to YYINITIAL. They are declared with
 * %state (inclusive), so rules written without a state list are also active
 * while the scanner is in any of them.
 */
%state STRING CHAR COMMENT BCOMMENT

/*
 * Macro definitions referenced by the rules below (Identifier, Number,
 * NotComments, WhspChar, EOL, FPath, FNameChar, BrowseableURI) are not
 * defined in this file; presumably they come from these includes.
 */
%include ../Common.lexh
%include ../CommonURI.lexh
%include ../CommonPath.lexh
%include Haskell.lexh
%%
<YYINITIAL> {
    /*
     * Identifiers are reported through onFilteredSymbolMatched() together
     * with Consts.kwd. NOTE(review): Consts.kwd is presumably the Haskell
     * keyword set used to tell keywords from ordinary symbols -- confirm.
     */
    {Identifier} {
        chkLOC();
        String id = yytext();
        onFilteredSymbolMatched(id, yychar, Consts.kwd);
    }
    /* A number is emitted as plain text wrapped in its own NUMBER_CLASS span. */
    {Number}     {
        chkLOC();
        onDisjointSpanChanged(HtmlConsts.NUMBER_CLASS, yychar);
        onNonSymbolMatched(yytext(), yychar);
        onDisjointSpanChanged(null, yychar);
    }
    /* Opening quotation mark: enter STRING and open a STRING_CLASS span. */
    \"           {
        chkLOC();
        yypush(STRING);
        onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
        onNonSymbolMatched(yytext(), yychar);
    }
    /* Opening apostrophe: enter CHAR; it shares STRING_CLASS with strings. */
    \'           {
        chkLOC();
        yypush(CHAR);
        onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
        onNonSymbolMatched(yytext(), yychar);
    }
    /* Start of a line comment. No chkLOC() here: comment text is not counted. */
    "--"         {
        yypush(COMMENT);
        onDisjointSpanChanged(HtmlConsts.COMMENT_CLASS, yychar);
        onNonSymbolMatched(yytext(), yychar);
    }

    /*
     * NotComments is defined outside this file. NOTE(review): presumably it
     * matches operator-like text that contains "--" without starting a
     * comment -- confirm in Haskell.lexh.
     */
    {NotComments}    { chkLOC(); onNonSymbolMatched(yytext(), yychar); }
}

<STRING> {
    /* An escaped quotation mark or escaped backslash stays inside the string. */
    \\[\"\\]    { chkLOC(); onNonSymbolMatched(yytext(), yychar); }
    /* An unescaped quotation mark is emitted and then ends the STRING state. */
    \"          {
        chkLOC();
        onNonSymbolMatched(yytext(), yychar);
        yypop();
    }
    /*
     * "A string may include a 'gap'—two backslants enclosing white
     * characters—which is ignored. This allows one to write long strings on
     * more than one line by writing a backslant at the end of one line and at
     * the start of the next." N.b. OpenGrok does not explicitly recognize the
     * "gap" but since a STRING must end in a non-escaped quotation mark, just
     * allow STRINGs to be multi-line regardless of syntax.
     */
}

<CHAR> {    // we don't need to consider the case where prime is part of an identifier since it is handled above
    /* An escaped apostrophe or escaped backslash stays inside the literal. */
    \\[\'\\]    { chkLOC(); onNonSymbolMatched(yytext(), yychar); }
    /* An unescaped apostrophe is emitted and then ends the CHAR state. */
    \'          {
        chkLOC();
        onNonSymbolMatched(yytext(), yychar);
        yypop();
    }
    /*
     * N.b. though only a single char is valid Haskell syntax, OpenGrok just
     * waits to end CHAR at a non-escaped apostrophe regardless of count.
     */
}

<COMMENT> {
    /*
     * A line comment ends at the end of the line. The state is popped
     * first (yypop() also reports the null span change) and only then is
     * the end-of-line reported. This rule is listed before the unguarded
     * end-of-line rule with the same pattern, so it is the one chosen while
     * in COMMENT.
     */
    {WhspChar}*{EOL}    {
        yypop();
        onEndOfLineMatched(yytext(), yychar);
    }
}

<YYINITIAL, BCOMMENT> {
    /*
     * Block-comment opener. Only the outermost "{-" (depth was 0) pushes
     * BCOMMENT and opens the COMMENT_CLASS span; nested openers just raise
     * the depth counter. The matched text is emitted in both cases.
     */
    "{-"    {
        if (nestedComment++ == 0) {
            yypush(BCOMMENT);
            onDisjointSpanChanged(HtmlConsts.COMMENT_CLASS, yychar);
        }
        onNonSymbolMatched(yytext(), yychar);
    }
}

<BCOMMENT> {
    /*
     * Block-comment closer. The text is emitted first so that it is still
     * inside the comment span; BCOMMENT is left only when the depth counter
     * returns to 0, i.e. when the outermost comment closes.
     */
    "-}"    {
        onNonSymbolMatched(yytext(), yychar);
        if (--nestedComment == 0) {
            yypop();
        }
    }
}

/*
 * Fallback rules with no state list; they apply in every state. In order:
 * an end of line (with any whitespace before it), a single whitespace
 * character other than newline (emitted without chkLOC()), and any other
 * single non-newline character (chkLOC() does nothing in the comment states).
 */
{WhspChar}*{EOL}    { onEndOfLineMatched(yytext(), yychar); }
[[\s]--[\n]]        { onNonSymbolMatched(yytext(), yychar); }
[^\n]               { chkLOC(); onNonSymbolMatched(yytext(), yychar); }

<STRING, COMMENT, BCOMMENT> {
    /* Path-like text inside strings and comments, reported with '/' as the separator. */
    {FPath}    {
        chkLOC();
        onPathlikeMatched(yytext(), '/', false, yychar);
    }
    /* Text shaped like name@host.domain is reported as an e-mail address. */
    {FNameChar}+ "@" {FNameChar}+ "." {FNameChar}+    {
        chkLOC();
        onEmailAddressMatched(yytext(), yychar);
    }
}

<STRING, COMMENT> {
    /*
     * URIs inside strings and line comments are reported as-is. Block
     * comments are handled by a separate rule with its own pattern.
     */
    {BrowseableURI}    {
        chkLOC();
        onUriMatched(yytext(), yychar);
    }
}

<BCOMMENT> {
    /*
     * Right curly bracket is not a valid URI character, so it won't be in a
     * {BrowseableURI} capture, but a hyphen is valid. Thus a nested comment
     * ending token, -}, can hide at the end of a URI. Work around this by
     * capturing a possibly-trailing right curly bracket, and match a special,
     * Haskell-specific collateral capture pattern.
     */
    {BrowseableURI} \}?    {
        onUriMatched(yytext(), yychar, HaskellUtils.MAYBE_END_NESTED_COMMENT);
    }
}