xref: /OpenGrok/opengrok-indexer/src/main/jflex/analysis/eiffel/EiffelProductions.lexh (revision d219b4cea555a12b602d2d5518daa22134ad4879)
1*d219b4ceSAdam Hornacek/*
2*d219b4ceSAdam Hornacek * CDDL HEADER START
3*d219b4ceSAdam Hornacek *
4*d219b4ceSAdam Hornacek * The contents of this file are subject to the terms of the
5*d219b4ceSAdam Hornacek * Common Development and Distribution License (the "License").
6*d219b4ceSAdam Hornacek * You may not use this file except in compliance with the License.
7*d219b4ceSAdam Hornacek *
8*d219b4ceSAdam Hornacek * See LICENSE.txt included in this distribution for the specific
9*d219b4ceSAdam Hornacek * language governing permissions and limitations under the License.
10*d219b4ceSAdam Hornacek *
11*d219b4ceSAdam Hornacek * When distributing Covered Code, include this CDDL HEADER in each
12*d219b4ceSAdam Hornacek * file and include the License file at LICENSE.txt.
13*d219b4ceSAdam Hornacek * If applicable, add the following below this CDDL HEADER, with the
14*d219b4ceSAdam Hornacek * fields enclosed by brackets "[]" replaced with your own identifying
15*d219b4ceSAdam Hornacek * information: Portions Copyright [yyyy] [name of copyright owner]
16*d219b4ceSAdam Hornacek *
17*d219b4ceSAdam Hornacek * CDDL HEADER END
18*d219b4ceSAdam Hornacek */
19*d219b4ceSAdam Hornacek
20*d219b4ceSAdam Hornacek/*
21*d219b4ceSAdam Hornacek * Copyright (c) 2017-2019, Chris Fraire <cfraire@me.com>.
22*d219b4ceSAdam Hornacek */
23*d219b4ceSAdam Hornacek
24*d219b4ceSAdam Hornacek/*
25*d219b4ceSAdam Hornacek * 8.32.16 Syntax (non-production): Identifier -- a sequence of one or more
26*d219b4ceSAdam Hornacek * alpha_numeric characters of which the first is a letter.
27*d219b4ceSAdam Hornacek */
28*d219b4ceSAdam HornacekIdentifier = {letter} {alpha_numeric}* // one letter, then any number of letters, digits or underscores
29*d219b4ceSAdam Hornacek
30*d219b4ceSAdam Hornacek/*
31*d219b4ceSAdam Hornacek * 8.32.2 Definition: Letter, alpha_betic, numeric, alpha_numeric, printable
32*d219b4ceSAdam Hornacek * A letter is any character in one of the following categories:
33*d219b4ceSAdam Hornacek * 1 Any of the following fifty-two, each a lower-case or upper-case element of
34*d219b4ceSAdam Hornacek *   the Roman alphabet
35*d219b4ceSAdam Hornacek * 2 If the underlying character set is 8-bit extended ASCII, the characters of
36*d219b4ceSAdam Hornacek *   codes 192 to 255 in that set.
37*d219b4ceSAdam Hornacek * 3 If the underlying character set is Unicode, all characters defined as
38*d219b4ceSAdam Hornacek *   letters in that set.
39*d219b4ceSAdam Hornacek *
40*d219b4ceSAdam Hornacek * alpha_betic character is a letter or an underscore _.
41*d219b4ceSAdam Hornacek * numeric character is one of the ten characters 0-9
42*d219b4ceSAdam Hornacek * alpha_numeric character is alpha_betic or numeric
43*d219b4ceSAdam Hornacek */
44*d219b4ceSAdam Hornacekletter = [\p{Letter}] // This suffices to cover 1,2,3 above.
45*d219b4ceSAdam Hornacekalpha_betic = ({letter} | [_]) // a letter or an underscore
46*d219b4ceSAdam Hornaceknumeric = [0-9] // one of the ten ASCII digits
47*d219b4ceSAdam Hornacekalpha_numeric = ({alpha_betic} | {numeric}) // a letter, an underscore or a digit
48*d219b4ceSAdam Hornacek
49*d219b4ceSAdam Hornacek/*
50*d219b4ceSAdam Hornacek * printable character is any of the characters listed as printable in the
51*d219b4ceSAdam Hornacek * definition of the character set (Unicode or extended ASCII). “Printable”
52*d219b4ceSAdam Hornacek * characters exclude such special characters as new line and backspace.
53*d219b4ceSAdam Hornacek *
54*d219b4ceSAdam Hornacek * N.b. for OpenGrok purposes, take "printable" as Wikipedia's definition of
55*d219b4ceSAdam Hornacek * Unicode Graphic Character.
56*d219b4ceSAdam Hornacek *
57*d219b4ceSAdam Hornacek * 8.29.15 Validity: Verbatim String rule
58*d219b4ceSAdam Hornacek *
59*d219b4ceSAdam Hornacek * Regarding α as the (possibly empty) Simple_string appearing in a
60*d219b4ceSAdam Hornacek * Verbatim_string_opener: Every character in α is printable, and not a double
61*d219b4ceSAdam Hornacek * quote.
62*d219b4ceSAdam Hornacek */
63*d219b4ceSAdam HornacekVstring_alpha = [[\p{Letter}\p{Mark}\p{Number}\p{Punctuation}\p{Symbol}\p{Zs}]--[\"]] // letters, marks, numbers, punctuation, symbols and space separators, minus the double quote
64*d219b4ceSAdam Hornacek
65*d219b4ceSAdam HornacekNumber = [\+\-]? ({Integer} | {Real}) // optional leading sign, then an Integer or a Real
66*d219b4ceSAdam Hornacek/*
67*d219b4ceSAdam Hornacek * Integer = Δ [Integer_base] Digit_sequence
68*d219b4ceSAdam Hornacek * Integer_base = Δ "0" Integer_base_letter
69*d219b4ceSAdam Hornacek * Integer_base_letter = Δ "b" | "c" | "x" | "B" | "C" | "X"
70*d219b4ceSAdam Hornacek * Digit_sequence = Δ Digit+
71*d219b4ceSAdam Hornacek * Digit = Δ "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" |
72*d219b4ceSAdam Hornacek *     "a" | "b" | "c" | "d" | "e" | "f" |
73*d219b4ceSAdam Hornacek *     "A" | "B" | "C" | "D" | "E" | "F" | "_"
74*d219b4ceSAdam Hornacek * "Neither the first nor the last Digit of a Digit_sequence is an underscore"
75*d219b4ceSAdam Hornacek */
76*d219b4ceSAdam HornacekInteger = ({Binary} | {Octal} | {Hexadecimal} | {Decimal_integer})
77*d219b4ceSAdam HornacekDecimal_integer = ({DIGIT} (("_" | {DIGIT})* {DIGIT})?) // underscores may only appear between two digits
78*d219b4ceSAdam HornacekHexadecimal = "0" [xX] ({HEXDIG} (("_" | {HEXDIG})* {HEXDIG})?) // base prefix 0x / 0X
79*d219b4ceSAdam HornacekBinary = "0" [bB] ({BINDIG} (("_" | {BINDIG})* {BINDIG})?) // base prefix 0b / 0B
80*d219b4ceSAdam HornacekOctal = "0" [cC] ({OCTDIG} (("_" | {OCTDIG})* {OCTDIG})?) // base prefix 0c / 0C
81*d219b4ceSAdam HornacekDIGIT = [0-9]
82*d219b4ceSAdam HornacekHEXDIG = [0-9A-Fa-f]
83*d219b4ceSAdam HornacekBINDIG = [0-1]
84*d219b4ceSAdam HornacekOCTDIG = [0-7]
85*d219b4ceSAdam Hornacek/*
86*d219b4ceSAdam Hornacek * Real =
87*d219b4ceSAdam Hornacek * • An optional decimal Integer, giving the integral part.
88*d219b4ceSAdam Hornacek * • A required ‘‘.’’ (dot).
89*d219b4ceSAdam Hornacek * • An optional decimal Integer, giving the fractional part.
90*d219b4ceSAdam Hornacek * • An optional exponent, which is the letter e or E followed by an optional
91*d219b4ceSAdam Hornacek *     Sign (+ or –) and a decimal Integer.
92*d219b4ceSAdam Hornacek */
93*d219b4ceSAdam HornacekReal = ({Decimal_integer}? "." {Decimal_integer} |
94*d219b4ceSAdam Hornacek    {Decimal_integer} ".") ([eE] [\+\-]? {Decimal_integer})? // exponent sign is optional, so 1.5e10 is one Real rather than "1.5" + identifier "e10"
95*d219b4ceSAdam Hornacek
96*d219b4ceSAdam Hornacek/*
97*d219b4ceSAdam Hornacek * Character_constant = Δ "'" Character "'"
98*d219b4ceSAdam Hornacek */
99*d219b4ceSAdam HornacekCharacter_constant = \' ([^\'\n\r] | {Special_character}) \' // between single quotes: one character other than a quote, LF or CR, or one %-code
100*d219b4ceSAdam Hornacek
101*d219b4ceSAdam HornacekSpecial_character = % ([A-Da-dFfHhLlNnQqRrSsT-Vt-v%\'\"\(\)\<\>] | "/" [0-9]+ "/") // a percent sign followed by one code character from the listed set, or by a slash-delimited run of decimal digits
102*d219b4ceSAdam Hornacek
103*d219b4ceSAdam Hornacek/*
104*d219b4ceSAdam Hornacek * SCOMMENT : single-line comment
105*d219b4ceSAdam Hornacek * STRING : basic manifest string (literal)
106*d219b4ceSAdam Hornacek * VSTRING : verbatim manifest string (literal)
107*d219b4ceSAdam Hornacek */
108*d219b4ceSAdam Hornacek%state SCOMMENT STRING VSTRING
109*d219b4ceSAdam Hornacek
110*d219b4ceSAdam Hornacek%%
111*d219b4ceSAdam Hornacek<YYINITIAL> {
112*d219b4ceSAdam Hornacek    {Identifier}    { // offer the matched identifier as a symbol
113*d219b4ceSAdam Hornacek        chkLOC();
114*d219b4ceSAdam Hornacek        String id = yytext();
115*d219b4ceSAdam Hornacek        if (offerSymbol(id, 0, false) && returnOnSymbol()) {
116*d219b4ceSAdam Hornacek            return yystate(); // only when the symbol was accepted and returnOnSymbol() is true
117*d219b4ceSAdam Hornacek        }
118*d219b4ceSAdam Hornacek    }
119*d219b4ceSAdam Hornacek
120*d219b4ceSAdam Hornacek    {Number}    { // numeric literal: set the span to NUMBER_CLASS, offer the text, reset the span to null
121*d219b4ceSAdam Hornacek        chkLOC();
122*d219b4ceSAdam Hornacek        onDisjointSpanChanged(HtmlConsts.NUMBER_CLASS, yychar);
123*d219b4ceSAdam Hornacek        offer(yytext());
124*d219b4ceSAdam Hornacek        onDisjointSpanChanged(null, yychar);
125*d219b4ceSAdam Hornacek    }
126*d219b4ceSAdam Hornacek
127*d219b4ceSAdam Hornacek    {Character_constant}    { // character literal: same sequence as {Number} but with STRING_CLASS
128*d219b4ceSAdam Hornacek        chkLOC();
129*d219b4ceSAdam Hornacek        onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
130*d219b4ceSAdam Hornacek        offer(yytext());
131*d219b4ceSAdam Hornacek        onDisjointSpanChanged(null, yychar);
132*d219b4ceSAdam Hornacek    }
133*d219b4ceSAdam Hornacek
134*d219b4ceSAdam Hornacek    "--"    { // comment opener; NOTE(review): no chkLOC() here, unlike the other YYINITIAL rules -- presumably intentional, confirm
135*d219b4ceSAdam Hornacek        yypush(SCOMMENT);
136*d219b4ceSAdam Hornacek        onDisjointSpanChanged(HtmlConsts.COMMENT_CLASS, yychar); // span is reset to null by the <SCOMMENT> end-of-line rule
137*d219b4ceSAdam Hornacek        offer(yytext());
138*d219b4ceSAdam Hornacek    }
139*d219b4ceSAdam Hornacek
140*d219b4ceSAdam Hornacek    \"    { // opening quote of a basic manifest string
141*d219b4ceSAdam Hornacek        chkLOC();
142*d219b4ceSAdam Hornacek        yypush(STRING);
143*d219b4ceSAdam Hornacek        onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar); // span is reset to null by the closing-quote rule in <STRING>
144*d219b4ceSAdam Hornacek        offer(yytext());
145*d219b4ceSAdam Hornacek    }
146*d219b4ceSAdam Hornacek    \" {Vstring_alpha}* [\[\{]    { // double quote, optional {Vstring_alpha} run, then '[' or '{'
147*d219b4ceSAdam Hornacek        chkLOC();
148*d219b4ceSAdam Hornacek        vop(yytext()); // vop() is defined outside this file; presumably it handles the verbatim-string opener and enters VSTRING -- confirm
149*d219b4ceSAdam Hornacek    }
150*d219b4ceSAdam Hornacek}
151*d219b4ceSAdam Hornacek
152*d219b4ceSAdam Hornacek<SCOMMENT> {
153*d219b4ceSAdam Hornacek    {WhspChar}*{EOL}    { // optional trailing whitespace plus end of line ends the single-line comment
154*d219b4ceSAdam Hornacek        onDisjointSpanChanged(null, yychar); // reset the span that the "--" rule set to COMMENT_CLASS
155*d219b4ceSAdam Hornacek        yypop(); // presumably restores the state saved by yypush(SCOMMENT) -- confirm
156*d219b4ceSAdam Hornacek        onEndOfLineMatched(yytext(), yychar);
157*d219b4ceSAdam Hornacek    }
158*d219b4ceSAdam Hornacek}
159*d219b4ceSAdam Hornacek
160*d219b4ceSAdam Hornacek<STRING> {
161*d219b4ceSAdam Hornacek    \"    { // closing quote: offer it, reset the span to null, then pop the state
162*d219b4ceSAdam Hornacek        chkLOC();
163*d219b4ceSAdam Hornacek        offer(yytext());
164*d219b4ceSAdam Hornacek        onDisjointSpanChanged(null, yychar);
165*d219b4ceSAdam Hornacek        yypop();
166*d219b4ceSAdam Hornacek    }
167*d219b4ceSAdam Hornacek    /*
168*d219b4ceSAdam Hornacek     * Eiffel defines "Line_wrapping_part" as a sequence of characters
169*d219b4ceSAdam Hornacek     * consisting of the following, in order: % (percent character); zero or
170*d219b4ceSAdam Hornacek     * more blanks or tabs; New_line; zero or more blanks or tabs; % again. For
171*d219b4ceSAdam Hornacek     * OpenGrok purposes, just recognizing LFs in STRING is good enough; though
172*d219b4ceSAdam Hornacek     * see also the {Special_character} handling for STRING,VSTRING.
173*d219b4ceSAdam Hornacek     */
174*d219b4ceSAdam Hornacek}
175*d219b4ceSAdam Hornacek
176*d219b4ceSAdam Hornacek<VSTRING> {
177*d219b4ceSAdam Hornacek    [\]\}] {Vstring_alpha}* \"    { // ']' or '}', optional {Vstring_alpha} run, then a double quote
178*d219b4ceSAdam Hornacek        chkLOC();
179*d219b4ceSAdam Hornacek        maybeEndVerbatim(yytext()); // defined outside this file; presumably decides whether this text closes the verbatim string -- confirm
180*d219b4ceSAdam Hornacek    }
181*d219b4ceSAdam Hornacek}
182*d219b4ceSAdam Hornacek
183*d219b4ceSAdam Hornacek<STRING, VSTRING> {
184*d219b4ceSAdam Hornacek    {Special_character}    { // a %-code is offered as one unit
185*d219b4ceSAdam Hornacek        chkLOC();
186*d219b4ceSAdam Hornacek        offer(yytext());
187*d219b4ceSAdam Hornacek    }
188*d219b4ceSAdam Hornacek
189*d219b4ceSAdam Hornacek    {WhspChar}*{EOL}    { // end of line inside a string: the string state is kept (no yypop)
190*d219b4ceSAdam Hornacek        onDisjointSpanChanged(null, yychar); // reset the span before reporting the end of line
191*d219b4ceSAdam Hornacek        onEndOfLineMatched(yytext(), yychar);
192*d219b4ceSAdam Hornacek        onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar); // then set STRING_CLASS again for the following text
193*d219b4ceSAdam Hornacek    }
194*d219b4ceSAdam Hornacek}
195*d219b4ceSAdam Hornacek
196*d219b4ceSAdam Hornacek<YYINITIAL, SCOMMENT, STRING, VSTRING> {
197*d219b4ceSAdam Hornacek    {WhspChar}*{EOL}    { onEndOfLineMatched(yytext(), yychar); /* NOTE(review): earlier rules match this same text in SCOMMENT, STRING and VSTRING, so this likely fires only in YYINITIAL -- confirm */ }
198*d219b4ceSAdam Hornacek    \s    { offer(yytext()); /* whitespace is offered without calling chkLOC() */ }
199*d219b4ceSAdam Hornacek    [^]    { // any other single character
200*d219b4ceSAdam Hornacek        chkLOC();
201*d219b4ceSAdam Hornacek        offer(yytext());
202*d219b4ceSAdam Hornacek    }
203*d219b4ceSAdam Hornacek}
204*d219b4ceSAdam Hornacek
205*d219b4ceSAdam Hornacek<SCOMMENT, STRING, VSTRING> {
206*d219b4ceSAdam Hornacek    {BrowseableURI}    { // URI inside a comment or string; {BrowseableURI} is defined outside this file
207*d219b4ceSAdam Hornacek        chkLOC();
208*d219b4ceSAdam Hornacek        if (takeAllContent()) { // the URI is reported only when takeAllContent() is true; otherwise this action emits nothing
209*d219b4ceSAdam Hornacek            onUriMatched(yytext(), yychar);
210*d219b4ceSAdam Hornacek        }
211*d219b4ceSAdam Hornacek    }
212*d219b4ceSAdam Hornacek}
213