xref: /OpenGrok/opengrok-indexer/src/main/jflex/analysis/eiffel/EiffelProductions.lexh (revision d219b4cea555a12b602d2d5518daa22134ad4879)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * See LICENSE.txt included in this distribution for the specific
9 * language governing permissions and limitations under the License.
10 *
11 * When distributing Covered Code, include this CDDL HEADER in each
12 * file and include the License file at LICENSE.txt.
13 * If applicable, add the following below this CDDL HEADER, with the
14 * fields enclosed by brackets "[]" replaced with your own identifying
15 * information: Portions Copyright [yyyy] [name of copyright owner]
16 *
17 * CDDL HEADER END
18 */
19
20/*
21 * Copyright (c) 2017-2019, Chris Fraire <cfraire@me.com>.
22 */
23
24/*
25 * 8.32.16 Syntax (non-production): Identifier --  a sequence of one or more
26 * alpha_numeric characters of which the first is a letter.
27 */
/* {letter} and {alpha_numeric} are defined further down in this file. */
28Identifier = {letter} {alpha_numeric}*
29
30/*
31 * 8.32.2 Definition: Letter, alpha_betic, numeric, alpha_numeric, printable
32 *
33 * 1 Any of the following fifty-two, each a lower-case or upper-case element of
34 *   the Roman alphabet
35 * 2 If the underlying character set is 8-bit extended ASCII, the characters of
36 *   codes 192 to 255 in that set.
37 * 3 If the underlying character set is Unicode, all characters defined as
38 *   letters in that set.
39 *
40 * alpha_betic character is a letter or an underscore _.
41 * numeric character is one of the ten characters 0-9
42 * alpha_numeric character is alpha_betic or numeric
43 */
44letter = [\p{Letter}] // This suffices to cover 1,2,3 above.
45alpha_betic = ({letter} | [_]) // a letter or an underscore
46numeric = [0-9] // the ten decimal digits
47alpha_numeric = ({alpha_betic} | {numeric}) // letter, underscore, or digit
48
49/*
50 * printable character is any of the characters listed as printable in the
51 * definition of the character set (Unicode or extended ASCII). “Printable”
52 * characters exclude such special characters as new line and backspace.
53 *
54 * N.b. for OpenGrok purposes, take "printable" as Wikipedia's definition of
55 * Unicode Graphic Character.
56 *
57 * 8.29.15 Validity: Verbatim String rule
58 *
59 * Regarding α as the (possibly empty) Simple_string appearing in a
60 * Verbatim_string_opener: Every character in α is printable, and not a double
61 * quote.
62 */
/*
 * Any character in the Unicode Letter, Mark, Number, Punctuation, Symbol, or
 * space-separator (Zs) categories, except the double quote.
 */
63Vstring_alpha = [[\p{Letter}\p{Mark}\p{Number}\p{Punctuation}\p{Symbol}\p{Zs}]--[\"]]
64
/* A numeric literal: an optional leading sign, then an Integer or a Real. */
65Number = [\+\-]? ({Integer} | {Real})
66/*
67 * Integer = Δ [Integer_base] Digit_sequence
68 * Integer_base = Δ "0" Integer_base_letter
69 * Integer_base_letter = Δ "b" | "c" | "x" | "B" | "C" | "X"
70 * Digit_sequence = Δ Digit+
71 * Digit = Δ "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" |
72 *     "a" | "b" | "c" | "d" | "e" | "f" |
73 *     "A" | "B" | "C" | "D" | "E" | "F" | "_"
74 * "Neither the first nor the last Digit of a Digit_sequence is an underscore"
75 */
/*
 * Each form below accepts either a plain run of digits or a run with
 * underscores allowed in the interior only: the first and last characters of
 * the run are always digits of that radix. The based forms are prefixed with
 * 0x/0X (hexadecimal), 0b/0B (binary), or 0c/0C (octal).
 */
76Integer = ({Decimal_integer} | {Hexadecimal} | {Binary} | {Octal})
77Decimal_integer = ({DIGIT}+ | {DIGIT} ("_" | {DIGIT})+ {DIGIT})
78Hexadecimal = 0[xX] ({HEXDIG}+ | {HEXDIG} ("_" | {HEXDIG})+ {HEXDIG})
79Binary = 0[bB] ({BINDIG}+ | {BINDIG} ("_" | {BINDIG})+ {BINDIG})
80Octal = 0[cC] ({OCTDIG}+ | {OCTDIG} ("_" | {OCTDIG})+ {OCTDIG})
/* Per-radix digit classes used by the definitions above. */
81DIGIT = [0-9]
82HEXDIG = [0-9a-fA-F]
83BINDIG = [01]
84OCTDIG = [0-7]
85/*
86 * Real =
87 * • An optional decimal Integer, giving the integral part.
88 * • A required ‘‘.’’ (dot).
89 * • An optional decimal Integer, giving the fractional part.
90 * • An optional exponent, which is the letter e or E followed by an optional
91 *     Sign (+ or –) and a decimal Integer.
92 */
/*
 * Either an optional integral part, a dot, and a fractional part; or an
 * integral part followed by a dot. The exponent is optional, and so is the
 * Sign inside it, so that e.g. 1.5e10 and 1.5E-3 are each one Real.
 */
93Real = ({Decimal_integer}? "." {Decimal_integer} |
94    {Decimal_integer} ".") ([eE] [\+\-]? {Decimal_integer})?
95
96/*
97 * Character_constant = Δ "'" Character "'"
98 */
/*
 * A single-quoted literal holding either one character that is not a single
 * quote, LF, or CR, or one {Special_character} escape.
 */
99Character_constant = \' ([^\'\n\r] | {Special_character}) \'
100
/*
 * A percent sign followed by either one of the listed code characters or a
 * run of decimal digits enclosed in slashes (e.g. %N or %/65/).
 */
101Special_character = % ([A-Da-dFfHhLlNnQqRrSsT-Vt-v%\'\"\(\)\<\>] | "/" [0-9]+ "/")
102
103/*
104 * SCOMMENT : single-line comment
105 * STRING : basic manifest string (literal)
106 * VSTRING : verbatim manifest string (literal)
107 */
/*
 * Declares the lexical states used by the rules below, in addition to the
 * predefined YYINITIAL state.
 */
108%state SCOMMENT STRING VSTRING
109
110%%
111<YYINITIAL> {
    /*
     * Default state: identifiers, numeric and character literals, and the
     * openers for comments, basic strings, and verbatim strings.
     */
112    {Identifier}    {
113        chkLOC();
114        String id = yytext();
        // Hand the current lexer state back to the caller only when both
        // offerSymbol() and returnOnSymbol() report true; otherwise scanning
        // simply continues with the next token.
115        if (offerSymbol(id, 0, false) && returnOnSymbol()) {
116            return yystate();
117        }
118    }
119
120    {Number}    {
121        chkLOC();
        // Offer the literal between a NUMBER_CLASS span change and a null
        // span change (null presumably ends the span -- confirm in the
        // helper's definition).
122        onDisjointSpanChanged(HtmlConsts.NUMBER_CLASS, yychar);
123        offer(yytext());
124        onDisjointSpanChanged(null, yychar);
125    }
126
127    {Character_constant}    {
128        chkLOC();
        // Character constants are styled with the same class as strings.
129        onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
130        offer(yytext());
131        onDisjointSpanChanged(null, yychar);
132    }
133
134    "--"    {
        // Start of a single-line comment. Unlike the other rules in this
        // state, chkLOC() is not called here (presumably so that comment text
        // is not counted as code -- confirm). The span is left open; the
        // SCOMMENT end-of-line rule closes it.
135        yypush(SCOMMENT);
136        onDisjointSpanChanged(HtmlConsts.COMMENT_CLASS, yychar);
137        offer(yytext());
138    }
139
    /*
     * Both rules below begin with a double quote. The scanner prefers the
     * longest match, so the verbatim-opener rule is chosen whenever the quote
     * is followed by {Vstring_alpha}* and then "[" or "{".
     */
140    \"    {
141        chkLOC();
        // Enter STRING and leave the STRING_CLASS span open until the closing
        // quote is matched in that state.
142        yypush(STRING);
143        onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
144        offer(yytext());
145    }
146    \" {Vstring_alpha}* [\[\{]    {
147        chkLOC();
        // vop() is defined outside this file; presumably it records the
        // opener and switches to VSTRING -- confirm.
148        vop(yytext());
149    }
150}
151
152<SCOMMENT> {
    /*
     * A single-line comment ends at the end of the line. This rule appears
     * before the all-states {WhspChar}*{EOL} rule later in the file, so for an
     * equally long match in SCOMMENT this action is the one that runs.
     */
153    {WhspChar}*{EOL}    {
        // Signal a null span change (matching the COMMENT_CLASS change made
        // when "--" was seen), pop the state pushed by the "--" rule, then
        // report the end of line.
154        onDisjointSpanChanged(null, yychar);
155        yypop();
156        onEndOfLineMatched(yytext(), yychar);
157    }
158}
159
160<STRING> {
    /* Closing double quote of a basic manifest string. */
161    \"    {
162        chkLOC();
        // Offer the quote before the null span change so that it is emitted
        // while the string span is still current, then pop the state pushed
        // by the opening-quote rule.
163        offer(yytext());
164        onDisjointSpanChanged(null, yychar);
165        yypop();
166    }
167    /*
168     * Eiffel defines "Line_wrapping_part" as a sequence of characters
169     * consisting of the following, in order: % (percent character); zero or
170     * more blanks or tabs; New_line; zero or more blanks or tabs; % again. For
171     * OpenGrok purposes, just recognizing LFs in STRING is good enough; though
172     * see also the {Special_character} handling for STRING,VSTRING.
173     */
174}
175
176<VSTRING> {
    /*
     * Candidate verbatim-string closer: "]" or "}", then {Vstring_alpha}*,
     * then a double quote.
     */
177    [\]\}] {Vstring_alpha}* \"    {
178        chkLOC();
        // maybeEndVerbatim() is defined outside this file; presumably it
        // leaves VSTRING only if this text corresponds to the recorded
        // opener -- confirm.
179        maybeEndVerbatim(yytext());
180    }
181}
182
183<STRING, VSTRING> {
    /*
     * Matching a %-escape as one token means an escaped quote such as %" is
     * consumed here rather than by the one-character closing-quote rule
     * (longest match wins).
     */
184    {Special_character}    {
185        chkLOC();
186        offer(yytext());
187    }
188
    /*
     * End of line inside a string: signal a null span change, report the end
     * of line, then signal STRING_CLASS again for the continuation. Appears
     * before the all-states {WhspChar}*{EOL} rule later in the file, so this
     * action is the one that runs in STRING and VSTRING.
     */
189    {WhspChar}*{EOL}    {
190        onDisjointSpanChanged(null, yychar);
191        onEndOfLineMatched(yytext(), yychar);
192        onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
193    }
194}
195
196<YYINITIAL, SCOMMENT, STRING, VSTRING> {
    /*
     * Fallback rules shared by every state. Earlier state-specific rules
     * with the same pattern take precedence for equally long matches.
     */
197    {WhspChar}*{EOL}    { onEndOfLineMatched(yytext(), yychar); }
    /* Whitespace is offered without calling chkLOC(). */
198    \s    { offer(yytext()); }
    /* Any other single character is offered and passed through chkLOC(). */
199    [^]    {
200        chkLOC();
201        offer(yytext());
202    }
203}
204
205<SCOMMENT, STRING, VSTRING> {
    /*
     * URIs inside comments and strings. {BrowseableURI} is not defined in
     * this file. Although listed after the one-character fallback rules, a
     * URI match is longer and therefore preferred.
     */
206    {BrowseableURI}    {
207        chkLOC();
        // The URI is reported only when takeAllContent() is true; otherwise
        // the matched text is not offered at all by this rule.
208        if (takeAllContent()) {
209            onUriMatched(yytext(), yychar);
210        }
211    }
212}
213