xref: /OpenGrok/opengrok-indexer/src/main/jflex/analysis/ada/AdaProductions.lexh (revision d219b4cea555a12b602d2d5518daa22134ad4879)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, 2016, Oracle and/or its affiliates. All rights reserved.
 * Portions Copyright (c) 2017, 2019, Chris Fraire <cfraire@me.com>.
 */

/*
 * Regex productions shared between AdaXref and AdaSymbolTokenizer
 */
28*d219b4ceSAdam Hornacek
/*
 * Identifiers syntax
 * 2.3-1: Identifiers are used as names.
 * 2.3-2/2: identifier ::=
 *     identifier_start {identifier_start | identifier_extend}
 */
Identifier = {Identifier_start} ({Identifier_start} | {Identifier_extend})*
/*
 * 2.3-3/2: identifier_start ::= letter_uppercase | letter_lowercase |
 *     letter_titlecase | letter_modifier | letter_other | number_letter
 *
 * Expressed here with the Unicode general categories Lu, Ll, Lt, Lm, Lo
 * and Nl.
 */
Identifier_start = [\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}]
/*
 * 2.3-3.1/2: identifier_extend ::= mark_non_spacing | mark_spacing_combining |
 *     number_decimal | punctuation_connector | other_format
 *
 * Expressed here with the Unicode general categories Mn, Mc, Nd, Pc and Cf.
 */
Identifier_extend = [\p{Mn}\p{Mc}\p{Nd}\p{Pc}\p{Cf}]
/*
 * 2.3-4/2 reads "After eliminating the characters in category other_format, an
 * identifier shall not contain two consecutive characters in category
 * punctuation_connector, or end with a character in that category," but that
 * is not enforceable in JFlex regexes, as their syntax does not allow negative
 * look-behind assertions. As a consequence {Identifier} also accepts names
 * with doubled or trailing connector punctuation (e.g. "a__b" or "a_").
 */
53*d219b4ceSAdam Hornacek
/*
 * 2.4-1: There are two kinds of numeric_literals, real literals and integer
 * literals. A real literal is a numeric_literal that includes a point; an
 * integer literal is a numeric_literal without a point.
 *
 * 2.4-2: numeric_literal ::= decimal_literal | based_literal
 *
 * In addition to the two standard forms, this production also accepts
 * {NONCONFORM_NUMBER}, a non-standard extension defined at the end of this
 * group of macros.
 */
Numeric_literal = ({NONCONFORM_NUMBER} | {Decimal_literal} | {Based_literal})
/*
 * 2.4.1-1: A decimal_literal is a numeric_literal in the conventional decimal
 * notation (that is, the base is ten).
 *
 * 2.4.1-2: decimal_literal ::= numeral [.numeral] [exponent]
 */
Decimal_literal = {Numeral} ([\.]{Numeral})? {Exponent}?
/*
 * 2.4.1-3: numeral ::= digit {[underline] digit}
 *
 * A single optional underscore is allowed between digits; the numeral can
 * neither start nor end with an underscore.
 */
Numeral = {Digit} ([_]? {Digit})*
/*
 * 2.4.1-4: exponent ::= E [+] numeral | E – numeral
 *
 * Only an upper-case "E" is spelled out here. NOTE(review): whether a
 * lower-case "e" is also accepted depends on the case-sensitivity options of
 * the lexer specification that includes this file -- confirm there.
 */
Exponent = [E] [\+\-]? {Numeral}
/*
 * 2.4.1-4.1/2: digit ::= 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9
 */
Digit = [0-9]
/*
 * 2.4.1-5 reads "An exponent for an integer literal shall not have a minus
 * sign," but that rule is not distinguished here in regex.
 */
/*
 * 2.4.2-1: A based_literal is a numeric_literal expressed in a form that
 * specifies the base explicitly.
 *
 * 2.4.2-2: based_literal ::= base # based_numeral [.based_numeral] # [exponent]
 */
Based_literal = {Base}[#]{Based_numeral} ([\.]{Based_numeral})? [#]{Exponent}?
/*
 * 2.4.2-3: base ::= numeral
 * 2.4.2-6: The base (the numeric value of the decimal numeral preceding the
 *     first #) shall be at least two and at most sixteen.
 *
 * The range restriction is encoded directly: a single digit 2-9 or the two
 * digits 10-16. Bases written with underscores or leading zeros are therefore
 * not matched.
 */
Base = ([2-9] | [1][0-6])
/*
 * 2.4.2-4: based_numeral ::= extended_digit {[underline] extended_digit}
 */
Based_numeral = {Extended_digit} ([_]? {Extended_digit})*
/*
 * 2.4.2-5: extended_digit ::= digit | A | B | C | D | E | F
 *
 * Only upper-case letters are spelled out here. NOTE(review): whether
 * lower-case a-f is accepted depends on the case-sensitivity options of the
 * including lexer specification -- confirm there.
 */
Extended_digit = [0-9A-F]
/*
 * This is unconventional numeric syntax seen in large open-source Ada projects
 *
 * Two shapes are accepted: one or more extended digits with an optional "0x"
 * prefix, or a numeral with an optional fractional part followed by the
 * suffix "f".
 */
NONCONFORM_NUMBER = ("0x"? {Extended_digit}+ | {Numeral} ([\.]{Numeral})?[f])
110*d219b4ceSAdam Hornacek
/*
 * 2.5-1: A character_literal is formed by enclosing a graphic character
 * between two apostrophe characters.
 *
 * 2.5-2: character_literal ::= 'graphic_character'
 *
 * The JFlex class [^] matches any single character, line terminators
 * included, so this production is more permissive than graphic_character.
 */
Character_literal = ['] [^] [']
118*d219b4ceSAdam Hornacek
/*
 * 2.6-1: A string_literal is formed by a sequence of graphic characters
 * (possibly none) enclosed between two quotation marks used as string
 * brackets.
 * 2.6-2: string_literal ::= "{string_element}"
 * 2.6-3: string_element ::= "" | non_quotation_mark_graphic_character
 * 2.6-4: A string_element is either a pair of quotation marks (""), or a
 * single graphic_character other than a quotation mark.
 *
 * The body alternates between a doubled quotation mark and any character
 * that is not a quotation mark. The class [^\"] does not exclude line
 * terminators, so the regex itself does not confine a literal to one line.
 */
String_literal = [\"] ([\"][\"] | [^\"])* [\"]
129*d219b4ceSAdam Hornacek
/*
 * 2.7-2: comment ::= --{non_end_of_line_character}
 *
 * Only the two-hyphen introducer is matched by this macro; the rest of the
 * comment line is consumed by the rules of the SCOMMENT lexical state.
 */
Comment_token = "--"
134*d219b4ceSAdam Hornacek
/*
 * File names recognised inside comments.
 *
 * FileExt: one of the extensions ada, adb, ads, diff or patch, in any mix of
 * upper and lower case.
 * File: an ASCII letter, then any number of {FNameChar}, a dot and a
 * {FileExt}. FNameChar is not defined in this file; it has to be supplied by
 * the including lexer specification.
 */
FileExt = ([Aa][Dd][AaBbSs] | [Dd][Ii][Ff][Ff] | [Pp][Aa][Tt][Cc][Hh])
File = [a-zA-Z]{FNameChar}* "." {FileExt}
137*d219b4ceSAdam Hornacek
/*
 * SCOMMENT is the lexical state that is active from a "--" comment
 * introducer up to the end of that line.
 */
%state SCOMMENT

%%
<YYINITIAL> {
    // An identifier is offered as a symbol. The scanner returns to its
    // caller only when both offerSymbol() and returnOnSymbol() report true;
    // otherwise scanning simply continues.
    {Identifier}    {
        chkLOC();
        String id = yytext();
        if (offerSymbol(id, 0, false) && returnOnSymbol()) {
            return yystate();
        }
    }

    // Character literals are emitted with the string style class.
    {Character_literal}    {
        chkLOC();
        takeLiteral(yytext(), HtmlConsts.STRING_CLASS);
    }

    // Numeric literals (standard and non-conforming) use the number class.
    {Numeric_literal}    {
        chkLOC();
        takeLiteral(yytext(), HtmlConsts.NUMBER_CLASS);
    }

    // String literals are emitted with the string style class.
    {String_literal}    {
        chkLOC();
        takeLiteral(yytext(), HtmlConsts.STRING_CLASS);
    }

    // "--" starts a comment: switch to SCOMMENT, announce the comment span
    // beginning at the current offset, then emit the introducer itself.
    // Unlike the rules above, this action does not call chkLOC().
    {Comment_token}    {
        yypush(SCOMMENT);
        onDisjointSpanChanged(HtmlConsts.COMMENT_CLASS, yychar);
        offer(yytext());
    }
}
171*d219b4ceSAdam Hornacek
<SCOMMENT> {
    // End of the comment line. The whole match (trailing whitespace plus the
    // line terminator) is pushed back unconsumed so that it is scanned again
    // after the state has been popped; the comment span is then closed by
    // passing a null class to onDisjointSpanChanged().
    {WhspChar}*{EOL}    {
        // yylength() is the length of the current match, so this is the same
        // as yytext().length() without creating a temporary String.
        yypushback(yylength());
        yypop();
        onDisjointSpanChanged(null, yychar);
    }
}
180*d219b4ceSAdam Hornacek
<YYINITIAL> {
    // Optional trailing whitespace followed by a line terminator is reported
    // as one end-of-line event, together with the offset where it starts.
    {WhspChar}*{EOL}    {
        onEndOfLineMatched(yytext(), yychar);
    }
}
186*d219b4ceSAdam Hornacek
<YYINITIAL, SCOMMENT> {
    // Only one whitespace char at a time. Any whitespace other than LF or CR
    // is passed through without calling chkLOC().
    [[\s]--[\n\r]]    {
        offer(yytext());
    }
    // Only one character at a time because of \s restriction above. Both
    // rules match exactly one character, so for whitespace the rule above
    // wins by being listed first; every other non-newline character ends up
    // here and additionally triggers chkLOC().
    [^\n\r]    {
        chkLOC();
        offer(yytext());
    }
}
198*d219b4ceSAdam Hornacek
// "comment links"
// Patterns inside comments that are reported to the on*Matched() callbacks.
// Each callback is invoked only when takeAllContent() returns true; when it
// returns false the matched text is consumed without any callback.
// These rules are listed after the single-character fallback rules, but
// JFlex prefers the longest match, so a longer match here takes precedence.
// FPath, BrowseableURI and FNameChar are not defined in this file.
<SCOMMENT> {
    // Path-like text, reported with '/' as the separator argument.
    {FPath}    {
        if (takeAllContent()) {
            onPathlikeMatched(yytext(), '/', false, yychar);
        }
    }

    // File names ending in one of the {FileExt} extensions.
    {File}    {
        if (takeAllContent()) {
            String path = yytext();
            onFilelikeMatched(path, yychar);
        }
    }

    // URIs as described by the BrowseableURI macro.
    {BrowseableURI}    {
        if (takeAllContent()) {
            onUriMatched(yytext(), yychar);
        }
    }

    // Text shaped like name@host.tld, built from FNameChar runs.
    {FNameChar}+ "@" {FNameChar}+ "." {FNameChar}+    {
        if (takeAllContent()) {
            onEmailAddressMatched(yytext(), yychar);
        }
    }
}