xref: /OpenGrok/opengrok-indexer/src/main/jflex/analysis/ada/AdaProductions.lexh (revision d219b4cea555a12b602d2d5518daa22134ad4879)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * See LICENSE.txt included in this distribution for the specific
9 * language governing permissions and limitations under the License.
10 *
11 * When distributing Covered Code, include this CDDL HEADER in each
12 * file and include the License file at LICENSE.txt.
13 * If applicable, add the following below this CDDL HEADER, with the
14 * fields enclosed by brackets "[]" replaced with your own identifying
15 * information: Portions Copyright [yyyy] [name of copyright owner]
16 *
17 * CDDL HEADER END
18 */
19
20/*
21 * Copyright (c) 2010, 2016, Oracle and/or its affiliates. All rights reserved.
22 * Portions Copyright (c) 2017, 2019, Chris Fraire <cfraire@me.com>.
23 */
24
25/*
26 * Regex productions shared between AdaXref and AdaSymbolTokenizer
27 */
28
/*
 * Ada identifier (RM 2.3-1, 2.3-2/2). Identifiers are used as names and are
 * spelled as one identifier_start character followed by any number of
 * identifier_start or identifier_extend characters.
 */
Identifier = {Identifier_start} ({Identifier_extend} | {Identifier_start})*
/*
 * RM 2.3-3/2: an identifier_start is a character from one of the categories
 * letter_uppercase (Lu), letter_lowercase (Ll), letter_titlecase (Lt),
 * letter_modifier (Lm), letter_other (Lo) or number_letter (Nl).
 */
Identifier_start = (\p{Lu} | \p{Ll} | \p{Lt} | \p{Lm} | \p{Lo} | \p{Nl})
/*
 * RM 2.3-3.1/2: an identifier_extend is a character from one of the categories
 * mark_non_spacing (Mn), mark_spacing_combining (Mc), number_decimal (Nd),
 * punctuation_connector (Pc) or other_format (Cf).
 */
Identifier_extend = (\p{Mn} | \p{Mc} | \p{Nd} | \p{Pc} | \p{Cf})
/*
 * 2.3-4/2 reads "After eliminating the characters in category other_format, an
 * identifier shall not contain two consecutive characters in category
 * punctuation_connector, or end with a character in that category," but that
 * is not enforceable in jflex regexes, as its syntax does not allow negative
 * look-behind assertions.
 */
53
/*
 * RM 2.4-1: numeric_literals come in two kinds. A real literal includes a
 * point; an integer literal does not.
 *
 * RM 2.4-2: numeric_literal ::= decimal_literal | based_literal
 *
 * NONCONFORM_NUMBER is accepted in addition to the two forms named by the RM.
 */
Numeric_literal = ({Decimal_literal} | {Based_literal} | {NONCONFORM_NUMBER})
/*
 * RM 2.4.1-1: a decimal_literal is a numeric_literal written in conventional
 * decimal notation, i.e. with base ten.
 *
 * RM 2.4.1-2: decimal_literal ::= numeral [.numeral] [exponent]
 */
Decimal_literal = {Numeral} ("." {Numeral})? {Exponent}?
/*
 * RM 2.4.1-3: numeral ::= digit {[underline] digit}
 * At most one underline may sit between two neighbouring digits.
 */
Numeral = {Digit} ("_"? {Digit})*
/*
 * RM 2.4.1-4: exponent ::= E [+] numeral | E - numeral
 * The pattern takes the letter E, an optional plus or minus sign, and a
 * numeral.
 */
Exponent = "E" ("+" | "-")? {Numeral}
/*
 * RM 2.4.1-4.1/2: digit ::= 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9
 * Each of the ten digits is listed explicitly, mirroring the RM production.
 */
Digit = [0123456789]
81/*
82 * 2.4.1-5 reads "An exponent for an integer literal shall not have a minus
83 * sign," but that rule is not distinguished here in regex.
84 */
/*
 * RM 2.4.2-1: a based_literal is a numeric_literal whose base is written out
 * explicitly.
 *
 * RM 2.4.2-2:
 *     based_literal ::= base # based_numeral [.based_numeral] # [exponent]
 */
Based_literal = {Base} "#" {Based_numeral} ("." {Based_numeral})? "#" {Exponent}?
/*
 * RM 2.4.2-3: base ::= numeral
 * RM 2.4.2-6: the base (the value of the decimal numeral before the first #)
 *     shall be at least two and at most sixteen.
 * The pattern therefore accepts 2 through 9 and 10 through 16 only.
 */
Base = ([2-9] | "1" [0-6])
/*
 * RM 2.4.2-4: based_numeral ::= extended_digit {[underline] extended_digit}
 * Same shape as Numeral, but built from extended digits.
 */
Based_numeral = {Extended_digit} ("_"? {Extended_digit})*
/*
 * RM 2.4.2-5: extended_digit ::= digit | A | B | C | D | E | F
 * Expressed as the decimal digits plus the letters A through F.
 */
Extended_digit = ({Digit} | [A-F])
/*
 * Unconventional numeric syntax seen in large open-source Ada projects.
 * Two spellings are accepted: a run of extended digits with an optional "0x"
 * in front, or a numeral with an optional fractional part and a trailing "f".
 */
NONCONFORM_NUMBER = ("0x"? {Extended_digit}+ | {Numeral} ("." {Numeral})? "f")
110
/*
 * RM 2.5-1: a character_literal is one graphic character enclosed between two
 * apostrophes.
 *
 * RM 2.5-2: character_literal ::= 'graphic_character'
 *
 * The middle element here is [^], which takes any single character rather
 * than only graphic characters.
 */
Character_literal = "'" [^] "'"
118
/*
 * RM 2.6-1: a string_literal is a (possibly empty) sequence of graphic
 * characters enclosed between two quotation marks acting as string brackets.
 * RM 2.6-2: string_literal ::= "{string_element}"
 * RM 2.6-3: string_element ::= "" | non_quotation_mark_graphic_character
 * RM 2.6-4: a string_element is either a pair of quotation marks (""), or one
 * graphic_character other than a quotation mark.
 *
 * Between the brackets the pattern takes doubled quotation marks or any
 * character that is not a quotation mark.
 */
String_literal = \" (\"\" | [^\"])* \"
129
/*
 * RM 2.7-2: comment ::= --{non_end_of_line_character}
 * Only the two leading hyphens are matched by this production.
 */
Comment_token = "-" "-"
134
/*
 * File-name extensions accepted by File, spelled in either letter case:
 * ada, adb, ads, diff and patch.
 */
FileExt = ([Pp][Aa][Tt][Cc][Hh] | [Dd][Ii][Ff][Ff] | [Aa][Dd][ABSabs])
/*
 * A file-like name: one ASCII letter, any number of {FNameChar}, a dot, and
 * one of the extensions above.
 */
File = [A-Za-z] {FNameChar}* "." {FileExt}
137
138%state SCOMMENT
139
140%%
// Token rules for the initial state. Rule order is significant and is kept
// as: identifier, character literal, numeric literal, string literal, comment.
<YYINITIAL> {
    // An identifier is offered as a symbol; yylex() returns the current state
    // only when the offer succeeds and returnOnSymbol() is true.
    {Identifier}    {
        chkLOC();
        final String symbol = yytext();
        if (offerSymbol(symbol, 0, false)) {
            if (returnOnSymbol()) {
                return yystate();
            }
        }
    }

    // Character literals are taken with the string class.
    {Character_literal}    {
        chkLOC();
        final String charLiteral = yytext();
        takeLiteral(charLiteral, HtmlConsts.STRING_CLASS);
    }

    // Numeric literals are taken with the number class.
    {Numeric_literal}    {
        chkLOC();
        final String number = yytext();
        takeLiteral(number, HtmlConsts.NUMBER_CLASS);
    }

    // String literals are taken with the string class.
    {String_literal}    {
        chkLOC();
        final String stringLiteral = yytext();
        takeLiteral(stringLiteral, HtmlConsts.STRING_CLASS);
    }

    // The comment leader switches to SCOMMENT, opens a comment span at the
    // match position, and is then offered as plain text. chkLOC() is not
    // called in this rule.
    {Comment_token}    {
        yypush(SCOMMENT);
        onDisjointSpanChanged(HtmlConsts.COMMENT_CLASS, yychar);
        offer(yytext());
    }
}
171
// Zero or more {WhspChar} followed by {EOL} while in SCOMMENT: the whole match
// is pushed back unread, the state pushed by the comment rule is popped, and
// the open span is ended by passing null.
<SCOMMENT> {WhspChar}*{EOL}    {
    yypushback(yytext().length());
    yypop();
    onDisjointSpanChanged(null, yychar);
}
180
// Zero or more {WhspChar} followed by {EOL} in the initial state: report the
// matched text and its position as an end of line.
<YYINITIAL> {WhspChar}*{EOL}    {
    final String lineEnding = yytext();
    onEndOfLineMatched(lineEnding, yychar);
}
186
// Single-character rules shared by the initial and comment states.
<YYINITIAL, SCOMMENT> {
    // One whitespace character other than \n or \r per match; offered as-is
    // without calling chkLOC().
    [[\s]--[\n\r]]    {
        final String blank = yytext();
        offer(blank);
    }
    // Any other single character except \n and \r. Kept to one character per
    // match because the whitespace rule above is also one character wide.
    [^\n\r]    {
        chkLOC();
        final String other = yytext();
        offer(other);
    }
}
198
// "comment links": path-, file-, URI- and e-mail-like text inside comments.
// Each handler runs only when takeAllContent() is true.
<SCOMMENT> {
    // Path-like text, reported with '/' as the separator.
    {FPath}    {
        if (takeAllContent()) {
            final String pathText = yytext();
            onPathlikeMatched(pathText, '/', false, yychar);
        }
    }

    // File-like text as described by the File production.
    {File}    {
        if (takeAllContent()) {
            onFilelikeMatched(yytext(), yychar);
        }
    }

    // URIs matched by {BrowseableURI}.
    {BrowseableURI}    {
        if (takeAllContent()) {
            final String uri = yytext();
            onUriMatched(uri, yychar);
        }
    }

    // E-mail-like text: name characters, "@", name characters, ".", name
    // characters.
    {FNameChar}+ "@" {FNameChar}+ "." {FNameChar}+    {
        if (takeAllContent()) {
            final String address = yytext();
            onEmailAddressMatched(address, yychar);
        }
    }
}
226