xref: /OpenGrok/opengrok-indexer/src/main/jflex/analysis/scala/Scala.lexh (revision d219b4cea555a12b602d2d5518daa22134ad4879)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2006, 2016, Oracle and/or its affiliates. All rights reserved.
 * Portions Copyright (c) 2017, Chris Fraire <cfraire@me.com>.
 */

/*
 * id  ::=  plainid
 *      |  ‘`’ { charNoBackQuoteOrNewline | UnicodeEscape | charEscapeSeq } ‘`’
 * plainid ::=  upper idrest
 *          |  varid
 *          |  op
 * varid ::=  lower idrest
 * op  ::=  opchar {opchar}
 */
// Public identifier macros: thin aliases over the helper macros that follow.
// Identifier         - a plain identifier with no operator suffix.
// OpSuffixIdentifier - a plain identifier ending in "_" plus an operator run.
// OpIdentifier       - an identifier made up solely of operator characters.
Identifier = {plainid_mod1}
OpSuffixIdentifier = {plainid_mod2}
OpIdentifier = {op}
// A backtick, then one or more permitted characters or escape sequences, then
// a closing backtick. The "+" means an empty pair of backticks is not matched.
BacktickIdentifier = [`] ({charNoBackQuoteOrNewline} | {UnicodeEscape} |
    {charEscapeSeq})+ [`]
// A start character (upper or lower) followed by any number of letters/digits.
plainid_mod1 = ({upper} | {lower}) {idrest_char}*
// Same shape as plainid_mod1, but terminated by an underscore and an operator.
plainid_mod2 = ({upper} | {lower}) {idrest_char}* "_" {op}
// One or more operator characters.
op = {opchar}+
/*
 * opchar ::= // printableChar not matched by (whiteSpace | upper | lower |
 *     // letter | digit | paren | delim)
 * opchar ::= Unicode_Sm | Unicode_So ; N.b. [cfraire] I believe the definition
 *    from 2.12 where the exclusion parenthetical above contains
 *    "opchar | Unicode_Sm | Unicode_So" is a typo and this is the proper form.
 * printableChar ::= // all characters in [\u0020, \u007F] inclusive
 */
// A single operator character, built from two alternatives:
//   1. the range \u0020-\u007F with these removed: whitespace, A-Z, '$', '_',
//      a-z, digits, the Unicode categories Lu/Ll/Lo/Lt/Nl, the brackets
//      ( ) [ ] { }, and the delimiters ` ' " . ; ,
//   2. any character in Unicode category Sm or So.
opchar = ([[\u{0020}-\u{007F}]--[\sA-Z\$_\p{Lu}a-z\p{Ll}\p{Lo}\p{Lt}\p{Nl}0-9\(\)\[\]\{\}`\'\"\.;,]] |
    [\p{Sm}\p{So}])
/*
 * upper   ::=  ‘A’ | … | ‘Z’ | ‘$’ | ‘_’  // and Unicode category Lu
 * lower   ::=  ‘a’ | … | ‘z’ // and Unicode category Ll
 * idrest  ::=  {letter | digit} [‘_’ op]
 * letter  ::=  upper | lower // and Unicode categories Lo, Lt, Nl
 * UnicodeEscape ::= ‘\’ ‘u’ {‘u’} hexDigit hexDigit hexDigit hexDigit
 * charEscapeSeq    ::= ‘\’ (‘b’ | ‘t’ | ‘n’ | ‘f’ | ‘r’ | ‘"’ | ‘'’ | ‘\’)
 */
// Upper-case start characters: A-Z, '$', '_' and Unicode category Lu.
upper = [A-Z\$_\p{Lu}]
// Lower-case start characters: a-z and Unicode category Ll.
lower = [a-z\p{Ll}]
// One character of an identifier's tail: a letter or a digit.
idrest_char = ({letter} | {digit}) // N.b. here OpenGrok ignores the op suffix
// Any upper or lower character, plus Unicode categories Lo, Lt and Nl.
letter = ({upper} | {lower} | [\p{Lo}\p{Lt}\p{Nl}])
// Any character except line feed, carriage return and backtick.
charNoBackQuoteOrNewline = [[^]--[\n\r`]]
// A backslash, one or more 'u', then exactly four hexadecimal digits.
UnicodeEscape = \\ [u]+ {hexDigit}{4}
// A backslash followed by one of: b t n f r " ' \
charEscapeSeq = \\ [btnfr\"\'\\]

// A numeric literal: an optional leading minus sign followed by either an
// integer literal or a floating-point literal.
Number = [\-]? ({integerLiteral} | {floatingPointLiteral})
/*
 * Numeric Literal ::= [‘-’] integerLiteral
 *                  |  [‘-’] floatingPointLiteral
 *
 * integerLiteral  ::=  (decimalNumeral | hexNumeral) [‘L’ | ‘l’]
 * decimalNumeral  ::=  ‘0’ | nonZeroDigit {digit}
 * hexNumeral      ::=  ‘0’ (‘x’ | ‘X’) hexDigit {hexDigit}
 * digit           ::=  ‘0’ | nonZeroDigit
 * nonZeroDigit    ::=  ‘1’ | … | ‘9’
 * hexDigit        ::= ‘0’ | … | ‘9’ | ‘A’ | … | ‘F’ | ‘a’ | … | ‘f’
 */
// A decimal or hexadecimal numeral with an optional 'L' or 'l' suffix.
integerLiteral = ({decimalNumeral} | {hexNumeral}) [Ll]?
// Either a lone "0", or a non-zero digit followed by any number of digits
// (so a numeral with leading zeros is not matched as a single token).
decimalNumeral = ([0] | {nonZeroDigit} {digit}*)
// "0x" or "0X" followed by at least one hexadecimal digit.
hexNumeral = [0][xX] {hexDigit}+
// Single-character classes shared by the numeric macros.
digit = [0-9]
nonZeroDigit = [1-9]
hexDigit = [0-9A-Fa-f]

/*
 * floatingPointLiteral  ::=
 *     digit {digit} ‘.’ digit {digit} [exponentPart] [floatType]
 *     |  ‘.’ digit {digit} [exponentPart] [floatType]
 *     |  digit {digit} exponentPart [floatType]
 *     |  digit {digit} [exponentPart] floatType
 * exponentPart          ::=  (‘E’ | ‘e’) [‘+’ | ‘-’] digit {digit}
 * floatType             ::=  ‘F’ | ‘f’ | ‘D’ | ‘d’
 */
// A floating-point literal in one of four shapes:
//   1. digits "." digits, with optional exponent and optional type suffix
//   2. "." digits, with optional exponent and optional type suffix
//   3. digits with a mandatory exponent and optional type suffix
//   4. digits with an optional exponent and a mandatory type suffix
floatingPointLiteral = ({digit}+ "." {digit}+ {exponentPart}? {floatType}? |
    "." {digit}+ {exponentPart}? {floatType}? |
    {digit}+ {exponentPart} {floatType}? |
    {digit}+ {exponentPart}? {floatType})
// 'E' or 'e', an optional sign, then at least one digit.
exponentPart = [Ee] [\+\-]? {digit}+
// Type suffix: one of F, f, D or d.
floatType = [FfDd]
