xref: /OpenGrok/opengrok-indexer/src/main/jflex/analysis/scala/Scala.lexh (revision d219b4cea555a12b602d2d5518daa22134ad4879)
1*d219b4ceSAdam Hornacek/*
2*d219b4ceSAdam Hornacek * CDDL HEADER START
3*d219b4ceSAdam Hornacek *
4*d219b4ceSAdam Hornacek * The contents of this file are subject to the terms of the
5*d219b4ceSAdam Hornacek * Common Development and Distribution License (the "License").
6*d219b4ceSAdam Hornacek * You may not use this file except in compliance with the License.
7*d219b4ceSAdam Hornacek *
8*d219b4ceSAdam Hornacek * See LICENSE.txt included in this distribution for the specific
9*d219b4ceSAdam Hornacek * language governing permissions and limitations under the License.
10*d219b4ceSAdam Hornacek *
11*d219b4ceSAdam Hornacek * When distributing Covered Code, include this CDDL HEADER in each
12*d219b4ceSAdam Hornacek * file and include the License file at LICENSE.txt.
13*d219b4ceSAdam Hornacek * If applicable, add the following below this CDDL HEADER, with the
14*d219b4ceSAdam Hornacek * fields enclosed by brackets "[]" replaced with your own identifying
15*d219b4ceSAdam Hornacek * information: Portions Copyright [yyyy] [name of copyright owner]
16*d219b4ceSAdam Hornacek *
17*d219b4ceSAdam Hornacek * CDDL HEADER END
18*d219b4ceSAdam Hornacek */
19*d219b4ceSAdam Hornacek
20*d219b4ceSAdam Hornacek/*
21*d219b4ceSAdam Hornacek * Copyright (c) 2006, 2016, Oracle and/or its affiliates. All rights reserved.
22*d219b4ceSAdam Hornacek * Portions Copyright (c) 2017, Chris Fraire <cfraire@me.com>.
23*d219b4ceSAdam Hornacek */
24*d219b4ceSAdam Hornacek
25*d219b4ceSAdam Hornacek/*
26*d219b4ceSAdam Hornacek * id  ::=  plainid
27*d219b4ceSAdam Hornacek *      |  ‘`’ { charNoBackQuoteOrNewline | UnicodeEscape | charEscapeSeq } ‘`’
28*d219b4ceSAdam Hornacek * plainid ::=  upper idrest
29*d219b4ceSAdam Hornacek *          |  varid
30*d219b4ceSAdam Hornacek *          |  op
31*d219b4ceSAdam Hornacek * varid ::=  lower idrest
32*d219b4ceSAdam Hornacek * op  ::=  opchar {opchar}
33*d219b4ceSAdam Hornacek */
/*
 * Identifier macros, built from the character classes upper, lower,
 * idrest_char and opchar that are defined elsewhere in this file.
 *   Identifier         - a plain identifier with no operator suffix.
 *   OpSuffixIdentifier - a plain identifier followed by "_" and an operator.
 *   OpIdentifier       - an identifier consisting only of operator characters.
 */
34*d219b4ceSAdam HornacekIdentifier = {plainid_mod1}
35*d219b4ceSAdam HornacekOpSuffixIdentifier = {plainid_mod2}
36*d219b4ceSAdam HornacekOpIdentifier = {op}
/*
 * A backtick, then one or more ordinary characters or escapes, then a closing
 * backtick. Because of the "+", an empty pair of backticks does not match.
 */
37*d219b4ceSAdam HornacekBacktickIdentifier = [`] ({charNoBackQuoteOrNewline} | {UnicodeEscape} |
38*d219b4ceSAdam Hornacek    {charEscapeSeq})+ [`]
/*
 * plainid_mod1: one upper or lower character followed by any number of
 *     letters and digits.
 * plainid_mod2: the same as plainid_mod1, then a literal "_" and {op}.
 * op: a non-empty run of operator characters.
 */
39*d219b4ceSAdam Hornacekplainid_mod1 = ({upper} | {lower}) {idrest_char}*
40*d219b4ceSAdam Hornacekplainid_mod2 = ({upper} | {lower}) {idrest_char}* "_" {op}
41*d219b4ceSAdam Hornacekop = {opchar}+
42*d219b4ceSAdam Hornacek/*
43*d219b4ceSAdam Hornacek * opchar ::= // printableChar not matched by (whiteSpace | upper | lower |
44*d219b4ceSAdam Hornacek *     // letter | digit | paren | delim)
45*d219b4ceSAdam Hornacek * opchar ::= Unicode_Sm | Unicode_So ; N.b. [cfraire] I believe the definition
46*d219b4ceSAdam Hornacek *    from 2.12 where the exclusion parenthetical above contains
47*d219b4ceSAdam Hornacek *    "opchar | Unicode_Sm | Unicode_So" is a typo and this is the proper form.
48*d219b4ceSAdam Hornacek * printableChar ::= // all characters in [\u0020, \u007F] inclusive
49*d219b4ceSAdam Hornacek */
/*
 * A single operator character: either
 *   (a) a character in U+0020..U+007F that is not whitespace, an ASCII letter,
 *       a digit, "$", "_", a round/square/curly bracket, a backtick, a single
 *       or double quote, ".", ";" or ","; or
 *   (b) any character in Unicode category Sm or So.
 * The \p{...} categories listed in the subtracted set can only remove
 * characters that lie inside U+0020..U+007F, since that range is the set
 * being subtracted from.
 */
50*d219b4ceSAdam Hornacekopchar = ([[\u{0020}-\u{007F}]--[\sA-Z\$_\p{Lu}a-z\p{Ll}\p{Lo}\p{Lt}\p{Nl}0-9\(\)\[\]\{\}`\'\"\.;,]] |
51*d219b4ceSAdam Hornacek    [\p{Sm}\p{So}])
52*d219b4ceSAdam Hornacek/*
53*d219b4ceSAdam Hornacek * upper   ::=  ‘A’ | … | ‘Z’ | ‘$’ | ‘_’  // and Unicode category Lu
54*d219b4ceSAdam Hornacek * lower   ::=  ‘a’ | … | ‘z’ // and Unicode category Ll
55*d219b4ceSAdam Hornacek * idrest  ::=  {letter | digit} [‘_’ op]
56*d219b4ceSAdam Hornacek * letter  ::=  upper | lower // and Unicode categories Lo, Lt, Nl
57*d219b4ceSAdam Hornacek * UnicodeEscape ::= ‘\’ ‘u’ {‘u’} hexDigit hexDigit hexDigit hexDigit
58*d219b4ceSAdam Hornacek * charEscapeSeq    ::= ‘\’ (‘b’ | ‘t’ | ‘n’ | ‘f’ | ‘r’ | ‘"’ | ‘'’ | ‘\’)
59*d219b4ceSAdam Hornacek */
/*
 * Character classes and escape sequences used by the identifier macros.
 *   upper  - A-Z, "$", "_" and Unicode category Lu.
 *   lower  - a-z and Unicode category Ll.
 *   letter - upper, lower, and Unicode categories Lo, Lt and Nl.
 *   idrest_char - one letter or digit, i.e. a single character of the
 *       grammar's idrest without its optional "_ op" suffix.
 */
60*d219b4ceSAdam Hornacekupper = [A-Z\$_\p{Lu}]
61*d219b4ceSAdam Hornaceklower = [a-z\p{Ll}]
62*d219b4ceSAdam Hornacekidrest_char = ({letter} | {digit}) // N.b. here OpenGrok ignores the op suffix
63*d219b4ceSAdam Hornacekletter = ({upper} | {lower} | [\p{Lo}\p{Lt}\p{Nl}])
/*
 * charNoBackQuoteOrNewline: any character except line feed, carriage return
 *     and backtick.
 * UnicodeEscape: a backslash, one or more "u", then exactly four hex digits.
 * charEscapeSeq: a backslash followed by one of b t n f r " ' \
 */
64*d219b4ceSAdam HornacekcharNoBackQuoteOrNewline = [[^]--[\n\r`]]
65*d219b4ceSAdam HornacekUnicodeEscape = \\ [u]+ {hexDigit}{4}
66*d219b4ceSAdam HornacekcharEscapeSeq = \\ [btnfr\"\'\\]
67*d219b4ceSAdam Hornacek
/*
 * A numeric literal: an optional leading "-" followed by either an integer
 * literal or a floating-point literal.
 */
68*d219b4ceSAdam HornacekNumber = [\-]? ({integerLiteral} | {floatingPointLiteral})
69*d219b4ceSAdam Hornacek/*
70*d219b4ceSAdam Hornacek * Numeric Literal ::= [‘-’] integerLiteral
71*d219b4ceSAdam Hornacek *                  |  [‘-’] floatingPointLiteral
72*d219b4ceSAdam Hornacek *
73*d219b4ceSAdam Hornacek * integerLiteral  ::=  (decimalNumeral | hexNumeral) [‘L’ | ‘l’]
74*d219b4ceSAdam Hornacek * decimalNumeral  ::=  ‘0’ | nonZeroDigit {digit}
75*d219b4ceSAdam Hornacek * hexNumeral      ::=  ‘0’ (‘x’ | ‘X’) hexDigit {hexDigit}
76*d219b4ceSAdam Hornacek * digit           ::=  ‘0’ | nonZeroDigit
77*d219b4ceSAdam Hornacek * nonZeroDigit    ::=  ‘1’ | … | ‘9’
78*d219b4ceSAdam Hornacek * hexDigit        ::= ‘0’ | … | ‘9’ | ‘A’ | … | ‘F’ | ‘a’ | … | ‘f’
79*d219b4ceSAdam Hornacek */
/*
 * Integer literal macros.
 *   integerLiteral - a decimal or hexadecimal numeral with an optional
 *       "L" or "l" suffix.
 *   decimalNumeral - either the single digit "0", or a non-zero digit
 *       followed by any number of digits; a decimalNumeral therefore never
 *       has a leading zero.
 *   hexNumeral - "0x" or "0X" followed by at least one hex digit.
 */
80*d219b4ceSAdam HornacekintegerLiteral = ({decimalNumeral} | {hexNumeral}) [Ll]?
81*d219b4ceSAdam HornacekdecimalNumeral = ([0] | {nonZeroDigit} {digit}*)
82*d219b4ceSAdam HornacekhexNumeral = [0][xX] {hexDigit}+
/* Single-character digit classes shared by the numeric and escape macros. */
83*d219b4ceSAdam Hornacekdigit = [0-9]
84*d219b4ceSAdam HornaceknonZeroDigit = [1-9]
85*d219b4ceSAdam HornacekhexDigit = [0-9A-Fa-f]
86*d219b4ceSAdam Hornacek
87*d219b4ceSAdam Hornacek/*
88*d219b4ceSAdam Hornacek * floatingPointLiteral  ::=
89*d219b4ceSAdam Hornacek *     digit {digit} ‘.’ digit {digit} [exponentPart] [floatType]
90*d219b4ceSAdam Hornacek *     |  ‘.’ digit {digit} [exponentPart] [floatType]
91*d219b4ceSAdam Hornacek *     |  digit {digit} exponentPart [floatType]
92*d219b4ceSAdam Hornacek *     |  digit {digit} [exponentPart] floatType
93*d219b4ceSAdam Hornacek * exponentPart          ::=  (‘E’ | ‘e’) [‘+’ | ‘-’] digit {digit}
94*d219b4ceSAdam Hornacek * floatType             ::=  ‘F’ | ‘f’ | ‘D’ | ‘d’
95*d219b4ceSAdam Hornacek */
/*
 * Floating-point literal macros. The four alternatives, in order:
 *   1. digits "." digits, with optional exponent and optional type suffix;
 *   2. "." digits, with optional exponent and optional type suffix;
 *   3. digits with a mandatory exponent and optional type suffix;
 *   4. digits with an optional exponent and a mandatory type suffix.
 * Alternatives 3 and 4 each require an exponent or a type suffix, so a bare
 * run of digits is not matched by this macro. Unlike decimalNumeral, the
 * digit runs here are {digit}+ and so may begin with "0".
 */
96*d219b4ceSAdam HornacekfloatingPointLiteral = ({digit}+ "." {digit}+ {exponentPart}? {floatType}? |
97*d219b4ceSAdam Hornacek    "." {digit}+ {exponentPart}? {floatType}? |
98*d219b4ceSAdam Hornacek    {digit}+ {exponentPart} {floatType}? |
99*d219b4ceSAdam Hornacek    {digit}+ {exponentPart}? {floatType})
/*
 * exponentPart: "E" or "e", an optional sign, then at least one digit.
 * floatType: one of the suffix letters F, f, D, d.
 */
100*d219b4ceSAdam HornacekexponentPart = [Ee] [\+\-]? {digit}+
101*d219b4ceSAdam HornacekfloatType = [FfDd]
102