/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, 2016, Oracle and/or its affiliates. All rights reserved.
 * Portions Copyright (c) 2017, 2019, Chris Fraire <cfraire@me.com>.
 */

/*
 * Regex productions shared between AdaXref and AdaSymbolTokenizer
 */

/*
 * Identifiers syntax
 * 2.3-1: Identifiers are used as names.
 * 2.3-2/2: identifier ::=
 *     identifier_start {identifier_start | identifier_extend}
 */
Identifier = {Identifier_start} ({Identifier_start} | {Identifier_extend})*
/*
 * 2.3-3/2: identifier_start ::= letter_uppercase | letter_lowercase |
 *     letter_titlecase | letter_modifier | letter_other | number_letter
 */
Identifier_start = [\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}]
/*
 * 2.3-3.1/2: identifier_extend ::= mark_non_spacing | mark_spacing_combining |
 *     number_decimal | punctuation_connector | other_format
 */
Identifier_extend = [\p{Mn}\p{Mc}\p{Nd}\p{Pc}\p{Cf}]
/*
 * 2.3-4/2 reads "After eliminating the characters in category other_format, an
 * identifier shall not contain two consecutive characters in category
 * punctuation_connector, or end with a character in that category," but that
 * is not enforceable in jflex regexes, as its syntax does not allow negative
 * look-behind assertions.
 */

/*
 * 2.4-1: There are two kinds of numeric_literals, real literals and integer
 * literals. A real literal is a numeric_literal that includes a point; an
 * integer literal is a numeric_literal without a point.
 *
 * 2.4-2: numeric_literal ::= decimal_literal | based_literal
 *
 * In addition to the two standard forms, this production also accepts
 * NONCONFORM_NUMBER (defined further below).
 */
Numeric_literal = ({NONCONFORM_NUMBER} | {Decimal_literal} | {Based_literal})
/*
 * 2.4.1-1: A decimal_literal is a numeric_literal in the conventional decimal
 * notation (that is, the base is ten).
 *
 * 2.4.1-2: decimal_literal ::= numeral [.numeral] [exponent]
 */
Decimal_literal = {Numeral} ([\.]{Numeral})? {Exponent}?
/*
 * 2.4.1-3: numeral ::= digit {[underline] digit}
 */
Numeral = {Digit} ([_]? {Digit})*
/*
 * 2.4.1-4: exponent ::= E [+] numeral | E - numeral
 */
Exponent = [E] [\+\-]? {Numeral}
/*
 * 2.4.1-4.1/2: digit ::= 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9
 */
Digit = [0-9]
/*
 * 2.4.1-5 reads "An exponent for an integer literal shall not have a minus
 * sign," but that rule is not distinguished here in regex.
 */
/*
 * 2.4.2-1: A based_literal is a numeric_literal expressed in a form that
 * specifies the base explicitly.
 *
 * 2.4.2-2: based_literal ::= base # based_numeral [.based_numeral] # [exponent]
 */
Based_literal = {Base}[#]{Based_numeral} ([\.]{Based_numeral})? [#]{Exponent}?
/*
 * 2.4.2-3: base ::= numeral
 * 2.4.2-6: The base (the numeric value of the decimal numeral preceding the
 * first #) shall be at least two and at most sixteen.
 *
 * The regex below encodes that range directly: 2 through 9, or 10 through 16.
 */
Base = ([2-9] | [1][0-6])
/*
 * 2.4.2-4: based_numeral ::= extended_digit {[underline] extended_digit}
 */
Based_numeral = {Extended_digit} ([_]? {Extended_digit})*
/*
 * 2.4.2-5: extended_digit ::= digit | A | B | C | D | E | F
 */
Extended_digit = [0-9A-F]
/*
 * This is unconventional numeric syntax seen in large open-source Ada
 * projects: an optionally "0x"-prefixed run of extended digits, or a numeral
 * (with optional fraction) followed by an "f" suffix.
 */
NONCONFORM_NUMBER = ("0x"? {Extended_digit}+ | {Numeral} ([\.]{Numeral})?[f])

/*
 * 2.5-1: A character_literal is formed by enclosing a graphic character
 * between two apostrophe characters.
 *
 * 2.5-2: character_literal ::= 'graphic_character'
 *
 * Note that the regex accepts any single character between the apostrophes.
 */
Character_literal = ['] [^] [']

/*
 * 2.6-1: A string_literal is formed by a sequence of graphic characters
 * (possibly none) enclosed between two quotation marks used as string
 * brackets.
 * 2.6-2: string_literal ::= "{string_element}"
 * 2.6-3: string_element ::= "" | non_quotation_mark_graphic_character
 * 2.6-4: A string_element is either a pair of quotation marks (""), or a
 * single graphic_character other than a quotation mark.
 */
String_literal = [\"] ([\"][\"] | [^\"])* [\"]

/*
 * 2.7-2: comment ::= --{non_end_of_line_character}
 *
 * Only the leading "--" is matched by this production; the remainder of the
 * comment line is handled by the rules active in the SCOMMENT state.
 */
Comment_token = "--"

/*
 * File names recognized inside comments: a letter, any number of FNameChar,
 * a dot, and one of the extensions ada/adb/ads, diff or patch (any letter
 * case). FNameChar is not defined in this file -- presumably it is supplied
 * by another spec included alongside this one.
 */
FileExt = ([Aa][Dd][AaBbSs] | [Dd][Ii][Ff][Ff] | [Pp][Aa][Tt][Cc][Hh])
File = [a-zA-Z]{FNameChar}* "." {FileExt}

%state SCOMMENT

%%
/*
 * Rules for ordinary source text. (WhspChar and EOL, used below, are not
 * defined in this file -- presumably they come from another included spec.)
 */
<YYINITIAL> {
    {Identifier}    {
        chkLOC();
        // Hand the matched text to offerSymbol(); the current lexical state
        // is returned only when both offerSymbol() and returnOnSymbol()
        // report true (returnOnSymbol() is not consulted otherwise).
        boolean offered = offerSymbol(yytext(), 0, false);
        if (offered && returnOnSymbol()) {
            return yystate();
        }
    }

    {Character_literal}    {
        // Character literals are emitted with the string CSS class.
        chkLOC();
        takeLiteral(yytext(), HtmlConsts.STRING_CLASS);
    }

    {Numeric_literal}    {
        chkLOC();
        takeLiteral(yytext(), HtmlConsts.NUMBER_CLASS);
    }

    {String_literal}    {
        chkLOC();
        takeLiteral(yytext(), HtmlConsts.STRING_CLASS);
    }

    {Comment_token}    {
        // Enter the comment state, open a comment span starting at the
        // match position, then emit the "--" itself.
        yypush(SCOMMENT);
        onDisjointSpanChanged(HtmlConsts.COMMENT_CLASS, yychar);
        offer(yytext());
    }

    {WhspChar}*{EOL}    {
        onEndOfLineMatched(yytext(), yychar);
    }
}

<SCOMMENT> {
    {WhspChar}*{EOL}    {
        // A line end terminates the comment. The whole match is pushed back
        // so that it is scanned again after yypop() (presumably by the state
        // that was active before the yypush), and the comment span is closed.
        yypushback(yylength());
        yypop();
        onDisjointSpanChanged(null, yychar);
    }
}

<YYINITIAL, SCOMMENT> {
    // Only one whitespace char at a time
    [[\s]--[\n\r]]    {
        offer(yytext());
    }
    // Only one character at a time because of \s restriction above.
    [^\n\r]    {
        chkLOC();
        offer(yytext());
    }
}

// "comment links"
// (FPath and BrowseableURI are not defined in this file -- presumably they
// come from another included spec.) Each match is reported to its handler
// only when takeAllContent() returns true.
<SCOMMENT> {
    {FPath}    {
        if (takeAllContent()) {
            onPathlikeMatched(yytext(), '/', false, yychar);
        }
    }

    {File}    {
        if (takeAllContent()) {
            onFilelikeMatched(yytext(), yychar);
        }
    }

    {BrowseableURI}    {
        if (takeAllContent()) {
            onUriMatched(yytext(), yychar);
        }
    }

    {FNameChar}+ "@" {FNameChar}+ "." {FNameChar}+    {
        if (takeAllContent()) {
            onEmailAddressMatched(yytext(), yychar);
        }
    }
}