/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2017-2019, Chris Fraire <cfraire@me.com>.
 */

/*
 * 8.32.16 Syntax (non-production): Identifier -- a sequence of one or more
 * alpha_numeric characters of which the first is a letter.
 */
Identifier = {letter} {alpha_numeric}*

/*
 * 8.32.2 Definition: Letter, alpha_betic, numeric, alpha_numeric, printable
 *
 * A letter is:
 * 1 Any of the following fifty-two, each a lower-case or upper-case element of
 *   the Roman alphabet
 * 2 If the underlying character set is 8-bit extended ASCII, the characters of
 *   codes 192 to 255 in that set.
 * 3 If the underlying character set is Unicode, all characters defined as
 *   letters in that set.
 *
 * alpha_betic character is a letter or an underscore _.
 * numeric character is one of the ten characters 0-9
 * alpha_numeric character is alpha_betic or numeric
 */
letter = [\p{Letter}]    // This suffices to cover 1,2,3 above.
alpha_betic = ({letter} | [_])
numeric = [0-9]
alpha_numeric = ({alpha_betic} | {numeric})

/*
 * printable character is any of the characters listed as printable in the
 * definition of the character set (Unicode or extended ASCII). "Printable"
 * characters exclude such special characters as new line and backspace.
 *
 * N.b. for OpenGrok purposes, take "printable" as Wikipedia's definition of
 * Unicode Graphic Character.
 *
 * 8.29.15 Validity: Verbatim String rule
 *
 * Regarding α as the (possibly empty) Simple_string appearing in a
 * Verbatim_string_opener: Every character in α is printable, and not a double
 * quote.
 */
Vstring_alpha = [[\p{Letter}\p{Mark}\p{Number}\p{Punctuation}\p{Symbol}\p{Zs}]--[\"]]

/*
 * A numeric literal: an Integer or a Real, optionally preceded by a sign.
 */
Number = [\+\-]? ({Integer} | {Real})
/*
 * Integer = Δ [Integer_base] Digit_sequence
 * Integer_base = Δ "0" Integer_base_letter
 * Integer_base_letter = Δ "b" | "c" | "x" | "B" | "C" | "X"
 * Digit_sequence = Δ Digit+
 * Digit = Δ "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" |
 *     "a" | "b" | "c" | "d" | "e" | "f" |
 *     "A" | "B" | "C" | "D" | "E" | "F" | "_"
 * "Neither the first nor the last Digit of a Digit_sequence is an underscore"
 *
 * Each form below is either a plain run of digits, or a run that may contain
 * underscores but begins and ends with a digit of the relevant base.
 */
Integer = ({Decimal_integer} | {Hexadecimal} | {Binary} | {Octal})
Decimal_integer = ({DIGIT}+ | {DIGIT} ("_" | {DIGIT})+ {DIGIT})
Hexadecimal = 0[xX] ({HEXDIG}+ | {HEXDIG} ("_" | {HEXDIG})+ {HEXDIG})
Binary = 0[bB] ({BINDIG}+ | {BINDIG} ("_" | {BINDIG})+ {BINDIG})
Octal = 0[cC] ({OCTDIG}+ | {OCTDIG} ("_" | {OCTDIG})+ {OCTDIG})
DIGIT = [0-9]
HEXDIG = [0-9a-fA-F]
BINDIG = [01]
OCTDIG = [0-7]
/*
 * Real =
 *   - An optional decimal Integer, giving the integral part.
 *   - A required "." (dot).
 *   - An optional decimal Integer, giving the fractional part.
 *   - An optional exponent, which is the letter e or E followed by an optional
 *     Sign (+ or -) and a decimal Integer.
 *
 * The sign inside the exponent is optional, so both 1.5e10 and 1.5e+10 are
 * matched as a single Real. At least one of the integral and fractional parts
 * must be present: ".5" and "5." match, a lone "." does not.
 */
Real = ({Decimal_integer}? "." {Decimal_integer} |
    {Decimal_integer} ".") ([eE] [\+\-]? {Decimal_integer})?
/*
 * Character_constant = Δ "'" Character "'"
 *
 * One quoted character: either a single character that is not a quote or a
 * line terminator, or one percent escape as defined by Special_character.
 */
Character_constant = \' ([^\'\n\r] | {Special_character}) \'

/*
 * A percent escape: "%" followed by one of the listed code characters, or by
 * a slash-delimited run of decimal digits.
 */
Special_character = % ([A-Da-dFfHhLlNnQqRrSsT-Vt-v%\'\"\(\)\<\>] | "/" [0-9]+ "/")

/*
 * SCOMMENT : single-line comment
 * STRING : basic manifest string (literal)
 * VSTRING : verbatim manifest string (literal)
 */
%state SCOMMENT STRING VSTRING

%%
<YYINITIAL> {
    /* Identifiers are offered as symbols; the action returns early on request. */
    {Identifier}    {
        chkLOC();
        if (offerSymbol(yytext(), 0, false) && returnOnSymbol()) {
            return yystate();
        }
    }

    /* Numeric literals are wrapped in their own number span. */
    {Number}    {
        chkLOC();
        onDisjointSpanChanged(HtmlConsts.NUMBER_CLASS, yychar);
        offer(yytext());
        onDisjointSpanChanged(null, yychar);
    }

    /* Character constants are wrapped in a string span. */
    {Character_constant}    {
        chkLOC();
        onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
        offer(yytext());
        onDisjointSpanChanged(null, yychar);
    }

    /*
     * Start of a single-line comment: enter SCOMMENT and open a comment span.
     * chkLOC() is not called for the comment opener.
     */
    "--"    {
        yypush(SCOMMENT);
        onDisjointSpanChanged(HtmlConsts.COMMENT_CLASS, yychar);
        offer(yytext());
    }

    /* Opening quote of a basic manifest string: enter STRING. */
    \"    {
        chkLOC();
        yypush(STRING);
        onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
        offer(yytext());
    }
    /*
     * Verbatim string opener: a quote, an optional run of printable
     * characters, then "[" or "{". The whole opener is handed to vop().
     *
     * NOTE(review): because the longest match wins, a plain string that has a
     * "[" or "{" before its closing quote (e.g. "a[1]") matches this rule
     * rather than the bare quote rule above -- confirm that vop() copes.
     */
    \" {Vstring_alpha}* [\[\{]    {
        chkLOC();
        vop(yytext());
    }
}

<SCOMMENT> {
    /* End of line ends the comment: close the span, then leave SCOMMENT. */
    {WhspChar}*{EOL}    {
        onDisjointSpanChanged(null, yychar);
        yypop();
        onEndOfLineMatched(yytext(), yychar);
    }
}

<STRING> {
    /* Closing quote: emit it, close the string span, and leave STRING. */
    \"    {
        chkLOC();
        offer(yytext());
        onDisjointSpanChanged(null, yychar);
        yypop();
    }
    /*
     * Eiffel defines "Line_wrapping_part" as a sequence of characters
     * consisting of the following, in order: % (percent character); zero or
     * more blanks or tabs; New_line; zero or more blanks or tabs; % again. For
     * OpenGrok purposes, just recognizing LFs in STRING is good enough; though
     * see also the {Special_character} handling for STRING,VSTRING.
     */
}

<VSTRING> {
    /*
     * A candidate verbatim closer: "]" or "}", an optional run of printable
     * characters, then a quote. maybeEndVerbatim() receives the matched text.
     */
    [\]\}] {Vstring_alpha}* \"    {
        chkLOC();
        maybeEndVerbatim(yytext());
    }
}

<STRING, VSTRING> {
    /* Percent escapes inside strings are emitted as-is. */
    {Special_character}    {
        chkLOC();
        offer(yytext());
    }

    /*
     * A line break inside a string: close the span, report the end of line,
     * then reopen the string span for the following line.
     */
    {WhspChar}*{EOL}    {
        onDisjointSpanChanged(null, yychar);
        onEndOfLineMatched(yytext(), yychar);
        onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
    }
}

/*
 * Fallbacks shared by every state. The state-specific end-of-line rules above
 * are listed earlier, so they take precedence for matches of equal length.
 */
<YYINITIAL, SCOMMENT, STRING, VSTRING> {
    {WhspChar}*{EOL}    { onEndOfLineMatched(yytext(), yychar); }
    \s    { offer(yytext()); }
    [^]    {
        chkLOC();
        offer(yytext());
    }
}

<SCOMMENT, STRING, VSTRING> {
    /* URIs in comments and strings are reported only when takeAllContent() holds. */
    {BrowseableURI}    {
        chkLOC();
        if (takeAllContent()) {
            onUriMatched(yytext(), yychar);
        }
    }
}