/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2017-2019, Chris Fraire <cfraire@me.com>.
 */

/*
 * 8.32.16 Syntax (non-production): Identifier -- a sequence of one or more
 * alpha_numeric characters of which the first is a letter.
 */
Identifier = {letter} {alpha_numeric}*

/*
 * 8.32.2 Definition: Letter, alpha_betic, numeric, alpha_numeric, printable
 *
 * 1 Any of the following fifty-two, each a lower-case or upper-case element of
 *   the Roman alphabet
 * 2 If the underlying character set is 8-bit extended ASCII, the characters of
 *   codes 192 to 255 in that set.
 * 3 If the underlying character set is Unicode, all characters defined as
 *   letters in that set.
 *
 * alpha_betic character is a letter or an underscore _.
 * numeric character is one of the ten characters 0-9
 * alpha_numeric character is alpha_betic or numeric
 */
letter = [\p{Letter}]   // This suffices to cover 1,2,3 above.
alpha_betic = ({letter} | [_])
numeric = [0-9]
alpha_numeric = ({alpha_betic} | {numeric})

/*
 * printable character is any of the characters listed as printable in the
 * definition of the character set (Unicode or extended ASCII). “Printable”
 * characters exclude such special characters as new line and backspace.
 *
 * N.b. for OpenGrok purposes, take "printable" as Wikipedia's definition of
 * Unicode Graphic Character.
 *
 * 8.29.15 Validity: Verbatim String rule
 *
 * Regarding α as the (possibly empty) Simple_string appearing in a
 * Verbatim_string_opener: Every character in α is printable, and not a double
 * quote.
 */
Vstring_alpha = [[\p{Letter}\p{Mark}\p{Number}\p{Punctuation}\p{Symbol}\p{Zs}]--[\"]]

Number = [\+\-]? ({Integer} | {Real})
/*
 * Integer = Δ [Integer_base] Digit_sequence
 * Integer_base = Δ "0" Integer_base_letter
 * Integer_base_letter = Δ "b" | "c" | "x" | "B" | "C" | "X"
 * Digit_sequence = Δ Digit+
 * Digit = Δ "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" |
 *     "a" | "b" | "c" | "d" | "e" | "f" |
 *     "A" | "B" | "C" | "D" | "E" | "F" | "_"
 * "Neither the first nor the last Digit of a Digit_sequence is an underscore"
 */
Integer = ({Decimal_integer} | {Hexadecimal} | {Binary} | {Octal})
Decimal_integer = ({DIGIT}+ | {DIGIT} ("_" | {DIGIT})+ {DIGIT})
Hexadecimal = 0[xX] ({HEXDIG}+ | {HEXDIG} ("_" | {HEXDIG})+ {HEXDIG})
Binary = 0[bB] ({BINDIG}+ | {BINDIG} ("_" | {BINDIG})+ {BINDIG})
Octal = 0[cC] ({OCTDIG}+ | {OCTDIG} ("_" | {OCTDIG})+ {OCTDIG})
DIGIT = [0-9]
HEXDIG = [0-9a-fA-F]
BINDIG = [01]
OCTDIG = [0-7]
/*
 * Real =
 * • An optional decimal Integer, giving the integral part.
 * • A required ‘‘.’’ (dot).
 * • An optional decimal Integer, giving the fractional part.
 * • An optional exponent, which is the letter e or E followed by an optional
 *   Sign (+ or –) and a decimal Integer.
 *
 * N.b. the Sign within the exponent is optional, so that e.g. 1.5e10 is
 * matched as a single Real and not as 1.5 followed by an Identifier.
 */
Real = ({Decimal_integer}? "." {Decimal_integer} |
    {Decimal_integer} ".") ([eE] [\+\-]? {Decimal_integer})?

/*
 * Character_constant = Δ "'" Character "'"
 */
Character_constant = \' ([^\'\n\r] | {Special_character}) \'

/*
 * A percent sign followed either by one of the listed code characters or by
 * a decimal code enclosed in slashes (e.g. %N, %", %/65/).
 */
Special_character = % ([A-Da-dFfHhLlNnQqRrSsT-Vt-v%\'\"\(\)\<\>] | "/" [0-9]+ "/")

/*
 * SCOMMENT : single-line comment
 * STRING : basic manifest string (literal)
 * VSTRING : verbatim manifest string (literal)
 */
%state SCOMMENT STRING VSTRING

%%
<YYINITIAL> {
    // Identifiers are offered as symbols; the action returns early only when
    // the symbol was accepted and returnOnSymbol() is true.
    {Identifier}    {
        chkLOC();
        String id = yytext();
        if (offerSymbol(id, 0, false) && returnOnSymbol()) {
            return yystate();
        }
    }

    // Numeric literals are wrapped in their own NUMBER_CLASS span.
    {Number}    {
        chkLOC();
        onDisjointSpanChanged(HtmlConsts.NUMBER_CLASS, yychar);
        offer(yytext());
        onDisjointSpanChanged(null, yychar);
    }

    // Character constants are wrapped in a STRING_CLASS span.
    {Character_constant}    {
        chkLOC();
        onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
        offer(yytext());
        onDisjointSpanChanged(null, yychar);
    }

    // Comment opener: push SCOMMENT and open a COMMENT_CLASS span, which is
    // closed by the end-of-line rule of SCOMMENT. No chkLOC() is done here.
    "--"    {
        yypush(SCOMMENT);
        onDisjointSpanChanged(HtmlConsts.COMMENT_CLASS, yychar);
        offer(yytext());
    }

    // Basic manifest string opener: push STRING and open a STRING_CLASS span.
    \"    {
        chkLOC();
        yypush(STRING);
        onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
        offer(yytext());
    }
    // Verbatim string opener: a double quote, an optional α, and '[' or '{'.
    // Being longer, this match is preferred to the lone double quote above.
    // The opener is handed to vop(), which is defined outside this file.
    \" {Vstring_alpha}* [\[\{]    {
        chkLOC();
        vop(yytext());
    }
}

<SCOMMENT> {
    // End of line ends the comment: close the span, pop the state, and then
    // report the end-of-line.
    {WhspChar}*{EOL}    {
        onDisjointSpanChanged(null, yychar);
        yypop();
        onEndOfLineMatched(yytext(), yychar);
    }
}

<STRING> {
    // Closing double quote: offer it, close the span, and pop the state.
    \"    {
        chkLOC();
        offer(yytext());
        onDisjointSpanChanged(null, yychar);
        yypop();
    }
    /*
     * Eiffel defines "Line_wrapping_part" as a sequence of characters
     * consisting of the following, in order: % (percent character); zero or
     * more blanks or tabs; New_line; zero or more blanks or tabs; % again. For
     * OpenGrok purposes, just recognizing LFs in STRING is good enough; though
     * see also the {Special_character} handling for STRING,VSTRING.
     */
}

<VSTRING> {
    // Candidate verbatim string closer: ']' or '}', an optional α, and a
    // double quote. The decision is left to maybeEndVerbatim(), which is
    // defined outside this file.
    [\]\}] {Vstring_alpha}* \"    {
        chkLOC();
        maybeEndVerbatim(yytext());
    }
}

<STRING, VSTRING> {
    // Matching a whole Special_character keeps e.g. %" from being taken as
    // the closing double quote of STRING.
    {Special_character}    {
        chkLOC();
        offer(yytext());
    }

    // Close the span before reporting the end-of-line, and afterward re-open
    // a STRING_CLASS span for the continuation of the string.
    {WhspChar}*{EOL}    {
        onDisjointSpanChanged(null, yychar);
        onEndOfLineMatched(yytext(), yychar);
        onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
    }
}

<YYINITIAL, SCOMMENT, STRING, VSTRING> {
    // For SCOMMENT, STRING, and VSTRING the state-specific end-of-line rules
    // above come first and so take precedence for an equal-length match.
    {WhspChar}*{EOL}    { onEndOfLineMatched(yytext(), yychar); }
    \s    { offer(yytext()); }
    // Fallback: any other single character.
    [^]    {
        chkLOC();
        offer(yytext());
    }
}

<SCOMMENT, STRING, VSTRING> {
    // URIs in comments and strings are reported only if takeAllContent().
    {BrowseableURI}    {
        chkLOC();
        if (takeAllContent()) {
            onUriMatched(yytext(), yychar);
        }
    }
}