1*d219b4ceSAdam Hornacek/* 2*d219b4ceSAdam Hornacek * CDDL HEADER START 3*d219b4ceSAdam Hornacek * 4*d219b4ceSAdam Hornacek * The contents of this file are subject to the terms of the 5*d219b4ceSAdam Hornacek * Common Development and Distribution License (the "License"). 6*d219b4ceSAdam Hornacek * You may not use this file except in compliance with the License. 7*d219b4ceSAdam Hornacek * 8*d219b4ceSAdam Hornacek * See LICENSE.txt included in this distribution for the specific 9*d219b4ceSAdam Hornacek * language governing permissions and limitations under the License. 10*d219b4ceSAdam Hornacek * 11*d219b4ceSAdam Hornacek * When distributing Covered Code, include this CDDL HEADER in each 12*d219b4ceSAdam Hornacek * file and include the License file at LICENSE.txt. 13*d219b4ceSAdam Hornacek * If applicable, add the following below this CDDL HEADER, with the 14*d219b4ceSAdam Hornacek * fields enclosed by brackets "[]" replaced with your own identifying 15*d219b4ceSAdam Hornacek * information: Portions Copyright [yyyy] [name of copyright owner] 16*d219b4ceSAdam Hornacek * 17*d219b4ceSAdam Hornacek * CDDL HEADER END 18*d219b4ceSAdam Hornacek */ 19*d219b4ceSAdam Hornacek 20*d219b4ceSAdam Hornacek/* 21*d219b4ceSAdam Hornacek * Copyright (c) 2010, 2016, Oracle and/or its affiliates. All rights reserved. 22*d219b4ceSAdam Hornacek * Portions Copyright (c) 2017, 2019-2020, Chris Fraire <cfraire@me.com>. 
*/

/*
 * Regex productions that RubyXref and RubySymbolTokenizer have in common
 */

MaybeWhsp = {WhspChar}*

/*
 * Special identifiers introduced by a dollar sign:
 *   globals_rdoc: Pre-defined variables
 *   regexp_rdoc: Special global variables
 *
 * The alternatives are: one punctuation character from the class, a dash
 * followed by one of the option letters/digit (-0 -a -d -F -i -I -l -p -v -w),
 * one of the regexp specials (~ & ` ' +), or a run of decimal digits.
 *
 * NOTE(review): \1 and \0 inside the first class may be interpreted by JFlex
 * as octal escapes rather than as the digits 1 and 0 -- confirm. $0, $1, ...
 * are accepted by the [0-9]+ alternative in any case.
 */
SPIdentifier = [$] ( [\!\@\&\`\'\+\1\~\=\/\\\,\;\<\>\_\0\*\$\?\:\"] |
    "-" [0adFiIlpvw] |
    [~&\`\'\+] |
    [0-9]+ )

/*
 * Any of the identifier kinds defined in this file.
 */
AnyIdentifier = ({Local_var} |
    {Instance_var} |
    {Class_var} |
    {Global_var} |
    {Method_name})

/*
 * A local variable name must start with a lowercase US-ASCII letter or a
 * character with the eighth bit set. Typically local variables are US-ASCII
 * compatible since the keys to type them exist on all keyboards. (Ruby
 * programs must be written in a US-ASCII-compatible character set. In such
 * character sets if the eighth bit is set it indicates an extended character.
 * Ruby allows local variables to contain such characters.)
 * A local variable name may contain letters, numbers, an _ (underscore or low
 * line) or a character with the eighth bit set.
*/
Local_var = {Local_char1} {Local_nextchar}*
Local_char1 = ({Char8} | [a-z])
Local_nextchar = ({Char8} | [a-zA-Z0-9_])
/* The "extended" characters accepted in names: the range \xA0 through \xFF. */
Char8 = [\xA0-\xFF]
/*
 * An instance variable must start with a @ ("at" sign or commercial at).
 * Otherwise instance variable names follow the same rules as local variable
 * names. Since the instance variable starts with an @ the second character
 * may be an upper-case letter.
 */
Instance_var = "@" {Local_nextchar}+
/*
 * A class variable must start with a @@ (two "at" signs). The rest of the name
 * follows the same rules as instance variables.
 */
Class_var = "@@" {Local_nextchar}+
/*
 * Global variables start with a $ (dollar sign). The rest of the name follows
 * the same rules as instance variables.
 */
Global_var = "$" {Local_nextchar}+

/*
 * methods_rdoc: Method Names
 *
 * Method names may be one of the operators or must start with a letter or a
 * character with the eighth bit set.
 *
 * Method names may end with a ! (bang or exclamation mark), a ? (question
 * mark) or = equals sign.
 *
 * N.b.
an '=' suffix is not included in {Method_name}, because that character
 * in a name is aligned with the operation (assignment) and not with the target
 * (variable). E.g., `def birthdate=` is for an assignment of `birthdate'.
 */
Method_name_base = ({Char8} | [a-zA-Z]) {Local_nextchar}*
Method_name = {Method_name_base} ("!" | "?")?

/*
 * modules_and_classes_rdoc: Nesting
 *
 * You may also define inner modules using ::
 *
 * Two or more identifiers joined by "::".
 */
Modules_nested = {AnyIdentifier} ("::" {AnyIdentifier})+

/*
 * literals_rdoc: Numbers
 */
Numeric_literal = ({Decimal_literal} |
    {Decimal_prefixed} |
    {Hex_prefixed} |
    {Octal_prefixed} |
    {Binary_prefixed})
/*
 * You can write integers of any size as follows: 1234 1_234
 * Floating point numbers may be written as follows: 12.34 1234e-2 1.234E1
 *
 * A {Numeral} is a run of digits in which single underscores may separate
 * digits; the fraction and the exponent are each optional.
 */
Decimal_literal = {Numeral} ("." {Numeral})? {Exponent}?
Numeral = {Digit} ("_"? {Digit})*
Exponent = ("E" | "e") ("+" | "-")? {Numeral}
Digit = [0-9]
/*
 * You can use a special prefix to write numbers in decimal, hexadecimal, octal
 * or binary formats. For decimal numbers use a prefix of 0d, for hexadecimal
 * numbers use a prefix of 0x, for octal numbers use a prefix of 0 or 0o, for
 * binary numbers use a prefix of 0b.
The alphabetic component of the number is
 * not case-sensitive.
 *
 * Like integers and floats you may use an underscore for readability.
 *
 * Examples: 0d170 0D170 0xaa 0xAa 0xAA 0Xaa 0XAa 0XaA 0252 0o252 0O252
 * 0b10101010 0B10101010
 */
Decimal_prefixed = [0][Dd] {Numeral}
Hex_prefixed = [0][Xx] {Hex_numeral}
Hex_numeral = {Hex_digit} ([_]? {Hex_digit})*
/*
 * Only 0-9, A-F and a-f are hexadecimal digits. (The class must not extend to
 * a-z, or else trailing letters such as the "zz" in 0x1fzz would be swallowed
 * into the number.)
 */
Hex_digit = [0-9A-Fa-f]
/*
 * The "0"-prefixed octal number as a regex will be captured by
 * {Decimal_literal} so it is not defined in {Octal_prefixed}.
 */
Octal_prefixed = [0][Oo]{Octal_numeral}
Octal_numeral = {Octal_digit} ([_]? {Octal_digit})*
Octal_digit = [0-7]
Binary_prefixed = [0][Bb]{Binary_numeral}
Binary_numeral = {Binary_digit} ([_]? {Binary_digit})*
Binary_digit = [01]

/*
 * There is also a character literal notation to represent single character
 * strings, which syntax is a question mark (?) followed by a single character
 * or escape sequence that corresponds to a single codepoint in the script
 * encoding: ?a #=> "a" ?abc #=> SyntaxError ?\n #=> "\n" ?\s #=> " "
 * ?\\ #=> "\\" ?\u{41} #=> "A" ?\C-a #=> "\x01" ?\M-a #=> "\xE1"
 * ?\M-\C-a #=> "\x81" ?\C-\M-a #=> "\x81", same as above ?あ #=> "あ"
 * N.b.
the Ruby rule about ?abc is not enforced in this regex.
 *
 * Escapes recognized after the backslash, in addition to any single
 * non-whitespace character:
 *   M-x, C-x          : meta or control
 *   u{H...}           : code point written with hexadecimal digits
 *   M-\C-x, C-\M-x    : meta-control; the inner backslash is optional in this
 *                       regex so that the previously accepted M-C-x and C-M-x
 *                       spellings continue to match
 */
Character_literal = [?] ({Character_literal_esc} | [^\s])
Character_literal_esc = [\\] ([MC][\-][^\s] | "u{" [0-9A-Fa-f]+ "}" |
    "M-" [\\]? "C-" [^\s] | "C-" [\\]? "M-" [^\s] | [^\s])

/*
 * literals_rdoc: Strings
 *
 * The most common way of writing strings is using ". The string may be many
 * lines long. Any internal " must be escaped. Strings may allow interpolation
 * of other values using #{...}, or they may be cross-referenced as URLs or
 * files, so they are handled as separate yy states.
 */

/* Any non-word character except the sigils/quotes $ @ " ' ` # and CR/LF. */
WxSigils = [[\W]--[\$\@\"\'\`\#\r\n]]

/* File names ending (case-insensitively) in .rb, .ruby, .diff or .patch. */
FileExt = ([Rr][Bb] | [Rr][Uu][Bb][Yy] | [Dd][Ii][Ff][Ff] |
    [Pp][Aa][Tt][Cc][Hh])
File = [a-zA-Z]{FNameChar}* "." {FileExt}

POD_begin = "=begin"
POD_end = "=end"

/*
 * Quote-like delimiters: a backtick, a bracketing character, or any
 * punctuation or symbol character; optionally introduced by % and by %
 * plus one of the letters I Q R S W X (upper case) or i q r s w x (lower
 * case, the _xN variants).
 */
Quo0 = [[\`\(\)\<\>\[\]\{\}\p{P}\p{S}]]
QuoP = [%]{Quo0}
QuoPC = [%][IQRSWX]{Quo0}
QuoPC_xN = [%][iqrswx]{Quo0}

Symbol = [:]{AnyIdentifier}
Symquo = [:][\"]
Symquo_xN = [:][\']

//
// Track some keywords that can be used to identify heuristically a possible
// beginning of the shortcut syntax, //, for m//.
// Also include any perlfunc
// that takes /PATTERN/. Heuristics using punctuation are defined inline later
// in some rules.
//
Mwords_1 = ("and" | "not" | "or")
Mwords_2 = ("begin" | "end" | "unless" | "until" | "when" | "while")
Mwords = ({Mwords_2} | {Mwords_1})

// Punctuation after which a following slash is treated heuristically as the
// start of a pattern: one set is used in both YYINITIAL and INTRA, the other
// only in INTRA.
Mpunc1YYIN = ("(" | "!" | "[")
Mpunc2IN = ("!~" | "=~" |
    [\:\?\=\+\-\<\>] |
    "==" | "===" | "!=" | "<=" | ">=" | "<=>" | "=>")

// Here-document terminators: a bare marker, or text on one line enclosed in
// single quotes or in backticks.
Here_marker = {Local_nextchar}+
Here_EOF1 = {Here_marker}
Here_EOF2 = "'" [^\r\n\']* "'"
Here_EOF3 = "`" [^\r\n\`]* "`"

/*
 * YYINITIAL : nothing yet parsed or just after a non-continuation EOL
 * INTRA : saw content from YYINITIAL but not yet other state or [{}] or non-
 *     continuation {EOL}
 * SCOMMENT : single-line comment
 * POD : embedded documentation
 * QUO : quote-like that is OK to match paths|files|URLs|e-mails
 * QUOxN : "" but with no interpolation
 * QUOxL : quote-like that is not OK to match paths|files|URLs|e-mails
 *     because a non-traditional character is used as the quote-like delimiter
 * QUOxLxN : "" but with no interpolation
 * QM : a quote-like has ended, and quote modifier chars are awaited
 * HERE : Here-docs
 * HERExN : Here-docs with no interpolation
 * HEREin : Indented Here-docs
HEREinxN : Indented Here-docs with no interpolation
 */
%state INTRA SCOMMENT POD
%state QUO QUOxN QUOxL QUOxLxN QM
%state HERE HERExN HEREin HEREinxN

%%
// Non-indented here-docs: a marker at the very start of a line, followed only
// by optional whitespace up to the EOL (trailing context, not consumed), is
// handed to maybeEndHere().
<HERE, HERExN> {
    ^ {Here_marker} / {MaybeWhsp}{EOL}    {
        chkLOC();
        maybeEndHere(yytext());
    }
}

// Indented here-docs: same as above, but whitespace may precede the marker.
// That leading whitespace is part of the text passed to maybeEndHere().
<HEREin, HEREinxN> {
    ^ {MaybeWhsp} {Here_marker} / {MaybeWhsp}{EOL}    {
        chkLOC();
        maybeEndHere(yytext());
    }
}

<INTRA> {
    // Syntax that switches back to YYINITIAL but otherwise preserves the
    // stack. The whole match is pushed back, so it is scanned again by the
    // YYINITIAL rules.
    [\{] |
    "&&" |
    "||"    {
        chkLOC();
        yypushback(yytext().length());
        yybegin(YYINITIAL);
    }
}

<YYINITIAL, INTRA> {
    // A backslash immediately before the EOL: only the backslash is offered,
    // and the EOL is reported via onEndOfLineMatched(). No yybegin() is done
    // here.
    [\\] {EOL}    {
        maybeIntraState();
        offer("\\");
        onEndOfLineMatched(yytext(), yychar);
    }

    // Here-doc operator: "<<", an optional ~ or -, then a bare, single-quoted
    // or backtick-quoted terminator. The full match is given to hop().
    "<<"[~\-]? ({Here_EOF1} | {Here_EOF2} | {Here_EOF3})    {
        chkLOC();
        maybeIntraState();
        hop(yytext());
    }

    // Sigil-prefixed identifiers and symbols; offered with `true' as the
    // third argument of offerSymbol() (the flag's meaning is defined by that
    // helper). The scanner returns early only if both helper calls say so.
    {Instance_var} | {Class_var} | {Global_var} | {Symbol}    {
        chkLOC();
        maybeIntraState();
        String id = yytext();
        if (offerSymbol(id, 0, true) && returnOnSymbol()) {
            return yystate();
        }
    }

    // Plain identifiers; identical to the rule above except that `false' is
    // passed as the third argument of offerSymbol().
    {Local_var} | {Method_name}    {
        chkLOC();
        maybeIntraState();
        String id = yytext();
        if (offerSymbol(id, 0, false) && returnOnSymbol()) {
            return yystate();
        }
    }

    // Special $-identifiers are offered as keywords, not as symbols.
    {SPIdentifier}    {
        chkLOC();
        maybeIntraState();
        offerKeyword(yytext());
    }

    // Identifiers joined by "::" are handled as a unit by takeModules().
    {Modules_nested}    {
        chkLOC();
        maybeIntraState();
        takeModules(yytext());
    }

    // Character literals are offered inside a STRING_CLASS span that is
    // opened and closed around the single offer().
    {Character_literal}    {
        chkLOC();
        maybeIntraState();
        onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
        offer(yytext());
        onDisjointSpanChanged(null, yychar);
    }

    // Numeric literals are likewise offered inside a NUMBER_CLASS span.
    {Numeric_literal}    {
        chkLOC();
        maybeIntraState();
        onDisjointSpanChanged(HtmlConsts.NUMBER_CLASS, yychar);
        offer(yytext());
        onDisjointSpanChanged(null, yychar);
    }

    // Plain quotes and the comment character. For qop(), the second argument
    // is presumably the length of the operator preceding the delimiter and
    // the last argument presumably selects "no interpolation" -- confirm
    // against the helper.
    \"    { chkLOC(); qop(yytext(), 0, false); }
    \'    { chkLOC(); qop(yytext(), 0, true); }
    \#    {
        yypush(SCOMMENT);
        onDisjointSpanChanged(HtmlConsts.COMMENT_CLASS, yychar);
        offer(yytext());
    }

    // Quote with two character names plus possibly a {WxSigils} spacer
    ^ {QuoPC}    { chkLOC(); qop(yytext(), 2, false); }
    {WxSigils}{QuoPC}    { chkLOC(); qop(yytext(), 3, false); }
    ^ {QuoPC_xN}    { chkLOC(); qop(yytext(), 2, true); }
    {WxSigils}{QuoPC_xN}    { chkLOC(); qop(yytext(), 3, true); }

    // Quote with one character names plus possibly a {WxSigils} spacer
    ^ {QuoP}    { chkLOC(); qop(yytext(), 1, false); }
    {WxSigils}{QuoP}    { chkLOC(); qop(yytext(), 2, false); }
    ^ {Symquo}    { chkLOC(); qop(yytext(), 1, false); }
    {WxSigils}{Symquo}    { chkLOC(); qop(yytext(), 2, false); }
    ^ {Symquo_xN}    { chkLOC(); qop(yytext(), 1, true); }
    {WxSigils}{Symquo_xN}    { chkLOC(); qop(yytext(), 2, true); }

    // POD-end without having seen POD-begin is akin to a one-line comment.
    // The rest of the line is offered as-is; no state is pushed and no span
    // is opened.
    ^ {POD_end} [^\n\r]*    {
        offer(yytext());
    }

    // POD start: enter the POD state and open a COMMENT_CLASS span.
    ^ {POD_begin}    {
        yypush(POD);
        onDisjointSpanChanged(HtmlConsts.COMMENT_CLASS, yychar);
        offer(yytext());
    }

    // A brace is offered here only if maybeEndInterpolation() returns false
    // for it.
    [\{\}]    {
        chkLOC();
        String capture = yytext();
        if (!maybeEndInterpolation(capture)) {
            offer(capture);
        }
    }
}

<YYINITIAL> {
    // A slash seen in YYINITIAL is treated as the start of a pattern.
    "/"    {
        chkLOC();
        // OK to pass a fake "m/" with doWrite=false
        qop(false, "m/", 1, false);
        onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
        offer(yytext());
    }
}

<YYINITIAL, INTRA> {
    // Use some heuristics to identify double-slash syntax for the m//
    // operator. We can't handle all possible appearances of `//', because the
    // first slash cannot always be distinguished from division (/) without
    // true parsing.

    {Mpunc1YYIN} \s* "/"    { chkLOC(); hqopPunc(yytext()); }
}

<INTRA> {
    // Continue with more punctuation heuristics

    {Mpunc2IN} \s* "/"    { chkLOC(); hqopPunc(yytext()); }
}

<YYINITIAL, INTRA> {
    // Define keyword heuristics

    ^ {Mwords} \s* "/"    {
        chkLOC();
        hqopSymbol(yytext());
    }

    // Here the match starts with one {WxSigils} boundary character: it is
    // offered separately (only when takeAllContent() is true), and the
    // remainder of the match goes to hqopSymbol().
    {WxSigils}{Mwords} \s* "/"    {
        chkLOC();
        String capture = yytext();
        if (takeAllContent()) {
            String boundary = capture.substring(0, 1);
            offer(boundary);
        }
        hqopSymbol(capture.substring(1));
    }
}

// A backslash followed by a non-whitespace character is offered as one unit.
// The state list does not include HERExN or HEREinxN.
<QUO, QUOxN, QUOxL, QUOxLxN, HERE, HEREin> {
    \\ \S    {
        chkLOC();
        offer(yytext());
    }
}

// Start of #{...} interpolation; only in the quote states without the xN
// suffix. The current span is closed and YYINITIAL is pushed for the
// interpolated content.
<QUO, QUOxL> {
    "#{"    {
        chkLOC();
        offer(yytext());
        onDisjointSpanChanged(null, yychar);
        yypush(YYINITIAL);
        pushData();
        interpop();
    }
}

// Any possible delimiter character is offered first; if maybeEndQuote()
// reports the end of the quote, the state is popped, QM is pushed when
// areModifiersOK() is true, and the span is closed.
<QUO, QUOxN, QUOxL, QUOxLxN> {
    {Quo0}    {
        chkLOC();
        String capture = yytext();
        offer(capture);
        if (maybeEndQuote(capture)) {
            yypop();
            if (areModifiersOK()) {
                yypush(QM);
            }
            onDisjointSpanChanged(null, yychar);
        }
    }
}

// EOL inside quotes and here-docs: the span is closed before the EOL is
// reported and a STRING_CLASS span is opened again afterward.
<QUO, QUOxN, QUOxL, QUOxLxN, HERE, HERExN, HEREin, HEREinxN> {
    {WhspChar}*{EOL}    {
        onDisjointSpanChanged(null, yychar);
        onEndOfLineMatched(yytext(), yychar);
        onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
    }
}

<QM> {
    // /PAT/imxouesn
    [a-z]    {
        chkLOC();
        offer(yytext());
    }

    // anything else ends the quote-modifiers state; the character is pushed
    // back to be scanned again in the state that is restored by yypop()
    [^]    {
        yypop();
        yypushback(yytext().length());
    }
}

<POD> {
    // The =end line (with the rest of that line) leaves the POD state and
    // closes the span after the text has been offered.
    ^ {POD_end} [^\n\r]*    {
        yypop();
        offer(yytext());
        onDisjointSpanChanged(null, yychar);
    }

    {WhspChar}*{EOL}    {
        onEndOfLineMatched(yytext(), yychar);
    }
}

// The EOL ends a single-line comment. The EOL text itself is pushed back so
// that it is scanned again in the state restored by yypop().
<SCOMMENT> {
    {WhspChar}*{EOL}    {
        String capture = yytext();
        yypushback(capture.length());
        yypop();
        onDisjointSpanChanged(null, yychar);
    }
}

// EOL in code: if maybeStartHere() returns true, the EOL is pushed back
// unreported; otherwise it is reported and the state becomes YYINITIAL.
<YYINITIAL, INTRA> {
    {WhspChar}*{EOL}    {
        String capture = yytext();
        if (maybeStartHere()) {
            yypushback(capture.length());
        } else {
            onEndOfLineMatched(yytext(), yychar);
            yybegin(YYINITIAL);
        }
    }
}

// Single-character fallback rules for every state except QM.
<YYINITIAL, INTRA, SCOMMENT, POD, QUO, QUOxN, QUOxL, QUOxLxN,
    HERE, HERExN, HEREin, HEREinxN> {

    // Only one whitespace char at a time or else {WxSigils} can be broken
    {WhspChar} |
    [[\s]--[\n\r]]    {
        offer(yytext());
    }
    // Only one char at a time due to restriction on {WhspChar} above.
    [^\n\r]    {
        chkLOC();
        maybeIntraState();
        offer(yytext());
    }
}

// "string links" and "comment links"
// Each of these is linked only when takeAllContent() is true; otherwise the
// matched text is consumed without any call besides chkLOC().
<SCOMMENT, POD, QUO, QUOxN, HERE, HERExN, HEREin, HEREinxN> {
    {FPath}    {
        chkLOC();
        if (takeAllContent()) {
            onPathlikeMatched(yytext(), '/', false, yychar);
        }
    }

    {File}    {
        chkLOC();
        if (takeAllContent()) {
            String path = yytext();
            onFilelikeMatched(path, yychar);
        }
    }

    // E-mail-like text: name characters, "@", name characters, ".", name
    // characters.
    {FNameChar}+ "@" {FNameChar}+ "." {FNameChar}+    {
        chkLOC();
        if (takeAllContent()) {
            onEmailAddressMatched(yytext(), yychar);
        }
    }
}

// URIs in comments and here-docs: no collateral-capture pattern is passed.
<SCOMMENT, POD, HERE, HERExN, HEREin, HEREinxN> {
    {BrowseableURI}    {
        chkLOC();
        if (takeAllContent()) {
            onUriMatched(yytext(), yychar, null);
        }
        // no skipLink() needed except in QUO* states
    }
}

// URIs in QUO and QUOxN: the pattern from getCollateralCapturePattern() is
// passed along, and skipLink() is called when the content is not being taken.
<QUO, QUOxN> {
    {BrowseableURI}    {
        chkLOC();
        String capture = yytext();
        Pattern collateralCapture = getCollateralCapturePattern();
        if (takeAllContent()) {
            onUriMatched(capture, yychar, collateralCapture);
        } else {
            skipLink(capture, collateralCapture);
        }
    }
}