/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved.
 * Portions Copyright (c) 2017, Chris Fraire <cfraire@me.com>.
 */

package org.opengrok.indexer.analysis.plain;

import org.opengrok.indexer.analysis.JFlexSymbolMatcher;
import org.opengrok.indexer.analysis.EmphasisHint;
import org.opengrok.indexer.util.StringUtils;
import org.opengrok.indexer.web.HtmlConsts;
%%
%public
%class XMLXref
%extends JFlexSymbolMatcher
%unicode
%ignorecase
%int
%char
%include ../CommonLexer.lexh
%include ../CommonXref.lexh
%{
    /**
     * Calls {@code phLOC()} for the current match unless the lexer is in the
     * {@code COMMENT} state, so that text inside {@code <!-- ... -->} does
     * not trigger it.
     * <p>
     * NOTE(review): {@code phLOC()} is defined outside this file; presumably
     * it records that the current line contains code -- confirm against
     * CommonLexer.lexh / JFlexSymbolMatcher.
     */
    protected void chkLOC() {
        if (yystate() != COMMENT) {
            phLOC();
        }
    }
%}

File = {FNameChar}+ "." ([a-zA-Z]+) {FNameChar}*

/*
 * Differs from FPath in that the path segments are only constrained to be
 * {FNameChar} -- except the last character must be {ASCII_ALPHA} or {DIGIT}.
 */
AlmostAnyFPath = "/"? {FNameChar}+ ("/" {FNameChar}+)+[a-zA-Z0-9]

FileChar = [a-zA-Z_0-9_\-\/]
NameChar = {FileChar}|"."

/*
 * Lexical states, as entered by the rules below:
 *   TAG     - after "<" until the next ">" or "<"
 *   STRING  - inside a double-quoted value within a tag
 *   SSTRING - inside a single-quoted value within a tag
 *   COMMENT - between "<!--" and "-->"
 *   CDATA   - between "<![CDATA[" and "]]>"
 */
%state TAG STRING COMMENT SSTRING CDATA
%include ../Common.lexh
%include ../CommonURI.lexh
%include ../CommonPath.lexh
%%

<YYINITIAL> {
    /* Comment opener: the whole comment, delimiters included, is one span. */
    "<!--" {
        yybegin(COMMENT);
        onDisjointSpanChanged(HtmlConsts.COMMENT_CLASS, yychar);
        onNonSymbolMatched("<!--", yychar);
    }
    /*
     * CDATA opener: "<" is emitted plain, the "![CDATA[" marker in the
     * number class, and the section content that follows in the comment
     * class.
     */
    "<![CDATA[" {
        chkLOC();
        yybegin(CDATA);
        onNonSymbolMatched("<", yychar);
        onDisjointSpanChanged(HtmlConsts.NUMBER_CLASS, yychar);
        // %ignorecase lets this rule match e.g. "<![cdata[" too, so emit the
        // text that was actually matched rather than a fixed-case literal.
        // NOTE(review): the marker starts one character after yychar but is
        // reported at yychar, as before -- confirm whether listeners need
        // exact offsets.
        onNonSymbolMatched(yytext().substring(1), yychar);
        onDisjointSpanChanged(HtmlConsts.COMMENT_CLASS, yychar);
    }
    /* Any other "<" starts a tag. */
    "<" { chkLOC(); yybegin(TAG); onNonSymbolMatched("<", yychar); }
}

<TAG> {
    /* A name followed by "=" (attribute name): emitted with strong emphasis. */
    [a-zA-Z_0-9]+{WhspChar}*\= {
        chkLOC();
        onNonSymbolMatched(yytext(), EmphasisHint.STRONG, yychar);
    }
    /* Any other name inside the tag: emitted in its own number-class span. */
    [a-zA-Z_0-9]+ {
        chkLOC();
        onDisjointSpanChanged(HtmlConsts.NUMBER_CLASS, yychar);
        onNonSymbolMatched(yytext(), yychar);
        onDisjointSpanChanged(null, yychar);
    }
    /* Opening double quote: the string span includes the quote itself. */
    \" {
        chkLOC();
        yybegin(STRING);
        onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
        onNonSymbolMatched(yytext(), yychar);
    }
    /* Opening single quote: the string span includes the quote itself. */
    \' {
        chkLOC();
        yybegin(SSTRING);
        onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
        onNonSymbolMatched(yytext(), yychar);
    }
    /* Either angle bracket leaves the tag and returns to YYINITIAL. */
    [><] {
        chkLOC();
        yybegin(YYINITIAL);
        onNonSymbolMatched(yytext(), yychar);
    }
}

<STRING> {
    /*
     * Two double quotes separated only by optional whitespace are emitted
     * as-is without leaving the state.
     */
    \" {WhspChar}* \" { chkLOC(); onNonSymbolMatched(yytext(), yychar); }
    /* Closing double quote: emit it, then end the string span. */
    \" {
        chkLOC();
        yybegin(TAG);
        onNonSymbolMatched(yytext(), yychar);
        onDisjointSpanChanged(null, yychar);
    }
}

<SSTRING> {
    /*
     * Two single quotes separated only by optional whitespace are emitted
     * as-is without leaving the state.
     */
    \' {WhspChar}* \' { chkLOC(); onNonSymbolMatched(yytext(), yychar); }
    /* Closing single quote: emit it, then end the string span. */
    \' {
        chkLOC();
        yybegin(TAG);
        onNonSymbolMatched(yytext(), yychar);
        onDisjointSpanChanged(null, yychar);
    }
}

<COMMENT> {
    /* Comment closer: emitted inside the comment span, which then ends. */
    "-->" {
        yybegin(YYINITIAL);
        onNonSymbolMatched(yytext(), yychar);
        onDisjointSpanChanged(null, yychar);
    }
}

<CDATA> {
    /*
     * CDATA closer: "]]" is emitted in the number class and ">" plain,
     * mirroring the opener.
     * NOTE(review): ">" is reported at yychar although it starts two
     * characters later -- confirm whether listeners need exact offsets.
     */
    "]]>" {
        chkLOC();
        yybegin(YYINITIAL);
        onDisjointSpanChanged(HtmlConsts.NUMBER_CLASS, yychar);
        onNonSymbolMatched("]]", yychar);
        onDisjointSpanChanged(null, yychar);
        onNonSymbolMatched(">", yychar);
    }
}

/*
 * Rules shared by every state. They are listed after the state-specific
 * groups, and their relative order is significant when matches have equal
 * length, so keep the order as is.
 */
<YYINITIAL, COMMENT, CDATA, STRING, SSTRING, TAG> {

    /* File names and paths; text that may be a Java class name is split on "." */
    {File}|{AlmostAnyFPath} {
        chkLOC();
        final String path = yytext();
        final boolean isJavaClass=StringUtils.isPossiblyJavaClass(path);
        final char separator = isJavaClass ? '.' : '/';
        onPathlikeMatched(path, separator, isJavaClass, yychar);
    }

    {BrowseableURI} {
        chkLOC();
        onUriMatched(yytext(), yychar);
    }

    /* Loose e-mail shape: name "@" name "." name */
    {NameChar}+ "@" {NameChar}+ "." {NameChar}+ {
        chkLOC();
        onEmailAddressMatched(yytext(), yychar);
    }

    /* End of line, including any trailing whitespace before it. */
    {WhspChar}*{EOL} { onEndOfLineMatched(yytext(), yychar); }
    /* Other whitespace is emitted without calling chkLOC(). */
    [[\s]--[\n]] { onNonSymbolMatched(yytext(), yychar); }
    /* Fallback: any other single character. */
    [^\n] { chkLOC(); onNonSymbolMatched(yytext(), yychar); }
}