xref: /OpenGrok/opengrok-indexer/src/main/jflex/analysis/plain/XMLXref.lex (revision d219b4cea555a12b602d2d5518daa22134ad4879)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved.
22  * Portions Copyright (c) 2017, Chris Fraire <cfraire@me.com>.
23  */
24 
25 package org.opengrok.indexer.analysis.plain;
26 
27 import org.opengrok.indexer.analysis.JFlexSymbolMatcher;
28 import org.opengrok.indexer.analysis.EmphasisHint;
29 import org.opengrok.indexer.util.StringUtils;
30 import org.opengrok.indexer.web.HtmlConsts;
31 %%
32 %public
33 %class XMLXref
34 %extends JFlexSymbolMatcher
35 %unicode
36 %ignorecase
37 %int
38 %char
39 %include ../CommonLexer.lexh
40 %include ../CommonXref.lexh
41 %{
42     /**
43      * Invokes {@code phLOC()} for the current match, except while the lexer is
44      * in the COMMENT state, so text inside an XML comment is skipped.
45      */
46     protected void chkLOC() {
47         if (yystate() != COMMENT) {
48             phLOC();
49         }
50     }
51 %}
52 /* File: a run of {FNameChar}, a dot, at least one ASCII letter, then any further {FNameChar}. */
53 File = {FNameChar}+ "." ([a-zA-Z]+) {FNameChar}*
54 
55 /*
56  * Differs from FPath in that the path segments are only constrained to be
57  * {FNameChar} -- except the last character must be {ASCII_ALPHA} or {DIGIT}.
58  */
59 AlmostAnyFPath = "/"? {FNameChar}+ ("/" {FNameChar}+)+[a-zA-Z0-9]
60 /* Character sets used by the e-mail address rule in this file; NameChar additionally allows a dot. */
61 FileChar = [a-zA-Z0-9_\-\/]
62 NameChar = {FileChar}|"."
63 
64 %state TAG STRING COMMENT SSTRING CDATA
65 %include ../Common.lexh
66 %include ../CommonURI.lexh
67 %include ../CommonPath.lexh
68 %%
69 /* YYINITIAL: text outside markup; recognizes the openers of a comment, a CDATA section, and a tag. */
70 <YYINITIAL> {
71  "<!--"    {    // comment opener; chkLOC() is not called here, unlike the other two openers
72     yybegin(COMMENT);
73     onDisjointSpanChanged(HtmlConsts.COMMENT_CLASS, yychar);    // span is switched before the marker is emitted
74     onNonSymbolMatched("<!--", yychar);
75  }
76  "<![CDATA[" {    // CDATA opener, emitted in two pieces
77     chkLOC();
78     yybegin(CDATA);
79     onNonSymbolMatched("<", yychar);    // the angle bracket goes out before any span change
80     onDisjointSpanChanged(HtmlConsts.NUMBER_CLASS, yychar);
81     onNonSymbolMatched("![CDATA[", yychar);    // the marker text goes out under NUMBER_CLASS
82     onDisjointSpanChanged(HtmlConsts.COMMENT_CLASS, yychar);    // what follows in CDATA uses COMMENT_CLASS
83  }
84  "<"    { chkLOC(); yybegin(TAG); onNonSymbolMatched("<", yychar); }
85 }
86 /* TAG: entered after an opening angle bracket; handles names, quote characters, and angle brackets. */
87 <TAG> {
88  [a-zA-Z_0-9]+{WhspChar}*\=    {    // a name followed by an equals sign: emitted with the STRONG emphasis hint
89     chkLOC();
90     onNonSymbolMatched(yytext(), EmphasisHint.STRONG, yychar);
91  }
92  [a-zA-Z_0-9]+    {    // any other name: emitted under NUMBER_CLASS, then the span is reset at once
93     chkLOC();
94     onDisjointSpanChanged(HtmlConsts.NUMBER_CLASS, yychar);
95     onNonSymbolMatched(yytext(), yychar);
96     onDisjointSpanChanged(null, yychar);
97  }
98  \"      {    // double quote: switch to STRING and emit the quote under STRING_CLASS
99     chkLOC();
100     yybegin(STRING);
101     onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
102     onNonSymbolMatched(yytext(), yychar);
103  }
104  \'      {    // single quote: same steps as above, but continuing in SSTRING
105     chkLOC();
106     yybegin(SSTRING);
107     onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
108     onNonSymbolMatched(yytext(), yychar);
109  }
110  [><]    {    // either angle bracket leaves TAG and returns to YYINITIAL
111     chkLOC();
112     yybegin(YYINITIAL);
113     onNonSymbolMatched(yytext(), yychar);
114  }
115 }
116 /* STRING: entered from TAG on a double quote; two quotes separated only by whitespace do not leave it. */
117 <STRING> {
118  \" {WhspChar}* \"    { chkLOC(); onNonSymbolMatched(yytext(), yychar); }
119  \"     {    // a lone double quote: back to TAG, emit the quote, and only then reset the span
120     chkLOC();
121     yybegin(TAG);
122     onNonSymbolMatched(yytext(), yychar);
123     onDisjointSpanChanged(null, yychar);
124  }
125 }
126 /* SSTRING: entered from TAG on a single quote; two quotes separated only by whitespace do not leave it. */
127 <SSTRING> {
128  \' {WhspChar}* \'    { chkLOC(); onNonSymbolMatched(yytext(), yychar); }
129  \'     {    // a lone single quote: back to TAG, emit the quote, and only then reset the span
130     chkLOC();
131     yybegin(TAG);
132     onNonSymbolMatched(yytext(), yychar);
133     onDisjointSpanChanged(null, yychar);
134  }
135 }
136 /* COMMENT: entered from YYINITIAL on the comment opener; only the terminator has a rule of its own. */
137 <COMMENT> {
138  "-->"     {    // terminator: emitted before the span is reset; chkLOC() is not called by this rule
139     yybegin(YYINITIAL);
140     onNonSymbolMatched(yytext(), yychar);
141     onDisjointSpanChanged(null, yychar);
142  }
143 }
144 /* CDATA: entered from YYINITIAL on the CDATA opener; only the terminator has a rule of its own. */
145 <CDATA> {
146   "]]>" {    // terminator, emitted in two pieces
147     chkLOC();
148     yybegin(YYINITIAL);
149     onDisjointSpanChanged(HtmlConsts.NUMBER_CLASS, yychar);
150     onNonSymbolMatched("]]", yychar);    // the two brackets go out under NUMBER_CLASS
151     onDisjointSpanChanged(null, yychar);
152     onNonSymbolMatched(">", yychar);    // the angle bracket goes out after the span is reset
153   }
154 }
155 /* Rules active in all six states: path-like text, URIs, e-mail addresses, line ends, and fallbacks. */
156 <YYINITIAL, COMMENT, CDATA, STRING, SSTRING, TAG> {
157 
158 {File}|{AlmostAnyFPath}
159   {    // file name or path-like text
160     chkLOC();
161     final String path = yytext();
162     final boolean isJavaClass=StringUtils.isPossiblyJavaClass(path);
163     final char separator = isJavaClass ? '.' : '/';    // dot when the text may be a Java class name, slash otherwise
164     onPathlikeMatched(path, separator, isJavaClass, yychar);
165   }
166 
167 {BrowseableURI}    {    // URI text is handed over whole
168           chkLOC();
169           onUriMatched(yytext(), yychar);
170         }
171 
172 {NameChar}+ "@" {NameChar}+ "." {NameChar}+
173         {    // e-mail-like text: the part after the at sign must contain a dot
174           chkLOC();
175           onEmailAddressMatched(yytext(), yychar);
176         }
177 /* Fallbacks: line end and other whitespace (both without chkLOC), then any other single character. */
178 {WhspChar}*{EOL}    { onEndOfLineMatched(yytext(), yychar); }
179 [[\s]--[\n]]    { onNonSymbolMatched(yytext(), yychar); }
180 [^\n]    { chkLOC(); onNonSymbolMatched(yytext(), yychar); }
181 }
182