/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2015, 2021, Oracle and/or its affiliates. All rights reserved.
 * Portions Copyright (c) 2017, Chris Fraire <cfraire@me.com>.
 */

/*
 * Cross reference a Haskell file
 */

package org.opengrok.indexer.analysis.haskell;

import java.io.IOException;
import org.opengrok.indexer.analysis.JFlexSymbolMatcher;
import org.opengrok.indexer.web.HtmlConsts;

/**
 * @author Harry Pan
 */
%%
%public
%class HaskellXref
%extends JFlexSymbolMatcher
%unicode
%int
%char
%include ../CommonLexer.lexh
%include ../CommonXref.lexh
%{
    /**
     * Current depth of nested block comments: incremented on every
     * "{-" matched in YYINITIAL or BCOMMENT and decremented on every "-}"
     * matched in BCOMMENT.
     */
    private int nestedComment;

    /**
     * Resets the matcher through {@code super.reset()} and clears the
     * nested block-comment depth.
     */
    @Override
    public void reset() {
        super.reset();
        nestedComment = 0;
    }

    /**
     * Signals a disjoint-span change with a {@code null} class at the
     * current {@code yychar} offset and then pops the lexer state through
     * {@code super.yypop()}.
     *
     * @throws IOException declared by this override; NOTE(review): presumably
     * raised by the span or pop handling in the superclass -- confirm
     */
    @Override
    public void yypop() throws IOException {
        onDisjointSpanChanged(null, yychar);
        super.yypop();
    }

    /**
     * Calls {@code phLOC()} unless the lexer is currently in the COMMENT or
     * BCOMMENT state.
     * NOTE(review): {@code phLOC()} is not defined in this file (presumably
     * it comes from an included .lexh and records a line of code) -- confirm.
     */
    protected void chkLOC() {
        switch (yystate()) {
            case COMMENT:
            case BCOMMENT:
                // Text inside comments is not reported through phLOC().
                break;
            default:
                phLOC();
                break;
        }
    }
%}

%state STRING CHAR COMMENT BCOMMENT

%include ../Common.lexh
%include ../CommonURI.lexh
%include ../CommonPath.lexh
%include Haskell.lexh
%%
<YYINITIAL> {
    {Identifier} {
        chkLOC();
        String id = yytext();
        onFilteredSymbolMatched(id, yychar, Consts.kwd);
    }
    {Number}     {
        chkLOC();
        onDisjointSpanChanged(HtmlConsts.NUMBER_CLASS, yychar);
        onNonSymbolMatched(yytext(), yychar);
        onDisjointSpanChanged(null, yychar);
    }
    \"           {
        chkLOC();
        yypush(STRING);
        onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
        onNonSymbolMatched(yytext(), yychar);
    }
    \'           {
        chkLOC();
        yypush(CHAR);
        onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
        onNonSymbolMatched(yytext(), yychar);
    }
    "--"         {
        yypush(COMMENT);
        onDisjointSpanChanged(HtmlConsts.COMMENT_CLASS, yychar);
        onNonSymbolMatched(yytext(), yychar);
    }

    {NotComments}    { chkLOC(); onNonSymbolMatched(yytext(), yychar); }
}

<STRING> {
    \\[\"\\]    { chkLOC(); onNonSymbolMatched(yytext(), yychar); }
    \"          {
        chkLOC();
        onNonSymbolMatched(yytext(), yychar);
        yypop();
    }
    /*
     * "A string may include a 'gap'—two backslants enclosing white
     * characters—which is ignored. This allows one to write long strings on
     * more than one line by writing a backslant at the end of one line and at
     * the start of the next." N.b. OpenGrok does not explicitly recognize the
     * "gap" but since a STRING must end in a non-escaped quotation mark, just
     * allow STRINGs to be multi-line regardless of syntax.
     */
}

<CHAR> {    // we don't need to consider the case where prime is part of an identifier since it is handled above
    \\[\'\\]    { chkLOC(); onNonSymbolMatched(yytext(), yychar); }
    \'          {
        chkLOC();
        onNonSymbolMatched(yytext(), yychar);
        yypop();
    }
    /*
     * N.b. though only a single char is valid Haskell syntax, OpenGrok just
     * waits to end CHAR at a non-escaped apostrophe regardless of count.
     */
}

<COMMENT> {
    /* A line comment ends at the end of its line. */
    {WhspChar}*{EOL}    {
        yypop();
        onEndOfLineMatched(yytext(), yychar);
    }
}

<YYINITIAL, BCOMMENT> {
    /*
     * Only the outermost "{-" pushes BCOMMENT and opens the comment span;
     * nested openers just increase the depth counter.
     */
    "{-"    {
        if (nestedComment++ == 0) {
            yypush(BCOMMENT);
            onDisjointSpanChanged(HtmlConsts.COMMENT_CLASS, yychar);
        }
        onNonSymbolMatched(yytext(), yychar);
    }
}

<BCOMMENT> {
    /* BCOMMENT is popped only when the depth counter returns to zero. */
    "-}"    {
        onNonSymbolMatched(yytext(), yychar);
        if (--nestedComment == 0) {
            yypop();
        }
    }
}

/*
 * The next three rules carry no state list. All states in this file are
 * declared with %state (inclusive), so these rules are active in every state.
 */
{WhspChar}*{EOL}    { onEndOfLineMatched(yytext(), yychar); }
[[\s]--[\n]]        { onNonSymbolMatched(yytext(), yychar); }
[^\n]               { chkLOC(); onNonSymbolMatched(yytext(), yychar); }

<STRING, COMMENT, BCOMMENT> {
    {FPath}    {
        chkLOC();
        onPathlikeMatched(yytext(), '/', false, yychar);
    }
    {FNameChar}+ "@" {FNameChar}+ "." {FNameChar}+    {
        chkLOC();
        onEmailAddressMatched(yytext(), yychar);
    }
}

<STRING, COMMENT> {
    {BrowseableURI}    {
        chkLOC();
        onUriMatched(yytext(), yychar);
    }
}

<BCOMMENT> {
    /*
     * Right curly bracket is not a valid URI character, so it won't be in a
     * {BrowseableURI} capture, but a hyphen is valid. Thus a nested comment
     * ending token, -}, can hide at the end of a URI. Work around this by
     * capturing a possibly-trailing right curly bracket, and match a special,
     * Haskell-specific collateral capture pattern.
     */
    {BrowseableURI} \}?    {
        onUriMatched(yytext(), yychar, HaskellUtils.MAYBE_END_NESTED_COMMENT);
    }
}