/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2015, 2021, Oracle and/or its affiliates. All rights reserved.
 * Portions Copyright (c) 2017, Chris Fraire <cfraire@me.com>.
 */

/*
 * Cross reference a Haskell file
 */

package org.opengrok.indexer.analysis.haskell;

import java.io.IOException;
import org.opengrok.indexer.analysis.JFlexSymbolMatcher;
import org.opengrok.indexer.web.HtmlConsts;

/**
 * @author Harry Pan
 */
%%
%public
%class HaskellXref
%extends JFlexSymbolMatcher
%unicode
%int
%char
%include ../CommonLexer.lexh
%include ../CommonXref.lexh
%{
    /**
     * Current depth of nested block comments: incremented on every
     * <code>{-</code> and decremented on every <code>-}</code>. The BCOMMENT
     * state is entered when the depth leaves zero and is popped when the
     * depth returns to zero.
     */
    private int nestedComment;

    /**
     * Resets the matcher via the superclass and clears the nested-comment
     * depth so that a reused instance starts outside any block comment.
     */
    @Override
    public void reset() {
        super.reset();
        nestedComment = 0;
    }

    /**
     * Signals the end of the current disjoint span (by passing a
     * {@code null} class name at the current offset) and then pops the
     * lexer state via the superclass.
     * @throws IOException declared for the superclass call and the span
     * callback
     */
    @Override
    public void yypop() throws IOException {
        onDisjointSpanChanged(null, yychar);
        super.yypop();
    }

    /**
     * Calls {@code phLOC()} unless the lexer is currently in the COMMENT or
     * BCOMMENT state.
     * NOTE(review): {@code phLOC()} is declared outside this file;
     * presumably it records a physical line of code -- confirm against the
     * included lexer helpers.
     */
    protected void chkLOC() {
        switch (yystate()) {
            case COMMENT:
            case BCOMMENT:
                break;
            default:
                phLOC();
                break;
        }
    }
%}

%state STRING CHAR COMMENT BCOMMENT

%include ../Common.lexh
%include ../CommonURI.lexh
%include ../CommonPath.lexh
%include Haskell.lexh
%%
<YYINITIAL> {
    {Identifier} {
        chkLOC();
        String id = yytext();
        onFilteredSymbolMatched(id, yychar, Consts.kwd);
    }
    {Number}     {
        chkLOC();
        onDisjointSpanChanged(HtmlConsts.NUMBER_CLASS, yychar);
        onNonSymbolMatched(yytext(), yychar);
        onDisjointSpanChanged(null, yychar);
    }
    \"           {
        chkLOC();
        yypush(STRING);
        onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
        onNonSymbolMatched(yytext(), yychar);
    }
    \'           {
        chkLOC();
        yypush(CHAR);
        onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
        onNonSymbolMatched(yytext(), yychar);
    }
    "--"         {
        yypush(COMMENT);
        onDisjointSpanChanged(HtmlConsts.COMMENT_CLASS, yychar);
        onNonSymbolMatched(yytext(), yychar);
    }

    {NotComments}    { chkLOC(); onNonSymbolMatched(yytext(), yychar); }
}

<STRING> {
    \\[\"\\]    { chkLOC(); onNonSymbolMatched(yytext(), yychar); }
    \"    {
        chkLOC();
        onNonSymbolMatched(yytext(), yychar);
        yypop();
    }
    /*
     * "A string may include a 'gap'—two backslants enclosing white
     * characters—which is ignored. This allows one to write long strings on
     * more than one line by writing a backslant at the end of one line and at
     * the start of the next." N.b. OpenGrok does not explicitly recognize the
     * "gap" but since a STRING must end in a non-escaped quotation mark, just
     * allow STRINGs to be multi-line regardless of syntax.
     */
}

<CHAR> {
    /*
     * We don't need to consider the case where prime is part of an
     * identifier since it is handled above.
     */
    \\[\'\\]    { chkLOC(); onNonSymbolMatched(yytext(), yychar); }
    \'    {
        chkLOC();
        onNonSymbolMatched(yytext(), yychar);
        yypop();
    }
    /*
     * N.b. though only a single char is valid Haskell syntax, OpenGrok just
     * waits to end CHAR at a non-escaped apostrophe regardless of count.
     */
}

<COMMENT> {
    {WhspChar}*{EOL}    {
        yypop();
        onEndOfLineMatched(yytext(), yychar);
    }
}

<YYINITIAL, BCOMMENT> {
    "{-"    {
        // Only the outermost opener (depth 0 -> 1) pushes BCOMMENT and
        // starts the comment span; nested openers just deepen the count.
        if (nestedComment++ == 0) {
            yypush(BCOMMENT);
            onDisjointSpanChanged(HtmlConsts.COMMENT_CLASS, yychar);
        }
        onNonSymbolMatched(yytext(), yychar);
    }
}

<BCOMMENT> {
    "-}"    {
        onNonSymbolMatched(yytext(), yychar);
        // Pop BCOMMENT only when the outermost comment has been closed.
        if (--nestedComment == 0) {
            yypop();
        }
    }
}

{WhspChar}*{EOL}    { onEndOfLineMatched(yytext(), yychar); }
[[\s]--[\n]]        { onNonSymbolMatched(yytext(), yychar); }
[^\n]               { chkLOC(); onNonSymbolMatched(yytext(), yychar); }

<STRING, COMMENT, BCOMMENT> {
    {FPath}    {
        chkLOC();
        onPathlikeMatched(yytext(), '/', false, yychar);
    }
    {FNameChar}+ "@" {FNameChar}+ "." {FNameChar}+    {
        chkLOC();
        onEmailAddressMatched(yytext(), yychar);
    }
}

<STRING, COMMENT> {
    {BrowseableURI}    {
        chkLOC();
        onUriMatched(yytext(), yychar);
    }
}

<BCOMMENT> {
    /*
     * Right curly bracket is not a valid URI character, so it won't be in a
     * {BrowseableURI} capture, but a hyphen is valid. Thus a nested comment
     * ending token, -}, can hide at the end of a URI. Work around this by
     * capturing a possibly-trailing right curly bracket, and match a special,
     * Haskell-specific collateral capture pattern.
     */
    {BrowseableURI} \}?    {
        onUriMatched(yytext(), yychar, HaskellUtils.MAYBE_END_NESTED_COMMENT);
    }
}