xref: /OpenGrok/opengrok-indexer/src/main/jflex/analysis/php/PhpXref.lex (revision d219b4cea555a12b602d2d5518daa22134ad4879)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2011, 2021, Oracle and/or its affiliates. All rights reserved.
22  * Portions Copyright (c) 2017, Chris Fraire <cfraire@me.com>.
23  */
24 
25 /*
26  * Cross reference a PHP file
27  */
28 
29 package org.opengrok.indexer.analysis.php;
30 
31 import java.io.IOException;
32 import java.util.Arrays;
33 import java.util.HashSet;
34 import java.util.Set;
35 import java.util.Stack;
36 import org.opengrok.indexer.analysis.JFlexSymbolMatcher;
37 import org.opengrok.indexer.analysis.EmphasisHint;
38 import org.opengrok.indexer.web.HtmlConsts;
39 %%
40 %public
41 %class PhpXref
42 %extends JFlexSymbolMatcher
43 %unicode
44 %ignorecase
45 %int
46 %char
47 %include ../CommonLexer.lexh
48 %include ../CommonXref.lexh
49 %{
50   private final static Set<String> PSEUDO_TYPES;
51   private final Stack<String> popStrings = new Stack<>();
52   private final Stack<String> docLabels = new Stack<String>();
53 
54   static {
55     PSEUDO_TYPES = new HashSet<String>(Arrays.asList(
56         new String[] {
57             "string", "integer", "int", "boolean", "bool", "float", "double",
58             "object", "mixed", "array", "resource", "void", "null", "callback",
59             "false", "true", "self", "callable"
60         }
61     ));
62   }
63 
64   @Override
clearStack()65   protected void clearStack() {
66       super.clearStack();
67       popStrings.clear();
68       docLabels.clear();
69   }
70 
71   /**
72    * save current yy state to stack
73    * @param newState state id
74    * @param popString string for the state
75    */
yypush(int newState,String popString)76   public void yypush(int newState, String popString) {
77       super.yypush(newState);
78       popStrings.push(popString);
79   }
80 
81   /**
82    * save current yy state to stack
83    * @param newState state id
84    */
85   @Override
yypush(int newState)86   public void yypush(int newState) {
87       yypush(newState, null);
88   }
89 
90   /**
91    * pop last state from stack
92    * @throws IOException in case of any I/O problem
93    */
94   @Override
yypop()95   public void yypop() throws IOException {
96       String popString = popStrings.pop();
97       if (popString != null) {
98           onDisjointSpanChanged(popString, yychar);
99       }
100       super.yypop();
101   }
102 
writeDocTag()103   private void writeDocTag() throws IOException {
104     String capture = yytext();
105     String sigil = capture.substring(0, 1);
106     String tag = capture.substring(1);
107     onNonSymbolMatched(sigil, yychar);
108     onNonSymbolMatched(tag, EmphasisHint.STRONG, yychar);
109   }
110 
isTabOrSpace(int i)111   private boolean isTabOrSpace(int i) {
112     return yycharat(i) == '\t' || yycharat(i) == ' ';
113   }
114 
isHtmlState(int state)115   private static boolean isHtmlState(int state) {
116     return state == TAG_NAME            || state == AFTER_TAG_NAME
117         || state == ATTRIBUTE_NOQUOTE   || state == ATTRIBUTE_SINGLE
118         || state == ATTRIBUTE_DOUBLE    || state == HTMLCOMMENT
119         || state == YYINITIAL;
120   }
121 
chkLOC()122   protected void chkLOC() {
123       switch (yystate()) {
124           case HTMLCOMMENT:
125           case SCOMMENT:
126           case COMMENT:
127           case DOCCOMMENT:
128           case DOCCOM_TYPE_THEN_NAME:
129           case DOCCOM_NAME:
130           case DOCCOM_TYPE:
131               break;
132           default:
133               phLOC();
134               break;
135       }
136   }
137 %}
138 
// PHP label: a letter, an underscore or any character from U+007F upwards,
// followed by any number of letters, digits, underscores or such characters.
// The upper bound needs the six-digit \U escape: \u reads exactly four hex
// digits, so "\u10FFFF" would mean U+10FF followed by two literal "F"s and
// names using characters above U+10FF would not be matched as identifiers.
Identifier = [a-zA-Z_\u007F-\U10FFFF] [a-zA-Z0-9_\u007F-\U10FFFF]*
140 
// File-like tokens: a letter, any run of {FNameChar} (not defined in this
// file), a dot and one of the listed extensions.
141 File = [a-zA-Z]{FNameChar}* "." ("php"|"php3"|"php4"|"phps"|"phtml"|"inc"|"diff"|"patch")
142 
// Numeric literal forms combined by {Number}, which also accepts a leading sign.
// Inside a character class "|" is an ordinary character, so the binary prefix
// is written [bB]; [b|B] would also accept a literal "|" after the zero.
BinaryNumber = 0[bB][01]+
OctalNumber = 0[0-7]+
// A single non-zero digit is a decimal number as well. This does not change
// what {Number} accepts, because the integer alternative of {FloatNumber}
// already covers every run of digits.
DecimalNumber = [1-9][0-9]*
HexadecimalNumber = 0[xX][0-9a-fA-F]+
FloatNumber = (([0-9]* "." [0-9]+) | ([0-9]+ "." [0-9]*) | [0-9]+)([eE][+-]?[0-9]+)?
Number = [+-]?({BinaryNumber}|{OctalNumber}|{DecimalNumber}|{HexadecimalNumber}|{FloatNumber})
149 
150 //do not support <script language="php"> and </script> opening/closing tags
// "<?", "<?php" and "<?=" are the recognised opening tags; "?>" is the closing tag.
151 OpeningTag = ("<?" "php"?) | "<?="
152 ClosingTag = "?>"
153 
// Type names accepted between the parentheses of a cast expression.
154 CastTypes = "int"|"integer"|"real"|"double"|"float"|"string"|"binary"|"array"
155             |"object"|"bool"|"boolean"|"unset"
156 
// Backslash escapes. Double-quote form: one of the listed escape characters,
// "x" plus one or two hex digits, or one to three octal digits.
// Single-quote form: only an escaped backslash or an escaped single quote.
157 DoubleQuoteEscapeSequences = \\ (([nrtfve\\$]) | ([xX] [0-9a-fA-F]{1,2}) |  ([0-7]{1,3}))
158 SingleQuoteEscapeSequences = \\ [\\\']
159 
// What has to precede the "@" of a doc tag: an asterisk or a run of whitespace.
160 DocPreviousChar = "*" | {WhspChar}+
161 
162 //does not support nested type expressions like ((array|integer)[]|boolean)[]
163 //that would require additional states
// A doc type is one or more individual types separated by "|". An individual
// type is a simple type with an optional "[]" suffix, or a parenthesised
// "|"-separated list of such types followed by "[]".
164 DocType = {IndividualDocType} (\| {IndividualDocType})*
165 IndividualDocType = ({SimpleDocType} "[]"? | ( \( {SimpleDocType} "[]"? ( \| {SimpleDocType} "[]"? )* \)\[\] ))
166 SimpleDocType = {Identifier}
167 
// Tag names grouped by what follows them: a type, a type then a name, or
// just a name. The DOCCOMMENT rules pick the follow-up state from these groups.
168 DocParamWithType = "return" | "throws" | "throw" | "var" | "see"  //"see" can take a URL
169 DocParamWithTypeAndName = "param" | "global" | "property" | "property-read"
170                           | "property-write"
171 DocParamWithName = "uses"
// Tag names recognised after "{@".
172 DocInlineTags = "internal" | "inheritDoc" | "link" | "example"
173 //method needs special treatment
174 
// First character of an HTML tag or attribute name. The upper bound uses the
// six-digit \U escape: \u reads exactly four hex digits, so "\u10FFFFFF"
// would mean U+10FF followed by literal "F" characters and the class would
// stop at U+10FF.
HtmlNameStart = [a-zA-Z_\u00C0-\U10FFFF]
// A name start character followed by further name characters, "-", ".",
// digits or U+00B7.
HtmlName      = {HtmlNameStart} ({HtmlNameStart} | [\-.0-9\u00B7])*
177 
// HTML-side states; together with YYINITIAL these are the states that
// isHtmlState() accepts.
178 %state TAG_NAME AFTER_TAG_NAME ATTRIBUTE_NOQUOTE ATTRIBUTE_SINGLE ATTRIBUTE_DOUBLE HTMLCOMMENT
// PHP code, string, heredoc/nowdoc and comment states.
179 %state IN_SCRIPT STRING SCOMMENT HEREDOC NOWDOC COMMENT QSTRING BACKQUOTE STRINGEXPR STRINGVAR
// States entered from "/**" comments and from the tags found inside them.
180 %state DOCCOMMENT DOCCOM_TYPE_THEN_NAME DOCCOM_NAME DOCCOM_TYPE
181 
182 %include ../Common.lexh
183 %include ../CommonURI.lexh
184 %include ../CommonPath.lexh
185 %%
186 <YYINITIAL> { //HTML
    /* Start of an opening or closing tag. TAG_NAME is entered with yypush so
     * that the ">" rule can pop back out of the tag. */
187     "<" | "</"      {
188         chkLOC();
189         onNonSymbolMatched(yytext(), yychar);
190         yypush(TAG_NAME);
191     }
192 
    /* Start of an HTML comment. The state is entered with yybegin (nothing
     * is pushed) and chkLOC() is not called for the marker. */
193     "<!--" {
194         onDisjointSpanChanged(HtmlConsts.COMMENT_CLASS, yychar);
195         onNonSymbolMatched(yytext(), yychar);
196         yybegin(HTMLCOMMENT);
197     }
198 }
199 
200 <TAG_NAME> {
    /* Plain tag name: written with the number span class, after which the
     * span class that was active before is put back. */
201     {HtmlName} {
202         chkLOC();
203         String lastClassName = getDisjointSpanClassName();
204         onDisjointSpanChanged(HtmlConsts.NUMBER_CLASS, yychar);
205         onNonSymbolMatched(yytext(), yychar);
206         onDisjointSpanChanged(lastClassName, yychar);
207         yybegin(AFTER_TAG_NAME);
208     }
209 
    /* Tag name of the form prefix:name. Both parts are written with the
     * number span class; the ":" in between is written after the span class
     * has been reset to null. */
210     {HtmlName}:{HtmlName} {
211         chkLOC();
212         String lastClassName = getDisjointSpanClassName();
213         onDisjointSpanChanged(HtmlConsts.NUMBER_CLASS, yychar);
214         int i = 0;
            // find the separator; the pattern guarantees that there is one
215         while (yycharat(i) != ':') i++;
216         onNonSymbolMatched(yytext().substring(0,i), yychar);
217         onDisjointSpanChanged(null, yychar);
218         onNonSymbolMatched(":", yychar);
219         onDisjointSpanChanged(HtmlConsts.NUMBER_CLASS, yychar);
220         onNonSymbolMatched(yytext().substring(i + 1), yychar);
221         onDisjointSpanChanged(lastClassName, yychar);
222         yybegin(AFTER_TAG_NAME);
223     }
224 }
225 
226 <AFTER_TAG_NAME> {
    /* Attribute name, written with strong emphasis. */
227     {HtmlName} {
228         chkLOC();
229         //attribute
230         onNonSymbolMatched(yytext(), EmphasisHint.STRONG, yychar);
231     }
232 
    /* "=" followed by optional whitespace and an optional opening quote. */
233     "=" {WhspChar}* (\" | \')? {
234         chkLOC();
            // the last matched character tells whether a quote was consumed;
            // otherwise it is the "=" itself or whitespace
235         char attributeDelim = yycharat(yylength()-1);
236         onNonSymbolMatched("=", yychar);
237         onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
            // the whitespace and the quote, if any, are written after the
            // string span class has been set
238         onNonSymbolMatched(yytext().substring(1), yychar);
239         if (attributeDelim == '\'') {
240             yypush(ATTRIBUTE_SINGLE);
241         } else if (attributeDelim == '"') {
242             yypush(ATTRIBUTE_DOUBLE);
243         } else {
244             yypush(ATTRIBUTE_NOQUOTE);
245         }
246     }
247 }
248 
249 <TAG_NAME, AFTER_TAG_NAME> {
    /* End of the tag: pops the state pushed when the tag was opened. */
250     ">"     {
251         chkLOC();
252         onNonSymbolMatched(yytext(), yychar);
253         yypop(); //to YYINITIAL
254     }
255 }
256 
257 <YYINITIAL, TAG_NAME, AFTER_TAG_NAME> {
    /* A PHP opening tag in plain HTML or inside a tag: written with strong
     * emphasis, then IN_SCRIPT is entered with yypush. */
258     {OpeningTag}    {
259         chkLOC();
260         onNonSymbolMatched(yytext(), EmphasisHint.STRONG, yychar);
261         yypush(IN_SCRIPT); }
262 }
263 
264 <ATTRIBUTE_NOQUOTE> {
    /* An unquoted attribute value ends at the end of the line ... */
265     {WhspChar}* {EOL} {
266         onDisjointSpanChanged(null, yychar);
267         onEndOfLineMatched(yytext(), yychar);
268         yypop();
269     }
    /* ... or at whitespace; here the whitespace is written before the span
     * class is reset ... */
270     {WhspChar}+   {
271         onNonSymbolMatched(yytext(), yychar);
272         onDisjointSpanChanged(null, yychar);
273         yypop();
274     }
    /* ... or at the ">" that also ends the tag. */
275     ">"     {
276         chkLOC();
277         onNonSymbolMatched(yytext(), yychar);
278         onDisjointSpanChanged(null, yychar);
279         //pop twice
            // once for the attribute value and once for the tag
280         yypop();
281         yypop();
282     }
283 }
284 
285 <ATTRIBUTE_DOUBLE>\" {
        // closing quote: written first, then the span class is reset and the
        // attribute state is left
286     chkLOC();
287     onNonSymbolMatched(yytext(), yychar);
288     onDisjointSpanChanged(null, yychar); yypop();
289 }
290 <ATTRIBUTE_SINGLE>\' {
        // same handling for a single-quoted value
291     chkLOC();
292     onNonSymbolMatched(yytext(), yychar);
293     onDisjointSpanChanged(null, yychar); yypop();
294 }
295 
296 <ATTRIBUTE_DOUBLE, ATTRIBUTE_SINGLE> {
    /* A quoted value may continue on the next line: the span class is reset
     * before the line end is reported and set to the string class again
     * afterwards. */
297     {WhspChar}* {EOL} {
298         onDisjointSpanChanged(null, yychar);
299         onEndOfLineMatched(yytext(), yychar);
300         onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
301     }
302 }
303 
304 <ATTRIBUTE_NOQUOTE, ATTRIBUTE_DOUBLE, ATTRIBUTE_SINGLE> {
    /* PHP opening tag inside an attribute value. The string class is
     * registered with the pushed state, so yypop() hands it back to
     * onDisjointSpanChanged when the script block ends. */
305     {OpeningTag} {
306         chkLOC();
307         onDisjointSpanChanged(null, yychar);
308         onNonSymbolMatched(yytext(), EmphasisHint.STRONG, yychar);
309         yypush(IN_SCRIPT, HtmlConsts.STRING_CLASS);
310     }
311 }
312 
313 <HTMLCOMMENT> {
    /* End of the HTML comment: back to YYINITIAL with yybegin, matching the
     * yybegin that entered this state. */
314     "-->" {
315         onNonSymbolMatched(yytext(), yychar);
316         onDisjointSpanChanged(null, yychar);
317         yybegin(YYINITIAL);
318     }
319 
    /* Line end inside the comment: the span class is reset before the line
     * end is reported and set to the comment class again afterwards. */
320     {WhspChar}* {EOL} {
321         onDisjointSpanChanged(null, yychar);
322         onEndOfLineMatched(yytext(), yychar);
323         onDisjointSpanChanged(HtmlConsts.COMMENT_CLASS, yychar);
324     }
325 
    /* PHP opening tag inside an HTML comment. The comment class is registered
     * with the pushed state, so yypop() hands it back to
     * onDisjointSpanChanged when the script block ends. */
326     {OpeningTag} {
327         onDisjointSpanChanged(null, yychar);
328         onNonSymbolMatched(yytext(), EmphasisHint.STRONG, yychar);
329         yypush(IN_SCRIPT, HtmlConsts.COMMENT_CLASS);
330     }
331 }
332 
333 <IN_SCRIPT> {
334     "$" {Identifier} {
            // variable: the "$" is written plainly and the name is reported
            // as a symbol with a null keyword set
335         chkLOC();
336         //we ignore keywords if the identifier starts with one of variable chars
337         String id = yytext().substring(1);
338         onNonSymbolMatched("$", yychar);
339         onFilteredSymbolMatched(id, yychar, null);
340     }
341 
    /* Bare identifier: reported as a symbol, filtered against Consts.kwd. */
342     {Identifier} {
343         chkLOC();
344         onFilteredSymbolMatched(yytext(), yychar, Consts.kwd);
345     }
346 
347     \( {WhspChar}* {CastTypes} {WhspChar}* \) {
            // cast expression: only the type name is written with emphasis
348         chkLOC();
349         onNonSymbolMatched("(", yychar);
350         int i = 1, j;
            // write the tabs/spaces between "(" and the type name one by one
351         while (isTabOrSpace(i)) { onNonSymbolMatched(yycharat(i++), yychar); }
352 
            // advance j to the first tab, space or ")" after the type name
353         j = i + 1;
354         while (!isTabOrSpace(j) && yycharat(j) != ')') { j++; }
355         onNonSymbolMatched(yytext().substring(i, j), EmphasisHint.EM, yychar);
356 
            // whatever follows the type name, up to and including ")"
357         onNonSymbolMatched(yytext().substring(j, yylength()), yychar);
358     }
359 
360     b? \" {
361         chkLOC();
362         yypush(STRING);
363         if (yycharat(0) == 'b') { onNonSymbolMatched('b', yychar); }
364         onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
365         onNonSymbolMatched("\"", yychar);
366     }
367 
368     b? \' {
369         chkLOC();
370         yypush(QSTRING);
371         if (yycharat(0) == 'b') { onNonSymbolMatched('b', yychar); }
372         onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
373         onNonSymbolMatched("\'", yychar);
374     }
375 
376     [`]    {
            // start of a backquoted string: the state is pushed, then the
            // string span class is set and the backquote written
377         chkLOC();
378         yypush(BACKQUOTE);
379         onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
380         onNonSymbolMatched(yytext(), yychar);
381     }
382 
383     b? "<<<" {WhspChar}* ({Identifier} | (\'{Identifier}\') | (\"{Identifier}\")){EOL} {
384         chkLOC();
385         if (yycharat(0) == 'b') { onNonSymbolMatched('b', yychar); }
386         onNonSymbolMatched("<<<", yychar);
387         int i = yycharat(0) == 'b' ? 4 : 3, j = yylength()-1;
388         while (isTabOrSpace(i)) {
389             onNonSymbolMatched(yycharat(i++), yychar);
390         }
391         while (yycharat(j) == '\n' || yycharat(j) == '\r') { j--; }
392 
393         if (yycharat(i) == '\'' || yycharat(i) == '"') {
394             yypush(NOWDOC);
395             String text = yytext().substring(i+1, j);
396             this.docLabels.push(text);
397             onNonSymbolMatched(String.valueOf(yycharat(i)), yychar);
398             onDisjointSpanChanged(HtmlConsts.BOLD_CLASS, yychar);
399             onNonSymbolMatched(text, yychar);
400             onDisjointSpanChanged(null, yychar);
401             onNonSymbolMatched(String.valueOf(yycharat(i)), yychar);
402         } else {
403             yypush(HEREDOC);
404             String text = yytext().substring(i, j+1);
405             this.docLabels.push(text);
406             onDisjointSpanChanged(HtmlConsts.BOLD_CLASS, yychar);
407             onNonSymbolMatched(text, yychar);
408             onDisjointSpanChanged(null, yychar);
409         }
410         onEndOfLineMatched(yytext(), yychar);
411         onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
412     }
413 
414     {Number}   {
            // numeric literal: written with the number span class, after
            // which the span class that was active before is put back
415         chkLOC();
416         String lastClassName = getDisjointSpanClassName();
417         onDisjointSpanChanged(HtmlConsts.NUMBER_CLASS, yychar);
418         onNonSymbolMatched(yytext(), yychar);
419         onDisjointSpanChanged(lastClassName, yychar);
420     }
421 
    /* The three comment openers below do not call chkLOC(). */
422     "#"|"//"   {
423         yypush(SCOMMENT);
424         onDisjointSpanChanged(HtmlConsts.COMMENT_CLASS, yychar);
425         onNonSymbolMatched(yytext(), yychar);
426     }
427     "/**"      {
428         yypush(DOCCOMMENT);
429         onDisjointSpanChanged(HtmlConsts.COMMENT_CLASS, yychar);
            // only "/*" is written; the second "*" is pushed back and
            // scanned again in the DOCCOMMENT state
430         onNonSymbolMatched("/*", yychar);
431         yypushback(1);
432     }
433     "/*"       {
434         yypush(COMMENT);
435         onDisjointSpanChanged(HtmlConsts.COMMENT_CLASS, yychar);
436         onNonSymbolMatched(yytext(), yychar);
437     }
438 
439     \{         {
            // every opening brace pushes another IN_SCRIPT level
440         chkLOC();
441         onNonSymbolMatched(yytext(), yychar);
442         yypush(IN_SCRIPT);
443     }
444     \}         {
445         chkLOC();
446         onNonSymbolMatched(yytext(), yychar);
            // pop only when the stack is non-empty and its top is not an
            // HTML-side state
447         if (!this.stack.empty() && !isHtmlState(this.stack.peek()))
448             yypop(); //may pop STRINGEXPR/HEREDOC/BACKQUOTE
449         /* we don't pop unconditionally because we can exit a ?php block
450          * with open braces and we discard the information about the number of
451          * open braces when exiting the block (see the action for {ClosingTag}
452          * below). An alternative would be keeping two stacks -- one for HTML
453          * and another for PHP. The PHP scanner only needs one stack because
454          * it doesn't need to keep state about the HTML */
455     }
456 
    /* PHP closing tag: written with strong emphasis, then states are popped
     * until the current state is an HTML-side state. */
457     {ClosingTag} {
458         chkLOC();
459         onNonSymbolMatched(yytext(), EmphasisHint.STRONG, yychar);
460         while (!isHtmlState(yystate()))
461             yypop();
462     }
463 } //end of IN_SCRIPT
464 
465 <STRING> {
    /* An escaped double quote does not end the string; it is written with
     * strong emphasis. */
466     \\\"    {
467         chkLOC();
468         onNonSymbolMatched(yytext(), EmphasisHint.STRONG, yychar);
469     }
    /* Closing quote: written first, then the span class is reset and the
     * state is popped. */
470     \"    {
471         chkLOC();
472         onNonSymbolMatched(yytext(), yychar);
473         onDisjointSpanChanged(null, yychar); yypop();
474     }
475 }
476 
477 <BACKQUOTE> {
    /* Same handling for backquoted strings: escaped backquote ... */
478     "\\`"    {
479         chkLOC();
480         onNonSymbolMatched(yytext(), EmphasisHint.STRONG, yychar);
481     }
    /* ... and closing backquote. */
482     "`"    {
483         chkLOC();
484         onNonSymbolMatched("`", yychar);
485         onDisjointSpanChanged(null, yychar); yypop();
486     }
487 }
488 
489 <STRING, BACKQUOTE, HEREDOC> {
    /* A backslash followed by "{" is written plainly, without emphasis. */
490     "\\{" {
491         chkLOC();
492         onNonSymbolMatched(yytext(), yychar);
493     }
494 
495     {DoubleQuoteEscapeSequences} {
496         chkLOC();
497         onNonSymbolMatched(yytext(), EmphasisHint.STRONG, yychar);
498     }
499 
    /* The three rules below reset the span class and push a state together
     * with the string class, so yypop() hands that class back to
     * onDisjointSpanChanged when the pushed state ends. */
500     "$"     {
501         chkLOC();
502         onDisjointSpanChanged(null, yychar);
503         onNonSymbolMatched("$", yychar);
504         yypush(STRINGVAR, HtmlConsts.STRING_CLASS);
505     }
506 
507     "${" {
508         chkLOC();
509         onDisjointSpanChanged(null, yychar);
510         onNonSymbolMatched(yytext(), yychar);
511         yypush(STRINGEXPR, HtmlConsts.STRING_CLASS);
512     }
513 
514     /* ${ is different from {$ -- for instance {$foo->bar[1]} is valid
515      * but ${foo->bar[1]} is not. ${ only enters the full blown scripting state
516      * when {Identifier}[ is found (see the PHP scanner). The parser seems to
517      * put more restrictions on the {$ scripting mode than on the
518      * "${" {Identifier} "[" scripting mode, but that's not relevant here */
519     "{$" {
520         chkLOC();
521         onDisjointSpanChanged(null, yychar);
            // only "{" is written; the "$" is pushed back and scanned again
            // in the IN_SCRIPT state
522         onNonSymbolMatched("{", yychar);
523         yypushback(1);
524         yypush(IN_SCRIPT, HtmlConsts.STRING_CLASS);
525     }
526 }
527 
528 <QSTRING> {
    /* Escaped backslash or escaped single quote, written with strong
     * emphasis. */
529     {SingleQuoteEscapeSequences} {
530         chkLOC();
531         onNonSymbolMatched(yytext(), EmphasisHint.STRONG, yychar);
532     }
533 
    /* Closing quote: written first, then the span class is reset and the
     * state is popped. */
534     \'      {
535         chkLOC();
536         onNonSymbolMatched("'", yychar);
537         onDisjointSpanChanged(null, yychar); yypop();
538     }
539 }
540 
541 <HEREDOC, NOWDOC>^{Identifier} ";"? {EOL}  {
        // an identifier at the start of a line, optionally followed by ";":
        // a candidate for the terminator of the innermost heredoc/nowdoc
542     chkLOC();
543     int i = yylength() - 1;
544     boolean hasSemi = false;
        // move i back over the line end and an optional ";" so that it ends
        // up on the last character of the identifier
545     while (yycharat(i) == '\n' || yycharat(i) == '\r') { i--; }
546     if (yycharat(i) == ';') { hasSemi = true; i--; }
547     if (yytext().substring(0, i+1).equals(this.docLabels.peek())) {
            // terminator found: leave the state and write the label with the
            // bold span class
548         String text = this.docLabels.pop();
549         yypop();
550         onDisjointSpanChanged(HtmlConsts.BOLD_CLASS, yychar);
551         onNonSymbolMatched(text, yychar);
552         onDisjointSpanChanged(null, yychar);
553         if (hasSemi) onNonSymbolMatched(";", yychar);
554         onEndOfLineMatched(yytext(), yychar);
555     } else {
            // some other identifier: written as ordinary content
556         onNonSymbolMatched(yytext().substring(0,i+1), yychar);
557         if (hasSemi) onNonSymbolMatched(";", yychar);
558         onEndOfLineMatched(yytext(), yychar);
559     }
560 }
561 
562 <STRING, QSTRING, BACKQUOTE, HEREDOC, NOWDOC>{WhspChar}* {EOL} {
        // line end inside a string: the span class is reset before the line
        // end is reported and set to the string class again afterwards
563     onDisjointSpanChanged(null, yychar);
564     onEndOfLineMatched(yytext(), yychar);
565     onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
566 }
567 
568 <STRINGVAR> {
    /* Name of the variable that follows a "$" inside a string. This rule
     * stays in STRINGVAR; the rules below pop the state. */
569     {Identifier}    {
570         chkLOC();
571         onFilteredSymbolMatched(yytext(), yychar, null);
572     }
573 
    /* Numeric index: the number is written with the number span class. */
574     \[ {Number} \] {
575         chkLOC();
576         onNonSymbolMatched("[", yychar);
577         String lastClassName = getDisjointSpanClassName();
578         onDisjointSpanChanged(HtmlConsts.NUMBER_CLASS, yychar);
579         onNonSymbolMatched(yytext().substring(1, yylength()-1), yychar);
580         onDisjointSpanChanged(lastClassName, yychar);
581         onNonSymbolMatched("]", yychar);
582         yypop(); //because "$arr[0][1]" is the same as $arr[0] . "[1]"
583     }
584 
    /* Bare-word index: written with the string span class rather than
     * reported as a symbol. */
585     \[ {Identifier} \] {
586         chkLOC();
587         //then the identifier is actually a string!
588         onNonSymbolMatched("[", yychar);
589         String lastClassName = getDisjointSpanClassName();
590         onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
591         onNonSymbolMatched(yytext().substring(1, yylength()-1), yychar);
592         onDisjointSpanChanged(lastClassName, yychar);
593         onNonSymbolMatched("]", yychar);
594         yypop();
595     }
596 
    /* Variable index: the name after "[$" is reported as a symbol. */
597     \[ "$" {Identifier} \] {
598         chkLOC();
599         onNonSymbolMatched("[$", yychar);
600         onFilteredSymbolMatched(yytext().substring(2, yylength()-1), yychar, null);
601         onNonSymbolMatched("]", yychar);
602         yypop();
603     }
604 
    /* Property access: "->" is written plainly and the name is reported as a
     * symbol. */
605     "->" {Identifier} {
606         chkLOC();
607         onNonSymbolMatched(yytext().substring(0, 2), yychar);
608         onFilteredSymbolMatched(yytext().substring(2), yychar, null);
609         yypop(); //because "$arr->a[0]" is the same as $arr->a . "[0]"
610     }
611 
    /* Anything else ends the variable: the character is pushed back so that
     * it is scanned again in the state that yypop() returns to. */
612     [^]          { yypushback(1); yypop(); }
613 }
614 
615 <STRINGEXPR> {
    /* Name inside "${...}", reported as a symbol with a null keyword set. */
616     {Identifier} {
617         chkLOC();
618         onFilteredSymbolMatched(yytext(), yychar, null);
619     }
    /* "}" ends the expression and pops the state. */
620     \}  { chkLOC(); onNonSymbolMatched('}', yychar); yypop(); }
621     \[  { chkLOC(); onNonSymbolMatched('[', yychar); yybegin(IN_SCRIPT); } /* don't push. when we find '}'
622                                                  * and we pop we want to go to
623                                                  * STRING/HEREDOC, not back to
624                                                  * STRINGEXPR */
625 }
626 
627 <SCOMMENT> {
    /* A closing tag ends a single-line comment and the script block with it:
     * states are popped until the current state is an HTML-side state. */
628     {ClosingTag}    {
629         onDisjointSpanChanged(null, yychar);
630         onNonSymbolMatched(yytext(), EmphasisHint.STRONG, yychar);
631         while (!isHtmlState(yystate()))
632             yypop();
633     }
    /* Otherwise the comment ends at the end of the line. */
634     {WhspChar}* {EOL} {
635         onDisjointSpanChanged(null, yychar);
636         onEndOfLineMatched(yytext(), yychar);
637         yypop();
638     }
639 }
640 
641 <DOCCOMMENT> {
    /* Doc tags. Each is written by writeDocTag(); the follow-up state depends
     * on whether the tag is followed by a type, a type and a name, or a name. */
642     {DocPreviousChar} "@" {DocParamWithType} {
643         writeDocTag(); yybegin(DOCCOM_TYPE);
644     }
645 
646     {DocPreviousChar} "@" {DocParamWithTypeAndName} {
647         writeDocTag(); yybegin(DOCCOM_TYPE_THEN_NAME);
648     }
649 
650     {DocPreviousChar} "@" {DocParamWithName} {
651         writeDocTag(); yybegin(DOCCOM_NAME);
652     }
653 
    /* Inline tags and any other tag: written the same way, without a state
     * change. */
654     ("{@" {DocInlineTags}) | {DocPreviousChar} "@" {Identifier} {
655         writeDocTag();
656     }
657 }
658 
<DOCCOM_TYPE_THEN_NAME, DOCCOM_TYPE> {
    /* Whitespace followed by a type expression. The leading whitespace and
     * the punctuation of the expression are written plainly; every type name
     * is reported as a symbol filtered against PSEUDO_TYPES. */
    {WhspChar}+ {DocType} {
        int i = 0;
        // the pattern guarantees at least one whitespace character
        do { onNonSymbolMatched(yycharat(i++), yychar); } while (isTabOrSpace(i));
        while (i < yylength()) {
            //skip over [], |, ( and )
            char c;
            while (i < yylength() && ((c = yycharat(i)) == '[' || c == ']'
                    || c == '|' || c == '(' || c == ')')) {
                onNonSymbolMatched(c, yychar);
                i++;
            }
            // j: end of the type name that starts at i
            int j = i;
            while (j < yylength() && (c = yycharat(j)) != ')' && c != '|'
                    && c != '[') { j++; }
            // A type ending in "[]" or ")[]" leaves nothing after the
            // trailing punctuation; do not report an empty symbol for it.
            if (j > i) {
                onFilteredSymbolMatched(yytext().substring(i, j), yychar,
                        PSEUDO_TYPES, false);
            }
            i = j;
        }
        yybegin(yystate() == DOCCOM_TYPE_THEN_NAME ? DOCCOM_NAME : DOCCOMMENT);
    }

    /* No type follows the tag: push the character back and continue in
     * DOCCOMMENT. */
    [^] { yybegin(DOCCOMMENT); yypushback(1); }
}
684 
685 <DOCCOM_NAME> {
    /* Whitespace followed by a variable name: the whitespace and the "$" are
     * written plainly, the name is reported as a symbol. */
686     {WhspChar}+ "$" {Identifier} {
687         int i = 0;
            // the pattern guarantees at least one whitespace character
688         do { onNonSymbolMatched(yycharat(i++), yychar); } while (isTabOrSpace(i));
689 
690         onNonSymbolMatched("$", yychar);
            // i is on the "$" here, so the name starts at i + 1
691         onFilteredSymbolMatched(yytext().substring(i + 1), yychar, null);
692         yybegin(DOCCOMMENT);
693     }
694 
    /* No name follows: push the character back and continue in DOCCOMMENT. */
695     [^] { yybegin(DOCCOMMENT); yypushback(1); }
696 }
697 
698 <COMMENT, DOCCOMMENT> {
    /* Line end inside a block comment: the span class is reset before the
     * line end is reported and set to the comment class again afterwards. */
699     {WhspChar}* {EOL} {
700         onDisjointSpanChanged(null, yychar);
701         onEndOfLineMatched(yytext(), yychar);
702         onDisjointSpanChanged(HtmlConsts.COMMENT_CLASS, yychar);
703     }
    /* End of the comment: written first, then the span class is reset and the
     * state is popped. */
704     "*/"    {
705         onNonSymbolMatched(yytext(), yychar);
706         onDisjointSpanChanged(null, yychar);
707         yypop();
708     }
709 }
710 
711 <YYINITIAL, TAG_NAME, AFTER_TAG_NAME, ATTRIBUTE_NOQUOTE, ATTRIBUTE_DOUBLE,
712     ATTRIBUTE_SINGLE, HTMLCOMMENT, IN_SCRIPT, STRING, QSTRING, BACKQUOTE,
713     HEREDOC, NOWDOC, SCOMMENT, COMMENT, DOCCOMMENT, STRINGEXPR, STRINGVAR> {
714 
    /* Fallback rules for the listed states. */
715     {WhspChar}* {EOL} {
716         onEndOfLineMatched(yytext(), yychar);
717     }
    /* Whitespace other than a newline: written without calling chkLOC(). */
718     [[\s]--[\n]]    { onNonSymbolMatched(yytext(), yychar); }
    /* Any other single character except a newline. */
719     [^\n]    { chkLOC(); onNonSymbolMatched(yytext(), yychar); }
720 }
721 
722 <YYINITIAL, HTMLCOMMENT, SCOMMENT, COMMENT, DOCCOMMENT, STRING, QSTRING, BACKQUOTE, HEREDOC, NOWDOC> {
    /* In HTML text, comments and strings, path-like, file-like, URI and
     * e-mail tokens are passed to their dedicated handlers. {FPath},
     * {BrowseableURI} and {FNameChar} are not defined in this file. */
723     {FPath}
724             { chkLOC(); onPathlikeMatched(yytext(), '/', false, yychar); }
725 
726     {File}
727             {
728             chkLOC();
729             String path = yytext();
730             onFilelikeMatched(path, yychar);
731     }
732 
733     {BrowseableURI}    {
734               chkLOC();
735               onUriMatched(yytext(), yychar);
736             }
737 
738     {FNameChar}+ "@" {FNameChar}+ "." {FNameChar}+
739             {
740             chkLOC();
741             onEmailAddressMatched(yytext(), yychar);
742             }
743 }
744