1 /*
2 * Copyright (c) 2013, Colomban Wendling <ban@herbesfolles.org>
3 *
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License version 2 or (at your option) any later version.
6 *
7 * This module contains code for generating tags for the PHP scripting
8 * language.
9 *
10 * The language reference: http://php.net/manual/en/langref.php
11 */
12
13 /*
14 * INCLUDE FILES
15 */
16 #include "general.h" /* must always come first */
17
18 #include <string.h>
19
20 #include "parse.h"
21 #include "read.h"
22 #include "vstring.h"
23 #include "keyword.h"
24 #include "entry.h"
25 #include "routines.h"
26 #include "debug.h"
27 #include "objpool.h"
28 #include "promise.h"
29
/* True when C may appear in an identifier: anything isalnum() accepts, an
 * underscore, or any value >= 0x80.  C is evaluated up to three times, so it
 * must not have side effects. */
#define isIdentChar(c) (isalnum (c) || (c) == '_' || (c) >= 0x80)
/* Obtains a token object from TokenPool. */
#define newToken() (objPoolGet (TokenPool))
/* Hands token T back to TokenPool. */
#define deleteToken(t) (objPoolPut (TokenPool, (t)))
33
/* Keyword IDs.  Each one is bound to its spelling in PhpKeywordTable; note
 * that KEYWORD_elif is the ID used for the "elseif" spelling. */
enum {
	KEYWORD_abstract,
	KEYWORD_and,
	KEYWORD_as,
	KEYWORD_break,
	KEYWORD_callable,
	KEYWORD_case,
	KEYWORD_catch,
	KEYWORD_class,
	KEYWORD_clone,
	KEYWORD_const,
	KEYWORD_continue,
	KEYWORD_declare,
	KEYWORD_define,
	KEYWORD_default,
	KEYWORD_do,
	KEYWORD_echo,
	KEYWORD_else,
	KEYWORD_elif,
	KEYWORD_enddeclare,
	KEYWORD_endfor,
	KEYWORD_endforeach,
	KEYWORD_endif,
	KEYWORD_endswitch,
	KEYWORD_endwhile,
	KEYWORD_extends,
	KEYWORD_final,
	KEYWORD_finally,
	KEYWORD_for,
	KEYWORD_foreach,
	KEYWORD_function,
	KEYWORD_global,
	KEYWORD_goto,
	KEYWORD_if,
	KEYWORD_implements,
	KEYWORD_include,
	KEYWORD_include_once,
	KEYWORD_instanceof,
	KEYWORD_insteadof,
	KEYWORD_interface,
	KEYWORD_namespace,
	KEYWORD_new,
	KEYWORD_or,
	KEYWORD_print,
	KEYWORD_private,
	KEYWORD_protected,
	KEYWORD_public,
	KEYWORD_require,
	KEYWORD_require_once,
	KEYWORD_return,
	KEYWORD_static,
	KEYWORD_switch,
	KEYWORD_throw,
	KEYWORD_trait,
	KEYWORD_try,
	KEYWORD_use,
	KEYWORD_var,
	KEYWORD_while,
	KEYWORD_xor,
	KEYWORD_yield
};
/* Plain int rather than the enum type, so that a variable can also hold
 * KEYWORD_NONE, which is not one of the enumerators above. */
typedef int keywordId; /* to allow KEYWORD_NONE */
96
/* Visibility of a declaration.  The values index the name table in
 * accessToString(), so the two must be kept in the same order;
 * COUNT_ACCESS is the number of real values, not a value itself. */
typedef enum {
	ACCESS_UNDEFINED,
	ACCESS_PRIVATE,
	ACCESS_PROTECTED,
	ACCESS_PUBLIC,
	COUNT_ACCESS
} accessType;
104
/* Implementation modifier of a declaration.  The values index the name
 * table in implToString(); COUNT_IMPL is the number of real values. */
typedef enum {
	IMPL_UNDEFINED,
	IMPL_ABSTRACT,
	COUNT_IMPL
} implType;
110
/* Tag kinds emitted by this parser.  A phpKind is used directly as an index
 * into PhpKinds[], so the order here must match the order of that table;
 * COUNT_KIND is the table size, not a kind. */
typedef enum {
	K_CLASS,
	K_DEFINE,
	K_FUNCTION,
	K_INTERFACE,
	K_LOCAL_VARIABLE,
	K_NAMESPACE,
	K_TRAIT,
	K_VARIABLE,
	K_ALIAS,
	COUNT_KIND
} phpKind;
123
/* Separator written after a namespace component: a single backslash. */
#define NAMESPACE_SEPARATOR "\\"
/* Scope separators shared by every kind in PhpKinds[]: a backslash when the
 * upper scope is a namespace, "::" for any other upper scope
 * (NOTE(review): exact matching rules live in scopeSeparatorFor(), which is
 * not visible here -- confirm). */
static scopeSeparator PhpGenericSeparators [] = {
	{ K_NAMESPACE        , NAMESPACE_SEPARATOR },
	{ KIND_WILDCARD_INDEX, "::" },
};
129
/* Kind table, indexed by phpKind (entries are in phpKind order).  This file
 * reads the .enabled and .name members of an entry; the leading boolean is
 * presumably the default enabled state, which would make local variables the
 * only kind that is off by default -- TODO confirm against kindDefinition. */
static kindDefinition PhpKinds[COUNT_KIND] = {
	{ true, 'c', "class",		"classes",
	  ATTACH_SEPARATORS(PhpGenericSeparators) },
	{ true, 'd', "define",		"constant definitions",
	  ATTACH_SEPARATORS(PhpGenericSeparators)},
	{ true, 'f', "function",	"functions",
	  ATTACH_SEPARATORS(PhpGenericSeparators)},
	{ true, 'i', "interface",	"interfaces",
	  ATTACH_SEPARATORS(PhpGenericSeparators)},
	{ false, 'l', "local",		"local variables",
	  ATTACH_SEPARATORS(PhpGenericSeparators)},
	{ true, 'n', "namespace",	"namespaces",
	  ATTACH_SEPARATORS(PhpGenericSeparators)},
	{ true, 't', "trait",		"traits",
	  ATTACH_SEPARATORS(PhpGenericSeparators)},
	{ true, 'v', "variable",	"variables",
	  ATTACH_SEPARATORS(PhpGenericSeparators)},
	{ true, 'a', "alias",		"aliases",
	  ATTACH_SEPARATORS(PhpGenericSeparators)},
};
150
/* Maps keyword spellings to keyword IDs.  readToken() resolves identifiers
 * against these through lookupCaseKeyword().  Two spellings, "cfunction" and
 * "function", share KEYWORD_function. */
static const keywordTable PhpKeywordTable[] = {
	/* keyword			keyword ID */
	{ "abstract",		KEYWORD_abstract		},
	{ "and",			KEYWORD_and				},
	{ "as",				KEYWORD_as				},
	{ "break",			KEYWORD_break			},
	{ "callable",		KEYWORD_callable		},
	{ "case",			KEYWORD_case			},
	{ "catch",			KEYWORD_catch			},
	{ "cfunction",		KEYWORD_function		}, /* nobody knows what the hell this is, but it seems to behave much like "function" so bind it to it */
	{ "class",			KEYWORD_class			},
	{ "clone",			KEYWORD_clone			},
	{ "const",			KEYWORD_const			},
	{ "continue",		KEYWORD_continue		},
	{ "declare",		KEYWORD_declare			},
	{ "define",			KEYWORD_define			}, /* this isn't really a keyword but we handle it so it's easier this way */
	{ "default",		KEYWORD_default			},
	{ "do",				KEYWORD_do				},
	{ "echo",			KEYWORD_echo			},
	{ "else",			KEYWORD_else			},
	{ "elseif",			KEYWORD_elif			},
	{ "enddeclare",		KEYWORD_enddeclare		},
	{ "endfor",			KEYWORD_endfor			},
	{ "endforeach",		KEYWORD_endforeach		},
	{ "endif",			KEYWORD_endif			},
	{ "endswitch",		KEYWORD_endswitch		},
	{ "endwhile",		KEYWORD_endwhile		},
	{ "extends",		KEYWORD_extends			},
	{ "final",			KEYWORD_final			},
	{ "finally",		KEYWORD_finally			},
	{ "for",			KEYWORD_for				},
	{ "foreach",		KEYWORD_foreach			},
	{ "function",		KEYWORD_function		},
	{ "global",			KEYWORD_global			},
	{ "goto",			KEYWORD_goto			},
	{ "if",				KEYWORD_if				},
	{ "implements",		KEYWORD_implements		},
	{ "include",		KEYWORD_include			},
	{ "include_once",	KEYWORD_include_once	},
	{ "instanceof",		KEYWORD_instanceof		},
	{ "insteadof",		KEYWORD_insteadof		},
	{ "interface",		KEYWORD_interface		},
	{ "namespace",		KEYWORD_namespace		},
	{ "new",			KEYWORD_new				},
	{ "or",				KEYWORD_or				},
	{ "print",			KEYWORD_print			},
	{ "private",		KEYWORD_private			},
	{ "protected",		KEYWORD_protected		},
	{ "public",			KEYWORD_public			},
	{ "require",		KEYWORD_require			},
	{ "require_once",	KEYWORD_require_once	},
	{ "return",			KEYWORD_return			},
	{ "static",			KEYWORD_static			},
	{ "switch",			KEYWORD_switch			},
	{ "throw",			KEYWORD_throw			},
	{ "trait",			KEYWORD_trait			},
	{ "try",			KEYWORD_try				},
	{ "use",			KEYWORD_use				},
	{ "var",			KEYWORD_var				},
	{ "while",			KEYWORD_while			},
	{ "xor",			KEYWORD_xor				},
	{ "yield",			KEYWORD_yield			}
};
214
215
/* Token classes produced by readToken().  TOKEN_UNDEFINED is both the
 * initial state of a token and what readToken() reports for input it does
 * not classify.  TOKEN_OPERATOR covers "::", "==", "=>", "->" and the
 * arithmetic operators, with or without a trailing '='. */
typedef enum eTokenType {
	TOKEN_UNDEFINED,
	TOKEN_EOF,
	TOKEN_CHARACTER,
	TOKEN_CLOSE_PAREN,
	TOKEN_SEMICOLON,
	TOKEN_COLON,
	TOKEN_COMMA,
	TOKEN_KEYWORD,
	TOKEN_OPEN_PAREN,
	TOKEN_OPERATOR,
	TOKEN_IDENTIFIER,
	TOKEN_STRING,
	TOKEN_PERIOD,
	TOKEN_OPEN_CURLY,
	TOKEN_CLOSE_CURLY,
	TOKEN_EQUAL_SIGN,
	TOKEN_OPEN_SQUARE,
	TOKEN_CLOSE_SQUARE,
	TOKEN_VARIABLE,
	TOKEN_AMPERSAND,
	TOKEN_BACKSLASH,
	TOKEN_QMARK,
} tokenType;
240
/* One lexical token plus the scope it was read in.  Instances come from
 * TokenPool (see newToken()/deleteToken()); the two vStrings are owned by
 * the token and are created and destroyed with it. */
typedef struct {
	tokenType		type;
	keywordId		keyword;		/* KEYWORD_NONE unless type is TOKEN_KEYWORD */
	vString *		string;			/* text of identifiers, variables (without
									 * the '$') and string literals */
	vString *		scope;			/* scope name the token lives in; empty
									 * when there is none */
	unsigned long	lineNumber;
	MIOPos			filePosition;
	int				parentKind; /* -1 if none */
	bool			anonymous;	/* true if token specifies
								 * an anonymous class */
} tokenInfo;
252
/* Language IDs of the two languages served by this parser; they are not
 * assigned anywhere in the part of the file shown here. */
static langType Lang_php;
static langType Lang_zephir;

static bool InPhp = false; /* whether we are between <? ?> */
/* whether the next token may be a keyword, e.g. not after "::" or "->" */
static bool MayBeKeyword = true;

/* current statement details; readToken() resets both members whenever it
 * returns ';', '{' or '}' */
static struct {
	accessType access;
	implType impl;
} CurrentStatement;

/* Current namespace (the identifier is misspelled, "Namesapce"; it is kept
 * as is because it is referenced under that name) */
static vString *CurrentNamesapce;
/* Cache variable to build the tag's scope.  It has no real meaning outside
 * of initPhpEntry()'s scope. */
static vString *FullScope;
/* The class name specified at "extends" keyword in the current class
 * definition.  Used to resolve "parent" in return type. */
static vString *ParentClass;

/* Pool backing newToken()/deleteToken() */
static objPool *TokenPool = NULL;
276
phpScopeSeparatorFor(int kind,int upperScopeKind)277 static const char *phpScopeSeparatorFor (int kind, int upperScopeKind)
278 {
279 return scopeSeparatorFor (getInputLanguage(), kind, upperScopeKind);
280 }
281
accessToString(const accessType access)282 static const char *accessToString (const accessType access)
283 {
284 static const char *const names[COUNT_ACCESS] = {
285 "undefined",
286 "private",
287 "protected",
288 "public"
289 };
290
291 Assert (access < COUNT_ACCESS);
292
293 return names[access];
294 }
295
implToString(const implType impl)296 static const char *implToString (const implType impl)
297 {
298 static const char *const names[COUNT_IMPL] = {
299 "undefined",
300 "abstract"
301 };
302
303 Assert (impl < COUNT_IMPL);
304
305 return names[impl];
306 }
307
/* Fills @e for a tag of kind @kind named after @token.
 *
 * The tag's scope is assembled in the file-level FullScope buffer from the
 * current namespace followed by the token's own scope, so the scope name
 * stored in @e points into FullScope and is only valid until the next call.
 * @access is recorded unless it is ACCESS_UNDEFINED, and the tag is flagged
 * as anonymous when the token is. */
static void initPhpEntry (tagEntryInfo *const e, const tokenInfo *const token,
						  const phpKind kind, const accessType access)
{
	int scopeKind = -1;

	/* start from the namespace, if we are inside one */
	vStringClear (FullScope);
	if (vStringLength (CurrentNamesapce) != 0)
	{
		scopeKind = K_NAMESPACE;
		vStringCat (FullScope, CurrentNamesapce);
	}

	initTagEntry (e, vStringValue (token->string), kind);
	e->lineNumber = token->lineNumber;
	e->filePosition = token->filePosition;

	if (access != ACCESS_UNDEFINED)
		e->extensionFields.access = accessToString (access);

	/* then append the token's own scope, which also decides the scope kind */
	if (vStringLength (token->scope) != 0)
	{
		scopeKind = token->parentKind;

		if (vStringLength (FullScope) != 0)
			vStringCatS (FullScope,
						 phpScopeSeparatorFor (scopeKind, K_NAMESPACE));
		vStringCat (FullScope, token->scope);
	}

	if (vStringLength (FullScope) != 0)
	{
		Assert (scopeKind >= 0);

		e->extensionFields.scopeKindIndex = scopeKind;
		e->extensionFields.scopeName = vStringValue (FullScope);
	}

	if (token->anonymous)
		markTagExtraBit (e, XTAG_ANONYMOUS);
}
354
/* Emits @e through makeTagEntry() and then through
 * makeQualifiedTagEntry(). */
static void makePhpTagEntry (tagEntryInfo *const e)
{
	makeTagEntry (e);
	makeQualifiedTagEntry (e);
}
360
/* Sets the two typeRef fields of @e from the type name @rtype.
 *
 * "self" is resolved to the scope of @token (when it has one) and "parent"
 * to ParentClass (when one is recorded); any other name, or a name that
 * cannot be resolved, is stored verbatim with the kind "unknown".  The
 * stored pointers refer to the strings passed in, they are not copied. */
static void fillTypeRefField (tagEntryInfo *const e,
							  const vString *const rtype, const tokenInfo *const token)
{
	const char *refKind = "unknown";
	const char *refName = vStringValue (rtype);
	const bool isSelf = (vStringLength (rtype) == 4)
		&& (strcmp (vStringValue (rtype), "self") == 0);
	const bool isParent = (vStringLength (rtype) == 6)
		&& (strcmp (vStringValue (rtype), "parent") == 0);

	if (isSelf && vStringLength (token->scope) > 0)
	{
		if (token->parentKind != -1)
			refKind = PhpKinds [token->parentKind].name;
		refName = vStringValue (token->scope);
	}
	else if (isParent && ParentClass && vStringLength (ParentClass) > 0)
	{
		refKind = "class";
		refName = vStringValue (ParentClass);
	}

	e->extensionFields.typeRef [0] = refKind;
	e->extensionFields.typeRef [1] = refName;
}
387
/* Emits a tag of kind @kind for @token, unless that kind is disabled.
 * @typeName may be NULL; otherwise it is recorded as the tag's type
 * reference. */
static void makeTypedPhpTag (const tokenInfo *const token, const phpKind kind,
							 const accessType access, vString* typeName)
{
	tagEntryInfo entry;

	if (! PhpKinds[kind].enabled)
		return;

	initPhpEntry (&entry, token, kind, access);
	if (typeName != NULL)
		fillTypeRefField (&entry, typeName, token);
	makePhpTagEntry (&entry);
}
401
/* Same as makeTypedPhpTag(), but without any type reference. */
static void makeSimplePhpTag (const tokenInfo *const token, const phpKind kind,
							  const accessType access)
{
	makeTypedPhpTag (token, kind, access, NULL);
}
407
makeNamespacePhpTag(const tokenInfo * const token,const vString * const name)408 static void makeNamespacePhpTag (const tokenInfo *const token, const vString *const name)
409 {
410 if (PhpKinds[K_NAMESPACE].enabled)
411 {
412 tagEntryInfo e;
413
414 initTagEntry (&e, vStringValue (name), K_NAMESPACE);
415
416 e.lineNumber = token->lineNumber;
417 e.filePosition = token->filePosition;
418
419 makePhpTagEntry (&e);
420 }
421 }
422
/* Emits a tag of kind @kind for @token, unless that kind is disabled.
 * @impl is recorded unless it is IMPL_UNDEFINED, and @inheritance is
 * recorded unless it is empty. */
static void makeClassOrIfaceTag (const phpKind kind, const tokenInfo *const token,
								 vString *const inheritance, const implType impl)
{
	tagEntryInfo entry;

	if (! PhpKinds[kind].enabled)
		return;

	initPhpEntry (&entry, token, kind, ACCESS_UNDEFINED);

	if (impl != IMPL_UNDEFINED)
		entry.extensionFields.implementation = implToString (impl);
	if (vStringLength (inheritance) != 0)
		entry.extensionFields.inheritance = vStringValue (inheritance);

	makePhpTagEntry (&entry);
}
440
makeFunctionTag(const tokenInfo * const token,const vString * const arglist,const vString * const rtype,const accessType access,const implType impl)441 static void makeFunctionTag (const tokenInfo *const token,
442 const vString *const arglist,
443 const vString *const rtype,
444 const accessType access, const implType impl)
445 {
446 if (PhpKinds[K_FUNCTION].enabled)
447 {
448 tagEntryInfo e;
449
450 initPhpEntry (&e, token, K_FUNCTION, access);
451
452 if (impl != IMPL_UNDEFINED)
453 e.extensionFields.implementation = implToString (impl);
454 if (arglist)
455 e.extensionFields.signature = vStringValue (arglist);
456 if (rtype)
457 fillTypeRefField (&e, rtype, token);
458
459 makePhpTagEntry (&e);
460 }
461 }
462
/* Token constructor: allocates a token together with its two vStrings.
 * The remaining members are left unset here; clearPoolToken() is the
 * function that gives them their initial values. */
static void *newPoolToken (void *createArg CTAGS_ATTR_UNUSED)
{
	tokenInfo *const fresh = xMalloc (1, tokenInfo);

	fresh->string = vStringNew ();
	fresh->scope = vStringNew ();

	return fresh;
}
471
clearPoolToken(void * data)472 static void clearPoolToken (void *data)
473 {
474 tokenInfo *token = data;
475
476 token->type = TOKEN_UNDEFINED;
477 token->keyword = KEYWORD_NONE;
478 token->lineNumber = getInputLineNumber ();
479 token->filePosition = getInputFilePosition ();
480 token->parentKind = -1;
481 token->anonymous = false;
482 vStringClear (token->string);
483 vStringClear (token->scope);
484 }
485
deletePoolToken(void * data)486 static void deletePoolToken (void *data)
487 {
488 tokenInfo *token = data;
489 vStringDelete (token->string);
490 vStringDelete (token->scope);
491 eFree (token);
492 }
493
/* Copies @src into @dest.  The scope string is only copied when @scope is
 * true; otherwise @dest keeps the scope string it already had (its
 * parentKind is overwritten in both cases). */
static void copyToken (tokenInfo *const dest, const tokenInfo *const src,
					   bool scope)
{
	/* plain members */
	dest->type			= src->type;
	dest->keyword		= src->keyword;
	dest->lineNumber	= src->lineNumber;
	dest->filePosition	= src->filePosition;
	dest->parentKind	= src->parentKind;
	dest->anonymous		= src->anonymous;

	/* owned strings */
	vStringCopy (dest->string, src->string);
	if (scope)
		vStringCopy (dest->scope, src->scope);
}
507
#if 0
/* Debugging helpers, compiled out by default.  Change the "#if 0" above to
 * "#if 1" to be able to call printToken() while working on the parser. */
#include <stdio.h>

/* Returns a short printable name for @type.  Every tokenType value is
 * covered, so the result is never NULL for a valid token. */
static const char *tokenTypeName (const tokenType type)
{
	switch (type)
	{
		case TOKEN_UNDEFINED:		return "undefined";
		case TOKEN_EOF:				return "EOF";
		case TOKEN_CHARACTER:		return "character";
		case TOKEN_CLOSE_PAREN:		return "')'";
		case TOKEN_SEMICOLON:		return "';'";
		case TOKEN_COLON:			return "':'";
		case TOKEN_COMMA:			return "','";
		case TOKEN_OPEN_PAREN:		return "'('";
		case TOKEN_OPERATOR:		return "operator";
		case TOKEN_IDENTIFIER:		return "identifier";
		case TOKEN_KEYWORD:			return "keyword";
		case TOKEN_STRING:			return "string";
		case TOKEN_PERIOD:			return "'.'";
		case TOKEN_OPEN_CURLY:		return "'{'";
		case TOKEN_CLOSE_CURLY:		return "'}'";
		case TOKEN_EQUAL_SIGN:		return "'='";
		case TOKEN_OPEN_SQUARE:		return "'['";
		case TOKEN_CLOSE_SQUARE:	return "']'";
		case TOKEN_VARIABLE:		return "variable";
		case TOKEN_AMPERSAND:		return "'&'";
		case TOKEN_BACKSLASH:		return "'\\'";
		case TOKEN_QMARK:			return "'?'";
	}
	/* only reachable with a value outside of tokenType; still return
	 * something that is safe to hand to a "%s" conversion */
	return "(invalid)";
}

/* Dumps @token to stderr: its address, type, line and scope, followed by
 * its text (identifiers, strings, variables) or its keyword spelling. */
static void printToken (const tokenInfo *const token)
{
	fprintf (stderr, "%p:\n\ttype:\t%s\n\tline:\t%lu\n\tscope:\t%s\n", (void *) token,
			 tokenTypeName (token->type),
			 token->lineNumber,
			 vStringValue (token->scope));
	switch (token->type)
	{
		case TOKEN_IDENTIFIER:
		case TOKEN_STRING:
		case TOKEN_VARIABLE:
			fprintf (stderr, "\tcontent:\t%s\n", vStringValue (token->string));
			break;

		case TOKEN_KEYWORD:
		{
			size_t n = ARRAY_SIZE (PhpKeywordTable);
			size_t i;

			/* reverse lookup: the first spelling bound to this ID wins */
			fprintf (stderr, "\tkeyword:\t");
			for (i = 0; i < n; i++)
			{
				if (PhpKeywordTable[i].id == token->keyword)
				{
					fprintf (stderr, "%s\n", PhpKeywordTable[i].name);
					break;
				}
			}
			if (i >= n)
				fprintf (stderr, "(unknown)\n");
			break;
		}

		default: break;
	}
}
#endif
574
/* Appends @extra to the scope of @token.  If the scope is not empty, the
 * separator for the token's parent kind below @kindOfUpperScope is inserted
 * first. */
static void addToScope (tokenInfo *const token, const vString *const extra,
						int kindOfUpperScope)
{
	vString *const scope = token->scope;

	if (vStringLength (scope) != 0)
		vStringCatS (scope,
					 phpScopeSeparatorFor (token->parentKind, kindOfUpperScope));

	vStringCat (scope, extra);
}
588
/* Consumes input up to and including the next occurrence of @c.
 * Returns @c, or EOF if the input ended first. */
static int skipToCharacter (const int c)
{
	int got = getcFromInputFile ();

	while (got != EOF && got != c)
		got = getcFromInputFile ();

	return got;
}
598
/* Reads a quoted string body into @string, the opening quote having been
 * consumed already.  Reading stops after the closing @delimiter or at EOF.
 * A backslash is dropped and the character after it is stored as is, so an
 * escaped delimiter does not end the string. */
static void parseString (vString *const string, const int delimiter)
{
	for (;;)
	{
		int c = getcFromInputFile ();

		if (c == '\\')
		{
			/* escape: take the next character literally, whatever it is */
			c = getcFromInputFile ();
			if (c == EOF)
				break;
			vStringPut (string, (char) c);
			continue;
		}

		if (c == EOF || c == delimiter)
			break;

		vStringPut (string, (char) c);
	}
}
613
614 /* Strips @indent_len characters from lines in @string to get the correct
615 * string value for an indented heredoc (PHP 7.3+).
616 * This doesn't handle invalid values specially and might yield surprising
617 * results with them, but it doesn't really matter as it's invalid anyway. */
stripHeredocIndent(vString * const string,size_t indent_len)618 static void stripHeredocIndent (vString *const string, size_t indent_len)
619 {
620 char *str = vStringValue (string);
621 size_t str_len = vStringLength (string);
622 char *p = str;
623 size_t new_len = str_len;
624 bool at_line_start = true;
625
626 while (*p)
627 {
628 if (at_line_start)
629 {
630 size_t p_len;
631 size_t strip_len;
632
633 p_len = str_len - (p - str);
634 strip_len = p_len < indent_len ? p_len : indent_len;
635 memmove (p, p + strip_len, p_len - strip_len);
636 p += strip_len;
637 new_len -= strip_len;
638 }
639 /* CRLF is already normalized as LF */
640 at_line_start = (*p == '\r' || *p == '\n');
641 p++;
642 }
643 vStringTruncate (string, new_len);
644 }
645
646 /* reads a PHP >= 7.3 HereDoc or a NowDoc (the part after the <<<).
647 * <<<[ \t]*(ID|'ID'|"ID")
648 * ...
649 * [ \t]*ID[^:indent-char:];?
650 *
651 * note that:
652 * 1) starting ID must be immediately followed by a newline;
653 * 2) closing ID is the same as opening one;
654 * 3) closing ID must not be immediately followed by an identifier character;
655 * 4) optional indentation of the closing ID is stripped from body lines,
656 * which lines must have the exact same prefix indentation.
657 *
658 * This is slightly relaxed from PHP < 7.3, where the closing ID had to be the
659 * only thing on its line, with the only exception of a semicolon right after
660 * the ID.
661 *
662 * Example of a single valid heredoc:
663 * <<< FOO
664 * something
665 * something else
666 * FOO_this is not an end
667 * FOO;
668 * # previous line was the end, but the semicolon wasn't required
669 *
670 * Another example using indentation and more code after the heredoc:
671 * <<<FOO
672 * something
673 * something else
674 * FOO . 'hello';
675 * # the heredoc ends at FOO, and leading tabs are stripped from the body.
676 * # ". 'hello'" is a normal concatenation operator and the string "hello".
677 */
parseHeredoc(vString * const string)678 static void parseHeredoc (vString *const string)
679 {
680 int c;
681 unsigned int len;
682 char delimiter[64]; /* arbitrary limit, but more is crazy anyway */
683 int quote = 0;
684
685 do
686 {
687 c = getcFromInputFile ();
688 }
689 while (c == ' ' || c == '\t');
690
691 if (c == '\'' || c == '"')
692 {
693 quote = c;
694 c = getcFromInputFile ();
695 }
696 for (len = 0; len < ARRAY_SIZE (delimiter) - 1; len++)
697 {
698 if (! isIdentChar (c))
699 break;
700 delimiter[len] = (char) c;
701 c = getcFromInputFile ();
702 }
703 delimiter[len] = 0;
704
705 if (len == 0) /* no delimiter, give up */
706 goto error;
707 if (quote)
708 {
709 if (c != quote) /* no closing quote for quoted identifier, give up */
710 goto error;
711 c = getcFromInputFile ();
712 }
713 if (c != '\r' && c != '\n') /* missing newline, give up */
714 goto error;
715
716 do
717 {
718 c = getcFromInputFile ();
719
720 vStringPut (string, (char) c);
721 if (c == '\r' || c == '\n')
722 {
723 /* new line, check for a delimiter right after. No need to handle
724 * CRLF, getcFromInputFile() normalizes it to LF already. */
725 const size_t prev_string_len = vStringLength (string) - 1;
726 size_t indent_len = 0;
727
728 c = getcFromInputFile ();
729 while (c == ' ' || c == '\t')
730 {
731 vStringPut (string, (char) c);
732 c = getcFromInputFile ();
733 indent_len++;
734 }
735
736 for (len = 0; c != 0 && (c - delimiter[len]) == 0; len++)
737 c = getcFromInputFile ();
738
739 if (delimiter[len] != 0)
740 ungetcToInputFile (c);
741 else if (! isIdentChar (c))
742 {
743 /* line start matched the delimiter and has a separator, we're done */
744 ungetcToInputFile (c);
745
746 /* strip trailing newline and indent of the end delimiter */
747 vStringTruncate (string, prev_string_len);
748
749 /* strip indent from the value if needed */
750 if (indent_len > 0)
751 stripHeredocIndent (string, indent_len);
752 break;
753 }
754 /* if we are here it wasn't a delimiter, so put everything in the
755 * string */
756 vStringNCatS (string, delimiter, len);
757 }
758 }
759 while (c != EOF);
760
761 return;
762
763 error:
764 ungetcToInputFile (c);
765 }
766
/* Appends an identifier to @string: @firstChar, which is stored
 * unconditionally, followed by every identifier character read from the
 * input.  The first character that does not belong to the identifier is
 * pushed back. */
static void parseIdentifier (vString *const string, const int firstChar)
{
	int c;

	vStringPut (string, (char) firstChar);
	for (c = getcFromInputFile (); isIdentChar (c); c = getcFromInputFile ())
		vStringPut (string, (char) c);

	ungetcToInputFile (c);
}
777
/* Tells whether @c is one of the six ASCII whitespace characters: tab,
 * space, vertical tab, line feed, carriage return or form feed.  Unlike
 * isspace() this does not depend on the locale and accepts any int. */
static bool isSpace (int c)
{
	switch (c)
	{
		case '\t':
		case ' ':
		case '\v':
		case '\n':
		case '\r':
		case '\f':
			return true;

		default:
			return false;
	}
}
783
/* Starting with @c, reads on for as long as the current character is
 * whitespace.  Returns the first non-whitespace character, which is @c
 * itself when it is not whitespace, and may be EOF. */
static int skipWhitespaces (int c)
{
	for (; isSpace (c); c = getcFromInputFile ())
		;

	return c;
}
790
/* <script[:white:]+language[:white:]*=[:white:]*(php|'php'|"php")[:white:]*>
 *
 * This is ugly, but the whole "<script language=php>" tag is and we can't
 * really do better without adding a lot of code only for this
 *
 * @c is the character the tag is supposed to start with; everything after it
 * is read from the input, case-insensitively.  Reading stops at the first
 * character that does not fit, and what was consumed up to then is not put
 * back. */
static bool isOpenScriptLanguagePhp (int c)
{
	static const char tagName[] = "script";
	static const char attrTail[] = "anguage";	/* "language" past its 'l' */
	static const char valueTail[] = "hp";		/* "php" past its first 'p' */
	const char *p;
	int quote = 0;

	/* <script[:white:]+ */
	if (c != '<')
		return false;
	for (p = tagName; *p != '\0'; p++)
	{
		c = getcFromInputFile ();
		if (tolower (c) != *p)
			return false;
	}
	c = getcFromInputFile ();
	if (! isSpace (c))
		return false;

	/* language[:white:]*= */
	c = skipWhitespaces (c);
	if (tolower (c) != 'l')
		return false;
	for (p = attrTail; *p != '\0'; p++)
	{
		c = getcFromInputFile ();
		if (tolower (c) != *p)
			return false;
	}
	c = skipWhitespaces (getcFromInputFile ());
	if (c != '=')
		return false;

	/* (php|'php'|"php") */
	c = skipWhitespaces (getcFromInputFile ());
	if (c == '"' || c == '\'')
	{
		quote = c;
		c = getcFromInputFile ();
	}
	if (tolower (c) != 'p')
		return false;
	for (p = valueTail; *p != '\0'; p++)
	{
		c = getcFromInputFile ();
		if (tolower (c) != *p)
			return false;
	}
	if (quote != 0)
	{
		c = getcFromInputFile ();
		if (c != quote)
			return false;
	}

	/* [:white:]*> */
	c = skipWhitespaces (getcFromInputFile ());
	return c == '>';
}
835
/* Skips non-PHP input up to and including the next PHP open tag.
 *
 * Recognized open tags are "<?" (which covers "<?php"), the echo tag "<?="
 * and <script language="php">; "<?xml" is not one.
 *
 * Returns the first character read after the open tag, i.e. the first
 * character that belongs to the PHP code, or EOF if the input ended.  As
 * "<?php" is matched as "<?", the character returned for it is the 'p' and
 * the caller gets to read "php" as an ordinary identifier. */
static int findPhpStart (void)
{
	int c;
	do
	{
		if ((c = getcFromInputFile ()) == '<')
		{
			c = getcFromInputFile ();
			/* <?, <?= and <?php, but not <?xml */
			if (c == '?')
			{
				c = getcFromInputFile ();
				/* echo tag: PHP starts right after the '=' */
				if (c == '=')
				{
					c = getcFromInputFile ();
					break;
				}
				/* don't enter PHP mode on "<?xml", yet still support short open tags (<?) */
				else if (tolower (c)                          != 'x' ||
				         tolower ((c = getcFromInputFile ())) != 'm' ||
				         tolower ((c = getcFromInputFile ())) != 'l')
				{
					break;
				}
			}
			/* <script language="php"> */
			else
			{
				ungetcToInputFile (c);
				if (isOpenScriptLanguagePhp ('<'))
				{
					/* the whole tag was consumed; c still holds the 's' of
					 * "script", which must not be handed out as PHP code */
					c = getcFromInputFile ();
					break;
				}
			}
		}
	}
	while (c != EOF);

	return c;
}
872
skipSingleComment(void)873 static int skipSingleComment (void)
874 {
875 int c;
876 do
877 {
878 c = getcFromInputFile ();
879 /* ?> in single-line comments leaves PHP mode */
880 if (c == '?')
881 {
882 int next = getcFromInputFile ();
883 if (next == '>')
884 InPhp = false;
885 else
886 ungetcToInputFile (next);
887 }
888 } while (InPhp && c != EOF && c != '\n' && c != '\r');
889 return c;
890 }
891
/* Reads the next token from the input into @token.
 *
 * Only the type, keyword, string, line number and file position of @token
 * are written; its scope, parentKind and anonymous members are left alone.
 * Comments, whitespace and everything outside of PHP tags are skipped.
 *
 * Side effects on file-level state:
 *  - InPhp is set when an open tag is found and cleared on "?>" and
 *    "</script>";
 *  - MayBeKeyword is updated for the token that follows: it becomes false
 *    after "::" and "->", true after anything else;
 *  - CurrentStatement is reset when the token is ';', '{' or '}'. */
static void readToken (tokenInfo *const token)
{
	int c;
	bool nextMayBeKeyword = true;

	token->type		= TOKEN_UNDEFINED;
	token->keyword	= KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:

	if (! InPhp)
	{
		/* remember where the non-PHP area starts... */
		unsigned long startSourceLineNumber = getSourceLineNumber ();
		unsigned long startLineNumber = startSourceLineNumber;
		int startLineOffset = getInputLineOffset ();

		c = findPhpStart ();
		if (c != EOF)
			InPhp = true;

		/* ...and where it ends */
		unsigned long endLineNumber = getInputLineNumber ();
		int endLineOffset = getInputLineOffset ();

		/* a non-empty area is handed to makePromise() labelled "HTML"
		 * (presumably so that it gets parsed as HTML later -- makePromise()
		 * is not defined in this file) */
		if ((startLineNumber != endLineNumber)
			|| (startLineOffset != endLineOffset))
			makePromise ("HTML", startLineNumber, startLineOffset,
						 endLineNumber, endLineOffset, startSourceLineNumber);
	}
	else
		c = getcFromInputFile ();

	c = skipWhitespaces (c);

	token->lineNumber	= getInputLineNumber ();
	token->filePosition	= getInputFilePosition ();

	switch (c)
	{
		case EOF: token->type = TOKEN_EOF;					break;
		case '(': token->type = TOKEN_OPEN_PAREN;			break;
		case ')': token->type = TOKEN_CLOSE_PAREN;			break;
		case ';': token->type = TOKEN_SEMICOLON;			break;
		case ',': token->type = TOKEN_COMMA;				break;
		case '.': token->type = TOKEN_PERIOD;				break;
		case '{': token->type = TOKEN_OPEN_CURLY;			break;
		case '}': token->type = TOKEN_CLOSE_CURLY;			break;
		case '[': token->type = TOKEN_OPEN_SQUARE;			break;
		case ']': token->type = TOKEN_CLOSE_SQUARE;			break;
		case '&': token->type = TOKEN_AMPERSAND;			break;
		case '\\': token->type = TOKEN_BACKSLASH;			break;

		case ':':
		{
			int d = getcFromInputFile ();
			if (d == c) /* :: */
			{
				/* what follows "::" is a member name, never a keyword */
				nextMayBeKeyword = false;
				token->type = TOKEN_OPERATOR;
			}
			else
			{
				ungetcToInputFile (d);
				token->type = TOKEN_COLON;
			}
			break;
		}

		case '=':
		{
			int d = getcFromInputFile ();
			if (d == '=' || d == '>') /* == and => */
				token->type = TOKEN_OPERATOR;
			else
			{
				ungetcToInputFile (d);
				token->type = TOKEN_EQUAL_SIGN;
			}
			break;
		}

		case '\'':
		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			/* a string token is located at its end rather than its start */
			token->lineNumber = getInputLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '<':
		{
			int d = getcFromInputFile ();
			if (d == '/')
			{
				/* </script[:white:]*> */
				if (tolower ((d = getcFromInputFile ())) == 's' &&
					tolower ((d = getcFromInputFile ())) == 'c' &&
					tolower ((d = getcFromInputFile ())) == 'r' &&
					tolower ((d = getcFromInputFile ())) == 'i' &&
					tolower ((d = getcFromInputFile ())) == 'p' &&
					tolower ((d = getcFromInputFile ())) == 't' &&
					(d = skipWhitespaces (getcFromInputFile ())) == '>')
				{
					InPhp = false;
					goto getNextChar;
				}
				else
				{
					/* only the character that broke the match is pushed
					 * back, those read before it are dropped */
					ungetcToInputFile (d);
					token->type = TOKEN_UNDEFINED;
				}
			}
			else if (d == '<' && (d = getcFromInputFile ()) == '<')
			{
				/* <<< introduces a heredoc or nowdoc */
				token->type = TOKEN_STRING;
				parseHeredoc (token->string);
			}
			else
			{
				ungetcToInputFile (d);
				token->type = TOKEN_UNDEFINED;
			}
			break;
		}

		case '#': /* comment */
			skipSingleComment ();
			goto getNextChar;
			break;

		case '+':
		case '-':
		case '*':
		case '%':
		{
			int d = getcFromInputFile ();
			if (c == '-' && d == '>')
				/* what follows "->" is a member name, never a keyword */
				nextMayBeKeyword = false;
			else if (d != '=')
				ungetcToInputFile (d);
			token->type = TOKEN_OPERATOR;
			break;
		}

		case '/': /* division or comment start */
		{
			int d = getcFromInputFile ();
			if (d == '/') /* single-line comment */
			{
				skipSingleComment ();
				goto getNextChar;
			}
			else if (d == '*')
			{
				/* multi-line comment: look for a '*' directly followed by a
				 * '/'.  The character after a '*' is pushed back when it is
				 * not a '/', so that "**" followed by '/' is handled */
				do
				{
					c = skipToCharacter ('*');
					if (c != EOF)
					{
						c = getcFromInputFile ();
						if (c == '/')
							break;
						else
							ungetcToInputFile (c);
					}
				} while (c != EOF && c != '\0');
				goto getNextChar;
			}
			else
			{
				if (d != '=')
					ungetcToInputFile (d);
				token->type = TOKEN_OPERATOR;
			}
			break;
		}

		case '$': /* variable start */
		{
			int d = getcFromInputFile ();
			if (! isIdentChar (d))
			{
				ungetcToInputFile (d);
				token->type = TOKEN_UNDEFINED;
			}
			else
			{
				/* the name is stored without its '$' */
				parseIdentifier (token->string, d);
				token->type = TOKEN_VARIABLE;
			}
			break;
		}

		case '?': /* maybe the end of the PHP chunk */
		{
			int d = getcFromInputFile ();
			if (d == '>')
			{
				InPhp = false;
				goto getNextChar;
			}
			else
			{
				ungetcToInputFile (d);
				token->type = TOKEN_QMARK;
			}
			break;
		}

		default:
			if (! isIdentChar (c))
				token->type = TOKEN_UNDEFINED;
			else
			{
				parseIdentifier (token->string, c);
				/* MayBeKeyword was decided by the previous token */
				if (MayBeKeyword)
					token->keyword = lookupCaseKeyword (vStringValue (token->string), getInputLanguage ());
				else
					token->keyword = KEYWORD_NONE;

				if (token->keyword == KEYWORD_NONE)
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
			}
			break;
	}

	if (token->type == TOKEN_SEMICOLON ||
		token->type == TOKEN_OPEN_CURLY ||
		token->type == TOKEN_CLOSE_CURLY)
	{
		/* reset current statement details on statement end, and when entering
		 * a deeper scope.
		 * it is a bit ugly to do this in readToken(), but it makes everything
		 * a lot simpler. */
		CurrentStatement.access = ACCESS_UNDEFINED;
		CurrentStatement.impl = IMPL_UNDEFINED;
	}

	MayBeKeyword = nextMayBeKeyword;
}
1134
/* Consumes a possibly namespace-qualified name, i.e. a run of identifier
 * and backslash tokens that starts at the current @token.
 *
 * If @name is not NULL the text of the run is appended to it.  If
 * @lastToken is not NULL it receives a copy of the last token of the run.
 * On return @token is the first token after the run; nothing is read when
 * @token does not start a name. */
static void readQualifiedName (tokenInfo *const token, vString *name,
							   tokenInfo *const lastToken)
{
	for (;;)
	{
		const bool isSeparator = (token->type == TOKEN_BACKSLASH);

		if (! isSeparator && token->type != TOKEN_IDENTIFIER)
			break;

		if (name != NULL)
		{
			if (isSeparator)
				vStringPut (name, '\\');
			else
				vStringCat (name, token->string);
		}
		if (lastToken != NULL)
			copyToken (lastToken, token, true);

		readToken (token);
	}
}
1152
/* Forward declaration: enterScope() is defined further down in the file but
 * is needed by functions that come before its definition. */
static void enterScope (tokenInfo *const parentToken,
						const vString *const extraScope,
						const int parentKind);
1156
/* If @token is an opening parenthesis, skips to the matching closing one
 * (nested pairs included) and reads the token after it.  Gives up at EOF.
 * Does nothing when @token is anything else. */
static void skipOverParens (tokenInfo *token)
{
	int depth = 1;

	if (token->type != TOKEN_OPEN_PAREN)
		return;

	do
	{
		readToken (token);
		if (token->type == TOKEN_OPEN_PAREN)
			depth++;
		else if (token->type == TOKEN_CLOSE_PAREN)
			depth--;
	}
	while (depth > 0 && token->type != TOKEN_EOF);

	readToken (token);
}
1178
1179 /* parses a class or an interface:
1180 * class Foo {}
1181 * class Foo extends Bar {}
1182 * class Foo extends Bar implements iFoo, iBar {}
1183 * interface iFoo {}
1184 * interface iBar extends iFoo {}
1185 *
1186 * if @name is not NULL, parses an anonymous class with name @name
1187 * new class {}
1188 * new class(1, 2) {}
1189 * new class(1, 2) extends Foo implements iFoo, iBar {} */
parseClassOrIface(tokenInfo * const token,const phpKind kind,const tokenInfo * name)1190 static bool parseClassOrIface (tokenInfo *const token, const phpKind kind,
1191 const tokenInfo *name)
1192 {
1193 bool readNext = true;
1194 implType impl = CurrentStatement.impl;
1195 tokenInfo *nameFree = NULL;
1196 vString *inheritance = NULL;
1197 vString *parent = NULL;
1198
1199 readToken (token);
1200 if (name) /* anonymous class */
1201 {
1202 /* skip possible construction arguments */
1203 skipOverParens (token);
1204 }
1205 else /* normal, named class */
1206 {
1207 if (token->type != TOKEN_IDENTIFIER)
1208 return false;
1209
1210 name = nameFree = newToken ();
1211 copyToken (nameFree, token, true);
1212
1213 readToken (token);
1214 }
1215
1216 inheritance = vStringNew ();
1217 /* read every identifiers, keywords and commas, and assume each
1218 * identifier (not keyword) is an inheritance
1219 * (like in "class Foo extends Bar implements iA, iB") */
1220 enum { inheritance_initial,
1221 inheritance_extends,
1222 inheritance_implements
1223 } istat = inheritance_initial;
1224 while (token->type == TOKEN_IDENTIFIER ||
1225 token->type == TOKEN_BACKSLASH ||
1226 token->type == TOKEN_KEYWORD ||
1227 token->type == TOKEN_COMMA)
1228 {
1229 if (token->type == TOKEN_IDENTIFIER || token->type == TOKEN_BACKSLASH)
1230 {
1231 vString *qualifiedName = vStringNew ();
1232
1233 readQualifiedName (token, qualifiedName, NULL);
1234 if (vStringLength (inheritance) > 0)
1235 vStringPut (inheritance, ',');
1236 vStringCat (inheritance, qualifiedName);
1237 if (istat == inheritance_extends && !parent)
1238 parent = qualifiedName;
1239 else
1240 vStringDelete (qualifiedName);
1241 }
1242 else
1243 {
1244 if (token->type == TOKEN_KEYWORD)
1245 {
1246 if (token->keyword == KEYWORD_extends)
1247 istat = inheritance_extends;
1248 else if (token->keyword == KEYWORD_implements)
1249 istat = inheritance_implements;
1250 }
1251 readToken (token);
1252 }
1253 }
1254
1255 makeClassOrIfaceTag (kind, name, inheritance, impl);
1256
1257 if (token->type == TOKEN_OPEN_CURLY)
1258 {
1259 vString *backup = ParentClass;
1260 ParentClass = parent;
1261 enterScope (token, name->string, kind);
1262 ParentClass = backup;
1263 }
1264 else
1265 readNext = false;
1266
1267 if (nameFree)
1268 deleteToken (nameFree);
1269 vStringDelete (parent);
1270 vStringDelete (inheritance);
1271
1272 return readNext;
1273 }
1274
1275 /* parses a trait:
1276 * trait Foo {} */
parseTrait(tokenInfo * const token)1277 static bool parseTrait (tokenInfo *const token)
1278 {
1279 bool readNext = true;
1280 tokenInfo *name;
1281
1282 readToken (token);
1283 if (token->type != TOKEN_IDENTIFIER)
1284 return false;
1285
1286 name = newToken ();
1287 copyToken (name, token, true);
1288
1289 makeSimplePhpTag (name, K_TRAIT, ACCESS_UNDEFINED);
1290
1291 readToken (token);
1292 if (token->type == TOKEN_OPEN_CURLY)
1293 enterScope (token, name->string, K_TRAIT);
1294 else
1295 readNext = false;
1296
1297 deleteToken (name);
1298
1299 return readNext;
1300 }
1301
1302 /* parse a function
1303 *
1304 * if @name is NULL, parses a normal function
1305 * function myfunc($foo, $bar) {}
1306 * function &myfunc($foo, $bar) {}
1307 * function myfunc($foo, $bar) : type {}
1308 * function myfunc($foo, $bar) : ?type {}
1309 *
1310 * if @name is not NULL, parses an anonymous function with name @name
1311 * $foo = function($foo, $bar) {}
1312 * $foo = function&($foo, $bar) {}
1313 * $foo = function($foo, $bar) use ($x, &$y) {}
1314 * $foo = function($foo, $bar) use ($x, &$y) : type {}
1315 * $foo = function($foo, $bar) use ($x, &$y) : ?type {} */
parseFunction(tokenInfo * const token,const tokenInfo * name)1316 static bool parseFunction (tokenInfo *const token, const tokenInfo *name)
1317 {
1318 bool readNext = true;
1319 accessType access = CurrentStatement.access;
1320 implType impl = CurrentStatement.impl;
1321 tokenInfo *nameFree = NULL;
1322 vString *rtype = NULL;
1323 vString *arglist = NULL;
1324
1325 readToken (token);
1326 /* skip a possible leading ampersand (return by reference) */
1327 if (token->type == TOKEN_AMPERSAND)
1328 readToken (token);
1329
1330 if (! name)
1331 {
1332 if (token->type != TOKEN_IDENTIFIER && token->type != TOKEN_KEYWORD)
1333 return false;
1334
1335 name = nameFree = newToken ();
1336 copyToken (nameFree, token, true);
1337 readToken (token);
1338 }
1339
1340 if (token->type == TOKEN_OPEN_PAREN)
1341 {
1342 int depth = 1;
1343
1344 arglist = vStringNew ();
1345 vStringPut (arglist, '(');
1346 do
1347 {
1348 readToken (token);
1349
1350 switch (token->type)
1351 {
1352 case TOKEN_OPEN_PAREN: depth++; break;
1353 case TOKEN_CLOSE_PAREN: depth--; break;
1354 default: break;
1355 }
1356 /* display part */
1357 switch (token->type)
1358 {
1359 case TOKEN_AMPERSAND: vStringPut (arglist, '&'); break;
1360 case TOKEN_CLOSE_CURLY: vStringPut (arglist, '}'); break;
1361 case TOKEN_CLOSE_PAREN: vStringPut (arglist, ')'); break;
1362 case TOKEN_CLOSE_SQUARE: vStringPut (arglist, ']'); break;
1363 case TOKEN_COLON: vStringPut (arglist, ':'); break;
1364 case TOKEN_COMMA: vStringCatS (arglist, ", "); break;
1365 case TOKEN_EQUAL_SIGN: vStringCatS (arglist, " = "); break;
1366 case TOKEN_OPEN_CURLY: vStringPut (arglist, '{'); break;
1367 case TOKEN_OPEN_PAREN: vStringPut (arglist, '('); break;
1368 case TOKEN_OPEN_SQUARE: vStringPut (arglist, '['); break;
1369 case TOKEN_PERIOD: vStringPut (arglist, '.'); break;
1370 case TOKEN_SEMICOLON: vStringPut (arglist, ';'); break;
1371 case TOKEN_BACKSLASH: vStringPut (arglist, '\\'); break;
1372 case TOKEN_STRING:
1373 {
1374 vStringPut (arglist, '\'');
1375 vStringCat (arglist, token->string);
1376 vStringPut (arglist, '\'');
1377 break;
1378 }
1379
1380 case TOKEN_IDENTIFIER:
1381 case TOKEN_KEYWORD:
1382 case TOKEN_VARIABLE:
1383 {
1384 switch (vStringLast (arglist))
1385 {
1386 case 0:
1387 case ' ':
1388 case '{':
1389 case '(':
1390 case '[':
1391 case '.':
1392 case '\\':
1393 /* no need for a space between those and the identifier */
1394 break;
1395
1396 default:
1397 vStringPut (arglist, ' ');
1398 break;
1399 }
1400 if (token->type == TOKEN_VARIABLE)
1401 vStringPut (arglist, '$');
1402 vStringCat (arglist, token->string);
1403 break;
1404 }
1405
1406 default: break;
1407 }
1408 }
1409 while (token->type != TOKEN_EOF && depth > 0);
1410
1411 readToken (token); /* normally it's an open brace or "use" keyword */
1412 }
1413
1414 /* skip use(...) */
1415 if (token->type == TOKEN_KEYWORD && token->keyword == KEYWORD_use)
1416 {
1417 readToken (token);
1418 skipOverParens (token);
1419 }
1420
1421 /* PHP7 return type declaration or if parsing Zephir, gather function return
1422 * type hint to fill typeRef. */
1423 if ((getInputLanguage () == Lang_php && token->type == TOKEN_COLON) ||
1424 (getInputLanguage () == Lang_zephir && token->type == TOKEN_OPERATOR))
1425 {
1426 if (arglist)
1427 rtype = vStringNew ();
1428
1429 readToken (token);
1430 if (token->type == TOKEN_QMARK)
1431 {
1432 if (rtype)
1433 vStringPut (rtype, '?');
1434 readToken (token);
1435 }
1436 readQualifiedName (token, rtype, NULL);
1437
1438 if (rtype && vStringIsEmpty (rtype))
1439 {
1440 vStringDelete (rtype);
1441 rtype = NULL;
1442 }
1443 }
1444
1445 if (arglist)
1446 makeFunctionTag (name, arglist, rtype, access, impl);
1447
1448 if (token->type == TOKEN_OPEN_CURLY)
1449 enterScope (token, name->string, K_FUNCTION);
1450 else
1451 readNext = false;
1452
1453 vStringDelete (rtype);
1454 vStringDelete (arglist);
1455 if (nameFree)
1456 deleteToken (nameFree);
1457
1458 return readNext;
1459 }
1460
1461 /* parses declarations of the form
1462 * const NAME = VALUE */
parseConstant(tokenInfo * const token)1463 static bool parseConstant (tokenInfo *const token)
1464 {
1465 tokenInfo *name;
1466
1467 readToken (token); /* skip const keyword */
1468 if (token->type != TOKEN_IDENTIFIER && token->type != TOKEN_KEYWORD)
1469 return false;
1470
1471 name = newToken ();
1472 copyToken (name, token, true);
1473
1474 readToken (token);
1475 if (token->type == TOKEN_EQUAL_SIGN)
1476 makeSimplePhpTag (name, K_DEFINE, ACCESS_UNDEFINED);
1477
1478 deleteToken (name);
1479
1480 return token->type == TOKEN_EQUAL_SIGN;
1481 }
1482
1483 /* parses declarations of the form
1484 * define('NAME', 'VALUE')
1485 * define(NAME, 'VALUE) */
parseDefine(tokenInfo * const token)1486 static bool parseDefine (tokenInfo *const token)
1487 {
1488 int depth = 1;
1489
1490 readToken (token); /* skip "define" identifier */
1491 if (token->type != TOKEN_OPEN_PAREN)
1492 return false;
1493
1494 readToken (token);
1495 if (token->type == TOKEN_STRING ||
1496 token->type == TOKEN_IDENTIFIER)
1497 {
1498 makeSimplePhpTag (token, K_DEFINE, ACCESS_UNDEFINED);
1499 readToken (token);
1500 }
1501
1502 /* skip until the close parenthesis.
1503 * no need to handle nested blocks since they would be invalid
1504 * in this context anyway (the VALUE may only be a scalar, like
1505 * 42
1506 * (42)
1507 * and alike) */
1508 while (token->type != TOKEN_EOF && depth > 0)
1509 {
1510 switch (token->type)
1511 {
1512 case TOKEN_OPEN_PAREN: depth++; break;
1513 case TOKEN_CLOSE_PAREN: depth--; break;
1514 default: break;
1515 }
1516 readToken (token);
1517 }
1518
1519 return false;
1520 }
1521
1522 /* parses declarations of the form
1523 * use Foo
1524 * use Foo\Bar\Class
1525 * use Foo\Bar\Class as FooBarClass
1526 * use function Foo\Bar\func
1527 * use function Foo\Bar\func as foobarfunc
1528 * use const Foo\Bar\CONST
1529 * use const Foo\Bar\CONST as FOOBARCONST
1530 * use Foo, Bar
1531 * use Foo, Bar as Baz
1532 * use Foo as Test, Bar as Baz
1533 * use Foo\{Bar, Baz as Child, Nested\Other, Even\More as Something} */
parseUse(tokenInfo * const token)1534 static bool parseUse (tokenInfo *const token)
1535 {
1536 bool readNext = false;
1537 /* we can't know the use type, because class, interface and namespaces
1538 * aliases are the same, and the only difference is the referenced name's
1539 * type */
1540 const char *refType = "unknown";
1541 vString *refName = vStringNew ();
1542 tokenInfo *nameToken = newToken ();
1543 bool grouped = false;
1544
1545 readToken (token); /* skip use keyword itself */
1546 if (token->type == TOKEN_KEYWORD && (token->keyword == KEYWORD_function ||
1547 token->keyword == KEYWORD_const))
1548 {
1549 switch (token->keyword)
1550 {
1551 case KEYWORD_function: refType = PhpKinds[K_FUNCTION].name; break;
1552 case KEYWORD_const: refType = PhpKinds[K_DEFINE].name; break;
1553 default: break; /* silence compilers */
1554 }
1555 readNext = true;
1556 }
1557
1558 if (readNext)
1559 readToken (token);
1560
1561 readQualifiedName (token, refName, nameToken);
1562 grouped = readNext = (token->type == TOKEN_OPEN_CURLY);
1563
1564 do
1565 {
1566 size_t refNamePrefixLength = grouped ? vStringLength (refName) : 0;
1567
1568 /* if it's either not the first name in a comma-separated list, or we
1569 * are in a grouped alias and need to read the leaf name */
1570 if (readNext)
1571 {
1572 readToken (token);
1573 /* in case of a trailing comma (or an empty group) */
1574 if (token->type == TOKEN_CLOSE_CURLY)
1575 break;
1576 readQualifiedName (token, refName, nameToken);
1577 }
1578
1579 if (token->type == TOKEN_KEYWORD && token->keyword == KEYWORD_as)
1580 {
1581 readToken (token);
1582 copyToken (nameToken, token, true);
1583 readToken (token);
1584 }
1585
1586 if (nameToken->type == TOKEN_IDENTIFIER && PhpKinds[K_ALIAS].enabled)
1587 {
1588 tagEntryInfo entry;
1589
1590 initPhpEntry (&entry, nameToken, K_ALIAS, ACCESS_UNDEFINED);
1591
1592 entry.extensionFields.typeRef[0] = refType;
1593 entry.extensionFields.typeRef[1] = vStringValue (refName);
1594
1595 makePhpTagEntry (&entry);
1596 }
1597
1598 vStringTruncate (refName, refNamePrefixLength);
1599
1600 readNext = true;
1601 }
1602 while (token->type == TOKEN_COMMA);
1603
1604 if (grouped && token->type == TOKEN_CLOSE_CURLY)
1605 readToken (token);
1606
1607 vStringDelete (refName);
1608 deleteToken (nameToken);
1609
1610 return (token->type == TOKEN_SEMICOLON);
1611 }
1612
1613 /* parses declarations of the form
1614 * $var = VALUE
1615 * $var; */
parseVariable(tokenInfo * const token,vString * typeName)1616 static bool parseVariable (tokenInfo *const token, vString * typeName)
1617 {
1618 tokenInfo *name;
1619 bool readNext = true;
1620 accessType access = CurrentStatement.access;
1621
1622 name = newToken ();
1623 copyToken (name, token, true);
1624
1625 readToken (token);
1626 if (token->type == TOKEN_EQUAL_SIGN)
1627 {
1628 phpKind kind = K_VARIABLE;
1629
1630 if (token->parentKind == K_FUNCTION)
1631 kind = K_LOCAL_VARIABLE;
1632
1633 readToken (token);
1634 if (token->type == TOKEN_KEYWORD &&
1635 token->keyword == KEYWORD_function &&
1636 PhpKinds[kind].enabled)
1637 {
1638 if (parseFunction (token, name))
1639 readToken (token);
1640 readNext = (bool) (token->type == TOKEN_SEMICOLON);
1641 }
1642 else
1643 {
1644 makeSimplePhpTag (name, kind, access);
1645 readNext = false;
1646 }
1647 }
1648 else if (token->type == TOKEN_SEMICOLON)
1649 {
1650 /* generate tags for variable declarations in classes
1651 * class Foo {
1652 * protected $foo;
1653 * }
1654 * but don't get fooled by stuff like $foo = $bar; */
1655 if (token->parentKind == K_CLASS ||
1656 token->parentKind == K_INTERFACE ||
1657 token->parentKind == K_TRAIT)
1658 makeTypedPhpTag (name, K_VARIABLE, access, typeName);
1659 }
1660 else
1661 readNext = false;
1662
1663 deleteToken (name);
1664
1665 return readNext;
1666 }
1667
1668 /* parses namespace declarations
1669 * namespace Foo {}
1670 * namespace Foo\Bar {}
1671 * namespace Foo;
1672 * namespace Foo\Bar;
1673 * namespace;
1674 * namespace {} */
parseNamespace(tokenInfo * const token)1675 static bool parseNamespace (tokenInfo *const token)
1676 {
1677 tokenInfo *nsToken = newToken ();
1678
1679 vStringClear (CurrentNamesapce);
1680 copyToken (nsToken, token, false);
1681
1682 do
1683 {
1684 readToken (token);
1685 if (token->type == TOKEN_IDENTIFIER)
1686 {
1687 if (vStringLength (CurrentNamesapce) > 0)
1688 {
1689 const char *sep;
1690
1691 sep = phpScopeSeparatorFor(K_NAMESPACE,
1692 K_NAMESPACE);
1693 vStringCatS (CurrentNamesapce, sep);
1694 }
1695 vStringCat (CurrentNamesapce, token->string);
1696 }
1697 }
1698 while (token->type != TOKEN_EOF &&
1699 token->type != TOKEN_SEMICOLON &&
1700 token->type != TOKEN_OPEN_CURLY);
1701
1702 if (vStringLength (CurrentNamesapce) > 0)
1703 makeNamespacePhpTag (nsToken, CurrentNamesapce);
1704
1705 if (token->type == TOKEN_OPEN_CURLY)
1706 enterScope (token, NULL, -1);
1707
1708 deleteToken (nsToken);
1709
1710 return true;
1711 }
1712
/* Parses the content of a scope: everything up to the closing brace that
 * ends it, or up to the end of the input.
 *
 * @parentToken: token the scope starts at; on return it is overwritten with
 *               the token that ended the scope, its parentKind being left
 *               as it was on entry
 * @extraScope:  if not NULL, name of the scope being entered, which is added
 *               to the scope of everything found inside
 * @parentKind:  kind of the scope being entered; only used together with
 *               @extraScope
 *
 * Identifiers and a '?' preceding a variable are accumulated as the type of
 * that variable.  The accumulated type is dropped at every statement end and
 * before every nested block, so that identifiers from a previous statement
 * (like the trait name of "use SomeTrait;" in a class body, or the value of
 * "const A = B;") do not end up as the type of the next property. */
static void enterScope (tokenInfo *const parentToken,
                        const vString *const extraScope,
                        const int parentKind)
{
	tokenInfo *token = newToken ();
	vString *typeName = vStringNew ();
	int origParentKind = parentToken->parentKind;

	copyToken (token, parentToken, true);

	if (extraScope)
	{
		token->parentKind = parentKind;
		addToScope (token, extraScope, origParentKind);
	}

	readToken (token);
	while (token->type != TOKEN_EOF &&
	       token->type != TOKEN_CLOSE_CURLY)
	{
		/* parsers return false when they leave an unprocessed token behind */
		bool readNext = true;

		switch (token->type)
		{
			case TOKEN_SEMICOLON:
				/* statement end: whatever was collected is not a type */
				vStringClear (typeName);
				break;

			case TOKEN_OPEN_CURLY:
				/* identifiers leading to a block are not a type either */
				vStringClear (typeName);
				enterScope (token, NULL, -1);
				break;

			case TOKEN_KEYWORD:
				switch (token->keyword)
				{
					/* handle anonymous classes */
					case KEYWORD_new:
						readToken (token);
						if (token->keyword != KEYWORD_class)
							readNext = false;
						else
						{
							tokenInfo *name = newToken ();

							copyToken (name, token, true);
							anonGenerate (name->string, "AnonymousClass", K_CLASS);
							name->anonymous = true;
							readNext = parseClassOrIface (token, K_CLASS, name);
							deleteToken (name);
						}
						break;

					case KEYWORD_class:		readNext = parseClassOrIface (token, K_CLASS, NULL);		break;
					case KEYWORD_interface:	readNext = parseClassOrIface (token, K_INTERFACE, NULL);	break;
					case KEYWORD_trait:		readNext = parseTrait (token);								break;
					case KEYWORD_function:	readNext = parseFunction (token, NULL);						break;
					case KEYWORD_const:		readNext = parseConstant (token);							break;
					case KEYWORD_define:	readNext = parseDefine (token);								break;

					case KEYWORD_use:
						/* aliases are only allowed at root scope, but the keyword
						 * is also used to i.e. "import" traits into a class */
						if (vStringLength (token->scope) == 0)
							readNext = parseUse (token);
						break;

					case KEYWORD_namespace:	readNext = parseNamespace (token);	break;

					case KEYWORD_private:	CurrentStatement.access = ACCESS_PRIVATE;	break;
					case KEYWORD_protected:	CurrentStatement.access = ACCESS_PROTECTED;	break;
					case KEYWORD_public:	CurrentStatement.access = ACCESS_PUBLIC;	break;
					case KEYWORD_var:		CurrentStatement.access = ACCESS_PUBLIC;	break;

					case KEYWORD_abstract:	CurrentStatement.impl = IMPL_ABSTRACT;		break;

					default: break;
				}
				break;

			case TOKEN_QMARK:
				/* nullable type marker: starts a new type */
				vStringClear (typeName);
				vStringPut (typeName, '?');
				break;

			case TOKEN_IDENTIFIER:
				/* possibly (part of) the type of an upcoming variable */
				vStringCat (typeName, token->string);
				break;

			case TOKEN_VARIABLE:
				readNext = parseVariable (token,
				                          vStringIsEmpty (typeName)
				                          ? NULL
				                          : typeName);
				vStringClear (typeName);
				break;

			default: break;
		}

		if (readNext)
			readToken (token);
	}

	/* hand the token that ended the scope back to the caller */
	copyToken (parentToken, token, false);
	parentToken->parentKind = origParentKind;
	vStringDelete (typeName);
	deleteToken (token);
}
1817
findTags(bool startsInPhpMode)1818 static void findTags (bool startsInPhpMode)
1819 {
1820 tokenInfo *const token = newToken ();
1821
1822 InPhp = startsInPhpMode;
1823 MayBeKeyword = true;
1824 CurrentStatement.access = ACCESS_UNDEFINED;
1825 CurrentStatement.impl = IMPL_UNDEFINED;
1826 CurrentNamesapce = vStringNew ();
1827 FullScope = vStringNew ();
1828 Assert (ParentClass == NULL);
1829
1830 do
1831 {
1832 enterScope (token, NULL, -1);
1833 }
1834 while (token->type != TOKEN_EOF); /* keep going even with unmatched braces */
1835
1836 vStringDelete (FullScope);
1837 vStringDelete (CurrentNamesapce);
1838 deleteToken (token);
1839 }
1840
/* Parser entry point for PHP: runs findTags() with InPhp initially false. */
static void findPhpTags (void)
{
	const bool startsInPhpMode = false;

	findTags (startsInPhpMode);
}
1845
/* Parser entry point for Zephir: runs findTags() with InPhp initially true. */
static void findZephirTags (void)
{
	const bool startsInPhpMode = true;

	findTags (startsInPhpMode);
}
1850
initializePool(void)1851 static void initializePool (void)
1852 {
1853 if (TokenPool == NULL)
1854 TokenPool = objPoolNew (16, newPoolToken, deletePoolToken, clearPoolToken, NULL);
1855 }
1856
initializePhpParser(const langType language)1857 static void initializePhpParser (const langType language)
1858 {
1859 Lang_php = language;
1860 initializePool ();
1861 }
1862
initializeZephirParser(const langType language)1863 static void initializeZephirParser (const langType language)
1864 {
1865 Lang_zephir = language;
1866 initializePool ();
1867 }
1868
/* Finalization hook shared by the PHP and the Zephir parsers: destroys the
 * token pool if the parser was initialized and the pool still exists.
 * Resetting TokenPool makes a second call a no-op. */
static void finalize (langType language CTAGS_ATTR_UNUSED, bool initialized)
{
	if (initialized && TokenPool != NULL)
	{
		objPoolDelete (TokenPool);
		TokenPool = NULL;
	}
}
1880
PhpParser(void)1881 extern parserDefinition* PhpParser (void)
1882 {
1883 static const char *const extensions [] = { "php", "php3", "php4", "php5", "php7", "phtml", NULL };
1884 parserDefinition* def = parserNew ("PHP");
1885 def->kindTable = PhpKinds;
1886 def->kindCount = ARRAY_SIZE (PhpKinds);
1887 def->extensions = extensions;
1888 def->parser = findPhpTags;
1889 def->initialize = initializePhpParser;
1890 def->finalize = finalize;
1891 def->keywordTable = PhpKeywordTable;
1892 def->keywordCount = ARRAY_SIZE (PhpKeywordTable);
1893 return def;
1894 }
1895
ZephirParser(void)1896 extern parserDefinition* ZephirParser (void)
1897 {
1898 static const char *const extensions [] = { "zep", NULL };
1899 parserDefinition* def = parserNew ("Zephir");
1900 def->kindTable = PhpKinds;
1901 def->kindCount = ARRAY_SIZE (PhpKinds);
1902 def->extensions = extensions;
1903 def->parser = findZephirTags;
1904 def->initialize = initializeZephirParser;
1905 def->finalize = finalize;
1906 def->keywordTable = PhpKeywordTable;
1907 def->keywordCount = ARRAY_SIZE (PhpKeywordTable);
1908 return def;
1909 }
1910