xref: /Universal-ctags/parsers/php.c (revision f476dfe1d31ba7d25d0a1f50f94a756d3fc6cd34)
1 /*
2 *   Copyright (c) 2013, Colomban Wendling <ban@herbesfolles.org>
3 *
4 *   This source code is released for free distribution under the terms of the
5 *   GNU General Public License version 2 or (at your option) any later version.
6 *
7 *   This module contains code for generating tags for the PHP scripting
8 *   language.
9 *
10 *   The language reference: http://php.net/manual/en/langref.php
11 */
12 
13 /*
14 *   INCLUDE FILES
15 */
16 #include "general.h"  /* must always come first */
17 
18 #include <string.h>
19 
20 #include "parse.h"
21 #include "read.h"
22 #include "vstring.h"
23 #include "keyword.h"
24 #include "entry.h"
25 #include "routines.h"
26 #include "debug.h"
27 #include "objpool.h"
28 #include "promise.h"
29 
/* True if @c may appear in a PHP identifier: an ASCII alphanumeric, an
 * underscore, or any value >= 0x80.  @c is expanded several times, so it
 * must not have side effects. */
#define isIdentChar(c) (isalnum (c) || (c) == '_' || (c) >= 0x80)
/* Obtains a token object from TokenPool. */
#define newToken() (objPoolGet (TokenPool))
/* Gives the token @t back to TokenPool. */
#define deleteToken(t) (objPoolPut (TokenPool, (t)))
33 
/* IDs of the keywords known to the parser.  PhpKeywordTable maps the keyword
 * spellings to these values (note that "elseif" maps to KEYWORD_elif and
 * that "cfunction" shares KEYWORD_function). */
enum {
	KEYWORD_abstract,
	KEYWORD_and,
	KEYWORD_as,
	KEYWORD_break,
	KEYWORD_callable,
	KEYWORD_case,
	KEYWORD_catch,
	KEYWORD_class,
	KEYWORD_clone,
	KEYWORD_const,
	KEYWORD_continue,
	KEYWORD_declare,
	KEYWORD_define,
	KEYWORD_default,
	KEYWORD_do,
	KEYWORD_echo,
	KEYWORD_else,
	KEYWORD_elif,
	KEYWORD_enddeclare,
	KEYWORD_endfor,
	KEYWORD_endforeach,
	KEYWORD_endif,
	KEYWORD_endswitch,
	KEYWORD_endwhile,
	KEYWORD_extends,
	KEYWORD_final,
	KEYWORD_finally,
	KEYWORD_for,
	KEYWORD_foreach,
	KEYWORD_function,
	KEYWORD_global,
	KEYWORD_goto,
	KEYWORD_if,
	KEYWORD_implements,
	KEYWORD_include,
	KEYWORD_include_once,
	KEYWORD_instanceof,
	KEYWORD_insteadof,
	KEYWORD_interface,
	KEYWORD_namespace,
	KEYWORD_new,
	KEYWORD_or,
	KEYWORD_print,
	KEYWORD_private,
	KEYWORD_protected,
	KEYWORD_public,
	KEYWORD_require,
	KEYWORD_require_once,
	KEYWORD_return,
	KEYWORD_static,
	KEYWORD_switch,
	KEYWORD_throw,
	KEYWORD_trait,
	KEYWORD_try,
	KEYWORD_use,
	KEYWORD_var,
	KEYWORD_while,
	KEYWORD_xor,
	KEYWORD_yield
};
/* Plain int rather than the enum type so that a variable of this type can
 * also hold KEYWORD_NONE, which is not one of the enumerators above. */
typedef int keywordId; /* to allow KEYWORD_NONE */
96 
/* Visibility of a class member.  The values index the name table in
 * accessToString(); COUNT_ACCESS is the number of real values and must stay
 * last. */
typedef enum {
	ACCESS_UNDEFINED,
	ACCESS_PRIVATE,
	ACCESS_PROTECTED,
	ACCESS_PUBLIC,
	COUNT_ACCESS
} accessType;
104 
/* Implementation qualifier of a class or function.  The values index the
 * name table in implToString(); COUNT_IMPL is the number of real values and
 * must stay last. */
typedef enum {
	IMPL_UNDEFINED,
	IMPL_ABSTRACT,
	COUNT_IMPL
} implType;
110 
/* Kinds of tags this parser emits.  The values are used as indices into
 * PhpKinds[], so both must list the kinds in the same order; COUNT_KIND is
 * the number of kinds and must stay last. */
typedef enum {
	K_CLASS,
	K_DEFINE,
	K_FUNCTION,
	K_INTERFACE,
	K_LOCAL_VARIABLE,
	K_NAMESPACE,
	K_TRAIT,
	K_VARIABLE,
	K_ALIAS,
	COUNT_KIND
} phpKind;
123 
/* Separator written after a namespace name in a qualified name (a single
 * backslash). */
#define NAMESPACE_SEPARATOR "\\"
/* Scope separators attached to every kind in PhpKinds[]: a backslash when
 * the upper scope is a namespace, "::" for any other upper scope kind. */
static scopeSeparator PhpGenericSeparators [] = {
	{ K_NAMESPACE        , NAMESPACE_SEPARATOR },
	{ KIND_WILDCARD_INDEX, "::" },
};
129 
/* Kind definitions, indexed by phpKind (keep the entries in the same order
 * as that enum).  Each entry gives: enabled flag, one-letter kind, kind
 * name, description, and the scope separators to use.  Only the "local"
 * kind starts out disabled. */
static kindDefinition PhpKinds[COUNT_KIND] = {
	{ true, 'c', "class",		"classes",
	  ATTACH_SEPARATORS(PhpGenericSeparators) },
	{ true, 'd', "define",		"constant definitions",
	  ATTACH_SEPARATORS(PhpGenericSeparators)},
	{ true, 'f', "function",	"functions",
	  ATTACH_SEPARATORS(PhpGenericSeparators)},
	{ true, 'i', "interface",	"interfaces",
	  ATTACH_SEPARATORS(PhpGenericSeparators)},
	{ false, 'l', "local",		"local variables",
	  ATTACH_SEPARATORS(PhpGenericSeparators)},
	{ true, 'n', "namespace",	"namespaces",
	  ATTACH_SEPARATORS(PhpGenericSeparators)},
	{ true, 't', "trait",		"traits",
	  ATTACH_SEPARATORS(PhpGenericSeparators)},
	{ true, 'v', "variable",	"variables",
	  ATTACH_SEPARATORS(PhpGenericSeparators)},
	{ true, 'a', "alias",		"aliases",
	  ATTACH_SEPARATORS(PhpGenericSeparators)},
};
150 
/* Maps keyword spellings to keyword IDs.  Two spellings do not map to an
 * enumerator of the same name: "cfunction" is an alias of KEYWORD_function
 * and "elseif" maps to KEYWORD_elif. */
static const keywordTable PhpKeywordTable[] = {
	/* keyword			keyword ID */
	{ "abstract",		KEYWORD_abstract		},
	{ "and",			KEYWORD_and				},
	{ "as",				KEYWORD_as				},
	{ "break",			KEYWORD_break			},
	{ "callable",		KEYWORD_callable		},
	{ "case",			KEYWORD_case			},
	{ "catch",			KEYWORD_catch			},
	{ "cfunction",		KEYWORD_function		}, /* origin unclear, but it seems to behave much like "function" so it is bound to the same ID */
	{ "class",			KEYWORD_class			},
	{ "clone",			KEYWORD_clone			},
	{ "const",			KEYWORD_const			},
	{ "continue",		KEYWORD_continue		},
	{ "declare",		KEYWORD_declare			},
	{ "define",			KEYWORD_define			}, /* this isn't really a keyword, but handling it as one is easier */
	{ "default",		KEYWORD_default			},
	{ "do",				KEYWORD_do				},
	{ "echo",			KEYWORD_echo			},
	{ "else",			KEYWORD_else			},
	{ "elseif",			KEYWORD_elif			},
	{ "enddeclare",		KEYWORD_enddeclare		},
	{ "endfor",			KEYWORD_endfor			},
	{ "endforeach",		KEYWORD_endforeach		},
	{ "endif",			KEYWORD_endif			},
	{ "endswitch",		KEYWORD_endswitch		},
	{ "endwhile",		KEYWORD_endwhile		},
	{ "extends",		KEYWORD_extends			},
	{ "final",			KEYWORD_final			},
	{ "finally",		KEYWORD_finally			},
	{ "for",			KEYWORD_for				},
	{ "foreach",		KEYWORD_foreach			},
	{ "function",		KEYWORD_function		},
	{ "global",			KEYWORD_global			},
	{ "goto",			KEYWORD_goto			},
	{ "if",				KEYWORD_if				},
	{ "implements",		KEYWORD_implements		},
	{ "include",		KEYWORD_include			},
	{ "include_once",	KEYWORD_include_once	},
	{ "instanceof",		KEYWORD_instanceof		},
	{ "insteadof",		KEYWORD_insteadof		},
	{ "interface",		KEYWORD_interface		},
	{ "namespace",		KEYWORD_namespace		},
	{ "new",			KEYWORD_new				},
	{ "or",				KEYWORD_or				},
	{ "print",			KEYWORD_print			},
	{ "private",		KEYWORD_private			},
	{ "protected",		KEYWORD_protected		},
	{ "public",			KEYWORD_public			},
	{ "require",		KEYWORD_require			},
	{ "require_once",	KEYWORD_require_once	},
	{ "return",			KEYWORD_return			},
	{ "static",			KEYWORD_static			},
	{ "switch",			KEYWORD_switch			},
	{ "throw",			KEYWORD_throw			},
	{ "trait",			KEYWORD_trait			},
	{ "try",			KEYWORD_try				},
	{ "use",			KEYWORD_use				},
	{ "var",			KEYWORD_var				},
	{ "while",			KEYWORD_while			},
	{ "xor",			KEYWORD_xor				},
	{ "yield",			KEYWORD_yield			}
};
214 
215 
/* Token classes produced by readToken().  TOKEN_UNDEFINED is used for
 * input readToken() does not classify; TOKEN_OPERATOR covers the operators
 * it groups together ("::", "==", "=>", "->", arithmetic operators and
 * their compound-assignment forms). */
typedef enum eTokenType {
	TOKEN_UNDEFINED,
	TOKEN_EOF,
	TOKEN_CHARACTER,
	TOKEN_CLOSE_PAREN,
	TOKEN_SEMICOLON,
	TOKEN_COLON,
	TOKEN_COMMA,
	TOKEN_KEYWORD,
	TOKEN_OPEN_PAREN,
	TOKEN_OPERATOR,
	TOKEN_IDENTIFIER,
	TOKEN_STRING,
	TOKEN_PERIOD,
	TOKEN_OPEN_CURLY,
	TOKEN_CLOSE_CURLY,
	TOKEN_EQUAL_SIGN,
	TOKEN_OPEN_SQUARE,
	TOKEN_CLOSE_SQUARE,
	TOKEN_VARIABLE,
	TOKEN_AMPERSAND,
	TOKEN_BACKSLASH,
	TOKEN_QMARK,
} tokenType;
240 
/* A lexical token together with the scope it was read in. */
typedef struct {
	tokenType		type;			/* class of the token */
	keywordId		keyword;		/* keyword ID, or KEYWORD_NONE */
	vString *		string;			/* text of identifiers, variables and
									 * strings */
	vString *		scope;			/* name of the enclosing scope; empty if
									 * none */
	unsigned long 	lineNumber;		/* input line the token was read at */
	MIOPos			filePosition;	/* input position the token was read at */
	int 			parentKind; /* -1 if none */
	bool			anonymous;	/* true if token specifies
								 * an anonymous class */
} tokenInfo;
252 
/* Language IDs; they are not assigned anywhere in this part of the file. */
static langType Lang_php;
static langType Lang_zephir;

static bool InPhp = false; /* whether we are between <? ?> */
/* whether the next token may be a keyword, e.g. not after "::" or "->" */
static bool MayBeKeyword = true;

/* current statement details; readToken() resets them on ';', '{' and '}' */
static struct {
	accessType access;
	implType impl;
} CurrentStatement;

/* Current namespace (the misspelled identifier is used throughout the file,
 * so it is left as is) */
static vString *CurrentNamesapce;
/* Cache variable to build the tag's scope.  It has no real meaning outside
 * of initPhpEntry()'s scope. */
static vString *FullScope;
/* The class name specified at "extends" keyword in the current class
 * definition. Used to resolve "parent" in return type. */
static vString *ParentClass;

/* Pool the newToken()/deleteToken() macros take tokens from and return
 * them to */
static objPool *TokenPool = NULL;
276 
phpScopeSeparatorFor(int kind,int upperScopeKind)277 static const char *phpScopeSeparatorFor (int kind, int upperScopeKind)
278 {
279 	return scopeSeparatorFor (getInputLanguage(), kind, upperScopeKind);
280 }
281 
accessToString(const accessType access)282 static const char *accessToString (const accessType access)
283 {
284 	static const char *const names[COUNT_ACCESS] = {
285 		"undefined",
286 		"private",
287 		"protected",
288 		"public"
289 	};
290 
291 	Assert (access < COUNT_ACCESS);
292 
293 	return names[access];
294 }
295 
implToString(const implType impl)296 static const char *implToString (const implType impl)
297 {
298 	static const char *const names[COUNT_IMPL] = {
299 		"undefined",
300 		"abstract"
301 	};
302 
303 	Assert (impl < COUNT_IMPL);
304 
305 	return names[impl];
306 }
307 
/* Initializes the tag entry @e for @token as a tag of kind @kind.
 *
 * Name, line and position come from @token.  The access field is only set
 * when @access is not ACCESS_UNDEFINED.  The scope is the current namespace
 * followed by the token's own scope (either may be empty); it is assembled
 * in the shared FullScope buffer, so the entry's scope name is only valid
 * until the next call.  The scope kind is the token's parent kind when the
 * token has a scope, K_NAMESPACE when only a namespace is set. */
static void initPhpEntry (tagEntryInfo *const e, const tokenInfo *const token,
						  const phpKind kind, const accessType access)
{
	const bool inNamespace = vStringLength (CurrentNamesapce) > 0;
	const bool hasScope = vStringLength (token->scope) > 0;
	int scopeKind = -1;

	initTagEntry (e, vStringValue (token->string), kind);

	e->lineNumber	= token->lineNumber;
	e->filePosition	= token->filePosition;

	if (access != ACCESS_UNDEFINED)
		e->extensionFields.access = accessToString (access);

	/* build "<namespace><separator><scope>" in FullScope */
	vStringClear (FullScope);
	if (inNamespace)
	{
		scopeKind = K_NAMESPACE;
		vStringCat (FullScope, CurrentNamesapce);
	}
	if (hasScope)
	{
		scopeKind = token->parentKind;
		if (inNamespace)
			vStringCatS (FullScope,
						 phpScopeSeparatorFor (scopeKind, K_NAMESPACE));
		vStringCat (FullScope, token->scope);
	}

	if (inNamespace || hasScope)
	{
		Assert (scopeKind >= 0);

		e->extensionFields.scopeKindIndex = scopeKind;
		e->extensionFields.scopeName = vStringValue (FullScope);
	}

	if (token->anonymous)
		markTagExtraBit (e, XTAG_ANONYMOUS);
}
354 
/* Emits the tag described by @e: first through makeTagEntry(), then through
 * makeQualifiedTagEntry(). */
static void makePhpTagEntry (tagEntryInfo *const e)
{
	makeTagEntry (e);
	makeQualifiedTagEntry (e);
}
360 
/* Fills the typeref field of @e from the type name @rtype.
 *
 * "self" is resolved to the scope of @token when that scope is not empty,
 * using the name of the token's parent kind ("unknown" if it has none).
 * "parent" is resolved to ParentClass when that is set and not empty, as a
 * "class".  Anything else is recorded verbatim with the kind "unknown". */
static void fillTypeRefField (tagEntryInfo *const e,
							  const vString *const rtype, const tokenInfo *const token)
{
	const size_t typeLen = vStringLength (rtype);
	const char *kindName = "unknown";
	const char *refName = vStringValue (rtype);

	if (typeLen == 4
		&& strcmp (vStringValue (rtype), "self") == 0
		&& vStringLength (token->scope) > 0)
	{
		if (token->parentKind != -1)
			kindName = PhpKinds [token->parentKind].name;
		refName = vStringValue (token->scope);
	}
	else if (typeLen == 6
			 && strcmp (vStringValue (rtype), "parent") == 0
			 && ParentClass && vStringLength (ParentClass) > 0)
	{
		kindName = "class";
		refName = vStringValue (ParentClass);
	}

	e->extensionFields.typeRef [0] = kindName;
	e->extensionFields.typeRef [1] = refName;
}
387 
/* Emits a tag of kind @kind for @token with visibility @access, unless the
 * kind is disabled.  When @typeName is not NULL it is recorded as the tag's
 * type reference. */
static void makeTypedPhpTag (const tokenInfo *const token, const phpKind kind,
							 const accessType access, vString* typeName)
{
	tagEntryInfo entry;

	if (! PhpKinds[kind].enabled)
		return;

	initPhpEntry (&entry, token, kind, access);
	if (typeName != NULL)
		fillTypeRefField (&entry, typeName, token);
	makePhpTagEntry (&entry);
}
401 
makeSimplePhpTag(const tokenInfo * const token,const phpKind kind,const accessType access)402 static void makeSimplePhpTag (const tokenInfo *const token, const phpKind kind,
403 							  const accessType access)
404 {
405 	makeTypedPhpTag (token, kind, access, NULL);
406 }
407 
makeNamespacePhpTag(const tokenInfo * const token,const vString * const name)408 static void makeNamespacePhpTag (const tokenInfo *const token, const vString *const name)
409 {
410 	if (PhpKinds[K_NAMESPACE].enabled)
411 	{
412 		tagEntryInfo e;
413 
414 		initTagEntry (&e, vStringValue (name), K_NAMESPACE);
415 
416 		e.lineNumber	= token->lineNumber;
417 		e.filePosition	= token->filePosition;
418 
419 		makePhpTagEntry (&e);
420 	}
421 }
422 
/* Emits a tag of kind @kind for @token, unless the kind is disabled.
 * @impl is recorded as the implementation field when it is not
 * IMPL_UNDEFINED, and @inheritance as the inheritance field when it is not
 * empty. */
static void makeClassOrIfaceTag (const phpKind kind, const tokenInfo *const token,
								 vString *const inheritance, const implType impl)
{
	tagEntryInfo entry;

	if (! PhpKinds[kind].enabled)
		return;

	initPhpEntry (&entry, token, kind, ACCESS_UNDEFINED);

	if (impl != IMPL_UNDEFINED)
		entry.extensionFields.implementation = implToString (impl);
	if (vStringLength (inheritance) > 0)
		entry.extensionFields.inheritance = vStringValue (inheritance);

	makePhpTagEntry (&entry);
}
440 
makeFunctionTag(const tokenInfo * const token,const vString * const arglist,const vString * const rtype,const accessType access,const implType impl)441 static void makeFunctionTag (const tokenInfo *const token,
442 							 const vString *const arglist,
443 							 const vString *const rtype,
444 							 const accessType access, const implType impl)
445 {
446 	if (PhpKinds[K_FUNCTION].enabled)
447 	{
448 		tagEntryInfo e;
449 
450 		initPhpEntry (&e, token, K_FUNCTION, access);
451 
452 		if (impl != IMPL_UNDEFINED)
453 			e.extensionFields.implementation = implToString (impl);
454 		if (arglist)
455 			e.extensionFields.signature = vStringValue (arglist);
456 		if (rtype)
457 			fillTypeRefField (&e, rtype, token);
458 
459 		makePhpTagEntry (&e);
460 	}
461 }
462 
/* Allocates a token with two fresh, empty strings.  The remaining fields
 * are left unset here; clearPoolToken() assigns them. */
static void *newPoolToken (void *createArg CTAGS_ATTR_UNUSED)
{
	tokenInfo *const fresh = xMalloc (1, tokenInfo);

	fresh->string = vStringNew ();
	fresh->scope = vStringNew ();

	return fresh;
}
471 
clearPoolToken(void * data)472 static void clearPoolToken (void *data)
473 {
474 	tokenInfo *token = data;
475 
476 	token->type			= TOKEN_UNDEFINED;
477 	token->keyword		= KEYWORD_NONE;
478 	token->lineNumber   = getInputLineNumber ();
479 	token->filePosition = getInputFilePosition ();
480 	token->parentKind	= -1;
481 	token->anonymous	= false;
482 	vStringClear (token->string);
483 	vStringClear (token->scope);
484 }
485 
deletePoolToken(void * data)486 static void deletePoolToken (void *data)
487 {
488 	tokenInfo *token = data;
489 	vStringDelete (token->string);
490 	vStringDelete (token->scope);
491 	eFree (token);
492 }
493 
/* Copies @src into @dest.  The scope string is only copied when @scope is
 * true; otherwise @dest keeps its own scope string (its parent kind is
 * copied in either case). */
static void copyToken (tokenInfo *const dest, const tokenInfo *const src,
					   bool scope)
{
	/* scalar fields */
	dest->type = src->type;
	dest->keyword = src->keyword;
	dest->parentKind = src->parentKind;
	dest->anonymous = src->anonymous;
	dest->lineNumber = src->lineNumber;
	dest->filePosition = src->filePosition;

	/* string contents */
	vStringCopy (dest->string, src->string);
	if (scope)
		vStringCopy (dest->scope, src->scope);
}
507 
/* Debugging helpers, disabled by default.  Change "#if 0" to "#if 1" to
 * build them. */
#if 0
#include <stdio.h>

/* Returns a printable name for the token type @type; never NULL, so the
 * result can be passed to a "%s" conversion. */
static const char *tokenTypeName (const tokenType type)
{
	switch (type)
	{
		case TOKEN_UNDEFINED:		return "undefined";
		case TOKEN_EOF:				return "EOF";
		case TOKEN_CHARACTER:		return "character";
		case TOKEN_CLOSE_PAREN:		return "')'";
		case TOKEN_SEMICOLON:		return "';'";
		case TOKEN_COLON:			return "':'";
		case TOKEN_COMMA:			return "','";
		case TOKEN_OPEN_PAREN:		return "'('";
		case TOKEN_OPERATOR:		return "operator";
		case TOKEN_IDENTIFIER:		return "identifier";
		case TOKEN_KEYWORD:			return "keyword";
		case TOKEN_STRING:			return "string";
		case TOKEN_PERIOD:			return "'.'";
		case TOKEN_OPEN_CURLY:		return "'{'";
		case TOKEN_CLOSE_CURLY:		return "'}'";
		case TOKEN_EQUAL_SIGN:		return "'='";
		case TOKEN_OPEN_SQUARE:		return "'['";
		case TOKEN_CLOSE_SQUARE:	return "']'";
		case TOKEN_VARIABLE:		return "variable";
		case TOKEN_AMPERSAND:		return "'&'";
		case TOKEN_BACKSLASH:		return "'\\'";
		case TOKEN_QMARK:			return "'?'";
	}
	return "(unknown)";
}

/* Dumps @token to stderr: its address, type, line and scope, followed by
 * its text (identifiers, strings, variables) or its keyword spelling. */
static void printToken (const tokenInfo *const token)
{
	fprintf (stderr, "%p:\n\ttype:\t%s\n\tline:\t%lu\n\tscope:\t%s\n", (void *) token,
			 tokenTypeName (token->type),
			 token->lineNumber,
			 vStringValue (token->scope));
	switch (token->type)
	{
		case TOKEN_IDENTIFIER:
		case TOKEN_STRING:
		case TOKEN_VARIABLE:
			fprintf (stderr, "\tcontent:\t%s\n", vStringValue (token->string));
			break;

		case TOKEN_KEYWORD:
		{
			size_t n = ARRAY_SIZE (PhpKeywordTable);
			size_t i;

			/* reverse lookup of the keyword spelling */
			fprintf (stderr, "\tkeyword:\t");
			for (i = 0; i < n; i++)
			{
				if (PhpKeywordTable[i].id == token->keyword)
				{
					fprintf (stderr, "%s\n", PhpKeywordTable[i].name);
					break;
				}
			}
			if (i >= n)
				fprintf (stderr, "(unknown)\n");
			break;
		}

		default: break;
	}
}
#endif
574 
/* Appends @extra to the scope of @token.  When the scope is not empty yet,
 * the separator for the token's parent kind under @kindOfUpperScope is
 * inserted first. */
static void addToScope (tokenInfo *const token, const vString *const extra,
			int kindOfUpperScope)
{
	vString *const scope = token->scope;

	if (vStringLength (scope) > 0)
		vStringCatS (scope,
					 phpScopeSeparatorFor (token->parentKind, kindOfUpperScope));

	vStringCat (scope, extra);
}
588 
/* Consumes input up to and including the next occurrence of @c.  Returns
 * @c, or EOF if the input ended first. */
static int skipToCharacter (const int c)
{
	for (;;)
	{
		const int ch = getcFromInputFile ();

		if (ch == EOF || ch == c)
			return ch;
	}
}
598 
/* Reads a quoted string whose opening quote was already consumed, up to
 * the closing @delimiter or the end of input, and appends its content to
 * @string.  A backslash is dropped and the character following it is
 * appended literally, so an escaped delimiter does not end the string. */
static void parseString (vString *const string, const int delimiter)
{
	for (;;)
	{
		int ch = getcFromInputFile ();

		if (ch == '\\')
		{
			/* escape sequence: keep only the escaped character */
			ch = getcFromInputFile ();
			if (ch == EOF)
				break;
			vStringPut (string, (char) ch);
			continue;
		}

		if (ch == EOF || ch == delimiter)
			break;

		vStringPut (string, (char) ch);
	}
}
613 
614 /* Strips @indent_len characters from lines in @string to get the correct
615  * string value for an indented heredoc (PHP 7.3+).
616  * This doesn't handle invalid values specially and might yield surprising
617  * results with them, but it doesn't really matter as it's invalid anyway. */
stripHeredocIndent(vString * const string,size_t indent_len)618 static void stripHeredocIndent (vString *const string, size_t indent_len)
619 {
620 	char *str = vStringValue (string);
621 	size_t str_len = vStringLength (string);
622 	char *p = str;
623 	size_t new_len = str_len;
624 	bool at_line_start = true;
625 
626 	while (*p)
627 	{
628 		if (at_line_start)
629 		{
630 			size_t p_len;
631 			size_t strip_len;
632 
633 			p_len = str_len - (p - str);
634 			strip_len = p_len < indent_len ? p_len : indent_len;
635 			memmove (p, p + strip_len, p_len - strip_len);
636 			p += strip_len;
637 			new_len -= strip_len;
638 		}
639 		/* CRLF is already normalized as LF */
640 		at_line_start = (*p == '\r' || *p == '\n');
641 		p++;
642 	}
643 	vStringTruncate (string, new_len);
644 }
645 
646 /* reads a PHP >= 7.3 HereDoc or a NowDoc (the part after the <<<).
647  * 	<<<[ \t]*(ID|'ID'|"ID")
648  * 	...
649  * 	[ \t]*ID[^:indent-char:];?
650  *
651  * note that:
652  *  1) starting ID must be immediately followed by a newline;
653  *  2) closing ID is the same as opening one;
654  *  3) closing ID must not be immediately followed by an identifier character;
655  *  4) optional indentation of the closing ID is stripped from body lines,
656  *     which lines must have the exact same prefix indentation.
657  *
658  * This is slightly relaxed from PHP < 7.3, where the closing ID had to be the
659  * only thing on its line, with the only exception of a semicolon right after
660  * the ID.
661  *
662  * Example of a single valid heredoc:
663  * 	<<< FOO
664  * 	something
665  * 	something else
666  * 	FOO_this is not an end
667  * 	FOO;
668  * 	# previous line was the end, but the semicolon wasn't required
669  *
670  * Another example using indentation and more code after the heredoc:
671  * 	<<<FOO
672  * 		something
673  * 		something else
674  * 		FOO . 'hello';
675  * 	# the heredoc ends at FOO, and leading tabs are stripped from the body.
676  * 	# ". 'hello'" is a normal concatenation operator and the string "hello".
677  */
parseHeredoc(vString * const string)678 static void parseHeredoc (vString *const string)
679 {
680 	int c;
681 	unsigned int len;
682 	char delimiter[64]; /* arbitrary limit, but more is crazy anyway */
683 	int quote = 0;
684 
685 	do
686 	{
687 		c = getcFromInputFile ();
688 	}
689 	while (c == ' ' || c == '\t');
690 
691 	if (c == '\'' || c == '"')
692 	{
693 		quote = c;
694 		c = getcFromInputFile ();
695 	}
696 	for (len = 0; len < ARRAY_SIZE (delimiter) - 1; len++)
697 	{
698 		if (! isIdentChar (c))
699 			break;
700 		delimiter[len] = (char) c;
701 		c = getcFromInputFile ();
702 	}
703 	delimiter[len] = 0;
704 
705 	if (len == 0) /* no delimiter, give up */
706 		goto error;
707 	if (quote)
708 	{
709 		if (c != quote) /* no closing quote for quoted identifier, give up */
710 			goto error;
711 		c = getcFromInputFile ();
712 	}
713 	if (c != '\r' && c != '\n') /* missing newline, give up */
714 		goto error;
715 
716 	do
717 	{
718 		c = getcFromInputFile ();
719 
720 		vStringPut (string, (char) c);
721 		if (c == '\r' || c == '\n')
722 		{
723 			/* new line, check for a delimiter right after.  No need to handle
724 			 * CRLF, getcFromInputFile() normalizes it to LF already. */
725 			const size_t prev_string_len = vStringLength (string) - 1;
726 			size_t indent_len = 0;
727 
728 			c = getcFromInputFile ();
729 			while (c == ' ' || c == '\t')
730 			{
731 				vStringPut (string, (char) c);
732 				c = getcFromInputFile ();
733 				indent_len++;
734 			}
735 
736 			for (len = 0; c != 0 && (c - delimiter[len]) == 0; len++)
737 				c = getcFromInputFile ();
738 
739 			if (delimiter[len] != 0)
740 				ungetcToInputFile (c);
741 			else if (! isIdentChar (c))
742 			{
743 				/* line start matched the delimiter and has a separator, we're done */
744 				ungetcToInputFile (c);
745 
746 				/* strip trailing newline and indent of the end delimiter */
747 				vStringTruncate (string, prev_string_len);
748 
749 				/* strip indent from the value if needed */
750 				if (indent_len > 0)
751 					stripHeredocIndent (string, indent_len);
752 				break;
753 			}
754 			/* if we are here it wasn't a delimiter, so put everything in the
755 			 * string */
756 			vStringNCatS (string, delimiter, len);
757 		}
758 	}
759 	while (c != EOF);
760 
761 	return;
762 
763 error:
764 	ungetcToInputFile (c);
765 }
766 
/* Appends @firstChar and every identifier character following it in the
 * input to @string.  The first character that does not belong to the
 * identifier is pushed back.  @firstChar is appended unconditionally. */
static void parseIdentifier (vString *const string, const int firstChar)
{
	int ch = firstChar;

	for (;;)
	{
		vStringPut (string, (char) ch);
		ch = getcFromInputFile ();
		if (! isIdentChar (ch))
			break;
	}
	ungetcToInputFile (ch);
}
777 
/* Tells whether @c is one of the six ASCII whitespace characters: tab,
 * space, vertical tab, line feed, carriage return or form feed. */
static bool isSpace (int c)
{
	switch (c)
	{
		case '\t':
		case ' ':
		case '\v':
		case '\n':
		case '\r':
		case '\f':
			return true;
		default:
			return false;
	}
}
783 
/* Starting with the already read character @c, skips whitespace in the
 * input.  Returns the first character that is not whitespace, which is @c
 * itself if it is not whitespace (nothing is read in that case). */
static int skipWhitespaces (int c)
{
	int ch;

	for (ch = c; isSpace (ch); ch = getcFromInputFile ())
		; /* nothing to do, just advance */

	return ch;
}
790 
/* <script[:white:]+language[:white:]*=[:white:]*(php|'php'|"php")[:white:]*>
 *
 * This is ugly, but the whole "<script language=php>" tag is and we can't
 * really do better without adding a lot of code only for this
 *
 * @c is the first character of the candidate tag, already read by the
 * caller; the rest is read from the input.  Matching is case-insensitive.
 * Returns true if the whole tag matched.  The checks rely on short-circuit
 * evaluation, so reading stops at the first mismatch; the characters
 * consumed up to that point are not pushed back. */
static bool isOpenScriptLanguagePhp (int c)
{
	int quote = 0;

	/* <script[:white:]+language[:white:]*= */
	if (c                                   != '<' ||
		tolower ((c = getcFromInputFile ()))         != 's' ||
		tolower ((c = getcFromInputFile ()))         != 'c' ||
		tolower ((c = getcFromInputFile ()))         != 'r' ||
		tolower ((c = getcFromInputFile ()))         != 'i' ||
		tolower ((c = getcFromInputFile ()))         != 'p' ||
		tolower ((c = getcFromInputFile ()))         != 't' ||
		/* at least one whitespace is required after "script" */
		! isSpace ((c = getcFromInputFile ()))              ||
		tolower ((c = skipWhitespaces (c))) != 'l' ||
		tolower ((c = getcFromInputFile ()))         != 'a' ||
		tolower ((c = getcFromInputFile ()))         != 'n' ||
		tolower ((c = getcFromInputFile ()))         != 'g' ||
		tolower ((c = getcFromInputFile ()))         != 'u' ||
		tolower ((c = getcFromInputFile ()))         != 'a' ||
		tolower ((c = getcFromInputFile ()))         != 'g' ||
		tolower ((c = getcFromInputFile ()))         != 'e' ||
		(c = skipWhitespaces (getcFromInputFile ())) != '=')
		return false;

	/* (php|'php'|"php")> */
	c = skipWhitespaces (getcFromInputFile ());
	if (c == '"' || c == '\'')
	{
		/* remember the opening quote, the same one must close the value */
		quote = c;
		c = getcFromInputFile ();
	}
	if (tolower (c)                         != 'p' ||
		tolower ((c = getcFromInputFile ()))         != 'h' ||
		tolower ((c = getcFromInputFile ()))         != 'p' ||
		/* the closing quote is only read if there was an opening one */
		(quote != 0 && (c = getcFromInputFile ()) != quote) ||
		(c = skipWhitespaces (getcFromInputFile ())) != '>')
		return false;

	return true;
}
835 
/* Skips non-PHP content until a PHP open tag is found: "<?" (which covers
 * "<?php"), "<?=" or <script language="php">.  "<?xml" is not taken for an
 * open tag.
 *
 * Returns the last character read, or EOF if the input ended without any
 * open tag.  For "<?=" that is the character following the tag; for the
 * other "<?" forms it is the character at which the comparison against
 * "xml" failed, so e.g. the 'p' of "<?php" is handed to the caller. */
static int findPhpStart (void)
{
	int c;
	do
	{
		if ((c = getcFromInputFile ()) == '<')
		{
			c = getcFromInputFile ();
			/* <?, <?= and <?php, but not <?xml */
			if (c == '?')
			{
				c = getcFromInputFile ();
				/* echo tag: PHP code starts right after the '=' */
				if (c == '=')
				{
					c = getcFromInputFile ();
					break;
				}
				/* don't enter PHP mode on "<?xml", yet still support short open tags (<?) */
				else if (tolower (c)                          != 'x' ||
				         tolower ((c = getcFromInputFile ())) != 'm' ||
				         tolower ((c = getcFromInputFile ())) != 'l')
				{
					break;
				}
			}
			/* <script language="php"> */
			else
			{
				ungetcToInputFile (c);
				if (isOpenScriptLanguagePhp ('<'))
					break;
			}
		}
	}
	while (c != EOF);

	return c;
}
872 
skipSingleComment(void)873 static int skipSingleComment (void)
874 {
875 	int c;
876 	do
877 	{
878 		c = getcFromInputFile ();
879 		/* ?> in single-line comments leaves PHP mode */
880 		if (c == '?')
881 		{
882 			int next = getcFromInputFile ();
883 			if (next == '>')
884 				InPhp = false;
885 			else
886 				ungetcToInputFile (next);
887 		}
888 	} while (InPhp && c != EOF && c != '\n' && c != '\r');
889 	return c;
890 }
891 
/* Reads the next token from the input into @token.
 *
 * Type, keyword and string of @token are reset first; its scope is left
 * untouched.  Content outside of PHP mode is skipped and, when not empty,
 * promised to the HTML parser.  Comments are skipped.  The token location
 * is the input position right after its first character has been read,
 * except for quoted strings, where it is the position after the closing
 * quote.
 *
 * Side effects on file-level state: InPhp is updated when open/close tags
 * are crossed, CurrentStatement is reset on ';', '{' and '}', and
 * MayBeKeyword is set to false after "::" and "->" so that the following
 * identifier is not looked up as a keyword. */
static void readToken (tokenInfo *const token)
{
	int c;
	bool nextMayBeKeyword = true;

	token->type		= TOKEN_UNDEFINED;
	token->keyword	= KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:

	if (! InPhp)
	{
		/* The area handed to makePromise() is delimited with input line
		 * numbers and offsets, so the start must be taken with
		 * getInputLineNumber() like the end is; the source line number is
		 * passed separately. */
		unsigned long startSourceLineNumber = getSourceLineNumber ();
		unsigned long startLineNumber = getInputLineNumber ();
		int startLineOffset = getInputLineOffset ();

		c = findPhpStart ();
		if (c != EOF)
			InPhp = true;

		unsigned long endLineNumber = getInputLineNumber ();
		int endLineOffset = getInputLineOffset ();

		/* only make a promise for a non-empty area */
		if ((startLineNumber != endLineNumber)
			|| (startLineOffset != endLineOffset))
			makePromise ("HTML", startLineNumber, startLineOffset,
						 endLineNumber, endLineOffset, startSourceLineNumber);
	}
	else
		c = getcFromInputFile ();

	c = skipWhitespaces (c);

	token->lineNumber   = getInputLineNumber ();
	token->filePosition = getInputFilePosition ();

	switch (c)
	{
		case EOF: token->type = TOKEN_EOF;					break;
		case '(': token->type = TOKEN_OPEN_PAREN;			break;
		case ')': token->type = TOKEN_CLOSE_PAREN;			break;
		case ';': token->type = TOKEN_SEMICOLON;			break;
		case ',': token->type = TOKEN_COMMA;				break;
		case '.': token->type = TOKEN_PERIOD;				break;
		case '{': token->type = TOKEN_OPEN_CURLY;			break;
		case '}': token->type = TOKEN_CLOSE_CURLY;			break;
		case '[': token->type = TOKEN_OPEN_SQUARE;			break;
		case ']': token->type = TOKEN_CLOSE_SQUARE;			break;
		case '&': token->type = TOKEN_AMPERSAND;			break;
		case '\\': token->type = TOKEN_BACKSLASH;			break;

		case ':':
		{
			int d = getcFromInputFile ();
			if (d == c) /* :: */
			{
				nextMayBeKeyword = false;
				token->type = TOKEN_OPERATOR;
			}
			else
			{
				ungetcToInputFile (d);
				token->type = TOKEN_COLON;
			}
			break;
		}

		case '=':
		{
			int d = getcFromInputFile ();
			if (d == '=' || d == '>') /* == or => */
				token->type = TOKEN_OPERATOR;
			else
			{
				ungetcToInputFile (d);
				token->type = TOKEN_EQUAL_SIGN;
			}
			break;
		}

		case '\'':
		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			/* a string token is located at its end */
			token->lineNumber = getInputLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '<':
		{
			int d = getcFromInputFile ();
			if (d == '/')
			{
				/* </script[:white:]*> */
				if (tolower ((d = getcFromInputFile ())) == 's' &&
					tolower ((d = getcFromInputFile ())) == 'c' &&
					tolower ((d = getcFromInputFile ())) == 'r' &&
					tolower ((d = getcFromInputFile ())) == 'i' &&
					tolower ((d = getcFromInputFile ())) == 'p' &&
					tolower ((d = getcFromInputFile ())) == 't' &&
					(d = skipWhitespaces (getcFromInputFile ())) == '>')
				{
					InPhp = false;
					goto getNextChar;
				}
				else
				{
					/* only the last character read is given back */
					ungetcToInputFile (d);
					token->type = TOKEN_UNDEFINED;
				}
			}
			else if (d == '<' && (d = getcFromInputFile ()) == '<')
			{
				/* <<< starts a heredoc or nowdoc */
				token->type = TOKEN_STRING;
				parseHeredoc (token->string);
			}
			else
			{
				ungetcToInputFile (d);
				token->type = TOKEN_UNDEFINED;
			}
			break;
		}

		case '#': /* comment */
			skipSingleComment ();
			goto getNextChar;
			break;

		case '+':
		case '-':
		case '*':
		case '%':
		{
			int d = getcFromInputFile ();
			if (c == '-' && d == '>') /* -> */
				nextMayBeKeyword = false;
			else if (d != '=') /* not a compound assignment */
				ungetcToInputFile (d);
			token->type = TOKEN_OPERATOR;
			break;
		}

		case '/': /* division or comment start */
		{
			int d = getcFromInputFile ();
			if (d == '/') /* single-line comment */
			{
				skipSingleComment ();
				goto getNextChar;
			}
			else if (d == '*') /* multi-line comment */
			{
				/* look for the closing star-slash: find a '*', then check
				 * whether a '/' follows it */
				do
				{
					c = skipToCharacter ('*');
					if (c != EOF)
					{
						c = getcFromInputFile ();
						if (c == '/')
							break;
						else
							ungetcToInputFile (c);
					}
				} while (c != EOF && c != '\0');
				goto getNextChar;
			}
			else
			{
				if (d != '=')
					ungetcToInputFile (d);
				token->type = TOKEN_OPERATOR;
			}
			break;
		}

		case '$': /* variable start */
		{
			int d = getcFromInputFile ();
			if (! isIdentChar (d))
			{
				ungetcToInputFile (d);
				token->type = TOKEN_UNDEFINED;
			}
			else
			{
				/* the token string holds the name without the '$' */
				parseIdentifier (token->string, d);
				token->type = TOKEN_VARIABLE;
			}
			break;
		}

		case '?': /* maybe the end of the PHP chunk */
		{
			int d = getcFromInputFile ();
			if (d == '>')
			{
				InPhp = false;
				goto getNextChar;
			}
			else
			{
				ungetcToInputFile (d);
				token->type = TOKEN_QMARK;
			}
			break;
		}

		default:
			if (! isIdentChar (c))
				token->type = TOKEN_UNDEFINED;
			else
			{
				parseIdentifier (token->string, c);
				/* MayBeKeyword was set by the previous call */
				if (MayBeKeyword)
					token->keyword = lookupCaseKeyword (vStringValue (token->string), getInputLanguage ());
				else
					token->keyword = KEYWORD_NONE;

				if (token->keyword == KEYWORD_NONE)
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
			}
			break;
	}

	if (token->type == TOKEN_SEMICOLON ||
		token->type == TOKEN_OPEN_CURLY ||
		token->type == TOKEN_CLOSE_CURLY)
	{
		/* reset current statement details on statement end, and when entering
		 * a deeper scope.
		 * it is a bit ugly to do this in readToken(), but it makes everything
		 * a lot simpler. */
		CurrentStatement.access = ACCESS_UNDEFINED;
		CurrentStatement.impl = IMPL_UNDEFINED;
	}

	MayBeKeyword = nextMayBeKeyword;
}
1134 
/* reads a (possibly namespaced) name such as
 * 	Foo
 * 	Foo\Bar
 * 	\Foo\Bar
 * starting at the current @token, and stops on the first token that is
 * neither an identifier nor a backslash (which is left in @token).
 *
 * if @name is not NULL, the text of the name is appended to it.
 * if @lastToken is not NULL, every consumed token is copied into it, so it
 * ends up holding the last token that was part of the name. */
static void readQualifiedName (tokenInfo *const token, vString *name,
                               tokenInfo *const lastToken)
{
	for (;;)
	{
		const bool isSeparator = (token->type == TOKEN_BACKSLASH);

		if (! isSeparator && token->type != TOKEN_IDENTIFIER)
			break;

		if (name != NULL)
		{
			if (isSeparator)
				vStringPut (name, '\\');
			else
				vStringCat (name, token->string);
		}

		if (lastToken != NULL)
			copyToken (lastToken, token, true);

		readToken (token);
	}
}
1152 
1153 static void enterScope (tokenInfo *const parentToken,
1154 						const vString *const extraScope,
1155 						const int parentKind);
1156 
/* if @token is an opening parenthesis, skips everything up to and including
 * the matching closing one (nested parentheses are balanced), then reads
 * the token following it.  stops early on end of file.
 * does nothing if @token is not an opening parenthesis. */
static void skipOverParens (tokenInfo *token)
{
	int nesting;

	if (token->type != TOKEN_OPEN_PAREN)
		return;

	nesting = 1;
	do
	{
		readToken (token);
		if (token->type == TOKEN_OPEN_PAREN)
			nesting++;
		else if (token->type == TOKEN_CLOSE_PAREN)
			nesting--;
	}
	while (nesting > 0 && token->type != TOKEN_EOF);

	/* move past the closing parenthesis */
	readToken (token);
}
1178 
1179 /* parses a class or an interface:
1180  * 	class Foo {}
1181  * 	class Foo extends Bar {}
1182  * 	class Foo extends Bar implements iFoo, iBar {}
1183  * 	interface iFoo {}
1184  * 	interface iBar extends iFoo {}
1185  *
1186  * if @name is not NULL, parses an anonymous class with name @name
1187  * 	new class {}
1188  * 	new class(1, 2) {}
1189  * 	new class(1, 2) extends Foo implements iFoo, iBar {} */
parseClassOrIface(tokenInfo * const token,const phpKind kind,const tokenInfo * name)1190 static bool parseClassOrIface (tokenInfo *const token, const phpKind kind,
1191                                   const tokenInfo *name)
1192 {
1193 	bool readNext = true;
1194 	implType impl = CurrentStatement.impl;
1195 	tokenInfo *nameFree = NULL;
1196 	vString *inheritance = NULL;
1197 	vString *parent = NULL;
1198 
1199 	readToken (token);
1200 	if (name) /* anonymous class */
1201 	{
1202 		/* skip possible construction arguments */
1203 		skipOverParens (token);
1204 	}
1205 	else /* normal, named class */
1206 	{
1207 		if (token->type != TOKEN_IDENTIFIER)
1208 			return false;
1209 
1210 		name = nameFree = newToken ();
1211 		copyToken (nameFree, token, true);
1212 
1213 		readToken (token);
1214 	}
1215 
1216 	inheritance = vStringNew ();
1217 	/* read every identifiers, keywords and commas, and assume each
1218 	 *  identifier (not keyword) is an inheritance
1219 	 * (like in "class Foo extends Bar implements iA, iB") */
1220 	enum { inheritance_initial,
1221 		   inheritance_extends,
1222 		   inheritance_implements
1223 	} istat = inheritance_initial;
1224 	while (token->type == TOKEN_IDENTIFIER ||
1225 	       token->type == TOKEN_BACKSLASH ||
1226 	       token->type == TOKEN_KEYWORD ||
1227 	       token->type == TOKEN_COMMA)
1228 	{
1229 		if (token->type == TOKEN_IDENTIFIER || token->type == TOKEN_BACKSLASH)
1230 		{
1231 			vString *qualifiedName = vStringNew ();
1232 
1233 			readQualifiedName (token, qualifiedName, NULL);
1234 			if (vStringLength (inheritance) > 0)
1235 				vStringPut (inheritance, ',');
1236 			vStringCat (inheritance, qualifiedName);
1237 			if (istat == inheritance_extends && !parent)
1238 				parent = qualifiedName;
1239 			else
1240 				vStringDelete (qualifiedName);
1241 		}
1242 		else
1243 		{
1244 			if (token->type == TOKEN_KEYWORD)
1245 			{
1246 				if (token->keyword == KEYWORD_extends)
1247 					istat = inheritance_extends;
1248 				else if (token->keyword == KEYWORD_implements)
1249 					istat = inheritance_implements;
1250 			}
1251 			readToken (token);
1252 		}
1253 	}
1254 
1255 	makeClassOrIfaceTag (kind, name, inheritance, impl);
1256 
1257 	if (token->type == TOKEN_OPEN_CURLY)
1258 	{
1259 		vString *backup = ParentClass;
1260 		ParentClass = parent;
1261 		enterScope (token, name->string, kind);
1262 		ParentClass = backup;
1263 	}
1264 	else
1265 		readNext = false;
1266 
1267 	if (nameFree)
1268 		deleteToken (nameFree);
1269 	vStringDelete (parent);
1270 	vStringDelete (inheritance);
1271 
1272 	return readNext;
1273 }
1274 
1275 /* parses a trait:
1276  * 	trait Foo {} */
parseTrait(tokenInfo * const token)1277 static bool parseTrait (tokenInfo *const token)
1278 {
1279 	bool readNext = true;
1280 	tokenInfo *name;
1281 
1282 	readToken (token);
1283 	if (token->type != TOKEN_IDENTIFIER)
1284 		return false;
1285 
1286 	name = newToken ();
1287 	copyToken (name, token, true);
1288 
1289 	makeSimplePhpTag (name, K_TRAIT, ACCESS_UNDEFINED);
1290 
1291 	readToken (token);
1292 	if (token->type == TOKEN_OPEN_CURLY)
1293 		enterScope (token, name->string, K_TRAIT);
1294 	else
1295 		readNext = false;
1296 
1297 	deleteToken (name);
1298 
1299 	return readNext;
1300 }
1301 
1302 /* parse a function
1303  *
1304  * if @name is NULL, parses a normal function
1305  * 	function myfunc($foo, $bar) {}
1306  * 	function &myfunc($foo, $bar) {}
1307  * 	function myfunc($foo, $bar) : type {}
1308  * 	function myfunc($foo, $bar) : ?type {}
1309  *
1310  * if @name is not NULL, parses an anonymous function with name @name
1311  * 	$foo = function($foo, $bar) {}
1312  * 	$foo = function&($foo, $bar) {}
1313  * 	$foo = function($foo, $bar) use ($x, &$y) {}
1314  * 	$foo = function($foo, $bar) use ($x, &$y) : type {}
1315  * 	$foo = function($foo, $bar) use ($x, &$y) : ?type {} */
parseFunction(tokenInfo * const token,const tokenInfo * name)1316 static bool parseFunction (tokenInfo *const token, const tokenInfo *name)
1317 {
1318 	bool readNext = true;
1319 	accessType access = CurrentStatement.access;
1320 	implType impl = CurrentStatement.impl;
1321 	tokenInfo *nameFree = NULL;
1322 	vString *rtype = NULL;
1323 	vString *arglist = NULL;
1324 
1325 	readToken (token);
1326 	/* skip a possible leading ampersand (return by reference) */
1327 	if (token->type == TOKEN_AMPERSAND)
1328 		readToken (token);
1329 
1330 	if (! name)
1331 	{
1332 		if (token->type != TOKEN_IDENTIFIER && token->type != TOKEN_KEYWORD)
1333 			return false;
1334 
1335 		name = nameFree = newToken ();
1336 		copyToken (nameFree, token, true);
1337 		readToken (token);
1338 	}
1339 
1340 	if (token->type == TOKEN_OPEN_PAREN)
1341 	{
1342 		int depth = 1;
1343 
1344 		arglist = vStringNew ();
1345 		vStringPut (arglist, '(');
1346 		do
1347 		{
1348 			readToken (token);
1349 
1350 			switch (token->type)
1351 			{
1352 				case TOKEN_OPEN_PAREN:  depth++; break;
1353 				case TOKEN_CLOSE_PAREN: depth--; break;
1354 				default: break;
1355 			}
1356 			/* display part */
1357 			switch (token->type)
1358 			{
1359 				case TOKEN_AMPERSAND:		vStringPut (arglist, '&');		break;
1360 				case TOKEN_CLOSE_CURLY:		vStringPut (arglist, '}');		break;
1361 				case TOKEN_CLOSE_PAREN:		vStringPut (arglist, ')');		break;
1362 				case TOKEN_CLOSE_SQUARE:	vStringPut (arglist, ']');		break;
1363 				case TOKEN_COLON:			vStringPut (arglist, ':');		break;
1364 				case TOKEN_COMMA:			vStringCatS (arglist, ", ");	break;
1365 				case TOKEN_EQUAL_SIGN:		vStringCatS (arglist, " = ");	break;
1366 				case TOKEN_OPEN_CURLY:		vStringPut (arglist, '{');		break;
1367 				case TOKEN_OPEN_PAREN:		vStringPut (arglist, '(');		break;
1368 				case TOKEN_OPEN_SQUARE:		vStringPut (arglist, '[');		break;
1369 				case TOKEN_PERIOD:			vStringPut (arglist, '.');		break;
1370 				case TOKEN_SEMICOLON:		vStringPut (arglist, ';');		break;
1371 				case TOKEN_BACKSLASH:		vStringPut (arglist, '\\');		break;
1372 				case TOKEN_STRING:
1373 				{
1374 					vStringPut (arglist, '\'');
1375 					vStringCat  (arglist, token->string);
1376 					vStringPut (arglist, '\'');
1377 					break;
1378 				}
1379 
1380 				case TOKEN_IDENTIFIER:
1381 				case TOKEN_KEYWORD:
1382 				case TOKEN_VARIABLE:
1383 				{
1384 					switch (vStringLast (arglist))
1385 					{
1386 						case 0:
1387 						case ' ':
1388 						case '{':
1389 						case '(':
1390 						case '[':
1391 						case '.':
1392 						case '\\':
1393 							/* no need for a space between those and the identifier */
1394 							break;
1395 
1396 						default:
1397 							vStringPut (arglist, ' ');
1398 							break;
1399 					}
1400 					if (token->type == TOKEN_VARIABLE)
1401 						vStringPut (arglist, '$');
1402 					vStringCat (arglist, token->string);
1403 					break;
1404 				}
1405 
1406 				default: break;
1407 			}
1408 		}
1409 		while (token->type != TOKEN_EOF && depth > 0);
1410 
1411 		readToken (token); /* normally it's an open brace or "use" keyword */
1412 	}
1413 
1414 	/* skip use(...) */
1415 	if (token->type == TOKEN_KEYWORD && token->keyword == KEYWORD_use)
1416 	{
1417 		readToken (token);
1418 		skipOverParens (token);
1419 	}
1420 
1421 	/* PHP7 return type declaration or if parsing Zephir, gather function return
1422 	 * type hint to fill typeRef. */
1423 	if ((getInputLanguage () == Lang_php && token->type == TOKEN_COLON) ||
1424 	    (getInputLanguage () == Lang_zephir && token->type == TOKEN_OPERATOR))
1425 	{
1426 		if (arglist)
1427 			rtype = vStringNew ();
1428 
1429 		readToken (token);
1430 		if (token->type == TOKEN_QMARK)
1431 		{
1432 			if (rtype)
1433 				vStringPut (rtype, '?');
1434 			readToken (token);
1435 		}
1436 		readQualifiedName (token, rtype, NULL);
1437 
1438 		if (rtype && vStringIsEmpty (rtype))
1439 		{
1440 			vStringDelete (rtype);
1441 			rtype = NULL;
1442 		}
1443 	}
1444 
1445 	if (arglist)
1446 		makeFunctionTag (name, arglist, rtype, access, impl);
1447 
1448 	if (token->type == TOKEN_OPEN_CURLY)
1449 		enterScope (token, name->string, K_FUNCTION);
1450 	else
1451 		readNext = false;
1452 
1453 	vStringDelete (rtype);
1454 	vStringDelete (arglist);
1455 	if (nameFree)
1456 		deleteToken (nameFree);
1457 
1458 	return readNext;
1459 }
1460 
1461 /* parses declarations of the form
1462  * 	const NAME = VALUE */
parseConstant(tokenInfo * const token)1463 static bool parseConstant (tokenInfo *const token)
1464 {
1465 	tokenInfo *name;
1466 
1467 	readToken (token); /* skip const keyword */
1468 	if (token->type != TOKEN_IDENTIFIER && token->type != TOKEN_KEYWORD)
1469 		return false;
1470 
1471 	name = newToken ();
1472 	copyToken (name, token, true);
1473 
1474 	readToken (token);
1475 	if (token->type == TOKEN_EQUAL_SIGN)
1476 		makeSimplePhpTag (name, K_DEFINE, ACCESS_UNDEFINED);
1477 
1478 	deleteToken (name);
1479 
1480 	return token->type == TOKEN_EQUAL_SIGN;
1481 }
1482 
1483 /* parses declarations of the form
1484  * 	define('NAME', 'VALUE')
1485  * 	define(NAME, 'VALUE) */
parseDefine(tokenInfo * const token)1486 static bool parseDefine (tokenInfo *const token)
1487 {
1488 	int depth = 1;
1489 
1490 	readToken (token); /* skip "define" identifier */
1491 	if (token->type != TOKEN_OPEN_PAREN)
1492 		return false;
1493 
1494 	readToken (token);
1495 	if (token->type == TOKEN_STRING ||
1496 		token->type == TOKEN_IDENTIFIER)
1497 	{
1498 		makeSimplePhpTag (token, K_DEFINE, ACCESS_UNDEFINED);
1499 		readToken (token);
1500 	}
1501 
1502 	/* skip until the close parenthesis.
1503 	 * no need to handle nested blocks since they would be invalid
1504 	 * in this context anyway (the VALUE may only be a scalar, like
1505 	 * 	42
1506 	 * 	(42)
1507 	 * and alike) */
1508 	while (token->type != TOKEN_EOF && depth > 0)
1509 	{
1510 		switch (token->type)
1511 		{
1512 			case TOKEN_OPEN_PAREN:	depth++; break;
1513 			case TOKEN_CLOSE_PAREN:	depth--; break;
1514 			default: break;
1515 		}
1516 		readToken (token);
1517 	}
1518 
1519 	return false;
1520 }
1521 
1522 /* parses declarations of the form
1523  * 	use Foo
1524  * 	use Foo\Bar\Class
1525  * 	use Foo\Bar\Class as FooBarClass
1526  * 	use function Foo\Bar\func
1527  * 	use function Foo\Bar\func as foobarfunc
1528  * 	use const Foo\Bar\CONST
1529  * 	use const Foo\Bar\CONST as FOOBARCONST
1530  * 	use Foo, Bar
1531  * 	use Foo, Bar as Baz
1532  * 	use Foo as Test, Bar as Baz
1533  * 	use Foo\{Bar, Baz as Child, Nested\Other, Even\More as Something} */
parseUse(tokenInfo * const token)1534 static bool parseUse (tokenInfo *const token)
1535 {
1536 	bool readNext = false;
1537 	/* we can't know the use type, because class, interface and namespaces
1538 	 * aliases are the same, and the only difference is the referenced name's
1539 	 * type */
1540 	const char *refType = "unknown";
1541 	vString *refName = vStringNew ();
1542 	tokenInfo *nameToken = newToken ();
1543 	bool grouped = false;
1544 
1545 	readToken (token); /* skip use keyword itself */
1546 	if (token->type == TOKEN_KEYWORD && (token->keyword == KEYWORD_function ||
1547 	                                     token->keyword == KEYWORD_const))
1548 	{
1549 		switch (token->keyword)
1550 		{
1551 			case KEYWORD_function:	refType = PhpKinds[K_FUNCTION].name;	break;
1552 			case KEYWORD_const:		refType = PhpKinds[K_DEFINE].name;		break;
1553 			default: break; /* silence compilers */
1554 		}
1555 		readNext = true;
1556 	}
1557 
1558 	if (readNext)
1559 		readToken (token);
1560 
1561 	readQualifiedName (token, refName, nameToken);
1562 	grouped = readNext = (token->type == TOKEN_OPEN_CURLY);
1563 
1564 	do
1565 	{
1566 		size_t refNamePrefixLength = grouped ? vStringLength (refName) : 0;
1567 
1568 		/* if it's either not the first name in a comma-separated list, or we
1569 		 * are in a grouped alias and need to read the leaf name */
1570 		if (readNext)
1571 		{
1572 			readToken (token);
1573 			/* in case of a trailing comma (or an empty group) */
1574 			if (token->type == TOKEN_CLOSE_CURLY)
1575 				break;
1576 			readQualifiedName (token, refName, nameToken);
1577 		}
1578 
1579 		if (token->type == TOKEN_KEYWORD && token->keyword == KEYWORD_as)
1580 		{
1581 			readToken (token);
1582 			copyToken (nameToken, token, true);
1583 			readToken (token);
1584 		}
1585 
1586 		if (nameToken->type == TOKEN_IDENTIFIER && PhpKinds[K_ALIAS].enabled)
1587 		{
1588 			tagEntryInfo entry;
1589 
1590 			initPhpEntry (&entry, nameToken, K_ALIAS, ACCESS_UNDEFINED);
1591 
1592 			entry.extensionFields.typeRef[0] = refType;
1593 			entry.extensionFields.typeRef[1] = vStringValue (refName);
1594 
1595 			makePhpTagEntry (&entry);
1596 		}
1597 
1598 		vStringTruncate (refName, refNamePrefixLength);
1599 
1600 		readNext = true;
1601 	}
1602 	while (token->type == TOKEN_COMMA);
1603 
1604 	if (grouped && token->type == TOKEN_CLOSE_CURLY)
1605 		readToken (token);
1606 
1607 	vStringDelete (refName);
1608 	deleteToken (nameToken);
1609 
1610 	return (token->type == TOKEN_SEMICOLON);
1611 }
1612 
1613 /* parses declarations of the form
1614  * 	$var = VALUE
1615  * 	$var; */
parseVariable(tokenInfo * const token,vString * typeName)1616 static bool parseVariable (tokenInfo *const token, vString * typeName)
1617 {
1618 	tokenInfo *name;
1619 	bool readNext = true;
1620 	accessType access = CurrentStatement.access;
1621 
1622 	name = newToken ();
1623 	copyToken (name, token, true);
1624 
1625 	readToken (token);
1626 	if (token->type == TOKEN_EQUAL_SIGN)
1627 	{
1628 		phpKind kind = K_VARIABLE;
1629 
1630 		if (token->parentKind == K_FUNCTION)
1631 			kind = K_LOCAL_VARIABLE;
1632 
1633 		readToken (token);
1634 		if (token->type == TOKEN_KEYWORD &&
1635 			token->keyword == KEYWORD_function &&
1636 			PhpKinds[kind].enabled)
1637 		{
1638 			if (parseFunction (token, name))
1639 				readToken (token);
1640 			readNext = (bool) (token->type == TOKEN_SEMICOLON);
1641 		}
1642 		else
1643 		{
1644 			makeSimplePhpTag (name, kind, access);
1645 			readNext = false;
1646 		}
1647 	}
1648 	else if (token->type == TOKEN_SEMICOLON)
1649 	{
1650 		/* generate tags for variable declarations in classes
1651 		 * 	class Foo {
1652 		 * 		protected $foo;
1653 		 * 	}
1654 		 * but don't get fooled by stuff like $foo = $bar; */
1655 		if (token->parentKind == K_CLASS ||
1656 		    token->parentKind == K_INTERFACE ||
1657 		    token->parentKind == K_TRAIT)
1658 			makeTypedPhpTag (name, K_VARIABLE, access, typeName);
1659 	}
1660 	else
1661 		readNext = false;
1662 
1663 	deleteToken (name);
1664 
1665 	return readNext;
1666 }
1667 
1668 /* parses namespace declarations
1669  * 	namespace Foo {}
1670  * 	namespace Foo\Bar {}
1671  * 	namespace Foo;
1672  * 	namespace Foo\Bar;
1673  * 	namespace;
1674  * 	namespace {} */
parseNamespace(tokenInfo * const token)1675 static bool parseNamespace (tokenInfo *const token)
1676 {
1677 	tokenInfo *nsToken = newToken ();
1678 
1679 	vStringClear (CurrentNamesapce);
1680 	copyToken (nsToken, token, false);
1681 
1682 	do
1683 	{
1684 		readToken (token);
1685 		if (token->type == TOKEN_IDENTIFIER)
1686 		{
1687 			if (vStringLength (CurrentNamesapce) > 0)
1688 			{
1689 				const char *sep;
1690 
1691 				sep = phpScopeSeparatorFor(K_NAMESPACE,
1692 							   K_NAMESPACE);
1693 				vStringCatS (CurrentNamesapce, sep);
1694 			}
1695 			vStringCat (CurrentNamesapce, token->string);
1696 		}
1697 	}
1698 	while (token->type != TOKEN_EOF &&
1699 		   token->type != TOKEN_SEMICOLON &&
1700 		   token->type != TOKEN_OPEN_CURLY);
1701 
1702 	if (vStringLength (CurrentNamesapce) > 0)
1703 		makeNamespacePhpTag (nsToken, CurrentNamesapce);
1704 
1705 	if (token->type == TOKEN_OPEN_CURLY)
1706 		enterScope (token, NULL, -1);
1707 
1708 	deleteToken (nsToken);
1709 
1710 	return true;
1711 }
1712 
/* parses the content of a scope, up to its closing curly brace or the end
 * of the input, dispatching to the dedicated parsers above.
 *
 * @parentToken is the token the scope starts at; on return the last token
 * read is copied back into it and its parentKind is restored.
 * if @extraScope is not NULL, it is added to the scope of what is parsed
 * inside, with @parentKind as the kind of that parent. */
static void enterScope (tokenInfo *const parentToken,
						const vString *const extraScope,
						const int parentKind)
{
	const int savedParentKind = parentToken->parentKind;
	tokenInfo *token = newToken ();
	/* type hint accumulated in front of a variable, if any */
	vString *pendingType = vStringNew ();

	copyToken (token, parentToken, true);

	if (extraScope != NULL)
	{
		token->parentKind = parentKind;
		addToScope (token, extraScope, savedParentKind);
	}

	for (readToken (token);
		 token->type != TOKEN_EOF && token->type != TOKEN_CLOSE_CURLY;
		 )
	{
		/* parsers that already fetched the next token clear this */
		bool advance = true;

		if (token->type == TOKEN_OPEN_CURLY)
		{
			/* plain nested block */
			enterScope (token, NULL, -1);
		}
		else if (token->type == TOKEN_QMARK)
		{
			/* start of a nullable type hint */
			vStringClear (pendingType);
			vStringPut (pendingType, '?');
		}
		else if (token->type == TOKEN_IDENTIFIER)
		{
			vStringCat (pendingType, token->string);
		}
		else if (token->type == TOKEN_VARIABLE)
		{
			advance = parseVariable (token,
									 vStringIsEmpty (pendingType)
									 ? NULL
									 : pendingType);
			vStringClear (pendingType);
		}
		else if (token->type == TOKEN_KEYWORD)
		{
			switch (token->keyword)
			{
				/* handle anonymous classes */
				case KEYWORD_new:
					readToken (token);
					if (token->keyword == KEYWORD_class)
					{
						tokenInfo *anonName = newToken ();

						copyToken (anonName, token, true);
						anonGenerate (anonName->string, "AnonymousClass", K_CLASS);
						anonName->anonymous = true;
						advance = parseClassOrIface (token, K_CLASS, anonName);
						deleteToken (anonName);
					}
					else
						advance = false;
					break;

				case KEYWORD_class:
					advance = parseClassOrIface (token, K_CLASS, NULL);
					break;
				case KEYWORD_interface:
					advance = parseClassOrIface (token, K_INTERFACE, NULL);
					break;
				case KEYWORD_trait:
					advance = parseTrait (token);
					break;
				case KEYWORD_function:
					advance = parseFunction (token, NULL);
					break;
				case KEYWORD_const:
					advance = parseConstant (token);
					break;
				case KEYWORD_define:
					advance = parseDefine (token);
					break;
				case KEYWORD_namespace:
					advance = parseNamespace (token);
					break;

				case KEYWORD_use:
					/* aliases are only allowed at root scope, but the keyword
					 * is also used to i.e. "import" traits into a class */
					if (vStringLength (token->scope) == 0)
						advance = parseUse (token);
					break;

				/* modifiers are remembered for the current statement */
				case KEYWORD_private:
					CurrentStatement.access = ACCESS_PRIVATE;
					break;
				case KEYWORD_protected:
					CurrentStatement.access = ACCESS_PROTECTED;
					break;
				case KEYWORD_public:
				case KEYWORD_var:
					CurrentStatement.access = ACCESS_PUBLIC;
					break;
				case KEYWORD_abstract:
					CurrentStatement.impl = IMPL_ABSTRACT;
					break;

				default:
					break;
			}
		}

		if (advance)
			readToken (token);
	}

	/* hand the token we stopped on back to the caller */
	copyToken (parentToken, token, false);
	parentToken->parentKind = savedParentKind;
	vStringDelete (pendingType);
	deleteToken (token);
}
1817 
findTags(bool startsInPhpMode)1818 static void findTags (bool startsInPhpMode)
1819 {
1820 	tokenInfo *const token = newToken ();
1821 
1822 	InPhp = startsInPhpMode;
1823 	MayBeKeyword = true;
1824 	CurrentStatement.access = ACCESS_UNDEFINED;
1825 	CurrentStatement.impl = IMPL_UNDEFINED;
1826 	CurrentNamesapce = vStringNew ();
1827 	FullScope = vStringNew ();
1828 	Assert (ParentClass == NULL);
1829 
1830 	do
1831 	{
1832 		enterScope (token, NULL, -1);
1833 	}
1834 	while (token->type != TOKEN_EOF); /* keep going even with unmatched braces */
1835 
1836 	vStringDelete (FullScope);
1837 	vStringDelete (CurrentNamesapce);
1838 	deleteToken (token);
1839 }
1840 
/* parser entry point for PHP: parsing does not start in PHP mode */
static void findPhpTags (void)
{
	const bool startsInPhpMode = false;

	findTags (startsInPhpMode);
}
1845 
/* parser entry point for Zephir: parsing starts directly in PHP mode */
static void findZephirTags (void)
{
	const bool startsInPhpMode = true;

	findTags (startsInPhpMode);
}
1850 
initializePool(void)1851 static void initializePool (void)
1852 {
1853 	if (TokenPool == NULL)
1854 		TokenPool = objPoolNew (16, newPoolToken, deletePoolToken, clearPoolToken, NULL);
1855 }
1856 
initializePhpParser(const langType language)1857 static void initializePhpParser (const langType language)
1858 {
1859 	Lang_php = language;
1860 	initializePool ();
1861 }
1862 
initializeZephirParser(const langType language)1863 static void initializeZephirParser (const langType language)
1864 {
1865 	Lang_zephir = language;
1866 	initializePool ();
1867 }
1868 
/* finalization callback shared by both parsers: destroys the token pool
 * if the parser was initialized and the pool still exists */
static void finalize (langType language CTAGS_ATTR_UNUSED, bool initialized)
{
	if (initialized && TokenPool != NULL)
	{
		objPoolDelete (TokenPool);
		TokenPool = NULL;
	}
}
1880 
PhpParser(void)1881 extern parserDefinition* PhpParser (void)
1882 {
1883 	static const char *const extensions [] = { "php", "php3", "php4", "php5", "php7", "phtml", NULL };
1884 	parserDefinition* def = parserNew ("PHP");
1885 	def->kindTable      = PhpKinds;
1886 	def->kindCount  = ARRAY_SIZE (PhpKinds);
1887 	def->extensions = extensions;
1888 	def->parser     = findPhpTags;
1889 	def->initialize = initializePhpParser;
1890 	def->finalize   = finalize;
1891 	def->keywordTable = PhpKeywordTable;
1892 	def->keywordCount = ARRAY_SIZE (PhpKeywordTable);
1893 	return def;
1894 }
1895 
ZephirParser(void)1896 extern parserDefinition* ZephirParser (void)
1897 {
1898 	static const char *const extensions [] = { "zep", NULL };
1899 	parserDefinition* def = parserNew ("Zephir");
1900 	def->kindTable      = PhpKinds;
1901 	def->kindCount  = ARRAY_SIZE (PhpKinds);
1902 	def->extensions = extensions;
1903 	def->parser     = findZephirTags;
1904 	def->initialize = initializeZephirParser;
1905 	def->finalize   = finalize;
1906 	def->keywordTable = PhpKeywordTable;
1907 	def->keywordCount = ARRAY_SIZE (PhpKeywordTable);
1908 	return def;
1909 }
1910