xref: /Universal-ctags/parsers/jscript.c (revision 6f5dd0b16afc44f8acb7f7c0561de3aaa16909cc)
1 /*
2  *	 Copyright (c) 2003, Darren Hiebert
3  *
4  *	 This source code is released for free distribution under the terms of the
5  *	 GNU General Public License version 2 or (at your option) any later version.
6  *
7  *	 This module contains functions for generating tags for JavaScript language
8  *	 files.
9  *
10  *	 Reference: http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-262.pdf
11  *
12  *	 This is a good reference for different forms of the function statement:
13  *		 http://www.permadi.com/tutorial/jsFunc/
14  *   Another good reference:
15  *       http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Guide
16  */
17 
18 /*
19  *	 INCLUDE FILES
20  */
21 #include "general.h"	/* must always come first */
22 #include <ctype.h>	/* to define isalpha () */
23 #ifdef DEBUG
24 #include <stdio.h>
25 #endif
26 
27 #ifdef HAVE_ICONV
28 #include <iconv.h>
29 #include <errno.h>
30 #	ifdef WORDS_BIGENDIAN
31 #		define INTERNAL_ENCODING "UTF-32BE"
32 #	else
33 #		define INTERNAL_ENCODING "UTF-32LE"
34 #	endif /* WORDS_BIGENDIAN */
35 #endif
36 
37 #include <string.h>
38 #include "debug.h"
39 #include "entry.h"
40 #include "keyword.h"
41 #include "parse.h"
42 #include "read.h"
43 #include "routines.h"
44 #include "vstring.h"
45 #include "objpool.h"
46 #include "options.h"
47 #include "mbcs.h"
48 #include "trace.h"
49 #include "strlist.h"
50 
51 /*
52  *	 MACROS
53  */
/* True when TOKEN's type field equals the token type T. */
#define isType(token,t)		(bool) ((token)->type == (t))
/* True when TOKEN's keyword field equals the keyword id K. */
#define isKeyword(token,k)	(bool) ((token)->keyword == (k))
/* True when C is accepted inside an identifier by this parser: a letter, a
 * digit, '$', '@', '_', '#', or any value >= 0x80 (bytes of a multi-byte
 * character).  C is expanded several times, so the argument must be free of
 * side effects. */
#define isIdentChar(c) \
	(isalpha (c) || isdigit (c) || (c) == '$' || \
		(c) == '@' || (c) == '_' || (c) == '#' || \
		(c) >= 0x80)
/* Obtains a token from TokenPool. */
#define newToken() (objPoolGet (TokenPool))
/* Hands the token T back to TokenPool. */
#define deleteToken(t) (objPoolPut (TokenPool, (t)))
62 
63 /*
64  *	 DATA DECLARATIONS
65  */
66 
/*
 * Qualified names ("scope.name", or just "name" without a scope) of the class
 * and function tags already emitted.  makeClassTagCommon() and
 * makeFunctionTagCommon() consult these lists so that a given qualified name
 * is tagged only once.
 */
static stringList *ClassNames;
static stringList *FunctionNames;
72 
/*	Identifiers of the words listed in JsKeywordTable.  A token whose text
 *	matches one of those words carries the corresponding value in its
 *	"keyword" field.
 */
enum eKeywordId {
	KEYWORD_function,
	KEYWORD_capital_function,	/* "Function" */
	KEYWORD_capital_object,		/* "Object" */
	KEYWORD_prototype,
	KEYWORD_var,
	KEYWORD_let,
	KEYWORD_const,
	KEYWORD_new,
	KEYWORD_this,
	KEYWORD_for,
	KEYWORD_while,
	KEYWORD_do,
	KEYWORD_if,
	KEYWORD_else,
	KEYWORD_switch,
	KEYWORD_try,
	KEYWORD_catch,
	KEYWORD_finally,
	KEYWORD_sap,
	KEYWORD_return,
	KEYWORD_class,
	KEYWORD_extends,
	KEYWORD_static,
	KEYWORD_default,
	KEYWORD_export,
	KEYWORD_async,
	KEYWORD_get,
	KEYWORD_set,
};
/* A plain int rather than the enum type so that a variable of this type can
 * also hold KEYWORD_NONE, which is not an enumerator of eKeywordId. */
typedef int keywordId; /* to allow KEYWORD_NONE */
106 
/* Lexical categories assigned to tokens by the tokenizer. */
typedef enum eTokenType {
	TOKEN_UNDEFINED,			/* initial state / unrecognised input */
	TOKEN_EOF,					/* end of input */
	TOKEN_CHARACTER,
	TOKEN_CLOSE_PAREN,			/* ) */
	TOKEN_SEMICOLON,			/* ; */
	TOKEN_COLON,				/* : */
	TOKEN_COMMA,				/* , */
	TOKEN_KEYWORD,				/* a word found in JsKeywordTable */
	TOKEN_OPEN_PAREN,			/* ( */
	TOKEN_IDENTIFIER,			/* a word that is not a keyword */
	TOKEN_STRING,				/* '...' or "..." */
	TOKEN_TEMPLATE_STRING,		/* `...` */
	TOKEN_PERIOD,				/* . */
	TOKEN_OPEN_CURLY,			/* { */
	TOKEN_CLOSE_CURLY,			/* } */
	TOKEN_EQUAL_SIGN,			/* = */
	TOKEN_OPEN_SQUARE,			/* [ */
	TOKEN_CLOSE_SQUARE,			/* ] */
	TOKEN_REGEXP,				/* /.../flags */
	TOKEN_POSTFIX_OPERATOR,		/* ++ or -- */
	TOKEN_STAR,					/* * */
	/* To handle Babel's decorators.
	 * Used only in readTokenFull or lower functions. */
	TOKEN_ATMARK,
	TOKEN_BINARY_OPERATOR,		/* + - % ? > < ^ | & and a dividing / */
	TOKEN_ARROW					/* => */
} tokenType;
135 
/* One lexical token together with the context needed to emit a tag for it. */
typedef struct sTokenInfo {
	tokenType		type;			/* lexical category */
	keywordId		keyword;		/* keyword id, or KEYWORD_NONE */
	vString *		string;			/* token text (identifier name, string contents, ...) */
	vString *		scope;			/* scope the token belongs to; used to qualify its tag */
	unsigned long 	lineNumber;		/* input line recorded for the token */
	MIOPos 			filePosition;	/* input position recorded for the token */
	int				nestLevel;		/* NOTE(review): presumably a nesting depth; its users are outside this part of the file */
	bool			dynamicProp;	/* set by injectDynamicName(); the name is then never split at '.' when tagging */
} tokenInfo;
146 
147 /*
148  *	DATA DEFINITIONS
149  */
150 
/* Type of a previously read token; the tokenizer consults it to decide
 * whether a '/' is a division operator or the start of a regular expression. */
static tokenType LastTokenType;
/* Token held back for later; when non-NULL, readTokenFullRaw() emits it
 * before reading anything new from the input. */
static tokenInfo *NextToken;

/* Language index of this parser (passed to lookupKeyword() and
 * getLanguageEncoding()). */
static langType Lang_js;

/* Pool the tokens are drawn from, see newToken() and deleteToken(). */
static objPool *TokenPool = NULL;

#ifdef HAVE_ICONV
/* Converter used by handleUnicodeCodePoint().  (iconv_t) -2 means no attempt
 * to open it has been made yet; (iconv_t) -1 is the value the code treats as
 * "opening failed". */
static iconv_t JSUnicodeConverter = (iconv_t) -2;
#endif
161 
/* Kinds of tags this parser emits.  The values are used as indices into
 * JsKinds, so both must list the kinds in the same order. */
typedef enum {
	JSTAG_FUNCTION,
	JSTAG_CLASS,
	JSTAG_METHOD,
	JSTAG_PROPERTY,
	JSTAG_CONSTANT,
	JSTAG_VARIABLE,
	JSTAG_GENERATOR,
	JSTAG_GETTER,
	JSTAG_SETTER,
	JSTAG_FIELD,
	JSTAG_COUNT		/* number of kinds; not a kind itself */
} jsKind;
175 
176 static kindDefinition JsKinds [] = {
177 	{ true,  'f', "function",	  "functions"		   },
178 	{ true,  'c', "class",		  "classes"			   },
179 	{ true,  'm', "method",		  "methods"			   },
180 	{ true,  'p', "property",	  "properties"		   },
181 	{ true,  'C', "constant",	  "constants"		   },
182 	{ true,  'v', "variable",	  "global variables"   },
183 	{ true,  'g', "generator",	  "generators"		   },
184 	{ true,  'G', "getter",		  "getters"			   },
185 	{ true,  'S', "setter",		  "setters"			   },
186 	{ true,  'M', "field",		  "fields"			   },
187 };
188 
/* Spelling of every word the tokenizer treats as a keyword, paired with its
 * keywordId.  Words are matched case-sensitively as written here (note
 * "function" versus "Function"). */
static const keywordTable JsKeywordTable [] = {
	/* keyword		keyword ID */
	{ "function",	KEYWORD_function			},
	{ "Function",	KEYWORD_capital_function	},
	{ "Object",		KEYWORD_capital_object		},
	{ "prototype",	KEYWORD_prototype			},
	{ "var",		KEYWORD_var					},
	{ "let",		KEYWORD_let					},
	{ "const",		KEYWORD_const				},
	{ "new",		KEYWORD_new					},
	{ "this",		KEYWORD_this				},
	{ "for",		KEYWORD_for					},
	{ "while",		KEYWORD_while				},
	{ "do",			KEYWORD_do					},
	{ "if",			KEYWORD_if					},
	{ "else",		KEYWORD_else				},
	{ "switch",		KEYWORD_switch				},
	{ "try",		KEYWORD_try					},
	{ "catch",		KEYWORD_catch				},
	{ "finally",	KEYWORD_finally				},
	{ "sap",	    KEYWORD_sap    				},
	{ "return",		KEYWORD_return				},
	{ "class",		KEYWORD_class				},
	{ "extends",	KEYWORD_extends				},
	{ "static",		KEYWORD_static				},
	{ "default",	KEYWORD_default				},
	{ "export",		KEYWORD_export				},
	{ "async",		KEYWORD_async				},
	{ "get",		KEYWORD_get					},
	{ "set",		KEYWORD_set					},
};
220 
221 /*
222  *	 FUNCTION DEFINITIONS
223  */
224 
/* Recursive functions */
/* Declared ahead of their definitions because they are used earlier in the
 * file than they are defined (for instance parseTemplateString() calls
 * readTokenFull()). */
static void readTokenFull (tokenInfo *const token, bool include_newlines, vString *const repr);
static void skipArgumentList (tokenInfo *const token, bool include_newlines, vString *const repr);
static void parseFunction (tokenInfo *const token);
static bool parseBlock (tokenInfo *const token, const vString *const parentScope);
static bool parseMethods (tokenInfo *const token, const tokenInfo *const class, const bool is_es6_class);
static bool parseLine (tokenInfo *const token, bool is_inside_class);
static void parseUI5 (tokenInfo *const token);

/* Helpers that only exist in tracing builds. */
#ifdef DO_TRACING
static const char *tokenTypeName(enum eTokenType e);
/* Uncomment the following definition to also get the token dumping helpers. */
// #define DO_TRACING_USE_DUMP_TOKEN
#ifdef DO_TRACING_USE_DUMP_TOKEN
static void dumpToken (const tokenInfo *const token);
static const char *keywordName(enum eKeywordId e);
#endif
#endif
242 
/* Allocates one tokenInfo along with the two vStrings it owns ("string" and
 * "scope").  All other fields are left unset here; clearPoolToken() is the
 * function that gives them their initial values.
 * NOTE(review): presumably installed as the creation callback of TokenPool;
 * the registration is not in this part of the file.
 * @param createArg Unused.
 * @returns The newly allocated token. */
static void *newPoolToken (void *createArg CTAGS_ATTR_UNUSED)
{
	tokenInfo *const fresh = xMalloc (1, tokenInfo);

	fresh->string = vStringNew ();
	fresh->scope  = vStringNew ();
	return fresh;
}
252 
clearPoolToken(void * data)253 static void clearPoolToken (void *data)
254 {
255 	tokenInfo *token = data;
256 
257 	token->type			= TOKEN_UNDEFINED;
258 	token->keyword		= KEYWORD_NONE;
259 	token->nestLevel	= 0;
260 	token->dynamicProp  = false;
261 	token->lineNumber   = getInputLineNumber ();
262 	token->filePosition = getInputFilePosition ();
263 	vStringClear (token->string);
264 	vStringClear (token->scope);
265 }
266 
deletePoolToken(void * data)267 static void deletePoolToken (void *data)
268 {
269 	tokenInfo *token = data;
270 	vStringDelete (token->string);
271 	vStringDelete (token->scope);
272 	eFree (token);
273 }
274 
/* Copies @p src into @p dest.
 * The location, type, keyword, dynamicProp flag and text are always copied.
 * @param include_non_read_info When true, the nesting level and the scope are
 *        copied as well; when false, @p dest keeps its own. */
static void copyToken (tokenInfo *const dest, const tokenInfo *const src,
                       bool const include_non_read_info)
{
	dest->type         = src->type;
	dest->keyword      = src->keyword;
	dest->dynamicProp  = src->dynamicProp;
	dest->lineNumber   = src->lineNumber;
	dest->filePosition = src->filePosition;
	vStringCopy (dest->string, src->string);

	if (! include_non_read_info)
		return;

	dest->nestLevel = src->nestLevel;
	vStringCopy (dest->scope, src->scope);
}
290 
/* Replaces the text of @p token with @p newName and flags the token as a
 * dynamic property.
 * The previous text is destroyed and the token takes ownership of
 * @p newName: the caller must not delete it afterwards. */
static void injectDynamicName (tokenInfo *const token, vString *newName)
{
	vString *const previous = token->string;

	token->string = newName;
	token->dynamicProp = true;
	vStringDelete (previous);
}
297 
298 /*
299  *	 Tag generation functions
300  */
301 
/* Emits one tag for @p token, provided @p kind is enabled.
 *
 * Unless the token is a dynamic property or @p kind is JSTAG_PROPERTY, a
 * dotted name ("a.b.c") is split at its last '.': what precedes it is
 * appended to the token's scope and what follows becomes the tag name.
 *
 * @param token       Supplies the name, scope and location of the tag.
 * @param kind        Kind of the tag; also an index into JsKinds.
 * @param signature   Optional signature (may be NULL).  Its control
 *                    characters are overwritten with spaces in place.
 * @param inheritance Optional inheritance string (may be NULL).
 * @param anonymous   Whether to mark the tag with XTAG_ANONYMOUS. */
static void makeJsTagCommon (const tokenInfo *const token, const jsKind kind,
							 vString *const signature, vString *const inheritance,
							 bool anonymous)
{
	if (! JsKinds [kind].enabled)
		return;

	tagEntryInfo entry;
	const char *tagName = vStringValue (token->string);
	vString *const scopeName = vStringNewCopy (token->scope);

	if (! token->dynamicProp && kind != JSTAG_PROPERTY)
	{
		const char *const lastDot = strrchr (tagName, '.');

		if (lastDot != NULL)
		{
			/* move everything before the last dot over to the scope */
			if (vStringLength (scopeName) > 0)
				vStringPut (scopeName, '.');
			vStringNCatS (scopeName, tagName, (size_t) (lastDot - tagName));
			tagName = lastDot + 1;
		}
	}

	initTagEntry (&entry, tagName, kind);

	TRACE_PRINT("Emitting tag for symbol '%s' of kind %02x with scope '%s'",tagName,kind,vStringValue(scopeName));

	entry.lineNumber   = token->lineNumber;
	entry.filePosition = token->filePosition;

	if (vStringLength (scopeName) > 0)
	{
		/* FIXME: proper parent type.  For now a function (as opposed to a
		 * method) is guessed to live inside another function, and anything
		 * else inside a class. */
		entry.extensionFields.scopeKindIndex =
			(kind == JSTAG_FUNCTION) ? JSTAG_FUNCTION : JSTAG_CLASS;
		entry.extensionFields.scopeName = vStringValue (scopeName);
	}

	if (signature && vStringLength (signature))
	{
		/* A valid signature holds no control characters, but the input may be
		 * anything and CTags copes badly with odd characters: blank them. */
		for (size_t pos = 0; pos < vStringLength (signature); pos++)
		{
			const unsigned char ch = (unsigned char) vStringChar (signature, pos);

			if (ch < 0x20 /* below space */ || ch == 0x7F /* DEL */)
				vStringChar (signature, pos) = ' ';
		}
		entry.extensionFields.signature = vStringValue (signature);
	}

	if (inheritance)
		entry.extensionFields.inheritance = vStringValue (inheritance);

	if (anonymous)
		markTagExtraBit (&entry, XTAG_ANONYMOUS);

	makeTagEntry (&entry);
	vStringDelete (scopeName);
}
371 
/* Emits a tag of kind @p kind for @p token without the "anonymous" extra bit.
 * The arguments are forwarded unchanged to makeJsTagCommon(). */
static void makeJsTag (const tokenInfo *const token, const jsKind kind,
					   vString *const signature, vString *const inheritance)
{
	makeJsTagCommon (token, kind, signature, inheritance, false);
}
377 
/* Emits a class tag for @p token unless a class with the same qualified name
 * ("scope.name", or just "name" when the token has no scope) was already
 * recorded in ClassNames.
 * @param signature   Passed on to makeJsTagCommon().
 * @param inheritance Passed on to makeJsTagCommon().
 * @param anonymous   Passed on to makeJsTagCommon(). */
static void makeClassTagCommon (tokenInfo *const token, vString *const signature,
                          vString *const inheritance, bool anonymous)
{
	vString *const qualified = vStringNew ();

	/* build the qualified name: optional "scope." prefix, then the name */
	if (vStringLength (token->scope) > 0)
	{
		vStringCopy (qualified, token->scope);
		vStringPut (qualified, '.');
	}
	vStringCat (qualified, token->string);

	const bool seen = stringListHas (ClassNames, vStringValue (qualified));
	if (! seen)
	{
		/* the list keeps its own copy, as "qualified" is deleted below */
		stringListAdd (ClassNames, vStringNewCopy (qualified));
		makeJsTagCommon (token, JSTAG_CLASS, signature, inheritance,
						 anonymous);
	}

	vStringDelete (qualified);
}
400 
/* Emits a class tag for @p token without the "anonymous" extra bit.  The
 * arguments are forwarded unchanged to makeClassTagCommon(). */
static void makeClassTag (tokenInfo *const token, vString *const signature,
						  vString *const inheritance)
{
	makeClassTagCommon (token, signature, inheritance, false);
}
406 
/* Emits a function tag (or a generator tag when @p generator is true) for
 * @p token unless a function with the same qualified name ("scope.name", or
 * just "name" when the token has no scope) was already recorded in
 * FunctionNames.
 * @param signature Passed on to makeJsTagCommon().
 * @param generator Selects JSTAG_GENERATOR instead of JSTAG_FUNCTION.
 * @param anonymous Passed on to makeJsTagCommon(). */
static void makeFunctionTagCommon (tokenInfo *const token, vString *const signature, bool generator,
								   bool anonymous)
{
	vString *const qualified = vStringNew ();

	/* build the qualified name: optional "scope." prefix, then the name */
	if (vStringLength (token->scope) > 0)
	{
		vStringCopy (qualified, token->scope);
		vStringPut (qualified, '.');
	}
	vStringCat (qualified, token->string);

	const bool seen = stringListHas (FunctionNames, vStringValue (qualified));
	if (! seen)
	{
		jsKind kind = JSTAG_FUNCTION;

		if (generator)
			kind = JSTAG_GENERATOR;

		/* the list keeps its own copy, as "qualified" is deleted below */
		stringListAdd (FunctionNames, vStringNewCopy (qualified));
		makeJsTagCommon (token, kind, signature, NULL, anonymous);
	}

	vStringDelete (qualified);
}
429 
/* Emits a function (or generator) tag for @p token without the "anonymous"
 * extra bit.  The arguments are forwarded unchanged to
 * makeFunctionTagCommon(). */
static void makeFunctionTag (tokenInfo *const token, vString *const signature, bool generator)
{
	makeFunctionTagCommon (token, signature, generator, false);
}
434 
435 /*
436  *	 Parsing functions
437  */
438 
/* Encodes the Unicode code point @p point and makes it look as if the encoded
 * bytes had been written in the input file: the first byte is returned and
 * the remaining ones are pushed back so that the next calls to
 * getcFromInputFile() return them in order.
 *
 * With iconv and an active encoding conversion, the code point is converted
 * to the encoding configured for the language ('_' is returned when the
 * conversion fails).  Otherwise it is encoded as UTF-8.
 *
 * @param point A code point below 0x110000.
 * @returns The first byte of the encoded sequence. */
static int handleUnicodeCodePoint (uint32_t point)
{
	int first = (int) point;

	Assert (point < 0x110000);

#ifdef HAVE_ICONV
	/* open the converter on first use */
	if (isConverting () && JSUnicodeConverter == (iconv_t) -2)
		JSUnicodeConverter = iconv_open (getLanguageEncoding (Lang_js), INTERNAL_ENCODING);

	if (isConverting () && JSUnicodeConverter != (iconv_t) -1)
	{
		/* 4 bytes should be enough for any encoding (it's how much UTF-32
		 * would need). */
		/* FIXME: iconv tends to output a BOM for Unicode encodings whose name
		 * does not specify the endianness (e.g. "UTF-32" or "UTF-16"), in
		 * which case it produces two code points: U+FEFF and the expected
		 * one.  This does not happen with e.g. "UTF-32LE" or "UTF-16BE".
		 * It hardly matters for now as nothing in CTags copes well (if at
		 * all) with non-ASCII-compatible encodings anyway. */
		char encoded[4] = { 0 };
		char *src = (char *) &point;
		char *dst = encoded;
		size_t srcLeft = sizeof point;
		size_t dstLeft = ARRAY_SIZE (encoded);
		const size_t rc = iconv (JSUnicodeConverter, &src, &srcLeft, &dst, &dstLeft);

		if (rc != (size_t) -1)
		{
			const size_t produced = ARRAY_SIZE (encoded) - dstLeft;

			/* push back every byte but the first, last one first, so that
			 * they are read again in their natural order */
			for (size_t k = produced; k > 1; k--)
				ungetcToInputFile ((unsigned char) encoded[k - 1]);
			first = (unsigned char) encoded[0];
		}
		else
		{
			/* most likely the target encoding cannot represent the
			 * character: fall back on a placeholder that is widely supported
			 * and valid inside an identifier */
			verbose ("JavaScript: Encoding: %s\n", strerror (errno));
			first = '_';
		}

		/* bring the converter back to its initial state */
		iconv (JSUnicodeConverter, NULL, NULL, NULL, NULL);
	}
	else
#endif
	{
		/* no encoding specified (or no iconv): emit UTF-8, a common and
		 * ASCII-compatible Unicode encoding */
		if (point < 0x80)
			first = (unsigned char) point;
		else if (point < 0x110000)
		{
			/* per number of continuation bytes: marker and payload mask of
			 * the lead byte */
			static const unsigned char leadTag[]  = { 0x00, 0xc0, 0xe0, 0xf0 };
			static const unsigned char leadMask[] = { 0x00, 0x1f, 0x0f, 0x07 };
			const unsigned int trailing =
				(point < 0x800) ? 1 : (point < 0x10000) ? 2 : 3;

			/* continuation bytes go back lowest 6 bits first, so that they
			 * are read highest bits first */
			for (unsigned int k = 0; k < trailing; k++)
				ungetcToInputFile ((unsigned char) (0x80 | ((point >> (6 * k)) & 0x3f)));

			first = (unsigned char) (leadTag[trailing] |
									 ((point >> (6 * trailing)) & leadMask[trailing]));
		}
	}

	return first;
}
524 
/* Returns the value (0-15) of the hexadecimal digit @p c, or -1 when @p c is
 * not a hexadecimal digit (including EOF).  Works on the raw int returned by
 * getcFromInputFile(), so no byte value can trip a <ctype.h> function. */
static int unicodeEscapeXDigitValue (const int c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'A' && c <= 'F')
		return 10 + (c - 'A');
	if (c >= 'a' && c <= 'f')
		return 10 + (c - 'a');
	return -1;
}

/* reads a Unicode escape sequence after the "\" prefix.
 * Two forms are understood: "u" followed by exactly 4 hex digits, and
 * "u{" HexDigits "}" denoting a code point below 0x110000.
 * @param value Location to store the escape sequence value.  Only written
 *              when a valid sequence was read.
 * @param isUTF16 Location to store whether @param value is an UTF-16 word
 *                (i.e. the 4 digits form was used).  Only written when the
 *                character following the "\" is "u".
 * @returns Whether a valid sequence was read.  When it was not, some of the
 *          characters read are put back (see below), not all of them. */
static bool readUnicodeEscapeSequenceValue (uint32_t *const value,
                                            bool *const isUTF16)
{
	bool valid = false;
	uint32_t acc = 0; /* value of the digits read so far */
	const int d = getcFromInputFile ();

	if (d != 'u')
	{
		ungetcToInputFile (d);
		return false;
	}

	const int e = getcFromInputFile ();

	*isUTF16 = (e != '{');
	if (e == '{')
	{	/* Handles Unicode code point escapes: \u{ HexDigits }
		 * We skip the leading 0s because there can be any number of them
		 * and they don't change any meaning. */
		bool has_leading_zero = false;
		unsigned int ndigits = 0;
		int c; /* last character read, kept as an int so that it can be put
		        * back exactly as it was read */

		while ((c = getcFromInputFile ()) == '0')
			has_leading_zero = true;

		/* at most 6 significant digits make sense (0x10FFFF); after them
		 * "c" holds the character that should be the closing '}' */
		while (ndigits < 6 && unicodeEscapeXDigitValue (c) >= 0)
		{
			acc = acc * 16 + (uint32_t) unicodeEscapeXDigitValue (c);
			ndigits++;
			c = getcFromInputFile ();
		}

		valid = ((ndigits > 0 || has_leading_zero) && c == '}' &&
				 /* also check if it's a valid Unicode code point */
				 acc < 0x110000);
		if (! valid) /* put back the last (likely invalid) character */
			ungetcToInputFile (c);
	}
	else
	{	/* Handles Unicode escape sequences: \u Hex4Digits
		 * "e" is the first candidate digit; nothing is read past the 4th
		 * digit. */
		unsigned int ndigits = 0;
		int c = e;

		while (unicodeEscapeXDigitValue (c) >= 0)
		{
			acc = acc * 16 + (uint32_t) unicodeEscapeXDigitValue (c);
			if (++ndigits == 4)
				break;
			c = getcFromInputFile ();
		}
		valid = (ndigits == 4);
	}

	if (valid)
		*value = acc;
	else
	{
		/* we don't get every character back, but it would require to
		 * be able to put up to 9 characters back (in the worst case
		 * for handling invalid \u{10FFFFx}), and here we're recovering
		 * from invalid syntax anyway. */
		ungetcToInputFile (e);
		ungetcToInputFile (d);
	}

	return valid;
}
600 
/* Converts a value in the range 0-15 to the matching upper-case hexadecimal
 * digit character ('0'-'9', 'A'-'F'). */
static int valueToXDigit (unsigned char v)
{
	Assert (v <= 0xF);

	return (v < 0xA) ? ('0' + v) : ('A' + (v - 0xA));
}
610 
/* Reads and expands a Unicode escape sequence after the "\" prefix.  When the
 * sequence is a UTF-16 high surrogate, the sequence that follows is examined
 * too, so that a surrogate pair yields the code point it stands for.
 * @param fallback The character to return if the sequence is invalid. Usually
 *                 this would be the '\' character starting the sequence.
 * @returns The first byte of the expanded sequence, or @param fallback if the
 *          sequence is invalid. On success, next calls to getcFromInputFile()
 *          will return subsequent bytes (if any). */
static int readUnicodeEscapeSequence (const int fallback)
{
	uint32_t codepoint;
	bool utf16Unit;

	if (! readUnicodeEscapeSequenceValue (&codepoint, &utf16Unit))
		return fallback;

	if (utf16Unit && (codepoint & 0xfc00) == 0xd800)
	{	/* high surrogate: see whether a low surrogate follows */
		uint32_t second;
		const int next = getcFromInputFile ();

		if (next != '\\' || ! readUnicodeEscapeSequenceValue (&second, &utf16Unit))
			ungetcToInputFile (next); /* no valid escape sequence follows */
		else if (! utf16Unit)
		{	/* a plain \u{...} code point follows: queue its expansion so it
			 * is read right after the bytes of the lone high surrogate */
			ungetcToInputFile (handleUnicodeCodePoint (second));
		}
		else if ((second & 0xfc00) == 0xdc00)
		{	/* proper pair: combine both halves */
			codepoint = 0x010000 + ((codepoint & 0x03ff) << 10) + (second & 0x03ff);
		}
		else
		{	/* not a low surrogate, so put back its escaped representation
			 * (in reverse order, lowest digit first) in case it is another
			 * high surrogate to be read as part of another pair */
			for (unsigned int shift = 0; shift <= 12; shift += 4)
				ungetcToInputFile (valueToXDigit ((unsigned char) ((second >> shift) & 0x000f)));
			ungetcToInputFile ('u');
			ungetcToInputFile ('\\');
		}
	}

	return handleUnicodeCodePoint (codepoint);
}
661 
/* Reads the contents of a string literal, the opening @p delimiter having
 * been read already, and appends them to @p string with the escape
 * sequences resolved.
 * Reading stops after the closing @p delimiter, at the end of the input, or
 * in front of an unescaped line break (which is left in the input).
 * @param string    Where the contents are appended.
 * @param delimiter The quote character that ends the string. */
static void parseString (vString *const string, const int delimiter)
{
	bool end = false;
	while (! end)
	{
		int c = getcFromInputFile ();
		if (c == EOF)
			end = true;
		else if (c == '\\')
		{
			/* Eat the escape sequence (\", \', etc).  We properly handle
			 * <LineContinuation> by eating a whole \<CR><LF> not to see <LF>
			 * as an unescaped character, which is invalid and handled below.
			 * Also, handle the fact that <LineContinuation> produces an empty
			 * sequence.
			 * See ECMA-262 7.8.4 */
			c = getcFromInputFile ();
			if (c == EOF)
			{
				/* the input ends right after the backslash: there is no
				 * character to add, and EOF itself must not end up in the
				 * string */
				end = true;
			}
			else if (c == 'u')
			{
				ungetcToInputFile (c);
				c = readUnicodeEscapeSequence ('\\');
				vStringPut (string, c);
			}
			else if (c == '\r')
			{
				/* line continuation: also swallow the <LF> of a <CR><LF> */
				c = getcFromInputFile ();
				if (c != '\n')
					ungetcToInputFile (c);
			}
			else if (c != '\n')
				vStringPut (string, c);
			/* a lone escaped <LF> is a line continuation: adds nothing */
		}
		else if (c == delimiter)
			end = true;
		else if (c == '\r' || c == '\n')
		{
			/* those are invalid when not escaped */
			end = true;
			/* we don't want to eat the newline itself to let the automatic
			 * semicolon insertion code kick in */
			ungetcToInputFile (c);
		}
		else
			vStringPut (string, c);
	}
}
708 
/* Skips a regular expression literal, its opening '/' having been read
 * already.  Reading stops after the flags that follow the closing '/', in
 * front of a line break (invalid inside a regex, left in the input), or at
 * the end of the input.  A '/' inside a [...] range does not close the
 * literal, and the character following a backslash is never interpreted. */
static void parseRegExp (void)
{
	bool inRange = false;

	for (;;)
	{
		int ch = getcFromInputFile ();

		if (ch == '/' && ! inRange)
		{
			/* closing delimiter: skip the flags, then give back the first
			 * character that is not one */
			do
				ch = getcFromInputFile ();
			while (isalpha (ch));
			ungetcToInputFile (ch);
			return;
		}

		switch (ch)
		{
			case EOF:
				return;

			case '\n':
			case '\r':
				/* invalid in a regex */
				ungetcToInputFile (ch);
				return;

			case '\\':
				/* skip next character, whatever it is */
				if (getcFromInputFile () == EOF)
					return;
				break;

			case '[':
				inRange = true;
				break;

			case ']':
				inRange = false;
				break;

			default:
				break;
		}
	}
}
740 
741 /*	Read a C identifier beginning with "firstChar" and places it into
742  *	"name".
743  */
parseIdentifier(vString * const string,const int firstChar)744 static void parseIdentifier (vString *const string, const int firstChar)
745 {
746 	int c = firstChar;
747 	Assert (isIdentChar (c));
748 	do
749 	{
750 		vStringPut (string, c);
751 		c = getcFromInputFile ();
752 		if (c == '\\')
753 			c = readUnicodeEscapeSequence (c);
754 	} while (isIdentChar (c));
755 	/* if readUnicodeEscapeSequence() read an escape sequence this is incorrect,
756 	 * as we should actually put back the whole escape sequence and not the
757 	 * decoded character.  However, it's not really worth the hassle as it can
758 	 * only happen if the input has an invalid escape sequence. */
759 	ungetcToInputFile (c);		/* unget non-identifier character */
760 }
761 
/* Reads the contents of a template string, the opening '`' having been read
 * already, and appends them to @p string up to the closing '`' or the end of
 * the input.  A backslash and the character that follows it are copied as
 * they are.  The contents of a ${...} substitution are read with the regular
 * tokenizer, which appends their representation to @p string. */
static void parseTemplateString (vString *const string)
{
	for (;;)
	{
		const int ch = getcFromInputFile ();

		if (ch == '`' || ch == EOF)
			return;

		vStringPut (string, ch);

		if (ch == '\\')
		{
			const int escaped = getcFromInputFile ();

			if (escaped == EOF)
				return;
			vStringPut (string, escaped);
		}
		else if (ch == '$')
		{
			const int next = getcFromInputFile ();

			if (next != '{')
			{
				/* a plain '$': give back what follows it */
				ungetcToInputFile (next);
				if (next == EOF)
					return;
				continue;
			}

			int depth = 1;
			/* we need to use the real token machinery to handle strings,
			 * comments, regexes and whatnot */
			tokenInfo *const inner = newToken ();

			LastTokenType = TOKEN_UNDEFINED;
			vStringPut (string, next);

			/* read tokens until the '}' matching the opening "${" */
			while (depth > 0)
			{
				readTokenFull (inner, false, string);
				if (isType (inner, TOKEN_OPEN_CURLY))
					depth++;
				else if (isType (inner, TOKEN_CLOSE_CURLY))
					depth--;
				else if (isType (inner, TOKEN_EOF))
					break;
			}
			deleteToken (inner);
		}
	}
}
807 
readTokenFullRaw(tokenInfo * const token,bool include_newlines,vString * const repr)808 static void readTokenFullRaw (tokenInfo *const token, bool include_newlines, vString *const repr)
809 {
810 	int c;
811 	int i;
812 	bool newline_encountered = false;
813 
814 	/* if we've got a token held back, emit it */
815 	if (NextToken)
816 	{
817 		copyToken (token, NextToken, false);
818 		deleteToken (NextToken);
819 		NextToken = NULL;
820 		return;
821 	}
822 
823 	token->type			= TOKEN_UNDEFINED;
824 	token->keyword		= KEYWORD_NONE;
825 	vStringClear (token->string);
826 
827 getNextChar:
828 	i = 0;
829 	do
830 	{
831 		c = getcFromInputFile ();
832 		if (include_newlines && (c == '\r' || c == '\n'))
833 			newline_encountered = true;
834 		i++;
835 	}
836 	while (c == '\t' || c == ' ' || c == '\r' || c == '\n');
837 
838 	token->lineNumber   = getInputLineNumber ();
839 	token->filePosition = getInputFilePosition ();
840 
841 	if (repr && c != EOF)
842 	{
843 		if (i > 1)
844 			vStringPut (repr, ' ');
845 		vStringPut (repr, c);
846 	}
847 
848 	switch (c)
849 	{
850 		case EOF: token->type = TOKEN_EOF;					break;
851 		case '(': token->type = TOKEN_OPEN_PAREN;			break;
852 		case ')': token->type = TOKEN_CLOSE_PAREN;			break;
853 		case ';': token->type = TOKEN_SEMICOLON;			break;
854 		case ',': token->type = TOKEN_COMMA;				break;
855 		case '.': token->type = TOKEN_PERIOD;				break;
856 		case ':': token->type = TOKEN_COLON;				break;
857 		case '{': token->type = TOKEN_OPEN_CURLY;			break;
858 		case '}': token->type = TOKEN_CLOSE_CURLY;			break;
859 		case '[': token->type = TOKEN_OPEN_SQUARE;			break;
860 		case ']': token->type = TOKEN_CLOSE_SQUARE;			break;
861 
862 		case '=':
863 			{
864 				int d = getcFromInputFile ();
865 				if (d == '>')
866 					token->type = TOKEN_ARROW;
867 				else
868 				{
869 					ungetcToInputFile (d);
870 					token->type = TOKEN_EQUAL_SIGN;
871 				}
872 				break;
873 			}
874 
875 		case '+':
876 		case '-':
877 			{
878 				int d = getcFromInputFile ();
879 				if (d == c) /* ++ or -- */
880 					token->type = TOKEN_POSTFIX_OPERATOR;
881 				else
882 				{
883 					ungetcToInputFile (d);
884 					token->type = TOKEN_BINARY_OPERATOR;
885 				}
886 				break;
887 			}
888 
889 		case '*':
890 			token->type = TOKEN_STAR;
891 			break;
892 		case '%':
893 		case '?':
894 		case '>':
895 		case '<':
896 		case '^':
897 		case '|':
898 		case '&':
899 			token->type = TOKEN_BINARY_OPERATOR;
900 			break;
901 
902 		case '\'':
903 		case '"':
904 				  token->type = TOKEN_STRING;
905 				  parseString (token->string, c);
906 				  token->lineNumber = getInputLineNumber ();
907 				  token->filePosition = getInputFilePosition ();
908 				  if (repr)
909 				  {
910 					  vStringCat (repr, token->string);
911 					  vStringPut (repr, c);
912 				  }
913 				  break;
914 
915 		case '`':
916 				  token->type = TOKEN_TEMPLATE_STRING;
917 				  parseTemplateString (token->string);
918 				  token->lineNumber = getInputLineNumber ();
919 				  token->filePosition = getInputFilePosition ();
920 				  if (repr)
921 				  {
922 					  vStringCat (repr, token->string);
923 					  vStringPut (repr, c);
924 				  }
925 				  break;
926 
927 		case '/':
928 				  {
929 					  int d = getcFromInputFile ();
930 					  if ( (d != '*') &&		/* is this the start of a comment? */
931 							  (d != '/') )		/* is a one line comment? */
932 					  {
933 						  ungetcToInputFile (d);
934 						  switch (LastTokenType)
935 						  {
936 							  case TOKEN_CHARACTER:
937 							  case TOKEN_IDENTIFIER:
938 							  case TOKEN_STRING:
939 							  case TOKEN_TEMPLATE_STRING:
940 							  case TOKEN_CLOSE_CURLY:
941 							  case TOKEN_CLOSE_PAREN:
942 							  case TOKEN_CLOSE_SQUARE:
943 								  token->type = TOKEN_BINARY_OPERATOR;
944 								  break;
945 
946 							  default:
947 								  token->type = TOKEN_REGEXP;
948 								  parseRegExp ();
949 								  token->lineNumber = getInputLineNumber ();
950 								  token->filePosition = getInputFilePosition ();
951 								  break;
952 						  }
953 					  }
954 					  else
955 					  {
956 						  if (repr) /* remove the / we added */
957 							  vStringChop(repr);
958 						  if (d == '*')
959 						  {
960 							  skipToCharacterInInputFile2('*', '/');
961 							  goto getNextChar;
962 						  }
963 						  else if (d == '/')	/* is this the start of a comment?  */
964 						  {
965 							  skipToCharacterInInputFile ('\n');
966 							  /* if we care about newlines, put it back so it is seen */
967 							  if (include_newlines)
968 								  ungetcToInputFile ('\n');
969 							  goto getNextChar;
970 						  }
971 					  }
972 					  break;
973 				  }
974 
975 		case '#':
976 				  /* skip shebang in case of e.g. Node.js scripts */
977 				  if (token->lineNumber > 1)
978 					  token->type = TOKEN_UNDEFINED;
979 				  else if ((c = getcFromInputFile ()) != '!')
980 				  {
981 					  ungetcToInputFile (c);
982 					  token->type = TOKEN_UNDEFINED;
983 				  }
984 				  else
985 				  {
986 					  skipToCharacterInInputFile ('\n');
987 					  goto getNextChar;
988 				  }
989 				  break;
990 
991 		case '@':
992 				  token->type = TOKEN_ATMARK;
993 				  break;
994 
995 		case '\\':
996 				  c = readUnicodeEscapeSequence (c);
997 				  /* fallthrough */
998 		default:
999 				  if (! isIdentChar (c))
1000 					  token->type = TOKEN_UNDEFINED;
1001 				  else
1002 				  {
1003 					  parseIdentifier (token->string, c);
1004 					  token->lineNumber = getInputLineNumber ();
1005 					  token->filePosition = getInputFilePosition ();
1006 					  token->keyword = lookupKeyword (vStringValue (token->string), Lang_js);
1007 					  if (isKeyword (token, KEYWORD_NONE))
1008 						  token->type = TOKEN_IDENTIFIER;
1009 					  else
1010 						  token->type = TOKEN_KEYWORD;
1011 					  if (repr && vStringLength (token->string) > 1)
1012 						  vStringCatS (repr, vStringValue (token->string) + 1);
1013 				  }
1014 				  break;
1015 	}
1016 
1017 	if (include_newlines && newline_encountered)
1018 	{
1019 		/* This isn't strictly correct per the standard, but following the
1020 		 * real rules means understanding all statements, and that's not
1021 		 * what the parser currently does.  What we do here is a guess, by
1022 		 * avoiding inserting semicolons that would make the statement on
1023 		 * the left or right obviously invalid.  Hopefully this should not
1024 		 * have false negatives (e.g. should not miss insertion of a semicolon)
1025 		 * but might have false positives (e.g. it will wrongfully emit a
1026 		 * semicolon sometimes, i.e. for the newline in "foo\n(bar)").
1027 		 * This should however be mostly harmless as we only deal with
1028 		 * newlines in specific situations where we know a false positive
1029 		 * wouldn't hurt too bad. */
1030 
1031 		/* these already end a statement, so no need to duplicate it */
1032 		#define IS_STMT_SEPARATOR(t) ((t) == TOKEN_SEMICOLON    || \
1033 		                              (t) == TOKEN_EOF          || \
1034 		                              (t) == TOKEN_COMMA        || \
1035 		                              (t) == TOKEN_OPEN_CURLY)
1036 		/* these cannot be the start or end of a statement */
1037 		#define IS_BINARY_OPERATOR(t) ((t) == TOKEN_EQUAL_SIGN      || \
1038 		                               (t) == TOKEN_ARROW           || \
1039 		                               (t) == TOKEN_COLON           || \
1040 		                               (t) == TOKEN_PERIOD          || \
1041 		                               (t) == TOKEN_STAR            || \
1042 		                               (t) == TOKEN_BINARY_OPERATOR)
1043 
1044 		if (! IS_STMT_SEPARATOR(LastTokenType) &&
1045 		    ! IS_STMT_SEPARATOR(token->type) &&
1046 		    ! IS_BINARY_OPERATOR(LastTokenType) &&
1047 		    ! IS_BINARY_OPERATOR(token->type) &&
1048 		    /* these cannot be followed by a semicolon */
1049 		    ! (LastTokenType == TOKEN_OPEN_PAREN ||
1050 		       LastTokenType == TOKEN_OPEN_SQUARE))
1051 		{
1052 			/* hold the token... */
1053 			Assert (NextToken == NULL);
1054 			NextToken = newToken ();
1055 			copyToken (NextToken, token, false);
1056 
1057 			/* ...and emit a semicolon instead */
1058 			token->type		= TOKEN_SEMICOLON;
1059 			token->keyword	= KEYWORD_NONE;
1060 			vStringClear (token->string);
1061 			if (repr)
1062 				vStringPut (token->string, '\n');
1063 		}
1064 
1065 		#undef IS_STMT_SEPARATOR
1066 		#undef IS_BINARY_OPERATOR
1067 	}
1068 
1069 	LastTokenType = token->type;
1070 }
1071 
1072 /* See https://babeljs.io/blog/2018/09/17/decorators */
skipBabelDecorator(tokenInfo * token,bool include_newlines,vString * const repr)1073 static void skipBabelDecorator (tokenInfo *token, bool include_newlines, vString *const repr)
1074 {
1075 	readTokenFullRaw (token, include_newlines, repr);
1076 	if (isType (token, TOKEN_OPEN_PAREN))
1077 	{
1078 		/*  @(complex ? dec1 : dec2) */
1079 		skipArgumentList (token, include_newlines, repr);
1080 		TRACE_PRINT ("found @(...) style decorator");
1081 	}
1082 	else if (isType (token, TOKEN_IDENTIFIER))
1083 	{
1084 		/*  @namespace.foo (...) */
1085 		bool found_period = false;
1086 		while (1)
1087 		{
1088 			readTokenFullRaw (token, include_newlines, repr);
1089 			if (isType (token, TOKEN_IDENTIFIER))
1090 			{
1091 				if (!found_period)
1092 				{
1093 					TRACE_PRINT("found @namespace.bar style decorator");
1094 					break;
1095 				}
1096 				found_period = false;
1097 			}
1098 			else if (isType (token, TOKEN_PERIOD))
1099 				found_period = true;
1100 			else if (isType (token, TOKEN_OPEN_PAREN))
1101 			{
1102 				skipArgumentList (token, include_newlines, repr);
1103 				TRACE_PRINT("found @foo(...) style decorator");
1104 				break;
1105 			}
1106 			else
1107 			{
1108 				TRACE_PRINT("found @foo style decorator");
1109 				break;
1110 			}
1111 		}
1112 	}
1113 	else
1114 		/* Unexpected token after @ */
1115 		TRACE_PRINT("found unexpected token during skipping a decorator");
1116 }
1117 
/* Reads the next token into @p token, skipping over any decorators found in
 * front of it. */
static void readTokenFull (tokenInfo *const token, bool include_newlines, vString *const repr)
{
	readTokenFullRaw (token, include_newlines, repr);

	/* @decorator0 @decorator1 ... There can be more than one decorator, so
	 * keep skipping for as long as the current token is another '@'. */
	while (isType (token, TOKEN_ATMARK))
		skipBabelDecorator (token, include_newlines, repr);
}
1130 
#ifdef JSCRIPT_DO_DEBUGGING
/* Tracing variant of readTokenFull(): reads a token exactly like
 * readTokenFull() and then prints its text, type and scope. */
static void readTokenFullDebug (tokenInfo *const token, bool include_newlines, vString *const repr)
{
	readTokenFull (token, include_newlines, repr);
	/* %x consumes an unsigned int; token->type is not guaranteed to have
	 * that type, so convert it explicitly instead of relying on how the
	 * compiler happens to represent it. */
	TRACE_PRINT("token '%s' of type %02x with scope '%s'",vStringValue(token->string),(unsigned int) token->type, vStringValue(token->scope));
}
/* from this point on, calls to readTokenFull() go through the tracing variant */
# define readTokenFull readTokenFullDebug
#endif
1140 
/* Convenience form of readTokenFull(): newlines are not reported and no
 * textual representation of the input is collected. */
static void readToken (tokenInfo *const token)
{
	const bool include_newlines = false;
	vString *const repr = NULL;

	readTokenFull (token, include_newlines, repr);
}
1145 
1146 /*
1147  *	 Token parsing functions
1148  */
1149 
/* Parses the members following @p token under a generated "AnonymousClass"
 * name.  A class tag for that generated name is only emitted when
 * parseMethods() reports that it found something. */
static void parseMethodsInAnonymousClass (tokenInfo *const token)
{
	tokenInfo *const klass = newToken ();

	/* start from a copy of the current token and turn it into an
	 * identifier carrying the generated name */
	copyToken (klass, token, true);
	anonGenerate (klass->string, "AnonymousClass", JSTAG_CLASS);
	klass->type = TOKEN_IDENTIFIER;

	if (parseMethods (token, klass, false))
		makeJsTagCommon (klass, JSTAG_CLASS, NULL, NULL, true);

	deleteToken (klass);
}
1164 
/* Skips a parenthesized list, nested parentheses included, and leaves in
 * @p token the first token after the matching ')'.  Does nothing unless
 * @p token is an opening parenthesis.
 *
 * When @p repr is not NULL, a '(' is appended to it and it is passed along
 * to every token read inside the list.  @p include_newlines only applies to
 * the final read past the closing parenthesis. */
static void skipArgumentList (tokenInfo *const token, bool include_newlines, vString *const repr)
{
	if (! isType (token, TOKEN_OPEN_PAREN))
		return;

	if (repr)
		vStringPut (repr, '(');

	int depth = 1;
	tokenType previous = token->type;
	while (depth > 0 && ! isType (token, TOKEN_EOF))
	{
		readTokenFull (token, false, repr);

		if (isType (token, TOKEN_OPEN_PAREN))
			depth++;
		else if (isType (token, TOKEN_CLOSE_PAREN))
			depth--;
		else if (isType (token, TOKEN_OPEN_CURLY))
		{
			/* a brace right after '=>' is handed to parseBlock(), any
			 * other brace to parseMethodsInAnonymousClass() */
			if (previous == TOKEN_ARROW)
				parseBlock (token, NULL);
			else
				parseMethodsInAnonymousClass (token);
		}
		else if (isKeyword (token, KEYWORD_function))
			parseFunction (token);

		previous = token->type;
	}

	/* step past the list; this read does not feed repr */
	readTokenFull (token, include_newlines, NULL);
}
1196 
/* Skips a square-bracketed list such as the subscript in
 *	 var name[1]
 * keeping track of nested brackets, and leaves in @p token the first token
 * after the matching ']'.  Does nothing unless @p token is an opening
 * square bracket.  @p include_newlines only applies to the final read past
 * the closing bracket. */
static void skipArrayList (tokenInfo *const token, bool include_newlines)
{
	if (! isType (token, TOKEN_OPEN_SQUARE))
		return;

	int depth = 1;
	tokenType previous = token->type;
	while (depth > 0 && ! isType (token, TOKEN_EOF))
	{
		readToken (token);

		if (isType (token, TOKEN_OPEN_SQUARE))
			depth++;
		else if (isType (token, TOKEN_CLOSE_SQUARE))
			depth--;
		else if (isType (token, TOKEN_OPEN_CURLY))
		{
			/* a brace right after '=>' is handed to parseBlock(), any
			 * other brace to parseMethodsInAnonymousClass() */
			if (previous == TOKEN_ARROW)
				parseBlock (token, NULL);
			else
				parseMethodsInAnonymousClass (token);
		}

		previous = token->type;
	}

	readTokenFull (token, include_newlines, NULL);
}
1229 
/* Skips a dotted name such as foo.bar.baz, stopping at the first token
 * that does not continue it. */
static void skipQualifiedIdentifier (tokenInfo *const token)
{
	while (isType (token, TOKEN_IDENTIFIER))
	{
		readToken (token);
		if (! isType (token, TOKEN_PERIOD))
			return;
		/* step over the period; the loop then checks what follows it */
		readToken (token);
	}
}
1242 
/* Appends the text of @p child to the text of @p parent, inserting a '.'
 * between them unless @p parent is still empty. */
static void addContext (tokenInfo* const parent, const tokenInfo* const child)
{
	vString *const path = parent->string;

	if (vStringLength (path) > 0)
		vStringPut (path, '.');
	vStringCat (path, child->string);
}
1251 
/* Appends @p extra to the scope of @p token, inserting a '.' between them
 * unless the scope is still empty. */
static void addToScope (tokenInfo* const token, const vString* const extra)
{
	vString *const scope = token->scope;

	if (vStringLength (scope) > 0)
		vStringPut (scope, '.');
	vStringCat (scope, extra);
}
1260 
1261 /*
1262  *	 Scanning functions
1263  */
1264 
/* Advances @p token until it is a semicolon, a closing brace, the end of
 * the input or, when @p include_commas is set, a comma.  Nested braces,
 * parentheses and square brackets met on the way are consumed as a whole.
 * @p include_newlines is forwarded to the token reads done here.
 *
 * Returns true only when the token stopped at is a semicolon. */
static bool findCmdTerm (tokenInfo *const token, bool include_newlines,
                            bool include_commas)
{
	for (;;)
	{
		if (isType (token, TOKEN_SEMICOLON) ||
			isType (token, TOKEN_CLOSE_CURLY) ||
			isType (token, TOKEN_EOF) ||
			(include_commas && isType (token, TOKEN_COMMA)))
			break;

		if (isType (token, TOKEN_OPEN_PAREN))
			skipArgumentList (token, include_newlines, NULL);
		else if (isType (token, TOKEN_OPEN_SQUARE))
			skipArrayList (token, include_newlines);
		else
		{
			/* a nested block is parsed first, then stepped over like
			 * any ordinary token */
			if (isType (token, TOKEN_OPEN_CURLY))
				parseBlock (token, NULL);
			readTokenFull (token, include_newlines, NULL);
		}
	}

	return isType (token, TOKEN_SEMICOLON);
}
1299 
/* Parses a switch statement of the form
 *
 *	   switch (expression) {
 *	   case value1:
 *		   statement;
 *		   break;
 *	   default : statement;
 *	   }
 *
 * The parenthesized expression is skipped and the braces are parsed as an
 * ordinary unnamed block. */
static void parseSwitch (tokenInfo *const token)
{
	/* read the token following the one we were called on */
	readToken (token);

	if (isType (token, TOKEN_OPEN_PAREN))
		skipArgumentList (token, false, NULL);

	if (isType (token, TOKEN_OPEN_CURLY))
		parseBlock (token, NULL);
}
1326 
parseLoop(tokenInfo * const token)1327 static bool parseLoop (tokenInfo *const token)
1328 {
1329 	/*
1330 	 * Handles these statements
1331 	 *	   for (x=0; x<3; x++)
1332 	 *		   document.write("This text is repeated three times<br>");
1333 	 *
1334 	 *	   for (x=0; x<3; x++)
1335 	 *	   {
1336 	 *		   document.write("This text is repeated three times<br>");
1337 	 *	   }
1338 	 *
1339 	 *	   while (number<5){
1340 	 *		   document.write(number+"<br>");
1341 	 *		   number++;
1342 	 *	   }
1343 	 *
1344 	 *	   do{
1345 	 *		   document.write(number+"<br>");
1346 	 *		   number++;
1347 	 *	   }
1348 	 *	   while (number<5);
1349 	 */
1350 	bool is_terminated = true;
1351 
1352 	if (isKeyword (token, KEYWORD_for) || isKeyword (token, KEYWORD_while))
1353 	{
1354 		readToken(token);
1355 
1356 		if (isType (token, TOKEN_OPEN_PAREN))
1357 		{
1358 			skipArgumentList(token, false, NULL);
1359 		}
1360 
1361 		if (isType (token, TOKEN_OPEN_CURLY))
1362 		{
1363 			parseBlock (token, NULL);
1364 		}
1365 		else
1366 		{
1367 			is_terminated = parseLine(token, false);
1368 		}
1369 	}
1370 	else if (isKeyword (token, KEYWORD_do))
1371 	{
1372 		readToken(token);
1373 
1374 		if (isType (token, TOKEN_OPEN_CURLY))
1375 		{
1376 			parseBlock (token, NULL);
1377 		}
1378 		else
1379 		{
1380 			is_terminated = parseLine(token, false);
1381 		}
1382 
1383 		if (is_terminated)
1384 			readToken(token);
1385 
1386 		if (isKeyword (token, KEYWORD_while))
1387 		{
1388 			readToken(token);
1389 
1390 			if (isType (token, TOKEN_OPEN_PAREN))
1391 			{
1392 				skipArgumentList(token, true, NULL);
1393 			}
1394 			if (! isType (token, TOKEN_SEMICOLON))
1395 			{
1396 				/* oddly enough, `do {} while (0) var foo = 42` is perfectly
1397 				 * valid JS, so explicitly handle the remaining of the line
1398 				 * for the sake of the root scope handling (as parseJsFile()
1399 				 * always advances a token not to ever get stuck) */
1400 				is_terminated = parseLine(token, false);
1401 			}
1402 		}
1403 	}
1404 
1405 	return is_terminated;
1406 }
1407 
parseIf(tokenInfo * const token)1408 static bool parseIf (tokenInfo *const token)
1409 {
1410 	bool read_next_token = true;
1411 	/*
1412 	 * If statements have two forms
1413 	 *	   if ( ... )
1414 	 *		   one line;
1415 	 *
1416 	 *	   if ( ... )
1417 	 *		  statement;
1418 	 *	   else
1419 	 *		  statement
1420 	 *
1421 	 *	   if ( ... ) {
1422 	 *		  multiple;
1423 	 *		  statements;
1424 	 *	   }
1425 	 *
1426 	 *
1427 	 *	   if ( ... ) {
1428 	 *		  return elem
1429 	 *	   }
1430 	 *
1431 	 *     This example if correctly written, but the
1432 	 *     else contains only 1 statement without a terminator
1433 	 *     since the function finishes with the closing brace.
1434 	 *
1435      *     function a(flag){
1436      *         if(flag)
1437      *             test(1);
1438      *         else
1439      *             test(2)
1440      *     }
1441 	 *
1442 	 * TODO:  Deal with statements that can optional end
1443 	 *		  without a semi-colon.  Currently this messes up
1444 	 *		  the parsing of blocks.
1445 	 *		  Need to somehow detect this has happened, and either
1446 	 *		  backup a token, or skip reading the next token if
1447 	 *		  that is possible from all code locations.
1448 	 *
1449 	 */
1450 
1451 	readToken (token);
1452 
1453 	if (isKeyword (token, KEYWORD_if))
1454 	{
1455 		/*
1456 		 * Check for an "else if" and consume the "if"
1457 		 */
1458 		readToken (token);
1459 	}
1460 
1461 	if (isType (token, TOKEN_OPEN_PAREN))
1462 	{
1463 		skipArgumentList(token, false, NULL);
1464 	}
1465 
1466 	if (isType (token, TOKEN_OPEN_CURLY))
1467 	{
1468 		parseBlock (token, NULL);
1469 	}
1470 	else
1471 	{
1472 		/* The next token should only be read if this statement had its own
1473 		 * terminator */
1474 		read_next_token = findCmdTerm (token, true, false);
1475 	}
1476 	return read_next_token;
1477 }
1478 
/* Parses a function introduced by the token currently in @p token:
 *	   function validFunctionTwo(a,b) {}
 *	   function * generator(a,b) {}
 *	   function (a,b) {}
 * A function without a name gets a generated "AnonymousFunction" one.  When
 * parseBlock() reports the body as a class, a class tag is made instead of
 * a function tag.  If what follows is neither a name nor an argument list,
 * nothing is tagged and @p token is left as it was. */
static void parseFunction (tokenInfo *const token)
{
	TRACE_ENTER();

	tokenInfo *const name = newToken ();
	vString *const signature = vStringNew ();

	/* the lookahead is read into name, not into token */
	copyToken (name, token, true);
	readToken (name);

	const bool is_generator = isType (name, TOKEN_STAR);
	if (is_generator)
		readToken (name);

	const bool is_anonymous = isType (name, TOKEN_OPEN_PAREN);

	if (is_anonymous || isType (name, TOKEN_IDENTIFIER))
	{
		if (is_anonymous)
		{
			/* anonymous function: hand the '(' over to token and make
			 * up a name */
			copyToken (token, name, false);
			anonGenerate (name->string, "AnonymousFunction", JSTAG_FUNCTION);
		}
		else
			readToken (token);

		/* dotted name: every component that is not a keyword is
		 * appended to the name */
		while (isType (token, TOKEN_PERIOD))
		{
			readToken (token);
			if (isType (token, TOKEN_KEYWORD))
				continue;
			addContext (name, token);
			readToken (token);
		}

		if (isType (token, TOKEN_OPEN_PAREN))
			skipArgumentList (token, false, signature);

		if (isType (token, TOKEN_OPEN_CURLY))
		{
			if (parseBlock (token, name->string))
				makeClassTagCommon (name, signature, NULL, is_anonymous);
			else
				makeFunctionTagCommon (name, signature, is_generator, is_anonymous);
		}

		findCmdTerm (token, false, false);
	}

	vStringDelete (signature);
	deleteToken (name);

	TRACE_LEAVE();
}
1543 
1544 /* Parses a block surrounded by curly braces.
1545  * @p parentScope is the scope name for this block, or NULL for unnamed scopes */
parseBlock(tokenInfo * const token,const vString * const parentScope)1546 static bool parseBlock (tokenInfo *const token, const vString *const parentScope)
1547 {
1548 	TRACE_ENTER();
1549 
1550 	bool is_class = false;
1551 	bool read_next_token = true;
1552 	vString * saveScope = vStringNew ();
1553 
1554 	vStringCopy(saveScope, token->scope);
1555 	if (parentScope)
1556 	{
1557 		addToScope (token, parentScope);
1558 		token->nestLevel++;
1559 	}
1560 
1561 	/*
1562 	 * Make this routine a bit more forgiving.
1563 	 * If called on an open_curly advance it
1564 	 */
1565 	if (isType (token, TOKEN_OPEN_CURLY))
1566 		readToken(token);
1567 
1568 	if (! isType (token, TOKEN_CLOSE_CURLY))
1569 	{
1570 		/*
1571 		 * Read until we find the closing brace,
1572 		 * any nested braces will be handled within
1573 		 */
1574 		do
1575 		{
1576 			read_next_token = true;
1577 			if (isKeyword (token, KEYWORD_this))
1578 			{
1579 				/*
1580 				 * Means we are inside a class and have found
1581 				 * a class, not a function
1582 				 */
1583 				is_class = true;
1584 
1585 				/*
1586 				 * Ignore the remainder of the line
1587 				 * findCmdTerm(token);
1588 				 */
1589 				read_next_token = parseLine (token, is_class);
1590 			}
1591 			else if (isKeyword (token, KEYWORD_var) ||
1592 					 isKeyword (token, KEYWORD_let) ||
1593 					 isKeyword (token, KEYWORD_const))
1594 			{
1595 				/*
1596 				 * Potentially we have found an inner function.
1597 				 * Set something to indicate the scope
1598 				 */
1599 				read_next_token = parseLine (token, is_class);
1600 			}
1601 			else if (isType (token, TOKEN_OPEN_CURLY))
1602 			{
1603 				/* Handle nested blocks */
1604 				parseBlock (token, NULL);
1605 			}
1606 			else
1607 			{
1608 				/*
1609 				 * It is possible for a line to have no terminator
1610 				 * if the following line is a closing brace.
1611 				 * parseLine will detect this case and indicate
1612 				 * whether we should read an additional token.
1613 				 */
1614 				read_next_token = parseLine (token, is_class);
1615 			}
1616 
1617 			/*
1618 			 * Always read a new token unless we find a statement without
1619 			 * a ending terminator
1620 			 */
1621 			if( read_next_token )
1622 				readToken(token);
1623 
1624 			/*
1625 			 * If we find a statement without a terminator consider the
1626 			 * block finished, otherwise the stack will be off by one.
1627 			 */
1628 		} while (! isType (token, TOKEN_EOF) &&
1629 				 ! isType (token, TOKEN_CLOSE_CURLY) && read_next_token);
1630 	}
1631 
1632 	vStringCopy(token->scope, saveScope);
1633 	vStringDelete(saveScope);
1634 	if (parentScope)
1635 		token->nestLevel--;
1636 
1637 	TRACE_LEAVE();
1638 
1639 	return is_class;
1640 }
1641 
/* Parses the members of an object literal or of a class body and makes a
 * tag for each of them.
 *
 * The loop starts by reading a new token, so @p token is expected to be the
 * opening curly brace on entry.  When @p class is not NULL its name is
 * added to the scope of @p token while the members are parsed; the scope is
 * restored before returning.  @p is_es6_class selects the rules for ES6
 * class bodies: members need no comma between them, and a member given a
 * non-function value is tagged as a field instead of a property.
 *
 * Returns true if a method was tagged or, outside ES6 classes, if a
 * property (or nested class) was tagged. */
parseMethods(tokenInfo * const token,const tokenInfo * const class,const bool is_es6_class)1642 static bool parseMethods (tokenInfo *const token, const tokenInfo *const class,
1643                           const bool is_es6_class)
1644 {
1645 	TRACE_ENTER_TEXT("token is '%s' of type %s in classToken '%s' of type %s (es6: %s)",
1646 					 vStringValue(token->string), tokenTypeName (token->type),
1647 					 class == NULL ? "none" : vStringValue(class->string),
1648 					 class == NULL ? "none" : tokenTypeName (class->type),
1649 					 is_es6_class? "yes": "no");
1650 
1651 	tokenInfo *const name = newToken ();
1652 	bool has_methods = false;
1653 	vString *saveScope = vStringNew ();
1654 
1655 	vStringCopy (saveScope, token->scope);
1656 	if (class != NULL)
1657 		addToScope (token, class->string);
1658 
1659 	/*
1660 	 * This deals with these formats
1661 	 *	   validProperty  : 2,
1662 	 *	   validMethod    : function(a,b) {}
1663 	 *	   'validMethod2' : function(a,b) {}
1664      *     container.dirtyTab = {'url': false, 'title':false, 'snapshot':false, '*': false}
1665 	 *     get prop() {}
1666 	 *     set prop(val) {}
1667 	 *     get(...) {}
1668 	 *     set(...) {}
1669      *
1670      * ES6 methods:
1671      *     property(...) {}
1672      *     *generator() {}
1673      *
1674      * ES6 computed name:
1675      *     [property]() {}
1676      *     get [property]() {}
1677      *     set [property]() {}
1678      *     *[generator]() {}
1679 	 *
1680 	 * tc39/proposal-class-fields
1681 	 *     field0 = function(a,b) {}
1682 	 *     field1 = 1
1683 	 * The parser extracts field0 as a method because the left value
1684 	 * is a function (kind propagation), and field1 as a field.
1685 	 */
1686 
	/* Set when the token already held at the end of an iteration has
	 * not been dealt with yet, so the next iteration must not read past it. */
1687 	bool dont_read = false;
1688 	do
1689 	{
1690 		bool is_setter = false;
1691 		bool is_getter = false;
1692 
1693 		if (!dont_read)
1694 			readToken (token);
1695 		dont_read = false;
1696 
		/* end of the member list: skip the trailing findCmdTerm() */
1697 		if (isType (token, TOKEN_CLOSE_CURLY))
1698 		{
1699 			goto cleanUp;
1700 		}
1701 
1702 		if (isKeyword (token, KEYWORD_async))
1703 			readToken (token);
1704 		else if (isType (token, TOKEN_KEYWORD) &&
1705 				 (isKeyword (token, KEYWORD_get) || isKeyword (token, KEYWORD_set)))
1706 		{
			/* `get`/`set` immediately followed by '(' is a method that
			 * is itself named get/set, not an accessor: the '(' is kept
			 * in NextToken and the keyword is turned back into a plain
			 * identifier.  Otherwise the token just read is the name of
			 * the accessor. */
1707 			tokenInfo *saved_token = newToken ();
1708 			copyToken (saved_token, token, true);
1709 			readToken (token);
1710 			if (isType(token, TOKEN_OPEN_PAREN))
1711 			{
1712 				Assert (NextToken == NULL);
1713 				NextToken = newToken ();
1714 				copyToken (NextToken, token, false);	/* save token for next read */
1715 				copyToken (token, saved_token, true);	/* restore token to process */
1716 				token->type = TOKEN_IDENTIFIER;			/* process as identifier */
1717 				token->keyword = KEYWORD_NONE;
1718 			}
1719 			else if (isKeyword (saved_token, KEYWORD_get))
1720 			{
1721 				is_getter = true;
1722 			}
1723 			else
1724 			{
1725 				is_setter = true;
1726 			}
1727 			deleteToken (saved_token);
1728 		}
1729 
		/* keywords and stray semicolons are not member names */
1730 		if (! isType (token, TOKEN_KEYWORD) &&
1731 		    ! isType (token, TOKEN_SEMICOLON))
1732 		{
1733 			bool is_generator = false;
1734 			bool is_shorthand = false; /* ES6 shorthand syntax */
1735 			bool is_computed_name = false; /* ES6 computed property name */
1736 			bool is_dynamic_prop = false;
1737 			vString *dprop = NULL; /* is_computed_name is true but
1738 									* the name is not represented in
1739 									* a string literal. The expressions
1740 									* go into this string. */
1741 
1742 			if (isType (token, TOKEN_STAR)) /* shorthand generator */
1743 			{
1744 				is_generator = true;
1745 				readToken (token);
1746 			}
1747 
1748 			if (isType (token, TOKEN_OPEN_SQUARE))
1749 			{
1750 				is_computed_name = true;
1751 				dprop = vStringNewInit ("[");
1752 				readTokenFull (token, false, dprop);
1753 			}
1754 
1755 			copyToken(name, token, true);
1756 			if (is_computed_name && ! isType (token, TOKEN_STRING))
1757 				is_dynamic_prop = true;
1758 
1759 			readTokenFull (token, false, dprop);
1760 
1761 			if (is_computed_name)
1762 			{
				/* Read up to the ']' closing the computed name.  Any
				 * token other than that ']' marks the name as dynamic;
				 * the read that brings depth to 0 no longer feeds dprop. */
1763 				int depth = 1;
1764 				do
1765 				{
1766 					if (isType (token, TOKEN_CLOSE_SQUARE))
1767 						depth--;
1768 					else
1769 					{
1770 						is_dynamic_prop = true;
1771 						if (isType (token, TOKEN_OPEN_SQUARE))
1772 							depth++;
1773 					}
1774 					readTokenFull (token, false, (is_dynamic_prop && depth != 0)? dprop: NULL);
1775 				} while (! isType (token, TOKEN_EOF) && depth > 0);
1776 			}
1777 
1778 			if (is_dynamic_prop)
1779 			{
				/* NOTE(review): dprop is not deleted on this path, so
				 * injectDynamicName() presumably takes ownership of it --
				 * confirm against its definition */
1780 				injectDynamicName (name, dprop);
1781 				dprop = NULL;
1782 			}
1783 			else
1784 				vStringDelete (dprop);
1785 
1786 			is_shorthand = isType (token, TOKEN_OPEN_PAREN);
1787 			bool can_be_field = isType (token, TOKEN_EQUAL_SIGN);
1788 			if ( isType (token, TOKEN_COLON) || can_be_field || is_shorthand )
1789 			{
1790 				if (! is_shorthand)
1791 				{
1792 					readToken (token);
1793 					if (isKeyword (token, KEYWORD_async))
1794 						readToken (token);
1795 				}
1796 
1797 				vString * signature = vStringNew ();
1798 				if ( is_shorthand || isKeyword (token, KEYWORD_function) )
1799 				{
1800 					TRACE_PRINT("Seems to be a function or shorthand");
1801 
1802 					if (! is_shorthand)
1803 					{
1804 						readToken (token);
1805 						if (isType (token, TOKEN_STAR))
1806 						{
1807 							/* generator: 'function' '*' '(' ... ')' '{' ... '}' */
1808 							is_generator = true;
1809 							readToken (token);
1810 						}
1811 					}
1812 					if ( isType (token, TOKEN_OPEN_PAREN) )
1813 					{
1814 						skipArgumentList(token, false, signature);
1815 					}
1816 
					/* Also entered by goto from the arrow-function case of
					 * the value-skipping loop below, with token holding
					 * whatever follows the '=>'. */
1817 function:
1818 					if (isType (token, TOKEN_OPEN_CURLY))
1819 					{
1820 						has_methods = true;
1821 
1822 						int kind = JSTAG_METHOD;
1823 						if (is_generator)
1824 							kind = JSTAG_GENERATOR;
1825 						else if (is_getter)
1826 							kind = JSTAG_GETTER;
1827 						else if (is_setter)
1828 							kind = JSTAG_SETTER;
1829 
1830 						makeJsTag (name, kind, signature, NULL);
1831 						parseBlock (token, name->string);
1832 
1833 						/*
1834 						 * If we aren't parsing an ES6 class (for which there
1835 						 * is no mandatory separators), read to the closing
1836 						 * curly, check next token, if a comma, we must loop
1837 						 * again.
1838 						 */
1839 						if (! is_es6_class)
1840 							readToken (token);
1841 					}
1842 				}
1843 				else if (! is_es6_class)
1844 				{
1845 					bool has_child_methods = false;
					/* last plain token seen while skipping the value; it is
					 * used as the parameter of `x => ...` when no
					 * parenthesized argument list was recorded */
1846 					tokenInfo *saved_token = newToken ();
1847 
1848 					/* skip whatever is the value */
1849 					while (! isType (token, TOKEN_COMMA) &&
1850 					       ! isType (token, TOKEN_CLOSE_CURLY) &&
1851 					       ! isType (token, TOKEN_EOF))
1852 					{
1853 						if (isType (token, TOKEN_OPEN_CURLY))
1854 						{
1855 							/* Recurse to find child properties/methods */
1856 							has_child_methods = parseMethods (token, name, false);
1857 							readToken (token);
1858 						}
1859 						else if (isType (token, TOKEN_OPEN_PAREN))
1860 						{
1861 							vStringClear (signature);
1862 							skipArgumentList (token, false, signature);
1863 						}
1864 						else if (isType (token, TOKEN_OPEN_SQUARE))
1865 						{
1866 							skipArrayList (token, false);
1867 						}
1868 						else if (isType (token, TOKEN_ARROW))
1869 						{
1870 							TRACE_PRINT("Seems to be an anonymous function");
1871 							if (vStringIsEmpty (signature) &&
1872 								isType (saved_token, TOKEN_IDENTIFIER))
1873 							{
1874 								vStringPut (signature, '(');
1875 								vStringCat (signature, saved_token->string);
1876 								vStringPut (signature, ')');
1877 							}
1878 							readToken (token);
							/* released here because the jump below bypasses
							 * the deleteToken() after the loop */
1879 							deleteToken (saved_token);
1880 							goto function;
1881 						}
1882 						else
1883 						{
1884 							copyToken (saved_token, token, true);
1885 							readToken (token);
1886 						}
1887 					}
1888 					deleteToken (saved_token);
1889 
1890 					has_methods = true;
1891 					if (has_child_methods)
1892 						makeJsTag (name, JSTAG_CLASS, NULL, NULL);
1893 					else
1894 						makeJsTag (name, JSTAG_PROPERTY, NULL, NULL);
1895 				}
1896 				else if (can_be_field)
1897 				{
1898 					makeJsTag (name, JSTAG_FIELD, NULL, NULL);
1899 					parseLine (token, true);
1900 				}
1901 
1902 				vStringDelete (signature);
1903 			}
1904 			else
1905 			{
				/* a bare name with no value: tag it as a field; unless it
				 * was closed by a semicolon, the token after it is
				 * re-examined by the next iteration */
1906 				makeJsTag (name, JSTAG_FIELD, NULL, NULL);
1907 				if (!isType (token, TOKEN_SEMICOLON))
1908 					dont_read = true;
1909 			}
1910 		}
	/* object literals go on only after a comma; ES6 class bodies go on
	 * until the closing curly (handled above) or the end of the input */
1911 	} while ( isType(token, TOKEN_COMMA) ||
1912 	          ( is_es6_class && ! isType(token, TOKEN_EOF) ) );
1913 
1914 	TRACE_PRINT("Finished parsing methods");
1915 
1916 	findCmdTerm (token, false, false);
1917 
1918 cleanUp:
1919 	vStringCopy (token->scope, saveScope);
1920 	vStringDelete (saveScope);
1921 	deleteToken (name);
1922 
1923 	TRACE_LEAVE_TEXT("found method(s): %s", has_methods? "yes": "no");
1924 
1925 	return has_methods;
1926 }
1927 
parseES6Class(tokenInfo * const token,const tokenInfo * targetName)1928 static bool parseES6Class (tokenInfo *const token, const tokenInfo *targetName)
1929 {
1930 	TRACE_ENTER();
1931 
1932 	tokenInfo * className = newToken ();
1933 	vString *inheritance = NULL;
1934 	bool is_anonymous = true;
1935 
1936 	copyToken (className, token, true);
1937 	readToken (className);
1938 
1939 	/* optional name */
1940 	if (isType (className, TOKEN_IDENTIFIER))
1941 	{
1942 		readToken (token);
1943 		is_anonymous = false;
1944 	}
1945 	else
1946 	{
1947 		copyToken (token, className, true);
1948 		/* We create a fake name so we have a scope for the members */
1949 		if (! targetName)
1950 			anonGenerate (className->string, "AnonymousClass", JSTAG_CLASS);
1951 	}
1952 
1953 	if (! targetName)
1954 		targetName = className;
1955 
1956 	if (isKeyword (token, KEYWORD_extends))
1957 		inheritance = vStringNew ();
1958 
1959 	/* skip inheritance info */
1960 	while (! isType (token, TOKEN_OPEN_CURLY) &&
1961 	       ! isType (token, TOKEN_EOF) &&
1962 	       ! isType (token, TOKEN_SEMICOLON))
1963 		readTokenFull (token, false, inheritance);
1964 
1965 	/* remove the last added token (here we assume it's one char, "{" or ";" */
1966 	if (inheritance && vStringLength (inheritance) > 0 &&
1967 	    ! isType (token, TOKEN_EOF))
1968 	{
1969 		vStringChop (inheritance);
1970 		vStringStripTrailing (inheritance);
1971 		vStringStripLeading (inheritance);
1972 	}
1973 
1974 	TRACE_PRINT("Emitting tag for class '%s'", vStringValue(targetName->string));
1975 
1976 	makeJsTagCommon (targetName, JSTAG_CLASS, NULL, inheritance,
1977 					 (is_anonymous && (targetName == className)));
1978 
1979 	if (! is_anonymous && targetName != className)
1980 	{
1981 		/* FIXME: what to do with the secondary name?  It's local to the
1982 		 *        class itself, so not very useful... let's hope people
1983 		 *        don't give it another name than the target in case of
1984 		 *        	var MyClass = class MyClassSecondaryName { ... }
1985 		 *        I guess it could be an alias to MyClass, or duplicate it
1986 		 *        altogether, not sure. */
1987 		makeJsTag (className, JSTAG_CLASS, NULL, inheritance);
1988 	}
1989 
1990 	if (inheritance)
1991 		vStringDelete (inheritance);
1992 
1993 	if (isType (token, TOKEN_OPEN_CURLY))
1994 		parseMethods (token, targetName, true);
1995 
1996 	deleteToken (className);
1997 
1998 	TRACE_LEAVE();
1999 	return true;
2000 }
2001 
/*
 * Emits a variable tag (a constant tag when IS_CONST is set) for NAME, unless
 * the qualified spelling of TOKEN is already listed in FunctionNames or
 * ClassNames.  The qualified spelling is TOKEN's scope, a '.', and TOKEN's
 * string; when the scope is empty it is just TOKEN's string.
 */
static void makeVariableTagUnlessKnown (tokenInfo *const name,
										tokenInfo *const token,
										const bool is_const)
{
	vString *const fulltag = vStringNew ();

	if (vStringLength (token->scope) > 0)
	{
		vStringCopy (fulltag, token->scope);
		vStringPut (fulltag, '.');
		vStringCat (fulltag, token->string);
	}
	else
	{
		vStringCopy (fulltag, token->string);
	}

	if ( ! stringListHas (FunctionNames, vStringValue (fulltag)) &&
	     ! stringListHas (ClassNames, vStringValue (fulltag)) )
	{
		makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL, NULL);
	}

	vStringDelete (fulltag);
}

/*
 * Parses one statement beginning at TOKEN and emits the tags it defines
 * (variables, constants, functions, generators, classes and methods).
 *
 * TOKEN            the current token; it is advanced as the statement is
 *                  consumed.  Its scope is saved on entry and restored
 *                  before returning.
 * IS_INSIDE_CLASS  when true, `name = function ...` assignments are tagged
 *                  as methods (or generators) rather than functions/classes.
 *
 * Returns false when the statement was found to end without a terminator:
 * parsing stopped on a '}' this statement did not open, or a sub-parser
 * (findCmdTerm, parseES6Class) or a ';' check reported no terminator.
 * Returns true otherwise.
 */
static bool parseStatement (tokenInfo *const token, bool is_inside_class)
{
	TRACE_ENTER_TEXT("is_inside_class: %s", is_inside_class? "yes": "no");

	tokenInfo *const name = newToken ();
	tokenInfo *const secondary_name = newToken ();
	tokenInfo *const method_body_token = newToken ();
	vString * saveScope = vStringNew ();
	bool is_class = false;
	bool is_var = false;
	bool is_const = false;
	bool is_terminated = true;
	bool is_global = false;
	bool has_methods = false;

	vStringCopy (saveScope, token->scope);
	/*
	 * Functions can be named or unnamed.
	 * This deals with these formats:
	 * Function
	 *	   validFunctionOne = function(a,b) {}
	 *	   testlib.validFunctionFive = function(a,b) {}
	 *	   var innerThree = function(a,b) {}
	 *	   var innerFour = (a,b) {}
	 *	   var D2 = secondary_fcn_name(a,b) {}
	 *	   var D3 = new Function("a", "b", "return a+b;");
	 * Class
	 *	   testlib.extras.ValidClassOne = function(a,b) {
	 *		   this.a = a;
	 *	   }
	 * Class Methods
	 *	   testlib.extras.ValidClassOne.prototype = {
	 *		   'validMethodOne' : function(a,b) {},
	 *		   'validMethodTwo' : function(a,b) {}
	 *	   }
	 *	   ValidClassTwo = function ()
	 *	   {
	 *		   this.validMethodThree = function() {}
	 *		   // unnamed method
	 *		   this.validMethodFour = () {}
	 *	   }
	 *	   Database.prototype.validMethodThree = Database_getTodaysDate;
	 */

	if ( is_inside_class )
		is_class = true;
	/*
	 * var can precede an inner function
	 */
	if ( isKeyword(token, KEYWORD_var) ||
		 isKeyword(token, KEYWORD_let) ||
		 isKeyword(token, KEYWORD_const) )
	{
		TRACE_PRINT("var/let/const case");
		is_const = isKeyword(token, KEYWORD_const);
		/*
		 * Only create variables for global scope
		 */
		if ( token->nestLevel == 0 )
		{
			is_global = true;
		}
		readToken(token);
	}

nextVar:
	/*
	 * Each declarator of a comma-separated list starts here.  Forget any
	 * secondary function name recorded for a previous declarator so it is
	 * not handed to makeFunctionTag() again for this one.
	 */
	vStringClear (secondary_name->string);

	if ( isKeyword(token, KEYWORD_this) )
	{
		TRACE_PRINT("found 'this' keyword");

		readToken(token);
		if (isType (token, TOKEN_PERIOD))
		{
			readToken(token);
		}
	}

	copyToken(name, token, true);
	TRACE_PRINT("name becomes '%s' of type %s",
				vStringValue(token->string), tokenTypeName (token->type));

	/* Consume the left-hand side, up to '=', ',', ';', '}' or EOF. */
	while (! isType (token, TOKEN_CLOSE_CURLY) &&
	       ! isType (token, TOKEN_SEMICOLON)   &&
	       ! isType (token, TOKEN_EQUAL_SIGN)  &&
	       ! isType (token, TOKEN_COMMA)       &&
	       ! isType (token, TOKEN_EOF))
	{
		if (isType (token, TOKEN_OPEN_CURLY))
			parseBlock (token, NULL);

		/* Potentially the name of the function */
		if (isType (token, TOKEN_PERIOD))
		{
			/*
			 * Cannot be a global variable if it has dot references in the name
			 */
			is_global = false;
			/* Assume it's an assignment to a global name (e.g. a class) using
			 * its fully qualified name, so strip the scope.
			 * FIXME: resolve the scope so we can make more than an assumption. */
			vStringClear (token->scope);
			vStringClear (name->scope);
			do
			{
				readToken (token);
				if (! isType(token, TOKEN_KEYWORD))
				{
					if ( is_class )
					{
						addToScope(token, name->string);
					}
					else
						addContext (name, token);

					readToken (token);
				}
				else if ( isKeyword(token, KEYWORD_prototype) )
				{
					/*
					 * When we reach the "prototype" tag, we infer:
					 *     "BindAgent" is a class
					 *     "build"     is a method
					 *
					 * function BindAgent( repeatableIdName, newParentIdName ) {
					 * }
					 *
					 * CASE 1
					 * Specified function name: "build"
					 *     BindAgent.prototype.build = function( mode ) {
					 *     	  maybe parse nested functions
					 *     }
					 *
					 * CASE 2
					 * Prototype listing
					 *     ValidClassOne.prototype = {
					 *         'validMethodOne' : function(a,b) {},
					 *         'validMethodTwo' : function(a,b) {}
					 *     }
					 *
					 */
					if (! ( isType (name, TOKEN_IDENTIFIER)
						|| isType (name, TOKEN_STRING) ) )
						/*
						 * Unexpected input. Try to reset the parsing.
						 *
						 * TOKEN_STRING is acceptable. e.g.:
						 * -----------------------------------
						 * "a".prototype = function( mode ) {}
						 */
						goto cleanUp;

					makeClassTag (name, NULL, NULL);
					is_class = true;

					/*
					 * There should be a ".function_name" next.
					 */
					readToken (token);
					if (isType (token, TOKEN_PERIOD))
					{
						/*
						 * Handle CASE 1
						 */
						readToken (token);
						if (! isType(token, TOKEN_KEYWORD))
						{
							vString *const signature = vStringNew ();

							addToScope(token, name->string);

							/* Scan ahead on a copy so that `token` keeps
							 * naming the method when the tag is made. */
							copyToken (method_body_token, token, true);
							readToken (method_body_token);

							while (! isType (method_body_token, TOKEN_SEMICOLON) &&
							       ! isType (method_body_token, TOKEN_CLOSE_CURLY) &&
							       ! isType (method_body_token, TOKEN_OPEN_CURLY) &&
							       ! isType (method_body_token, TOKEN_EOF))
							{
								/* only the first argument list is collected
								 * as the signature */
								if ( isType (method_body_token, TOKEN_OPEN_PAREN) )
									skipArgumentList(method_body_token, false,
													 vStringLength (signature) == 0 ? signature : NULL);
								else
									readToken (method_body_token);
							}

							makeJsTag (token, JSTAG_METHOD, signature, NULL);
							vStringDelete (signature);

							if ( isType (method_body_token, TOKEN_OPEN_CURLY))
							{
								parseBlock (method_body_token, token->string);
								is_terminated = true;
							}
							else
								is_terminated = isType (method_body_token, TOKEN_SEMICOLON);
							goto cleanUp;
						}
					}
					else if (isType (token, TOKEN_EQUAL_SIGN))
					{
						readToken (token);
						if (isType (token, TOKEN_OPEN_CURLY))
						{
							/*
							 * Handle CASE 2
							 *
							 * Creates tags for each of these class methods
							 *     ValidClassOne.prototype = {
							 *         'validMethodOne' : function(a,b) {},
							 *         'validMethodTwo' : function(a,b) {}
							 *     }
							 */
							parseMethods(token, name, false);
							/*
							 * Skip to the end of the statement
							 */
							findCmdTerm (token, false, false);
							is_terminated = true;
							goto cleanUp;
						}
					}
				}
				else
					readToken (token);
			} while (isType (token, TOKEN_PERIOD));
		}
		else
			readTokenFull (token, true, NULL);

		if ( isType (token, TOKEN_OPEN_PAREN) )
			skipArgumentList(token, false, NULL);

		if ( isType (token, TOKEN_OPEN_SQUARE) )
			skipArrayList(token, false);
	}

	if ( isType (token, TOKEN_CLOSE_CURLY) )
	{
		/*
		 * Reaching this section without having
		 * processed an open curly brace indicates
		 * the statement is most likely not terminated.
		 */
		is_terminated = false;
		goto cleanUp;
	}

	if ( isType (token, TOKEN_SEMICOLON) ||
	     isType (token, TOKEN_EOF) ||
	     isType (token, TOKEN_COMMA) )
	{
		/*
		 * Only create variables for global scope
		 */
		if ( token->nestLevel == 0 && is_global )
		{
			/*
			 * Handles this syntax:
			 *	   var g_var2;
			 */
			makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL, NULL);
		}
		/*
		 * Statement has ended.
		 * This deals with calls to functions, like:
		 *     alert(..);
		 */
		if (isType (token, TOKEN_COMMA))
		{
			readToken (token);
			goto nextVar;
		}
		goto cleanUp;
	}

	if ( isType (token, TOKEN_EQUAL_SIGN) )
	{
		int parenDepth = 0;

		readToken (token);

		/* rvalue might be surrounded with parentheses */
		while (isType (token, TOKEN_OPEN_PAREN))
		{
			parenDepth++;
			readToken (token);
		}

		if (isKeyword (token, KEYWORD_async))
			readToken (token);

		if ( isKeyword (token, KEYWORD_function) )
		{
			vString *const signature = vStringNew ();
			bool is_generator = false;

			readToken (token);
			if (isType (token, TOKEN_STAR))
			{
				is_generator = true;
				readToken (token);
			}

			if (! isType (token, TOKEN_KEYWORD) &&
			    ! isType (token, TOKEN_OPEN_PAREN))
			{
				/*
				 * Functions of this format:
				 *	   var D2A = function theAdd(a, b)
				 *	   {
				 *		  return a+b;
				 *	   }
				 * Are really two separate defined functions and
				 * can be referenced in two ways:
				 *	   alert( D2A(1,2) );			  // produces 3
				 *	   alert( theAdd(1,2) );		  // also produces 3
				 * So it must have two tags:
				 *	   D2A
				 *	   theAdd
				 * Save the reference to the name for later use, once
				 * we have established this is a valid function we will
				 * create the secondary reference to it.
				 */
				copyToken(secondary_name, token, true);
				readToken (token);
			}

			if ( isType (token, TOKEN_OPEN_PAREN) )
				skipArgumentList(token, false, signature);

			if (isType (token, TOKEN_OPEN_CURLY))
			{
				/*
				 * This will be either a function or a class.
				 * We can only determine this by checking the body
				 * of the function.  If we find a "this." we know
				 * it is a class, otherwise it is a function.
				 */
				if ( is_inside_class )
				{
					makeJsTag (name, is_generator ? JSTAG_GENERATOR : JSTAG_METHOD, signature, NULL);
					if ( vStringLength(secondary_name->string) > 0 )
						makeFunctionTag (secondary_name, signature, is_generator);
				}
				else
				{
					if (! ( isType (name, TOKEN_IDENTIFIER)
					     || isType (name, TOKEN_STRING)
					     || isType (name, TOKEN_KEYWORD) ) )
					{
						/* Unexpected input. Try to reset the parsing. */
						TRACE_PRINT("Unexpected input, trying to reset");
						vStringDelete (signature);
						goto cleanUp;
					}

					is_class = parseBlock (token, name->string);
					if ( is_class )
						makeClassTag (name, signature, NULL);
					else
						makeFunctionTag (name, signature, is_generator);

					if ( vStringLength(secondary_name->string) > 0 )
						makeFunctionTag (secondary_name, signature, is_generator);
				}
				parseBlock (token, name->string);
			}

			vStringDelete (signature);
		}
		else if (isKeyword (token, KEYWORD_class))
		{
			is_terminated = parseES6Class (token, name);
		}
		else if (isType (token, TOKEN_OPEN_CURLY))
		{
			/*
			 * Creates tags for each of these class methods
			 *     ValidClassOne.prototype = {
			 *         'validMethodOne' : function(a,b) {},
			 *         'validMethodTwo' : function(a,b) {}
			 *     }
			 * Or checks if this is a hash variable.
			 *     var z = {};
			 */
			bool anonClass = vStringIsEmpty (name->string);
			if (anonClass)
				anonGenerate (name->string, "AnonymousClass", JSTAG_CLASS);
			has_methods = parseMethods(token, name, false);
			if (has_methods)
				makeJsTagCommon (name, JSTAG_CLASS, NULL, NULL, anonClass);
			else if ( token->nestLevel == 0 && is_global )
			{
				/*
				 * Only create variables for global scope.
				 *
				 * A pointer can be created to the function.
				 * If we recognize the function/class name ignore the variable.
				 * This format looks identical to a variable definition.
				 * A variable defined outside of a block is considered
				 * a global variable:
				 *	   var g_var1 = 1;
				 *	   var g_var2;
				 * This is not a global variable:
				 *	   var g_var = function;
				 * This is a global variable:
				 *	   var g_var = different_var_name;
				 */
				makeVariableTagUnlessKnown (name, token, is_const);
			}
			/* Here we should be at the end of the block, on the close curly.
			 * If so, read the next token not to confuse that close curly with
			 * the end of the current statement. */
			if (isType (token, TOKEN_CLOSE_CURLY))
			{
				readTokenFull(token, true, NULL);
				is_terminated = isType (token, TOKEN_SEMICOLON);
			}
		}
		else if (isKeyword (token, KEYWORD_new))
		{
			readToken (token);
			is_var = isType (token, TOKEN_IDENTIFIER);
			if ( isKeyword (token, KEYWORD_function) ||
					isKeyword (token, KEYWORD_capital_function) ||
					isKeyword (token, KEYWORD_capital_object) ||
					is_var )
			{
				if ( isKeyword (token, KEYWORD_capital_object) )
					is_class = true;

				if (is_var)
					skipQualifiedIdentifier (token);
				else
					readToken (token);

				if ( isType (token, TOKEN_OPEN_PAREN) )
					skipArgumentList(token, true, NULL);

				if (isType (token, TOKEN_SEMICOLON))
				{
					if ( token->nestLevel == 0 )
					{
						if ( is_var )
						{
							makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL, NULL);
						}
						else if ( is_class )
						{
							makeClassTag (name, NULL, NULL);
						}
						else
						{
							/* FIXME: we cannot really get a meaningful
							 * signature from a `new Function()` call,
							 * so for now just don't set any */
							makeFunctionTag (name, NULL, false);
						}
					}
				}
				else if (isType (token, TOKEN_CLOSE_CURLY))
					is_terminated = false;
			}
		}
		else if (! isType (token, TOKEN_KEYWORD))
		{
			/*
			 * Only create variables for global scope
			 */
			if ( token->nestLevel == 0 && is_global )
			{
				/*
				 * A pointer can be created to the function.
				 * If we recognize the function/class name ignore the variable.
				 * This format looks identical to a variable definition.
				 * A variable defined outside of a block is considered
				 * a global variable:
				 *	   var g_var1 = 1;
				 *	   var g_var2;
				 * This is not a global variable:
				 *	   var g_var = function;
				 * This is a global variable:
				 *	   var g_var = different_var_name;
				 */
				makeVariableTagUnlessKnown (name, token, is_const);
			}
		}

		/* Skip whatever remains inside the parentheses opened around the
		 * rvalue. */
		if (parenDepth > 0)
		{
			while (parenDepth > 0 && ! isType (token, TOKEN_EOF))
			{
				if (isType (token, TOKEN_OPEN_PAREN))
					parenDepth++;
				else if (isType (token, TOKEN_CLOSE_PAREN))
					parenDepth--;
				readTokenFull (token, true, NULL);
			}
			if (isType (token, TOKEN_CLOSE_CURLY))
				is_terminated = false;
		}
	}
	/* if we aren't already at the cmd end, advance to it and check whether
	 * the statement was terminated */
	if (! isType (token, TOKEN_CLOSE_CURLY) &&
	    ! isType (token, TOKEN_SEMICOLON))
	{
		/*
		 * Statements can be optionally terminated in the case of
		 * statement prior to a close curly brace as in the
		 * document.write line below:
		 *
		 * function checkForUpdate() {
		 *	   if( 1==1 ) {
		 *		   document.write("hello from checkForUpdate<br>")
		 *	   }
		 *	   return 1;
		 * }
		 */
		is_terminated = findCmdTerm (token, true, true);
		/* if we're at a comma, try and read a second var */
		if (isType (token, TOKEN_COMMA))
		{
			readToken (token);
			goto nextVar;
		}
	}

cleanUp:
	/* Restore the caller's scope and release every temporary. */
	vStringCopy(token->scope, saveScope);
	deleteToken (name);
	deleteToken (secondary_name);
	deleteToken (method_body_token);
	vStringDelete(saveScope);

	TRACE_LEAVE();

	return is_terminated;
}
2585 
/*
 * Handles a statement that starts with the `sap` keyword.
 *
 * SAPUI5 is built on top of jQuery and follows a standard format:
 *     sap.ui.controller("id.of.controller", {
 *         method_name : function... {
 *         },
 *
 *         method_name : function ... {
 *         }
 *     }
 *
 * The initial controller (or "view") call is consumed here; the string
 * argument, when present, is copied and handed to parseMethods() together
 * with the current token so the methods are parsed as usual.
 */
static void parseUI5 (tokenInfo *const token)
{
	tokenInfo *const controllerName = newToken ();

	readToken (token);

	if (isType (token, TOKEN_PERIOD))
	{
		/* Skip the rest of the dotted name, up to the '(' (or EOF). */
		do
		{
			readToken (token);
		} while (! isType (token, TOKEN_OPEN_PAREN) &&
				 ! isType (token, TOKEN_EOF));

		/* Step past the '(' onto the first argument. */
		readToken (token);

		if (isType (token, TOKEN_STRING))
		{
			copyToken (controllerName, token, true);
			readToken (token);
		}

		if (isType (token, TOKEN_COMMA))
			readToken (token);

		/* Keep parsing method lists until a '}' or EOF is reached. */
		for (;;)
		{
			parseMethods (token, controllerName, false);

			if (isType (token, TOKEN_CLOSE_CURLY) ||
				isType (token, TOKEN_EOF))
				break;
		}
	}

	deleteToken (controllerName);
}
2635 
/*
 * Dispatches on the current token to the parser for that kind of statement.
 *
 * The common statements (if, while, for, do, ...) are detected here because
 * the last statement within a block "{}" can be optionally terminated.
 *
 * Returns whether the statement was terminated.  When it was not, the
 * calling routine must not read an additional token looking for the end of
 * the statement.
 */
static bool parseLine (tokenInfo *const token, bool is_inside_class)
{
	TRACE_ENTER_TEXT("token is '%s' of type %s",
					 vStringValue(token->string), tokenTypeName (token->type));

	bool terminated = true;

	if (! isType (token, TOKEN_KEYWORD))
	{
		/*
		 * Special case where single line statements may not be
		 * SEMICOLON terminated.  parseBlock needs to know this
		 * so that it does not read the next token.
		 */
		terminated = parseStatement (token, is_inside_class);
	}
	else
	{
		switch (token->keyword)
		{
			/* loops */
			case KEYWORD_do:
			case KEYWORD_for:
			case KEYWORD_while:
				terminated = parseLoop (token);
				break;

			/* conditionals and exception handling share semantics */
			case KEYWORD_if:
			case KEYWORD_else:
			case KEYWORD_try:
			case KEYWORD_catch:
			case KEYWORD_finally:
				terminated = parseIf (token);
				break;

			/* these report nothing; the statement counts as terminated */
			case KEYWORD_switch:
				parseSwitch (token);
				break;
			case KEYWORD_function:
				parseFunction (token);
				break;

			/* prefix keywords: skip them and parse what follows */
			case KEYWORD_async:
			case KEYWORD_return:
				readToken (token);
				terminated = parseLine (token, is_inside_class);
				break;

			case KEYWORD_class:
				terminated = parseES6Class (token, NULL);
				break;

			default:
				terminated = parseStatement (token, is_inside_class);
				break;
		}
	}

	TRACE_LEAVE();

	return terminated;
}
2702 
/*
 * Top-level driver: reads tokens until EOF, handing each statement to the
 * appropriate parser.
 */
static void parseJsFile (tokenInfo *const token)
{
	TRACE_ENTER();

	do
	{
		readToken (token);

		if (! isType (token, TOKEN_KEYWORD))
		{
			parseLine (token, false);
			continue;
		}

		switch (token->keyword)
		{
			case KEYWORD_sap:
				parseUI5 (token);
				break;

			case KEYWORD_export:
			case KEYWORD_default:
				/* skip those at top-level */
				break;

			default:
				parseLine (token, false);
				break;
		}
	} while (! isType (token, TOKEN_EOF));

	TRACE_LEAVE();
}
2722 
2723 #ifdef DO_TRACING
2724 #if DO_TRACING_USE_DUMP_TOKEN
dumpToken(const tokenInfo * const token)2725 static void dumpToken (const tokenInfo *const token)
2726 {
2727 	fprintf(stderr, "Token <%p>: %s: %s\n",
2728 			token,
2729 			tokenTypeName (token->type),
2730 			(token->type == TOKEN_KEYWORD   ? keywordName (token->keyword):
2731 			 token->type == TOKEN_IDENTIFIER? vStringValue (token->string):
2732 			 ""));
2733 }
2734 #endif
2735 
tokenTypeName(enum eTokenType e)2736 static const char *tokenTypeName(enum eTokenType e)
2737 { /* Generated by misc/enumstr.sh with cmdline "parsers/jscript.c" "eTokenType" "tokenTypeName" */
2738 	switch (e)
2739 	{
2740 		case    TOKEN_BINARY_OPERATOR: return "TOKEN_BINARY_OPERATOR";
2741 		case          TOKEN_CHARACTER: return "TOKEN_CHARACTER";
2742 		case        TOKEN_CLOSE_CURLY: return "TOKEN_CLOSE_CURLY";
2743 		case        TOKEN_CLOSE_PAREN: return "TOKEN_CLOSE_PAREN";
2744 		case       TOKEN_CLOSE_SQUARE: return "TOKEN_CLOSE_SQUARE";
2745 		case              TOKEN_COLON: return "TOKEN_COLON";
2746 		case              TOKEN_COMMA: return "TOKEN_COMMA";
2747 		case                TOKEN_EOF: return "TOKEN_EOF";
2748 		case         TOKEN_EQUAL_SIGN: return "TOKEN_EQUAL_SIGN";
2749 		case         TOKEN_IDENTIFIER: return "TOKEN_IDENTIFIER";
2750 		case            TOKEN_KEYWORD: return "TOKEN_KEYWORD";
2751 		case         TOKEN_OPEN_CURLY: return "TOKEN_OPEN_CURLY";
2752 		case         TOKEN_OPEN_PAREN: return "TOKEN_OPEN_PAREN";
2753 		case        TOKEN_OPEN_SQUARE: return "TOKEN_OPEN_SQUARE";
2754 		case             TOKEN_PERIOD: return "TOKEN_PERIOD";
2755 		case   TOKEN_POSTFIX_OPERATOR: return "TOKEN_POSTFIX_OPERATOR";
2756 		case             TOKEN_REGEXP: return "TOKEN_REGEXP";
2757 		case          TOKEN_SEMICOLON: return "TOKEN_SEMICOLON";
2758 		case               TOKEN_STAR: return "TOKEN_STAR";
2759 		case             TOKEN_STRING: return "TOKEN_STRING";
2760 		case    TOKEN_TEMPLATE_STRING: return "TOKEN_TEMPLATE_STRING";
2761 		case          TOKEN_UNDEFINED: return "TOKEN_UNDEFINED";
2762 		default: return "UNKNOWN";
2763 	}
2764 }
2765 
2766 #if DO_TRACING_USE_DUMP_TOKEN
keywordName(enum eKeywordId e)2767 static const char *keywordName(enum eKeywordId e)
2768 { /* Generated by misc/enumstr.sh with cmdline "parsers/jscript.c" "eKeywordId" "keywordName" */
2769 	switch (e)
2770 	{
2771 		case            KEYWORD_async: return "KEYWORD_async";
2772 		case KEYWORD_capital_function: return "KEYWORD_capital_function";
2773 		case   KEYWORD_capital_object: return "KEYWORD_capital_object";
2774 		case            KEYWORD_catch: return "KEYWORD_catch";
2775 		case            KEYWORD_class: return "KEYWORD_class";
2776 		case            KEYWORD_const: return "KEYWORD_const";
2777 		case          KEYWORD_default: return "KEYWORD_default";
2778 		case               KEYWORD_do: return "KEYWORD_do";
2779 		case             KEYWORD_else: return "KEYWORD_else";
2780 		case           KEYWORD_export: return "KEYWORD_export";
2781 		case          KEYWORD_extends: return "KEYWORD_extends";
2782 		case          KEYWORD_finally: return "KEYWORD_finally";
2783 		case              KEYWORD_for: return "KEYWORD_for";
2784 		case         KEYWORD_function: return "KEYWORD_function";
2785 		case              KEYWORD_get: return "KEYWORD_get";
2786 		case               KEYWORD_if: return "KEYWORD_if";
2787 		case              KEYWORD_let: return "KEYWORD_let";
2788 		case              KEYWORD_new: return "KEYWORD_new";
2789 		case        KEYWORD_prototype: return "KEYWORD_prototype";
2790 		case           KEYWORD_return: return "KEYWORD_return";
2791 		case              KEYWORD_sap: return "KEYWORD_sap";
2792 		case              KEYWORD_set: return "KEYWORD_set";
2793 		case           KEYWORD_static: return "KEYWORD_static";
2794 		case           KEYWORD_switch: return "KEYWORD_switch";
2795 		case             KEYWORD_this: return "KEYWORD_this";
2796 		case              KEYWORD_try: return "KEYWORD_try";
2797 		case              KEYWORD_var: return "KEYWORD_var";
2798 		case            KEYWORD_while: return "KEYWORD_while";
2799 		default: return "UNKNOWN";
2800 	}
2801 }
2802 #endif
2803 #endif
2804 
initialize(const langType language)2805 static void initialize (const langType language)
2806 {
2807 	Assert (ARRAY_SIZE (JsKinds) == JSTAG_COUNT);
2808 	Lang_js = language;
2809 
2810 	TokenPool = objPoolNew (16, newPoolToken, deletePoolToken, clearPoolToken, NULL);
2811 }
2812 
/*
 * Parser finalization hook: deletes the token pool, but only when
 * INITIALIZED reports that initialize() ran and therefore created it.
 */
static void finalize (langType language CTAGS_ATTR_UNUSED, bool initialized)
{
	if (initialized)
		objPoolDelete (TokenPool);
}
2820 
findJsTags(void)2821 static void findJsTags (void)
2822 {
2823 	tokenInfo *const token = newToken ();
2824 
2825 	NextToken = NULL;
2826 	ClassNames = stringListNew ();
2827 	FunctionNames = stringListNew ();
2828 	LastTokenType = TOKEN_UNDEFINED;
2829 
2830 	parseJsFile (token);
2831 
2832 	stringListDelete (ClassNames);
2833 	stringListDelete (FunctionNames);
2834 	ClassNames = NULL;
2835 	FunctionNames = NULL;
2836 	deleteToken (token);
2837 
2838 #ifdef HAVE_ICONV
2839 	if (JSUnicodeConverter != (iconv_t) -2 && /* not created */
2840 	    JSUnicodeConverter != (iconv_t) -1 /* creation failed */)
2841 	{
2842 		iconv_close (JSUnicodeConverter);
2843 		JSUnicodeConverter = (iconv_t) -2;
2844 	}
2845 #endif
2846 
2847 	Assert (NextToken == NULL);
2848 }
2849 
2850 /* Create parser definition structure */
JavaScriptParser(void)2851 extern parserDefinition* JavaScriptParser (void)
2852 {
2853 	// .jsx files are JSX: https://facebook.github.io/jsx/
2854 	// which have JS function definitions, so we just use the JS parser
2855 	static const char *const extensions [] = { "js", "jsx", "mjs", NULL };
2856 	static const char *const aliases [] = { "js", "node", "nodejs",
2857 	                                        "seed", "gjs",
2858 											/* Used in PostgreSQL
2859 											 * https://github.com/plv8/plv8 */
2860 											"v8",
2861 											NULL };
2862 	parserDefinition *const def = parserNew ("JavaScript");
2863 	def->extensions = extensions;
2864 	def->aliases = aliases;
2865 	/*
2866 	 * New definitions for parsing instead of regex
2867 	 */
2868 	def->kindTable	= JsKinds;
2869 	def->kindCount	= ARRAY_SIZE (JsKinds);
2870 	def->parser		= findJsTags;
2871 	def->initialize = initialize;
2872 	def->finalize   = finalize;
2873 	def->keywordTable = JsKeywordTable;
2874 	def->keywordCount = ARRAY_SIZE (JsKeywordTable);
2875 
2876 	return def;
2877 }
2878