1 /*
2 * Copyright (c) 2003, Darren Hiebert
3 *
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License version 2 or (at your option) any later version.
6 *
7 * This module contains functions for generating tags for JavaScript language
8 * files.
9 *
10 * Reference: http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-262.pdf
11 *
12 * This is a good reference for different forms of the function statement:
13 * http://www.permadi.com/tutorial/jsFunc/
14 * Another good reference:
15 * http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Guide
16 */
17
18 /*
19 * INCLUDE FILES
20 */
21 #include "general.h" /* must always come first */
22 #include <ctype.h> /* to define isalpha () */
23 #ifdef DEBUG
24 #include <stdio.h>
25 #endif
26
27 #ifdef HAVE_ICONV
28 #include <iconv.h>
29 #include <errno.h>
30 # ifdef WORDS_BIGENDIAN
31 # define INTERNAL_ENCODING "UTF-32BE"
32 # else
33 # define INTERNAL_ENCODING "UTF-32LE"
34 # endif /* WORDS_BIGENDIAN */
35 #endif
36
37 #include <string.h>
38 #include "debug.h"
39 #include "entry.h"
40 #include "keyword.h"
41 #include "parse.h"
42 #include "read.h"
43 #include "routines.h"
44 #include "vstring.h"
45 #include "objpool.h"
46 #include "options.h"
47 #include "mbcs.h"
48 #include "trace.h"
49 #include "strlist.h"
50
51 /*
52 * MACROS
53 */
/* True when TOKEN's type is T. */
#define isType(token,t)		((bool) ((token)->type == (t)))

/* True when TOKEN's keyword id is K. */
#define isKeyword(token,k)	((bool) ((token)->keyword == (k)))

/* True when C may appear in an identifier as scanned by this parser:
 * alphanumerics, '$', '@', '_', '#', and any value at or above 0x80.
 * NOTE: C is evaluated several times; pass a side-effect free expression. */
#define isIdentChar(c) \
	(isalnum (c) || \
	 (c) == '$' || (c) == '@' || (c) == '_' || (c) == '#' || \
	 (c) >= 0x80)

/* Tokens are recycled through TokenPool instead of being allocated directly. */
#define newToken()		(objPoolGet (TokenPool))
#define deleteToken(t)	(objPoolPut (TokenPool, (t)))
62
63 /*
64 * DATA DECLARATIONS
65 */
66
67 /*
68 * Tracks class and function names already created
69 */
70 static stringList *ClassNames;
71 static stringList *FunctionNames;
72
/* Identifiers for the words listed in JsKeywordTable.  The enumerator order
 * defines the numeric ids, so new entries should only be appended with care.
 */
enum eKeywordId {
	KEYWORD_function,
	KEYWORD_capital_function,	/* "Function" */
	KEYWORD_capital_object,		/* "Object" */
	KEYWORD_prototype,
	KEYWORD_var,
	KEYWORD_let,
	KEYWORD_const,
	KEYWORD_new,
	KEYWORD_this,
	KEYWORD_for,
	KEYWORD_while,
	KEYWORD_do,
	KEYWORD_if,
	KEYWORD_else,
	KEYWORD_switch,
	KEYWORD_try,
	KEYWORD_catch,
	KEYWORD_finally,
	KEYWORD_sap,
	KEYWORD_return,
	KEYWORD_class,
	KEYWORD_extends,
	KEYWORD_static,
	KEYWORD_default,
	KEYWORD_export,
	KEYWORD_async,
	KEYWORD_get,
	KEYWORD_set,
};

/* Plain int rather than the enum type so that KEYWORD_NONE can be stored. */
typedef int keywordId;
106
/* Lexical categories produced by the tokenizer. */
typedef enum eTokenType {
	TOKEN_UNDEFINED,
	TOKEN_EOF,
	TOKEN_CHARACTER,
	TOKEN_CLOSE_PAREN,
	TOKEN_SEMICOLON,
	TOKEN_COLON,
	TOKEN_COMMA,
	TOKEN_KEYWORD,
	TOKEN_OPEN_PAREN,
	TOKEN_IDENTIFIER,
	TOKEN_STRING,
	TOKEN_TEMPLATE_STRING,
	TOKEN_PERIOD,
	TOKEN_OPEN_CURLY,
	TOKEN_CLOSE_CURLY,
	TOKEN_EQUAL_SIGN,
	TOKEN_OPEN_SQUARE,
	TOKEN_CLOSE_SQUARE,
	TOKEN_REGEXP,
	TOKEN_POSTFIX_OPERATOR,		/* "++" or "--" */
	TOKEN_STAR,
	/* To handle Babel's decorators.
	 * Used only in readTokenFull or lower functions. */
	TOKEN_ATMARK,
	TOKEN_BINARY_OPERATOR,
	TOKEN_ARROW					/* "=>" */
} tokenType;
135
136 typedef struct sTokenInfo {
137 tokenType type;
138 keywordId keyword;
139 vString * string;
140 vString * scope;
141 unsigned long lineNumber;
142 MIOPos filePosition;
143 int nestLevel;
144 bool dynamicProp;
145 } tokenInfo;
146
147 /*
148 * DATA DEFINITIONS
149 */
150
151 static tokenType LastTokenType;
152 static tokenInfo *NextToken;
153
154 static langType Lang_js;
155
156 static objPool *TokenPool = NULL;
157
158 #ifdef HAVE_ICONV
159 static iconv_t JSUnicodeConverter = (iconv_t) -2;
160 #endif
161
162 typedef enum {
163 JSTAG_FUNCTION,
164 JSTAG_CLASS,
165 JSTAG_METHOD,
166 JSTAG_PROPERTY,
167 JSTAG_CONSTANT,
168 JSTAG_VARIABLE,
169 JSTAG_GENERATOR,
170 JSTAG_GETTER,
171 JSTAG_SETTER,
172 JSTAG_FIELD,
173 JSTAG_COUNT
174 } jsKind;
175
176 static kindDefinition JsKinds [] = {
177 { true, 'f', "function", "functions" },
178 { true, 'c', "class", "classes" },
179 { true, 'm', "method", "methods" },
180 { true, 'p', "property", "properties" },
181 { true, 'C', "constant", "constants" },
182 { true, 'v', "variable", "global variables" },
183 { true, 'g', "generator", "generators" },
184 { true, 'G', "getter", "getters" },
185 { true, 'S', "setter", "setters" },
186 { true, 'M', "field", "fields" },
187 };
188
189 static const keywordTable JsKeywordTable [] = {
190 /* keyword keyword ID */
191 { "function", KEYWORD_function },
192 { "Function", KEYWORD_capital_function },
193 { "Object", KEYWORD_capital_object },
194 { "prototype", KEYWORD_prototype },
195 { "var", KEYWORD_var },
196 { "let", KEYWORD_let },
197 { "const", KEYWORD_const },
198 { "new", KEYWORD_new },
199 { "this", KEYWORD_this },
200 { "for", KEYWORD_for },
201 { "while", KEYWORD_while },
202 { "do", KEYWORD_do },
203 { "if", KEYWORD_if },
204 { "else", KEYWORD_else },
205 { "switch", KEYWORD_switch },
206 { "try", KEYWORD_try },
207 { "catch", KEYWORD_catch },
208 { "finally", KEYWORD_finally },
209 { "sap", KEYWORD_sap },
210 { "return", KEYWORD_return },
211 { "class", KEYWORD_class },
212 { "extends", KEYWORD_extends },
213 { "static", KEYWORD_static },
214 { "default", KEYWORD_default },
215 { "export", KEYWORD_export },
216 { "async", KEYWORD_async },
217 { "get", KEYWORD_get },
218 { "set", KEYWORD_set },
219 };
220
221 /*
222 * FUNCTION DEFINITIONS
223 */
224
225 /* Recursive functions */
226 static void readTokenFull (tokenInfo *const token, bool include_newlines, vString *const repr);
227 static void skipArgumentList (tokenInfo *const token, bool include_newlines, vString *const repr);
228 static void parseFunction (tokenInfo *const token);
229 static bool parseBlock (tokenInfo *const token, const vString *const parentScope);
230 static bool parseMethods (tokenInfo *const token, const tokenInfo *const class, const bool is_es6_class);
231 static bool parseLine (tokenInfo *const token, bool is_inside_class);
232 static void parseUI5 (tokenInfo *const token);
233
234 #ifdef DO_TRACING
235 static const char *tokenTypeName(enum eTokenType e);
236 // #define DO_TRACING_USE_DUMP_TOKEN
237 #ifdef DO_TRACING_USE_DUMP_TOKEN
238 static void dumpToken (const tokenInfo *const token);
239 static const char *keywordName(enum eKeywordId e);
240 #endif
241 #endif
242
/* Pool callback: allocates a token and its two string buffers.
 * The remaining fields are left for clearPoolToken() to set. */
static void *newPoolToken (void *createArg CTAGS_ATTR_UNUSED)
{
	tokenInfo *const fresh = xMalloc (1, tokenInfo);

	fresh->string = vStringNew ();
	fresh->scope  = vStringNew ();
	return fresh;
}
252
clearPoolToken(void * data)253 static void clearPoolToken (void *data)
254 {
255 tokenInfo *token = data;
256
257 token->type = TOKEN_UNDEFINED;
258 token->keyword = KEYWORD_NONE;
259 token->nestLevel = 0;
260 token->dynamicProp = false;
261 token->lineNumber = getInputLineNumber ();
262 token->filePosition = getInputFilePosition ();
263 vStringClear (token->string);
264 vStringClear (token->scope);
265 }
266
deletePoolToken(void * data)267 static void deletePoolToken (void *data)
268 {
269 tokenInfo *token = data;
270 vStringDelete (token->string);
271 vStringDelete (token->scope);
272 eFree (token);
273 }
274
/* Copies @p src into @p dest.
 * Position, type, keyword, dynamicProp and the token text are always copied;
 * nestLevel and scope only when @p include_non_read_info is true. */
static void copyToken (tokenInfo *const dest, const tokenInfo *const src,
                       bool const include_non_read_info)
{
	dest->lineNumber   = src->lineNumber;
	dest->filePosition = src->filePosition;
	dest->type         = src->type;
	dest->keyword      = src->keyword;
	dest->dynamicProp  = src->dynamicProp;
	vStringCopy (dest->string, src->string);

	if (! include_non_read_info)
		return;

	dest->nestLevel = src->nestLevel;
	vStringCopy (dest->scope, src->scope);
}
290
/* Replaces the token's text with @p newName and flags the token as a
 * dynamic property.  The previous text buffer is freed and the token keeps
 * @p newName itself (no copy is made). */
static void injectDynamicName (tokenInfo *const token, vString *newName)
{
	vString *const previous = token->string;

	token->dynamicProp = true;
	vStringDelete (previous);
	token->string = newName;
}
297
298 /*
299 * Tag generation functions
300 */
301
/* Emits a tag of kind @p kind for @p token, unless that kind is disabled.
 *
 * Unless the token is a dynamic property or the kind is JSTAG_PROPERTY, a
 * dotted name ("a.b.c") is split at its last '.': the leading part is
 * appended to the scope and only the trailing part is used as the tag name.
 *
 * @param signature   Optional signature; control characters in it are
 *                    replaced by spaces IN PLACE.  May be NULL.
 * @param inheritance Optional inheritance string.  May be NULL.
 * @param anonymous   Whether to mark the tag with XTAG_ANONYMOUS. */
static void makeJsTagCommon (const tokenInfo *const token, const jsKind kind,
                             vString *const signature, vString *const inheritance,
                             bool anonymous)
{
	if (! JsKinds [kind].enabled)
		return;

	const char *name = vStringValue (token->string);
	vString *const fullscope = vStringNewCopy (token->scope);
	const char *lastDot;
	tagEntryInfo e;

	if (!token->dynamicProp && kind != JSTAG_PROPERTY &&
	    (lastDot = strrchr (name, '.')) != NULL)
	{
		/* move everything before the last '.' into the scope */
		if (vStringLength (fullscope) > 0)
			vStringPut (fullscope, '.');
		vStringNCatS (fullscope, name, (size_t) (lastDot - name));
		name = lastDot + 1;
	}

	initTagEntry (&e, name, kind);

	TRACE_PRINT("Emitting tag for symbol '%s' of kind %02x with scope '%s'",name,kind,vStringValue(fullscope));

	e.lineNumber   = token->lineNumber;
	e.filePosition = token->filePosition;

	if (vStringLength (fullscope) > 0)
	{
		/* FIXME: proper parent type.
		 * For now: a function (not a method) is guessed to live inside
		 * another function, anything else inside a class. */
		const jsKind parent_kind =
			(kind == JSTAG_FUNCTION) ? JSTAG_FUNCTION : JSTAG_CLASS;

		e.extensionFields.scopeKindIndex = parent_kind;
		e.extensionFields.scopeName = vStringValue (fullscope);
	}

	if (signature && vStringLength (signature))
	{
		/* A valid signature should not contain junk, but the input is
		 * whatever the user wrote and CTags copes badly with odd
		 * characters: blank out control characters and DEL. */
		for (size_t pos = 0; pos < signature->length; pos++)
		{
			const unsigned char ch = (unsigned char) vStringChar (signature, pos);
			const bool is_control = (ch < 0x20 || ch == 0x7F);

			if (is_control)
				vStringChar (signature, pos) = ' ';
		}
		e.extensionFields.signature = vStringValue (signature);
	}

	if (inheritance)
		e.extensionFields.inheritance = vStringValue (inheritance);

	if (anonymous)
		markTagExtraBit (&e, XTAG_ANONYMOUS);

	makeTagEntry (&e);
	vStringDelete (fullscope);
}
371
/* Convenience wrapper around makeJsTagCommon() for non-anonymous tags. */
static void makeJsTag (const tokenInfo *const token, const jsKind kind,
                       vString *const signature, vString *const inheritance)
{
	const bool anonymous = false;

	makeJsTagCommon (token, kind, signature, inheritance, anonymous);
}
377
/* Emits a class tag for @p token unless a class with the same fully
 * qualified name ("scope.name", or just "name" without a scope) was already
 * recorded in ClassNames.  Newly seen names are added to ClassNames. */
static void makeClassTagCommon (tokenInfo *const token, vString *const signature,
                                vString *const inheritance, bool anonymous)
{
	vString *const qualified = vStringNew ();

	if (vStringLength (token->scope) > 0)
	{
		vStringCopy (qualified, token->scope);
		vStringPut (qualified, '.');
	}
	vStringCat (qualified, token->string);

	const bool already_tagged = stringListHas (ClassNames, vStringValue (qualified));
	if (! already_tagged)
	{
		stringListAdd (ClassNames, vStringNewCopy (qualified));
		makeJsTagCommon (token, JSTAG_CLASS, signature, inheritance, anonymous);
	}

	vStringDelete (qualified);
}
400
/* Convenience wrapper around makeClassTagCommon() for non-anonymous classes. */
static void makeClassTag (tokenInfo *const token, vString *const signature,
                          vString *const inheritance)
{
	const bool anonymous = false;

	makeClassTagCommon (token, signature, inheritance, anonymous);
}
406
/* Emits a function tag (or a generator tag when @p generator is true) for
 * @p token unless the same fully qualified name ("scope.name", or just
 * "name" without a scope) was already recorded in FunctionNames.  Newly
 * seen names are added to FunctionNames. */
static void makeFunctionTagCommon (tokenInfo *const token, vString *const signature, bool generator,
                                   bool anonymous)
{
	vString *const qualified = vStringNew ();

	if (vStringLength (token->scope) > 0)
	{
		vStringCopy (qualified, token->scope);
		vStringPut (qualified, '.');
	}
	vStringCat (qualified, token->string);

	const bool already_tagged = stringListHas (FunctionNames, vStringValue (qualified));
	if (! already_tagged)
	{
		const jsKind kind = generator ? JSTAG_GENERATOR : JSTAG_FUNCTION;

		stringListAdd (FunctionNames, vStringNewCopy (qualified));
		makeJsTagCommon (token, kind, signature, NULL, anonymous);
	}

	vStringDelete (qualified);
}
429
/* Convenience wrapper around makeFunctionTagCommon() for non-anonymous functions. */
static void makeFunctionTag (tokenInfo *const token, vString *const signature, bool generator)
{
	const bool anonymous = false;

	makeFunctionTagCommon (token, signature, generator, anonymous);
}
434
435 /*
436 * Parsing functions
437 */
438
/* Encodes the code point @p point and arranges for the input stream to
 * yield the encoded bytes: the first byte is returned, the remaining ones
 * are pushed back so that the next getcFromInputFile() calls return them in
 * order, as if the character had been written literally in the input.
 *
 * With iconv available and a conversion in effect, the language encoding is
 * used; otherwise the code point is encoded as UTF-8. */
static int handleUnicodeCodePoint (uint32_t point)
{
	int first = (int) point;

	Assert (point < 0x110000);

#ifdef HAVE_ICONV
	/* lazily create the converter the first time it is needed */
	if (isConverting () && JSUnicodeConverter == (iconv_t) -2)
		JSUnicodeConverter = iconv_open (getLanguageEncoding (Lang_js), INTERNAL_ENCODING);

	if (isConverting () && JSUnicodeConverter != (iconv_t) -1)
	{
		char *src = (char *) &point;
		size_t src_left = sizeof point;
		/* 4 bytes should be enough for any encoding (it's how much UTF-32
		 * would need). */
		/* FIXME: iconv tends to output a BOM for Unicode encodings whose
		 * endianness is not part of the target encoding name (e.g. "UTF-32"
		 * or "UTF-16"), producing 2 code points: the BOM (U+FEFF) and the
		 * expected one.  This does not happen with an explicit endianness
		 * such as "UTF-32LE" or "UTF-16BE".  It is not very relevant for
		 * now as nothing in CTags copes well (if at all) with
		 * non-ASCII-compatible encodings like UTF-32 or UTF-16 anyway. */
		char encoded[4] = { 0 };
		char *dst = encoded;
		size_t dst_left = ARRAY_SIZE (encoded);
		const size_t status = iconv (JSUnicodeConverter, &src, &src_left, &dst, &dst_left);

		if (status == (size_t) -1)
		{
			/* most likely the output encoding cannot represent the
			 * character: substitute a placeholder that is widely supported
			 * and still valid inside an identifier */
			verbose ("JavaScript: Encoding: %s\n", strerror (errno));
			first = '_';
		}
		else
		{
			const size_t encoded_len = ARRAY_SIZE (encoded) - dst_left;

			/* push back last byte first so that the bytes after the first
			 * one are read back in their natural order */
			for (size_t idx = encoded_len; idx > 1; idx--)
				ungetcToInputFile ((unsigned char) encoded[idx - 1]);
			first = (unsigned char) encoded[0];
		}

		/* reset the converter state for the next use */
		iconv (JSUnicodeConverter, NULL, NULL, NULL, NULL);
	}
	else
#endif
	{
		/* no encoding specified (or no iconv): assume UTF-8, a common
		 * ASCII-compatible Unicode encoding.  Continuation bytes are pushed
		 * back least significant first. */
		const unsigned char tail0 = (unsigned char) (0x80 | ((point >> 0) & 0x3f));
		const unsigned char tail1 = (unsigned char) (0x80 | ((point >> 6) & 0x3f));
		const unsigned char tail2 = (unsigned char) (0x80 | ((point >> 12) & 0x3f));

		if (point < 0x80)
			first = (unsigned char) point;
		else if (point < 0x800)
		{
			first = (unsigned char) (0xc0 | ((point >> 6) & 0x1f));
			ungetcToInputFile (tail0);
		}
		else if (point < 0x10000)
		{
			first = (unsigned char) (0xe0 | ((point >> 12) & 0x0f));
			ungetcToInputFile (tail0);
			ungetcToInputFile (tail1);
		}
		else if (point < 0x110000)
		{
			first = (unsigned char) (0xf0 | ((point >> 18) & 0x07));
			ungetcToInputFile (tail0);
			ungetcToInputFile (tail1);
			ungetcToInputFile (tail2);
		}
	}

	return first;
}
524
/* Reads a Unicode escape sequence after the "\" prefix, i.e. either
 * "uXXXX" (4 hex digits) or "u{X...}" (code point below 0x110000).
 *
 * @param value   Location to store the escape sequence value.  Only written
 *                when a valid sequence was read.
 * @param isUTF16 Location to store whether @p value is an UTF-16 word
 *                (i.e. the "uXXXX" form was used).  Only written when the
 *                character after the "\" is 'u'.
 * @returns Whether a valid sequence was read.
 *
 * Characters are kept as the int returned by getcFromInputFile() for
 * classification and push-back: passing a plain (possibly negative) char to
 * isxdigit() is undefined, and pushing back a sign-extended char would
 * corrupt bytes >= 0x80.  The push-back for an over-long "u{...}" sequence
 * uses the last character actually read instead of indexing past the end
 * of the digit buffer. */
static bool readUnicodeEscapeSequenceValue (uint32_t *const value,
                                            bool *const isUTF16)
{
	const int d = getcFromInputFile ();

	if (d != 'u')
	{
		ungetcToInputFile (d);
		return false;
	}

	const int e = getcFromInputFile ();
	char cp[6 + 1]; /* up to 6 significant hex digits, + 1 to detect over-long input */
	unsigned int cp_len = 0;
	bool valid;

	*isUTF16 = (e != '{');
	if (e == '{')
	{	/* Handles Unicode code point escapes: \u{ HexDigits }
		 * Leading 0s are skipped because there can be any number of them
		 * and they don't change the value. */
		bool has_leading_zero = false;
		int last; /* most recently read character, as read */

		while ((last = getcFromInputFile ()) == '0')
			has_leading_zero = true;

		/* collect hex digits; stop reading as soon as the buffer is full */
		while (isxdigit (last) && cp_len < ARRAY_SIZE (cp))
		{
			cp[cp_len++] = (char) last;
			if (cp_len < ARRAY_SIZE (cp))
				last = getcFromInputFile ();
		}

		valid = ((cp_len > 0 || has_leading_zero) &&
				 cp_len < ARRAY_SIZE (cp) && last == '}' &&
				 /* also check if it's a valid Unicode code point */
				 (cp_len < 6 ||
				  (cp_len == 6 && strncmp (cp, "110000", 6) < 0)));
		if (! valid) /* put back the last (likely invalid) character */
			ungetcToInputFile (last);
	}
	else
	{	/* Handles Unicode escape sequences: \u Hex4Digits
		 * The first digit candidate is the already read character. */
		int ch = e;

		while (isxdigit (ch))
		{
			cp[cp_len++] = (char) ch;
			if (cp_len == 4)
				break;
			ch = getcFromInputFile ();
		}
		valid = (cp_len == 4);
	}

	if (! valid)
	{
		/* we don't get every character back, but it would require to
		 * be able to put up to 9 characters back (in the worst case
		 * for handling invalid \u{10FFFFx}), and here we're recovering
		 * from invalid syntax anyway. */
		ungetcToInputFile (e);
		ungetcToInputFile (d);
		return false;
	}

	uint32_t acc = 0;
	for (unsigned int i = 0; i < cp_len; i++)
	{
		const char digit = cp[i];

		acc *= 16;
		/* we know it's a hex digit, no need to double check */
		if (digit < 'A')
			acc += (unsigned int) digit - '0';
		else if (digit < 'a')
			acc += 10 + (unsigned int) digit - 'A';
		else
			acc += 10 + (unsigned int) digit - 'a';
	}
	*value = acc;

	return true;
}
600
/* Returns the uppercase hexadecimal digit character for @p v (0..15). */
static int valueToXDigit (unsigned char v)
{
	Assert (v <= 0xF);

	return (v >= 0xA) ? 'A' + (v - 0xA) : '0' + v;
}
610
/* Reads and expands a Unicode escape sequence after the "\" prefix.  When
 * the sequence is a UTF-16 high surrogate, the following low surrogate
 * escape (if any) is read too and both are combined into one code point.
 *
 * @param fallback The character to return if the sequence is invalid.
 *                 Usually this would be the '\' character starting the
 *                 sequence.
 * @returns The first byte of the expanded sequence, or @p fallback if the
 *          sequence is invalid.  On success, the next calls to
 *          getcFromInputFile() return the subsequent bytes (if any). */
static int readUnicodeEscapeSequence (const int fallback)
{
	uint32_t codepoint;
	bool is_utf16;

	if (! readUnicodeEscapeSequenceValue (&codepoint, &is_utf16))
		return fallback;

	if (is_utf16 && (codepoint & 0xfc00) == 0xd800)
	{	/* high surrogate: look for the low surrogate completing the pair */
		uint32_t low;
		const int next = getcFromInputFile ();

		if (next != '\\' || ! readUnicodeEscapeSequenceValue (&low, &is_utf16))
			ungetcToInputFile (next);
		else if (! is_utf16)
		{	/* a plain \u{...} code point, not a surrogate: expand it in
			 * place so it follows the lone high surrogate */
			ungetcToInputFile (handleUnicodeCodePoint (low));
		}
		else if ((low & 0xfc00) == 0xdc00)
			codepoint = 0x010000 + ((codepoint & 0x03ff) << 10) + (low & 0x03ff);
		else
		{	/* not a low surrogate: put the escaped representation back in
			 * case it is another high surrogate starting its own pair.
			 * Push-back happens in reverse: lowest nibble first. */
			for (unsigned int shift = 0; shift <= 12; shift += 4)
				ungetcToInputFile (valueToXDigit ((unsigned char) ((low >> shift) & 0xf)));
			ungetcToInputFile ('u');
			ungetcToInputFile ('\\');
		}
	}

	return handleUnicodeCodePoint (codepoint);
}
661
/* Reads the body of a string literal whose opening @p delimiter was already
 * consumed, appending the unescaped content to @p string.
 *
 * Reading stops after the closing delimiter, at end of input, or right
 * before an unescaped line terminator (which is left in the input). */
static void parseString (vString *const string, const int delimiter)
{
	bool end = false;
	while (! end)
	{
		int c = getcFromInputFile ();
		if (c == EOF)
			end = true;
		else if (c == '\\')
		{
			/* Eat the escape sequence (\", \', etc).  We properly handle
			 * <LineContinuation> by eating a whole \<CR><LF> not to see <LF>
			 * as an unescaped character, which is invalid and handled below.
			 * Also, handle the fact that <LineContinuation> produces an empty
			 * sequence.
			 * See ECMA-262 7.8.4 */
			c = getcFromInputFile ();
			if (c == EOF)
			{
				/* input ends right after the backslash: there is no
				 * character to add, and EOF must not end up in the string */
				end = true;
			}
			else if (c == 'u')
			{
				ungetcToInputFile (c);
				c = readUnicodeEscapeSequence ('\\');
				vStringPut (string, c);
			}
			else if (c != '\r' && c != '\n')
				vStringPut(string, c);
			else if (c == '\r')
			{
				/* swallow the <LF> of a <CR><LF> pair */
				c = getcFromInputFile();
				if (c != '\n')
					ungetcToInputFile (c);
			}
		}
		else if (c == delimiter)
			end = true;
		else if (c == '\r' || c == '\n')
		{
			/* those are invalid when not escaped */
			end = true;
			/* we don't want to eat the newline itself to let the automatic
			 * semicolon insertion code kick in */
			ungetcToInputFile (c);
		}
		else
			vStringPut (string, c);
	}
}
708
/* Skips a regular expression literal whose opening '/' was already
 * consumed, including trailing flags.  Stops early, leaving the character
 * in the input, at a line terminator; also stops at end of input.
 * A '/' inside a [...] range does not end the literal. */
static void parseRegExp (void)
{
	bool in_range = false;
	int c;

	do
	{
		c = getcFromInputFile ();
		switch (c)
		{
			case '/':
				if (! in_range)
				{
					/* closing delimiter: skip the flags following it */
					do
						c = getcFromInputFile ();
					while (isalpha (c));
					ungetcToInputFile (c);
					return;
				}
				break;

			case '\n':
			case '\r':
				/* invalid in a regex */
				ungetcToInputFile (c);
				return;

			case '\\':
				c = getcFromInputFile (); /* skip next character */
				break;

			case '[':
				in_range = true;
				break;

			case ']':
				in_range = false;
				break;

			default:
				break;
		}
	} while (c != EOF);
}
740
741 /* Read a C identifier beginning with "firstChar" and places it into
742 * "name".
743 */
parseIdentifier(vString * const string,const int firstChar)744 static void parseIdentifier (vString *const string, const int firstChar)
745 {
746 int c = firstChar;
747 Assert (isIdentChar (c));
748 do
749 {
750 vStringPut (string, c);
751 c = getcFromInputFile ();
752 if (c == '\\')
753 c = readUnicodeEscapeSequence (c);
754 } while (isIdentChar (c));
755 /* if readUnicodeEscapeSequence() read an escape sequence this is incorrect,
756 * as we should actually put back the whole escape sequence and not the
757 * decoded character. However, it's not really worth the hassle as it can
758 * only happen if the input has an invalid escape sequence. */
759 ungetcToInputFile (c); /* unget non-identifier character */
760 }
761
/* Reads the body of a template string whose opening '`' was already
 * consumed, appending its raw text to @p string (escapes are kept as
 * written).  "${ ... }" substitutions are read through the regular
 * tokenizer, which appends their representation to @p string, until the
 * matching '}' is found.  Stops after the closing '`' or at end of input. */
static void parseTemplateString (vString *const string)
{
	for (;;)
	{
		int c = getcFromInputFile ();

		if (c == '`' || c == EOF)
			return;

		vStringPut (string, c);

		if (c == '\\')
		{
			/* keep the escaped character verbatim */
			c = getcFromInputFile();
			if (c != EOF)
				vStringPut(string, c);
		}
		else if (c == '$')
		{
			c = getcFromInputFile ();
			if (c != '{')
				ungetcToInputFile (c);
			else
			{
				/* we need to use the real token machinery to handle strings,
				 * comments, regexes and whatnot */
				tokenInfo *const sub = newToken ();
				int depth = 1;

				LastTokenType = TOKEN_UNDEFINED;
				vStringPut(string, c);
				while (depth > 0)
				{
					readTokenFull (sub, false, string);
					if (isType (sub, TOKEN_OPEN_CURLY))
						depth++;
					else if (isType (sub, TOKEN_CLOSE_CURLY))
						depth--;
					else if (isType (sub, TOKEN_EOF))
						break;
				}
				deleteToken (sub);
			}
		}

		if (c == EOF)
			return;
	}
}
807
/* Reads the next raw token into @p token (decorator '@' marks are NOT
 * skipped here).
 *
 * @param include_newlines When true, a newline preceding the token may
 *                         cause a synthetic semicolon token to be returned
 *                         first; the real token is then held in NextToken
 *                         and returned by the next call.
 * @param repr             When not NULL, a textual representation of what
 *                         is read is appended to it.
 *
 * Comments and a leading shebang line are skipped.  LastTokenType is
 * updated with the type of the returned token, except when a held-back
 * token is returned. */
static void readTokenFullRaw (tokenInfo *const token, bool include_newlines, vString *const repr)
{
	int c;
	int nread;
	bool saw_newline = false;

	/* if we've got a token held back, emit it */
	if (NextToken)
	{
		copyToken (token, NextToken, false);
		deleteToken (NextToken);
		NextToken = NULL;
		return;
	}

	token->type		= TOKEN_UNDEFINED;
	token->keyword	= KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	/* skip whitespace, remembering how many characters were read and
	 * whether a line terminator was among them */
	nread = 0;
	do
	{
		c = getcFromInputFile ();
		if (include_newlines && (c == '\r' || c == '\n'))
			saw_newline = true;
		nread++;
	}
	while (c == '\t' || c == ' ' || c == '\r' || c == '\n');

	token->lineNumber   = getInputLineNumber ();
	token->filePosition = getInputFilePosition ();

	if (repr && c != EOF)
	{
		/* collapse any skipped whitespace into a single space */
		if (nread > 1)
			vStringPut (repr, ' ');
		vStringPut (repr, c);
	}

	switch (c)
	{
		case EOF: token->type = TOKEN_EOF;			break;
		case '(': token->type = TOKEN_OPEN_PAREN;	break;
		case ')': token->type = TOKEN_CLOSE_PAREN;	break;
		case ';': token->type = TOKEN_SEMICOLON;	break;
		case ',': token->type = TOKEN_COMMA;		break;
		case '.': token->type = TOKEN_PERIOD;		break;
		case ':': token->type = TOKEN_COLON;		break;
		case '{': token->type = TOKEN_OPEN_CURLY;	break;
		case '}': token->type = TOKEN_CLOSE_CURLY;	break;
		case '[': token->type = TOKEN_OPEN_SQUARE;	break;
		case ']': token->type = TOKEN_CLOSE_SQUARE;	break;

		case '=':
		{
			const int next = getcFromInputFile ();
			if (next == '>')
				token->type = TOKEN_ARROW;
			else
			{
				ungetcToInputFile (next);
				token->type = TOKEN_EQUAL_SIGN;
			}
			break;
		}

		case '+':
		case '-':
		{
			const int next = getcFromInputFile ();
			if (next == c) /* ++ or -- */
				token->type = TOKEN_POSTFIX_OPERATOR;
			else
			{
				ungetcToInputFile (next);
				token->type = TOKEN_BINARY_OPERATOR;
			}
			break;
		}

		case '*':
			token->type = TOKEN_STAR;
			break;

		case '%':
		case '?':
		case '>':
		case '<':
		case '^':
		case '|':
		case '&':
			token->type = TOKEN_BINARY_OPERATOR;
			break;

		case '\'':
		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			token->lineNumber   = getInputLineNumber ();
			token->filePosition = getInputFilePosition ();
			if (repr)
			{
				vStringCat (repr, token->string);
				vStringPut (repr, c);
			}
			break;

		case '`':
			token->type = TOKEN_TEMPLATE_STRING;
			parseTemplateString (token->string);
			token->lineNumber   = getInputLineNumber ();
			token->filePosition = getInputFilePosition ();
			if (repr)
			{
				vStringCat (repr, token->string);
				vStringPut (repr, c);
			}
			break;

		case '/':
		{
			const int next = getcFromInputFile ();

			if (next == '*' || next == '/')
			{
				/* a comment: skip it and start over */
				if (repr) /* remove the / we added */
					vStringChop(repr);
				if (next == '*')
					skipToCharacterInInputFile2('*', '/');
				else
				{
					skipToCharacterInInputFile ('\n');
					/* if we care about newlines, put it back so it is seen */
					if (include_newlines)
						ungetcToInputFile ('\n');
				}
				goto getNextChar;
			}

			ungetcToInputFile (next);
			/* division or regex?  Decide from what came before. */
			switch (LastTokenType)
			{
				case TOKEN_CHARACTER:
				case TOKEN_IDENTIFIER:
				case TOKEN_STRING:
				case TOKEN_TEMPLATE_STRING:
				case TOKEN_CLOSE_CURLY:
				case TOKEN_CLOSE_PAREN:
				case TOKEN_CLOSE_SQUARE:
					token->type = TOKEN_BINARY_OPERATOR;
					break;

				default:
					token->type = TOKEN_REGEXP;
					parseRegExp ();
					token->lineNumber   = getInputLineNumber ();
					token->filePosition = getInputFilePosition ();
					break;
			}
			break;
		}

		case '#':
			/* skip shebang in case of e.g. Node.js scripts */
			if (token->lineNumber <= 1)
			{
				const int bang = getcFromInputFile ();
				if (bang == '!')
				{
					skipToCharacterInInputFile ('\n');
					goto getNextChar;
				}
				ungetcToInputFile (bang);
			}
			token->type = TOKEN_UNDEFINED;
			break;

		case '@':
			token->type = TOKEN_ATMARK;
			break;

		case '\\':
			c = readUnicodeEscapeSequence (c);
			/* fallthrough */
		default:
			if (! isIdentChar (c))
				token->type = TOKEN_UNDEFINED;
			else
			{
				parseIdentifier (token->string, c);
				token->lineNumber   = getInputLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->keyword = lookupKeyword (vStringValue (token->string), Lang_js);
				token->type = isKeyword (token, KEYWORD_NONE)
					? TOKEN_IDENTIFIER
					: TOKEN_KEYWORD;
				/* the first character is already in repr */
				if (repr && vStringLength (token->string) > 1)
					vStringCatS (repr, vStringValue (token->string) + 1);
			}
			break;
	}

	if (include_newlines && saw_newline)
	{
		/* This isn't strictly correct per the standard, but following the
		 * real rules means understanding all statements, and that's not
		 * what the parser currently does.  What we do here is a guess, by
		 * avoiding inserting semicolons that would make the statement on
		 * the left or right obviously invalid.  Hopefully this should not
		 * have false negatives (e.g. should not miss insertion of a semicolon)
		 * but might have false positives (e.g. it will wrongfully emit a
		 * semicolon sometimes, i.e. for the newline in "foo\n(bar)").
		 * This should however be mostly harmless as we only deal with
		 * newlines in specific situations where we know a false positive
		 * wouldn't hurt too bad. */

		/* these already end a statement, so no need to duplicate it */
#define IS_STMT_SEPARATOR(t) ((t) == TOKEN_SEMICOLON    || \
                              (t) == TOKEN_EOF          || \
                              (t) == TOKEN_COMMA        || \
                              (t) == TOKEN_OPEN_CURLY)
		/* these cannot be the start or end of a statement */
#define IS_BINARY_OPERATOR(t) ((t) == TOKEN_EQUAL_SIGN      || \
                               (t) == TOKEN_ARROW           || \
                               (t) == TOKEN_COLON           || \
                               (t) == TOKEN_PERIOD          || \
                               (t) == TOKEN_STAR            || \
                               (t) == TOKEN_BINARY_OPERATOR)

		const bool separator_around =
			IS_STMT_SEPARATOR(LastTokenType) || IS_STMT_SEPARATOR(token->type);
		const bool operator_around =
			IS_BINARY_OPERATOR(LastTokenType) || IS_BINARY_OPERATOR(token->type);
		/* these cannot be followed by a semicolon */
		const bool after_opener =
			(LastTokenType == TOKEN_OPEN_PAREN ||
			 LastTokenType == TOKEN_OPEN_SQUARE);

		if (! separator_around && ! operator_around && ! after_opener)
		{
			/* hold the token... */
			Assert (NextToken == NULL);
			NextToken = newToken ();
			copyToken (NextToken, token, false);

			/* ...and emit a semicolon instead */
			token->type		= TOKEN_SEMICOLON;
			token->keyword	= KEYWORD_NONE;
			vStringClear (token->string);
			if (repr)
				vStringPut (token->string, '\n');
		}

#undef IS_STMT_SEPARATOR
#undef IS_BINARY_OPERATOR
	}

	LastTokenType = token->type;
}
1071
1072 /* See https://babeljs.io/blog/2018/09/17/decorators */
skipBabelDecorator(tokenInfo * token,bool include_newlines,vString * const repr)1073 static void skipBabelDecorator (tokenInfo *token, bool include_newlines, vString *const repr)
1074 {
1075 readTokenFullRaw (token, include_newlines, repr);
1076 if (isType (token, TOKEN_OPEN_PAREN))
1077 {
1078 /* @(complex ? dec1 : dec2) */
1079 skipArgumentList (token, include_newlines, repr);
1080 TRACE_PRINT ("found @(...) style decorator");
1081 }
1082 else if (isType (token, TOKEN_IDENTIFIER))
1083 {
1084 /* @namespace.foo (...) */
1085 bool found_period = false;
1086 while (1)
1087 {
1088 readTokenFullRaw (token, include_newlines, repr);
1089 if (isType (token, TOKEN_IDENTIFIER))
1090 {
1091 if (!found_period)
1092 {
1093 TRACE_PRINT("found @namespace.bar style decorator");
1094 break;
1095 }
1096 found_period = false;
1097 }
1098 else if (isType (token, TOKEN_PERIOD))
1099 found_period = true;
1100 else if (isType (token, TOKEN_OPEN_PAREN))
1101 {
1102 skipArgumentList (token, include_newlines, repr);
1103 TRACE_PRINT("found @foo(...) style decorator");
1104 break;
1105 }
1106 else
1107 {
1108 TRACE_PRINT("found @foo style decorator");
1109 break;
1110 }
1111 }
1112 }
1113 else
1114 /* Unexpected token after @ */
1115 TRACE_PRINT("found unexpected token during skipping a decorator");
1116 }
1117
/* Reads the next token like readTokenFullRaw(), but transparently skips
 * any decorators so callers never see a TOKEN_ATMARK. */
static void readTokenFull (tokenInfo *const token, bool include_newlines, vString *const repr)
{
	readTokenFullRaw (token, include_newlines, repr);

	/* @decorator0 @decorator1 ... There can be more than one decorator. */
	while (isType (token, TOKEN_ATMARK))
		skipBabelDecorator (token, include_newlines, repr);
}
1130
#ifdef JSCRIPT_DO_DEBUGGING
/* Tracing wrapper around readTokenFull(): reads a token, then prints its
 * text, type and scope. */
static void readTokenFullDebug (tokenInfo *const token, bool include_newlines, vString *const repr)
{
	readTokenFull (token, include_newlines, repr);
	TRACE_PRINT("token '%s' of type %02x with scope '%s'",
				vStringValue(token->string),
				token->type,
				vStringValue(token->scope));
}
/* from here on, every readTokenFull() call goes through the tracing wrapper */
# define readTokenFull readTokenFullDebug
#endif
1140
/* Convenience wrapper: reads the next token without reporting newlines and
 * without collecting a textual representation. */
static void readToken (tokenInfo *const token)
{
	const bool include_newlines = false;

	readTokenFull (token, include_newlines, NULL);
}
1145
1146 /*
1147 * Token parsing functions
1148 */
1149
/* Parses the members of an object literal that has no name of its own.
 * A generated "AnonymousClass" name serves as the scope for the members;
 * a class tag is emitted only if parseMethods() reports it found methods. */
static void parseMethodsInAnonymousClass (tokenInfo *const token)
{
	tokenInfo *const klass = newToken ();

	copyToken (klass, token, true);
	anonGenerate (klass->string, "AnonymousClass", JSTAG_CLASS);
	klass->type = TOKEN_IDENTIFIER;

	if (parseMethods (token, klass, false))
		makeJsTagCommon (klass, JSTAG_CLASS, NULL, NULL, true);

	deleteToken (klass);
}
1164
/* Skips a parenthesized list starting at the current '(' token, and leaves
 * @p token on the first token after the matching ')'.
 * Does nothing if @p token is not an opening parenthesis.
 * @p repr, if not NULL, receives the textual representation of the list.
 * @p include_newlines only applies to the read of the token that follows
 * the list. */
static void skipArgumentList (tokenInfo *const token, bool include_newlines, vString *const repr)
{
	if (! isType (token, TOKEN_OPEN_PAREN))
		return;

	if (repr)
		vStringPut (repr, '(');

	int depth = 1;
	tokenType previous = token->type;

	while (depth > 0 && ! isType (token, TOKEN_EOF))
	{
		readTokenFull (token, false, repr);

		switch (token->type)
		{
			case TOKEN_OPEN_PAREN:
				depth++;
				break;

			case TOKEN_CLOSE_PAREN:
				depth--;
				break;

			case TOKEN_OPEN_CURLY:
				/* a curly right after `=>` is an arrow function body,
				 * any other one is treated as an object literal */
				if (previous == TOKEN_ARROW)
					parseBlock (token, NULL);
				else
					parseMethodsInAnonymousClass (token);
				break;

			default:
				if (isKeyword (token, KEYWORD_function))
					parseFunction (token);
				break;
		}

		previous = token->type;
	}

	/* step past the list; this token is not part of the representation */
	readTokenFull (token, include_newlines, NULL);
}
1196
/* Skips a square-bracketed list, e.g.
 *     var name[1]
 * Square brackets may nest, so opening and closing ones are counted.
 * Does nothing if @p token is not an opening square bracket; otherwise
 * leaves @p token on the first token after the matching ']'.
 * @p include_newlines only applies to the read of that following token. */
static void skipArrayList (tokenInfo *const token, bool include_newlines)
{
	if (! isType (token, TOKEN_OPEN_SQUARE))
		return;

	int depth = 1;
	tokenType previous = token->type;

	while (depth > 0 && ! isType (token, TOKEN_EOF))
	{
		readToken (token);

		switch (token->type)
		{
			case TOKEN_OPEN_SQUARE:
				depth++;
				break;

			case TOKEN_CLOSE_SQUARE:
				depth--;
				break;

			case TOKEN_OPEN_CURLY:
				/* a curly right after `=>` is an arrow function body,
				 * any other one is treated as an object literal */
				if (previous == TOKEN_ARROW)
					parseBlock (token, NULL);
				else
					parseMethodsInAnonymousClass (token);
				break;

			default:
				break;
		}

		previous = token->type;
	}

	readTokenFull (token, include_newlines, NULL);
}
1229
/* Skips a dotted name such as foo.bar.baz, starting on its first
 * identifier.  Stops on the first token that is not part of the chain. */
static void skipQualifiedIdentifier (tokenInfo *const token)
{
	while (isType (token, TOKEN_IDENTIFIER))
	{
		readToken (token);
		if (! isType (token, TOKEN_PERIOD))
			return;

		/* step over the period to reach the next component */
		readToken (token);
	}
}
1242
/* Appends the text of @p child to the text of @p parent, inserting a '.'
 * separator when @p parent is not empty. */
static void addContext (tokenInfo* const parent, const tokenInfo* const child)
{
	vString *const path = parent->string;

	if (vStringLength (path) != 0)
		vStringPut (path, '.');

	vStringCat (path, child->string);
}
1251
/* Appends @p extra to the scope of @p token, inserting a '.' separator
 * when the scope is not empty. */
static void addToScope (tokenInfo* const token, const vString* const extra)
{
	vString *const scope = token->scope;

	if (vStringLength (scope) != 0)
		vStringPut (scope, '.');

	vStringCat (scope, extra);
}
1260
1261 /*
1262 * Scanning functions
1263 */
1264
/* Advances @p token until it sits on a semicolon, a closing curly brace,
 * the end of file or, when @p include_commas is set, a comma.
 * Nested blocks, argument lists and array lists are consumed on the way.
 * Returns true only if the token it stopped on is a semicolon. */
static bool findCmdTerm (tokenInfo *const token, bool include_newlines,
						 bool include_commas)
{
	for (;;)
	{
		const bool at_terminator =
			isType (token, TOKEN_SEMICOLON) ||
			isType (token, TOKEN_CLOSE_CURLY) ||
			(include_commas && isType (token, TOKEN_COMMA)) ||
			isType (token, TOKEN_EOF);

		if (at_terminator)
			break;

		switch (token->type)
		{
			case TOKEN_OPEN_CURLY:
				/* Handle nested blocks */
				parseBlock (token, NULL);
				readTokenFull (token, include_newlines, NULL);
				break;

			case TOKEN_OPEN_PAREN:
				skipArgumentList (token, include_newlines, NULL);
				break;

			case TOKEN_OPEN_SQUARE:
				skipArrayList (token, include_newlines);
				break;

			default:
				readTokenFull (token, include_newlines, NULL);
				break;
		}
	}

	return isType (token, TOKEN_SEMICOLON);
}
1299
/* Parses a switch statement, entered on the `switch` keyword:
 *
 *     switch (expression) {
 *         case value1:
 *             statement;
 *             break;
 *         case value2:
 *             statement;
 *             break;
 *         default : statement;
 *     }
 *
 * The parenthesized expression is skipped and the body is parsed as a
 * regular unnamed block. */
static void parseSwitch (tokenInfo *const token)
{
	/* step over the keyword */
	readToken (token);

	if (isType (token, TOKEN_OPEN_PAREN))
		skipArgumentList (token, false, NULL);

	if (isType (token, TOKEN_OPEN_CURLY))
		parseBlock (token, NULL);
}
1326
/* Parses a loop statement, entered on its `for`, `while` or `do` keyword:
 *
 *     for (x=0; x<3; x++)
 *         document.write("This text is repeated three times<br>");
 *
 *     for (x=0; x<3; x++)
 *     {
 *         document.write("This text is repeated three times<br>");
 *     }
 *
 *     while (number<5){
 *         document.write(number+"<br>");
 *         number++;
 *     }
 *
 *     do{
 *         document.write(number+"<br>");
 *         number++;
 *     }
 *     while (number<5);
 *
 * Returns whether the statement is considered terminated: true after a
 * braced body, otherwise whatever parseLine() reported for the last line
 * it handled. */
static bool parseLoop (tokenInfo *const token)
{
	if (isKeyword (token, KEYWORD_for) || isKeyword (token, KEYWORD_while))
	{
		readToken (token);

		if (isType (token, TOKEN_OPEN_PAREN))
			skipArgumentList (token, false, NULL);

		/* single-statement body */
		if (! isType (token, TOKEN_OPEN_CURLY))
			return parseLine (token, false);

		parseBlock (token, NULL);
		return true;
	}

	if (! isKeyword (token, KEYWORD_do))
		return true;

	bool terminated = true;

	readToken (token);
	if (isType (token, TOKEN_OPEN_CURLY))
		parseBlock (token, NULL);
	else
		terminated = parseLine (token, false);

	/* move on to the expected trailing `while` */
	if (terminated)
		readToken (token);

	if (isKeyword (token, KEYWORD_while))
	{
		readToken (token);

		if (isType (token, TOKEN_OPEN_PAREN))
			skipArgumentList (token, true, NULL);

		if (! isType (token, TOKEN_SEMICOLON))
		{
			/* oddly enough, `do {} while (0) var foo = 42` is perfectly
			 * valid JS, so explicitly handle the remainder of the line
			 * for the sake of the root scope handling (as parseJsFile()
			 * always advances a token not to ever get stuck) */
			terminated = parseLine (token, false);
		}
	}

	return terminated;
}
1407
/* Parses an if/else statement, entered on the `if` or `else` keyword.
 *
 * If statements come in several forms:
 *
 *     if ( ... )
 *         one line;
 *
 *     if ( ... )
 *         statement;
 *     else
 *         statement
 *
 *     if ( ... ) {
 *         multiple;
 *         statements;
 *     }
 *
 *     if ( ... ) {
 *         return elem
 *     }
 *
 * The following example is correctly written, but the else branch holds
 * a single statement without a terminator, since the function finishes
 * with the closing brace:
 *
 *     function a(flag){
 *         if(flag)
 *             test(1);
 *         else
 *             test(2)
 *     }
 *
 * TODO: Deal with statements that can optionally end without a
 *       semicolon.  Currently this messes up the parsing of blocks.
 *       Need to somehow detect this has happened, and either back up a
 *       token, or skip reading the next token if that is possible from
 *       all code locations.
 *
 * Returns whether the caller should read the next token: true after a
 * braced body, otherwise only if the statement had its own terminator. */
static bool parseIf (tokenInfo *const token)
{
	readToken (token);

	/* for an "else if", consume the "if" as well */
	if (isKeyword (token, KEYWORD_if))
		readToken (token);

	if (isType (token, TOKEN_OPEN_PAREN))
		skipArgumentList (token, false, NULL);

	if (isType (token, TOKEN_OPEN_CURLY))
	{
		parseBlock (token, NULL);
		return true;
	}

	/* The next token should only be read if this statement had its own
	 * terminator */
	return findCmdTerm (token, true, false);
}
1478
/* Parses a function introduced by the `function` keyword, on which
 * @p token sits on entry.  Handles these formats:
 *     function validFunctionTwo(a,b) {}
 *     function * generator(a,b) {}
 * as well as anonymous functions, which get a generated name.
 * If the body contains a `this` statement (as reported by parseBlock()),
 * a class tag is emitted instead of a function tag. */
static void parseFunction (tokenInfo *const token)
{
	TRACE_ENTER();

	tokenInfo *const name = newToken ();
	vString *const signature = vStringNew ();
	bool is_generator = false;
	bool is_anonymous = false;

	/* read the name into its own token */
	copyToken (name, token, true);
	readToken (name);
	if (isType (name, TOKEN_STAR))
	{
		is_generator = true;
		readToken (name);
	}

	if (isType (name, TOKEN_OPEN_PAREN))
	{
		/* anonymous function: the parenthesis is the token to go on
		 * with, and the name is made up */
		copyToken (token, name, false);
		anonGenerate (name->string, "AnonymousFunction", JSTAG_FUNCTION);
		is_anonymous = true;
	}
	else if (isType (name, TOKEN_IDENTIFIER))
		readToken (token);
	else
		goto cleanUp;

	/* collect a dotted name: each non-keyword component is added to it */
	while (isType (token, TOKEN_PERIOD))
	{
		readToken (token);
		if (isType (token, TOKEN_KEYWORD))
			break;

		addContext (name, token);
		readToken (token);
	}

	if (isType (token, TOKEN_OPEN_PAREN))
		skipArgumentList (token, false, signature);

	if (isType (token, TOKEN_OPEN_CURLY))
	{
		if (parseBlock (token, name->string))
			makeClassTagCommon (name, signature, NULL, is_anonymous);
		else
			makeFunctionTagCommon (name, signature, is_generator, is_anonymous);
	}

	findCmdTerm (token, false, false);

cleanUp:
	vStringDelete (signature);
	deleteToken (name);

	TRACE_LEAVE();
}
1543
1544 /* Parses a block surrounded by curly braces.
1545 * @p parentScope is the scope name for this block, or NULL for unnamed scopes */
parseBlock(tokenInfo * const token,const vString * const parentScope)1546 static bool parseBlock (tokenInfo *const token, const vString *const parentScope)
1547 {
1548 TRACE_ENTER();
1549
1550 bool is_class = false;
1551 bool read_next_token = true;
1552 vString * saveScope = vStringNew ();
1553
1554 vStringCopy(saveScope, token->scope);
1555 if (parentScope)
1556 {
1557 addToScope (token, parentScope);
1558 token->nestLevel++;
1559 }
1560
1561 /*
1562 * Make this routine a bit more forgiving.
1563 * If called on an open_curly advance it
1564 */
1565 if (isType (token, TOKEN_OPEN_CURLY))
1566 readToken(token);
1567
1568 if (! isType (token, TOKEN_CLOSE_CURLY))
1569 {
1570 /*
1571 * Read until we find the closing brace,
1572 * any nested braces will be handled within
1573 */
1574 do
1575 {
1576 read_next_token = true;
1577 if (isKeyword (token, KEYWORD_this))
1578 {
1579 /*
1580 * Means we are inside a class and have found
1581 * a class, not a function
1582 */
1583 is_class = true;
1584
1585 /*
1586 * Ignore the remainder of the line
1587 * findCmdTerm(token);
1588 */
1589 read_next_token = parseLine (token, is_class);
1590 }
1591 else if (isKeyword (token, KEYWORD_var) ||
1592 isKeyword (token, KEYWORD_let) ||
1593 isKeyword (token, KEYWORD_const))
1594 {
1595 /*
1596 * Potentially we have found an inner function.
1597 * Set something to indicate the scope
1598 */
1599 read_next_token = parseLine (token, is_class);
1600 }
1601 else if (isType (token, TOKEN_OPEN_CURLY))
1602 {
1603 /* Handle nested blocks */
1604 parseBlock (token, NULL);
1605 }
1606 else
1607 {
1608 /*
1609 * It is possible for a line to have no terminator
1610 * if the following line is a closing brace.
1611 * parseLine will detect this case and indicate
1612 * whether we should read an additional token.
1613 */
1614 read_next_token = parseLine (token, is_class);
1615 }
1616
1617 /*
1618 * Always read a new token unless we find a statement without
1619 * a ending terminator
1620 */
1621 if( read_next_token )
1622 readToken(token);
1623
1624 /*
1625 * If we find a statement without a terminator consider the
1626 * block finished, otherwise the stack will be off by one.
1627 */
1628 } while (! isType (token, TOKEN_EOF) &&
1629 ! isType (token, TOKEN_CLOSE_CURLY) && read_next_token);
1630 }
1631
1632 vStringCopy(token->scope, saveScope);
1633 vStringDelete(saveScope);
1634 if (parentScope)
1635 token->nestLevel--;
1636
1637 TRACE_LEAVE();
1638
1639 return is_class;
1640 }
1641
/* Parses the members of an object literal or of an ES6 class body.
 * On entry @p token sits on the opening curly brace.
 * @p class, if not NULL, names the scope the members are added to.
 * @p is_es6_class tells whether this is an ES6 class body, in which there
 * are no mandatory separators between members.
 * Returns true if at least one method or property was found.
 *
 * This deals with these formats:
 *     validProperty  : 2,
 *     validMethod    : function(a,b) {}
 *     'validMethod2' : function(a,b) {}
 *     container.dirtyTab = {'url': false, 'title':false, 'snapshot':false, '*': false}
 *     get prop() {}
 *     set prop(val) {}
 *     get(...) {}
 *     set(...) {}
 *
 * ES6 methods:
 *     property(...) {}
 *     *generator() {}
 *
 * ES6 computed name:
 *     [property]() {}
 *     get [property]() {}
 *     set [property]() {}
 *     *[generator]() {}
 *
 * tc39/proposal-class-fields:
 *     field0 = function(a,b) {}
 *     field1 = 1
 * The parser extracts field0 as a method because the assigned value is a
 * function (kind propagation), and field1 as a field. */
static bool parseMethods (tokenInfo *const token, const tokenInfo *const class,
						  const bool is_es6_class)
{
	TRACE_ENTER_TEXT("token is '%s' of type %s in classToken '%s' of type %s (es6: %s)",
					 vStringValue(token->string), tokenTypeName (token->type),
					 class == NULL ? "none" : vStringValue(class->string),
					 class == NULL ? "none" : tokenTypeName (class->type),
					 is_es6_class? "yes": "no");

	tokenInfo *const name = newToken ();
	vString *const outer_scope = vStringNew ();
	bool has_methods = false;

	/* remember the current scope so it can be restored on exit */
	vStringCopy (outer_scope, token->scope);
	if (class != NULL)
		addToScope (token, class->string);

	/* set when the current token has to be processed again as the start
	 * of the next member instead of reading a new one */
	bool reuse_token = false;
	do
	{
		bool is_setter = false;
		bool is_getter = false;

		if (reuse_token)
			reuse_token = false;
		else
			readToken (token);

		if (isType (token, TOKEN_CLOSE_CURLY))
			goto cleanUp;

		if (isKeyword (token, KEYWORD_async))
			readToken (token);
		else if (isType (token, TOKEN_KEYWORD) &&
				 (isKeyword (token, KEYWORD_get) || isKeyword (token, KEYWORD_set)))
		{
			tokenInfo *const accessor = newToken ();

			copyToken (accessor, token, true);
			readToken (token);
			if (isType(token, TOKEN_OPEN_PAREN))
			{
				/* `get(...)`/`set(...)`: a method that happens to be
				 * named get or set, not an accessor */
				Assert (NextToken == NULL);
				NextToken = newToken ();
				copyToken (NextToken, token, false);	/* save token for next read */
				copyToken (token, accessor, true);		/* restore token to process */
				token->type = TOKEN_IDENTIFIER;			/* process as identifier */
				token->keyword = KEYWORD_NONE;
			}
			else
			{
				is_getter = isKeyword (accessor, KEYWORD_get);
				is_setter = ! is_getter;
			}
			deleteToken (accessor);
		}

		/* nothing to extract from keywords and stray semicolons */
		if (isType (token, TOKEN_KEYWORD) || isType (token, TOKEN_SEMICOLON))
			continue;

		bool is_generator = false;
		bool is_computed_name = false;	/* ES6 computed property name */
		bool is_dynamic_prop = false;
		vString *dprop = NULL;			/* is_computed_name is true but
										 * the name is not represented in
										 * a string literal.  The expression
										 * goes into this string. */

		if (isType (token, TOKEN_STAR))	/* shorthand generator */
		{
			is_generator = true;
			readToken (token);
		}

		if (isType (token, TOKEN_OPEN_SQUARE))
		{
			is_computed_name = true;
			dprop = vStringNewInit ("[");
			readTokenFull (token, false, dprop);
		}

		copyToken(name, token, true);
		if (is_computed_name && ! isType (token, TOKEN_STRING))
			is_dynamic_prop = true;

		readTokenFull (token, false, dprop);

		if (is_computed_name)
		{
			/* read up to the matching closing square bracket; anything
			 * but a directly closing bracket makes the name dynamic */
			int depth = 1;
			do
			{
				if (isType (token, TOKEN_CLOSE_SQUARE))
					depth--;
				else
				{
					is_dynamic_prop = true;
					if (isType (token, TOKEN_OPEN_SQUARE))
						depth++;
				}

				vString *const sink = (is_dynamic_prop && depth != 0)? dprop: NULL;
				readTokenFull (token, false, sink);
			} while (! isType (token, TOKEN_EOF) && depth > 0);
		}

		if (is_dynamic_prop)
		{
			injectDynamicName (name, dprop);
			dprop = NULL;
		}
		else
			vStringDelete (dprop);

		/* ES6 shorthand syntax */
		const bool is_shorthand = isType (token, TOKEN_OPEN_PAREN);
		const bool can_be_field = isType (token, TOKEN_EQUAL_SIGN);

		if (! (isType (token, TOKEN_COLON) || can_be_field || is_shorthand))
		{
			/* bare member name: a field without a value */
			makeJsTag (name, JSTAG_FIELD, NULL, NULL);
			if (!isType (token, TOKEN_SEMICOLON))
				reuse_token = true;
			continue;
		}

		if (! is_shorthand)
		{
			/* step over the ':' or '=' and an optional `async` */
			readToken (token);
			if (isKeyword (token, KEYWORD_async))
				readToken (token);
		}

		vString *const signature = vStringNew ();
		if ( is_shorthand || isKeyword (token, KEYWORD_function) )
		{
			TRACE_PRINT("Seems to be a function or shorthand");

			if (! is_shorthand)
			{
				readToken (token);
				if (isType (token, TOKEN_STAR))
				{
					/* generator: 'function' '*' '(' ... ')' '{' ... '}' */
					is_generator = true;
					readToken (token);
				}
			}
			if ( isType (token, TOKEN_OPEN_PAREN) )
				skipArgumentList(token, false, signature);

function:
			if (isType (token, TOKEN_OPEN_CURLY))
			{
				has_methods = true;

				const int kind = is_generator ? JSTAG_GENERATOR
							   : is_getter    ? JSTAG_GETTER
							   : is_setter    ? JSTAG_SETTER
							   :                JSTAG_METHOD;

				makeJsTag (name, kind, signature, NULL);
				parseBlock (token, name->string);

				/*
				 * If we aren't parsing an ES6 class (for which there
				 * are no mandatory separators), read past the closing
				 * curly and check the next token: if it is a comma, we
				 * must loop again.
				 */
				if (! is_es6_class)
					readToken (token);
			}
		}
		else if (! is_es6_class)
		{
			bool has_child_methods = false;
			tokenInfo *prev_token = newToken ();

			/* skip whatever is the value */
			while (! isType (token, TOKEN_COMMA) &&
				   ! isType (token, TOKEN_CLOSE_CURLY) &&
				   ! isType (token, TOKEN_EOF))
			{
				if (isType (token, TOKEN_OPEN_CURLY))
				{
					/* Recurse to find child properties/methods */
					has_child_methods = parseMethods (token, name, false);
					readToken (token);
				}
				else if (isType (token, TOKEN_OPEN_PAREN))
				{
					vStringClear (signature);
					skipArgumentList (token, false, signature);
				}
				else if (isType (token, TOKEN_OPEN_SQUARE))
				{
					skipArrayList (token, false);
				}
				else if (isType (token, TOKEN_ARROW))
				{
					TRACE_PRINT("Seems to be an anonymous function");
					/* `x => ...`: the lone parameter is the signature */
					if (vStringIsEmpty (signature) &&
						isType (prev_token, TOKEN_IDENTIFIER))
					{
						vStringPut (signature, '(');
						vStringCat (signature, prev_token->string);
						vStringPut (signature, ')');
					}
					readToken (token);
					deleteToken (prev_token);
					goto function;
				}
				else
				{
					copyToken (prev_token, token, true);
					readToken (token);
				}
			}
			deleteToken (prev_token);

			has_methods = true;
			makeJsTag (name,
					   has_child_methods ? JSTAG_CLASS : JSTAG_PROPERTY,
					   NULL, NULL);
		}
		else if (can_be_field)
		{
			makeJsTag (name, JSTAG_FIELD, NULL, NULL);
			parseLine (token, true);
		}

		vStringDelete (signature);
	} while ( isType(token, TOKEN_COMMA) ||
			  ( is_es6_class && ! isType(token, TOKEN_EOF) ) );

	TRACE_PRINT("Finished parsing methods");

	findCmdTerm (token, false, false);

cleanUp:
	vStringCopy (token->scope, outer_scope);
	vStringDelete (outer_scope);
	deleteToken (name);

	TRACE_LEAVE_TEXT("found method(s): %s", has_methods? "yes": "no");

	return has_methods;
}
1927
/* Parses an ES6 class, entered on the `class` keyword.
 * @p targetName, if not NULL, is the name the class is tagged under (the
 * name the class gets assigned to); otherwise the class' own name is used,
 * or a generated one if it has none.
 * Everything between the name and the opening curly brace is recorded as
 * inheritance information when it starts with `extends`.
 * Always returns true. */
static bool parseES6Class (tokenInfo *const token, const tokenInfo *targetName)
{
	TRACE_ENTER();

	tokenInfo *const className = newToken ();
	vString *inheritance = NULL;

	copyToken (className, token, true);
	readToken (className);

	/* the class' own name is optional */
	const bool is_anonymous = ! isType (className, TOKEN_IDENTIFIER);
	if (! is_anonymous)
		readToken (token);
	else
	{
		copyToken (token, className, true);
		/* We create a fake name so we have a scope for the members */
		if (! targetName)
			anonGenerate (className->string, "AnonymousClass", JSTAG_CLASS);
	}

	if (! targetName)
		targetName = className;
	const bool tagged_by_own_name = (targetName == className);

	if (isKeyword (token, KEYWORD_extends))
		inheritance = vStringNew ();

	/* skip inheritance info */
	while (! isType (token, TOKEN_OPEN_CURLY) &&
		   ! isType (token, TOKEN_EOF) &&
		   ! isType (token, TOKEN_SEMICOLON))
	{
		readTokenFull (token, false, inheritance);
	}

	/* remove the last added token (here we assume it's one char, "{" or ";") */
	if (inheritance && vStringLength (inheritance) > 0 &&
		! isType (token, TOKEN_EOF))
	{
		vStringChop (inheritance);
		vStringStripTrailing (inheritance);
		vStringStripLeading (inheritance);
	}

	TRACE_PRINT("Emitting tag for class '%s'", vStringValue(targetName->string));

	makeJsTagCommon (targetName, JSTAG_CLASS, NULL, inheritance,
					 (is_anonymous && tagged_by_own_name));

	if (! is_anonymous && ! tagged_by_own_name)
	{
		/* FIXME: what to do with the secondary name?  It's local to the
		 *        class itself, so not very useful... let's hope people
		 *        don't give it another name than the target in case of
		 *          var MyClass = class MyClassSecondaryName { ... }
		 *        I guess it could be an alias to MyClass, or duplicate it
		 *        altogether, not sure. */
		makeJsTag (className, JSTAG_CLASS, NULL, inheritance);
	}

	if (inheritance)
		vStringDelete (inheritance);

	if (isType (token, TOKEN_OPEN_CURLY))
		parseMethods (token, targetName, true);

	deleteToken (className);

	TRACE_LEAVE();
	return true;
}
2001
parseStatement(tokenInfo * const token,bool is_inside_class)2002 static bool parseStatement (tokenInfo *const token, bool is_inside_class)
2003 {
2004 TRACE_ENTER_TEXT("is_inside_class: %s", is_inside_class? "yes": "no");
2005
2006 tokenInfo *const name = newToken ();
2007 tokenInfo *const secondary_name = newToken ();
2008 tokenInfo *const method_body_token = newToken ();
2009 vString * saveScope = vStringNew ();
2010 bool is_class = false;
2011 bool is_var = false;
2012 bool is_const = false;
2013 bool is_terminated = true;
2014 bool is_global = false;
2015 bool has_methods = false;
2016 vString * fulltag;
2017
2018 vStringCopy (saveScope, token->scope);
2019 /*
2020 * Functions can be named or unnamed.
2021 * This deals with these formats:
2022 * Function
2023 * validFunctionOne = function(a,b) {}
2024 * testlib.validFunctionFive = function(a,b) {}
2025 * var innerThree = function(a,b) {}
2026 * var innerFour = (a,b) {}
2027 * var D2 = secondary_fcn_name(a,b) {}
2028 * var D3 = new Function("a", "b", "return a+b;");
2029 * Class
2030 * testlib.extras.ValidClassOne = function(a,b) {
2031 * this.a = a;
2032 * }
2033 * Class Methods
2034 * testlib.extras.ValidClassOne.prototype = {
2035 * 'validMethodOne' : function(a,b) {},
2036 * 'validMethodTwo' : function(a,b) {}
2037 * }
2038 * ValidClassTwo = function ()
2039 * {
2040 * this.validMethodThree = function() {}
2041 * // unnamed method
2042 * this.validMethodFour = () {}
2043 * }
2044 * Database.prototype.validMethodThree = Database_getTodaysDate;
2045 */
2046
2047 if ( is_inside_class )
2048 is_class = true;
2049 /*
2050 * var can precede an inner function
2051 */
2052 if ( isKeyword(token, KEYWORD_var) ||
2053 isKeyword(token, KEYWORD_let) ||
2054 isKeyword(token, KEYWORD_const) )
2055 {
2056 TRACE_PRINT("var/let/const case");
2057 is_const = isKeyword(token, KEYWORD_const);
2058 /*
2059 * Only create variables for global scope
2060 */
2061 if ( token->nestLevel == 0 )
2062 {
2063 is_global = true;
2064 }
2065 readToken(token);
2066 }
2067
2068 nextVar:
2069 if ( isKeyword(token, KEYWORD_this) )
2070 {
2071 TRACE_PRINT("found 'this' keyword");
2072
2073 readToken(token);
2074 if (isType (token, TOKEN_PERIOD))
2075 {
2076 readToken(token);
2077 }
2078 }
2079
2080 copyToken(name, token, true);
2081 TRACE_PRINT("name becomes '%s' of type %s",
2082 vStringValue(token->string), tokenTypeName (token->type));
2083
2084 while (! isType (token, TOKEN_CLOSE_CURLY) &&
2085 ! isType (token, TOKEN_SEMICOLON) &&
2086 ! isType (token, TOKEN_EQUAL_SIGN) &&
2087 ! isType (token, TOKEN_COMMA) &&
2088 ! isType (token, TOKEN_EOF))
2089 {
2090 if (isType (token, TOKEN_OPEN_CURLY))
2091 parseBlock (token, NULL);
2092
2093 /* Potentially the name of the function */
2094 if (isType (token, TOKEN_PERIOD))
2095 {
2096 /*
2097 * Cannot be a global variable is it has dot references in the name
2098 */
2099 is_global = false;
2100 /* Assume it's an assignment to a global name (e.g. a class) using
2101 * its fully qualified name, so strip the scope.
2102 * FIXME: resolve the scope so we can make more than an assumption. */
2103 vStringClear (token->scope);
2104 vStringClear (name->scope);
2105 do
2106 {
2107 readToken (token);
2108 if (! isType(token, TOKEN_KEYWORD))
2109 {
2110 if ( is_class )
2111 {
2112 addToScope(token, name->string);
2113 }
2114 else
2115 addContext (name, token);
2116
2117 readToken (token);
2118 }
2119 else if ( isKeyword(token, KEYWORD_prototype) )
2120 {
2121 /*
2122 * When we reach the "prototype" tag, we infer:
2123 * "BindAgent" is a class
2124 * "build" is a method
2125 *
2126 * function BindAgent( repeatableIdName, newParentIdName ) {
2127 * }
2128 *
2129 * CASE 1
2130 * Specified function name: "build"
2131 * BindAgent.prototype.build = function( mode ) {
2132 * maybe parse nested functions
2133 * }
2134 *
2135 * CASE 2
2136 * Prototype listing
2137 * ValidClassOne.prototype = {
2138 * 'validMethodOne' : function(a,b) {},
2139 * 'validMethodTwo' : function(a,b) {}
2140 * }
2141 *
2142 */
2143 if (! ( isType (name, TOKEN_IDENTIFIER)
2144 || isType (name, TOKEN_STRING) ) )
2145 /*
2146 * Unexpected input. Try to reset the parsing.
2147 *
2148 * TOKEN_STRING is acceptable. e.g.:
2149 * -----------------------------------
2150 * "a".prototype = function( mode ) {}
2151 */
2152 goto cleanUp;
2153
2154 makeClassTag (name, NULL, NULL);
2155 is_class = true;
2156
2157 /*
2158 * There should a ".function_name" next.
2159 */
2160 readToken (token);
2161 if (isType (token, TOKEN_PERIOD))
2162 {
2163 /*
2164 * Handle CASE 1
2165 */
2166 readToken (token);
2167 if (! isType(token, TOKEN_KEYWORD))
2168 {
2169 vString *const signature = vStringNew ();
2170
2171 addToScope(token, name->string);
2172
2173 copyToken (method_body_token, token, true);
2174 readToken (method_body_token);
2175
2176 while (! isType (method_body_token, TOKEN_SEMICOLON) &&
2177 ! isType (method_body_token, TOKEN_CLOSE_CURLY) &&
2178 ! isType (method_body_token, TOKEN_OPEN_CURLY) &&
2179 ! isType (method_body_token, TOKEN_EOF))
2180 {
2181 if ( isType (method_body_token, TOKEN_OPEN_PAREN) )
2182 skipArgumentList(method_body_token, false,
2183 vStringLength (signature) == 0 ? signature : NULL);
2184 else
2185 readToken (method_body_token);
2186 }
2187
2188 makeJsTag (token, JSTAG_METHOD, signature, NULL);
2189 vStringDelete (signature);
2190
2191 if ( isType (method_body_token, TOKEN_OPEN_CURLY))
2192 {
2193 parseBlock (method_body_token, token->string);
2194 is_terminated = true;
2195 }
2196 else
2197 is_terminated = isType (method_body_token, TOKEN_SEMICOLON);
2198 goto cleanUp;
2199 }
2200 }
2201 else if (isType (token, TOKEN_EQUAL_SIGN))
2202 {
2203 readToken (token);
2204 if (isType (token, TOKEN_OPEN_CURLY))
2205 {
2206 /*
2207 * Handle CASE 2
2208 *
2209 * Creates tags for each of these class methods
2210 * ValidClassOne.prototype = {
2211 * 'validMethodOne' : function(a,b) {},
2212 * 'validMethodTwo' : function(a,b) {}
2213 * }
2214 */
2215 parseMethods(token, name, false);
2216 /*
2217 * Find to the end of the statement
2218 */
2219 findCmdTerm (token, false, false);
2220 is_terminated = true;
2221 goto cleanUp;
2222 }
2223 }
2224 }
2225 else
2226 readToken (token);
2227 } while (isType (token, TOKEN_PERIOD));
2228 }
2229 else
2230 readTokenFull (token, true, NULL);
2231
2232 if ( isType (token, TOKEN_OPEN_PAREN) )
2233 skipArgumentList(token, false, NULL);
2234
2235 if ( isType (token, TOKEN_OPEN_SQUARE) )
2236 skipArrayList(token, false);
2237
2238 /*
2239 if ( isType (token, TOKEN_OPEN_CURLY) )
2240 {
2241 is_class = parseBlock (token, name->string);
2242 }
2243 */
2244 }
2245
2246 if ( isType (token, TOKEN_CLOSE_CURLY) )
2247 {
2248 /*
2249 * Reaching this section without having
2250 * processed an open curly brace indicates
2251 * the statement is most likely not terminated.
2252 */
2253 is_terminated = false;
2254 goto cleanUp;
2255 }
2256
2257 if ( isType (token, TOKEN_SEMICOLON) ||
2258 isType (token, TOKEN_EOF) ||
2259 isType (token, TOKEN_COMMA) )
2260 {
2261 /*
2262 * Only create variables for global scope
2263 */
2264 if ( token->nestLevel == 0 && is_global )
2265 {
2266 /*
2267 * Handles this syntax:
2268 * var g_var2;
2269 */
2270 makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL, NULL);
2271 }
2272 /*
2273 * Statement has ended.
2274 * This deals with calls to functions, like:
2275 * alert(..);
2276 */
2277 if (isType (token, TOKEN_COMMA))
2278 {
2279 readToken (token);
2280 goto nextVar;
2281 }
2282 goto cleanUp;
2283 }
2284
2285 if ( isType (token, TOKEN_EQUAL_SIGN) )
2286 {
2287 int parenDepth = 0;
2288
2289 readToken (token);
2290
2291 /* rvalue might be surrounded with parentheses */
2292 while (isType (token, TOKEN_OPEN_PAREN))
2293 {
2294 parenDepth++;
2295 readToken (token);
2296 }
2297
2298 if (isKeyword (token, KEYWORD_async))
2299 readToken (token);
2300
2301 if ( isKeyword (token, KEYWORD_function) )
2302 {
2303 vString *const signature = vStringNew ();
2304 bool is_generator = false;
2305
2306 readToken (token);
2307 if (isType (token, TOKEN_STAR))
2308 {
2309 is_generator = true;
2310 readToken (token);
2311 }
2312
2313 if (! isType (token, TOKEN_KEYWORD) &&
2314 ! isType (token, TOKEN_OPEN_PAREN))
2315 {
2316 /*
2317 * Functions of this format:
2318 * var D2A = function theAdd(a, b)
2319 * {
2320 * return a+b;
2321 * }
2322 * Are really two separate defined functions and
2323 * can be referenced in two ways:
2324 * alert( D2A(1,2) ); // produces 3
2325 * alert( theAdd(1,2) ); // also produces 3
2326 * So it must have two tags:
2327 * D2A
2328 * theAdd
2329 * Save the reference to the name for later use, once
2330 * we have established this is a valid function we will
2331 * create the secondary reference to it.
2332 */
2333 copyToken(secondary_name, token, true);
2334 readToken (token);
2335 }
2336
2337 if ( isType (token, TOKEN_OPEN_PAREN) )
2338 skipArgumentList(token, false, signature);
2339
2340 if (isType (token, TOKEN_OPEN_CURLY))
2341 {
2342 /*
2343 * This will be either a function or a class.
2344 * We can only determine this by checking the body
2345 * of the function. If we find a "this." we know
2346 * it is a class, otherwise it is a function.
2347 */
2348 if ( is_inside_class )
2349 {
2350 makeJsTag (name, is_generator ? JSTAG_GENERATOR : JSTAG_METHOD, signature, NULL);
2351 if ( vStringLength(secondary_name->string) > 0 )
2352 makeFunctionTag (secondary_name, signature, is_generator);
2353 }
2354 else
2355 {
2356 if (! ( isType (name, TOKEN_IDENTIFIER)
2357 || isType (name, TOKEN_STRING)
2358 || isType (name, TOKEN_KEYWORD) ) )
2359 {
2360 /* Unexpected input. Try to reset the parsing. */
2361 TRACE_PRINT("Unexpected input, trying to reset");
2362 vStringDelete (signature);
2363 goto cleanUp;
2364 }
2365
2366 is_class = parseBlock (token, name->string);
2367 if ( is_class )
2368 makeClassTag (name, signature, NULL);
2369 else
2370 makeFunctionTag (name, signature, is_generator);
2371
2372 if ( vStringLength(secondary_name->string) > 0 )
2373 makeFunctionTag (secondary_name, signature, is_generator);
2374 }
2375 parseBlock (token, name->string);
2376 }
2377
2378 vStringDelete (signature);
2379 }
2380 else if (isKeyword (token, KEYWORD_class))
2381 {
2382 is_terminated = parseES6Class (token, name);
2383 }
2384 else if (isType (token, TOKEN_OPEN_CURLY))
2385 {
2386 /*
2387 * Creates tags for each of these class methods
2388 * ValidClassOne.prototype = {
2389 * 'validMethodOne' : function(a,b) {},
2390 * 'validMethodTwo' : function(a,b) {}
2391 * }
2392 * Or checks if this is a hash variable.
2393 * var z = {};
2394 */
2395 bool anonClass = vStringIsEmpty (name->string);
2396 if (anonClass)
2397 anonGenerate (name->string, "AnonymousClass", JSTAG_CLASS);
2398 has_methods = parseMethods(token, name, false);
2399 if (has_methods)
2400 makeJsTagCommon (name, JSTAG_CLASS, NULL, NULL, anonClass);
2401 else
2402 {
2403 /*
2404 * Only create variables for global scope
2405 */
2406 if ( token->nestLevel == 0 && is_global )
2407 {
2408 /*
2409 * A pointer can be created to the function.
2410 * If we recognize the function/class name ignore the variable.
2411 * This format looks identical to a variable definition.
2412 * A variable defined outside of a block is considered
2413 * a global variable:
2414 * var g_var1 = 1;
2415 * var g_var2;
2416 * This is not a global variable:
2417 * var g_var = function;
2418 * This is a global variable:
2419 * var g_var = different_var_name;
2420 */
2421 fulltag = vStringNew ();
2422 if (vStringLength (token->scope) > 0)
2423 {
2424 vStringCopy(fulltag, token->scope);
2425 vStringPut (fulltag, '.');
2426 vStringCat (fulltag, token->string);
2427 }
2428 else
2429 {
2430 vStringCopy(fulltag, token->string);
2431 }
2432 if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) &&
2433 ! stringListHas(ClassNames, vStringValue (fulltag)) )
2434 {
2435 makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL, NULL);
2436 }
2437 vStringDelete (fulltag);
2438 }
2439 }
2440 /* Here we should be at the end of the block, on the close curly.
2441 * If so, read the next token not to confuse that close curly with
2442 * the end of the current statement. */
2443 if (isType (token, TOKEN_CLOSE_CURLY))
2444 {
2445 readTokenFull(token, true, NULL);
2446 is_terminated = isType (token, TOKEN_SEMICOLON);
2447 }
2448 }
2449 else if (isKeyword (token, KEYWORD_new))
2450 {
2451 readToken (token);
2452 is_var = isType (token, TOKEN_IDENTIFIER);
2453 if ( isKeyword (token, KEYWORD_function) ||
2454 isKeyword (token, KEYWORD_capital_function) ||
2455 isKeyword (token, KEYWORD_capital_object) ||
2456 is_var )
2457 {
2458 if ( isKeyword (token, KEYWORD_capital_object) )
2459 is_class = true;
2460
2461 if (is_var)
2462 skipQualifiedIdentifier (token);
2463 else
2464 readToken (token);
2465
2466 if ( isType (token, TOKEN_OPEN_PAREN) )
2467 skipArgumentList(token, true, NULL);
2468
2469 if (isType (token, TOKEN_SEMICOLON))
2470 {
2471 if ( token->nestLevel == 0 )
2472 {
2473 if ( is_var )
2474 {
2475 makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL, NULL);
2476 }
2477 else if ( is_class )
2478 {
2479 makeClassTag (name, NULL, NULL);
2480 }
2481 else
2482 {
2483 /* FIXME: we cannot really get a meaningful
2484 * signature from a `new Function()` call,
2485 * so for now just don't set any */
2486 makeFunctionTag (name, NULL, false);
2487 }
2488 }
2489 }
2490 else if (isType (token, TOKEN_CLOSE_CURLY))
2491 is_terminated = false;
2492 }
2493 }
2494 else if (! isType (token, TOKEN_KEYWORD))
2495 {
2496 /*
2497 * Only create variables for global scope
2498 */
2499 if ( token->nestLevel == 0 && is_global )
2500 {
2501 /*
2502 * A pointer can be created to the function.
2503 * If we recognize the function/class name ignore the variable.
2504 * This format looks identical to a variable definition.
2505 * A variable defined outside of a block is considered
2506 * a global variable:
2507 * var g_var1 = 1;
2508 * var g_var2;
2509 * This is not a global variable:
2510 * var g_var = function;
2511 * This is a global variable:
2512 * var g_var = different_var_name;
2513 */
2514 fulltag = vStringNew ();
2515 if (vStringLength (token->scope) > 0)
2516 {
2517 vStringCopy(fulltag, token->scope);
2518 vStringPut (fulltag, '.');
2519 vStringCat (fulltag, token->string);
2520 }
2521 else
2522 {
2523 vStringCopy(fulltag, token->string);
2524 }
2525 if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) &&
2526 ! stringListHas(ClassNames, vStringValue (fulltag)) )
2527 {
2528 makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL, NULL);
2529 }
2530 vStringDelete (fulltag);
2531 }
2532 }
2533
2534 if (parenDepth > 0)
2535 {
2536 while (parenDepth > 0 && ! isType (token, TOKEN_EOF))
2537 {
2538 if (isType (token, TOKEN_OPEN_PAREN))
2539 parenDepth++;
2540 else if (isType (token, TOKEN_CLOSE_PAREN))
2541 parenDepth--;
2542 readTokenFull (token, true, NULL);
2543 }
2544 if (isType (token, TOKEN_CLOSE_CURLY))
2545 is_terminated = false;
2546 }
2547 }
2548 /* if we aren't already at the cmd end, advance to it and check whether
2549 * the statement was terminated */
2550 if (! isType (token, TOKEN_CLOSE_CURLY) &&
2551 ! isType (token, TOKEN_SEMICOLON))
2552 {
2553 /*
2554 * Statements can be optionally terminated in the case of
2555 * statement prior to a close curly brace as in the
2556 * document.write line below:
2557 *
2558 * function checkForUpdate() {
2559 * if( 1==1 ) {
2560 * document.write("hello from checkForUpdate<br>")
2561 * }
2562 * return 1;
2563 * }
2564 */
2565 is_terminated = findCmdTerm (token, true, true);
2566 /* if we're at a comma, try and read a second var */
2567 if (isType (token, TOKEN_COMMA))
2568 {
2569 readToken (token);
2570 goto nextVar;
2571 }
2572 }
2573
2574 cleanUp:
2575 vStringCopy(token->scope, saveScope);
2576 deleteToken (name);
2577 deleteToken (secondary_name);
2578 deleteToken (method_body_token);
2579 vStringDelete(saveScope);
2580
2581 TRACE_LEAVE();
2582
2583 return is_terminated;
2584 }
2585
/*
 * Parses a SAPUI5 declaration.
 *
 * SAPUI5 is built on top of jQuery and follows a standard format:
 *     sap.ui.controller("id.of.controller", {
 *         method_name : function... {
 *         },
 *
 *         method_name : function ... {
 *         }
 *     }
 *
 * The initial controller (and likewise "view") call is consumed here: the
 * tokens up to the opening parenthesis are skipped, an optional string
 * argument is remembered as the name, an optional comma is skipped, and the
 * remainder is handed to parseMethods() so the methods are parsed as usual.
 *
 * Nothing beyond reading one token happens unless that token is a period.
 */
static void parseUI5 (tokenInfo *const token)
{
	tokenInfo *const controller = newToken ();

	readToken (token);

	if (! isType (token, TOKEN_PERIOD))
	{
		deleteToken (controller);
		return;
	}

	/* Advance until the opening parenthesis of the call (or the end of
	 * the input) is the current token. */
	do
	{
		readToken (token);
	} while (! (isType (token, TOKEN_OPEN_PAREN) ||
				isType (token, TOKEN_EOF)));

	/* Step over the token the skip loop stopped at. */
	readToken (token);

	/* A leading string argument provides the name passed to parseMethods(). */
	if (isType (token, TOKEN_STRING))
	{
		copyToken (controller, token, true);
		readToken (token);
	}

	if (isType (token, TOKEN_COMMA))
		readToken (token);

	/* Keep parsing methods until a close curly or the end of input is the
	 * current token. */
	for (;;)
	{
		parseMethods (token, controller, false);

		if (isType (token, TOKEN_CLOSE_CURLY) ||
			isType (token, TOKEN_EOF))
			break;
	}

	deleteToken (controller);
}
2635
/*
 * Dispatches the statement starting at `token` to the matching parse routine.
 *
 * The common statement keywords (if, while, for, do, ...) are detected here
 * because the last statement within a "{}" block can be optionally
 * terminated.
 *
 * Returns whether the statement was terminated.  When it was not, the caller
 * must not read an additional token looking for the end of the statement.
 * The result stays true for `switch` and `function`, whose parse routines'
 * results are not used here.
 */
static bool parseLine (tokenInfo *const token, bool is_inside_class)
{
	TRACE_ENTER_TEXT("token is '%s' of type %s",
					 vStringValue(token->string), tokenTypeName (token->type));

	bool terminated = true;

	if (! isType (token, TOKEN_KEYWORD))
	{
		/*
		 * Special case where single line statements may not be
		 * SEMICOLON terminated.  parseBlock needs to know this
		 * so that it does not read the next token.
		 */
		terminated = parseStatement (token, is_inside_class);
	}
	else if (isKeyword (token, KEYWORD_for) ||
			 isKeyword (token, KEYWORD_while) ||
			 isKeyword (token, KEYWORD_do))
	{
		terminated = parseLoop (token);
	}
	else if (isKeyword (token, KEYWORD_if) ||
			 isKeyword (token, KEYWORD_else) ||
			 isKeyword (token, KEYWORD_try) ||
			 isKeyword (token, KEYWORD_catch) ||
			 isKeyword (token, KEYWORD_finally))
	{
		/* These keywords share common semantics. */
		terminated = parseIf (token);
	}
	else if (isKeyword (token, KEYWORD_switch))
	{
		parseSwitch (token);
	}
	else if (isKeyword (token, KEYWORD_return) ||
			 isKeyword (token, KEYWORD_async))
	{
		/* Skip the keyword and parse whatever follows it as a line. */
		readToken (token);
		terminated = parseLine (token, is_inside_class);
	}
	else if (isKeyword (token, KEYWORD_function))
	{
		parseFunction (token);
	}
	else if (isKeyword (token, KEYWORD_class))
	{
		terminated = parseES6Class (token, NULL);
	}
	else
	{
		/* Any other keyword is handled as a plain statement. */
		terminated = parseStatement (token, is_inside_class);
	}

	TRACE_LEAVE();

	return terminated;
}
2702
/*
 * Top-level parse loop: reads tokens until TOKEN_EOF is the current token.
 *
 * Each token read at the top level is handled as follows:
 *   - the `sap` keyword goes to parseUI5();
 *   - the `export` and `default` keywords are skipped;
 *   - everything else goes to parseLine() with is_inside_class == false.
 */
static void parseJsFile (tokenInfo *const token)
{
	TRACE_ENTER();

	for (;;)
	{
		readToken (token);

		if (isType (token, TOKEN_KEYWORD))
		{
			switch (token->keyword)
			{
				case KEYWORD_sap:
					parseUI5 (token);
					break;
				case KEYWORD_export:
				case KEYWORD_default:
					/* skip those at top-level */
					break;
				default:
					parseLine (token, false);
					break;
			}
		}
		else
		{
			parseLine (token, false);
		}

		if (isType (token, TOKEN_EOF))
			break;
	}

	TRACE_LEAVE();
}
2722
2723 #ifdef DO_TRACING
2724 #if DO_TRACING_USE_DUMP_TOKEN
dumpToken(const tokenInfo * const token)2725 static void dumpToken (const tokenInfo *const token)
2726 {
2727 fprintf(stderr, "Token <%p>: %s: %s\n",
2728 token,
2729 tokenTypeName (token->type),
2730 (token->type == TOKEN_KEYWORD ? keywordName (token->keyword):
2731 token->type == TOKEN_IDENTIFIER? vStringValue (token->string):
2732 ""));
2733 }
2734 #endif
2735
tokenTypeName(enum eTokenType e)2736 static const char *tokenTypeName(enum eTokenType e)
2737 { /* Generated by misc/enumstr.sh with cmdline "parsers/jscript.c" "eTokenType" "tokenTypeName" */
2738 switch (e)
2739 {
2740 case TOKEN_BINARY_OPERATOR: return "TOKEN_BINARY_OPERATOR";
2741 case TOKEN_CHARACTER: return "TOKEN_CHARACTER";
2742 case TOKEN_CLOSE_CURLY: return "TOKEN_CLOSE_CURLY";
2743 case TOKEN_CLOSE_PAREN: return "TOKEN_CLOSE_PAREN";
2744 case TOKEN_CLOSE_SQUARE: return "TOKEN_CLOSE_SQUARE";
2745 case TOKEN_COLON: return "TOKEN_COLON";
2746 case TOKEN_COMMA: return "TOKEN_COMMA";
2747 case TOKEN_EOF: return "TOKEN_EOF";
2748 case TOKEN_EQUAL_SIGN: return "TOKEN_EQUAL_SIGN";
2749 case TOKEN_IDENTIFIER: return "TOKEN_IDENTIFIER";
2750 case TOKEN_KEYWORD: return "TOKEN_KEYWORD";
2751 case TOKEN_OPEN_CURLY: return "TOKEN_OPEN_CURLY";
2752 case TOKEN_OPEN_PAREN: return "TOKEN_OPEN_PAREN";
2753 case TOKEN_OPEN_SQUARE: return "TOKEN_OPEN_SQUARE";
2754 case TOKEN_PERIOD: return "TOKEN_PERIOD";
2755 case TOKEN_POSTFIX_OPERATOR: return "TOKEN_POSTFIX_OPERATOR";
2756 case TOKEN_REGEXP: return "TOKEN_REGEXP";
2757 case TOKEN_SEMICOLON: return "TOKEN_SEMICOLON";
2758 case TOKEN_STAR: return "TOKEN_STAR";
2759 case TOKEN_STRING: return "TOKEN_STRING";
2760 case TOKEN_TEMPLATE_STRING: return "TOKEN_TEMPLATE_STRING";
2761 case TOKEN_UNDEFINED: return "TOKEN_UNDEFINED";
2762 default: return "UNKNOWN";
2763 }
2764 }
2765
2766 #if DO_TRACING_USE_DUMP_TOKEN
keywordName(enum eKeywordId e)2767 static const char *keywordName(enum eKeywordId e)
2768 { /* Generated by misc/enumstr.sh with cmdline "parsers/jscript.c" "eKeywordId" "keywordName" */
2769 switch (e)
2770 {
2771 case KEYWORD_async: return "KEYWORD_async";
2772 case KEYWORD_capital_function: return "KEYWORD_capital_function";
2773 case KEYWORD_capital_object: return "KEYWORD_capital_object";
2774 case KEYWORD_catch: return "KEYWORD_catch";
2775 case KEYWORD_class: return "KEYWORD_class";
2776 case KEYWORD_const: return "KEYWORD_const";
2777 case KEYWORD_default: return "KEYWORD_default";
2778 case KEYWORD_do: return "KEYWORD_do";
2779 case KEYWORD_else: return "KEYWORD_else";
2780 case KEYWORD_export: return "KEYWORD_export";
2781 case KEYWORD_extends: return "KEYWORD_extends";
2782 case KEYWORD_finally: return "KEYWORD_finally";
2783 case KEYWORD_for: return "KEYWORD_for";
2784 case KEYWORD_function: return "KEYWORD_function";
2785 case KEYWORD_get: return "KEYWORD_get";
2786 case KEYWORD_if: return "KEYWORD_if";
2787 case KEYWORD_let: return "KEYWORD_let";
2788 case KEYWORD_new: return "KEYWORD_new";
2789 case KEYWORD_prototype: return "KEYWORD_prototype";
2790 case KEYWORD_return: return "KEYWORD_return";
2791 case KEYWORD_sap: return "KEYWORD_sap";
2792 case KEYWORD_set: return "KEYWORD_set";
2793 case KEYWORD_static: return "KEYWORD_static";
2794 case KEYWORD_switch: return "KEYWORD_switch";
2795 case KEYWORD_this: return "KEYWORD_this";
2796 case KEYWORD_try: return "KEYWORD_try";
2797 case KEYWORD_var: return "KEYWORD_var";
2798 case KEYWORD_while: return "KEYWORD_while";
2799 default: return "UNKNOWN";
2800 }
2801 }
2802 #endif
2803 #endif
2804
initialize(const langType language)2805 static void initialize (const langType language)
2806 {
2807 Assert (ARRAY_SIZE (JsKinds) == JSTAG_COUNT);
2808 Lang_js = language;
2809
2810 TokenPool = objPoolNew (16, newPoolToken, deletePoolToken, clearPoolToken, NULL);
2811 }
2812
/*
 * Parser finalization hook (installed as def->finalize).
 *
 * Deletes the token pool, but only when `initialized` is true; otherwise
 * nothing is done.  The `language` argument is unused.
 */
static void finalize (langType language CTAGS_ATTR_UNUSED, bool initialized)
{
	if (initialized)
		objPoolDelete (TokenPool);
}
2820
findJsTags(void)2821 static void findJsTags (void)
2822 {
2823 tokenInfo *const token = newToken ();
2824
2825 NextToken = NULL;
2826 ClassNames = stringListNew ();
2827 FunctionNames = stringListNew ();
2828 LastTokenType = TOKEN_UNDEFINED;
2829
2830 parseJsFile (token);
2831
2832 stringListDelete (ClassNames);
2833 stringListDelete (FunctionNames);
2834 ClassNames = NULL;
2835 FunctionNames = NULL;
2836 deleteToken (token);
2837
2838 #ifdef HAVE_ICONV
2839 if (JSUnicodeConverter != (iconv_t) -2 && /* not created */
2840 JSUnicodeConverter != (iconv_t) -1 /* creation failed */)
2841 {
2842 iconv_close (JSUnicodeConverter);
2843 JSUnicodeConverter = (iconv_t) -2;
2844 }
2845 #endif
2846
2847 Assert (NextToken == NULL);
2848 }
2849
2850 /* Create parser definition structure */
JavaScriptParser(void)2851 extern parserDefinition* JavaScriptParser (void)
2852 {
2853 // .jsx files are JSX: https://facebook.github.io/jsx/
2854 // which have JS function definitions, so we just use the JS parser
2855 static const char *const extensions [] = { "js", "jsx", "mjs", NULL };
2856 static const char *const aliases [] = { "js", "node", "nodejs",
2857 "seed", "gjs",
2858 /* Used in PostgreSQL
2859 * https://github.com/plv8/plv8 */
2860 "v8",
2861 NULL };
2862 parserDefinition *const def = parserNew ("JavaScript");
2863 def->extensions = extensions;
2864 def->aliases = aliases;
2865 /*
2866 * New definitions for parsing instead of regex
2867 */
2868 def->kindTable = JsKinds;
2869 def->kindCount = ARRAY_SIZE (JsKinds);
2870 def->parser = findJsTags;
2871 def->initialize = initialize;
2872 def->finalize = finalize;
2873 def->keywordTable = JsKeywordTable;
2874 def->keywordCount = ARRAY_SIZE (JsKeywordTable);
2875
2876 return def;
2877 }
2878