xref: /Universal-ctags/parsers/julia.c (revision b9feb3309d67ccd5c1f32e1a8cd0ae9a88d4291e)
1 /*
2 *   Copyright (c) 2020-2021, getzze <getzze@gmail.com>
3 *
4 *   This source code is released for free distribution under the terms of the
5 *   GNU General Public License version 2 or (at your option) any later version.
6 *
7 *   This module contains functions for generating tags for Julia files.
8 *
9 *   Documented 'kinds':
10 *       https://docs.julialang.org/en/v1/manual/documentation/#Syntax-Guide
11 *   Language parser in Scheme:
12 *       https://github.com/JuliaLang/julia/blob/master/src/julia-parser.scm
13 */
14 
15 /*
16 *   INCLUDE FILES
17 */
18 #include "general.h"    /* must always come first */
19 
20 #include <string.h>
21 
22 #include "keyword.h"
23 #include "parse.h"
24 #include "entry.h"
25 #include "options.h"
26 #include "read.h"
27 #include "routines.h"
28 #include "vstring.h"
29 #include "xtag.h"
30 
31 /*
32 *   MACROS
33 */
/* Maximum number of characters advanceAndStoreChar() accumulates in
 * lexerState::token_str; further characters are read but not stored. */
#define MAX_STRING_LENGTH 256
35 
36 /*
37 *   DATA DEFINITIONS
38 */
/* Tag kinds. The enumerators are listed in the same order as the entries
 * of JuliaKinds[]; K_NONE has no table entry and is used as the
 * "no kind / no parent scope" sentinel. */
typedef enum {
    K_CONSTANT = 0,
    K_FUNCTION = 1,
    K_FIELD    = 2,
    K_MACRO    = 3,
    K_MODULE   = 4,
    K_STRUCT   = 5,
    K_TYPE     = 6,
    K_UNKNOWN  = 7,
    K_NONE     = 8
} JuliaKind;
50 
/* Roles of a module reference; same order as JuliaModuleRoles[]. */
typedef enum {
    JULIA_MODULE_IMPORTED  = 0,
    JULIA_MODULE_USED      = 1,
    JULIA_MODULE_NAMESPACE = 2
} juliaModuleRole;
56 
/* Roles of a name whose kind is unknown; same order as
 * JuliaUnknownRoles[]. */
typedef enum {
    JULIA_UNKNOWN_IMPORTED = 0,
    JULIA_UNKNOWN_USED     = 1
} juliaUnknownRole;
61 
62 /*
63 *  using X               X = (kind:module, role:used)
64 *
65 *  using X: a, b         X = (kind:module, role:namespace)
66 *                     a, b = (kind:unknown, role:used, scope:module:X)
67 *
68 *  import X              X = (kind:module, role:imported)
69 *
70 *  import X.a, Y.b    X, Y = (kind:module, role:namespace)
71 *                     a, b = (kind:unknown, role:imported, scope:module:X)
72 *
73 *  import X: a, b     Same as the above one
74 */
75 static roleDefinition JuliaModuleRoles [] = {
76     { true, "imported", "loaded by \"import\"" },
77     { true, "used", "loaded by \"using\"" },
78     { true, "namespace", "only some symbols in it are imported" },
79 };
80 
81 static roleDefinition JuliaUnknownRoles [] = {
82     { true, "imported", "loaded by \"import\"" },
83     { true, "used", "loaded by \"using\""},
84 };
85 
86 static kindDefinition JuliaKinds [] = {
87     { true, 'c', "constant", "Constants"    },
88     { true, 'f', "function", "Functions"    },
89     { true, 'g', "field",    "Fields"       },
90     { true, 'm', "macro",    "Macros"       },
91     { true, 'n', "module",   "Modules",
92       ATTACH_ROLES(JuliaModuleRoles) },
93     { true, 's', "struct",   "Structures"   },
94     { true, 't', "type",     "Types"        },
95     { true, 'x', "unknown", "name defined in other modules",
96       .referenceOnly = true, ATTACH_ROLES(JuliaUnknownRoles) },
97 };
98 
/* Token classes produced by the lexer. lexerState::cur_token may also hold
 * a raw character code (e.g. '=' or '\'') for input that does not fall into
 * one of these classes. */
typedef enum {
    TOKEN_NONE            = 0,   /* none */
    TOKEN_WHITESPACE      = 1,
    TOKEN_PAREN_BLOCK     = 2,
    TOKEN_BRACKET_BLOCK   = 3,
    TOKEN_CURLY_BLOCK     = 4,
    TOKEN_OPEN_BLOCK      = 5,
    TOKEN_CLOSE_BLOCK     = 6,
    TOKEN_TYPE_ANNOTATION = 7,
    TOKEN_TYPE_WHERE      = 8,
    TOKEN_CONST           = 9,
    TOKEN_STRING          = 10,
    TOKEN_COMMAND         = 11,
    TOKEN_MACROCALL       = 12,
    TOKEN_IDENTIFIER      = 13,
    TOKEN_MODULE          = 14,
    TOKEN_MACRO           = 15,
    TOKEN_FUNCTION        = 16,
    TOKEN_STRUCT          = 17,
    TOKEN_ENUM            = 18,
    TOKEN_TYPE            = 19,
    TOKEN_IMPORT          = 20,
    TOKEN_USING           = 21,
    TOKEN_EXPORT          = 22,
    TOKEN_NEWLINE         = 23,
    TOKEN_SEMICOLON       = 24,
    TOKEN_COMPOSER_KWD    = 25,  /* KEYWORD only */
    TOKEN_EOF             = 26,
    TOKEN_COUNT           = 27
} tokenType;
129 
130 static const keywordTable JuliaKeywordTable [] = {
131     /* TODO: Sort by keys. */
132     { "mutable",   TOKEN_COMPOSER_KWD },
133     { "primitive", TOKEN_COMPOSER_KWD },
134     { "abstract",  TOKEN_COMPOSER_KWD },
135 
136     { "if",        TOKEN_OPEN_BLOCK   },
137     { "for",       TOKEN_OPEN_BLOCK   },
138     { "while",     TOKEN_OPEN_BLOCK   },
139     { "try",       TOKEN_OPEN_BLOCK   },
140     { "do",        TOKEN_OPEN_BLOCK   },
141     { "begin",     TOKEN_OPEN_BLOCK   },
142     { "let",       TOKEN_OPEN_BLOCK   },
143     { "quote",     TOKEN_OPEN_BLOCK   },
144 
145     { "module",    TOKEN_MODULE       },
146     { "baremodule",TOKEN_MODULE       },
147 
148     { "using",     TOKEN_USING        },
149     { "import",    TOKEN_IMPORT       },
150 
151     { "export",    TOKEN_EXPORT       },
152     { "const",     TOKEN_CONST        },
153     { "macro",     TOKEN_MACRO        },
154     { "function",  TOKEN_FUNCTION     },
155     { "struct",    TOKEN_STRUCT       },
156     { "type",      TOKEN_TYPE         },
157     { "where",     TOKEN_TYPE_WHERE   },
158     { "end",       TOKEN_CLOSE_BLOCK  },
159 };
160 
161 typedef struct {
162     /* Characters */
163     int prev_c;
164     int cur_c;
165     int next_c;
166 
167     /* Tokens */
168     bool first_token;
169     int cur_token;
170     vString* token_str;
171     unsigned long line;
172     MIOPos pos;
173 } lexerState;
174 
175 /*
176 *   FUNCTION PROTOTYPES
177 */
178 
179 static void parseExpr (lexerState *lexer, bool delim, int kind, vString *scope);
180 
181 static void scanParenBlock (lexerState *lexer);
182 
183 /*
184 *   FUNCTION DEFINITIONS
185 */
186 
/* Returns non-zero when the string `what` ends with the suffix `withwhat`,
 * zero otherwise. An empty suffix always matches. */
static int endswith(const char* what, const char* withwhat)
{
    /* strlen() yields size_t; keeping that type avoids truncating (or
     * making negative) the length of a very long string. */
    const size_t what_len = strlen(what);
    const size_t suffix_len = strlen(withwhat);

    if (suffix_len > what_len)
    {
        return 0;
    }

    /* Compare the suffix against the last suffix_len characters of `what` */
    return strcmp(withwhat, what + (what_len - suffix_len)) == 0;
}
198 
199 /* Resets the scope string to the old length */
resetScope(vString * scope,size_t old_len)200 static void resetScope (vString *scope, size_t old_len)
201 {
202     vStringTruncate (scope, old_len);
203 }
204 
205 /* Adds a name to the end of the scope string */
addToScope(vString * scope,vString * name)206 static void addToScope (vString *scope, vString *name)
207 {
208     if (vStringLength(scope) > 0)
209     {
210         vStringPut(scope, '.');
211     }
212     vStringCat(scope, name);
213 }
214 
215 /* Reads a character from the file */
advanceChar(lexerState * lexer)216 static void advanceChar (lexerState *lexer)
217 {
218     lexer->prev_c = lexer->cur_c;
219     lexer->cur_c  = lexer->next_c;
220     lexer->next_c = getcFromInputFile();
221 }
222 
223 /* Reads N characters from the file */
advanceNChar(lexerState * lexer,int n)224 static void advanceNChar (lexerState *lexer, int n)
225 {
226     while (n--)
227     {
228         advanceChar(lexer);
229     }
230 }
231 
232 /* Store the current character in lexerState::token_str if there is space
233  * (set by MAX_STRING_LENGTH), and then read the next character from the file */
advanceAndStoreChar(lexerState * lexer)234 static void advanceAndStoreChar (lexerState *lexer)
235 {
236     if (vStringLength(lexer->token_str) < MAX_STRING_LENGTH)
237     {
238         vStringPut(lexer->token_str, (char) lexer->cur_c);
239     }
240     advanceChar(lexer);
241 }
242 
/* Tells whether `c` is a blank (space or tab). When `newline` is true,
 * carriage return and line feed count as whitespace too. */
static bool isWhitespace (int c, bool newline)
{
    switch (c)
    {
        case ' ':
        case '\t':
            return true;
        case '\r':
        case '\n':
            return newline;
        default:
            return false;
    }
}
251 
/* Tells whether `c` lies in the 7-bit ASCII range 0..0x7F. */
static bool isAscii (int c)
{
    if (c < 0)
    {
        return false;
    }
    return c <= 0x7F;
}
256 
/* Tells whether `c` is one of the single characters this parser treats as
 * an operator: % ^ & | * - + ~ < > , / ? = : */
static bool isOperator (int c)
{
    switch (c)
    {
        case '%': case '^': case '&': case '|':
        case '*': case '-': case '+': case '~':
        case '<': case '>': case ',': case '/':
        case '?': case '=': case ':':
            return true;
        default:
            return false;
    }
}
268 
/* Tells whether `c` may start an identifier: an ASCII letter, '_', or any
 * value of 0xC0 and above.
 * This does not distinguish Unicode letters from operators... */
static bool isIdentifierFirstCharacter (int c)
{
    if (c >= 0xC0)
    {
        return true;
    }
    if (!isAscii(c))
    {
        return false;
    }
    return (bool) (isalpha (c) || c == '_');
}
274 
/* Tells whether `c` may continue an identifier: anything accepted by
 * isIdentifierFirstCharacter(), an ASCII digit, '!', or any value of 0x80
 * and above.
 * This does not distinguish Unicode letters from operators... */
static bool isIdentifierCharacter (int c)
{
    if (isIdentifierFirstCharacter(c))
    {
        return true;
    }
    if (isAscii(c))
    {
        return (bool) (isdigit(c) || c == '!');
    }
    return c >= 0x80;
}
280 
/* Advances past consecutive whitespace characters; line breaks are skipped
 * as well when `newline` is true (see isWhitespace()). */
static void skipWhitespace (lexerState *lexer, bool newline)
{
    for (; isWhitespace(lexer->cur_c, newline); advanceChar(lexer))
    {
        /* nothing to do: the characters are simply discarded */
    }
}
288 
289 /* The transpose operator is only allowed after an identifier, a number, an expression inside parenthesis or an index */
isTranspose(int c)290 static bool isTranspose (int c)
291 {
292     return (isIdentifierCharacter(c) || c == ')' || c == ']');
293 }
294 
295 
296 /*
297  *  Lexer functions
298  * */
299 
300 /* Check that the current character sequence is a type declaration or inheritance */
isTypeDecl(lexerState * lexer)301 static bool isTypeDecl (lexerState *lexer)
302 {
303     if ((lexer->prev_c != '.' && lexer->cur_c == '<' && lexer->next_c == ':') ||
304         (lexer->prev_c != '.' && lexer->cur_c == '>' && lexer->next_c == ':') ||
305         (lexer->cur_c == ':' && lexer->next_c == ':') )
306     {
307         return true;
308     }
309     return false;
310 }
311 
312 /* Check if the current char is a new line */
isNewLine(lexerState * lexer)313 static bool isNewLine (lexerState *lexer)
314 {
315     return (lexer->cur_c == '\n')? true: false;
316 }
317 
318 /* Check if the current char is a new line.
319  * If it is, skip the newline and return true */
skipNewLine(lexerState * lexer)320 static bool skipNewLine (lexerState *lexer)
321 {
322     if (isNewLine(lexer))
323     {
324         advanceChar(lexer);
325         return true;
326     }
327     return false;
328 }
329 
330 /* Skip a single comment or multiline comment
331  * A single line comment starts with #
332  * A multi-line comment is encapsulated in #=...=# and they are nesting
333  * */
skipComment(lexerState * lexer)334 static void skipComment (lexerState *lexer)
335 {
336     /* # */
337     if (lexer->next_c != '=')
338     {
339         advanceNChar(lexer, 1);
340         while (lexer->cur_c != EOF && lexer->cur_c != '\n')
341         {
342             advanceChar(lexer);
343         }
344     }
345     /* block comment */
346     else /* if (lexer->next_c == '=') */
347     {
348         int level = 1;
349         advanceNChar(lexer, 2);
350         while (lexer->cur_c != EOF && level > 0)
351         {
352             if (lexer->cur_c == '=' && lexer->next_c == '#')
353             {
354                 level--;
355                 advanceNChar(lexer, 2);
356             }
357             else if (lexer->cur_c == '#' && lexer->next_c == '=')
358             {
359                 level++;
360                 advanceNChar(lexer, 2);
361             }
362             else
363             {
364                 advanceChar(lexer);
365             }
366         }
367     }
368 }
369 
/* Appends an identifier to lexerState::token_str, emptying the string
 * first when `clear` is true. The current character is always taken;
 * after it, characters are taken while they are identifier characters. */
static void scanIdentifier (lexerState *lexer, bool clear)
{
    if (clear)
    {
        vStringClear(lexer->token_str);
    }

    /* the first character is stored without being checked */
    advanceAndStoreChar(lexer);
    while (lexer->cur_c != EOF && isIdentifierCharacter(lexer->cur_c))
    {
        advanceAndStoreChar(lexer);
    }
}
382 
383 /* Scan a quote-like expression.
384  * Allow for triple-character variand and interpolation with `$`.
385  * These last past the end of the line, so be careful
386  * not to store too much of them (see MAX_STRING_LENGTH). */
scanStringOrCommand(lexerState * lexer,int c)387 static void scanStringOrCommand (lexerState *lexer, int c)
388 {
389     bool istriple = false;
390 
391     /* Pass the first "quote"-character */
392     advanceAndStoreChar(lexer);
393 
394     /* Check for triple "quote"-character */
395     if (lexer->cur_c == c && lexer->next_c == c)
396     {
397         istriple = true;
398         advanceAndStoreChar(lexer);
399         advanceAndStoreChar(lexer);
400 
401         /* Cancel up to 2 "quote"-characters after opening the triple */
402         if (lexer->cur_c == c)
403         {
404             advanceAndStoreChar(lexer);
405             if (lexer->cur_c == c)
406             {
407                 advanceAndStoreChar(lexer);
408             }
409         }
410     }
411 
412     while (lexer->cur_c != EOF && lexer->cur_c != c)
413     {
414         /* Check for interpolation before checking for end of "quote" */
415         if (lexer->cur_c == '$' && lexer->next_c == '(')
416         {
417             advanceAndStoreChar(lexer);
418             scanParenBlock(lexer);
419             /* continue to avoid advance character again. Correct bug
420              * with "quote"-character just after closing parenthesis */
421             continue;
422         }
423 
424         if (lexer->cur_c == '\\' &&
425             (lexer->next_c == c || lexer->next_c == '\\'))
426         {
427             advanceAndStoreChar(lexer);
428         }
429         advanceAndStoreChar(lexer);
430 
431         /* Cancel up to 2 "quote"-characters if triple string */
432         if (istriple && lexer->cur_c == c)
433         {
434             advanceAndStoreChar(lexer);
435             if (lexer->cur_c == c)
436             {
437                 advanceAndStoreChar(lexer);
438             }
439         }
440     }
441     /* Pass the last "quote"-character */
442     advanceAndStoreChar(lexer);
443 }
444 
445 
446 /* Scan commands surrounded by backticks,
447  * possibly triple backticks */
scanCommand(lexerState * lexer)448 static void scanCommand (lexerState *lexer)
449 {
450     scanStringOrCommand(lexer, '`');
451 }
452 
453 /* Double-quoted strings,
454  * possibly triple doublequotes */
scanString(lexerState * lexer)455 static void scanString (lexerState *lexer)
456 {
457     scanStringOrCommand(lexer, '"');
458 }
459 
460 
461 /* This deals with character literals: 'n', '\n', '\uFFFF';
462  * and matrix transpose: A'.
463  * We'll use this approximate regexp for the literals:
464  * \' [^'] \' or \' \\ [^']+ \' or \' \\ \' \'
465  * Either way, we'll treat this token as a string, so it gets preserved */
scanCharacterOrTranspose(lexerState * lexer)466 static bool scanCharacterOrTranspose (lexerState *lexer)
467 {
468     if (isTranspose(lexer->prev_c))
469     {
470         /* deal with untranspose/transpose sequence */
471         while (lexer->cur_c != EOF && lexer->cur_c == '\'')
472         {
473             advanceAndStoreChar(lexer);
474         }
475         return false;
476     }
477 
478     //vStringClear(lexer->token_str);
479     advanceAndStoreChar(lexer);
480 
481     if (lexer->cur_c == '\\')
482     {
483         advanceAndStoreChar(lexer);
484         /* The \' \\ \' \' (literally '\'') case */
485         if (lexer->cur_c == '\'' && lexer->next_c == '\'')
486         {
487             advanceAndStoreChar(lexer);
488             advanceAndStoreChar(lexer);
489         }
490         /* The \' \\ [^']+ \' case */
491         else
492         {
493             while (lexer->cur_c != EOF && lexer->cur_c != '\'')
494             {
495                 advanceAndStoreChar(lexer);
496             }
497         }
498     }
499     /* The \' [^'] \' and  \' \' \' cases */
500     else if (lexer->next_c == '\'')
501     {
502         advanceAndStoreChar(lexer);
503         advanceAndStoreChar(lexer);
504     }
505     /* Otherwise it is malformed */
506     return true;
507 }
508 
509 /* Parse a block with opening and closing character */
scanBlock(lexerState * lexer,int open,int close,bool convert_newline)510 static void scanBlock (lexerState *lexer, int open, int close, bool convert_newline)
511 {
512     /* Assume the current char is `open` */
513     int level = 1;
514 
515     /* Pass the first opening */
516     advanceAndStoreChar(lexer);
517 
518     while (lexer->cur_c != EOF && level > 0)
519     {
520         /* Parse everything */
521         if (lexer->cur_c == ' ' || lexer->cur_c == '\t')
522         {
523             skipWhitespace(lexer, false);
524             vStringPut(lexer->token_str, ' ');
525         }
526         if (lexer->cur_c == '#')
527         {
528             skipComment(lexer);
529         }
530         else if (lexer->cur_c == '\"')
531         {
532             scanString(lexer);
533         }
534         else if (lexer->cur_c == '\'')
535         {
536             scanCharacterOrTranspose(lexer);
537         }
538 
539         /* Parse opening/closing */
540         if (lexer->cur_c == open)
541         {
542             level++;
543         }
544         else if (lexer->cur_c == close)
545         {
546             level--;
547         }
548 
549         if (convert_newline && skipNewLine(lexer))
550         {
551             vStringPut(lexer->token_str, ' ');
552         }
553         else
554         {
555             advanceAndStoreChar(lexer);
556         }
557 
558     }
559     /* Lexer position is just after `close` */
560 }
561 
562 
563 /* Parse a block inside parenthesis, for example a function argument list */
scanParenBlock(lexerState * lexer)564 static void scanParenBlock (lexerState *lexer)
565 {
566     scanBlock(lexer, '(', ')', true);
567 }
568 
569 /* Indexing block with bracket.
570  * Some keywords have a special meaning in this environment:
571  * end, begin, for and if */
scanIndexBlock(lexerState * lexer)572 static void scanIndexBlock (lexerState *lexer)
573 {
574     scanBlock(lexer, '[', ']', false);
575 
576 }
577 
578 /* Parse a block inside curly brackets, for type parametrization */
scanCurlyBlock(lexerState * lexer)579 static void scanCurlyBlock (lexerState *lexer)
580 {
581     scanBlock(lexer, '{', '}', true);
582 }
583 
584 /* Scan type annotation like
585  * `::Type`, `::Type{T}`
586  */
scanTypeAnnotation(lexerState * lexer)587 static void scanTypeAnnotation (lexerState *lexer)
588 {
589     /* assume that current char is '<', '>' or ':', followed by ':' */
590     advanceAndStoreChar(lexer);
591     advanceAndStoreChar(lexer);
592 
593     skipWhitespace(lexer, true);
594     scanIdentifier(lexer, false);
595     if (lexer->cur_c == '{')
596     {
597         scanCurlyBlock(lexer);
598     }
599 }
600 
601 /* Scan type annotation like
602  * `where Int<:T<:Real`, `where S<:Array{Real}` or `where {S, T}`
603  */
scanTypeWhere(lexerState * lexer)604 static void scanTypeWhere (lexerState *lexer)
605 {
606     /* assume that current token is 'where'
607      * allow line continuation */
608     vStringPut(lexer->token_str, ' ');
609     skipWhitespace(lexer, true);
610 
611     while (lexer->cur_c != EOF)
612     {
613 
614         if (lexer->cur_c == '{')
615         {
616             scanCurlyBlock(lexer);
617         }
618         else if (isIdentifierFirstCharacter(lexer->cur_c))
619         {
620             scanIdentifier(lexer, false);
621             if (endswith(vStringValue(lexer->token_str), "where"))
622             {
623                 /* allow line continuation */
624                 vStringPut(lexer->token_str, ' ');
625                 skipWhitespace(lexer, true);
626             }
627         }
628         else if (isTypeDecl(lexer))
629         {
630             scanTypeAnnotation(lexer);
631             //skipWhitespace(lexer, false);
632         }
633         else if (lexer->cur_c == '#')
634         {
635             skipComment(lexer);
636             /* allow line continuation */
637             if (endswith(vStringValue(lexer->token_str), "where "))
638             {
639                 skipWhitespace(lexer, true);
640             }
641         }
642         else if (isWhitespace(lexer->cur_c, false))
643         {
644             while (isWhitespace(lexer->cur_c, false))
645             {
646                 advanceChar(lexer);
647             }
648             /* Add a space, if it is not a trailing space */
649             if (!(isNewLine(lexer)))
650             {
651                 vStringPut(lexer->token_str, ' ');
652             }
653         }
654         else
655         {
656             break;
657         }
658     }
659 }
660 
661 
/* Scans the identifier at the current position and classifies it.
 *
 * The text is left in lexerState::token_str. If it is the first word of a
 * two-word keyword ("mutable", "primitive", "abstract") and another
 * identifier follows, that second word replaces it and is classified
 * instead. For `where`, the clause that follows is appended to token_str.
 *
 * Returns (and stores in lexerState::cur_token) the keyword's token type,
 * or TOKEN_IDENTIFIER for anything that is not one of the keywords
 * handled below. */
static int parseIdentifier (lexerState *lexer)
{
    langType julia = getInputLanguage ();
    scanIdentifier(lexer, true);

    int k = lookupKeyword (vStringValue(lexer->token_str), julia);
    /* First part of a composed identifier */
    if (k == TOKEN_COMPOSER_KWD)
    {
        skipWhitespace(lexer, false);
        /* These words are only special in front of a second word. When
         * no identifier follows (`mutable = 1`, `abstract(x)`), keep the
         * word as a plain identifier: scanIdentifier() stores its first
         * character unconditionally and would swallow the operator or
         * parenthesis. */
        if (isIdentifierFirstCharacter(lexer->cur_c))
        {
            scanIdentifier(lexer, true);
            k = lookupKeyword (vStringValue(lexer->token_str), julia);
        }
    }

    switch (k)
    {
        case TOKEN_TYPE_WHERE:
            scanTypeWhere(lexer);
            return lexer->cur_token = k;
        case TOKEN_OPEN_BLOCK:
        case TOKEN_MODULE:
        case TOKEN_IMPORT:
        case TOKEN_USING:
        case TOKEN_EXPORT:
        case TOKEN_CONST:
        case TOKEN_MACRO:
        case TOKEN_FUNCTION:
        case TOKEN_STRUCT:
        case TOKEN_TYPE:
        case TOKEN_CLOSE_BLOCK:
            return lexer->cur_token = k;
        default:
            /* not a keyword, or a lone composer keyword */
            return lexer->cur_token = TOKEN_IDENTIFIER;
    }
}
697 
698 
699 /* Advances the parser one token, optionally skipping whitespace
700  * (otherwise it is concatenated and returned as a single whitespace token).
701  * Whitespace is needed to properly render function signatures. Unrecognized
702  * token starts are stored literally, e.g. token may equal to a character '#'. */
advanceToken(lexerState * lexer,bool skip_whitespace,bool propagate_first)703 static int advanceToken (lexerState *lexer, bool skip_whitespace, bool propagate_first)
704 {
705     bool have_whitespace = false;
706     bool newline = false;
707     lexer->line = getInputLineNumber();
708     lexer->pos = getInputFilePosition();
709 
710     /* the next token is the first token of the line */
711     if (!propagate_first)
712     {
713         if (lexer->cur_token == TOKEN_NEWLINE ||
714             lexer->cur_token == TOKEN_SEMICOLON ||
715             lexer->cur_token == TOKEN_NONE ||
716             (lexer->first_token && lexer->cur_token == TOKEN_MACROCALL))
717         {
718             lexer->first_token = true;
719         }
720         else
721         {
722             lexer->first_token = false;
723         }
724     }
725 
726     while (lexer->cur_c != EOF)
727     {
728         /* skip whitespaces but not newlines */
729         if (isWhitespace(lexer->cur_c, newline))
730         {
731             skipWhitespace(lexer, newline);
732             have_whitespace = true;
733         }
734         else if (lexer->cur_c == '#')
735         {
736             skipComment(lexer);
737             have_whitespace = true;
738         }
739         else
740         {
741             if (have_whitespace && !skip_whitespace)
742             {
743                 return lexer->cur_token = TOKEN_WHITESPACE;
744             }
745             break;
746         }
747     }
748     lexer->line = getInputLineNumber();
749     lexer->pos = getInputFilePosition();
750     while (lexer->cur_c != EOF)
751     {
752         if (lexer->cur_c == '"')
753         {
754             vStringClear(lexer->token_str);
755             scanString(lexer);
756             return lexer->cur_token = TOKEN_STRING;
757         }
758         else if (lexer->cur_c == '\'')
759         {
760             vStringClear(lexer->token_str);
761             if (scanCharacterOrTranspose(lexer))
762             {
763                 return lexer->cur_token = TOKEN_STRING;
764             }
765             else
766             {
767                 return lexer->cur_token = '\'';
768             }
769         }
770         else if (lexer->cur_c == '`')
771         {
772             vStringClear(lexer->token_str);
773             scanCommand(lexer);
774             return lexer->cur_token = TOKEN_COMMAND;
775         }
776         else if (isIdentifierFirstCharacter(lexer->cur_c))
777         {
778             return parseIdentifier(lexer);
779         }
780         else if (lexer->cur_c == '@')
781         {
782             vStringClear(lexer->token_str);
783             advanceAndStoreChar(lexer);
784             do
785             {
786                 advanceAndStoreChar(lexer);
787             } while(lexer->cur_c != EOF && isIdentifierCharacter(lexer->cur_c));
788             return lexer->cur_token = TOKEN_MACROCALL;
789         }
790         else if (lexer->cur_c == '(')
791         {
792             vStringClear(lexer->token_str);
793             scanParenBlock(lexer);
794             return lexer->cur_token = TOKEN_PAREN_BLOCK;
795         }
796         else if (lexer->cur_c == '[')
797         {
798             vStringClear(lexer->token_str);
799             scanIndexBlock(lexer);
800             return lexer->cur_token = TOKEN_BRACKET_BLOCK;
801         }
802         else if (lexer->cur_c == '{')
803         {
804             vStringClear(lexer->token_str);
805             scanCurlyBlock(lexer);
806             return lexer->cur_token = TOKEN_CURLY_BLOCK;
807         }
808         else if (isTypeDecl(lexer))
809         {
810             vStringClear(lexer->token_str);
811             scanTypeAnnotation(lexer);
812             return lexer->cur_token = TOKEN_TYPE_ANNOTATION;
813         }
814         else if (skipNewLine(lexer))
815         {
816             /* allow line continuation */
817             if (isOperator(lexer->cur_token))
818             {
819                 return lexer->cur_token;
820             }
821             return lexer->cur_token = TOKEN_NEWLINE;
822         }
823         else if (lexer->cur_c == ';')
824         {
825             advanceChar(lexer);
826             return lexer->cur_token = TOKEN_SEMICOLON;
827         }
828         else
829         {
830             int c = lexer->cur_c;
831             advanceChar(lexer);
832             return lexer->cur_token = c;
833         }
834     }
835     return lexer->cur_token = TOKEN_EOF;
836 }
837 
/* Prepares `lexer` for a new input file: fills the character window,
 * allocates the token string, skips a leading `#!` line, and scans the
 * first token. Release the token string with deInitLexer(). */
static void initLexer (lexerState *lexer)
{
    /* Give the character window defined values first: advanceChar()
     * copies cur_c and next_c, which would otherwise be read before
     * anything has been stored in them. */
    lexer->prev_c = '\0';
    lexer->cur_c  = '\0';
    lexer->next_c = '\0';

    /* Two reads are needed to bring the first character into cur_c */
    advanceNChar(lexer, 2);
    lexer->token_str = vStringNew();
    lexer->first_token = true;
    lexer->cur_token = TOKEN_NONE;
    lexer->prev_c = '\0';

    /* A first line starting with "#!" is skipped like a comment */
    if (lexer->cur_c == '#' && lexer->next_c == '!')
    {
        skipComment(lexer);
    }
    advanceToken(lexer, true, false);
}
852 
/* Frees the token string allocated by initLexer() and clears the pointer
 * so that it cannot be used afterwards. */
static void deInitLexer (lexerState *lexer)
{
    vString *const owned = lexer->token_str;

    lexer->token_str = NULL;
    vStringDelete(owned);
}
858 
#if 0
/* Debugging aid (compiled out): prints the current line, token, character
 * and token text of the lexer. */
static void debugLexer (lexerState *lexer)
{
    /* `line` is unsigned long and `cur_token` is int, so the conversions
     * are %lu and %d. The token text goes through %s because it comes
     * from the parsed file and must not be used as a format string. */
    printf("Current lexer state: line %lu, token (%d), cur char `%c`, token str:\n\t`%s`\n",
           lexer->line, lexer->cur_token, lexer->cur_c,
           vStringValue(lexer->token_str));
}
#endif
867 
/* Emits a tag named `ident` of the given `kind` at `line`/`pos`, with
 * `arg_list` as its signature. When `parent_kind` is not K_NONE the tag is
 * scoped to `scope` with that kind. Nothing is emitted for K_NONE.
 * `type` is currently unused: storing it in extensionFields.varType
 * "needs a workaround". */
static void addTag (vString* ident, const char* type, const char* arg_list, int kind, unsigned long line, MIOPos pos, vString *scope, int parent_kind)
{
    tagEntryInfo entry;
    const bool has_parent = (parent_kind != K_NONE);

    if (kind == K_NONE)
    {
        return;
    }

    initTagEntry(&entry, vStringValue(ident), kind);

    /* where the tag lives */
    entry.lineNumber = line;
    entry.filePosition = pos;
    entry.sourceFileName = getInputFileName();

    /* extension fields */
    entry.extensionFields.signature = arg_list;
    if (has_parent)
    {
        entry.extensionFields.scopeKindIndex = parent_kind;
        entry.extensionFields.scopeName = vStringValue(scope);
    }

    makeTagEntry(&entry);
}
890 
/* Emits a reference tag named `ident` with the given `kind` and `role` at
 * `line`/`pos`. When `parent_kind` is not K_NONE the tag is scoped to
 * `scope` with that kind. Nothing is emitted for K_NONE. */
static void addReferenceTag (vString* ident, int kind, int role, unsigned long line, MIOPos pos, vString* scope, int parent_kind)
{
    tagEntryInfo entry;
    const bool has_parent = (parent_kind != K_NONE);

    if (kind == K_NONE)
    {
        return;
    }

    initRefTagEntry(&entry, vStringValue(ident), kind, role);
    entry.lineNumber = line;
    entry.filePosition = pos;

    if (has_parent)
    {
        entry.extensionFields.scopeKindIndex = parent_kind;
        entry.extensionFields.scopeName = vStringValue(scope);
    }

    makeTagEntry(&entry);
}
908 
909 /* Skip tokens until one of the goal tokens is hit. Escapes when level = 0 if there are no goal tokens.
910  * Keeps track of balanced ()'s, []'s, and {}'s and ignores the goal tokens within those pairings */
skipUntil(lexerState * lexer,int goal_tokens[],int num_goal_tokens)911 static void skipUntil (lexerState *lexer, int goal_tokens[], int num_goal_tokens)
912 {
913     int block_level = 0;
914 
915     while (lexer->cur_token != TOKEN_EOF)
916     {
917         /* check if the keyword is reached, only if outside a block */
918         if (block_level == 0)
919         {
920             int ii = 0;
921             for(ii = 0; ii < num_goal_tokens; ii++)
922             {
923                 if (lexer->cur_token == goal_tokens[ii])
924                 {
925                     break;
926                 }
927             }
928             if (ii < num_goal_tokens)
929             {
930                 /* parse the next token */
931                 advanceToken(lexer, true, false);
932                 break;
933             }
934         }
935 
936         /* take into account nested blocks */
937         switch (lexer->cur_token)
938         {
939             case TOKEN_OPEN_BLOCK:
940                 block_level++;
941                 break;
942             case TOKEN_CLOSE_BLOCK:
943                 block_level--;
944                 break;
945             default:
946                 break;
947         }
948 
949         /* Has to be after the token switch to catch the case when we start with the initial level token */
950         if (num_goal_tokens == 0 && block_level == 0)
951         {
952             break;
953         }
954 
955         advanceToken(lexer, true, false);
956     }
957 }
958 
959 /* Skip until the end of the block */
skipUntilEnd(lexerState * lexer)960 static void skipUntilEnd (lexerState *lexer)
961 {
962     int goal_tokens[] = { TOKEN_CLOSE_BLOCK };
963 
964     skipUntil(lexer, goal_tokens, 1);
965 }
966 
967 /* Skip a function body after assignment operator '='
968  * Beware of continuation lines after operators
969  *  */
skipBody(lexerState * lexer)970 static void skipBody (lexerState *lexer)
971 {
972     /* assume position just after '=' */
973     while (lexer->cur_token != TOKEN_EOF && lexer->cur_token != TOKEN_NEWLINE)
974     {
975         advanceToken(lexer, true, false);
976 
977         if (lexer->cur_token == TOKEN_OPEN_BLOCK)
978         {
979             /* pass the keyword */
980             advanceToken(lexer, true, false);
981             skipUntilEnd(lexer);
982             /* the next token is already selected */
983         }
984     }
985 }
986 
987 /* Short function format:
988  * <ident> ( [<args>] ) [::<type>] [<where>] = [begin] <body> [end]
989  * */
parseShortFunction(lexerState * lexer,vString * scope,int parent_kind)990 static void parseShortFunction (lexerState *lexer, vString *scope, int parent_kind)
991 {
992     /* assume the current char is just after identifier */
993     vString *name;
994     vString *arg_list;
995     unsigned long line;
996     MIOPos pos;
997 
998     /* should be an open parenthesis after identifier
999      * with potentially parametric type */
1000     skipWhitespace(lexer, false);
1001     if (lexer->cur_c == '{')
1002     {
1003         scanCurlyBlock(lexer);
1004         skipWhitespace(lexer, false);
1005     }
1006 
1007     if (lexer->cur_c != '(')
1008     {
1009         advanceToken(lexer, true, false);
1010         return;
1011     }
1012 
1013     name = vStringNewCopy(lexer->token_str);
1014     line = lexer->line;
1015     pos = lexer->pos;
1016 
1017     /* scan argument list */
1018     advanceToken(lexer, true, false);
1019     arg_list = vStringNewCopy(lexer->token_str);
1020 
1021     /* scan potential type casting */
1022     advanceToken(lexer, true, false);
1023     if (lexer->cur_token == TOKEN_TYPE_ANNOTATION)
1024     {
1025         vStringCat(arg_list, lexer->token_str);
1026         advanceToken(lexer, true, false);
1027     }
1028     /* scan potential type union with 'where' */
1029     if (lexer->cur_token == TOKEN_TYPE_WHERE)
1030     {
1031         vStringPut(arg_list, ' ');
1032         vStringCat(arg_list, lexer->token_str);
1033         advanceToken(lexer, true, false);
1034     }
1035 
1036     /* scan equal sign, ignore `==` and `=>` */
1037     if (!(lexer->cur_token == '=' &&
1038           lexer->cur_c != '=' &&
1039           lexer->cur_c != '>'))
1040     {
1041         vStringDelete(name);
1042         vStringDelete(arg_list);
1043         return;
1044     }
1045 
1046     addTag(name, NULL, vStringValue(arg_list), K_FUNCTION, line, pos, scope, parent_kind);
1047 
1048     /* scan until end of function definition */
1049     skipBody(lexer);
1050 
1051     /* Should end on a new line, parse next token */
1052     advanceToken(lexer, true, false);
1053     lexer->first_token = true;
1054 
1055     vStringDelete(name);
1056     vStringDelete(arg_list);
1057 }
1058 
1059 /* Function format:
1060  * function <ident> ( [<args>] ) [::<type>] [<where>] [<body>] end
1061  * */
parseFunction(lexerState * lexer,vString * scope,int parent_kind)1062 static void parseFunction (lexerState *lexer, vString *scope, int parent_kind)
1063 {
1064     vString *name;
1065     vString *arg_list;
1066     vString *local_scope;
1067     int local_parent_kind;
1068     unsigned long line;
1069     MIOPos pos;
1070 
1071     advanceToken(lexer, true, false);
1072     if (lexer->cur_token != TOKEN_IDENTIFIER)
1073     {
1074         return;
1075     }
1076     else if (lexer->cur_c == '.')
1077     {
1078         local_scope = vStringNewCopy(lexer->token_str);
1079         local_parent_kind = K_MODULE;
1080         advanceChar(lexer);
1081         advanceToken(lexer, true, false);
1082     }
1083     else
1084     {
1085         local_scope = vStringNewCopy(scope);
1086         local_parent_kind = parent_kind;
1087     }
1088 
1089     /* Scan for parametric type constructor */
1090     skipWhitespace(lexer, false);
1091     if (lexer->cur_c == '{')
1092     {
1093         scanCurlyBlock(lexer);
1094         skipWhitespace(lexer, false);
1095     }
1096 
1097     name = vStringNewCopy(lexer->token_str);
1098     arg_list = vStringNew();
1099     line = lexer->line;
1100     pos = lexer->pos;
1101 
1102     advanceToken(lexer, true, false);
1103     if (lexer->cur_token == TOKEN_PAREN_BLOCK)
1104     {
1105         vStringCopy(arg_list, lexer->token_str);
1106 
1107         /* scan potential type casting */
1108         advanceToken(lexer, true, false);
1109         if (lexer->cur_token == TOKEN_TYPE_ANNOTATION)
1110         {
1111             vStringCat(arg_list, lexer->token_str);
1112             advanceToken(lexer, true, false);
1113         }
1114         /* scan potential type union with 'where' */
1115         if (lexer->cur_token == TOKEN_TYPE_WHERE)
1116         {
1117             vStringPut(arg_list, ' ');
1118             vStringCat(arg_list, lexer->token_str);
1119             advanceToken(lexer, true, false);
1120         }
1121 
1122         addTag(name, NULL, vStringValue(arg_list), K_FUNCTION, line, pos, local_scope, local_parent_kind);
1123         addToScope(scope, name);
1124         parseExpr(lexer, true, K_FUNCTION, scope);
1125     }
1126     else if (lexer->cur_token == TOKEN_CLOSE_BLOCK)
1127     {
1128         /* Function without method */
1129         addTag(name, NULL, NULL, K_FUNCTION, line, pos, local_scope, local_parent_kind);
1130         /* Go to the closing 'end' keyword */
1131         skipUntilEnd(lexer);
1132     }
1133 
1134     vStringDelete(name);
1135     vStringDelete(arg_list);
1136     vStringDelete(local_scope);
1137 }
1138 
1139 /* Macro format:
1140  * "macro" <ident>()
1141  */
parseMacro(lexerState * lexer,vString * scope,int parent_kind)1142 static void parseMacro (lexerState *lexer, vString *scope, int parent_kind)
1143 {
1144     vString *name;
1145     unsigned long line;
1146     MIOPos pos;
1147 
1148     advanceToken(lexer, true, false);
1149     if (lexer->cur_token != TOKEN_IDENTIFIER)
1150     {
1151         return;
1152     }
1153 
1154     name = vStringNewCopy(lexer->token_str);
1155     line = lexer->line;
1156     pos = lexer->pos;
1157 
1158     advanceToken(lexer, true, false);
1159     if (lexer->cur_token == TOKEN_PAREN_BLOCK)
1160     {
1161         addTag(name, NULL, vStringValue(lexer->token_str), K_MACRO, line, pos, scope, parent_kind);
1162     }
1163 
1164     skipUntilEnd(lexer);
1165     vStringDelete(name);
1166 }
1167 
1168 /* Const format:
1169  * "const" <ident>
1170  */
parseConst(lexerState * lexer,vString * scope,int parent_kind)1171 static void parseConst (lexerState *lexer, vString *scope, int parent_kind)
1172 {
1173     vString *name;
1174 
1175     advanceToken(lexer, true, false);
1176     if (lexer->cur_token != TOKEN_IDENTIFIER)
1177     {
1178         return;
1179     }
1180 
1181     name = vStringNewCopy(lexer->token_str);
1182 
1183     advanceToken(lexer, true, false);
1184     if (lexer->cur_token == TOKEN_TYPE_ANNOTATION)
1185     {
1186         addTag(name, "const", vStringValue(lexer->token_str), K_CONSTANT, lexer->line, lexer->pos, scope, parent_kind);
1187         advanceToken(lexer, true, false);
1188     }
1189     else
1190     {
1191         addTag(name, "const", NULL, K_CONSTANT, lexer->line, lexer->pos, scope, parent_kind);
1192     }
1193 
1194     vStringDelete(name);
1195 }
1196 
1197 /* Type format:
1198  * [ "abstract" | "primitive" ] "type" <ident>
1199  */
parseType(lexerState * lexer,vString * scope,int parent_kind)1200 static void parseType (lexerState *lexer, vString *scope, int parent_kind)
1201 {
1202     advanceToken(lexer, true, false);
1203     if (lexer->cur_token != TOKEN_IDENTIFIER)
1204     {
1205         return;
1206     }
1207 
1208     addTag(lexer->token_str, NULL, NULL, K_TYPE, lexer->line, lexer->pos, scope, parent_kind);
1209 
1210     skipUntilEnd(lexer);
1211 }
1212 
1213 /* Module format:
1214  * [ "baremodule" | "module" ] <ident>
1215  */
parseModule(lexerState * lexer,vString * scope,int parent_kind)1216 static void parseModule (lexerState *lexer, vString *scope, int parent_kind)
1217 {
1218     advanceToken(lexer, true, false);
1219     if (lexer->cur_token != TOKEN_IDENTIFIER)
1220     {
1221         return;
1222     }
1223 
1224     addTag(lexer->token_str, NULL, NULL, K_MODULE, lexer->line, lexer->pos, scope, parent_kind);
1225     addToScope(scope, lexer->token_str);
1226     advanceToken(lexer, true, false);
1227     parseExpr(lexer, true, K_MODULE, scope);
1228 }
1229 
1230 /*
1231  * Parse comma separated entity in import/using expressions. An entity could be
1232  * in the form of "Module" or "Module.symbol". The lexer should be at the end
1233  * of "Module", and this function will take it to the end of the entity
1234  * (whitespaces also skipped).
1235  */
parseImportEntity(lexerState * lexer,vString * scope,int token_type,int parent_kind)1236 static void parseImportEntity (lexerState *lexer, vString *scope, int token_type, int parent_kind)
1237 {
1238     if (lexer->cur_c == '.')
1239     {
1240         if (token_type == TOKEN_IMPORT)
1241         {
1242             vString *module_name = vStringNewCopy(lexer->token_str);
1243             addReferenceTag(module_name, K_MODULE, JULIA_MODULE_NAMESPACE, lexer->line, lexer->pos, scope, parent_kind);
1244             advanceChar(lexer);
1245             advanceToken(lexer, true, false);
1246             addReferenceTag(lexer->token_str, K_UNKNOWN, JULIA_UNKNOWN_IMPORTED, lexer->line, lexer->pos, module_name, K_MODULE);
1247             vStringDelete(module_name);
1248         }
1249         else /* if (token_type == TOKEN_USING) */
1250         {
1251             /* using Module.symbol is invalid, so we advance the lexer but don't tag it. */
1252             advanceChar(lexer);
1253             advanceToken(lexer, true, false);
1254         }
1255     }
1256     else
1257     {
1258         if (token_type == TOKEN_IMPORT)
1259         {
1260             addReferenceTag(lexer->token_str, K_MODULE, JULIA_MODULE_IMPORTED, lexer->line, lexer->pos, scope, parent_kind);
1261         }
1262         else /* if (token_type == TOKEN_USING) */
1263         {
1264             addReferenceTag(lexer->token_str, K_MODULE, JULIA_MODULE_USED, lexer->line, lexer->pos, scope, parent_kind);
1265         }
1266     }
1267 }
1268 
1269 /* Parse import/using expressions with a colon, like: */
1270 /* import Module: symbol1, symbol2 */
1271 /* using Module: symbol1, symbol2 */
1272 /* The lexer should be at the end of "Module", and this function will take it
1273  * to the end of the token after this expression (whitespaces also skipped). */
parseColonImportExpr(lexerState * lexer,vString * scope,int token_type,int parent_kind)1274 static void parseColonImportExpr (lexerState *lexer, vString *scope, int token_type, int parent_kind)
1275 {
1276     int symbol_role;
1277     if (token_type == TOKEN_IMPORT)
1278     {
1279         symbol_role = JULIA_UNKNOWN_IMPORTED;
1280     }
1281     else /* if (token_type == TOKEN_USING) */
1282     {
1283         symbol_role = JULIA_UNKNOWN_USED;
1284     }
1285     vString *name = vStringNewCopy(lexer->token_str);
1286     addReferenceTag(name, K_MODULE, JULIA_MODULE_NAMESPACE, lexer->line, lexer->pos, scope, parent_kind);
1287     advanceChar(lexer);
1288     advanceToken(lexer, true, false);
1289     if (lexer->cur_token == TOKEN_NEWLINE)
1290     {
1291         advanceToken(lexer, true, false);
1292     }
1293     while (lexer->cur_token == TOKEN_IDENTIFIER || lexer->cur_token == TOKEN_MACROCALL)
1294     {
1295         addReferenceTag(lexer->token_str, K_UNKNOWN, symbol_role, lexer->line, lexer->pos, name, K_MODULE);
1296         if (lexer->cur_c == ',')
1297         {
1298             advanceChar(lexer);
1299             advanceToken(lexer, true, false);
1300             if (lexer->cur_token == TOKEN_NEWLINE)
1301             {
1302                 advanceToken(lexer, true, false);
1303             }
1304         }
1305         else
1306         {
1307             advanceToken(lexer, true, false);
1308         }
1309     }
1310     vStringDelete(name);
1311 }
1312 
1313 /* Import format:
1314  * [ "import" | "using" ] <ident> [: <name>]
1315  */
parseImport(lexerState * lexer,vString * scope,int token_type,int parent_kind)1316 static void parseImport (lexerState *lexer, vString *scope, int token_type, int parent_kind)
1317 {
1318     /* capture the imported name */
1319     advanceToken(lexer, true, false);
1320     /* import Mod1: symbol1, symbol2 */
1321     /* using Mod1: symbol1, symbol2 */
1322     if (lexer->cur_c == ':')
1323     {
1324         parseColonImportExpr(lexer, scope, token_type, parent_kind);
1325     }
1326     /* All other situations, like import/using Mod1, Mod2.symbol1, Mod3... */
1327     else
1328     {
1329         while (lexer->cur_token == TOKEN_IDENTIFIER || lexer->cur_token == TOKEN_MACROCALL)
1330         {
1331             parseImportEntity(lexer, scope, token_type, parent_kind);
1332             if (lexer->cur_c == ',')
1333             {
1334                 advanceChar(lexer);
1335                 advanceToken(lexer, true, false);
1336                 if (lexer->cur_token == TOKEN_NEWLINE)
1337                 {
1338                     advanceToken(lexer, true, false);
1339                 }
1340             }
1341             else
1342             {
1343                 advanceToken(lexer, true, false);
1344             }
1345         }
1346     }
1347 }
1348 
1349 /* Structs format:
1350  * "struct" <ident>[{<param>}] [<:<type>]; <fields> <inner constructor> end
1351  * */
parseStruct(lexerState * lexer,vString * scope,int parent_kind)1352 static void parseStruct (lexerState *lexer, vString *scope, int parent_kind)
1353 {
1354     vString *name;
1355     vString *field;
1356     size_t old_scope_len;
1357     unsigned long line;
1358     MIOPos pos;
1359 
1360     advanceToken(lexer, true, false);
1361     if (lexer->cur_token != TOKEN_IDENTIFIER)
1362     {
1363         return;
1364     }
1365 
1366     name = vStringNewCopy(lexer->token_str);
1367     field = vStringNew();
1368     line = lexer->line;
1369     pos = lexer->pos;
1370 
1371     /* scan parametrization */
1372     advanceToken(lexer, true, false);
1373     if (lexer->cur_token == TOKEN_CURLY_BLOCK)
1374     {
1375         addTag(name, NULL, vStringValue(lexer->token_str), K_STRUCT, line, pos, scope, parent_kind);
1376         advanceToken(lexer, true, false);
1377     }
1378     else
1379     {
1380         addTag(name, NULL, NULL, K_STRUCT, line, pos, scope, parent_kind);
1381     }
1382     addToScope(scope, name);
1383 
1384     /* skip inheritance */
1385     if (lexer->cur_token == TOKEN_TYPE_ANNOTATION)
1386     {
1387         advanceToken(lexer, true, false);
1388     }
1389 
1390     /* keep the struct scope in memory to reset it after parsing constructors */
1391     old_scope_len = vStringLength(scope);
1392     /* Parse fields and inner constructors */
1393     while (lexer->cur_token != TOKEN_EOF && lexer->cur_token != TOKEN_CLOSE_BLOCK)
1394     {
1395         if (lexer->cur_token == TOKEN_IDENTIFIER && lexer->first_token)
1396         {
1397             if (strcmp(vStringValue(lexer->token_str), vStringValue(name)) == 0)
1398             {
1399                 /* inner constructor */
1400                 parseShortFunction(lexer, scope, K_STRUCT);
1401                 continue;
1402             }
1403 
1404             vStringCopy(field, lexer->token_str);
1405 
1406             /* parse type annotation */
1407             advanceToken(lexer, true, false);
1408             if (lexer->cur_token == TOKEN_TYPE_ANNOTATION)
1409             {
1410                 addTag(field, NULL, vStringValue(lexer->token_str), K_FIELD, lexer->line, lexer->pos, scope, K_STRUCT);
1411                 advanceToken(lexer, true, false);
1412             }
1413             else
1414             {
1415                 addTag(field, NULL, NULL, K_FIELD, lexer->line, lexer->pos, scope, K_STRUCT);
1416             }
1417         }
1418         else if (lexer->cur_token == TOKEN_FUNCTION)
1419         {
1420             /* inner constructor */
1421             parseFunction(lexer, scope, K_STRUCT);
1422         }
1423         else
1424         {
1425             /* Get next token */
1426             advanceToken(lexer, true, false);
1427         }
1428         resetScope(scope, old_scope_len);
1429     }
1430 
1431     vStringDelete(name);
1432     vStringDelete(field);
1433 }
1434 
1435 
/* Main parsing loop: dispatch on the current token until EOF.
 *
 * lexer: lexer state, advanced in place.
 * delim: when true, return as soon as the block nesting level tracked here
 *        (it starts at 1) drops to 0 or below.
 * kind:  passed to the sub-parsers as the parent kind of the tags they emit.
 * scope: current scope string; after each dispatched construct it is reset
 *        to the length it had at the start of that iteration.
 */
static void parseExpr (lexerState *lexer, bool delim, int kind, vString *scope)
{
    int level = 1;
    size_t old_scope_len;
    /* scratch scope for `Module.name(...) = ...` definitions, allocated lazily */
    vString *local_scope = NULL;

    while (lexer->cur_token != TOKEN_EOF)
    {
        old_scope_len = vStringLength(scope);
        /* Advance token and update if this is a new line */
        while (lexer->cur_token == TOKEN_NEWLINE ||
               lexer->cur_token == TOKEN_SEMICOLON ||
               lexer->cur_token == TOKEN_NONE )
        {
            advanceToken(lexer, true, false);
        }

        /* Make sure every case advances the token
         * otherwise we can be stuck in infinite loop */
        switch (lexer->cur_token)
        {
            case TOKEN_CONST:
                parseConst(lexer, scope, kind);
                break;
            case TOKEN_FUNCTION:
                parseFunction(lexer, scope, kind);
                break;
            case TOKEN_MACRO:
                parseMacro(lexer, scope, kind);
                break;
            case TOKEN_MODULE:
                parseModule(lexer, scope, kind);
                break;
            case TOKEN_STRUCT:
                parseStruct(lexer, scope, kind);
                break;
            case TOKEN_TYPE:
                parseType(lexer, scope, kind);
                break;
            case TOKEN_IMPORT:
                parseImport(lexer, scope, TOKEN_IMPORT, kind);
                break;
            case TOKEN_USING:
                parseImport(lexer, scope, TOKEN_USING, kind);
                /* Do not fall through: the token left current by
                 * parseImport() must be dispatched by the next iteration,
                 * not handled as if it were an identifier. */
                break;
            case TOKEN_IDENTIFIER:
                if (lexer->first_token && lexer->cur_c == '.')
                {
                    /* `Module.name` at the start of a line: remember the
                     * module as the scope of a possible short function */
                    if (local_scope == NULL)
                    {
                        local_scope = vStringNew();
                    }
                    vStringCopy(local_scope, lexer->token_str);
                    advanceChar(lexer);
                    /* next token, but keep the first_token value */
                    advanceToken(lexer, true, true);
                    skipWhitespace(lexer, false);
                    if (lexer->cur_c == '(')
                    {
                        parseShortFunction(lexer, local_scope, K_MODULE);
                    }
                }
                else
                {
                    skipWhitespace(lexer, false);
                    if (lexer->first_token && (lexer->cur_c == '(' || lexer->cur_c == '{'))
                    {
                        parseShortFunction(lexer, scope, kind);
                    }
                    else
                    {
                        advanceToken(lexer, true, false);
                    }
                }
                break;
            case TOKEN_OPEN_BLOCK:
                level++;
                advanceToken(lexer, true, false);
                break;
            case TOKEN_CLOSE_BLOCK:
                level--;
                advanceToken(lexer, true, false);
                break;
            default:
                advanceToken(lexer, true, false);
                break;
        }
        resetScope(scope, old_scope_len);
        if (delim && level <= 0)
        {
            break;
        }
    }
    vStringDelete(local_scope);
}
1530 
findJuliaTags(void)1531 static void findJuliaTags (void)
1532 {
1533     lexerState lexer;
1534     vString* scope = vStringNew();
1535     initLexer(&lexer);
1536 
1537     parseExpr(&lexer, false, K_NONE, scope);
1538     vStringDelete(scope);
1539 
1540     deInitLexer(&lexer);
1541 }
1542 
JuliaParser(void)1543 extern parserDefinition* JuliaParser (void)
1544 {
1545     static const char *const extensions [] = { "jl", NULL };
1546     parserDefinition* def = parserNew ("Julia");
1547     def->kindTable  = JuliaKinds;
1548     def->kindCount  = ARRAY_SIZE (JuliaKinds);
1549     def->extensions = extensions;
1550     def->parser     = findJuliaTags;
1551     def->keywordTable = JuliaKeywordTable;
1552     def->keywordCount = ARRAY_SIZE (JuliaKeywordTable);
1553     return def;
1554 }
1555