1 /*
2 * Copyright (c) 2020-2021, getzze <getzze@gmail.com>
3 *
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License version 2 or (at your option) any later version.
6 *
7 * This module contains functions for generating tags for Julia files.
8 *
9 * Documented 'kinds':
10 * https://docs.julialang.org/en/v1/manual/documentation/#Syntax-Guide
11 * Language parser in Scheme:
12 * https://github.com/JuliaLang/julia/blob/master/src/julia-parser.scm
13 */
14
15 /*
16 * INCLUDE FILES
17 */
18 #include "general.h" /* must always come first */
19
20 #include <string.h>
21
22 #include "keyword.h"
23 #include "parse.h"
24 #include "entry.h"
25 #include "options.h"
26 #include "read.h"
27 #include "routines.h"
28 #include "vstring.h"
29 #include "xtag.h"
30
31 /*
32 * MACROS
33 */
/* Cap on the number of characters advanceAndStoreChar() appends to a token string */
#define MAX_STRING_LENGTH 256
35
36 /*
37 * DATA DEFINITIONS
38 */
/* Tag kinds produced by this parser.
 * K_CONSTANT .. K_UNKNOWN are listed in the same order as the entries of
 * JuliaKinds[]. K_NONE is a sentinel: addTag()/addReferenceTag() emit
 * nothing for it, and as a parent kind it means "no scope". */
typedef enum {
	K_CONSTANT = 0,
	K_FUNCTION,
	K_FIELD,
	K_MACRO,
	K_MODULE,
	K_STRUCT,
	K_TYPE,
	K_UNKNOWN,
	K_NONE
} JuliaKind;
50
/* Roles of a module reference; listed in the same order as JuliaModuleRoles[] */
typedef enum {
	JULIA_MODULE_IMPORTED = 0,
	JULIA_MODULE_USED,
	JULIA_MODULE_NAMESPACE,
} juliaModuleRole;
56
/* Roles of an "unknown" reference; listed in the same order as JuliaUnknownRoles[] */
typedef enum {
	JULIA_UNKNOWN_IMPORTED = 0,
	JULIA_UNKNOWN_USED,
} juliaUnknownRole;
61
62 /*
63 * using X X = (kind:module, role:used)
64 *
65 * using X: a, b X = (kind:module, role:namespace)
66 * a, b = (kind:unknown, role:used, scope:module:X)
67 *
68 * import X X = (kind:module, role:imported)
69 *
70 * import X.a, Y.b X, Y = (kind:module, role:namespace)
71 * a, b = (kind:unknown, role:imported, scope:module:X)
72 *
73 * import X: a, b Same as the above one
74 */
75 static roleDefinition JuliaModuleRoles [] = {
76 { true, "imported", "loaded by \"import\"" },
77 { true, "used", "loaded by \"using\"" },
78 { true, "namespace", "only some symbols in it are imported" },
79 };
80
81 static roleDefinition JuliaUnknownRoles [] = {
82 { true, "imported", "loaded by \"import\"" },
83 { true, "used", "loaded by \"using\""},
84 };
85
86 static kindDefinition JuliaKinds [] = {
87 { true, 'c', "constant", "Constants" },
88 { true, 'f', "function", "Functions" },
89 { true, 'g', "field", "Fields" },
90 { true, 'm', "macro", "Macros" },
91 { true, 'n', "module", "Modules",
92 ATTACH_ROLES(JuliaModuleRoles) },
93 { true, 's', "struct", "Structures" },
94 { true, 't', "type", "Types" },
95 { true, 'x', "unknown", "name defined in other modules",
96 .referenceOnly = true, ATTACH_ROLES(JuliaUnknownRoles) },
97 };
98
/* Token types returned by the lexer.
 * Characters that do not start any of these tokens are returned by
 * advanceToken() as their own character code instead. */
typedef enum {
	TOKEN_NONE=0,           /* none (initial state) */
	TOKEN_WHITESPACE,

	/* delimited blocks, scanned as a single token */
	TOKEN_PAREN_BLOCK,
	TOKEN_BRACKET_BLOCK,
	TOKEN_CURLY_BLOCK,

	/* keywords opening a block / the closing `end` keyword */
	TOKEN_OPEN_BLOCK,
	TOKEN_CLOSE_BLOCK,

	TOKEN_TYPE_ANNOTATION,
	TOKEN_TYPE_WHERE,
	TOKEN_CONST,
	TOKEN_STRING,           /* = 10 */
	TOKEN_COMMAND,
	TOKEN_MACROCALL,
	TOKEN_IDENTIFIER,
	TOKEN_MODULE,
	TOKEN_MACRO,
	TOKEN_FUNCTION,
	TOKEN_STRUCT,
	TOKEN_ENUM,
	TOKEN_TYPE,
	TOKEN_IMPORT,           /* = 20 */
	TOKEN_USING,
	TOKEN_EXPORT,
	TOKEN_NEWLINE,
	TOKEN_SEMICOLON,
	TOKEN_COMPOSER_KWD,     /* KEYWORD only */
	TOKEN_EOF,
	TOKEN_COUNT
} tokenType;
129
130 static const keywordTable JuliaKeywordTable [] = {
131 /* TODO: Sort by keys. */
132 { "mutable", TOKEN_COMPOSER_KWD },
133 { "primitive", TOKEN_COMPOSER_KWD },
134 { "abstract", TOKEN_COMPOSER_KWD },
135
136 { "if", TOKEN_OPEN_BLOCK },
137 { "for", TOKEN_OPEN_BLOCK },
138 { "while", TOKEN_OPEN_BLOCK },
139 { "try", TOKEN_OPEN_BLOCK },
140 { "do", TOKEN_OPEN_BLOCK },
141 { "begin", TOKEN_OPEN_BLOCK },
142 { "let", TOKEN_OPEN_BLOCK },
143 { "quote", TOKEN_OPEN_BLOCK },
144
145 { "module", TOKEN_MODULE },
146 { "baremodule",TOKEN_MODULE },
147
148 { "using", TOKEN_USING },
149 { "import", TOKEN_IMPORT },
150
151 { "export", TOKEN_EXPORT },
152 { "const", TOKEN_CONST },
153 { "macro", TOKEN_MACRO },
154 { "function", TOKEN_FUNCTION },
155 { "struct", TOKEN_STRUCT },
156 { "type", TOKEN_TYPE },
157 { "where", TOKEN_TYPE_WHERE },
158 { "end", TOKEN_CLOSE_BLOCK },
159 };
160
161 typedef struct {
162 /* Characters */
163 int prev_c;
164 int cur_c;
165 int next_c;
166
167 /* Tokens */
168 bool first_token;
169 int cur_token;
170 vString* token_str;
171 unsigned long line;
172 MIOPos pos;
173 } lexerState;
174
175 /*
176 * FUNCTION PROTOTYPES
177 */
178
179 static void parseExpr (lexerState *lexer, bool delim, int kind, vString *scope);
180
181 static void scanParenBlock (lexerState *lexer);
182
183 /*
184 * FUNCTION DEFINITIONS
185 */
186
/* Tell whether the string `what` ends with the string `withwhat`.
 * Returns 1 when it does (an empty `withwhat` always matches), 0 otherwise.
 * Lengths are kept as size_t so that no strlen() result is narrowed to int. */
static int endswith(const char* what, const char* withwhat)
{
	const size_t what_len = strlen(what);
	const size_t suffix_len = strlen(withwhat);

	if (suffix_len > what_len)
	{
		return 0;
	}

	/* compare the suffix against the tail of `what` */
	return strcmp(withwhat, what + (what_len - suffix_len)) == 0;
}
198
199 /* Resets the scope string to the old length */
resetScope(vString * scope,size_t old_len)200 static void resetScope (vString *scope, size_t old_len)
201 {
202 vStringTruncate (scope, old_len);
203 }
204
205 /* Adds a name to the end of the scope string */
addToScope(vString * scope,vString * name)206 static void addToScope (vString *scope, vString *name)
207 {
208 if (vStringLength(scope) > 0)
209 {
210 vStringPut(scope, '.');
211 }
212 vStringCat(scope, name);
213 }
214
215 /* Reads a character from the file */
advanceChar(lexerState * lexer)216 static void advanceChar (lexerState *lexer)
217 {
218 lexer->prev_c = lexer->cur_c;
219 lexer->cur_c = lexer->next_c;
220 lexer->next_c = getcFromInputFile();
221 }
222
223 /* Reads N characters from the file */
advanceNChar(lexerState * lexer,int n)224 static void advanceNChar (lexerState *lexer, int n)
225 {
226 while (n--)
227 {
228 advanceChar(lexer);
229 }
230 }
231
232 /* Store the current character in lexerState::token_str if there is space
233 * (set by MAX_STRING_LENGTH), and then read the next character from the file */
advanceAndStoreChar(lexerState * lexer)234 static void advanceAndStoreChar (lexerState *lexer)
235 {
236 if (vStringLength(lexer->token_str) < MAX_STRING_LENGTH)
237 {
238 vStringPut(lexer->token_str, (char) lexer->cur_c);
239 }
240 advanceChar(lexer);
241 }
242
/* Tell whether `c` is whitespace. Space and tab always count;
 * carriage return and line feed count only when `newline` is true. */
static bool isWhitespace (int c, bool newline)
{
	switch (c)
	{
		case ' ':
		case '\t':
			return true;
		case '\r':
		case '\n':
			return newline;
		default:
			return false;
	}
}
251
/* Tell whether `c` lies in the 7-bit ASCII range 0x00..0x7F
 * (negative values are rejected) */
static bool isAscii (int c)
{
	return 0 <= c && c <= 0x7F;
}
256
/* Tell whether `c` is one of the characters treated as an operator:
 * % ^ & | * - + ~ < > , / ? = : */
static bool isOperator (int c)
{
	switch (c)
	{
		case '%': case '^': case '&': case '|':
		case '*': case '-': case '+': case '~':
		case '<': case '>': case ',': case '/':
		case '?': case '=': case ':':
			return true;
		default:
			return false;
	}
}
268
/* Tell whether `c` may start an identifier: an ASCII letter, '_', or any
 * value >= 0xC0.
 * Unicode letters are not told apart from Unicode operators here. */
static bool isIdentifierFirstCharacter (int c)
{
	if (c >= 0xC0)
	{
		return true;
	}
	if (!isAscii(c))
	{
		return false;
	}
	return isalpha (c) != 0 || c == '_';
}
274
/* Tell whether `c` may appear inside an identifier: anything accepted by
 * isIdentifierFirstCharacter(), an ASCII digit, '!', or any value >= 0x80.
 * Unicode letters are not told apart from Unicode operators here. */
static bool isIdentifierCharacter (int c)
{
	if (isIdentifierFirstCharacter(c) || c >= 0x80)
	{
		return true;
	}
	return isAscii(c) && (isdigit(c) != 0 || c == '!');
}
280
/* Advance past a run of whitespace; line breaks are part of the run only
 * when `newline` is true (see isWhitespace()) */
static void skipWhitespace (lexerState *lexer, bool newline)
{
	for (; isWhitespace(lexer->cur_c, newline); advanceChar(lexer))
	{
		/* nothing to do: the advance happens in the loop header */
	}
}
288
/* Tell whether a quote following the character `c` is a transpose operator.
 * Transpose is only accepted after an identifier character (which covers
 * identifiers and numbers), a closing parenthesis or a closing bracket. */
static bool isTranspose (int c)
{
	switch (c)
	{
		case ')':
		case ']':
			return true;
		default:
			return isIdentifierCharacter(c);
	}
}
294
295
296 /*
297 * Lexer functions
298 * */
299
300 /* Check that the current character sequence is a type declaration or inheritance */
isTypeDecl(lexerState * lexer)301 static bool isTypeDecl (lexerState *lexer)
302 {
303 if ((lexer->prev_c != '.' && lexer->cur_c == '<' && lexer->next_c == ':') ||
304 (lexer->prev_c != '.' && lexer->cur_c == '>' && lexer->next_c == ':') ||
305 (lexer->cur_c == ':' && lexer->next_c == ':') )
306 {
307 return true;
308 }
309 return false;
310 }
311
312 /* Check if the current char is a new line */
isNewLine(lexerState * lexer)313 static bool isNewLine (lexerState *lexer)
314 {
315 return (lexer->cur_c == '\n')? true: false;
316 }
317
318 /* Check if the current char is a new line.
319 * If it is, skip the newline and return true */
skipNewLine(lexerState * lexer)320 static bool skipNewLine (lexerState *lexer)
321 {
322 if (isNewLine(lexer))
323 {
324 advanceChar(lexer);
325 return true;
326 }
327 return false;
328 }
329
330 /* Skip a single comment or multiline comment
331 * A single line comment starts with #
332 * A multi-line comment is encapsulated in #=...=# and they are nesting
333 * */
skipComment(lexerState * lexer)334 static void skipComment (lexerState *lexer)
335 {
336 /* # */
337 if (lexer->next_c != '=')
338 {
339 advanceNChar(lexer, 1);
340 while (lexer->cur_c != EOF && lexer->cur_c != '\n')
341 {
342 advanceChar(lexer);
343 }
344 }
345 /* block comment */
346 else /* if (lexer->next_c == '=') */
347 {
348 int level = 1;
349 advanceNChar(lexer, 2);
350 while (lexer->cur_c != EOF && level > 0)
351 {
352 if (lexer->cur_c == '=' && lexer->next_c == '#')
353 {
354 level--;
355 advanceNChar(lexer, 2);
356 }
357 else if (lexer->cur_c == '#' && lexer->next_c == '=')
358 {
359 level++;
360 advanceNChar(lexer, 2);
361 }
362 else
363 {
364 advanceChar(lexer);
365 }
366 }
367 }
368 }
369
/* Collect an identifier into the token string.
 * The token string is emptied first when `clear` is true, otherwise the
 * identifier is appended to it. The current character is always stored,
 * whatever it is; following characters are stored as long as they are
 * identifier characters. */
static void scanIdentifier (lexerState *lexer, bool clear)
{
	if (clear)
	{
		vStringClear(lexer->token_str);
	}

	/* first character, taken unconditionally */
	advanceAndStoreChar(lexer);

	while (lexer->cur_c != EOF && isIdentifierCharacter(lexer->cur_c))
	{
		advanceAndStoreChar(lexer);
	}
}
382
383 /* Scan a quote-like expression.
384 * Allow for triple-character variand and interpolation with `$`.
385 * These last past the end of the line, so be careful
386 * not to store too much of them (see MAX_STRING_LENGTH). */
scanStringOrCommand(lexerState * lexer,int c)387 static void scanStringOrCommand (lexerState *lexer, int c)
388 {
389 bool istriple = false;
390
391 /* Pass the first "quote"-character */
392 advanceAndStoreChar(lexer);
393
394 /* Check for triple "quote"-character */
395 if (lexer->cur_c == c && lexer->next_c == c)
396 {
397 istriple = true;
398 advanceAndStoreChar(lexer);
399 advanceAndStoreChar(lexer);
400
401 /* Cancel up to 2 "quote"-characters after opening the triple */
402 if (lexer->cur_c == c)
403 {
404 advanceAndStoreChar(lexer);
405 if (lexer->cur_c == c)
406 {
407 advanceAndStoreChar(lexer);
408 }
409 }
410 }
411
412 while (lexer->cur_c != EOF && lexer->cur_c != c)
413 {
414 /* Check for interpolation before checking for end of "quote" */
415 if (lexer->cur_c == '$' && lexer->next_c == '(')
416 {
417 advanceAndStoreChar(lexer);
418 scanParenBlock(lexer);
419 /* continue to avoid advance character again. Correct bug
420 * with "quote"-character just after closing parenthesis */
421 continue;
422 }
423
424 if (lexer->cur_c == '\\' &&
425 (lexer->next_c == c || lexer->next_c == '\\'))
426 {
427 advanceAndStoreChar(lexer);
428 }
429 advanceAndStoreChar(lexer);
430
431 /* Cancel up to 2 "quote"-characters if triple string */
432 if (istriple && lexer->cur_c == c)
433 {
434 advanceAndStoreChar(lexer);
435 if (lexer->cur_c == c)
436 {
437 advanceAndStoreChar(lexer);
438 }
439 }
440 }
441 /* Pass the last "quote"-character */
442 advanceAndStoreChar(lexer);
443 }
444
445
446 /* Scan commands surrounded by backticks,
447 * possibly triple backticks */
scanCommand(lexerState * lexer)448 static void scanCommand (lexerState *lexer)
449 {
450 scanStringOrCommand(lexer, '`');
451 }
452
453 /* Double-quoted strings,
454 * possibly triple doublequotes */
scanString(lexerState * lexer)455 static void scanString (lexerState *lexer)
456 {
457 scanStringOrCommand(lexer, '"');
458 }
459
460
461 /* This deals with character literals: 'n', '\n', '\uFFFF';
462 * and matrix transpose: A'.
463 * We'll use this approximate regexp for the literals:
464 * \' [^'] \' or \' \\ [^']+ \' or \' \\ \' \'
465 * Either way, we'll treat this token as a string, so it gets preserved */
scanCharacterOrTranspose(lexerState * lexer)466 static bool scanCharacterOrTranspose (lexerState *lexer)
467 {
468 if (isTranspose(lexer->prev_c))
469 {
470 /* deal with untranspose/transpose sequence */
471 while (lexer->cur_c != EOF && lexer->cur_c == '\'')
472 {
473 advanceAndStoreChar(lexer);
474 }
475 return false;
476 }
477
478 //vStringClear(lexer->token_str);
479 advanceAndStoreChar(lexer);
480
481 if (lexer->cur_c == '\\')
482 {
483 advanceAndStoreChar(lexer);
484 /* The \' \\ \' \' (literally '\'') case */
485 if (lexer->cur_c == '\'' && lexer->next_c == '\'')
486 {
487 advanceAndStoreChar(lexer);
488 advanceAndStoreChar(lexer);
489 }
490 /* The \' \\ [^']+ \' case */
491 else
492 {
493 while (lexer->cur_c != EOF && lexer->cur_c != '\'')
494 {
495 advanceAndStoreChar(lexer);
496 }
497 }
498 }
499 /* The \' [^'] \' and \' \' \' cases */
500 else if (lexer->next_c == '\'')
501 {
502 advanceAndStoreChar(lexer);
503 advanceAndStoreChar(lexer);
504 }
505 /* Otherwise it is malformed */
506 return true;
507 }
508
509 /* Parse a block with opening and closing character */
scanBlock(lexerState * lexer,int open,int close,bool convert_newline)510 static void scanBlock (lexerState *lexer, int open, int close, bool convert_newline)
511 {
512 /* Assume the current char is `open` */
513 int level = 1;
514
515 /* Pass the first opening */
516 advanceAndStoreChar(lexer);
517
518 while (lexer->cur_c != EOF && level > 0)
519 {
520 /* Parse everything */
521 if (lexer->cur_c == ' ' || lexer->cur_c == '\t')
522 {
523 skipWhitespace(lexer, false);
524 vStringPut(lexer->token_str, ' ');
525 }
526 if (lexer->cur_c == '#')
527 {
528 skipComment(lexer);
529 }
530 else if (lexer->cur_c == '\"')
531 {
532 scanString(lexer);
533 }
534 else if (lexer->cur_c == '\'')
535 {
536 scanCharacterOrTranspose(lexer);
537 }
538
539 /* Parse opening/closing */
540 if (lexer->cur_c == open)
541 {
542 level++;
543 }
544 else if (lexer->cur_c == close)
545 {
546 level--;
547 }
548
549 if (convert_newline && skipNewLine(lexer))
550 {
551 vStringPut(lexer->token_str, ' ');
552 }
553 else
554 {
555 advanceAndStoreChar(lexer);
556 }
557
558 }
559 /* Lexer position is just after `close` */
560 }
561
562
563 /* Parse a block inside parenthesis, for example a function argument list */
scanParenBlock(lexerState * lexer)564 static void scanParenBlock (lexerState *lexer)
565 {
566 scanBlock(lexer, '(', ')', true);
567 }
568
569 /* Indexing block with bracket.
570 * Some keywords have a special meaning in this environment:
571 * end, begin, for and if */
scanIndexBlock(lexerState * lexer)572 static void scanIndexBlock (lexerState *lexer)
573 {
574 scanBlock(lexer, '[', ']', false);
575
576 }
577
578 /* Parse a block inside curly brackets, for type parametrization */
scanCurlyBlock(lexerState * lexer)579 static void scanCurlyBlock (lexerState *lexer)
580 {
581 scanBlock(lexer, '{', '}', true);
582 }
583
584 /* Scan type annotation like
585 * `::Type`, `::Type{T}`
586 */
scanTypeAnnotation(lexerState * lexer)587 static void scanTypeAnnotation (lexerState *lexer)
588 {
589 /* assume that current char is '<', '>' or ':', followed by ':' */
590 advanceAndStoreChar(lexer);
591 advanceAndStoreChar(lexer);
592
593 skipWhitespace(lexer, true);
594 scanIdentifier(lexer, false);
595 if (lexer->cur_c == '{')
596 {
597 scanCurlyBlock(lexer);
598 }
599 }
600
601 /* Scan type annotation like
602 * `where Int<:T<:Real`, `where S<:Array{Real}` or `where {S, T}`
603 */
scanTypeWhere(lexerState * lexer)604 static void scanTypeWhere (lexerState *lexer)
605 {
606 /* assume that current token is 'where'
607 * allow line continuation */
608 vStringPut(lexer->token_str, ' ');
609 skipWhitespace(lexer, true);
610
611 while (lexer->cur_c != EOF)
612 {
613
614 if (lexer->cur_c == '{')
615 {
616 scanCurlyBlock(lexer);
617 }
618 else if (isIdentifierFirstCharacter(lexer->cur_c))
619 {
620 scanIdentifier(lexer, false);
621 if (endswith(vStringValue(lexer->token_str), "where"))
622 {
623 /* allow line continuation */
624 vStringPut(lexer->token_str, ' ');
625 skipWhitespace(lexer, true);
626 }
627 }
628 else if (isTypeDecl(lexer))
629 {
630 scanTypeAnnotation(lexer);
631 //skipWhitespace(lexer, false);
632 }
633 else if (lexer->cur_c == '#')
634 {
635 skipComment(lexer);
636 /* allow line continuation */
637 if (endswith(vStringValue(lexer->token_str), "where "))
638 {
639 skipWhitespace(lexer, true);
640 }
641 }
642 else if (isWhitespace(lexer->cur_c, false))
643 {
644 while (isWhitespace(lexer->cur_c, false))
645 {
646 advanceChar(lexer);
647 }
648 /* Add a space, if it is not a trailing space */
649 if (!(isNewLine(lexer)))
650 {
651 vStringPut(lexer->token_str, ' ');
652 }
653 }
654 else
655 {
656 break;
657 }
658 }
659 }
660
661
/* Scan an identifier and classify it.
 * The text is looked up in the keyword table of the input language. After
 * a composer keyword (TOKEN_COMPOSER_KWD) the following word is scanned
 * and looked up instead. For `where` the type constraints that follow are
 * scanned into the token string too.
 * Sets lexerState::cur_token to the keyword's token type when it is one of
 * the keywords handled by the parser, to TOKEN_IDENTIFIER otherwise, and
 * returns that value. */
static int parseIdentifier (lexerState *lexer)
{
	const langType lang = getInputLanguage ();
	int kw;

	scanIdentifier(lexer, true);
	kw = lookupKeyword (vStringValue(lexer->token_str), lang);

	/* First part of a composed identifier */
	if (kw == TOKEN_COMPOSER_KWD)
	{
		skipWhitespace(lexer, false);
		scanIdentifier(lexer, true);
		kw = lookupKeyword (vStringValue(lexer->token_str), lang);
	}

	switch (kw)
	{
		case TOKEN_TYPE_WHERE:
			scanTypeWhere(lexer);
			lexer->cur_token = kw;
			break;

		case TOKEN_OPEN_BLOCK:
		case TOKEN_MODULE:
		case TOKEN_IMPORT:
		case TOKEN_USING:
		case TOKEN_EXPORT:
		case TOKEN_CONST:
		case TOKEN_MACRO:
		case TOKEN_FUNCTION:
		case TOKEN_STRUCT:
		case TOKEN_TYPE:
		case TOKEN_CLOSE_BLOCK:
			lexer->cur_token = kw;
			break;

		default:
			lexer->cur_token = TOKEN_IDENTIFIER;
			break;
	}
	return lexer->cur_token;
}
697
698
699 /* Advances the parser one token, optionally skipping whitespace
700 * (otherwise it is concatenated and returned as a single whitespace token).
701 * Whitespace is needed to properly render function signatures. Unrecognized
702 * token starts are stored literally, e.g. token may equal to a character '#'. */
advanceToken(lexerState * lexer,bool skip_whitespace,bool propagate_first)703 static int advanceToken (lexerState *lexer, bool skip_whitespace, bool propagate_first)
704 {
705 bool have_whitespace = false;
706 bool newline = false;
707 lexer->line = getInputLineNumber();
708 lexer->pos = getInputFilePosition();
709
710 /* the next token is the first token of the line */
711 if (!propagate_first)
712 {
713 if (lexer->cur_token == TOKEN_NEWLINE ||
714 lexer->cur_token == TOKEN_SEMICOLON ||
715 lexer->cur_token == TOKEN_NONE ||
716 (lexer->first_token && lexer->cur_token == TOKEN_MACROCALL))
717 {
718 lexer->first_token = true;
719 }
720 else
721 {
722 lexer->first_token = false;
723 }
724 }
725
726 while (lexer->cur_c != EOF)
727 {
728 /* skip whitespaces but not newlines */
729 if (isWhitespace(lexer->cur_c, newline))
730 {
731 skipWhitespace(lexer, newline);
732 have_whitespace = true;
733 }
734 else if (lexer->cur_c == '#')
735 {
736 skipComment(lexer);
737 have_whitespace = true;
738 }
739 else
740 {
741 if (have_whitespace && !skip_whitespace)
742 {
743 return lexer->cur_token = TOKEN_WHITESPACE;
744 }
745 break;
746 }
747 }
748 lexer->line = getInputLineNumber();
749 lexer->pos = getInputFilePosition();
750 while (lexer->cur_c != EOF)
751 {
752 if (lexer->cur_c == '"')
753 {
754 vStringClear(lexer->token_str);
755 scanString(lexer);
756 return lexer->cur_token = TOKEN_STRING;
757 }
758 else if (lexer->cur_c == '\'')
759 {
760 vStringClear(lexer->token_str);
761 if (scanCharacterOrTranspose(lexer))
762 {
763 return lexer->cur_token = TOKEN_STRING;
764 }
765 else
766 {
767 return lexer->cur_token = '\'';
768 }
769 }
770 else if (lexer->cur_c == '`')
771 {
772 vStringClear(lexer->token_str);
773 scanCommand(lexer);
774 return lexer->cur_token = TOKEN_COMMAND;
775 }
776 else if (isIdentifierFirstCharacter(lexer->cur_c))
777 {
778 return parseIdentifier(lexer);
779 }
780 else if (lexer->cur_c == '@')
781 {
782 vStringClear(lexer->token_str);
783 advanceAndStoreChar(lexer);
784 do
785 {
786 advanceAndStoreChar(lexer);
787 } while(lexer->cur_c != EOF && isIdentifierCharacter(lexer->cur_c));
788 return lexer->cur_token = TOKEN_MACROCALL;
789 }
790 else if (lexer->cur_c == '(')
791 {
792 vStringClear(lexer->token_str);
793 scanParenBlock(lexer);
794 return lexer->cur_token = TOKEN_PAREN_BLOCK;
795 }
796 else if (lexer->cur_c == '[')
797 {
798 vStringClear(lexer->token_str);
799 scanIndexBlock(lexer);
800 return lexer->cur_token = TOKEN_BRACKET_BLOCK;
801 }
802 else if (lexer->cur_c == '{')
803 {
804 vStringClear(lexer->token_str);
805 scanCurlyBlock(lexer);
806 return lexer->cur_token = TOKEN_CURLY_BLOCK;
807 }
808 else if (isTypeDecl(lexer))
809 {
810 vStringClear(lexer->token_str);
811 scanTypeAnnotation(lexer);
812 return lexer->cur_token = TOKEN_TYPE_ANNOTATION;
813 }
814 else if (skipNewLine(lexer))
815 {
816 /* allow line continuation */
817 if (isOperator(lexer->cur_token))
818 {
819 return lexer->cur_token;
820 }
821 return lexer->cur_token = TOKEN_NEWLINE;
822 }
823 else if (lexer->cur_c == ';')
824 {
825 advanceChar(lexer);
826 return lexer->cur_token = TOKEN_SEMICOLON;
827 }
828 else
829 {
830 int c = lexer->cur_c;
831 advanceChar(lexer);
832 return lexer->cur_token = c;
833 }
834 }
835 return lexer->cur_token = TOKEN_EOF;
836 }
837
/* Prepare `lexer` for a new input file: allocate the token string, load
 * the first two characters of the input, skip a leading `#!` line and
 * read the first token.
 * Every character field is given a defined value before the first
 * advance, because advanceChar() shifts cur_c and next_c along before
 * reading; the function therefore does not depend on how the caller set
 * up the structure. The token string must be released with deInitLexer(). */
static void initLexer (lexerState *lexer)
{
	lexer->prev_c = '\0';
	lexer->cur_c = '\0';
	lexer->next_c = '\0';
	lexer->token_str = vStringNew();
	lexer->first_token = true;
	lexer->cur_token = TOKEN_NONE;

	/* fill cur_c and next_c with the first two characters of the file */
	advanceNChar(lexer, 2);
	lexer->prev_c = '\0';

	/* a `#!` at the very start is skipped like a comment */
	if (lexer->cur_c == '#' && lexer->next_c == '!')
	{
		skipComment(lexer);
	}
	advanceToken(lexer, true, false);
}
852
/* Release the token string owned by the lexer and clear the pointer so
 * that it cannot be used afterwards */
static void deInitLexer (lexerState *lexer)
{
	vString *const owned = lexer->token_str;

	lexer->token_str = NULL;
	vStringDelete(owned);
}
858
#if 0
/* Debugging aid (compiled out): print the line number, the current token,
 * the current character and the token string of the lexer.
 * The conversion specifiers match the field types (unsigned long line,
 * int cur_token) and the token string is printed through "%s", never used
 * as a format string itself. */
static void debugLexer (lexerState *lexer)
{
	printf("Current lexer state: line %lu, token (%d), cur char `%c`, token str:\n\t`%s`\n",
		   lexer->line, lexer->cur_token, lexer->cur_c,
		   vStringValue(lexer->token_str));
}
#endif
867
/* Emit a definition tag.
 *   ident       - name of the tag
 *   type        - currently unused (see the note in the body)
 *   arg_list    - text stored in the signature field
 *   kind        - kind of the tag; nothing is emitted for K_NONE
 *   line, pos   - location recorded for the tag
 *   scope       - name of the enclosing scope
 *   parent_kind - kind of the enclosing scope; with K_NONE no scope
 *                 information is attached */
static void addTag (vString* ident, const char* type, const char* arg_list, int kind, unsigned long line, MIOPos pos, vString *scope, int parent_kind)
{
	tagEntryInfo entry;

	if (kind == K_NONE)
	{
		return;
	}

	initTagEntry(&entry, vStringValue(ident), kind);

	entry.lineNumber = line;
	entry.filePosition = pos;
	entry.sourceFileName = getInputFileName();
	entry.extensionFields.signature = arg_list;
	/* Setting extensionFields.varType from `type` is left out: it needs a workaround */

	if (parent_kind != K_NONE)
	{
		entry.extensionFields.scopeKindIndex = parent_kind;
		entry.extensionFields.scopeName = vStringValue(scope);
	}

	makeTagEntry(&entry);
}
890
/* Emit a reference tag.
 *   ident       - name of the tag
 *   kind        - kind of the tag; nothing is emitted for K_NONE
 *   role        - role of the reference, an index into the kind's role table
 *   line, pos   - location recorded for the tag
 *   scope       - name of the enclosing scope
 *   parent_kind - kind of the enclosing scope; with K_NONE no scope
 *                 information is attached */
static void addReferenceTag (vString* ident, int kind, int role, unsigned long line, MIOPos pos, vString* scope, int parent_kind)
{
	tagEntryInfo entry;

	if (kind == K_NONE)
	{
		return;
	}

	initRefTagEntry(&entry, vStringValue(ident), kind, role);
	entry.lineNumber = line;
	entry.filePosition = pos;

	if (parent_kind != K_NONE)
	{
		entry.extensionFields.scopeKindIndex = parent_kind;
		entry.extensionFields.scopeName = vStringValue(scope);
	}

	makeTagEntry(&entry);
}
908
909 /* Skip tokens until one of the goal tokens is hit. Escapes when level = 0 if there are no goal tokens.
910 * Keeps track of balanced ()'s, []'s, and {}'s and ignores the goal tokens within those pairings */
skipUntil(lexerState * lexer,int goal_tokens[],int num_goal_tokens)911 static void skipUntil (lexerState *lexer, int goal_tokens[], int num_goal_tokens)
912 {
913 int block_level = 0;
914
915 while (lexer->cur_token != TOKEN_EOF)
916 {
917 /* check if the keyword is reached, only if outside a block */
918 if (block_level == 0)
919 {
920 int ii = 0;
921 for(ii = 0; ii < num_goal_tokens; ii++)
922 {
923 if (lexer->cur_token == goal_tokens[ii])
924 {
925 break;
926 }
927 }
928 if (ii < num_goal_tokens)
929 {
930 /* parse the next token */
931 advanceToken(lexer, true, false);
932 break;
933 }
934 }
935
936 /* take into account nested blocks */
937 switch (lexer->cur_token)
938 {
939 case TOKEN_OPEN_BLOCK:
940 block_level++;
941 break;
942 case TOKEN_CLOSE_BLOCK:
943 block_level--;
944 break;
945 default:
946 break;
947 }
948
949 /* Has to be after the token switch to catch the case when we start with the initial level token */
950 if (num_goal_tokens == 0 && block_level == 0)
951 {
952 break;
953 }
954
955 advanceToken(lexer, true, false);
956 }
957 }
958
959 /* Skip until the end of the block */
skipUntilEnd(lexerState * lexer)960 static void skipUntilEnd (lexerState *lexer)
961 {
962 int goal_tokens[] = { TOKEN_CLOSE_BLOCK };
963
964 skipUntil(lexer, goal_tokens, 1);
965 }
966
967 /* Skip a function body after assignment operator '='
968 * Beware of continuation lines after operators
969 * */
skipBody(lexerState * lexer)970 static void skipBody (lexerState *lexer)
971 {
972 /* assume position just after '=' */
973 while (lexer->cur_token != TOKEN_EOF && lexer->cur_token != TOKEN_NEWLINE)
974 {
975 advanceToken(lexer, true, false);
976
977 if (lexer->cur_token == TOKEN_OPEN_BLOCK)
978 {
979 /* pass the keyword */
980 advanceToken(lexer, true, false);
981 skipUntilEnd(lexer);
982 /* the next token is already selected */
983 }
984 }
985 }
986
987 /* Short function format:
988 * <ident> ( [<args>] ) [::<type>] [<where>] = [begin] <body> [end]
989 * */
parseShortFunction(lexerState * lexer,vString * scope,int parent_kind)990 static void parseShortFunction (lexerState *lexer, vString *scope, int parent_kind)
991 {
992 /* assume the current char is just after identifier */
993 vString *name;
994 vString *arg_list;
995 unsigned long line;
996 MIOPos pos;
997
998 /* should be an open parenthesis after identifier
999 * with potentially parametric type */
1000 skipWhitespace(lexer, false);
1001 if (lexer->cur_c == '{')
1002 {
1003 scanCurlyBlock(lexer);
1004 skipWhitespace(lexer, false);
1005 }
1006
1007 if (lexer->cur_c != '(')
1008 {
1009 advanceToken(lexer, true, false);
1010 return;
1011 }
1012
1013 name = vStringNewCopy(lexer->token_str);
1014 line = lexer->line;
1015 pos = lexer->pos;
1016
1017 /* scan argument list */
1018 advanceToken(lexer, true, false);
1019 arg_list = vStringNewCopy(lexer->token_str);
1020
1021 /* scan potential type casting */
1022 advanceToken(lexer, true, false);
1023 if (lexer->cur_token == TOKEN_TYPE_ANNOTATION)
1024 {
1025 vStringCat(arg_list, lexer->token_str);
1026 advanceToken(lexer, true, false);
1027 }
1028 /* scan potential type union with 'where' */
1029 if (lexer->cur_token == TOKEN_TYPE_WHERE)
1030 {
1031 vStringPut(arg_list, ' ');
1032 vStringCat(arg_list, lexer->token_str);
1033 advanceToken(lexer, true, false);
1034 }
1035
1036 /* scan equal sign, ignore `==` and `=>` */
1037 if (!(lexer->cur_token == '=' &&
1038 lexer->cur_c != '=' &&
1039 lexer->cur_c != '>'))
1040 {
1041 vStringDelete(name);
1042 vStringDelete(arg_list);
1043 return;
1044 }
1045
1046 addTag(name, NULL, vStringValue(arg_list), K_FUNCTION, line, pos, scope, parent_kind);
1047
1048 /* scan until end of function definition */
1049 skipBody(lexer);
1050
1051 /* Should end on a new line, parse next token */
1052 advanceToken(lexer, true, false);
1053 lexer->first_token = true;
1054
1055 vStringDelete(name);
1056 vStringDelete(arg_list);
1057 }
1058
1059 /* Function format:
1060 * function <ident> ( [<args>] ) [::<type>] [<where>] [<body>] end
1061 * */
parseFunction(lexerState * lexer,vString * scope,int parent_kind)1062 static void parseFunction (lexerState *lexer, vString *scope, int parent_kind)
1063 {
1064 vString *name;
1065 vString *arg_list;
1066 vString *local_scope;
1067 int local_parent_kind;
1068 unsigned long line;
1069 MIOPos pos;
1070
1071 advanceToken(lexer, true, false);
1072 if (lexer->cur_token != TOKEN_IDENTIFIER)
1073 {
1074 return;
1075 }
1076 else if (lexer->cur_c == '.')
1077 {
1078 local_scope = vStringNewCopy(lexer->token_str);
1079 local_parent_kind = K_MODULE;
1080 advanceChar(lexer);
1081 advanceToken(lexer, true, false);
1082 }
1083 else
1084 {
1085 local_scope = vStringNewCopy(scope);
1086 local_parent_kind = parent_kind;
1087 }
1088
1089 /* Scan for parametric type constructor */
1090 skipWhitespace(lexer, false);
1091 if (lexer->cur_c == '{')
1092 {
1093 scanCurlyBlock(lexer);
1094 skipWhitespace(lexer, false);
1095 }
1096
1097 name = vStringNewCopy(lexer->token_str);
1098 arg_list = vStringNew();
1099 line = lexer->line;
1100 pos = lexer->pos;
1101
1102 advanceToken(lexer, true, false);
1103 if (lexer->cur_token == TOKEN_PAREN_BLOCK)
1104 {
1105 vStringCopy(arg_list, lexer->token_str);
1106
1107 /* scan potential type casting */
1108 advanceToken(lexer, true, false);
1109 if (lexer->cur_token == TOKEN_TYPE_ANNOTATION)
1110 {
1111 vStringCat(arg_list, lexer->token_str);
1112 advanceToken(lexer, true, false);
1113 }
1114 /* scan potential type union with 'where' */
1115 if (lexer->cur_token == TOKEN_TYPE_WHERE)
1116 {
1117 vStringPut(arg_list, ' ');
1118 vStringCat(arg_list, lexer->token_str);
1119 advanceToken(lexer, true, false);
1120 }
1121
1122 addTag(name, NULL, vStringValue(arg_list), K_FUNCTION, line, pos, local_scope, local_parent_kind);
1123 addToScope(scope, name);
1124 parseExpr(lexer, true, K_FUNCTION, scope);
1125 }
1126 else if (lexer->cur_token == TOKEN_CLOSE_BLOCK)
1127 {
1128 /* Function without method */
1129 addTag(name, NULL, NULL, K_FUNCTION, line, pos, local_scope, local_parent_kind);
1130 /* Go to the closing 'end' keyword */
1131 skipUntilEnd(lexer);
1132 }
1133
1134 vStringDelete(name);
1135 vStringDelete(arg_list);
1136 vStringDelete(local_scope);
1137 }
1138
1139 /* Macro format:
1140 * "macro" <ident>()
1141 */
parseMacro(lexerState * lexer,vString * scope,int parent_kind)1142 static void parseMacro (lexerState *lexer, vString *scope, int parent_kind)
1143 {
1144 vString *name;
1145 unsigned long line;
1146 MIOPos pos;
1147
1148 advanceToken(lexer, true, false);
1149 if (lexer->cur_token != TOKEN_IDENTIFIER)
1150 {
1151 return;
1152 }
1153
1154 name = vStringNewCopy(lexer->token_str);
1155 line = lexer->line;
1156 pos = lexer->pos;
1157
1158 advanceToken(lexer, true, false);
1159 if (lexer->cur_token == TOKEN_PAREN_BLOCK)
1160 {
1161 addTag(name, NULL, vStringValue(lexer->token_str), K_MACRO, line, pos, scope, parent_kind);
1162 }
1163
1164 skipUntilEnd(lexer);
1165 vStringDelete(name);
1166 }
1167
1168 /* Const format:
1169 * "const" <ident>
1170 */
parseConst(lexerState * lexer,vString * scope,int parent_kind)1171 static void parseConst (lexerState *lexer, vString *scope, int parent_kind)
1172 {
1173 vString *name;
1174
1175 advanceToken(lexer, true, false);
1176 if (lexer->cur_token != TOKEN_IDENTIFIER)
1177 {
1178 return;
1179 }
1180
1181 name = vStringNewCopy(lexer->token_str);
1182
1183 advanceToken(lexer, true, false);
1184 if (lexer->cur_token == TOKEN_TYPE_ANNOTATION)
1185 {
1186 addTag(name, "const", vStringValue(lexer->token_str), K_CONSTANT, lexer->line, lexer->pos, scope, parent_kind);
1187 advanceToken(lexer, true, false);
1188 }
1189 else
1190 {
1191 addTag(name, "const", NULL, K_CONSTANT, lexer->line, lexer->pos, scope, parent_kind);
1192 }
1193
1194 vStringDelete(name);
1195 }
1196
1197 /* Type format:
1198 * [ "abstract" | "primitive" ] "type" <ident>
1199 */
parseType(lexerState * lexer,vString * scope,int parent_kind)1200 static void parseType (lexerState *lexer, vString *scope, int parent_kind)
1201 {
1202 advanceToken(lexer, true, false);
1203 if (lexer->cur_token != TOKEN_IDENTIFIER)
1204 {
1205 return;
1206 }
1207
1208 addTag(lexer->token_str, NULL, NULL, K_TYPE, lexer->line, lexer->pos, scope, parent_kind);
1209
1210 skipUntilEnd(lexer);
1211 }
1212
1213 /* Module format:
1214 * [ "baremodule" | "module" ] <ident>
1215 */
parseModule(lexerState * lexer,vString * scope,int parent_kind)1216 static void parseModule (lexerState *lexer, vString *scope, int parent_kind)
1217 {
1218 advanceToken(lexer, true, false);
1219 if (lexer->cur_token != TOKEN_IDENTIFIER)
1220 {
1221 return;
1222 }
1223
1224 addTag(lexer->token_str, NULL, NULL, K_MODULE, lexer->line, lexer->pos, scope, parent_kind);
1225 addToScope(scope, lexer->token_str);
1226 advanceToken(lexer, true, false);
1227 parseExpr(lexer, true, K_MODULE, scope);
1228 }
1229
1230 /*
1231 * Parse comma separated entity in import/using expressions. An entity could be
1232 * in the form of "Module" or "Module.symbol". The lexer should be at the end
1233 * of "Module", and this function will take it to the end of the entity
1234 * (whitespaces also skipped).
1235 */
parseImportEntity(lexerState * lexer,vString * scope,int token_type,int parent_kind)1236 static void parseImportEntity (lexerState *lexer, vString *scope, int token_type, int parent_kind)
1237 {
1238 if (lexer->cur_c == '.')
1239 {
1240 if (token_type == TOKEN_IMPORT)
1241 {
1242 vString *module_name = vStringNewCopy(lexer->token_str);
1243 addReferenceTag(module_name, K_MODULE, JULIA_MODULE_NAMESPACE, lexer->line, lexer->pos, scope, parent_kind);
1244 advanceChar(lexer);
1245 advanceToken(lexer, true, false);
1246 addReferenceTag(lexer->token_str, K_UNKNOWN, JULIA_UNKNOWN_IMPORTED, lexer->line, lexer->pos, module_name, K_MODULE);
1247 vStringDelete(module_name);
1248 }
1249 else /* if (token_type == TOKEN_USING) */
1250 {
1251 /* using Module.symbol is invalid, so we advance the lexer but don't tag it. */
1252 advanceChar(lexer);
1253 advanceToken(lexer, true, false);
1254 }
1255 }
1256 else
1257 {
1258 if (token_type == TOKEN_IMPORT)
1259 {
1260 addReferenceTag(lexer->token_str, K_MODULE, JULIA_MODULE_IMPORTED, lexer->line, lexer->pos, scope, parent_kind);
1261 }
1262 else /* if (token_type == TOKEN_USING) */
1263 {
1264 addReferenceTag(lexer->token_str, K_MODULE, JULIA_MODULE_USED, lexer->line, lexer->pos, scope, parent_kind);
1265 }
1266 }
1267 }
1268
1269 /* Parse import/using expressions with a colon, like: */
1270 /* import Module: symbol1, symbol2 */
1271 /* using Module: symbol1, symbol2 */
1272 /* The lexer should be at the end of "Module", and this function will take it
1273 * to the end of the token after this expression (whitespaces also skipped). */
parseColonImportExpr(lexerState * lexer,vString * scope,int token_type,int parent_kind)1274 static void parseColonImportExpr (lexerState *lexer, vString *scope, int token_type, int parent_kind)
1275 {
1276 int symbol_role;
1277 if (token_type == TOKEN_IMPORT)
1278 {
1279 symbol_role = JULIA_UNKNOWN_IMPORTED;
1280 }
1281 else /* if (token_type == TOKEN_USING) */
1282 {
1283 symbol_role = JULIA_UNKNOWN_USED;
1284 }
1285 vString *name = vStringNewCopy(lexer->token_str);
1286 addReferenceTag(name, K_MODULE, JULIA_MODULE_NAMESPACE, lexer->line, lexer->pos, scope, parent_kind);
1287 advanceChar(lexer);
1288 advanceToken(lexer, true, false);
1289 if (lexer->cur_token == TOKEN_NEWLINE)
1290 {
1291 advanceToken(lexer, true, false);
1292 }
1293 while (lexer->cur_token == TOKEN_IDENTIFIER || lexer->cur_token == TOKEN_MACROCALL)
1294 {
1295 addReferenceTag(lexer->token_str, K_UNKNOWN, symbol_role, lexer->line, lexer->pos, name, K_MODULE);
1296 if (lexer->cur_c == ',')
1297 {
1298 advanceChar(lexer);
1299 advanceToken(lexer, true, false);
1300 if (lexer->cur_token == TOKEN_NEWLINE)
1301 {
1302 advanceToken(lexer, true, false);
1303 }
1304 }
1305 else
1306 {
1307 advanceToken(lexer, true, false);
1308 }
1309 }
1310 vStringDelete(name);
1311 }
1312
1313 /* Import format:
1314 * [ "import" | "using" ] <ident> [: <name>]
1315 */
parseImport(lexerState * lexer,vString * scope,int token_type,int parent_kind)1316 static void parseImport (lexerState *lexer, vString *scope, int token_type, int parent_kind)
1317 {
1318 /* capture the imported name */
1319 advanceToken(lexer, true, false);
1320 /* import Mod1: symbol1, symbol2 */
1321 /* using Mod1: symbol1, symbol2 */
1322 if (lexer->cur_c == ':')
1323 {
1324 parseColonImportExpr(lexer, scope, token_type, parent_kind);
1325 }
1326 /* All other situations, like import/using Mod1, Mod2.symbol1, Mod3... */
1327 else
1328 {
1329 while (lexer->cur_token == TOKEN_IDENTIFIER || lexer->cur_token == TOKEN_MACROCALL)
1330 {
1331 parseImportEntity(lexer, scope, token_type, parent_kind);
1332 if (lexer->cur_c == ',')
1333 {
1334 advanceChar(lexer);
1335 advanceToken(lexer, true, false);
1336 if (lexer->cur_token == TOKEN_NEWLINE)
1337 {
1338 advanceToken(lexer, true, false);
1339 }
1340 }
1341 else
1342 {
1343 advanceToken(lexer, true, false);
1344 }
1345 }
1346 }
1347 }
1348
1349 /* Structs format:
1350 * "struct" <ident>[{<param>}] [<:<type>]; <fields> <inner constructor> end
1351 * */
parseStruct(lexerState * lexer,vString * scope,int parent_kind)1352 static void parseStruct (lexerState *lexer, vString *scope, int parent_kind)
1353 {
1354 vString *name;
1355 vString *field;
1356 size_t old_scope_len;
1357 unsigned long line;
1358 MIOPos pos;
1359
1360 advanceToken(lexer, true, false);
1361 if (lexer->cur_token != TOKEN_IDENTIFIER)
1362 {
1363 return;
1364 }
1365
1366 name = vStringNewCopy(lexer->token_str);
1367 field = vStringNew();
1368 line = lexer->line;
1369 pos = lexer->pos;
1370
1371 /* scan parametrization */
1372 advanceToken(lexer, true, false);
1373 if (lexer->cur_token == TOKEN_CURLY_BLOCK)
1374 {
1375 addTag(name, NULL, vStringValue(lexer->token_str), K_STRUCT, line, pos, scope, parent_kind);
1376 advanceToken(lexer, true, false);
1377 }
1378 else
1379 {
1380 addTag(name, NULL, NULL, K_STRUCT, line, pos, scope, parent_kind);
1381 }
1382 addToScope(scope, name);
1383
1384 /* skip inheritance */
1385 if (lexer->cur_token == TOKEN_TYPE_ANNOTATION)
1386 {
1387 advanceToken(lexer, true, false);
1388 }
1389
1390 /* keep the struct scope in memory to reset it after parsing constructors */
1391 old_scope_len = vStringLength(scope);
1392 /* Parse fields and inner constructors */
1393 while (lexer->cur_token != TOKEN_EOF && lexer->cur_token != TOKEN_CLOSE_BLOCK)
1394 {
1395 if (lexer->cur_token == TOKEN_IDENTIFIER && lexer->first_token)
1396 {
1397 if (strcmp(vStringValue(lexer->token_str), vStringValue(name)) == 0)
1398 {
1399 /* inner constructor */
1400 parseShortFunction(lexer, scope, K_STRUCT);
1401 continue;
1402 }
1403
1404 vStringCopy(field, lexer->token_str);
1405
1406 /* parse type annotation */
1407 advanceToken(lexer, true, false);
1408 if (lexer->cur_token == TOKEN_TYPE_ANNOTATION)
1409 {
1410 addTag(field, NULL, vStringValue(lexer->token_str), K_FIELD, lexer->line, lexer->pos, scope, K_STRUCT);
1411 advanceToken(lexer, true, false);
1412 }
1413 else
1414 {
1415 addTag(field, NULL, NULL, K_FIELD, lexer->line, lexer->pos, scope, K_STRUCT);
1416 }
1417 }
1418 else if (lexer->cur_token == TOKEN_FUNCTION)
1419 {
1420 /* inner constructor */
1421 parseFunction(lexer, scope, K_STRUCT);
1422 }
1423 else
1424 {
1425 /* Get next token */
1426 advanceToken(lexer, true, false);
1427 }
1428 resetScope(scope, old_scope_len);
1429 }
1430
1431 vStringDelete(name);
1432 vStringDelete(field);
1433 }
1434
1435
/*
 * Statement dispatcher: walks the token stream and hands every recognized
 * construct to its dedicated parser.
 *
 * lexer: lexer state shared by all parsers.
 * delim: when true, return as soon as the block nesting level (which starts
 *        at 1) drops to zero or below; when false, run until TOKEN_EOF.
 * kind:  kind forwarded to the sub-parsers as the parent kind.
 * scope: current scope string; after each handled statement it is trimmed
 *        back to the length it had at the start of that iteration.
 */
static void parseExpr (lexerState *lexer, bool delim, int kind, vString *scope)
{
	int level = 1;
	size_t old_scope_len;
	vString *local_scope = NULL;

	while (lexer->cur_token != TOKEN_EOF)
	{
		old_scope_len = vStringLength(scope);
		/* Advance token and update if this is a new line */
		while (lexer->cur_token == TOKEN_NEWLINE ||
		       lexer->cur_token == TOKEN_SEMICOLON ||
		       lexer->cur_token == TOKEN_NONE )
		{
			advanceToken(lexer, true, false);
		}

		/* Make sure every case advances the token
		 * otherwise we can be stuck in infinite loop */
		switch (lexer->cur_token)
		{
			case TOKEN_CONST:
				parseConst(lexer, scope, kind);
				break;
			case TOKEN_FUNCTION:
				parseFunction(lexer, scope, kind);
				break;
			case TOKEN_MACRO:
				parseMacro(lexer, scope, kind);
				break;
			case TOKEN_MODULE:
				parseModule(lexer, scope, kind);
				break;
			case TOKEN_STRUCT:
				parseStruct(lexer, scope, kind);
				break;
			case TOKEN_TYPE:
				parseType(lexer, scope, kind);
				break;
			case TOKEN_IMPORT:
				parseImport(lexer, scope, TOKEN_IMPORT, kind);
				break;
			case TOKEN_USING:
				parseImport(lexer, scope, TOKEN_USING, kind);
				/* Must not fall through: the identifier handling below would
				 * otherwise run on whatever token follows the using statement. */
				break;
			case TOKEN_IDENTIFIER:
				if (lexer->first_token && lexer->cur_c == '.')
				{
					/* Qualified name at the start of a statement
					 * ("Module.name"): the leading identifier becomes the
					 * scope of a possible short function definition. */
					if (local_scope == NULL)
					{
						local_scope = vStringNew();
					}
					vStringCopy(local_scope, lexer->token_str);
					advanceChar(lexer);
					/* next token, but keep the first_token value */
					advanceToken(lexer, true, true);
					skipWhitespace(lexer, false);
					if (lexer->cur_c == '(')
					{
						parseShortFunction(lexer, local_scope, K_MODULE);
					}
				}
				else
				{
					skipWhitespace(lexer, false);
					if (lexer->first_token && (lexer->cur_c == '(' || lexer->cur_c == '{'))
					{
						/* "name(" or "name{" at the start of a statement:
						 * candidate short function definition. */
						parseShortFunction(lexer, scope, kind);
					}
					else
					{
						advanceToken(lexer, true, false);
					}
				}
				break;
			case TOKEN_OPEN_BLOCK:
				level++;
				advanceToken(lexer, true, false);
				break;
			case TOKEN_CLOSE_BLOCK:
				level--;
				advanceToken(lexer, true, false);
				break;
			default:
				advanceToken(lexer, true, false);
				break;
		}
		resetScope(scope, old_scope_len);
		if (delim && level <= 0)
		{
			break;
		}
	}
	vStringDelete(local_scope);
}
1530
findJuliaTags(void)1531 static void findJuliaTags (void)
1532 {
1533 lexerState lexer;
1534 vString* scope = vStringNew();
1535 initLexer(&lexer);
1536
1537 parseExpr(&lexer, false, K_NONE, scope);
1538 vStringDelete(scope);
1539
1540 deInitLexer(&lexer);
1541 }
1542
JuliaParser(void)1543 extern parserDefinition* JuliaParser (void)
1544 {
1545 static const char *const extensions [] = { "jl", NULL };
1546 parserDefinition* def = parserNew ("Julia");
1547 def->kindTable = JuliaKinds;
1548 def->kindCount = ARRAY_SIZE (JuliaKinds);
1549 def->extensions = extensions;
1550 def->parser = findJuliaTags;
1551 def->keywordTable = JuliaKeywordTable;
1552 def->keywordCount = ARRAY_SIZE (JuliaKeywordTable);
1553 return def;
1554 }
1555