xref: /Universal-ctags/parsers/julia.c (revision b9feb3309d67ccd5c1f32e1a8cd0ae9a88d4291e)
181ac50a2Sgetzze /*
281ac50a2Sgetzze *   Copyright (c) 2020-2021, getzze <getzze@gmail.com>
381ac50a2Sgetzze *
481ac50a2Sgetzze *   This source code is released for free distribution under the terms of the
581ac50a2Sgetzze *   GNU General Public License version 2 or (at your option) any later version.
681ac50a2Sgetzze *
781ac50a2Sgetzze *   This module contains functions for generating tags for Julia files.
881ac50a2Sgetzze *
981ac50a2Sgetzze *   Documented 'kinds':
1081ac50a2Sgetzze *       https://docs.julialang.org/en/v1/manual/documentation/#Syntax-Guide
1181ac50a2Sgetzze *   Language parser in Scheme:
1281ac50a2Sgetzze *       https://github.com/JuliaLang/julia/blob/master/src/julia-parser.scm
1381ac50a2Sgetzze */
1481ac50a2Sgetzze 
1581ac50a2Sgetzze /*
1681ac50a2Sgetzze *   INCLUDE FILES
1781ac50a2Sgetzze */
1881ac50a2Sgetzze #include "general.h"    /* must always come first */
1981ac50a2Sgetzze 
2081ac50a2Sgetzze #include <string.h>
2181ac50a2Sgetzze 
2281ac50a2Sgetzze #include "keyword.h"
2381ac50a2Sgetzze #include "parse.h"
2481ac50a2Sgetzze #include "entry.h"
2581ac50a2Sgetzze #include "options.h"
2681ac50a2Sgetzze #include "read.h"
2781ac50a2Sgetzze #include "routines.h"
2881ac50a2Sgetzze #include "vstring.h"
2981ac50a2Sgetzze #include "xtag.h"
3081ac50a2Sgetzze 
3181ac50a2Sgetzze /*
3281ac50a2Sgetzze *   MACROS
3381ac50a2Sgetzze */
/* Upper bound on the number of characters advanceAndStoreChar() keeps in
 * lexerState::token_str; characters read beyond it are consumed but dropped. */
#define MAX_STRING_LENGTH 256
3581ac50a2Sgetzze 
3681ac50a2Sgetzze /*
3781ac50a2Sgetzze *   DATA DEFINITIONS
3881ac50a2Sgetzze */
/* Tag kinds known to the Julia parser.
 * The enumerators K_CONSTANT..K_UNKNOWN are listed in the same order as the
 * entries of JuliaKinds[]; K_NONE has no entry in that table. */
typedef enum {
    K_CONSTANT,
    K_FUNCTION,
    K_FIELD,
    K_MACRO,
    K_MODULE,
    K_STRUCT,
    K_TYPE,
    K_UNKNOWN,
    K_NONE
} JuliaKind;
5081ac50a2Sgetzze 
/* Roles of a module reference; same order as the entries of
 * JuliaModuleRoles[]. */
typedef enum {
    JULIA_MODULE_IMPORTED,
    JULIA_MODULE_USED,
    JULIA_MODULE_NAMESPACE,
} juliaModuleRole;
563cc79e5cSAmaiKinono 
/* Roles of a reference to a name of unknown kind; same order as the entries
 * of JuliaUnknownRoles[]. */
typedef enum {
    JULIA_UNKNOWN_IMPORTED,
    JULIA_UNKNOWN_USED,
} juliaUnknownRole;
613cc79e5cSAmaiKinono 
623cc79e5cSAmaiKinono /*
633cc79e5cSAmaiKinono *  using X               X = (kind:module, role:used)
643cc79e5cSAmaiKinono *
653cc79e5cSAmaiKinono *  using X: a, b         X = (kind:module, role:namespace)
663cc79e5cSAmaiKinono *                     a, b = (kind:unknown, role:used, scope:module:X)
673cc79e5cSAmaiKinono *
683cc79e5cSAmaiKinono *  import X              X = (kind:module, role:imported)
693cc79e5cSAmaiKinono *
703cc79e5cSAmaiKinono *  import X.a, Y.b    X, Y = (kind:module, role:namespace)
713cc79e5cSAmaiKinono *                     a, b = (kind:unknown, role:imported, scope:module:X)
723cc79e5cSAmaiKinono *
733cc79e5cSAmaiKinono *  import X: a, b     Same as the above one
743cc79e5cSAmaiKinono */
753cc79e5cSAmaiKinono static roleDefinition JuliaModuleRoles [] = {
763cc79e5cSAmaiKinono     { true, "imported", "loaded by \"import\"" },
773cc79e5cSAmaiKinono     { true, "used", "loaded by \"using\"" },
783cc79e5cSAmaiKinono     { true, "namespace", "only some symbols in it are imported" },
793cc79e5cSAmaiKinono };
803cc79e5cSAmaiKinono 
813cc79e5cSAmaiKinono static roleDefinition JuliaUnknownRoles [] = {
823cc79e5cSAmaiKinono     { true, "imported", "loaded by \"import\"" },
833cc79e5cSAmaiKinono     { true, "used", "loaded by \"using\""},
843cc79e5cSAmaiKinono };
853cc79e5cSAmaiKinono 
8681ac50a2Sgetzze static kindDefinition JuliaKinds [] = {
8781ac50a2Sgetzze     { true, 'c', "constant", "Constants"    },
8881ac50a2Sgetzze     { true, 'f', "function", "Functions"    },
8981ac50a2Sgetzze     { true, 'g', "field",    "Fields"       },
9081ac50a2Sgetzze     { true, 'm', "macro",    "Macros"       },
913cc79e5cSAmaiKinono     { true, 'n', "module",   "Modules",
923cc79e5cSAmaiKinono       ATTACH_ROLES(JuliaModuleRoles) },
9381ac50a2Sgetzze     { true, 's', "struct",   "Structures"   },
9481ac50a2Sgetzze     { true, 't', "type",     "Types"        },
953cc79e5cSAmaiKinono     { true, 'x', "unknown", "name defined in other modules",
963cc79e5cSAmaiKinono       .referenceOnly = true, ATTACH_ROLES(JuliaUnknownRoles) },
9781ac50a2Sgetzze };
9881ac50a2Sgetzze 
/* Token types produced by the lexer. Several of them are also used as the
 * values of JuliaKeywordTable[], so a keyword lookup directly yields a token
 * type. TOKEN_COUNT is the number of enumerators before it. */
typedef enum {
    TOKEN_NONE=0,         /* none */
    TOKEN_WHITESPACE,
    TOKEN_PAREN_BLOCK,
    TOKEN_BRACKET_BLOCK,
    TOKEN_CURLY_BLOCK,
    TOKEN_OPEN_BLOCK,
    TOKEN_CLOSE_BLOCK,
    TOKEN_TYPE_ANNOTATION,
    TOKEN_TYPE_WHERE,
    TOKEN_CONST,
    TOKEN_STRING,         /*  = 10 */
    TOKEN_COMMAND,
    TOKEN_MACROCALL,
    TOKEN_IDENTIFIER,
    TOKEN_MODULE,
    TOKEN_MACRO,
    TOKEN_FUNCTION,
    TOKEN_STRUCT,
    TOKEN_ENUM,
    TOKEN_TYPE,
    TOKEN_IMPORT,         /*  = 20 */
    TOKEN_USING,
    TOKEN_EXPORT,
    TOKEN_NEWLINE,
    TOKEN_SEMICOLON,
    TOKEN_COMPOSER_KWD,   /* KEYWORD only */
    TOKEN_EOF,
    TOKEN_COUNT
} tokenType;
12981ac50a2Sgetzze 
13081ac50a2Sgetzze static const keywordTable JuliaKeywordTable [] = {
13181ac50a2Sgetzze     /* TODO: Sort by keys. */
13281ac50a2Sgetzze     { "mutable",   TOKEN_COMPOSER_KWD },
13381ac50a2Sgetzze     { "primitive", TOKEN_COMPOSER_KWD },
13481ac50a2Sgetzze     { "abstract",  TOKEN_COMPOSER_KWD },
13581ac50a2Sgetzze 
13681ac50a2Sgetzze     { "if",        TOKEN_OPEN_BLOCK   },
13781ac50a2Sgetzze     { "for",       TOKEN_OPEN_BLOCK   },
13881ac50a2Sgetzze     { "while",     TOKEN_OPEN_BLOCK   },
13981ac50a2Sgetzze     { "try",       TOKEN_OPEN_BLOCK   },
14081ac50a2Sgetzze     { "do",        TOKEN_OPEN_BLOCK   },
14181ac50a2Sgetzze     { "begin",     TOKEN_OPEN_BLOCK   },
14281ac50a2Sgetzze     { "let",       TOKEN_OPEN_BLOCK   },
14381ac50a2Sgetzze     { "quote",     TOKEN_OPEN_BLOCK   },
14481ac50a2Sgetzze 
14581ac50a2Sgetzze     { "module",    TOKEN_MODULE       },
14681ac50a2Sgetzze     { "baremodule",TOKEN_MODULE       },
14781ac50a2Sgetzze 
1483cc79e5cSAmaiKinono     { "using",     TOKEN_USING        },
14981ac50a2Sgetzze     { "import",    TOKEN_IMPORT       },
15081ac50a2Sgetzze 
15181ac50a2Sgetzze     { "export",    TOKEN_EXPORT       },
15281ac50a2Sgetzze     { "const",     TOKEN_CONST        },
15381ac50a2Sgetzze     { "macro",     TOKEN_MACRO        },
15481ac50a2Sgetzze     { "function",  TOKEN_FUNCTION     },
15581ac50a2Sgetzze     { "struct",    TOKEN_STRUCT       },
15681ac50a2Sgetzze     { "type",      TOKEN_TYPE         },
15781ac50a2Sgetzze     { "where",     TOKEN_TYPE_WHERE   },
15881ac50a2Sgetzze     { "end",       TOKEN_CLOSE_BLOCK  },
15981ac50a2Sgetzze };
16081ac50a2Sgetzze 
16181ac50a2Sgetzze typedef struct {
16281ac50a2Sgetzze     /* Characters */
16381ac50a2Sgetzze     int prev_c;
16481ac50a2Sgetzze     int cur_c;
16581ac50a2Sgetzze     int next_c;
16681ac50a2Sgetzze 
16781ac50a2Sgetzze     /* Tokens */
16881ac50a2Sgetzze     bool first_token;
16981ac50a2Sgetzze     int cur_token;
17081ac50a2Sgetzze     vString* token_str;
17181ac50a2Sgetzze     unsigned long line;
17281ac50a2Sgetzze     MIOPos pos;
17381ac50a2Sgetzze } lexerState;
17481ac50a2Sgetzze 
17581ac50a2Sgetzze /*
17681ac50a2Sgetzze *   FUNCTION PROTOTYPES
17781ac50a2Sgetzze */
17881ac50a2Sgetzze 
17981ac50a2Sgetzze static void parseExpr (lexerState *lexer, bool delim, int kind, vString *scope);
18081ac50a2Sgetzze 
18181ac50a2Sgetzze static void scanParenBlock (lexerState *lexer);
18281ac50a2Sgetzze 
18381ac50a2Sgetzze /*
18481ac50a2Sgetzze *   FUNCTION DEFINITIONS
18581ac50a2Sgetzze */
18681ac50a2Sgetzze 
/* Returns non-zero when the string `what` ends with the string `withwhat`,
 * zero otherwise. An empty `withwhat` always matches.
 * Both arguments must be NUL-terminated strings.
 * Lengths are kept as size_t so that very long strings are not truncated. */
static int endswith(const char* what, const char* withwhat)
{
    const size_t what_len = strlen(what);
    const size_t suffix_len = strlen(withwhat);

    if (suffix_len > what_len)
    {
        return 0;
    }

    /* Compare the suffix against the tail of `what` of the same length */
    return strcmp(withwhat, what + (what_len - suffix_len)) == 0;
}
19881ac50a2Sgetzze 
19981ac50a2Sgetzze /* Resets the scope string to the old length */
resetScope(vString * scope,size_t old_len)20081ac50a2Sgetzze static void resetScope (vString *scope, size_t old_len)
20181ac50a2Sgetzze {
20281ac50a2Sgetzze     vStringTruncate (scope, old_len);
20381ac50a2Sgetzze }
20481ac50a2Sgetzze 
20581ac50a2Sgetzze /* Adds a name to the end of the scope string */
addToScope(vString * scope,vString * name)20681ac50a2Sgetzze static void addToScope (vString *scope, vString *name)
20781ac50a2Sgetzze {
20881ac50a2Sgetzze     if (vStringLength(scope) > 0)
20981ac50a2Sgetzze     {
21081ac50a2Sgetzze         vStringPut(scope, '.');
21181ac50a2Sgetzze     }
21281ac50a2Sgetzze     vStringCat(scope, name);
21381ac50a2Sgetzze }
21481ac50a2Sgetzze 
21581ac50a2Sgetzze /* Reads a character from the file */
advanceChar(lexerState * lexer)21681ac50a2Sgetzze static void advanceChar (lexerState *lexer)
21781ac50a2Sgetzze {
21881ac50a2Sgetzze     lexer->prev_c = lexer->cur_c;
21981ac50a2Sgetzze     lexer->cur_c  = lexer->next_c;
22081ac50a2Sgetzze     lexer->next_c = getcFromInputFile();
22181ac50a2Sgetzze }
22281ac50a2Sgetzze 
22381ac50a2Sgetzze /* Reads N characters from the file */
advanceNChar(lexerState * lexer,int n)22481ac50a2Sgetzze static void advanceNChar (lexerState *lexer, int n)
22581ac50a2Sgetzze {
22681ac50a2Sgetzze     while (n--)
22781ac50a2Sgetzze     {
22881ac50a2Sgetzze         advanceChar(lexer);
22981ac50a2Sgetzze     }
23081ac50a2Sgetzze }
23181ac50a2Sgetzze 
23281ac50a2Sgetzze /* Store the current character in lexerState::token_str if there is space
23381ac50a2Sgetzze  * (set by MAX_STRING_LENGTH), and then read the next character from the file */
advanceAndStoreChar(lexerState * lexer)23481ac50a2Sgetzze static void advanceAndStoreChar (lexerState *lexer)
23581ac50a2Sgetzze {
23681ac50a2Sgetzze     if (vStringLength(lexer->token_str) < MAX_STRING_LENGTH)
23781ac50a2Sgetzze     {
23881ac50a2Sgetzze         vStringPut(lexer->token_str, (char) lexer->cur_c);
23981ac50a2Sgetzze     }
24081ac50a2Sgetzze     advanceChar(lexer);
24181ac50a2Sgetzze }
24281ac50a2Sgetzze 
/* Tells whether `c` is a whitespace character.
 * Space and tab always count; carriage return and line feed count only when
 * `newline` is true. */
static bool isWhitespace (int c, bool newline)
{
    if (c == ' ' || c == '\t')
    {
        return true;
    }
    return newline && (c == '\r' || c == '\n');
}
25181ac50a2Sgetzze 
/* Tells whether `c` is in the 7-bit ASCII range [0, 0x7F].
 * Negative values such as EOF are not ASCII. */
static bool isAscii (int c)
{
    return (c >= 0) && (c < 0x80);
}
25681ac50a2Sgetzze 
/* Tells whether `c` is one of the single ASCII characters treated as an
 * operator: % ^ & | * - + ~ < > , / ? = : */
static bool isOperator (int c)
{
    switch (c)
    {
        case '%': case '^': case '&': case '|':
        case '*': case '-': case '+': case '~':
        case '<': case '>': case ',': case '/':
        case '?': case '=': case ':':
            return true;
        default:
            return false;
    }
}
26881ac50a2Sgetzze 
/* Tells whether `c` may start an identifier: an ASCII letter, '_' or any
 * value >= 0xC0 (a lead byte of a multi-byte UTF-8 sequence).
 * This does not distinguish Unicode letters from Unicode operators.
 * isalpha() is only called on ASCII values, so it never sees EOF or a value
 * outside the unsigned char range. */
static bool isIdentifierFirstCharacter (int c)
{
    if (isAscii(c))
    {
        return isalpha (c) || c == '_';
    }
    return c >= 0xC0;
}
27481ac50a2Sgetzze 
/* Tells whether `c` may appear inside an identifier: anything accepted by
 * isIdentifierFirstCharacter(), an ASCII digit, '!' or any value >= 0x80
 * (any byte of a multi-byte UTF-8 sequence).
 * This does not distinguish Unicode letters from Unicode operators. */
static bool isIdentifierCharacter (int c)
{
    if (isIdentifierFirstCharacter(c))
    {
        return true;
    }
    if (isAscii(c))
    {
        return isdigit(c) || c == '!';
    }
    return c >= 0x80;
}
28081ac50a2Sgetzze 
/* Consumes consecutive whitespace starting at the current character.
 * Line breaks are skipped as well when `newline` is true. Nothing is stored
 * in token_str. */
static void skipWhitespace (lexerState *lexer, bool newline)
{
    while (isWhitespace(lexer->cur_c, newline))
    {
        advanceChar(lexer);
    }
}
28881ac50a2Sgetzze 
/* Tells whether a single quote that follows the character `c` is a transpose
 * operator. The transpose operator is only allowed after an identifier, a
 * number, an expression inside parentheses or an index, i.e. after an
 * identifier character, ')' or ']'. */
static bool isTranspose (int c)
{
    return (isIdentifierCharacter(c) || c == ')' || c == ']');
}
29481ac50a2Sgetzze 
29581ac50a2Sgetzze 
29681ac50a2Sgetzze /*
29781ac50a2Sgetzze  *  Lexer functions
29881ac50a2Sgetzze  * */
29981ac50a2Sgetzze 
30081ac50a2Sgetzze /* Check that the current character sequence is a type declaration or inheritance */
isTypeDecl(lexerState * lexer)30181ac50a2Sgetzze static bool isTypeDecl (lexerState *lexer)
30281ac50a2Sgetzze {
30381ac50a2Sgetzze     if ((lexer->prev_c != '.' && lexer->cur_c == '<' && lexer->next_c == ':') ||
30481ac50a2Sgetzze         (lexer->prev_c != '.' && lexer->cur_c == '>' && lexer->next_c == ':') ||
30581ac50a2Sgetzze         (lexer->cur_c == ':' && lexer->next_c == ':') )
30681ac50a2Sgetzze     {
30781ac50a2Sgetzze         return true;
30881ac50a2Sgetzze     }
30981ac50a2Sgetzze     return false;
31081ac50a2Sgetzze }
31181ac50a2Sgetzze 
31281ac50a2Sgetzze /* Check if the current char is a new line */
isNewLine(lexerState * lexer)31381ac50a2Sgetzze static bool isNewLine (lexerState *lexer)
31481ac50a2Sgetzze {
31581ac50a2Sgetzze     return (lexer->cur_c == '\n')? true: false;
31681ac50a2Sgetzze }
31781ac50a2Sgetzze 
31881ac50a2Sgetzze /* Check if the current char is a new line.
31981ac50a2Sgetzze  * If it is, skip the newline and return true */
skipNewLine(lexerState * lexer)32081ac50a2Sgetzze static bool skipNewLine (lexerState *lexer)
32181ac50a2Sgetzze {
32281ac50a2Sgetzze     if (isNewLine(lexer))
32381ac50a2Sgetzze     {
32481ac50a2Sgetzze         advanceChar(lexer);
32581ac50a2Sgetzze         return true;
32681ac50a2Sgetzze     }
32781ac50a2Sgetzze     return false;
32881ac50a2Sgetzze }
32981ac50a2Sgetzze 
33081ac50a2Sgetzze /* Skip a single comment or multiline comment
33181ac50a2Sgetzze  * A single line comment starts with #
33281ac50a2Sgetzze  * A multi-line comment is encapsulated in #=...=# and they are nesting
33381ac50a2Sgetzze  * */
skipComment(lexerState * lexer)33481ac50a2Sgetzze static void skipComment (lexerState *lexer)
33581ac50a2Sgetzze {
33681ac50a2Sgetzze     /* # */
33781ac50a2Sgetzze     if (lexer->next_c != '=')
33881ac50a2Sgetzze     {
33981ac50a2Sgetzze         advanceNChar(lexer, 1);
34081ac50a2Sgetzze         while (lexer->cur_c != EOF && lexer->cur_c != '\n')
34181ac50a2Sgetzze         {
34281ac50a2Sgetzze             advanceChar(lexer);
34381ac50a2Sgetzze         }
34481ac50a2Sgetzze     }
34581ac50a2Sgetzze     /* block comment */
34681ac50a2Sgetzze     else /* if (lexer->next_c == '=') */
34781ac50a2Sgetzze     {
34881ac50a2Sgetzze         int level = 1;
34981ac50a2Sgetzze         advanceNChar(lexer, 2);
35081ac50a2Sgetzze         while (lexer->cur_c != EOF && level > 0)
35181ac50a2Sgetzze         {
35281ac50a2Sgetzze             if (lexer->cur_c == '=' && lexer->next_c == '#')
35381ac50a2Sgetzze             {
35481ac50a2Sgetzze                 level--;
35581ac50a2Sgetzze                 advanceNChar(lexer, 2);
35681ac50a2Sgetzze             }
35781ac50a2Sgetzze             else if (lexer->cur_c == '#' && lexer->next_c == '=')
35881ac50a2Sgetzze             {
35981ac50a2Sgetzze                 level++;
36081ac50a2Sgetzze                 advanceNChar(lexer, 2);
36181ac50a2Sgetzze             }
36281ac50a2Sgetzze             else
36381ac50a2Sgetzze             {
36481ac50a2Sgetzze                 advanceChar(lexer);
36581ac50a2Sgetzze             }
36681ac50a2Sgetzze         }
36781ac50a2Sgetzze     }
36881ac50a2Sgetzze }
36981ac50a2Sgetzze 
/* Scans an identifier into token_str.
 * When `clear` is true token_str is emptied first; otherwise the identifier
 * is appended to what is already there.
 * The current character is stored unconditionally (callers are expected to
 * have checked that it starts an identifier); scanning then continues while
 * the current character is an identifier character. */
static void scanIdentifier (lexerState *lexer, bool clear)
{
    if (clear)
    {
        vStringClear(lexer->token_str);
    }

    do
    {
        advanceAndStoreChar(lexer);
    } while(lexer->cur_c != EOF && isIdentifierCharacter(lexer->cur_c));
}
38281ac50a2Sgetzze 
38381ac50a2Sgetzze /* Scan a quote-like expression.
38481ac50a2Sgetzze  * Allow for triple-character variand and interpolation with `$`.
38581ac50a2Sgetzze  * These last past the end of the line, so be careful
38681ac50a2Sgetzze  * not to store too much of them (see MAX_STRING_LENGTH). */
scanStringOrCommand(lexerState * lexer,int c)38781ac50a2Sgetzze static void scanStringOrCommand (lexerState *lexer, int c)
38881ac50a2Sgetzze {
38981ac50a2Sgetzze     bool istriple = false;
39081ac50a2Sgetzze 
39181ac50a2Sgetzze     /* Pass the first "quote"-character */
39281ac50a2Sgetzze     advanceAndStoreChar(lexer);
39381ac50a2Sgetzze 
39481ac50a2Sgetzze     /* Check for triple "quote"-character */
39581ac50a2Sgetzze     if (lexer->cur_c == c && lexer->next_c == c)
39681ac50a2Sgetzze     {
39781ac50a2Sgetzze         istriple = true;
39881ac50a2Sgetzze         advanceAndStoreChar(lexer);
39981ac50a2Sgetzze         advanceAndStoreChar(lexer);
40081ac50a2Sgetzze 
40181ac50a2Sgetzze         /* Cancel up to 2 "quote"-characters after opening the triple */
40281ac50a2Sgetzze         if (lexer->cur_c == c)
40381ac50a2Sgetzze         {
40481ac50a2Sgetzze             advanceAndStoreChar(lexer);
40581ac50a2Sgetzze             if (lexer->cur_c == c)
40681ac50a2Sgetzze             {
40781ac50a2Sgetzze                 advanceAndStoreChar(lexer);
40881ac50a2Sgetzze             }
40981ac50a2Sgetzze         }
41081ac50a2Sgetzze     }
41181ac50a2Sgetzze 
41281ac50a2Sgetzze     while (lexer->cur_c != EOF && lexer->cur_c != c)
41381ac50a2Sgetzze     {
41481ac50a2Sgetzze         /* Check for interpolation before checking for end of "quote" */
41581ac50a2Sgetzze         if (lexer->cur_c == '$' && lexer->next_c == '(')
41681ac50a2Sgetzze         {
41781ac50a2Sgetzze             advanceAndStoreChar(lexer);
41881ac50a2Sgetzze             scanParenBlock(lexer);
41981ac50a2Sgetzze             /* continue to avoid advance character again. Correct bug
42081ac50a2Sgetzze              * with "quote"-character just after closing parenthesis */
42181ac50a2Sgetzze             continue;
42281ac50a2Sgetzze         }
42381ac50a2Sgetzze 
42481ac50a2Sgetzze         if (lexer->cur_c == '\\' &&
42581ac50a2Sgetzze             (lexer->next_c == c || lexer->next_c == '\\'))
42681ac50a2Sgetzze         {
42781ac50a2Sgetzze             advanceAndStoreChar(lexer);
42881ac50a2Sgetzze         }
42981ac50a2Sgetzze         advanceAndStoreChar(lexer);
43081ac50a2Sgetzze 
43181ac50a2Sgetzze         /* Cancel up to 2 "quote"-characters if triple string */
43281ac50a2Sgetzze         if (istriple && lexer->cur_c == c)
43381ac50a2Sgetzze         {
43481ac50a2Sgetzze             advanceAndStoreChar(lexer);
43581ac50a2Sgetzze             if (lexer->cur_c == c)
43681ac50a2Sgetzze             {
43781ac50a2Sgetzze                 advanceAndStoreChar(lexer);
43881ac50a2Sgetzze             }
43981ac50a2Sgetzze         }
44081ac50a2Sgetzze     }
44181ac50a2Sgetzze     /* Pass the last "quote"-character */
44281ac50a2Sgetzze     advanceAndStoreChar(lexer);
44381ac50a2Sgetzze }
44481ac50a2Sgetzze 
44581ac50a2Sgetzze 
44681ac50a2Sgetzze /* Scan commands surrounded by backticks,
44781ac50a2Sgetzze  * possibly triple backticks */
scanCommand(lexerState * lexer)44881ac50a2Sgetzze static void scanCommand (lexerState *lexer)
44981ac50a2Sgetzze {
45081ac50a2Sgetzze     scanStringOrCommand(lexer, '`');
45181ac50a2Sgetzze }
45281ac50a2Sgetzze 
45381ac50a2Sgetzze /* Double-quoted strings,
45481ac50a2Sgetzze  * possibly triple doublequotes */
scanString(lexerState * lexer)45581ac50a2Sgetzze static void scanString (lexerState *lexer)
45681ac50a2Sgetzze {
45781ac50a2Sgetzze     scanStringOrCommand(lexer, '"');
45881ac50a2Sgetzze }
45981ac50a2Sgetzze 
46081ac50a2Sgetzze 
46181ac50a2Sgetzze /* This deals with character literals: 'n', '\n', '\uFFFF';
46281ac50a2Sgetzze  * and matrix transpose: A'.
46381ac50a2Sgetzze  * We'll use this approximate regexp for the literals:
46481ac50a2Sgetzze  * \' [^'] \' or \' \\ [^']+ \' or \' \\ \' \'
46581ac50a2Sgetzze  * Either way, we'll treat this token as a string, so it gets preserved */
scanCharacterOrTranspose(lexerState * lexer)46681ac50a2Sgetzze static bool scanCharacterOrTranspose (lexerState *lexer)
46781ac50a2Sgetzze {
46881ac50a2Sgetzze     if (isTranspose(lexer->prev_c))
46981ac50a2Sgetzze     {
47081ac50a2Sgetzze         /* deal with untranspose/transpose sequence */
47181ac50a2Sgetzze         while (lexer->cur_c != EOF && lexer->cur_c == '\'')
47281ac50a2Sgetzze         {
47381ac50a2Sgetzze             advanceAndStoreChar(lexer);
47481ac50a2Sgetzze         }
47581ac50a2Sgetzze         return false;
47681ac50a2Sgetzze     }
47781ac50a2Sgetzze 
47881ac50a2Sgetzze     //vStringClear(lexer->token_str);
47981ac50a2Sgetzze     advanceAndStoreChar(lexer);
48081ac50a2Sgetzze 
48181ac50a2Sgetzze     if (lexer->cur_c == '\\')
48281ac50a2Sgetzze     {
48381ac50a2Sgetzze         advanceAndStoreChar(lexer);
48481ac50a2Sgetzze         /* The \' \\ \' \' (literally '\'') case */
48581ac50a2Sgetzze         if (lexer->cur_c == '\'' && lexer->next_c == '\'')
48681ac50a2Sgetzze         {
48781ac50a2Sgetzze             advanceAndStoreChar(lexer);
48881ac50a2Sgetzze             advanceAndStoreChar(lexer);
48981ac50a2Sgetzze         }
49081ac50a2Sgetzze         /* The \' \\ [^']+ \' case */
49181ac50a2Sgetzze         else
49281ac50a2Sgetzze         {
49381ac50a2Sgetzze             while (lexer->cur_c != EOF && lexer->cur_c != '\'')
49481ac50a2Sgetzze             {
49581ac50a2Sgetzze                 advanceAndStoreChar(lexer);
49681ac50a2Sgetzze             }
49781ac50a2Sgetzze         }
49881ac50a2Sgetzze     }
49981ac50a2Sgetzze     /* The \' [^'] \' and  \' \' \' cases */
50081ac50a2Sgetzze     else if (lexer->next_c == '\'')
50181ac50a2Sgetzze     {
50281ac50a2Sgetzze         advanceAndStoreChar(lexer);
50381ac50a2Sgetzze         advanceAndStoreChar(lexer);
50481ac50a2Sgetzze     }
50581ac50a2Sgetzze     /* Otherwise it is malformed */
50681ac50a2Sgetzze     return true;
50781ac50a2Sgetzze }
50881ac50a2Sgetzze 
50981ac50a2Sgetzze /* Parse a block with opening and closing character */
scanBlock(lexerState * lexer,int open,int close,bool convert_newline)51081ac50a2Sgetzze static void scanBlock (lexerState *lexer, int open, int close, bool convert_newline)
51181ac50a2Sgetzze {
51281ac50a2Sgetzze     /* Assume the current char is `open` */
51381ac50a2Sgetzze     int level = 1;
51481ac50a2Sgetzze 
51581ac50a2Sgetzze     /* Pass the first opening */
51681ac50a2Sgetzze     advanceAndStoreChar(lexer);
51781ac50a2Sgetzze 
51881ac50a2Sgetzze     while (lexer->cur_c != EOF && level > 0)
51981ac50a2Sgetzze     {
52081ac50a2Sgetzze         /* Parse everything */
52181ac50a2Sgetzze         if (lexer->cur_c == ' ' || lexer->cur_c == '\t')
52281ac50a2Sgetzze         {
52381ac50a2Sgetzze             skipWhitespace(lexer, false);
52481ac50a2Sgetzze             vStringPut(lexer->token_str, ' ');
52581ac50a2Sgetzze         }
52681ac50a2Sgetzze         if (lexer->cur_c == '#')
52781ac50a2Sgetzze         {
52881ac50a2Sgetzze             skipComment(lexer);
52981ac50a2Sgetzze         }
53081ac50a2Sgetzze         else if (lexer->cur_c == '\"')
53181ac50a2Sgetzze         {
53281ac50a2Sgetzze             scanString(lexer);
53381ac50a2Sgetzze         }
53481ac50a2Sgetzze         else if (lexer->cur_c == '\'')
53581ac50a2Sgetzze         {
53681ac50a2Sgetzze             scanCharacterOrTranspose(lexer);
53781ac50a2Sgetzze         }
53881ac50a2Sgetzze 
53981ac50a2Sgetzze         /* Parse opening/closing */
54081ac50a2Sgetzze         if (lexer->cur_c == open)
54181ac50a2Sgetzze         {
54281ac50a2Sgetzze             level++;
54381ac50a2Sgetzze         }
54481ac50a2Sgetzze         else if (lexer->cur_c == close)
54581ac50a2Sgetzze         {
54681ac50a2Sgetzze             level--;
54781ac50a2Sgetzze         }
54881ac50a2Sgetzze 
54981ac50a2Sgetzze         if (convert_newline && skipNewLine(lexer))
55081ac50a2Sgetzze         {
55181ac50a2Sgetzze             vStringPut(lexer->token_str, ' ');
55281ac50a2Sgetzze         }
55381ac50a2Sgetzze         else
55481ac50a2Sgetzze         {
55581ac50a2Sgetzze             advanceAndStoreChar(lexer);
55681ac50a2Sgetzze         }
55781ac50a2Sgetzze 
55881ac50a2Sgetzze     }
55981ac50a2Sgetzze     /* Lexer position is just after `close` */
56081ac50a2Sgetzze }
56181ac50a2Sgetzze 
56281ac50a2Sgetzze 
56381ac50a2Sgetzze /* Parse a block inside parenthesis, for example a function argument list */
scanParenBlock(lexerState * lexer)56481ac50a2Sgetzze static void scanParenBlock (lexerState *lexer)
56581ac50a2Sgetzze {
56681ac50a2Sgetzze     scanBlock(lexer, '(', ')', true);
56781ac50a2Sgetzze }
56881ac50a2Sgetzze 
56981ac50a2Sgetzze /* Indexing block with bracket.
57081ac50a2Sgetzze  * Some keywords have a special meaning in this environment:
57181ac50a2Sgetzze  * end, begin, for and if */
scanIndexBlock(lexerState * lexer)57281ac50a2Sgetzze static void scanIndexBlock (lexerState *lexer)
57381ac50a2Sgetzze {
57481ac50a2Sgetzze     scanBlock(lexer, '[', ']', false);
57581ac50a2Sgetzze 
57681ac50a2Sgetzze }
57781ac50a2Sgetzze 
57881ac50a2Sgetzze /* Parse a block inside curly brackets, for type parametrization */
scanCurlyBlock(lexerState * lexer)57981ac50a2Sgetzze static void scanCurlyBlock (lexerState *lexer)
58081ac50a2Sgetzze {
58181ac50a2Sgetzze     scanBlock(lexer, '{', '}', true);
58281ac50a2Sgetzze }
58381ac50a2Sgetzze 
58481ac50a2Sgetzze /* Scan type annotation like
58581ac50a2Sgetzze  * `::Type`, `::Type{T}`
58681ac50a2Sgetzze  */
scanTypeAnnotation(lexerState * lexer)58781ac50a2Sgetzze static void scanTypeAnnotation (lexerState *lexer)
58881ac50a2Sgetzze {
58981ac50a2Sgetzze     /* assume that current char is '<', '>' or ':', followed by ':' */
59081ac50a2Sgetzze     advanceAndStoreChar(lexer);
59181ac50a2Sgetzze     advanceAndStoreChar(lexer);
59281ac50a2Sgetzze 
59381ac50a2Sgetzze     skipWhitespace(lexer, true);
59481ac50a2Sgetzze     scanIdentifier(lexer, false);
59581ac50a2Sgetzze     if (lexer->cur_c == '{')
59681ac50a2Sgetzze     {
59781ac50a2Sgetzze         scanCurlyBlock(lexer);
59881ac50a2Sgetzze     }
59981ac50a2Sgetzze }
60081ac50a2Sgetzze 
60181ac50a2Sgetzze /* Scan type annotation like
60281ac50a2Sgetzze  * `where Int<:T<:Real`, `where S<:Array{Real}` or `where {S, T}`
60381ac50a2Sgetzze  */
scanTypeWhere(lexerState * lexer)60481ac50a2Sgetzze static void scanTypeWhere (lexerState *lexer)
60581ac50a2Sgetzze {
60681ac50a2Sgetzze     /* assume that current token is 'where'
60781ac50a2Sgetzze      * allow line continuation */
60881ac50a2Sgetzze     vStringPut(lexer->token_str, ' ');
60981ac50a2Sgetzze     skipWhitespace(lexer, true);
61081ac50a2Sgetzze 
61181ac50a2Sgetzze     while (lexer->cur_c != EOF)
61281ac50a2Sgetzze     {
61381ac50a2Sgetzze 
61481ac50a2Sgetzze         if (lexer->cur_c == '{')
61581ac50a2Sgetzze         {
61681ac50a2Sgetzze             scanCurlyBlock(lexer);
61781ac50a2Sgetzze         }
61881ac50a2Sgetzze         else if (isIdentifierFirstCharacter(lexer->cur_c))
61981ac50a2Sgetzze         {
62081ac50a2Sgetzze             scanIdentifier(lexer, false);
62181ac50a2Sgetzze             if (endswith(vStringValue(lexer->token_str), "where"))
62281ac50a2Sgetzze             {
62381ac50a2Sgetzze                 /* allow line continuation */
62481ac50a2Sgetzze                 vStringPut(lexer->token_str, ' ');
62581ac50a2Sgetzze                 skipWhitespace(lexer, true);
62681ac50a2Sgetzze             }
62781ac50a2Sgetzze         }
62881ac50a2Sgetzze         else if (isTypeDecl(lexer))
62981ac50a2Sgetzze         {
63081ac50a2Sgetzze             scanTypeAnnotation(lexer);
63181ac50a2Sgetzze             //skipWhitespace(lexer, false);
63281ac50a2Sgetzze         }
63381ac50a2Sgetzze         else if (lexer->cur_c == '#')
63481ac50a2Sgetzze         {
63581ac50a2Sgetzze             skipComment(lexer);
63681ac50a2Sgetzze             /* allow line continuation */
63781ac50a2Sgetzze             if (endswith(vStringValue(lexer->token_str), "where "))
63881ac50a2Sgetzze             {
63981ac50a2Sgetzze                 skipWhitespace(lexer, true);
64081ac50a2Sgetzze             }
64181ac50a2Sgetzze         }
64281ac50a2Sgetzze         else if (isWhitespace(lexer->cur_c, false))
64381ac50a2Sgetzze         {
64481ac50a2Sgetzze             while (isWhitespace(lexer->cur_c, false))
64581ac50a2Sgetzze             {
64681ac50a2Sgetzze                 advanceChar(lexer);
64781ac50a2Sgetzze             }
64881ac50a2Sgetzze             /* Add a space, if it is not a trailing space */
64981ac50a2Sgetzze             if (!(isNewLine(lexer)))
65081ac50a2Sgetzze             {
65181ac50a2Sgetzze                 vStringPut(lexer->token_str, ' ');
65281ac50a2Sgetzze             }
65381ac50a2Sgetzze         }
65481ac50a2Sgetzze         else
65581ac50a2Sgetzze         {
65681ac50a2Sgetzze             break;
65781ac50a2Sgetzze         }
65881ac50a2Sgetzze     }
65981ac50a2Sgetzze }
66081ac50a2Sgetzze 
66181ac50a2Sgetzze 
/* Scans an identifier or keyword starting at the current character.
 * The text is left in token_str; the resulting token type is stored in
 * lexer->cur_token and returned: the keyword's token type for the keywords
 * handled below, TOKEN_IDENTIFIER for everything else.
 * For `where` the rest of the clause is scanned into token_str as well. */
static int parseIdentifier (lexerState *lexer)
{
    langType julia = getInputLanguage ();
    scanIdentifier(lexer, true);

    int k = lookupKeyword (vStringValue(lexer->token_str), julia);
    /* First part of a composed keyword (`mutable struct`, `abstract type`,
     * `primitive type`): the second word replaces the first in token_str.
     * Only scan it when an identifier really follows; otherwise the word is
     * used as a plain name (e.g. `mutable = 1`) and scanning blindly would
     * swallow the next character, whatever it is. */
    if (k == TOKEN_COMPOSER_KWD)
    {
        skipWhitespace(lexer, false);
        if (isIdentifierFirstCharacter(lexer->cur_c))
        {
            scanIdentifier(lexer, true);
            k = lookupKeyword (vStringValue(lexer->token_str), julia);
        }
    }

    switch (k)
    {
        case TOKEN_TYPE_WHERE:
            scanTypeWhere(lexer);
            return lexer->cur_token = k;

        case TOKEN_OPEN_BLOCK:
        case TOKEN_MODULE:
        case TOKEN_IMPORT:
        case TOKEN_USING:
        case TOKEN_EXPORT:
        case TOKEN_CONST:
        case TOKEN_MACRO:
        case TOKEN_FUNCTION:
        case TOKEN_STRUCT:
        case TOKEN_TYPE:
        case TOKEN_CLOSE_BLOCK:
            return lexer->cur_token = k;

        default:
            /* Not a keyword (or a lone composer keyword) */
            return lexer->cur_token = TOKEN_IDENTIFIER;
    }
}
69781ac50a2Sgetzze 
69881ac50a2Sgetzze 
69981ac50a2Sgetzze /* Advances the parser one token, optionally skipping whitespace
70081ac50a2Sgetzze  * (otherwise it is concatenated and returned as a single whitespace token).
70181ac50a2Sgetzze  * Whitespace is needed to properly render function signatures. Unrecognized
70281ac50a2Sgetzze  * token starts are stored literally, e.g. token may equal to a character '#'. */
advanceToken(lexerState * lexer,bool skip_whitespace,bool propagate_first)7037ea3b2f4Sgetzze static int advanceToken (lexerState *lexer, bool skip_whitespace, bool propagate_first)
70481ac50a2Sgetzze {
70581ac50a2Sgetzze     bool have_whitespace = false;
70681ac50a2Sgetzze     bool newline = false;
70781ac50a2Sgetzze     lexer->line = getInputLineNumber();
70881ac50a2Sgetzze     lexer->pos = getInputFilePosition();
70981ac50a2Sgetzze 
71081ac50a2Sgetzze     /* the next token is the first token of the line */
7117ea3b2f4Sgetzze     if (!propagate_first)
7127ea3b2f4Sgetzze     {
71381ac50a2Sgetzze         if (lexer->cur_token == TOKEN_NEWLINE ||
71481ac50a2Sgetzze             lexer->cur_token == TOKEN_SEMICOLON ||
71570b87d3dSgetzze             lexer->cur_token == TOKEN_NONE ||
71681ac50a2Sgetzze             (lexer->first_token && lexer->cur_token == TOKEN_MACROCALL))
71781ac50a2Sgetzze         {
71881ac50a2Sgetzze             lexer->first_token = true;
71981ac50a2Sgetzze         }
72081ac50a2Sgetzze         else
72181ac50a2Sgetzze         {
72281ac50a2Sgetzze             lexer->first_token = false;
72381ac50a2Sgetzze         }
7247ea3b2f4Sgetzze     }
72581ac50a2Sgetzze 
72681ac50a2Sgetzze     while (lexer->cur_c != EOF)
72781ac50a2Sgetzze     {
72881ac50a2Sgetzze         /* skip whitespaces but not newlines */
72981ac50a2Sgetzze         if (isWhitespace(lexer->cur_c, newline))
73081ac50a2Sgetzze         {
73181ac50a2Sgetzze             skipWhitespace(lexer, newline);
73281ac50a2Sgetzze             have_whitespace = true;
73381ac50a2Sgetzze         }
73481ac50a2Sgetzze         else if (lexer->cur_c == '#')
73581ac50a2Sgetzze         {
73681ac50a2Sgetzze             skipComment(lexer);
73781ac50a2Sgetzze             have_whitespace = true;
73881ac50a2Sgetzze         }
73981ac50a2Sgetzze         else
74081ac50a2Sgetzze         {
74181ac50a2Sgetzze             if (have_whitespace && !skip_whitespace)
74281ac50a2Sgetzze             {
74381ac50a2Sgetzze                 return lexer->cur_token = TOKEN_WHITESPACE;
74481ac50a2Sgetzze             }
74581ac50a2Sgetzze             break;
74681ac50a2Sgetzze         }
74781ac50a2Sgetzze     }
74881ac50a2Sgetzze     lexer->line = getInputLineNumber();
74981ac50a2Sgetzze     lexer->pos = getInputFilePosition();
75081ac50a2Sgetzze     while (lexer->cur_c != EOF)
75181ac50a2Sgetzze     {
75281ac50a2Sgetzze         if (lexer->cur_c == '"')
75381ac50a2Sgetzze         {
75481ac50a2Sgetzze             vStringClear(lexer->token_str);
75581ac50a2Sgetzze             scanString(lexer);
75681ac50a2Sgetzze             return lexer->cur_token = TOKEN_STRING;
75781ac50a2Sgetzze         }
75881ac50a2Sgetzze         else if (lexer->cur_c == '\'')
75981ac50a2Sgetzze         {
76081ac50a2Sgetzze             vStringClear(lexer->token_str);
76181ac50a2Sgetzze             if (scanCharacterOrTranspose(lexer))
76281ac50a2Sgetzze             {
76381ac50a2Sgetzze                 return lexer->cur_token = TOKEN_STRING;
76481ac50a2Sgetzze             }
76581ac50a2Sgetzze             else
76681ac50a2Sgetzze             {
76781ac50a2Sgetzze                 return lexer->cur_token = '\'';
76881ac50a2Sgetzze             }
76981ac50a2Sgetzze         }
77081ac50a2Sgetzze         else if (lexer->cur_c == '`')
77181ac50a2Sgetzze         {
77281ac50a2Sgetzze             vStringClear(lexer->token_str);
77381ac50a2Sgetzze             scanCommand(lexer);
77481ac50a2Sgetzze             return lexer->cur_token = TOKEN_COMMAND;
77581ac50a2Sgetzze         }
77681ac50a2Sgetzze         else if (isIdentifierFirstCharacter(lexer->cur_c))
77781ac50a2Sgetzze         {
77881ac50a2Sgetzze             return parseIdentifier(lexer);
77981ac50a2Sgetzze         }
78081ac50a2Sgetzze         else if (lexer->cur_c == '@')
78181ac50a2Sgetzze         {
78281ac50a2Sgetzze             vStringClear(lexer->token_str);
78381ac50a2Sgetzze             advanceAndStoreChar(lexer);
78481ac50a2Sgetzze             do
78581ac50a2Sgetzze             {
78681ac50a2Sgetzze                 advanceAndStoreChar(lexer);
78781ac50a2Sgetzze             } while(lexer->cur_c != EOF && isIdentifierCharacter(lexer->cur_c));
78881ac50a2Sgetzze             return lexer->cur_token = TOKEN_MACROCALL;
78981ac50a2Sgetzze         }
79081ac50a2Sgetzze         else if (lexer->cur_c == '(')
79181ac50a2Sgetzze         {
79281ac50a2Sgetzze             vStringClear(lexer->token_str);
79381ac50a2Sgetzze             scanParenBlock(lexer);
79481ac50a2Sgetzze             return lexer->cur_token = TOKEN_PAREN_BLOCK;
79581ac50a2Sgetzze         }
79681ac50a2Sgetzze         else if (lexer->cur_c == '[')
79781ac50a2Sgetzze         {
79881ac50a2Sgetzze             vStringClear(lexer->token_str);
79981ac50a2Sgetzze             scanIndexBlock(lexer);
80081ac50a2Sgetzze             return lexer->cur_token = TOKEN_BRACKET_BLOCK;
80181ac50a2Sgetzze         }
80281ac50a2Sgetzze         else if (lexer->cur_c == '{')
80381ac50a2Sgetzze         {
80481ac50a2Sgetzze             vStringClear(lexer->token_str);
80581ac50a2Sgetzze             scanCurlyBlock(lexer);
80681ac50a2Sgetzze             return lexer->cur_token = TOKEN_CURLY_BLOCK;
80781ac50a2Sgetzze         }
80881ac50a2Sgetzze         else if (isTypeDecl(lexer))
80981ac50a2Sgetzze         {
81081ac50a2Sgetzze             vStringClear(lexer->token_str);
81181ac50a2Sgetzze             scanTypeAnnotation(lexer);
81281ac50a2Sgetzze             return lexer->cur_token = TOKEN_TYPE_ANNOTATION;
81381ac50a2Sgetzze         }
81481ac50a2Sgetzze         else if (skipNewLine(lexer))
81581ac50a2Sgetzze         {
81681ac50a2Sgetzze             /* allow line continuation */
81781ac50a2Sgetzze             if (isOperator(lexer->cur_token))
81881ac50a2Sgetzze             {
81981ac50a2Sgetzze                 return lexer->cur_token;
82081ac50a2Sgetzze             }
82181ac50a2Sgetzze             return lexer->cur_token = TOKEN_NEWLINE;
82281ac50a2Sgetzze         }
82381ac50a2Sgetzze         else if (lexer->cur_c == ';')
82481ac50a2Sgetzze         {
82581ac50a2Sgetzze             advanceChar(lexer);
82681ac50a2Sgetzze             return lexer->cur_token = TOKEN_SEMICOLON;
82781ac50a2Sgetzze         }
82881ac50a2Sgetzze         else
82981ac50a2Sgetzze         {
83081ac50a2Sgetzze             int c = lexer->cur_c;
83181ac50a2Sgetzze             advanceChar(lexer);
83281ac50a2Sgetzze             return lexer->cur_token = c;
83381ac50a2Sgetzze         }
83481ac50a2Sgetzze     }
83581ac50a2Sgetzze     return lexer->cur_token = TOKEN_EOF;
83681ac50a2Sgetzze }
83781ac50a2Sgetzze 
/* Prepares the lexer for a new input file: primes the character lookahead,
 * allocates the token buffer, skips a leading `#!` line if there is one and
 * reads the first token. */
static void initLexer (lexerState *lexer)
{
    bool has_shebang;

    /* load cur_c and next_c */
    advanceNChar(lexer, 2);

    lexer->token_str = vStringNew();
    lexer->first_token = true;
    lexer->cur_token = TOKEN_NONE;
    lexer->prev_c = '\0';

    has_shebang = (lexer->cur_c == '#' && lexer->next_c == '!');
    if (has_shebang)
    {
        skipComment(lexer);
    }

    advanceToken(lexer, true, false);
}
85281ac50a2Sgetzze 
/* Releases the token buffer allocated by initLexer() and clears the pointer
 * so that the lexer no longer refers to freed memory. */
static void deInitLexer (lexerState *lexer)
{
    vString *buffer = lexer->token_str;

    lexer->token_str = NULL;
    vStringDelete(buffer);
}
85881ac50a2Sgetzze 
#if 0
/* Debugging aid (compiled out): prints the current line, token, character
 * and token text of the lexer to stdout.
 *
 * The token text is source data, so it is printed through "%s" rather than
 * being used as the format string itself. The numeric fields are cast to the
 * type each conversion specifier expects. */
static void debugLexer (lexerState *lexer)
{
    printf("Current lexer state: line %lu, token (%d), cur char `%c`, token str:\n\t`%s`\n",
           (unsigned long) lexer->line,
           (int) lexer->cur_token,
           (int) lexer->cur_c,
           vStringValue(lexer->token_str));
}
#endif
86781ac50a2Sgetzze 
/* Emits a definition tag.
 *
 * ident:       name of the tag
 * type:        currently unused (the varType field is not filled in)
 * arg_list:    signature text, or NULL when there is none
 * kind:        kind of the tag; K_NONE suppresses the tag entirely
 * line, pos:   location recorded for the tag
 * scope:       name of the enclosing scope, only read when parent_kind is
 *              not K_NONE
 * parent_kind: kind of the enclosing scope, or K_NONE for no scope */
static void addTag (vString* ident, const char* type, const char* arg_list, int kind, unsigned long line, MIOPos pos, vString *scope, int parent_kind)
{
    tagEntryInfo entry;
    const bool has_parent = (parent_kind != K_NONE);

    if (kind == K_NONE)
    {
        return;
    }

    initTagEntry(&entry, vStringValue(ident), kind);

    entry.lineNumber = line;
    entry.filePosition = pos;
    entry.sourceFileName = getInputFileName();
    entry.extensionFields.signature = arg_list;
    /* entry.extensionFields.varType = type; */  /* Needs a workaround */

    if (has_parent)
    {
        entry.extensionFields.scopeKindIndex = parent_kind;
        entry.extensionFields.scopeName = vStringValue(scope);
    }

    makeTagEntry(&entry);
}
89081ac50a2Sgetzze 
/* Emits a reference tag (a use of a name rather than its definition).
 *
 * ident:       referenced name
 * kind, role:  kind and role of the reference; K_NONE suppresses the tag
 * line, pos:   location recorded for the tag
 * scope:       name of the enclosing scope, only read when parent_kind is
 *              not K_NONE
 * parent_kind: kind of the enclosing scope, or K_NONE for no scope */
static void addReferenceTag (vString* ident, int kind, int role, unsigned long line, MIOPos pos, vString* scope, int parent_kind)
{
    tagEntryInfo entry;
    const bool has_parent = (parent_kind != K_NONE);

    if (kind == K_NONE)
    {
        return;
    }

    initRefTagEntry(&entry, vStringValue(ident), kind, role);
    entry.lineNumber = line;
    entry.filePosition = pos;

    if (has_parent)
    {
        entry.extensionFields.scopeKindIndex = parent_kind;
        entry.extensionFields.scopeName = vStringValue(scope);
    }

    makeTagEntry(&entry);
}
9083cc79e5cSAmaiKinono 
90981ac50a2Sgetzze /* Skip tokens until one of the goal tokens is hit. Escapes when level = 0 if there are no goal tokens.
91081ac50a2Sgetzze  * Keeps track of balanced ()'s, []'s, and {}'s and ignores the goal tokens within those pairings */
skipUntil(lexerState * lexer,int goal_tokens[],int num_goal_tokens)91181ac50a2Sgetzze static void skipUntil (lexerState *lexer, int goal_tokens[], int num_goal_tokens)
91281ac50a2Sgetzze {
91381ac50a2Sgetzze     int block_level = 0;
91481ac50a2Sgetzze 
91581ac50a2Sgetzze     while (lexer->cur_token != TOKEN_EOF)
91681ac50a2Sgetzze     {
91781ac50a2Sgetzze         /* check if the keyword is reached, only if outside a block */
91881ac50a2Sgetzze         if (block_level == 0)
91981ac50a2Sgetzze         {
92081ac50a2Sgetzze             int ii = 0;
92181ac50a2Sgetzze             for(ii = 0; ii < num_goal_tokens; ii++)
92281ac50a2Sgetzze             {
92381ac50a2Sgetzze                 if (lexer->cur_token == goal_tokens[ii])
92481ac50a2Sgetzze                 {
92581ac50a2Sgetzze                     break;
92681ac50a2Sgetzze                 }
92781ac50a2Sgetzze             }
92881ac50a2Sgetzze             if (ii < num_goal_tokens)
92981ac50a2Sgetzze             {
93081ac50a2Sgetzze                 /* parse the next token */
9317ea3b2f4Sgetzze                 advanceToken(lexer, true, false);
93281ac50a2Sgetzze                 break;
93381ac50a2Sgetzze             }
93481ac50a2Sgetzze         }
93581ac50a2Sgetzze 
93681ac50a2Sgetzze         /* take into account nested blocks */
93781ac50a2Sgetzze         switch (lexer->cur_token)
93881ac50a2Sgetzze         {
93981ac50a2Sgetzze             case TOKEN_OPEN_BLOCK:
94081ac50a2Sgetzze                 block_level++;
94181ac50a2Sgetzze                 break;
94281ac50a2Sgetzze             case TOKEN_CLOSE_BLOCK:
94381ac50a2Sgetzze                 block_level--;
94481ac50a2Sgetzze                 break;
94581ac50a2Sgetzze             default:
94681ac50a2Sgetzze                 break;
94781ac50a2Sgetzze         }
94881ac50a2Sgetzze 
94981ac50a2Sgetzze         /* Has to be after the token switch to catch the case when we start with the initial level token */
95081ac50a2Sgetzze         if (num_goal_tokens == 0 && block_level == 0)
95181ac50a2Sgetzze         {
95281ac50a2Sgetzze             break;
95381ac50a2Sgetzze         }
95481ac50a2Sgetzze 
9557ea3b2f4Sgetzze         advanceToken(lexer, true, false);
95681ac50a2Sgetzze     }
95781ac50a2Sgetzze }
95881ac50a2Sgetzze 
95981ac50a2Sgetzze /* Skip until the end of the block */
skipUntilEnd(lexerState * lexer)96081ac50a2Sgetzze static void skipUntilEnd (lexerState *lexer)
96181ac50a2Sgetzze {
96281ac50a2Sgetzze     int goal_tokens[] = { TOKEN_CLOSE_BLOCK };
96381ac50a2Sgetzze 
96481ac50a2Sgetzze     skipUntil(lexer, goal_tokens, 1);
96581ac50a2Sgetzze }
96681ac50a2Sgetzze 
96781ac50a2Sgetzze /* Skip a function body after assignment operator '='
96881ac50a2Sgetzze  * Beware of continuation lines after operators
96981ac50a2Sgetzze  *  */
skipBody(lexerState * lexer)97081ac50a2Sgetzze static void skipBody (lexerState *lexer)
97181ac50a2Sgetzze {
97281ac50a2Sgetzze     /* assume position just after '=' */
97381ac50a2Sgetzze     while (lexer->cur_token != TOKEN_EOF && lexer->cur_token != TOKEN_NEWLINE)
97481ac50a2Sgetzze     {
9757ea3b2f4Sgetzze         advanceToken(lexer, true, false);
97681ac50a2Sgetzze 
97781ac50a2Sgetzze         if (lexer->cur_token == TOKEN_OPEN_BLOCK)
97881ac50a2Sgetzze         {
97981ac50a2Sgetzze             /* pass the keyword */
9807ea3b2f4Sgetzze             advanceToken(lexer, true, false);
98181ac50a2Sgetzze             skipUntilEnd(lexer);
98281ac50a2Sgetzze             /* the next token is already selected */
98381ac50a2Sgetzze         }
98481ac50a2Sgetzze     }
98581ac50a2Sgetzze }
98681ac50a2Sgetzze 
98781ac50a2Sgetzze /* Short function format:
98881ac50a2Sgetzze  * <ident> ( [<args>] ) [::<type>] [<where>] = [begin] <body> [end]
98981ac50a2Sgetzze  * */
parseShortFunction(lexerState * lexer,vString * scope,int parent_kind)99081ac50a2Sgetzze static void parseShortFunction (lexerState *lexer, vString *scope, int parent_kind)
99181ac50a2Sgetzze {
99281ac50a2Sgetzze     /* assume the current char is just after identifier */
99381ac50a2Sgetzze     vString *name;
99481ac50a2Sgetzze     vString *arg_list;
99581ac50a2Sgetzze     unsigned long line;
99681ac50a2Sgetzze     MIOPos pos;
99781ac50a2Sgetzze 
99881ac50a2Sgetzze     /* should be an open parenthesis after identifier
99981ac50a2Sgetzze      * with potentially parametric type */
100081ac50a2Sgetzze     skipWhitespace(lexer, false);
100181ac50a2Sgetzze     if (lexer->cur_c == '{')
100281ac50a2Sgetzze     {
100381ac50a2Sgetzze         scanCurlyBlock(lexer);
100481ac50a2Sgetzze         skipWhitespace(lexer, false);
100581ac50a2Sgetzze     }
100681ac50a2Sgetzze 
100781ac50a2Sgetzze     if (lexer->cur_c != '(')
100881ac50a2Sgetzze     {
10097ea3b2f4Sgetzze         advanceToken(lexer, true, false);
101081ac50a2Sgetzze         return;
101181ac50a2Sgetzze     }
101281ac50a2Sgetzze 
101381ac50a2Sgetzze     name = vStringNewCopy(lexer->token_str);
101481ac50a2Sgetzze     line = lexer->line;
101581ac50a2Sgetzze     pos = lexer->pos;
101681ac50a2Sgetzze 
101781ac50a2Sgetzze     /* scan argument list */
10187ea3b2f4Sgetzze     advanceToken(lexer, true, false);
101981ac50a2Sgetzze     arg_list = vStringNewCopy(lexer->token_str);
102081ac50a2Sgetzze 
102181ac50a2Sgetzze     /* scan potential type casting */
10227ea3b2f4Sgetzze     advanceToken(lexer, true, false);
102381ac50a2Sgetzze     if (lexer->cur_token == TOKEN_TYPE_ANNOTATION)
102481ac50a2Sgetzze     {
102581ac50a2Sgetzze         vStringCat(arg_list, lexer->token_str);
10267ea3b2f4Sgetzze         advanceToken(lexer, true, false);
102781ac50a2Sgetzze     }
102881ac50a2Sgetzze     /* scan potential type union with 'where' */
102981ac50a2Sgetzze     if (lexer->cur_token == TOKEN_TYPE_WHERE)
103081ac50a2Sgetzze     {
103181ac50a2Sgetzze         vStringPut(arg_list, ' ');
103281ac50a2Sgetzze         vStringCat(arg_list, lexer->token_str);
10337ea3b2f4Sgetzze         advanceToken(lexer, true, false);
103481ac50a2Sgetzze     }
103581ac50a2Sgetzze 
1036*b9feb330Sgetzze     /* scan equal sign, ignore `==` and `=>` */
1037*b9feb330Sgetzze     if (!(lexer->cur_token == '=' &&
103881ac50a2Sgetzze           lexer->cur_c != '=' &&
1039*b9feb330Sgetzze           lexer->cur_c != '>'))
104081ac50a2Sgetzze     {
104181ac50a2Sgetzze         vStringDelete(name);
104281ac50a2Sgetzze         vStringDelete(arg_list);
104381ac50a2Sgetzze         return;
104481ac50a2Sgetzze     }
104581ac50a2Sgetzze 
104681ac50a2Sgetzze     addTag(name, NULL, vStringValue(arg_list), K_FUNCTION, line, pos, scope, parent_kind);
104781ac50a2Sgetzze 
104881ac50a2Sgetzze     /* scan until end of function definition */
104981ac50a2Sgetzze     skipBody(lexer);
105081ac50a2Sgetzze 
105181ac50a2Sgetzze     /* Should end on a new line, parse next token */
10527ea3b2f4Sgetzze     advanceToken(lexer, true, false);
105381ac50a2Sgetzze     lexer->first_token = true;
105481ac50a2Sgetzze 
105581ac50a2Sgetzze     vStringDelete(name);
105681ac50a2Sgetzze     vStringDelete(arg_list);
105781ac50a2Sgetzze }
105881ac50a2Sgetzze 
105981ac50a2Sgetzze /* Function format:
106081ac50a2Sgetzze  * function <ident> ( [<args>] ) [::<type>] [<where>] [<body>] end
106181ac50a2Sgetzze  * */
parseFunction(lexerState * lexer,vString * scope,int parent_kind)106281ac50a2Sgetzze static void parseFunction (lexerState *lexer, vString *scope, int parent_kind)
106381ac50a2Sgetzze {
106481ac50a2Sgetzze     vString *name;
106581ac50a2Sgetzze     vString *arg_list;
1066696902a0SAmaiKinono     vString *local_scope;
1067696902a0SAmaiKinono     int local_parent_kind;
106881ac50a2Sgetzze     unsigned long line;
106981ac50a2Sgetzze     MIOPos pos;
107081ac50a2Sgetzze 
10717ea3b2f4Sgetzze     advanceToken(lexer, true, false);
107281ac50a2Sgetzze     if (lexer->cur_token != TOKEN_IDENTIFIER)
107381ac50a2Sgetzze     {
107481ac50a2Sgetzze         return;
107589dd26eaSgetzze     }
107689dd26eaSgetzze     else if (lexer->cur_c == '.')
107789dd26eaSgetzze     {
1078696902a0SAmaiKinono         local_scope = vStringNewCopy(lexer->token_str);
1079696902a0SAmaiKinono         local_parent_kind = K_MODULE;
1080696902a0SAmaiKinono         advanceChar(lexer);
10817ea3b2f4Sgetzze         advanceToken(lexer, true, false);
108289dd26eaSgetzze     }
108389dd26eaSgetzze     else
108489dd26eaSgetzze     {
1085696902a0SAmaiKinono         local_scope = vStringNewCopy(scope);
1086696902a0SAmaiKinono         local_parent_kind = parent_kind;
108781ac50a2Sgetzze     }
108881ac50a2Sgetzze 
108970cbf361Sgetzze     /* Scan for parametric type constructor */
109070cbf361Sgetzze     skipWhitespace(lexer, false);
109170cbf361Sgetzze     if (lexer->cur_c == '{')
109270cbf361Sgetzze     {
109370cbf361Sgetzze         scanCurlyBlock(lexer);
109470cbf361Sgetzze         skipWhitespace(lexer, false);
109570cbf361Sgetzze     }
109670cbf361Sgetzze 
109781ac50a2Sgetzze     name = vStringNewCopy(lexer->token_str);
109881ac50a2Sgetzze     arg_list = vStringNew();
109981ac50a2Sgetzze     line = lexer->line;
110081ac50a2Sgetzze     pos = lexer->pos;
110181ac50a2Sgetzze 
11027ea3b2f4Sgetzze     advanceToken(lexer, true, false);
110381ac50a2Sgetzze     if (lexer->cur_token == TOKEN_PAREN_BLOCK)
110481ac50a2Sgetzze     {
110581ac50a2Sgetzze         vStringCopy(arg_list, lexer->token_str);
110681ac50a2Sgetzze 
110781ac50a2Sgetzze         /* scan potential type casting */
11087ea3b2f4Sgetzze         advanceToken(lexer, true, false);
110981ac50a2Sgetzze         if (lexer->cur_token == TOKEN_TYPE_ANNOTATION)
111081ac50a2Sgetzze         {
111181ac50a2Sgetzze             vStringCat(arg_list, lexer->token_str);
11127ea3b2f4Sgetzze             advanceToken(lexer, true, false);
111381ac50a2Sgetzze         }
111481ac50a2Sgetzze         /* scan potential type union with 'where' */
111581ac50a2Sgetzze         if (lexer->cur_token == TOKEN_TYPE_WHERE)
111681ac50a2Sgetzze         {
111781ac50a2Sgetzze             vStringPut(arg_list, ' ');
111881ac50a2Sgetzze             vStringCat(arg_list, lexer->token_str);
11197ea3b2f4Sgetzze             advanceToken(lexer, true, false);
112081ac50a2Sgetzze         }
112181ac50a2Sgetzze 
1122696902a0SAmaiKinono         addTag(name, NULL, vStringValue(arg_list), K_FUNCTION, line, pos, local_scope, local_parent_kind);
1123696902a0SAmaiKinono         addToScope(scope, name);
1124696902a0SAmaiKinono         parseExpr(lexer, true, K_FUNCTION, scope);
112581ac50a2Sgetzze     }
112681ac50a2Sgetzze     else if (lexer->cur_token == TOKEN_CLOSE_BLOCK)
112781ac50a2Sgetzze     {
112881ac50a2Sgetzze         /* Function without method */
1129696902a0SAmaiKinono         addTag(name, NULL, NULL, K_FUNCTION, line, pos, local_scope, local_parent_kind);
113081ac50a2Sgetzze         /* Go to the closing 'end' keyword */
113181ac50a2Sgetzze         skipUntilEnd(lexer);
1132696902a0SAmaiKinono     }
113381ac50a2Sgetzze 
113481ac50a2Sgetzze     vStringDelete(name);
113581ac50a2Sgetzze     vStringDelete(arg_list);
1136696902a0SAmaiKinono     vStringDelete(local_scope);
113781ac50a2Sgetzze }
113881ac50a2Sgetzze 
113981ac50a2Sgetzze /* Macro format:
114081ac50a2Sgetzze  * "macro" <ident>()
114181ac50a2Sgetzze  */
parseMacro(lexerState * lexer,vString * scope,int parent_kind)114281ac50a2Sgetzze static void parseMacro (lexerState *lexer, vString *scope, int parent_kind)
114381ac50a2Sgetzze {
114481ac50a2Sgetzze     vString *name;
114581ac50a2Sgetzze     unsigned long line;
114681ac50a2Sgetzze     MIOPos pos;
114781ac50a2Sgetzze 
11487ea3b2f4Sgetzze     advanceToken(lexer, true, false);
114981ac50a2Sgetzze     if (lexer->cur_token != TOKEN_IDENTIFIER)
115081ac50a2Sgetzze     {
115181ac50a2Sgetzze         return;
115281ac50a2Sgetzze     }
115381ac50a2Sgetzze 
115481ac50a2Sgetzze     name = vStringNewCopy(lexer->token_str);
115581ac50a2Sgetzze     line = lexer->line;
115681ac50a2Sgetzze     pos = lexer->pos;
115781ac50a2Sgetzze 
11587ea3b2f4Sgetzze     advanceToken(lexer, true, false);
115981ac50a2Sgetzze     if (lexer->cur_token == TOKEN_PAREN_BLOCK)
116081ac50a2Sgetzze     {
116181ac50a2Sgetzze         addTag(name, NULL, vStringValue(lexer->token_str), K_MACRO, line, pos, scope, parent_kind);
116281ac50a2Sgetzze     }
116381ac50a2Sgetzze 
116481ac50a2Sgetzze     skipUntilEnd(lexer);
116581ac50a2Sgetzze     vStringDelete(name);
116681ac50a2Sgetzze }
116781ac50a2Sgetzze 
116881ac50a2Sgetzze /* Const format:
116981ac50a2Sgetzze  * "const" <ident>
117081ac50a2Sgetzze  */
parseConst(lexerState * lexer,vString * scope,int parent_kind)117181ac50a2Sgetzze static void parseConst (lexerState *lexer, vString *scope, int parent_kind)
117281ac50a2Sgetzze {
117381ac50a2Sgetzze     vString *name;
117481ac50a2Sgetzze 
11757ea3b2f4Sgetzze     advanceToken(lexer, true, false);
117681ac50a2Sgetzze     if (lexer->cur_token != TOKEN_IDENTIFIER)
117781ac50a2Sgetzze     {
117881ac50a2Sgetzze         return;
117981ac50a2Sgetzze     }
118081ac50a2Sgetzze 
118181ac50a2Sgetzze     name = vStringNewCopy(lexer->token_str);
118281ac50a2Sgetzze 
11837ea3b2f4Sgetzze     advanceToken(lexer, true, false);
118481ac50a2Sgetzze     if (lexer->cur_token == TOKEN_TYPE_ANNOTATION)
118581ac50a2Sgetzze     {
118681ac50a2Sgetzze         addTag(name, "const", vStringValue(lexer->token_str), K_CONSTANT, lexer->line, lexer->pos, scope, parent_kind);
11877ea3b2f4Sgetzze         advanceToken(lexer, true, false);
118881ac50a2Sgetzze     }
118981ac50a2Sgetzze     else
119081ac50a2Sgetzze     {
119181ac50a2Sgetzze         addTag(name, "const", NULL, K_CONSTANT, lexer->line, lexer->pos, scope, parent_kind);
119281ac50a2Sgetzze     }
119381ac50a2Sgetzze 
119481ac50a2Sgetzze     vStringDelete(name);
119581ac50a2Sgetzze }
119681ac50a2Sgetzze 
119781ac50a2Sgetzze /* Type format:
119881ac50a2Sgetzze  * [ "abstract" | "primitive" ] "type" <ident>
119981ac50a2Sgetzze  */
parseType(lexerState * lexer,vString * scope,int parent_kind)120081ac50a2Sgetzze static void parseType (lexerState *lexer, vString *scope, int parent_kind)
120181ac50a2Sgetzze {
12027ea3b2f4Sgetzze     advanceToken(lexer, true, false);
120381ac50a2Sgetzze     if (lexer->cur_token != TOKEN_IDENTIFIER)
120481ac50a2Sgetzze     {
120581ac50a2Sgetzze         return;
120681ac50a2Sgetzze     }
120781ac50a2Sgetzze 
120881ac50a2Sgetzze     addTag(lexer->token_str, NULL, NULL, K_TYPE, lexer->line, lexer->pos, scope, parent_kind);
120981ac50a2Sgetzze 
121081ac50a2Sgetzze     skipUntilEnd(lexer);
121181ac50a2Sgetzze }
121281ac50a2Sgetzze 
121381ac50a2Sgetzze /* Module format:
121481ac50a2Sgetzze  * [ "baremodule" | "module" ] <ident>
121581ac50a2Sgetzze  */
parseModule(lexerState * lexer,vString * scope,int parent_kind)121681ac50a2Sgetzze static void parseModule (lexerState *lexer, vString *scope, int parent_kind)
121781ac50a2Sgetzze {
12187ea3b2f4Sgetzze     advanceToken(lexer, true, false);
121981ac50a2Sgetzze     if (lexer->cur_token != TOKEN_IDENTIFIER)
122081ac50a2Sgetzze     {
122181ac50a2Sgetzze         return;
122281ac50a2Sgetzze     }
122381ac50a2Sgetzze 
122481ac50a2Sgetzze     addTag(lexer->token_str, NULL, NULL, K_MODULE, lexer->line, lexer->pos, scope, parent_kind);
1225696902a0SAmaiKinono     addToScope(scope, lexer->token_str);
12267ea3b2f4Sgetzze     advanceToken(lexer, true, false);
1227696902a0SAmaiKinono     parseExpr(lexer, true, K_MODULE, scope);
122881ac50a2Sgetzze }
122981ac50a2Sgetzze 
12303cc79e5cSAmaiKinono /*
12313cc79e5cSAmaiKinono  * Parse comma separated entity in import/using expressions. An entity could be
12323cc79e5cSAmaiKinono  * in the form of "Module" or "Module.symbol". The lexer should be at the end
12333cc79e5cSAmaiKinono  * of "Module", and this function will take it to the end of the entity
12343cc79e5cSAmaiKinono  * (whitespaces also skipped).
123581ac50a2Sgetzze  */
parseImportEntity(lexerState * lexer,vString * scope,int token_type,int parent_kind)12363cc79e5cSAmaiKinono static void parseImportEntity (lexerState *lexer, vString *scope, int token_type, int parent_kind)
123781ac50a2Sgetzze {
12383cc79e5cSAmaiKinono     if (lexer->cur_c == '.')
12393cc79e5cSAmaiKinono     {
12403cc79e5cSAmaiKinono         if (token_type == TOKEN_IMPORT)
12413cc79e5cSAmaiKinono         {
12423cc79e5cSAmaiKinono             vString *module_name = vStringNewCopy(lexer->token_str);
12433cc79e5cSAmaiKinono             addReferenceTag(module_name, K_MODULE, JULIA_MODULE_NAMESPACE, lexer->line, lexer->pos, scope, parent_kind);
12443cc79e5cSAmaiKinono             advanceChar(lexer);
12457ea3b2f4Sgetzze             advanceToken(lexer, true, false);
12463cc79e5cSAmaiKinono             addReferenceTag(lexer->token_str, K_UNKNOWN, JULIA_UNKNOWN_IMPORTED, lexer->line, lexer->pos, module_name, K_MODULE);
12473cc79e5cSAmaiKinono             vStringDelete(module_name);
12483cc79e5cSAmaiKinono         }
12493cc79e5cSAmaiKinono         else /* if (token_type == TOKEN_USING) */
125081ac50a2Sgetzze         {
12513cc79e5cSAmaiKinono             /* using Module.symbol is invalid, so we advance the lexer but don't tag it. */
12523cc79e5cSAmaiKinono             advanceChar(lexer);
12537ea3b2f4Sgetzze             advanceToken(lexer, true, false);
125481ac50a2Sgetzze         }
12553cc79e5cSAmaiKinono     }
12563cc79e5cSAmaiKinono     else
12573cc79e5cSAmaiKinono     {
12583cc79e5cSAmaiKinono         if (token_type == TOKEN_IMPORT)
12593cc79e5cSAmaiKinono         {
12603cc79e5cSAmaiKinono             addReferenceTag(lexer->token_str, K_MODULE, JULIA_MODULE_IMPORTED, lexer->line, lexer->pos, scope, parent_kind);
12613cc79e5cSAmaiKinono         }
12623cc79e5cSAmaiKinono         else /* if (token_type == TOKEN_USING) */
12633cc79e5cSAmaiKinono         {
12643cc79e5cSAmaiKinono             addReferenceTag(lexer->token_str, K_MODULE, JULIA_MODULE_USED, lexer->line, lexer->pos, scope, parent_kind);
12653cc79e5cSAmaiKinono         }
12663cc79e5cSAmaiKinono     }
12673cc79e5cSAmaiKinono }
126881ac50a2Sgetzze 
12693cc79e5cSAmaiKinono /* Parse import/using expressions with a colon, like: */
12703cc79e5cSAmaiKinono /* import Module: symbol1, symbol2 */
12713cc79e5cSAmaiKinono /* using Module: symbol1, symbol2 */
12723cc79e5cSAmaiKinono /* The lexer should be at the end of "Module", and this function will take it
12733cc79e5cSAmaiKinono  * to the end of the token after this expression (whitespaces also skipped). */
parseColonImportExpr(lexerState * lexer,vString * scope,int token_type,int parent_kind)12743cc79e5cSAmaiKinono static void parseColonImportExpr (lexerState *lexer, vString *scope, int token_type, int parent_kind)
12753cc79e5cSAmaiKinono {
12763cc79e5cSAmaiKinono     int symbol_role;
12773cc79e5cSAmaiKinono     if (token_type == TOKEN_IMPORT)
12783cc79e5cSAmaiKinono     {
12793cc79e5cSAmaiKinono         symbol_role = JULIA_UNKNOWN_IMPORTED;
12803cc79e5cSAmaiKinono     }
12813cc79e5cSAmaiKinono     else /* if (token_type == TOKEN_USING) */
12823cc79e5cSAmaiKinono     {
12833cc79e5cSAmaiKinono         symbol_role = JULIA_UNKNOWN_USED;
12843cc79e5cSAmaiKinono     }
12853cc79e5cSAmaiKinono     vString *name = vStringNewCopy(lexer->token_str);
12863cc79e5cSAmaiKinono     addReferenceTag(name, K_MODULE, JULIA_MODULE_NAMESPACE, lexer->line, lexer->pos, scope, parent_kind);
12873cc79e5cSAmaiKinono     advanceChar(lexer);
12887ea3b2f4Sgetzze     advanceToken(lexer, true, false);
12893cc79e5cSAmaiKinono     if (lexer->cur_token == TOKEN_NEWLINE)
12903cc79e5cSAmaiKinono     {
12917ea3b2f4Sgetzze         advanceToken(lexer, true, false);
12923cc79e5cSAmaiKinono     }
129381ac50a2Sgetzze     while (lexer->cur_token == TOKEN_IDENTIFIER || lexer->cur_token == TOKEN_MACROCALL)
129481ac50a2Sgetzze     {
12953cc79e5cSAmaiKinono         addReferenceTag(lexer->token_str, K_UNKNOWN, symbol_role, lexer->line, lexer->pos, name, K_MODULE);
129681ac50a2Sgetzze         if (lexer->cur_c == ',')
129781ac50a2Sgetzze         {
12983cc79e5cSAmaiKinono             advanceChar(lexer);
12997ea3b2f4Sgetzze             advanceToken(lexer, true, false);
130081ac50a2Sgetzze             if (lexer->cur_token == TOKEN_NEWLINE)
130181ac50a2Sgetzze             {
13027ea3b2f4Sgetzze                 advanceToken(lexer, true, false);
130381ac50a2Sgetzze             }
130481ac50a2Sgetzze         }
130581ac50a2Sgetzze         else
130681ac50a2Sgetzze         {
13077ea3b2f4Sgetzze             advanceToken(lexer, true, false);
130881ac50a2Sgetzze         }
130981ac50a2Sgetzze     }
131081ac50a2Sgetzze     vStringDelete(name);
131181ac50a2Sgetzze }
131281ac50a2Sgetzze 
13133cc79e5cSAmaiKinono /* Import format:
13143cc79e5cSAmaiKinono  * [ "import" | "using" ] <ident> [: <name>]
13153cc79e5cSAmaiKinono  */
parseImport(lexerState * lexer,vString * scope,int token_type,int parent_kind)13163cc79e5cSAmaiKinono static void parseImport (lexerState *lexer, vString *scope, int token_type, int parent_kind)
13173cc79e5cSAmaiKinono {
13183cc79e5cSAmaiKinono     /* capture the imported name */
13197ea3b2f4Sgetzze     advanceToken(lexer, true, false);
13203cc79e5cSAmaiKinono     /* import Mod1: symbol1, symbol2 */
13213cc79e5cSAmaiKinono     /* using Mod1: symbol1, symbol2 */
13223cc79e5cSAmaiKinono     if (lexer->cur_c == ':')
13233cc79e5cSAmaiKinono     {
13243cc79e5cSAmaiKinono         parseColonImportExpr(lexer, scope, token_type, parent_kind);
13253cc79e5cSAmaiKinono     }
13263cc79e5cSAmaiKinono     /* All other situations, like import/using Mod1, Mod2.symbol1, Mod3... */
13273cc79e5cSAmaiKinono     else
13283cc79e5cSAmaiKinono     {
13293cc79e5cSAmaiKinono         while (lexer->cur_token == TOKEN_IDENTIFIER || lexer->cur_token == TOKEN_MACROCALL)
13303cc79e5cSAmaiKinono         {
13313cc79e5cSAmaiKinono             parseImportEntity(lexer, scope, token_type, parent_kind);
13323cc79e5cSAmaiKinono             if (lexer->cur_c == ',')
13333cc79e5cSAmaiKinono             {
13343cc79e5cSAmaiKinono                 advanceChar(lexer);
13357ea3b2f4Sgetzze                 advanceToken(lexer, true, false);
13363cc79e5cSAmaiKinono                 if (lexer->cur_token == TOKEN_NEWLINE)
13373cc79e5cSAmaiKinono                 {
13387ea3b2f4Sgetzze                     advanceToken(lexer, true, false);
13393cc79e5cSAmaiKinono                 }
13403cc79e5cSAmaiKinono             }
13413cc79e5cSAmaiKinono             else
13423cc79e5cSAmaiKinono             {
13437ea3b2f4Sgetzze                 advanceToken(lexer, true, false);
13443cc79e5cSAmaiKinono             }
13453cc79e5cSAmaiKinono         }
13463cc79e5cSAmaiKinono     }
13473cc79e5cSAmaiKinono }
13483cc79e5cSAmaiKinono 
134981ac50a2Sgetzze /* Structs format:
135081ac50a2Sgetzze  * "struct" <ident>[{<param>}] [<:<type>]; <fields> <inner constructor> end
135181ac50a2Sgetzze  * */
parseStruct(lexerState * lexer,vString * scope,int parent_kind)135281ac50a2Sgetzze static void parseStruct (lexerState *lexer, vString *scope, int parent_kind)
135381ac50a2Sgetzze {
135481ac50a2Sgetzze     vString *name;
135581ac50a2Sgetzze     vString *field;
135670cbf361Sgetzze     size_t old_scope_len;
135781ac50a2Sgetzze     unsigned long line;
135881ac50a2Sgetzze     MIOPos pos;
135981ac50a2Sgetzze 
13607ea3b2f4Sgetzze     advanceToken(lexer, true, false);
136181ac50a2Sgetzze     if (lexer->cur_token != TOKEN_IDENTIFIER)
136281ac50a2Sgetzze     {
136381ac50a2Sgetzze         return;
136481ac50a2Sgetzze     }
136581ac50a2Sgetzze 
136681ac50a2Sgetzze     name = vStringNewCopy(lexer->token_str);
136781ac50a2Sgetzze     field = vStringNew();
136881ac50a2Sgetzze     line = lexer->line;
136981ac50a2Sgetzze     pos = lexer->pos;
137081ac50a2Sgetzze 
137181ac50a2Sgetzze     /* scan parametrization */
13727ea3b2f4Sgetzze     advanceToken(lexer, true, false);
137381ac50a2Sgetzze     if (lexer->cur_token == TOKEN_CURLY_BLOCK)
137481ac50a2Sgetzze     {
137581ac50a2Sgetzze         addTag(name, NULL, vStringValue(lexer->token_str), K_STRUCT, line, pos, scope, parent_kind);
13767ea3b2f4Sgetzze         advanceToken(lexer, true, false);
137781ac50a2Sgetzze     }
137881ac50a2Sgetzze     else
137981ac50a2Sgetzze     {
138081ac50a2Sgetzze         addTag(name, NULL, NULL, K_STRUCT, line, pos, scope, parent_kind);
138181ac50a2Sgetzze     }
138281ac50a2Sgetzze     addToScope(scope, name);
138381ac50a2Sgetzze 
138481ac50a2Sgetzze     /* skip inheritance */
138581ac50a2Sgetzze     if (lexer->cur_token == TOKEN_TYPE_ANNOTATION)
138681ac50a2Sgetzze     {
13877ea3b2f4Sgetzze         advanceToken(lexer, true, false);
138881ac50a2Sgetzze     }
138981ac50a2Sgetzze 
139070cbf361Sgetzze     /* keep the struct scope in memory to reset it after parsing constructors */
139170cbf361Sgetzze     old_scope_len = vStringLength(scope);
139281ac50a2Sgetzze     /* Parse fields and inner constructors */
139381ac50a2Sgetzze     while (lexer->cur_token != TOKEN_EOF && lexer->cur_token != TOKEN_CLOSE_BLOCK)
139481ac50a2Sgetzze     {
1395db610ad5Sgetzze         if (lexer->cur_token == TOKEN_IDENTIFIER && lexer->first_token)
139681ac50a2Sgetzze         {
1397db610ad5Sgetzze             if (strcmp(vStringValue(lexer->token_str), vStringValue(name)) == 0)
139881ac50a2Sgetzze             {
139981ac50a2Sgetzze                 /* inner constructor */
140081ac50a2Sgetzze                 parseShortFunction(lexer, scope, K_STRUCT);
140181ac50a2Sgetzze                 continue;
140281ac50a2Sgetzze             }
140381ac50a2Sgetzze 
140481ac50a2Sgetzze             vStringCopy(field, lexer->token_str);
140581ac50a2Sgetzze 
140681ac50a2Sgetzze             /* parse type annotation */
14077ea3b2f4Sgetzze             advanceToken(lexer, true, false);
140881ac50a2Sgetzze             if (lexer->cur_token == TOKEN_TYPE_ANNOTATION)
140981ac50a2Sgetzze             {
141081ac50a2Sgetzze                 addTag(field, NULL, vStringValue(lexer->token_str), K_FIELD, lexer->line, lexer->pos, scope, K_STRUCT);
14117ea3b2f4Sgetzze                 advanceToken(lexer, true, false);
141281ac50a2Sgetzze             }
141381ac50a2Sgetzze             else
141481ac50a2Sgetzze             {
141581ac50a2Sgetzze                 addTag(field, NULL, NULL, K_FIELD, lexer->line, lexer->pos, scope, K_STRUCT);
141681ac50a2Sgetzze             }
141781ac50a2Sgetzze         }
141881ac50a2Sgetzze         else if (lexer->cur_token == TOKEN_FUNCTION)
141981ac50a2Sgetzze         {
142081ac50a2Sgetzze             /* inner constructor */
142181ac50a2Sgetzze             parseFunction(lexer, scope, K_STRUCT);
142281ac50a2Sgetzze         }
142381ac50a2Sgetzze         else
142481ac50a2Sgetzze         {
142581ac50a2Sgetzze             /* Get next token */
14267ea3b2f4Sgetzze             advanceToken(lexer, true, false);
142781ac50a2Sgetzze         }
142870cbf361Sgetzze         resetScope(scope, old_scope_len);
142981ac50a2Sgetzze     }
143081ac50a2Sgetzze 
143181ac50a2Sgetzze     vStringDelete(name);
143281ac50a2Sgetzze     vStringDelete(field);
143381ac50a2Sgetzze }
143481ac50a2Sgetzze 
143581ac50a2Sgetzze 
/* Main dispatch loop: walks the token stream and hands every construct that
 * can produce tags to its dedicated parser.
 *
 * lexer: lexer state, advanced in place.
 * delim: when true, return as soon as the block nesting level (which starts
 *        at 1) drops to zero or below; when false, run until TOKEN_EOF.
 * kind:  forwarded to the sub-parsers as the kind of the enclosing entity.
 * scope: scope string handed to the sub-parsers; at the end of every
 *        iteration it is reset to the length it had when the iteration began.
 */
static void parseExpr (lexerState *lexer, bool delim, int kind, vString *scope)
{
    int level = 1;
    size_t old_scope_len;
    /* Lazily allocated buffer holding the `Mod` part of `Mod.func(...)`. */
    vString *local_scope = NULL;

    while (lexer->cur_token != TOKEN_EOF)
    {
        old_scope_len = vStringLength(scope);
        /* Advance token and update if this is a new line */
        while (lexer->cur_token == TOKEN_NEWLINE ||
               lexer->cur_token == TOKEN_SEMICOLON ||
               lexer->cur_token == TOKEN_NONE )
        {
            advanceToken(lexer, true, false);
        }

        /* Make sure every case advances the token
         * otherwise we can be stuck in infinite loop */
        switch (lexer->cur_token)
        {
            case TOKEN_CONST:
                parseConst(lexer, scope, kind);
                break;
            case TOKEN_FUNCTION:
                parseFunction(lexer, scope, kind);
                break;
            case TOKEN_MACRO:
                parseMacro(lexer, scope, kind);
                break;
            case TOKEN_MODULE:
                parseModule(lexer, scope, kind);
                break;
            case TOKEN_STRUCT:
                parseStruct(lexer, scope, kind);
                break;
            case TOKEN_TYPE:
                parseType(lexer, scope, kind);
                break;
            case TOKEN_IMPORT:
                parseImport(lexer, scope, TOKEN_IMPORT, kind);
                break;
            case TOKEN_USING:
                parseImport(lexer, scope, TOKEN_USING, kind);
                /* Must not fall through: the token parseImport() leaves
                 * current is not the identifier the next case expects. */
                break;
            case TOKEN_IDENTIFIER:
                if (lexer->first_token && lexer->cur_c == '.')
                {
                    /* `Mod.name` at the start of an expression: remember
                     * `Mod` so that a short function definition
                     * `Mod.name(...) = ...` is parsed with it as scope. */
                    if (local_scope == NULL)
                    {
                        local_scope = vStringNew();
                    }
                    vStringCopy(local_scope, lexer->token_str);
                    advanceChar(lexer);
                    /* next token, but keep the first_token value */
                    advanceToken(lexer, true, true);
                    skipWhitespace(lexer, false);
                    if (lexer->cur_c == '(')
                    {
                        parseShortFunction(lexer, local_scope, K_MODULE);
                    }
                }
                else
                {
                    skipWhitespace(lexer, false);
                    /* An identifier starting an expression and followed by
                     * '(' or '{' may be a short function definition. */
                    if (lexer->first_token && (lexer->cur_c == '(' || lexer->cur_c == '{'))
                    {
                        parseShortFunction(lexer, scope, kind);
                    }
                    else
                    {
                        advanceToken(lexer, true, false);
                    }
                }
                break;
            case TOKEN_OPEN_BLOCK:
                level++;
                advanceToken(lexer, true, false);
                break;
            case TOKEN_CLOSE_BLOCK:
                level--;
                advanceToken(lexer, true, false);
                break;
            default:
                advanceToken(lexer, true, false);
                break;
        }
        resetScope(scope, old_scope_len);
        /* A delimited expression ends once its opening block is closed. */
        if (delim && level <= 0)
        {
            break;
        }
    }
    vStringDelete(local_scope);
}
153081ac50a2Sgetzze 
findJuliaTags(void)153181ac50a2Sgetzze static void findJuliaTags (void)
153281ac50a2Sgetzze {
153381ac50a2Sgetzze     lexerState lexer;
153481ac50a2Sgetzze     vString* scope = vStringNew();
153581ac50a2Sgetzze     initLexer(&lexer);
153681ac50a2Sgetzze 
153781ac50a2Sgetzze     parseExpr(&lexer, false, K_NONE, scope);
153881ac50a2Sgetzze     vStringDelete(scope);
153981ac50a2Sgetzze 
154081ac50a2Sgetzze     deInitLexer(&lexer);
154181ac50a2Sgetzze }
154281ac50a2Sgetzze 
JuliaParser(void)154381ac50a2Sgetzze extern parserDefinition* JuliaParser (void)
154481ac50a2Sgetzze {
154581ac50a2Sgetzze     static const char *const extensions [] = { "jl", NULL };
154681ac50a2Sgetzze     parserDefinition* def = parserNew ("Julia");
154781ac50a2Sgetzze     def->kindTable  = JuliaKinds;
154881ac50a2Sgetzze     def->kindCount  = ARRAY_SIZE (JuliaKinds);
154981ac50a2Sgetzze     def->extensions = extensions;
155081ac50a2Sgetzze     def->parser     = findJuliaTags;
155181ac50a2Sgetzze     def->keywordTable = JuliaKeywordTable;
155281ac50a2Sgetzze     def->keywordCount = ARRAY_SIZE (JuliaKeywordTable);
155381ac50a2Sgetzze     return def;
155481ac50a2Sgetzze }
1555