181ac50a2Sgetzze /*
281ac50a2Sgetzze * Copyright (c) 2020-2021, getzze <getzze@gmail.com>
381ac50a2Sgetzze *
481ac50a2Sgetzze * This source code is released for free distribution under the terms of the
581ac50a2Sgetzze * GNU General Public License version 2 or (at your option) any later version.
681ac50a2Sgetzze *
781ac50a2Sgetzze * This module contains functions for generating tags for Julia files.
881ac50a2Sgetzze *
981ac50a2Sgetzze * Documented 'kinds':
1081ac50a2Sgetzze * https://docs.julialang.org/en/v1/manual/documentation/#Syntax-Guide
1181ac50a2Sgetzze * Language parser in Scheme:
1281ac50a2Sgetzze * https://github.com/JuliaLang/julia/blob/master/src/julia-parser.scm
1381ac50a2Sgetzze */
1481ac50a2Sgetzze
1581ac50a2Sgetzze /*
1681ac50a2Sgetzze * INCLUDE FILES
1781ac50a2Sgetzze */
1881ac50a2Sgetzze #include "general.h" /* must always come first */
1981ac50a2Sgetzze
2081ac50a2Sgetzze #include <string.h>
2181ac50a2Sgetzze
2281ac50a2Sgetzze #include "keyword.h"
2381ac50a2Sgetzze #include "parse.h"
2481ac50a2Sgetzze #include "entry.h"
2581ac50a2Sgetzze #include "options.h"
2681ac50a2Sgetzze #include "read.h"
2781ac50a2Sgetzze #include "routines.h"
2881ac50a2Sgetzze #include "vstring.h"
2981ac50a2Sgetzze #include "xtag.h"
3081ac50a2Sgetzze
3181ac50a2Sgetzze /*
3281ac50a2Sgetzze * MACROS
3381ac50a2Sgetzze */
3481ac50a2Sgetzze #define MAX_STRING_LENGTH 256
3581ac50a2Sgetzze
3681ac50a2Sgetzze /*
3781ac50a2Sgetzze * DATA DEFINITIONS
3881ac50a2Sgetzze */
/* Tag kinds known to the Julia parser.
 * The enumerators from K_CONSTANT to K_UNKNOWN are listed in the same order
 * as the entries of JuliaKinds[]; K_NONE has no entry there. */
typedef enum {
    K_CONSTANT = 0,
    K_FUNCTION,
    K_FIELD,
    K_MACRO,
    K_MODULE,
    K_STRUCT,
    K_TYPE,
    K_UNKNOWN,
    K_NONE
} JuliaKind;
5081ac50a2Sgetzze
/* Roles of a module tag, in the same order as JuliaModuleRoles[]. */
typedef enum {
    JULIA_MODULE_IMPORTED = 0,  /* "imported" */
    JULIA_MODULE_USED,          /* "used" */
    JULIA_MODULE_NAMESPACE,     /* "namespace" */
} juliaModuleRole;
563cc79e5cSAmaiKinono
/* Roles of an "unknown" tag, in the same order as JuliaUnknownRoles[]. */
typedef enum {
    JULIA_UNKNOWN_IMPORTED = 0, /* "imported" */
    JULIA_UNKNOWN_USED,         /* "used" */
} juliaUnknownRole;
613cc79e5cSAmaiKinono
623cc79e5cSAmaiKinono /*
633cc79e5cSAmaiKinono * using X X = (kind:module, role:used)
643cc79e5cSAmaiKinono *
653cc79e5cSAmaiKinono * using X: a, b X = (kind:module, role:namespace)
663cc79e5cSAmaiKinono * a, b = (kind:unknown, role:used, scope:module:X)
673cc79e5cSAmaiKinono *
683cc79e5cSAmaiKinono * import X X = (kind:module, role:imported)
693cc79e5cSAmaiKinono *
703cc79e5cSAmaiKinono * import X.a, Y.b X, Y = (kind:module, role:namespace)
713cc79e5cSAmaiKinono * a, b = (kind:unknown, role:imported, scope:module:X)
723cc79e5cSAmaiKinono *
733cc79e5cSAmaiKinono * import X: a, b Same as the above one
743cc79e5cSAmaiKinono */
753cc79e5cSAmaiKinono static roleDefinition JuliaModuleRoles [] = {
763cc79e5cSAmaiKinono { true, "imported", "loaded by \"import\"" },
773cc79e5cSAmaiKinono { true, "used", "loaded by \"using\"" },
783cc79e5cSAmaiKinono { true, "namespace", "only some symbols in it are imported" },
793cc79e5cSAmaiKinono };
803cc79e5cSAmaiKinono
813cc79e5cSAmaiKinono static roleDefinition JuliaUnknownRoles [] = {
823cc79e5cSAmaiKinono { true, "imported", "loaded by \"import\"" },
833cc79e5cSAmaiKinono { true, "used", "loaded by \"using\""},
843cc79e5cSAmaiKinono };
853cc79e5cSAmaiKinono
8681ac50a2Sgetzze static kindDefinition JuliaKinds [] = {
8781ac50a2Sgetzze { true, 'c', "constant", "Constants" },
8881ac50a2Sgetzze { true, 'f', "function", "Functions" },
8981ac50a2Sgetzze { true, 'g', "field", "Fields" },
9081ac50a2Sgetzze { true, 'm', "macro", "Macros" },
913cc79e5cSAmaiKinono { true, 'n', "module", "Modules",
923cc79e5cSAmaiKinono ATTACH_ROLES(JuliaModuleRoles) },
9381ac50a2Sgetzze { true, 's', "struct", "Structures" },
9481ac50a2Sgetzze { true, 't', "type", "Types" },
953cc79e5cSAmaiKinono { true, 'x', "unknown", "name defined in other modules",
963cc79e5cSAmaiKinono .referenceOnly = true, ATTACH_ROLES(JuliaUnknownRoles) },
9781ac50a2Sgetzze };
9881ac50a2Sgetzze
/* Token types produced by the lexer.
 * Token starts that are not recognized are stored in lexerState::cur_token
 * by their character value instead (for example '\''). */
typedef enum {
    TOKEN_NONE = 0,         /* none */
    TOKEN_WHITESPACE,       /* blanks and/or comments */
    TOKEN_PAREN_BLOCK,      /* ( ... ) */
    TOKEN_BRACKET_BLOCK,    /* [ ... ] */
    TOKEN_CURLY_BLOCK,      /* { ... } */
    TOKEN_OPEN_BLOCK,       /* if, for, while, try, do, begin, let, quote */
    TOKEN_CLOSE_BLOCK,      /* end */
    TOKEN_TYPE_ANNOTATION,  /* starts with "::", "<:" or ">:" */
    TOKEN_TYPE_WHERE,       /* where */
    TOKEN_CONST,            /* const */
    TOKEN_STRING,           /* = 10 */
    TOKEN_COMMAND,          /* backtick-quoted command */
    TOKEN_MACROCALL,        /* starts with '@' */
    TOKEN_IDENTIFIER,
    TOKEN_MODULE,           /* module, baremodule */
    TOKEN_MACRO,            /* macro */
    TOKEN_FUNCTION,         /* function */
    TOKEN_STRUCT,           /* struct */
    TOKEN_ENUM,
    TOKEN_TYPE,             /* type */
    TOKEN_IMPORT,           /* = 20 */
    TOKEN_USING,            /* using */
    TOKEN_EXPORT,           /* export */
    TOKEN_NEWLINE,
    TOKEN_SEMICOLON,
    TOKEN_COMPOSER_KWD,     /* KEYWORD only: mutable, primitive, abstract */
    TOKEN_EOF,
    TOKEN_COUNT
} tokenType;
12981ac50a2Sgetzze
13081ac50a2Sgetzze static const keywordTable JuliaKeywordTable [] = {
13181ac50a2Sgetzze /* TODO: Sort by keys. */
13281ac50a2Sgetzze { "mutable", TOKEN_COMPOSER_KWD },
13381ac50a2Sgetzze { "primitive", TOKEN_COMPOSER_KWD },
13481ac50a2Sgetzze { "abstract", TOKEN_COMPOSER_KWD },
13581ac50a2Sgetzze
13681ac50a2Sgetzze { "if", TOKEN_OPEN_BLOCK },
13781ac50a2Sgetzze { "for", TOKEN_OPEN_BLOCK },
13881ac50a2Sgetzze { "while", TOKEN_OPEN_BLOCK },
13981ac50a2Sgetzze { "try", TOKEN_OPEN_BLOCK },
14081ac50a2Sgetzze { "do", TOKEN_OPEN_BLOCK },
14181ac50a2Sgetzze { "begin", TOKEN_OPEN_BLOCK },
14281ac50a2Sgetzze { "let", TOKEN_OPEN_BLOCK },
14381ac50a2Sgetzze { "quote", TOKEN_OPEN_BLOCK },
14481ac50a2Sgetzze
14581ac50a2Sgetzze { "module", TOKEN_MODULE },
14681ac50a2Sgetzze { "baremodule",TOKEN_MODULE },
14781ac50a2Sgetzze
1483cc79e5cSAmaiKinono { "using", TOKEN_USING },
14981ac50a2Sgetzze { "import", TOKEN_IMPORT },
15081ac50a2Sgetzze
15181ac50a2Sgetzze { "export", TOKEN_EXPORT },
15281ac50a2Sgetzze { "const", TOKEN_CONST },
15381ac50a2Sgetzze { "macro", TOKEN_MACRO },
15481ac50a2Sgetzze { "function", TOKEN_FUNCTION },
15581ac50a2Sgetzze { "struct", TOKEN_STRUCT },
15681ac50a2Sgetzze { "type", TOKEN_TYPE },
15781ac50a2Sgetzze { "where", TOKEN_TYPE_WHERE },
15881ac50a2Sgetzze { "end", TOKEN_CLOSE_BLOCK },
15981ac50a2Sgetzze };
16081ac50a2Sgetzze
16181ac50a2Sgetzze typedef struct {
16281ac50a2Sgetzze /* Characters */
16381ac50a2Sgetzze int prev_c;
16481ac50a2Sgetzze int cur_c;
16581ac50a2Sgetzze int next_c;
16681ac50a2Sgetzze
16781ac50a2Sgetzze /* Tokens */
16881ac50a2Sgetzze bool first_token;
16981ac50a2Sgetzze int cur_token;
17081ac50a2Sgetzze vString* token_str;
17181ac50a2Sgetzze unsigned long line;
17281ac50a2Sgetzze MIOPos pos;
17381ac50a2Sgetzze } lexerState;
17481ac50a2Sgetzze
17581ac50a2Sgetzze /*
17681ac50a2Sgetzze * FUNCTION PROTOTYPES
17781ac50a2Sgetzze */
17881ac50a2Sgetzze
17981ac50a2Sgetzze static void parseExpr (lexerState *lexer, bool delim, int kind, vString *scope);
18081ac50a2Sgetzze
18181ac50a2Sgetzze static void scanParenBlock (lexerState *lexer);
18281ac50a2Sgetzze
18381ac50a2Sgetzze /*
18481ac50a2Sgetzze * FUNCTION DEFINITIONS
18581ac50a2Sgetzze */
18681ac50a2Sgetzze
/* Tell whether the string `what` ends with the string `withwhat`.
 * Returns 1 if it does (any string ends with the empty string), 0 otherwise.
 * Lengths are kept as size_t so that they are never narrowed to int. */
static int endswith(const char* what, const char* withwhat)
{
    const size_t what_len = strlen(what);
    const size_t suffix_len = strlen(withwhat);

    if (suffix_len > what_len)
    {
        return 0;
    }

    /* compare the last suffix_len bytes of `what` with the suffix */
    return memcmp(what + (what_len - suffix_len), withwhat, suffix_len) == 0;
}
19881ac50a2Sgetzze
19981ac50a2Sgetzze /* Resets the scope string to the old length */
resetScope(vString * scope,size_t old_len)20081ac50a2Sgetzze static void resetScope (vString *scope, size_t old_len)
20181ac50a2Sgetzze {
20281ac50a2Sgetzze vStringTruncate (scope, old_len);
20381ac50a2Sgetzze }
20481ac50a2Sgetzze
20581ac50a2Sgetzze /* Adds a name to the end of the scope string */
addToScope(vString * scope,vString * name)20681ac50a2Sgetzze static void addToScope (vString *scope, vString *name)
20781ac50a2Sgetzze {
20881ac50a2Sgetzze if (vStringLength(scope) > 0)
20981ac50a2Sgetzze {
21081ac50a2Sgetzze vStringPut(scope, '.');
21181ac50a2Sgetzze }
21281ac50a2Sgetzze vStringCat(scope, name);
21381ac50a2Sgetzze }
21481ac50a2Sgetzze
21581ac50a2Sgetzze /* Reads a character from the file */
advanceChar(lexerState * lexer)21681ac50a2Sgetzze static void advanceChar (lexerState *lexer)
21781ac50a2Sgetzze {
21881ac50a2Sgetzze lexer->prev_c = lexer->cur_c;
21981ac50a2Sgetzze lexer->cur_c = lexer->next_c;
22081ac50a2Sgetzze lexer->next_c = getcFromInputFile();
22181ac50a2Sgetzze }
22281ac50a2Sgetzze
22381ac50a2Sgetzze /* Reads N characters from the file */
advanceNChar(lexerState * lexer,int n)22481ac50a2Sgetzze static void advanceNChar (lexerState *lexer, int n)
22581ac50a2Sgetzze {
22681ac50a2Sgetzze while (n--)
22781ac50a2Sgetzze {
22881ac50a2Sgetzze advanceChar(lexer);
22981ac50a2Sgetzze }
23081ac50a2Sgetzze }
23181ac50a2Sgetzze
23281ac50a2Sgetzze /* Store the current character in lexerState::token_str if there is space
23381ac50a2Sgetzze * (set by MAX_STRING_LENGTH), and then read the next character from the file */
advanceAndStoreChar(lexerState * lexer)23481ac50a2Sgetzze static void advanceAndStoreChar (lexerState *lexer)
23581ac50a2Sgetzze {
23681ac50a2Sgetzze if (vStringLength(lexer->token_str) < MAX_STRING_LENGTH)
23781ac50a2Sgetzze {
23881ac50a2Sgetzze vStringPut(lexer->token_str, (char) lexer->cur_c);
23981ac50a2Sgetzze }
24081ac50a2Sgetzze advanceChar(lexer);
24181ac50a2Sgetzze }
24281ac50a2Sgetzze
/* Tell whether `c` is a blank (space or tab). When `newline` is true,
 * carriage return and line feed count as whitespace too. */
static bool isWhitespace (int c, bool newline)
{
    switch (c)
    {
        case ' ':
        case '\t':
            return true;
        case '\r':
        case '\n':
            return newline;
        default:
            return false;
    }
}
25181ac50a2Sgetzze
/* Tell whether `c` lies in the 7-bit range 0x00..0x7F. */
static bool isAscii (int c)
{
    return !(c < 0 || c > 0x7F);
}
25681ac50a2Sgetzze
/* Tell whether `c` is one of the characters this lexer treats as an
 * operator. */
static bool isOperator (int c)
{
    switch (c)
    {
        case '%': case '^': case '&': case '|':
        case '*': case '-': case '+': case '~':
        case '<': case '>': case ',': case '/':
        case '?': case '=': case ':':
            return true;
        default:
            return false;
    }
}
26881ac50a2Sgetzze
/* Tell whether `c` may start an identifier: an ASCII letter, '_' or any
 * value of 0xC0 and above.
 * This does not distinguish Unicode letters from operators... */
static bool isIdentifierFirstCharacter (int c)
{
    if (c >= 0xC0)
    {
        return true;
    }
    if (!isAscii(c))
    {
        return false;
    }
    return c == '_' || isalpha (c) != 0;
}
27481ac50a2Sgetzze
/* Tell whether `c` may continue an identifier: anything accepted by
 * isIdentifierFirstCharacter(), an ASCII digit, '!' or any value of 0x80
 * and above.
 * This does not distinguish Unicode letters from operators... */
static bool isIdentifierCharacter (int c)
{
    if (c >= 0x80)
    {
        return true;
    }
    if (isIdentifierFirstCharacter(c))
    {
        return true;
    }
    return isAscii(c) && (c == '!' || isdigit(c) != 0);
}
28081ac50a2Sgetzze
/* Advance past a run of whitespace without storing it. Line breaks are
 * skipped as well when `newline` is true. */
static void skipWhitespace (lexerState *lexer, bool newline)
{
    for (;;)
    {
        if (!isWhitespace(lexer->cur_c, newline))
        {
            break;
        }
        advanceChar(lexer);
    }
}
28881ac50a2Sgetzze
/* Tell whether a quote following the character `c` is a transpose operator.
 * The transpose operator is only allowed after an identifier, a number, an
 * expression inside parenthesis or an index, so `c` has to be an identifier
 * character, ')' or ']'. */
static bool isTranspose (int c)
{
    if (c == ')' || c == ']')
    {
        return true;
    }
    return isIdentifierCharacter(c);
}
29481ac50a2Sgetzze
29581ac50a2Sgetzze
29681ac50a2Sgetzze /*
29781ac50a2Sgetzze * Lexer functions
29881ac50a2Sgetzze * */
29981ac50a2Sgetzze
30081ac50a2Sgetzze /* Check that the current character sequence is a type declaration or inheritance */
isTypeDecl(lexerState * lexer)30181ac50a2Sgetzze static bool isTypeDecl (lexerState *lexer)
30281ac50a2Sgetzze {
30381ac50a2Sgetzze if ((lexer->prev_c != '.' && lexer->cur_c == '<' && lexer->next_c == ':') ||
30481ac50a2Sgetzze (lexer->prev_c != '.' && lexer->cur_c == '>' && lexer->next_c == ':') ||
30581ac50a2Sgetzze (lexer->cur_c == ':' && lexer->next_c == ':') )
30681ac50a2Sgetzze {
30781ac50a2Sgetzze return true;
30881ac50a2Sgetzze }
30981ac50a2Sgetzze return false;
31081ac50a2Sgetzze }
31181ac50a2Sgetzze
31281ac50a2Sgetzze /* Check if the current char is a new line */
isNewLine(lexerState * lexer)31381ac50a2Sgetzze static bool isNewLine (lexerState *lexer)
31481ac50a2Sgetzze {
31581ac50a2Sgetzze return (lexer->cur_c == '\n')? true: false;
31681ac50a2Sgetzze }
31781ac50a2Sgetzze
31881ac50a2Sgetzze /* Check if the current char is a new line.
31981ac50a2Sgetzze * If it is, skip the newline and return true */
skipNewLine(lexerState * lexer)32081ac50a2Sgetzze static bool skipNewLine (lexerState *lexer)
32181ac50a2Sgetzze {
32281ac50a2Sgetzze if (isNewLine(lexer))
32381ac50a2Sgetzze {
32481ac50a2Sgetzze advanceChar(lexer);
32581ac50a2Sgetzze return true;
32681ac50a2Sgetzze }
32781ac50a2Sgetzze return false;
32881ac50a2Sgetzze }
32981ac50a2Sgetzze
33081ac50a2Sgetzze /* Skip a single comment or multiline comment
33181ac50a2Sgetzze * A single line comment starts with #
33281ac50a2Sgetzze * A multi-line comment is encapsulated in #=...=# and they are nesting
33381ac50a2Sgetzze * */
skipComment(lexerState * lexer)33481ac50a2Sgetzze static void skipComment (lexerState *lexer)
33581ac50a2Sgetzze {
33681ac50a2Sgetzze /* # */
33781ac50a2Sgetzze if (lexer->next_c != '=')
33881ac50a2Sgetzze {
33981ac50a2Sgetzze advanceNChar(lexer, 1);
34081ac50a2Sgetzze while (lexer->cur_c != EOF && lexer->cur_c != '\n')
34181ac50a2Sgetzze {
34281ac50a2Sgetzze advanceChar(lexer);
34381ac50a2Sgetzze }
34481ac50a2Sgetzze }
34581ac50a2Sgetzze /* block comment */
34681ac50a2Sgetzze else /* if (lexer->next_c == '=') */
34781ac50a2Sgetzze {
34881ac50a2Sgetzze int level = 1;
34981ac50a2Sgetzze advanceNChar(lexer, 2);
35081ac50a2Sgetzze while (lexer->cur_c != EOF && level > 0)
35181ac50a2Sgetzze {
35281ac50a2Sgetzze if (lexer->cur_c == '=' && lexer->next_c == '#')
35381ac50a2Sgetzze {
35481ac50a2Sgetzze level--;
35581ac50a2Sgetzze advanceNChar(lexer, 2);
35681ac50a2Sgetzze }
35781ac50a2Sgetzze else if (lexer->cur_c == '#' && lexer->next_c == '=')
35881ac50a2Sgetzze {
35981ac50a2Sgetzze level++;
36081ac50a2Sgetzze advanceNChar(lexer, 2);
36181ac50a2Sgetzze }
36281ac50a2Sgetzze else
36381ac50a2Sgetzze {
36481ac50a2Sgetzze advanceChar(lexer);
36581ac50a2Sgetzze }
36681ac50a2Sgetzze }
36781ac50a2Sgetzze }
36881ac50a2Sgetzze }
36981ac50a2Sgetzze
/* Append an identifier to lexerState::token_str, emptying the string first
 * if `clear` is true. The current character is taken unconditionally; the
 * following ones are taken as long as they are identifier characters. */
static void scanIdentifier (lexerState *lexer, bool clear)
{
    if (clear)
    {
        vStringClear(lexer->token_str);
    }

    /* first character, whatever it is */
    advanceAndStoreChar(lexer);

    while (lexer->cur_c != EOF && isIdentifierCharacter(lexer->cur_c))
    {
        advanceAndStoreChar(lexer);
    }
}
38281ac50a2Sgetzze
38381ac50a2Sgetzze /* Scan a quote-like expression.
38481ac50a2Sgetzze * Allow for triple-character variand and interpolation with `$`.
38581ac50a2Sgetzze * These last past the end of the line, so be careful
38681ac50a2Sgetzze * not to store too much of them (see MAX_STRING_LENGTH). */
scanStringOrCommand(lexerState * lexer,int c)38781ac50a2Sgetzze static void scanStringOrCommand (lexerState *lexer, int c)
38881ac50a2Sgetzze {
38981ac50a2Sgetzze bool istriple = false;
39081ac50a2Sgetzze
39181ac50a2Sgetzze /* Pass the first "quote"-character */
39281ac50a2Sgetzze advanceAndStoreChar(lexer);
39381ac50a2Sgetzze
39481ac50a2Sgetzze /* Check for triple "quote"-character */
39581ac50a2Sgetzze if (lexer->cur_c == c && lexer->next_c == c)
39681ac50a2Sgetzze {
39781ac50a2Sgetzze istriple = true;
39881ac50a2Sgetzze advanceAndStoreChar(lexer);
39981ac50a2Sgetzze advanceAndStoreChar(lexer);
40081ac50a2Sgetzze
40181ac50a2Sgetzze /* Cancel up to 2 "quote"-characters after opening the triple */
40281ac50a2Sgetzze if (lexer->cur_c == c)
40381ac50a2Sgetzze {
40481ac50a2Sgetzze advanceAndStoreChar(lexer);
40581ac50a2Sgetzze if (lexer->cur_c == c)
40681ac50a2Sgetzze {
40781ac50a2Sgetzze advanceAndStoreChar(lexer);
40881ac50a2Sgetzze }
40981ac50a2Sgetzze }
41081ac50a2Sgetzze }
41181ac50a2Sgetzze
41281ac50a2Sgetzze while (lexer->cur_c != EOF && lexer->cur_c != c)
41381ac50a2Sgetzze {
41481ac50a2Sgetzze /* Check for interpolation before checking for end of "quote" */
41581ac50a2Sgetzze if (lexer->cur_c == '$' && lexer->next_c == '(')
41681ac50a2Sgetzze {
41781ac50a2Sgetzze advanceAndStoreChar(lexer);
41881ac50a2Sgetzze scanParenBlock(lexer);
41981ac50a2Sgetzze /* continue to avoid advance character again. Correct bug
42081ac50a2Sgetzze * with "quote"-character just after closing parenthesis */
42181ac50a2Sgetzze continue;
42281ac50a2Sgetzze }
42381ac50a2Sgetzze
42481ac50a2Sgetzze if (lexer->cur_c == '\\' &&
42581ac50a2Sgetzze (lexer->next_c == c || lexer->next_c == '\\'))
42681ac50a2Sgetzze {
42781ac50a2Sgetzze advanceAndStoreChar(lexer);
42881ac50a2Sgetzze }
42981ac50a2Sgetzze advanceAndStoreChar(lexer);
43081ac50a2Sgetzze
43181ac50a2Sgetzze /* Cancel up to 2 "quote"-characters if triple string */
43281ac50a2Sgetzze if (istriple && lexer->cur_c == c)
43381ac50a2Sgetzze {
43481ac50a2Sgetzze advanceAndStoreChar(lexer);
43581ac50a2Sgetzze if (lexer->cur_c == c)
43681ac50a2Sgetzze {
43781ac50a2Sgetzze advanceAndStoreChar(lexer);
43881ac50a2Sgetzze }
43981ac50a2Sgetzze }
44081ac50a2Sgetzze }
44181ac50a2Sgetzze /* Pass the last "quote"-character */
44281ac50a2Sgetzze advanceAndStoreChar(lexer);
44381ac50a2Sgetzze }
44481ac50a2Sgetzze
44581ac50a2Sgetzze
44681ac50a2Sgetzze /* Scan commands surrounded by backticks,
44781ac50a2Sgetzze * possibly triple backticks */
scanCommand(lexerState * lexer)44881ac50a2Sgetzze static void scanCommand (lexerState *lexer)
44981ac50a2Sgetzze {
45081ac50a2Sgetzze scanStringOrCommand(lexer, '`');
45181ac50a2Sgetzze }
45281ac50a2Sgetzze
45381ac50a2Sgetzze /* Double-quoted strings,
45481ac50a2Sgetzze * possibly triple doublequotes */
scanString(lexerState * lexer)45581ac50a2Sgetzze static void scanString (lexerState *lexer)
45681ac50a2Sgetzze {
45781ac50a2Sgetzze scanStringOrCommand(lexer, '"');
45881ac50a2Sgetzze }
45981ac50a2Sgetzze
46081ac50a2Sgetzze
46181ac50a2Sgetzze /* This deals with character literals: 'n', '\n', '\uFFFF';
46281ac50a2Sgetzze * and matrix transpose: A'.
46381ac50a2Sgetzze * We'll use this approximate regexp for the literals:
46481ac50a2Sgetzze * \' [^'] \' or \' \\ [^']+ \' or \' \\ \' \'
46581ac50a2Sgetzze * Either way, we'll treat this token as a string, so it gets preserved */
scanCharacterOrTranspose(lexerState * lexer)46681ac50a2Sgetzze static bool scanCharacterOrTranspose (lexerState *lexer)
46781ac50a2Sgetzze {
46881ac50a2Sgetzze if (isTranspose(lexer->prev_c))
46981ac50a2Sgetzze {
47081ac50a2Sgetzze /* deal with untranspose/transpose sequence */
47181ac50a2Sgetzze while (lexer->cur_c != EOF && lexer->cur_c == '\'')
47281ac50a2Sgetzze {
47381ac50a2Sgetzze advanceAndStoreChar(lexer);
47481ac50a2Sgetzze }
47581ac50a2Sgetzze return false;
47681ac50a2Sgetzze }
47781ac50a2Sgetzze
47881ac50a2Sgetzze //vStringClear(lexer->token_str);
47981ac50a2Sgetzze advanceAndStoreChar(lexer);
48081ac50a2Sgetzze
48181ac50a2Sgetzze if (lexer->cur_c == '\\')
48281ac50a2Sgetzze {
48381ac50a2Sgetzze advanceAndStoreChar(lexer);
48481ac50a2Sgetzze /* The \' \\ \' \' (literally '\'') case */
48581ac50a2Sgetzze if (lexer->cur_c == '\'' && lexer->next_c == '\'')
48681ac50a2Sgetzze {
48781ac50a2Sgetzze advanceAndStoreChar(lexer);
48881ac50a2Sgetzze advanceAndStoreChar(lexer);
48981ac50a2Sgetzze }
49081ac50a2Sgetzze /* The \' \\ [^']+ \' case */
49181ac50a2Sgetzze else
49281ac50a2Sgetzze {
49381ac50a2Sgetzze while (lexer->cur_c != EOF && lexer->cur_c != '\'')
49481ac50a2Sgetzze {
49581ac50a2Sgetzze advanceAndStoreChar(lexer);
49681ac50a2Sgetzze }
49781ac50a2Sgetzze }
49881ac50a2Sgetzze }
49981ac50a2Sgetzze /* The \' [^'] \' and \' \' \' cases */
50081ac50a2Sgetzze else if (lexer->next_c == '\'')
50181ac50a2Sgetzze {
50281ac50a2Sgetzze advanceAndStoreChar(lexer);
50381ac50a2Sgetzze advanceAndStoreChar(lexer);
50481ac50a2Sgetzze }
50581ac50a2Sgetzze /* Otherwise it is malformed */
50681ac50a2Sgetzze return true;
50781ac50a2Sgetzze }
50881ac50a2Sgetzze
50981ac50a2Sgetzze /* Parse a block with opening and closing character */
scanBlock(lexerState * lexer,int open,int close,bool convert_newline)51081ac50a2Sgetzze static void scanBlock (lexerState *lexer, int open, int close, bool convert_newline)
51181ac50a2Sgetzze {
51281ac50a2Sgetzze /* Assume the current char is `open` */
51381ac50a2Sgetzze int level = 1;
51481ac50a2Sgetzze
51581ac50a2Sgetzze /* Pass the first opening */
51681ac50a2Sgetzze advanceAndStoreChar(lexer);
51781ac50a2Sgetzze
51881ac50a2Sgetzze while (lexer->cur_c != EOF && level > 0)
51981ac50a2Sgetzze {
52081ac50a2Sgetzze /* Parse everything */
52181ac50a2Sgetzze if (lexer->cur_c == ' ' || lexer->cur_c == '\t')
52281ac50a2Sgetzze {
52381ac50a2Sgetzze skipWhitespace(lexer, false);
52481ac50a2Sgetzze vStringPut(lexer->token_str, ' ');
52581ac50a2Sgetzze }
52681ac50a2Sgetzze if (lexer->cur_c == '#')
52781ac50a2Sgetzze {
52881ac50a2Sgetzze skipComment(lexer);
52981ac50a2Sgetzze }
53081ac50a2Sgetzze else if (lexer->cur_c == '\"')
53181ac50a2Sgetzze {
53281ac50a2Sgetzze scanString(lexer);
53381ac50a2Sgetzze }
53481ac50a2Sgetzze else if (lexer->cur_c == '\'')
53581ac50a2Sgetzze {
53681ac50a2Sgetzze scanCharacterOrTranspose(lexer);
53781ac50a2Sgetzze }
53881ac50a2Sgetzze
53981ac50a2Sgetzze /* Parse opening/closing */
54081ac50a2Sgetzze if (lexer->cur_c == open)
54181ac50a2Sgetzze {
54281ac50a2Sgetzze level++;
54381ac50a2Sgetzze }
54481ac50a2Sgetzze else if (lexer->cur_c == close)
54581ac50a2Sgetzze {
54681ac50a2Sgetzze level--;
54781ac50a2Sgetzze }
54881ac50a2Sgetzze
54981ac50a2Sgetzze if (convert_newline && skipNewLine(lexer))
55081ac50a2Sgetzze {
55181ac50a2Sgetzze vStringPut(lexer->token_str, ' ');
55281ac50a2Sgetzze }
55381ac50a2Sgetzze else
55481ac50a2Sgetzze {
55581ac50a2Sgetzze advanceAndStoreChar(lexer);
55681ac50a2Sgetzze }
55781ac50a2Sgetzze
55881ac50a2Sgetzze }
55981ac50a2Sgetzze /* Lexer position is just after `close` */
56081ac50a2Sgetzze }
56181ac50a2Sgetzze
56281ac50a2Sgetzze
56381ac50a2Sgetzze /* Parse a block inside parenthesis, for example a function argument list */
scanParenBlock(lexerState * lexer)56481ac50a2Sgetzze static void scanParenBlock (lexerState *lexer)
56581ac50a2Sgetzze {
56681ac50a2Sgetzze scanBlock(lexer, '(', ')', true);
56781ac50a2Sgetzze }
56881ac50a2Sgetzze
56981ac50a2Sgetzze /* Indexing block with bracket.
57081ac50a2Sgetzze * Some keywords have a special meaning in this environment:
57181ac50a2Sgetzze * end, begin, for and if */
scanIndexBlock(lexerState * lexer)57281ac50a2Sgetzze static void scanIndexBlock (lexerState *lexer)
57381ac50a2Sgetzze {
57481ac50a2Sgetzze scanBlock(lexer, '[', ']', false);
57581ac50a2Sgetzze
57681ac50a2Sgetzze }
57781ac50a2Sgetzze
57881ac50a2Sgetzze /* Parse a block inside curly brackets, for type parametrization */
scanCurlyBlock(lexerState * lexer)57981ac50a2Sgetzze static void scanCurlyBlock (lexerState *lexer)
58081ac50a2Sgetzze {
58181ac50a2Sgetzze scanBlock(lexer, '{', '}', true);
58281ac50a2Sgetzze }
58381ac50a2Sgetzze
58481ac50a2Sgetzze /* Scan type annotation like
58581ac50a2Sgetzze * `::Type`, `::Type{T}`
58681ac50a2Sgetzze */
scanTypeAnnotation(lexerState * lexer)58781ac50a2Sgetzze static void scanTypeAnnotation (lexerState *lexer)
58881ac50a2Sgetzze {
58981ac50a2Sgetzze /* assume that current char is '<', '>' or ':', followed by ':' */
59081ac50a2Sgetzze advanceAndStoreChar(lexer);
59181ac50a2Sgetzze advanceAndStoreChar(lexer);
59281ac50a2Sgetzze
59381ac50a2Sgetzze skipWhitespace(lexer, true);
59481ac50a2Sgetzze scanIdentifier(lexer, false);
59581ac50a2Sgetzze if (lexer->cur_c == '{')
59681ac50a2Sgetzze {
59781ac50a2Sgetzze scanCurlyBlock(lexer);
59881ac50a2Sgetzze }
59981ac50a2Sgetzze }
60081ac50a2Sgetzze
60181ac50a2Sgetzze /* Scan type annotation like
60281ac50a2Sgetzze * `where Int<:T<:Real`, `where S<:Array{Real}` or `where {S, T}`
60381ac50a2Sgetzze */
scanTypeWhere(lexerState * lexer)60481ac50a2Sgetzze static void scanTypeWhere (lexerState *lexer)
60581ac50a2Sgetzze {
60681ac50a2Sgetzze /* assume that current token is 'where'
60781ac50a2Sgetzze * allow line continuation */
60881ac50a2Sgetzze vStringPut(lexer->token_str, ' ');
60981ac50a2Sgetzze skipWhitespace(lexer, true);
61081ac50a2Sgetzze
61181ac50a2Sgetzze while (lexer->cur_c != EOF)
61281ac50a2Sgetzze {
61381ac50a2Sgetzze
61481ac50a2Sgetzze if (lexer->cur_c == '{')
61581ac50a2Sgetzze {
61681ac50a2Sgetzze scanCurlyBlock(lexer);
61781ac50a2Sgetzze }
61881ac50a2Sgetzze else if (isIdentifierFirstCharacter(lexer->cur_c))
61981ac50a2Sgetzze {
62081ac50a2Sgetzze scanIdentifier(lexer, false);
62181ac50a2Sgetzze if (endswith(vStringValue(lexer->token_str), "where"))
62281ac50a2Sgetzze {
62381ac50a2Sgetzze /* allow line continuation */
62481ac50a2Sgetzze vStringPut(lexer->token_str, ' ');
62581ac50a2Sgetzze skipWhitespace(lexer, true);
62681ac50a2Sgetzze }
62781ac50a2Sgetzze }
62881ac50a2Sgetzze else if (isTypeDecl(lexer))
62981ac50a2Sgetzze {
63081ac50a2Sgetzze scanTypeAnnotation(lexer);
63181ac50a2Sgetzze //skipWhitespace(lexer, false);
63281ac50a2Sgetzze }
63381ac50a2Sgetzze else if (lexer->cur_c == '#')
63481ac50a2Sgetzze {
63581ac50a2Sgetzze skipComment(lexer);
63681ac50a2Sgetzze /* allow line continuation */
63781ac50a2Sgetzze if (endswith(vStringValue(lexer->token_str), "where "))
63881ac50a2Sgetzze {
63981ac50a2Sgetzze skipWhitespace(lexer, true);
64081ac50a2Sgetzze }
64181ac50a2Sgetzze }
64281ac50a2Sgetzze else if (isWhitespace(lexer->cur_c, false))
64381ac50a2Sgetzze {
64481ac50a2Sgetzze while (isWhitespace(lexer->cur_c, false))
64581ac50a2Sgetzze {
64681ac50a2Sgetzze advanceChar(lexer);
64781ac50a2Sgetzze }
64881ac50a2Sgetzze /* Add a space, if it is not a trailing space */
64981ac50a2Sgetzze if (!(isNewLine(lexer)))
65081ac50a2Sgetzze {
65181ac50a2Sgetzze vStringPut(lexer->token_str, ' ');
65281ac50a2Sgetzze }
65381ac50a2Sgetzze }
65481ac50a2Sgetzze else
65581ac50a2Sgetzze {
65681ac50a2Sgetzze break;
65781ac50a2Sgetzze }
65881ac50a2Sgetzze }
65981ac50a2Sgetzze }
66081ac50a2Sgetzze
66181ac50a2Sgetzze
/* Scan an identifier into lexerState::token_str and classify it.
 * If the word is the first part of a composed keyword (TOKEN_COMPOSER_KWD),
 * blanks are skipped and the word after it is scanned and classified
 * instead. A `where` keyword additionally pulls the rest of the clause into
 * the token text.
 * Stores the resulting token type in lexerState::cur_token and returns it:
 * the keyword's token type for the keywords handled below,
 * TOKEN_IDENTIFIER for everything else. */
static int parseIdentifier (lexerState *lexer)
{
    const langType julia = getInputLanguage ();
    int keyword;

    scanIdentifier(lexer, true);
    keyword = lookupKeyword (vStringValue(lexer->token_str), julia);

    /* First part of a composed identifier */
    if (keyword == TOKEN_COMPOSER_KWD)
    {
        skipWhitespace(lexer, false);
        scanIdentifier(lexer, true);
        keyword = lookupKeyword (vStringValue(lexer->token_str), julia);
    }

    switch (keyword)
    {
        case TOKEN_TYPE_WHERE:
            scanTypeWhere(lexer);
            /* fall through */
        case TOKEN_OPEN_BLOCK:
        case TOKEN_MODULE:
        case TOKEN_IMPORT:
        case TOKEN_USING:
        case TOKEN_EXPORT:
        case TOKEN_CONST:
        case TOKEN_MACRO:
        case TOKEN_FUNCTION:
        case TOKEN_STRUCT:
        case TOKEN_TYPE:
        case TOKEN_CLOSE_BLOCK:
            lexer->cur_token = keyword;
            break;
        default:
            lexer->cur_token = TOKEN_IDENTIFIER;
            break;
    }
    return lexer->cur_token;
}
69781ac50a2Sgetzze
69881ac50a2Sgetzze
69981ac50a2Sgetzze /* Advances the parser one token, optionally skipping whitespace
70081ac50a2Sgetzze * (otherwise it is concatenated and returned as a single whitespace token).
70181ac50a2Sgetzze * Whitespace is needed to properly render function signatures. Unrecognized
70281ac50a2Sgetzze * token starts are stored literally, e.g. token may equal to a character '#'. */
advanceToken(lexerState * lexer,bool skip_whitespace,bool propagate_first)7037ea3b2f4Sgetzze static int advanceToken (lexerState *lexer, bool skip_whitespace, bool propagate_first)
70481ac50a2Sgetzze {
70581ac50a2Sgetzze bool have_whitespace = false;
70681ac50a2Sgetzze bool newline = false;
70781ac50a2Sgetzze lexer->line = getInputLineNumber();
70881ac50a2Sgetzze lexer->pos = getInputFilePosition();
70981ac50a2Sgetzze
71081ac50a2Sgetzze /* the next token is the first token of the line */
7117ea3b2f4Sgetzze if (!propagate_first)
7127ea3b2f4Sgetzze {
71381ac50a2Sgetzze if (lexer->cur_token == TOKEN_NEWLINE ||
71481ac50a2Sgetzze lexer->cur_token == TOKEN_SEMICOLON ||
71570b87d3dSgetzze lexer->cur_token == TOKEN_NONE ||
71681ac50a2Sgetzze (lexer->first_token && lexer->cur_token == TOKEN_MACROCALL))
71781ac50a2Sgetzze {
71881ac50a2Sgetzze lexer->first_token = true;
71981ac50a2Sgetzze }
72081ac50a2Sgetzze else
72181ac50a2Sgetzze {
72281ac50a2Sgetzze lexer->first_token = false;
72381ac50a2Sgetzze }
7247ea3b2f4Sgetzze }
72581ac50a2Sgetzze
72681ac50a2Sgetzze while (lexer->cur_c != EOF)
72781ac50a2Sgetzze {
72881ac50a2Sgetzze /* skip whitespaces but not newlines */
72981ac50a2Sgetzze if (isWhitespace(lexer->cur_c, newline))
73081ac50a2Sgetzze {
73181ac50a2Sgetzze skipWhitespace(lexer, newline);
73281ac50a2Sgetzze have_whitespace = true;
73381ac50a2Sgetzze }
73481ac50a2Sgetzze else if (lexer->cur_c == '#')
73581ac50a2Sgetzze {
73681ac50a2Sgetzze skipComment(lexer);
73781ac50a2Sgetzze have_whitespace = true;
73881ac50a2Sgetzze }
73981ac50a2Sgetzze else
74081ac50a2Sgetzze {
74181ac50a2Sgetzze if (have_whitespace && !skip_whitespace)
74281ac50a2Sgetzze {
74381ac50a2Sgetzze return lexer->cur_token = TOKEN_WHITESPACE;
74481ac50a2Sgetzze }
74581ac50a2Sgetzze break;
74681ac50a2Sgetzze }
74781ac50a2Sgetzze }
74881ac50a2Sgetzze lexer->line = getInputLineNumber();
74981ac50a2Sgetzze lexer->pos = getInputFilePosition();
75081ac50a2Sgetzze while (lexer->cur_c != EOF)
75181ac50a2Sgetzze {
75281ac50a2Sgetzze if (lexer->cur_c == '"')
75381ac50a2Sgetzze {
75481ac50a2Sgetzze vStringClear(lexer->token_str);
75581ac50a2Sgetzze scanString(lexer);
75681ac50a2Sgetzze return lexer->cur_token = TOKEN_STRING;
75781ac50a2Sgetzze }
75881ac50a2Sgetzze else if (lexer->cur_c == '\'')
75981ac50a2Sgetzze {
76081ac50a2Sgetzze vStringClear(lexer->token_str);
76181ac50a2Sgetzze if (scanCharacterOrTranspose(lexer))
76281ac50a2Sgetzze {
76381ac50a2Sgetzze return lexer->cur_token = TOKEN_STRING;
76481ac50a2Sgetzze }
76581ac50a2Sgetzze else
76681ac50a2Sgetzze {
76781ac50a2Sgetzze return lexer->cur_token = '\'';
76881ac50a2Sgetzze }
76981ac50a2Sgetzze }
77081ac50a2Sgetzze else if (lexer->cur_c == '`')
77181ac50a2Sgetzze {
77281ac50a2Sgetzze vStringClear(lexer->token_str);
77381ac50a2Sgetzze scanCommand(lexer);
77481ac50a2Sgetzze return lexer->cur_token = TOKEN_COMMAND;
77581ac50a2Sgetzze }
77681ac50a2Sgetzze else if (isIdentifierFirstCharacter(lexer->cur_c))
77781ac50a2Sgetzze {
77881ac50a2Sgetzze return parseIdentifier(lexer);
77981ac50a2Sgetzze }
78081ac50a2Sgetzze else if (lexer->cur_c == '@')
78181ac50a2Sgetzze {
78281ac50a2Sgetzze vStringClear(lexer->token_str);
78381ac50a2Sgetzze advanceAndStoreChar(lexer);
78481ac50a2Sgetzze do
78581ac50a2Sgetzze {
78681ac50a2Sgetzze advanceAndStoreChar(lexer);
78781ac50a2Sgetzze } while(lexer->cur_c != EOF && isIdentifierCharacter(lexer->cur_c));
78881ac50a2Sgetzze return lexer->cur_token = TOKEN_MACROCALL;
78981ac50a2Sgetzze }
79081ac50a2Sgetzze else if (lexer->cur_c == '(')
79181ac50a2Sgetzze {
79281ac50a2Sgetzze vStringClear(lexer->token_str);
79381ac50a2Sgetzze scanParenBlock(lexer);
79481ac50a2Sgetzze return lexer->cur_token = TOKEN_PAREN_BLOCK;
79581ac50a2Sgetzze }
79681ac50a2Sgetzze else if (lexer->cur_c == '[')
79781ac50a2Sgetzze {
79881ac50a2Sgetzze vStringClear(lexer->token_str);
79981ac50a2Sgetzze scanIndexBlock(lexer);
80081ac50a2Sgetzze return lexer->cur_token = TOKEN_BRACKET_BLOCK;
80181ac50a2Sgetzze }
80281ac50a2Sgetzze else if (lexer->cur_c == '{')
80381ac50a2Sgetzze {
80481ac50a2Sgetzze vStringClear(lexer->token_str);
80581ac50a2Sgetzze scanCurlyBlock(lexer);
80681ac50a2Sgetzze return lexer->cur_token = TOKEN_CURLY_BLOCK;
80781ac50a2Sgetzze }
80881ac50a2Sgetzze else if (isTypeDecl(lexer))
80981ac50a2Sgetzze {
81081ac50a2Sgetzze vStringClear(lexer->token_str);
81181ac50a2Sgetzze scanTypeAnnotation(lexer);
81281ac50a2Sgetzze return lexer->cur_token = TOKEN_TYPE_ANNOTATION;
81381ac50a2Sgetzze }
81481ac50a2Sgetzze else if (skipNewLine(lexer))
81581ac50a2Sgetzze {
81681ac50a2Sgetzze /* allow line continuation */
81781ac50a2Sgetzze if (isOperator(lexer->cur_token))
81881ac50a2Sgetzze {
81981ac50a2Sgetzze return lexer->cur_token;
82081ac50a2Sgetzze }
82181ac50a2Sgetzze return lexer->cur_token = TOKEN_NEWLINE;
82281ac50a2Sgetzze }
82381ac50a2Sgetzze else if (lexer->cur_c == ';')
82481ac50a2Sgetzze {
82581ac50a2Sgetzze advanceChar(lexer);
82681ac50a2Sgetzze return lexer->cur_token = TOKEN_SEMICOLON;
82781ac50a2Sgetzze }
82881ac50a2Sgetzze else
82981ac50a2Sgetzze {
83081ac50a2Sgetzze int c = lexer->cur_c;
83181ac50a2Sgetzze advanceChar(lexer);
83281ac50a2Sgetzze return lexer->cur_token = c;
83381ac50a2Sgetzze }
83481ac50a2Sgetzze }
83581ac50a2Sgetzze return lexer->cur_token = TOKEN_EOF;
83681ac50a2Sgetzze }
83781ac50a2Sgetzze
/* Prepare LEXER for a fresh input: advance the character stream by two,
 * reset the token state, skip a leading "#!" line if there is one, and
 * scan the first token. */
static void initLexer (lexerState *lexer)
{
    bool starts_with_shebang;

    /* Must happen before cur_c / next_c are inspected below
     * (presumably this primes the two-character look-ahead). */
    advanceNChar(lexer, 2);

    lexer->prev_c = '\0';
    lexer->cur_token = TOKEN_NONE;
    lexer->first_token = true;
    lexer->token_str = vStringNew();

    starts_with_shebang = (lexer->cur_c == '#' && lexer->next_c == '!');
    if (starts_with_shebang)
    {
        skipComment(lexer);
    }

    advanceToken(lexer, true, false);
}
85281ac50a2Sgetzze
/* Free the token buffer created by initLexer and clear the pointer so the
 * lexer no longer refers to freed memory. */
static void deInitLexer (lexerState *lexer)
{
    vString *buffer = lexer->token_str;

    lexer->token_str = NULL;
    vStringDelete(buffer);
}
85881ac50a2Sgetzze
#if 0
/* Debugging aid (compiled out): print the lexer's current line, token,
 * current character and token text to stdout. */
static void debugLexer (lexerState *lexer)
{
    /* Explicit casts keep each argument in step with its conversion
     * specifier whatever the exact integer types of the fields are. */
    printf("Current lexer state: line %lu, token (%ld), cur char `%c`, token str:\n\t`",
           (unsigned long) lexer->line, (long) lexer->cur_token, lexer->cur_c);
    /* The token text comes from the parsed file and may contain '%', so it
     * must never be used as a format string. */
    fputs(vStringValue(lexer->token_str), stdout);
    printf("`\n");
}
#endif
86781ac50a2Sgetzze
/* Emit a definition tag.
 *
 * ident:       name of the tag.
 * type:        currently unused (storing it as varType needs a workaround).
 * arg_list:    stored as the tag's signature; may be NULL.
 * kind:        kind of the tag; nothing is emitted for K_NONE.
 * line, pos:   location recorded for the tag.
 * scope:       scope name, only used when parent_kind is not K_NONE.
 * parent_kind: kind of the enclosing scope, or K_NONE for no scope.
 */
static void addTag (vString* ident, const char* type, const char* arg_list, int kind, unsigned long line, MIOPos pos, vString *scope, int parent_kind)
{
    tagEntryInfo entry;

    if (kind == K_NONE)
    {
        return;
    }

    initTagEntry(&entry, vStringValue(ident), kind);

    entry.sourceFileName = getInputFileName();
    entry.filePosition = pos;
    entry.lineNumber = line;
    entry.extensionFields.signature = arg_list;

    if (parent_kind != K_NONE)
    {
        entry.extensionFields.scopeName = vStringValue(scope);
        entry.extensionFields.scopeKindIndex = parent_kind;
    }

    makeTagEntry(&entry);
}
89081ac50a2Sgetzze
/* Emit a reference tag: IDENT tagged with KIND qualified by ROLE, located
 * at LINE/POS.  Nothing is emitted when KIND is K_NONE.  SCOPE is recorded
 * only when PARENT_KIND is not K_NONE. */
static void addReferenceTag (vString* ident, int kind, int role, unsigned long line, MIOPos pos, vString* scope, int parent_kind)
{
    tagEntryInfo ref;

    if (kind != K_NONE)
    {
        initRefTagEntry(&ref, vStringValue(ident), kind, role);
        ref.filePosition = pos;
        ref.lineNumber = line;

        if (parent_kind != K_NONE)
        {
            ref.extensionFields.scopeName = vStringValue(scope);
            ref.extensionFields.scopeKindIndex = parent_kind;
        }

        makeTagEntry(&ref);
    }
}
9083cc79e5cSAmaiKinono
90981ac50a2Sgetzze /* Skip tokens until one of the goal tokens is hit. Escapes when level = 0 if there are no goal tokens.
91081ac50a2Sgetzze * Keeps track of balanced ()'s, []'s, and {}'s and ignores the goal tokens within those pairings */
skipUntil(lexerState * lexer,int goal_tokens[],int num_goal_tokens)91181ac50a2Sgetzze static void skipUntil (lexerState *lexer, int goal_tokens[], int num_goal_tokens)
91281ac50a2Sgetzze {
91381ac50a2Sgetzze int block_level = 0;
91481ac50a2Sgetzze
91581ac50a2Sgetzze while (lexer->cur_token != TOKEN_EOF)
91681ac50a2Sgetzze {
91781ac50a2Sgetzze /* check if the keyword is reached, only if outside a block */
91881ac50a2Sgetzze if (block_level == 0)
91981ac50a2Sgetzze {
92081ac50a2Sgetzze int ii = 0;
92181ac50a2Sgetzze for(ii = 0; ii < num_goal_tokens; ii++)
92281ac50a2Sgetzze {
92381ac50a2Sgetzze if (lexer->cur_token == goal_tokens[ii])
92481ac50a2Sgetzze {
92581ac50a2Sgetzze break;
92681ac50a2Sgetzze }
92781ac50a2Sgetzze }
92881ac50a2Sgetzze if (ii < num_goal_tokens)
92981ac50a2Sgetzze {
93081ac50a2Sgetzze /* parse the next token */
9317ea3b2f4Sgetzze advanceToken(lexer, true, false);
93281ac50a2Sgetzze break;
93381ac50a2Sgetzze }
93481ac50a2Sgetzze }
93581ac50a2Sgetzze
93681ac50a2Sgetzze /* take into account nested blocks */
93781ac50a2Sgetzze switch (lexer->cur_token)
93881ac50a2Sgetzze {
93981ac50a2Sgetzze case TOKEN_OPEN_BLOCK:
94081ac50a2Sgetzze block_level++;
94181ac50a2Sgetzze break;
94281ac50a2Sgetzze case TOKEN_CLOSE_BLOCK:
94381ac50a2Sgetzze block_level--;
94481ac50a2Sgetzze break;
94581ac50a2Sgetzze default:
94681ac50a2Sgetzze break;
94781ac50a2Sgetzze }
94881ac50a2Sgetzze
94981ac50a2Sgetzze /* Has to be after the token switch to catch the case when we start with the initial level token */
95081ac50a2Sgetzze if (num_goal_tokens == 0 && block_level == 0)
95181ac50a2Sgetzze {
95281ac50a2Sgetzze break;
95381ac50a2Sgetzze }
95481ac50a2Sgetzze
9557ea3b2f4Sgetzze advanceToken(lexer, true, false);
95681ac50a2Sgetzze }
95781ac50a2Sgetzze }
95881ac50a2Sgetzze
95981ac50a2Sgetzze /* Skip until the end of the block */
skipUntilEnd(lexerState * lexer)96081ac50a2Sgetzze static void skipUntilEnd (lexerState *lexer)
96181ac50a2Sgetzze {
96281ac50a2Sgetzze int goal_tokens[] = { TOKEN_CLOSE_BLOCK };
96381ac50a2Sgetzze
96481ac50a2Sgetzze skipUntil(lexer, goal_tokens, 1);
96581ac50a2Sgetzze }
96681ac50a2Sgetzze
96781ac50a2Sgetzze /* Skip a function body after assignment operator '='
96881ac50a2Sgetzze * Beware of continuation lines after operators
96981ac50a2Sgetzze * */
skipBody(lexerState * lexer)97081ac50a2Sgetzze static void skipBody (lexerState *lexer)
97181ac50a2Sgetzze {
97281ac50a2Sgetzze /* assume position just after '=' */
97381ac50a2Sgetzze while (lexer->cur_token != TOKEN_EOF && lexer->cur_token != TOKEN_NEWLINE)
97481ac50a2Sgetzze {
9757ea3b2f4Sgetzze advanceToken(lexer, true, false);
97681ac50a2Sgetzze
97781ac50a2Sgetzze if (lexer->cur_token == TOKEN_OPEN_BLOCK)
97881ac50a2Sgetzze {
97981ac50a2Sgetzze /* pass the keyword */
9807ea3b2f4Sgetzze advanceToken(lexer, true, false);
98181ac50a2Sgetzze skipUntilEnd(lexer);
98281ac50a2Sgetzze /* the next token is already selected */
98381ac50a2Sgetzze }
98481ac50a2Sgetzze }
98581ac50a2Sgetzze }
98681ac50a2Sgetzze
98781ac50a2Sgetzze /* Short function format:
98881ac50a2Sgetzze * <ident> ( [<args>] ) [::<type>] [<where>] = [begin] <body> [end]
98981ac50a2Sgetzze * */
parseShortFunction(lexerState * lexer,vString * scope,int parent_kind)99081ac50a2Sgetzze static void parseShortFunction (lexerState *lexer, vString *scope, int parent_kind)
99181ac50a2Sgetzze {
99281ac50a2Sgetzze /* assume the current char is just after identifier */
99381ac50a2Sgetzze vString *name;
99481ac50a2Sgetzze vString *arg_list;
99581ac50a2Sgetzze unsigned long line;
99681ac50a2Sgetzze MIOPos pos;
99781ac50a2Sgetzze
99881ac50a2Sgetzze /* should be an open parenthesis after identifier
99981ac50a2Sgetzze * with potentially parametric type */
100081ac50a2Sgetzze skipWhitespace(lexer, false);
100181ac50a2Sgetzze if (lexer->cur_c == '{')
100281ac50a2Sgetzze {
100381ac50a2Sgetzze scanCurlyBlock(lexer);
100481ac50a2Sgetzze skipWhitespace(lexer, false);
100581ac50a2Sgetzze }
100681ac50a2Sgetzze
100781ac50a2Sgetzze if (lexer->cur_c != '(')
100881ac50a2Sgetzze {
10097ea3b2f4Sgetzze advanceToken(lexer, true, false);
101081ac50a2Sgetzze return;
101181ac50a2Sgetzze }
101281ac50a2Sgetzze
101381ac50a2Sgetzze name = vStringNewCopy(lexer->token_str);
101481ac50a2Sgetzze line = lexer->line;
101581ac50a2Sgetzze pos = lexer->pos;
101681ac50a2Sgetzze
101781ac50a2Sgetzze /* scan argument list */
10187ea3b2f4Sgetzze advanceToken(lexer, true, false);
101981ac50a2Sgetzze arg_list = vStringNewCopy(lexer->token_str);
102081ac50a2Sgetzze
102181ac50a2Sgetzze /* scan potential type casting */
10227ea3b2f4Sgetzze advanceToken(lexer, true, false);
102381ac50a2Sgetzze if (lexer->cur_token == TOKEN_TYPE_ANNOTATION)
102481ac50a2Sgetzze {
102581ac50a2Sgetzze vStringCat(arg_list, lexer->token_str);
10267ea3b2f4Sgetzze advanceToken(lexer, true, false);
102781ac50a2Sgetzze }
102881ac50a2Sgetzze /* scan potential type union with 'where' */
102981ac50a2Sgetzze if (lexer->cur_token == TOKEN_TYPE_WHERE)
103081ac50a2Sgetzze {
103181ac50a2Sgetzze vStringPut(arg_list, ' ');
103281ac50a2Sgetzze vStringCat(arg_list, lexer->token_str);
10337ea3b2f4Sgetzze advanceToken(lexer, true, false);
103481ac50a2Sgetzze }
103581ac50a2Sgetzze
1036*b9feb330Sgetzze /* scan equal sign, ignore `==` and `=>` */
1037*b9feb330Sgetzze if (!(lexer->cur_token == '=' &&
103881ac50a2Sgetzze lexer->cur_c != '=' &&
1039*b9feb330Sgetzze lexer->cur_c != '>'))
104081ac50a2Sgetzze {
104181ac50a2Sgetzze vStringDelete(name);
104281ac50a2Sgetzze vStringDelete(arg_list);
104381ac50a2Sgetzze return;
104481ac50a2Sgetzze }
104581ac50a2Sgetzze
104681ac50a2Sgetzze addTag(name, NULL, vStringValue(arg_list), K_FUNCTION, line, pos, scope, parent_kind);
104781ac50a2Sgetzze
104881ac50a2Sgetzze /* scan until end of function definition */
104981ac50a2Sgetzze skipBody(lexer);
105081ac50a2Sgetzze
105181ac50a2Sgetzze /* Should end on a new line, parse next token */
10527ea3b2f4Sgetzze advanceToken(lexer, true, false);
105381ac50a2Sgetzze lexer->first_token = true;
105481ac50a2Sgetzze
105581ac50a2Sgetzze vStringDelete(name);
105681ac50a2Sgetzze vStringDelete(arg_list);
105781ac50a2Sgetzze }
105881ac50a2Sgetzze
105981ac50a2Sgetzze /* Function format:
106081ac50a2Sgetzze * function <ident> ( [<args>] ) [::<type>] [<where>] [<body>] end
106181ac50a2Sgetzze * */
parseFunction(lexerState * lexer,vString * scope,int parent_kind)106281ac50a2Sgetzze static void parseFunction (lexerState *lexer, vString *scope, int parent_kind)
106381ac50a2Sgetzze {
106481ac50a2Sgetzze vString *name;
106581ac50a2Sgetzze vString *arg_list;
1066696902a0SAmaiKinono vString *local_scope;
1067696902a0SAmaiKinono int local_parent_kind;
106881ac50a2Sgetzze unsigned long line;
106981ac50a2Sgetzze MIOPos pos;
107081ac50a2Sgetzze
10717ea3b2f4Sgetzze advanceToken(lexer, true, false);
107281ac50a2Sgetzze if (lexer->cur_token != TOKEN_IDENTIFIER)
107381ac50a2Sgetzze {
107481ac50a2Sgetzze return;
107589dd26eaSgetzze }
107689dd26eaSgetzze else if (lexer->cur_c == '.')
107789dd26eaSgetzze {
1078696902a0SAmaiKinono local_scope = vStringNewCopy(lexer->token_str);
1079696902a0SAmaiKinono local_parent_kind = K_MODULE;
1080696902a0SAmaiKinono advanceChar(lexer);
10817ea3b2f4Sgetzze advanceToken(lexer, true, false);
108289dd26eaSgetzze }
108389dd26eaSgetzze else
108489dd26eaSgetzze {
1085696902a0SAmaiKinono local_scope = vStringNewCopy(scope);
1086696902a0SAmaiKinono local_parent_kind = parent_kind;
108781ac50a2Sgetzze }
108881ac50a2Sgetzze
108970cbf361Sgetzze /* Scan for parametric type constructor */
109070cbf361Sgetzze skipWhitespace(lexer, false);
109170cbf361Sgetzze if (lexer->cur_c == '{')
109270cbf361Sgetzze {
109370cbf361Sgetzze scanCurlyBlock(lexer);
109470cbf361Sgetzze skipWhitespace(lexer, false);
109570cbf361Sgetzze }
109670cbf361Sgetzze
109781ac50a2Sgetzze name = vStringNewCopy(lexer->token_str);
109881ac50a2Sgetzze arg_list = vStringNew();
109981ac50a2Sgetzze line = lexer->line;
110081ac50a2Sgetzze pos = lexer->pos;
110181ac50a2Sgetzze
11027ea3b2f4Sgetzze advanceToken(lexer, true, false);
110381ac50a2Sgetzze if (lexer->cur_token == TOKEN_PAREN_BLOCK)
110481ac50a2Sgetzze {
110581ac50a2Sgetzze vStringCopy(arg_list, lexer->token_str);
110681ac50a2Sgetzze
110781ac50a2Sgetzze /* scan potential type casting */
11087ea3b2f4Sgetzze advanceToken(lexer, true, false);
110981ac50a2Sgetzze if (lexer->cur_token == TOKEN_TYPE_ANNOTATION)
111081ac50a2Sgetzze {
111181ac50a2Sgetzze vStringCat(arg_list, lexer->token_str);
11127ea3b2f4Sgetzze advanceToken(lexer, true, false);
111381ac50a2Sgetzze }
111481ac50a2Sgetzze /* scan potential type union with 'where' */
111581ac50a2Sgetzze if (lexer->cur_token == TOKEN_TYPE_WHERE)
111681ac50a2Sgetzze {
111781ac50a2Sgetzze vStringPut(arg_list, ' ');
111881ac50a2Sgetzze vStringCat(arg_list, lexer->token_str);
11197ea3b2f4Sgetzze advanceToken(lexer, true, false);
112081ac50a2Sgetzze }
112181ac50a2Sgetzze
1122696902a0SAmaiKinono addTag(name, NULL, vStringValue(arg_list), K_FUNCTION, line, pos, local_scope, local_parent_kind);
1123696902a0SAmaiKinono addToScope(scope, name);
1124696902a0SAmaiKinono parseExpr(lexer, true, K_FUNCTION, scope);
112581ac50a2Sgetzze }
112681ac50a2Sgetzze else if (lexer->cur_token == TOKEN_CLOSE_BLOCK)
112781ac50a2Sgetzze {
112881ac50a2Sgetzze /* Function without method */
1129696902a0SAmaiKinono addTag(name, NULL, NULL, K_FUNCTION, line, pos, local_scope, local_parent_kind);
113081ac50a2Sgetzze /* Go to the closing 'end' keyword */
113181ac50a2Sgetzze skipUntilEnd(lexer);
1132696902a0SAmaiKinono }
113381ac50a2Sgetzze
113481ac50a2Sgetzze vStringDelete(name);
113581ac50a2Sgetzze vStringDelete(arg_list);
1136696902a0SAmaiKinono vStringDelete(local_scope);
113781ac50a2Sgetzze }
113881ac50a2Sgetzze
113981ac50a2Sgetzze /* Macro format:
114081ac50a2Sgetzze * "macro" <ident>()
114181ac50a2Sgetzze */
parseMacro(lexerState * lexer,vString * scope,int parent_kind)114281ac50a2Sgetzze static void parseMacro (lexerState *lexer, vString *scope, int parent_kind)
114381ac50a2Sgetzze {
114481ac50a2Sgetzze vString *name;
114581ac50a2Sgetzze unsigned long line;
114681ac50a2Sgetzze MIOPos pos;
114781ac50a2Sgetzze
11487ea3b2f4Sgetzze advanceToken(lexer, true, false);
114981ac50a2Sgetzze if (lexer->cur_token != TOKEN_IDENTIFIER)
115081ac50a2Sgetzze {
115181ac50a2Sgetzze return;
115281ac50a2Sgetzze }
115381ac50a2Sgetzze
115481ac50a2Sgetzze name = vStringNewCopy(lexer->token_str);
115581ac50a2Sgetzze line = lexer->line;
115681ac50a2Sgetzze pos = lexer->pos;
115781ac50a2Sgetzze
11587ea3b2f4Sgetzze advanceToken(lexer, true, false);
115981ac50a2Sgetzze if (lexer->cur_token == TOKEN_PAREN_BLOCK)
116081ac50a2Sgetzze {
116181ac50a2Sgetzze addTag(name, NULL, vStringValue(lexer->token_str), K_MACRO, line, pos, scope, parent_kind);
116281ac50a2Sgetzze }
116381ac50a2Sgetzze
116481ac50a2Sgetzze skipUntilEnd(lexer);
116581ac50a2Sgetzze vStringDelete(name);
116681ac50a2Sgetzze }
116781ac50a2Sgetzze
116881ac50a2Sgetzze /* Const format:
116981ac50a2Sgetzze * "const" <ident>
117081ac50a2Sgetzze */
parseConst(lexerState * lexer,vString * scope,int parent_kind)117181ac50a2Sgetzze static void parseConst (lexerState *lexer, vString *scope, int parent_kind)
117281ac50a2Sgetzze {
117381ac50a2Sgetzze vString *name;
117481ac50a2Sgetzze
11757ea3b2f4Sgetzze advanceToken(lexer, true, false);
117681ac50a2Sgetzze if (lexer->cur_token != TOKEN_IDENTIFIER)
117781ac50a2Sgetzze {
117881ac50a2Sgetzze return;
117981ac50a2Sgetzze }
118081ac50a2Sgetzze
118181ac50a2Sgetzze name = vStringNewCopy(lexer->token_str);
118281ac50a2Sgetzze
11837ea3b2f4Sgetzze advanceToken(lexer, true, false);
118481ac50a2Sgetzze if (lexer->cur_token == TOKEN_TYPE_ANNOTATION)
118581ac50a2Sgetzze {
118681ac50a2Sgetzze addTag(name, "const", vStringValue(lexer->token_str), K_CONSTANT, lexer->line, lexer->pos, scope, parent_kind);
11877ea3b2f4Sgetzze advanceToken(lexer, true, false);
118881ac50a2Sgetzze }
118981ac50a2Sgetzze else
119081ac50a2Sgetzze {
119181ac50a2Sgetzze addTag(name, "const", NULL, K_CONSTANT, lexer->line, lexer->pos, scope, parent_kind);
119281ac50a2Sgetzze }
119381ac50a2Sgetzze
119481ac50a2Sgetzze vStringDelete(name);
119581ac50a2Sgetzze }
119681ac50a2Sgetzze
119781ac50a2Sgetzze /* Type format:
119881ac50a2Sgetzze * [ "abstract" | "primitive" ] "type" <ident>
119981ac50a2Sgetzze */
parseType(lexerState * lexer,vString * scope,int parent_kind)120081ac50a2Sgetzze static void parseType (lexerState *lexer, vString *scope, int parent_kind)
120181ac50a2Sgetzze {
12027ea3b2f4Sgetzze advanceToken(lexer, true, false);
120381ac50a2Sgetzze if (lexer->cur_token != TOKEN_IDENTIFIER)
120481ac50a2Sgetzze {
120581ac50a2Sgetzze return;
120681ac50a2Sgetzze }
120781ac50a2Sgetzze
120881ac50a2Sgetzze addTag(lexer->token_str, NULL, NULL, K_TYPE, lexer->line, lexer->pos, scope, parent_kind);
120981ac50a2Sgetzze
121081ac50a2Sgetzze skipUntilEnd(lexer);
121181ac50a2Sgetzze }
121281ac50a2Sgetzze
121381ac50a2Sgetzze /* Module format:
121481ac50a2Sgetzze * [ "baremodule" | "module" ] <ident>
121581ac50a2Sgetzze */
parseModule(lexerState * lexer,vString * scope,int parent_kind)121681ac50a2Sgetzze static void parseModule (lexerState *lexer, vString *scope, int parent_kind)
121781ac50a2Sgetzze {
12187ea3b2f4Sgetzze advanceToken(lexer, true, false);
121981ac50a2Sgetzze if (lexer->cur_token != TOKEN_IDENTIFIER)
122081ac50a2Sgetzze {
122181ac50a2Sgetzze return;
122281ac50a2Sgetzze }
122381ac50a2Sgetzze
122481ac50a2Sgetzze addTag(lexer->token_str, NULL, NULL, K_MODULE, lexer->line, lexer->pos, scope, parent_kind);
1225696902a0SAmaiKinono addToScope(scope, lexer->token_str);
12267ea3b2f4Sgetzze advanceToken(lexer, true, false);
1227696902a0SAmaiKinono parseExpr(lexer, true, K_MODULE, scope);
122881ac50a2Sgetzze }
122981ac50a2Sgetzze
12303cc79e5cSAmaiKinono /*
12313cc79e5cSAmaiKinono * Parse comma separated entity in import/using expressions. An entity could be
12323cc79e5cSAmaiKinono * in the form of "Module" or "Module.symbol". The lexer should be at the end
12333cc79e5cSAmaiKinono * of "Module", and this function will take it to the end of the entity
12343cc79e5cSAmaiKinono * (whitespaces also skipped).
123581ac50a2Sgetzze */
parseImportEntity(lexerState * lexer,vString * scope,int token_type,int parent_kind)12363cc79e5cSAmaiKinono static void parseImportEntity (lexerState *lexer, vString *scope, int token_type, int parent_kind)
123781ac50a2Sgetzze {
12383cc79e5cSAmaiKinono if (lexer->cur_c == '.')
12393cc79e5cSAmaiKinono {
12403cc79e5cSAmaiKinono if (token_type == TOKEN_IMPORT)
12413cc79e5cSAmaiKinono {
12423cc79e5cSAmaiKinono vString *module_name = vStringNewCopy(lexer->token_str);
12433cc79e5cSAmaiKinono addReferenceTag(module_name, K_MODULE, JULIA_MODULE_NAMESPACE, lexer->line, lexer->pos, scope, parent_kind);
12443cc79e5cSAmaiKinono advanceChar(lexer);
12457ea3b2f4Sgetzze advanceToken(lexer, true, false);
12463cc79e5cSAmaiKinono addReferenceTag(lexer->token_str, K_UNKNOWN, JULIA_UNKNOWN_IMPORTED, lexer->line, lexer->pos, module_name, K_MODULE);
12473cc79e5cSAmaiKinono vStringDelete(module_name);
12483cc79e5cSAmaiKinono }
12493cc79e5cSAmaiKinono else /* if (token_type == TOKEN_USING) */
125081ac50a2Sgetzze {
12513cc79e5cSAmaiKinono /* using Module.symbol is invalid, so we advance the lexer but don't tag it. */
12523cc79e5cSAmaiKinono advanceChar(lexer);
12537ea3b2f4Sgetzze advanceToken(lexer, true, false);
125481ac50a2Sgetzze }
12553cc79e5cSAmaiKinono }
12563cc79e5cSAmaiKinono else
12573cc79e5cSAmaiKinono {
12583cc79e5cSAmaiKinono if (token_type == TOKEN_IMPORT)
12593cc79e5cSAmaiKinono {
12603cc79e5cSAmaiKinono addReferenceTag(lexer->token_str, K_MODULE, JULIA_MODULE_IMPORTED, lexer->line, lexer->pos, scope, parent_kind);
12613cc79e5cSAmaiKinono }
12623cc79e5cSAmaiKinono else /* if (token_type == TOKEN_USING) */
12633cc79e5cSAmaiKinono {
12643cc79e5cSAmaiKinono addReferenceTag(lexer->token_str, K_MODULE, JULIA_MODULE_USED, lexer->line, lexer->pos, scope, parent_kind);
12653cc79e5cSAmaiKinono }
12663cc79e5cSAmaiKinono }
12673cc79e5cSAmaiKinono }
126881ac50a2Sgetzze
12693cc79e5cSAmaiKinono /* Parse import/using expressions with a colon, like: */
12703cc79e5cSAmaiKinono /* import Module: symbol1, symbol2 */
12713cc79e5cSAmaiKinono /* using Module: symbol1, symbol2 */
12723cc79e5cSAmaiKinono /* The lexer should be at the end of "Module", and this function will take it
12733cc79e5cSAmaiKinono * to the end of the token after this expression (whitespaces also skipped). */
parseColonImportExpr(lexerState * lexer,vString * scope,int token_type,int parent_kind)12743cc79e5cSAmaiKinono static void parseColonImportExpr (lexerState *lexer, vString *scope, int token_type, int parent_kind)
12753cc79e5cSAmaiKinono {
12763cc79e5cSAmaiKinono int symbol_role;
12773cc79e5cSAmaiKinono if (token_type == TOKEN_IMPORT)
12783cc79e5cSAmaiKinono {
12793cc79e5cSAmaiKinono symbol_role = JULIA_UNKNOWN_IMPORTED;
12803cc79e5cSAmaiKinono }
12813cc79e5cSAmaiKinono else /* if (token_type == TOKEN_USING) */
12823cc79e5cSAmaiKinono {
12833cc79e5cSAmaiKinono symbol_role = JULIA_UNKNOWN_USED;
12843cc79e5cSAmaiKinono }
12853cc79e5cSAmaiKinono vString *name = vStringNewCopy(lexer->token_str);
12863cc79e5cSAmaiKinono addReferenceTag(name, K_MODULE, JULIA_MODULE_NAMESPACE, lexer->line, lexer->pos, scope, parent_kind);
12873cc79e5cSAmaiKinono advanceChar(lexer);
12887ea3b2f4Sgetzze advanceToken(lexer, true, false);
12893cc79e5cSAmaiKinono if (lexer->cur_token == TOKEN_NEWLINE)
12903cc79e5cSAmaiKinono {
12917ea3b2f4Sgetzze advanceToken(lexer, true, false);
12923cc79e5cSAmaiKinono }
129381ac50a2Sgetzze while (lexer->cur_token == TOKEN_IDENTIFIER || lexer->cur_token == TOKEN_MACROCALL)
129481ac50a2Sgetzze {
12953cc79e5cSAmaiKinono addReferenceTag(lexer->token_str, K_UNKNOWN, symbol_role, lexer->line, lexer->pos, name, K_MODULE);
129681ac50a2Sgetzze if (lexer->cur_c == ',')
129781ac50a2Sgetzze {
12983cc79e5cSAmaiKinono advanceChar(lexer);
12997ea3b2f4Sgetzze advanceToken(lexer, true, false);
130081ac50a2Sgetzze if (lexer->cur_token == TOKEN_NEWLINE)
130181ac50a2Sgetzze {
13027ea3b2f4Sgetzze advanceToken(lexer, true, false);
130381ac50a2Sgetzze }
130481ac50a2Sgetzze }
130581ac50a2Sgetzze else
130681ac50a2Sgetzze {
13077ea3b2f4Sgetzze advanceToken(lexer, true, false);
130881ac50a2Sgetzze }
130981ac50a2Sgetzze }
131081ac50a2Sgetzze vStringDelete(name);
131181ac50a2Sgetzze }
131281ac50a2Sgetzze
13133cc79e5cSAmaiKinono /* Import format:
13143cc79e5cSAmaiKinono * [ "import" | "using" ] <ident> [: <name>]
13153cc79e5cSAmaiKinono */
parseImport(lexerState * lexer,vString * scope,int token_type,int parent_kind)13163cc79e5cSAmaiKinono static void parseImport (lexerState *lexer, vString *scope, int token_type, int parent_kind)
13173cc79e5cSAmaiKinono {
13183cc79e5cSAmaiKinono /* capture the imported name */
13197ea3b2f4Sgetzze advanceToken(lexer, true, false);
13203cc79e5cSAmaiKinono /* import Mod1: symbol1, symbol2 */
13213cc79e5cSAmaiKinono /* using Mod1: symbol1, symbol2 */
13223cc79e5cSAmaiKinono if (lexer->cur_c == ':')
13233cc79e5cSAmaiKinono {
13243cc79e5cSAmaiKinono parseColonImportExpr(lexer, scope, token_type, parent_kind);
13253cc79e5cSAmaiKinono }
13263cc79e5cSAmaiKinono /* All other situations, like import/using Mod1, Mod2.symbol1, Mod3... */
13273cc79e5cSAmaiKinono else
13283cc79e5cSAmaiKinono {
13293cc79e5cSAmaiKinono while (lexer->cur_token == TOKEN_IDENTIFIER || lexer->cur_token == TOKEN_MACROCALL)
13303cc79e5cSAmaiKinono {
13313cc79e5cSAmaiKinono parseImportEntity(lexer, scope, token_type, parent_kind);
13323cc79e5cSAmaiKinono if (lexer->cur_c == ',')
13333cc79e5cSAmaiKinono {
13343cc79e5cSAmaiKinono advanceChar(lexer);
13357ea3b2f4Sgetzze advanceToken(lexer, true, false);
13363cc79e5cSAmaiKinono if (lexer->cur_token == TOKEN_NEWLINE)
13373cc79e5cSAmaiKinono {
13387ea3b2f4Sgetzze advanceToken(lexer, true, false);
13393cc79e5cSAmaiKinono }
13403cc79e5cSAmaiKinono }
13413cc79e5cSAmaiKinono else
13423cc79e5cSAmaiKinono {
13437ea3b2f4Sgetzze advanceToken(lexer, true, false);
13443cc79e5cSAmaiKinono }
13453cc79e5cSAmaiKinono }
13463cc79e5cSAmaiKinono }
13473cc79e5cSAmaiKinono }
13483cc79e5cSAmaiKinono
134981ac50a2Sgetzze /* Structs format:
135081ac50a2Sgetzze * "struct" <ident>[{<param>}] [<:<type>]; <fields> <inner constructor> end
135181ac50a2Sgetzze * */
parseStruct(lexerState * lexer,vString * scope,int parent_kind)135281ac50a2Sgetzze static void parseStruct (lexerState *lexer, vString *scope, int parent_kind)
135381ac50a2Sgetzze {
135481ac50a2Sgetzze vString *name;
135581ac50a2Sgetzze vString *field;
135670cbf361Sgetzze size_t old_scope_len;
135781ac50a2Sgetzze unsigned long line;
135881ac50a2Sgetzze MIOPos pos;
135981ac50a2Sgetzze
13607ea3b2f4Sgetzze advanceToken(lexer, true, false);
136181ac50a2Sgetzze if (lexer->cur_token != TOKEN_IDENTIFIER)
136281ac50a2Sgetzze {
136381ac50a2Sgetzze return;
136481ac50a2Sgetzze }
136581ac50a2Sgetzze
136681ac50a2Sgetzze name = vStringNewCopy(lexer->token_str);
136781ac50a2Sgetzze field = vStringNew();
136881ac50a2Sgetzze line = lexer->line;
136981ac50a2Sgetzze pos = lexer->pos;
137081ac50a2Sgetzze
137181ac50a2Sgetzze /* scan parametrization */
13727ea3b2f4Sgetzze advanceToken(lexer, true, false);
137381ac50a2Sgetzze if (lexer->cur_token == TOKEN_CURLY_BLOCK)
137481ac50a2Sgetzze {
137581ac50a2Sgetzze addTag(name, NULL, vStringValue(lexer->token_str), K_STRUCT, line, pos, scope, parent_kind);
13767ea3b2f4Sgetzze advanceToken(lexer, true, false);
137781ac50a2Sgetzze }
137881ac50a2Sgetzze else
137981ac50a2Sgetzze {
138081ac50a2Sgetzze addTag(name, NULL, NULL, K_STRUCT, line, pos, scope, parent_kind);
138181ac50a2Sgetzze }
138281ac50a2Sgetzze addToScope(scope, name);
138381ac50a2Sgetzze
138481ac50a2Sgetzze /* skip inheritance */
138581ac50a2Sgetzze if (lexer->cur_token == TOKEN_TYPE_ANNOTATION)
138681ac50a2Sgetzze {
13877ea3b2f4Sgetzze advanceToken(lexer, true, false);
138881ac50a2Sgetzze }
138981ac50a2Sgetzze
139070cbf361Sgetzze /* keep the struct scope in memory to reset it after parsing constructors */
139170cbf361Sgetzze old_scope_len = vStringLength(scope);
139281ac50a2Sgetzze /* Parse fields and inner constructors */
139381ac50a2Sgetzze while (lexer->cur_token != TOKEN_EOF && lexer->cur_token != TOKEN_CLOSE_BLOCK)
139481ac50a2Sgetzze {
1395db610ad5Sgetzze if (lexer->cur_token == TOKEN_IDENTIFIER && lexer->first_token)
139681ac50a2Sgetzze {
1397db610ad5Sgetzze if (strcmp(vStringValue(lexer->token_str), vStringValue(name)) == 0)
139881ac50a2Sgetzze {
139981ac50a2Sgetzze /* inner constructor */
140081ac50a2Sgetzze parseShortFunction(lexer, scope, K_STRUCT);
140181ac50a2Sgetzze continue;
140281ac50a2Sgetzze }
140381ac50a2Sgetzze
140481ac50a2Sgetzze vStringCopy(field, lexer->token_str);
140581ac50a2Sgetzze
140681ac50a2Sgetzze /* parse type annotation */
14077ea3b2f4Sgetzze advanceToken(lexer, true, false);
140881ac50a2Sgetzze if (lexer->cur_token == TOKEN_TYPE_ANNOTATION)
140981ac50a2Sgetzze {
141081ac50a2Sgetzze addTag(field, NULL, vStringValue(lexer->token_str), K_FIELD, lexer->line, lexer->pos, scope, K_STRUCT);
14117ea3b2f4Sgetzze advanceToken(lexer, true, false);
141281ac50a2Sgetzze }
141381ac50a2Sgetzze else
141481ac50a2Sgetzze {
141581ac50a2Sgetzze addTag(field, NULL, NULL, K_FIELD, lexer->line, lexer->pos, scope, K_STRUCT);
141681ac50a2Sgetzze }
141781ac50a2Sgetzze }
141881ac50a2Sgetzze else if (lexer->cur_token == TOKEN_FUNCTION)
141981ac50a2Sgetzze {
142081ac50a2Sgetzze /* inner constructor */
142181ac50a2Sgetzze parseFunction(lexer, scope, K_STRUCT);
142281ac50a2Sgetzze }
142381ac50a2Sgetzze else
142481ac50a2Sgetzze {
142581ac50a2Sgetzze /* Get next token */
14267ea3b2f4Sgetzze advanceToken(lexer, true, false);
142781ac50a2Sgetzze }
142870cbf361Sgetzze resetScope(scope, old_scope_len);
142981ac50a2Sgetzze }
143081ac50a2Sgetzze
143181ac50a2Sgetzze vStringDelete(name);
143281ac50a2Sgetzze vStringDelete(field);
143381ac50a2Sgetzze }
143481ac50a2Sgetzze
143581ac50a2Sgetzze
/* Parse a sequence of expressions, dispatching every definition keyword to
 * its dedicated parser.
 *
 * lexer: lexer positioned on the first token to examine.
 * delim: when true, return once the block that was open on entry has been
 *        closed, i.e. when the nesting level drops to zero; when false,
 *        run until the end of input.
 * kind:  kind passed as parent kind to every tag created from here.
 * scope: current scope name; whatever a sub-parser appends to it is trimmed
 *        again after each expression.
 */
static void parseExpr (lexerState *lexer, bool delim, int kind, vString *scope)
{
    int level = 1;
    size_t old_scope_len;
    vString *local_scope = NULL;

    while (lexer->cur_token != TOKEN_EOF)
    {
        old_scope_len = vStringLength(scope);
        /* Advance token and update if this is a new line */
        while (lexer->cur_token == TOKEN_NEWLINE ||
               lexer->cur_token == TOKEN_SEMICOLON ||
               lexer->cur_token == TOKEN_NONE )
        {
            advanceToken(lexer, true, false);
        }

        /* Make sure every case advances the token
         * otherwise we can be stuck in infinite loop */
        switch (lexer->cur_token)
        {
            case TOKEN_CONST:
                parseConst(lexer, scope, kind);
                break;
            case TOKEN_FUNCTION:
                parseFunction(lexer, scope, kind);
                break;
            case TOKEN_MACRO:
                parseMacro(lexer, scope, kind);
                break;
            case TOKEN_MODULE:
                parseModule(lexer, scope, kind);
                break;
            case TOKEN_STRUCT:
                parseStruct(lexer, scope, kind);
                break;
            case TOKEN_TYPE:
                parseType(lexer, scope, kind);
                break;
            case TOKEN_IMPORT:
                parseImport(lexer, scope, TOKEN_IMPORT, kind);
                break;
            case TOKEN_USING:
                parseImport(lexer, scope, TOKEN_USING, kind);
                /* Must not fall through: the identifier case below would
                 * swallow the token following the statement, including a
                 * block opener or closer that has to be counted. */
                break;
            case TOKEN_IDENTIFIER:
                if (lexer->first_token && lexer->cur_c == '.')
                {
                    /* "Module.name" at the start of a line: the part before
                     * the dot is used as scope for a short-form function */
                    if (local_scope == NULL)
                    {
                        local_scope = vStringNew();
                    }
                    vStringCopy(local_scope, lexer->token_str);
                    advanceChar(lexer);
                    /* next token, but keep the first_token value */
                    advanceToken(lexer, true, true);
                    skipWhitespace(lexer, false);
                    if (lexer->cur_c == '(')
                    {
                        parseShortFunction(lexer, local_scope, K_MODULE);
                    }
                }
                else
                {
                    skipWhitespace(lexer, false);
                    if (lexer->first_token && (lexer->cur_c == '(' || lexer->cur_c == '{'))
                    {
                        parseShortFunction(lexer, scope, kind);
                    }
                    else
                    {
                        advanceToken(lexer, true, false);
                    }
                }
                break;
            case TOKEN_OPEN_BLOCK:
                level++;
                advanceToken(lexer, true, false);
                break;
            case TOKEN_CLOSE_BLOCK:
                level--;
                advanceToken(lexer, true, false);
                break;
            default:
                advanceToken(lexer, true, false);
                break;
        }
        /* drop whatever the sub-parser appended to the scope */
        resetScope(scope, old_scope_len);
        if (delim && level <= 0)
        {
            break;
        }
    }
    vStringDelete(local_scope);
}
153081ac50a2Sgetzze
findJuliaTags(void)153181ac50a2Sgetzze static void findJuliaTags (void)
153281ac50a2Sgetzze {
153381ac50a2Sgetzze lexerState lexer;
153481ac50a2Sgetzze vString* scope = vStringNew();
153581ac50a2Sgetzze initLexer(&lexer);
153681ac50a2Sgetzze
153781ac50a2Sgetzze parseExpr(&lexer, false, K_NONE, scope);
153881ac50a2Sgetzze vStringDelete(scope);
153981ac50a2Sgetzze
154081ac50a2Sgetzze deInitLexer(&lexer);
154181ac50a2Sgetzze }
154281ac50a2Sgetzze
JuliaParser(void)154381ac50a2Sgetzze extern parserDefinition* JuliaParser (void)
154481ac50a2Sgetzze {
154581ac50a2Sgetzze static const char *const extensions [] = { "jl", NULL };
154681ac50a2Sgetzze parserDefinition* def = parserNew ("Julia");
154781ac50a2Sgetzze def->kindTable = JuliaKinds;
154881ac50a2Sgetzze def->kindCount = ARRAY_SIZE (JuliaKinds);
154981ac50a2Sgetzze def->extensions = extensions;
155081ac50a2Sgetzze def->parser = findJuliaTags;
155181ac50a2Sgetzze def->keywordTable = JuliaKeywordTable;
155281ac50a2Sgetzze def->keywordCount = ARRAY_SIZE (JuliaKeywordTable);
155381ac50a2Sgetzze return def;
155481ac50a2Sgetzze }
1555