13ae02089SMasatake YAMATO /*
23ae02089SMasatake YAMATO *
33ae02089SMasatake YAMATO * This source code is released for free distribution under the terms of the
40ce38835Sviccuad * GNU General Public License version 2 or (at your option) any later version.
53ae02089SMasatake YAMATO *
63ae02089SMasatake YAMATO * This module contains functions for generating tags for Rust files.
73ae02089SMasatake YAMATO */
83ae02089SMasatake YAMATO
93ae02089SMasatake YAMATO /*
103ae02089SMasatake YAMATO * INCLUDE FILES
113ae02089SMasatake YAMATO */
123ae02089SMasatake YAMATO #include "general.h" /* must always come first */
133ae02089SMasatake YAMATO
143ae02089SMasatake YAMATO #include <string.h>
153ae02089SMasatake YAMATO
163ae02089SMasatake YAMATO #include "keyword.h"
173ae02089SMasatake YAMATO #include "parse.h"
183ae02089SMasatake YAMATO #include "entry.h"
193ae02089SMasatake YAMATO #include "options.h"
203ae02089SMasatake YAMATO #include "read.h"
213db72c21SMasatake YAMATO #include "routines.h"
223ae02089SMasatake YAMATO #include "vstring.h"
233ae02089SMasatake YAMATO
243ae02089SMasatake YAMATO /*
253ae02089SMasatake YAMATO * MACROS
263ae02089SMasatake YAMATO */
/* Upper bound on the number of characters stored in lexerState::token_str.
 * Longer tokens are still consumed from the input; only the excess
 * characters are not recorded. */
#define MAX_STRING_LENGTH 256
283ae02089SMasatake YAMATO
293ae02089SMasatake YAMATO /*
303ae02089SMasatake YAMATO * DATA DECLARATIONS
313ae02089SMasatake YAMATO */
323ae02089SMasatake YAMATO
/* Tag kinds. The values are used as indices into rustKinds[], so the order
 * of the enumerators must match the order of the table entries. K_NONE has
 * no table entry; it stands for "no kind" / "no parent scope". */
typedef enum {
	K_MOD,
	K_STRUCT,
	K_TRAIT,
	K_IMPL,
	K_FN,
	K_ENUM,
	K_TYPE,
	K_STATIC,
	K_MACRO,
	K_FIELD,
	K_VARIANT,
	K_METHOD,
	K_CONST,
	K_NONE		/* sentinel, must stay last */
} RustKind;
493ae02089SMasatake YAMATO
50e112e8abSMasatake YAMATO static kindDefinition rustKinds[] = {
51ce990805SThomas Braun {true, 'n', "module", "module"},
52ce990805SThomas Braun {true, 's', "struct", "structural type"},
53ce990805SThomas Braun {true, 'i', "interface", "trait interface"},
54ce990805SThomas Braun {true, 'c', "implementation", "implementation"},
55ce990805SThomas Braun {true, 'f', "function", "Function"},
56ce990805SThomas Braun {true, 'g', "enum", "Enum"},
57ce990805SThomas Braun {true, 't', "typedef", "Type Alias"},
58ce990805SThomas Braun {true, 'v', "variable", "Global variable"},
59ce990805SThomas Braun {true, 'M', "macro", "Macro Definition"},
60ce990805SThomas Braun {true, 'm', "field", "A struct field"},
61ce990805SThomas Braun {true, 'e', "enumerator", "An enum variant"},
628050d8baSMasatake YAMATO {true, 'P', "method", "A method"},
63*648cbe27SJiří Techet {true, 'C', "constant", "A constant"},
643ae02089SMasatake YAMATO };
653ae02089SMasatake YAMATO
/* Named token codes stored in lexerState::cur_token. Every other token is
 * a single character and is stored as its own character code, so the named
 * codes start above the 8-bit character range: if they started at 0, a raw
 * control byte in the input (for example 0x06) would be indistinguishable
 * from a named token (TOKEN_EOF) and could end parsing prematurely. */
typedef enum {
	TOKEN_WHITESPACE = 256,	/* run of whitespace and/or comments */
	TOKEN_STRING,		/* string, raw string, char literal or lifetime */
	TOKEN_IDENT,		/* identifier or keyword */
	TOKEN_LSHIFT,		/* << */
	TOKEN_RSHIFT,		/* >> */
	TOKEN_RARROW,		/* -> */
	TOKEN_EOF		/* end of input */
} tokenType;
753ae02089SMasatake YAMATO
763ae02089SMasatake YAMATO typedef struct {
773ae02089SMasatake YAMATO /* Characters */
783ae02089SMasatake YAMATO int cur_c;
793ae02089SMasatake YAMATO int next_c;
803ae02089SMasatake YAMATO
813ae02089SMasatake YAMATO /* Tokens */
823ae02089SMasatake YAMATO int cur_token;
833ae02089SMasatake YAMATO vString* token_str;
843ae02089SMasatake YAMATO unsigned long line;
85509a47dbSJiří Techet MIOPos pos;
863ae02089SMasatake YAMATO } lexerState;
873ae02089SMasatake YAMATO
883ae02089SMasatake YAMATO /*
893ae02089SMasatake YAMATO * FUNCTION PROTOTYPES
903ae02089SMasatake YAMATO */
913ae02089SMasatake YAMATO
92ce990805SThomas Braun static void parseBlock (lexerState *lexer, bool delim, int kind, vString *scope);
933ae02089SMasatake YAMATO
943ae02089SMasatake YAMATO /*
953ae02089SMasatake YAMATO * FUNCTION DEFINITIONS
963ae02089SMasatake YAMATO */
973ae02089SMasatake YAMATO
983ae02089SMasatake YAMATO /* Resets the scope string to the old length */
resetScope(vString * scope,size_t old_len)993ae02089SMasatake YAMATO static void resetScope (vString *scope, size_t old_len)
1003ae02089SMasatake YAMATO {
101694968ddSMasatake YAMATO vStringTruncate (scope, old_len);
1023ae02089SMasatake YAMATO }
1033ae02089SMasatake YAMATO
1043ae02089SMasatake YAMATO /* Adds a name to the end of the scope string */
addToScope(vString * scope,vString * name)1053ae02089SMasatake YAMATO static void addToScope (vString *scope, vString *name)
1063ae02089SMasatake YAMATO {
1073ae02089SMasatake YAMATO if (vStringLength(scope) > 0)
1083ae02089SMasatake YAMATO vStringCatS(scope, "::");
1093ae02089SMasatake YAMATO vStringCat(scope, name);
1103ae02089SMasatake YAMATO }
1113ae02089SMasatake YAMATO
1123ae02089SMasatake YAMATO /* Write the lexer's current token to string, taking care of special tokens */
writeCurTokenToStr(lexerState * lexer,vString * out_str)1133ae02089SMasatake YAMATO static void writeCurTokenToStr (lexerState *lexer, vString *out_str)
1143ae02089SMasatake YAMATO {
1153ae02089SMasatake YAMATO switch (lexer->cur_token)
1163ae02089SMasatake YAMATO {
1173ae02089SMasatake YAMATO case TOKEN_IDENT:
1183ae02089SMasatake YAMATO vStringCat(out_str, lexer->token_str);
1193ae02089SMasatake YAMATO break;
1203ae02089SMasatake YAMATO case TOKEN_STRING:
1213ae02089SMasatake YAMATO vStringCat(out_str, lexer->token_str);
1223ae02089SMasatake YAMATO break;
1233ae02089SMasatake YAMATO case TOKEN_WHITESPACE:
1243ae02089SMasatake YAMATO vStringPut(out_str, ' ');
1253ae02089SMasatake YAMATO break;
1263ae02089SMasatake YAMATO case TOKEN_LSHIFT:
1273ae02089SMasatake YAMATO vStringCatS(out_str, "<<");
1283ae02089SMasatake YAMATO break;
1293ae02089SMasatake YAMATO case TOKEN_RSHIFT:
1303ae02089SMasatake YAMATO vStringCatS(out_str, ">>");
1313ae02089SMasatake YAMATO break;
1323ae02089SMasatake YAMATO case TOKEN_RARROW:
1333ae02089SMasatake YAMATO vStringCatS(out_str, "->");
1343ae02089SMasatake YAMATO break;
1353ae02089SMasatake YAMATO default:
1363ae02089SMasatake YAMATO vStringPut(out_str, (char) lexer->cur_token);
1373ae02089SMasatake YAMATO }
1383ae02089SMasatake YAMATO }
1393ae02089SMasatake YAMATO
1403ae02089SMasatake YAMATO /* Reads a character from the file */
advanceChar(lexerState * lexer)1413ae02089SMasatake YAMATO static void advanceChar (lexerState *lexer)
1423ae02089SMasatake YAMATO {
1433ae02089SMasatake YAMATO lexer->cur_c = lexer->next_c;
144018bce0bSMasatake YAMATO lexer->next_c = getcFromInputFile();
1453ae02089SMasatake YAMATO }
1463ae02089SMasatake YAMATO
1473ae02089SMasatake YAMATO /* Reads N characters from the file */
advanceNChar(lexerState * lexer,int n)1483ae02089SMasatake YAMATO static void advanceNChar (lexerState *lexer, int n)
1493ae02089SMasatake YAMATO {
1503ae02089SMasatake YAMATO while (n--)
1513ae02089SMasatake YAMATO advanceChar(lexer);
1523ae02089SMasatake YAMATO }
1533ae02089SMasatake YAMATO
1543ae02089SMasatake YAMATO /* Store the current character in lexerState::token_str if there is space
1553ae02089SMasatake YAMATO * (set by MAX_STRING_LENGTH), and then read the next character from the file */
advanceAndStoreChar(lexerState * lexer)1563ae02089SMasatake YAMATO static void advanceAndStoreChar (lexerState *lexer)
1573ae02089SMasatake YAMATO {
1583ae02089SMasatake YAMATO if (vStringLength(lexer->token_str) < MAX_STRING_LENGTH)
1593ae02089SMasatake YAMATO vStringPut(lexer->token_str, (char) lexer->cur_c);
1603ae02089SMasatake YAMATO advanceChar(lexer);
1613ae02089SMasatake YAMATO }
1623ae02089SMasatake YAMATO
/* True for the four characters the lexer treats as whitespace: space,
 * tab, carriage return and line feed. */
static bool isWhitespace (int c)
{
	switch (c)
	{
		case ' ':
		case '\t':
		case '\r':
		case '\n':
			return true;
		default:
			return false;
	}
}
1673ae02089SMasatake YAMATO
/* True when c lies in the 7-bit ASCII range 0x00..0x7F. Negative values
 * (such as EOF) convert to large unsigned values and are rejected. */
static bool isAscii (int c)
{
	return (unsigned int) c < 0x80u;
}
1723ae02089SMasatake YAMATO
/* True when c may begin an identifier: an ASCII letter, an underscore, or
 * any value outside the ASCII range. Accepting every non-ASCII value is
 * only an approximation of Unicode identifier rules. */
static bool isIdentifierStart (int c)
{
	if (!isAscii(c))
		return true;
	return isalpha(c) || c == '_';
}
1783ae02089SMasatake YAMATO
/* True when c may continue an identifier: an ASCII letter or digit, an
 * underscore, or any value outside the ASCII range. Accepting every
 * non-ASCII value is only an approximation of Unicode identifier rules. */
static bool isIdentifierContinue (int c)
{
	if (!isAscii(c))
		return true;
	return isalnum(c) || c == '_';
}
1843ae02089SMasatake YAMATO
/* Consumes characters for as long as the current one is whitespace. */
static void scanWhitespace (lexerState *lexer)
{
	for (;;)
	{
		if (!isWhitespace(lexer->cur_c))
			return;
		advanceChar(lexer);
	}
}
1903ae02089SMasatake YAMATO
1913ae02089SMasatake YAMATO /* Normal line comments start with two /'s and continue until the next \n
1923ae02089SMasatake YAMATO * (potentially after a \r). Additionally, a shebang in the beginning of the
1933ae02089SMasatake YAMATO * file also counts as a line comment as long as it is not this sequence: #![ .
1943ae02089SMasatake YAMATO * Block comments start with / followed by a * and end with a * followed by a /.
1953ae02089SMasatake YAMATO * Unlike in C/C++ they nest. */
scanComments(lexerState * lexer)1963ae02089SMasatake YAMATO static void scanComments (lexerState *lexer)
1973ae02089SMasatake YAMATO {
1983ae02089SMasatake YAMATO /* // */
1993ae02089SMasatake YAMATO if (lexer->next_c == '/')
2003ae02089SMasatake YAMATO {
2013ae02089SMasatake YAMATO advanceNChar(lexer, 2);
2023ae02089SMasatake YAMATO while (lexer->cur_c != EOF && lexer->cur_c != '\n')
2033ae02089SMasatake YAMATO advanceChar(lexer);
2043ae02089SMasatake YAMATO }
2053ae02089SMasatake YAMATO /* #! */
2063ae02089SMasatake YAMATO else if (lexer->next_c == '!')
2073ae02089SMasatake YAMATO {
2083ae02089SMasatake YAMATO advanceNChar(lexer, 2);
2093ae02089SMasatake YAMATO /* If it is exactly #![ then it is not a comment, but an attribute */
2103ae02089SMasatake YAMATO if (lexer->cur_c == '[')
2113ae02089SMasatake YAMATO return;
2123ae02089SMasatake YAMATO while (lexer->cur_c != EOF && lexer->cur_c != '\n')
2133ae02089SMasatake YAMATO advanceChar(lexer);
2143ae02089SMasatake YAMATO }
2153ae02089SMasatake YAMATO /* block comment */
2163ae02089SMasatake YAMATO else if (lexer->next_c == '*')
2173ae02089SMasatake YAMATO {
2183ae02089SMasatake YAMATO int level = 1;
2193ae02089SMasatake YAMATO advanceNChar(lexer, 2);
2203ae02089SMasatake YAMATO while (lexer->cur_c != EOF && level > 0)
2213ae02089SMasatake YAMATO {
2223ae02089SMasatake YAMATO if (lexer->cur_c == '*' && lexer->next_c == '/')
2233ae02089SMasatake YAMATO {
2243ae02089SMasatake YAMATO level--;
2253ae02089SMasatake YAMATO advanceNChar(lexer, 2);
2263ae02089SMasatake YAMATO }
2273ae02089SMasatake YAMATO else if (lexer->cur_c == '/' && lexer->next_c == '*')
2283ae02089SMasatake YAMATO {
2293ae02089SMasatake YAMATO level++;
2303ae02089SMasatake YAMATO advanceNChar(lexer, 2);
2313ae02089SMasatake YAMATO }
2323ae02089SMasatake YAMATO else
2333ae02089SMasatake YAMATO {
2343ae02089SMasatake YAMATO advanceChar(lexer);
2353ae02089SMasatake YAMATO }
2363ae02089SMasatake YAMATO }
2373ae02089SMasatake YAMATO }
2383ae02089SMasatake YAMATO }
2393ae02089SMasatake YAMATO
/* Reads an identifier into lexerState::token_str. The current character
 * is taken unconditionally as the first character (the caller has already
 * checked it); following characters are taken while they satisfy
 * isIdentifierContinue(). */
static void scanIdentifier (lexerState *lexer)
{
	vStringClear(lexer->token_str);
	advanceAndStoreChar(lexer);
	while (lexer->cur_c != EOF && isIdentifierContinue(lexer->cur_c))
		advanceAndStoreChar(lexer);
}
2483ae02089SMasatake YAMATO
2493ae02089SMasatake YAMATO /* Double-quoted strings, we only care about the \" escape. These
2503ae02089SMasatake YAMATO * last past the end of the line, so be careful not too store too much
2513ae02089SMasatake YAMATO * of them (see MAX_STRING_LENGTH). The only place we look at their
2523ae02089SMasatake YAMATO * contents is in the function definitions, and there the valid strings are
2533ae02089SMasatake YAMATO * things like "C" and "Rust" */
scanString(lexerState * lexer)2543ae02089SMasatake YAMATO static void scanString (lexerState *lexer)
2553ae02089SMasatake YAMATO {
2563ae02089SMasatake YAMATO vStringClear(lexer->token_str);
2573ae02089SMasatake YAMATO advanceAndStoreChar(lexer);
2583ae02089SMasatake YAMATO while (lexer->cur_c != EOF && lexer->cur_c != '"')
2593ae02089SMasatake YAMATO {
2603ae02089SMasatake YAMATO if (lexer->cur_c == '\\' && lexer->next_c == '"')
2613ae02089SMasatake YAMATO advanceAndStoreChar(lexer);
2623ae02089SMasatake YAMATO advanceAndStoreChar(lexer);
2633ae02089SMasatake YAMATO }
2643ae02089SMasatake YAMATO advanceAndStoreChar(lexer);
2653ae02089SMasatake YAMATO }
2663ae02089SMasatake YAMATO
2673ae02089SMasatake YAMATO /* Raw strings look like this: r"" or r##""## where the number of
2683ae02089SMasatake YAMATO * hashes must match */
scanRawString(lexerState * lexer)2693ae02089SMasatake YAMATO static void scanRawString (lexerState *lexer)
2703ae02089SMasatake YAMATO {
2713ae02089SMasatake YAMATO size_t num_initial_hashes = 0;
2723ae02089SMasatake YAMATO vStringClear(lexer->token_str);
2733ae02089SMasatake YAMATO advanceAndStoreChar(lexer);
2743ae02089SMasatake YAMATO /* Count how many leading hashes there are */
2753ae02089SMasatake YAMATO while (lexer->cur_c == '#')
2763ae02089SMasatake YAMATO {
2773ae02089SMasatake YAMATO num_initial_hashes++;
2783ae02089SMasatake YAMATO advanceAndStoreChar(lexer);
2793ae02089SMasatake YAMATO }
2803ae02089SMasatake YAMATO if (lexer->cur_c != '"')
2813ae02089SMasatake YAMATO return;
2823ae02089SMasatake YAMATO advanceAndStoreChar(lexer);
2833ae02089SMasatake YAMATO while (lexer->cur_c != EOF)
2843ae02089SMasatake YAMATO {
2853ae02089SMasatake YAMATO /* Count how many trailing hashes there are. If the number is equal or more
2863ae02089SMasatake YAMATO * than the number of leading hashes, break. */
2873ae02089SMasatake YAMATO if (lexer->cur_c == '"')
2883ae02089SMasatake YAMATO {
2893ae02089SMasatake YAMATO size_t num_trailing_hashes = 0;
2903ae02089SMasatake YAMATO advanceAndStoreChar(lexer);
2913ae02089SMasatake YAMATO while (lexer->cur_c == '#' && num_trailing_hashes < num_initial_hashes)
2923ae02089SMasatake YAMATO {
2933ae02089SMasatake YAMATO num_trailing_hashes++;
2943ae02089SMasatake YAMATO
2953ae02089SMasatake YAMATO advanceAndStoreChar(lexer);
2963ae02089SMasatake YAMATO }
2973ae02089SMasatake YAMATO if (num_trailing_hashes == num_initial_hashes)
2983ae02089SMasatake YAMATO break;
2993ae02089SMasatake YAMATO }
3003ae02089SMasatake YAMATO else
3013ae02089SMasatake YAMATO {
3023ae02089SMasatake YAMATO advanceAndStoreChar(lexer);
3033ae02089SMasatake YAMATO }
3043ae02089SMasatake YAMATO }
3053ae02089SMasatake YAMATO }
3063ae02089SMasatake YAMATO
3073ae02089SMasatake YAMATO /* This deals with character literals: 'n', '\n', '\uFFFF'; and lifetimes:
3083ae02089SMasatake YAMATO * 'lifetime. We'll use this approximate regexp for the literals:
3093ae02089SMasatake YAMATO * \' \\ [^']+ \' or \' [^'] \' or \' \\ \' \'. Either way, we'll treat this
3103ae02089SMasatake YAMATO * token as a string, so it gets preserved as is for function signatures with
3113ae02089SMasatake YAMATO * lifetimes. */
scanCharacterOrLifetime(lexerState * lexer)3123ae02089SMasatake YAMATO static void scanCharacterOrLifetime (lexerState *lexer)
3133ae02089SMasatake YAMATO {
3143ae02089SMasatake YAMATO vStringClear(lexer->token_str);
3153ae02089SMasatake YAMATO advanceAndStoreChar(lexer);
3163ae02089SMasatake YAMATO
3173ae02089SMasatake YAMATO if (lexer->cur_c == '\\')
3183ae02089SMasatake YAMATO {
3193ae02089SMasatake YAMATO advanceAndStoreChar(lexer);
3203ae02089SMasatake YAMATO /* The \' \\ \' \' (literally '\'') case */
3213ae02089SMasatake YAMATO if (lexer->cur_c == '\'' && lexer->next_c == '\'')
3223ae02089SMasatake YAMATO {
3233ae02089SMasatake YAMATO advanceAndStoreChar(lexer);
3243ae02089SMasatake YAMATO advanceAndStoreChar(lexer);
3253ae02089SMasatake YAMATO }
3263ae02089SMasatake YAMATO /* The \' \\ [^']+ \' case */
3273ae02089SMasatake YAMATO else
3283ae02089SMasatake YAMATO {
3293ae02089SMasatake YAMATO while (lexer->cur_c != EOF && lexer->cur_c != '\'')
3303ae02089SMasatake YAMATO advanceAndStoreChar(lexer);
3313ae02089SMasatake YAMATO }
3323ae02089SMasatake YAMATO }
3333ae02089SMasatake YAMATO /* The \' [^'] \' case */
3343ae02089SMasatake YAMATO else if (lexer->cur_c != '\'' && lexer->next_c == '\'')
3353ae02089SMasatake YAMATO {
3363ae02089SMasatake YAMATO advanceAndStoreChar(lexer);
3373ae02089SMasatake YAMATO advanceAndStoreChar(lexer);
3383ae02089SMasatake YAMATO }
3393ae02089SMasatake YAMATO /* Otherwise it is malformed, or a lifetime */
3403ae02089SMasatake YAMATO }
3413ae02089SMasatake YAMATO
3423ae02089SMasatake YAMATO /* Advances the parser one token, optionally skipping whitespace
3433ae02089SMasatake YAMATO * (otherwise it is concatenated and returned as a single whitespace token).
3443ae02089SMasatake YAMATO * Whitespace is needed to properly render function signatures. Unrecognized
3453ae02089SMasatake YAMATO * token starts are stored literally, e.g. token may equal to a character '#'. */
advanceToken(lexerState * lexer,bool skip_whitspace)346ce990805SThomas Braun static int advanceToken (lexerState *lexer, bool skip_whitspace)
3473ae02089SMasatake YAMATO {
348ce990805SThomas Braun bool have_whitespace = false;
349a31b37dcSMasatake YAMATO lexer->line = getInputLineNumber();
3503ae02089SMasatake YAMATO lexer->pos = getInputFilePosition();
3513ae02089SMasatake YAMATO while (lexer->cur_c != EOF)
3523ae02089SMasatake YAMATO {
3533ae02089SMasatake YAMATO if (isWhitespace(lexer->cur_c))
3543ae02089SMasatake YAMATO {
3553ae02089SMasatake YAMATO scanWhitespace(lexer);
356ce990805SThomas Braun have_whitespace = true;
3573ae02089SMasatake YAMATO }
3583ae02089SMasatake YAMATO else if (lexer->cur_c == '/' && (lexer->next_c == '/' || lexer->next_c == '*'))
3593ae02089SMasatake YAMATO {
3603ae02089SMasatake YAMATO scanComments(lexer);
361ce990805SThomas Braun have_whitespace = true;
3623ae02089SMasatake YAMATO }
3633ae02089SMasatake YAMATO else
3643ae02089SMasatake YAMATO {
3653ae02089SMasatake YAMATO if (have_whitespace && !skip_whitspace)
3663ae02089SMasatake YAMATO return lexer->cur_token = TOKEN_WHITESPACE;
3673ae02089SMasatake YAMATO break;
3683ae02089SMasatake YAMATO }
3693ae02089SMasatake YAMATO }
370a31b37dcSMasatake YAMATO lexer->line = getInputLineNumber();
3713ae02089SMasatake YAMATO lexer->pos = getInputFilePosition();
3723ae02089SMasatake YAMATO while (lexer->cur_c != EOF)
3733ae02089SMasatake YAMATO {
3743ae02089SMasatake YAMATO if (lexer->cur_c == '"')
3753ae02089SMasatake YAMATO {
3763ae02089SMasatake YAMATO scanString(lexer);
3773ae02089SMasatake YAMATO return lexer->cur_token = TOKEN_STRING;
3783ae02089SMasatake YAMATO }
3793ae02089SMasatake YAMATO else if (lexer->cur_c == 'r' && (lexer->next_c == '#' || lexer->next_c == '"'))
3803ae02089SMasatake YAMATO {
3813ae02089SMasatake YAMATO scanRawString(lexer);
3823ae02089SMasatake YAMATO return lexer->cur_token = TOKEN_STRING;
3833ae02089SMasatake YAMATO }
3843ae02089SMasatake YAMATO else if (lexer->cur_c == '\'')
3853ae02089SMasatake YAMATO {
3863ae02089SMasatake YAMATO scanCharacterOrLifetime(lexer);
3873ae02089SMasatake YAMATO return lexer->cur_token = TOKEN_STRING;
3883ae02089SMasatake YAMATO }
3893ae02089SMasatake YAMATO else if (isIdentifierStart(lexer->cur_c))
3903ae02089SMasatake YAMATO {
3913ae02089SMasatake YAMATO scanIdentifier(lexer);
3923ae02089SMasatake YAMATO return lexer->cur_token = TOKEN_IDENT;
3933ae02089SMasatake YAMATO }
3943ae02089SMasatake YAMATO /* These shift tokens aren't too important for tag-generation per se,
3953ae02089SMasatake YAMATO * but they confuse the skipUntil code which tracks the <> pairs. */
3963ae02089SMasatake YAMATO else if (lexer->cur_c == '>' && lexer->next_c == '>')
3973ae02089SMasatake YAMATO {
3983ae02089SMasatake YAMATO advanceNChar(lexer, 2);
3993ae02089SMasatake YAMATO return lexer->cur_token = TOKEN_RSHIFT;
4003ae02089SMasatake YAMATO }
4013ae02089SMasatake YAMATO else if (lexer->cur_c == '<' && lexer->next_c == '<')
4023ae02089SMasatake YAMATO {
4033ae02089SMasatake YAMATO advanceNChar(lexer, 2);
4043ae02089SMasatake YAMATO return lexer->cur_token = TOKEN_LSHIFT;
4053ae02089SMasatake YAMATO }
4063ae02089SMasatake YAMATO else if (lexer->cur_c == '-' && lexer->next_c == '>')
4073ae02089SMasatake YAMATO {
4083ae02089SMasatake YAMATO advanceNChar(lexer, 2);
4093ae02089SMasatake YAMATO return lexer->cur_token = TOKEN_RARROW;
4103ae02089SMasatake YAMATO }
4113ae02089SMasatake YAMATO else
4123ae02089SMasatake YAMATO {
4133ae02089SMasatake YAMATO int c = lexer->cur_c;
4143ae02089SMasatake YAMATO advanceChar(lexer);
4153ae02089SMasatake YAMATO return lexer->cur_token = c;
4163ae02089SMasatake YAMATO }
4173ae02089SMasatake YAMATO }
4183ae02089SMasatake YAMATO return lexer->cur_token = TOKEN_EOF;
4193ae02089SMasatake YAMATO }
4203ae02089SMasatake YAMATO
/* Prepares the lexer for use: fills the two-character window from the
 * input file, allocates token_str, skips a leading "#!" shebang line (but
 * not a "#![" attribute) and reads the first token. */
static void initLexer (lexerState *lexer)
{
	/* Seed both characters before priming the window, so that the first
	 * advance does not copy a never-written next_c when the caller hands
	 * in an uninitialised struct. */
	lexer->cur_c = EOF;
	lexer->next_c = EOF;
	advanceNChar(lexer, 2);
	lexer->token_str = vStringNew();

	if (lexer->cur_c == '#' && lexer->next_c == '!')
		scanComments(lexer);
	advanceToken(lexer, true);
}
4303ae02089SMasatake YAMATO
/* Releases the token buffer owned by the lexer and clears the pointer so
 * that it cannot be used after deletion. */
static void deInitLexer (lexerState *lexer)
{
	vString *const buf = lexer->token_str;

	lexer->token_str = NULL;
	vStringDelete(buf);
}
4363ae02089SMasatake YAMATO
/* Emits one tag entry.
 *   ident        name of the tag
 *   arg_list     signature text, or NULL
 *   kind         RustKind of the tag; nothing is emitted for K_NONE or
 *                for a kind that is disabled in rustKinds[]
 *   line, pos    location recorded in the tag
 *   scope        name of the enclosing scope
 *   parent_kind  kind of the enclosing scope; the scope fields are filled
 *                in only when this is not K_NONE */
static void addTag (vString* ident, const char* arg_list, int kind, unsigned long line, MIOPos pos, vString *scope, int parent_kind)
{
	tagEntryInfo tag;

	/* K_NONE must be tested first: it is not a valid rustKinds[] index */
	if (kind == K_NONE)
		return;
	if (!rustKinds[kind].enabled)
		return;

	initTagEntry(&tag, vStringValue(ident), kind);
	tag.lineNumber = line;
	tag.filePosition = pos;
	tag.extensionFields.signature = arg_list;
	/*tag.extensionFields.varType = type;*/ /* FIXME: map to typeRef[1]? */

	if (parent_kind != K_NONE)
	{
		tag.extensionFields.scopeKindIndex = parent_kind;
		tag.extensionFields.scopeName = vStringValue(scope);
	}

	makeTagEntry(&tag);
}
4563ae02089SMasatake YAMATO
4573ae02089SMasatake YAMATO /* Skip tokens until one of the goal tokens is hit. Escapes when level = 0 if there are no goal tokens.
4583ae02089SMasatake YAMATO * Keeps track of balanced <>'s, ()'s, []'s, and {}'s and ignores the goal tokens within those pairings */
skipUntil(lexerState * lexer,int goal_tokens[],int num_goal_tokens)4593ae02089SMasatake YAMATO static void skipUntil (lexerState *lexer, int goal_tokens[], int num_goal_tokens)
4603ae02089SMasatake YAMATO {
4613ae02089SMasatake YAMATO int angle_level = 0;
4623ae02089SMasatake YAMATO int paren_level = 0;
4633ae02089SMasatake YAMATO int brace_level = 0;
4643ae02089SMasatake YAMATO int bracket_level = 0;
4653ae02089SMasatake YAMATO while (lexer->cur_token != TOKEN_EOF)
4663ae02089SMasatake YAMATO {
4673ae02089SMasatake YAMATO if (angle_level == 0 && paren_level == 0 && brace_level == 0
4683ae02089SMasatake YAMATO && bracket_level == 0)
4693ae02089SMasatake YAMATO {
4703ae02089SMasatake YAMATO int ii = 0;
4713ae02089SMasatake YAMATO for(ii = 0; ii < num_goal_tokens; ii++)
4723ae02089SMasatake YAMATO {
4733ae02089SMasatake YAMATO if (lexer->cur_token == goal_tokens[ii])
4743ae02089SMasatake YAMATO {
4753ae02089SMasatake YAMATO break;
4763ae02089SMasatake YAMATO }
4773ae02089SMasatake YAMATO }
4783ae02089SMasatake YAMATO if (ii < num_goal_tokens)
4793ae02089SMasatake YAMATO break;
4803ae02089SMasatake YAMATO }
4813ae02089SMasatake YAMATO switch (lexer->cur_token)
4823ae02089SMasatake YAMATO {
4833ae02089SMasatake YAMATO case '<':
4843ae02089SMasatake YAMATO angle_level++;
4853ae02089SMasatake YAMATO break;
4863ae02089SMasatake YAMATO case '(':
4873ae02089SMasatake YAMATO paren_level++;
4883ae02089SMasatake YAMATO break;
4893ae02089SMasatake YAMATO case '{':
4903ae02089SMasatake YAMATO brace_level++;
4913ae02089SMasatake YAMATO break;
4923ae02089SMasatake YAMATO case '[':
4933ae02089SMasatake YAMATO bracket_level++;
4943ae02089SMasatake YAMATO break;
4953ae02089SMasatake YAMATO case '>':
4963ae02089SMasatake YAMATO angle_level--;
4973ae02089SMasatake YAMATO break;
4983ae02089SMasatake YAMATO case ')':
4993ae02089SMasatake YAMATO paren_level--;
5003ae02089SMasatake YAMATO break;
5013ae02089SMasatake YAMATO case '}':
5023ae02089SMasatake YAMATO brace_level--;
5033ae02089SMasatake YAMATO break;
5043ae02089SMasatake YAMATO case ']':
5053ae02089SMasatake YAMATO bracket_level--;
5063ae02089SMasatake YAMATO break;
5073ae02089SMasatake YAMATO case TOKEN_RSHIFT:
5083ae02089SMasatake YAMATO if (angle_level >= 2)
5093ae02089SMasatake YAMATO angle_level -= 2;
5103ae02089SMasatake YAMATO break;
5113ae02089SMasatake YAMATO /* TOKEN_LSHIFT is never interpreted as two <'s in valid Rust code */
5123ae02089SMasatake YAMATO default:
5133ae02089SMasatake YAMATO break;
5143ae02089SMasatake YAMATO }
5153ae02089SMasatake YAMATO /* Has to be after the token switch to catch the case when we start with the initial level token */
5163ae02089SMasatake YAMATO if (num_goal_tokens == 0 && angle_level == 0 && paren_level == 0 && brace_level == 0
5173ae02089SMasatake YAMATO && bracket_level == 0)
5183ae02089SMasatake YAMATO break;
519ce990805SThomas Braun advanceToken(lexer, true);
5203ae02089SMasatake YAMATO }
5213ae02089SMasatake YAMATO }
5223ae02089SMasatake YAMATO
5233ae02089SMasatake YAMATO /* Function format:
5243ae02089SMasatake YAMATO * "fn" <ident>[<type_bounds>] "(" [<args>] ")" ["->" <ret_type>] "{" [<body>] "}"*/
parseFn(lexerState * lexer,vString * scope,int parent_kind)5253ae02089SMasatake YAMATO static void parseFn (lexerState *lexer, vString *scope, int parent_kind)
5263ae02089SMasatake YAMATO {
5273ae02089SMasatake YAMATO int kind = (parent_kind == K_TRAIT || parent_kind == K_IMPL) ? K_METHOD : K_FN;
5283ae02089SMasatake YAMATO vString *name;
5293ae02089SMasatake YAMATO vString *arg_list;
5303ae02089SMasatake YAMATO unsigned long line;
531509a47dbSJiří Techet MIOPos pos;
5323ae02089SMasatake YAMATO int paren_level = 0;
53329209756SLionel Flandrin int bracket_level = 0;
534ce990805SThomas Braun bool found_paren = false;
535ce990805SThomas Braun bool valid_signature = true;
5363ae02089SMasatake YAMATO
537ce990805SThomas Braun advanceToken(lexer, true);
5383ae02089SMasatake YAMATO if (lexer->cur_token != TOKEN_IDENT)
5393ae02089SMasatake YAMATO return;
5403ae02089SMasatake YAMATO
5413ae02089SMasatake YAMATO name = vStringNewCopy(lexer->token_str);
5423ae02089SMasatake YAMATO arg_list = vStringNew();
5433ae02089SMasatake YAMATO
5443ae02089SMasatake YAMATO line = lexer->line;
5453ae02089SMasatake YAMATO pos = lexer->pos;
5463ae02089SMasatake YAMATO
547ce990805SThomas Braun advanceToken(lexer, true);
5483ae02089SMasatake YAMATO
5493ae02089SMasatake YAMATO /* HACK: This is a bit coarse as far as what tag entry means by
5503ae02089SMasatake YAMATO * 'arglist'... */
55129209756SLionel Flandrin while (lexer->cur_token != '{')
5523ae02089SMasatake YAMATO {
55329209756SLionel Flandrin if (lexer->cur_token == ';' && bracket_level == 0)
55429209756SLionel Flandrin {
55529209756SLionel Flandrin break;
55629209756SLionel Flandrin }
55729209756SLionel Flandrin else if (lexer->cur_token == '}')
5583ae02089SMasatake YAMATO {
559ce990805SThomas Braun valid_signature = false;
5603ae02089SMasatake YAMATO break;
5613ae02089SMasatake YAMATO }
5623ae02089SMasatake YAMATO else if (lexer->cur_token == '(')
5633ae02089SMasatake YAMATO {
564ce990805SThomas Braun found_paren = true;
5653ae02089SMasatake YAMATO paren_level++;
5663ae02089SMasatake YAMATO }
5673ae02089SMasatake YAMATO else if (lexer->cur_token == ')')
5683ae02089SMasatake YAMATO {
5693ae02089SMasatake YAMATO paren_level--;
5703ae02089SMasatake YAMATO if (paren_level < 0)
5713ae02089SMasatake YAMATO {
572ce990805SThomas Braun valid_signature = false;
5733ae02089SMasatake YAMATO break;
5743ae02089SMasatake YAMATO }
5753ae02089SMasatake YAMATO }
57629209756SLionel Flandrin else if (lexer->cur_token == '[')
57729209756SLionel Flandrin {
57829209756SLionel Flandrin bracket_level++;
57929209756SLionel Flandrin }
58029209756SLionel Flandrin else if (lexer->cur_token == ']')
58129209756SLionel Flandrin {
58229209756SLionel Flandrin bracket_level--;
58329209756SLionel Flandrin }
5843ae02089SMasatake YAMATO else if (lexer->cur_token == TOKEN_EOF)
5853ae02089SMasatake YAMATO {
586ce990805SThomas Braun valid_signature = false;
5873ae02089SMasatake YAMATO break;
5883ae02089SMasatake YAMATO }
5893ae02089SMasatake YAMATO writeCurTokenToStr(lexer, arg_list);
590ce990805SThomas Braun advanceToken(lexer, false);
5913ae02089SMasatake YAMATO }
59229209756SLionel Flandrin if (!found_paren || paren_level != 0 || bracket_level != 0)
593ce990805SThomas Braun valid_signature = false;
5943ae02089SMasatake YAMATO
5953ae02089SMasatake YAMATO if (valid_signature)
5963ae02089SMasatake YAMATO {
5973ae02089SMasatake YAMATO vStringStripTrailing(arg_list);
59817aff2f6SMasatake YAMATO addTag(name, vStringValue(arg_list), kind, line, pos, scope, parent_kind);
5993ae02089SMasatake YAMATO addToScope(scope, name);
600ce990805SThomas Braun parseBlock(lexer, true, kind, scope);
6013ae02089SMasatake YAMATO }
6023ae02089SMasatake YAMATO
6033ae02089SMasatake YAMATO vStringDelete(name);
6043ae02089SMasatake YAMATO vStringDelete(arg_list);
6053ae02089SMasatake YAMATO }
6063ae02089SMasatake YAMATO
6073ae02089SMasatake YAMATO /* Mod format:
6083ae02089SMasatake YAMATO * "mod" <ident> "{" [<body>] "}"
6093ae02089SMasatake YAMATO * "mod" <ident> ";"*/
parseMod(lexerState * lexer,vString * scope,int parent_kind)6103ae02089SMasatake YAMATO static void parseMod (lexerState *lexer, vString *scope, int parent_kind)
6113ae02089SMasatake YAMATO {
612ce990805SThomas Braun advanceToken(lexer, true);
6133ae02089SMasatake YAMATO if (lexer->cur_token != TOKEN_IDENT)
6143ae02089SMasatake YAMATO return;
6153ae02089SMasatake YAMATO
616b0918b66SSteven Oliver addTag(lexer->token_str, NULL, K_MOD, lexer->line, lexer->pos, scope, parent_kind);
6173ae02089SMasatake YAMATO addToScope(scope, lexer->token_str);
6183ae02089SMasatake YAMATO
619ce990805SThomas Braun advanceToken(lexer, true);
6203ae02089SMasatake YAMATO
621ce990805SThomas Braun parseBlock(lexer, true, K_MOD, scope);
6223ae02089SMasatake YAMATO }
6233ae02089SMasatake YAMATO
6243ae02089SMasatake YAMATO /* Trait format:
6253ae02089SMasatake YAMATO * "trait" <ident> [<type_bounds>] "{" [<body>] "}"
6263ae02089SMasatake YAMATO */
parseTrait(lexerState * lexer,vString * scope,int parent_kind)6273ae02089SMasatake YAMATO static void parseTrait (lexerState *lexer, vString *scope, int parent_kind)
6283ae02089SMasatake YAMATO {
6293ae02089SMasatake YAMATO int goal_tokens[] = {'{'};
6303ae02089SMasatake YAMATO
631ce990805SThomas Braun advanceToken(lexer, true);
6323ae02089SMasatake YAMATO if (lexer->cur_token != TOKEN_IDENT)
6333ae02089SMasatake YAMATO return;
6343ae02089SMasatake YAMATO
635b0918b66SSteven Oliver addTag(lexer->token_str, NULL, K_TRAIT, lexer->line, lexer->pos, scope, parent_kind);
6363ae02089SMasatake YAMATO addToScope(scope, lexer->token_str);
6373ae02089SMasatake YAMATO
638ce990805SThomas Braun advanceToken(lexer, true);
6393ae02089SMasatake YAMATO
6403ae02089SMasatake YAMATO skipUntil(lexer, goal_tokens, 1);
6413ae02089SMasatake YAMATO
642ce990805SThomas Braun parseBlock(lexer, true, K_TRAIT, scope);
6433ae02089SMasatake YAMATO }
6443ae02089SMasatake YAMATO
6453ae02089SMasatake YAMATO /* Skips type blocks of the form <T:T<T>, ...> */
skipTypeBlock(lexerState * lexer)6463ae02089SMasatake YAMATO static void skipTypeBlock (lexerState *lexer)
6473ae02089SMasatake YAMATO {
6483ae02089SMasatake YAMATO if (lexer->cur_token == '<')
6493ae02089SMasatake YAMATO {
6503ae02089SMasatake YAMATO skipUntil(lexer, NULL, 0);
651ce990805SThomas Braun advanceToken(lexer, true);
6523ae02089SMasatake YAMATO }
6533ae02089SMasatake YAMATO }
6543ae02089SMasatake YAMATO
6553ae02089SMasatake YAMATO /* Essentially grabs the last ident before 'for', '<' and '{', which
6563ae02089SMasatake YAMATO * tends to correspond to what we want as the impl tag entry name */
parseQualifiedType(lexerState * lexer,vString * name)6573ae02089SMasatake YAMATO static void parseQualifiedType (lexerState *lexer, vString* name)
6583ae02089SMasatake YAMATO {
6593ae02089SMasatake YAMATO while (lexer->cur_token != TOKEN_EOF)
6603ae02089SMasatake YAMATO {
6613ae02089SMasatake YAMATO if (lexer->cur_token == TOKEN_IDENT)
6623ae02089SMasatake YAMATO {
66317aff2f6SMasatake YAMATO if (strcmp(vStringValue(lexer->token_str), "for") == 0
66417aff2f6SMasatake YAMATO || strcmp(vStringValue(lexer->token_str), "where") == 0)
6653ae02089SMasatake YAMATO break;
6663ae02089SMasatake YAMATO vStringClear(name);
6673ae02089SMasatake YAMATO vStringCat(name, lexer->token_str);
6683ae02089SMasatake YAMATO }
6693ae02089SMasatake YAMATO else if (lexer->cur_token == '<' || lexer->cur_token == '{')
6703ae02089SMasatake YAMATO {
6713ae02089SMasatake YAMATO break;
6723ae02089SMasatake YAMATO }
673ce990805SThomas Braun advanceToken(lexer, true);
6743ae02089SMasatake YAMATO }
6753ae02089SMasatake YAMATO skipTypeBlock(lexer);
6763ae02089SMasatake YAMATO }
6773ae02089SMasatake YAMATO
6783ae02089SMasatake YAMATO /* Impl format:
6793ae02089SMasatake YAMATO * "impl" [<type_bounds>] <qualified_ident>[<type_bounds>] ["for" <qualified_ident>[<type_bounds>]] "{" [<body>] "}"
6803ae02089SMasatake YAMATO */
parseImpl(lexerState * lexer,vString * scope,int parent_kind)6813ae02089SMasatake YAMATO static void parseImpl (lexerState *lexer, vString *scope, int parent_kind)
6823ae02089SMasatake YAMATO {
6833ae02089SMasatake YAMATO unsigned long line;
684509a47dbSJiří Techet MIOPos pos;
6853ae02089SMasatake YAMATO vString *name;
6863ae02089SMasatake YAMATO
687ce990805SThomas Braun advanceToken(lexer, true);
6883ae02089SMasatake YAMATO
6893ae02089SMasatake YAMATO line = lexer->line;
6903ae02089SMasatake YAMATO pos = lexer->pos;
6913ae02089SMasatake YAMATO
6923ae02089SMasatake YAMATO skipTypeBlock(lexer);
6933ae02089SMasatake YAMATO
6943ae02089SMasatake YAMATO name = vStringNew();
6953ae02089SMasatake YAMATO
6963ae02089SMasatake YAMATO parseQualifiedType(lexer, name);
6973ae02089SMasatake YAMATO
69817aff2f6SMasatake YAMATO if (lexer->cur_token == TOKEN_IDENT && strcmp(vStringValue(lexer->token_str), "for") == 0)
6993ae02089SMasatake YAMATO {
700ce990805SThomas Braun advanceToken(lexer, true);
7013ae02089SMasatake YAMATO parseQualifiedType(lexer, name);
7023ae02089SMasatake YAMATO }
7033ae02089SMasatake YAMATO
704b0918b66SSteven Oliver addTag(name, NULL, K_IMPL, line, pos, scope, parent_kind);
7053ae02089SMasatake YAMATO addToScope(scope, name);
7063ae02089SMasatake YAMATO
707ce990805SThomas Braun parseBlock(lexer, true, K_IMPL, scope);
7083ae02089SMasatake YAMATO
7093ae02089SMasatake YAMATO vStringDelete(name);
7103ae02089SMasatake YAMATO }
7113ae02089SMasatake YAMATO
7123ae02089SMasatake YAMATO /* Static format:
7133ae02089SMasatake YAMATO * "static" ["mut"] <ident>
7143ae02089SMasatake YAMATO */
parseStatic(lexerState * lexer,vString * scope,int parent_kind)7153ae02089SMasatake YAMATO static void parseStatic (lexerState *lexer, vString *scope, int parent_kind)
7163ae02089SMasatake YAMATO {
717ce990805SThomas Braun advanceToken(lexer, true);
7183ae02089SMasatake YAMATO if (lexer->cur_token != TOKEN_IDENT)
7193ae02089SMasatake YAMATO return;
72017aff2f6SMasatake YAMATO if (strcmp(vStringValue(lexer->token_str), "mut") == 0)
7213ae02089SMasatake YAMATO {
722ce990805SThomas Braun advanceToken(lexer, true);
7233ae02089SMasatake YAMATO }
7243ae02089SMasatake YAMATO if (lexer->cur_token != TOKEN_IDENT)
7253ae02089SMasatake YAMATO return;
7263ae02089SMasatake YAMATO
727b0918b66SSteven Oliver addTag(lexer->token_str, NULL, K_STATIC, lexer->line, lexer->pos, scope, parent_kind);
7283ae02089SMasatake YAMATO }
7293ae02089SMasatake YAMATO
730*648cbe27SJiří Techet /* Const format:
731*648cbe27SJiří Techet * "const" <ident>
732*648cbe27SJiří Techet */
parseConst(lexerState * lexer,vString * scope,int parent_kind)733*648cbe27SJiří Techet static void parseConst (lexerState *lexer, vString *scope, int parent_kind)
734*648cbe27SJiří Techet {
735*648cbe27SJiří Techet advanceToken(lexer, true);
736*648cbe27SJiří Techet if (lexer->cur_token != TOKEN_IDENT)
737*648cbe27SJiří Techet return;
738*648cbe27SJiří Techet
739*648cbe27SJiří Techet addTag(lexer->token_str, NULL, K_CONST, lexer->line, lexer->pos, scope, parent_kind);
740*648cbe27SJiří Techet }
741*648cbe27SJiří Techet
7423ae02089SMasatake YAMATO /* Type format:
7433ae02089SMasatake YAMATO * "type" <ident>
7443ae02089SMasatake YAMATO */
parseType(lexerState * lexer,vString * scope,int parent_kind)7453ae02089SMasatake YAMATO static void parseType (lexerState *lexer, vString *scope, int parent_kind)
7463ae02089SMasatake YAMATO {
747ce990805SThomas Braun advanceToken(lexer, true);
7483ae02089SMasatake YAMATO if (lexer->cur_token != TOKEN_IDENT)
7493ae02089SMasatake YAMATO return;
7503ae02089SMasatake YAMATO
751b0918b66SSteven Oliver addTag(lexer->token_str, NULL, K_TYPE, lexer->line, lexer->pos, scope, parent_kind);
7523ae02089SMasatake YAMATO }
7533ae02089SMasatake YAMATO
7543ae02089SMasatake YAMATO /* Structs and enums are very similar syntax-wise.
7553ae02089SMasatake YAMATO * It is possible to parse variants a bit more cleverly (e.g. make tuple variants functions and
7563ae02089SMasatake YAMATO * struct variants structs) but it'd be too clever and the signature wouldn't make too much sense without
7573ae02089SMasatake YAMATO * the enum's definition (e.g. for the type bounds)
7583ae02089SMasatake YAMATO *
7593ae02089SMasatake YAMATO * Struct/Enum format:
7603ae02089SMasatake YAMATO * "struct/enum" <ident>[<type_bounds>] "{" [<ident>,]+ "}"
7613ae02089SMasatake YAMATO * "struct/enum" <ident>[<type_bounds>] ";"
7623ae02089SMasatake YAMATO * */
parseStructOrEnum(lexerState * lexer,vString * scope,int parent_kind,bool is_struct)763ce990805SThomas Braun static void parseStructOrEnum (lexerState *lexer, vString *scope, int parent_kind, bool is_struct)
7643ae02089SMasatake YAMATO {
7653ae02089SMasatake YAMATO int kind = is_struct ? K_STRUCT : K_ENUM;
7663ae02089SMasatake YAMATO int field_kind = is_struct ? K_FIELD : K_VARIANT;
7673ae02089SMasatake YAMATO int goal_tokens1[] = {';', '{'};
7683ae02089SMasatake YAMATO
769ce990805SThomas Braun advanceToken(lexer, true);
7703ae02089SMasatake YAMATO if (lexer->cur_token != TOKEN_IDENT)
7713ae02089SMasatake YAMATO return;
7723ae02089SMasatake YAMATO
773b0918b66SSteven Oliver addTag(lexer->token_str, NULL, kind, lexer->line, lexer->pos, scope, parent_kind);
7743ae02089SMasatake YAMATO addToScope(scope, lexer->token_str);
7753ae02089SMasatake YAMATO
7763ae02089SMasatake YAMATO skipUntil(lexer, goal_tokens1, 2);
7773ae02089SMasatake YAMATO
7783ae02089SMasatake YAMATO if (lexer->cur_token == '{')
7793ae02089SMasatake YAMATO {
7803ae02089SMasatake YAMATO vString *field_name = vStringNew();
7813ae02089SMasatake YAMATO while (lexer->cur_token != TOKEN_EOF)
7823ae02089SMasatake YAMATO {
7833ae02089SMasatake YAMATO int goal_tokens2[] = {'}', ','};
7843ae02089SMasatake YAMATO /* Skip attributes. Format:
7853ae02089SMasatake YAMATO * #[..] or #![..]
7863ae02089SMasatake YAMATO * */
7873ae02089SMasatake YAMATO if (lexer->cur_token == '#')
7883ae02089SMasatake YAMATO {
789ce990805SThomas Braun advanceToken(lexer, true);
7903ae02089SMasatake YAMATO if (lexer->cur_token == '!')
791ce990805SThomas Braun advanceToken(lexer, true);
7923ae02089SMasatake YAMATO if (lexer->cur_token == '[')
7933ae02089SMasatake YAMATO {
7943ae02089SMasatake YAMATO /* It's an attribute, skip it. */
7953ae02089SMasatake YAMATO skipUntil(lexer, NULL, 0);
7963ae02089SMasatake YAMATO }
7973ae02089SMasatake YAMATO else
7983ae02089SMasatake YAMATO {
7993ae02089SMasatake YAMATO /* Something's up with this field, skip to the next one */
8003ae02089SMasatake YAMATO skipUntil(lexer, goal_tokens2, 2);
8013ae02089SMasatake YAMATO continue;
8023ae02089SMasatake YAMATO }
8033ae02089SMasatake YAMATO }
8043ae02089SMasatake YAMATO if (lexer->cur_token == TOKEN_IDENT)
8053ae02089SMasatake YAMATO {
80617aff2f6SMasatake YAMATO if (strcmp(vStringValue(lexer->token_str), "priv") == 0
80717aff2f6SMasatake YAMATO || strcmp(vStringValue(lexer->token_str), "pub") == 0)
8083ae02089SMasatake YAMATO {
809ce990805SThomas Braun advanceToken(lexer, true);
8106481c72aSMasatake YAMATO
8116481c72aSMasatake YAMATO /* Skip thevisibility specificaions.
8126481c72aSMasatake YAMATO * https://doc.rust-lang.org/reference/visibility-and-privacy.html */
8136481c72aSMasatake YAMATO if (lexer->cur_token == '(')
8146481c72aSMasatake YAMATO {
8156481c72aSMasatake YAMATO advanceToken(lexer, true);
8166481c72aSMasatake YAMATO skipUntil (lexer, (int []){')'}, 1);
8176481c72aSMasatake YAMATO advanceToken(lexer, true);
8186481c72aSMasatake YAMATO }
8196481c72aSMasatake YAMATO
8203ae02089SMasatake YAMATO if (lexer->cur_token != TOKEN_IDENT)
8213ae02089SMasatake YAMATO {
8223ae02089SMasatake YAMATO /* Something's up with this field, skip to the next one */
8233ae02089SMasatake YAMATO skipUntil(lexer, goal_tokens2, 2);
8243ae02089SMasatake YAMATO continue;
8253ae02089SMasatake YAMATO }
8263ae02089SMasatake YAMATO }
8273ae02089SMasatake YAMATO
8283ae02089SMasatake YAMATO vStringClear(field_name);
8293ae02089SMasatake YAMATO vStringCat(field_name, lexer->token_str);
830b0918b66SSteven Oliver addTag(field_name, NULL, field_kind, lexer->line, lexer->pos, scope, kind);
8313ae02089SMasatake YAMATO skipUntil(lexer, goal_tokens2, 2);
8323ae02089SMasatake YAMATO }
8333ae02089SMasatake YAMATO if (lexer->cur_token == '}')
8343ae02089SMasatake YAMATO {
835ce990805SThomas Braun advanceToken(lexer, true);
8363ae02089SMasatake YAMATO break;
8373ae02089SMasatake YAMATO }
838ce990805SThomas Braun advanceToken(lexer, true);
8393ae02089SMasatake YAMATO }
8403ae02089SMasatake YAMATO vStringDelete(field_name);
8413ae02089SMasatake YAMATO }
8423ae02089SMasatake YAMATO }
8433ae02089SMasatake YAMATO
8443ae02089SMasatake YAMATO /* Skip the body of the macro. Can't use skipUntil here as
8453ae02089SMasatake YAMATO * the body of the macro may have arbitrary code which confuses it (e.g.
8463ae02089SMasatake YAMATO * bitshift operators/function return arrows) */
skipMacro(lexerState * lexer)8473ae02089SMasatake YAMATO static void skipMacro (lexerState *lexer)
8483ae02089SMasatake YAMATO {
8493ae02089SMasatake YAMATO int level = 0;
8503ae02089SMasatake YAMATO int plus_token = 0;
8513ae02089SMasatake YAMATO int minus_token = 0;
8523ae02089SMasatake YAMATO
853ce990805SThomas Braun advanceToken(lexer, true);
8543ae02089SMasatake YAMATO switch (lexer->cur_token)
8553ae02089SMasatake YAMATO {
8563ae02089SMasatake YAMATO case '(':
8573ae02089SMasatake YAMATO plus_token = '(';
8583ae02089SMasatake YAMATO minus_token = ')';
8593ae02089SMasatake YAMATO break;
8603ae02089SMasatake YAMATO case '{':
8613ae02089SMasatake YAMATO plus_token = '{';
8623ae02089SMasatake YAMATO minus_token = '}';
8633ae02089SMasatake YAMATO break;
8643ae02089SMasatake YAMATO case '[':
8653ae02089SMasatake YAMATO plus_token = '[';
8663ae02089SMasatake YAMATO minus_token = ']';
8673ae02089SMasatake YAMATO break;
8683ae02089SMasatake YAMATO default:
8693ae02089SMasatake YAMATO return;
8703ae02089SMasatake YAMATO }
8713ae02089SMasatake YAMATO
8723ae02089SMasatake YAMATO while (lexer->cur_token != TOKEN_EOF)
8733ae02089SMasatake YAMATO {
8743ae02089SMasatake YAMATO if (lexer->cur_token == plus_token)
8753ae02089SMasatake YAMATO level++;
8763ae02089SMasatake YAMATO else if (lexer->cur_token == minus_token)
8773ae02089SMasatake YAMATO level--;
8783ae02089SMasatake YAMATO if (level == 0)
8793ae02089SMasatake YAMATO break;
880ce990805SThomas Braun advanceToken(lexer, true);
8813ae02089SMasatake YAMATO }
882ce990805SThomas Braun advanceToken(lexer, true);
8833ae02089SMasatake YAMATO }
8843ae02089SMasatake YAMATO
8853ae02089SMasatake YAMATO /*
8863ae02089SMasatake YAMATO * Macro rules format:
8873ae02089SMasatake YAMATO * "macro_rules" "!" <ident> <macro_body>
8883ae02089SMasatake YAMATO */
parseMacroRules(lexerState * lexer,vString * scope,int parent_kind)8893ae02089SMasatake YAMATO static void parseMacroRules (lexerState *lexer, vString *scope, int parent_kind)
8903ae02089SMasatake YAMATO {
891ce990805SThomas Braun advanceToken(lexer, true);
8923ae02089SMasatake YAMATO
8933ae02089SMasatake YAMATO if (lexer->cur_token != '!')
8943ae02089SMasatake YAMATO return;
8953ae02089SMasatake YAMATO
896ce990805SThomas Braun advanceToken(lexer, true);
8973ae02089SMasatake YAMATO
8983ae02089SMasatake YAMATO if (lexer->cur_token != TOKEN_IDENT)
8993ae02089SMasatake YAMATO return;
9003ae02089SMasatake YAMATO
901b0918b66SSteven Oliver addTag(lexer->token_str, NULL, K_MACRO, lexer->line, lexer->pos, scope, parent_kind);
9023ae02089SMasatake YAMATO
9033ae02089SMasatake YAMATO skipMacro(lexer);
9043ae02089SMasatake YAMATO }
9053ae02089SMasatake YAMATO
9063ae02089SMasatake YAMATO /*
9073ae02089SMasatake YAMATO * Rust is very liberal with nesting, so this function is used pretty much for any block
9083ae02089SMasatake YAMATO */
parseBlock(lexerState * lexer,bool delim,int kind,vString * scope)909ce990805SThomas Braun static void parseBlock (lexerState *lexer, bool delim, int kind, vString *scope)
9103ae02089SMasatake YAMATO {
9113ae02089SMasatake YAMATO int level = 1;
9123ae02089SMasatake YAMATO if (delim)
9133ae02089SMasatake YAMATO {
9143ae02089SMasatake YAMATO if (lexer->cur_token != '{')
9153ae02089SMasatake YAMATO return;
916ce990805SThomas Braun advanceToken(lexer, true);
9173ae02089SMasatake YAMATO }
9183ae02089SMasatake YAMATO while (lexer->cur_token != TOKEN_EOF)
9193ae02089SMasatake YAMATO {
9203ae02089SMasatake YAMATO if (lexer->cur_token == TOKEN_IDENT)
9213ae02089SMasatake YAMATO {
9223ae02089SMasatake YAMATO size_t old_scope_len = vStringLength(scope);
92317aff2f6SMasatake YAMATO if (strcmp(vStringValue(lexer->token_str), "fn") == 0)
9243ae02089SMasatake YAMATO {
9253ae02089SMasatake YAMATO parseFn(lexer, scope, kind);
9263ae02089SMasatake YAMATO }
92717aff2f6SMasatake YAMATO else if(strcmp(vStringValue(lexer->token_str), "mod") == 0)
9283ae02089SMasatake YAMATO {
9293ae02089SMasatake YAMATO parseMod(lexer, scope, kind);
9303ae02089SMasatake YAMATO }
93117aff2f6SMasatake YAMATO else if(strcmp(vStringValue(lexer->token_str), "static") == 0)
9323ae02089SMasatake YAMATO {
9333ae02089SMasatake YAMATO parseStatic(lexer, scope, kind);
9343ae02089SMasatake YAMATO }
935*648cbe27SJiří Techet else if(strcmp(vStringValue(lexer->token_str), "const") == 0)
936*648cbe27SJiří Techet {
937*648cbe27SJiří Techet parseConst(lexer, scope, kind);
938*648cbe27SJiří Techet }
93917aff2f6SMasatake YAMATO else if(strcmp(vStringValue(lexer->token_str), "trait") == 0)
9403ae02089SMasatake YAMATO {
9413ae02089SMasatake YAMATO parseTrait(lexer, scope, kind);
9423ae02089SMasatake YAMATO }
94317aff2f6SMasatake YAMATO else if(strcmp(vStringValue(lexer->token_str), "type") == 0)
9443ae02089SMasatake YAMATO {
9453ae02089SMasatake YAMATO parseType(lexer, scope, kind);
9463ae02089SMasatake YAMATO }
94717aff2f6SMasatake YAMATO else if(strcmp(vStringValue(lexer->token_str), "impl") == 0)
9483ae02089SMasatake YAMATO {
9493ae02089SMasatake YAMATO parseImpl(lexer, scope, kind);
9503ae02089SMasatake YAMATO }
95117aff2f6SMasatake YAMATO else if(strcmp(vStringValue(lexer->token_str), "struct") == 0)
9523ae02089SMasatake YAMATO {
953ce990805SThomas Braun parseStructOrEnum(lexer, scope, kind, true);
9543ae02089SMasatake YAMATO }
95517aff2f6SMasatake YAMATO else if(strcmp(vStringValue(lexer->token_str), "enum") == 0)
9563ae02089SMasatake YAMATO {
957ce990805SThomas Braun parseStructOrEnum(lexer, scope, kind, false);
9583ae02089SMasatake YAMATO }
95917aff2f6SMasatake YAMATO else if(strcmp(vStringValue(lexer->token_str), "macro_rules") == 0)
9603ae02089SMasatake YAMATO {
9613ae02089SMasatake YAMATO parseMacroRules(lexer, scope, kind);
9623ae02089SMasatake YAMATO }
9633ae02089SMasatake YAMATO else
9643ae02089SMasatake YAMATO {
965ce990805SThomas Braun advanceToken(lexer, true);
9663ae02089SMasatake YAMATO if (lexer->cur_token == '!')
9673ae02089SMasatake YAMATO {
9683ae02089SMasatake YAMATO skipMacro(lexer);
9693ae02089SMasatake YAMATO }
9703ae02089SMasatake YAMATO }
9713ae02089SMasatake YAMATO resetScope(scope, old_scope_len);
9723ae02089SMasatake YAMATO }
9733ae02089SMasatake YAMATO else if (lexer->cur_token == '{')
9743ae02089SMasatake YAMATO {
9753ae02089SMasatake YAMATO level++;
976ce990805SThomas Braun advanceToken(lexer, true);
9773ae02089SMasatake YAMATO }
9783ae02089SMasatake YAMATO else if (lexer->cur_token == '}')
9793ae02089SMasatake YAMATO {
9803ae02089SMasatake YAMATO level--;
981ce990805SThomas Braun advanceToken(lexer, true);
9823ae02089SMasatake YAMATO }
9833ae02089SMasatake YAMATO else if (lexer->cur_token == '\'')
9843ae02089SMasatake YAMATO {
9853ae02089SMasatake YAMATO /* Skip over the 'static lifetime, as it confuses the static parser above */
986ce990805SThomas Braun advanceToken(lexer, true);
98717aff2f6SMasatake YAMATO if (lexer->cur_token == TOKEN_IDENT && strcmp(vStringValue(lexer->token_str), "static") == 0)
988ce990805SThomas Braun advanceToken(lexer, true);
9893ae02089SMasatake YAMATO }
9903ae02089SMasatake YAMATO else
9913ae02089SMasatake YAMATO {
992ce990805SThomas Braun advanceToken(lexer, true);
9933ae02089SMasatake YAMATO }
9943ae02089SMasatake YAMATO if (delim && level <= 0)
9953ae02089SMasatake YAMATO break;
9963ae02089SMasatake YAMATO }
9973ae02089SMasatake YAMATO }
9983ae02089SMasatake YAMATO
findRustTags(void)9993ae02089SMasatake YAMATO static void findRustTags (void)
10003ae02089SMasatake YAMATO {
1001f6fc36daSAlan Barr lexerState lexer = {0};
10023ae02089SMasatake YAMATO vString* scope = vStringNew();
10033ae02089SMasatake YAMATO initLexer(&lexer);
10043ae02089SMasatake YAMATO
1005ce990805SThomas Braun parseBlock(&lexer, false, K_NONE, scope);
10063ae02089SMasatake YAMATO vStringDelete(scope);
10073ae02089SMasatake YAMATO
10083ae02089SMasatake YAMATO deInitLexer(&lexer);
10093ae02089SMasatake YAMATO }
10103ae02089SMasatake YAMATO
RustParser(void)10113ae02089SMasatake YAMATO extern parserDefinition *RustParser (void)
10123ae02089SMasatake YAMATO {
10133ae02089SMasatake YAMATO static const char *const extensions[] = { "rs", NULL };
1014b29ae60fSMasatake YAMATO parserDefinition *def = parserNew ("Rust");
101509ae690fSMasatake YAMATO def->kindTable = rustKinds;
10163db72c21SMasatake YAMATO def->kindCount = ARRAY_SIZE (rustKinds);
10173ae02089SMasatake YAMATO def->extensions = extensions;
10183ae02089SMasatake YAMATO def->parser = findRustTags;
10193ae02089SMasatake YAMATO
10203ae02089SMasatake YAMATO return def;
10213ae02089SMasatake YAMATO }
1022