xref: /Universal-ctags/parsers/rust.c (revision 648cbe273d9454a0556e871c5406db83f8968aa3)
/*
*
*   This source code is released for free distribution under the terms of the
*   GNU General Public License version 2 or (at your option) any later version.
*
*   This module contains functions for generating tags for Rust files.
*/
83ae02089SMasatake YAMATO 
/*
*   INCLUDE FILES
*/
#include "general.h"	/* must always come first */

#include <ctype.h>
#include <string.h>

#include "keyword.h"
#include "parse.h"
#include "entry.h"
#include "options.h"
#include "read.h"
#include "routines.h"
#include "vstring.h"
233ae02089SMasatake YAMATO 
/*
*   MACROS
*/
/* Upper bound on the number of characters kept in lexerState::token_str;
 * advanceAndStoreChar() stops storing once the token has reached this
 * length (the input is still consumed). */
#define MAX_STRING_LENGTH 256
283ae02089SMasatake YAMATO 
/*
*   DATA DECLARATIONS
*/
323ae02089SMasatake YAMATO 
/* Tag kinds. Every value except K_NONE is used as an index into
 * rustKinds[], so the order here must match the order of that table.
 * K_NONE is a sentinel meaning "no kind" / "no parent". */
typedef enum {
	K_MOD,		/* module */
	K_STRUCT,	/* struct */
	K_TRAIT,	/* interface (trait) */
	K_IMPL,		/* implementation */
	K_FN,		/* function */
	K_ENUM,		/* enum */
	K_TYPE,		/* typedef */
	K_STATIC,	/* variable */
	K_MACRO,	/* macro */
	K_FIELD,	/* field */
	K_VARIANT,	/* enumerator */
	K_METHOD,	/* method */
	K_CONST,	/* constant */
	K_NONE		/* sentinel, no rustKinds[] entry */
} RustKind;
493ae02089SMasatake YAMATO 
50e112e8abSMasatake YAMATO static kindDefinition rustKinds[] = {
51ce990805SThomas Braun 	{true, 'n', "module", "module"},
52ce990805SThomas Braun 	{true, 's', "struct", "structural type"},
53ce990805SThomas Braun 	{true, 'i', "interface", "trait interface"},
54ce990805SThomas Braun 	{true, 'c', "implementation", "implementation"},
55ce990805SThomas Braun 	{true, 'f', "function", "Function"},
56ce990805SThomas Braun 	{true, 'g', "enum", "Enum"},
57ce990805SThomas Braun 	{true, 't', "typedef", "Type Alias"},
58ce990805SThomas Braun 	{true, 'v', "variable", "Global variable"},
59ce990805SThomas Braun 	{true, 'M', "macro", "Macro Definition"},
60ce990805SThomas Braun 	{true, 'm', "field", "A struct field"},
61ce990805SThomas Braun 	{true, 'e', "enumerator", "An enum variant"},
628050d8baSMasatake YAMATO 	{true, 'P', "method", "A method"},
63*648cbe27SJiří Techet 	{true, 'C', "constant", "A constant"},
643ae02089SMasatake YAMATO };
653ae02089SMasatake YAMATO 
/* Token codes for everything that is not a plain single character.
 * Any other token is stored in lexerState::cur_token as the character's
 * own value (see the last branch of advanceToken()). */
typedef enum {
	TOKEN_WHITESPACE,	/* run of blanks and/or comments */
	TOKEN_STRING,		/* string, raw string, char literal or lifetime */
	TOKEN_IDENT,		/* identifier or keyword */
	TOKEN_LSHIFT,		/* << */
	TOKEN_RSHIFT,		/* >> */
	TOKEN_RARROW,		/* -> */
	TOKEN_EOF		/* end of input */
} tokenType;
753ae02089SMasatake YAMATO 
763ae02089SMasatake YAMATO typedef struct {
773ae02089SMasatake YAMATO 	/* Characters */
783ae02089SMasatake YAMATO 	int cur_c;
793ae02089SMasatake YAMATO 	int next_c;
803ae02089SMasatake YAMATO 
813ae02089SMasatake YAMATO 	/* Tokens */
823ae02089SMasatake YAMATO 	int cur_token;
833ae02089SMasatake YAMATO 	vString* token_str;
843ae02089SMasatake YAMATO 	unsigned long line;
85509a47dbSJiří Techet 	MIOPos pos;
863ae02089SMasatake YAMATO } lexerState;
873ae02089SMasatake YAMATO 
883ae02089SMasatake YAMATO /*
893ae02089SMasatake YAMATO *   FUNCTION PROTOTYPES
903ae02089SMasatake YAMATO */
913ae02089SMasatake YAMATO 
92ce990805SThomas Braun static void parseBlock (lexerState *lexer, bool delim, int kind, vString *scope);
933ae02089SMasatake YAMATO 
/*
*   FUNCTION DEFINITIONS
*/
973ae02089SMasatake YAMATO 
983ae02089SMasatake YAMATO /* Resets the scope string to the old length */
resetScope(vString * scope,size_t old_len)993ae02089SMasatake YAMATO static void resetScope (vString *scope, size_t old_len)
1003ae02089SMasatake YAMATO {
101694968ddSMasatake YAMATO 	vStringTruncate (scope, old_len);
1023ae02089SMasatake YAMATO }
1033ae02089SMasatake YAMATO 
1043ae02089SMasatake YAMATO /* Adds a name to the end of the scope string */
addToScope(vString * scope,vString * name)1053ae02089SMasatake YAMATO static void addToScope (vString *scope, vString *name)
1063ae02089SMasatake YAMATO {
1073ae02089SMasatake YAMATO 	if (vStringLength(scope) > 0)
1083ae02089SMasatake YAMATO 		vStringCatS(scope, "::");
1093ae02089SMasatake YAMATO 	vStringCat(scope, name);
1103ae02089SMasatake YAMATO }
1113ae02089SMasatake YAMATO 
1123ae02089SMasatake YAMATO /* Write the lexer's current token to string, taking care of special tokens */
writeCurTokenToStr(lexerState * lexer,vString * out_str)1133ae02089SMasatake YAMATO static void writeCurTokenToStr (lexerState *lexer, vString *out_str)
1143ae02089SMasatake YAMATO {
1153ae02089SMasatake YAMATO 	switch (lexer->cur_token)
1163ae02089SMasatake YAMATO 	{
1173ae02089SMasatake YAMATO 		case TOKEN_IDENT:
1183ae02089SMasatake YAMATO 			vStringCat(out_str, lexer->token_str);
1193ae02089SMasatake YAMATO 			break;
1203ae02089SMasatake YAMATO 		case TOKEN_STRING:
1213ae02089SMasatake YAMATO 			vStringCat(out_str, lexer->token_str);
1223ae02089SMasatake YAMATO 			break;
1233ae02089SMasatake YAMATO 		case TOKEN_WHITESPACE:
1243ae02089SMasatake YAMATO 			vStringPut(out_str, ' ');
1253ae02089SMasatake YAMATO 			break;
1263ae02089SMasatake YAMATO 		case TOKEN_LSHIFT:
1273ae02089SMasatake YAMATO 			vStringCatS(out_str, "<<");
1283ae02089SMasatake YAMATO 			break;
1293ae02089SMasatake YAMATO 		case TOKEN_RSHIFT:
1303ae02089SMasatake YAMATO 			vStringCatS(out_str, ">>");
1313ae02089SMasatake YAMATO 			break;
1323ae02089SMasatake YAMATO 		case TOKEN_RARROW:
1333ae02089SMasatake YAMATO 			vStringCatS(out_str, "->");
1343ae02089SMasatake YAMATO 			break;
1353ae02089SMasatake YAMATO 		default:
1363ae02089SMasatake YAMATO 			vStringPut(out_str, (char) lexer->cur_token);
1373ae02089SMasatake YAMATO 	}
1383ae02089SMasatake YAMATO }
1393ae02089SMasatake YAMATO 
1403ae02089SMasatake YAMATO /* Reads a character from the file */
advanceChar(lexerState * lexer)1413ae02089SMasatake YAMATO static void advanceChar (lexerState *lexer)
1423ae02089SMasatake YAMATO {
1433ae02089SMasatake YAMATO 	lexer->cur_c = lexer->next_c;
144018bce0bSMasatake YAMATO 	lexer->next_c = getcFromInputFile();
1453ae02089SMasatake YAMATO }
1463ae02089SMasatake YAMATO 
1473ae02089SMasatake YAMATO /* Reads N characters from the file */
advanceNChar(lexerState * lexer,int n)1483ae02089SMasatake YAMATO static void advanceNChar (lexerState *lexer, int n)
1493ae02089SMasatake YAMATO {
1503ae02089SMasatake YAMATO 	while (n--)
1513ae02089SMasatake YAMATO 		advanceChar(lexer);
1523ae02089SMasatake YAMATO }
1533ae02089SMasatake YAMATO 
1543ae02089SMasatake YAMATO /* Store the current character in lexerState::token_str if there is space
1553ae02089SMasatake YAMATO  * (set by MAX_STRING_LENGTH), and then read the next character from the file */
advanceAndStoreChar(lexerState * lexer)1563ae02089SMasatake YAMATO static void advanceAndStoreChar (lexerState *lexer)
1573ae02089SMasatake YAMATO {
1583ae02089SMasatake YAMATO 	if (vStringLength(lexer->token_str) < MAX_STRING_LENGTH)
1593ae02089SMasatake YAMATO 		vStringPut(lexer->token_str, (char) lexer->cur_c);
1603ae02089SMasatake YAMATO 	advanceChar(lexer);
1613ae02089SMasatake YAMATO }
1623ae02089SMasatake YAMATO 
/* True for the four characters the lexer treats as blank: space, tab,
 * carriage return and line feed. */
static bool isWhitespace (int c)
{
	switch (c)
	{
		case ' ':
		case '\t':
		case '\r':
		case '\n':
			return true;
		default:
			return false;
	}
}
1673ae02089SMasatake YAMATO 
/* True when c lies in the 7-bit ASCII range 0x00..0x7F. Negative values
 * (such as EOF) and bytes with the high bit set are not ASCII. */
static bool isAscii (int c)
{
	return !(c < 0 || c >= 0x80);
}
1723ae02089SMasatake YAMATO 
/* Returns true if c can begin an identifier: an ASCII letter, '_', or any
 * byte with the high bit set. The latter lets UTF-8 encoded identifiers
 * through; this isn't quite right for Unicode identifiers, since every
 * non-ASCII byte is accepted.
 * Negative values (EOF) are never identifier characters; previously they
 * were accepted because they are "not ASCII". */
static bool isIdentifierStart (int c)
{
	if (c < 0)
		return false;
	if (c >= 0x80)
		return true;
	return isalpha(c) || c == '_';
}
1783ae02089SMasatake YAMATO 
/* Returns true if c can appear after the first character of an
 * identifier: an ASCII letter or digit, '_', or any byte with the high bit
 * set. This isn't quite right for Unicode identifiers, since every
 * non-ASCII byte is accepted.
 * Negative values (EOF) are never identifier characters; previously they
 * were accepted because they are "not ASCII". */
static bool isIdentifierContinue (int c)
{
	if (c < 0)
		return false;
	if (c >= 0x80)
		return true;
	return isalnum(c) || c == '_';
}
1843ae02089SMasatake YAMATO 
/* Consumes input for as long as the current character is whitespace. */
static void scanWhitespace (lexerState *lexer)
{
	for (; isWhitespace(lexer->cur_c); advanceChar(lexer))
		;
}
1903ae02089SMasatake YAMATO 
1913ae02089SMasatake YAMATO /* Normal line comments start with two /'s and continue until the next \n
1923ae02089SMasatake YAMATO  * (potentially after a \r). Additionally, a shebang in the beginning of the
1933ae02089SMasatake YAMATO  * file also counts as a line comment as long as it is not this sequence: #![ .
1943ae02089SMasatake YAMATO  * Block comments start with / followed by a * and end with a * followed by a /.
1953ae02089SMasatake YAMATO  * Unlike in C/C++ they nest. */
scanComments(lexerState * lexer)1963ae02089SMasatake YAMATO static void scanComments (lexerState *lexer)
1973ae02089SMasatake YAMATO {
1983ae02089SMasatake YAMATO 	/* // */
1993ae02089SMasatake YAMATO 	if (lexer->next_c == '/')
2003ae02089SMasatake YAMATO 	{
2013ae02089SMasatake YAMATO 		advanceNChar(lexer, 2);
2023ae02089SMasatake YAMATO 		while (lexer->cur_c != EOF && lexer->cur_c != '\n')
2033ae02089SMasatake YAMATO 			advanceChar(lexer);
2043ae02089SMasatake YAMATO 	}
2053ae02089SMasatake YAMATO 	/* #! */
2063ae02089SMasatake YAMATO 	else if (lexer->next_c == '!')
2073ae02089SMasatake YAMATO 	{
2083ae02089SMasatake YAMATO 		advanceNChar(lexer, 2);
2093ae02089SMasatake YAMATO 		/* If it is exactly #![ then it is not a comment, but an attribute */
2103ae02089SMasatake YAMATO 		if (lexer->cur_c == '[')
2113ae02089SMasatake YAMATO 			return;
2123ae02089SMasatake YAMATO 		while (lexer->cur_c != EOF && lexer->cur_c != '\n')
2133ae02089SMasatake YAMATO 			advanceChar(lexer);
2143ae02089SMasatake YAMATO 	}
2153ae02089SMasatake YAMATO 	/* block comment */
2163ae02089SMasatake YAMATO 	else if (lexer->next_c == '*')
2173ae02089SMasatake YAMATO 	{
2183ae02089SMasatake YAMATO 		int level = 1;
2193ae02089SMasatake YAMATO 		advanceNChar(lexer, 2);
2203ae02089SMasatake YAMATO 		while (lexer->cur_c != EOF && level > 0)
2213ae02089SMasatake YAMATO 		{
2223ae02089SMasatake YAMATO 			if (lexer->cur_c == '*' && lexer->next_c == '/')
2233ae02089SMasatake YAMATO 			{
2243ae02089SMasatake YAMATO 				level--;
2253ae02089SMasatake YAMATO 				advanceNChar(lexer, 2);
2263ae02089SMasatake YAMATO 			}
2273ae02089SMasatake YAMATO 			else if (lexer->cur_c == '/' && lexer->next_c == '*')
2283ae02089SMasatake YAMATO 			{
2293ae02089SMasatake YAMATO 				level++;
2303ae02089SMasatake YAMATO 				advanceNChar(lexer, 2);
2313ae02089SMasatake YAMATO 			}
2323ae02089SMasatake YAMATO 			else
2333ae02089SMasatake YAMATO 			{
2343ae02089SMasatake YAMATO 				advanceChar(lexer);
2353ae02089SMasatake YAMATO 			}
2363ae02089SMasatake YAMATO 		}
2373ae02089SMasatake YAMATO 	}
2383ae02089SMasatake YAMATO }
2393ae02089SMasatake YAMATO 
/* Collects an identifier into token_str. The current character is taken
 * unconditionally (the caller has established that it starts an
 * identifier); further characters are taken while they satisfy
 * isIdentifierContinue(). */
static void scanIdentifier (lexerState *lexer)
{
	vStringClear(lexer->token_str);
	advanceAndStoreChar(lexer);
	while (lexer->cur_c != EOF && isIdentifierContinue(lexer->cur_c))
		advanceAndStoreChar(lexer);
}
2483ae02089SMasatake YAMATO 
2493ae02089SMasatake YAMATO /* Double-quoted strings, we only care about the \" escape. These
2503ae02089SMasatake YAMATO  * last past the end of the line, so be careful not too store too much
2513ae02089SMasatake YAMATO  * of them (see MAX_STRING_LENGTH). The only place we look at their
2523ae02089SMasatake YAMATO  * contents is in the function definitions, and there the valid strings are
2533ae02089SMasatake YAMATO  * things like "C" and "Rust" */
scanString(lexerState * lexer)2543ae02089SMasatake YAMATO static void scanString (lexerState *lexer)
2553ae02089SMasatake YAMATO {
2563ae02089SMasatake YAMATO 	vStringClear(lexer->token_str);
2573ae02089SMasatake YAMATO 	advanceAndStoreChar(lexer);
2583ae02089SMasatake YAMATO 	while (lexer->cur_c != EOF && lexer->cur_c != '"')
2593ae02089SMasatake YAMATO 	{
2603ae02089SMasatake YAMATO 		if (lexer->cur_c == '\\' && lexer->next_c == '"')
2613ae02089SMasatake YAMATO 			advanceAndStoreChar(lexer);
2623ae02089SMasatake YAMATO 		advanceAndStoreChar(lexer);
2633ae02089SMasatake YAMATO 	}
2643ae02089SMasatake YAMATO 	advanceAndStoreChar(lexer);
2653ae02089SMasatake YAMATO }
2663ae02089SMasatake YAMATO 
2673ae02089SMasatake YAMATO /* Raw strings look like this: r"" or r##""## where the number of
2683ae02089SMasatake YAMATO  * hashes must match */
scanRawString(lexerState * lexer)2693ae02089SMasatake YAMATO static void scanRawString (lexerState *lexer)
2703ae02089SMasatake YAMATO {
2713ae02089SMasatake YAMATO 	size_t num_initial_hashes = 0;
2723ae02089SMasatake YAMATO 	vStringClear(lexer->token_str);
2733ae02089SMasatake YAMATO 	advanceAndStoreChar(lexer);
2743ae02089SMasatake YAMATO 	/* Count how many leading hashes there are */
2753ae02089SMasatake YAMATO 	while (lexer->cur_c == '#')
2763ae02089SMasatake YAMATO 	{
2773ae02089SMasatake YAMATO 		num_initial_hashes++;
2783ae02089SMasatake YAMATO 		advanceAndStoreChar(lexer);
2793ae02089SMasatake YAMATO 	}
2803ae02089SMasatake YAMATO 	if (lexer->cur_c != '"')
2813ae02089SMasatake YAMATO 		return;
2823ae02089SMasatake YAMATO 	advanceAndStoreChar(lexer);
2833ae02089SMasatake YAMATO 	while (lexer->cur_c != EOF)
2843ae02089SMasatake YAMATO 	{
2853ae02089SMasatake YAMATO 		/* Count how many trailing hashes there are. If the number is equal or more
2863ae02089SMasatake YAMATO 		 * than the number of leading hashes, break. */
2873ae02089SMasatake YAMATO 		if (lexer->cur_c == '"')
2883ae02089SMasatake YAMATO 		{
2893ae02089SMasatake YAMATO 			size_t num_trailing_hashes = 0;
2903ae02089SMasatake YAMATO 			advanceAndStoreChar(lexer);
2913ae02089SMasatake YAMATO 			while (lexer->cur_c == '#' && num_trailing_hashes < num_initial_hashes)
2923ae02089SMasatake YAMATO 			{
2933ae02089SMasatake YAMATO 				num_trailing_hashes++;
2943ae02089SMasatake YAMATO 
2953ae02089SMasatake YAMATO 				advanceAndStoreChar(lexer);
2963ae02089SMasatake YAMATO 			}
2973ae02089SMasatake YAMATO 			if (num_trailing_hashes == num_initial_hashes)
2983ae02089SMasatake YAMATO 				break;
2993ae02089SMasatake YAMATO 		}
3003ae02089SMasatake YAMATO 		else
3013ae02089SMasatake YAMATO 		{
3023ae02089SMasatake YAMATO 			advanceAndStoreChar(lexer);
3033ae02089SMasatake YAMATO 		}
3043ae02089SMasatake YAMATO 	}
3053ae02089SMasatake YAMATO }
3063ae02089SMasatake YAMATO 
3073ae02089SMasatake YAMATO /* This deals with character literals: 'n', '\n', '\uFFFF'; and lifetimes:
3083ae02089SMasatake YAMATO  * 'lifetime. We'll use this approximate regexp for the literals:
3093ae02089SMasatake YAMATO  * \' \\ [^']+ \' or \' [^'] \' or \' \\ \' \'. Either way, we'll treat this
3103ae02089SMasatake YAMATO  * token as a string, so it gets preserved as is for function signatures with
3113ae02089SMasatake YAMATO  * lifetimes. */
scanCharacterOrLifetime(lexerState * lexer)3123ae02089SMasatake YAMATO static void scanCharacterOrLifetime (lexerState *lexer)
3133ae02089SMasatake YAMATO {
3143ae02089SMasatake YAMATO 	vStringClear(lexer->token_str);
3153ae02089SMasatake YAMATO 	advanceAndStoreChar(lexer);
3163ae02089SMasatake YAMATO 
3173ae02089SMasatake YAMATO 	if (lexer->cur_c == '\\')
3183ae02089SMasatake YAMATO 	{
3193ae02089SMasatake YAMATO 		advanceAndStoreChar(lexer);
3203ae02089SMasatake YAMATO 		/* The \' \\ \' \' (literally '\'') case */
3213ae02089SMasatake YAMATO 		if (lexer->cur_c == '\'' && lexer->next_c == '\'')
3223ae02089SMasatake YAMATO 		{
3233ae02089SMasatake YAMATO 			advanceAndStoreChar(lexer);
3243ae02089SMasatake YAMATO 			advanceAndStoreChar(lexer);
3253ae02089SMasatake YAMATO 		}
3263ae02089SMasatake YAMATO 		/* The \' \\ [^']+ \' case */
3273ae02089SMasatake YAMATO 		else
3283ae02089SMasatake YAMATO 		{
3293ae02089SMasatake YAMATO 			while (lexer->cur_c != EOF && lexer->cur_c != '\'')
3303ae02089SMasatake YAMATO 				advanceAndStoreChar(lexer);
3313ae02089SMasatake YAMATO 		}
3323ae02089SMasatake YAMATO 	}
3333ae02089SMasatake YAMATO 	/* The \' [^'] \' case */
3343ae02089SMasatake YAMATO 	else if (lexer->cur_c != '\'' && lexer->next_c == '\'')
3353ae02089SMasatake YAMATO 	{
3363ae02089SMasatake YAMATO 		advanceAndStoreChar(lexer);
3373ae02089SMasatake YAMATO 		advanceAndStoreChar(lexer);
3383ae02089SMasatake YAMATO 	}
3393ae02089SMasatake YAMATO 	/* Otherwise it is malformed, or a lifetime */
3403ae02089SMasatake YAMATO }
3413ae02089SMasatake YAMATO 
3423ae02089SMasatake YAMATO /* Advances the parser one token, optionally skipping whitespace
3433ae02089SMasatake YAMATO  * (otherwise it is concatenated and returned as a single whitespace token).
3443ae02089SMasatake YAMATO  * Whitespace is needed to properly render function signatures. Unrecognized
3453ae02089SMasatake YAMATO  * token starts are stored literally, e.g. token may equal to a character '#'. */
advanceToken(lexerState * lexer,bool skip_whitspace)346ce990805SThomas Braun static int advanceToken (lexerState *lexer, bool skip_whitspace)
3473ae02089SMasatake YAMATO {
348ce990805SThomas Braun 	bool have_whitespace = false;
349a31b37dcSMasatake YAMATO 	lexer->line = getInputLineNumber();
3503ae02089SMasatake YAMATO 	lexer->pos = getInputFilePosition();
3513ae02089SMasatake YAMATO 	while (lexer->cur_c != EOF)
3523ae02089SMasatake YAMATO 	{
3533ae02089SMasatake YAMATO 		if (isWhitespace(lexer->cur_c))
3543ae02089SMasatake YAMATO 		{
3553ae02089SMasatake YAMATO 			scanWhitespace(lexer);
356ce990805SThomas Braun 			have_whitespace = true;
3573ae02089SMasatake YAMATO 		}
3583ae02089SMasatake YAMATO 		else if (lexer->cur_c == '/' && (lexer->next_c == '/' || lexer->next_c == '*'))
3593ae02089SMasatake YAMATO 		{
3603ae02089SMasatake YAMATO 			scanComments(lexer);
361ce990805SThomas Braun 			have_whitespace = true;
3623ae02089SMasatake YAMATO 		}
3633ae02089SMasatake YAMATO 		else
3643ae02089SMasatake YAMATO 		{
3653ae02089SMasatake YAMATO 			if (have_whitespace && !skip_whitspace)
3663ae02089SMasatake YAMATO 				return lexer->cur_token = TOKEN_WHITESPACE;
3673ae02089SMasatake YAMATO 			break;
3683ae02089SMasatake YAMATO 		}
3693ae02089SMasatake YAMATO 	}
370a31b37dcSMasatake YAMATO 	lexer->line = getInputLineNumber();
3713ae02089SMasatake YAMATO 	lexer->pos = getInputFilePosition();
3723ae02089SMasatake YAMATO 	while (lexer->cur_c != EOF)
3733ae02089SMasatake YAMATO 	{
3743ae02089SMasatake YAMATO 		if (lexer->cur_c == '"')
3753ae02089SMasatake YAMATO 		{
3763ae02089SMasatake YAMATO 			scanString(lexer);
3773ae02089SMasatake YAMATO 			return lexer->cur_token = TOKEN_STRING;
3783ae02089SMasatake YAMATO 		}
3793ae02089SMasatake YAMATO 		else if (lexer->cur_c == 'r' && (lexer->next_c == '#' || lexer->next_c == '"'))
3803ae02089SMasatake YAMATO 		{
3813ae02089SMasatake YAMATO 			scanRawString(lexer);
3823ae02089SMasatake YAMATO 			return lexer->cur_token = TOKEN_STRING;
3833ae02089SMasatake YAMATO 		}
3843ae02089SMasatake YAMATO 		else if (lexer->cur_c == '\'')
3853ae02089SMasatake YAMATO 		{
3863ae02089SMasatake YAMATO 			scanCharacterOrLifetime(lexer);
3873ae02089SMasatake YAMATO 			return lexer->cur_token = TOKEN_STRING;
3883ae02089SMasatake YAMATO 		}
3893ae02089SMasatake YAMATO 		else if (isIdentifierStart(lexer->cur_c))
3903ae02089SMasatake YAMATO 		{
3913ae02089SMasatake YAMATO 			scanIdentifier(lexer);
3923ae02089SMasatake YAMATO 			return lexer->cur_token = TOKEN_IDENT;
3933ae02089SMasatake YAMATO 		}
3943ae02089SMasatake YAMATO 		/* These shift tokens aren't too important for tag-generation per se,
3953ae02089SMasatake YAMATO 		 * but they confuse the skipUntil code which tracks the <> pairs. */
3963ae02089SMasatake YAMATO 		else if (lexer->cur_c == '>' && lexer->next_c == '>')
3973ae02089SMasatake YAMATO 		{
3983ae02089SMasatake YAMATO 			advanceNChar(lexer, 2);
3993ae02089SMasatake YAMATO 			return lexer->cur_token = TOKEN_RSHIFT;
4003ae02089SMasatake YAMATO 		}
4013ae02089SMasatake YAMATO 		else if (lexer->cur_c == '<' && lexer->next_c == '<')
4023ae02089SMasatake YAMATO 		{
4033ae02089SMasatake YAMATO 			advanceNChar(lexer, 2);
4043ae02089SMasatake YAMATO 			return lexer->cur_token = TOKEN_LSHIFT;
4053ae02089SMasatake YAMATO 		}
4063ae02089SMasatake YAMATO 		else if (lexer->cur_c == '-' && lexer->next_c == '>')
4073ae02089SMasatake YAMATO 		{
4083ae02089SMasatake YAMATO 			advanceNChar(lexer, 2);
4093ae02089SMasatake YAMATO 			return lexer->cur_token = TOKEN_RARROW;
4103ae02089SMasatake YAMATO 		}
4113ae02089SMasatake YAMATO 		else
4123ae02089SMasatake YAMATO 		{
4133ae02089SMasatake YAMATO 			int c = lexer->cur_c;
4143ae02089SMasatake YAMATO 			advanceChar(lexer);
4153ae02089SMasatake YAMATO 			return lexer->cur_token = c;
4163ae02089SMasatake YAMATO 		}
4173ae02089SMasatake YAMATO 	}
4183ae02089SMasatake YAMATO 	return lexer->cur_token = TOKEN_EOF;
4193ae02089SMasatake YAMATO }
4203ae02089SMasatake YAMATO 
/* Prepares the lexer for a new input file: loads the two-character window,
 * allocates the token buffer, skips a leading shebang line (but not a #![
 * attribute, see scanComments) and reads the first token.
 *
 * The window is filled with two direct reads, so this function never looks
 * at cur_c/next_c before they have been assigned and does not depend on
 * the caller having initialized the structure. The resulting state is the
 * same as advancing twice: cur_c holds the first input character and
 * next_c the second. */
static void initLexer (lexerState *lexer)
{
	lexer->cur_c = getcFromInputFile();
	lexer->next_c = getcFromInputFile();
	lexer->token_str = vStringNew();

	if (lexer->cur_c == '#' && lexer->next_c == '!')
		scanComments(lexer);
	advanceToken(lexer, true);
}
4303ae02089SMasatake YAMATO 
/* Releases the token buffer allocated by initLexer and clears the pointer
 * so that it cannot be used by accident afterwards. */
static void deInitLexer (lexerState *lexer)
{
	vString *const buf = lexer->token_str;

	lexer->token_str = NULL;
	vStringDelete(buf);
}
4363ae02089SMasatake YAMATO 
/* Emits a tag entry for ident.
 *   arg_list     stored as the tag's signature field; may be NULL
 *   kind         RustKind of the tag; nothing is emitted for K_NONE or for
 *                a kind that is disabled in rustKinds[]
 *   line, pos    location to record for the tag
 *   scope        name of the enclosing scope; only used when parent_kind is
 *                not K_NONE
 *   parent_kind  RustKind of the enclosing scope, or K_NONE for none
 * FIXME: the variable type is not recorded (map to typeRef[1]?). */
static void addTag (vString* ident, const char* arg_list, int kind, unsigned long line, MIOPos pos, vString *scope, int parent_kind)
{
	tagEntryInfo tag;

	if (kind == K_NONE)
		return;
	if (! rustKinds[kind].enabled)
		return;

	initTagEntry(&tag, vStringValue(ident), kind);
	tag.lineNumber = line;
	tag.filePosition = pos;
	tag.extensionFields.signature = arg_list;

	if (parent_kind != K_NONE)
	{
		tag.extensionFields.scopeKindIndex = parent_kind;
		tag.extensionFields.scopeName = vStringValue(scope);
	}

	makeTagEntry(&tag);
}
4563ae02089SMasatake YAMATO 
4573ae02089SMasatake YAMATO /* Skip tokens until one of the goal tokens is hit. Escapes when level = 0 if there are no goal tokens.
4583ae02089SMasatake YAMATO  * Keeps track of balanced <>'s, ()'s, []'s, and {}'s and ignores the goal tokens within those pairings */
skipUntil(lexerState * lexer,int goal_tokens[],int num_goal_tokens)4593ae02089SMasatake YAMATO static void skipUntil (lexerState *lexer, int goal_tokens[], int num_goal_tokens)
4603ae02089SMasatake YAMATO {
4613ae02089SMasatake YAMATO 	int angle_level = 0;
4623ae02089SMasatake YAMATO 	int paren_level = 0;
4633ae02089SMasatake YAMATO 	int brace_level = 0;
4643ae02089SMasatake YAMATO 	int bracket_level = 0;
4653ae02089SMasatake YAMATO 	while (lexer->cur_token != TOKEN_EOF)
4663ae02089SMasatake YAMATO 	{
4673ae02089SMasatake YAMATO 		if (angle_level == 0 && paren_level == 0 && brace_level == 0
4683ae02089SMasatake YAMATO 		    && bracket_level == 0)
4693ae02089SMasatake YAMATO 		{
4703ae02089SMasatake YAMATO 			int ii = 0;
4713ae02089SMasatake YAMATO 			for(ii = 0; ii < num_goal_tokens; ii++)
4723ae02089SMasatake YAMATO 			{
4733ae02089SMasatake YAMATO 				if (lexer->cur_token == goal_tokens[ii])
4743ae02089SMasatake YAMATO 				{
4753ae02089SMasatake YAMATO 					break;
4763ae02089SMasatake YAMATO 				}
4773ae02089SMasatake YAMATO 			}
4783ae02089SMasatake YAMATO 			if (ii < num_goal_tokens)
4793ae02089SMasatake YAMATO 				break;
4803ae02089SMasatake YAMATO 		}
4813ae02089SMasatake YAMATO 		switch (lexer->cur_token)
4823ae02089SMasatake YAMATO 		{
4833ae02089SMasatake YAMATO 			case '<':
4843ae02089SMasatake YAMATO 				angle_level++;
4853ae02089SMasatake YAMATO 				break;
4863ae02089SMasatake YAMATO 			case '(':
4873ae02089SMasatake YAMATO 				paren_level++;
4883ae02089SMasatake YAMATO 				break;
4893ae02089SMasatake YAMATO 			case '{':
4903ae02089SMasatake YAMATO 				brace_level++;
4913ae02089SMasatake YAMATO 				break;
4923ae02089SMasatake YAMATO 			case '[':
4933ae02089SMasatake YAMATO 				bracket_level++;
4943ae02089SMasatake YAMATO 				break;
4953ae02089SMasatake YAMATO 			case '>':
4963ae02089SMasatake YAMATO 				angle_level--;
4973ae02089SMasatake YAMATO 				break;
4983ae02089SMasatake YAMATO 			case ')':
4993ae02089SMasatake YAMATO 				paren_level--;
5003ae02089SMasatake YAMATO 				break;
5013ae02089SMasatake YAMATO 			case '}':
5023ae02089SMasatake YAMATO 				brace_level--;
5033ae02089SMasatake YAMATO 				break;
5043ae02089SMasatake YAMATO 			case ']':
5053ae02089SMasatake YAMATO 				bracket_level--;
5063ae02089SMasatake YAMATO 				break;
5073ae02089SMasatake YAMATO 			case TOKEN_RSHIFT:
5083ae02089SMasatake YAMATO 				if (angle_level >= 2)
5093ae02089SMasatake YAMATO 					angle_level -= 2;
5103ae02089SMasatake YAMATO 				break;
5113ae02089SMasatake YAMATO 			/* TOKEN_LSHIFT is never interpreted as two <'s in valid Rust code */
5123ae02089SMasatake YAMATO 			default:
5133ae02089SMasatake YAMATO 				break;
5143ae02089SMasatake YAMATO 		}
5153ae02089SMasatake YAMATO 		/* Has to be after the token switch to catch the case when we start with the initial level token */
5163ae02089SMasatake YAMATO 		if (num_goal_tokens == 0 && angle_level == 0 && paren_level == 0 && brace_level == 0
5173ae02089SMasatake YAMATO 		    && bracket_level == 0)
5183ae02089SMasatake YAMATO 			break;
519ce990805SThomas Braun 		advanceToken(lexer, true);
5203ae02089SMasatake YAMATO 	}
5213ae02089SMasatake YAMATO }
5223ae02089SMasatake YAMATO 
5233ae02089SMasatake YAMATO /* Function format:
5243ae02089SMasatake YAMATO  * "fn" <ident>[<type_bounds>] "(" [<args>] ")" ["->" <ret_type>] "{" [<body>] "}"*/
parseFn(lexerState * lexer,vString * scope,int parent_kind)5253ae02089SMasatake YAMATO static void parseFn (lexerState *lexer, vString *scope, int parent_kind)
5263ae02089SMasatake YAMATO {
5273ae02089SMasatake YAMATO 	int kind = (parent_kind == K_TRAIT || parent_kind == K_IMPL) ? K_METHOD : K_FN;
5283ae02089SMasatake YAMATO 	vString *name;
5293ae02089SMasatake YAMATO 	vString *arg_list;
5303ae02089SMasatake YAMATO 	unsigned long line;
531509a47dbSJiří Techet 	MIOPos pos;
5323ae02089SMasatake YAMATO 	int paren_level = 0;
53329209756SLionel Flandrin 	int bracket_level = 0;
534ce990805SThomas Braun 	bool found_paren = false;
535ce990805SThomas Braun 	bool valid_signature = true;
5363ae02089SMasatake YAMATO 
537ce990805SThomas Braun 	advanceToken(lexer, true);
5383ae02089SMasatake YAMATO 	if (lexer->cur_token != TOKEN_IDENT)
5393ae02089SMasatake YAMATO 		return;
5403ae02089SMasatake YAMATO 
5413ae02089SMasatake YAMATO 	name = vStringNewCopy(lexer->token_str);
5423ae02089SMasatake YAMATO 	arg_list = vStringNew();
5433ae02089SMasatake YAMATO 
5443ae02089SMasatake YAMATO 	line = lexer->line;
5453ae02089SMasatake YAMATO 	pos = lexer->pos;
5463ae02089SMasatake YAMATO 
547ce990805SThomas Braun 	advanceToken(lexer, true);
5483ae02089SMasatake YAMATO 
5493ae02089SMasatake YAMATO 	/* HACK: This is a bit coarse as far as what tag entry means by
5503ae02089SMasatake YAMATO 	 * 'arglist'... */
55129209756SLionel Flandrin 	while (lexer->cur_token != '{')
5523ae02089SMasatake YAMATO 	{
55329209756SLionel Flandrin 		if (lexer->cur_token == ';' && bracket_level == 0)
55429209756SLionel Flandrin 		{
55529209756SLionel Flandrin 			break;
55629209756SLionel Flandrin 		}
55729209756SLionel Flandrin 		else if (lexer->cur_token == '}')
5583ae02089SMasatake YAMATO 		{
559ce990805SThomas Braun 			valid_signature = false;
5603ae02089SMasatake YAMATO 			break;
5613ae02089SMasatake YAMATO 		}
5623ae02089SMasatake YAMATO 		else if (lexer->cur_token == '(')
5633ae02089SMasatake YAMATO 		{
564ce990805SThomas Braun 			found_paren = true;
5653ae02089SMasatake YAMATO 			paren_level++;
5663ae02089SMasatake YAMATO 		}
5673ae02089SMasatake YAMATO 		else if (lexer->cur_token == ')')
5683ae02089SMasatake YAMATO 		{
5693ae02089SMasatake YAMATO 			paren_level--;
5703ae02089SMasatake YAMATO 			if (paren_level < 0)
5713ae02089SMasatake YAMATO 			{
572ce990805SThomas Braun 				valid_signature = false;
5733ae02089SMasatake YAMATO 				break;
5743ae02089SMasatake YAMATO 			}
5753ae02089SMasatake YAMATO 		}
57629209756SLionel Flandrin 		else if (lexer->cur_token == '[')
57729209756SLionel Flandrin 		{
57829209756SLionel Flandrin 			bracket_level++;
57929209756SLionel Flandrin 		}
58029209756SLionel Flandrin 		else if (lexer->cur_token == ']')
58129209756SLionel Flandrin 		{
58229209756SLionel Flandrin 			bracket_level--;
58329209756SLionel Flandrin 		}
5843ae02089SMasatake YAMATO 		else if (lexer->cur_token == TOKEN_EOF)
5853ae02089SMasatake YAMATO 		{
586ce990805SThomas Braun 			valid_signature = false;
5873ae02089SMasatake YAMATO 			break;
5883ae02089SMasatake YAMATO 		}
5893ae02089SMasatake YAMATO 		writeCurTokenToStr(lexer, arg_list);
590ce990805SThomas Braun 		advanceToken(lexer, false);
5913ae02089SMasatake YAMATO 	}
59229209756SLionel Flandrin 	if (!found_paren || paren_level != 0 || bracket_level != 0)
593ce990805SThomas Braun 		valid_signature = false;
5943ae02089SMasatake YAMATO 
5953ae02089SMasatake YAMATO 	if (valid_signature)
5963ae02089SMasatake YAMATO 	{
5973ae02089SMasatake YAMATO 		vStringStripTrailing(arg_list);
59817aff2f6SMasatake YAMATO 		addTag(name, vStringValue(arg_list), kind, line, pos, scope, parent_kind);
5993ae02089SMasatake YAMATO 		addToScope(scope, name);
600ce990805SThomas Braun 		parseBlock(lexer, true, kind, scope);
6013ae02089SMasatake YAMATO 	}
6023ae02089SMasatake YAMATO 
6033ae02089SMasatake YAMATO 	vStringDelete(name);
6043ae02089SMasatake YAMATO 	vStringDelete(arg_list);
6053ae02089SMasatake YAMATO }
6063ae02089SMasatake YAMATO 
6073ae02089SMasatake YAMATO /* Mod format:
6083ae02089SMasatake YAMATO  * "mod" <ident> "{" [<body>] "}"
6093ae02089SMasatake YAMATO  * "mod" <ident> ";"*/
parseMod(lexerState * lexer,vString * scope,int parent_kind)6103ae02089SMasatake YAMATO static void parseMod (lexerState *lexer, vString *scope, int parent_kind)
6113ae02089SMasatake YAMATO {
612ce990805SThomas Braun 	advanceToken(lexer, true);
6133ae02089SMasatake YAMATO 	if (lexer->cur_token != TOKEN_IDENT)
6143ae02089SMasatake YAMATO 		return;
6153ae02089SMasatake YAMATO 
616b0918b66SSteven Oliver 	addTag(lexer->token_str, NULL, K_MOD, lexer->line, lexer->pos, scope, parent_kind);
6173ae02089SMasatake YAMATO 	addToScope(scope, lexer->token_str);
6183ae02089SMasatake YAMATO 
619ce990805SThomas Braun 	advanceToken(lexer, true);
6203ae02089SMasatake YAMATO 
621ce990805SThomas Braun 	parseBlock(lexer, true, K_MOD, scope);
6223ae02089SMasatake YAMATO }
6233ae02089SMasatake YAMATO 
6243ae02089SMasatake YAMATO /* Trait format:
6253ae02089SMasatake YAMATO  * "trait" <ident> [<type_bounds>] "{" [<body>] "}"
6263ae02089SMasatake YAMATO  */
parseTrait(lexerState * lexer,vString * scope,int parent_kind)6273ae02089SMasatake YAMATO static void parseTrait (lexerState *lexer, vString *scope, int parent_kind)
6283ae02089SMasatake YAMATO {
6293ae02089SMasatake YAMATO 	int goal_tokens[] = {'{'};
6303ae02089SMasatake YAMATO 
631ce990805SThomas Braun 	advanceToken(lexer, true);
6323ae02089SMasatake YAMATO 	if (lexer->cur_token != TOKEN_IDENT)
6333ae02089SMasatake YAMATO 		return;
6343ae02089SMasatake YAMATO 
635b0918b66SSteven Oliver 	addTag(lexer->token_str, NULL, K_TRAIT, lexer->line, lexer->pos, scope, parent_kind);
6363ae02089SMasatake YAMATO 	addToScope(scope, lexer->token_str);
6373ae02089SMasatake YAMATO 
638ce990805SThomas Braun 	advanceToken(lexer, true);
6393ae02089SMasatake YAMATO 
6403ae02089SMasatake YAMATO 	skipUntil(lexer, goal_tokens, 1);
6413ae02089SMasatake YAMATO 
642ce990805SThomas Braun 	parseBlock(lexer, true, K_TRAIT, scope);
6433ae02089SMasatake YAMATO }
6443ae02089SMasatake YAMATO 
6453ae02089SMasatake YAMATO /* Skips type blocks of the form <T:T<T>, ...> */
skipTypeBlock(lexerState * lexer)6463ae02089SMasatake YAMATO static void skipTypeBlock (lexerState *lexer)
6473ae02089SMasatake YAMATO {
6483ae02089SMasatake YAMATO 	if (lexer->cur_token == '<')
6493ae02089SMasatake YAMATO 	{
6503ae02089SMasatake YAMATO 		skipUntil(lexer, NULL, 0);
651ce990805SThomas Braun 		advanceToken(lexer, true);
6523ae02089SMasatake YAMATO 	}
6533ae02089SMasatake YAMATO }
6543ae02089SMasatake YAMATO 
6553ae02089SMasatake YAMATO /* Essentially grabs the last ident before 'for', '<' and '{', which
6563ae02089SMasatake YAMATO  * tends to correspond to what we want as the impl tag entry name */
parseQualifiedType(lexerState * lexer,vString * name)6573ae02089SMasatake YAMATO static void parseQualifiedType (lexerState *lexer, vString* name)
6583ae02089SMasatake YAMATO {
6593ae02089SMasatake YAMATO 	while (lexer->cur_token != TOKEN_EOF)
6603ae02089SMasatake YAMATO 	{
6613ae02089SMasatake YAMATO 		if (lexer->cur_token == TOKEN_IDENT)
6623ae02089SMasatake YAMATO 		{
66317aff2f6SMasatake YAMATO 			if (strcmp(vStringValue(lexer->token_str), "for") == 0
66417aff2f6SMasatake YAMATO 				|| strcmp(vStringValue(lexer->token_str), "where") == 0)
6653ae02089SMasatake YAMATO 				break;
6663ae02089SMasatake YAMATO 			vStringClear(name);
6673ae02089SMasatake YAMATO 			vStringCat(name, lexer->token_str);
6683ae02089SMasatake YAMATO 		}
6693ae02089SMasatake YAMATO 		else if (lexer->cur_token == '<' || lexer->cur_token == '{')
6703ae02089SMasatake YAMATO 		{
6713ae02089SMasatake YAMATO 			break;
6723ae02089SMasatake YAMATO 		}
673ce990805SThomas Braun 		advanceToken(lexer, true);
6743ae02089SMasatake YAMATO 	}
6753ae02089SMasatake YAMATO 	skipTypeBlock(lexer);
6763ae02089SMasatake YAMATO }
6773ae02089SMasatake YAMATO 
6783ae02089SMasatake YAMATO /* Impl format:
6793ae02089SMasatake YAMATO  * "impl" [<type_bounds>] <qualified_ident>[<type_bounds>] ["for" <qualified_ident>[<type_bounds>]] "{" [<body>] "}"
6803ae02089SMasatake YAMATO  */
parseImpl(lexerState * lexer,vString * scope,int parent_kind)6813ae02089SMasatake YAMATO static void parseImpl (lexerState *lexer, vString *scope, int parent_kind)
6823ae02089SMasatake YAMATO {
6833ae02089SMasatake YAMATO 	unsigned long line;
684509a47dbSJiří Techet 	MIOPos pos;
6853ae02089SMasatake YAMATO 	vString *name;
6863ae02089SMasatake YAMATO 
687ce990805SThomas Braun 	advanceToken(lexer, true);
6883ae02089SMasatake YAMATO 
6893ae02089SMasatake YAMATO 	line = lexer->line;
6903ae02089SMasatake YAMATO 	pos = lexer->pos;
6913ae02089SMasatake YAMATO 
6923ae02089SMasatake YAMATO 	skipTypeBlock(lexer);
6933ae02089SMasatake YAMATO 
6943ae02089SMasatake YAMATO 	name = vStringNew();
6953ae02089SMasatake YAMATO 
6963ae02089SMasatake YAMATO 	parseQualifiedType(lexer, name);
6973ae02089SMasatake YAMATO 
69817aff2f6SMasatake YAMATO 	if (lexer->cur_token == TOKEN_IDENT && strcmp(vStringValue(lexer->token_str), "for") == 0)
6993ae02089SMasatake YAMATO 	{
700ce990805SThomas Braun 		advanceToken(lexer, true);
7013ae02089SMasatake YAMATO 		parseQualifiedType(lexer, name);
7023ae02089SMasatake YAMATO 	}
7033ae02089SMasatake YAMATO 
704b0918b66SSteven Oliver 	addTag(name, NULL, K_IMPL, line, pos, scope, parent_kind);
7053ae02089SMasatake YAMATO 	addToScope(scope, name);
7063ae02089SMasatake YAMATO 
707ce990805SThomas Braun 	parseBlock(lexer, true, K_IMPL, scope);
7083ae02089SMasatake YAMATO 
7093ae02089SMasatake YAMATO 	vStringDelete(name);
7103ae02089SMasatake YAMATO }
7113ae02089SMasatake YAMATO 
7123ae02089SMasatake YAMATO /* Static format:
7133ae02089SMasatake YAMATO  * "static" ["mut"] <ident>
7143ae02089SMasatake YAMATO  */
parseStatic(lexerState * lexer,vString * scope,int parent_kind)7153ae02089SMasatake YAMATO static void parseStatic (lexerState *lexer, vString *scope, int parent_kind)
7163ae02089SMasatake YAMATO {
717ce990805SThomas Braun 	advanceToken(lexer, true);
7183ae02089SMasatake YAMATO 	if (lexer->cur_token != TOKEN_IDENT)
7193ae02089SMasatake YAMATO 		return;
72017aff2f6SMasatake YAMATO 	if (strcmp(vStringValue(lexer->token_str), "mut") == 0)
7213ae02089SMasatake YAMATO 	{
722ce990805SThomas Braun 		advanceToken(lexer, true);
7233ae02089SMasatake YAMATO 	}
7243ae02089SMasatake YAMATO 	if (lexer->cur_token != TOKEN_IDENT)
7253ae02089SMasatake YAMATO 		return;
7263ae02089SMasatake YAMATO 
727b0918b66SSteven Oliver 	addTag(lexer->token_str, NULL, K_STATIC, lexer->line, lexer->pos, scope, parent_kind);
7283ae02089SMasatake YAMATO }
7293ae02089SMasatake YAMATO 
730*648cbe27SJiří Techet /* Const format:
731*648cbe27SJiří Techet  * "const" <ident>
732*648cbe27SJiří Techet  */
parseConst(lexerState * lexer,vString * scope,int parent_kind)733*648cbe27SJiří Techet static void parseConst (lexerState *lexer, vString *scope, int parent_kind)
734*648cbe27SJiří Techet {
735*648cbe27SJiří Techet 	advanceToken(lexer, true);
736*648cbe27SJiří Techet 	if (lexer->cur_token != TOKEN_IDENT)
737*648cbe27SJiří Techet 		return;
738*648cbe27SJiří Techet 
739*648cbe27SJiří Techet 	addTag(lexer->token_str, NULL, K_CONST, lexer->line, lexer->pos, scope, parent_kind);
740*648cbe27SJiří Techet }
741*648cbe27SJiří Techet 
7423ae02089SMasatake YAMATO /* Type format:
7433ae02089SMasatake YAMATO  * "type" <ident>
7443ae02089SMasatake YAMATO  */
parseType(lexerState * lexer,vString * scope,int parent_kind)7453ae02089SMasatake YAMATO static void parseType (lexerState *lexer, vString *scope, int parent_kind)
7463ae02089SMasatake YAMATO {
747ce990805SThomas Braun 	advanceToken(lexer, true);
7483ae02089SMasatake YAMATO 	if (lexer->cur_token != TOKEN_IDENT)
7493ae02089SMasatake YAMATO 		return;
7503ae02089SMasatake YAMATO 
751b0918b66SSteven Oliver 	addTag(lexer->token_str, NULL, K_TYPE, lexer->line, lexer->pos, scope, parent_kind);
7523ae02089SMasatake YAMATO }
7533ae02089SMasatake YAMATO 
7543ae02089SMasatake YAMATO /* Structs and enums are very similar syntax-wise.
7553ae02089SMasatake YAMATO  * It is possible to parse variants a bit more cleverly (e.g. make tuple variants functions and
7563ae02089SMasatake YAMATO  * struct variants structs) but it'd be too clever and the signature wouldn't make too much sense without
7573ae02089SMasatake YAMATO  * the enum's definition (e.g. for the type bounds)
7583ae02089SMasatake YAMATO  *
7593ae02089SMasatake YAMATO  * Struct/Enum format:
7603ae02089SMasatake YAMATO  * "struct/enum" <ident>[<type_bounds>] "{" [<ident>,]+ "}"
7613ae02089SMasatake YAMATO  * "struct/enum" <ident>[<type_bounds>] ";"
7623ae02089SMasatake YAMATO  * */
parseStructOrEnum(lexerState * lexer,vString * scope,int parent_kind,bool is_struct)763ce990805SThomas Braun static void parseStructOrEnum (lexerState *lexer, vString *scope, int parent_kind, bool is_struct)
7643ae02089SMasatake YAMATO {
7653ae02089SMasatake YAMATO 	int kind = is_struct ? K_STRUCT : K_ENUM;
7663ae02089SMasatake YAMATO 	int field_kind = is_struct ? K_FIELD : K_VARIANT;
7673ae02089SMasatake YAMATO 	int goal_tokens1[] = {';', '{'};
7683ae02089SMasatake YAMATO 
769ce990805SThomas Braun 	advanceToken(lexer, true);
7703ae02089SMasatake YAMATO 	if (lexer->cur_token != TOKEN_IDENT)
7713ae02089SMasatake YAMATO 		return;
7723ae02089SMasatake YAMATO 
773b0918b66SSteven Oliver 	addTag(lexer->token_str, NULL, kind, lexer->line, lexer->pos, scope, parent_kind);
7743ae02089SMasatake YAMATO 	addToScope(scope, lexer->token_str);
7753ae02089SMasatake YAMATO 
7763ae02089SMasatake YAMATO 	skipUntil(lexer, goal_tokens1, 2);
7773ae02089SMasatake YAMATO 
7783ae02089SMasatake YAMATO 	if (lexer->cur_token == '{')
7793ae02089SMasatake YAMATO 	{
7803ae02089SMasatake YAMATO 		vString *field_name = vStringNew();
7813ae02089SMasatake YAMATO 		while (lexer->cur_token != TOKEN_EOF)
7823ae02089SMasatake YAMATO 		{
7833ae02089SMasatake YAMATO 			int goal_tokens2[] = {'}', ','};
7843ae02089SMasatake YAMATO 			/* Skip attributes. Format:
7853ae02089SMasatake YAMATO 			 * #[..] or #![..]
7863ae02089SMasatake YAMATO 			 * */
7873ae02089SMasatake YAMATO 			if (lexer->cur_token == '#')
7883ae02089SMasatake YAMATO 			{
789ce990805SThomas Braun 				advanceToken(lexer, true);
7903ae02089SMasatake YAMATO 				if (lexer->cur_token == '!')
791ce990805SThomas Braun 					advanceToken(lexer, true);
7923ae02089SMasatake YAMATO 				if (lexer->cur_token == '[')
7933ae02089SMasatake YAMATO 				{
7943ae02089SMasatake YAMATO 					/* It's an attribute, skip it. */
7953ae02089SMasatake YAMATO 					skipUntil(lexer, NULL, 0);
7963ae02089SMasatake YAMATO 				}
7973ae02089SMasatake YAMATO 				else
7983ae02089SMasatake YAMATO 				{
7993ae02089SMasatake YAMATO 					/* Something's up with this field, skip to the next one */
8003ae02089SMasatake YAMATO 					skipUntil(lexer, goal_tokens2, 2);
8013ae02089SMasatake YAMATO 					continue;
8023ae02089SMasatake YAMATO 				}
8033ae02089SMasatake YAMATO 			}
8043ae02089SMasatake YAMATO 			if (lexer->cur_token == TOKEN_IDENT)
8053ae02089SMasatake YAMATO 			{
80617aff2f6SMasatake YAMATO 				if (strcmp(vStringValue(lexer->token_str), "priv") == 0
80717aff2f6SMasatake YAMATO 				    || strcmp(vStringValue(lexer->token_str), "pub") == 0)
8083ae02089SMasatake YAMATO 				{
809ce990805SThomas Braun 					advanceToken(lexer, true);
8106481c72aSMasatake YAMATO 
8116481c72aSMasatake YAMATO 					/* Skip thevisibility specificaions.
8126481c72aSMasatake YAMATO 					 * https://doc.rust-lang.org/reference/visibility-and-privacy.html */
8136481c72aSMasatake YAMATO 					if (lexer->cur_token == '(')
8146481c72aSMasatake YAMATO 					{
8156481c72aSMasatake YAMATO 						advanceToken(lexer, true);
8166481c72aSMasatake YAMATO 						skipUntil (lexer, (int []){')'}, 1);
8176481c72aSMasatake YAMATO 						advanceToken(lexer, true);
8186481c72aSMasatake YAMATO 					}
8196481c72aSMasatake YAMATO 
8203ae02089SMasatake YAMATO 					if (lexer->cur_token != TOKEN_IDENT)
8213ae02089SMasatake YAMATO 					{
8223ae02089SMasatake YAMATO 						/* Something's up with this field, skip to the next one */
8233ae02089SMasatake YAMATO 						skipUntil(lexer, goal_tokens2, 2);
8243ae02089SMasatake YAMATO 						continue;
8253ae02089SMasatake YAMATO 					}
8263ae02089SMasatake YAMATO 				}
8273ae02089SMasatake YAMATO 
8283ae02089SMasatake YAMATO 				vStringClear(field_name);
8293ae02089SMasatake YAMATO 				vStringCat(field_name, lexer->token_str);
830b0918b66SSteven Oliver 				addTag(field_name, NULL, field_kind, lexer->line, lexer->pos, scope, kind);
8313ae02089SMasatake YAMATO 				skipUntil(lexer, goal_tokens2, 2);
8323ae02089SMasatake YAMATO 			}
8333ae02089SMasatake YAMATO 			if (lexer->cur_token == '}')
8343ae02089SMasatake YAMATO 			{
835ce990805SThomas Braun 				advanceToken(lexer, true);
8363ae02089SMasatake YAMATO 				break;
8373ae02089SMasatake YAMATO 			}
838ce990805SThomas Braun 			advanceToken(lexer, true);
8393ae02089SMasatake YAMATO 		}
8403ae02089SMasatake YAMATO 		vStringDelete(field_name);
8413ae02089SMasatake YAMATO 	}
8423ae02089SMasatake YAMATO }
8433ae02089SMasatake YAMATO 
8443ae02089SMasatake YAMATO /* Skip the body of the macro. Can't use skipUntil here as
8453ae02089SMasatake YAMATO  * the body of the macro may have arbitrary code which confuses it (e.g.
8463ae02089SMasatake YAMATO  * bitshift operators/function return arrows) */
skipMacro(lexerState * lexer)8473ae02089SMasatake YAMATO static void skipMacro (lexerState *lexer)
8483ae02089SMasatake YAMATO {
8493ae02089SMasatake YAMATO 	int level = 0;
8503ae02089SMasatake YAMATO 	int plus_token = 0;
8513ae02089SMasatake YAMATO 	int minus_token = 0;
8523ae02089SMasatake YAMATO 
853ce990805SThomas Braun 	advanceToken(lexer, true);
8543ae02089SMasatake YAMATO 	switch (lexer->cur_token)
8553ae02089SMasatake YAMATO 	{
8563ae02089SMasatake YAMATO 		case '(':
8573ae02089SMasatake YAMATO 			plus_token = '(';
8583ae02089SMasatake YAMATO 			minus_token = ')';
8593ae02089SMasatake YAMATO 			break;
8603ae02089SMasatake YAMATO 		case '{':
8613ae02089SMasatake YAMATO 			plus_token = '{';
8623ae02089SMasatake YAMATO 			minus_token = '}';
8633ae02089SMasatake YAMATO 			break;
8643ae02089SMasatake YAMATO 		case '[':
8653ae02089SMasatake YAMATO 			plus_token = '[';
8663ae02089SMasatake YAMATO 			minus_token = ']';
8673ae02089SMasatake YAMATO 			break;
8683ae02089SMasatake YAMATO 		default:
8693ae02089SMasatake YAMATO 			return;
8703ae02089SMasatake YAMATO 	}
8713ae02089SMasatake YAMATO 
8723ae02089SMasatake YAMATO 	while (lexer->cur_token != TOKEN_EOF)
8733ae02089SMasatake YAMATO 	{
8743ae02089SMasatake YAMATO 		if (lexer->cur_token == plus_token)
8753ae02089SMasatake YAMATO 			level++;
8763ae02089SMasatake YAMATO 		else if (lexer->cur_token == minus_token)
8773ae02089SMasatake YAMATO 			level--;
8783ae02089SMasatake YAMATO 		if (level == 0)
8793ae02089SMasatake YAMATO 			break;
880ce990805SThomas Braun 		advanceToken(lexer, true);
8813ae02089SMasatake YAMATO 	}
882ce990805SThomas Braun 	advanceToken(lexer, true);
8833ae02089SMasatake YAMATO }
8843ae02089SMasatake YAMATO 
8853ae02089SMasatake YAMATO /*
8863ae02089SMasatake YAMATO  * Macro rules format:
8873ae02089SMasatake YAMATO  * "macro_rules" "!" <ident> <macro_body>
8883ae02089SMasatake YAMATO  */
parseMacroRules(lexerState * lexer,vString * scope,int parent_kind)8893ae02089SMasatake YAMATO static void parseMacroRules (lexerState *lexer, vString *scope, int parent_kind)
8903ae02089SMasatake YAMATO {
891ce990805SThomas Braun 	advanceToken(lexer, true);
8923ae02089SMasatake YAMATO 
8933ae02089SMasatake YAMATO 	if (lexer->cur_token != '!')
8943ae02089SMasatake YAMATO 		return;
8953ae02089SMasatake YAMATO 
896ce990805SThomas Braun 	advanceToken(lexer, true);
8973ae02089SMasatake YAMATO 
8983ae02089SMasatake YAMATO 	if (lexer->cur_token != TOKEN_IDENT)
8993ae02089SMasatake YAMATO 		return;
9003ae02089SMasatake YAMATO 
901b0918b66SSteven Oliver 	addTag(lexer->token_str, NULL, K_MACRO, lexer->line, lexer->pos, scope, parent_kind);
9023ae02089SMasatake YAMATO 
9033ae02089SMasatake YAMATO 	skipMacro(lexer);
9043ae02089SMasatake YAMATO }
9053ae02089SMasatake YAMATO 
9063ae02089SMasatake YAMATO /*
9073ae02089SMasatake YAMATO  * Rust is very liberal with nesting, so this function is used pretty much for any block
9083ae02089SMasatake YAMATO  */
parseBlock(lexerState * lexer,bool delim,int kind,vString * scope)909ce990805SThomas Braun static void parseBlock (lexerState *lexer, bool delim, int kind, vString *scope)
9103ae02089SMasatake YAMATO {
9113ae02089SMasatake YAMATO 	int level = 1;
9123ae02089SMasatake YAMATO 	if (delim)
9133ae02089SMasatake YAMATO 	{
9143ae02089SMasatake YAMATO 		if (lexer->cur_token != '{')
9153ae02089SMasatake YAMATO 			return;
916ce990805SThomas Braun 		advanceToken(lexer, true);
9173ae02089SMasatake YAMATO 	}
9183ae02089SMasatake YAMATO 	while (lexer->cur_token != TOKEN_EOF)
9193ae02089SMasatake YAMATO 	{
9203ae02089SMasatake YAMATO 		if (lexer->cur_token == TOKEN_IDENT)
9213ae02089SMasatake YAMATO 		{
9223ae02089SMasatake YAMATO 			size_t old_scope_len = vStringLength(scope);
92317aff2f6SMasatake YAMATO 			if (strcmp(vStringValue(lexer->token_str), "fn") == 0)
9243ae02089SMasatake YAMATO 			{
9253ae02089SMasatake YAMATO 				parseFn(lexer, scope, kind);
9263ae02089SMasatake YAMATO 			}
92717aff2f6SMasatake YAMATO 			else if(strcmp(vStringValue(lexer->token_str), "mod") == 0)
9283ae02089SMasatake YAMATO 			{
9293ae02089SMasatake YAMATO 				parseMod(lexer, scope, kind);
9303ae02089SMasatake YAMATO 			}
93117aff2f6SMasatake YAMATO 			else if(strcmp(vStringValue(lexer->token_str), "static") == 0)
9323ae02089SMasatake YAMATO 			{
9333ae02089SMasatake YAMATO 				parseStatic(lexer, scope, kind);
9343ae02089SMasatake YAMATO 			}
935*648cbe27SJiří Techet 			else if(strcmp(vStringValue(lexer->token_str), "const") == 0)
936*648cbe27SJiří Techet 			{
937*648cbe27SJiří Techet 				parseConst(lexer, scope, kind);
938*648cbe27SJiří Techet 			}
93917aff2f6SMasatake YAMATO 			else if(strcmp(vStringValue(lexer->token_str), "trait") == 0)
9403ae02089SMasatake YAMATO 			{
9413ae02089SMasatake YAMATO 				parseTrait(lexer, scope, kind);
9423ae02089SMasatake YAMATO 			}
94317aff2f6SMasatake YAMATO 			else if(strcmp(vStringValue(lexer->token_str), "type") == 0)
9443ae02089SMasatake YAMATO 			{
9453ae02089SMasatake YAMATO 				parseType(lexer, scope, kind);
9463ae02089SMasatake YAMATO 			}
94717aff2f6SMasatake YAMATO 			else if(strcmp(vStringValue(lexer->token_str), "impl") == 0)
9483ae02089SMasatake YAMATO 			{
9493ae02089SMasatake YAMATO 				parseImpl(lexer, scope, kind);
9503ae02089SMasatake YAMATO 			}
95117aff2f6SMasatake YAMATO 			else if(strcmp(vStringValue(lexer->token_str), "struct") == 0)
9523ae02089SMasatake YAMATO 			{
953ce990805SThomas Braun 				parseStructOrEnum(lexer, scope, kind, true);
9543ae02089SMasatake YAMATO 			}
95517aff2f6SMasatake YAMATO 			else if(strcmp(vStringValue(lexer->token_str), "enum") == 0)
9563ae02089SMasatake YAMATO 			{
957ce990805SThomas Braun 				parseStructOrEnum(lexer, scope, kind, false);
9583ae02089SMasatake YAMATO 			}
95917aff2f6SMasatake YAMATO 			else if(strcmp(vStringValue(lexer->token_str), "macro_rules") == 0)
9603ae02089SMasatake YAMATO 			{
9613ae02089SMasatake YAMATO 				parseMacroRules(lexer, scope, kind);
9623ae02089SMasatake YAMATO 			}
9633ae02089SMasatake YAMATO 			else
9643ae02089SMasatake YAMATO 			{
965ce990805SThomas Braun 				advanceToken(lexer, true);
9663ae02089SMasatake YAMATO 				if (lexer->cur_token == '!')
9673ae02089SMasatake YAMATO 				{
9683ae02089SMasatake YAMATO 					skipMacro(lexer);
9693ae02089SMasatake YAMATO 				}
9703ae02089SMasatake YAMATO 			}
9713ae02089SMasatake YAMATO 			resetScope(scope, old_scope_len);
9723ae02089SMasatake YAMATO 		}
9733ae02089SMasatake YAMATO 		else if (lexer->cur_token == '{')
9743ae02089SMasatake YAMATO 		{
9753ae02089SMasatake YAMATO 			level++;
976ce990805SThomas Braun 			advanceToken(lexer, true);
9773ae02089SMasatake YAMATO 		}
9783ae02089SMasatake YAMATO 		else if (lexer->cur_token == '}')
9793ae02089SMasatake YAMATO 		{
9803ae02089SMasatake YAMATO 			level--;
981ce990805SThomas Braun 			advanceToken(lexer, true);
9823ae02089SMasatake YAMATO 		}
9833ae02089SMasatake YAMATO 		else if (lexer->cur_token == '\'')
9843ae02089SMasatake YAMATO 		{
9853ae02089SMasatake YAMATO 			/* Skip over the 'static lifetime, as it confuses the static parser above */
986ce990805SThomas Braun 			advanceToken(lexer, true);
98717aff2f6SMasatake YAMATO 			if (lexer->cur_token == TOKEN_IDENT && strcmp(vStringValue(lexer->token_str), "static") == 0)
988ce990805SThomas Braun 				advanceToken(lexer, true);
9893ae02089SMasatake YAMATO 		}
9903ae02089SMasatake YAMATO 		else
9913ae02089SMasatake YAMATO 		{
992ce990805SThomas Braun 			advanceToken(lexer, true);
9933ae02089SMasatake YAMATO 		}
9943ae02089SMasatake YAMATO 		if (delim && level <= 0)
9953ae02089SMasatake YAMATO 			break;
9963ae02089SMasatake YAMATO 	}
9973ae02089SMasatake YAMATO }
9983ae02089SMasatake YAMATO 
findRustTags(void)9993ae02089SMasatake YAMATO static void findRustTags (void)
10003ae02089SMasatake YAMATO {
1001f6fc36daSAlan Barr 	lexerState lexer = {0};
10023ae02089SMasatake YAMATO 	vString* scope = vStringNew();
10033ae02089SMasatake YAMATO 	initLexer(&lexer);
10043ae02089SMasatake YAMATO 
1005ce990805SThomas Braun 	parseBlock(&lexer, false, K_NONE, scope);
10063ae02089SMasatake YAMATO 	vStringDelete(scope);
10073ae02089SMasatake YAMATO 
10083ae02089SMasatake YAMATO 	deInitLexer(&lexer);
10093ae02089SMasatake YAMATO }
10103ae02089SMasatake YAMATO 
RustParser(void)10113ae02089SMasatake YAMATO extern parserDefinition *RustParser (void)
10123ae02089SMasatake YAMATO {
10133ae02089SMasatake YAMATO 	static const char *const extensions[] = { "rs", NULL };
1014b29ae60fSMasatake YAMATO 	parserDefinition *def = parserNew ("Rust");
101509ae690fSMasatake YAMATO 	def->kindTable = rustKinds;
10163db72c21SMasatake YAMATO 	def->kindCount = ARRAY_SIZE (rustKinds);
10173ae02089SMasatake YAMATO 	def->extensions = extensions;
10183ae02089SMasatake YAMATO 	def->parser = findRustTags;
10193ae02089SMasatake YAMATO 
10203ae02089SMasatake YAMATO 	return def;
10213ae02089SMasatake YAMATO }
1022