1 /*
2 *
3 * This source code is released for free distribution under the terms of the
4 * GNU General Public License version 2 or (at your option) any later version.
5 *
6 * This module contains functions for generating tags for Rust files.
7 */
8
9 /*
10 * INCLUDE FILES
11 */
12 #include "general.h" /* must always come first */
13
14 #include <string.h>
15
16 #include "keyword.h"
17 #include "parse.h"
18 #include "entry.h"
19 #include "options.h"
20 #include "read.h"
21 #include "routines.h"
22 #include "vstring.h"
23
24 /*
25 * MACROS
26 */
/* Maximum number of characters kept in lexerState::token_str for one token;
 * advanceAndStoreChar() stops storing (but keeps reading) once it is reached. */
#define MAX_STRING_LENGTH 256
28
29 /*
30 * DATA DECLARATIONS
31 */
32
/* Tag kinds produced by this parser. The values are used as indices into
 * rustKinds[], so the order here must stay in step with that table.
 * K_NONE has no table entry: it means "no kind" / "no parent" and is
 * rejected by addTag(). */
typedef enum {
	K_MOD,
	K_STRUCT,
	K_TRAIT,
	K_IMPL,
	K_FN,
	K_ENUM,
	K_TYPE,
	K_STATIC,
	K_MACRO,
	K_FIELD,
	K_VARIANT,
	K_METHOD,
	K_CONST,
	K_NONE
} RustKind;
49
/* Kind table registered with the parser in RustParser() and indexed by
 * RustKind (entry order matches the enum). The first member is the
 * "enabled" flag that addTag() consults; the remaining members are
 * presumably the kind letter, kind name and description -- see the
 * kindDefinition declaration to confirm. */
static kindDefinition rustKinds[] = {
	{true, 'n', "module", "module"},
	{true, 's', "struct", "structural type"},
	{true, 'i', "interface", "trait interface"},
	{true, 'c', "implementation", "implementation"},
	{true, 'f', "function", "Function"},
	{true, 'g', "enum", "Enum"},
	{true, 't', "typedef", "Type Alias"},
	{true, 'v', "variable", "Global variable"},
	{true, 'M', "macro", "Macro Definition"},
	{true, 'm', "field", "A struct field"},
	{true, 'e', "enumerator", "An enum variant"},
	{true, 'P', "method", "A method"},
	{true, 'C', "constant", "A constant"},
};
65
/* Multi-character token classes returned by advanceToken(). Any other
 * token is a single character and is represented by its own character
 * value in lexerState::cur_token (e.g. '{' or '#'). */
typedef enum {
	TOKEN_WHITESPACE,	/* run of whitespace and/or comments */
	TOKEN_STRING,		/* string, raw string, character literal or lifetime quote */
	TOKEN_IDENT,		/* identifier or keyword; text is in token_str */
	TOKEN_LSHIFT,		/* << */
	TOKEN_RSHIFT,		/* >> */
	TOKEN_RARROW,		/* -> */
	TOKEN_EOF		/* end of input */
} tokenType;
75
/* Complete state of the lexer: a two-character look-ahead window over the
 * input plus the most recently scanned token. */
typedef struct {
	/* Characters */
	int cur_c;		/* character currently being examined */
	int next_c;		/* one character of look-ahead */

	/* Tokens */
	int cur_token;		/* a tokenType value or a literal character */
	vString* token_str;	/* text of the last identifier/string token */
	unsigned long line;	/* input line recorded by advanceToken() */
	MIOPos pos;		/* input file position recorded by advanceToken() */
} lexerState;
87
88 /*
89 * FUNCTION PROTOTYPES
90 */
91
/* Forward declaration: parseBlock() and the item parsers (parseFn, parseMod,
 * parseTrait, parseImpl) call each other recursively. */
static void parseBlock (lexerState *lexer, bool delim, int kind, vString *scope);
93
94 /*
95 * FUNCTION DEFINITIONS
96 */
97
98 /* Resets the scope string to the old length */
resetScope(vString * scope,size_t old_len)99 static void resetScope (vString *scope, size_t old_len)
100 {
101 vStringTruncate (scope, old_len);
102 }
103
104 /* Adds a name to the end of the scope string */
addToScope(vString * scope,vString * name)105 static void addToScope (vString *scope, vString *name)
106 {
107 if (vStringLength(scope) > 0)
108 vStringCatS(scope, "::");
109 vStringCat(scope, name);
110 }
111
112 /* Write the lexer's current token to string, taking care of special tokens */
writeCurTokenToStr(lexerState * lexer,vString * out_str)113 static void writeCurTokenToStr (lexerState *lexer, vString *out_str)
114 {
115 switch (lexer->cur_token)
116 {
117 case TOKEN_IDENT:
118 vStringCat(out_str, lexer->token_str);
119 break;
120 case TOKEN_STRING:
121 vStringCat(out_str, lexer->token_str);
122 break;
123 case TOKEN_WHITESPACE:
124 vStringPut(out_str, ' ');
125 break;
126 case TOKEN_LSHIFT:
127 vStringCatS(out_str, "<<");
128 break;
129 case TOKEN_RSHIFT:
130 vStringCatS(out_str, ">>");
131 break;
132 case TOKEN_RARROW:
133 vStringCatS(out_str, "->");
134 break;
135 default:
136 vStringPut(out_str, (char) lexer->cur_token);
137 }
138 }
139
140 /* Reads a character from the file */
advanceChar(lexerState * lexer)141 static void advanceChar (lexerState *lexer)
142 {
143 lexer->cur_c = lexer->next_c;
144 lexer->next_c = getcFromInputFile();
145 }
146
147 /* Reads N characters from the file */
advanceNChar(lexerState * lexer,int n)148 static void advanceNChar (lexerState *lexer, int n)
149 {
150 while (n--)
151 advanceChar(lexer);
152 }
153
154 /* Store the current character in lexerState::token_str if there is space
155 * (set by MAX_STRING_LENGTH), and then read the next character from the file */
advanceAndStoreChar(lexerState * lexer)156 static void advanceAndStoreChar (lexerState *lexer)
157 {
158 if (vStringLength(lexer->token_str) < MAX_STRING_LENGTH)
159 vStringPut(lexer->token_str, (char) lexer->cur_c);
160 advanceChar(lexer);
161 }
162
/* True for the four characters the lexer treats as whitespace:
 * space, tab, carriage return and newline. */
static bool isWhitespace (int c)
{
	switch (c)
	{
		case ' ':
		case '\t':
		case '\r':
		case '\n':
			return true;
		default:
			return false;
	}
}
167
/* True when c is a 7-bit ASCII code (0..0x7F). Negative values, EOF
 * included, are not ASCII. */
static bool isAscii (int c)
{
	return (c >= 0) && (c < 0x80);
}

/* True when c can begin an identifier: an ASCII letter, an underscore, or
 * any byte >= 0x80 (taken to be part of a multi-byte character).
 * This isn't quite right for Unicode identifiers.
 * Negative values such as EOF are never identifier characters. */
static bool isIdentifierStart (int c)
{
	if (c < 0)
		return false;
	return !isAscii(c) || isalpha(c) || c == '_';
}

/* True when c can continue an identifier: an ASCII letter or digit, an
 * underscore, or any byte >= 0x80.
 * This isn't quite right for Unicode identifiers.
 * Negative values such as EOF are never identifier characters. */
static bool isIdentifierContinue (int c)
{
	if (c < 0)
		return false;
	return !isAscii(c) || isalnum(c) || c == '_';
}
184
/* Consumes characters for as long as the current one is whitespace */
static void scanWhitespace (lexerState *lexer)
{
	if (!isWhitespace(lexer->cur_c))
		return;
	do
	{
		advanceChar(lexer);
	} while (isWhitespace(lexer->cur_c));
}
190
191 /* Normal line comments start with two /'s and continue until the next \n
192 * (potentially after a \r). Additionally, a shebang in the beginning of the
193 * file also counts as a line comment as long as it is not this sequence: #![ .
194 * Block comments start with / followed by a * and end with a * followed by a /.
195 * Unlike in C/C++ they nest. */
scanComments(lexerState * lexer)196 static void scanComments (lexerState *lexer)
197 {
198 /* // */
199 if (lexer->next_c == '/')
200 {
201 advanceNChar(lexer, 2);
202 while (lexer->cur_c != EOF && lexer->cur_c != '\n')
203 advanceChar(lexer);
204 }
205 /* #! */
206 else if (lexer->next_c == '!')
207 {
208 advanceNChar(lexer, 2);
209 /* If it is exactly #![ then it is not a comment, but an attribute */
210 if (lexer->cur_c == '[')
211 return;
212 while (lexer->cur_c != EOF && lexer->cur_c != '\n')
213 advanceChar(lexer);
214 }
215 /* block comment */
216 else if (lexer->next_c == '*')
217 {
218 int level = 1;
219 advanceNChar(lexer, 2);
220 while (lexer->cur_c != EOF && level > 0)
221 {
222 if (lexer->cur_c == '*' && lexer->next_c == '/')
223 {
224 level--;
225 advanceNChar(lexer, 2);
226 }
227 else if (lexer->cur_c == '/' && lexer->next_c == '*')
228 {
229 level++;
230 advanceNChar(lexer, 2);
231 }
232 else
233 {
234 advanceChar(lexer);
235 }
236 }
237 }
238 }
239
/* Reads an identifier into token_str. The current character is taken
 * unconditionally (the caller has checked that it can start an identifier);
 * further characters are taken while they can continue one. */
static void scanIdentifier (lexerState *lexer)
{
	vStringClear(lexer->token_str);
	advanceAndStoreChar(lexer);
	while (lexer->cur_c != EOF && isIdentifierContinue(lexer->cur_c))
		advanceAndStoreChar(lexer);
}
248
249 /* Double-quoted strings, we only care about the \" escape. These
250 * last past the end of the line, so be careful not too store too much
251 * of them (see MAX_STRING_LENGTH). The only place we look at their
252 * contents is in the function definitions, and there the valid strings are
253 * things like "C" and "Rust" */
scanString(lexerState * lexer)254 static void scanString (lexerState *lexer)
255 {
256 vStringClear(lexer->token_str);
257 advanceAndStoreChar(lexer);
258 while (lexer->cur_c != EOF && lexer->cur_c != '"')
259 {
260 if (lexer->cur_c == '\\' && lexer->next_c == '"')
261 advanceAndStoreChar(lexer);
262 advanceAndStoreChar(lexer);
263 }
264 advanceAndStoreChar(lexer);
265 }
266
/* Raw strings look like this: r"" or r##""## where the number of
 * hashes must match.
 *
 * Called with cur_c on the leading 'r'. Everything consumed is stored in
 * token_str (subject to MAX_STRING_LENGTH). If the hashes are not followed
 * by a double quote, the function returns having consumed only the 'r' and
 * the hashes. */
static void scanRawString (lexerState *lexer)
{
	size_t num_initial_hashes = 0;
	vStringClear(lexer->token_str);
	advanceAndStoreChar(lexer);
	/* Count how many leading hashes there are */
	while (lexer->cur_c == '#')
	{
		num_initial_hashes++;
		advanceAndStoreChar(lexer);
	}
	/* No opening quote after r / r#...: not a raw string */
	if (lexer->cur_c != '"')
		return;
	advanceAndStoreChar(lexer);
	while (lexer->cur_c != EOF)
	{
		/* After each quote, count the trailing hashes, consuming at most as
		 * many as there were leading hashes. The string ends as soon as the
		 * counts are equal; otherwise the quote was part of the contents. */
		if (lexer->cur_c == '"')
		{
			size_t num_trailing_hashes = 0;
			advanceAndStoreChar(lexer);
			while (lexer->cur_c == '#' && num_trailing_hashes < num_initial_hashes)
			{
				num_trailing_hashes++;

				advanceAndStoreChar(lexer);
			}
			if (num_trailing_hashes == num_initial_hashes)
				break;
		}
		else
		{
			advanceAndStoreChar(lexer);
		}
	}
}
306
307 /* This deals with character literals: 'n', '\n', '\uFFFF'; and lifetimes:
308 * 'lifetime. We'll use this approximate regexp for the literals:
309 * \' \\ [^']+ \' or \' [^'] \' or \' \\ \' \'. Either way, we'll treat this
310 * token as a string, so it gets preserved as is for function signatures with
311 * lifetimes. */
scanCharacterOrLifetime(lexerState * lexer)312 static void scanCharacterOrLifetime (lexerState *lexer)
313 {
314 vStringClear(lexer->token_str);
315 advanceAndStoreChar(lexer);
316
317 if (lexer->cur_c == '\\')
318 {
319 advanceAndStoreChar(lexer);
320 /* The \' \\ \' \' (literally '\'') case */
321 if (lexer->cur_c == '\'' && lexer->next_c == '\'')
322 {
323 advanceAndStoreChar(lexer);
324 advanceAndStoreChar(lexer);
325 }
326 /* The \' \\ [^']+ \' case */
327 else
328 {
329 while (lexer->cur_c != EOF && lexer->cur_c != '\'')
330 advanceAndStoreChar(lexer);
331 }
332 }
333 /* The \' [^'] \' case */
334 else if (lexer->cur_c != '\'' && lexer->next_c == '\'')
335 {
336 advanceAndStoreChar(lexer);
337 advanceAndStoreChar(lexer);
338 }
339 /* Otherwise it is malformed, or a lifetime */
340 }
341
/* Advances the parser one token, optionally skipping whitespace
 * (otherwise it is concatenated and returned as a single whitespace token).
 * Whitespace is needed to properly render function signatures. Unrecognized
 * token starts are stored literally, e.g. token may equal to a character '#'.
 *
 * The new token is stored in lexer->cur_token and also returned. For
 * identifier and string-like tokens the text is left in lexer->token_str.
 * lexer->line and lexer->pos are refreshed from the input file state before
 * the token is scanned. Comments count as whitespace. */
static int advanceToken (lexerState *lexer, bool skip_whitspace)
{
	bool have_whitespace = false;
	/* Position reported if this call ends up returning TOKEN_WHITESPACE */
	lexer->line = getInputLineNumber();
	lexer->pos = getInputFilePosition();
	/* Phase 1: swallow any run of whitespace and comments */
	while (lexer->cur_c != EOF)
	{
		if (isWhitespace(lexer->cur_c))
		{
			scanWhitespace(lexer);
			have_whitespace = true;
		}
		else if (lexer->cur_c == '/' && (lexer->next_c == '/' || lexer->next_c == '*'))
		{
			scanComments(lexer);
			have_whitespace = true;
		}
		else
		{
			if (have_whitespace && !skip_whitspace)
				return lexer->cur_token = TOKEN_WHITESPACE;
			break;
		}
	}
	/* Phase 2: scan the actual token; re-read the position now that the
	 * leading whitespace is gone */
	lexer->line = getInputLineNumber();
	lexer->pos = getInputFilePosition();
	/* Every branch below returns, so this loop body runs at most once */
	while (lexer->cur_c != EOF)
	{
		if (lexer->cur_c == '"')
		{
			scanString(lexer);
			return lexer->cur_token = TOKEN_STRING;
		}
		/* Must be tested before the identifier case, as 'r' also starts
		 * identifiers */
		else if (lexer->cur_c == 'r' && (lexer->next_c == '#' || lexer->next_c == '"'))
		{
			scanRawString(lexer);
			return lexer->cur_token = TOKEN_STRING;
		}
		else if (lexer->cur_c == '\'')
		{
			scanCharacterOrLifetime(lexer);
			return lexer->cur_token = TOKEN_STRING;
		}
		else if (isIdentifierStart(lexer->cur_c))
		{
			scanIdentifier(lexer);
			return lexer->cur_token = TOKEN_IDENT;
		}
		/* These shift tokens aren't too important for tag-generation per se,
		 * but they confuse the skipUntil code which tracks the <> pairs. */
		else if (lexer->cur_c == '>' && lexer->next_c == '>')
		{
			advanceNChar(lexer, 2);
			return lexer->cur_token = TOKEN_RSHIFT;
		}
		else if (lexer->cur_c == '<' && lexer->next_c == '<')
		{
			advanceNChar(lexer, 2);
			return lexer->cur_token = TOKEN_LSHIFT;
		}
		else if (lexer->cur_c == '-' && lexer->next_c == '>')
		{
			advanceNChar(lexer, 2);
			return lexer->cur_token = TOKEN_RARROW;
		}
		else
		{
			/* Anything else is a one-character token whose value is the
			 * character itself */
			int c = lexer->cur_c;
			advanceChar(lexer);
			return lexer->cur_token = c;
		}
	}
	return lexer->cur_token = TOKEN_EOF;
}
420
/* Prepares a lexer for use: allocates the token buffer, fills the
 * two-character window with the first two characters of the input, skips a
 * leading "#!" line (see scanComments) and scans the first token. */
static void initLexer (lexerState *lexer)
{
	lexer->token_str = vStringNew();
	advanceNChar(lexer, 2);

	{
		const bool starts_with_shebang =
			(lexer->cur_c == '#' && lexer->next_c == '!');

		if (starts_with_shebang)
			scanComments(lexer);
	}
	advanceToken(lexer, true);
}
430
/* Releases the token buffer allocated by initLexer() and clears the pointer
 * so that it cannot be used afterwards. */
static void deInitLexer (lexerState *lexer)
{
	vString *const buffer = lexer->token_str;

	vStringDelete(buffer);
	lexer->token_str = NULL;
}
436
/* Emits one tag entry.
 *   ident        name of the tag
 *   arg_list     signature text, or NULL when there is none
 *   kind         RustKind of the tag; nothing is emitted for K_NONE or for
 *                a kind that is disabled in rustKinds[]
 *   line, pos    location recorded for the tag
 *   scope        scope path of the enclosing item
 *   parent_kind  kind of the enclosing item; with K_NONE no scope
 *                information is attached */
static void addTag (vString* ident, const char* arg_list, int kind, unsigned long line, MIOPos pos, vString *scope, int parent_kind)
{
	tagEntryInfo entry;
	const bool has_parent = (parent_kind != K_NONE);

	if (kind == K_NONE)
		return;
	if (! rustKinds[kind].enabled)
		return;

	initTagEntry(&entry, vStringValue(ident), kind);

	entry.lineNumber = line;
	entry.filePosition = pos;
	entry.extensionFields.signature = arg_list;
	/* FIXME: no type information is recorded yet (map to typeRef[1]?) */

	if (has_parent)
	{
		entry.extensionFields.scopeKindIndex = parent_kind;
		entry.extensionFields.scopeName = vStringValue(scope);
	}
	makeTagEntry(&entry);
}
456
457 /* Skip tokens until one of the goal tokens is hit. Escapes when level = 0 if there are no goal tokens.
458 * Keeps track of balanced <>'s, ()'s, []'s, and {}'s and ignores the goal tokens within those pairings */
skipUntil(lexerState * lexer,int goal_tokens[],int num_goal_tokens)459 static void skipUntil (lexerState *lexer, int goal_tokens[], int num_goal_tokens)
460 {
461 int angle_level = 0;
462 int paren_level = 0;
463 int brace_level = 0;
464 int bracket_level = 0;
465 while (lexer->cur_token != TOKEN_EOF)
466 {
467 if (angle_level == 0 && paren_level == 0 && brace_level == 0
468 && bracket_level == 0)
469 {
470 int ii = 0;
471 for(ii = 0; ii < num_goal_tokens; ii++)
472 {
473 if (lexer->cur_token == goal_tokens[ii])
474 {
475 break;
476 }
477 }
478 if (ii < num_goal_tokens)
479 break;
480 }
481 switch (lexer->cur_token)
482 {
483 case '<':
484 angle_level++;
485 break;
486 case '(':
487 paren_level++;
488 break;
489 case '{':
490 brace_level++;
491 break;
492 case '[':
493 bracket_level++;
494 break;
495 case '>':
496 angle_level--;
497 break;
498 case ')':
499 paren_level--;
500 break;
501 case '}':
502 brace_level--;
503 break;
504 case ']':
505 bracket_level--;
506 break;
507 case TOKEN_RSHIFT:
508 if (angle_level >= 2)
509 angle_level -= 2;
510 break;
511 /* TOKEN_LSHIFT is never interpreted as two <'s in valid Rust code */
512 default:
513 break;
514 }
515 /* Has to be after the token switch to catch the case when we start with the initial level token */
516 if (num_goal_tokens == 0 && angle_level == 0 && paren_level == 0 && brace_level == 0
517 && bracket_level == 0)
518 break;
519 advanceToken(lexer, true);
520 }
521 }
522
/* Function format:
 * "fn" <ident>[<type_bounds>] "(" [<args>] ")" ["->" <ret_type>] "{" [<body>] "}"
 *
 * Called with the "fn" keyword as the current token. Everything between the
 * function name and the opening '{' (or a terminating ';') is collected
 * verbatim as the tag's signature. The tag kind is K_METHOD inside a trait
 * or impl and K_FN anywhere else. The function name is appended to scope
 * before the body is parsed; the caller is expected to restore the scope. */
static void parseFn (lexerState *lexer, vString *scope, int parent_kind)
{
	int kind = (parent_kind == K_TRAIT || parent_kind == K_IMPL) ? K_METHOD : K_FN;
	vString *name;
	vString *arg_list;
	unsigned long line;
	MIOPos pos;
	int paren_level = 0;
	int bracket_level = 0;
	bool found_paren = false;
	bool valid_signature = true;

	advanceToken(lexer, true);
	if (lexer->cur_token != TOKEN_IDENT)
		return;

	name = vStringNewCopy(lexer->token_str);
	arg_list = vStringNew();

	/* Remember where the name was seen; the lexer moves on below */
	line = lexer->line;
	pos = lexer->pos;

	advanceToken(lexer, true);

	/* HACK: This is a bit coarse as far as what tag entry means by
	 * 'arglist'... */
	while (lexer->cur_token != '{')
	{
		/* A ';' outside of brackets ends a declaration without a body;
		 * inside brackets it belongs to an array type such as [T; N] */
		if (lexer->cur_token == ';' && bracket_level == 0)
		{
			break;
		}
		else if (lexer->cur_token == '}')
		{
			valid_signature = false;
			break;
		}
		else if (lexer->cur_token == '(')
		{
			found_paren = true;
			paren_level++;
		}
		else if (lexer->cur_token == ')')
		{
			paren_level--;
			if (paren_level < 0)
			{
				valid_signature = false;
				break;
			}
		}
		else if (lexer->cur_token == '[')
		{
			bracket_level++;
		}
		else if (lexer->cur_token == ']')
		{
			bracket_level--;
		}
		else if (lexer->cur_token == TOKEN_EOF)
		{
			valid_signature = false;
			break;
		}
		writeCurTokenToStr(lexer, arg_list);
		/* Whitespace tokens are kept here so the signature stays readable */
		advanceToken(lexer, false);
	}
	/* A signature needs an argument list and balanced parens/brackets */
	if (!found_paren || paren_level != 0 || bracket_level != 0)
		valid_signature = false;

	if (valid_signature)
	{
		vStringStripTrailing(arg_list);
		addTag(name, vStringValue(arg_list), kind, line, pos, scope, parent_kind);
		addToScope(scope, name);
		/* Returns immediately when the declaration ended with ';' */
		parseBlock(lexer, true, kind, scope);
	}

	vStringDelete(name);
	vStringDelete(arg_list);
}
606
607 /* Mod format:
608 * "mod" <ident> "{" [<body>] "}"
609 * "mod" <ident> ";"*/
parseMod(lexerState * lexer,vString * scope,int parent_kind)610 static void parseMod (lexerState *lexer, vString *scope, int parent_kind)
611 {
612 advanceToken(lexer, true);
613 if (lexer->cur_token != TOKEN_IDENT)
614 return;
615
616 addTag(lexer->token_str, NULL, K_MOD, lexer->line, lexer->pos, scope, parent_kind);
617 addToScope(scope, lexer->token_str);
618
619 advanceToken(lexer, true);
620
621 parseBlock(lexer, true, K_MOD, scope);
622 }
623
624 /* Trait format:
625 * "trait" <ident> [<type_bounds>] "{" [<body>] "}"
626 */
parseTrait(lexerState * lexer,vString * scope,int parent_kind)627 static void parseTrait (lexerState *lexer, vString *scope, int parent_kind)
628 {
629 int goal_tokens[] = {'{'};
630
631 advanceToken(lexer, true);
632 if (lexer->cur_token != TOKEN_IDENT)
633 return;
634
635 addTag(lexer->token_str, NULL, K_TRAIT, lexer->line, lexer->pos, scope, parent_kind);
636 addToScope(scope, lexer->token_str);
637
638 advanceToken(lexer, true);
639
640 skipUntil(lexer, goal_tokens, 1);
641
642 parseBlock(lexer, true, K_TRAIT, scope);
643 }
644
645 /* Skips type blocks of the form <T:T<T>, ...> */
skipTypeBlock(lexerState * lexer)646 static void skipTypeBlock (lexerState *lexer)
647 {
648 if (lexer->cur_token == '<')
649 {
650 skipUntil(lexer, NULL, 0);
651 advanceToken(lexer, true);
652 }
653 }
654
655 /* Essentially grabs the last ident before 'for', '<' and '{', which
656 * tends to correspond to what we want as the impl tag entry name */
parseQualifiedType(lexerState * lexer,vString * name)657 static void parseQualifiedType (lexerState *lexer, vString* name)
658 {
659 while (lexer->cur_token != TOKEN_EOF)
660 {
661 if (lexer->cur_token == TOKEN_IDENT)
662 {
663 if (strcmp(vStringValue(lexer->token_str), "for") == 0
664 || strcmp(vStringValue(lexer->token_str), "where") == 0)
665 break;
666 vStringClear(name);
667 vStringCat(name, lexer->token_str);
668 }
669 else if (lexer->cur_token == '<' || lexer->cur_token == '{')
670 {
671 break;
672 }
673 advanceToken(lexer, true);
674 }
675 skipTypeBlock(lexer);
676 }
677
678 /* Impl format:
679 * "impl" [<type_bounds>] <qualified_ident>[<type_bounds>] ["for" <qualified_ident>[<type_bounds>]] "{" [<body>] "}"
680 */
parseImpl(lexerState * lexer,vString * scope,int parent_kind)681 static void parseImpl (lexerState *lexer, vString *scope, int parent_kind)
682 {
683 unsigned long line;
684 MIOPos pos;
685 vString *name;
686
687 advanceToken(lexer, true);
688
689 line = lexer->line;
690 pos = lexer->pos;
691
692 skipTypeBlock(lexer);
693
694 name = vStringNew();
695
696 parseQualifiedType(lexer, name);
697
698 if (lexer->cur_token == TOKEN_IDENT && strcmp(vStringValue(lexer->token_str), "for") == 0)
699 {
700 advanceToken(lexer, true);
701 parseQualifiedType(lexer, name);
702 }
703
704 addTag(name, NULL, K_IMPL, line, pos, scope, parent_kind);
705 addToScope(scope, name);
706
707 parseBlock(lexer, true, K_IMPL, scope);
708
709 vStringDelete(name);
710 }
711
712 /* Static format:
713 * "static" ["mut"] <ident>
714 */
parseStatic(lexerState * lexer,vString * scope,int parent_kind)715 static void parseStatic (lexerState *lexer, vString *scope, int parent_kind)
716 {
717 advanceToken(lexer, true);
718 if (lexer->cur_token != TOKEN_IDENT)
719 return;
720 if (strcmp(vStringValue(lexer->token_str), "mut") == 0)
721 {
722 advanceToken(lexer, true);
723 }
724 if (lexer->cur_token != TOKEN_IDENT)
725 return;
726
727 addTag(lexer->token_str, NULL, K_STATIC, lexer->line, lexer->pos, scope, parent_kind);
728 }
729
730 /* Const format:
731 * "const" <ident>
732 */
parseConst(lexerState * lexer,vString * scope,int parent_kind)733 static void parseConst (lexerState *lexer, vString *scope, int parent_kind)
734 {
735 advanceToken(lexer, true);
736 if (lexer->cur_token != TOKEN_IDENT)
737 return;
738
739 addTag(lexer->token_str, NULL, K_CONST, lexer->line, lexer->pos, scope, parent_kind);
740 }
741
742 /* Type format:
743 * "type" <ident>
744 */
parseType(lexerState * lexer,vString * scope,int parent_kind)745 static void parseType (lexerState *lexer, vString *scope, int parent_kind)
746 {
747 advanceToken(lexer, true);
748 if (lexer->cur_token != TOKEN_IDENT)
749 return;
750
751 addTag(lexer->token_str, NULL, K_TYPE, lexer->line, lexer->pos, scope, parent_kind);
752 }
753
/* Structs and enums are very similar syntax-wise.
 * It is possible to parse variants a bit more cleverly (e.g. make tuple variants functions and
 * struct variants structs) but it'd be too clever and the signature wouldn't make too much sense without
 * the enum's definition (e.g. for the type bounds)
 *
 * Struct/Enum format:
 * "struct/enum" <ident>[<type_bounds>] "{" [<ident>,]+ "}"
 * "struct/enum" <ident>[<type_bounds>] ";"
 *
 * Called with the "struct"/"enum" keyword as the current token. Tags the
 * type itself and then, when a '{' body follows, one K_FIELD (struct) or
 * K_VARIANT (enum) tag per member. The type name is appended to scope; the
 * caller is expected to restore the scope.
 * */
static void parseStructOrEnum (lexerState *lexer, vString *scope, int parent_kind, bool is_struct)
{
	int kind = is_struct ? K_STRUCT : K_ENUM;
	int field_kind = is_struct ? K_FIELD : K_VARIANT;
	int goal_tokens1[] = {';', '{'};

	advanceToken(lexer, true);
	if (lexer->cur_token != TOKEN_IDENT)
		return;

	addTag(lexer->token_str, NULL, kind, lexer->line, lexer->pos, scope, parent_kind);
	addToScope(scope, lexer->token_str);

	/* Skip type bounds (and anything else bracketed) up to the body or the
	 * terminating ';' */
	skipUntil(lexer, goal_tokens1, 2);

	if (lexer->cur_token == '{')
	{
		vString *field_name = vStringNew();
		/* The first pass sees the '{' itself and merely steps over it */
		while (lexer->cur_token != TOKEN_EOF)
		{
			int goal_tokens2[] = {'}', ','};
			/* Skip attributes. Format:
			 * #[..] or #![..]
			 * */
			if (lexer->cur_token == '#')
			{
				advanceToken(lexer, true);
				if (lexer->cur_token == '!')
					advanceToken(lexer, true);
				if (lexer->cur_token == '[')
				{
					/* It's an attribute, skip it. */
					skipUntil(lexer, NULL, 0);
				}
				else
				{
					/* Something's up with this field, skip to the next one */
					skipUntil(lexer, goal_tokens2, 2);
					continue;
				}
			}
			if (lexer->cur_token == TOKEN_IDENT)
			{
				if (strcmp(vStringValue(lexer->token_str), "priv") == 0
				    || strcmp(vStringValue(lexer->token_str), "pub") == 0)
				{
					advanceToken(lexer, true);

					/* Skip the visibility specifications, e.g. pub(crate).
					 * https://doc.rust-lang.org/reference/visibility-and-privacy.html */
					if (lexer->cur_token == '(')
					{
						advanceToken(lexer, true);
						skipUntil (lexer, (int []){')'}, 1);
						advanceToken(lexer, true);
					}

					if (lexer->cur_token != TOKEN_IDENT)
					{
						/* Something's up with this field, skip to the next one */
						skipUntil(lexer, goal_tokens2, 2);
						continue;
					}
				}

				vStringClear(field_name);
				vStringCat(field_name, lexer->token_str);
				addTag(field_name, NULL, field_kind, lexer->line, lexer->pos, scope, kind);
				/* Skip the member's type/payload up to the ',' or '}' */
				skipUntil(lexer, goal_tokens2, 2);
			}
			if (lexer->cur_token == '}')
			{
				/* End of the body: step past the brace and stop */
				advanceToken(lexer, true);
				break;
			}
			advanceToken(lexer, true);
		}
		vStringDelete(field_name);
	}
}
843
844 /* Skip the body of the macro. Can't use skipUntil here as
845 * the body of the macro may have arbitrary code which confuses it (e.g.
846 * bitshift operators/function return arrows) */
skipMacro(lexerState * lexer)847 static void skipMacro (lexerState *lexer)
848 {
849 int level = 0;
850 int plus_token = 0;
851 int minus_token = 0;
852
853 advanceToken(lexer, true);
854 switch (lexer->cur_token)
855 {
856 case '(':
857 plus_token = '(';
858 minus_token = ')';
859 break;
860 case '{':
861 plus_token = '{';
862 minus_token = '}';
863 break;
864 case '[':
865 plus_token = '[';
866 minus_token = ']';
867 break;
868 default:
869 return;
870 }
871
872 while (lexer->cur_token != TOKEN_EOF)
873 {
874 if (lexer->cur_token == plus_token)
875 level++;
876 else if (lexer->cur_token == minus_token)
877 level--;
878 if (level == 0)
879 break;
880 advanceToken(lexer, true);
881 }
882 advanceToken(lexer, true);
883 }
884
885 /*
886 * Macro rules format:
887 * "macro_rules" "!" <ident> <macro_body>
888 */
parseMacroRules(lexerState * lexer,vString * scope,int parent_kind)889 static void parseMacroRules (lexerState *lexer, vString *scope, int parent_kind)
890 {
891 advanceToken(lexer, true);
892
893 if (lexer->cur_token != '!')
894 return;
895
896 advanceToken(lexer, true);
897
898 if (lexer->cur_token != TOKEN_IDENT)
899 return;
900
901 addTag(lexer->token_str, NULL, K_MACRO, lexer->line, lexer->pos, scope, parent_kind);
902
903 skipMacro(lexer);
904 }
905
906 /*
907 * Rust is very liberal with nesting, so this function is used pretty much for any block
908 */
parseBlock(lexerState * lexer,bool delim,int kind,vString * scope)909 static void parseBlock (lexerState *lexer, bool delim, int kind, vString *scope)
910 {
911 int level = 1;
912 if (delim)
913 {
914 if (lexer->cur_token != '{')
915 return;
916 advanceToken(lexer, true);
917 }
918 while (lexer->cur_token != TOKEN_EOF)
919 {
920 if (lexer->cur_token == TOKEN_IDENT)
921 {
922 size_t old_scope_len = vStringLength(scope);
923 if (strcmp(vStringValue(lexer->token_str), "fn") == 0)
924 {
925 parseFn(lexer, scope, kind);
926 }
927 else if(strcmp(vStringValue(lexer->token_str), "mod") == 0)
928 {
929 parseMod(lexer, scope, kind);
930 }
931 else if(strcmp(vStringValue(lexer->token_str), "static") == 0)
932 {
933 parseStatic(lexer, scope, kind);
934 }
935 else if(strcmp(vStringValue(lexer->token_str), "const") == 0)
936 {
937 parseConst(lexer, scope, kind);
938 }
939 else if(strcmp(vStringValue(lexer->token_str), "trait") == 0)
940 {
941 parseTrait(lexer, scope, kind);
942 }
943 else if(strcmp(vStringValue(lexer->token_str), "type") == 0)
944 {
945 parseType(lexer, scope, kind);
946 }
947 else if(strcmp(vStringValue(lexer->token_str), "impl") == 0)
948 {
949 parseImpl(lexer, scope, kind);
950 }
951 else if(strcmp(vStringValue(lexer->token_str), "struct") == 0)
952 {
953 parseStructOrEnum(lexer, scope, kind, true);
954 }
955 else if(strcmp(vStringValue(lexer->token_str), "enum") == 0)
956 {
957 parseStructOrEnum(lexer, scope, kind, false);
958 }
959 else if(strcmp(vStringValue(lexer->token_str), "macro_rules") == 0)
960 {
961 parseMacroRules(lexer, scope, kind);
962 }
963 else
964 {
965 advanceToken(lexer, true);
966 if (lexer->cur_token == '!')
967 {
968 skipMacro(lexer);
969 }
970 }
971 resetScope(scope, old_scope_len);
972 }
973 else if (lexer->cur_token == '{')
974 {
975 level++;
976 advanceToken(lexer, true);
977 }
978 else if (lexer->cur_token == '}')
979 {
980 level--;
981 advanceToken(lexer, true);
982 }
983 else if (lexer->cur_token == '\'')
984 {
985 /* Skip over the 'static lifetime, as it confuses the static parser above */
986 advanceToken(lexer, true);
987 if (lexer->cur_token == TOKEN_IDENT && strcmp(vStringValue(lexer->token_str), "static") == 0)
988 advanceToken(lexer, true);
989 }
990 else
991 {
992 advanceToken(lexer, true);
993 }
994 if (delim && level <= 0)
995 break;
996 }
997 }
998
findRustTags(void)999 static void findRustTags (void)
1000 {
1001 lexerState lexer = {0};
1002 vString* scope = vStringNew();
1003 initLexer(&lexer);
1004
1005 parseBlock(&lexer, false, K_NONE, scope);
1006 vStringDelete(scope);
1007
1008 deInitLexer(&lexer);
1009 }
1010
RustParser(void)1011 extern parserDefinition *RustParser (void)
1012 {
1013 static const char *const extensions[] = { "rs", NULL };
1014 parserDefinition *def = parserNew ("Rust");
1015 def->kindTable = rustKinds;
1016 def->kindCount = ARRAY_SIZE (rustKinds);
1017 def->extensions = extensions;
1018 def->parser = findRustTags;
1019
1020 return def;
1021 }
1022