1 /*
2 * Copyright (c) 2000-2001, Thaddeus Covert <sahuagin@mediaone.net>
3 * Copyright (c) 2002 Matthias Veit <matthias_veit@yahoo.de>
4 * Copyright (c) 2004 Elliott Hughes <enh@acm.org>
5 *
6 * This source code is released for free distribution under the terms of the
7 * GNU General Public License version 2 or (at your option) any later version.
8 *
9 * This module contains functions for generating tags for Ruby language
10 * files.
11 */
12
13 /*
14 * INCLUDE FILES
15 */
16 #include "general.h" /* must always come first */
17
18 #include <ctype.h>
19 #include <string.h>
20
21 #include "debug.h"
22 #include "entry.h"
23 #include "parse.h"
24 #include "promise.h"
25 #include "nestlevel.h"
26 #include "read.h"
27 #include "routines.h"
28 #include "strlist.h"
29 #include "subparser.h"
30 #include "vstring.h"
31
32 #include "ruby.h"
33
34 /*
35 * DATA DECLARATIONS
36 */
/* Tag kinds emitted by this parser.  Every non-negative value doubles as an
 * index into RubyKinds[], so both must be kept in the same order. */
typedef enum {
	K_UNDEFINED = -1,	/* no kind could be determined */
	K_CLASS     = 0,
	K_METHOD    = 1,
	K_MODULE    = 2,
	K_SINGLETON = 3,
	K_CONST     = 4,
	K_ACCESSOR  = 5,
	K_ALIAS     = 6,
	K_LIBRARY   = 7,
} rubyKind;
48
/* Roles of a K_LIBRARY reference tag, listed in the same order as the
 * entries of RubyLibraryRoles[]. */
typedef enum {
	RUBY_LIBRARY_REQUIRED     = 0,	/* require "..." */
	RUBY_LIBRARY_REQUIRED_REL = 1,	/* require_relative "..." */
	RUBY_LIBRARY_LOADED       = 2,	/* load "..." */
} rubyLibraryRole;
54
55 /*
56 * DATA DEFINITIONS
57 */
58
59 static roleDefinition RubyLibraryRoles [] = {
60 { true, "required", "loaded by \"require\" method" },
61 { true, "requiredRel", "loaded by \"require_relative\" method" },
62 { true, "loaded", "loaded by \"load\" method" },
63 };
64
65 static kindDefinition RubyKinds [] = {
66 { true, 'c', "class", "classes" },
67 { true, 'f', "method", "methods" },
68 { true, 'm', "module", "modules" },
69 { true, 'S', "singletonMethod", "singleton methods" },
70 { true, 'C', "constant", "constants" },
71 { true, 'A', "accessor", "accessors" },
72 { true, 'a', "alias", "aliases" },
73 { true, 'L', "library", "libraries",
74 .referenceOnly = true, ATTACH_ROLES(RubyLibraryRoles) },
75 };
76
/* Parser-specific fields; each value is an index into RubyFields[]. */
typedef enum {
	F_MIXIN = 0,
} rubyField;
80
81 static fieldDefinition RubyFields[] = {
82 { .name = "mixin",
83 .description = "how the class or module is mixed in (mixin:HOW:MODULE)",
84 .enabled = true },
85 };
86
87 struct blockData {
88 stringList *mixin;
89 rubySubparser *subparser;
90 int subparserCorkIndex;
91 };
92
93 static NestingLevels* nesting = NULL;
94
95 #define SCOPE_SEPARATOR '.'
96
97 /*
98 * FUNCTION DEFINITIONS
99 */
100
101 static void enterUnnamedScope (void);
102
103 /*
104 * Returns a string describing the scope in 'nls'.
105 * We record the current scope as a list of entered scopes.
106 * Scopes corresponding to 'if' statements and the like are
107 * represented by empty strings. Scopes corresponding to
108 * modules and classes are represented by the name of the
109 * module or class.
110 */
nestingLevelsToScope(const NestingLevels * nls)111 static vString* nestingLevelsToScope (const NestingLevels* nls)
112 {
113 int i;
114 unsigned int chunks_output = 0;
115 vString* result = vStringNew ();
116 for (i = 0; i < nls->n; ++i)
117 {
118 NestingLevel *nl = nestingLevelsGetNthFromRoot (nls, i);
119 tagEntryInfo *e = getEntryOfNestingLevel (nl);
120 if (e && strlen (e->name) > 0 && (!e->placeholder))
121 {
122 if (chunks_output++ > 0)
123 vStringPut (result, SCOPE_SEPARATOR);
124 vStringCatS (result, e->name);
125 }
126 }
127 return result;
128 }
129
/*
* Attempts to advance '*s' past 'literal'.
*
* s:         in/out cursor into a NUL-terminated line.
* literal:   text expected at the cursor.
* end_check: predicate applied to the character that follows the literal;
*            it must accept that character (NUL at end of line) for the
*            match to count, which keeps "def" from matching "define".
*
* Returns true and moves '*s' just past the literal on a match; returns
* false and leaves '*s' untouched otherwise.
*/
static bool canMatch (const unsigned char** s, const char* literal,
                      bool (*end_check) (int))
{
	const size_t literal_length = strlen (literal);

	/* strncmp() stops at the terminating NUL of '*s', so a line shorter
	 * than 'literal' simply compares unequal; there is no need to measure
	 * the whole remaining line on every call. */
	if (strncmp ((const char*) *s, literal, literal_length) != 0)
		return false;

	/* All 'literal_length' characters matched, hence this index is at most
	 * the position of the terminating NUL. */
	if (! end_check ((*s)[literal_length]))
		return false;

	*s += literal_length;
	return true;
}
157
/* True for the characters this parser accepts inside an identifier:
 * alphanumerics (per isalnum()) and the underscore. */
static bool isIdentChar (int c)
{
	if (c == '_')
		return true;
	return isalnum (c) != 0;
}
162
/* True when 'c' is neither an identifier character nor a colon.  Used as
 * the end-of-token test for keywords. */
static bool notIdentCharButColon (int c)
{
	if (c == ':')
		return false;
	return ! isIdentChar (c);
}
167
/* True when 'c' is one of the characters treated as part of an operator. */
static bool isOperatorChar (int c)
{
	switch (c)
	{
	case '[': case ']':
	case '=': case '!': case '~':
	case '+': case '-':
	case '@': case '*': case '/': case '%':
	case '<': case '>':
	case '&': case '^': case '|':
		return true;
	default:
		return false;
	}
}
177
/* Negation of isOperatorChar(); used as the end-of-token test when an
 * operator method name is matched. */
static bool notOperatorChar (int c)
{
	return isOperatorChar (c) ? false : true;
}
182
/* True for the characters that may prefix a variable name: '@' and '$'. */
static bool isSigilChar (int c)
{
	switch (c)
	{
	case '@':
	case '$':
		return true;
	default:
		return false;
	}
}
187
/* True for whitespace (per isspace()) and for NUL, so that the end of a
 * line is accepted wherever whitespace is expected. */
static bool isWhitespace (int c)
{
	if (c == 0)
		return true;
	return isspace (c) != 0;
}
192
/*
* Moves '*s' forward over every leading character accepted by 'predicate',
* stopping at the first rejected character or at the terminating NUL
* (the predicate is never asked about the NUL).
*
* Returns true if '*s' moved by at least one position.
*/
static bool advanceWhile (const unsigned char** s, bool (*predicate) (int))
{
	const unsigned char *const start = *s;
	const unsigned char *cursor = start;

	while (*cursor != '\0' && predicate (*cursor))
		cursor++;

	*s = cursor;
	return cursor != start;
}
213
214 #define canMatchKeyword rubyCanMatchKeyword
rubyCanMatchKeyword(const unsigned char ** s,const char * literal)215 extern bool rubyCanMatchKeyword (const unsigned char** s, const char* literal)
216 {
217 /* Using notIdentCharButColon() here.
218 *
219 * A hash can be defined like {for: nil, foo: 0}.
220 *"for" in the above example is not a keyword.
221 */
222 return canMatch (s, literal, notIdentCharButColon);
223 }
224
225 /*
226 * Extends canMatch. Works similarly, but allows assignment to precede
227 * the keyword, as block assignment is a common Ruby idiom.
228 */
229 #define canMatchKeywordWithAssign rubyCanMatchKeywordWithAssign
rubyCanMatchKeywordWithAssign(const unsigned char ** s,const char * literal)230 extern bool rubyCanMatchKeywordWithAssign (const unsigned char** s, const char* literal)
231 {
232 const unsigned char* original_pos = *s;
233
234 if (canMatchKeyword (s, literal))
235 {
236 return true;
237 }
238
239 advanceWhile (s, isSigilChar);
240
241 if (! advanceWhile (s, isIdentChar))
242 {
243 *s = original_pos;
244 return false;
245 }
246
247 advanceWhile (s, isWhitespace);
248
249 if (! (advanceWhile (s, isOperatorChar) && *(*s - 1) == '='))
250 {
251 *s = original_pos;
252 return false;
253 }
254
255 advanceWhile (s, isWhitespace);
256
257 if (canMatchKeyword (s, literal))
258 {
259 return true;
260 }
261
262 *s = original_pos;
263 return false;
264 }
265
266 /*
267 * Attempts to advance 'cp' past a Ruby operator method name. Returns
268 * true if successful (and copies the name into 'name'), false otherwise.
269 */
parseRubyOperator(vString * name,const unsigned char ** cp)270 static bool parseRubyOperator (vString* name, const unsigned char** cp)
271 {
272 static const char* RUBY_OPERATORS[] = {
273 "[]", "[]=",
274 "**",
275 "!", "~", "+@", "-@",
276 "*", "/", "%",
277 "+", "-",
278 ">>", "<<",
279 "&",
280 "^", "|",
281 "<=", "<", ">", ">=",
282 "<=>", "==", "===", "!=", "=~", "!~",
283 "`",
284 NULL
285 };
286 int i;
287 for (i = 0; RUBY_OPERATORS[i] != NULL; ++i)
288 {
289 if (canMatch (cp, RUBY_OPERATORS[i], notOperatorChar))
290 {
291 vStringCatS (name, RUBY_OPERATORS[i]);
292 return true;
293 }
294 }
295 return false;
296 }
297
298 /*
299 * Emits a tag for the given 'name' of kind 'kind' at the current nesting.
300 */
emitRubyTagFull(vString * name,rubyKind kind,bool pushLevel,bool clearName)301 static int emitRubyTagFull (vString* name, rubyKind kind, bool pushLevel, bool clearName)
302 {
303 tagEntryInfo tag;
304 vString* scope;
305 tagEntryInfo *parent;
306 rubyKind parent_kind = K_UNDEFINED;
307 NestingLevel *lvl;
308 const char *unqualified_name;
309 const char *qualified_name;
310 int r;
311 bool anonymous = false;
312
313 if (!name)
314 {
315 name = anonGenerateNew ("__anon", K_CLASS);
316 anonymous = true;
317 }
318
319 if (!RubyKinds[kind].enabled) {
320 return CORK_NIL;
321 }
322
323 scope = nestingLevelsToScope (nesting);
324 lvl = nestingLevelsGetCurrent (nesting);
325 parent = getEntryOfNestingLevel (lvl);
326 if (parent)
327 parent_kind = parent->kindIndex;
328
329 qualified_name = vStringValue (name);
330 unqualified_name = strrchr (qualified_name, SCOPE_SEPARATOR);
331 if (unqualified_name && unqualified_name[1])
332 {
333 if (unqualified_name > qualified_name)
334 {
335 if (vStringLength (scope) > 0)
336 vStringPut (scope, SCOPE_SEPARATOR);
337 vStringNCatS (scope, qualified_name,
338 unqualified_name - qualified_name);
339 /* assume module parent type for a lack of a better option */
340 parent_kind = K_MODULE;
341 }
342 unqualified_name++;
343 }
344 else
345 unqualified_name = qualified_name;
346
347 initTagEntry (&tag, unqualified_name, kind);
348
349 /* Don't fill the scope field for a tag entry representing
350 * a global variable. */
351 if (unqualified_name[0] != '$'
352 && vStringLength (scope) > 0) {
353 Assert (0 <= parent_kind &&
354 (size_t) parent_kind < (ARRAY_SIZE (RubyKinds)));
355
356 tag.extensionFields.scopeKindIndex = parent_kind;
357 tag.extensionFields.scopeName = vStringValue (scope);
358 }
359
360 if (anonymous)
361 markTagExtraBit (&tag, XTAG_ANONYMOUS);
362
363 r = makeTagEntry (&tag);
364
365 if (pushLevel)
366 nestingLevelsPush (nesting, r);
367
368 if (clearName)
369 vStringClear (name);
370
371 if (anonymous)
372 vStringDelete (name);
373
374 vStringDelete (scope);
375
376 return r;
377 }
378
/* Emits a tag through emitRubyTagFull() with the usual settings: the tag
 * opens a new nesting level unless it is a constant, and 'name' is always
 * cleared afterwards.  Returns what emitRubyTagFull() returns. */
static int emitRubyTag (vString* name, rubyKind kind)
{
	const bool opens_scope = (kind != K_CONST);

	return emitRubyTagFull (name, kind, opens_scope, true);
}
383
/* Tests whether 'ch' is one of the characters in 'list'.
 * NUL is never considered a member: strchr() would otherwise match the
 * terminator of 'list' and report true for it. */
static bool charIsIn (char ch, const char* list)
{
	return ch != '\0' && strchr (list, ch) != NULL;
}
389
/* Moves '*cp' to the first character that is not whitespace (per
 * isspace()); the terminating NUL is never skipped. */
#define skipWhitespace rubySkipWhitespace
extern void rubySkipWhitespace (const unsigned char** cp)
{
	const unsigned char *p = *cp;

	while (isspace (*p))
		p++;

	*cp = p;
}
399
400 /*
401 * Copies the characters forming an identifier from *cp into
402 * name, leaving *cp pointing to the character after the identifier.
403 */
parseIdentifier(const unsigned char ** cp,vString * name,rubyKind kind)404 static rubyKind parseIdentifier (
405 const unsigned char** cp, vString* name, rubyKind kind)
406 {
407 /* Method names are slightly different to class and variable names.
408 * A method name may optionally end with a question mark, exclamation
409 * point or equals sign. These are all part of the name.
410 * A method name may also contain a period if it's a singleton method.
411 */
412 bool had_sep = false;
413 const char* also_ok;
414 if (kind == K_METHOD)
415 {
416 also_ok = ".?!=";
417 }
418 else if (kind == K_SINGLETON)
419 {
420 also_ok = "?!=";
421 }
422 else
423 {
424 also_ok = "";
425 }
426
427 skipWhitespace (cp);
428
429 /* Check for an anonymous (singleton) class such as "class << HTTP". */
430 if (kind == K_CLASS && **cp == '<' && *(*cp + 1) == '<')
431 {
432 return K_UNDEFINED;
433 }
434
435 /* Check for operators such as "def []=(key, val)". */
436 if (kind == K_METHOD || kind == K_SINGLETON)
437 {
438 if (parseRubyOperator (name, cp))
439 {
440 return kind;
441 }
442 }
443
444 /* Copy the identifier into 'name'. */
445 while (**cp != 0 && (**cp == ':' || isIdentChar (**cp) || charIsIn (**cp, also_ok)))
446 {
447 char last_char = **cp;
448
449 if (last_char == ':')
450 had_sep = true;
451 else
452 {
453 if (had_sep)
454 {
455 vStringPut (name, SCOPE_SEPARATOR);
456 had_sep = false;
457 }
458 vStringPut (name, last_char);
459 }
460 ++*cp;
461
462 if (kind == K_METHOD)
463 {
464 /* Recognize singleton methods. */
465 if (last_char == '.')
466 {
467 vStringClear (name);
468 return parseIdentifier (cp, name, K_SINGLETON);
469 }
470 }
471
472 if (kind == K_METHOD || kind == K_SINGLETON)
473 {
474 /* Recognize characters which mark the end of a method name. */
475 if (charIsIn (last_char, "?!="))
476 {
477 break;
478 }
479 }
480 }
481 return kind;
482 }
483
/* Reads a method name at '*cp' into 'vstr' with parseIdentifier().
 * Returns true only when the result is a plain method name; a
 * "receiver.name" form is reported as false. */
extern bool rubyParseMethodName (const unsigned char **cp, vString* vstr)
{
	const rubyKind parsed = parseIdentifier (cp, vstr, K_METHOD);

	return parsed == K_METHOD;
}
488
/* Reads a module name at '*cp' into 'vstr' with parseIdentifier() and
 * returns true when parseIdentifier() reports K_MODULE. */
extern bool rubyParseModuleName (const unsigned char **cp, vString* vstr)
{
	const rubyKind parsed = parseIdentifier (cp, vstr, K_MODULE);

	return parsed == K_MODULE;
}
493
/*
* Reads the body of a string literal.  '*cp' must point just after the
* opening quote; 'boundary' is the closing quote character.
*
* Characters up to the boundary (or the end of the line) are appended to
* 'vstr' unless 'vstr' is NULL.  The closing boundary, if present, is
* skipped but not stored.  Escape sequences are not interpreted.
*/
static void parseString (const unsigned char** cp, unsigned char boundary, vString* vstr)
{
	const unsigned char *p = *cp;

	for (; *p != 0 && *p != boundary; p++)
	{
		if (vstr)
			vStringPut (vstr, *p);
	}

	/* step over the closing boundary */
	if (*p == boundary)
		p++;

	*cp = p;
}
507
/* Exported wrapper around parseString().  Returns true if '*cp' was moved,
 * i.e. at least one character (possibly only the closing boundary) was
 * consumed. */
extern bool rubyParseString (const unsigned char** cp, unsigned char boundary, vString* vstr)
{
	const unsigned char *const before = *cp;

	parseString (cp, boundary, vstr);

	return *cp != before;
}
514
/*
* Collects a parameter list into 'vstr'.  '*cp' must point just after the
* opening '(' and 'vstr' is expected to already hold that parenthesis.
*
* Reading continues, across input lines if necessary, until the matching
* ')' has been stored or the input ends:
*  - nested parentheses are tracked and copied;
*  - '#' discards the rest of the current line;
*  - quoted strings are copied verbatim through parseString(), so
*    parentheses inside them are not counted (escapes are not handled);
*  - runs of whitespace collapse to the first whitespace character, and
*    that character is dropped again if a ',' follows.
*
* When the input ends first, 'vstr' does not end in ')'.
*/
static void parseSignature (const unsigned char** cp, vString* vstr)
{
	int depth = 1;

	for (;;)
	{
		while (depth != 0 && **cp != '\0')
		{
			const unsigned char c = **cp;

			if (c == '#')
			{
				/* comment: nothing more to take from this line */
				do
					++*cp;
				while (**cp != '\0');
				break;
			}

			if (c == '\'' || c == '"')
			{
				vStringPut (vstr, c);
				++*cp;
				parseString (cp, c, vstr);
				vStringPut (vstr, c);
				/* parseString() already moved past the literal */
				continue;
			}

			if (c == '(')
			{
				depth++;
				vStringPut (vstr, c);
			}
			else if (c == ')')
			{
				depth--;
				vStringPut (vstr, c);
			}
			else if (! isspace (vStringLast (vstr)))
				vStringPut (vstr, c);
			else if (! isspace (c))
			{
				/* the previous character stored is whitespace */
				if (c == ',')
					vStringChop (vstr);
				vStringPut (vstr, c);
			}
			++*cp;
		}

		if (depth == 0)
			return;

		/* The list is still open: go on with the next line. */
		const unsigned char *next_line = readLineFromInputFile ();
		if (next_line == NULL)
			return;
		*cp = next_line;
	}
}
571
/*
* Reads the identifier following a keyword and emits a tag for it.
*
* Nothing happens unless '*cp' points at whitespace.  Otherwise an
* identifier of kind 'expected_kind' is parsed; if one is found, it is
* passed to emitRubyTagFull() together with 'pushLevel' and 'clearName'.
* If none is found, an unnamed scope is entered instead.
*
* Returns the cork index of the emitted tag, or CORK_NIL.
*/
static int readAndEmitTagFull (const unsigned char** cp, rubyKind expected_kind,
                               bool pushLevel, bool clearName)
{
	if (! isspace (**cp))
		return CORK_NIL;

	int cork = CORK_NIL;
	vString *name = vStringNew ();
	const rubyKind actual_kind = parseIdentifier (cp, name, expected_kind);

	if (actual_kind != K_UNDEFINED && vStringLength (name) > 0)
		cork = emitRubyTagFull (name, actual_kind, pushLevel, clearName);
	else
	{
		/*
		* What kind of tags should we create for code like this?
		*
		*    %w(self.clfloor clfloor).each do |name|
		*        module_eval <<-"end;"
		*            def #{name}(x, y=1)
		*                q, r = x.divmod(y)
		*                q = q.to_i
		*                return q, r
		*            end
		*        end;
		*    end
		*
		* Or this?
		*
		*    class << HTTP
		*
		* For now, we don't create any.
		*/
		enterUnnamedScope ();
	}

	vStringDelete (name);
	return cork;
}
612
/* readAndEmitTagFull() with the usual settings: the tag opens a nesting
 * level unless a constant is expected, and the name buffer is cleared. */
static int readAndEmitTag (const unsigned char** cp, rubyKind expected_kind)
{
	const bool opens_scope = (expected_kind != K_CONST);

	return readAndEmitTagFull (cp, expected_kind, opens_scope, true);
}
617
readAndStoreMixinSpec(const unsigned char ** cp,const char * how_mixin)618 static void readAndStoreMixinSpec (const unsigned char** cp, const char *how_mixin)
619 {
620
621 NestingLevel *nl = NULL;
622 tagEntryInfo *e = NULL;
623 int ownerLevel = 0;
624
625 for (ownerLevel = 0; ownerLevel < nesting->n; ownerLevel++)
626 {
627 nl = nestingLevelsGetNthParent (nesting, ownerLevel);
628 e = nl? getEntryOfNestingLevel (nl): NULL;
629
630 /* Ignore "if", "unless", "while" ... */
631 if ((nl && (nl->corkIndex == CORK_NIL)) || (e && e->placeholder))
632 continue;
633 break;
634 }
635
636 if (!e)
637 return;
638
639 if (e->kindIndex == K_SINGLETON)
640 {
641 nl = nestingLevelsGetNthParent (nesting,
642 ownerLevel + 1);
643 if (nl == NULL)
644 return;
645 e = getEntryOfNestingLevel (nl);
646 }
647
648 if (!e)
649 return;
650
651 if (! (e->kindIndex == K_CLASS || e->kindIndex == K_MODULE))
652 return;
653
654 if (isspace (**cp) || (**cp == '('))
655 {
656 if (isspace (**cp))
657 skipWhitespace (cp);
658 if (**cp == '(')
659 ++*cp;
660
661 vString *spec = vStringNewInit (how_mixin);
662 vStringPut(spec, ':');
663
664 size_t len = vStringLength (spec);
665 parseIdentifier (cp, spec, K_MODULE);
666 if (len == vStringLength (spec))
667 {
668 vStringDelete (spec);
669 return;
670 }
671
672 struct blockData *bdata = nestingLevelGetUserData (nl);
673 if (bdata->mixin == NULL)
674 bdata->mixin = stringListNew ();
675 stringListAdd (bdata->mixin, spec);
676 }
677 }
678
enterUnnamedScope(void)679 static void enterUnnamedScope (void)
680 {
681 int r = CORK_NIL;
682 NestingLevel *parent = nestingLevelsGetCurrent (nesting);
683 tagEntryInfo *e_parent = getEntryOfNestingLevel (parent);
684
685 if (e_parent)
686 {
687 tagEntryInfo e;
688 initTagEntry (&e, "", e_parent->kindIndex);
689 e.placeholder = 1;
690 r = makeTagEntry (&e);
691 }
692 nestingLevelsPush (nesting, r);
693 }
694
/*
* Attaches 'subparser' to the current nesting level: the subparser and
* 'subparserCorkIndex' are stored in the level's blockData, and the
* subparser's enterBlockNotify callback is invoked if it has one.
*/
static void parasiteToScope (rubySubparser *subparser, int subparserCorkIndex)
{
	struct blockData *bdata =
		nestingLevelGetUserData (nestingLevelsGetCurrent (nesting));

	bdata->subparserCorkIndex = subparserCorkIndex;
	bdata->subparser = subparser;

	if (subparser->enterBlockNotify != NULL)
		subparser->enterBlockNotify (subparser, subparserCorkIndex);
}
705
/*
* Attaches the mixin specs in 'mixinSpec' to the tag at 'corkIndex' as the
* value of the "mixin" field.  The specs are joined with ','; the joined
* text is built in place in the first item of the list, which therefore
* has to exist.
*/
static void attachMixinField (int corkIndex, stringList *mixinSpec)
{
	vString *joined = stringListItem (mixinSpec, 0);
	const unsigned int count = stringListCount (mixinSpec);
	unsigned int i = 1;

	while (i < count)
	{
		vStringPut (joined, ',');
		vStringCat (joined, stringListItem (mixinSpec, i));
		i++;
	}

	attachParserFieldToCorkEntry (corkIndex, RubyFields [F_MIXIN].ftype,
	                              vStringValue (joined));
}
718
/*
* Callback registered with nestingLevelsNewFull(); it is given the nesting
* level 'nl' whose blockData is about to go away ('data' is unused).
*
* - Mixin specs collected for the level are attached to its tag.
* - The end line of the level's tag is set to the current input line,
*   unless the tag is a placeholder.
* - If a subparser tag is attached to the level, its end line is set too
*   and the subparser's leaveBlockNotify callback, if any, is invoked.
* - The mixin list is released.
*/
static void deleteBlockData (NestingLevel *nl, void *data CTAGS_ATTR_UNUSED)
{
	struct blockData *bdata = nestingLevelGetUserData (nl);

	if (nl->corkIndex != CORK_NIL
		&& bdata->mixin != NULL
		&& stringListCount (bdata->mixin) > 0)
		attachMixinField (nl->corkIndex, bdata->mixin);

	tagEntryInfo *e = getEntryInCorkQueue (nl->corkIndex);
	if (e && !e->placeholder)
		e->extensionFields.endLine = getInputLineNumber ();

	tagEntryInfo *sub_e;
	if (bdata->subparserCorkIndex != CORK_NIL
		&& (sub_e = getEntryInCorkQueue (bdata->subparserCorkIndex)))
	{
		sub_e->extensionFields.endLine = getInputLineNumber ();
		/* The callback is optional, exactly like enterBlockNotify in
		 * parasiteToScope(); calling it unchecked would dereference a
		 * NULL function pointer for subparsers that do not set it. */
		if (bdata->subparser && bdata->subparser->leaveBlockNotify)
			bdata->subparser->leaveBlockNotify (bdata->subparser,
			                                    bdata->subparserCorkIndex);
	}

	if (bdata->mixin)
		stringListDelete (bdata->mixin);
}
745
/*
* Checks whether the text at '*cp' is a constant assignment of the form
* "NAME = ...", where NAME starts with an upper-case letter.
*
* On success the name is appended to 'constant', '*cp' is moved to the '='
* and true is returned.  Otherwise 'constant' is left empty, '*cp' is not
* changed and false is returned.
*
* A '=' that starts "==", "=~" or "=>" is not an assignment; lines such as
* "Foo == bar" or the hash entry "Foo => 1" are therefore rejected.
*/
static bool doesLineIncludeConstant (const unsigned char **cp, vString *constant)
{
	const unsigned char *p = *cp;

	if (isspace (*p))
		skipWhitespace (&p);

	if (! isupper (*p))
		return false;

	while (*p != 0 && isIdentChar (*p))
	{
		vStringPut (constant, *p);
		++p;
	}
	if (isspace (*p))
		skipWhitespace (&p);

	/* p[1] is only inspected when *p is '=', so it is at most the NUL. */
	if (*p == '=' && p[1] != '=' && p[1] != '~' && p[1] != '>')
	{
		*cp = p;
		return true;
	}

	vStringClear (constant);
	return false;
}
772
/*
* Emits the accessor tags for attribute 'a': "a" if 'reader' is set and
* "a=" if 'writer' is set.  Nothing is emitted for an empty name.  When at
* least one tag is requested, 'a' is cleared by the last emission.
*/
static void emitRubyAccessorTags (vString *a, bool reader, bool writer)
{
	if (vStringLength (a) == 0)
		return;

	/* Keep the name after the reader tag if the writer tag still needs it. */
	const bool clear_after_reader = !writer;

	if (reader)
		emitRubyTagFull (a, K_ACCESSOR, false, clear_after_reader);

	if (! writer)
		return;

	vStringPut (a, '=');
	emitRubyTagFull (a, K_ACCESSOR, false, true);
}
786
readAttrsAndEmitTags(const unsigned char ** cp,bool reader,bool writer)787 static void readAttrsAndEmitTags (const unsigned char **cp, bool reader, bool writer)
788 {
789 vString *a = vStringNew ();
790
791 skipWhitespace (cp);
792 if (**cp == '(')
793 ++*cp;
794
795 do {
796 skipWhitespace (cp);
797 if (**cp == ':')
798 {
799 ++*cp;
800 if (K_METHOD == parseIdentifier (cp, a, K_METHOD))
801 {
802 emitRubyAccessorTags (a, reader, writer);
803 skipWhitespace (cp);
804 if (**cp == ',')
805 {
806 ++*cp;
807 continue;
808 }
809 }
810 }
811 else if (**cp == '"' || **cp == '\'')
812 {
813 unsigned char b = **cp;
814 ++*cp;
815 parseString (cp, b, a);
816
817 emitRubyAccessorTags (a, reader, writer);
818 skipWhitespace (cp);
819 if (**cp == ',')
820 {
821 ++*cp;
822 continue;
823 }
824 }
825 break;
826 } while (1);
827
828 vStringDelete (a);
829 }
830
readAliasMethodAndEmitTags(const unsigned char ** cp)831 static int readAliasMethodAndEmitTags (const unsigned char **cp)
832 {
833 int r = CORK_NIL;
834 vString *a = vStringNew ();
835
836 skipWhitespace (cp);
837 if (**cp == '(')
838 ++*cp;
839
840 skipWhitespace (cp);
841 if (**cp == ':')
842 {
843 ++*cp;
844 if (K_METHOD != parseIdentifier (cp, a, K_METHOD))
845 vStringClear (a);
846 }
847 else if (**cp == '"' || **cp == '\'')
848 {
849 unsigned char b = **cp;
850 ++*cp;
851 parseString (cp, b, a);
852 }
853
854 if (vStringLength (a) > 0)
855 r = emitRubyTagFull (a, K_ALIAS, false, false);
856
857 vStringDelete (a);
858 return r;
859 }
860
/*
* Reads a quoted string argument (optionally after an opening parenthesis)
* and emits a reference tag of 'kind' with 'role' for its content.
*
* Returns the value of makeSimpleRefTag(), or CORK_NIL when no non-empty
* string literal is found at '*cp'.
*/
static int readStringAndEmitTag (const unsigned char **cp, rubyKind kind, int role)
{
	vString *text = NULL;
	int cork = CORK_NIL;

	skipWhitespace (cp);
	if (**cp == '(')
		++*cp;
	skipWhitespace (cp);

	const unsigned char quote = **cp;
	if (quote == '"' || quote == '\'')
	{
		++*cp;
		text = vStringNew ();
		parseString (cp, quote, text);

		if (vStringLength (text) > 0)
			cork = makeSimpleRefTag (text, kind, role);
	}

	vStringDelete (text);
	return cork;
}
885
readAndEmitDef(const unsigned char ** cp)886 static int readAndEmitDef (const unsigned char **cp)
887 {
888 rubyKind kind = K_METHOD;
889 NestingLevel *nl = nestingLevelsGetCurrent (nesting);
890 tagEntryInfo *e_scope = getEntryOfNestingLevel (nl);
891
892 /* if the def is inside an unnamed scope at the class level, assume
893 * it's from a singleton from a construct like this:
894 *
895 * class C
896 * class << self
897 * def singleton
898 * ...
899 * end
900 * end
901 * end
902 */
903 if (e_scope && e_scope->kindIndex == K_CLASS && strlen (e_scope->name) == 0)
904 kind = K_SINGLETON;
905 int corkIndex = readAndEmitTag (cp, kind);
906 tagEntryInfo *e = getEntryInCorkQueue (corkIndex);
907
908 /* Fill signature: field. */
909 if (e)
910 {
911 vString *signature = vStringNewInit ("(");
912 skipWhitespace (cp);
913 if (**cp == '(')
914 {
915 ++(*cp);
916 parseSignature (cp, signature);
917 if (vStringLast(signature) != ')')
918 {
919 vStringDelete (signature);
920 signature = NULL;
921 }
922 }
923 else
924 vStringPut (signature, ')');
925 e->extensionFields.signature = vStringDeleteUnwrap (signature);
926 signature = NULL;;
927 vStringDelete (signature);
928 }
929 return corkIndex;
930 }
931
notifyLine(const unsigned char ** cp)932 static rubySubparser *notifyLine (const unsigned char **cp)
933 {
934 subparser *sub;
935 rubySubparser *rubysub = NULL;
936
937 foreachSubparser (sub, false)
938 {
939 rubysub = (rubySubparser *)sub;
940 rubysub->corkIndex = CORK_NIL;
941
942 if (rubysub->lineNotify)
943 {
944 enterSubparser(sub);
945 const unsigned char *base = *cp;
946 rubysub->corkIndex = rubysub->lineNotify(rubysub, cp);
947 leaveSubparser();
948 if (rubysub->corkIndex != CORK_NIL)
949 break;
950 *cp = base;
951 }
952 }
953
954 if (rubysub && rubysub->corkIndex != CORK_NIL)
955 return rubysub;
956 return NULL;
957 }
958
findRubyTags(void)959 static void findRubyTags (void)
960 {
961 const unsigned char *line;
962 bool inMultiLineComment = false;
963 vString *constant = vStringNew ();
964 bool found_rdoc = false;
965
966 nesting = nestingLevelsNewFull (sizeof (struct blockData), deleteBlockData);
967
968 /* FIXME: this whole scheme is wrong, because Ruby isn't line-based.
969 * You could perfectly well write:
970 *
971 * def
972 * method
973 * puts("hello")
974 * end
975 *
976 * if you wished, and this function would fail to recognize anything.
977 */
978 while ((line = readLineFromInputFile ()) != NULL)
979 {
980 rubySubparser *subparser = CORK_NIL;
981 const unsigned char *cp = line;
982 /* if we expect a separator after a while, for, or until statement
983 * separators are "do", ";" or newline */
984 bool expect_separator = false;
985
986 if (found_rdoc == false && strncmp ((const char*)cp, "# =", 3) == 0)
987 {
988 found_rdoc = true;
989 makePromise ("RDoc", 0, 0, 0, 0, 0);
990 }
991
992 if (canMatch (&cp, "=begin", isWhitespace))
993 {
994 inMultiLineComment = true;
995 continue;
996 }
997 if (canMatch (&cp, "=end", isWhitespace))
998 {
999 inMultiLineComment = false;
1000 continue;
1001 }
1002 if (inMultiLineComment)
1003 continue;
1004
1005 skipWhitespace (&cp);
1006
1007 /* Avoid mistakenly starting a scope for modifiers such as
1008 *
1009 * return if <exp>
1010 *
1011 * FIXME: we're fooled if someone does something heinous such as
1012 *
1013 * puts("hello") \
1014 * unless <exp>
1015 */
1016
1017 if (canMatchKeywordWithAssign (&cp, "for") ||
1018 canMatchKeywordWithAssign (&cp, "until") ||
1019 canMatchKeywordWithAssign (&cp, "while"))
1020 {
1021 expect_separator = true;
1022 enterUnnamedScope ();
1023 }
1024 else if (canMatchKeywordWithAssign (&cp, "case") ||
1025 canMatchKeywordWithAssign (&cp, "if") ||
1026 canMatchKeywordWithAssign (&cp, "unless"))
1027 {
1028 enterUnnamedScope ();
1029 }
1030
1031 /*
1032 * "module M", "class C" and "def m" should only be at the beginning
1033 * of a line.
1034 */
1035 if (canMatchKeywordWithAssign (&cp, "module"))
1036 {
1037 readAndEmitTag (&cp, K_MODULE);
1038 }
1039 else if (canMatchKeywordWithAssign (&cp, "class")
1040 || (canMatchKeywordWithAssign (&cp, "Class.new")))
1041
1042 {
1043
1044 int r;
1045 if (*(cp - 1) != 's')
1046 r = emitRubyTagFull(NULL, K_CLASS, true, false);
1047 else
1048 r = readAndEmitTag (&cp, K_CLASS); /* "class" */
1049
1050 tagEntryInfo *e = getEntryInCorkQueue (r);
1051
1052 if (e)
1053 {
1054 skipWhitespace (&cp);
1055 if (*cp == '<' && *(cp + 1) != '<')
1056 {
1057 cp++;
1058 vString *parent = vStringNew ();
1059 parseIdentifier (&cp, parent, K_CLASS);
1060 if (vStringLength (parent) > 0)
1061 e->extensionFields.inheritance = vStringDeleteUnwrap (parent);
1062 else
1063 vStringDelete (parent);
1064 }
1065 }
1066 }
1067 else if (canMatchKeywordWithAssign (&cp, "include"))
1068 {
1069 readAndStoreMixinSpec (&cp, "include");
1070 }
1071 else if (canMatchKeywordWithAssign (&cp, "prepend"))
1072 {
1073 readAndStoreMixinSpec (&cp, "prepend");
1074 }
1075 else if (canMatchKeywordWithAssign (&cp, "extend"))
1076 {
1077 readAndStoreMixinSpec (&cp, "extend");
1078 }
1079 else if (canMatchKeywordWithAssign (&cp, "def"))
1080 {
1081 readAndEmitDef (&cp);
1082 }
1083 else if (canMatchKeywordWithAssign (&cp, "attr_reader"))
1084 {
1085 readAttrsAndEmitTags (&cp, true, false);
1086 }
1087 else if (canMatchKeywordWithAssign (&cp, "attr_writer"))
1088 {
1089 readAttrsAndEmitTags (&cp, false, true);
1090 }
1091 else if (canMatchKeywordWithAssign (&cp, "attr_accessor"))
1092 {
1093 readAttrsAndEmitTags (&cp, true, true);
1094 }
1095 else if (doesLineIncludeConstant (&cp, constant))
1096 {
1097 emitRubyTag (constant, K_CONST);
1098 vStringClear (constant);
1099 }
1100 else if (canMatchKeywordWithAssign (&cp, "require"))
1101 {
1102 readStringAndEmitTag (&cp, K_LIBRARY, RUBY_LIBRARY_REQUIRED);
1103 }
1104 else if (canMatchKeywordWithAssign (&cp, "require_relative"))
1105 {
1106 readStringAndEmitTag (&cp, K_LIBRARY, RUBY_LIBRARY_REQUIRED_REL);
1107 }
1108 else if (canMatchKeywordWithAssign (&cp, "load"))
1109 {
1110 readStringAndEmitTag (&cp, K_LIBRARY, RUBY_LIBRARY_LOADED);
1111 }
1112 else if (canMatchKeywordWithAssign (&cp, "alias"))
1113 {
1114 if (!readAndEmitTagFull (&cp, K_ALIAS, false, true)
1115 && (*cp == '$'))
1116 {
1117 /* Alias for a global variable. */
1118 ++cp;
1119 vString *alias = vStringNew ();
1120 vStringPut (alias, '$');
1121 if (K_METHOD == parseIdentifier (&cp, alias, K_METHOD)
1122 && vStringLength (alias) > 0)
1123 emitRubyTagFull (alias, K_ALIAS, false, false);
1124 vStringDelete (alias);
1125 }
1126 }
1127 else if (canMatchKeywordWithAssign (&cp, "alias_method"))
1128 readAliasMethodAndEmitTags (&cp);
1129 else if ((canMatchKeywordWithAssign (&cp, "private")
1130 || canMatchKeywordWithAssign (&cp, "protected")
1131 || canMatchKeywordWithAssign (&cp, "public")
1132 || canMatchKeywordWithAssign (&cp, "private_class_method")
1133 || canMatchKeywordWithAssign (&cp, "public_class_method")))
1134 {
1135 skipWhitespace (&cp);
1136 if (canMatchKeywordWithAssign (&cp, "def"))
1137 readAndEmitDef (&cp);
1138 /* TODO: store the method for controlling visibility
1139 * to the "access:" field of the tag.*/
1140 }
1141 else
1142 subparser = notifyLine(&cp);
1143
1144
1145 while (*cp != '\0')
1146 {
1147 /* FIXME: we don't cope with here documents,
1148 * or regular expression literals, or ... you get the idea.
1149 * Hopefully, the restriction above that insists on seeing
1150 * definitions at the starts of lines should keep us out of
1151 * mischief.
1152 */
1153 if (inMultiLineComment || isspace (*cp))
1154 {
1155 ++cp;
1156 }
1157 else if (*cp == '#')
1158 {
1159 /* FIXME: this is wrong, but there *probably* won't be a
1160 * definition after an interpolated string (where # doesn't
1161 * mean 'comment').
1162 */
1163 break;
1164 }
1165 else if (canMatchKeyword (&cp, "begin"))
1166 {
1167 enterUnnamedScope ();
1168 }
1169 else if (canMatchKeyword (&cp, "do"))
1170 {
1171 if (! expect_separator)
1172 {
1173 enterUnnamedScope ();
1174 if (subparser && subparser->corkIndex)
1175 parasiteToScope (subparser, subparser->corkIndex);
1176 }
1177 else
1178 expect_separator = false;
1179 }
1180 else if (canMatchKeyword (&cp, "end") && nesting->n > 0)
1181 {
1182 /* Leave the most recent scope. */
1183 nestingLevelsPop (nesting);
1184 }
1185 else if (*cp == '"' || *cp == '\'')
1186 {
1187 unsigned char b = *cp;
1188 /* Skip string literals.
1189 * FIXME: should cope with escapes and interpolation.
1190 */
1191 ++cp;
1192 parseString (&cp, b, NULL);
1193 }
1194 else if (*cp == ';')
1195 {
1196 ++cp;
1197 expect_separator = false;
1198 }
1199 else if (*cp != '\0')
1200 {
1201 do
1202 ++cp;
1203 while (isIdentChar (*cp));
1204 }
1205 }
1206 }
1207 nestingLevelsFree (nesting);
1208 vStringDelete (constant);
1209 }
1210
RubyParser(void)1211 extern parserDefinition* RubyParser (void)
1212 {
1213 static const char *const extensions [] = { "rb", "ruby", NULL };
1214 parserDefinition* def = parserNew ("Ruby");
1215 def->kindTable = RubyKinds;
1216 def->kindCount = ARRAY_SIZE (RubyKinds);
1217 def->extensions = extensions;
1218 def->parser = findRubyTags;
1219 def->fieldTable = RubyFields;
1220 def->fieldCount = ARRAY_SIZE (RubyFields);
1221 def->useCork = CORK_QUEUE;
1222 return def;
1223 }
1224