xref: /Universal-ctags/parsers/ruby.c (revision 6b212cad3c28e21e9fae93d633216afdcb6c9af2)
1 /*
2 *   Copyright (c) 2000-2001, Thaddeus Covert <sahuagin@mediaone.net>
3 *   Copyright (c) 2002 Matthias Veit <matthias_veit@yahoo.de>
4 *   Copyright (c) 2004 Elliott Hughes <enh@acm.org>
5 *
6 *   This source code is released for free distribution under the terms of the
7 *   GNU General Public License version 2 or (at your option) any later version.
8 *
9 *   This module contains functions for generating tags for Ruby language
10 *   files.
11 */
12 
13 /*
14 *   INCLUDE FILES
15 */
16 #include "general.h"  /* must always come first */
17 
18 #include <ctype.h>
19 #include <string.h>
20 
21 #include "debug.h"
22 #include "entry.h"
23 #include "parse.h"
24 #include "promise.h"
25 #include "nestlevel.h"
26 #include "read.h"
27 #include "routines.h"
28 #include "strlist.h"
29 #include "subparser.h"
30 #include "vstring.h"
31 
32 #include "ruby.h"
33 
34 /*
35 *   DATA DECLARATIONS
36 */
/* Kinds of tags produced by this parser. Apart from K_UNDEFINED the
 * values are used as indices into RubyKinds[], so the two must stay in
 * the same order. */
typedef enum {
	K_UNDEFINED = -1,	/* no kind; never a valid RubyKinds[] index */
	K_CLASS     = 0,
	K_METHOD    = 1,
	K_MODULE    = 2,
	K_SINGLETON = 3,
	K_CONST     = 4,
	K_ACCESSOR  = 5,
	K_ALIAS     = 6,
	K_LIBRARY   = 7,
} rubyKind;
48 
/* Roles a library reference tag can take, listed in the same order as
 * the entries of RubyLibraryRoles[]. */
typedef enum {
	RUBY_LIBRARY_REQUIRED     = 0,
	RUBY_LIBRARY_REQUIRED_REL = 1,
	RUBY_LIBRARY_LOADED       = 2,
} rubyLibraryRole;
54 
55 /*
56 *   DATA DEFINITIONS
57 */
58 
59 static roleDefinition RubyLibraryRoles [] = {
60 	{ true, "required",  "loaded by \"require\" method" },
61 	{ true, "requiredRel", "loaded by \"require_relative\" method" },
62 	{ true, "loaded", "loaded by \"load\" method" },
63 };
64 
65 static kindDefinition RubyKinds [] = {
66 	{ true, 'c', "class",  "classes" },
67 	{ true, 'f', "method", "methods" },
68 	{ true, 'm', "module", "modules" },
69 	{ true, 'S', "singletonMethod", "singleton methods" },
70 	{ true, 'C', "constant", "constants" },
71 	{ true, 'A', "accessor", "accessors" },
72 	{ true, 'a', "alias",    "aliases" },
73 	{ true, 'L', "library",  "libraries",
74 	  .referenceOnly = true, ATTACH_ROLES(RubyLibraryRoles) },
75 };
76 
/* Parser-specific fields; the values index RubyFields[]. */
typedef enum {
	F_MIXIN = 0,
} rubyField;
80 
81 static fieldDefinition RubyFields[] = {
82 	{ .name = "mixin",
83 	  .description = "how the class or module is mixed in (mixin:HOW:MODULE)",
84 	  .enabled = true },
85 };
86 
87 struct blockData {
88 	stringList *mixin;
89 	rubySubparser *subparser;
90 	int subparserCorkIndex;
91 };
92 
93 static NestingLevels* nesting = NULL;
94 
95 #define SCOPE_SEPARATOR '.'
96 
97 /*
98 *   FUNCTION DEFINITIONS
99 */
100 
101 static void enterUnnamedScope (void);
102 
103 /*
104 * Returns a string describing the scope in 'nls'.
105 * We record the current scope as a list of entered scopes.
106 * Scopes corresponding to 'if' statements and the like are
107 * represented by empty strings. Scopes corresponding to
108 * modules and classes are represented by the name of the
109 * module or class.
110 */
nestingLevelsToScope(const NestingLevels * nls)111 static vString* nestingLevelsToScope (const NestingLevels* nls)
112 {
113 	int i;
114 	unsigned int chunks_output = 0;
115 	vString* result = vStringNew ();
116 	for (i = 0; i < nls->n; ++i)
117 	{
118 	    NestingLevel *nl = nestingLevelsGetNthFromRoot (nls, i);
119 	    tagEntryInfo *e = getEntryOfNestingLevel (nl);
120 	    if (e && strlen (e->name) > 0 && (!e->placeholder))
121 	    {
122 	        if (chunks_output++ > 0)
123 	            vStringPut (result, SCOPE_SEPARATOR);
124 	        vStringCatS (result, e->name);
125 	    }
126 	}
127 	return result;
128 }
129 
/*
* Attempts to advance '*s' past 'literal'.
*
* The match succeeds only if '*s' starts with 'literal' and 'end_check'
* accepts the character following it (which may be the terminating NUL),
* i.e. the literal ends a token.
*
* Returns true and moves '*s' just past the literal on success; returns
* false and leaves '*s' where it was otherwise.
*/
static bool canMatch (const unsigned char** s, const char* literal,
                         bool (*end_check) (int))
{
	const size_t literal_length = strlen (literal);

	/* strncmp() stops at the first NUL, so input shorter than the
	 * literal compares unequal here; there is no need to measure the
	 * whole remaining line first. */
	if (strncmp ((const char*) *s, literal, literal_length) != 0)
		return false;

	/* A successful comparison proves that '*s' holds at least
	 * literal_length characters, so this read is within the string. */
	if (! end_check ((*s)[literal_length]))
		return false;

	*s += literal_length;
	return true;
}
157 
/* Tells whether 'c' can be part of an identifier: an alphanumeric
 * character (as classified by isalnum) or an underscore. */
static bool isIdentChar (int c)
{
	if (c == '_')
		return true;
	return isalnum (c) != 0;
}
162 
/* Tells whether 'c' is neither an identifier character nor a colon. */
static bool notIdentCharButColon (int c)
{
	if (c == ':')
		return false;
	return ! isIdentChar (c);
}
167 
/* Tells whether 'c' is one of the characters the parser treats as part
 * of an operator. */
static bool isOperatorChar (int c)
{
	switch (c)
	{
	case '[': case ']':
	case '=': case '!': case '~':
	case '+': case '-':
	case '@': case '*': case '/': case '%':
	case '<': case '>':
	case '&': case '^': case '|':
		return true;
	default:
		return false;
	}
}
177 
/* Negation of isOperatorChar(), usable as an end-of-token check. */
static bool notOperatorChar (int c)
{
	return isOperatorChar (c) ? false : true;
}
182 
/* Tells whether 'c' is a variable sigil: '@' or '$'. */
static bool isSigilChar (int c)
{
	switch (c)
	{
	case '@':
	case '$':
		return true;
	default:
		return false;
	}
}
187 
/* Tells whether 'c' is whitespace (per isspace) or the NUL terminator,
 * so the end of a line also counts as whitespace. */
static bool isWhitespace (int c)
{
	if (c == 0)
		return true;
	return isspace (c) != 0;
}
192 
/*
 * Moves '*s' forward over the characters accepted by 'predicate',
 * stopping at the first rejected character or at the terminating NUL
 * (which is never passed to the predicate).
 * Returns true if '*s' moved by at least one position.
 */
static bool advanceWhile (const unsigned char** s, bool (*predicate) (int))
{
	const unsigned char *const start = *s;
	const unsigned char *p = start;

	while (*p != '\0' && predicate (*p))
		p++;

	*s = p;
	return p != start;
}
213 
214 #define canMatchKeyword rubyCanMatchKeyword
rubyCanMatchKeyword(const unsigned char ** s,const char * literal)215 extern bool rubyCanMatchKeyword (const unsigned char** s, const char* literal)
216 {
217 	/* Using notIdentCharButColon() here.
218 	 *
219 	 * A hash can be defined like {for: nil, foo: 0}.
220 	 *"for" in the above example is not a keyword.
221 	 */
222 	return canMatch (s, literal, notIdentCharButColon);
223 }
224 
225 /*
226  * Extends canMatch. Works similarly, but allows assignment to precede
227  * the keyword, as block assignment is a common Ruby idiom.
228  */
229 #define canMatchKeywordWithAssign rubyCanMatchKeywordWithAssign
rubyCanMatchKeywordWithAssign(const unsigned char ** s,const char * literal)230 extern bool rubyCanMatchKeywordWithAssign (const unsigned char** s, const char* literal)
231 {
232 	const unsigned char* original_pos = *s;
233 
234 	if (canMatchKeyword (s, literal))
235 	{
236 		return true;
237 	}
238 
239 	advanceWhile (s, isSigilChar);
240 
241 	if (! advanceWhile (s, isIdentChar))
242 	{
243 		*s = original_pos;
244 		return false;
245 	}
246 
247 	advanceWhile (s, isWhitespace);
248 
249 	if (! (advanceWhile (s, isOperatorChar) && *(*s - 1) == '='))
250 	{
251 		*s = original_pos;
252 		return false;
253 	}
254 
255 	advanceWhile (s, isWhitespace);
256 
257 	if (canMatchKeyword (s, literal))
258 	{
259 		return true;
260 	}
261 
262 	*s = original_pos;
263 	return false;
264 }
265 
266 /*
267 * Attempts to advance 'cp' past a Ruby operator method name. Returns
268 * true if successful (and copies the name into 'name'), false otherwise.
269 */
parseRubyOperator(vString * name,const unsigned char ** cp)270 static bool parseRubyOperator (vString* name, const unsigned char** cp)
271 {
272 	static const char* RUBY_OPERATORS[] = {
273 	    "[]", "[]=",
274 	    "**",
275 	    "!", "~", "+@", "-@",
276 	    "*", "/", "%",
277 	    "+", "-",
278 	    ">>", "<<",
279 	    "&",
280 	    "^", "|",
281 	    "<=", "<", ">", ">=",
282 	    "<=>", "==", "===", "!=", "=~", "!~",
283 	    "`",
284 	    NULL
285 	};
286 	int i;
287 	for (i = 0; RUBY_OPERATORS[i] != NULL; ++i)
288 	{
289 	    if (canMatch (cp, RUBY_OPERATORS[i], notOperatorChar))
290 	    {
291 	        vStringCatS (name, RUBY_OPERATORS[i]);
292 	        return true;
293 	    }
294 	}
295 	return false;
296 }
297 
298 /*
299 * Emits a tag for the given 'name' of kind 'kind' at the current nesting.
300 */
emitRubyTagFull(vString * name,rubyKind kind,bool pushLevel,bool clearName)301 static int emitRubyTagFull (vString* name, rubyKind kind, bool pushLevel, bool clearName)
302 {
303 	tagEntryInfo tag;
304 	vString* scope;
305 	tagEntryInfo *parent;
306 	rubyKind parent_kind = K_UNDEFINED;
307 	NestingLevel *lvl;
308 	const char *unqualified_name;
309 	const char *qualified_name;
310 	int r;
311 	bool anonymous = false;
312 
313 	if (!name)
314 	{
315 		name = anonGenerateNew ("__anon", K_CLASS);
316 		anonymous = true;
317 	}
318 
319         if (!RubyKinds[kind].enabled) {
320             return CORK_NIL;
321         }
322 
323 	scope = nestingLevelsToScope (nesting);
324 	lvl = nestingLevelsGetCurrent (nesting);
325 	parent = getEntryOfNestingLevel (lvl);
326 	if (parent)
327 		parent_kind =  parent->kindIndex;
328 
329 	qualified_name = vStringValue (name);
330 	unqualified_name = strrchr (qualified_name, SCOPE_SEPARATOR);
331 	if (unqualified_name && unqualified_name[1])
332 	{
333 		if (unqualified_name > qualified_name)
334 		{
335 			if (vStringLength (scope) > 0)
336 				vStringPut (scope, SCOPE_SEPARATOR);
337 			vStringNCatS (scope, qualified_name,
338 			              unqualified_name - qualified_name);
339 			/* assume module parent type for a lack of a better option */
340 			parent_kind = K_MODULE;
341 		}
342 		unqualified_name++;
343 	}
344 	else
345 		unqualified_name = qualified_name;
346 
347 	initTagEntry (&tag, unqualified_name, kind);
348 
349 	/* Don't fill the scope field for a tag entry representing
350 	 * a global variable. */
351 	if (unqualified_name[0] != '$'
352 		&& vStringLength (scope) > 0) {
353 		Assert (0 <= parent_kind &&
354 		        (size_t) parent_kind < (ARRAY_SIZE (RubyKinds)));
355 
356 		tag.extensionFields.scopeKindIndex = parent_kind;
357 		tag.extensionFields.scopeName = vStringValue (scope);
358 	}
359 
360 	if (anonymous)
361 		markTagExtraBit (&tag, XTAG_ANONYMOUS);
362 
363 	r = makeTagEntry (&tag);
364 
365 	if (pushLevel)
366 		nestingLevelsPush (nesting, r);
367 
368 	if (clearName)
369 		vStringClear (name);
370 
371 	if (anonymous)
372 		vStringDelete (name);
373 
374 	vStringDelete (scope);
375 
376 	return r;
377 }
378 
/* Emits a tag for 'name' and clears 'name' afterwards. A nesting level
 * is pushed for every kind except constants. Returns what
 * emitRubyTagFull() returns. */
static int emitRubyTag (vString* name, rubyKind kind)
{
	const bool opens_scope = (kind != K_CONST);

	return emitRubyTagFull (name, kind, opens_scope, true);
}
383 
/* Tests whether 'ch' is a character in 'list'.
 * The terminating NUL of 'list' is not considered a member: strchr()
 * treats the terminator as part of the string and would otherwise
 * report a match for ch == '\0'. */
static bool charIsIn (char ch, const char* list)
{
	if (ch == '\0')
		return false;
	return (strchr (list, ch) != NULL);
}
389 
/* Advances '*cp' over leading whitespace, as classified by isspace(). */
#define skipWhitespace rubySkipWhitespace
extern void rubySkipWhitespace (const unsigned char** cp)
{
	const unsigned char *p = *cp;

	while (isspace (*p))
		p++;
	*cp = p;
}
399 
400 /*
401 * Copies the characters forming an identifier from *cp into
402 * name, leaving *cp pointing to the character after the identifier.
403 */
parseIdentifier(const unsigned char ** cp,vString * name,rubyKind kind)404 static rubyKind parseIdentifier (
405 		const unsigned char** cp, vString* name, rubyKind kind)
406 {
407 	/* Method names are slightly different to class and variable names.
408 	 * A method name may optionally end with a question mark, exclamation
409 	 * point or equals sign. These are all part of the name.
410 	 * A method name may also contain a period if it's a singleton method.
411 	 */
412 	bool had_sep = false;
413 	const char* also_ok;
414 	if (kind == K_METHOD)
415 	{
416 		also_ok = ".?!=";
417 	}
418 	else if (kind == K_SINGLETON)
419 	{
420 		also_ok = "?!=";
421 	}
422 	else
423 	{
424 		also_ok = "";
425 	}
426 
427 	skipWhitespace (cp);
428 
429 	/* Check for an anonymous (singleton) class such as "class << HTTP". */
430 	if (kind == K_CLASS && **cp == '<' && *(*cp + 1) == '<')
431 	{
432 		return K_UNDEFINED;
433 	}
434 
435 	/* Check for operators such as "def []=(key, val)". */
436 	if (kind == K_METHOD || kind == K_SINGLETON)
437 	{
438 		if (parseRubyOperator (name, cp))
439 		{
440 			return kind;
441 		}
442 	}
443 
444 	/* Copy the identifier into 'name'. */
445 	while (**cp != 0 && (**cp == ':' || isIdentChar (**cp) || charIsIn (**cp, also_ok)))
446 	{
447 		char last_char = **cp;
448 
449 		if (last_char == ':')
450 			had_sep = true;
451 		else
452 		{
453 			if (had_sep)
454 			{
455 				vStringPut (name, SCOPE_SEPARATOR);
456 				had_sep = false;
457 			}
458 			vStringPut (name, last_char);
459 		}
460 		++*cp;
461 
462 		if (kind == K_METHOD)
463 		{
464 			/* Recognize singleton methods. */
465 			if (last_char == '.')
466 			{
467 				vStringClear (name);
468 				return parseIdentifier (cp, name, K_SINGLETON);
469 			}
470 		}
471 
472 		if (kind == K_METHOD || kind == K_SINGLETON)
473 		{
474 			/* Recognize characters which mark the end of a method name. */
475 			if (charIsIn (last_char, "?!="))
476 			{
477 				break;
478 			}
479 		}
480 	}
481 	return kind;
482 }
483 
/* Parses a method name at '*cp' into 'vstr' via parseIdentifier().
 * Returns true only if the name was parsed as a plain method; a name
 * containing a period is parsed as a singleton method and yields false. */
extern bool rubyParseMethodName (const unsigned char **cp, vString* vstr)
{
	const rubyKind parsed = parseIdentifier (cp, vstr, K_METHOD);

	return parsed == K_METHOD;
}
488 
/* Parses a module name at '*cp' into 'vstr' via parseIdentifier().
 * Returns true if parseIdentifier() reports the kind as K_MODULE. */
extern bool rubyParseModuleName (const unsigned char **cp, vString* vstr)
{
	const rubyKind parsed = parseIdentifier (cp, vstr, K_MODULE);

	return parsed == K_MODULE;
}
493 
/* Consumes characters from '*cp' up to the next 'boundary' character or
 * the end of the line, appending them to 'vstr' when it is not NULL.
 * A boundary character that ends the run is consumed too but not
 * copied. Escapes are not interpreted. */
static void parseString (const unsigned char** cp, unsigned char boundary, vString* vstr)
{
	const unsigned char *p = *cp;

	for (; *p != 0 && *p != boundary; p++)
	{
		if (vstr)
			vStringPut (vstr, *p);
	}

	/* skip the closing boundary, if that is what stopped the scan */
	if (*p == boundary)
		p++;

	*cp = p;
}
507 
/* Exported wrapper around parseString(). Returns true if '*cp' was
 * advanced, i.e. at least one character (possibly just the closing
 * boundary) was consumed. */
extern bool rubyParseString (const unsigned char** cp, unsigned char boundary, vString* vstr)
{
	const unsigned char *const start = *cp;

	parseString (cp, boundary, vstr);
	return *cp != start;
}
514 
/*
 * Collects a parenthesized parameter list into 'vstr'.
 *
 * The scan starts at nesting depth 1, i.e. '*cp' is expected to point
 * just past an opening parenthesis, and runs until the matching ')' has
 * been copied. When a line ends first, the next input line is read and
 * '*cp' is pointed at it; the scan gives up at end of input.
 *
 * While copying:
 *  - quoted strings are copied through parseString(), so parentheses
 *    inside them do not affect the depth (escapes are not handled),
 *  - a '#' discards the rest of the line,
 *  - runs of whitespace are collapsed, and whitespace before a comma is
 *    dropped.
 */
static void parseSignature (const unsigned char** cp, vString* vstr)
{
	int depth = 1;

	for (;;)
	{
		while (depth != 0 && **cp != '\0')
		{
			const unsigned char c = **cp;

			if (c == '\'' || c == '"')
			{
				vStringPut (vstr, c);
				++*cp;
				parseString (cp, c, vstr);
				vStringPut (vstr, c);
				/* parseString() already moved past the literal */
				continue;
			}

			if (c == '#')
			{
				/* comment: drop the remainder of this line */
				do
					++*cp;
				while (**cp != '\0');
				break;
			}

			if (c == '(')
			{
				depth++;
				vStringPut (vstr, c);
			}
			else if (c == ')')
			{
				depth--;
				vStringPut (vstr, c);
			}
			else if (! isspace (vStringLast (vstr)))
				vStringPut (vstr, c);
			else if (! isspace (c))
			{
				/* the output ends in whitespace: never add more of it,
				 * and remove it in front of a comma */
				if (c == ',')
					vStringChop (vstr);
				vStringPut (vstr, c);
			}
			++*cp;
		}

		if (depth == 0)
			return;

		const unsigned char *next_line = readLineFromInputFile ();
		if (next_line == NULL)
			return;
		*cp = next_line;
	}
}
571 
/*
 * Reads an identifier following a keyword and emits a tag for it.
 *
 * Nothing happens unless '*cp' points at whitespace (the separator
 * between the keyword and the name). The identifier is parsed with
 * parseIdentifier() using 'expected_kind'; the tag is emitted with the
 * kind that function reports. 'pushLevel' and 'clearName' are passed on
 * to emitRubyTagFull().
 *
 * When no usable name is found, an unnamed scope is entered instead so
 * that the construct's "end" still has a level to pop -- but only if
 * the caller asked for a level to be pushed. Constructs that open no
 * block (e.g. "alias $new $old") must not leave a stray level behind.
 *
 * Returns the cork index of the emitted tag, or CORK_NIL if none was
 * emitted.
 */
static int readAndEmitTagFull (const unsigned char** cp, rubyKind expected_kind,
							   bool pushLevel, bool clearName)
{
	int r = CORK_NIL;
	if (isspace (**cp))
	{
		vString *name = vStringNew ();
		rubyKind actual_kind = parseIdentifier (cp, name, expected_kind);

		if (actual_kind == K_UNDEFINED || vStringLength (name) == 0)
		{
			/*
			* What kind of tags should we create for code like this?
			*
			*    %w(self.clfloor clfloor).each do |name|
			*        module_eval <<-"end;"
			*            def #{name}(x, y=1)
			*                q, r = x.divmod(y)
			*                q = q.to_i
			*                return q, r
			*            end
			*        end;
			*    end
			*
			* Or this?
			*
			*    class << HTTP
			*
			* For now, we don't create any.
			*/
			if (pushLevel)
				enterUnnamedScope ();
		}
		else
		{
			r = emitRubyTagFull (name, actual_kind, pushLevel, clearName);
		}
		vStringDelete (name);
	}
	return r;
}
612 
/* Reads a name and emits a tag for it through readAndEmitTagFull(),
 * clearing the name and pushing a nesting level for every kind except
 * constants. */
static int readAndEmitTag (const unsigned char** cp, rubyKind expected_kind)
{
	const bool opens_scope = (expected_kind != K_CONST);

	return readAndEmitTagFull (cp, expected_kind, opens_scope, true);
}
617 
readAndStoreMixinSpec(const unsigned char ** cp,const char * how_mixin)618 static void readAndStoreMixinSpec (const unsigned char** cp, const char *how_mixin)
619 {
620 
621 	NestingLevel *nl = NULL;
622 	tagEntryInfo *e = NULL;
623 	int ownerLevel = 0;
624 
625 	for (ownerLevel = 0; ownerLevel < nesting->n; ownerLevel++)
626 	{
627 		nl = nestingLevelsGetNthParent (nesting, ownerLevel);
628 		e = nl? getEntryOfNestingLevel (nl): NULL;
629 
630 		/* Ignore "if", "unless", "while" ... */
631 		if ((nl && (nl->corkIndex == CORK_NIL)) || (e && e->placeholder))
632 			continue;
633 		break;
634 	}
635 
636 	if (!e)
637 		return;
638 
639 	if (e->kindIndex == K_SINGLETON)
640 	{
641 		nl = nestingLevelsGetNthParent (nesting,
642 										ownerLevel + 1);
643 		if (nl == NULL)
644 			return;
645 		e = getEntryOfNestingLevel (nl);
646 	}
647 
648 	if (!e)
649 		return;
650 
651 	if (! (e->kindIndex == K_CLASS || e->kindIndex == K_MODULE))
652 		return;
653 
654 	if (isspace (**cp) || (**cp == '('))
655 	{
656 		if (isspace (**cp))
657 			skipWhitespace (cp);
658 		if (**cp == '(')
659 			++*cp;
660 
661 		vString *spec = vStringNewInit (how_mixin);
662 		vStringPut(spec, ':');
663 
664 		size_t len = vStringLength (spec);
665 		parseIdentifier (cp, spec, K_MODULE);
666 		if (len == vStringLength (spec))
667 		{
668 			vStringDelete (spec);
669 			return;
670 		}
671 
672 		struct blockData *bdata =  nestingLevelGetUserData (nl);
673 		if (bdata->mixin == NULL)
674 			bdata->mixin = stringListNew ();
675 		stringListAdd (bdata->mixin, spec);
676 	}
677 }
678 
enterUnnamedScope(void)679 static void enterUnnamedScope (void)
680 {
681 	int r = CORK_NIL;
682 	NestingLevel *parent = nestingLevelsGetCurrent (nesting);
683 	tagEntryInfo *e_parent = getEntryOfNestingLevel (parent);
684 
685 	if (e_parent)
686 	{
687 		tagEntryInfo e;
688 		initTagEntry (&e, "", e_parent->kindIndex);
689 		e.placeholder = 1;
690 		r = makeTagEntry (&e);
691 	}
692 	nestingLevelsPush (nesting, r);
693 }
694 
/* Attaches 'subparser' and the cork index of its tag to the current
 * nesting level, then tells the subparser, if it provides
 * enterBlockNotify, that its block has been entered. */
static void parasiteToScope (rubySubparser *subparser, int subparserCorkIndex)
{
	struct blockData *bdata =
		nestingLevelGetUserData (nestingLevelsGetCurrent (nesting));

	bdata->subparserCorkIndex = subparserCorkIndex;
	bdata->subparser = subparser;

	if (subparser->enterBlockNotify != NULL)
		subparser->enterBlockNotify (subparser, subparserCorkIndex);
}
705 
/* Joins the specs in 'mixinSpec' with commas and attaches the result to
 * the tag at 'corkIndex' as its mixin field. The joined string is built
 * in place in the first item of the list, so the list is modified. */
static void attachMixinField (int corkIndex, stringList *mixinSpec)
{
	vString *joined = stringListItem (mixinSpec, 0);
	const unsigned int count = stringListCount (mixinSpec);
	unsigned int idx = 1;

	while (idx < count)
	{
		vStringPut (joined, ',');
		vStringCat (joined, stringListItem (mixinSpec, idx));
		idx++;
	}

	attachParserFieldToCorkEntry (corkIndex, RubyFields [F_MIXIN].ftype,
								  vStringValue (joined));
}
718 
/*
 * Destructor for the block data of a nesting level, registered with
 * nestingLevelsNewFull(). Before the data goes away it
 *  - attaches the collected mixin specs to the level's tag,
 *  - records the current input line as the end line of the level's tag
 *    (unless that tag is a placeholder),
 *  - does the same for the tag made by a subparser for this block and
 *    tells the subparser that the block is left,
 *  - releases the mixin list.
 */
static void deleteBlockData (NestingLevel *nl, void *data CTAGS_ATTR_UNUSED)
{
	struct blockData *bdata = nestingLevelGetUserData (nl);

	if (nl->corkIndex != CORK_NIL
		&& bdata->mixin != NULL
		&& stringListCount (bdata->mixin) > 0)
		attachMixinField (nl->corkIndex, bdata->mixin);

	tagEntryInfo *e = getEntryInCorkQueue (nl->corkIndex);
	if (e && !e->placeholder)
			e->extensionFields.endLine = getInputLineNumber ();

	tagEntryInfo *sub_e;
	if (bdata->subparserCorkIndex != CORK_NIL
		&& (sub_e = getEntryInCorkQueue (bdata->subparserCorkIndex)))
	{
		sub_e->extensionFields.endLine = getInputLineNumber ();
		/* The notification callbacks are optional, as in
		 * parasiteToScope() and notifyLine(); don't call through a
		 * NULL pointer. */
		if (bdata->subparser && bdata->subparser->leaveBlockNotify)
			bdata->subparser->leaveBlockNotify (bdata->subparser,
												bdata->subparserCorkIndex);
	}

	if (bdata->mixin)
		stringListDelete (bdata->mixin);
}
745 
/*
 * Checks whether the text at '*cp' is a constant assignment: optional
 * whitespace, an identifier starting with an upper-case letter,
 * optional whitespace and an assignment '='.
 *
 * A '=' that starts "==", "=~" or "=>" is not an assignment. Without
 * this check, lines such as "Foo => 1" inside a multi-line hash literal
 * or "FOO == x" in a continued condition would be tagged as constant
 * definitions.
 *
 * On success the identifier is appended to 'constant', '*cp' is moved
 * to the '=' and true is returned. Otherwise '*cp' is left alone and
 * false is returned; if an identifier had been copied, 'constant' is
 * cleared.
 */
static bool doesLineIncludeConstant (const unsigned char **cp, vString *constant)
{
	const unsigned char *p = *cp;

	if (isspace (*p))
		skipWhitespace (&p);

	if (isupper (*p))
	{
		while (*p != 0 && isIdentChar (*p))
		{
			vStringPut (constant, *p);
			++p;
		}
		if (isspace (*p))
			skipWhitespace (&p);
		/* p[1] is at worst the terminating NUL when *p is '='. */
		if (*p == '=' && p[1] != '=' && p[1] != '~' && p[1] != '>')
		{
			*cp = p;
			return true;
		}
		vStringClear (constant);
	}

	return false;
}
772 
/* Emits accessor tags for the attribute named by 'a': one for "NAME"
 * if 'reader' is set and one for "NAME=" if 'writer' is set. Does
 * nothing for an empty name. 'a' is cleared by the last tag emitted. */
static void emitRubyAccessorTags (vString *a, bool reader, bool writer)
{
	if (vStringLength (a) > 0)
	{
		if (reader)
		{
			/* keep the name around if the writer tag still needs it */
			const bool last_use = !writer;
			emitRubyTagFull (a, K_ACCESSOR, false, last_use);
		}
		if (writer)
		{
			vStringPut (a, '=');
			emitRubyTagFull (a, K_ACCESSOR, false, true);
		}
	}
}
786 
readAttrsAndEmitTags(const unsigned char ** cp,bool reader,bool writer)787 static void readAttrsAndEmitTags (const unsigned char **cp, bool reader, bool writer)
788 {
789 	vString *a = vStringNew ();
790 
791 	skipWhitespace (cp);
792 	if (**cp == '(')
793 		++*cp;
794 
795 	do {
796 		skipWhitespace (cp);
797 		if (**cp == ':')
798 		{
799 			++*cp;
800 			if (K_METHOD == parseIdentifier (cp, a, K_METHOD))
801 			{
802 				emitRubyAccessorTags (a, reader, writer);
803 				skipWhitespace (cp);
804 				if (**cp == ',')
805 				{
806 					++*cp;
807 					continue;
808 				}
809 			}
810 		}
811 		else if (**cp == '"' || **cp == '\'')
812 		{
813 			unsigned char b = **cp;
814 			++*cp;
815 			parseString (cp, b, a);
816 
817 			emitRubyAccessorTags (a, reader, writer);
818 			skipWhitespace (cp);
819 			if (**cp == ',')
820 			{
821 				++*cp;
822 				continue;
823 			}
824 		}
825 		break;
826 	} while (1);
827 
828 	vStringDelete (a);
829 }
830 
readAliasMethodAndEmitTags(const unsigned char ** cp)831 static int readAliasMethodAndEmitTags (const unsigned char **cp)
832 {
833 	int r = CORK_NIL;
834 	vString *a = vStringNew ();
835 
836 	skipWhitespace (cp);
837 	if (**cp == '(')
838 		++*cp;
839 
840 	skipWhitespace (cp);
841 	if (**cp == ':')
842 	{
843 		++*cp;
844 		if (K_METHOD != parseIdentifier (cp, a, K_METHOD))
845 			vStringClear (a);
846 	}
847 	else if (**cp == '"' || **cp == '\'')
848 	{
849 		unsigned char b = **cp;
850 		++*cp;
851 		parseString (cp, b, a);
852 	}
853 
854 	if (vStringLength (a) > 0)
855 		r = emitRubyTagFull (a, K_ALIAS, false, false);
856 
857 	vStringDelete (a);
858 	return r;
859 }
860 
/*
 * Parses a quoted string argument, optionally preceded by '(', and
 * makes a reference tag of the given 'kind' and 'role' named after its
 * content. Returns the cork index of the tag, or CORK_NIL if there is
 * no string or it is empty.
 */
static int readStringAndEmitTag (const unsigned char **cp, rubyKind kind, int role)
{
	vString *content = NULL;
	int corkIndex = CORK_NIL;

	skipWhitespace (cp);
	if (**cp == '(')
		++*cp;
	skipWhitespace (cp);

	if (**cp == '"' || **cp == '\'')
	{
		const unsigned char quote = **cp;

		++*cp;
		content = vStringNew ();
		parseString (cp, quote, content);
		if (vStringLength (content) > 0)
			corkIndex = makeSimpleRefTag (content, kind, role);
	}

	vStringDelete (content);
	return corkIndex;
}
885 
readAndEmitDef(const unsigned char ** cp)886 static int readAndEmitDef (const unsigned char **cp)
887 {
888 	rubyKind kind = K_METHOD;
889 	NestingLevel *nl = nestingLevelsGetCurrent (nesting);
890 	tagEntryInfo *e_scope  = getEntryOfNestingLevel (nl);
891 
892 	/* if the def is inside an unnamed scope at the class level, assume
893 	 * it's from a singleton from a construct like this:
894 	 *
895 	 * class C
896 	 *   class << self
897 	 *     def singleton
898 	 *       ...
899 	 *     end
900 	 *   end
901 	 * end
902 	 */
903 	if (e_scope && e_scope->kindIndex == K_CLASS && strlen (e_scope->name) == 0)
904 		kind = K_SINGLETON;
905 	int corkIndex = readAndEmitTag (cp, kind);
906 	tagEntryInfo *e = getEntryInCorkQueue (corkIndex);
907 
908 	/* Fill signature: field. */
909 	if (e)
910 	{
911 		vString *signature = vStringNewInit ("(");
912 		skipWhitespace (cp);
913 		if (**cp == '(')
914 		{
915 			++(*cp);
916 			parseSignature (cp, signature);
917 			if (vStringLast(signature) != ')')
918 			{
919 				vStringDelete (signature);
920 				signature = NULL;
921 			}
922 		}
923 		else
924 			vStringPut (signature, ')');
925 		e->extensionFields.signature = vStringDeleteUnwrap (signature);
926 		signature = NULL;;
927 		vStringDelete (signature);
928 	}
929 	return corkIndex;
930 }
931 
notifyLine(const unsigned char ** cp)932 static rubySubparser *notifyLine (const unsigned char **cp)
933 {
934 	subparser *sub;
935 	rubySubparser *rubysub = NULL;
936 
937 	foreachSubparser (sub, false)
938 	{
939 		rubysub = (rubySubparser *)sub;
940 		rubysub->corkIndex = CORK_NIL;
941 
942 		if (rubysub->lineNotify)
943 		{
944 			enterSubparser(sub);
945 			const unsigned char *base = *cp;
946 			rubysub->corkIndex = rubysub->lineNotify(rubysub, cp);
947 			leaveSubparser();
948 			if (rubysub->corkIndex != CORK_NIL)
949 				break;
950 			*cp = base;
951 		}
952 	}
953 
954 	if (rubysub && rubysub->corkIndex != CORK_NIL)
955 		return rubysub;
956 	return NULL;
957 }
958 
findRubyTags(void)959 static void findRubyTags (void)
960 {
961 	const unsigned char *line;
962 	bool inMultiLineComment = false;
963 	vString *constant = vStringNew ();
964 	bool found_rdoc = false;
965 
966 	nesting = nestingLevelsNewFull (sizeof (struct blockData), deleteBlockData);
967 
968 	/* FIXME: this whole scheme is wrong, because Ruby isn't line-based.
969 	* You could perfectly well write:
970 	*
971 	*  def
972 	*  method
973 	*   puts("hello")
974 	*  end
975 	*
976 	* if you wished, and this function would fail to recognize anything.
977 	*/
978 	while ((line = readLineFromInputFile ()) != NULL)
979 	{
980 		rubySubparser *subparser = CORK_NIL;
981 		const unsigned char *cp = line;
982 		/* if we expect a separator after a while, for, or until statement
983 		 * separators are "do", ";" or newline */
984 		bool expect_separator = false;
985 
986 		if (found_rdoc == false && strncmp ((const char*)cp, "# =", 3) == 0)
987 		{
988 			found_rdoc = true;
989 			makePromise ("RDoc", 0, 0, 0, 0, 0);
990 		}
991 
992 		if (canMatch (&cp, "=begin", isWhitespace))
993 		{
994 			inMultiLineComment = true;
995 			continue;
996 		}
997 		if (canMatch (&cp, "=end", isWhitespace))
998 		{
999 			inMultiLineComment = false;
1000 			continue;
1001 		}
1002 		if (inMultiLineComment)
1003 			continue;
1004 
1005 		skipWhitespace (&cp);
1006 
1007 		/* Avoid mistakenly starting a scope for modifiers such as
1008 		*
1009 		*   return if <exp>
1010 		*
1011 		* FIXME: we're fooled if someone does something heinous such as
1012 		*
1013 		*   puts("hello") \
1014 		*       unless <exp>
1015 		*/
1016 
1017 		if (canMatchKeywordWithAssign (&cp, "for") ||
1018 		    canMatchKeywordWithAssign (&cp, "until") ||
1019 		    canMatchKeywordWithAssign (&cp, "while"))
1020 		{
1021 			expect_separator = true;
1022 			enterUnnamedScope ();
1023 		}
1024 		else if (canMatchKeywordWithAssign (&cp, "case") ||
1025 		         canMatchKeywordWithAssign (&cp, "if") ||
1026 		         canMatchKeywordWithAssign (&cp, "unless"))
1027 		{
1028 			enterUnnamedScope ();
1029 		}
1030 
1031 		/*
1032 		* "module M", "class C" and "def m" should only be at the beginning
1033 		* of a line.
1034 		*/
1035 		if (canMatchKeywordWithAssign (&cp, "module"))
1036 		{
1037 			readAndEmitTag (&cp, K_MODULE);
1038 		}
1039 		else if (canMatchKeywordWithAssign (&cp, "class")
1040 				 || (canMatchKeywordWithAssign (&cp, "Class.new")))
1041 
1042 		{
1043 
1044 			int r;
1045 			if (*(cp - 1) != 's')
1046 				r = emitRubyTagFull(NULL, K_CLASS, true, false);
1047 			else
1048 				r = readAndEmitTag (&cp, K_CLASS); /* "class" */
1049 
1050 			tagEntryInfo *e = getEntryInCorkQueue (r);
1051 
1052 			if (e)
1053 			{
1054 				skipWhitespace (&cp);
1055 				if (*cp == '<' && *(cp + 1) != '<')
1056 				{
1057 					cp++;
1058 					vString *parent = vStringNew ();
1059 					parseIdentifier (&cp, parent, K_CLASS);
1060 					if (vStringLength (parent) > 0)
1061 						e->extensionFields.inheritance = vStringDeleteUnwrap (parent);
1062 					else
1063 						vStringDelete (parent);
1064 				}
1065 			}
1066 		}
1067 		else if (canMatchKeywordWithAssign (&cp, "include"))
1068 		{
1069 			readAndStoreMixinSpec (&cp, "include");
1070 		}
1071 		else if (canMatchKeywordWithAssign (&cp, "prepend"))
1072 		{
1073 			readAndStoreMixinSpec (&cp, "prepend");
1074 		}
1075 		else if (canMatchKeywordWithAssign (&cp, "extend"))
1076 		{
1077 			readAndStoreMixinSpec (&cp, "extend");
1078 		}
1079 		else if (canMatchKeywordWithAssign (&cp, "def"))
1080 		{
1081 			readAndEmitDef (&cp);
1082 		}
1083 		else if (canMatchKeywordWithAssign (&cp, "attr_reader"))
1084 		{
1085 			readAttrsAndEmitTags (&cp, true, false);
1086 		}
1087 		else if (canMatchKeywordWithAssign (&cp, "attr_writer"))
1088 		{
1089 			readAttrsAndEmitTags (&cp, false, true);
1090 		}
1091 		else if (canMatchKeywordWithAssign (&cp, "attr_accessor"))
1092 		{
1093 			readAttrsAndEmitTags (&cp, true, true);
1094 		}
1095 		else if (doesLineIncludeConstant (&cp, constant))
1096 		{
1097 			emitRubyTag (constant, K_CONST);
1098 			vStringClear (constant);
1099 		}
1100 		else if (canMatchKeywordWithAssign (&cp, "require"))
1101 		{
1102 			readStringAndEmitTag (&cp, K_LIBRARY, RUBY_LIBRARY_REQUIRED);
1103 		}
1104 		else if (canMatchKeywordWithAssign (&cp, "require_relative"))
1105 		{
1106 			readStringAndEmitTag (&cp, K_LIBRARY, RUBY_LIBRARY_REQUIRED_REL);
1107 		}
1108 		else if (canMatchKeywordWithAssign (&cp, "load"))
1109 		{
1110 			readStringAndEmitTag (&cp, K_LIBRARY, RUBY_LIBRARY_LOADED);
1111 		}
1112 		else if (canMatchKeywordWithAssign (&cp, "alias"))
1113 		{
1114 			if (!readAndEmitTagFull (&cp, K_ALIAS, false, true)
1115 				&& (*cp == '$'))
1116 			{
1117 				/* Alias for a global variable. */
1118 				++cp;
1119 				vString *alias = vStringNew ();
1120 				vStringPut (alias, '$');
1121 				if (K_METHOD == parseIdentifier (&cp, alias, K_METHOD)
1122 					&& vStringLength (alias) > 0)
1123 					emitRubyTagFull (alias, K_ALIAS, false, false);
1124 				vStringDelete (alias);
1125 			}
1126 		}
1127 		else if (canMatchKeywordWithAssign (&cp, "alias_method"))
1128 			readAliasMethodAndEmitTags (&cp);
1129 		else if ((canMatchKeywordWithAssign (&cp, "private")
1130 				  || canMatchKeywordWithAssign (&cp, "protected")
1131 				  || canMatchKeywordWithAssign (&cp, "public")
1132 				  || canMatchKeywordWithAssign (&cp, "private_class_method")
1133 				  || canMatchKeywordWithAssign (&cp, "public_class_method")))
1134 		{
1135 			skipWhitespace (&cp);
1136 			if (canMatchKeywordWithAssign (&cp, "def"))
1137 				readAndEmitDef (&cp);
1138 			/* TODO: store the method for controlling visibility
1139 			 * to the "access:" field of the tag.*/
1140 		}
1141 		else
1142 			subparser = notifyLine(&cp);
1143 
1144 
1145 		while (*cp != '\0')
1146 		{
1147 			/* FIXME: we don't cope with here documents,
1148 			* or regular expression literals, or ... you get the idea.
1149 			* Hopefully, the restriction above that insists on seeing
1150 			* definitions at the starts of lines should keep us out of
1151 			* mischief.
1152 			*/
1153 			if (inMultiLineComment || isspace (*cp))
1154 			{
1155 				++cp;
1156 			}
1157 			else if (*cp == '#')
1158 			{
1159 				/* FIXME: this is wrong, but there *probably* won't be a
1160 				* definition after an interpolated string (where # doesn't
1161 				* mean 'comment').
1162 				*/
1163 				break;
1164 			}
1165 			else if (canMatchKeyword (&cp, "begin"))
1166 			{
1167 				enterUnnamedScope ();
1168 			}
1169 			else if (canMatchKeyword (&cp, "do"))
1170 			{
1171 				if (! expect_separator)
1172 				{
1173 					enterUnnamedScope ();
1174 					if (subparser && subparser->corkIndex)
1175 						parasiteToScope (subparser, subparser->corkIndex);
1176 				}
1177 				else
1178 					expect_separator = false;
1179 			}
1180 			else if (canMatchKeyword (&cp, "end") && nesting->n > 0)
1181 			{
1182 				/* Leave the most recent scope. */
1183 				nestingLevelsPop (nesting);
1184 			}
1185 			else if (*cp == '"' || *cp == '\'')
1186 			{
1187 				unsigned char b = *cp;
1188 				/* Skip string literals.
1189 				 * FIXME: should cope with escapes and interpolation.
1190 				 */
1191 				++cp;
1192 				parseString (&cp, b, NULL);
1193 			}
1194 			else if (*cp == ';')
1195 			{
1196 				++cp;
1197 				expect_separator = false;
1198 			}
1199 			else if (*cp != '\0')
1200 			{
1201 				do
1202 					++cp;
1203 				while (isIdentChar (*cp));
1204 			}
1205 		}
1206 	}
1207 	nestingLevelsFree (nesting);
1208 	vStringDelete (constant);
1209 }
1210 
RubyParser(void)1211 extern parserDefinition* RubyParser (void)
1212 {
1213 	static const char *const extensions [] = { "rb", "ruby", NULL };
1214 	parserDefinition* def = parserNew ("Ruby");
1215 	def->kindTable      = RubyKinds;
1216 	def->kindCount  = ARRAY_SIZE (RubyKinds);
1217 	def->extensions = extensions;
1218 	def->parser     = findRubyTags;
1219 	def->fieldTable = RubyFields;
1220 	def->fieldCount = ARRAY_SIZE (RubyFields);
1221 	def->useCork    = CORK_QUEUE;
1222 	return def;
1223 }
1224