xref: /Universal-ctags/parsers/r.c (revision aaaac7eeac8399141aa8e6d9e6ec0379931848b2)
1 /*
2 *   Copyright (c) 2003-2004, Ascher Stefan <stievie@utanet.at>
3 *   Copyright (c) 2020, Masatake YAMATO <yamato@redhat.com>
4 *   Copyright (c) 2020, Red Hat, Inc.
5 *
6 *   This source code is released for free distribution under the terms of the
7 *   GNU General Public License version 2 or (at your option) any later version.
8 *
9 *   This module contains functions for generating tags for R language files.
10 *   R is a programming language for statistical computing.
11 *   R is GPL Software, get it from http://www.r-project.org/
12 *
13 *   The language references are available at
14 *   https://cran.r-project.org/manuals.html, and
15 *   https://cran.r-project.org/doc/manuals/r-release/R-lang.html
16 *
17 *   The base library (including library and source functions) release is at
18 *   https://stat.ethz.ch/R-manual/R-devel/library/base/html/00Index.html
19 */
20 
21 /*
22 *   INCLUDE FILES
23 */
24 #include "general.h"	/* must always come first */
25 
26 #include "debug.h"
27 #include "entry.h"
28 #include "keyword.h"
29 #include "parse.h"
30 #include "read.h"
31 #include "selectors.h"
32 #include "tokeninfo.h"
33 #include "trace.h"
34 #include "vstring.h"
35 #include "subparser.h"
36 #include "r.h"
37 
38 #include <string.h>
39 #include <ctype.h>	/* to define isalpha(), isalnum(), isspace() */
40 
41 
42 /*
43 *   MACROS
44 */
/*
 * Tracing helpers.
 *
 * In DEBUG builds they print the text of a token (a newline token is shown
 * as "\n"), the name of its type, and the name of the cork entry given as
 * the parent (or "" when the parent is CORK_NIL).
 * R_TRACE_ENTER() implicitly uses the variables `token' and `parent' of the
 * calling function.
 *
 * In non-DEBUG builds they expand to an empty statement.  The expansions
 * intentionally do NOT end with a semicolon: the caller supplies it, so the
 * macros can be used safely as the body of an if/else.
 */
#ifdef DEBUG
#define R_TRACE_TOKEN_TEXT(TXT,T,Q) TRACE_PRINT("<%s> token: %s (%s), parent: %s", \
												(TXT),					\
												tokenIsTypeVal((T), '\n')? "\\n": tokenString(T), \
												tokenTypeStr((T)->type),	\
												(Q) == CORK_NIL? "": getEntryInCorkQueue(Q)->name)
#define R_TRACE_TOKEN(T,Q) TRACE_PRINT("token: %s (%s), parent: %s", \
									   tokenIsTypeVal((T), '\n')? "\\n": tokenString(T), \
									   tokenTypeStr((T)->type),			\
									   (Q) == CORK_NIL? "": getEntryInCorkQueue(Q)->name)

#define R_TRACE_ENTER() TRACE_ENTER_TEXT("token: %s (%s), parent: %s", \
										 tokenIsTypeVal(token, '\n')? "\\n": tokenString(token), \
										 tokenTypeStr(token->type), \
										 parent == CORK_NIL? "": getEntryInCorkQueue(parent)->name)
#define R_TRACE_LEAVE() TRACE_LEAVE()
#else
#define R_TRACE_TOKEN_TEXT(TXT,T,Q) do {} while (0)
#define R_TRACE_TOKEN(T,Q) do {} while (0)
#define R_TRACE_ENTER() do {} while (0)
#define R_TRACE_LEAVE() do {} while (0)
#endif
67 
68 
69 /*
70 *   DATA DEFINITIONS
71 */
/* Kinds of tags made by this parser.
 * RKinds is initialized positionally in this very order and kindExtraInfo
 * is indexed with these values, so keep the three definitions in sync. */
typedef enum {
	K_UNDEFINED = -1,
	K_FUNCTION,					/* 'f' */
	K_LIBRARY,					/* 'l', reference only */
	K_SOURCE,					/* 's', reference only */
	K_GLOBALVAR,				/* 'g' */
	K_FUNCVAR,					/* 'v' */
	K_PARAM,					/* 'z' */
	K_VECTOR,					/* 'c' */
	K_LIST,						/* 'L' */
	K_DATAFRAME,				/* 'd' */
	K_NAMEATTR,					/* 'n' */
	KIND_COUNT					/* number of kinds; not a kind itself */
} rKind;

/* Roles of K_LIBRARY; the values index RLibraryRoles. */
typedef enum {
	R_LIBRARY_ATTACHED_BY_LIBRARY,
	R_LIBRARY_ATTACHED_BY_REQUIRE,
} rLibraryRole;

/* Roles of K_SOURCE; the values index RSourceRoles. */
typedef enum {
	R_SOURCE_LOADED_BY_SOURCE,
} rSourceRole;
95 
96 static roleDefinition RLibraryRoles [] = {
97 	{ true, "library", "library attached by library function" },
98 	{ true, "require", "library attached by require function" },
99 };
100 
101 static roleDefinition RSourceRoles [] = {
102 	{ true, "source", "source loaded by source fucntion" },
103 };
104 
105 static kindDefinition RKinds[KIND_COUNT] = {
106 	{true, 'f', "function", "functions"},
107 	{true, 'l', "library", "libraries",
108 	 .referenceOnly = true, ATTACH_ROLES (RLibraryRoles) },
109 	{true, 's', "source", "sources",
110 	 .referenceOnly = true, ATTACH_ROLES (RSourceRoles) },
111 	{true, 'g', "globalVar", "global variables having values other than function()"},
112 	{true, 'v', "functionVar", "function variables having values other than function()"},
113 	{false,'z', "parameter",  "function parameters inside function definitions" },
114 	{true, 'c', "vector", "vectors explicitly created with `c()'" },
115 	{true, 'L', "list", "lists explicitly created with `list()'" },
116 	{true, 'd', "dataframe", "data frame explicitly created with `data.frame()'" },
117 	{true, 'n', "nameattr", "names attribtes in vectors, lists, or dataframes" },
118 };
119 
120 struct sKindExtraInfo {
121 	const char *anon_prefix;
122 	const char *ctor;
123 };
124 
125 static struct sKindExtraInfo kindExtraInfo[KIND_COUNT] = {
126 	[K_FUNCTION] = {
127 		"anonFunc",
128 		"function",
129 	},
130 	[K_VECTOR] = {
131 		"anonVec",
132 		"c",
133 	},
134 	[K_LIST] = {
135 		"anonList",
136 		"list",
137 	},
138 	[K_DATAFRAME] = {
139 		"anonDataFrame",
140 		"data.frame",
141 	},
142 };
143 
144 typedef enum {
145 	F_ASSIGNMENT_OPERATOR,
146 	F_CONSTRUCTOR,
147 } rField;
148 
149 static fieldDefinition RFields [] = {
150 	{
151 		.name = "assignmentop",
152 		.description = "operator for assignment",
153 		.enabled = false,
154 	},
155 	{
156 		.name = "constructor",
157 		.description = "function used for making value assigned to the nameattr tag",
158 		.enabled = true,
159 	}
160 };
161 
162 typedef int keywordId;			/* to allow KEYWORD_NONE */
163 
164 static const keywordTable RKeywordTable [] = {
165 	{ "c",        KEYWORD_R_C        },
166 	{ "list",     KEYWORD_R_LIST     },
167 	{ "data.frame",KEYWORD_R_DATAFRAME },
168 	{ "function", KEYWORD_R_FUNCTION },
169 	{ "if",       KEYWORD_R_IF       },
170 	{ "else",     KEYWORD_R_ELSE     },
171 	{ "for",      KEYWORD_R_FOR      },
172 	{ "while",    KEYWORD_R_WHILE    },
173 	{ "repeat",   KEYWORD_R_REPEAT   },
174 	{ "in",       KEYWORD_R_IN       },
175 	{ "next",     KEYWORD_R_NEXT     },
176 	{ "break",    KEYWORD_R_BREAK    },
177 	{ "TRUE",     KEYWORD_R_TRUE,    },
178 	{ "FALSE",    KEYWORD_R_FALSE,   },
179 	{ "NULL",     KEYWORD_R_NULL,    },
180 	{ "Inf",      KEYWORD_R_INF,     },
181 	{ "NaN",      KEYWORD_R_NAN,     },
182 	{ "NA",       KEYWORD_R_NA,      },
183 	{ "NA_integer_",   KEYWORD_R_NA, },
184 	{ "NA_real_",      KEYWORD_R_NA, },
185 	{ "NA_complex_",   KEYWORD_R_NA, },
186 	{ "NA_character_", KEYWORD_R_NA, },
187 	{ "source",   KEYWORD_R_SOURCE   },
188 	{ "library",  KEYWORD_R_LIBRARY  },
189 	{ "require",  KEYWORD_R_LIBRARY  },
190 };
191 
192 #ifdef DEBUG
193 static const char *tokenTypeStr(enum RTokenType e);
194 #endif
195 
196 static struct tokenTypePair typePairs [] = {
197 	{ '{', '}' },
198 	{ '[', ']' },
199 	{ '(', ')' },
200 };
201 
202 typedef struct sRToken {
203 	tokenInfo base;
204 	int scopeIndex;
205 	int parenDepth;
206 	vString *signature;
207 	int kindIndexForParams;		/* Used only when gathering parameters */
208 } rToken;
209 
210 #define R(TOKEN) ((rToken *)TOKEN)
211 
212 static int blackHoleIndex;
213 
214 static langType Lang_R;
215 
216 static void readToken (tokenInfo *const token, void *data);
217 static void clearToken (tokenInfo *token);
218 static struct tokenInfoClass rTokenInfoClass = {
219 	.nPreAlloc        = 4,
220 	.typeForUndefined = TOKEN_R_UNDEFINED,
221 	.keywordNone      = KEYWORD_NONE,
222 	.typeForKeyword   = TOKEN_R_KEYWORD,
223 	.typeForEOF       = TOKEN_R_EOF,
224 	.extraSpace       = sizeof (rToken) - sizeof (tokenInfo),
225 	.pairs            = typePairs,
226 	.pairCount        = ARRAY_SIZE (typePairs),
227 	.init             = NULL,
228 	.read             = readToken,
229 	.clear            = clearToken,
230 	.copy             = NULL,
231 };
232 
233 
234 /*
235  * FUNCTION PROTOTYPES
236  */
237 static bool parseStatement (tokenInfo *const token, int parent, bool in_arglist, bool in_continuous_pair);
238 static void parsePair (tokenInfo *const token, int parent, tokenInfo *const funcall);
239 
240 static  int notifyReadRightSideSymbol (tokenInfo *const symbol,
241 									   const char *const assignmentOperator,
242 									   int parent,
243 									   tokenInfo *const token);
244 static  int makeSimpleSubparserTag (int langType, tokenInfo *const token, int parent,
245 									bool in_func, int kindInR, const char *assignmentOperator);
246 static  bool askSubparserTagAcceptancy (tagEntryInfo *pe);
247 static  bool askSubparserTagHasFunctionAlikeKind (tagEntryInfo *e);
248 static  int notifyReadFuncall (tokenInfo *const func, tokenInfo *const token, int parent);
249 
250 /*
251 *   FUNCTION DEFINITIONS
252 */
/* Return true if the tag E matches one of the COUNT kinds in KINDS.
 *
 * A tag matches when
 * - it is a tag of the R parser and its kind is listed in KINDS, or
 * - it is a tag of another parser, K_FUNCTION is listed in KINDS, and
 *   askSubparserTagHasFunctionAlikeKind() accepts it, or
 * - its "constructor" field holds the constructor name (see kindExtraInfo)
 *   of one of KINDS.
 */
static bool hasKindsOrCtors (tagEntryInfo * e, int kinds[], size_t count)
{
	size_t i;

	if (e->langType == Lang_R)
	{
		for (i = 0; i < count; i++)
			if (kinds[i] == e->kindIndex)
				return true;
	}
	else
	{
		bool wants_function = false;

		for (i = 0; i < count && !wants_function; i++)
			wants_function = (kinds[i] == K_FUNCTION);

		if (wants_function && askSubparserTagHasFunctionAlikeKind (e))
			return true;
	}

	/* Fall back to comparing the constructor recorded in the tag. */
	const char *const ctor_of_tag
		= getParserFieldValueForType (e, RFields [F_CONSTRUCTOR].ftype);
	if (ctor_of_tag == NULL)
		return false;

	for (i = 0; i < count; i++)
	{
		const char *const ctor = kindExtraInfo [kinds[i]].ctor;
		if (ctor != NULL && strcmp (ctor_of_tag, ctor) == 0)
			return true;
	}

	return false;
}
292 
searchScopeOtherThan(int scope,int kinds[],size_t count)293 static int searchScopeOtherThan (int scope, int kinds[], size_t count)
294 {
295 	do
296 	{
297 		tagEntryInfo * e = getEntryInCorkQueue (scope);
298 		if (!e)
299 			return CORK_NIL;
300 
301 		if (!hasKindsOrCtors (e, kinds, count))
302 			return scope;
303 
304 		scope = e->extensionFields.scopeIndex;
305 	}
306 	while (1);
307 }
308 
/* Make a tag of the R parser itself for TOKEN.
 *
 * PARENT is the cork index of the scope, KIND the requested kind.
 * ASSIGNMENTOP is the text of the assignment operator; NULL means no
 * operator is involved, and an empty string marks the tag as anonymous.
 *
 * Returns the cork index of the new tag, or CORK_NIL when the name is
 * regarded as already tagged and nothing is made.
 */
static int makeSimpleRTagR (tokenInfo *const token, int parent, int kind,
							const char * assignmentOp)
{
	const char *const name = tokenString (token);

	if (assignmentOp != NULL && strlen (assignmentOp) == 3)
	{
		/* <<- or ->> is used here. */
		int variable_kinds[] = { K_FUNCTION, K_GLOBALVAR, K_FUNCVAR, K_PARAM };

		if (anyKindsEntryInScopeRecursive (parent, name,
										   variable_kinds, 4,
										   false) != CORK_NIL)
			return CORK_NIL;

		/* Nothing with this name is found in the scopes searched above:
		   make the tag without a scope. */
		parent = CORK_NIL;
	}

	/* If the tag (T) to be created is defined in a scope and
	   the scope already has another tag having the same name
	   as T, T should not be created. */
	int cousin = CORK_NIL;
	tagEntryInfo *const pe = getEntryInCorkQueue (parent);
	if (pe != NULL)
	{
		int container_kinds[] = { K_VECTOR, K_LIST, K_DATAFRAME };
		const bool in_function = (pe->langType == Lang_R)
			? (pe->kindIndex == K_FUNCTION)
			: askSubparserTagHasFunctionAlikeKind (pe);

		if (in_function)
		{
			cousin = anyEntryInScope (parent, name, false);
			if (kind == K_GLOBALVAR)
				kind = K_FUNCVAR;
		}
		else if (kind == K_GLOBALVAR
				 && hasKindsOrCtors (pe, container_kinds, 3))
		{
			/* A variable appearing in a vector, list, or data frame
			   belongs to the scope enclosing the container(s). */
			parent = searchScopeOtherThan (pe->extensionFields.scopeIndex,
										   container_kinds, 3);
			if (parent == CORK_NIL)
				cousin = anyKindEntryInScope (parent, name, K_GLOBALVAR, false);
			else
			{
				cousin = anyKindEntryInScope (parent, name, K_FUNCVAR, false);
				kind = K_FUNCVAR;
			}
		}
		else
		{
			/* The condition for tagging is a bit relaxed here.
			   Even if the same name tag is created in the scope, a name
			   is tagged if kinds are different. */
			cousin = anyKindEntryInScope (parent, name, kind, false);
		}
	}

	if (cousin != CORK_NIL)
		return CORK_NIL;

	const int corkIndex = makeSimpleTag (token->string, kind);
	tagEntryInfo *const tag = getEntryInCorkQueue (corkIndex);
	if (tag == NULL)
		return corkIndex;

	tag->extensionFields.scopeIndex = parent;
	if (assignmentOp != NULL)
	{
		if (assignmentOp[0] != '\0')
			attachParserField (tag, true,
							   RFields [F_ASSIGNMENT_OPERATOR].ftype,
							   assignmentOp);
		else
			markTagExtraBit (tag, XTAG_ANONYMOUS);
	}
	registerEntry (corkIndex);

	return corkIndex;
}
379 
/* Make a tag for TOKEN in the scope PARENT, delegating to a subparser when
 * the scope is a tag of another parser that accepts it.
 *
 * Inside a vector, list, or data frame, a name followed by "=" is tagged
 * as K_NAMEATTR instead of KIND.  The constructor name associated with the
 * KIND passed by the caller is attached to K_NAMEATTR tags and to tags
 * made by a subparser.
 *
 * Returns the cork index reported by the function making the tag.
 */
static int makeSimpleRTag (tokenInfo *const token, int parent, bool in_func, int kind,
						   const char * assignmentOp)
{
	/* Must be looked up before KIND is possibly rewritten below. */
	const char *const ctor = kindExtraInfo [kind].ctor;

	tagEntryInfo *pe = NULL;
	if (parent != CORK_NIL)
		pe = getEntryInCorkQueue (parent);

	/* makeTagWithTranslation method for subparsers
	   called from makeSimpleSubparserTag expects
	   kind should be resolved. */
	int container_kinds[] = { K_VECTOR, K_LIST, K_DATAFRAME };
	if (pe != NULL
		&& hasKindsOrCtors (pe, container_kinds, 3)
		&& assignmentOp != NULL
		&& strcmp (assignmentOp, "=") == 0)
		kind = K_NAMEATTR;

	const bool foreign_tag = (pe != NULL
							  && pe->langType != Lang_R
							  && askSubparserTagAcceptancy (pe));
	int r;
	if (foreign_tag)
		r = makeSimpleSubparserTag (pe->langType, token, parent, in_func,
									kind, assignmentOp);
	else
		r = makeSimpleRTagR (token, parent, kind, assignmentOp);

	if (ctor != NULL && (kind == K_NAMEATTR || foreign_tag))
	{
		tagEntryInfo *const e = getEntryInCorkQueue (r);
		if (e != NULL)
			attachParserField (e, true,
							   RFields [F_CONSTRUCTOR].ftype,
							   ctor);
	}

	return r;
}
419 
/* The `clear' method of rTokenInfoClass: reset the R specific members of
 * TOKEN.  A signature still attached to the token is deleted. */
static void clearToken (tokenInfo *token)
{
	rToken *const rtoken = R (token);

	if (rtoken->signature != NULL)
	{
		vStringDelete (rtoken->signature);
		rtoken->signature = NULL;
	}

	rtoken->kindIndexForParams = KIND_GHOST_INDEX;
	rtoken->scopeIndex = CORK_NIL;
	rtoken->parenDepth = 0;
}
431 
/* Append the rest of a quoted token to TOKEN.
 *
 * The first character already stored in TOKEN is taken as the opening
 * quote.  Characters are appended, the closing quote included, until an
 * unescaped quote character equal to the opening one is read, or until EOF.
 * A backslash escapes the character following it.
 */
static void readString (tokenInfo *const token, void *data)
{
	const int opener = tokenString(token)[0];
	bool escaped = false;

	for (;;)
	{
		const int c = getcFromInputFile ();
		if (c == EOF)
			return;

		tokenPutc (token, c);

		if (c == '\\')
		{
			/* "\\" is an escaped backslash; it escapes nothing. */
			escaped = !escaped;
			continue;
		}

		const bool is_quote = (c == '\'' || c == '"' || c == '`');
		if (is_quote && !escaped && c == opener)
			return;

		escaped = false;
	}
}
465 
/* Append the rest of a numeric constant to TOKEN.
 *
 * The scanner is deliberately permissive: it accepts any run of hexadecimal
 * digits and the characters . E + - L x p i.  The first character not in
 * that set is pushed back to the input, except for a NUL byte, which just
 * ends the scan.
 */
static void readNumber (tokenInfo *const token, void *data)
{
	/* 10.3.1 Constants
	 *
	 * Valid numeric constants: 1 10 0.1 .2 1e-7 1.2e+7
	 * Valid integer constants:  1L, 0x10L, 1000000L, 1e6L
	 * Valid numeric constants:  1.1L, 1e-3L, 0x1.1p-2
	 * Valid complex constants: 2i 4.1i 1e-2i
	 */
	for (;;)
	{
		const int c = getcFromInputFile ();
		if (c == 0)
			return;

		const bool part_of_number = isxdigit (c)
			|| c == '.' || c == 'E'
			|| c == '+' || c == '-'
			|| c == 'L' || c == 'x' || c == 'p'
			|| c == 'i';
		if (!part_of_number)
		{
			ungetcToInputFile (c);
			return;
		}

		tokenPutc (token, c);
	}
}
491 
/* Append the rest of a symbol to TOKEN: alphanumeric characters, '.', and
 * '_' are accepted.  The first character not accepted is pushed back to
 * the input, except for a NUL byte, which just ends the scan. */
static void readSymbol (tokenInfo *const token, void *data)
{
	for (;;)
	{
		const int c = getcFromInputFile ();
		if (c == 0)
			return;

		if (!(isalnum (c) || c == '.' || c == '_'))
		{
			ungetcToInputFile (c);
			return;
		}

		tokenPutc (token, c);
	}
}
506 
resolveKeyword(vString * string)507 static keywordId resolveKeyword (vString *string)
508 {
509 	char *s = vStringValue (string);
510 	static langType lang = LANG_AUTO;
511 
512 	if (lang == LANG_AUTO)
513 		lang = getInputLanguage ();
514 
515 	return lookupCaseKeyword (s, lang);
516 }
517 
/* Return true if the next symbol appended to SIGNATURE is at the position
 * of a parameter name: SIGNATURE ends with '(', or with ',' optionally
 * followed by spaces. */
static bool signatureExpectingParameter (vString *signature)
{
	if (vStringLast (signature) == '(')
		return true;

	/* Ignore trailing spaces, then look at what precedes them. */
	size_t len = vStringLength (signature);
	while (len > 0 && vStringChar (signature, len - 1) == ' ')
		len--;

	return len > 0 && vStringChar (signature, len - 1) == ',';
}
534 
/* The `read' method of rTokenInfoClass: read the next token from the input
 * file into TOKEN.
 *
 * Spaces, tabs, and form feeds are skipped.  A newline is a token of its
 * own (type '\n'); a comment is reported as the newline terminating it, or
 * as EOF.  Punctuation characters having a meaning for the parser use the
 * character itself as the token type.
 *
 * If a signature is attached to TOKEN (see setupCollectingSignature()),
 * the text of the token is appended to it, and symbols found at parameter
 * positions at the outermost parenthesis level are tagged when a kind for
 * parameters is given.
 */
static void readToken (tokenInfo *const token, void *data)
{
	int c, c0;

	token->type = TOKEN_R_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

	do
		c = getcFromInputFile ();
	while (c == ' ' || c== '\t' || c == '\f');

	token->lineNumber   = getInputLineNumber ();
	token->filePosition = getInputFilePosition ();

	switch (c)
	{
	case EOF:
		token->type = TOKEN_R_EOF;
		break;
	case '#':
		/* Skip the comment.  The newline ending it becomes the token. */
		while (1)
		{
			c = getcFromInputFile ();
			if (c == EOF)
			{
				token->type = TOKEN_R_EOF;
				break;
			}
			else if (c == '\n')
			{
				token->type = c;
				tokenPutc (token, c);
				break;
			}
		}
		break;
	case '\n':
	case ';':
		token->type = c;
		tokenPutc (token, c);
		break;
	case '\'':
	case '"':
	case '`':
		token->type = TOKEN_R_STRING;
		tokenPutc (token, c);
		readString (token, data);
		break;
	case '+':
	case '/':
	case '^':
	case '~':
		token->type = TOKEN_R_OPERATOR;
		tokenPutc (token, c);
		break;
	case ':':
		/* : (operator), :: or ::: (scope) */
		token->type = TOKEN_R_OPERATOR;
		tokenPutc (token, c);
		c = getcFromInputFile ();
		if (c == ':')
		{
			tokenPutc (token, c);
			token->type = TOKEN_R_SCOPE;
			c = getcFromInputFile ();
			if (c == ':')
				tokenPutc (token, c);
			else
				ungetcToInputFile (c);
		}
		else
			ungetcToInputFile (c);
		break;
	case '&':
	case '|':
	case '*':
		/* The single and the doubled forms: & &&, | ||, * ** */
		token->type = TOKEN_R_OPERATOR;
		tokenPutc (token, c);
		c0 = getcFromInputFile ();
		if (c == c0)
			tokenPutc (token, c0);
		else
			ungetcToInputFile (c0);
		break;
	case '=':
		/* == is an operator; a lone = gets the type '='. */
		token->type = TOKEN_R_OPERATOR;
		tokenPutc (token, c);
		c = getcFromInputFile ();
		if (c == '=')
			tokenPutc (token, c);
		else
		{
			token->type = '=';
			ungetcToInputFile (c);
		}
		break;
	case '-':
		/* - (operator), -> or ->> (right assignment) */
		token->type = TOKEN_R_OPERATOR;
		tokenPutc (token, c);
		c = getcFromInputFile ();
		if (c == '>')
		{
			token->type = TOKEN_R_RASSIGN;
			tokenPutc (token, c);
			c = getcFromInputFile ();
			if (c == '>')
				tokenPutc (token, c);
			else
				ungetcToInputFile (c);
		}
		else
			ungetcToInputFile (c);
		break;
	case '>':
		token->type = TOKEN_R_OPERATOR;
		tokenPutc (token, c);
		c = getcFromInputFile ();
		if (c == '=')
			tokenPutc (token, c);
		else
			ungetcToInputFile (c);
		break;
	case '<':
		/* < or <= (operator), <- or <<- (left assignment) */
		token->type = TOKEN_R_OPERATOR;
		tokenPutc (token, c);
		c = getcFromInputFile ();

		/* <<- */
		if (c == '<')
		{
			tokenPutc (token, c);
			c = getcFromInputFile ();
		}

		if (c == '-')
		{
			token->type = TOKEN_R_LASSIGN;
			tokenPutc (token, c);
		}
		else if (c == '=')
			tokenPutc (token, c);
		else
			ungetcToInputFile (c);
		break;
	case '%':
		/* %...% operator: take everything up to the closing % (or EOF). */
		token->type = TOKEN_R_OPERATOR;
		tokenPutc (token, c);
		do
		{
			c = getcFromInputFile ();
			if (c == EOF)
				break;

			tokenPutc (token, c);
			if (c == '%')
				break;
		}
		while (1);
		break;
	case '!':
		token->type = TOKEN_R_OPERATOR;
		tokenPutc (token, c);
		c = getcFromInputFile ();
		if (c == '=')
			tokenPutc (token, c);
		else
			ungetcToInputFile (c);
		break;
	case '{':
	case '}':
	case '(':
	case ')':
	case '[':
	case ']':
	case ',':
	case '$':
	case '@':
		token->type = c;
		tokenPutc (token, c);
		break;
	case '.':
		/* A number (.5), a symbol (.name, ..name), ..., or ..N. */
		tokenPutc (token, c);
		c = getcFromInputFile ();
		if (isdigit(c))
		{
			token->type = TOKEN_R_NUMBER;
			tokenPutc (token, c);
			readNumber(token, data);
		}
		else if (isalpha (c) || c == '_')
		{
			token->type = TOKEN_R_SYMBOL;
			tokenPutc (token, c);
			readSymbol (token, data);

			token->keyword = resolveKeyword (token->string);
			if (token->keyword != KEYWORD_NONE)
				token->type = TOKEN_R_KEYWORD;
		}
		else if (c == '.')
		{
			token->type = TOKEN_R_DOTS;
			tokenPutc (token, c);

			c = getcFromInputFile ();
			if (c == '.')
				tokenPutc (token, c);
			else if (isdigit(c))
			{
				token->type = TOKEN_R_DOTS_N;
				do
				{
					tokenPutc (token, c);
					c = getcFromInputFile ();
				}
				while (isdigit(c));
				ungetcToInputFile (c);
			}
			else if (isalpha (c) || c == '_')
			{
				token->type = TOKEN_R_SYMBOL;
				tokenPutc (token, c);
				readSymbol (token, data);

				token->keyword = resolveKeyword (token->string);
				if (token->keyword != KEYWORD_NONE)
					token->type = TOKEN_R_KEYWORD;
			}
			else
			{
				token->type = TOKEN_R_UNDEFINED;
				ungetcToInputFile (c);
			}
		}
		else
		{
			/* A lone "." as in `y ~ .' or `f(.)'.  The character read
			 * ahead is not a part of this token; push it back so that
			 * a following `,' or `)' is not lost.  The type of the
			 * token stays TOKEN_R_UNDEFINED. */
			ungetcToInputFile (c);
		}
		break;
	default:
		tokenPutc (token, c);
		if (isdigit (c))
		{
			token->type = TOKEN_R_NUMBER;
			readNumber(token, data);
		}
		else if (isalpha (c))
		{
			token->type = TOKEN_R_SYMBOL;
			readSymbol (token, data);

			token->keyword = resolveKeyword (token->string);
			if (token->keyword != KEYWORD_NONE)
				token->type = TOKEN_R_KEYWORD;
		}
		else
			token->type = TOKEN_R_UNDEFINED;
		break;
	}

	/* Handle parameters in a signature */
	if (R(token)->signature && !tokenIsType(token, R_EOF) && !tokenIsTypeVal(token, '\n'))
	{
		vString *signature = R (token)->signature;

		if (tokenIsTypeVal (token, '('))
			R (token)->parenDepth++;
		else if (tokenIsTypeVal (token, ')'))
			R (token)->parenDepth--;

		/* Tag a symbol as a parameter only at the outermost level of
		 * parentheses and only just after '(' or ','. */
		if (R (token)->kindIndexForParams != KIND_GHOST_INDEX
			&& R (token)->parenDepth == 1 && tokenIsType (token, R_SYMBOL)
			&& signatureExpectingParameter (signature))
			makeSimpleRTag (token, R (token)->scopeIndex, false,
							R (token)->kindIndexForParams, NULL);

		/* Separate tokens with a space, except just after '(' and
		 * before ',' or ')'. */
		if (vStringLast (signature) != '(' &&
			!tokenIsTypeVal (token, ',') &&
			!tokenIsTypeVal (token, ')'))
			vStringPut (signature, ' ');
		vStringCat (signature, token->string);
	}
}
814 
815 #define newRToken rNewToken
rNewToken(void)816 extern tokenInfo *rNewToken (void)
817 {
818 	return newToken (&rTokenInfoClass);
819 }
820 
821 #define tokenReadNoNewline rTokenReadNoNewline
rTokenReadNoNewline(tokenInfo * const token)822 extern void rTokenReadNoNewline (tokenInfo *const token)
823 {
824 	while (1)
825 	{
826 		tokenRead(token);
827 		if (!tokenIsTypeVal (token, '\n'))
828 			break;
829 	}
830 }
831 
/* Make readToken() append the text of the tokens read into TOKEN to
 * SIGNATURE from now on.
 *
 * If KINDINDEXFORPARAMS is not KIND_GHOST_INDEX, readToken() also tags the
 * parameters it finds with that kind, using CORKINDEX as their scope.
 * The parenthesis depth starts at 1.
 * The token only borrows SIGNATURE; teardownCollectingSignature() detaches
 * it without deleting it.
 */
static void setupCollectingSignature (tokenInfo *const token,
									  vString   *signature,
									  int kindIndexForParams,
									  int corkIndex)
{
	rToken *const rtoken = R (token);

	rtoken->parenDepth = 1;
	rtoken->scopeIndex = corkIndex;
	rtoken->kindIndexForParams = kindIndexForParams;
	rtoken->signature = signature;
}
842 
/* Exported variant of setupCollectingSignature(): the text of the tokens
 * is gathered into SIGNATURE, with KIND_GHOST_INDEX as the kind for
 * parameters and CORK_NIL as the scope. */
extern void rSetupCollectingSignature (tokenInfo *const token,
									   vString   *signature)
{
	const int no_param_kind = KIND_GHOST_INDEX;
	const int no_scope = CORK_NIL;

	setupCollectingSignature (token, signature, no_param_kind, no_scope);
}
849 
/* Stop gathering a signature with TOKEN.  The signature is only detached
 * from the token here, not deleted. */
static void teardownCollectingSignature (tokenInfo *const token)
{
	rToken *const rtoken = R (token);

	rtoken->signature = NULL;
	rtoken->kindIndexForParams = KIND_GHOST_INDEX;
	rtoken->scopeIndex = CORK_NIL;
	rtoken->parenDepth = 0;
}
857 
/* Exported entry point that simply forwards to
 * teardownCollectingSignature(). */
extern void rTeardownCollectingSignature (tokenInfo *const token)
{
	teardownCollectingSignature (
		token);
}
862 
/* Map the constructor keyword held by TOKEN to the kind of the object it
 * makes: function -> K_FUNCTION, c -> K_VECTOR, list -> K_LIST,
 * data.frame -> K_DATAFRAME.  Any other token yields K_GLOBALVAR. */
static int getKindForToken (tokenInfo *const token)
{
	if (tokenIsKeyword (token, R_FUNCTION))
		return K_FUNCTION;

	if (tokenIsKeyword (token, R_C))
		return K_VECTOR;

	if (tokenIsKeyword (token, R_LIST))
		return K_LIST;

	if (tokenIsKeyword (token, R_DATAFRAME))
		return K_DATAFRAME;

	return K_GLOBALVAR;
}
875 
/* Callback passed to foreachEntriesInScope() by parseRightSide().
 * DATA points to a bool that is set to true when ENTRY is not a
 * placeholder; in that case false is returned, otherwise true.
 * (Presumably a false return value stops the iteration -- see
 * foreachEntriesInScope().) */
static bool findNonPlaceholder (int corkIndex, tagEntryInfo *entry, void *data)
{
	bool *const found = data;

	if (entry->placeholder)
		return true;

	*found = true;
	return false;
}
886 
/* Parse the right side of an assignment and make a tag for SYMBOL.
 *
 * On entry TOKEN holds the assignment operator; its text is empty when
 * parseStatement() calls this function for an anonymous object.
 * PARENT is the cork index of the scope.
 *
 * Subparsers are asked first; if none of them makes a tag, a tag of the
 * R parser is made.  For a function, the signature is gathered and the
 * body is parsed in the scope of the new tag.  For a vector, list, or
 * data frame, the arguments are parsed in the scope of the new tag.
 */
static void parseRightSide (tokenInfo *const token, tokenInfo *const symbol, int parent)
{
	R_TRACE_ENTER();

	vString *signature = NULL;
	/* Copy the operator: the token is overwritten by the reads below. */
	char *const assignment_operator = eStrdup (tokenString (token));

	tokenReadNoNewline (token);

	const int kind = getKindForToken (token);
	const bool is_container = (kind == K_VECTOR
							   || kind == K_LIST
							   || kind == K_DATAFRAME);

	/* Call sub parsers */
	int corkIndex = notifyReadRightSideSymbol (symbol,
											   assignment_operator,
											   parent,
											   token);
	/* No subparser handle the symbol */
	if (corkIndex == CORK_NIL)
		corkIndex = makeSimpleRTag (symbol, parent, kind == K_FUNCTION,
									kind,
									assignment_operator);

	if (kind == K_FUNCTION)
	{
		const bool tagged = (corkIndex != CORK_NIL);

		/* parse signature */
		tokenReadNoNewline (token);
		if (tokenIsTypeVal (token, '('))
		{
			if (tagged)
			{
				signature = vStringNewInit("(");
				setupCollectingSignature (token, signature, K_PARAM, corkIndex);
			}
			tokenSkipOverPair (token);
			if (tagged)
				teardownCollectingSignature (token);

			tokenReadNoNewline (token);
		}

		/* Without a tag for the function, its body is parsed in the
		 * scope of the black hole. */
		parent = tagged? corkIndex: blackHoleIndex;
	}
	else if (is_container)
	{
		tokenRead (token);
		parsePair (token, corkIndex, NULL);
		tokenRead (token);
		parent = corkIndex;
	}

	R_TRACE_TOKEN_TEXT("body", token, parent);

	parseStatement (token, parent, false, false);

	tagEntryInfo *const tag = getEntryInCorkQueue (corkIndex);
	if (tag != NULL)
	{
		tag->extensionFields.endLine = token->lineNumber;

		if (signature != NULL)
		{
			/* The tag takes over the text of the signature. */
			tag->extensionFields.signature = vStringDeleteUnwrap(signature);
			signature = NULL;
		}

		/* If a vector has no named attribute and it has no lval,
		 * we don't make a tag for the vector. */
		if (is_container && assignment_operator[0] == '\0')
		{
			bool has_real_entry = false;
			foreachEntriesInScope (corkIndex, NULL,
								   findNonPlaceholder, &has_real_entry);
			if (!has_real_entry)
				tag->placeholder = 1;
		}
	}

	vStringDelete (signature);	/* NULL is acceptable. */
	eFree (assignment_operator);
	R_TRACE_LEAVE();
}
970 
971 /* Parse arguments for library and source. */
preParseExternalEntitiy(tokenInfo * const token,tokenInfo * const funcall)972 static bool preParseExternalEntitiy (tokenInfo *const token, tokenInfo *const funcall)
973 {
974 	TRACE_ENTER();
975 
976 	bool r = true;
977 	tokenInfo *prefetch_token = newRToken ();
978 
979 	tokenReadNoNewline (prefetch_token);
980 	if (tokenIsType (prefetch_token, R_SYMBOL)
981 		|| tokenIsType (prefetch_token, R_STRING))
982 	{
983 		tokenInfo *const loaded_obj_token = newTokenByCopying (prefetch_token);
984 		tokenReadNoNewline (prefetch_token);
985 		if (tokenIsTypeVal (prefetch_token, ')')
986 			|| tokenIsTypeVal (prefetch_token, ','))
987 		{
988 			if (tokenIsTypeVal (prefetch_token, ')'))
989 				r = false;
990 
991 			makeSimpleRefTag (loaded_obj_token->string,
992 							  (tokenIsKeyword (funcall, R_LIBRARY)
993 							   ? K_LIBRARY
994 							   : K_SOURCE),
995 							  (tokenIsKeyword (funcall, R_LIBRARY)
996 							   ? (strcmp (tokenString(funcall), "library") == 0
997 								  ? R_LIBRARY_ATTACHED_BY_LIBRARY
998 								  : R_LIBRARY_ATTACHED_BY_REQUIRE)
999 							   : R_SOURCE_LOADED_BY_SOURCE));
1000 			tokenDelete (loaded_obj_token);
1001 		}
1002 		else if (tokenIsEOF (prefetch_token))
1003 		{
1004 			tokenCopy (token, prefetch_token);
1005 			tokenDelete (loaded_obj_token);
1006 			r = false;
1007 		}
1008 		else
1009 		{
1010 			tokenUnread (prefetch_token);
1011 			tokenUnread (loaded_obj_token);
1012 			tokenDelete (loaded_obj_token);
1013 		}
1014 	}
1015 	else if (tokenIsEOF (prefetch_token))
1016 	{
1017 		tokenCopy (token, prefetch_token);
1018 		r = false;
1019 	}
1020 	else
1021 		tokenUnread (prefetch_token);
1022 
1023 	tokenDelete (prefetch_token);
1024 
1025 	TRACE_LEAVE_TEXT(r
1026 					 ? "unread tokens and request parsing again to the upper context"
1027 					 : "parse all arguments");
1028 	return r;
1029 }
1030 
/* Read the token following "for (" into TOKEN.  If it is a symbol, it is
 * tagged as K_GLOBALVAR in the scope PARENT (makeSimpleRTag() may adjust
 * the kind according to the scope).
 *
 * Returns false if TOKEN is EOF or ')', i.e. nothing is left to be parsed
 * by the caller, and true otherwise.
 */
static bool preParseLoopCounter(tokenInfo *const token, int parent)
{
	TRACE_ENTER();

	tokenReadNoNewline (token);
	if (tokenIsType (token, R_SYMBOL))
		makeSimpleRTag (token, parent, false, K_GLOBALVAR, NULL);

	const bool r = !(tokenIsEOF (token)
					 || tokenIsTypeVal (token, ')'));

	TRACE_LEAVE_TEXT(r
					 ? "unread tokens and request parsing again to the upper context"
					 : "parse all arguments");
	return r;
}
1049 
1050 
1051 /* If funcall is non-NULL, this pair represents the argument list for the function
1052  * call for FUNCALL. */
parsePair(tokenInfo * const token,int parent,tokenInfo * const funcall)1053 static void parsePair (tokenInfo *const token, int parent, tokenInfo *const funcall)
1054 {
1055 	R_TRACE_ENTER();
1056 
1057 	bool in_continuous_pair = tokenIsTypeVal (token, '(')
1058 		|| tokenIsTypeVal (token, '[');
1059 	bool is_funcall = funcall && tokenIsTypeVal (token, '(');
1060 	bool done = false;
1061 
1062 	if (is_funcall)
1063 	{
1064 		if 	(tokenIsKeyword (funcall, R_LIBRARY) ||
1065 			 tokenIsKeyword (funcall, R_SOURCE))
1066 			done = !preParseExternalEntitiy (token, funcall);
1067 		else if (tokenIsKeyword (funcall, R_FOR))
1068 			done = !preParseLoopCounter (token, parent);
1069 		else if (notifyReadFuncall (funcall, token, parent) != CORK_NIL)
1070 			done = true;
1071 	}
1072 
1073 	if (done)
1074 	{
1075 		R_TRACE_LEAVE();
1076 		return;
1077 	}
1078 
1079 	do
1080 	{
1081 		tokenRead (token);
1082 		R_TRACE_TOKEN_TEXT("inside pair", token, parent);
1083 		parseStatement (token, parent, (funcall != NULL), in_continuous_pair);
1084 	}
1085 	while (! (tokenIsEOF (token)
1086 			  || tokenIsTypeVal (token, ')')
1087 			  || tokenIsTypeVal (token, '}')
1088 			  || tokenIsTypeVal (token, ']')));
1089 	R_TRACE_LEAVE();
1090 }
1091 
isAtConstructorInvocation(void)1092 static bool isAtConstructorInvocation (void)
1093 {
1094 	bool r = false;
1095 
1096 	tokenInfo *const token = newRToken ();
1097 	tokenRead (token);
1098 	if (tokenIsTypeVal (token, '('))
1099 		r = true;
1100 	tokenUnread (token);
1101 	tokenDelete (token);
1102 	return r;
1103 }
1104 
/* Parse one statement, starting from the token already held in TOKEN.
 *
 * PARENT is the cork index passed down to the tag-making helpers.
 * IN_ARGLIST makes "symbol = ..." skip the left side instead of treating it
 * as an assignment.
 * IN_CONTINUOUS_PAIR selects tokenReadNoNewline() over tokenRead() for the
 * token that follows a symbol, string or keyword.
 *
 * The loop stops at EOF, at ';', at a newline token, at a closing
 * ')', '}' or ']', or right after the right side of an assignment has been
 * parsed.
 *
 * Returns true if rTokenInfoClass.read_counter changed while this function
 * was running. */
static bool parseStatement (tokenInfo *const token, int parent,
							bool in_arglist, bool in_continuous_pair)
{
	R_TRACE_ENTER();
	const int reads_at_entry = rTokenInfoClass.read_counter;

	while (!tokenIsEOF (token))
	{
		/* Statement terminators. */
		if (tokenIsTypeVal (token, ';'))
		{
			R_TRACE_TOKEN_TEXT ("break with ;", token, parent);
			break;
		}

		if (tokenIsTypeVal (token, '\n'))
		{
			R_TRACE_TOKEN_TEXT ("break with \\n", token, parent);
			break;
		}

		/* "function", or c/list/data.frame directly followed by '(':
		 * the statement doesn't start with a symbol, so the value is not
		 * assigned to any symbol.  Generate an anonymous name for it and
		 * parse it as a right side. */
		if (tokenIsKeyword (token, R_FUNCTION)
			|| ((tokenIsKeyword (token, R_C)
				 || tokenIsKeyword (token, R_LIST)
				 || tokenIsKeyword (token, R_DATAFRAME))
				&& isAtConstructorInvocation ()))
		{
			tokenInfo *const anon = newTokenByCopying (token);
			const int anon_kind = getKindForToken (token);

			anonGenerate (anon->string,
						  kindExtraInfo [anon_kind].anon_prefix, anon_kind);
			tokenUnread (token);
			vStringClear (token->string);
			parseRightSide (token, anon, parent);
			tokenDelete (anon);
			continue;
		}

		/* A symbol, string or keyword: what follows decides its role. */
		if (tokenIsType (token, R_SYMBOL)
			|| tokenIsType (token, R_STRING)
			|| tokenIsType (token, R_KEYWORD))
		{
			tokenInfo *const lhs = newTokenByCopying (token);
			bool statement_ended = false;

			if (in_continuous_pair)
				tokenReadNoNewline (token);
			else
				tokenRead (token);

			if (tokenIsType (token, R_LASSIGN)
				|| (tokenIsTypeVal (token, '=') && !in_arglist))
			{
				/* Assignment */
				parseRightSide (token, lhs, parent);
				R_TRACE_TOKEN_TEXT ("break with right side", token, parent);
				statement_ended = true;
			}
			else if (tokenIsTypeVal (token, '='))
			{
				/* '=' inside an argument list: ignore the left side symbol. */
				tokenRead (token);
				R_TRACE_TOKEN_TEXT("(in arg list) after = body", token, parent);
			}
			else if (tokenIsTypeVal (token, '('))
			{
				/* function call */
				parsePair (token, parent, lhs);
				tokenRead (token);
				R_TRACE_TOKEN_TEXT("after arglist", token, parent);
			}
			else if (tokenIsTypeVal (token, '$')
					 || tokenIsTypeVal (token, '@')
					 || tokenIsType (token, R_SCOPE))
			{
				tokenReadNoNewline (token); /* Skip the next identifier */
				tokenRead (token);
				R_TRACE_TOKEN_TEXT("after $", token, parent);
			}
			else
			{
				R_TRACE_TOKEN_TEXT("else after symbol", token, parent);
			}

			tokenDelete (lhs);
			if (statement_ended)
				break;
			continue;
		}

		/* Right assignment: the token after the operator is the target. */
		if (tokenIsType (token, R_RASSIGN))
		{
			char *const op = eStrdup (tokenString (token));

			tokenReadNoNewline (token);
			if (tokenIsType (token, R_SYMBOL)
				|| tokenIsType (token, R_STRING))
			{
				makeSimpleRTag (token, parent, false, K_GLOBALVAR, op);
				tokenRead (token);
			}
			eFree (op);
			R_TRACE_TOKEN_TEXT("after ->", token, parent);
			continue;
		}

		if (tokenIsType (token, R_OPERATOR))
		{
			tokenReadNoNewline (token);
			R_TRACE_TOKEN_TEXT("after operator", token, parent);
			continue;
		}

		/* An opening bracket that does not follow a symbol. */
		if (tokenIsTypeVal (token, '(')
			|| tokenIsTypeVal (token, '{')
			|| tokenIsTypeVal (token, '['))
		{
			parsePair (token, parent, NULL);
			tokenRead (token);
			R_TRACE_TOKEN_TEXT("after pair", token, parent);
			continue;
		}

		/* A closing bracket ends the statement; the token is left as is. */
		if (tokenIsTypeVal (token, ')')
			|| tokenIsTypeVal (token, '}')
			|| tokenIsTypeVal (token, ']'))
		{
			R_TRACE_TOKEN_TEXT ("break with close", token, parent);
			break;
		}

		if (tokenIsTypeVal (token, '$')
			|| tokenIsTypeVal (token, '@')
			|| tokenIsType (token, R_SCOPE))
		{
			tokenReadNoNewline (token); /* Skip the next identifier */
			tokenRead (token);
			R_TRACE_TOKEN_TEXT("after $", token, parent);
			continue;
		}

		/* Anything else: just move on to the next token. */
		tokenRead (token);
		R_TRACE_TOKEN_TEXT("else", token, parent);
	}

	R_TRACE_LEAVE();

	return (reads_at_entry != rTokenInfoClass.read_counter);
}
1251 
/* Exported wrapper around parseStatement().
 *
 * Lang_R is pushed for the duration of the call and popped afterwards.
 * parseStatement() is always invoked with in_continuous_pair set to true.
 * Returns whatever parseStatement() returns. */
extern bool rParseStatement (tokenInfo *const token, int parentIndex, bool in_arglist)
{
	bool progressed;

	pushLanguage (Lang_R);
	progressed = parseStatement (token, parentIndex, in_arglist, true);
	popLanguage ();

	return progressed;
}
1259 
/* Offer a right-side symbol to each subparser that provides a
 * readRightSideSymbol callback.
 *
 * Iteration stops at the first callback that returns something other than
 * CORK_NIL, and that value is returned.  CORK_NIL is returned when no
 * callback produced anything else. */
static  int notifyReadRightSideSymbol (tokenInfo *const symbol,
									   const char *const assignmentOperator,
									   int parent,
									   tokenInfo *const token)
{
	int index = CORK_NIL;
	subparser *sub;

	foreachSubparser (sub, false)
	{
		rSubparser *const rsub = (rSubparser *)sub;

		if (!rsub->readRightSideSymbol)
			continue;

		enterSubparser (sub);
		index = rsub->readRightSideSymbol (rsub, symbol, assignmentOperator,
										   parent, token);
		leaveSubparser ();

		if (index != CORK_NIL)
			break;
	}

	return index;
}
1283 
/* Ask the subparser registered for LANGTYPE to make a tag for TOKEN through
 * its makeTagWithTranslation callback.
 *
 * Returns the callback's result.  CORK_NIL is returned when no subparser is
 * found for LANGTYPE or when the subparser has no such callback. */
static  int makeSimpleSubparserTag (int langType,
									tokenInfo *const token, int parent,
									bool in_func, int kindInR,
									const char *assignmentOperator)
{
	subparser *const sub = getLanguageSubparser (langType, false);
	if (sub == NULL)
		return CORK_NIL;

	rSubparser *const rsub = (rSubparser *)sub;
	if (!rsub->makeTagWithTranslation)
		return CORK_NIL;

	enterSubparser (sub);
	const int index = rsub->makeTagWithTranslation (rsub,
													token, parent,
													in_func, kindInR,
													assignmentOperator);
	leaveSubparser ();

	return index;
}
1306 
/* Ask the subparser owning PE (looked up by pe->langType) whether it accepts
 * the tag, through its askTagAcceptancy callback.
 *
 * Returns the callback's answer.  false is returned when no subparser is
 * found for pe->langType or when the subparser has no such callback. */
static  bool askSubparserTagAcceptancy (tagEntryInfo *pe)
{
	bool q = false;
	subparser *sub = getLanguageSubparser (pe->langType, false);

	/* getLanguageSubparser() may find nothing; makeSimpleSubparserTag()
	 * guards against that in the same way. */
	if (sub)
	{
		rSubparser *rsub = (rSubparser *)sub;
		if (rsub->askTagAcceptancy)
		{
			enterSubparser (sub);
			q = rsub->askTagAcceptancy (rsub, pe);
			leaveSubparser ();
		}
	}
	return q;
}
1322 
/* Ask the subparser owning E (looked up by e->langType) whether the tag has
 * a function-like kind, through its hasFunctionAlikeKind callback.
 *
 * The subparser lookup is done with Lang_R pushed, and a subparser is
 * asserted to exist.  Returns the callback's answer, or false when the
 * subparser has no such callback. */
static  bool askSubparserTagHasFunctionAlikeKind (tagEntryInfo *e)
{
	subparser *sub;

	pushLanguage (Lang_R);
	sub = getLanguageSubparser (e->langType, false);
	Assert (sub);
	popLanguage ();

	rSubparser *const rsub = (rSubparser *)sub;
	if (!rsub->hasFunctionAlikeKind)
		return false;

	enterSubparser (sub);
	const bool answer = rsub->hasFunctionAlikeKind (rsub, e);
	leaveSubparser ();

	return answer;
}
1339 
/* Offer a function call (FUNC, with TOKEN as the current token) to each
 * subparser that provides a readFuncall callback.
 *
 * Iteration stops at the first callback that returns something other than
 * CORK_NIL, and that value is returned.  CORK_NIL is returned when no
 * callback produced anything else. */
static  int notifyReadFuncall (tokenInfo *const func,
							   tokenInfo *const token,
							   int parent)
{
	int index = CORK_NIL;
	subparser *sub;

	foreachSubparser (sub, false)
	{
		rSubparser *const rsub = (rSubparser *)sub;

		if (!rsub->readFuncall)
			continue;

		enterSubparser (sub);
		index = rsub->readFuncall (rsub, func, token, parent);
		leaveSubparser ();

		if (index != CORK_NIL)
			break;
	}

	return index;
}
1360 
findRTags(void)1361 static void findRTags (void)
1362 {
1363 	tokenInfo *const token = newRToken ();
1364 
1365 	blackHoleIndex = makePlaceholder ("**BLACK-HOLE/DON'T TAG ME**");
1366 	registerEntry (blackHoleIndex);
1367 
1368 	TRACE_PRINT ("install blackhole: %d", blackHoleIndex);
1369 
1370 	do
1371 	{
1372 		tokenRead(token);
1373 		R_TRACE_TOKEN(token, CORK_NIL);
1374 		parseStatement (token, CORK_NIL, false, false);
1375 	}
1376 	while (!tokenIsEOF (token));
1377 
1378 	TRACE_PRINT ("run blackhole", blackHoleIndex);
1379 	markAllEntriesInScopeAsPlaceholder (blackHoleIndex);
1380 
1381 	tokenDelete (token);
1382 }
1383 
initializeRParser(const langType language)1384 static void initializeRParser (const langType language)
1385 {
1386 	Lang_R = language;
1387 }
1388 
RParser(void)1389 extern parserDefinition *RParser (void)
1390 {
1391 	static const char *const extensions[] = { "r", "R", "s", "q", NULL };
1392 	parserDefinition *const def = parserNew ("R");
1393 	static selectLanguage selectors[] = { selectByArrowOfR,
1394 										  NULL };
1395 
1396 	def->extensions = extensions;
1397 	def->kindTable = RKinds;
1398 	def->kindCount = ARRAY_SIZE(RKinds);
1399 	def->fieldTable = RFields;
1400 	def->fieldCount = ARRAY_SIZE (RFields);
1401 	def->keywordTable = RKeywordTable;
1402 	def->keywordCount = ARRAY_SIZE(RKeywordTable);
1403 	def->useCork = CORK_QUEUE | CORK_SYMTAB;
1404 	def->parser = findRTags;
1405 	def->selectLanguage = selectors;
1406 	def->initialize = initializeRParser;
1407 
1408 	return def;
1409 }
1410 
/* Make a copy of STR with its surrounding quotes removed.
 *
 * STR counts as quoted when its first character is ', " or `.  In that case
 * the opening quote is skipped and, if the last character is the same quote
 * character, it is removed too.  An unquoted STR is copied unchanged.
 *
 * Returns NULL when STR is empty, or when STR is quoted but shorter than
 * three characters.  Otherwise returns a vString created with
 * vStringNewInit(); presumably the caller is responsible for deleting it. */
extern vString *rExtractNameFromString (vString* str)
{
	if (vStringLength (str) == 0)
		return NULL;

	const char first = vStringChar (str, 0);
	const bool quoted = (first == '\'' || first == '"' || first == '`');

	if (quoted && vStringLength (str) < 3)
		return NULL;

	vString *n = vStringNewInit (vStringValue (str) + (quoted ? 1 : 0));

	/* Chop the tail only when an opening quote was seen.  Without this
	 * condition an unquoted name whose last character happens to equal its
	 * first one (e.g. "aa") would lose its final character. */
	if (quoted && vStringChar (n, vStringLength (n) - 1) == first)
		vStringChop (n);

	return n;
}
1431 
1432 #ifdef DEBUG
tokenTypeStr(enum RTokenType e)1433 static const char *tokenTypeStr(enum RTokenType e)
1434 { /* Generated by misc/enumstr.sh with cmdline:
1435      parsers/r.c RTokenType tokenTypeStr TOKEN_R_ --use-lower-bits-as-is */
1436 	switch (e)
1437 	{
1438 		case            TOKEN_R_EOF: return "EOF";
1439 		case      TOKEN_R_UNDEFINED: return "UNDEFINED";
1440 		case        TOKEN_R_KEYWORD: return "KEYWORD";
1441 		case        TOKEN_R_NEWLINE: return "NEWLINE";
1442 		case         TOKEN_R_NUMBER: return "NUMBER";
1443 		case         TOKEN_R_SYMBOL: return "SYMBOL";
1444 		case         TOKEN_R_STRING: return "STRING";
1445 		case       TOKEN_R_OPERATOR: return "OPERATOR";
1446 		case           TOKEN_R_DOTS: return "DOTS";
1447 		case         TOKEN_R_DOTS_N: return "DOTS_N";
1448 		case        TOKEN_R_LASSIGN: return "LASSIGN";
1449 		case        TOKEN_R_RASSIGN: return "RASSIGN";
1450 		case          TOKEN_R_SCOPE: return "SCOPE";
1451 		default:                   break;
1452 	}
1453 	static char buf[3];
1454 	if (isprint (e))
1455 	{
1456 		buf[0] = e;
1457 		buf[1] = '\0';
1458 	}
1459 	else if (e == '\n')
1460 	{
1461 		buf[0] = '\\';
1462 		buf[1] = 'n';
1463 		buf[2] = '\0';
1464 	}
1465 	else
1466 	{
1467 		buf[0] = '\0';
1468 	}
1469 	return buf;
1470 }
1471 #endif
1472