xref: /Universal-ctags/parsers/cpreprocessor.c (revision d711b26b07c2fa826faf4de3347e540c202cb6b3)
1 /*
2 *   Copyright (c) 1996-2002, Darren Hiebert
3 *
4 *   This source code is released for free distribution under the terms of the
5 *   GNU General Public License version 2 or (at your option) any later version.
6 *
7 *   This module contains the high level input read functions (preprocessor
8 *   directives are handled within this level).
9 */
10 
11 /*
12 *   INCLUDE FILES
13 */
14 #include "general.h"  /* must always come first */
15 
16 #include <string.h>
17 
18 #include "debug.h"
19 #include "entry.h"
20 #include "htable.h"
21 #include "cpreprocessor.h"
22 #include "kind.h"
23 #include "options.h"
24 #include "read.h"
25 #include "vstring.h"
26 #include "param.h"
27 #include "parse.h"
28 #include "promise.h"
29 #include "xtag.h"
30 
31 #include "cxx/cxx_debug.h"
32 
33 /*
34 *   MACROS
35 */
/* Non-zero when the two NUL-terminated strings compare equal. */
#define stringMatch(s1,s2)		(0 == strcmp (s1,s2))
/* Non-zero when c is a blank: a horizontal tab or a space. */
#define isspacetab(c)			((c) == TAB || (c) == SPACE)
38 
39 /*
40 *   DATA DECLARATIONS
41 */
/* Comment styles, with COMMENT_NONE meaning "not a comment". */
typedef enum {
	COMMENT_NONE,
	COMMENT_C,
	COMMENT_CPLUS,
	COMMENT_D
} Comment;
43 
/* Fixed size limits of the preprocessor. */
enum eCppLimits {
	/* NOTE(review): presumably the buffer size for a directive name;
	 * its user is outside this excerpt. */
	MaxDirectiveName = 10,
	/* number of slots in the conditional nesting stack */
	MaxCppNestingLevel = 20
};
48 
/* Which conditional directive was seen; kept so that __ASSEMBLER__
 * areas can be tracked. */
enum eIfSubstate {
	IF_IF,      /* #if */
	IF_IFDEF,   /* #ifdef */
	IF_IFNDEF,  /* #ifndef */
	IF_ELSE,    /* #else */
	IF_ELIF,    /* #elif */
	IF_ENDIF,   /* #endif */
};
58 
59 struct asmAreaInfo {
60 	enum eIfSubstate ifSubstate;
61 	unsigned long line;
62 };
63 
64 /*  Defines the one nesting level of a preprocessor conditional.
65  */
66 typedef struct sConditionalInfo {
67 	bool ignoreAllBranches;  /* ignoring parent conditional branch */
68 	bool singleBranch;       /* choose only one branch */
69 	bool branchChosen;       /* branch already selected */
70 	bool ignoring;           /* current ignore state */
71 	int enterExternalParserBlockNestLevel;          /* the parser state when entering this conditional: used only by cxx */
72 
73 	/* tracking __ASSEMBLER__ area */
74 	struct asmAreaInfo asmArea;
75 } conditionalInfo;
76 
/* Directive currently being processed. */
enum eState {
	DRCTV_NONE,     /* no known directive: skip to the end of the line */
	DRCTV_DEFINE,   /* "#define" seen */
	DRCTV_HASH,     /* leading '#' seen, directive not yet determined */
	DRCTV_IF,       /* "#if" or "#ifdef" seen */
	DRCTV_ELIF,     /* "#elif" seen */
	DRCTV_PRAGMA,   /* "#pragma" seen */
	DRCTV_UNDEF,    /* "#undef" seen */
	DRCTV_INCLUDE,  /* "#include" seen */
};
87 
88 /*  Defines the current state of the pre-processor.
89  */
90 typedef struct sCppState {
91 	langType lang;
92 	langType clientLang;
93 
94 	int * ungetBuffer;       /* memory buffer for unget characters */
95 	int ungetBufferSize;      /* the current unget buffer size */
96 	int * ungetPointer;      /* the current unget char: points in the middle of the buffer */
97 	int ungetDataSize;        /* the number of valid unget characters in the buffer */
98 
99 	/* the contents of the last SYMBOL_CHAR or SYMBOL_STRING */
100 	vString * charOrStringContents;
101 
102 	bool resolveRequired;     /* must resolve if/else/elif/endif branch */
103 	bool hasAtLiteralStrings; /* supports @"c:\" strings */
104 	bool hasCxxRawLiteralStrings; /* supports R"xxx(...)xxx" strings */
105 	bool hasSingleQuoteLiteralNumbers; /* supports vera number literals:
106 						 'h..., 'o..., 'd..., and 'b... */
107 
108 	bool useClientLangDefineMacroKindIndex;
109 	int defineMacroKindIndex;
110 	int macroUndefRoleIndex;
111 	int macroConditionRoleIndex;
112 
113 	bool useClientLangMacroParamKindIndex;
114 	int macroParamKindIndex;
115 
116 	bool useClientLangHeaderKindIndex;
117 	int headerKindIndex;
118 	int headerSystemRoleIndex;
119 	int headerLocalRoleIndex;
120 
121 	int macrodefFieldIndex;
122 
123 	struct sDirective {
124 		enum eState state;       /* current directive being processed */
125 		enum eIfSubstate ifsubstate; /* For tracking __ASSEMBLER__.
126 									  * assigned only when state == DICTV_IF */
127 		bool	accept;          /* is a directive syntactically permitted? */
128 		vString * name;          /* macro name */
129 		unsigned int nestLevel;  /* level 0 is not used */
130 		conditionalInfo ifdef [MaxCppNestingLevel];
131 	} directive;
132 
133 	cppMacroInfo * macroInUse;
134 	hashTable * fileMacroTable;
135 
136 } cppState;
137 
138 
139 typedef enum {
140 	CPREPRO_MACRO_KIND_UNDEF_ROLE,
141 	CPREPRO_MACRO_KIND_CONDITION_ROLE,
142 } cPreProMacroRole;
143 
144 static roleDefinition CPREPROMacroRoles [] = {
145 	RoleTemplateUndef,
146 	RoleTemplateCondition,
147 };
148 
149 
150 typedef enum {
151 	CPREPRO_HEADER_KIND_SYSTEM_ROLE,
152 	CPREPRO_HEADER_KIND_LOCAL_ROLE,
153 } cPreProHeaderRole;
154 
155 static roleDefinition CPREPROHeaderRoles [] = {
156 	RoleTemplateSystem,
157 	RoleTemplateLocal,
158 };
159 
160 
161 typedef enum {
162 	CPREPRO_MACRO, CPREPRO_HEADER, CPREPRO_PARAM,
163 } cPreProkind;
164 
165 static kindDefinition CPreProKinds [] = {
166 	{ true,  'd', "macro",      "macro definitions",
167 	  .referenceOnly = false, ATTACH_ROLES(CPREPROMacroRoles)},
168 	{ true, 'h', "header",     "included header files",
169 	  .referenceOnly = true, ATTACH_ROLES(CPREPROHeaderRoles)},
170 	{ false, 'D', "parameter", "macro parameters", },
171 };
172 
173 typedef enum {
174 	F_MACRODEF,
175 	COUNT_FIELD
176 } cPreProField;
177 
178 static fieldDefinition CPreProFields[COUNT_FIELD] = {
179 	{ .name = "macrodef",
180 	  .description = "macro definition",
181 	  .enabled = false },
182 };
183 
184 /*
185 *   DATA DEFINITIONS
186 */
187 
/* When false, a conditional whose first branch is not chosen (e.g. "#if 0")
 * has that branch ignored; when true such code is still examined unless
 * only a single branch may be followed. */
static bool doesExaminCodeWithInIf0Branch = false;

/* One of the prerequisites for building the per-file macro table. */
static bool doesExpandMacros = false;

/*
* State of the CXX parser: its current block nest level. The value is
* recorded at the beginning of a conditional; if it differs at the exit of
* a branch we assume that no further branches should be followed.
*/
static int externalParserBlockNestLevel = 0;


/*  Use brace formatting to detect end of block.
 */
static bool BraceFormat = false;
202 
cppPushExternalParserBlock(void)203 void cppPushExternalParserBlock(void)
204 {
205 	externalParserBlockNestLevel++;
206 }
207 
cppPopExternalParserBlock(void)208 void cppPopExternalParserBlock(void)
209 {
210 	externalParserBlockNestLevel--;
211 }
212 
213 
214 static cppState Cpp = {
215 	.lang = LANG_IGNORE,
216 	.clientLang = LANG_IGNORE,
217 	.ungetBuffer = NULL,
218 	.ungetBufferSize = 0,
219 	.ungetPointer = NULL,
220 	.ungetDataSize = 0,
221 	.charOrStringContents = NULL,
222 	.resolveRequired = false,
223 	.hasAtLiteralStrings = false,
224 	.hasCxxRawLiteralStrings = false,
225 	.hasSingleQuoteLiteralNumbers = false,
226 	.useClientLangDefineMacroKindIndex = false,
227 	.defineMacroKindIndex = CPREPRO_MACRO,
228 	.macroUndefRoleIndex = CPREPRO_MACRO_KIND_UNDEF_ROLE,
229 	.macroConditionRoleIndex = CPREPRO_MACRO_KIND_CONDITION_ROLE,
230 	.useClientLangMacroParamKindIndex = false,
231 	.macroParamKindIndex = CPREPRO_PARAM,
232 	.useClientLangHeaderKindIndex = false,
233 	.headerKindIndex = CPREPRO_HEADER,
234 	.headerSystemRoleIndex = CPREPRO_HEADER_KIND_SYSTEM_ROLE,
235 	.headerLocalRoleIndex = CPREPRO_HEADER_KIND_LOCAL_ROLE,
236 	.macrodefFieldIndex = FIELD_UNKNOWN,
237 	.directive = {
238 		.state = DRCTV_NONE,
239 		.accept = false,
240 		.name = NULL,
241 		.nestLevel = 0,
242 		.ifdef = {
243 			{
244 				.ignoreAllBranches = false,
245 				.singleBranch = false,
246 				.branchChosen = false,
247 				.ignoring = false,
248 			}
249 		}
250 	}  /* directive */
251 };
252 
253 /*
254 *   FUNCTION DECLARATIONS
255 */
256 
257 static hashTable *makeMacroTable (void);
258 static cppMacroInfo * saveMacro(hashTable *table, const char * macro);
259 
260 /*
261 *   FUNCTION DEFINITIONS
262 */
263 
cppIsBraceFormat(void)264 extern bool cppIsBraceFormat (void)
265 {
266 	return BraceFormat;
267 }
268 
cppGetDirectiveNestLevel(void)269 extern unsigned int cppGetDirectiveNestLevel (void)
270 {
271 	return Cpp.directive.nestLevel;
272 }
273 
/*  Resets the preprocessor for a new input and records how tags are to
 *  be emitted.
 *
 *  clientLang     language on whose behalf the preprocessor runs
 *  state          whether brace formatting detects the end of a block
 *  has*           literal syntaxes supported by the client language
 *  *KindIndex     kind to use for macro / parameter / header tags;
 *                 KIND_GHOST_INDEX selects the CPreProcessor parser's own
 *                 kind (and its own roles and field)
 *  *RoleIndex     roles going with a client supplied kind
 *  macrodefFieldIndex
 *                 field going with a client supplied macro kind
 *
 *  The function expects cppTerminate() to have been called since the
 *  previous initialization (Cpp.charOrStringContents must be NULL).
 */
static void cppInitCommon(langType clientLang,
		     const bool state, const bool hasAtLiteralStrings,
		     const bool hasCxxRawLiteralStrings,
		     const bool hasSingleQuoteLiteralNumbers,
		     int defineMacroKindIndex,
		     int macroUndefRoleIndex,
		     int macroConditionRoleIndex,
		     int macroParamKindIndex,
		     int headerKindIndex,
		     int headerSystemRoleIndex, int headerLocalRoleIndex,
		     int macrodefFieldIndex)
{
	BraceFormat = state;

	CXX_DEBUG_PRINT("cppInit: brace format is %d",BraceFormat);

	externalParserBlockNestLevel = 0;

	if (Cpp.lang == LANG_IGNORE)
	{
		/* NOTE(review): presumably initializing the parser is what
		 * assigns Cpp.lang; that code is not visible here. */
		langType t;

		t = getNamedLanguage ("CPreProcessor", 0);
		initializeParser (t);
	}

	Cpp.clientLang = clientLang;

	/* Start with an empty unget queue. The counters are reset together
	 * with the pointers: cppUngetString() accumulates into ungetDataSize,
	 * so a count left over from a previous input would make later reads
	 * run past the end of the new buffer. */
	Cpp.ungetBuffer = NULL;
	Cpp.ungetBufferSize = 0;
	Cpp.ungetPointer = NULL;
	Cpp.ungetDataSize = 0;

	CXX_DEBUG_ASSERT(!Cpp.charOrStringContents,"This string should be null when CPP is not initialized");
	Cpp.charOrStringContents = vStringNew();

	Cpp.resolveRequired = false;
	Cpp.hasAtLiteralStrings = hasAtLiteralStrings;
	Cpp.hasCxxRawLiteralStrings = hasCxxRawLiteralStrings;
	Cpp.hasSingleQuoteLiteralNumbers = hasSingleQuoteLiteralNumbers;

	/* macro kind: the client's, or our own */
	if (defineMacroKindIndex != KIND_GHOST_INDEX)
	{
		Cpp.defineMacroKindIndex = defineMacroKindIndex;
		Cpp.useClientLangDefineMacroKindIndex = true;

		Cpp.macroUndefRoleIndex = macroUndefRoleIndex;
		Cpp.macroConditionRoleIndex = macroConditionRoleIndex;
		Cpp.macrodefFieldIndex = macrodefFieldIndex;
	}
	else
	{
		Cpp.defineMacroKindIndex = CPREPRO_MACRO;
		Cpp.useClientLangDefineMacroKindIndex = false;

		Cpp.macroUndefRoleIndex = CPREPRO_MACRO_KIND_UNDEF_ROLE;
		Cpp.macroConditionRoleIndex = CPREPRO_MACRO_KIND_CONDITION_ROLE;
		Cpp.macrodefFieldIndex = CPreProFields [F_MACRODEF].ftype;
	}

	/* macro parameter kind: the client's, or our own */
	if (macroParamKindIndex != KIND_GHOST_INDEX)
	{
		Cpp.macroParamKindIndex = macroParamKindIndex;
		Cpp.useClientLangMacroParamKindIndex = true;
	}
	else
	{
		Cpp.macroParamKindIndex = CPREPRO_PARAM;
		Cpp.useClientLangMacroParamKindIndex = false;
	}

	/* header kind: the client's, or our own */
	if (headerKindIndex != KIND_GHOST_INDEX)
	{
		Cpp.headerKindIndex = headerKindIndex;
		Cpp.useClientLangHeaderKindIndex = true;

		Cpp.headerSystemRoleIndex = headerSystemRoleIndex;
		Cpp.headerLocalRoleIndex =  headerLocalRoleIndex;
	}
	else
	{
		Cpp.headerKindIndex = CPREPRO_HEADER;
		Cpp.useClientLangHeaderKindIndex = false;

		Cpp.headerSystemRoleIndex = CPREPRO_HEADER_KIND_SYSTEM_ROLE;
		Cpp.headerLocalRoleIndex = CPREPRO_HEADER_KIND_LOCAL_ROLE;
	}

	Cpp.directive.state     = DRCTV_NONE;
	Cpp.directive.accept    = true;
	Cpp.directive.nestLevel = 0;

	Cpp.directive.ifdef [0].ignoreAllBranches = false;
	Cpp.directive.ifdef [0].singleBranch = false;
	Cpp.directive.ifdef [0].branchChosen = false;
	Cpp.directive.ifdef [0].ignoring     = false;

	Cpp.directive.name = vStringNewOrClear (Cpp.directive.name);

	Cpp.macroInUse = NULL;

	/* The per-file macro table is built only when macros are expanded,
	 * the signature and macrodef fields are enabled, and the language in
	 * charge uses the cork symbol table. */
	Cpp.fileMacroTable =
		(doesExpandMacros
		 && isFieldEnabled (FIELD_SIGNATURE)
		 && isFieldEnabled (Cpp.macrodefFieldIndex)
		 && (getLanguageCorkUsage ((clientLang == LANG_IGNORE)
								   ? Cpp.lang
								   : clientLang) & CORK_SYMTAB))
		? makeMacroTable ()
		: NULL;
}
381 
cppInit(const bool state,const bool hasAtLiteralStrings,const bool hasCxxRawLiteralStrings,const bool hasSingleQuoteLiteralNumbers,int defineMacroKindIndex,int macroUndefRoleIndex,int macroConditionRoleIndex,int macroParamKindIndex,int headerKindIndex,int headerSystemRoleIndex,int headerLocalRoleIndex,int macrodefFieldIndex)382 extern void cppInit (const bool state, const bool hasAtLiteralStrings,
383 		     const bool hasCxxRawLiteralStrings,
384 		     const bool hasSingleQuoteLiteralNumbers,
385 		     int defineMacroKindIndex,
386 		     int macroUndefRoleIndex,
387 		     int macroConditionRoleIndex,
388 		     int macroParamKindIndex,
389 		     int headerKindIndex,
390 		     int headerSystemRoleIndex, int headerLocalRoleIndex,
391 		     int macrodefFieldIndex)
392 {
393 	langType client = getInputLanguage ();
394 
395 	cppInitCommon (client, state, hasAtLiteralStrings,
396 				   hasCxxRawLiteralStrings, hasSingleQuoteLiteralNumbers,
397 				   defineMacroKindIndex, macroUndefRoleIndex, macroConditionRoleIndex,
398 				   macroParamKindIndex,
399 				   headerKindIndex, headerSystemRoleIndex, headerLocalRoleIndex,
400 				   macrodefFieldIndex);
401 }
402 
/*  Zeroes the use count of every macro on the list headed by *pM, then
 *  empties the list by setting *pM to NULL. The "next" links of the
 *  entries are left as they are.
 */
static void cppClearMacroInUse (cppMacroInfo **pM)
{
	cppMacroInfo *macro = *pM;

	while (macro != NULL)
	{
		CXX_DEBUG_PRINT("Macro <%p> clear useCount: %d -> 0", macro, macro->useCount);
		macro->useCount = 0;
		macro = macro->next;
	}
	*pM = NULL;
}
412 
cppTerminate(void)413 extern void cppTerminate (void)
414 {
415 	if (Cpp.directive.name != NULL)
416 	{
417 		vStringDelete (Cpp.directive.name);
418 		Cpp.directive.name = NULL;
419 	}
420 
421 	if(Cpp.ungetBuffer)
422 	{
423 		eFree(Cpp.ungetBuffer);
424 		Cpp.ungetBuffer = NULL;
425 	}
426 
427 	if(Cpp.charOrStringContents)
428 	{
429 		vStringDelete(Cpp.charOrStringContents);
430 		Cpp.charOrStringContents = NULL;
431 	}
432 
433 	Cpp.clientLang = LANG_IGNORE;
434 
435 	cppClearMacroInUse (&Cpp.macroInUse);
436 
437 	if (Cpp.fileMacroTable)
438 	{
439 		hashTableDelete (Cpp.fileMacroTable);
440 		Cpp.fileMacroTable = NULL;
441 	}
442 }
443 
cppBeginStatement(void)444 extern void cppBeginStatement (void)
445 {
446 	Cpp.resolveRequired = true;
447 }
448 
cppEndStatement(void)449 extern void cppEndStatement (void)
450 {
451 	Cpp.resolveRequired = false;
452 }
453 
454 /*
455 *   Scanning functions
456 *
457 *   This section handles preprocessor directives.  It strips out all
458 *   directives and may emit a tag for #define directives.
459 */
460 
461 /*  This puts a character back into the input queue for the input File. */
cppUngetc(const int c)462 extern void cppUngetc (const int c)
463 {
464 	if(!Cpp.ungetPointer)
465 	{
466 		// no unget data
467 		if(!Cpp.ungetBuffer)
468 		{
469 			Cpp.ungetBuffer = (int *)eMalloc(8 * sizeof(int));
470 			Cpp.ungetBufferSize = 8;
471 		}
472 		Assert(Cpp.ungetBufferSize > 0);
473 		Cpp.ungetPointer = Cpp.ungetBuffer + Cpp.ungetBufferSize - 1;
474 		*(Cpp.ungetPointer) = c;
475 		Cpp.ungetDataSize = 1;
476 		return;
477 	}
478 
479 	// Already have some unget data in the buffer. Must prepend.
480 	Assert(Cpp.ungetBuffer);
481 	Assert(Cpp.ungetBufferSize > 0);
482 	Assert(Cpp.ungetDataSize > 0);
483 	Assert(Cpp.ungetPointer >= Cpp.ungetBuffer);
484 
485 	if(Cpp.ungetPointer == Cpp.ungetBuffer)
486 	{
487 		Cpp.ungetBufferSize += 8;
488 		int * tmp = (int *)eMalloc(Cpp.ungetBufferSize * sizeof(int));
489 		memcpy(tmp+8,Cpp.ungetPointer,Cpp.ungetDataSize * sizeof(int));
490 		eFree(Cpp.ungetBuffer);
491 		Cpp.ungetBuffer = tmp;
492 		Cpp.ungetPointer = tmp + 7;
493 	} else {
494 		Cpp.ungetPointer--;
495 	}
496 
497 	*(Cpp.ungetPointer) = c;
498 	Cpp.ungetDataSize++;
499 }
500 
cppUngetBufferSize()501 int cppUngetBufferSize()
502 {
503 	return Cpp.ungetBufferSize;
504 }
505 
506 /*  This puts an entire string back into the input queue for the input File. */
cppUngetString(const char * string,int len)507 void cppUngetString(const char * string,int len)
508 {
509 	if(!string)
510 		return;
511 	if(len < 1)
512 		return;
513 
514 	if(!Cpp.ungetPointer)
515 	{
516 		// no unget data
517 		if(!Cpp.ungetBuffer)
518 		{
519 			Cpp.ungetBufferSize = 8 + len;
520 			Cpp.ungetBuffer = (int *)eMalloc(Cpp.ungetBufferSize * sizeof(int));
521 		} else if(Cpp.ungetBufferSize < len)
522 		{
523 			Cpp.ungetBufferSize = 8 + len;
524 			Cpp.ungetBuffer = (int *)eRealloc(Cpp.ungetBuffer,Cpp.ungetBufferSize * sizeof(int));
525 		}
526 		Cpp.ungetPointer = Cpp.ungetBuffer + Cpp.ungetBufferSize - len;
527 	} else {
528 		// Already have some unget data in the buffer. Must prepend.
529 		Assert(Cpp.ungetBuffer);
530 		Assert(Cpp.ungetBufferSize > 0);
531 		Assert(Cpp.ungetDataSize > 0);
532 		Assert(Cpp.ungetPointer >= Cpp.ungetBuffer);
533 
534 		if(Cpp.ungetBufferSize < (Cpp.ungetDataSize + len))
535 		{
536 			Cpp.ungetBufferSize = 8 + len + Cpp.ungetDataSize;
537 			int * tmp = (int *)eMalloc(Cpp.ungetBufferSize * sizeof(int));
538 			memcpy(tmp + 8 + len,Cpp.ungetPointer,Cpp.ungetDataSize * sizeof(int));
539 			eFree(Cpp.ungetBuffer);
540 			Cpp.ungetBuffer = tmp;
541 			Cpp.ungetPointer = tmp + 8;
542 		} else {
543 			Cpp.ungetPointer -= len;
544 			Assert(Cpp.ungetPointer >= Cpp.ungetBuffer);
545 		}
546 	}
547 
548 	int * p = Cpp.ungetPointer;
549 	const char * s = string;
550 	const char * e = string + len;
551 
552 	while(s < e)
553 		*p++ = *s++;
554 
555 	Cpp.ungetDataSize += len;
556 }
557 
/*  Pushes back text produced by expanding macro and counts the use.
 *
 *  A macro whose use count is still zero is first linked at the head of
 *  the Cpp.macroInUse list. The counts are zeroed again and the list
 *  emptied by cppClearMacroInUse().
 */
extern void cppUngetStringBuiltByMacro(const char * string,int len, cppMacroInfo *macro)
{
	const bool firstUse = (macro->useCount == 0);

	if (firstUse)
	{
		/* prepend to the list of macros in use */
		macro->next = Cpp.macroInUse;
		Cpp.macroInUse = macro;
	}
	macro->useCount++;

	CXX_DEBUG_PRINT("Macro <%p> increment useCount: %d->%d", macro,
					(macro->useCount - 1), macro->useCount);

	cppUngetString (string, len);
}
573 
cppGetcFromUngetBufferOrFile(void)574 static int cppGetcFromUngetBufferOrFile(void)
575 {
576 	if(Cpp.ungetPointer)
577 	{
578 		Assert(Cpp.ungetBuffer);
579 		Assert(Cpp.ungetBufferSize > 0);
580 		Assert(Cpp.ungetDataSize > 0);
581 
582 		int c = *(Cpp.ungetPointer);
583 		Cpp.ungetDataSize--;
584 		if(Cpp.ungetDataSize > 0)
585 			Cpp.ungetPointer++;
586 		else
587 			Cpp.ungetPointer = NULL;
588 		return c;
589 	}
590 
591 	if (Cpp.macroInUse)
592 		cppClearMacroInUse (&Cpp.macroInUse);
593 	return getcFromInputFile();
594 }
595 
596 
/*  Reads a directive name into "name". "c" is its first character and is
 *  stored without being examined; further characters are read for as long
 *  as they are alphabetic and fewer than maxLength - 1 have been stored.
 *  The character ending the name is pushed back. "name" is always NUL
 *  terminated.
 *
 *  Returns true when the last character examined is a space or a tab.
 */
static bool readDirective (int c, char *const name, unsigned int maxLength)
{
	unsigned int length = 0;

	while (length < maxLength - 1)
	{
		if (length != 0)
		{
			c = cppGetcFromUngetBufferOrFile ();
			if (c == EOF  ||  ! isalpha (c))
			{
				cppUngetc (c);
				break;
			}
		}
		name [length++] = c;
	}
	name [length] = '\0';

	return (bool) isspacetab (c);
}
620 
621 /*  Reads an identifier, whose first character is given by "c", into "tag",
622  *  together with the file location and corresponding line number.
623  */
readIdentifier(int c,vString * const name)624 static void readIdentifier (int c, vString *const name)
625 {
626 	vStringClear (name);
627 	do
628 	{
629 		vStringPut (name, c);
630 		c = cppGetcFromUngetBufferOrFile ();
631 	} while (c != EOF  && cppIsident (c));
632 	cppUngetc (c);
633 }
634 
/*  Reads the file name of an #include into "name". "c" is the opening
 *  delimiter: '<' selects '>' as the closing one, anything else selects
 *  '"'. Reading stops at the closing delimiter, at a newline or at EOF;
 *  the stopping character is consumed and not stored.
 */
static void readFilename (int c, vString *const name)
{
	const int closing = (c == '<') ? '>' : '"';

	vStringClear (name);

	for (;;)
	{
		c = cppGetcFromUngetBufferOrFile ();
		if (c == EOF || c == closing || c == '\n')
			break;
		vStringPut (name, c);
	}
}
644 
currentConditional(void)645 static conditionalInfo *currentConditional (void)
646 {
647 	return &Cpp.directive.ifdef [Cpp.directive.nestLevel];
648 }
649 
isIgnore(void)650 static bool isIgnore (void)
651 {
652 	return Cpp.directive.ifdef [Cpp.directive.nestLevel].ignoring;
653 }
654 
setIgnore(const bool ignore)655 static bool setIgnore (const bool ignore)
656 {
657 	return Cpp.directive.ifdef [Cpp.directive.nestLevel].ignoring = ignore;
658 }
659 
isIgnoreBranch(void)660 static bool isIgnoreBranch (void)
661 {
662 	conditionalInfo *const ifdef = currentConditional ();
663 
664 	/*  Force a single branch if an incomplete statement is discovered
665 	 *  en route. This may have allowed earlier branches containing complete
666 	 *  statements to be followed, but we must follow no further branches.
667 	 */
668 
669 	/*
670 	* CXX: Force a single branch if the external parser (cxx) block nest level at the beginning
671 	* of this conditional is not equal to the current block nest level (at exit of the first branch).
672 	*
673 	* Follow both branches example: (same state at enter and exit)
674 	*
675 	* #if something
676 	*     xxxxx;
677 	* #else
678 	*     yyyy;
679 	* #endif
680 	*
681 	* Follow single branch example: (different block level at enter and exit)
682 	*
683 	*    if {
684 	* #if something
685     *    } else x;
686 	* #else
687 	*    }
688 	* #endif
689 	*/
690 
691 	if (
692 			(Cpp.resolveRequired || (ifdef->enterExternalParserBlockNestLevel != externalParserBlockNestLevel)) &&
693 			(!BraceFormat)
694 		)
695 	{
696 		CXX_DEBUG_PRINT("Choosing single branch");
697 		ifdef->singleBranch = true;
698 	}
699 
700 	/*  We will ignore this branch in the following cases:
701 	 *
702 	 *  1.  We are ignoring all branches (conditional was within an ignored
703 	 *        branch of the parent conditional)
704 	 *  2.  A branch has already been chosen and either of:
705 	 *      a.  A statement was incomplete upon entering the conditional
706 	 *      b.  A statement is incomplete upon encountering a branch
707 	 */
708 	return (bool) (ifdef->ignoreAllBranches ||
709 					 (ifdef->branchChosen  &&  ifdef->singleBranch));
710 }
711 
chooseBranch(void)712 static void chooseBranch (void)
713 {
714 	if (! BraceFormat)
715 	{
716 		conditionalInfo *const ifdef = currentConditional ();
717 
718 		ifdef->branchChosen = (bool) (ifdef->singleBranch ||
719 										Cpp.resolveRequired);
720 	}
721 }
722 
723 /*  Pushes one nesting level for an #if directive, indicating whether or not
724  *  the branch should be ignored and whether a branch has already been chosen.
725  */
pushConditional(const bool firstBranchChosen)726 static bool pushConditional (const bool firstBranchChosen)
727 {
728 	const bool ignoreAllBranches = isIgnore ();  /* current ignore */
729 	bool ignoreBranch = false;
730 
731 	if (Cpp.directive.nestLevel < (unsigned int) MaxCppNestingLevel - 1)
732 	{
733 		conditionalInfo *ifdef;
734 
735 		++Cpp.directive.nestLevel;
736 		ifdef = currentConditional ();
737 
738 		/*  We take a snapshot of whether there is an incomplete statement in
739 		 *  progress upon encountering the preprocessor conditional. If so,
740 		 *  then we will flag that only a single branch of the conditional
741 		 *  should be followed.
742 		 */
743 		ifdef->ignoreAllBranches = ignoreAllBranches;
744 		ifdef->singleBranch      = Cpp.resolveRequired;
745 		ifdef->branchChosen      = firstBranchChosen;
746 		ifdef->ignoring = (bool) (ignoreAllBranches || (
747 				! firstBranchChosen  &&  ! BraceFormat  &&
748 				(ifdef->singleBranch || !doesExaminCodeWithInIf0Branch)));
749 		ifdef->enterExternalParserBlockNestLevel = externalParserBlockNestLevel;
750 		ifdef->asmArea.line = 0;
751 		ignoreBranch = ifdef->ignoring;
752 	}
753 	return ignoreBranch;
754 }
755 
756 /*  Pops one nesting level for an #endif directive.
757  */
popConditional(void)758 static bool popConditional (void)
759 {
760 	if (Cpp.directive.nestLevel > 0)
761 		--Cpp.directive.nestLevel;
762 
763 	return isIgnore ();
764 }
765 
doesCPreProRunAsStandaloneParser(int kind)766 static bool doesCPreProRunAsStandaloneParser (int kind)
767 {
768 	if (kind == CPREPRO_HEADER)
769 		return !Cpp.useClientLangDefineMacroKindIndex;
770 	else if (kind == CPREPRO_MACRO)
771 		return !Cpp.useClientLangHeaderKindIndex;
772 	else if (kind == CPREPRO_PARAM)
773 		return !Cpp.useClientLangMacroParamKindIndex;
774 	else
775 	{
776 		AssertNotReached();
777 		return true;
778 	}
779 }
780 
makeDefineTag(const char * const name,const char * const signature,bool undef)781 static int makeDefineTag (const char *const name, const char* const signature, bool undef)
782 {
783 	bool standing_alone = doesCPreProRunAsStandaloneParser(CPREPRO_MACRO);
784 	langType lang = standing_alone ? Cpp.lang: Cpp.clientLang;
785 	const bool isFileScope = (bool) (! isInputHeaderFile ());
786 
787 	if (!isLanguageEnabled (lang))
788 			return CORK_NIL;
789 
790 	Assert (Cpp.defineMacroKindIndex != KIND_GHOST_INDEX);
791 
792 	if (isFileScope && !isXtagEnabled(XTAG_FILE_SCOPE))
793 		return CORK_NIL;
794 
795 	if (undef && (Cpp.macroUndefRoleIndex == ROLE_DEFINITION_INDEX))
796 		return CORK_NIL;
797 
798 	if (! isLanguageKindEnabled (lang,
799 								 Cpp.defineMacroKindIndex))
800 		return CORK_NIL;
801 
802 	if (
803 		/* condition for definition tag */
804 		(!undef)
805 		|| /* condition for reference tag */
806 		(undef && isXtagEnabled(XTAG_REFERENCE_TAGS) &&
807 		 isLanguageRoleEnabled(lang, Cpp.defineMacroKindIndex,
808 							   Cpp.macroUndefRoleIndex)))
809 	{
810 		tagEntryInfo e;
811 		int r;
812 
813 		if (standing_alone)
814 			pushLanguage (Cpp.lang);
815 
816 		if (undef)
817 			initRefTagEntry (&e, name, Cpp.defineMacroKindIndex,
818 							 Cpp.macroUndefRoleIndex);
819 		else
820 			initTagEntry (&e, name, Cpp.defineMacroKindIndex);
821 		e.isFileScope  = isFileScope;
822 		if (isFileScope)
823 			markTagExtraBit (&e, XTAG_FILE_SCOPE);
824 		e.truncateLineAfterTag = true;
825 		e.extensionFields.signature = signature;
826 
827 		r = makeTagEntry (&e);
828 
829 		if (standing_alone)
830 			popLanguage ();
831 
832 		return r;
833 	}
834 	return CORK_NIL;
835 }
836 
makeIncludeTag(const char * const name,bool systemHeader)837 static void makeIncludeTag (const  char *const name, bool systemHeader)
838 {
839 	bool standing_alone = doesCPreProRunAsStandaloneParser(CPREPRO_HEADER);
840 	langType lang = standing_alone ? Cpp.lang: Cpp.clientLang;
841 	tagEntryInfo e;
842 	int role_index;
843 
844 	if (!isLanguageEnabled (lang))
845 		return;
846 
847 	Assert (Cpp.headerKindIndex != KIND_GHOST_INDEX);
848 
849 	role_index = systemHeader? Cpp.headerSystemRoleIndex: Cpp.headerLocalRoleIndex;
850 	if (role_index == ROLE_DEFINITION_INDEX)
851 		return;
852 
853 	if (!isXtagEnabled (XTAG_REFERENCE_TAGS))
854 		return;
855 
856 	if (!isLanguageKindEnabled(lang, Cpp.headerKindIndex))
857 		return;
858 
859 	if (isLanguageRoleEnabled(lang, Cpp.headerKindIndex, role_index))
860 	{
861 		if (standing_alone)
862 			pushLanguage (Cpp.lang);
863 
864 		initRefTagEntry (&e, name, Cpp.headerKindIndex, role_index);
865 		e.isFileScope  = false;
866 		e.truncateLineAfterTag = true;
867 		makeTagEntry (&e);
868 
869 		if (standing_alone)
870 			popLanguage ();
871 	}
872 }
873 
/*  Emits a tag for one parameter of a function-like macro.
 *
 *  name         the parameter name
 *  nth          position of the parameter, stored in the tag's nth field
 *  placeholder  mark the tag as a placeholder
 *
 *  The tag is made with Cpp.macroParamKindIndex, so whether the
 *  CPreProcessor language has to be pushed is decided for the parameter
 *  kind, not for the macro kind: the kind index and the language it is
 *  interpreted in must come from the same side.
 */
static void makeParamTag (vString *name, short nth, bool placeholder)
{
	bool standing_alone = doesCPreProRunAsStandaloneParser(CPREPRO_PARAM);

	Assert (Cpp.macroParamKindIndex != KIND_GHOST_INDEX);

	if (standing_alone)
		pushLanguage (Cpp.lang);
	int r = makeSimpleTag (name, Cpp.macroParamKindIndex);
	if (standing_alone)
		popLanguage ();

	/* the extra attributes can be set only on an entry kept in the cork
	 * queue */
	tagEntryInfo *e = getEntryInCorkQueue (r);
	if (e)
	{
		e->extensionFields.nth = nth;
		if (placeholder)
			e->placeholder = 1;
	}
}
894 
/*  Appends "(p1,p2,...)" to buffer, built from the names of the cork
 *  queue entries with indexes from "from" up to, but not including, "to".
 *  Missing entries and extra tags are left out.
 */
static void regenreateSignatureFromParameters (vString * buffer, int from, int to)
{
	int index = from;

	vStringPut(buffer, '(');
	while (index < to)
	{
		tagEntryInfo *const param = getEntryInCorkQueue (index++);

		if (param == NULL || isTagExtra (param))
			continue;

		vStringCatS (buffer, param->name);
		vStringPut (buffer, ',');
	}
	/* remove the comma put after the last name */
	if (vStringLast (buffer) == ',')
		vStringChop (buffer);
	vStringPut (buffer, ')');
}
911 
patchScopeFieldOfParameters(int from,int to,int parentIndex)912 static void patchScopeFieldOfParameters(int from, int to, int parentIndex)
913 {
914 	for (int pindex = from; pindex < to; pindex++)
915 	{
916 		tagEntryInfo *e = getEntryInCorkQueue (pindex);
917 		if (e)
918 			e->extensionFields.scopeIndex = parentIndex;
919 	}
920 }
921 
directiveDefine(const int c,bool undef)922 static int directiveDefine (const int c, bool undef)
923 {
924 	// FIXME: We could possibly handle the macros here!
925 	//        However we'd need a separate hash table for macros of the current file
926 	//        to avoid breaking the "global" ones.
927 
928 	int r = CORK_NIL;
929 
930 	if (cppIsident1 (c))
931 	{
932 		readIdentifier (c, Cpp.directive.name);
933 		if (! isIgnore ())
934 		{
935 			unsigned long 	lineNumber = getInputLineNumber ();
936 			MIOPos filePosition = getInputFilePosition ();
937 			int p = cppGetcFromUngetBufferOrFile ();
938 			short nth = 0;
939 
940 			if (p == '(')
941 			{
942 				vString *param = vStringNew ();
943 				int param_start = (int)countEntryInCorkQueue();
944 				do {
945 					p = cppGetcFromUngetBufferOrFile ();
946 					if (isalnum(p) || p == '_' || p == '$'
947 						/* Handle variadic macros like (a,...) */
948 						|| p == '.')
949 					{
950 						vStringPut (param, p);
951 						continue;
952 					}
953 
954 					if (vStringLength (param) > 0)
955 					{
956 						makeParamTag (param, nth++, vStringChar(param, 0) == '.');
957 						vStringClear (param);
958 					}
959 					if (p == '\\')
960 						cppGetcFromUngetBufferOrFile (); /* Throw away the next char */
961 				} while (p != ')' && p != EOF);
962 				vStringDelete (param);
963 
964 				int param_end = (int)countEntryInCorkQueue();
965 				if (p == ')')
966 				{
967 					vString *signature = vStringNew ();
968 					regenreateSignatureFromParameters (signature, param_start, param_end);
969 					r = makeDefineTag (vStringValue (Cpp.directive.name), vStringValue (signature), undef);
970 					vStringDelete (signature);
971 				}
972 				else
973 					r = makeDefineTag (vStringValue (Cpp.directive.name), NULL, undef);
974 
975 				tagEntryInfo *e = getEntryInCorkQueue (r);
976 				if (e)
977 				{
978 					e->lineNumber = lineNumber;
979 					e->filePosition = filePosition;
980 					patchScopeFieldOfParameters (param_start, param_end, r);
981 				}
982 			}
983 			else
984 			{
985 				cppUngetc (p);
986 				r = makeDefineTag (vStringValue (Cpp.directive.name), NULL, undef);
987 			}
988 		}
989 	}
990 	Cpp.directive.state = DRCTV_NONE;
991 
992 	if (r != CORK_NIL && Cpp.fileMacroTable)
993 		registerEntry (r);
994 	return r;
995 }
996 
directiveUndef(const int c)997 static void directiveUndef (const int c)
998 {
999 	if (isXtagEnabled (XTAG_REFERENCE_TAGS))
1000 	{
1001 		directiveDefine (c, true);
1002 	}
1003 	else
1004 	{
1005 		Cpp.directive.state = DRCTV_NONE;
1006 	}
1007 }
1008 
directivePragma(int c)1009 static void directivePragma (int c)
1010 {
1011 	if (cppIsident1 (c))
1012 	{
1013 		readIdentifier (c, Cpp.directive.name);
1014 		if (stringMatch (vStringValue (Cpp.directive.name), "weak"))
1015 		{
1016 			/* generate macro tag for weak name */
1017 			do
1018 			{
1019 				c = cppGetcFromUngetBufferOrFile ();
1020 			} while (c == SPACE);
1021 			if (cppIsident1 (c))
1022 			{
1023 				readIdentifier (c, Cpp.directive.name);
1024 				makeDefineTag (vStringValue (Cpp.directive.name), NULL, false);
1025 			}
1026 		}
1027 	}
1028 	Cpp.directive.state = DRCTV_NONE;
1029 }
1030 
1031 /*
1032  * __ASSEMBLER__ ("3.7.1 Standard Predefined Macros" in GNU cpp info),
1033  * __ASSEMBLY__	 (Used in Linux kernel)
1034  */
isAssemblerBlock(int c)1035 static bool isAssemblerBlock (int c)
1036 {
1037 	if (c != '_')
1038 		return false;
1039 
1040 	bool r = false;
1041 	vString *cond = vStringNew ();
1042 	readIdentifier (c, cond);
1043 	if (strcmp (vStringValue (cond), "__ASSEMBLER__") == 0
1044 		|| strcmp (vStringValue (cond), "__ASSEMBLY__") == 0)
1045 		r = true;
1046 
1047 	CXX_DEBUG_PRINT("ASSEMBLER[%s]: %s", r? "true": "false", vStringValue(cond));
1048 
1049 	size_t len = vStringLength (cond);
1050 	/* Pushing back to the stream.
1051 	 * The first character is not read in this function.
1052 	 * So don't touch the character here. */
1053 	for (size_t i = len; i > 1; i--)
1054 	{
1055 		c = vStringChar (cond, i - 1);
1056 		cppUngetc (c);
1057 	}
1058 
1059 	vStringDelete (cond);
1060 	return r;
1061 }
1062 
directiveIf(const int c,enum eIfSubstate if_substate)1063 static bool directiveIf (const int c, enum eIfSubstate if_substate)
1064 {
1065 	static langType asmLang = LANG_IGNORE;
1066 	if (asmLang == LANG_IGNORE)
1067 		asmLang = getNamedLanguage ("Asm", 0);
1068 
1069 	DebugStatement ( const bool ignore0 = isIgnore (); )
1070 	bool firstBranchChosen = (bool) (c != '0');
1071 	bool assemblerBlock = false;
1072 	if (Cpp.clientLang != asmLang && firstBranchChosen)
1073 	{
1074 		assemblerBlock = isAssemblerBlock(c);
1075 		if (assemblerBlock && if_substate != IF_IFNDEF)
1076 			firstBranchChosen = false;
1077 	}
1078 
1079 	CXX_DEBUG_PRINT("firstBranchChosen: %d", firstBranchChosen);
1080 	const bool ignore = pushConditional (firstBranchChosen);
1081 	if (assemblerBlock)
1082 	{
1083 		conditionalInfo *ifdef = currentConditional ();
1084 		ifdef->asmArea.ifSubstate = if_substate;
1085 		ifdef->asmArea.line = getInputLineNumber();
1086 	}
1087 
1088 	Cpp.directive.state = DRCTV_NONE;
1089 	DebugStatement ( debugCppNest (true, Cpp.directive.nestLevel);
1090 	                 if (ignore != ignore0) debugCppIgnore (ignore); )
1091 
1092 	return ignore;
1093 }
1094 
directiveElif(const int c)1095 static void directiveElif (const int c)
1096 {
1097 	Cpp.directive.state = DRCTV_NONE;
1098 }
1099 
directiveInclude(const int c)1100 static void directiveInclude (const int c)
1101 {
1102 	if (c == '<' || c == '"')
1103 	{
1104 		readFilename (c, Cpp.directive.name);
1105 		if ((! isIgnore ()) && vStringLength (Cpp.directive.name))
1106 			makeIncludeTag (vStringValue (Cpp.directive.name),
1107 					c == '<');
1108 	}
1109 	Cpp.directive.state = DRCTV_NONE;
1110 }
1111 
/*  Called when #else, #elif or #endif ("currentState") is met for the
 *  conditional "ifdef". Nothing is done unless an assembler area is being
 *  tracked, i.e. asmArea.line is not zero.
 *
 *  - Tracking started at #if/#ifdef and any of the three directives is
 *    met, or tracking started at #else and #endif is met: the lines after
 *    the recorded one up to the current one are promised to the "Asm"
 *    parser (if there is at least one), and tracking stops.
 *  - Tracking started at #ifndef: #elif stops tracking, while #else
 *    restarts it from the current line as an IF_ELSE area.
 */
static void promiseOrPrepareAsm (conditionalInfo *ifdef, enum eIfSubstate currentState)
{
	struct asmAreaInfo *const area = &ifdef->asmArea;

	if (!area->line)
		return;

	const enum eIfSubstate opener = area->ifSubstate;
	const bool branchEnds = (currentState == IF_ELSE
							 || currentState == IF_ELIF
							 || currentState == IF_ENDIF);
	const bool areaEnds =
		((opener == IF_IF || opener == IF_IFDEF) && branchEnds)
		|| (opener == IF_ELSE && currentState == IF_ENDIF);

	if (areaEnds)
	{
		const unsigned long start = area->line + 1;
		const unsigned long end = getInputLineNumber ();

		if (start < end)
			makePromise ("Asm", start, 0, end, 0, start);

		area->line = 0;
		return;
	}

	if (opener != IF_IFNDEF)
		return;

	switch (currentState)
	{
		case IF_ELIF:
			area->line = 0;
			break;
		case IF_ELSE:
			area->ifSubstate = IF_ELSE;
			area->line = getInputLineNumber ();
			break;
		default:
			break;
	}
}
1141 
directiveHash(const int c)1142 static bool directiveHash (const int c)
1143 {
1144 	bool ignore = false;
1145 	char directive [MaxDirectiveName];
1146 	DebugStatement ( const bool ignore0 = isIgnore (); )
1147 
1148 	readDirective (c, directive, MaxDirectiveName);
1149 	if (stringMatch (directive, "define"))
1150 		Cpp.directive.state = DRCTV_DEFINE;
1151 	else if (stringMatch (directive, "include"))
1152 		Cpp.directive.state = DRCTV_INCLUDE;
1153 	else if (stringMatch (directive, "undef"))
1154 		Cpp.directive.state = DRCTV_UNDEF;
1155 	else if (strncmp (directive, "if", (size_t) 2) == 0)
1156 	{
1157 		Cpp.directive.state = DRCTV_IF;
1158 		Cpp.directive.ifsubstate = IF_IF;
1159 		if (directive[2] == 'd')
1160 			Cpp.directive.ifsubstate = IF_IFDEF;
1161 		else if (directive[2] == 'n')
1162 			Cpp.directive.ifsubstate = IF_IFNDEF;
1163 	}
1164 	else if (stringMatch (directive, "elif")  ||
1165 			stringMatch (directive, "else"))
1166 	{
1167 		enum eIfSubstate s = (directive[2] == 's')? IF_ELSE: IF_ELIF;
1168 		conditionalInfo *ifdef = currentConditional ();
1169 		promiseOrPrepareAsm (ifdef, s);
1170 
1171 		ignore = setIgnore (isIgnoreBranch ());
1172 		CXX_DEBUG_PRINT("Found #elif or #else: ignore is %d",ignore);
1173 		if (! ignore  &&  s == IF_ELSE)
1174 			chooseBranch ();
1175 		Cpp.directive.state = (s == IF_ELIF)? DRCTV_ELIF: DRCTV_NONE;
1176 		DebugStatement ( if (ignore != ignore0) debugCppIgnore (ignore); )
1177 	}
1178 	else if (stringMatch (directive, "endif"))
1179 	{
1180 		conditionalInfo *ifdef = currentConditional ();
1181 		promiseOrPrepareAsm (ifdef, IF_ENDIF);
1182 
1183 		DebugStatement ( debugCppNest (false, Cpp.directive.nestLevel); )
1184 		ignore = popConditional ();
1185 		Cpp.directive.state = DRCTV_NONE;
1186 		DebugStatement ( if (ignore != ignore0) debugCppIgnore (ignore); )
1187 	}
1188 	else if (stringMatch (directive, "pragma"))
1189 		Cpp.directive.state = DRCTV_PRAGMA;
1190 	else
1191 		Cpp.directive.state = DRCTV_NONE;
1192 
1193 	return ignore;
1194 }
1195 
1196 /*  Handles a pre-processor directive whose first character is given by "c".
1197  */
handleDirective(const int c,int * macroCorkIndex,bool * inspect_conidtion)1198 static bool handleDirective (const int c, int *macroCorkIndex, bool *inspect_conidtion)
1199 {
1200 	bool ignore = isIgnore ();
1201 
1202 	switch (Cpp.directive.state)
1203 	{
1204 		case DRCTV_NONE:    ignore = isIgnore ();        break;
1205 		case DRCTV_DEFINE:
1206 			*macroCorkIndex = directiveDefine (c, false);
1207 			break;
1208 		case DRCTV_HASH:    ignore = directiveHash (c);  break;
1209 		case DRCTV_IF:
1210 			ignore = directiveIf (c, Cpp.directive.ifsubstate);
1211 			*inspect_conidtion = true;
1212 			break;
1213 		case DRCTV_ELIF:
1214 			directiveElif (c);
1215 			*inspect_conidtion = true;
1216 			break;
1217 		case DRCTV_PRAGMA:  directivePragma (c);         break;
1218 		case DRCTV_UNDEF:   directiveUndef (c);          break;
1219 		case DRCTV_INCLUDE: directiveInclude (c);        break;
1220 	}
1221 	return ignore;
1222 }
1223 
1224 /*  Called upon reading of a slash ('/') characters, determines whether a
1225  *  comment is encountered, and its type.
1226  */
isComment(void)1227 static Comment isComment (void)
1228 {
1229 	Comment comment;
1230 	const int next = cppGetcFromUngetBufferOrFile ();
1231 
1232 	if (next == '*')
1233 		comment = COMMENT_C;
1234 	else if (next == '/')
1235 		comment = COMMENT_CPLUS;
1236 	else if (next == '+')
1237 		comment = COMMENT_D;
1238 	else
1239 	{
1240 		cppUngetc (next);
1241 		comment = COMMENT_NONE;
1242 	}
1243 	return comment;
1244 }
1245 
1246 /*  Skips over a C style comment. According to ANSI specification a comment
1247  *  is treated as white space, so we perform this substitution.
1248  */
cppSkipOverCComment(void)1249 static int cppSkipOverCComment (void)
1250 {
1251 	int c = cppGetcFromUngetBufferOrFile ();
1252 
1253 	while (c != EOF)
1254 	{
1255 		if (c != '*')
1256 			c = cppGetcFromUngetBufferOrFile ();
1257 		else
1258 		{
1259 			const int next = cppGetcFromUngetBufferOrFile ();
1260 
1261 			if (next != '/')
1262 				c = next;
1263 			else
1264 			{
1265 				c = SPACE;  /* replace comment with space */
1266 				break;
1267 			}
1268 		}
1269 	}
1270 	return c;
1271 }
1272 
1273 /*  Skips over a C++ style comment.
1274  */
skipOverCplusComment(void)1275 static int skipOverCplusComment (void)
1276 {
1277 	int c;
1278 
1279 	while ((c = cppGetcFromUngetBufferOrFile ()) != EOF)
1280 	{
1281 		if (c == BACKSLASH)
1282 			cppGetcFromUngetBufferOrFile ();  /* throw away next character, too */
1283 		else if (c == NEWLINE)
1284 			break;
1285 	}
1286 	return c;
1287 }
1288 
1289 /* Skips over a D style comment.
1290  * Really we should match nested /+ comments. At least they're less common.
1291  */
skipOverDComment(void)1292 static int skipOverDComment (void)
1293 {
1294 	int c = cppGetcFromUngetBufferOrFile ();
1295 
1296 	while (c != EOF)
1297 	{
1298 		if (c != '+')
1299 			c = cppGetcFromUngetBufferOrFile ();
1300 		else
1301 		{
1302 			const int next = cppGetcFromUngetBufferOrFile ();
1303 
1304 			if (next != '/')
1305 				c = next;
1306 			else
1307 			{
1308 				c = SPACE;  /* replace comment with space */
1309 				break;
1310 			}
1311 		}
1312 	}
1313 	return c;
1314 }
1315 
cppGetLastCharOrStringContents(void)1316 const vString * cppGetLastCharOrStringContents (void)
1317 {
1318 	CXX_DEBUG_ASSERT(Cpp.charOrStringContents,"Shouldn't be called when CPP is not initialized");
1319 	return Cpp.charOrStringContents;
1320 }
1321 
1322 /*  Skips to the end of a string, returning a special character to
1323  *  symbolically represent a generic string.
1324  */
skipToEndOfString(bool ignoreBackslash)1325 static int skipToEndOfString (bool ignoreBackslash)
1326 {
1327 	int c;
1328 
1329 	vStringClear(Cpp.charOrStringContents);
1330 
1331 	while ((c = cppGetcFromUngetBufferOrFile ()) != EOF)
1332 	{
1333 		if (c == BACKSLASH && ! ignoreBackslash)
1334 		{
1335 			vStringPutWithLimit (Cpp.charOrStringContents, c, 1024);
1336 			c = cppGetcFromUngetBufferOrFile ();  /* throw away next character, too */
1337 			if (c != EOF)
1338 				vStringPutWithLimit (Cpp.charOrStringContents, c, 1024);
1339 		}
1340 		else if (c == DOUBLE_QUOTE)
1341 			break;
1342 		else
1343 			vStringPutWithLimit (Cpp.charOrStringContents, c, 1024);
1344 	}
1345 	return STRING_SYMBOL;  /* symbolic representation of string */
1346 }
1347 
/*  Returns 1 if "c" may be part of the delimiter of a C++ raw string
 *  literal, 0 otherwise. Space, form feed, newline, carriage return,
 *  horizontal and vertical tab, both parentheses and the backslash are
 *  rejected; every other value is accepted.
 */
static int isCxxRawLiteralDelimiterChar (int c)
{
	switch (c)
	{
		case ' ':
		case '\f':
		case '\n':
		case '\r':
		case '\t':
		case '\v':
		case '(':
		case ')':
		case '\\':
			return 0;
		default:
			return 1;
	}
}
1353 
skipToEndOfCxxRawLiteralString(void)1354 static int skipToEndOfCxxRawLiteralString (void)
1355 {
1356 	int c = cppGetcFromUngetBufferOrFile ();
1357 
1358 	if (c != '(' && ! isCxxRawLiteralDelimiterChar (c))
1359 	{
1360 		cppUngetc (c);
1361 		c = skipToEndOfString (false);
1362 	}
1363 	else
1364 	{
1365 		char delim[16];
1366 		unsigned int delimLen = 0;
1367 		bool collectDelim = true;
1368 
1369 		do
1370 		{
1371 			if (collectDelim)
1372 			{
1373 				if (isCxxRawLiteralDelimiterChar (c) &&
1374 				    delimLen < (sizeof delim / sizeof *delim))
1375 					delim[delimLen++] = c;
1376 				else
1377 					collectDelim = false;
1378 			}
1379 			else if (c == ')')
1380 			{
1381 				unsigned int i = 0;
1382 
1383 				while ((c = cppGetcFromUngetBufferOrFile ()) != EOF && i < delimLen && delim[i] == c)
1384 					i++;
1385 				if (i == delimLen && c == DOUBLE_QUOTE)
1386 					break;
1387 				else
1388 					cppUngetc (c);
1389 			}
1390 		}
1391 		while ((c = cppGetcFromUngetBufferOrFile ()) != EOF);
1392 		c = STRING_SYMBOL;
1393 	}
1394 	return c;
1395 }
1396 
1397 /*  Skips to the end of the three (possibly four) 'c' sequence, returning a
1398  *  special character to symbolically represent a generic character.
1399  *  Also detects Vera numbers that include a base specifier (ie. 'b1010).
1400  */
skipToEndOfChar()1401 static int skipToEndOfChar ()
1402 {
1403 	int c;
1404 	int count = 0, veraBase = '\0';
1405 
1406 	vStringClear(Cpp.charOrStringContents);
1407 
1408 	while ((c = cppGetcFromUngetBufferOrFile ()) != EOF)
1409 	{
1410 	    ++count;
1411 		if (c == BACKSLASH)
1412 		{
1413 			vStringPutWithLimit (Cpp.charOrStringContents, c, 10);
1414 			c = cppGetcFromUngetBufferOrFile ();  /* throw away next character, too */
1415 			if (c != EOF)
1416 				vStringPutWithLimit (Cpp.charOrStringContents, c, 10);
1417 		}
1418 		else if (c == SINGLE_QUOTE)
1419 			break;
1420 		else if (c == NEWLINE)
1421 		{
1422 			cppUngetc (c);
1423 			break;
1424 		}
1425 		else if (Cpp.hasSingleQuoteLiteralNumbers)
1426 		{
1427 			if (count == 1  &&  strchr ("DHOB", toupper (c)) != NULL)
1428 			{
1429 				veraBase = c;
1430 				vStringPutWithLimit (Cpp.charOrStringContents, c, 10);
1431 			}
1432 			else if (veraBase != '\0'  &&  ! isalnum (c))
1433 			{
1434 				cppUngetc (c);
1435 				break;
1436 			}
1437 			else
1438 				vStringPutWithLimit (Cpp.charOrStringContents, c, 10);
1439 		}
1440 		else
1441 			vStringPutWithLimit (Cpp.charOrStringContents, c, 10);
1442 	}
1443 	return CHAR_SYMBOL;  /* symbolic representation of character */
1444 }
1445 
attachFields(int macroCorkIndex,unsigned long endLine,const char * macrodef)1446 static void attachFields (int macroCorkIndex, unsigned long endLine, const char *macrodef)
1447 {
1448 	tagEntryInfo *tag = getEntryInCorkQueue (macroCorkIndex);
1449 	if (!tag)
1450 		return;
1451 
1452 	tag->extensionFields.endLine = endLine;
1453 	if (macrodef)
1454 		attachParserFieldToCorkEntry (macroCorkIndex, Cpp.macrodefFieldIndex, macrodef);
1455 }
1456 
/*  Emits the identifier collected in "condition", if any, as a reference
 *  tag with the macro kind and the "condition" role, then empties the
 *  buffer. An empty buffer and the word "defined" produce no tag.
 *
 *  With "del" true the buffer is deleted and NULL is returned; otherwise
 *  the cleared buffer is returned. A NULL "condition" is returned as is.
 */
static vString * conditionMayFlush (vString* condition, bool del)
{
	const bool standalone = doesCPreProRunAsStandaloneParser(CPREPRO_MACRO);

	if (condition == NULL)
		return NULL;

	const size_t length = vStringLength(condition);
	const bool isDefinedOperator =
		(length == 7
		 && strcmp (vStringValue (condition), "defined") == 0);

	if (length > 0 && ! isDefinedOperator)
	{
		/* When running as a standalone parser, the tag is made with
		 * Cpp.lang pushed as the current language. */
		if (standalone)
			pushLanguage (Cpp.lang);

		makeSimpleRefTag (condition, Cpp.defineMacroKindIndex, Cpp.macroConditionRoleIndex);

		if (standalone)
			popLanguage ();
	}

	if (! del)
	{
		vStringClear(condition);
		return condition;
	}

	vStringDelete (condition);
	return NULL;
}
1489 
/*  Appends "c" to the identifier being collected in "condition". A digit
 *  is not accepted as the first character, so plain numbers are never
 *  collected. Nothing is done when "condition" is NULL.
 */
static void conditionMayPut (vString *condition, int c)
{
	if (condition == NULL)
		return;

	const bool leadingDigit = (vStringLength (condition) == 0
							   && isdigit(c));

	if (! leadingDigit)
		vStringPut(condition, c);
}
1499 
1500 /*  This function returns the next character, stripping out comments,
1501  *  C pre-processor directives, and the contents of single and double
1502  *  quoted strings. In short, strip anything which places a burden upon
1503  *  the tokenizer.
1504  */
cppGetc(void)1505 extern int cppGetc (void)
1506 {
1507 	bool directive = false;
1508 	bool ignore = false;
1509 	int c;
1510 	int macroCorkIndex = CORK_NIL;
1511 	vString *macrodef = NULL;
1512 	vString *condition = NULL;
1513 
1514 
1515 	do {
1516 start_loop:
1517 		c = cppGetcFromUngetBufferOrFile ();
1518 process:
1519 		switch (c)
1520 		{
1521 			case EOF:
1522 				ignore    = false;
1523 				directive = false;
1524 				if (macroCorkIndex != CORK_NIL)
1525 				{
1526 					attachFields (macroCorkIndex,
1527 								  getInputLineNumber(),
1528 								  macrodef? vStringValue (macrodef): NULL);
1529 					macroCorkIndex = CORK_NIL;
1530 				}
1531 				condition = conditionMayFlush(condition, true);
1532 				break;
1533 
1534 			case TAB:
1535 			case SPACE:
1536 				if (macrodef && vStringLength (macrodef) > 0
1537 					&& vStringLast (macrodef) != ' ')
1538 					vStringPut (macrodef, ' ');
1539 				condition = conditionMayFlush(condition, false);
1540 				break;  /* ignore most white space */
1541 
1542 			case NEWLINE:
1543 				if (directive)
1544 					condition = conditionMayFlush(condition, true);
1545 				if (directive  &&  ! ignore)
1546 				{
1547 					directive = false;
1548 					if (macroCorkIndex != CORK_NIL)
1549 					{
1550 						attachFields (macroCorkIndex,
1551 									  getInputLineNumber(),
1552 									  macrodef? vStringValue (macrodef): NULL);
1553 						macroCorkIndex = CORK_NIL;
1554 					}
1555 				}
1556 				Cpp.directive.accept = true;
1557 				break;
1558 
1559 			case DOUBLE_QUOTE:
1560 				condition = conditionMayFlush(condition, false);
1561 
1562 				if (Cpp.directive.state == DRCTV_INCLUDE)
1563 					goto enter;
1564 				else
1565 				{
1566 					Cpp.directive.accept = false;
1567 					c = skipToEndOfString (false);
1568 				}
1569 
1570 				if (macrodef)
1571 				{
1572 					/* We record the contents of string literal.
1573 					 *
1574 					 */
1575 					vStringPut (macrodef, '"');
1576 					vStringCat (macrodef, Cpp.charOrStringContents);
1577 					vStringPut (macrodef, '"');
1578 				}
1579 
1580 				break;
1581 
1582 			case '#':
1583 				condition = conditionMayFlush(condition, false);
1584 
1585 				if (Cpp.directive.accept)
1586 				{
1587 					directive = true;
1588 					Cpp.directive.state  = DRCTV_HASH;
1589 					Cpp.directive.accept = false;
1590 				}
1591 				if (macrodef)
1592 					vStringPut (macrodef, '#');
1593 				break;
1594 
1595 			case SINGLE_QUOTE:
1596 				condition = conditionMayFlush(condition, false);
1597 
1598 				Cpp.directive.accept = false;
1599 				c = skipToEndOfChar ();
1600 
1601 				/* We assume none may want to know the content of the
1602 				 * literal; just put ''. */
1603 				if (macrodef)
1604 					vStringCatS (macrodef, "''");
1605 
1606 				break;
1607 
1608 			case '/':
1609 			{
1610 				condition = conditionMayFlush(condition, false);
1611 
1612 				const Comment comment = isComment ();
1613 
1614 				if (comment == COMMENT_C)
1615 					c = cppSkipOverCComment ();
1616 				else if (comment == COMMENT_CPLUS)
1617 				{
1618 					c = skipOverCplusComment ();
1619 					if (c == NEWLINE)
1620 						cppUngetc (c);
1621 				}
1622 				else if (comment == COMMENT_D)
1623 					c = skipOverDComment ();
1624 				else
1625 				{
1626 					Cpp.directive.accept = false;
1627 					if (macrodef)
1628 						vStringPut (macrodef, '/');
1629 				}
1630 				break;
1631 			}
1632 
1633 			case BACKSLASH:
1634 			{
1635 				condition = conditionMayFlush(condition, false);
1636 
1637 				int next = cppGetcFromUngetBufferOrFile ();
1638 
1639 				if (next == NEWLINE)
1640 					goto start_loop;
1641 				else
1642 				{
1643 					cppUngetc (next);
1644 					if (macrodef)
1645 						vStringPut (macrodef, '\\');
1646 				}
1647 				break;
1648 			}
1649 
1650 			case '?':
1651 			{
1652 				condition = conditionMayFlush(condition, false);
1653 
1654 				int next = cppGetcFromUngetBufferOrFile ();
1655 				if (next != '?')
1656 				{
1657 					cppUngetc (next);
1658 					if (macrodef)
1659 						vStringPut (macrodef, '?');
1660 				}
1661 				else
1662 				{
1663 					next = cppGetcFromUngetBufferOrFile ();
1664 					switch (next)
1665 					{
1666 						case '(':          c = '[';       break;
1667 						case ')':          c = ']';       break;
1668 						case '<':          c = '{';       break;
1669 						case '>':          c = '}';       break;
1670 						case '/':          c = BACKSLASH; goto process;
1671 						case '!':          c = '|';       break;
1672 						case SINGLE_QUOTE: c = '^';       break;
1673 						case '-':          c = '~';       break;
1674 						case '=':          c = '#';       goto process;
1675 						default:
1676 							cppUngetc ('?');
1677 							cppUngetc (next);
1678 							break;
1679 					}
1680 					if (macrodef)
1681 						vStringPut (macrodef, c);
1682 				}
1683 			} break;
1684 
1685 			/* digraphs:
1686 			 * input:  <:  :>  <%  %>  %:  %:%:
1687 			 * output: [   ]   {   }   #   ##
1688 			 */
1689 			case '<':
1690 			{
1691 				condition = conditionMayFlush(condition, false);
1692 
1693 				/*
1694 				   Quoted from http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2011/n3237.html:
1695 				   ------
1696 				   if the next three characters are <:: and the
1697 				   subsequent character is neither : nor >, the < is
1698 				   treated as a preprocessor token by itself (and not as
1699 				   the first character of the alternative token */
1700 				int next[3];
1701 				next[0] = cppGetcFromUngetBufferOrFile ();
1702 				switch (next[0])
1703 				{
1704 					case ':':
1705 						next[1] = cppGetcFromUngetBufferOrFile ();
1706 						if (next[1] == ':')
1707 						{
1708 							next[2] = cppGetcFromUngetBufferOrFile ();
1709 							if (! (next[2] == ':' || next[2] == '>'))
1710 							{
1711 								cppUngetc (next[2]);
1712 								cppUngetc (next[1]);
1713 								cppUngetc (next[0]);
1714 								c = '<';
1715 							}
1716 							else
1717 							{
1718 								cppUngetc (next[2]);
1719 								cppUngetc (next[1]);
1720 								c = '[';
1721 							}
1722 						}
1723 						else
1724 						{
1725 							cppUngetc (next[1]);
1726 							c = '[';
1727 						}
1728 						break;
1729 					case '%':	c = '{'; break;
1730 					default: cppUngetc (next[0]);
1731 				}
1732 
1733 				if (macrodef)
1734 					vStringPut (macrodef, c);
1735 
1736 				goto enter;
1737 			}
1738 			case ':':
1739 			{
1740 				condition = conditionMayFlush(condition, false);
1741 
1742 				int next = cppGetcFromUngetBufferOrFile ();
1743 				if (next == '>')
1744 					c = ']';
1745 				else
1746 					cppUngetc (next);
1747 
1748 				if (macrodef)
1749 					vStringPut (macrodef, c);
1750 
1751 				goto enter;
1752 			}
1753 			case '%':
1754 			{
1755 				condition = conditionMayFlush(condition, false);
1756 
1757 				int next = cppGetcFromUngetBufferOrFile ();
1758 				switch (next)
1759 				{
1760 					case '>':	c = '}'; break;
1761 					case ':':	c = '#'; goto process;
1762 					default: cppUngetc (next);
1763 				}
1764 
1765 				if (macrodef)
1766 					vStringPut (macrodef, c);
1767 
1768 				goto enter;
1769 			}
1770 
1771 			default:
1772 				if (c == '@' && Cpp.hasAtLiteralStrings)
1773 				{
1774 					condition = conditionMayFlush(condition, false);
1775 
1776 					int next = cppGetcFromUngetBufferOrFile ();
1777 					if (next == DOUBLE_QUOTE)
1778 					{
1779 						Cpp.directive.accept = false;
1780 						c = skipToEndOfString (true);
1781 						if (macrodef)
1782 							vStringCatS (macrodef, "@\"\"");
1783 						break;
1784 					}
1785 					else
1786 					{
1787 						cppUngetc (next);
1788 						if (macrodef)
1789 							vStringPut (macrodef, '@');
1790 					}
1791 				}
1792 				else if (c == 'R' && Cpp.hasCxxRawLiteralStrings)
1793 				{
1794 					conditionMayPut(condition, c);
1795 
1796 					/* OMG!11 HACK!!11  Get the previous character.
1797 					 *
1798 					 * We need to know whether the previous character was an identifier or not,
1799 					 * because "R" has to be on its own, not part of an identifier.  This allows
1800 					 * for constructs like:
1801 					 *
1802 					 * 	#define FOUR "4"
1803 					 * 	const char *p = FOUR"5";
1804 					 *
1805 					 * which is not a raw literal, but a preprocessor concatenation.
1806 					 *
1807 					 * FIXME: handle
1808 					 *
1809 					 * 	const char *p = R\
1810 					 * 	"xxx(raw)xxx";
1811 					 *
1812 					 * which is perfectly valid (yet probably very unlikely). */
1813 					int prev = getNthPrevCFromInputFile (1, '\0');
1814 					int prev2 = getNthPrevCFromInputFile (2, '\0');
1815 					int prev3 = getNthPrevCFromInputFile (3, '\0');
1816 
1817 					if (! cppIsident (prev) ||
1818 					    (! cppIsident (prev2) && (prev == 'L' || prev == 'u' || prev == 'U')) ||
1819 					    (! cppIsident (prev3) && (prev2 == 'u' && prev == '8')))
1820 					{
1821 						int next = cppGetcFromUngetBufferOrFile ();
1822 						if (next != DOUBLE_QUOTE)
1823 						{
1824 							cppUngetc (next);
1825 							if (macrodef)
1826 								vStringPut (macrodef, 'R');
1827 						}
1828 						else
1829 						{
1830 							Cpp.directive.accept = false;
1831 							c = skipToEndOfCxxRawLiteralString ();
1832 
1833 							/* We assume none may want to know the content of the
1834 							 * literal; just put "". */
1835 							if (macrodef)
1836 								vStringCatS (macrodef, "\"\"");
1837 
1838 							break;
1839 						}
1840 					}
1841 					else
1842 					{
1843 						if (macrodef)
1844 							vStringPut (macrodef, 'R');
1845 					}
1846 				}
1847 				else if(isxdigit(c))
1848 				{
1849 					/* Check for digit separator. If we find it we just skip it */
1850 					int next = cppGetcFromUngetBufferOrFile();
1851 					if(next != SINGLE_QUOTE)
1852 						cppUngetc(next);
1853 					if (macrodef)
1854 						vStringPut (macrodef, c);
1855 					conditionMayPut(condition, c);
1856 				}
1857 				else
1858 				{
1859 					if (macrodef)
1860 						vStringPut (macrodef, c);
1861 					if (isalnum(c) || c == '_')
1862 						conditionMayPut(condition, c);
1863 					else
1864 						condition = conditionMayFlush(condition, false);
1865 				}
1866 			enter:
1867 				Cpp.directive.accept = false;
1868 				if (directive)
1869 				{
1870 					bool inspect_conidtion = false;
1871 					ignore = handleDirective (c, &macroCorkIndex, &inspect_conidtion);
1872 					if (Cpp.macrodefFieldIndex != FIELD_UNKNOWN
1873 						&& macroCorkIndex != CORK_NIL
1874 						&& macrodef == NULL)
1875 						macrodef = vStringNew ();
1876 					if (condition == NULL
1877 						&& inspect_conidtion)
1878 					{
1879 						condition = vStringNew ();
1880 						if (isalpha(c) || c == '_')
1881 							conditionMayPut(condition, c);
1882 					}
1883 				}
1884 				break;
1885 		}
1886 	} while (directive || ignore);
1887 
1888 	if (macrodef)
1889 		vStringDelete (macrodef);
1890 
1891 	if (condition)
1892 		vStringDelete (condition);
1893 
1894 	DebugStatement ( debugPutc (DEBUG_CPP, c); )
1895 	DebugStatement ( if (c == NEWLINE)
1896 				debugPrintf (DEBUG_CPP, "%6ld: ", getInputLineNumber () + 1); )
1897 
1898 	return c;
1899 }
1900 
findCppTags(void)1901 static void findCppTags (void)
1902 {
1903 	cppInitCommon (Cpp.lang, 0, false, false, false,
1904 				   KIND_GHOST_INDEX, 0, 0,
1905 				   KIND_GHOST_INDEX,
1906 				   KIND_GHOST_INDEX, 0, 0,
1907 				   FIELD_UNKNOWN);
1908 
1909 	findRegexTagsMainloop (cppGetc);
1910 
1911 	cppTerminate ();
1912 }
1913 
1914 
1915 /*
1916  *  Token ignore processing
1917  */
1918 
1919 static hashTable * cmdlineMacroTable;
1920 
1921 
/*  Callback given to foreachEntriesInScope() by cppFindMacroFromSymtab().
 *
 *  If "entry" is a macro definition tag of the client language or of the
 *  preprocessor itself, a "NAME[SIGNATURE]=[VALUE]" string is rebuilt from
 *  the tag and saved into Cpp.fileMacroTable; the resulting macro info is
 *  stored through "data" (a cppMacroInfo **), and false is returned.
 *  For any other entry true is returned.
 */
static bool buildMacroInfoFromTagEntry (int corkIndex,
										tagEntryInfo * entry,
										void * data)
{
	cppMacroInfo **info = data;

	const bool isMacroDefinition =
		(entry->langType == Cpp.clientLang || entry->langType == Cpp.lang)
		&& entry->kindIndex == Cpp.defineMacroKindIndex
		&& isRoleAssigned (entry, ROLE_DEFINITION_INDEX);

	if (! isMacroDefinition)
		return true;

	vString *definition = vStringNewInit (entry->name);
	if (entry->extensionFields.signature)
		vStringCatS (definition, entry->extensionFields.signature);
	vStringPut (definition, '=');

	const char *value = getParserFieldValueForType (entry, Cpp.macrodefFieldIndex);
	if (value)
		vStringCatS (definition, value);

	*info = saveMacro (Cpp.fileMacroTable, vStringValue (definition));
	vStringDelete (definition);

	return false;
}
1948 
cppFindMacroFromSymtab(const char * const name)1949 extern cppMacroInfo * cppFindMacroFromSymtab (const char *const name)
1950 {
1951 	cppMacroInfo *info = NULL;
1952 	foreachEntriesInScope (CORK_NIL, name, buildMacroInfoFromTagEntry, &info);
1953 
1954 	return info;
1955 }
1956 
1957 /*  Determines whether or not "name" should be ignored, per the ignore list.
1958  */
cppFindMacro(const char * const name)1959 extern cppMacroInfo * cppFindMacro (const char *const name)
1960 {
1961 	cppMacroInfo *info;
1962 
1963 	if (cmdlineMacroTable)
1964 	{
1965 		info = (cppMacroInfo *)hashTableGetItem (cmdlineMacroTable,(char *)name);
1966 		if (info)
1967 			return info;
1968 	}
1969 
1970 	if (Cpp.fileMacroTable)
1971 	{
1972 		info = (cppMacroInfo *)hashTableGetItem (Cpp.fileMacroTable,(char *)name);
1973 		if (info)
1974 			return info;
1975 
1976 		info = cppFindMacroFromSymtab(name);
1977 		if (info)
1978 			return info;
1979 	}
1980 	return NULL;
1981 }
1982 
cppBuildMacroReplacement(const cppMacroInfo * macro,const char ** parameters,int parameterCount)1983 extern vString * cppBuildMacroReplacement(
1984 		const cppMacroInfo * macro,
1985 		const char ** parameters, /* may be NULL */
1986 		int parameterCount
1987 	)
1988 {
1989 	if(!macro)
1990 		return NULL;
1991 
1992 	if(!macro->replacements)
1993 		return NULL;
1994 
1995 	vString * ret = vStringNew();
1996 
1997 	cppMacroReplacementPartInfo * r = macro->replacements;
1998 
1999 	while(r)
2000 	{
2001 		if(r->parameterIndex < 0)
2002 		{
2003 			if(r->constant)
2004 				vStringCat(ret,r->constant);
2005 		} else {
2006 			if(parameters && (r->parameterIndex < parameterCount))
2007 			{
2008 				if(r->flags & CPP_MACRO_REPLACEMENT_FLAG_STRINGIFY)
2009 					vStringPut(ret,'"');
2010 
2011 				vStringCatS(ret,parameters[r->parameterIndex]);
2012 				if(r->flags & CPP_MACRO_REPLACEMENT_FLAG_VARARGS)
2013 				{
2014 					int idx = r->parameterIndex + 1;
2015 					while(idx < parameterCount)
2016 					{
2017 						vStringPut(ret,',');
2018 						vStringCatS(ret,parameters[idx]);
2019 						idx++;
2020 					}
2021 				}
2022 
2023 				if(r->flags & CPP_MACRO_REPLACEMENT_FLAG_STRINGIFY)
2024 					vStringPut(ret,'"');
2025 			}
2026 		}
2027 
2028 		r = r->next;
2029 	}
2030 
2031 	return ret;
2032 }
2033 
2034 // We stop applying macro replacements if the unget buffer gets too big
2035 // as it is a sign of recursive macro expansion
2036 #define CPP_PARSER_MAXIMUM_UNGET_BUFFER_SIZE_FOR_MACRO_REPLACEMENTS 65536
2037 
cppBuildMacroReplacementWithPtrArrayAndUngetResult(cppMacroInfo * macro,const ptrArray * args)2038 extern void cppBuildMacroReplacementWithPtrArrayAndUngetResult(
2039 		cppMacroInfo * macro,
2040 		const ptrArray * args)
2041 {
2042 	vString * replacement = NULL;
2043 
2044 	// Detect other cases of nasty macro expansion that cause
2045 	// the unget buffer to grow fast (but the token chain to grow slowly)
2046 	//    -D'p=a' -D'a=p+p'
2047 	if ((cppUngetBufferSize() < CPP_PARSER_MAXIMUM_UNGET_BUFFER_SIZE_FOR_MACRO_REPLACEMENTS)
2048 		&& macro->replacements)
2049 	{
2050 		int argc = 0;
2051 		const char ** argv = NULL;
2052 
2053 		if (args)
2054 		{
2055 			argc = ptrArrayCount (args);
2056 			argv = (const char **)eMalloc (sizeof(char *) * argc);
2057 			for (int i = 0; i < argc; i++)
2058 			{
2059 				TRACE_PRINT("Arg[%d] for %s<%p>: %s",
2060 							i, macro->name, macro, ptrArrayItem (args, i));
2061 				argv[i] = ptrArrayItem (args, i);
2062 			}
2063 		}
2064 
2065 		replacement = cppBuildMacroReplacement(macro, argv, argc);
2066 
2067 		if (argv)
2068 			eFree ((void *)argv);
2069 	}
2070 
2071 	if (replacement)
2072 	{
2073 		cppUngetStringBuiltByMacro(vStringValue(replacement), vStringLength(replacement),
2074 								   macro);
2075 		TRACE_PRINT("Replacement for %s<%p>: %s", macro->name, macro, vStringValue (replacement));
2076 		vStringDelete (replacement);
2077 	}
2078 	else
2079 		TRACE_PRINT("Replacement for %s<%p>: ", macro->name, macro);
2080 
2081 }
2082 
saveIgnoreToken(const char * ignoreToken)2083 static void saveIgnoreToken(const char * ignoreToken)
2084 {
2085 	if(!ignoreToken)
2086 		return;
2087 
2088 	Assert (cmdlineMacroTable);
2089 
2090 	const char * c = ignoreToken;
2091 	char cc = *c;
2092 
2093 	const char * tokenBegin = c;
2094 	const char * tokenEnd = NULL;
2095 	const char * replacement = NULL;
2096 	bool ignoreFollowingParenthesis = false;
2097 
2098 	while(cc)
2099 	{
2100 		if(cc == '=')
2101 		{
2102 			if(!tokenEnd)
2103 				tokenEnd = c;
2104 			c++;
2105 			if(*c)
2106 				replacement = c;
2107 			break;
2108 		}
2109 
2110 		if(cc == '+')
2111 		{
2112 			if(!tokenEnd)
2113 				tokenEnd = c;
2114 			ignoreFollowingParenthesis = true;
2115 		}
2116 
2117 		c++;
2118 		cc = *c;
2119 	}
2120 
2121 	if(!tokenEnd)
2122 		tokenEnd = c;
2123 
2124 	if(tokenEnd <= tokenBegin)
2125 		return;
2126 
2127 	cppMacroInfo * info = (cppMacroInfo *)eMalloc(sizeof(cppMacroInfo));
2128 
2129 	info->hasParameterList = ignoreFollowingParenthesis;
2130 	if(replacement)
2131 	{
2132 		cppMacroReplacementPartInfo * rep = \
2133 			(cppMacroReplacementPartInfo *)eMalloc(sizeof(cppMacroReplacementPartInfo));
2134 		rep->parameterIndex = -1;
2135 		rep->flags = 0;
2136 		rep->constant = vStringNewInit(replacement);
2137 		rep->next = NULL;
2138 		info->replacements = rep;
2139 	} else {
2140 		info->replacements = NULL;
2141 	}
2142 	info->useCount = 0;
2143 	info->next = NULL;
2144 	info->name = eStrndup(tokenBegin,tokenEnd - tokenBegin);
2145 	hashTablePutItem(cmdlineMacroTable,info->name,info);
2146 
2147 	verbose ("    ignore token: %s\n", ignoreToken);
2148 }
2149 
saveMacro(hashTable * table,const char * macro)2150 static cppMacroInfo * saveMacro(hashTable *table, const char * macro)
2151 {
2152 	CXX_DEBUG_ENTER_TEXT("Save macro %s",macro);
2153 
2154 	if(!macro)
2155 		return NULL;
2156 
2157 	Assert (table);
2158 
2159 	const char * c = macro;
2160 
2161 	// skip initial spaces
2162 	while(*c && isspacetab(*c))
2163 		c++;
2164 
2165 	if(!*c)
2166 	{
2167 		CXX_DEBUG_LEAVE_TEXT("Bad empty macro definition");
2168 		return NULL;
2169 	}
2170 
2171 	if(!(isalpha(*c) || (*c == '_' || (*c == '$') )))
2172 	{
2173 		CXX_DEBUG_LEAVE_TEXT("Macro does not start with an alphanumeric character");
2174 		return NULL; // must be a sequence of letters and digits
2175 	}
2176 
2177 	const char * identifierBegin = c;
2178 
2179 	while(*c && (isalnum(*c) || (*c == '_') || (*c == '$') ))
2180 		c++;
2181 
2182 	const char * identifierEnd = c;
2183 
2184 	CXX_DEBUG_PRINT("Macro identifier '%.*s'",identifierEnd - identifierBegin,identifierBegin);
2185 
2186 #define MAX_PARAMS 16
2187 
2188 	const char * paramBegin[MAX_PARAMS];
2189 	const char * paramEnd[MAX_PARAMS];
2190 
2191 	int iParamCount = 0;
2192 
2193 	while(*c && isspacetab(*c))
2194 		c++;
2195 
2196 	cppMacroInfo * info = (cppMacroInfo *)eMalloc(sizeof(cppMacroInfo));
2197 	info->useCount = 0;
2198 	info->next = NULL;
2199 
2200 	if(*c == '(')
2201 	{
2202 		// parameter list
2203 		CXX_DEBUG_PRINT("Macro has a parameter list");
2204 
2205 		info->hasParameterList = true;
2206 
2207 		c++;
2208 		while(*c)
2209 		{
2210 			while(*c && isspacetab(*c))
2211 				c++;
2212 
2213 			if(*c && (*c != ',') && (*c != ')'))
2214 			{
2215 				paramBegin[iParamCount] = c;
2216 				c++;
2217 				while(*c && (*c != ',') && (*c != ')') && (!isspacetab(*c)))
2218 					c++;
2219 				paramEnd[iParamCount] = c;
2220 
2221 				CXX_DEBUG_PRINT(
2222 						"Macro parameter %d '%.*s'",
2223 							iParamCount,
2224 							paramEnd[iParamCount] - paramBegin[iParamCount],
2225 							paramBegin[iParamCount]
2226 					);
2227 
2228 				iParamCount++;
2229 				if(iParamCount >= MAX_PARAMS)
2230 					break;
2231 			}
2232 
2233 			while(*c && isspacetab(*c))
2234 				c++;
2235 
2236 			if(*c == ')')
2237 				break;
2238 
2239 			if(*c == ',')
2240 				c++;
2241 		}
2242 
2243 		while(*c && (*c != ')'))
2244 			c++;
2245 
2246 		if(*c == ')')
2247 			c++;
2248 
2249 		CXX_DEBUG_PRINT("Got %d parameters",iParamCount);
2250 
2251 	} else {
2252 		info->hasParameterList = false;
2253 	}
2254 
2255 	while(*c && isspacetab(*c))
2256 		c++;
2257 
2258 	info->replacements = NULL;
2259 
2260 
2261 	if(*c == '=')
2262 	{
2263 		CXX_DEBUG_PRINT("Macro has a replacement part");
2264 
2265 		// have replacement part
2266 		c++;
2267 
2268 		cppMacroReplacementPartInfo * lastReplacement = NULL;
2269 		int nextParameterReplacementFlags = 0;
2270 
2271 #define ADD_REPLACEMENT_NEW_PART(part) \
2272 		do { \
2273 			if(lastReplacement) \
2274 				lastReplacement->next = part; \
2275 			else \
2276 				info->replacements = part; \
2277 			lastReplacement = part; \
2278 		} while(0)
2279 
2280 #define ADD_CONSTANT_REPLACEMENT_NEW_PART(start,len) \
2281 		do { \
2282 			cppMacroReplacementPartInfo * rep = \
2283 				(cppMacroReplacementPartInfo *)eMalloc(sizeof(cppMacroReplacementPartInfo)); \
2284 			rep->parameterIndex = -1; \
2285 			rep->flags = 0; \
2286 			rep->constant = vStringNew(); \
2287 			vStringNCatS(rep->constant,start,len); \
2288 			rep->next = NULL; \
2289 			CXX_DEBUG_PRINT("Constant replacement part: '%s'",vStringValue(rep->constant)); \
2290 			ADD_REPLACEMENT_NEW_PART(rep); \
2291 		} while(0)
2292 
2293 #define ADD_CONSTANT_REPLACEMENT(start,len) \
2294 		do { \
2295 			if(lastReplacement && (lastReplacement->parameterIndex == -1)) \
2296 			{ \
2297 				vStringNCatS(lastReplacement->constant,start,len); \
2298 				CXX_DEBUG_PRINT( \
2299 						"Constant replacement part changed: '%s'", \
2300 						vStringValue(lastReplacement->constant) \
2301 					); \
2302 			} else { \
2303 				ADD_CONSTANT_REPLACEMENT_NEW_PART(start,len); \
2304 			} \
2305 		} while(0)
2306 
2307 		// parse replacements
2308 		const char * begin = c;
2309 
2310 		while(*c)
2311 		{
2312 			if(isalpha(*c) || (*c == '_'))
2313 			{
2314 				if(c > begin)
2315 					ADD_CONSTANT_REPLACEMENT(begin,c - begin);
2316 
2317 				const char * tokenBegin = c;
2318 
2319 				while(*c && (isalnum(*c) || (*c == '_')))
2320 					c++;
2321 
2322 				// check if it is a parameter
2323 				int tokenLen = c - tokenBegin;
2324 
2325 				CXX_DEBUG_PRINT("Check token '%.*s'",tokenLen,tokenBegin);
2326 
2327 				bool bIsVarArg = (tokenLen == 11) && (strncmp(tokenBegin,"__VA_ARGS__",11) == 0);
2328 
2329 				int i = 0;
2330 				for(;i<iParamCount;i++)
2331 				{
2332 					int paramLen = paramEnd[i] - paramBegin[i];
2333 
2334 					if(
2335 							(
2336 								bIsVarArg &&
2337 								(paramLen == 3) &&
2338 								(strncmp(paramBegin[i],"...",3) == 0)
2339 							) || (
2340 								(!bIsVarArg) &&
2341 								(paramLen == tokenLen) &&
2342 								(strncmp(paramBegin[i],tokenBegin,paramLen) == 0)
2343 							)
2344 						)
2345 					{
2346 						// parameter!
2347 						cppMacroReplacementPartInfo * rep = \
2348 								(cppMacroReplacementPartInfo *)eMalloc(sizeof(cppMacroReplacementPartInfo));
2349 						rep->parameterIndex = i;
2350 						rep->flags = nextParameterReplacementFlags |
2351 								(bIsVarArg ? CPP_MACRO_REPLACEMENT_FLAG_VARARGS : 0);
2352 						rep->constant = NULL;
2353 						rep->next = NULL;
2354 
2355 						nextParameterReplacementFlags = 0;
2356 
2357 						CXX_DEBUG_PRINT("Parameter replacement part: %d (vararg %d)",i,bIsVarArg);
2358 
2359 						ADD_REPLACEMENT_NEW_PART(rep);
2360 						break;
2361 					}
2362 				}
2363 
2364 				if(i >= iParamCount)
2365 				{
2366 					// no parameter found
2367 					ADD_CONSTANT_REPLACEMENT(tokenBegin,tokenLen);
2368 				}
2369 
2370 				begin = c;
2371 				continue;
2372 			}
2373 
2374 			if((*c == '"') || (*c == '\''))
2375 			{
2376 				// skip string/char constant
2377 				char term = *c;
2378 				c++;
2379 				while(*c)
2380 				{
2381 					if(*c == '\\')
2382 					{
2383 						c++;
2384 						if(*c)
2385 							c++;
2386 					} else if(*c == term)
2387 					{
2388 						c++;
2389 						break;
2390 					}
2391 					c++;
2392 				}
2393 				continue;
2394 			}
2395 
2396 			if(*c == '#')
2397 			{
2398 				// check for token paste/stringification
2399 				if(c > begin)
2400 					ADD_CONSTANT_REPLACEMENT(begin,c - begin);
2401 
2402 				c++;
2403 				if(*c == '#')
2404 				{
2405 					// token paste
2406 					CXX_DEBUG_PRINT("Found token paste operator");
2407 					while(*c == '#')
2408 						c++;
2409 
2410 					// we just skip this part and the following spaces
2411 					while(*c && isspacetab(*c))
2412 						c++;
2413 
2414 					if(lastReplacement && (lastReplacement->parameterIndex == -1))
2415 					{
2416 						// trim spaces from the last replacement constant!
2417 						vStringStripTrailing(lastReplacement->constant);
2418 						CXX_DEBUG_PRINT(
2419 								"Last replacement truncated to '%s'",
2420 								vStringValue(lastReplacement->constant)
2421 							);
2422 					}
2423 				} else {
2424 					// stringification
2425 					CXX_DEBUG_PRINT("Found stringification operator");
2426 					nextParameterReplacementFlags |= CPP_MACRO_REPLACEMENT_FLAG_STRINGIFY;
2427 				}
2428 
2429 				begin = c;
2430 				continue;
2431 			}
2432 
2433 			c++;
2434 		}
2435 
2436 		if(c > begin)
2437 			ADD_CONSTANT_REPLACEMENT(begin,c - begin);
2438 	}
2439 
2440 	info->name = eStrndup(identifierBegin,identifierEnd - identifierBegin);
2441 	hashTablePutItem(table,info->name,info);
2442 	CXX_DEBUG_LEAVE();
2443 
2444 	return info;
2445 }
2446 
/*
 * Releases a macro description: every replacement part (and its constant
 * string, when present), then the name, then the structure itself.
 * A NULL argument is accepted and ignored.
 */
static void freeMacroInfo(cppMacroInfo * info)
{
	if (info == NULL)
		return;

	cppMacroReplacementPartInfo * part = info->replacements;
	while (part != NULL)
	{
		/* Fetch the successor before the node is released. */
		cppMacroReplacementPartInfo * following = part->next;

		if (part->constant != NULL)
			vStringDelete (part->constant);
		eFree (part);

		part = following;
	}

	eFree (info->name);
	eFree (info);
}
2463 
makeMacroTable(void)2464 static hashTable *makeMacroTable (void)
2465 {
2466 	return hashTableNew(
2467 		1024,
2468 		hashCstrhash,
2469 		hashCstreq,
2470 		NULL,					/* Keys refers values' name fields. */
2471 		(void (*)(void *))freeMacroInfo
2472 		);
2473 }
2474 
initializeCpp(const langType language)2475 static void initializeCpp (const langType language)
2476 {
2477 	Cpp.lang = language;
2478 }
2479 
finalizeCpp(const langType language,bool initialized)2480 static void finalizeCpp (const langType language, bool initialized)
2481 {
2482 	if (cmdlineMacroTable)
2483 	{
2484 		hashTableDelete (cmdlineMacroTable);
2485 		cmdlineMacroTable = NULL;
2486 	}
2487 }
2488 
/*
 * Handler for the "_expand" parameter: updates doesExpandMacros from `arg`.
 * The current value is handed to paramParserBool as its second argument
 * (presumably the fallback for an unrecognized `arg` -- confirm in param.h).
 */
static void CpreProExpandMacrosInInput (const langType language CTAGS_ATTR_UNUSED, const char *name, const char *arg)
{
	const bool current = doesExpandMacros;

	doesExpandMacros = paramParserBool (arg, current, name, "parameter");
}
2494 
/*
 * Handler for the "ignore" parameter.
 *
 * A non-empty `arg` is registered through saveIgnoreToken(), creating the
 * command-line macro table on first use.  A NULL or empty `arg` discards
 * the whole table.
 */
static void CpreProInstallIgnoreToken (const langType language CTAGS_ATTR_UNUSED, const char *optname CTAGS_ATTR_UNUSED, const char *arg)
{
	const bool hasToken = (arg != NULL) && (arg[0] != '\0');

	if (hasToken)
	{
		if (cmdlineMacroTable == NULL)
			cmdlineMacroTable = makeMacroTable ();
		saveIgnoreToken (arg);
		return;
	}

	if (cmdlineMacroTable != NULL)
	{
		hashTableDelete (cmdlineMacroTable);
		cmdlineMacroTable = NULL;
	}
	verbose ("    clearing list\n");
}
2511 
/*
 * Handler for the "define" parameter.
 *
 * A non-empty `arg` is parsed and stored through saveMacro(), creating the
 * command-line macro table on first use.  A NULL or empty `arg` discards
 * the whole table.
 */
static void CpreProInstallMacroToken (const langType language CTAGS_ATTR_UNUSED, const char *optname CTAGS_ATTR_UNUSED, const char *arg)
{
	const bool hasDefinition = (arg != NULL) && (arg[0] != '\0');

	if (hasDefinition)
	{
		if (cmdlineMacroTable == NULL)
			cmdlineMacroTable = makeMacroTable ();
		saveMacro (cmdlineMacroTable, arg);
		return;
	}

	if (cmdlineMacroTable != NULL)
	{
		hashTableDelete (cmdlineMacroTable);
		cmdlineMacroTable = NULL;
	}
	verbose ("    clearing list\n");
}
2528 
/*
 * Handler for the "if0" parameter: updates doesExaminCodeWithInIf0Branch
 * from `arg`.  The current value is handed to paramParserBool as its second
 * argument (presumably the fallback for an unrecognized `arg` -- confirm in
 * param.h).
 */
static void CpreProSetIf0 (const langType language CTAGS_ATTR_UNUSED, const char *name, const char *arg)
{
	const bool current = doesExaminCodeWithInIf0Branch;

	doesExaminCodeWithInIf0Branch = paramParserBool (arg, current, name, "parameter");
}
2534 
2535 static parameterHandlerTable CpreProParameterHandlerTable [] = {
2536 	{ .name = "if0",
2537 	  .desc = "examine code within \"#if 0\" branch (true or [false])",
2538 	  .handleParameter = CpreProSetIf0,
2539 	},
2540 	{ .name = "ignore",
2541 	  .desc = "a token to be specially handled",
2542 	  .handleParameter = CpreProInstallIgnoreToken,
2543 	},
2544 	{ .name = "define",
2545 	  .desc = "define replacement for an identifier (name(params,...)=definition)",
2546 	  .handleParameter = CpreProInstallMacroToken,
2547 	},
2548 	{ .name = "_expand",
2549 	  .desc = "expand macros if their definitions are in the current C/C++/CUDA input file (true or [false])",
2550 	  .handleParameter = CpreProExpandMacrosInInput,
2551 	}
2552 };
2553 
CPreProParser(void)2554 extern parserDefinition* CPreProParser (void)
2555 {
2556 	parserDefinition* const def = parserNew ("CPreProcessor");
2557 	def->kindTable      = CPreProKinds;
2558 	def->kindCount  = ARRAY_SIZE (CPreProKinds);
2559 	def->initialize = initializeCpp;
2560 	def->parser     = findCppTags;
2561 	def->finalize   = finalizeCpp;
2562 
2563 	def->fieldTable = CPreProFields;
2564 	def->fieldCount = ARRAY_SIZE (CPreProFields);
2565 
2566 	def->parameterHandlerTable = CpreProParameterHandlerTable;
2567 	def->parameterHandlerCount = ARRAY_SIZE(CpreProParameterHandlerTable);
2568 
2569 	def->useCork = CORK_QUEUE | CORK_SYMTAB;
2570 	return def;
2571 }
2572