1 /*
2 * Copyright (c) 1996-2002, Darren Hiebert
3 *
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License version 2 or (at your option) any later version.
6 *
7 * This module contains the high level input read functions (preprocessor
8 * directives are handled within this level).
9 */
10
11 /*
12 * INCLUDE FILES
13 */
14 #include "general.h" /* must always come first */
15
16 #include <string.h>
17
18 #include "debug.h"
19 #include "entry.h"
20 #include "htable.h"
21 #include "cpreprocessor.h"
22 #include "kind.h"
23 #include "options.h"
24 #include "read.h"
25 #include "vstring.h"
26 #include "param.h"
27 #include "parse.h"
28 #include "promise.h"
29 #include "xtag.h"
30
31 #include "cxx/cxx_debug.h"
32
33 /*
34 * MACROS
35 */
/* Evaluates to true when the NUL-terminated strings s1 and s2 are equal. */
#define stringMatch(s1,s2) (strcmp (s1,s2) == 0)
/* Evaluates to true when c equals SPACE or TAB (constants defined outside
 * this file). */
#define isspacetab(c) ((c) == SPACE || (c) == TAB)
38
39 /*
40 * DATA DECLARATIONS
41 */
/* Comment classification. NOTE(review): the users of this type are not in
 * the visible part of the file; presumably the values name the C, C++ and D
 * comment styles, with COMMENT_NONE meaning "not a comment" -- confirm. */
typedef enum { COMMENT_NONE, COMMENT_C, COMMENT_CPLUS, COMMENT_D } Comment;

/* Fixed limits used by the directive handling code. */
enum eCppLimits {
	MaxCppNestingLevel = 20,	/* number of slots in the conditional stack */
	MaxDirectiveName = 10	/* size of the buffer receiving a directive name */
};
48
/* For tracking __ASSEMBLER__ area.
 * Names the conditional directive that is being handled. */
enum eIfSubstate {
	IF_IF,
	IF_IFDEF,
	IF_IFNDEF,
	IF_ELSE,
	IF_ELIF,
	IF_ENDIF,
};

/* Bookkeeping for a conditional that tests __ASSEMBLER__ / __ASSEMBLY__. */
struct asmAreaInfo {
	enum eIfSubstate ifSubstate;	/* directive that started the tracked area */
	unsigned long line;	/* input line where tracking started;
				 * 0 means no area is being tracked */
};
63
/* Defines the one nesting level of a preprocessor conditional.
 * One instance exists per slot of the conditional stack kept in the
 * preprocessor state.
 */
typedef struct sConditionalInfo {
	bool ignoreAllBranches; /* ignoring parent conditional branch */
	bool singleBranch; /* choose only one branch */
	bool branchChosen; /* branch already selected */
	bool ignoring; /* current ignore state */
	int enterExternalParserBlockNestLevel; /* the parser state when entering this conditional: used only by cxx */

	/* tracking __ASSEMBLER__ area */
	struct asmAreaInfo asmArea;
} conditionalInfo;
76
/* Directive currently being processed; stored in the "state" member of the
 * directive sub-structure of the preprocessor state. */
enum eState {
	DRCTV_NONE, /* no known directive - ignore to end of line */
	DRCTV_DEFINE, /* "#define" encountered */
	DRCTV_HASH, /* initial '#' read; determine directive */
	DRCTV_IF, /* "#if" or "#ifdef" encountered */
	DRCTV_ELIF, /* "#elif" encountered */
	DRCTV_PRAGMA, /* #pragma encountered */
	DRCTV_UNDEF, /* "#undef" encountered */
	DRCTV_INCLUDE, /* "#include" encountered */
};
87
/* Defines the current state of the pre-processor.
 */
typedef struct sCppState {
	langType lang;	/* language of the preprocessor parser itself; LANG_IGNORE
			 * until that parser has been initialized */
	langType clientLang;	/* language of the parser using the preprocessor;
				 * LANG_IGNORE when not in use */

	int * ungetBuffer; /* memory buffer for unget characters */
	int ungetBufferSize; /* the current unget buffer size */
	int * ungetPointer; /* the current unget char: points in the middle of the buffer */
	int ungetDataSize; /* the number of valid unget characters in the buffer */

	/* the contents of the last SYMBOL_CHAR or SYMBOL_STRING */
	vString * charOrStringContents;

	bool resolveRequired; /* must resolve if/else/elif/endif branch */
	bool hasAtLiteralStrings; /* supports @"c:\" strings */
	bool hasCxxRawLiteralStrings; /* supports R"xxx(...)xxx" strings */
	bool hasSingleQuoteLiteralNumbers; /* supports vera number literals:
					    'h..., 'o..., 'd..., and 'b... */

	/* Kind/role indexes used for macro tags. The flag is true when the
	 * client language supplied its own macro kind index. */
	bool useClientLangDefineMacroKindIndex;
	int defineMacroKindIndex;
	int macroUndefRoleIndex;
	int macroConditionRoleIndex;

	/* Kind index used for macro parameter tags. The flag is true when the
	 * client language supplied its own parameter kind index. */
	bool useClientLangMacroParamKindIndex;
	int macroParamKindIndex;

	/* Kind/role indexes used for header tags. The flag is true when the
	 * client language supplied its own header kind index. */
	bool useClientLangHeaderKindIndex;
	int headerKindIndex;
	int headerSystemRoleIndex;
	int headerLocalRoleIndex;

	int macrodefFieldIndex;	/* field index checked with isFieldEnabled() when
				 * deciding whether to build the macro table */

	struct sDirective {
		enum eState state; /* current directive being processed */
		enum eIfSubstate ifsubstate; /* For tracking __ASSEMBLER__.
					      * assigned only when state == DRCTV_IF */
		bool accept; /* is a directive syntactically permitted? */
		vString * name; /* macro name */
		unsigned int nestLevel; /* level 0 is not used */
		conditionalInfo ifdef [MaxCppNestingLevel];
	} directive;

	cppMacroInfo * macroInUse;	/* head of the list of macros whose useCount
					 * is currently non-zero */
	hashTable * fileMacroTable;	/* per-input macro table, or NULL when it is
					 * not wanted */

} cppState;
137
138
/* Role indexes of the preprocessor's own macro kind. The order must match
 * the entries of CPREPROMacroRoles below. */
typedef enum {
	CPREPRO_MACRO_KIND_UNDEF_ROLE,
	CPREPRO_MACRO_KIND_CONDITION_ROLE,
} cPreProMacroRole;

/* Role definitions attached to the macro kind. */
static roleDefinition CPREPROMacroRoles [] = {
	RoleTemplateUndef,
	RoleTemplateCondition,
};


/* Role indexes of the preprocessor's own header kind. The order must match
 * the entries of CPREPROHeaderRoles below. */
typedef enum {
	CPREPRO_HEADER_KIND_SYSTEM_ROLE,
	CPREPRO_HEADER_KIND_LOCAL_ROLE,
} cPreProHeaderRole;

/* Role definitions attached to the header kind. */
static roleDefinition CPREPROHeaderRoles [] = {
	RoleTemplateSystem,
	RoleTemplateLocal,
};
159
160
/* Kind indexes of the preprocessor's own kinds. The order must match the
 * entries of CPreProKinds below. */
typedef enum {
	CPREPRO_MACRO, CPREPRO_HEADER, CPREPRO_PARAM,
} cPreProkind;

/* Kind table of the preprocessor parser: macros ('d'), headers ('h', which
 * is reference only) and macro parameters ('D', whose first member is
 * false). */
static kindDefinition CPreProKinds [] = {
	{ true, 'd', "macro", "macro definitions",
	  .referenceOnly = false, ATTACH_ROLES(CPREPROMacroRoles)},
	{ true, 'h', "header", "included header files",
	  .referenceOnly = true, ATTACH_ROLES(CPREPROHeaderRoles)},
	{ false, 'D', "parameter", "macro parameters", },
};
172
/* Indexes into CPreProFields; COUNT_FIELD is the number of entries. */
typedef enum {
	F_MACRODEF,
	COUNT_FIELD
} cPreProField;

/* Fields owned by the preprocessor parser. "macrodef" starts disabled. */
static fieldDefinition CPreProFields[COUNT_FIELD] = {
	{ .name = "macrodef",
	  .description = "macro definition",
	  .enabled = false },
};
183
184 /*
185 * DATA DEFINITIONS
186 */
187
/* When false, a conditional whose first branch is not chosen (e.g. "#if 0")
 * starts out ignored; when true such code is still examined unless only a
 * single branch may be followed. */
static bool doesExaminCodeWithInIf0Branch;
/* One of the conditions required for the per-input macro table to be
 * created during initialization. */
static bool doesExpandMacros;

/*
 * CXX parser state. This is stored at the beginning of a conditional.
 * If at the exit of the conditional the state is changed then we assume
 * that no further branches should be followed.
 */
static int externalParserBlockNestLevel;


/* Use brace formatting to detect end of block.
 */
static bool BraceFormat = false;
202
cppPushExternalParserBlock(void)203 void cppPushExternalParserBlock(void)
204 {
205 externalParserBlockNestLevel++;
206 }
207
cppPopExternalParserBlock(void)208 void cppPopExternalParserBlock(void)
209 {
210 externalParserBlockNestLevel--;
211 }
212
213
/* The single preprocessor state instance. It starts with no language
 * assigned, no unget buffer, and the preprocessor's own kind and role
 * indexes selected. */
static cppState Cpp = {
	.lang = LANG_IGNORE,
	.clientLang = LANG_IGNORE,
	.ungetBuffer = NULL,
	.ungetBufferSize = 0,
	.ungetPointer = NULL,
	.ungetDataSize = 0,
	.charOrStringContents = NULL,
	.resolveRequired = false,
	.hasAtLiteralStrings = false,
	.hasCxxRawLiteralStrings = false,
	.hasSingleQuoteLiteralNumbers = false,
	.useClientLangDefineMacroKindIndex = false,
	.defineMacroKindIndex = CPREPRO_MACRO,
	.macroUndefRoleIndex = CPREPRO_MACRO_KIND_UNDEF_ROLE,
	.macroConditionRoleIndex = CPREPRO_MACRO_KIND_CONDITION_ROLE,
	.useClientLangMacroParamKindIndex = false,
	.macroParamKindIndex = CPREPRO_PARAM,
	.useClientLangHeaderKindIndex = false,
	.headerKindIndex = CPREPRO_HEADER,
	.headerSystemRoleIndex = CPREPRO_HEADER_KIND_SYSTEM_ROLE,
	.headerLocalRoleIndex = CPREPRO_HEADER_KIND_LOCAL_ROLE,
	.macrodefFieldIndex = FIELD_UNKNOWN,
	.directive = {
		.state = DRCTV_NONE,
		.accept = false,
		.name = NULL,
		.nestLevel = 0,
		.ifdef = {
			/* only slot 0 is spelled out; the remaining slots are
			 * zero-initialized */
			{
				.ignoreAllBranches = false,
				.singleBranch = false,
				.branchChosen = false,
				.ignoring = false,
			}
		}
	} /* directive */
};
252
253 /*
254 * FUNCTION DECLARATIONS
255 */
256
257 static hashTable *makeMacroTable (void);
258 static cppMacroInfo * saveMacro(hashTable *table, const char * macro);
259
260 /*
261 * FUNCTION DEFINITIONS
262 */
263
cppIsBraceFormat(void)264 extern bool cppIsBraceFormat (void)
265 {
266 return BraceFormat;
267 }
268
cppGetDirectiveNestLevel(void)269 extern unsigned int cppGetDirectiveNestLevel (void)
270 {
271 return Cpp.directive.nestLevel;
272 }
273
/* Prepares the preprocessor state for a new input.
 *
 * clientLang is the language on whose behalf the preprocessor runs and
 * "state" selects brace formatting. The three literal flags are copied into
 * the state unchanged. Each *KindIndex argument equal to KIND_GHOST_INDEX
 * selects the preprocessor's own kind (and its own roles/field); any other
 * value makes the client's kind, roles and field index the ones in use.
 *
 * The conditional stack is reset to its base level and the per-input macro
 * table is created only when macro expansion is requested, both the
 * signature field and the macrodef field are enabled, and the relevant
 * language uses the cork symbol table.
 */
static void cppInitCommon(langType clientLang,
		     const bool state, const bool hasAtLiteralStrings,
		     const bool hasCxxRawLiteralStrings,
		     const bool hasSingleQuoteLiteralNumbers,
		     int defineMacroKindIndex,
		     int macroUndefRoleIndex,
		     int macroConditionRoleIndex,
		     int macroParamKindIndex,
		     int headerKindIndex,
		     int headerSystemRoleIndex, int headerLocalRoleIndex,
		     int macrodefFieldIndex)
{
	const bool clientHasMacroKind = (defineMacroKindIndex != KIND_GHOST_INDEX);
	const bool clientHasParamKind = (macroParamKindIndex != KIND_GHOST_INDEX);
	const bool clientHasHeaderKind = (headerKindIndex != KIND_GHOST_INDEX);

	BraceFormat = state;

	CXX_DEBUG_PRINT("cppInit: brace format is %d",BraceFormat);

	externalParserBlockNestLevel = 0;

	/* Make sure the preprocessor's own parser has been initialized. */
	if (Cpp.lang == LANG_IGNORE)
	{
		const langType self = getNamedLanguage ("CPreProcessor", 0);

		initializeParser (self);
	}

	Cpp.clientLang = clientLang;
	Cpp.ungetBuffer = NULL;
	Cpp.ungetPointer = NULL;

	CXX_DEBUG_ASSERT(!Cpp.charOrStringContents,"This string should be null when CPP is not initialized");
	Cpp.charOrStringContents = vStringNew();

	Cpp.resolveRequired = false;
	Cpp.hasAtLiteralStrings = hasAtLiteralStrings;
	Cpp.hasCxxRawLiteralStrings = hasCxxRawLiteralStrings;
	Cpp.hasSingleQuoteLiteralNumbers = hasSingleQuoteLiteralNumbers;

	/* Macro kind, its roles and the macrodef field. */
	Cpp.useClientLangDefineMacroKindIndex = clientHasMacroKind;
	if (clientHasMacroKind)
	{
		Cpp.defineMacroKindIndex = defineMacroKindIndex;
		Cpp.macroUndefRoleIndex = macroUndefRoleIndex;
		Cpp.macroConditionRoleIndex = macroConditionRoleIndex;
		Cpp.macrodefFieldIndex = macrodefFieldIndex;
	}
	else
	{
		Cpp.defineMacroKindIndex = CPREPRO_MACRO;
		Cpp.macroUndefRoleIndex = CPREPRO_MACRO_KIND_UNDEF_ROLE;
		Cpp.macroConditionRoleIndex = CPREPRO_MACRO_KIND_CONDITION_ROLE;
		Cpp.macrodefFieldIndex = CPreProFields [F_MACRODEF].ftype;
	}

	/* Macro parameter kind. */
	Cpp.useClientLangMacroParamKindIndex = clientHasParamKind;
	if (clientHasParamKind)
		Cpp.macroParamKindIndex = macroParamKindIndex;
	else
		Cpp.macroParamKindIndex = CPREPRO_PARAM;

	/* Header kind and its roles. */
	Cpp.useClientLangHeaderKindIndex = clientHasHeaderKind;
	if (clientHasHeaderKind)
	{
		Cpp.headerKindIndex = headerKindIndex;
		Cpp.headerSystemRoleIndex = headerSystemRoleIndex;
		Cpp.headerLocalRoleIndex = headerLocalRoleIndex;
	}
	else
	{
		Cpp.headerKindIndex = CPREPRO_HEADER;
		Cpp.headerSystemRoleIndex = CPREPRO_HEADER_KIND_SYSTEM_ROLE;
		Cpp.headerLocalRoleIndex = CPREPRO_HEADER_KIND_LOCAL_ROLE;
	}

	/* Directive scanning restarts with an empty conditional stack. */
	Cpp.directive.state = DRCTV_NONE;
	Cpp.directive.accept = true;
	Cpp.directive.nestLevel = 0;

	conditionalInfo *const base = &Cpp.directive.ifdef [0];
	base->ignoreAllBranches = false;
	base->singleBranch = false;
	base->branchChosen = false;
	base->ignoring = false;

	Cpp.directive.name = vStringNewOrClear (Cpp.directive.name);

	Cpp.macroInUse = NULL;

	/* The checks are evaluated left to right and stop at the first one
	 * that fails, exactly as a chained && expression does. */
	if (doesExpandMacros
		&& isFieldEnabled (FIELD_SIGNATURE)
		&& isFieldEnabled (Cpp.macrodefFieldIndex)
		&& (getLanguageCorkUsage ((clientLang == LANG_IGNORE)
								  ? Cpp.lang
								  : clientLang) & CORK_SYMTAB))
		Cpp.fileMacroTable = makeMacroTable ();
	else
		Cpp.fileMacroTable = NULL;
}
381
/* Public initializer: initializes the preprocessor for the language of the
 * current input (as reported by getInputLanguage()), forwarding every other
 * argument unchanged to cppInitCommon(). */
extern void cppInit (const bool state, const bool hasAtLiteralStrings,
		     const bool hasCxxRawLiteralStrings,
		     const bool hasSingleQuoteLiteralNumbers,
		     int defineMacroKindIndex,
		     int macroUndefRoleIndex,
		     int macroConditionRoleIndex,
		     int macroParamKindIndex,
		     int headerKindIndex,
		     int headerSystemRoleIndex, int headerLocalRoleIndex,
		     int macrodefFieldIndex)
{
	cppInitCommon (getInputLanguage (),
				   state,
				   hasAtLiteralStrings,
				   hasCxxRawLiteralStrings,
				   hasSingleQuoteLiteralNumbers,
				   defineMacroKindIndex,
				   macroUndefRoleIndex,
				   macroConditionRoleIndex,
				   macroParamKindIndex,
				   headerKindIndex,
				   headerSystemRoleIndex,
				   headerLocalRoleIndex,
				   macrodefFieldIndex);
}
402
/* Walks the macro list starting at *pM, resets the useCount of every macro
 * on it to zero, and finally empties the list head by storing NULL in *pM.
 * The "next" links themselves are left untouched. */
static void cppClearMacroInUse (cppMacroInfo **pM)
{
	cppMacroInfo *macro = *pM;

	while (macro)
	{
		CXX_DEBUG_PRINT("Macro <%p> clear useCount: %d -> 0", macro, macro->useCount);
		macro->useCount = 0;
		macro = macro->next;
	}

	*pM = NULL;
}
412
cppTerminate(void)413 extern void cppTerminate (void)
414 {
415 if (Cpp.directive.name != NULL)
416 {
417 vStringDelete (Cpp.directive.name);
418 Cpp.directive.name = NULL;
419 }
420
421 if(Cpp.ungetBuffer)
422 {
423 eFree(Cpp.ungetBuffer);
424 Cpp.ungetBuffer = NULL;
425 }
426
427 if(Cpp.charOrStringContents)
428 {
429 vStringDelete(Cpp.charOrStringContents);
430 Cpp.charOrStringContents = NULL;
431 }
432
433 Cpp.clientLang = LANG_IGNORE;
434
435 cppClearMacroInUse (&Cpp.macroInUse);
436
437 if (Cpp.fileMacroTable)
438 {
439 hashTableDelete (Cpp.fileMacroTable);
440 Cpp.fileMacroTable = NULL;
441 }
442 }
443
cppBeginStatement(void)444 extern void cppBeginStatement (void)
445 {
446 Cpp.resolveRequired = true;
447 }
448
cppEndStatement(void)449 extern void cppEndStatement (void)
450 {
451 Cpp.resolveRequired = false;
452 }
453
454 /*
455 * Scanning functions
456 *
457 * This section handles preprocessor directives. It strips out all
458 * directives and may emit a tag for #define directives.
459 */
460
461 /* This puts a character back into the input queue for the input File. */
cppUngetc(const int c)462 extern void cppUngetc (const int c)
463 {
464 if(!Cpp.ungetPointer)
465 {
466 // no unget data
467 if(!Cpp.ungetBuffer)
468 {
469 Cpp.ungetBuffer = (int *)eMalloc(8 * sizeof(int));
470 Cpp.ungetBufferSize = 8;
471 }
472 Assert(Cpp.ungetBufferSize > 0);
473 Cpp.ungetPointer = Cpp.ungetBuffer + Cpp.ungetBufferSize - 1;
474 *(Cpp.ungetPointer) = c;
475 Cpp.ungetDataSize = 1;
476 return;
477 }
478
479 // Already have some unget data in the buffer. Must prepend.
480 Assert(Cpp.ungetBuffer);
481 Assert(Cpp.ungetBufferSize > 0);
482 Assert(Cpp.ungetDataSize > 0);
483 Assert(Cpp.ungetPointer >= Cpp.ungetBuffer);
484
485 if(Cpp.ungetPointer == Cpp.ungetBuffer)
486 {
487 Cpp.ungetBufferSize += 8;
488 int * tmp = (int *)eMalloc(Cpp.ungetBufferSize * sizeof(int));
489 memcpy(tmp+8,Cpp.ungetPointer,Cpp.ungetDataSize * sizeof(int));
490 eFree(Cpp.ungetBuffer);
491 Cpp.ungetBuffer = tmp;
492 Cpp.ungetPointer = tmp + 7;
493 } else {
494 Cpp.ungetPointer--;
495 }
496
497 *(Cpp.ungetPointer) = c;
498 Cpp.ungetDataSize++;
499 }
500
cppUngetBufferSize()501 int cppUngetBufferSize()
502 {
503 return Cpp.ungetBufferSize;
504 }
505
506 /* This puts an entire string back into the input queue for the input File. */
cppUngetString(const char * string,int len)507 void cppUngetString(const char * string,int len)
508 {
509 if(!string)
510 return;
511 if(len < 1)
512 return;
513
514 if(!Cpp.ungetPointer)
515 {
516 // no unget data
517 if(!Cpp.ungetBuffer)
518 {
519 Cpp.ungetBufferSize = 8 + len;
520 Cpp.ungetBuffer = (int *)eMalloc(Cpp.ungetBufferSize * sizeof(int));
521 } else if(Cpp.ungetBufferSize < len)
522 {
523 Cpp.ungetBufferSize = 8 + len;
524 Cpp.ungetBuffer = (int *)eRealloc(Cpp.ungetBuffer,Cpp.ungetBufferSize * sizeof(int));
525 }
526 Cpp.ungetPointer = Cpp.ungetBuffer + Cpp.ungetBufferSize - len;
527 } else {
528 // Already have some unget data in the buffer. Must prepend.
529 Assert(Cpp.ungetBuffer);
530 Assert(Cpp.ungetBufferSize > 0);
531 Assert(Cpp.ungetDataSize > 0);
532 Assert(Cpp.ungetPointer >= Cpp.ungetBuffer);
533
534 if(Cpp.ungetBufferSize < (Cpp.ungetDataSize + len))
535 {
536 Cpp.ungetBufferSize = 8 + len + Cpp.ungetDataSize;
537 int * tmp = (int *)eMalloc(Cpp.ungetBufferSize * sizeof(int));
538 memcpy(tmp + 8 + len,Cpp.ungetPointer,Cpp.ungetDataSize * sizeof(int));
539 eFree(Cpp.ungetBuffer);
540 Cpp.ungetBuffer = tmp;
541 Cpp.ungetPointer = tmp + 8;
542 } else {
543 Cpp.ungetPointer -= len;
544 Assert(Cpp.ungetPointer >= Cpp.ungetBuffer);
545 }
546 }
547
548 int * p = Cpp.ungetPointer;
549 const char * s = string;
550 const char * e = string + len;
551
552 while(s < e)
553 *p++ = *s++;
554
555 Cpp.ungetDataSize += len;
556 }
557
/* Pushes back a string that was produced by expanding "macro".
 * A macro that was not in use yet (useCount == 0) is first linked at the
 * head of the list of macros in use; its useCount is then incremented and
 * the string is handed to cppUngetString(). */
extern void cppUngetStringBuiltByMacro(const char * string,int len, cppMacroInfo *macro)
{
	const bool firstUse = (macro->useCount == 0);

	if (firstUse)
	{
		macro->next = Cpp.macroInUse;
		Cpp.macroInUse = macro;
	}
	macro->useCount++;

	CXX_DEBUG_PRINT("Macro <%p> increment useCount: %d->%d", macro,
					(macro->useCount - 1), macro->useCount);

	cppUngetString (string, len);
}
573
cppGetcFromUngetBufferOrFile(void)574 static int cppGetcFromUngetBufferOrFile(void)
575 {
576 if(Cpp.ungetPointer)
577 {
578 Assert(Cpp.ungetBuffer);
579 Assert(Cpp.ungetBufferSize > 0);
580 Assert(Cpp.ungetDataSize > 0);
581
582 int c = *(Cpp.ungetPointer);
583 Cpp.ungetDataSize--;
584 if(Cpp.ungetDataSize > 0)
585 Cpp.ungetPointer++;
586 else
587 Cpp.ungetPointer = NULL;
588 return c;
589 }
590
591 if (Cpp.macroInUse)
592 cppClearMacroInUse (&Cpp.macroInUse);
593 return getcFromInputFile();
594 }
595
596
/* Collects a directive name into "name", starting with the already read
 * character "c" and continuing with alphabetic characters from the input.
 * At most maxLength - 1 characters are stored and the result is always NUL
 * terminated. The first character that is not part of the name is pushed
 * back.
 * Returns true when the last character examined is a space or a tab.
 */
static bool readDirective (int c, char *const name, unsigned int maxLength)
{
	unsigned int length = 0;

	while (length < maxLength - 1)
	{
		/* The first character was supplied by the caller. */
		if (length != 0)
		{
			c = cppGetcFromUngetBufferOrFile ();
			if (c == EOF || ! isalpha (c))
			{
				cppUngetc (c);
				break;
			}
		}
		name [length++] = c;
	}
	name [length] = '\0';

	return (bool) isspacetab (c);
}
620
621 /* Reads an identifier, whose first character is given by "c", into "tag",
622 * together with the file location and corresponding line number.
623 */
readIdentifier(int c,vString * const name)624 static void readIdentifier (int c, vString *const name)
625 {
626 vStringClear (name);
627 do
628 {
629 vStringPut (name, c);
630 c = cppGetcFromUngetBufferOrFile ();
631 } while (c != EOF && cppIsident (c));
632 cppUngetc (c);
633 }
634
/* Reads the file name of an #include directive into "name".
 * "c" is the opening delimiter: '<' is closed by '>', anything else by '"'.
 * Reading stops at the closing delimiter, at a newline or at EOF; the
 * character that stopped the scan is consumed and not stored. */
static void readFilename (int c, vString *const name)
{
	const int closing = (c == '<')? '>': '"';

	vStringClear (name);

	for (c = cppGetcFromUngetBufferOrFile ();
		 c != EOF && c != closing && c != '\n';
		 c = cppGetcFromUngetBufferOrFile ())
	{
		vStringPut (name, c);
	}
}
644
currentConditional(void)645 static conditionalInfo *currentConditional (void)
646 {
647 return &Cpp.directive.ifdef [Cpp.directive.nestLevel];
648 }
649
isIgnore(void)650 static bool isIgnore (void)
651 {
652 return Cpp.directive.ifdef [Cpp.directive.nestLevel].ignoring;
653 }
654
setIgnore(const bool ignore)655 static bool setIgnore (const bool ignore)
656 {
657 return Cpp.directive.ifdef [Cpp.directive.nestLevel].ignoring = ignore;
658 }
659
isIgnoreBranch(void)660 static bool isIgnoreBranch (void)
661 {
662 conditionalInfo *const ifdef = currentConditional ();
663
664 /* Force a single branch if an incomplete statement is discovered
665 * en route. This may have allowed earlier branches containing complete
666 * statements to be followed, but we must follow no further branches.
667 */
668
669 /*
670 * CXX: Force a single branch if the external parser (cxx) block nest level at the beginning
671 * of this conditional is not equal to the current block nest level (at exit of the first branch).
672 *
673 * Follow both branches example: (same state at enter and exit)
674 *
675 * #if something
676 * xxxxx;
677 * #else
678 * yyyy;
679 * #endif
680 *
681 * Follow single branch example: (different block level at enter and exit)
682 *
683 * if {
684 * #if something
685 * } else x;
686 * #else
687 * }
688 * #endif
689 */
690
691 if (
692 (Cpp.resolveRequired || (ifdef->enterExternalParserBlockNestLevel != externalParserBlockNestLevel)) &&
693 (!BraceFormat)
694 )
695 {
696 CXX_DEBUG_PRINT("Choosing single branch");
697 ifdef->singleBranch = true;
698 }
699
700 /* We will ignore this branch in the following cases:
701 *
702 * 1. We are ignoring all branches (conditional was within an ignored
703 * branch of the parent conditional)
704 * 2. A branch has already been chosen and either of:
705 * a. A statement was incomplete upon entering the conditional
706 * b. A statement is incomplete upon encountering a branch
707 */
708 return (bool) (ifdef->ignoreAllBranches ||
709 (ifdef->branchChosen && ifdef->singleBranch));
710 }
711
chooseBranch(void)712 static void chooseBranch (void)
713 {
714 if (! BraceFormat)
715 {
716 conditionalInfo *const ifdef = currentConditional ();
717
718 ifdef->branchChosen = (bool) (ifdef->singleBranch ||
719 Cpp.resolveRequired);
720 }
721 }
722
723 /* Pushes one nesting level for an #if directive, indicating whether or not
724 * the branch should be ignored and whether a branch has already been chosen.
725 */
pushConditional(const bool firstBranchChosen)726 static bool pushConditional (const bool firstBranchChosen)
727 {
728 const bool ignoreAllBranches = isIgnore (); /* current ignore */
729 bool ignoreBranch = false;
730
731 if (Cpp.directive.nestLevel < (unsigned int) MaxCppNestingLevel - 1)
732 {
733 conditionalInfo *ifdef;
734
735 ++Cpp.directive.nestLevel;
736 ifdef = currentConditional ();
737
738 /* We take a snapshot of whether there is an incomplete statement in
739 * progress upon encountering the preprocessor conditional. If so,
740 * then we will flag that only a single branch of the conditional
741 * should be followed.
742 */
743 ifdef->ignoreAllBranches = ignoreAllBranches;
744 ifdef->singleBranch = Cpp.resolveRequired;
745 ifdef->branchChosen = firstBranchChosen;
746 ifdef->ignoring = (bool) (ignoreAllBranches || (
747 ! firstBranchChosen && ! BraceFormat &&
748 (ifdef->singleBranch || !doesExaminCodeWithInIf0Branch)));
749 ifdef->enterExternalParserBlockNestLevel = externalParserBlockNestLevel;
750 ifdef->asmArea.line = 0;
751 ignoreBranch = ifdef->ignoring;
752 }
753 return ignoreBranch;
754 }
755
756 /* Pops one nesting level for an #endif directive.
757 */
popConditional(void)758 static bool popConditional (void)
759 {
760 if (Cpp.directive.nestLevel > 0)
761 --Cpp.directive.nestLevel;
762
763 return isIgnore ();
764 }
765
doesCPreProRunAsStandaloneParser(int kind)766 static bool doesCPreProRunAsStandaloneParser (int kind)
767 {
768 if (kind == CPREPRO_HEADER)
769 return !Cpp.useClientLangDefineMacroKindIndex;
770 else if (kind == CPREPRO_MACRO)
771 return !Cpp.useClientLangHeaderKindIndex;
772 else if (kind == CPREPRO_PARAM)
773 return !Cpp.useClientLangMacroParamKindIndex;
774 else
775 {
776 AssertNotReached();
777 return true;
778 }
779 }
780
makeDefineTag(const char * const name,const char * const signature,bool undef)781 static int makeDefineTag (const char *const name, const char* const signature, bool undef)
782 {
783 bool standing_alone = doesCPreProRunAsStandaloneParser(CPREPRO_MACRO);
784 langType lang = standing_alone ? Cpp.lang: Cpp.clientLang;
785 const bool isFileScope = (bool) (! isInputHeaderFile ());
786
787 if (!isLanguageEnabled (lang))
788 return CORK_NIL;
789
790 Assert (Cpp.defineMacroKindIndex != KIND_GHOST_INDEX);
791
792 if (isFileScope && !isXtagEnabled(XTAG_FILE_SCOPE))
793 return CORK_NIL;
794
795 if (undef && (Cpp.macroUndefRoleIndex == ROLE_DEFINITION_INDEX))
796 return CORK_NIL;
797
798 if (! isLanguageKindEnabled (lang,
799 Cpp.defineMacroKindIndex))
800 return CORK_NIL;
801
802 if (
803 /* condition for definition tag */
804 (!undef)
805 || /* condition for reference tag */
806 (undef && isXtagEnabled(XTAG_REFERENCE_TAGS) &&
807 isLanguageRoleEnabled(lang, Cpp.defineMacroKindIndex,
808 Cpp.macroUndefRoleIndex)))
809 {
810 tagEntryInfo e;
811 int r;
812
813 if (standing_alone)
814 pushLanguage (Cpp.lang);
815
816 if (undef)
817 initRefTagEntry (&e, name, Cpp.defineMacroKindIndex,
818 Cpp.macroUndefRoleIndex);
819 else
820 initTagEntry (&e, name, Cpp.defineMacroKindIndex);
821 e.isFileScope = isFileScope;
822 if (isFileScope)
823 markTagExtraBit (&e, XTAG_FILE_SCOPE);
824 e.truncateLineAfterTag = true;
825 e.extensionFields.signature = signature;
826
827 r = makeTagEntry (&e);
828
829 if (standing_alone)
830 popLanguage ();
831
832 return r;
833 }
834 return CORK_NIL;
835 }
836
makeIncludeTag(const char * const name,bool systemHeader)837 static void makeIncludeTag (const char *const name, bool systemHeader)
838 {
839 bool standing_alone = doesCPreProRunAsStandaloneParser(CPREPRO_HEADER);
840 langType lang = standing_alone ? Cpp.lang: Cpp.clientLang;
841 tagEntryInfo e;
842 int role_index;
843
844 if (!isLanguageEnabled (lang))
845 return;
846
847 Assert (Cpp.headerKindIndex != KIND_GHOST_INDEX);
848
849 role_index = systemHeader? Cpp.headerSystemRoleIndex: Cpp.headerLocalRoleIndex;
850 if (role_index == ROLE_DEFINITION_INDEX)
851 return;
852
853 if (!isXtagEnabled (XTAG_REFERENCE_TAGS))
854 return;
855
856 if (!isLanguageKindEnabled(lang, Cpp.headerKindIndex))
857 return;
858
859 if (isLanguageRoleEnabled(lang, Cpp.headerKindIndex, role_index))
860 {
861 if (standing_alone)
862 pushLanguage (Cpp.lang);
863
864 initRefTagEntry (&e, name, Cpp.headerKindIndex, role_index);
865 e.isFileScope = false;
866 e.truncateLineAfterTag = true;
867 makeTagEntry (&e);
868
869 if (standing_alone)
870 popLanguage ();
871 }
872 }
873
/* Emits a tag for one macro parameter.
 * "nth" is stored as the position of the parameter and "placeholder" marks
 * the resulting entry as a placeholder. The entry is only updated when it
 * can be found in the cork queue.
 *
 * NOTE(review): the standalone check asks about CPREPRO_MACRO although the
 * tag is made with the parameter kind index; CPREPRO_PARAM may be what is
 * intended -- confirm before changing. */
static void makeParamTag (vString *name, short nth, bool placeholder)
{
	const bool standing_alone = doesCPreProRunAsStandaloneParser(CPREPRO_MACRO);

	Assert (Cpp.macroParamKindIndex != KIND_GHOST_INDEX);

	if (standing_alone)
		pushLanguage (Cpp.lang);

	const int corkIndex = makeSimpleTag (name, Cpp.macroParamKindIndex);

	if (standing_alone)
		popLanguage ();

	tagEntryInfo *const entry = getEntryInCorkQueue (corkIndex);
	if (entry == NULL)
		return;

	entry->extensionFields.nth = nth;
	if (placeholder)
		entry->placeholder = 1;
}
894
/* Appends a signature of the form "(a,b,c)" to "buffer", built from the
 * names of the cork queue entries with indexes in [from, to). Entries that
 * cannot be fetched and entries marked as extra tags are skipped. */
static void regenreateSignatureFromParameters (vString * buffer, int from, int to)
{
	int index = from;

	vStringPut(buffer, '(');

	while (index < to)
	{
		tagEntryInfo *const entry = getEntryInCorkQueue (index);

		index++;
		if (entry == NULL || isTagExtra (entry))
			continue;

		vStringCatS (buffer, entry->name);
		vStringPut (buffer, ',');
	}

	/* Drop the separator written after the last name. */
	if (vStringLast (buffer) == ',')
		vStringChop (buffer);

	vStringPut (buffer, ')');
}
911
patchScopeFieldOfParameters(int from,int to,int parentIndex)912 static void patchScopeFieldOfParameters(int from, int to, int parentIndex)
913 {
914 for (int pindex = from; pindex < to; pindex++)
915 {
916 tagEntryInfo *e = getEntryInCorkQueue (pindex);
917 if (e)
918 e->extensionFields.scopeIndex = parentIndex;
919 }
920 }
921
directiveDefine(const int c,bool undef)922 static int directiveDefine (const int c, bool undef)
923 {
924 // FIXME: We could possibly handle the macros here!
925 // However we'd need a separate hash table for macros of the current file
926 // to avoid breaking the "global" ones.
927
928 int r = CORK_NIL;
929
930 if (cppIsident1 (c))
931 {
932 readIdentifier (c, Cpp.directive.name);
933 if (! isIgnore ())
934 {
935 unsigned long lineNumber = getInputLineNumber ();
936 MIOPos filePosition = getInputFilePosition ();
937 int p = cppGetcFromUngetBufferOrFile ();
938 short nth = 0;
939
940 if (p == '(')
941 {
942 vString *param = vStringNew ();
943 int param_start = (int)countEntryInCorkQueue();
944 do {
945 p = cppGetcFromUngetBufferOrFile ();
946 if (isalnum(p) || p == '_' || p == '$'
947 /* Handle variadic macros like (a,...) */
948 || p == '.')
949 {
950 vStringPut (param, p);
951 continue;
952 }
953
954 if (vStringLength (param) > 0)
955 {
956 makeParamTag (param, nth++, vStringChar(param, 0) == '.');
957 vStringClear (param);
958 }
959 if (p == '\\')
960 cppGetcFromUngetBufferOrFile (); /* Throw away the next char */
961 } while (p != ')' && p != EOF);
962 vStringDelete (param);
963
964 int param_end = (int)countEntryInCorkQueue();
965 if (p == ')')
966 {
967 vString *signature = vStringNew ();
968 regenreateSignatureFromParameters (signature, param_start, param_end);
969 r = makeDefineTag (vStringValue (Cpp.directive.name), vStringValue (signature), undef);
970 vStringDelete (signature);
971 }
972 else
973 r = makeDefineTag (vStringValue (Cpp.directive.name), NULL, undef);
974
975 tagEntryInfo *e = getEntryInCorkQueue (r);
976 if (e)
977 {
978 e->lineNumber = lineNumber;
979 e->filePosition = filePosition;
980 patchScopeFieldOfParameters (param_start, param_end, r);
981 }
982 }
983 else
984 {
985 cppUngetc (p);
986 r = makeDefineTag (vStringValue (Cpp.directive.name), NULL, undef);
987 }
988 }
989 }
990 Cpp.directive.state = DRCTV_NONE;
991
992 if (r != CORK_NIL && Cpp.fileMacroTable)
993 registerEntry (r);
994 return r;
995 }
996
directiveUndef(const int c)997 static void directiveUndef (const int c)
998 {
999 if (isXtagEnabled (XTAG_REFERENCE_TAGS))
1000 {
1001 directiveDefine (c, true);
1002 }
1003 else
1004 {
1005 Cpp.directive.state = DRCTV_NONE;
1006 }
1007 }
1008
directivePragma(int c)1009 static void directivePragma (int c)
1010 {
1011 if (cppIsident1 (c))
1012 {
1013 readIdentifier (c, Cpp.directive.name);
1014 if (stringMatch (vStringValue (Cpp.directive.name), "weak"))
1015 {
1016 /* generate macro tag for weak name */
1017 do
1018 {
1019 c = cppGetcFromUngetBufferOrFile ();
1020 } while (c == SPACE);
1021 if (cppIsident1 (c))
1022 {
1023 readIdentifier (c, Cpp.directive.name);
1024 makeDefineTag (vStringValue (Cpp.directive.name), NULL, false);
1025 }
1026 }
1027 }
1028 Cpp.directive.state = DRCTV_NONE;
1029 }
1030
1031 /*
1032 * __ASSEMBLER__ ("3.7.1 Standard Predefined Macros" in GNU cpp info),
1033 * __ASSEMBLY__ (Used in Linux kernel)
1034 */
isAssemblerBlock(int c)1035 static bool isAssemblerBlock (int c)
1036 {
1037 if (c != '_')
1038 return false;
1039
1040 bool r = false;
1041 vString *cond = vStringNew ();
1042 readIdentifier (c, cond);
1043 if (strcmp (vStringValue (cond), "__ASSEMBLER__") == 0
1044 || strcmp (vStringValue (cond), "__ASSEMBLY__") == 0)
1045 r = true;
1046
1047 CXX_DEBUG_PRINT("ASSEMBLER[%s]: %s", r? "true": "false", vStringValue(cond));
1048
1049 size_t len = vStringLength (cond);
1050 /* Pushing back to the stream.
1051 * The first character is not read in this function.
1052 * So don't touch the character here. */
1053 for (size_t i = len; i > 1; i--)
1054 {
1055 c = vStringChar (cond, i - 1);
1056 cppUngetc (c);
1057 }
1058
1059 vStringDelete (cond);
1060 return r;
1061 }
1062
directiveIf(const int c,enum eIfSubstate if_substate)1063 static bool directiveIf (const int c, enum eIfSubstate if_substate)
1064 {
1065 static langType asmLang = LANG_IGNORE;
1066 if (asmLang == LANG_IGNORE)
1067 asmLang = getNamedLanguage ("Asm", 0);
1068
1069 DebugStatement ( const bool ignore0 = isIgnore (); )
1070 bool firstBranchChosen = (bool) (c != '0');
1071 bool assemblerBlock = false;
1072 if (Cpp.clientLang != asmLang && firstBranchChosen)
1073 {
1074 assemblerBlock = isAssemblerBlock(c);
1075 if (assemblerBlock && if_substate != IF_IFNDEF)
1076 firstBranchChosen = false;
1077 }
1078
1079 CXX_DEBUG_PRINT("firstBranchChosen: %d", firstBranchChosen);
1080 const bool ignore = pushConditional (firstBranchChosen);
1081 if (assemblerBlock)
1082 {
1083 conditionalInfo *ifdef = currentConditional ();
1084 ifdef->asmArea.ifSubstate = if_substate;
1085 ifdef->asmArea.line = getInputLineNumber();
1086 }
1087
1088 Cpp.directive.state = DRCTV_NONE;
1089 DebugStatement ( debugCppNest (true, Cpp.directive.nestLevel);
1090 if (ignore != ignore0) debugCppIgnore (ignore); )
1091
1092 return ignore;
1093 }
1094
directiveElif(const int c)1095 static void directiveElif (const int c)
1096 {
1097 Cpp.directive.state = DRCTV_NONE;
1098 }
1099
directiveInclude(const int c)1100 static void directiveInclude (const int c)
1101 {
1102 if (c == '<' || c == '"')
1103 {
1104 readFilename (c, Cpp.directive.name);
1105 if ((! isIgnore ()) && vStringLength (Cpp.directive.name))
1106 makeIncludeTag (vStringValue (Cpp.directive.name),
1107 c == '<');
1108 }
1109 Cpp.directive.state = DRCTV_NONE;
1110 }
1111
/* Called when #else, #elif or #endif (given as "currentState") is met for
 * the conditional level "ifdef".
 *
 * If an assembler area is being tracked at this level and the directive
 * closes it, a promise for the "Asm" language covering the lines between
 * the opening directive and the current line is made, and tracking stops.
 * If the area was opened by #ifndef, an #else starts a new tracked area
 * and an #elif cancels tracking.
 */
static void promiseOrPrepareAsm (conditionalInfo *ifdef, enum eIfSubstate currentState)
{
	/* A line of zero means nothing is being tracked at this level. */
	if (ifdef->asmArea.line == 0)
		return;

	const enum eIfSubstate opener = ifdef->asmArea.ifSubstate;

	const bool closesIfArea =
		(opener == IF_IF || opener == IF_IFDEF)
		&& (currentState == IF_ELSE
			|| currentState == IF_ELIF
			|| currentState == IF_ENDIF);
	const bool closesElseArea =
		(opener == IF_ELSE) && (currentState == IF_ENDIF);

	if (closesIfArea || closesElseArea)
	{
		/* The area starts on the line after the opening directive. */
		const unsigned long first = ifdef->asmArea.line + 1;
		const unsigned long last = getInputLineNumber ();

		if (first < last)
			makePromise ("Asm", first, 0, last, 0, first);

		ifdef->asmArea.line = 0;
		return;
	}

	if (opener != IF_IFNDEF)
		return;

	switch (currentState)
	{
		case IF_ELIF:
			ifdef->asmArea.line = 0;
			break;
		case IF_ELSE:
			ifdef->asmArea.ifSubstate = IF_ELSE;
			ifdef->asmArea.line = getInputLineNumber ();
			break;
		default:
			break;
	}
}
1141
directiveHash(const int c)1142 static bool directiveHash (const int c)
1143 {
1144 bool ignore = false;
1145 char directive [MaxDirectiveName];
1146 DebugStatement ( const bool ignore0 = isIgnore (); )
1147
1148 readDirective (c, directive, MaxDirectiveName);
1149 if (stringMatch (directive, "define"))
1150 Cpp.directive.state = DRCTV_DEFINE;
1151 else if (stringMatch (directive, "include"))
1152 Cpp.directive.state = DRCTV_INCLUDE;
1153 else if (stringMatch (directive, "undef"))
1154 Cpp.directive.state = DRCTV_UNDEF;
1155 else if (strncmp (directive, "if", (size_t) 2) == 0)
1156 {
1157 Cpp.directive.state = DRCTV_IF;
1158 Cpp.directive.ifsubstate = IF_IF;
1159 if (directive[2] == 'd')
1160 Cpp.directive.ifsubstate = IF_IFDEF;
1161 else if (directive[2] == 'n')
1162 Cpp.directive.ifsubstate = IF_IFNDEF;
1163 }
1164 else if (stringMatch (directive, "elif") ||
1165 stringMatch (directive, "else"))
1166 {
1167 enum eIfSubstate s = (directive[2] == 's')? IF_ELSE: IF_ELIF;
1168 conditionalInfo *ifdef = currentConditional ();
1169 promiseOrPrepareAsm (ifdef, s);
1170
1171 ignore = setIgnore (isIgnoreBranch ());
1172 CXX_DEBUG_PRINT("Found #elif or #else: ignore is %d",ignore);
1173 if (! ignore && s == IF_ELSE)
1174 chooseBranch ();
1175 Cpp.directive.state = (s == IF_ELIF)? DRCTV_ELIF: DRCTV_NONE;
1176 DebugStatement ( if (ignore != ignore0) debugCppIgnore (ignore); )
1177 }
1178 else if (stringMatch (directive, "endif"))
1179 {
1180 conditionalInfo *ifdef = currentConditional ();
1181 promiseOrPrepareAsm (ifdef, IF_ENDIF);
1182
1183 DebugStatement ( debugCppNest (false, Cpp.directive.nestLevel); )
1184 ignore = popConditional ();
1185 Cpp.directive.state = DRCTV_NONE;
1186 DebugStatement ( if (ignore != ignore0) debugCppIgnore (ignore); )
1187 }
1188 else if (stringMatch (directive, "pragma"))
1189 Cpp.directive.state = DRCTV_PRAGMA;
1190 else
1191 Cpp.directive.state = DRCTV_NONE;
1192
1193 return ignore;
1194 }
1195
1196 /* Handles a pre-processor directive whose first character is given by "c".
1197 */
handleDirective(const int c,int * macroCorkIndex,bool * inspect_conidtion)1198 static bool handleDirective (const int c, int *macroCorkIndex, bool *inspect_conidtion)
1199 {
1200 bool ignore = isIgnore ();
1201
1202 switch (Cpp.directive.state)
1203 {
1204 case DRCTV_NONE: ignore = isIgnore (); break;
1205 case DRCTV_DEFINE:
1206 *macroCorkIndex = directiveDefine (c, false);
1207 break;
1208 case DRCTV_HASH: ignore = directiveHash (c); break;
1209 case DRCTV_IF:
1210 ignore = directiveIf (c, Cpp.directive.ifsubstate);
1211 *inspect_conidtion = true;
1212 break;
1213 case DRCTV_ELIF:
1214 directiveElif (c);
1215 *inspect_conidtion = true;
1216 break;
1217 case DRCTV_PRAGMA: directivePragma (c); break;
1218 case DRCTV_UNDEF: directiveUndef (c); break;
1219 case DRCTV_INCLUDE: directiveInclude (c); break;
1220 }
1221 return ignore;
1222 }
1223
1224 /* Called upon reading of a slash ('/') characters, determines whether a
1225 * comment is encountered, and its type.
1226 */
isComment(void)1227 static Comment isComment (void)
1228 {
1229 Comment comment;
1230 const int next = cppGetcFromUngetBufferOrFile ();
1231
1232 if (next == '*')
1233 comment = COMMENT_C;
1234 else if (next == '/')
1235 comment = COMMENT_CPLUS;
1236 else if (next == '+')
1237 comment = COMMENT_D;
1238 else
1239 {
1240 cppUngetc (next);
1241 comment = COMMENT_NONE;
1242 }
1243 return comment;
1244 }
1245
1246 /* Skips over a C style comment. According to ANSI specification a comment
1247 * is treated as white space, so we perform this substitution.
1248 */
cppSkipOverCComment(void)1249 static int cppSkipOverCComment (void)
1250 {
1251 int c = cppGetcFromUngetBufferOrFile ();
1252
1253 while (c != EOF)
1254 {
1255 if (c != '*')
1256 c = cppGetcFromUngetBufferOrFile ();
1257 else
1258 {
1259 const int next = cppGetcFromUngetBufferOrFile ();
1260
1261 if (next != '/')
1262 c = next;
1263 else
1264 {
1265 c = SPACE; /* replace comment with space */
1266 break;
1267 }
1268 }
1269 }
1270 return c;
1271 }
1272
1273 /* Skips over a C++ style comment.
1274 */
skipOverCplusComment(void)1275 static int skipOverCplusComment (void)
1276 {
1277 int c;
1278
1279 while ((c = cppGetcFromUngetBufferOrFile ()) != EOF)
1280 {
1281 if (c == BACKSLASH)
1282 cppGetcFromUngetBufferOrFile (); /* throw away next character, too */
1283 else if (c == NEWLINE)
1284 break;
1285 }
1286 return c;
1287 }
1288
1289 /* Skips over a D style comment.
1290 * Really we should match nested /+ comments. At least they're less common.
1291 */
skipOverDComment(void)1292 static int skipOverDComment (void)
1293 {
1294 int c = cppGetcFromUngetBufferOrFile ();
1295
1296 while (c != EOF)
1297 {
1298 if (c != '+')
1299 c = cppGetcFromUngetBufferOrFile ();
1300 else
1301 {
1302 const int next = cppGetcFromUngetBufferOrFile ();
1303
1304 if (next != '/')
1305 c = next;
1306 else
1307 {
1308 c = SPACE; /* replace comment with space */
1309 break;
1310 }
1311 }
1312 }
1313 return c;
1314 }
1315
cppGetLastCharOrStringContents(void)1316 const vString * cppGetLastCharOrStringContents (void)
1317 {
1318 CXX_DEBUG_ASSERT(Cpp.charOrStringContents,"Shouldn't be called when CPP is not initialized");
1319 return Cpp.charOrStringContents;
1320 }
1321
1322 /* Skips to the end of a string, returning a special character to
1323 * symbolically represent a generic string.
1324 */
skipToEndOfString(bool ignoreBackslash)1325 static int skipToEndOfString (bool ignoreBackslash)
1326 {
1327 int c;
1328
1329 vStringClear(Cpp.charOrStringContents);
1330
1331 while ((c = cppGetcFromUngetBufferOrFile ()) != EOF)
1332 {
1333 if (c == BACKSLASH && ! ignoreBackslash)
1334 {
1335 vStringPutWithLimit (Cpp.charOrStringContents, c, 1024);
1336 c = cppGetcFromUngetBufferOrFile (); /* throw away next character, too */
1337 if (c != EOF)
1338 vStringPutWithLimit (Cpp.charOrStringContents, c, 1024);
1339 }
1340 else if (c == DOUBLE_QUOTE)
1341 break;
1342 else
1343 vStringPutWithLimit (Cpp.charOrStringContents, c, 1024);
1344 }
1345 return STRING_SYMBOL; /* symbolic representation of string */
1346 }
1347
/* Returns 1 when "c" may appear in the delimiter of a C++ raw string
 * literal, i.e. it is not white space, a parenthesis or a backslash;
 * returns 0 otherwise.
 */
static int isCxxRawLiteralDelimiterChar (int c)
{
	switch (c)
	{
		case ' ':
		case '\f':
		case '\n':
		case '\r':
		case '\t':
		case '\v':
		case '(':
		case ')':
		case '\\':
			return 0;
		default:
			return 1;
	}
}
1353
skipToEndOfCxxRawLiteralString(void)1354 static int skipToEndOfCxxRawLiteralString (void)
1355 {
1356 int c = cppGetcFromUngetBufferOrFile ();
1357
1358 if (c != '(' && ! isCxxRawLiteralDelimiterChar (c))
1359 {
1360 cppUngetc (c);
1361 c = skipToEndOfString (false);
1362 }
1363 else
1364 {
1365 char delim[16];
1366 unsigned int delimLen = 0;
1367 bool collectDelim = true;
1368
1369 do
1370 {
1371 if (collectDelim)
1372 {
1373 if (isCxxRawLiteralDelimiterChar (c) &&
1374 delimLen < (sizeof delim / sizeof *delim))
1375 delim[delimLen++] = c;
1376 else
1377 collectDelim = false;
1378 }
1379 else if (c == ')')
1380 {
1381 unsigned int i = 0;
1382
1383 while ((c = cppGetcFromUngetBufferOrFile ()) != EOF && i < delimLen && delim[i] == c)
1384 i++;
1385 if (i == delimLen && c == DOUBLE_QUOTE)
1386 break;
1387 else
1388 cppUngetc (c);
1389 }
1390 }
1391 while ((c = cppGetcFromUngetBufferOrFile ()) != EOF);
1392 c = STRING_SYMBOL;
1393 }
1394 return c;
1395 }
1396
1397 /* Skips to the end of the three (possibly four) 'c' sequence, returning a
1398 * special character to symbolically represent a generic character.
1399 * Also detects Vera numbers that include a base specifier (ie. 'b1010).
1400 */
skipToEndOfChar()1401 static int skipToEndOfChar ()
1402 {
1403 int c;
1404 int count = 0, veraBase = '\0';
1405
1406 vStringClear(Cpp.charOrStringContents);
1407
1408 while ((c = cppGetcFromUngetBufferOrFile ()) != EOF)
1409 {
1410 ++count;
1411 if (c == BACKSLASH)
1412 {
1413 vStringPutWithLimit (Cpp.charOrStringContents, c, 10);
1414 c = cppGetcFromUngetBufferOrFile (); /* throw away next character, too */
1415 if (c != EOF)
1416 vStringPutWithLimit (Cpp.charOrStringContents, c, 10);
1417 }
1418 else if (c == SINGLE_QUOTE)
1419 break;
1420 else if (c == NEWLINE)
1421 {
1422 cppUngetc (c);
1423 break;
1424 }
1425 else if (Cpp.hasSingleQuoteLiteralNumbers)
1426 {
1427 if (count == 1 && strchr ("DHOB", toupper (c)) != NULL)
1428 {
1429 veraBase = c;
1430 vStringPutWithLimit (Cpp.charOrStringContents, c, 10);
1431 }
1432 else if (veraBase != '\0' && ! isalnum (c))
1433 {
1434 cppUngetc (c);
1435 break;
1436 }
1437 else
1438 vStringPutWithLimit (Cpp.charOrStringContents, c, 10);
1439 }
1440 else
1441 vStringPutWithLimit (Cpp.charOrStringContents, c, 10);
1442 }
1443 return CHAR_SYMBOL; /* symbolic representation of character */
1444 }
1445
attachFields(int macroCorkIndex,unsigned long endLine,const char * macrodef)1446 static void attachFields (int macroCorkIndex, unsigned long endLine, const char *macrodef)
1447 {
1448 tagEntryInfo *tag = getEntryInCorkQueue (macroCorkIndex);
1449 if (!tag)
1450 return;
1451
1452 tag->extensionFields.endLine = endLine;
1453 if (macrodef)
1454 attachParserFieldToCorkEntry (macroCorkIndex, Cpp.macrodefFieldIndex, macrodef);
1455 }
1456
/* Emits the identifier collected in "condition" (if any) as a reference tag
 * with the macro-condition role, unless it is empty or the word "defined".
 *
 * When "del" is true the buffer is deleted and NULL is returned; otherwise
 * the buffer is cleared and returned for reuse. A NULL "condition" is
 * returned unchanged.
 */
static vString * conditionMayFlush (vString* condition, bool del)
{
	const bool standalone = doesCPreProRunAsStandaloneParser(CPREPRO_MACRO);

	if (condition == NULL)
		return NULL;

	const size_t length = vStringLength(condition);
	const bool isDefinedWord =
		(length == 7
		 && strcmp (vStringValue (condition), "defined") == 0);

	if (length > 0 && ! isDefinedWord)
	{
		/* When running as a standalone parser the tag is made under
		 * Cpp.lang. */
		if (standalone)
			pushLanguage (Cpp.lang);

		makeSimpleRefTag (condition, Cpp.defineMacroKindIndex, Cpp.macroConditionRoleIndex);

		if (standalone)
			popLanguage ();
	}

	if (! del)
	{
		vStringClear(condition);
		return condition;
	}

	vStringDelete (condition);
	return NULL;
}
1489
/* Appends "c" to "condition" unless the buffer is NULL, or the buffer is
 * still empty and "c" is a digit (so a collected word never starts with a
 * digit).
 */
static void conditionMayPut (vString *condition, int c)
{
	if (condition == NULL)
		return;

	if (vStringLength (condition) == 0 && isdigit(c))
		return;

	vStringPut(condition, c);
}
1499
1500 /* This function returns the next character, stripping out comments,
1501 * C pre-processor directives, and the contents of single and double
1502 * quoted strings. In short, strip anything which places a burden upon
1503 * the tokenizer.
1504 */
cppGetc(void)1505 extern int cppGetc (void)
1506 {
1507 bool directive = false;
1508 bool ignore = false;
1509 int c;
1510 int macroCorkIndex = CORK_NIL;
1511 vString *macrodef = NULL;
1512 vString *condition = NULL;
1513
1514
1515 do {
1516 start_loop:
1517 c = cppGetcFromUngetBufferOrFile ();
1518 process:
1519 switch (c)
1520 {
1521 case EOF:
1522 ignore = false;
1523 directive = false;
1524 if (macroCorkIndex != CORK_NIL)
1525 {
1526 attachFields (macroCorkIndex,
1527 getInputLineNumber(),
1528 macrodef? vStringValue (macrodef): NULL);
1529 macroCorkIndex = CORK_NIL;
1530 }
1531 condition = conditionMayFlush(condition, true);
1532 break;
1533
1534 case TAB:
1535 case SPACE:
1536 if (macrodef && vStringLength (macrodef) > 0
1537 && vStringLast (macrodef) != ' ')
1538 vStringPut (macrodef, ' ');
1539 condition = conditionMayFlush(condition, false);
1540 break; /* ignore most white space */
1541
1542 case NEWLINE:
1543 if (directive)
1544 condition = conditionMayFlush(condition, true);
1545 if (directive && ! ignore)
1546 {
1547 directive = false;
1548 if (macroCorkIndex != CORK_NIL)
1549 {
1550 attachFields (macroCorkIndex,
1551 getInputLineNumber(),
1552 macrodef? vStringValue (macrodef): NULL);
1553 macroCorkIndex = CORK_NIL;
1554 }
1555 }
1556 Cpp.directive.accept = true;
1557 break;
1558
1559 case DOUBLE_QUOTE:
1560 condition = conditionMayFlush(condition, false);
1561
1562 if (Cpp.directive.state == DRCTV_INCLUDE)
1563 goto enter;
1564 else
1565 {
1566 Cpp.directive.accept = false;
1567 c = skipToEndOfString (false);
1568 }
1569
1570 if (macrodef)
1571 {
1572 /* We record the contents of string literal.
1573 *
1574 */
1575 vStringPut (macrodef, '"');
1576 vStringCat (macrodef, Cpp.charOrStringContents);
1577 vStringPut (macrodef, '"');
1578 }
1579
1580 break;
1581
1582 case '#':
1583 condition = conditionMayFlush(condition, false);
1584
1585 if (Cpp.directive.accept)
1586 {
1587 directive = true;
1588 Cpp.directive.state = DRCTV_HASH;
1589 Cpp.directive.accept = false;
1590 }
1591 if (macrodef)
1592 vStringPut (macrodef, '#');
1593 break;
1594
1595 case SINGLE_QUOTE:
1596 condition = conditionMayFlush(condition, false);
1597
1598 Cpp.directive.accept = false;
1599 c = skipToEndOfChar ();
1600
1601 /* We assume none may want to know the content of the
1602 * literal; just put ''. */
1603 if (macrodef)
1604 vStringCatS (macrodef, "''");
1605
1606 break;
1607
1608 case '/':
1609 {
1610 condition = conditionMayFlush(condition, false);
1611
1612 const Comment comment = isComment ();
1613
1614 if (comment == COMMENT_C)
1615 c = cppSkipOverCComment ();
1616 else if (comment == COMMENT_CPLUS)
1617 {
1618 c = skipOverCplusComment ();
1619 if (c == NEWLINE)
1620 cppUngetc (c);
1621 }
1622 else if (comment == COMMENT_D)
1623 c = skipOverDComment ();
1624 else
1625 {
1626 Cpp.directive.accept = false;
1627 if (macrodef)
1628 vStringPut (macrodef, '/');
1629 }
1630 break;
1631 }
1632
1633 case BACKSLASH:
1634 {
1635 condition = conditionMayFlush(condition, false);
1636
1637 int next = cppGetcFromUngetBufferOrFile ();
1638
1639 if (next == NEWLINE)
1640 goto start_loop;
1641 else
1642 {
1643 cppUngetc (next);
1644 if (macrodef)
1645 vStringPut (macrodef, '\\');
1646 }
1647 break;
1648 }
1649
1650 case '?':
1651 {
1652 condition = conditionMayFlush(condition, false);
1653
1654 int next = cppGetcFromUngetBufferOrFile ();
1655 if (next != '?')
1656 {
1657 cppUngetc (next);
1658 if (macrodef)
1659 vStringPut (macrodef, '?');
1660 }
1661 else
1662 {
1663 next = cppGetcFromUngetBufferOrFile ();
1664 switch (next)
1665 {
1666 case '(': c = '['; break;
1667 case ')': c = ']'; break;
1668 case '<': c = '{'; break;
1669 case '>': c = '}'; break;
1670 case '/': c = BACKSLASH; goto process;
1671 case '!': c = '|'; break;
1672 case SINGLE_QUOTE: c = '^'; break;
1673 case '-': c = '~'; break;
1674 case '=': c = '#'; goto process;
1675 default:
1676 cppUngetc ('?');
1677 cppUngetc (next);
1678 break;
1679 }
1680 if (macrodef)
1681 vStringPut (macrodef, c);
1682 }
1683 } break;
1684
1685 /* digraphs:
1686 * input: <: :> <% %> %: %:%:
1687 * output: [ ] { } # ##
1688 */
1689 case '<':
1690 {
1691 condition = conditionMayFlush(condition, false);
1692
1693 /*
1694 Quoted from http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2011/n3237.html:
1695 ------
1696 if the next three characters are <:: and the
1697 subsequent character is neither : nor >, the < is
1698 treated as a preprocessor token by itself (and not as
1699 the first character of the alternative token */
1700 int next[3];
1701 next[0] = cppGetcFromUngetBufferOrFile ();
1702 switch (next[0])
1703 {
1704 case ':':
1705 next[1] = cppGetcFromUngetBufferOrFile ();
1706 if (next[1] == ':')
1707 {
1708 next[2] = cppGetcFromUngetBufferOrFile ();
1709 if (! (next[2] == ':' || next[2] == '>'))
1710 {
1711 cppUngetc (next[2]);
1712 cppUngetc (next[1]);
1713 cppUngetc (next[0]);
1714 c = '<';
1715 }
1716 else
1717 {
1718 cppUngetc (next[2]);
1719 cppUngetc (next[1]);
1720 c = '[';
1721 }
1722 }
1723 else
1724 {
1725 cppUngetc (next[1]);
1726 c = '[';
1727 }
1728 break;
1729 case '%': c = '{'; break;
1730 default: cppUngetc (next[0]);
1731 }
1732
1733 if (macrodef)
1734 vStringPut (macrodef, c);
1735
1736 goto enter;
1737 }
1738 case ':':
1739 {
1740 condition = conditionMayFlush(condition, false);
1741
1742 int next = cppGetcFromUngetBufferOrFile ();
1743 if (next == '>')
1744 c = ']';
1745 else
1746 cppUngetc (next);
1747
1748 if (macrodef)
1749 vStringPut (macrodef, c);
1750
1751 goto enter;
1752 }
1753 case '%':
1754 {
1755 condition = conditionMayFlush(condition, false);
1756
1757 int next = cppGetcFromUngetBufferOrFile ();
1758 switch (next)
1759 {
1760 case '>': c = '}'; break;
1761 case ':': c = '#'; goto process;
1762 default: cppUngetc (next);
1763 }
1764
1765 if (macrodef)
1766 vStringPut (macrodef, c);
1767
1768 goto enter;
1769 }
1770
1771 default:
1772 if (c == '@' && Cpp.hasAtLiteralStrings)
1773 {
1774 condition = conditionMayFlush(condition, false);
1775
1776 int next = cppGetcFromUngetBufferOrFile ();
1777 if (next == DOUBLE_QUOTE)
1778 {
1779 Cpp.directive.accept = false;
1780 c = skipToEndOfString (true);
1781 if (macrodef)
1782 vStringCatS (macrodef, "@\"\"");
1783 break;
1784 }
1785 else
1786 {
1787 cppUngetc (next);
1788 if (macrodef)
1789 vStringPut (macrodef, '@');
1790 }
1791 }
1792 else if (c == 'R' && Cpp.hasCxxRawLiteralStrings)
1793 {
1794 conditionMayPut(condition, c);
1795
1796 /* OMG!11 HACK!!11 Get the previous character.
1797 *
1798 * We need to know whether the previous character was an identifier or not,
1799 * because "R" has to be on its own, not part of an identifier. This allows
1800 * for constructs like:
1801 *
1802 * #define FOUR "4"
1803 * const char *p = FOUR"5";
1804 *
1805 * which is not a raw literal, but a preprocessor concatenation.
1806 *
1807 * FIXME: handle
1808 *
1809 * const char *p = R\
1810 * "xxx(raw)xxx";
1811 *
1812 * which is perfectly valid (yet probably very unlikely). */
1813 int prev = getNthPrevCFromInputFile (1, '\0');
1814 int prev2 = getNthPrevCFromInputFile (2, '\0');
1815 int prev3 = getNthPrevCFromInputFile (3, '\0');
1816
1817 if (! cppIsident (prev) ||
1818 (! cppIsident (prev2) && (prev == 'L' || prev == 'u' || prev == 'U')) ||
1819 (! cppIsident (prev3) && (prev2 == 'u' && prev == '8')))
1820 {
1821 int next = cppGetcFromUngetBufferOrFile ();
1822 if (next != DOUBLE_QUOTE)
1823 {
1824 cppUngetc (next);
1825 if (macrodef)
1826 vStringPut (macrodef, 'R');
1827 }
1828 else
1829 {
1830 Cpp.directive.accept = false;
1831 c = skipToEndOfCxxRawLiteralString ();
1832
1833 /* We assume none may want to know the content of the
1834 * literal; just put "". */
1835 if (macrodef)
1836 vStringCatS (macrodef, "\"\"");
1837
1838 break;
1839 }
1840 }
1841 else
1842 {
1843 if (macrodef)
1844 vStringPut (macrodef, 'R');
1845 }
1846 }
1847 else if(isxdigit(c))
1848 {
1849 /* Check for digit separator. If we find it we just skip it */
1850 int next = cppGetcFromUngetBufferOrFile();
1851 if(next != SINGLE_QUOTE)
1852 cppUngetc(next);
1853 if (macrodef)
1854 vStringPut (macrodef, c);
1855 conditionMayPut(condition, c);
1856 }
1857 else
1858 {
1859 if (macrodef)
1860 vStringPut (macrodef, c);
1861 if (isalnum(c) || c == '_')
1862 conditionMayPut(condition, c);
1863 else
1864 condition = conditionMayFlush(condition, false);
1865 }
1866 enter:
1867 Cpp.directive.accept = false;
1868 if (directive)
1869 {
1870 bool inspect_conidtion = false;
1871 ignore = handleDirective (c, ¯oCorkIndex, &inspect_conidtion);
1872 if (Cpp.macrodefFieldIndex != FIELD_UNKNOWN
1873 && macroCorkIndex != CORK_NIL
1874 && macrodef == NULL)
1875 macrodef = vStringNew ();
1876 if (condition == NULL
1877 && inspect_conidtion)
1878 {
1879 condition = vStringNew ();
1880 if (isalpha(c) || c == '_')
1881 conditionMayPut(condition, c);
1882 }
1883 }
1884 break;
1885 }
1886 } while (directive || ignore);
1887
1888 if (macrodef)
1889 vStringDelete (macrodef);
1890
1891 if (condition)
1892 vStringDelete (condition);
1893
1894 DebugStatement ( debugPutc (DEBUG_CPP, c); )
1895 DebugStatement ( if (c == NEWLINE)
1896 debugPrintf (DEBUG_CPP, "%6ld: ", getInputLineNumber () + 1); )
1897
1898 return c;
1899 }
1900
findCppTags(void)1901 static void findCppTags (void)
1902 {
1903 cppInitCommon (Cpp.lang, 0, false, false, false,
1904 KIND_GHOST_INDEX, 0, 0,
1905 KIND_GHOST_INDEX,
1906 KIND_GHOST_INDEX, 0, 0,
1907 FIELD_UNKNOWN);
1908
1909 findRegexTagsMainloop (cppGetc);
1910
1911 cppTerminate ();
1912 }
1913
1914
1915 /*
1916 * Token ignore processing
1917 */
1918
/* Macros registered through saveIgnoreToken(), keyed by macro name.
 * cppFindMacro() consults this table before Cpp.fileMacroTable. */
static hashTable * cmdlineMacroTable;
1920
1921
/* Callback handed to foreachEntriesInScope() by cppFindMacroFromSymtab().
 *
 * If "entry" is a macro definition tag of the client language or of
 * Cpp.lang, a "name[signature]=[value]" string is built from it, saved in
 * Cpp.fileMacroTable via saveMacro(), the result is stored through "data"
 * (a cppMacroInfo **), and false is returned. Otherwise true is returned.
 */
static bool buildMacroInfoFromTagEntry (int corkIndex,
										tagEntryInfo * entry,
										void * data)
{
	cppMacroInfo **result = data;

	const bool languageMatches =
		(entry->langType == Cpp.clientLang || entry->langType == Cpp.lang);

	if (! languageMatches
		|| entry->kindIndex != Cpp.defineMacroKindIndex
		|| ! isRoleAssigned (entry, ROLE_DEFINITION_INDEX))
		return true;

	/* Rebuild the textual form that saveMacro() parses. */
	vString *text = vStringNewInit (entry->name);

	if (entry->extensionFields.signature)
		vStringCatS (text, entry->extensionFields.signature);
	vStringPut (text, '=');

	const char *value = getParserFieldValueForType (entry, Cpp.macrodefFieldIndex);
	if (value)
		vStringCatS (text, value);

	*result = saveMacro (Cpp.fileMacroTable, vStringValue (text));
	vStringDelete (text);

	return false;
}
1948
cppFindMacroFromSymtab(const char * const name)1949 extern cppMacroInfo * cppFindMacroFromSymtab (const char *const name)
1950 {
1951 cppMacroInfo *info = NULL;
1952 foreachEntriesInScope (CORK_NIL, name, buildMacroInfoFromTagEntry, &info);
1953
1954 return info;
1955 }
1956
1957 /* Determines whether or not "name" should be ignored, per the ignore list.
1958 */
cppFindMacro(const char * const name)1959 extern cppMacroInfo * cppFindMacro (const char *const name)
1960 {
1961 cppMacroInfo *info;
1962
1963 if (cmdlineMacroTable)
1964 {
1965 info = (cppMacroInfo *)hashTableGetItem (cmdlineMacroTable,(char *)name);
1966 if (info)
1967 return info;
1968 }
1969
1970 if (Cpp.fileMacroTable)
1971 {
1972 info = (cppMacroInfo *)hashTableGetItem (Cpp.fileMacroTable,(char *)name);
1973 if (info)
1974 return info;
1975
1976 info = cppFindMacroFromSymtab(name);
1977 if (info)
1978 return info;
1979 }
1980 return NULL;
1981 }
1982
cppBuildMacroReplacement(const cppMacroInfo * macro,const char ** parameters,int parameterCount)1983 extern vString * cppBuildMacroReplacement(
1984 const cppMacroInfo * macro,
1985 const char ** parameters, /* may be NULL */
1986 int parameterCount
1987 )
1988 {
1989 if(!macro)
1990 return NULL;
1991
1992 if(!macro->replacements)
1993 return NULL;
1994
1995 vString * ret = vStringNew();
1996
1997 cppMacroReplacementPartInfo * r = macro->replacements;
1998
1999 while(r)
2000 {
2001 if(r->parameterIndex < 0)
2002 {
2003 if(r->constant)
2004 vStringCat(ret,r->constant);
2005 } else {
2006 if(parameters && (r->parameterIndex < parameterCount))
2007 {
2008 if(r->flags & CPP_MACRO_REPLACEMENT_FLAG_STRINGIFY)
2009 vStringPut(ret,'"');
2010
2011 vStringCatS(ret,parameters[r->parameterIndex]);
2012 if(r->flags & CPP_MACRO_REPLACEMENT_FLAG_VARARGS)
2013 {
2014 int idx = r->parameterIndex + 1;
2015 while(idx < parameterCount)
2016 {
2017 vStringPut(ret,',');
2018 vStringCatS(ret,parameters[idx]);
2019 idx++;
2020 }
2021 }
2022
2023 if(r->flags & CPP_MACRO_REPLACEMENT_FLAG_STRINGIFY)
2024 vStringPut(ret,'"');
2025 }
2026 }
2027
2028 r = r->next;
2029 }
2030
2031 return ret;
2032 }
2033
// We stop applying macro replacements if the unget buffer gets too big,
// as that is a sign of recursive macro expansion.
// The value is compared against cppUngetBufferSize().
#define CPP_PARSER_MAXIMUM_UNGET_BUFFER_SIZE_FOR_MACRO_REPLACEMENTS 65536
2037
/* Expands "macro" with the arguments in "args" (may be NULL) and pushes
 * the resulting text back to the input with cppUngetStringBuiltByMacro().
 * Nothing is pushed back when the macro has no replacement parts or when
 * the unget buffer has already reached the size limit.
 */
extern void cppBuildMacroReplacementWithPtrArrayAndUngetResult(
	cppMacroInfo * macro,
	const ptrArray * args)
{
	vString * expansion = NULL;

	// Detect other cases of nasty macro expansion that cause
	// the unget buffer to grow fast (but the token chain to grow slowly)
	//    -D'p=a' -D'a=p+p'
	const bool bufferHasRoom =
		cppUngetBufferSize() < CPP_PARSER_MAXIMUM_UNGET_BUFFER_SIZE_FOR_MACRO_REPLACEMENTS;

	if (bufferHasRoom && macro->replacements)
	{
		const char ** argv = NULL;
		int argc = 0;

		if (args)
		{
			// Copy the item pointers into a plain array for
			// cppBuildMacroReplacement().
			argc = ptrArrayCount (args);
			argv = (const char **)eMalloc (sizeof(char *) * argc);

			int i = 0;
			while (i < argc)
			{
				TRACE_PRINT("Arg[%d] for %s<%p>: %s",
							i, macro->name, macro, ptrArrayItem (args, i));
				argv[i] = ptrArrayItem (args, i);
				i++;
			}
		}

		expansion = cppBuildMacroReplacement(macro, argv, argc);

		if (argv)
			eFree ((void *)argv);
	}

	if (!expansion)
	{
		TRACE_PRINT("Replacement for %s<%p>: ", macro->name, macro);
		return;
	}

	cppUngetStringBuiltByMacro(vStringValue(expansion), vStringLength(expansion),
							   macro);
	TRACE_PRINT("Replacement for %s<%p>: %s", macro->name, macro, vStringValue (expansion));
	vStringDelete (expansion);
}
2082
saveIgnoreToken(const char * ignoreToken)2083 static void saveIgnoreToken(const char * ignoreToken)
2084 {
2085 if(!ignoreToken)
2086 return;
2087
2088 Assert (cmdlineMacroTable);
2089
2090 const char * c = ignoreToken;
2091 char cc = *c;
2092
2093 const char * tokenBegin = c;
2094 const char * tokenEnd = NULL;
2095 const char * replacement = NULL;
2096 bool ignoreFollowingParenthesis = false;
2097
2098 while(cc)
2099 {
2100 if(cc == '=')
2101 {
2102 if(!tokenEnd)
2103 tokenEnd = c;
2104 c++;
2105 if(*c)
2106 replacement = c;
2107 break;
2108 }
2109
2110 if(cc == '+')
2111 {
2112 if(!tokenEnd)
2113 tokenEnd = c;
2114 ignoreFollowingParenthesis = true;
2115 }
2116
2117 c++;
2118 cc = *c;
2119 }
2120
2121 if(!tokenEnd)
2122 tokenEnd = c;
2123
2124 if(tokenEnd <= tokenBegin)
2125 return;
2126
2127 cppMacroInfo * info = (cppMacroInfo *)eMalloc(sizeof(cppMacroInfo));
2128
2129 info->hasParameterList = ignoreFollowingParenthesis;
2130 if(replacement)
2131 {
2132 cppMacroReplacementPartInfo * rep = \
2133 (cppMacroReplacementPartInfo *)eMalloc(sizeof(cppMacroReplacementPartInfo));
2134 rep->parameterIndex = -1;
2135 rep->flags = 0;
2136 rep->constant = vStringNewInit(replacement);
2137 rep->next = NULL;
2138 info->replacements = rep;
2139 } else {
2140 info->replacements = NULL;
2141 }
2142 info->useCount = 0;
2143 info->next = NULL;
2144 info->name = eStrndup(tokenBegin,tokenEnd - tokenBegin);
2145 hashTablePutItem(cmdlineMacroTable,info->name,info);
2146
2147 verbose (" ignore token: %s\n", ignoreToken);
2148 }
2149
saveMacro(hashTable * table,const char * macro)2150 static cppMacroInfo * saveMacro(hashTable *table, const char * macro)
2151 {
2152 CXX_DEBUG_ENTER_TEXT("Save macro %s",macro);
2153
2154 if(!macro)
2155 return NULL;
2156
2157 Assert (table);
2158
2159 const char * c = macro;
2160
2161 // skip initial spaces
2162 while(*c && isspacetab(*c))
2163 c++;
2164
2165 if(!*c)
2166 {
2167 CXX_DEBUG_LEAVE_TEXT("Bad empty macro definition");
2168 return NULL;
2169 }
2170
2171 if(!(isalpha(*c) || (*c == '_' || (*c == '$') )))
2172 {
2173 CXX_DEBUG_LEAVE_TEXT("Macro does not start with an alphanumeric character");
2174 return NULL; // must be a sequence of letters and digits
2175 }
2176
2177 const char * identifierBegin = c;
2178
2179 while(*c && (isalnum(*c) || (*c == '_') || (*c == '$') ))
2180 c++;
2181
2182 const char * identifierEnd = c;
2183
2184 CXX_DEBUG_PRINT("Macro identifier '%.*s'",identifierEnd - identifierBegin,identifierBegin);
2185
2186 #define MAX_PARAMS 16
2187
2188 const char * paramBegin[MAX_PARAMS];
2189 const char * paramEnd[MAX_PARAMS];
2190
2191 int iParamCount = 0;
2192
2193 while(*c && isspacetab(*c))
2194 c++;
2195
2196 cppMacroInfo * info = (cppMacroInfo *)eMalloc(sizeof(cppMacroInfo));
2197 info->useCount = 0;
2198 info->next = NULL;
2199
2200 if(*c == '(')
2201 {
2202 // parameter list
2203 CXX_DEBUG_PRINT("Macro has a parameter list");
2204
2205 info->hasParameterList = true;
2206
2207 c++;
2208 while(*c)
2209 {
2210 while(*c && isspacetab(*c))
2211 c++;
2212
2213 if(*c && (*c != ',') && (*c != ')'))
2214 {
2215 paramBegin[iParamCount] = c;
2216 c++;
2217 while(*c && (*c != ',') && (*c != ')') && (!isspacetab(*c)))
2218 c++;
2219 paramEnd[iParamCount] = c;
2220
2221 CXX_DEBUG_PRINT(
2222 "Macro parameter %d '%.*s'",
2223 iParamCount,
2224 paramEnd[iParamCount] - paramBegin[iParamCount],
2225 paramBegin[iParamCount]
2226 );
2227
2228 iParamCount++;
2229 if(iParamCount >= MAX_PARAMS)
2230 break;
2231 }
2232
2233 while(*c && isspacetab(*c))
2234 c++;
2235
2236 if(*c == ')')
2237 break;
2238
2239 if(*c == ',')
2240 c++;
2241 }
2242
2243 while(*c && (*c != ')'))
2244 c++;
2245
2246 if(*c == ')')
2247 c++;
2248
2249 CXX_DEBUG_PRINT("Got %d parameters",iParamCount);
2250
2251 } else {
2252 info->hasParameterList = false;
2253 }
2254
2255 while(*c && isspacetab(*c))
2256 c++;
2257
2258 info->replacements = NULL;
2259
2260
2261 if(*c == '=')
2262 {
2263 CXX_DEBUG_PRINT("Macro has a replacement part");
2264
2265 // have replacement part
2266 c++;
2267
2268 cppMacroReplacementPartInfo * lastReplacement = NULL;
2269 int nextParameterReplacementFlags = 0;
2270
2271 #define ADD_REPLACEMENT_NEW_PART(part) \
2272 do { \
2273 if(lastReplacement) \
2274 lastReplacement->next = part; \
2275 else \
2276 info->replacements = part; \
2277 lastReplacement = part; \
2278 } while(0)
2279
2280 #define ADD_CONSTANT_REPLACEMENT_NEW_PART(start,len) \
2281 do { \
2282 cppMacroReplacementPartInfo * rep = \
2283 (cppMacroReplacementPartInfo *)eMalloc(sizeof(cppMacroReplacementPartInfo)); \
2284 rep->parameterIndex = -1; \
2285 rep->flags = 0; \
2286 rep->constant = vStringNew(); \
2287 vStringNCatS(rep->constant,start,len); \
2288 rep->next = NULL; \
2289 CXX_DEBUG_PRINT("Constant replacement part: '%s'",vStringValue(rep->constant)); \
2290 ADD_REPLACEMENT_NEW_PART(rep); \
2291 } while(0)
2292
2293 #define ADD_CONSTANT_REPLACEMENT(start,len) \
2294 do { \
2295 if(lastReplacement && (lastReplacement->parameterIndex == -1)) \
2296 { \
2297 vStringNCatS(lastReplacement->constant,start,len); \
2298 CXX_DEBUG_PRINT( \
2299 "Constant replacement part changed: '%s'", \
2300 vStringValue(lastReplacement->constant) \
2301 ); \
2302 } else { \
2303 ADD_CONSTANT_REPLACEMENT_NEW_PART(start,len); \
2304 } \
2305 } while(0)
2306
2307 // parse replacements
2308 const char * begin = c;
2309
2310 while(*c)
2311 {
2312 if(isalpha(*c) || (*c == '_'))
2313 {
2314 if(c > begin)
2315 ADD_CONSTANT_REPLACEMENT(begin,c - begin);
2316
2317 const char * tokenBegin = c;
2318
2319 while(*c && (isalnum(*c) || (*c == '_')))
2320 c++;
2321
2322 // check if it is a parameter
2323 int tokenLen = c - tokenBegin;
2324
2325 CXX_DEBUG_PRINT("Check token '%.*s'",tokenLen,tokenBegin);
2326
2327 bool bIsVarArg = (tokenLen == 11) && (strncmp(tokenBegin,"__VA_ARGS__",11) == 0);
2328
2329 int i = 0;
2330 for(;i<iParamCount;i++)
2331 {
2332 int paramLen = paramEnd[i] - paramBegin[i];
2333
2334 if(
2335 (
2336 bIsVarArg &&
2337 (paramLen == 3) &&
2338 (strncmp(paramBegin[i],"...",3) == 0)
2339 ) || (
2340 (!bIsVarArg) &&
2341 (paramLen == tokenLen) &&
2342 (strncmp(paramBegin[i],tokenBegin,paramLen) == 0)
2343 )
2344 )
2345 {
2346 // parameter!
2347 cppMacroReplacementPartInfo * rep = \
2348 (cppMacroReplacementPartInfo *)eMalloc(sizeof(cppMacroReplacementPartInfo));
2349 rep->parameterIndex = i;
2350 rep->flags = nextParameterReplacementFlags |
2351 (bIsVarArg ? CPP_MACRO_REPLACEMENT_FLAG_VARARGS : 0);
2352 rep->constant = NULL;
2353 rep->next = NULL;
2354
2355 nextParameterReplacementFlags = 0;
2356
2357 CXX_DEBUG_PRINT("Parameter replacement part: %d (vararg %d)",i,bIsVarArg);
2358
2359 ADD_REPLACEMENT_NEW_PART(rep);
2360 break;
2361 }
2362 }
2363
2364 if(i >= iParamCount)
2365 {
2366 // no parameter found
2367 ADD_CONSTANT_REPLACEMENT(tokenBegin,tokenLen);
2368 }
2369
2370 begin = c;
2371 continue;
2372 }
2373
2374 if((*c == '"') || (*c == '\''))
2375 {
2376 // skip string/char constant
2377 char term = *c;
2378 c++;
2379 while(*c)
2380 {
2381 if(*c == '\\')
2382 {
2383 c++;
2384 if(*c)
2385 c++;
2386 } else if(*c == term)
2387 {
2388 c++;
2389 break;
2390 }
2391 c++;
2392 }
2393 continue;
2394 }
2395
2396 if(*c == '#')
2397 {
2398 // check for token paste/stringification
2399 if(c > begin)
2400 ADD_CONSTANT_REPLACEMENT(begin,c - begin);
2401
2402 c++;
2403 if(*c == '#')
2404 {
2405 // token paste
2406 CXX_DEBUG_PRINT("Found token paste operator");
2407 while(*c == '#')
2408 c++;
2409
2410 // we just skip this part and the following spaces
2411 while(*c && isspacetab(*c))
2412 c++;
2413
2414 if(lastReplacement && (lastReplacement->parameterIndex == -1))
2415 {
2416 // trim spaces from the last replacement constant!
2417 vStringStripTrailing(lastReplacement->constant);
2418 CXX_DEBUG_PRINT(
2419 "Last replacement truncated to '%s'",
2420 vStringValue(lastReplacement->constant)
2421 );
2422 }
2423 } else {
2424 // stringification
2425 CXX_DEBUG_PRINT("Found stringification operator");
2426 nextParameterReplacementFlags |= CPP_MACRO_REPLACEMENT_FLAG_STRINGIFY;
2427 }
2428
2429 begin = c;
2430 continue;
2431 }
2432
2433 c++;
2434 }
2435
2436 if(c > begin)
2437 ADD_CONSTANT_REPLACEMENT(begin,c - begin);
2438 }
2439
2440 info->name = eStrndup(identifierBegin,identifierEnd - identifierBegin);
2441 hashTablePutItem(table,info->name,info);
2442 CXX_DEBUG_LEAVE();
2443
2444 return info;
2445 }
2446
/*
 * Releases a macro description together with everything it owns: every part
 * of its replacement list (including the constant text a part may carry),
 * the macro name and finally the description itself.
 *
 * A NULL info is accepted and ignored.
 */
static void freeMacroInfo(cppMacroInfo * info)
{
	if(info)
	{
		cppMacroReplacementPartInfo * pNext;

		for(cppMacroReplacementPartInfo * pCurrent = info->replacements; pCurrent; pCurrent = pNext)
		{
			// remember the successor first: pCurrent is released below
			pNext = pCurrent->next;

			// constant may be NULL (parameter parts are created without one)
			if(pCurrent->constant)
				vStringDelete(pCurrent->constant);

			eFree(pCurrent);
		}

		eFree(info->name);
		eFree(info);
	}
}
2463
makeMacroTable(void)2464 static hashTable *makeMacroTable (void)
2465 {
2466 return hashTableNew(
2467 1024,
2468 hashCstrhash,
2469 hashCstreq,
2470 NULL, /* Keys refers values' name fields. */
2471 (void (*)(void *))freeMacroInfo
2472 );
2473 }
2474
initializeCpp(const langType language)2475 static void initializeCpp (const langType language)
2476 {
2477 Cpp.lang = language;
2478 }
2479
finalizeCpp(const langType language,bool initialized)2480 static void finalizeCpp (const langType language, bool initialized)
2481 {
2482 if (cmdlineMacroTable)
2483 {
2484 hashTableDelete (cmdlineMacroTable);
2485 cmdlineMacroTable = NULL;
2486 }
2487 }
2488
/*
 * Handler of the "_expand" parameter: updates doesExpandMacros with the
 * boolean that paramParserBool() derives from arg.
 *
 * The current setting is handed to paramParserBool() as well, presumably as
 * the value to keep when arg cannot be interpreted (TODO confirm against
 * paramParserBool). name is passed along together with the "parameter" label.
 */
static void CpreProExpandMacrosInInput (const langType language CTAGS_ATTR_UNUSED, const char *name, const char *arg)
{
	doesExpandMacros = paramParserBool (
		arg,
		doesExpandMacros,
		name,
		"parameter");
}
2494
/*
 * Handler of the "ignore" parameter.
 *
 * With a non-empty arg, makes sure the macro table in cmdlineMacroTable
 * exists and passes arg to saveIgnoreToken(). With a NULL or empty arg,
 * discards the whole table (if any) and reports that through verbose().
 */
static void CpreProInstallIgnoreToken (const langType language CTAGS_ATTR_UNUSED, const char *optname CTAGS_ATTR_UNUSED, const char *arg)
{
	const bool hasToken = (arg != NULL) && (arg[0] != '\0');

	if (hasToken)
	{
		/* the table is created lazily, on the first token installed */
		if (cmdlineMacroTable == NULL)
			cmdlineMacroTable = makeMacroTable ();
		saveIgnoreToken (arg);
		return;
	}

	/* an empty argument is a request to forget everything stored so far */
	if (cmdlineMacroTable != NULL)
	{
		hashTableDelete (cmdlineMacroTable);
		cmdlineMacroTable = NULL;
	}
	verbose (" clearing list\n");
}
2511
/*
 * Handler of the "define" parameter.
 *
 * With a non-empty arg, makes sure the macro table in cmdlineMacroTable
 * exists and has saveMacro() store the definition given by arg in it. With a
 * NULL or empty arg, discards the whole table (if any) and reports that
 * through verbose().
 */
static void CpreProInstallMacroToken (const langType language CTAGS_ATTR_UNUSED, const char *optname CTAGS_ATTR_UNUSED, const char *arg)
{
	const bool hasDefinition = (arg != NULL) && (arg[0] != '\0');

	if (hasDefinition)
	{
		/* the table is created lazily, on the first macro installed */
		if (cmdlineMacroTable == NULL)
			cmdlineMacroTable = makeMacroTable ();
		saveMacro (cmdlineMacroTable, arg);
		return;
	}

	/* an empty argument is a request to forget everything stored so far */
	if (cmdlineMacroTable != NULL)
	{
		hashTableDelete (cmdlineMacroTable);
		cmdlineMacroTable = NULL;
	}
	verbose (" clearing list\n");
}
2528
/*
 * Handler of the "if0" parameter: updates doesExaminCodeWithInIf0Branch with
 * the boolean that paramParserBool() derives from arg.
 *
 * The current setting is handed to paramParserBool() as well, presumably as
 * the value to keep when arg cannot be interpreted (TODO confirm against
 * paramParserBool). name is passed along together with the "parameter" label.
 */
static void CpreProSetIf0 (const langType language CTAGS_ATTR_UNUSED, const char *name, const char *arg)
{
	doesExaminCodeWithInIf0Branch = paramParserBool (
		arg,
		doesExaminCodeWithInIf0Branch,
		name,
		"parameter");
}
2534
2535 static parameterHandlerTable CpreProParameterHandlerTable [] = {
2536 { .name = "if0",
2537 .desc = "examine code within \"#if 0\" branch (true or [false])",
2538 .handleParameter = CpreProSetIf0,
2539 },
2540 { .name = "ignore",
2541 .desc = "a token to be specially handled",
2542 .handleParameter = CpreProInstallIgnoreToken,
2543 },
2544 { .name = "define",
2545 .desc = "define replacement for an identifier (name(params,...)=definition)",
2546 .handleParameter = CpreProInstallMacroToken,
2547 },
2548 { .name = "_expand",
2549 .desc = "expand macros if their definitions are in the current C/C++/CUDA input file (true or [false])",
2550 .handleParameter = CpreProExpandMacrosInInput,
2551 }
2552 };
2553
CPreProParser(void)2554 extern parserDefinition* CPreProParser (void)
2555 {
2556 parserDefinition* const def = parserNew ("CPreProcessor");
2557 def->kindTable = CPreProKinds;
2558 def->kindCount = ARRAY_SIZE (CPreProKinds);
2559 def->initialize = initializeCpp;
2560 def->parser = findCppTags;
2561 def->finalize = finalizeCpp;
2562
2563 def->fieldTable = CPreProFields;
2564 def->fieldCount = ARRAY_SIZE (CPreProFields);
2565
2566 def->parameterHandlerTable = CpreProParameterHandlerTable;
2567 def->parameterHandlerCount = ARRAY_SIZE(CpreProParameterHandlerTable);
2568
2569 def->useCork = CORK_QUEUE | CORK_SYMTAB;
2570 return def;
2571 }
2572