xref: /Universal-ctags/parsers/objc.c (revision 3671ad7255885a0c8f6ff4979d80c70f201ea411)
1 
2 /*
3 *   Copyright (c) 2010, Vincent Berthoux
4 *
5 *   This source code is released for free distribution under the terms of the
6 *   GNU General Public License version 2 or (at your option) any later version.
7 *
8 *   This module contains functions for generating tags for Objective C
9 *   language files.
10 */
11 /*
12 *   INCLUDE FILES
13 */
14 #include "general.h"	/* must always come first */
15 
16 #include <string.h>
17 
18 #include "keyword.h"
19 #include "debug.h"
20 #include "entry.h"
21 #include "parse.h"
22 #include "read.h"
23 #include "routines.h"
24 #include "selectors.h"
25 #include "trashbox.h"
26 #include "vstring.h"
27 
28 typedef enum {
29 	K_INTERFACE,
30 	K_IMPLEMENTATION,
31 	K_PROTOCOL,
32 	K_METHOD,
33 	K_CLASSMETHOD,
34 	K_VAR,
35 	K_FIELD,
36 	K_FUNCTION,
37 	K_PROPERTY,
38 	K_TYPEDEF,
39 	K_STRUCT,
40 	K_ENUM,
41 	K_MACRO,
42 	K_CATEGORY,
43 } objcKind;
44 
45 static kindDefinition ObjcKinds[] = {
46 	{true, 'i', "interface", "class interface"},
47 	{true, 'I', "implementation", "class implementation"},
48 	{true, 'P', "protocol", "Protocol"},
49 	{true, 'm', "method", "Object's method"},
50 	{true, 'c', "class", "Class' method"},
51 	{true, 'v', "var", "Global variable"},
52 	{true, 'E', "field", "Object field"},
53 	{true, 'f', "function", "A function"},
54 	{true, 'p', "property", "A property"},
55 	{true, 't', "typedef", "A type alias"},
56 	{true, 's', "struct", "A type structure"},
57 	{true, 'e', "enum", "An enumeration"},
58 	{true, 'M', "macro", "A preprocessor macro"},
59 	{true, 'C', "category", "categories"},
60 };
61 
/* Everything the lexer can hand to the parser: keyword ids (looked up
 * through the keyword table), identifiers, punctuation and line/file
 * markers. */
typedef enum {
	/* keywords and '@' directives */
	ObjcTYPEDEF = 0,
	ObjcSTRUCT,
	ObjcENUM,
	ObjcIMPLEMENTATION,
	ObjcINTERFACE,
	ObjcPROTOCOL,
	ObjcENCODE,
	ObjcEXTERN,
	ObjcSYNCHRONIZED,
	ObjcSELECTOR,
	ObjcPROPERTY,
	ObjcEND,
	ObjcDEFS,
	ObjcCLASS,
	ObjcPRIVATE,
	ObjcPACKAGE,
	ObjcPUBLIC,
	ObjcPROTECTED,
	ObjcSYNTHESIZE,
	ObjcDYNAMIC,
	ObjcOPTIONAL,
	ObjcREQUIRED,
	ObjcSTRING,
	ObjcIDENTIFIER,		/* a word that is not a keyword */

	/* punctuation */
	Tok_COMA,		/* ',' */
	Tok_PLUS,		/* '+' */
	Tok_MINUS,		/* '-' */
	Tok_PARL,		/* '(' */
	Tok_PARR,		/* ')' */
	Tok_CurlL,		/* '{' */
	Tok_CurlR,		/* '}' */
	Tok_SQUAREL,		/* '[' */
	Tok_SQUARER,		/* ']' */
	Tok_semi,		/* ';' */
	Tok_dpoint,		/* ':' */
	Tok_Sharp,		/* '#' */
	Tok_Backslash,		/* '\\' */
	Tok_Asterisk,		/* '*' */
	Tok_ANGLEL,		/* '<' */
	Tok_ANGLER,		/* '>' */
	Tok_EOL,		/* the lexer moved on to a new input line */
	Tok_CSTRING,		/* "..." */
	Tok_any,		/* anything not listed above */

	Tok_EOF			/* no more input */
} objcKeyword;

/* The parser functions speak of "tokens"; they are the same values. */
typedef objcKeyword objcToken;
112 
113 static const keywordTable objcKeywordTable[] = {
114 	{"typedef", ObjcTYPEDEF},
115 	{"struct", ObjcSTRUCT},
116 	{"enum", ObjcENUM},
117 	{"extern", ObjcEXTERN},
118 	{"@implementation", ObjcIMPLEMENTATION},
119 	{"@interface", ObjcINTERFACE},
120 	{"@protocol", ObjcPROTOCOL},
121 	{"@encode", ObjcENCODE},
122 	{"@property", ObjcPROPERTY},
123 	{"@synchronized", ObjcSYNCHRONIZED},
124 	{"@selector", ObjcSELECTOR},
125 	{"@end", ObjcEND},
126 	{"@defs", ObjcDEFS},
127 	{"@class", ObjcCLASS},
128 	{"@private", ObjcPRIVATE},
129 	{"@package", ObjcPACKAGE},
130 	{"@public", ObjcPUBLIC},
131 	{"@protected", ObjcPROTECTED},
132 	{"@synthesize", ObjcSYNTHESIZE},
133 	{"@dynamic", ObjcDYNAMIC},
134 	{"@optional", ObjcOPTIONAL},
135 	{"@required", ObjcREQUIRED},
136 };
137 
138 typedef enum {
139 	F_CATEGORY,
140 	F_PROTOCOLS,
141 } objcField;
142 
143 static fieldDefinition ObjcFields [] = {
144 	{
145 		.name = "category",
146 		.description = "category attached to the class",
147 		.enabled = true,
148 	},
149 	{
150 		.name = "protocols",
151 		.description = "protocols that the class (or category) confirms to",
152 		.enabled = true,
153 	},
154 };
155 
156 static langType Lang_ObjectiveC;
157 
158 /*//////////////////////////////////////////////////////////////////
159 //// lexingInit             */
160 typedef struct _lexingState {
161 	vString *name;	/* current parsed identifier/operator */
162 	const unsigned char *cp;	/* position in stream */
163 } lexingState;
164 
165 /*//////////////////////////////////////////////////////////////////////
166 //// Lexing                                     */
/* True when C is one of the ASCII decimal digits '0'..'9'. */
static bool isNum (char c)
{
	if (c < '0')
		return false;
	return c <= '9';
}
171 
/* True when C is an ASCII lower-case letter 'a'..'z'. */
static bool isLowerAlpha (char c)
{
	return !(c < 'a' || c > 'z');
}
176 
/* True when C is an ASCII upper-case letter 'A'..'Z'. */
static bool isUpperAlpha (char c)
{
	return !(c < 'A' || c > 'Z');
}
181 
/* True when C is an ASCII letter of either case. */
static bool isAlpha (char c)
{
	if (isLowerAlpha (c))
		return true;
	return isUpperAlpha (c);
}
186 
/* True when C may appear inside an identifier: a letter, a digit or '_'. */
static bool isIdent (char c)
{
	if (c == '_')
		return true;
	return isAlpha (c) || isNum (c);
}
191 
/* True for the two blanks the lexer skips silently: space and tab. */
static bool isSpace (char c)
{
	switch (c)
	{
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
196 
197 /* return true if it end with an end of line */
eatWhiteSpace(lexingState * st)198 static void eatWhiteSpace (lexingState * st)
199 {
200 	const unsigned char *cp = st->cp;
201 	while (isSpace (*cp))
202 		cp++;
203 
204 	st->cp = cp;
205 }
206 
/* Read a double-quoted string literal.
 *
 * On entry st->cp points at the opening '"'.  The raw characters between
 * the quotes (escape sequences included, verbatim) are stored in st->name.
 * On return st->cp points just past the closing quote, or at the end of
 * the line when the literal is not terminated on this line (tolerated).
 *
 * A backslash escapes exactly one following character, so in "\\" the
 * second backslash is itself escaped and the quote after it does close
 * the literal. */
static void readCString (lexingState * st)
{
	bool escaped = false;	/* previous char was an unescaped backslash */
	const unsigned char *c = st->cp + 1;	/* skip the opening quote */

	vStringClear (st->name);

	for (; *c != '\0'; c++)
	{
		if (escaped)
		{
			/* whatever follows a backslash is plain content */
			escaped = false;
			vStringPut (st->name, (int) *c);
		}
		else if (*c == '"')
		{
			c++;	/* consume the closing quote */
			break;
		}
		else
		{
			escaped = (*c == '\\');
			vStringPut (st->name, (int) *c);
		}
	}

	st->cp = c;
}
234 
/* Skip a C block comment.
 *
 * On entry st->cp points at the '/' of the opening slash-star.  Lines are
 * pulled from the input for as long as the comment stays open.  On return
 * st->cp points just past the closing star-slash, or is NULL when the
 * input ended inside the comment (the next call to lex() then reports
 * Tok_EOF).
 *
 * The closing '/' is consumed here.  Leaving it behind would make lex()
 * see a stray '/' which, paired with whatever follows, could be mistaken
 * for the start of another comment. */
static void eatComment (lexingState * st)
{
	bool lastIsStar = false;
	const unsigned char *c = st->cp + 2;	/* skip the opening slash-star */

	for (;;)
	{
		if (c == NULL || *c == '\0')
		{
			/* end of line reached: carry on with the next one */
			c = readLineFromInputFile ();
			if (c == NULL)
			{
				st->cp = NULL;
				return;
			}
			/* a '*' ending one line and a '/' starting the next
			 * do not form a comment terminator */
			lastIsStar = false;
			continue;
		}

		if (*c == '/' && lastIsStar)
		{
			c++;	/* step over the terminating '/' */
			break;
		}

		lastIsStar = (*c == '*');
		c++;
	}

	st->cp = c;
}
267 
/* Collect an identifier starting at st->cp into st->name and leave the
 * cursor on the first character that cannot belong to it. */
static void readIdentifier (lexingState * st)
{
	const unsigned char *cursor = st->cp;

	vStringClear (st->name);

	/* the leading character must be a letter or '_' to be kept ... */
	if (*cursor == '_' || isAlpha (*cursor))
		vStringPut (st->name, (int) *cursor);
	/* ... but it is stepped over in any case */
	cursor++;

	/* the rest may also contain digits */
	while (isIdent (*cursor))
	{
		vStringPut (st->name, (int) *cursor);
		cursor++;
	}

	st->cp = cursor;
}
283 
284 /* read the @something directives */
readIdentifierObjcDirective(lexingState * st)285 static void readIdentifierObjcDirective (lexingState * st)
286 {
287 	const unsigned char *p;
288 	vStringClear (st->name);
289 
290 	/* first char is a simple letter */
291 	if (*st->cp == '@')
292 		vStringPut (st->name, (int) *st->cp);
293 
294 	/* Go till you get identifier chars */
295 	for (p = st->cp + 1; isIdent (*p); p++)
296 		vStringPut (st->name, (int) *p);
297 
298 	st->cp = p;
299 }
300 
301 /* The lexer is in charge of reading the file.
302  * Some of sub-lexer (like eatComment) also read file.
303  * lexing is finished when the lexer return Tok_EOF */
lex(lexingState * st)304 static objcKeyword lex (lexingState * st)
305 {
306 	int retType;
307 
308 	/* handling data input here */
309 	while (st->cp == NULL || st->cp[0] == '\0')
310 	{
311 		st->cp = readLineFromInputFile ();
312 		if (st->cp == NULL)
313 			return Tok_EOF;
314 
315 		return Tok_EOL;
316 	}
317 
318 	if (isAlpha (*st->cp) || (*st->cp == '_'))
319 	{
320 		readIdentifier (st);
321 		retType = lookupKeyword (vStringValue (st->name), Lang_ObjectiveC);
322 
323 		if (retType == -1)	/* If it's not a keyword */
324 		{
325 			return ObjcIDENTIFIER;
326 		}
327 		else
328 		{
329 			return retType;
330 		}
331 	}
332 	else if (*st->cp == '@')
333 	{
334 		readIdentifierObjcDirective (st);
335 		retType = lookupKeyword (vStringValue (st->name), Lang_ObjectiveC);
336 
337 		if (retType == -1)	/* If it's not a keyword */
338 		{
339 			return Tok_any;
340 		}
341 		else
342 		{
343 			return retType;
344 		}
345 	}
346 	else if (isSpace (*st->cp))
347 	{
348 		eatWhiteSpace (st);
349 		return lex (st);
350 	}
351 	else
352 		switch (*st->cp)
353 		{
354 		case '(':
355 			st->cp++;
356 			return Tok_PARL;
357 
358 		case '\\':
359 			st->cp++;
360 			return Tok_Backslash;
361 
362 		case '#':
363 			st->cp++;
364 			return Tok_Sharp;
365 
366 		case '/':
367 			if (st->cp[1] == '*')	/* ergl, a comment */
368 			{
369 				eatComment (st);
370 				return lex (st);
371 			}
372 			else if (st->cp[1] == '/')
373 			{
374 				st->cp = NULL;
375 				return lex (st);
376 			}
377 			else
378 			{
379 				st->cp++;
380 				return Tok_any;
381 			}
382 			break;
383 
384 		case ')':
385 			st->cp++;
386 			return Tok_PARR;
387 		case '{':
388 			st->cp++;
389 			return Tok_CurlL;
390 		case '}':
391 			st->cp++;
392 			return Tok_CurlR;
393 		case '[':
394 			st->cp++;
395 			return Tok_SQUAREL;
396 		case ']':
397 			st->cp++;
398 			return Tok_SQUARER;
399 		case ',':
400 			st->cp++;
401 			return Tok_COMA;
402 		case ';':
403 			st->cp++;
404 			return Tok_semi;
405 		case ':':
406 			st->cp++;
407 			return Tok_dpoint;
408 		case '"':
409 			readCString (st);
410 			return Tok_CSTRING;
411 		case '+':
412 			st->cp++;
413 			return Tok_PLUS;
414 		case '-':
415 			st->cp++;
416 			return Tok_MINUS;
417 		case '*':
418 			st->cp++;
419 			return Tok_Asterisk;
420 		case '<':
421 			st->cp++;
422 			return Tok_ANGLEL;
423 		case '>':
424 			st->cp++;
425 			return Tok_ANGLER;
426 
427 		default:
428 			st->cp++;
429 			break;
430 		}
431 
432 	/* default return if nothing is recognized,
433 	 * shouldn't happen, but at least, it will
434 	 * be handled without destroying the parsing. */
435 	return Tok_any;
436 }
437 
438 /*//////////////////////////////////////////////////////////////////////
439 //// Parsing                                    */
440 typedef void (*parseNext) (vString * const ident, objcToken what);
441 
442 /********** Helpers */
443 /* This variable hold the 'parser' which is going to
444  * handle the next token */
445 static parseNext toDoNext;
446 
447 /* Special variable used by parser eater to
448  * determine which action to put after their
449  * job is finished. */
450 static parseNext comeAfter;
451 
452 /* Used by some parsers detecting certain token
453  * to revert to previous parser. */
454 static parseNext fallback;
455 
456 
457 /********** Grammar */
458 static void globalScope (vString * const ident, objcToken what);
459 static void parseMethods (vString * const ident, objcToken what);
460 static void parseImplemMethods (vString * const ident, objcToken what);
461 static vString *tempName = NULL;
462 static vString *parentName = NULL;
463 static objcKind parentType = K_INTERFACE;
464 static int parentCorkIndex = CORK_NIL;
465 static int categoryCorkIndex = CORK_NIL;
466 
467 /* used to prepare tag for OCaml, just in case their is a need to
468  * add additional information to the tag. */
prepareTag(tagEntryInfo * tag,vString const * name,objcKind kind)469 static void prepareTag (tagEntryInfo * tag, vString const *name, objcKind kind)
470 {
471 	initTagEntry (tag, vStringValue (name), kind);
472 
473 	if (vStringLength (parentName) > 0)
474 	{
475 		tag->extensionFields.scopeKindIndex = parentType;
476 		tag->extensionFields.scopeName = vStringValue (parentName);
477 	}
478 }
479 
/* Make PARENT (of kind TYPE) the scope of the tags created from now on.
 * Only one level is remembered: any previous context is overwritten. */
static void pushEnclosingContext (const vString * parent, objcKind type)
{
	parentType = type;
	vStringCopy (parentName, parent);
}
485 
/* Same as pushEnclosingContext(), additionally remembering the cork index
 * of the tag that opened the context so fields can be attached to it
 * later on. */
static void pushEnclosingContextFull (const vString * parent, objcKind type, int corkIndex)
{
	parentCorkIndex = corkIndex;
	pushEnclosingContext (parent, type);
}
491 
popEnclosingContext(void)492 static void popEnclosingContext (void)
493 {
494 	vStringClear (parentName);
495 	parentCorkIndex = CORK_NIL;
496 }
497 
pushCategoryContext(int category_index)498 static void pushCategoryContext (int category_index)
499 {
500 	categoryCorkIndex = category_index;
501 }
502 
popCategoryContext(void)503 static void popCategoryContext (void)
504 {
505 	categoryCorkIndex = CORK_NIL;
506 }
507 
508 /* Used to centralise tag creation, and be able to add
509  * more information to it in the future */
addTag(vString * const ident,int kind)510 static int addTag (vString * const ident, int kind)
511 {
512 	tagEntryInfo toCreate;
513 
514 	if (! ObjcKinds[kind].enabled)
515 		return CORK_NIL;
516 
517 	prepareTag (&toCreate, ident, kind);
518 	return makeTagEntry (&toCreate);
519 }
520 
521 static objcToken waitedToken, fallBackToken;
522 
523 /* Ignore everything till waitedToken and jump to comeAfter.
524  * If the "end" keyword is encountered break, doesn't remember
525  * why though. */
tillToken(vString * const ident CTAGS_ATTR_UNUSED,objcToken what)526 static void tillToken (vString * const ident CTAGS_ATTR_UNUSED, objcToken what)
527 {
528 	if (what == waitedToken)
529 		toDoNext = comeAfter;
530 }
531 
/* Skipping state with an escape hatch: like tillToken(), but meeting
 * fallBackToken first switches to the fallback state instead.  Either
 * way the deciding token is consumed. */
static void tillTokenOrFallBack (vString * const ident CTAGS_ATTR_UNUSED, objcToken what)
{
	if (what == waitedToken)
	{
		toDoNext = comeAfter;
		return;
	}

	if (what == fallBackToken)
		toDoNext = fallback;
}
541 
/* Nesting depth tracked by ignoreBalanced(). */
static int ignoreBalanced_count = 0;

/* Skipping state for bracketed text: every opening '(', '{' or '[' raises
 * the depth, every closing one lowers it (the three bracket kinds are not
 * told apart).  As soon as the depth is zero after a token, control goes
 * to comeAfter. */
static void ignoreBalanced (vString * const ident CTAGS_ATTR_UNUSED, objcToken what)
{
	if (what == Tok_PARL || what == Tok_CurlL || what == Tok_SQUAREL)
		ignoreBalanced_count += 1;
	else if (what == Tok_PARR || what == Tok_CurlR || what == Tok_SQUARER)
		ignoreBalanced_count -= 1;
	/* any other token leaves the depth alone */

	if (ignoreBalanced_count != 0)
		return;

	toDoNext = comeAfter;
}
568 
/* State active inside the "{ ... }" instance variable section of an
 * interface: the last identifier seen before each ';' is tagged as a
 * field. */
static void parseFields (vString * const ident, objcToken what)
{
	if (what == Tok_CurlR)
	{
		/* end of the ivar section: back to the interface body */
		toDoNext = &parseMethods;
	}
	else if (what == Tok_SQUAREL || what == Tok_PARL)
	{
		/* Hand over to ignoreBalanced for the following tokens.  The
		 * opening bracket itself is not passed to it, so its depth
		 * counter is not raised for this bracket. */
		comeAfter = &parseFields;
		toDoNext = &ignoreBalanced;
	}
	else if (what == ObjcIDENTIFIER)
	{
		/* candidate for the variable name */
		vStringCopy (tempName, ident);
	}
	else if (what == Tok_semi)
	{
		/* the last candidate is the name of the declared variable */
		addTag (tempName, K_FIELD);
		vStringClear (tempName);
	}
	/* everything else is of no interest */
}
599 
600 static objcKind methodKind;
601 
602 
603 static vString *fullMethodName;
604 static vString *prevIdent;
605 static vString *signature;
606 
tillTokenWithCapturingSignature(vString * const ident,objcToken what)607 static void tillTokenWithCapturingSignature (vString * const ident, objcToken what)
608 {
609 	tillToken (ident, what);
610 
611 	if (what != waitedToken)
612 	{
613 		if (what == Tok_Asterisk)
614 			vStringPut (signature, '*');
615 		else if (vStringLength (ident) > 0)
616 		{
617 			if (! (vStringLast (signature) == ','
618 				   || vStringLast (signature) == '('
619 				   || vStringLast (signature) == ' '))
620 				vStringPut (signature, ' ');
621 
622 			vStringCat (signature, ident);
623 		}
624 	}
625 }
626 
/* Shared implementation of the "method name" states.
 *
 * Called for every token following the '+' or '-' that opens a method
 * declaration.  It assembles the selector in fullMethodName and the
 * parameter types in signature, and emits the tag when the declaration
 * ends with ';' or '{'.
 *
 *   reEnter     the calling state, resumed after a parenthesised type
 *   nextAction  the state to continue with once the method is tagged
 */
static void parseMethodsNameCommon (vString * const ident, objcToken what,
									parseNext reEnter,
									parseNext nextAction)
{
	int index;
	tagEntryInfo *e;

	switch (what)
	{
	case Tok_PARL:
		/* a parenthesised type: skip up to the matching ')' */
		toDoNext = &tillToken;
		comeAfter = reEnter;
		waitedToken = Tok_PARR;

		/* Nothing of the selector has been seen yet for the return
		 * type; every later type belongs to a parameter and is
		 * captured into the signature. */
		if (! (vStringLength(prevIdent) == 0
			   && vStringLength(fullMethodName) == 0))
			toDoNext = &tillTokenWithCapturingSignature;
		break;

	case Tok_dpoint:
		/* "part:" -- one more piece of the selector */
		vStringCat (fullMethodName, prevIdent);
		vStringPut (fullMethodName, ':');
		vStringClear (prevIdent);

		if (vStringLength (signature) > 1)
			vStringPut (signature, ',');
		break;

	case ObjcIDENTIFIER:
		if ((vStringLength (prevIdent) > 0
			 /* "- initWithObject: o0 withAnotherObject: o1;"
				Overwriting the last value of prevIdent ("o0");
				a parameter name ("o0") was stored to prevIdent,
				and a part of selector("withAnotherObject")
				overwrites it.
				If type for the parameter specified explicitly,
				the last char of signature should not be ',' nor
				'('. In this case, "id" must be put as the type for
				the parameter. */
			 && (vStringLast (signature) == ','
				 || vStringLast (signature) == '('))
			|| (/* "- initWithObject: object;"
				   In this case no overwriting happens.
				   However, "id" for "object" is part
				   of signature. */
				vStringLength (prevIdent) == 0
				&& vStringLength (fullMethodName) > 0
				&& vStringLast (signature) == '('))
			vStringCatS (signature, "id");

		vStringCopy (prevIdent, ident);
		break;

	case Tok_CurlL:
	case Tok_semi:
		/* End of the declaration.  A selector with parameters was
		 * collected in fullMethodName; a simple one is still sitting
		 * in prevIdent. */
		if (vStringLength (fullMethodName) > 0)
		{
			index = addTag (fullMethodName, methodKind);
			vStringClear (fullMethodName);
		}
		else
			index = addTag (prevIdent, methodKind);

		toDoNext = nextAction;
		/* let the implementation state see the token, so that a '{'
		 * starts the skipping of the method body */
		parseImplemMethods (ident, what);
		vStringClear (prevIdent);

		e = getEntryInCorkQueue (index);
		if (e)
		{
			tagEntryInfo *e_cat;

			/* an untyped last parameter defaults to "id" */
			if (vStringLast (signature) == ',')
				vStringCatS (signature, "id");
			vStringPut (signature, ')');

			e->extensionFields.signature = vStringStrdup (signature);

			e_cat = getEntryInCorkQueue (categoryCorkIndex);
			if (e_cat)
				attachParserFieldToCorkEntry (index,
											  ObjcFields [F_CATEGORY].ftype,
											  e_cat->name);
		}

		/* Start over for the next method, whether or not a tag was
		 * made for this one: otherwise the parameter types of an
		 * untagged method (kind disabled) would leak into the
		 * signature of the following one. */
		vStringClear (signature);
		vStringPut (signature, '(');
		break;

	default:
		break;
	}
}
718 
/* Method name state for interfaces and protocols: re-entered after each
 * parenthesised type, followed by parseMethods once the method is done. */
static void parseMethodsName (vString * const ident, objcToken what)
{
	parseMethodsNameCommon (ident, what, &parseMethodsName, &parseMethods);
}
723 
/* Method name state for implementations: re-entered after each
 * parenthesised type, followed by parseImplemMethods once the method is
 * done. */
static void parseMethodsImplemName (vString * const ident, objcToken what)
{
	parseMethodsNameCommon (ident, what,
							&parseMethodsImplemName, &parseImplemMethods);
}
728 
/* State entered after the '(' that follows an interface or
 * implementation name: "@interface Foo (Bar)".
 *
 * An identifier is the category name: it is attached to the enclosing
 * tag as its "category" field, tagged itself, and remembered as the
 * current category.  Parsing then resumes in the methods state matching
 * the enclosing construct.
 *
 * The state is also left on ')' (empty parentheses, i.e. a class
 * extension) and when the enclosing tag does not exist (its kind is
 * disabled); otherwise it would remain active and tag every later
 * identifier as a category. */
static void parseCategory (vString * const ident, objcToken what)
{
	tagEntryInfo *e = getEntryInCorkQueue (parentCorkIndex);
	/* kind of the enclosing construct: taken from its tag when there is
	 * one, from the recorded context otherwise */
	const int enclosingKind = e ? e->kindIndex : (int) parentType;
	const parseNext resume = (enclosingKind == K_INTERFACE)
		? &parseMethods
		: &parseImplemMethods;

	if (what == ObjcIDENTIFIER)
	{
		int index;

		if (e)
			attachParserFieldToCorkEntry (parentCorkIndex,
										  ObjcFields [F_CATEGORY].ftype,
										  vStringValue (ident));
		toDoNext = resume;

		index = addTag (ident, K_CATEGORY);
		pushCategoryContext (index);
	}
	else if (what == Tok_PARR)
	{
		/* "()" -- nothing to tag */
		toDoNext = resume;
	}
}
749 
/* State active in the body of an "@implementation": looks for method
 * definitions, a category name, and the closing "@end". */
static void parseImplemMethods (vString * const ident, objcToken what)
{
	if (what == Tok_PLUS || what == Tok_MINUS)
	{
		/* '+' opens a class method, '-' an instance method */
		methodKind = (what == Tok_PLUS) ? K_CLASSMETHOD : K_METHOD;
		toDoNext = &parseMethodsImplemName;
	}
	else if (what == ObjcEND)
	{
		/* @end */
		popCategoryContext ();
		popEnclosingContext ();
		toDoNext = &globalScope;
	}
	else if (what == Tok_CurlL)
	{
		/* '{': a body; skip it up to the matching brace, counting
		 * this opening brace */
		toDoNext = &ignoreBalanced;
		ignoreBalanced (ident, what);
		comeAfter = &parseImplemMethods;
	}
	else if (what == Tok_PARL)
	{
		/* '(': a category name follows */
		toDoNext = &parseCategory;
	}
	/* anything else is ignored */
}
784 
/* State entered after "@property": skips the parenthesised attribute
 * list and tags the last identifier before ';' as the property. */
static void parseProperty (vString * const ident, objcToken what)
{
	if (what == Tok_PARL)
	{
		/* attribute list: ignore it up to ')' and come back here */
		waitedToken = Tok_PARR;
		comeAfter = &parseProperty;
		toDoNext = &tillToken;
	}
	else if (what == ObjcIDENTIFIER)
	{
		/* type or name -- the last one seen wins */
		vStringCopy (tempName, ident);
	}
	else if (what == Tok_semi)
	{
		/* the last identifier kept is the property name */
		addTag (tempName, K_PROPERTY);
		vStringClear (tempName);
		toDoNext = &parseMethods;
	}
}
811 
/* State entered after the ':' of "@interface Foo : Bar".  When the token
 * is an identifier and the interface has a tag, the identifier is stored
 * as that tag's inheritance.  Whatever the token, parsing then continues
 * in parseMethods. */
static void parseInterfaceSuperclass (vString * const ident, objcToken what)
{
	tagEntryInfo *parent = getEntryInCorkQueue (parentCorkIndex);

	toDoNext = &parseMethods;

	if (parent == NULL || what != ObjcIDENTIFIER)
		return;

	parent->extensionFields.inheritance = vStringStrdup (ident);
}
820 
/* State entered after the '<' of a protocol list, "@interface Foo <A, B>".
 * Collects the names into a comma separated list and, at '>', attaches it
 * as the "protocols" field of the enclosing tag and of the current
 * category tag, if any. */
static void parseInterfaceProtocolList (vString * const ident, objcToken what)
{
	/* buffer kept across calls; released through the trash box */
	static vString *protocols;

	/* no tag to attach the list to: do not bother collecting it */
	if (parentCorkIndex == CORK_NIL)
	{
		toDoNext = &parseMethods;
		return;
	}

	if (protocols == NULL)
	{
		protocols = vStringNew ();
		DEFAULT_TRASH_BOX(protocols, vStringDelete);
	}

	switch (what)
	{
	case ObjcIDENTIFIER:
		vStringCat(protocols, ident);
		break;

	case Tok_COMA:
		vStringPut (protocols, ',');
		break;

	case Tok_ANGLER:
		attachParserFieldToCorkEntry (parentCorkIndex,
									  ObjcFields [F_PROTOCOLS].ftype,
									  vStringValue (protocols));
		if (categoryCorkIndex != CORK_NIL)
			attachParserFieldToCorkEntry (categoryCorkIndex,
										  ObjcFields [F_PROTOCOLS].ftype,
										  vStringValue (protocols));
		/* ready for the next list */
		vStringClear (protocols);
		toDoNext = &parseMethods;
		break;

	default:
		break;
	}
}
854 
/* State active in the body of an "@interface" or "@protocol": dispatches
 * on the token that starts each kind of member. */
static void parseMethods (vString * const ident CTAGS_ATTR_UNUSED, objcToken what)
{
	/* stay in the current state unless a case below says otherwise */
	parseNext next = toDoNext;

	switch (what)
	{
	case Tok_PLUS:		/* + : class method */
		methodKind = K_CLASSMETHOD;
		next = &parseMethodsName;
		break;

	case Tok_MINUS:		/* - : instance method */
		methodKind = K_METHOD;
		next = &parseMethodsName;
		break;

	case ObjcPROPERTY:	/* @property */
		next = &parseProperty;
		break;

	case ObjcEND:		/* @end */
		popCategoryContext ();
		popEnclosingContext ();
		next = &globalScope;
		break;

	case Tok_CurlL:		/* { : instance variables */
		next = &parseFields;
		break;

	case Tok_dpoint:	/* : superclass */
		next = &parseInterfaceSuperclass;
		break;

	case Tok_PARL:		/* ( : category */
		next = &parseCategory;
		break;

	case Tok_ANGLEL:	/* < : adopted protocols */
		next = &parseInterfaceProtocolList;
		break;

	default:
		break;
	}

	toDoNext = next;
}
899 
900 
/* State entered after "@protocol".  An identifier is tagged as a
 * protocol and becomes the enclosing context; any token moves on to
 * parseMethods. */
static void parseProtocol (vString * const ident, objcToken what)
{
	toDoNext = &parseMethods;

	if (what != ObjcIDENTIFIER)
		return;

	pushEnclosingContextFull (ident, K_PROTOCOL,
							  addTag (ident, K_PROTOCOL));
}
910 
/* State entered after "@implementation".  An identifier is tagged as an
 * implementation and becomes the enclosing context; any token moves on
 * to parseImplemMethods. */
static void parseImplementation (vString * const ident, objcToken what)
{
	toDoNext = &parseImplemMethods;

	if (what != ObjcIDENTIFIER)
		return;

	pushEnclosingContextFull (ident, K_IMPLEMENTATION,
							  addTag (ident, K_IMPLEMENTATION));
}
920 
/* State entered after "@interface".  An identifier is tagged as an
 * interface and becomes the enclosing context; any token moves on to
 * parseMethods. */
static void parseInterface (vString * const ident, objcToken what)
{
	toDoNext = &parseMethods;

	if (what != ObjcIDENTIFIER)
		return;

	pushEnclosingContextFull (ident, K_INTERFACE,
							  addTag (ident, K_INTERFACE));
}
931 
/* State active between the braces of a struct definition: the last
 * identifier before each ';' is tagged as a field.
 *
 * At the closing '}' the struct stops being the enclosing context (as
 * parseEnumFields does for enumerations), so that what follows is not
 * scoped inside the struct, and control returns to comeAfter. */
static void parseStructMembers (vString * const ident, objcToken what)
{
	/* comeAfter as it was before a bracketed part was skipped */
	static parseNext prev = NULL;

	/* back from ignoreBalanced: restore the continuation we borrowed */
	if (prev != NULL)
	{
		comeAfter = prev;
		prev = NULL;
	}

	switch (what)
	{
	case ObjcIDENTIFIER:
		vStringCopy (tempName, ident);
		break;

	case Tok_semi:	/* ';' */
		addTag (tempName, K_FIELD);
		vStringClear (tempName);
		break;

		/* some types are complex, the only one
		 * we will lose is the function type.
		 */
	case Tok_CurlL:	/* '{' */
	case Tok_PARL:	/* '(' */
	case Tok_SQUAREL:	/* '[' */
		toDoNext = &ignoreBalanced;
		prev = comeAfter;
		comeAfter = &parseStructMembers;
		/* count the opening bracket we have just consumed */
		ignoreBalanced (ident, what);
		break;

	case Tok_CurlR:
		/* end of the struct body: leave its scope */
		popEnclosingContext ();
		toDoNext = comeAfter;
		break;

	default:
		/* don't care */
		break;
	}
}
974 
/* True between the name of a struct and the end of its declarator. */
static bool parseStruct_gotName = false;

/* State entered just after the "struct" keyword.
 *
 * The first identifier is the struct name: it is tagged and becomes the
 * enclosing context.  A '{' starts the member list.  A second identifier
 * ("struct foo var") or a ';' (forward declaration) ends the construct:
 * the context is dropped and the token is passed on to comeAfter.
 *
 * parseStruct_gotName is cleared on every way out, '{' included (as
 * parseEnum does for its own flag).  Left set, the name of the next
 * struct would be mistaken for a second identifier and not be tagged. */
static void parseStruct (vString * const ident, objcToken what)
{
	switch (what)
	{
	case ObjcIDENTIFIER:
		if (!parseStruct_gotName)
		{
			addTag (ident, K_STRUCT);
			pushEnclosingContext (ident, K_STRUCT);
			parseStruct_gotName = true;
		}
		else
		{
			parseStruct_gotName = false;
			popEnclosingContext ();
			toDoNext = comeAfter;
			comeAfter (ident, what);
		}
		break;

	case Tok_CurlL:
		toDoNext = &parseStructMembers;
		parseStruct_gotName = false;
		break;

		/* maybe it was just a forward declaration
		 * in which case, we pop the context */
	case Tok_semi:
		if (parseStruct_gotName)
		{
			popEnclosingContext ();
			parseStruct_gotName = false;
		}

		toDoNext = comeAfter;
		comeAfter (ident, what);
		break;

	default:
		/* we don't care */
		break;
	}
}
1016 
1017 /* Parse enumeration members, ignoring potential initialization */
1018 static parseNext parseEnumFields_prev = NULL;
parseEnumFields(vString * const ident,objcToken what)1019 static void parseEnumFields (vString * const ident, objcToken what)
1020 {
1021 	if (parseEnumFields_prev != NULL)
1022 	{
1023 		comeAfter = parseEnumFields_prev;
1024 		parseEnumFields_prev = NULL;
1025 	}
1026 
1027 	switch (what)
1028 	{
1029 	case ObjcIDENTIFIER:
1030 		addTag (ident, K_ENUM);
1031 		parseEnumFields_prev = comeAfter;
1032 		waitedToken = Tok_COMA;
1033 		/* last item might not have a coma */
1034 		fallBackToken = Tok_CurlR;
1035 		fallback = comeAfter;
1036 		comeAfter = parseEnumFields;
1037 		toDoNext = &tillTokenOrFallBack;
1038 		break;
1039 
1040 	case Tok_CurlR:
1041 		toDoNext = comeAfter;
1042 		popEnclosingContext ();
1043 		break;
1044 
1045 	default:
1046 		/* don't care */
1047 		break;
1048 	}
1049 }
1050 
/* True between the name of an enum and its '{' or second identifier. */
static bool parseEnum_named = false;

/* State entered just after the "enum" keyword ("enum ... { ...").
 * The first identifier names the enumeration: it is tagged and becomes
 * the enclosing context.  '{' starts the enumerator list.  A second
 * identifier or a ';' ends the construct and is passed on to comeAfter. */
static void parseEnum (vString * const ident, objcToken what)
{
	if (what == Tok_CurlL)
	{
		/* '{' */
		parseEnum_named = false;
		toDoNext = &parseEnumFields;
		return;
	}

	if (what == ObjcIDENTIFIER && !parseEnum_named)
	{
		/* the name of the enumeration */
		addTag (ident, K_ENUM);
		pushEnclosingContext (ident, K_ENUM);
		parseEnum_named = true;
		return;
	}

	if (what == ObjcIDENTIFIER)
	{
		/* "enum name something": not a definition */
		parseEnum_named = false;
		popEnclosingContext ();
	}
	else if (what == Tok_semi)
	{
		/* ';' */
		if (parseEnum_named)
			popEnclosingContext ();
	}
	else
	{
		/* don't care */
		return;
	}

	/* give the token to whoever sent us here */
	toDoNext = comeAfter;
	comeAfter (ident, what);
}
1090 
1091 /* Parse something like
1092  * typedef .... ident ;
1093  * ignoring the defined type but in the case of struct,
1094  * in which case struct are parsed.
1095  */
parseTypedef(vString * const ident,objcToken what)1096 static void parseTypedef (vString * const ident, objcToken what)
1097 {
1098 	switch (what)
1099 	{
1100 	case ObjcSTRUCT:
1101 		toDoNext = &parseStruct;
1102 		comeAfter = &parseTypedef;
1103 		break;
1104 
1105 	case ObjcENUM:
1106 		toDoNext = &parseEnum;
1107 		comeAfter = &parseTypedef;
1108 		break;
1109 
1110 	case ObjcIDENTIFIER:
1111 		vStringCopy (tempName, ident);
1112 		break;
1113 
1114 	case Tok_semi:	/* ';' */
1115 		addTag (tempName, K_TYPEDEF);
1116 		vStringClear (tempName);
1117 		toDoNext = &globalScope;
1118 		break;
1119 
1120 	default:
1121 		/* we don't care */
1122 		break;
1123 	}
1124 }
1125 
/* True when the token just before was a backslash. */
static bool ignorePreprocStuff_escaped = false;

/* Skipping state for the rest of a preprocessor directive: runs until an
 * end of line that is not immediately preceded by a backslash, then
 * returns to globalScope. */
static void ignorePreprocStuff (vString * const ident CTAGS_ATTR_UNUSED, objcToken what)
{
	if (what == Tok_EOL && !ignorePreprocStuff_escaped)
	{
		/* the directive ends here */
		toDoNext = &globalScope;
		return;
	}

	/* Only a backslash arms the flag; an escaped end of line and any
	 * other token disarm it. */
	ignorePreprocStuff_escaped = (what == Tok_Backslash);
}
1151 
/* State entered after "#define": tags the token as a macro when it is an
 * identifier; the rest of the directive is skipped either way. */
static void parseMacroName (vString * const ident, objcToken what)
{
	toDoNext = &ignorePreprocStuff;

	if (what != ObjcIDENTIFIER)
		return;

	addTag (ident, K_MACRO);
}
1159 
/* State entered after '#': "define" leads to the macro name state, any
 * other directive is skipped to the end of its line. */
static void parsePreproc (vString * const ident, objcToken what)
{
	const bool isDefine = (what == ObjcIDENTIFIER
						   && strcmp (vStringValue (ident), "define") == 0);

	if (isDefine)
		toDoNext = &parseMacroName;
	else
		toDoNext = &ignorePreprocStuff;
}
1176 
/* State entered after a linkage specification such as  extern "C".
 *
 * A '{' (possibly on a following line) opens a linkage block: it is
 * swallowed and comeAfter takes over, so that the declarations inside
 * the block are parsed.  Any other token means the specification applies
 * to a single declaration (extern "C" int f (void);): that token is
 * handed to comeAfter instead of being thrown away, so the parser does
 * not keep discarding input until some unrelated '{' shows up. */
static void skipCurlL (vString * const ident, objcToken what)
{
	/* the brace may sit on the next line */
	if (what == Tok_EOL)
		return;

	toDoNext = comeAfter;

	if (what != Tok_CurlL)
		comeAfter (ident, what);
}
1182 
/* State entered after "extern".  A string literal (linkage specification
 * like "C") leads to skipCurlL; any other token is an ordinary
 * declaration and is given to globalScope right away. */
static void parseCPlusPlusCLinkage (vString * const ident, objcToken what)
{
	if (what == Tok_CSTRING)
	{
		toDoNext = skipCurlL;
		return;
	}

	/* Force handle this ident in globalScope */
	toDoNext = comeAfter;
	globalScope (ident, what);
}
1194 
1195 /* Handle the "strong" top levels, all 'big' declarations
1196  * happen here */
globalScope(vString * const ident,objcToken what)1197 static void globalScope (vString * const ident, objcToken what)
1198 {
1199 	switch (what)
1200 	{
1201 	case Tok_Sharp:
1202 		toDoNext = &parsePreproc;
1203 		break;
1204 
1205 	case ObjcSTRUCT:
1206 		toDoNext = &parseStruct;
1207 		comeAfter = &globalScope;
1208 		break;
1209 
1210 	case ObjcIDENTIFIER:
1211 		/* we keep track of the identifier if we
1212 		 * come across a function. */
1213 		vStringCopy (tempName, ident);
1214 		break;
1215 
1216 	case Tok_PARL:
1217 		/* if we find an opening parenthesis it means we
1218 		 * found a function (or a macro...) */
1219 		addTag (tempName, K_FUNCTION);
1220 		vStringClear (tempName);
1221 		comeAfter = &globalScope;
1222 		toDoNext = &ignoreBalanced;
1223 		ignoreBalanced (ident, what);
1224 		break;
1225 
1226 	case ObjcINTERFACE:
1227 		toDoNext = &parseInterface;
1228 		break;
1229 
1230 	case ObjcIMPLEMENTATION:
1231 		toDoNext = &parseImplementation;
1232 		break;
1233 
1234 	case ObjcPROTOCOL:
1235 		toDoNext = &parseProtocol;
1236 		break;
1237 
1238 	case ObjcTYPEDEF:
1239 		toDoNext = parseTypedef;
1240 		comeAfter = &globalScope;
1241 		break;
1242 
1243 	case Tok_CurlL:
1244 		comeAfter = &globalScope;
1245 		toDoNext = &ignoreBalanced;
1246 		ignoreBalanced (ident, what);
1247 		break;
1248 
1249 	case ObjcEXTERN:
1250 		comeAfter = &globalScope;
1251 		toDoNext = &parseCPlusPlusCLinkage;
1252 		break;
1253 
1254 	case ObjcEND:
1255 	case ObjcPUBLIC:
1256 	case ObjcPROTECTED:
1257 	case ObjcPRIVATE:
1258 
1259 	default:
1260 		/* we don't care */
1261 		break;
1262 	}
1263 }
1264 
1265 /*////////////////////////////////////////////////////////////////
1266 //// Deal with the system                                       */
1267 
findObjcTags(void)1268 static void findObjcTags (void)
1269 {
1270 	vString *name = vStringNew ();
1271 	lexingState st;
1272 	objcToken tok;
1273 
1274 	parentName = vStringNew ();
1275 	tempName = vStringNew ();
1276 	fullMethodName = vStringNew ();
1277 	prevIdent = vStringNew ();
1278 	signature = vStringNewInit ("(");
1279 
1280 	/* (Re-)initialize state variables, this might be a second file */
1281 	comeAfter = NULL;
1282 	fallback = NULL;
1283 	parentType = K_INTERFACE;
1284 	ignoreBalanced_count = 0;
1285 	methodKind = 0;
1286 	parseStruct_gotName = false;
1287 	parseEnumFields_prev = NULL;
1288 	parseEnum_named = false;
1289 	ignorePreprocStuff_escaped = false;
1290 
1291 	st.name = vStringNew ();
1292 	st.cp = readLineFromInputFile ();
1293 	toDoNext = &globalScope;
1294 	tok = lex (&st);
1295 	while (tok != Tok_EOF)
1296 	{
1297 		(*toDoNext) (st.name, tok);
1298 		tok = lex (&st);
1299 	}
1300 	vStringDelete(st.name);
1301 
1302 	vStringDelete (name);
1303 	vStringDelete (parentName);
1304 	vStringDelete (tempName);
1305 	vStringDelete (fullMethodName);
1306 	vStringDelete (prevIdent);
1307 	vStringDelete (signature);
1308 	signature = NULL;
1309 	parentName = NULL;
1310 	tempName = NULL;
1311 	prevIdent = NULL;
1312 	fullMethodName = NULL;
1313 	categoryCorkIndex = CORK_NIL;
1314 	parentCorkIndex = CORK_NIL;
1315 }
1316 
objcInitialize(const langType language)1317 static void objcInitialize (const langType language)
1318 {
1319 	Lang_ObjectiveC = language;
1320 }
1321 
ObjcParser(void)1322 extern parserDefinition *ObjcParser (void)
1323 {
1324 	static const char *const extensions[] = { "mm", "m", "h",
1325 						  NULL };
1326 	static const char *const aliases[] = { "objc", "objective-c",
1327 					       NULL };
1328 	static selectLanguage selectors[] = { selectByObjectiveCAndMatLabKeywords,
1329 					      selectByObjectiveCKeywords,
1330 					      NULL };
1331 	parserDefinition *def = parserNew ("ObjectiveC");
1332 	def->kindTable = ObjcKinds;
1333 	def->kindCount = ARRAY_SIZE (ObjcKinds);
1334 	def->extensions = extensions;
1335 	def->fieldTable = ObjcFields;
1336 	def->fieldCount = ARRAY_SIZE (ObjcFields);
1337 	def->aliases = aliases;
1338 	def->parser = findObjcTags;
1339 	def->initialize = objcInitialize;
1340 	def->selectLanguage = selectors;
1341 	def->keywordTable = objcKeywordTable;
1342 	def->keywordCount = ARRAY_SIZE (objcKeywordTable);
1343 	def->useCork = CORK_QUEUE;
1344 	return def;
1345 }
1346