xref: /Universal-ctags/parsers/asm.c (revision aaaac7eeac8399141aa8e6d9e6ec0379931848b2)
1 /*
2 *   Copyright (c) 2000-2003, Darren Hiebert
3 *
4 *   This source code is released for free distribution under the terms of the
5 *   GNU General Public License version 2 or (at your option) any later version.
6 *
7 *   This module contains functions for generating tags for assembly language
8 *   files.
9 */
10 
11 /*
12 *   INCLUDE FILES
13 */
14 #include "general.h"  /* must always come first */
15 
16 #include <string.h>
17 
18 #include "cpreprocessor.h"
19 #include "debug.h"
20 #include "entry.h"
21 #include "keyword.h"
22 #include "parse.h"
23 #include "read.h"
24 #include "routines.h"
25 #include "selectors.h"
26 #include "vstring.h"
27 
28 /*
29 *   DATA DECLARATIONS
30 */
/* Kinds of tags produced by this parser.  The non-negative enumerators are
 * listed in the same order as the entries of AsmKinds and are passed to
 * makeSimpleTag()/makeSimpleRefTag() as the kind of the tag to create.
 * The negative enumerators never become the kind of a tag. */
typedef enum {
	K_PSUEDO_MACRO_END = -2,	/* assigned to OP_ENDM in OpKinds; makeAsmTag() uses it to close a macro scope */
	K_NONE = -1, K_DEFINE, K_LABEL, K_MACRO, K_TYPE,	/* K_NONE: "emit no tag" */
	K_SECTION,
	K_PARAM,	/* macro parameter; emitted by readMacroParameters() */
} AsmKind;
37 
/* Identifiers of the operators and directives listed in AsmKeywords.
 * A value other than OP_UNDEFINED is used directly as an index into OpKinds
 * (see operatorKind()), so the enumerators and the OpKinds entries must stay
 * in the same order. */
typedef enum {
	OP_UNDEFINED = -1,	/* operatorKind() treats this value as "not a known keyword" */
	OP_ALIGN,
	OP_COLON_EQUAL,
	OP_END,
	OP_ENDM,
	OP_ENDMACRO,
	OP_ENDP,
	OP_ENDS,
	OP_EQU,
	OP_EQUAL,
	OP_LABEL,
	OP_MACRO,
	OP_PROC,
	OP_RECORD,
	OP_SECTIONS,
	OP_SECTION,
	OP_SET,
	OP_STRUCT,
	OP_LAST	/* number of real keywords; has no entry in OpKinds */
} opKeyword;
59 
/* Roles of the "section" kind; the enumerators are listed in the same order
 * as the entries of asmSectionRoles. */
typedef enum {
	ASM_SECTION_PLACEMENT,	/* role given to the reference tag made for a section directive in makeAsmTag() */
} asmSectionRole;
63 
/* One row of the OpKinds table: pairs a keyword with the kind of tag
 * (or one of the negative AsmKind markers) it produces. */
typedef struct {
	opKeyword keyword;	/* checked against the row index by an Assert in operatorKind() */
	AsmKind kind;
} opKind;
68 
/* Indexes into the AsmFields table. */
typedef enum {
	F_PROPERTIES,	/* the "properties" field attached to macro parameters */
} asmField;
72 
/* Parser specific fields, indexed by asmField.  The table is handed to the
 * parser definition as def->fieldTable in AsmParser(); readMacroParameters()
 * reads the .ftype member of an entry when attaching a value. */
static fieldDefinition AsmFields[] = {
	{ .name = "properties",
	  .description = "properties (req, vararg for parameters)",
	  .enabled = true },
};
78 
79 /*
80 *   DATA DEFINITIONS
81 */
/* Language id of this parser; stored by initialize() and passed to
 * lookupKeyword() in analyzeOperator(). */
static langType Lang_asm;
83 
/* Roles attached to the "section" kind (see ATTACH_ROLES in AsmKinds);
 * entries are in the order of the asmSectionRole enumerators. */
static roleDefinition asmSectionRoles [] = {
	{ true, "placement", "placement where the assembled code goes" },
};
87 
/* Kind table handed to the parser definition as def->kindTable.  Entries are
 * in the order of the non-negative AsmKind enumerators (K_DEFINE first,
 * K_PARAM last).
 * NOTE(review): the positional initializers look like
 * { enabled, letter, name, description } -- confirm against kindDefinition. */
static kindDefinition AsmKinds [] = {
	{ true, 'd', "define", "defines" },
	{ true, 'l', "label",  "labels"  },
	{ true, 'm', "macro",  "macros"  },
	{ true, 't', "type",   "types (structs and records)"   },
	/* sections are marked reference-only and carry the roles in asmSectionRoles */
	{ true, 's', "section",   "sections",
	  .referenceOnly = true, ATTACH_ROLES(asmSectionRoles)},
	{ false,'z', "parameter", "parameters for a macro" },
};
97 
/* Spelling -> opKeyword table handed to the parser definition as
 * def->keywordTable.  All spellings are lower case; analyzeOperator()
 * lower-cases the text before looking it up.  Several spellings
 * ("equ", "equiv", "eqv") share the keyword OP_EQU. */
static const keywordTable AsmKeywords [] = {
	{ "align",    OP_ALIGN       },
	{ "endmacro", OP_ENDMACRO    },
	{ "endm",     OP_ENDM        },
	{ "end",      OP_END         },
	{ "endp",     OP_ENDP        },
	{ "ends",     OP_ENDS        },
	{ "equ",      OP_EQU         },
	{ "label",    OP_LABEL       },
	{ "macro",    OP_MACRO       },
	{ ":=",       OP_COLON_EQUAL },
	{ "=",        OP_EQUAL       },
	{ "proc",     OP_PROC        },
	{ "record",   OP_RECORD      },
	{ "sections", OP_SECTIONS    },

	/* These are used in GNU as. */
	{ "section",  OP_SECTION     },
	{ "equiv",    OP_EQU         },
	{ "eqv",      OP_EQU         },

	{ "set",      OP_SET         },
	{ "struct",   OP_STRUCT      }
};
122 
/* Maps each keyword to the kind of tag it produces.  operatorKind() indexes
 * this table with the opKeyword value and asserts that the row's keyword
 * member equals the index.  K_NONE rows produce no tag. */
static const opKind OpKinds [] = {
	/* must be ordered same as opKeyword enumeration */
	{ OP_ALIGN,       K_NONE   },
	{ OP_COLON_EQUAL, K_DEFINE },
	{ OP_END,         K_NONE   },
	{ OP_ENDM,        K_PSUEDO_MACRO_END },	/* closes the current macro scope in makeAsmTag() */
	{ OP_ENDMACRO,    K_NONE   },
	{ OP_ENDP,        K_NONE   },
	{ OP_ENDS,        K_NONE   },
	{ OP_EQU,         K_DEFINE },
	{ OP_EQUAL,       K_DEFINE },
	{ OP_LABEL,       K_LABEL  },
	{ OP_MACRO,       K_MACRO  },
	{ OP_PROC,        K_LABEL  },
	{ OP_RECORD,      K_TYPE   },
	{ OP_SECTIONS,    K_NONE   },
	{ OP_SECTION,     K_SECTION },
	{ OP_SET,         K_DEFINE },
	{ OP_STRUCT,      K_TYPE   }
};
143 
144 /*
145 *   FUNCTION DEFINITIONS
146 */
analyzeOperator(const vString * const op)147 static opKeyword analyzeOperator (const vString *const op)
148 {
149 	vString *keyword = vStringNew ();
150 	opKeyword result;
151 
152 	vStringCopyToLower (keyword, op);
153 	result = (opKeyword) lookupKeyword (vStringValue (keyword), Lang_asm);
154 	vStringDelete (keyword);
155 	return result;
156 }
157 
/*
 *  Tells whether `c' may start a symbol: a letter, '_' or '$'.
 *  The NUL character is rejected explicitly because strchr() would
 *  otherwise match the terminator of the character set string.
 */
static bool isInitialSymbolCharacter (int c)
{
	if (c == '\0')
		return false;
	if (isalpha (c))
		return true;
	return strchr ("_$", c) != NULL;
}
162 
/*
 *  Tells whether `c' may appear inside a symbol: a letter, a digit,
 *  '_', '$' or '?'.  The NUL character is never a symbol character.
 */
static bool isSymbolCharacter (int c)
{
	if (c == '\0')
		return false;
	if (isalnum (c))
		return true;
	/* '?' character is allowed in AMD 29K family */
	return strchr ("_$?", c) != NULL;
}
168 
operatorKind(const vString * const operator,bool * const found)169 static AsmKind operatorKind (
170 		const vString *const operator,
171 		bool *const found)
172 {
173 	AsmKind result = K_NONE;
174 	const opKeyword kw = analyzeOperator (operator);
175 	*found = (bool) (kw != OP_UNDEFINED);
176 	if (*found)
177 	{
178 		result = OpKinds [kw].kind;
179 		Assert (OpKinds [kw].keyword == kw);
180 	}
181 	return result;
182 }
183 
184 /*  We must check for "DB", "DB.L", "DCB.W" (68000)
185  */
isDefineOperator(const vString * const operator)186 static bool isDefineOperator (const vString *const operator)
187 {
188 	const unsigned char *const op =
189 		(unsigned char*) vStringValue (operator);
190 	const size_t length = vStringLength (operator);
191 	const bool result = (bool) (length > 0  &&
192 		toupper ((int) *op) == 'D'  &&
193 		(length == 2 ||
194 		 (length == 4  &&  (int) op [2] == '.') ||
195 		 (length == 5  &&  (int) op [3] == '.')));
196 	return result;
197 }
198 
makeAsmTag(const vString * const name,const vString * const operator,const bool labelCandidate,const bool nameFollows,const bool directive,int * scope)199 static int makeAsmTag (
200 		const vString *const name,
201 		const vString *const operator,
202 		const bool labelCandidate,
203 		const bool nameFollows,
204 		const bool directive,
205 		int *scope)
206 {
207 	int r = CORK_NIL;
208 
209 	if (vStringLength (name) > 0)
210 	{
211 		bool found;
212 		const AsmKind kind = operatorKind (operator, &found);
213 		if (found)
214 		{
215 			if (kind > K_NONE)
216 				r = makeSimpleTag (name, kind);
217 		}
218 		else if (isDefineOperator (operator))
219 		{
220 			if (! nameFollows)
221 				r = makeSimpleTag (name, K_DEFINE);
222 		}
223 		else if (labelCandidate)
224 		{
225 			operatorKind (name, &found);
226 			if (! found)
227 				r = makeSimpleTag (name, K_LABEL);
228 		}
229 		else if (directive)
230 		{
231 			bool found_dummy;
232 			const AsmKind kind_for_directive = operatorKind (name, &found_dummy);
233 			tagEntryInfo *macro_tag;
234 
235 			switch (kind_for_directive)
236 			{
237 			case K_NONE:
238 				break;
239 			case K_MACRO:
240 				r = makeSimpleTag (operator, kind_for_directive);
241 				macro_tag = getEntryInCorkQueue (r);
242 				if (macro_tag)
243 				{
244 					macro_tag->extensionFields.scopeIndex = *scope;
245 					registerEntry (r);
246 					*scope = r;
247 				}
248 				break;
249 			case K_PSUEDO_MACRO_END:
250 				macro_tag = getEntryInCorkQueue (*scope);
251 				if (macro_tag)
252 				{
253 					macro_tag->extensionFields.endLine = getInputLineNumber ();
254 					*scope = macro_tag->extensionFields.scopeIndex;
255 				}
256 				break;
257 			case K_SECTION:
258 				r = makeSimpleRefTag (operator,
259 									  kind_for_directive,
260 									  ASM_SECTION_PLACEMENT);
261 				break;
262 			default:
263 				r = makeSimpleTag (operator, kind_for_directive);
264 			}
265 		}
266 	}
267 	return r;
268 }
269 
/*
 *  Reads a symbol starting at `start' into `sym'.
 *
 *  `sym' is always cleared first.  If the first character cannot begin a
 *  symbol, nothing is read and `start' is returned; otherwise symbol
 *  characters are copied until the first character that is not one.
 *  Returns the position just past the last character consumed.
 */
static const unsigned char *readSymbol (
		const unsigned char *const start,
		vString *const sym)
{
	const unsigned char *cursor = start;

	vStringClear (sym);
	if (! isInitialSymbolCharacter ((int) *cursor))
		return cursor;

	for ( ; isSymbolCharacter ((int) *cursor); ++cursor)
		vStringPut (sym, *cursor);

	return cursor;
}
286 
/*
 *  Reads an operator starting at `start' into `operator'.
 *
 *  `operator' is cleared first; then every character up to, but not
 *  including, the first white space, comma or end of string is copied.
 *  Returns the position of the character that stopped the scan.
 */
static const unsigned char *readOperator (
		const unsigned char *const start,
		vString *const operator)
{
	const unsigned char *cursor;

	vStringClear (operator);
	for (cursor = start;
		 *cursor != '\0'  &&  ! isspace ((int) *cursor)  &&  *cursor != ',';
		 ++cursor)
		vStringPut (operator, *cursor);

	return cursor;
}
300 
asmReadLineFromInputFile(void)301 static const unsigned char *asmReadLineFromInputFile (void)
302 {
303 	static vString *line;
304 	int c;
305 
306 	line = vStringNewOrClear (line);
307 
308 	while ((c = cppGetc()) != EOF)
309 	{
310 		if (c == '\n')
311 			break;
312 		else if (c == STRING_SYMBOL || c == CHAR_SYMBOL)
313 		{
314 			/* We cannot store these values to vString
315 			 * Store a whitespace as a dummy value for them.
316 			 */
317 			vStringPut (line, ' ');
318 		}
319 		else
320 			vStringPut (line, c);
321 	}
322 
323 	if ((vStringLength (line) == 0)&& (c == EOF))
324 		return NULL;
325 	else
326 		return (unsigned char *)vStringValue (line);
327 }
328 
/*
 *  Parses the parameter list of a macro definition and tags each parameter.
 *
 *  index - cork index of the macro tag; recorded as the scope of every
 *          parameter tag
 *  e     - the macro tag itself (must not be NULL); receives the signature
 *  cp    - the rest of the source line, right after the macro name
 *
 *  Every symbol found yields a K_PARAM tag carrying its position (nth) in
 *  the list.  A ":req" or ":vararg" qualifier is attached to the parameter
 *  tag as the "properties" field, and a "=default" part is copied as is.
 *  The names, qualifiers and defaults are joined with single spaces to
 *  form the signature stored in the macro tag; nothing is stored when no
 *  parameter was found.
 */
static void  readMacroParameters (int index, tagEntryInfo *e, const unsigned char *cp)
{
	vString *name = vStringNew ();
	vString *signature = vStringNew ();
	int nth = 0;

	/* Skip a comma found right at the start of the list. */
	if (*cp == ',')
		++cp;

	while (*cp)
	{
		const unsigned char *tmp;
		/* Tag of the current parameter.  It has its own name so that it
		 * does not hide `e', the macro tag updated after the loop. */
		tagEntryInfo *param = NULL;

		while (isspace ((int) *cp))
			++cp;

		tmp = cp;
		cp = readSymbol (cp, name);
		if (cp == tmp)
			break;	/* no symbol here: the list is over */

		{
			int r = makeSimpleTag (name, K_PARAM);
			param = getEntryInCorkQueue (r);
			if (param)
			{
				param->extensionFields.scopeIndex = index;
				param->extensionFields.nth = nth++;
			}
			if (vStringLength (signature) > 0 && vStringLast (signature) != ' ')
				vStringPut (signature, ' ');
			vStringCat (signature, name);
		}

		if (*cp == ':')
		{
			/* Qualifier: only "req" and "vararg" are recorded. */
			cp++;
			if (strncmp((const char *)cp, "req" ,3) == 0)
			{
				cp += 3;
				if (param)
					attachParserField (param, true, AsmFields[F_PROPERTIES].ftype,
									   "req");
				vStringCatS (signature, ":req");
			}
			else if (strncmp((const char *)cp, "vararg", 6) == 0)
			{
				cp += 6;
				if (param)
					attachParserField (param, true, AsmFields[F_PROPERTIES].ftype,
									   "vararg");
				vStringCatS (signature, ":vararg");
			}
			/* Skip whatever remains of the qualifier. */
			cp = (const unsigned char *)strpbrk ((const char *)cp , " \t,=");
			if (cp == NULL)
				break;
		}
		if (*cp == '=')
		{
			/* Default value: copy "=value" up to the next separator. */
			const unsigned char *start = cp;
			cp = (const unsigned char *)strpbrk ((const char *)cp , " \t,");

			if (cp)
				vStringNCatS (signature, (const char *)start, cp - start);
			else
			{
				vStringCatS (signature, (const char *)start);
				break;
			}
		}

		while (isspace ((int) *cp))
			++cp;

		if (*cp == ',')
			cp++;
	}

	if (vStringLength (signature) > 0)
	{
		/* The macro tag takes over the signature's buffer. */
		e->extensionFields.signature = vStringDeleteUnwrap (signature);
		signature = NULL;
	}
	vStringDelete (signature);	/* NULL is acceptable. */
	vStringDelete (name);
}
416 
findAsmTags(void)417 static void findAsmTags (void)
418 {
419 	vString *name = vStringNew ();
420 	vString *operator = vStringNew ();
421 	const unsigned char *line;
422 
423 	cppInit (false, false, false, false,
424 			 KIND_GHOST_INDEX, 0, 0, KIND_GHOST_INDEX, KIND_GHOST_INDEX, 0, 0,
425 			 FIELD_UNKNOWN);
426 
427 	 int scope = CORK_NIL;
428 
429 	while ((line = asmReadLineFromInputFile ()) != NULL)
430 	{
431 		const unsigned char *cp = line;
432 		bool labelCandidate = (bool) (! isspace ((int) *cp));
433 		bool nameFollows = false;
434 		bool directive = false;
435 		const bool isComment = (bool)
436 				(*cp != '\0' && strchr (";*@", *cp) != NULL);
437 
438 		/* skip comments */
439 		if (isComment)
440 			continue;
441 
442 		/* skip white space */
443 		while (isspace ((int) *cp))
444 			++cp;
445 
446 		/* read symbol */
447 		if (*cp == '.')
448 		{
449 			directive = true;
450 			labelCandidate = false;
451 			++cp;
452 		}
453 
454 		cp = readSymbol (cp, name);
455 		if (vStringLength (name) > 0)
456 		{
457 			if (*cp == ':')
458 			{
459 				labelCandidate = true;
460 				++cp;
461 			}
462 			else if (anyKindEntryInScope (CORK_NIL,
463 										  vStringValue (name),
464 										  K_MACRO, true))
465 				labelCandidate = false;
466 		}
467 
468 		if (! isspace ((int) *cp)  &&  *cp != '\0')
469 			continue;
470 
471 		/* skip white space */
472 		while (isspace ((int) *cp))
473 			++cp;
474 
475 		/* skip leading dot */
476 #if 0
477 		if (*cp == '.')
478 			++cp;
479 #endif
480 
481 		cp = readOperator (cp, operator);
482 
483 		/* attempt second read of symbol */
484 		if (vStringLength (name) == 0)
485 		{
486 			while (isspace ((int) *cp))
487 				++cp;
488 			cp = readSymbol (cp, name);
489 			nameFollows = true;
490 		}
491 		int r = makeAsmTag (name, operator, labelCandidate, nameFollows, directive, &scope);
492 		tagEntryInfo *e = getEntryInCorkQueue (r);
493 		if (e && e->kindIndex == K_MACRO && isRoleAssigned(e, ROLE_DEFINITION_INDEX))
494 			readMacroParameters (r, e, cp);
495 	}
496 
497 	cppTerminate ();
498 
499 	vStringDelete (name);
500 	vStringDelete (operator);
501 }
502 
initialize(const langType language)503 static void initialize (const langType language)
504 {
505 	Lang_asm = language;
506 }
507 
AsmParser(void)508 extern parserDefinition* AsmParser (void)
509 {
510 	static const char *const extensions [] = {
511 		"asm", "ASM", "s", "S", NULL
512 	};
513 	static const char *const patterns [] = {
514 		"*.A51",
515 		"*.29[kK]",
516 		"*.[68][68][kKsSxX]",
517 		"*.[xX][68][68]",
518 		NULL
519 	};
520 	static selectLanguage selectors[] = { selectByArrowOfR,
521 					      NULL };
522 
523 	parserDefinition* def = parserNew ("Asm");
524 	def->kindTable      = AsmKinds;
525 	def->kindCount  = ARRAY_SIZE (AsmKinds);
526 	def->extensions = extensions;
527 	def->patterns   = patterns;
528 	def->parser     = findAsmTags;
529 	def->initialize = initialize;
530 	def->keywordTable = AsmKeywords;
531 	def->keywordCount = ARRAY_SIZE (AsmKeywords);
532 	def->selectLanguage = selectors;
533 	def->useCork = CORK_QUEUE | CORK_SYMTAB;
534 	def->fieldTable = AsmFields;
535 	def->fieldCount = ARRAY_SIZE (AsmFields);
536 	return def;
537 }
538