xref: /Universal-ctags/parsers/m4.c (revision 3671ad7255885a0c8f6ff4979d80c70f201ea411)
1 /*
2  *   Copyright (c) 2011, Colomban Wendling <colomban@geany.org>
3  *
4  *   This source code is released for free distribution under the terms of the
5  *   GNU General Public License version 2 or (at your option) any later version.
6  *
7  *   This module contains functions for generating tags for M4.
8  */
9 
10 #include "general.h"	/* must always come first */
11 
12 #include <ctype.h>
13 #include <string.h>
14 #include <stdio.h>
15 
16 #include "entry.h"
17 #include "htable.h"
18 #include "keyword.h"
19 #include "m4.h"
20 #include "parse.h"
21 #include "read.h"
22 #include "vstring.h"
23 
24 
/* Tag kinds produced by this parser, listed in the same order as the
 * entries of M4Kinds[]. */
enum M4Kind {
	M4_MACRO_KIND,		/* a macro name */
	M4_MACROFILE_KIND,	/* a file named in include()/sinclude() */
};
29 
/* Reference roles of the macro kind, in the order of M4MacroRoles[]. */
enum M4MacroRole {
	M4_MACRO_ROLE_UNDEF,	/* the macro is named in undefine() */
};
33 
/* Reference roles of the macrofile kind, in the order of
 * M4MacrofileRoles[]. */
enum M4MacrofileRole {
	M4_MACROFILE_ROLE_INCLUDED,		/* named in include() */
	M4_MACROFILE_ROLE_SILENTLY_INCLUDED,	/* named in sinclude() */
};
38 
39 
40 static roleDefinition M4MacroRoles [] = {
41 	{ true, "undef", "undefined" },
42 };
43 
44 static roleDefinition M4MacrofileRoles [] = {
45 	{ true, "included", "included macro" },
46 	{ true, "sincluded", "silently included macro" },
47 };
48 
49 static kindDefinition M4Kinds[] = {
50 	{ true, 'd', "macro", "macros",
51 	  .referenceOnly = false, ATTACH_ROLES(M4MacroRoles) },
52 	{ true, 'I', "macrofile", "macro files",
53 	  .referenceOnly = true, ATTACH_ROLES(M4MacrofileRoles) },
54 };
55 
/* Identifiers of the builtin macros this parser handles itself;
 * these are the values stored in m4KeywordTable[]. */
typedef enum {
	KEYWORD_define,		/* define(...) */
	KEYWORD_undefine,	/* undefine(...) */
	KEYWORD_include,	/* include(...) */
	KEYWORD_sinclude,	/* sinclude(...) */
	KEYWORD_changequote,	/* changequote(...) */
} m4KeywordId;
63 
64 /* TODO: ideally "m4_" prefix keywords should be
65    installed and handled in Autoconf parser. */
66 static const keywordTable m4KeywordTable[] = {
67 #define ENTRY(K) \
68 	{ #K, KEYWORD_##K }, \
69 	{ "m4_" #K, KEYWORD_##K }
70 	ENTRY(define),
71 	ENTRY(undefine),
72 	ENTRY(include),
73 	ENTRY(sinclude),
74 	ENTRY(changequote),
75 };
76 
77 
78 /* Quote handling */
79 
80 /* TODO: Characters are assumed for quoting.
81    However, m4 allows strings. */
/* Current open/close quote delimiters; 0 means "no quote character".
 *
 * They are kept as unsigned char so that a delimiter byte >= 0x80 compares
 * equal to the corresponding input character.  With plain char such a
 * delimiter is negative on platforms where char is signed and could never
 * match.
 * NOTE(review): this assumes getcFromInputFile() yields bytes as
 * non-negative ints (like getc()) -- confirm against read.h. */
static unsigned char m4QuoteOpen = 0;
static unsigned char m4QuoteClose = 0;

/* Installs openQuote/closeQuote as the active quote delimiters.
 * Passing 0 for a delimiter leaves that side without a quote character. */
extern void setM4Quotes(char openQuote, char closeQuote)
{
	m4QuoteOpen = (unsigned char) openQuote;
	m4QuoteClose = (unsigned char) closeQuote;
}

/* gets the close quote corresponding to openQuote.
 * return 0 if openQuote is not a valid open quote */
static int getCloseQuote(int openQuote)
{
	if (openQuote == m4QuoteOpen)
	{
		return m4QuoteClose;
	}
	return 0;
}
101 
/* Skips a quoted string, honouring nested quotes.
 *
 * c is the character just read from the input.  If it is not the current
 * open quote nothing is consumed.  Otherwise input is consumed up to and
 * including the matching close quote, or up to EOF if the quote is never
 * closed. */
static void skipQuotes(int c)
{
	/* c itself is the opening quote, so we start one level deep.  Counting
	 * it up-front (instead of feeding it through the loop below) matters
	 * when the open and close quotes are the same character: the
	 * close-before-open test would otherwise decrement the unsigned depth
	 * from 0 and wrap it around, swallowing the rest of the file. */
	unsigned int depth = 1;
	int openQuote, closeQuote;

	closeQuote = getCloseQuote(c);
	if (! closeQuote)
		return;
	openQuote = c;

	while (depth > 0 && (c = getcFromInputFile()) != EOF)
	{
		/* close before open to support open and close characters
		 * to be the same */
		if (c == closeQuote)
			depth --;
		else if (c == openQuote)
			depth ++;
	}
}
123 
124 
125 /* parser */
126 
127 #define IS_WORD(c) (isalnum(c) || (c) == '_')
128 
129 /* reads a possibly quoted word.  word characters are those passing IS_WORD() */
readQuotedWord(vString * const name)130 static void readQuotedWord(vString *const name)
131 {
132 	unsigned int depth = 0;
133 	int openQuote = 0, closeQuote = 0;
134 	int c = getcFromInputFile();
135 
136 	closeQuote = getCloseQuote(c);
137 	if (closeQuote != 0)
138 	{
139 		openQuote = c;
140 		depth ++;
141 		c = getcFromInputFile();
142 	}
143 
144 	for (; c != EOF; c = getcFromInputFile())
145 	{
146 		/* don't allow embedded NULs, and prevents to match when quote == 0 (aka none) */
147 		if (c == 0)
148 			break;
149 		/* close before open to support open and close characters to be the same */
150 		else if (c == closeQuote)
151 			depth --;
152 		else if (c == openQuote)
153 			depth ++;
154 		else if (IS_WORD(c) || depth > 0)
155 			vStringPut(name, c);
156 		else
157 		{
158 			ungetcToInputFile(c);
159 			break;
160 		}
161 	}
162 }
163 
/* Tells whether c is a line ending.
 *
 * For '\r' the next input character is read as well: a following '\n' is
 * consumed as part of the same line ending, anything else is pushed back.
 * Returns true for '\n' and '\r', false for every other character. */
static bool skipLineEnding(int c)
{
	if (c == '\r')
	{
		/* try to eat the `\n' of a `\r\n' sequence */
		const int next = getcFromInputFile();

		if (next != '\n')
			ungetcToInputFile(next);
		return true;
	}

	return c == '\n';
}
179 
/* Consumes input up to and including the next occurrence of ch.
 *
 * Stops early at EOF.  When oneLine is true it also stops once a line
 * ending has been consumed. */
static void skipToCharacter(int ch, bool oneLine)
{
	for (;;)
	{
		const int c = getcFromInputFile();

		if (c == EOF || c == ch)
			return;
		if (oneLine && skipLineEnding(c))
			return;
	}
}
192 
/* Consumes the rest of the current line, starting with the already read
 * character c, up to and including its line ending (or up to EOF). */
static void skipLine(int c)
{
	while (c != EOF && ! skipLineEnding(c))
		c = getcFromInputFile();
}
201 
maySwitchLanguage(const char * token)202 static m4Subparser * maySwitchLanguage (const char* token)
203 {
204 	subparser *tmp;
205 	m4Subparser *m4found = NULL;
206 
207 	foreachSubparser (tmp, false)
208 	{
209 		m4Subparser *m4tmp = (m4Subparser *)tmp;
210 
211 		enterSubparser(tmp);
212 		if (m4tmp->probeLanguage
213 			&& m4tmp->probeLanguage (m4tmp, token))
214 		{
215 			chooseExclusiveSubparser (tmp, NULL);
216 			m4found = m4tmp;
217 		}
218 		leaveSubparser();
219 
220 		if (m4found)
221 			break;
222 	}
223 
224 	return m4found;
225 }
226 
227 /* reads everything in a macro argument
228  * return true if there are more args, false otherwise */
readM4MacroArgument(vString * const arg)229 extern bool readM4MacroArgument(vString *const arg)
230 {
231 	int c;
232 
233 	/* discard leading blanks */
234 	while ((c = getcFromInputFile()) != EOF && isspace(c))
235 		;
236 
237 	for (; c != EOF; c = getcFromInputFile())
238 	{
239 		if (c == ',' || c == ')')
240 		{
241 			ungetcToInputFile(c);
242 			return c == ',';
243 		}
244 		else if (getCloseQuote(c) != 0)
245 		{
246 			ungetcToInputFile(c);
247 			readQuotedWord(arg);
248 		}
249 		else
250 			vStringPut(arg, c);
251 	}
252 
253 	return false;
254 }
255 
handleM4Changequote(void)256 static void handleM4Changequote(void)
257 {
258 	vString *const arg = vStringNew();
259 	char args[2] = {0,0};
260 	int i, n = (sizeof(args) / sizeof(args[0]));
261 	bool more = true;
262 
263 	for (i = 0; more && i < n; i++)
264 	{
265 		const char *v;
266 
267 		vStringClear(arg);
268 		more = readM4MacroArgument(arg);
269 		if (more)
270 			getcFromInputFile();
271 		v = vStringValue(arg);
272 		if (! v[0] || v[1])
273 			break;
274 		else
275 			args[i] = *v;
276 	}
277 
278 	if (! more)
279 	{
280 		if (args[0] && args[1])
281 			setM4Quotes (args[0], args[1]);
282 		else if (args[1])
283 			setM4Quotes (args[0], '\'');
284 		else if (args[0])
285 			setM4Quotes ('\0', '\0');
286 		else
287 			setM4Quotes ('`', '\'');
288 	}
289 
290 	vStringDelete(arg);
291 }
292 
doesQuoteStart(int c)293 static bool doesQuoteStart (int c)
294 {
295 	return (c == m4QuoteOpen);
296 }
297 
/* Tells whether a line comment starts here.
 *
 * m4 may be NULL.  If it provides a doesLineCommentStart callback, that
 * callback is asked first (with c and token).  Independently of the
 * subparser, the token "dnl" always starts a line comment. */
static bool doesLineCommentStart (m4Subparser *m4, int c, char *token)
{
	bool claimed = false;

	if (m4 != NULL && m4->doesLineCommentStart != NULL)
	{
		enterSubparser ((subparser *)m4);
		claimed = m4->doesLineCommentStart (m4, c, token);
		leaveSubparser ();
	}

	return claimed || strcmp(token, "dnl") == 0;
}
312 
/* Tells whether c starts a string literal.
 *
 * The decision is entirely delegated to the subparser's
 * doesStringLiteralStart callback; without a subparser or callback the
 * answer is false. */
static bool doesStringLiteralStart (m4Subparser *m4, int c)
{
	bool starts;

	if (m4 == NULL || m4->doesStringLiteralStart == NULL)
		return false;

	enterSubparser ((subparser *)m4);
	starts = m4->doesStringLiteralStart (m4, c);
	leaveSubparser ();

	return starts;
}
325 
/* Tells the subparser that a call of the macro named token was found.
 *
 * Returns the value of the subparser's newMacroNotify callback (used by
 * the caller as a cork index), or CORK_NIL when there is no subparser or
 * it does not provide the callback.  The callback is treated as optional,
 * like the other m4Subparser callbacks used in this file. */
static int notifyNewMacro (m4Subparser *m4, const char *token)
{
	int index;

	if (m4 == NULL || m4->newMacroNotify == NULL)
		return CORK_NIL;

	enterSubparser ((subparser *)m4);
	index = m4->newMacroNotify (m4, token);
	leaveSubparser ();

	return index;
}
336 
337 /* tag creation */
338 
makeM4RefTag(int kind,const vString * const name,int role)339 static int makeM4RefTag(int kind, const vString *const name, int role)
340 {
341 	tagEntryInfo e;
342 
343 	if (vStringLength(name) <= 0)
344 		return CORK_NIL;
345 
346 	initRefTagEntry (&e, vStringValue(name), kind, role);
347 
348 	return makeTagEntry(&e);
349 }
350 
makeM4Tag(int kind,int role)351 static int makeM4Tag (int kind, int role)
352 {
353 	int index = CORK_NIL;
354 	vString *name = NULL;
355 
356 	if (kind == M4_MACRO_KIND)
357 	{
358 		if (role == ROLE_DEFINITION_INDEX)
359 		{
360 			name = vStringNew();
361 			readM4MacroArgument(name);
362 			index = makeM4RefTag (kind, name, role);
363 		}
364 		else if (role == M4_MACRO_ROLE_UNDEF)
365 		{
366 			name = vStringNew();
367 			while (true)
368 			{
369 				bool more = readM4MacroArgument(name);
370 				/* TODO: The cork indexes are thrown away here.
371 				   `end' field cannot be attached to multiple
372 				   indexes. */
373 				makeM4RefTag (kind, name, role);
374 				vStringClear (name);
375 				if (more)
376 					getcFromInputFile ();
377 				else
378 					break;
379 			}
380 
381 		}
382 	}
383 	else if (kind == M4_MACROFILE_KIND)
384 	{
385 		name = vStringNew();
386 		readM4MacroArgument(name);
387 		index = makeM4RefTag (kind, name, role);
388 	}
389 
390 	if (name)
391 		vStringDelete (name);
392 
393 	return index;
394 }
395 
396 struct newMacroResult
397 {
398 	int index;
399 	bool consumed;
400 };
401 
newMacroM4(const char * token)402 static struct newMacroResult newMacroM4 (const char* token)
403 {
404 	static langType lang = LANG_IGNORE;
405 	struct newMacroResult result = {
406 		.index = CORK_NIL,
407 		.consumed = false,
408 	};
409 
410 	int keyword;
411 	int role = ROLE_DEFINITION_INDEX;
412 	int kind = -1;
413 
414 	if (lang == LANG_IGNORE)
415 		lang = getNamedLanguage ("M4", 0);
416 	keyword = lookupKeyword (token, lang);
417 
418 	switch (keyword)
419 	{
420 	case KEYWORD_NONE:
421 		break;
422 	case KEYWORD_define:
423 		kind = M4_MACRO_KIND;
424 		role = ROLE_DEFINITION_INDEX;
425 		result.consumed = true;
426 		break;
427 	case KEYWORD_undefine:
428 		kind = M4_MACRO_KIND;
429 		role = M4_MACRO_ROLE_UNDEF;
430 		result.consumed = true;
431 		break;
432 	case KEYWORD_include:
433 		kind = M4_MACROFILE_KIND;
434 		role = M4_MACROFILE_ROLE_INCLUDED;
435 		result.consumed = true;
436 		break;
437 	case KEYWORD_sinclude:
438 		kind = M4_MACROFILE_KIND;
439 		role = M4_MACROFILE_ROLE_SILENTLY_INCLUDED;
440 		result.consumed = true;
441 		break;
442 	case KEYWORD_changequote:
443 		handleM4Changequote ();
444 		result.consumed = true;
445 		break;
446 	}
447 
448 	if (kind == -1)
449 		return result;
450 
451 	if ((! isXtagEnabled (XTAG_REFERENCE_TAGS))
452 	    && (role != ROLE_DEFINITION_INDEX))
453 		return result;
454 
455 	result.index = makeM4Tag (kind, role);
456 	return result;
457 }
458 
459 
460 /* parser instance  */
461 
findM4Tags(void)462 static void findM4Tags(void)
463 {
464 	m4Subparser *sub;
465 	vString *const token = vStringNew();
466 	int c;
467 	int index = CORK_NIL;
468 
469 	setM4Quotes ('`', '\'');
470 
471 	sub = (m4Subparser *)getSubparserRunningBaseparser();
472 	if (sub)
473 		chooseExclusiveSubparser ((subparser *)sub, NULL);
474 
475 	while ((c = getcFromInputFile()) != EOF)
476 	{
477 		if (doesLineCommentStart (sub, c, vStringValue (token)))
478 			skipLine(c);
479 		else if (doesQuoteStart (c))
480 			skipQuotes(c);
481 		else if (doesStringLiteralStart (sub, c))
482 			skipToCharacter(c, false);
483 		else if (c == '(' && vStringLength(token) > 0) /* catch a few macro calls */
484 		{
485 			struct newMacroResult r;
486 
487 			if (!sub)
488 				sub = maySwitchLanguage (vStringValue (token));
489 
490 			r = newMacroM4 (vStringValue (token));
491 			if (r.consumed)
492 				index = r.index;
493 			else if (sub)
494 				index = notifyNewMacro (sub, vStringValue (token));
495 		}
496 
497 		vStringClear(token);
498 		if (IS_WORD(c))
499 		{
500 			ungetcToInputFile(c);
501 			readQuotedWord(token);
502 		}
503 		else if (c == ')')
504 		{
505 			tagEntryInfo *e = getEntryInCorkQueue (index);
506 			if (e)
507 				e->extensionFields.endLine = getInputLineNumber ();
508 			index = CORK_NIL;
509 		}
510 	}
511 
512 	vStringDelete(token);
513 }
514 
M4Parser(void)515 extern parserDefinition* M4Parser (void)
516 {
517 	static const char *const extensions [] = { "m4",
518 						   "spt", /* used in `selinux-policy' */
519 						   NULL };
520 	parserDefinition* const def = parserNew("M4");
521 
522 	def->kindTable = M4Kinds;
523 	def->kindCount = ARRAY_SIZE(M4Kinds);
524 	def->extensions = extensions;
525 	def->parser = findM4Tags;
526 	def->useCork = CORK_QUEUE;
527 	def->keywordTable = m4KeywordTable;
528 	def->keywordCount = ARRAY_SIZE (m4KeywordTable);
529 
530 	return def;
531 }
532