xref: /Universal-ctags/parsers/sh.c (revision 3671ad7255885a0c8f6ff4979d80c70f201ea411)
1 /*
2 *   Copyright (c) 2000-2002, Darren Hiebert
3 *
4 *   This source code is released for free distribution under the terms of the
5 *   GNU General Public License version 2 or (at your option) any later version.
6 *
7 *   This module contains functions for generating tags for scripts for the
8 *   Bourne shell (and its derivatives, the Korn and Z shells).
9 */
10 
11 /*
12 *   INCLUDE FILES
13 */
14 #include "general.h"  /* must always come first */
15 
16 #include <string.h>
17 
18 #include "entry.h"
19 #include "kind.h"
20 #include "parse.h"
21 #include "read.h"
22 #include "promise.h"
23 #include "routines.h"
24 #include "vstring.h"
25 #include "xtag.h"
26 
27 /*
28 *   DATA DEFINITIONS
29 */
/*
 * Kinds of tags produced by this parser.  The non-negative values are
 * passed as kind indices to makeSimpleTag()/makeSimpleRefTag() and are
 * listed in the same order as the entries of ShKinds.
 */
typedef enum {
	K_NOTHING = -1,		/* "no tag found" marker; never written to a tags file */
	K_ALIAS = 0,		/* name introduced by "alias" */
	K_FUNCTION,		/* function definition */
	K_SOURCE,		/* file named after "." or "source" */
	K_HEREDOCLABEL,		/* here-document delimiter */
} shKind;
37 
/* Roles attached to the script kind (K_SOURCE); indices into ShScriptRoles. */
typedef enum {
	R_SCRIPT_LOADED = 0,	/* the script is read via "." or "source" */
} shScriptRole;
41 
42 static roleDefinition ShScriptRoles [] = {
43 	{ true, "loaded", "loaded" },
44 };
45 
/* Roles attached to the heredoc kind (K_HEREDOCLABEL); indices into ShHeredocRoles. */
typedef enum {
	R_HEREDOC_ENDMARKER = 0,	/* the line that terminates a here-document */
} shHeredocRole;
49 
50 static roleDefinition ShHeredocRoles [] = {
51 	{ true, "endmarker", "end marker" },
52 };
53 
54 static kindDefinition ShKinds [] = {
55 	{ true, 'a', "alias", "aliases"},
56 	{ true, 'f', "function", "functions"},
57 	{ true, 's', "script", "script files",
58 	  .referenceOnly = true, ATTACH_ROLES (ShScriptRoles) },
59 	{ true, 'h', "heredoc", "label for here document",
60 	  .referenceOnly = false, ATTACH_ROLES (ShHeredocRoles) },
61 };
62 
63 /*
64 *   FUNCTION DEFINITIONS
65 */
66 
/*
 * Tell whether C may appear in a file name as recognised by this parser:
 * an alphanumeric character or one of  _ - / . + ^ % @ ~
 */
static bool isFileChar  (int c)
{
	if (isalnum (c))
		return true;

	switch (c)
	{
	case '_':
	case '-':
	case '/':
	case '.':
	case '+':
	case '^':
	case '%':
	case '@':
	case '~':
		return true;
	default:
		return false;
	}
}
76 
/*
 * Tell whether C may appear in an identifier as recognised by this parser:
 * an alphanumeric character, '_' or '-'.
 */
static bool isIdentChar (int c)
{
	if (c == '_' || c == '-')
		return true;
	return isalnum (c) != 0;
}
81 
/*
 * Tell whether C may appear in the identifier following 'function'.
 * bash is very permissive there, so this is expressed as a reject list:
 * everything not listed below is accepted.
 */
static bool isBashFunctionChar (int c)
{
	/* NUL, SOH and DEL are disallowed */
	if (c <= 1 || c == 0x7f)
		return false;

	switch (c)
	{
	/* blanks are disallowed; VT and FF are deliberately NOT in this list
	 * and are therefore accepted.  CR is rejected to stay away from the
	 * corner cases. */
	case ' ':
	case '\t':
	case '\n':
	case '\r':
	/* quoting and expansion characters */
	case '"':
	case '\'':
	case '$':
	case '`':
	case '\\':
	/* control operators */
	case '&':
	case ';':
	case '(':
	case ')':
	/* redirections */
	case '<':
	case '>':
		return false;
	default:
		return true;
	}
}
94 
/*
 * Skip a double-quoted string.
 *
 * CP must point at the opening '"'.  Returns a pointer to the closing
 * '"', or to the terminating NUL when the string is not closed on this
 * line.
 *
 * A backslash escapes the character that follows it, so `\"` does not
 * close the string.  The escape state is tracked explicitly rather than
 * by peeking at the previous byte: in `"a\\"` the second backslash is
 * itself escaped and the quote after it really ends the string.
 */
static const unsigned char *skipDoubleString (const unsigned char *cp)
{
	bool escaped = false;

	for (cp++; *cp != '\0'; cp++)
	{
		if (escaped)
			escaped = false;	/* this character was quoted by a backslash */
		else if (*cp == '\\')
			escaped = true;
		else if (*cp == '"')
			break;
	}
	return cp;
}
106 
/*
 * Skip a single-quoted string.
 *
 * CP must point at the opening quote.  Returns a pointer to the closing
 * quote, or to the terminating NUL when the string is not closed on this
 * line.  No escape processing is done inside single quotes.
 */
static const unsigned char *skipSingleString (const unsigned char *cp)
{
	const unsigned char *p;

	for (p = cp + 1; *p != '\0'; p++)
	{
		if (*p == '\'')
			break;
	}
	return p;
}
114 
/*
 * Tell whether CMD names the "env" command: the result of baseFilename()
 * on its text must be exactly "env".  (baseFilename() presumably strips
 * leading directory components, so "/usr/bin/env" would match too --
 * confirm against routines.h.)
 */
static bool isEnvCommand (const vString *cmd)
{
	const char *const base = baseFilename (vStringValue (cmd));

	if (strcmp (base, "env") != 0)
		return false;
	return true;
}
122 
/*
 * Try to read an output redirection ("> FILE" or ">> FILE") at CP.
 *
 * Leading white space, the redirection operator and any white space
 * between the operator and the file name are skipped.  The file name is
 * the longest run of characters accepted by isFileChar().
 *
 * On success DESTFILE is overwritten with the file name and the number of
 * bytes consumed from CP (leading white space and operator included) is
 * returned.  When no redirection followed by a file name is found, 0 is
 * returned and DESTFILE is left untouched.
 */
static int readDestfileName (const unsigned char *cp, vString *destfile)
{
	const unsigned char *origin = cp;

	while (isspace ((int) *cp))
		++cp;

	/* >... */
	if (*cp != '>')
		return 0;
	++cp;

	/* >>...
	 * The first '>' has been consumed above, so this really tests the
	 * second character of the append operator. */
	if (*cp == '>')
		++cp;

	while (isspace ((int) *cp))
		++cp;

	if (!isFileChar ((int) *cp))
		return 0;

	/* At least one character is stored, so the name is never empty. */
	vStringClear(destfile);
	do {
		vStringPut (destfile, (int) *cp);
		++cp;
	} while (isFileChar ((int) *cp));

	return (int) (cp - origin);
}
155 
156 struct hereDocParsingState {
157 	vString *args[2];
158 	vString *destfile;
159 	langType sublang;
160 	unsigned long startLine;
161 
162 	int corkIndex;
163 };
164 
hdocStateInit(struct hereDocParsingState * hstate)165 static void hdocStateInit (struct hereDocParsingState *hstate)
166 {
167 	hstate->args[0] = vStringNew ();
168 	hstate->args[1] = vStringNew ();
169 	hstate->destfile = vStringNew ();
170 
171 	hstate->corkIndex = CORK_NIL;
172 	hstate->sublang = LANG_IGNORE;
173 }
174 
hdocStateClear(struct hereDocParsingState * hstate)175 static void hdocStateClear (struct hereDocParsingState *hstate)
176 {
177 	vStringClear (hstate->args[0]);
178 	vStringClear (hstate->args[1]);
179 	vStringClear (hstate->destfile);
180 }
181 
hdocStateFini(struct hereDocParsingState * hstate)182 static void hdocStateFini (struct hereDocParsingState *hstate)
183 {
184 	vStringDelete (hstate->args[0]);
185 	vStringDelete (hstate->args[1]);
186 	vStringDelete (hstate->destfile);
187 }
188 
/*
 * Remember NAME as a command word of the current line.  It is copied
 * into the first args slot that is still empty; once both slots are
 * filled, further words are ignored.
 */
static void hdocStateUpdateArgs (struct hereDocParsingState *hstate,
										   vString *name)
{
	vString *slot = NULL;

	if (vStringIsEmpty(hstate->args[0]))
		slot = hstate->args[0];
	else if (vStringIsEmpty(hstate->args[1]))
		slot = hstate->args[1];

	if (slot != NULL)
		vStringCopy(slot, name);
}
197 
hdocStateMakePromiseMaybe(struct hereDocParsingState * hstate)198 static void hdocStateMakePromiseMaybe (struct hereDocParsingState *hstate)
199 {
200 	if (hstate->sublang != LANG_IGNORE)
201 		makePromise (getLanguageName(hstate->sublang),
202 					 hstate->startLine, 0,
203 					 getInputLineNumber(), 0,
204 					 0);
205 	hstate->sublang = LANG_IGNORE;
206 }
207 
hdocStateRecordStartlineFromDestfileMaybe(struct hereDocParsingState * hstate)208 static void hdocStateRecordStartlineFromDestfileMaybe (struct hereDocParsingState *hstate)
209 {
210 	const char *f = vStringValue(hstate->destfile);
211 
212 	if (hstate->sublang != LANG_IGNORE)
213 		return;
214 
215 	hstate->sublang = getLanguageForFilename (f, 0);
216 	if (hstate->sublang != LANG_IGNORE)
217 		hstate->startLine = getInputLineNumber () + 1;
218 	vStringClear (hstate->destfile);
219 }
220 
hdocStateRecordStatelineMaybe(struct hereDocParsingState * hstate)221 static void hdocStateRecordStatelineMaybe (struct hereDocParsingState *hstate)
222 {
223 	if (!vStringIsEmpty(hstate->args[0]))
224 	{
225 		const char *cmd;
226 
227 		cmd = vStringValue(hstate->args[0]);
228 		if (isEnvCommand (hstate->args[0]))
229 		{
230 			cmd = NULL;
231 			if (!vStringIsEmpty(hstate->args[1]))
232 				cmd = vStringValue(hstate->args[1]);
233 		}
234 
235 		if (cmd)
236 		{
237 			hstate->sublang = getLanguageForCommand (cmd, 0);
238 			if (hstate->sublang != LANG_IGNORE)
239 				hstate->startLine = getInputLineNumber () + 1;
240 		}
241 	}
242 
243 	if (vStringLength(hstate->destfile) > 0)
244 		hdocStateRecordStartlineFromDestfileMaybe (hstate);
245 }
246 
hdocStateReadDestfileName(struct hereDocParsingState * hstate,const unsigned char * cp,const vString * const hereDocDelimiter)247 static int hdocStateReadDestfileName (struct hereDocParsingState *hstate,
248 									  const unsigned char* cp,
249 									  const vString *const hereDocDelimiter)
250 {
251 	int d = readDestfileName (cp, hstate->destfile);
252 
253 	if (d > 0 && hereDocDelimiter)
254 		hdocStateRecordStartlineFromDestfileMaybe (hstate);
255 
256 	return d;
257 }
258 
hdocStateUpdateTag(struct hereDocParsingState * hstate,unsigned long endLine)259 static void hdocStateUpdateTag (struct hereDocParsingState *hstate, unsigned long endLine)
260 {
261 	tagEntryInfo *tag = getEntryInCorkQueue (hstate->corkIndex);
262 	if (tag)
263 	{
264 		tag->extensionFields.endLine = endLine;
265 		hstate->corkIndex = CORK_NIL;
266 	}
267 }
268 
findShTags(void)269 static void findShTags (void)
270 {
271 	vString *name = vStringNew ();
272 	const unsigned char *line;
273 	vString *hereDocDelimiter = NULL;
274 	bool hereDocIndented = false;
275 	bool (* check_char)(int);
276 
277 	struct hereDocParsingState hstate;
278 	hdocStateInit (&hstate);
279 
280 	while ((line = readLineFromInputFile ()) != NULL)
281 	{
282 		const unsigned char* cp = line;
283 		shKind found_kind = K_NOTHING;
284 
285 		if (hereDocDelimiter)
286 		{
287 			if (hereDocIndented)
288 			{
289 				while (*cp == '\t')
290 					cp++;
291 			}
292 			if ((strncmp ((const char *) cp, vStringValue (hereDocDelimiter), vStringLength (hereDocDelimiter)) == 0)
293 				&& ((*(cp + vStringLength (hereDocDelimiter)) == '\0')
294 					|| isspace (*(cp + vStringLength (hereDocDelimiter)) )))
295 			{
296 				hdocStateUpdateTag (&hstate, getInputLineNumber ());
297 				hdocStateMakePromiseMaybe (&hstate);
298 
299 				if (!vStringIsEmpty(hereDocDelimiter))
300 					makeSimpleRefTag(hereDocDelimiter, K_HEREDOCLABEL, R_HEREDOC_ENDMARKER);
301 				vStringDelete (hereDocDelimiter);
302 				hereDocDelimiter = NULL;
303 			}
304 			continue;
305 		}
306 
307 		hdocStateClear (&hstate);
308 		while (*cp != '\0')
309 		{
310 			/* jump over whitespace */
311 			while (isspace ((int)*cp))
312 				cp++;
313 
314 			/* jump over strings */
315 			if (*cp == '"')
316 				cp = skipDoubleString (cp);
317 			else if (*cp == '\'')
318 				cp = skipSingleString (cp);
319 			/* jump over comments */
320 			else if (*cp == '#')
321 				break;
322 			/* jump over here-documents */
323 			else if (cp[0] == '<' && cp[1] == '<')
324 			{
325 				const unsigned char *start, *end;
326 				bool trimEscapeSequences = false;
327 				bool quoted = false;
328 				cp += 2;
329 				/* an optional "-" strips leading tabulations from the heredoc lines */
330 				if (*cp != '-')
331 					hereDocIndented = false;
332 				else
333 				{
334 					hereDocIndented = true;
335 					cp++;
336 				}
337 				while (isspace (*cp))
338 					cp++;
339 				start = end = cp;
340 				/* the delimiter can be surrounded by quotes */
341 				if (*cp == '"')
342 				{
343 					start++;
344 					end = cp = skipDoubleString (cp);
345 					/* we need not to worry about variable substitution, they
346 					 * don't happen in heredoc delimiter definition */
347 					trimEscapeSequences = true;
348 					quoted = true;
349 				}
350 				else if (*cp == '\'')
351 				{
352 					start++;
353 					end = cp = skipSingleString (cp);
354 					quoted = true;
355 				}
356 				else
357 				{
358 					while (isIdentChar ((int) *cp))
359 						cp++;
360 					end = cp;
361 				}
362 				if (end > start || quoted)
363 				{
364 					/* The input may be broken as a shell script but we need to avoid
365 					   memory leaking. */
366 					if (hereDocDelimiter)
367 						vStringClear(hereDocDelimiter);
368 					else
369 						hereDocDelimiter = vStringNew ();
370 					for (; end > start; start++)
371 					{
372 						if (trimEscapeSequences && *start == '\\')
373 							start++;
374 						vStringPut (hereDocDelimiter, *start);
375 					}
376 					if (vStringLength(hereDocDelimiter) > 0)
377 						hstate.corkIndex = makeSimpleTag(hereDocDelimiter, K_HEREDOCLABEL);
378 
379 					hdocStateRecordStatelineMaybe(&hstate);
380 				}
381 			}
382 
383 			check_char = isBashFunctionChar;
384 
385 			if (strncmp ((const char*) cp, "function", (size_t) 8) == 0  &&
386 				isspace ((int) cp [8]))
387 			{
388 				found_kind = K_FUNCTION;
389 				cp += 8;
390 			}
391 			else if (strncmp ((const char*) cp, "alias", (size_t) 5) == 0  &&
392 				isspace ((int) cp [5]))
393 			{
394 				check_char = isIdentChar;
395 				found_kind = K_ALIAS;
396 				cp += 5;
397 			}
398 			else if (cp [0] == '.'
399 				    && isspace((int) cp [1]))
400 			{
401 				found_kind = K_SOURCE;
402 				++cp;
403 				check_char = isFileChar;
404 			}
405 			else if (strncmp ((const char*) cp, "source", (size_t) 6) == 0
406 					 && isspace((int) cp [6]))
407 			{
408 				found_kind = K_SOURCE;
409 				cp += 6;
410 				check_char = isFileChar;
411 			}
412 
413 			if (found_kind != K_NOTHING)
414 				while (isspace ((int) *cp))
415 					++cp;
416 
417 			// Get the name of the function, alias or file to be read by source
418 			if (! check_char ((int) *cp))
419 			{
420 				found_kind = K_NOTHING;
421 
422 				int d = hdocStateReadDestfileName (&hstate, cp,
423 												   hereDocDelimiter);
424 				if (d > 0)
425 					cp += d;
426 				else if (*cp != '\0')
427 					++cp;
428 				continue;
429 			}
430 			while (check_char ((int) *cp))
431 			{
432 				vStringPut (name, (int) *cp);
433 				++cp;
434 			}
435 
436 			while (isspace ((int) *cp))
437 				++cp;
438 
439 			if ((found_kind != K_SOURCE)
440 			    && *cp == '(')
441 			{
442 				++cp;
443 				while (isspace ((int) *cp))
444 					++cp;
445 				if (*cp == ')')
446 				{
447 					found_kind = K_FUNCTION;
448 					++cp;
449 				}
450 			}
451 
452 			if (found_kind != K_NOTHING)
453 			{
454 				if (found_kind == K_SOURCE)
455 						makeSimpleRefTag (name, K_SOURCE, R_SCRIPT_LOADED);
456 				else
457 					makeSimpleTag (name, found_kind);
458 				found_kind = K_NOTHING;
459 			}
460 			else if (!hereDocDelimiter)
461 				hdocStateUpdateArgs (&hstate, name);
462 			vStringClear (name);
463 		}
464 	}
465 	hdocStateFini (&hstate);
466 	vStringDelete (name);
467 	if (hereDocDelimiter)
468 		vStringDelete (hereDocDelimiter);
469 }
470 
ShParser(void)471 extern parserDefinition* ShParser (void)
472 {
473 	static const char *const extensions [] = {
474 		"sh", "SH", "bsh", "bash", "ksh", "zsh", "ash", NULL
475 	};
476 	static const char *const aliases [] = {
477 		"sh", "bash", "ksh", "zsh", "ash",
478 		/* major mode name in emacs */
479 		"shell-script",
480 		NULL
481 	};
482 	parserDefinition* def = parserNew ("Sh");
483 	def->kindTable      = ShKinds;
484 	def->kindCount  = ARRAY_SIZE (ShKinds);
485 	def->extensions = extensions;
486 	def->aliases = aliases;
487 	def->parser     = findShTags;
488 	def->useCork    = CORK_QUEUE;
489 	return def;
490 }
491