1 /*
2 * Copyright (c) 2000-2002, Darren Hiebert
3 *
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License version 2 or (at your option) any later version.
6 *
7 * This module contains functions for generating tags for scripts for the
8 * Bourne shell (and its derivatives, the Korn and Z shells).
9 */
10
11 /*
12 * INCLUDE FILES
13 */
14 #include "general.h" /* must always come first */
15
16 #include <string.h>
17
18 #include "entry.h"
19 #include "kind.h"
20 #include "parse.h"
21 #include "read.h"
22 #include "promise.h"
23 #include "routines.h"
24 #include "vstring.h"
25 #include "xtag.h"
26
27 /*
28 * DATA DEFINITIONS
29 */
/*
 * Tag kinds produced by this parser.  The non-negative values follow the
 * order of the entries in the ShKinds table.
 */
typedef enum {
	K_NOTHING      = -1,	/* place holder. Never appears on tags file. */
	K_ALIAS        = 0,
	K_FUNCTION     = 1,
	K_SOURCE       = 2,
	K_HEREDOCLABEL = 3
} shKind;
37
/* Roles a K_SOURCE (script) reference tag can take. */
typedef enum {
	R_SCRIPT_LOADED = 0	/* file read via "." or "source" */
} shScriptRole;
41
/*
 * Role table attached to the "script" kind in ShKinds; entries are listed
 * in the same order as the shScriptRole enumerators.
 * NOTE(review): the positional fields are presumably (enabled, name,
 * description) -- confirm against the roleDefinition declaration.
 */
static roleDefinition ShScriptRoles [] = {
	{ true, "loaded", "loaded" },
};
45
/* Roles a K_HEREDOCLABEL reference tag can take. */
typedef enum {
	R_HEREDOC_ENDMARKER = 0	/* the line that terminates a here-document */
} shHeredocRole;
49
/*
 * Role table attached to the "heredoc" kind in ShKinds; entries are listed
 * in the same order as the shHeredocRole enumerators.
 * NOTE(review): the positional fields are presumably (enabled, name,
 * description) -- confirm against the roleDefinition declaration.
 */
static roleDefinition ShHeredocRoles [] = {
	{ true, "endmarker", "end marker" },
};
53
/*
 * Kind table registered by ShParser().  The entries are listed in the same
 * order as the non-negative shKind enumerators (alias, function, script,
 * heredoc), which are the values passed to makeSimpleTag() and
 * makeSimpleRefTag() in this file.
 * NOTE(review): the positional fields are presumably (enabled, letter,
 * name, description) -- confirm against the kindDefinition declaration.
 */
static kindDefinition ShKinds [] = {
	{ true, 'a', "alias", "aliases"},
	{ true, 'f', "function", "functions"},
	/* Only tagged as a reference, with the "loaded" role. */
	{ true, 's', "script", "script files",
	  .referenceOnly = true, ATTACH_ROLES (ShScriptRoles) },
	/* Tagged both as a definition (start label) and as an "endmarker"
	 * reference (terminating line). */
	{ true, 'h', "heredoc", "label for here document",
	  .referenceOnly = false, ATTACH_ROLES (ShHeredocRoles) },
};
62
63 /*
64 * FUNCTION DEFINITIONS
65 */
66
/*
 * Tells whether c may appear in a file name as recognised by this parser:
 * an alphanumeric character or one of  _ - / . + ^ % @ ~
 */
static bool isFileChar (int c)
{
	if (isalnum (c))
		return true;

	switch (c)
	{
		case '_':
		case '-':
		case '/':
		case '.':
		case '+':
		case '^':
		case '%':
		case '@':
		case '~':
			return true;
		default:
			return false;
	}
}
76
/*
 * Tells whether c may appear in an identifier (alias name or unquoted
 * here-document delimiter): an alphanumeric character, '_' or '-'.
 */
static bool isIdentChar (int c)
{
	if (c == '_' || c == '-')
		return true;

	return isalnum (c) != 0;
}
81
/*
 * bash allows all kinds of crazy stuff as the identifier after 'function'.
 * Instead of listing what is allowed, this predicate rejects the few
 * characters that are not and accepts everything else above SOH.
 */
static bool isBashFunctionChar (int c)
{
	switch (c)
	{
		case 0x7f:	/* DEL */
		/* Blanks are disallowed.  VT and FF are accepted; CR is rejected
		 * here although bash tolerates it to some extent -- let's not
		 * fall into the pit of craziness. */
		case ' ':
		case '\t':
		case '\n':
		case '\r':
		/* quoting and expansion characters */
		case '"':
		case '\'':
		case '$':
		case '`':
		case '\\':
		/* command separators, grouping and redirection */
		case '&':
		case ';':
		case '(':
		case ')':
		case '<':
		case '>':
			return false;
		default:
			/* NUL and SOH are disallowed */
			return c > 1;
	}
}
94
/*
 * Skips a double-quoted string.
 *
 * cp must point at the opening '"'.  Returns a pointer to the closing '"',
 * or to the terminating NUL if the string is not closed on this line.
 *
 * A backslash escapes the character that follows it, so backslash escapes
 * are consumed as pairs.  This matters for an escaped backslash right
 * before the closing quote ("a\\"): looking only at the previous character
 * would mistake that closing quote for an escaped one and run past the end
 * of the string.
 */
static const unsigned char *skipDoubleString (const unsigned char *cp)
{
	cp++;	/* step over the opening quote */
	while (*cp != '"' && *cp != '\0')
	{
		/* Consume the escaped character together with its backslash, but
		 * never step over the terminating NUL. */
		if (*cp == '\\' && cp [1] != '\0')
			cp++;
		cp++;
	}
	return cp;
}
106
/*
 * Skips a single-quoted string.
 *
 * cp must point at the opening quote.  Returns a pointer to the closing
 * quote, or to the terminating NUL if the string is not closed on this
 * line.  No escape processing is done.
 */
static const unsigned char *skipSingleString (const unsigned char *cp)
{
	const unsigned char *scan;

	for (scan = cp + 1; *scan != '\0'; scan++)
	{
		if (*scan == '\'')
			break;
	}
	return scan;
}
114
/*
 * Tells whether cmd names the "env" command: the result of baseFilename()
 * applied to the command string is compared against "env".
 */
static bool isEnvCommand (const vString *cmd)
{
	const char *const base = baseFilename (vStringValue (cmd));

	return strcmp (base, "env") == 0;
}
122
/*
 * Reads the target file name of an output redirection ("> file" or
 * ">> file") found at cp.
 *
 * Leading whitespace and whitespace between the operator and the file name
 * are skipped.  The file name is the longest run of characters accepted by
 * isFileChar().
 *
 * On success destfile is overwritten with the file name and the number of
 * characters consumed from cp (whitespace and operator included) is
 * returned.  If no redirection with a file name is found, destfile is left
 * untouched and 0 is returned.
 */
static int readDestfileName (const unsigned char *cp, vString *destfile)
{
	const unsigned char *origin = cp;

	while (isspace ((int) *cp))
		++cp;

	/* >... */
	if (*cp != '>')
		return 0;
	++cp;

	/* >>... : the second '>' of the append operator is optional */
	if (*cp == '>')
		++cp;

	while (isspace ((int) *cp))
		++cp;

	if (!isFileChar ((int) *cp))
		return 0;

	/* At least one file name character is present, so destfile is never
	 * left empty past this point. */
	vStringClear (destfile);
	do {
		vStringPut (destfile, (int) *cp);
		++cp;
	} while (isFileChar ((int) *cp));

	return (int) (cp - origin);
}
155
/*
 * State gathered on a line that introduces a here-document, used to guess
 * the language of the here-document body and to complete the tag made for
 * its label.
 */
struct hereDocParsingState {
	/* First two plain words seen on the line (see hdocStateUpdateArgs());
	 * used as the candidate command name, args[1] being used when args[0]
	 * is "env". */
	vString *args[2];
	/* Redirection target collected by readDestfileName(). */
	vString *destfile;
	/* Language guessed for the here-document body, or LANG_IGNORE. */
	langType sublang;
	/* Line following the one on which sublang was determined; only set
	 * together with a sublang other than LANG_IGNORE. */
	unsigned long startLine;

	/* Value returned by makeSimpleTag() for the here-document label, or
	 * CORK_NIL; used by hdocStateUpdateTag() to fill in the end line. */
	int corkIndex;
};
164
hdocStateInit(struct hereDocParsingState * hstate)165 static void hdocStateInit (struct hereDocParsingState *hstate)
166 {
167 hstate->args[0] = vStringNew ();
168 hstate->args[1] = vStringNew ();
169 hstate->destfile = vStringNew ();
170
171 hstate->corkIndex = CORK_NIL;
172 hstate->sublang = LANG_IGNORE;
173 }
174
hdocStateClear(struct hereDocParsingState * hstate)175 static void hdocStateClear (struct hereDocParsingState *hstate)
176 {
177 vStringClear (hstate->args[0]);
178 vStringClear (hstate->args[1]);
179 vStringClear (hstate->destfile);
180 }
181
hdocStateFini(struct hereDocParsingState * hstate)182 static void hdocStateFini (struct hereDocParsingState *hstate)
183 {
184 vStringDelete (hstate->args[0]);
185 vStringDelete (hstate->args[1]);
186 vStringDelete (hstate->destfile);
187 }
188
/*
 * Records name as a candidate command word: it is copied into the first of
 * the two args slots that is still empty.  Once both slots are filled,
 * further words are ignored.
 */
static void hdocStateUpdateArgs (struct hereDocParsingState *hstate,
								 vString *name)
{
	unsigned int i;

	for (i = 0; i < sizeof hstate->args / sizeof hstate->args [0]; i++)
	{
		if (vStringIsEmpty (hstate->args [i]))
		{
			vStringCopy (hstate->args [i], name);
			break;
		}
	}
}
197
hdocStateMakePromiseMaybe(struct hereDocParsingState * hstate)198 static void hdocStateMakePromiseMaybe (struct hereDocParsingState *hstate)
199 {
200 if (hstate->sublang != LANG_IGNORE)
201 makePromise (getLanguageName(hstate->sublang),
202 hstate->startLine, 0,
203 getInputLineNumber(), 0,
204 0);
205 hstate->sublang = LANG_IGNORE;
206 }
207
hdocStateRecordStartlineFromDestfileMaybe(struct hereDocParsingState * hstate)208 static void hdocStateRecordStartlineFromDestfileMaybe (struct hereDocParsingState *hstate)
209 {
210 const char *f = vStringValue(hstate->destfile);
211
212 if (hstate->sublang != LANG_IGNORE)
213 return;
214
215 hstate->sublang = getLanguageForFilename (f, 0);
216 if (hstate->sublang != LANG_IGNORE)
217 hstate->startLine = getInputLineNumber () + 1;
218 vStringClear (hstate->destfile);
219 }
220
hdocStateRecordStatelineMaybe(struct hereDocParsingState * hstate)221 static void hdocStateRecordStatelineMaybe (struct hereDocParsingState *hstate)
222 {
223 if (!vStringIsEmpty(hstate->args[0]))
224 {
225 const char *cmd;
226
227 cmd = vStringValue(hstate->args[0]);
228 if (isEnvCommand (hstate->args[0]))
229 {
230 cmd = NULL;
231 if (!vStringIsEmpty(hstate->args[1]))
232 cmd = vStringValue(hstate->args[1]);
233 }
234
235 if (cmd)
236 {
237 hstate->sublang = getLanguageForCommand (cmd, 0);
238 if (hstate->sublang != LANG_IGNORE)
239 hstate->startLine = getInputLineNumber () + 1;
240 }
241 }
242
243 if (vStringLength(hstate->destfile) > 0)
244 hdocStateRecordStartlineFromDestfileMaybe (hstate);
245 }
246
hdocStateReadDestfileName(struct hereDocParsingState * hstate,const unsigned char * cp,const vString * const hereDocDelimiter)247 static int hdocStateReadDestfileName (struct hereDocParsingState *hstate,
248 const unsigned char* cp,
249 const vString *const hereDocDelimiter)
250 {
251 int d = readDestfileName (cp, hstate->destfile);
252
253 if (d > 0 && hereDocDelimiter)
254 hdocStateRecordStartlineFromDestfileMaybe (hstate);
255
256 return d;
257 }
258
hdocStateUpdateTag(struct hereDocParsingState * hstate,unsigned long endLine)259 static void hdocStateUpdateTag (struct hereDocParsingState *hstate, unsigned long endLine)
260 {
261 tagEntryInfo *tag = getEntryInCorkQueue (hstate->corkIndex);
262 if (tag)
263 {
264 tag->extensionFields.endLine = endLine;
265 hstate->corkIndex = CORK_NIL;
266 }
267 }
268
/*
 * Parser entry point: scans the input line by line and makes tags for
 *   - functions ("function NAME" and "NAME ()"),
 *   - aliases ("alias NAME"),
 *   - sourced scripts (". FILE" and "source FILE"), as reference tags,
 *   - here-document labels, both where they are introduced and, as a
 *     reference tag, where they terminate the here-document.
 *
 * While scanning a line that introduces a here-document, the first words
 * of the line and any "> FILE" redirection are collected in hstate so that
 * the language of the here-document body can be guessed and handed over
 * with makePromise() once the terminating line is reached.
 */
static void findShTags (void)
{
	vString *name = vStringNew ();
	const unsigned char *line;
	/* Non-NULL once a here-document start has been seen; reset to NULL when
	 * the terminating line is reached. */
	vString *hereDocDelimiter = NULL;
	/* True for "<<-": leading tabs are ignored when looking for the end. */
	bool hereDocIndented = false;
	/* Predicate deciding which characters belong to the current word. */
	bool (* check_char)(int);

	struct hereDocParsingState hstate;
	hdocStateInit (&hstate);

	while ((line = readLineFromInputFile ()) != NULL)
	{
		const unsigned char* cp = line;
		shKind found_kind = K_NOTHING;

		/* Inside a here-document: every line is skipped; the only thing
		 * looked for is the terminating line. */
		if (hereDocDelimiter)
		{
			if (hereDocIndented)
			{
				while (*cp == '\t')
					cp++;
			}
			/* The line terminates the here-document if it starts with the
			 * delimiter and the delimiter is followed by the end of the
			 * line or by whitespace. */
			if ((strncmp ((const char *) cp, vStringValue (hereDocDelimiter), vStringLength (hereDocDelimiter)) == 0)
				&& ((*(cp + vStringLength (hereDocDelimiter)) == '\0')
					|| isspace (*(cp + vStringLength (hereDocDelimiter)) )))
			{
				/* Complete the label tag with its end line, then hand the
				 * body over to the guessed sub-language, if any. */
				hdocStateUpdateTag (&hstate, getInputLineNumber ());
				hdocStateMakePromiseMaybe (&hstate);

				if (!vStringIsEmpty(hereDocDelimiter))
					makeSimpleRefTag(hereDocDelimiter, K_HEREDOCLABEL, R_HEREDOC_ENDMARKER);
				vStringDelete (hereDocDelimiter);
				hereDocDelimiter = NULL;
			}
			continue;
		}

		/* Ordinary line: forget the words and redirection target collected
		 * on the previous one. */
		hdocStateClear (&hstate);
		while (*cp != '\0')
		{
			/* jump over whitespace */
			while (isspace ((int)*cp))
				cp++;

			/* jump over strings; cp is left on the closing quote (or on the
			 * terminating NUL), which is stepped over further below because
			 * no word can start with a quote */
			if (*cp == '"')
				cp = skipDoubleString (cp);
			else if (*cp == '\'')
				cp = skipSingleString (cp);
			/* jump over comments */
			else if (*cp == '#')
				break;
			/* jump over here-documents */
			else if (cp[0] == '<' && cp[1] == '<')
			{
				const unsigned char *start, *end;
				bool trimEscapeSequences = false;
				bool quoted = false;
				cp += 2;
				/* an optional "-" strips leading tabulations from the heredoc lines */
				if (*cp != '-')
					hereDocIndented = false;
				else
				{
					hereDocIndented = true;
					cp++;
				}
				while (isspace (*cp))
					cp++;
				start = end = cp;
				/* the delimiter can be surrounded by quotes */
				if (*cp == '"')
				{
					start++;
					end = cp = skipDoubleString (cp);
					/* we need not to worry about variable substitution, they
					 * don't happen in heredoc delimiter definition */
					trimEscapeSequences = true;
					quoted = true;
				}
				else if (*cp == '\'')
				{
					start++;
					end = cp = skipSingleString (cp);
					quoted = true;
				}
				else
				{
					while (isIdentChar ((int) *cp))
						cp++;
					end = cp;
				}
				/* A quoted delimiter is accepted even when it is empty; an
				 * unquoted one must have at least one character. */
				if (end > start || quoted)
				{
					/* The input may be broken as a shell script but we need to avoid
					   memory leaking. */
					if (hereDocDelimiter)
						vStringClear(hereDocDelimiter);
					else
						hereDocDelimiter = vStringNew ();
					/* Copy the delimiter; inside double quotes a backslash is
					 * dropped and the character after it is taken as is. */
					for (; end > start; start++)
					{
						if (trimEscapeSequences && *start == '\\')
							start++;
						vStringPut (hereDocDelimiter, *start);
					}
					/* Tag the label and remember the returned index so that
					 * the end line can be filled in later. */
					if (vStringLength(hereDocDelimiter) > 0)
						hstate.corkIndex = makeSimpleTag(hereDocDelimiter, K_HEREDOCLABEL);

					/* Guess the language of the body from what has been
					 * collected on this line so far. */
					hdocStateRecordStatelineMaybe(&hstate);
				}
			}

			/* All of the branches above except the comment one fall through
			 * to here.  By default a word is made of anything bash accepts
			 * in a function name. */
			check_char = isBashFunctionChar;

			/* Keywords announcing the kind of the word that follows. */
			if (strncmp ((const char*) cp, "function", (size_t) 8) == 0  &&
				isspace ((int) cp [8]))
			{
				found_kind = K_FUNCTION;
				cp += 8;
			}
			else if (strncmp ((const char*) cp, "alias", (size_t) 5) == 0  &&
				isspace ((int) cp [5]))
			{
				check_char = isIdentChar;
				found_kind = K_ALIAS;
				cp += 5;
			}
			else if (cp [0] == '.'
				    && isspace((int) cp [1]))
			{
				found_kind = K_SOURCE;
				++cp;
				check_char = isFileChar;
			}
			else if (strncmp ((const char*) cp, "source", (size_t) 6) == 0
					 && isspace((int) cp [6]))
			{
				found_kind = K_SOURCE;
				cp += 6;
				check_char = isFileChar;
			}

			/* Skip the whitespace separating a keyword from its operand. */
			if (found_kind != K_NOTHING)
				while (isspace ((int) *cp))
					++cp;

			// Get the name of the function, alias or file to be read by source
			if (! check_char ((int) *cp))
			{
				/* No word starts here.  Try to read a redirection target;
				 * failing that, step over a single character so that the
				 * scan always makes progress. */
				found_kind = K_NOTHING;

				int d = hdocStateReadDestfileName (&hstate, cp,
												   hereDocDelimiter);
				if (d > 0)
					cp += d;
				else if (*cp != '\0')
					++cp;
				continue;
			}
			while (check_char ((int) *cp))
			{
				vStringPut (name, (int) *cp);
				++cp;
			}

			while (isspace ((int) *cp))
				++cp;

			/* A word followed by "()" is a function definition, whatever
			 * came before it, except after "." or "source". */
			if ((found_kind != K_SOURCE)
			    && *cp == '(')
			{
				++cp;
				while (isspace ((int) *cp))
					++cp;
				if (*cp == ')')
				{
					found_kind = K_FUNCTION;
					++cp;
				}
			}

			if (found_kind != K_NOTHING)
			{
				if (found_kind == K_SOURCE)
						makeSimpleRefTag (name, K_SOURCE, R_SCRIPT_LOADED);
				else
					makeSimpleTag (name, found_kind);
				found_kind = K_NOTHING;
			}
			/* A plain word: keep it as a candidate command name for a
			 * here-document introduced later on this line. */
			else if (!hereDocDelimiter)
				hdocStateUpdateArgs (&hstate, name);
			vStringClear (name);
		}
	}
	hdocStateFini (&hstate);
	vStringDelete (name);
	/* The input ended inside a here-document. */
	if (hereDocDelimiter)
		vStringDelete (hereDocDelimiter);
}
470
ShParser(void)471 extern parserDefinition* ShParser (void)
472 {
473 static const char *const extensions [] = {
474 "sh", "SH", "bsh", "bash", "ksh", "zsh", "ash", NULL
475 };
476 static const char *const aliases [] = {
477 "sh", "bash", "ksh", "zsh", "ash",
478 /* major mode name in emacs */
479 "shell-script",
480 NULL
481 };
482 parserDefinition* def = parserNew ("Sh");
483 def->kindTable = ShKinds;
484 def->kindCount = ARRAY_SIZE (ShKinds);
485 def->extensions = extensions;
486 def->aliases = aliases;
487 def->parser = findShTags;
488 def->useCork = CORK_QUEUE;
489 return def;
490 }
491