1 /*
2 * Copyright (c) 2015, Enrico Tröger <enrico.troeger@uvena.de>
3 *
4 * Loosely based on the PHP tags parser since the syntax is somewhat similar
5 * regarding variable and function definitions.
6 *
7 * This source code is released for free distribution under the terms of the
8 * GNU General Public License version 2 or (at your option) any later version.
9 *
10 * This module contains code for generating tags for Windows PowerShell scripts
11 * (https://en.wikipedia.org/wiki/PowerShell).
12 */
13
14 /*
15 * INCLUDE FILES
16 */
17 #include "general.h" /* must always come first */
18 #include "debug.h"
19 #include "parse.h"
20 #include "read.h"
21 #include "vstring.h"
22 #include "keyword.h"
23 #include "entry.h"
24 #include "routines.h"
25 #include <string.h>
26
/* Text inserted between the components of a tag's scope name (see addToScope()). */
#define SCOPE_SEPARATOR "::"
28
29
/* Marker meaning "no access type"; it also fills the first slot of the table below. */
#define ACCESS_UNDEFINED NULL
/*
 * Access names that findValidAccessType() can return.  The lookup compares
 * case-insensitively and skips the ACCESS_UNDEFINED slot.
 */
static const char *const accessTypes[] = {
	ACCESS_UNDEFINED,
	"global",
	"local",
	"script",
	"private"
};
38
/*
 * Tag kinds emitted by this parser.  The values are used as indices into
 * PowerShellKinds; COUNT_KIND is the number of kinds and sizes that table.
 */
typedef enum {
	K_FUNCTION,
	K_VARIABLE,
	COUNT_KIND
} powerShellKind;
44
/*
 * Kind table handed to the parser definition in PowerShellParser(), indexed
 * by powerShellKind.
 * NOTE(review): the initializer fields are presumably enabled flag, one-letter
 * code, name and description -- confirm against the kindDefinition declaration.
 */
static kindDefinition PowerShellKinds[COUNT_KIND] = {
	{ true, 'f', "function", "functions" },
	{ true, 'v', "variable", "variables" }
};
49
50
/*
 * Token categories assigned by readToken().  TOKEN_UNDEFINED is used for any
 * input the lexer does not classify; TOKEN_KEYWORD is only produced for the
 * words accepted by isTokenFunction().
 */
typedef enum eTokenType {
	TOKEN_UNDEFINED,
	TOKEN_EOF,
	TOKEN_CLOSE_PAREN,
	TOKEN_SEMICOLON,
	TOKEN_COLON,
	TOKEN_COMMA,
	TOKEN_KEYWORD,
	TOKEN_OPEN_PAREN,
	TOKEN_OPERATOR,
	TOKEN_IDENTIFIER,
	TOKEN_STRING,
	TOKEN_PERIOD,
	TOKEN_OPEN_CURLY,
	TOKEN_CLOSE_CURLY,
	TOKEN_EQUAL_SIGN,
	TOKEN_OPEN_SQUARE,
	TOKEN_CLOSE_SQUARE,
	TOKEN_VARIABLE
} tokenType;
71
/* A lexed token together with the scope it was found in. */
typedef struct {
	tokenType		type;			/* category assigned by readToken() */
	vString *		string;			/* token text (filled for strings, identifiers, keywords, variables) */
	vString *		scope;			/* enclosing scope name, components joined by SCOPE_SEPARATOR */
	unsigned long	lineNumber;		/* input line number recorded when the token was read */
	MIOPos			filePosition;	/* input file position recorded when the token was read */
	int				parentKind;		/* KIND_GHOST_INDEX if none */
} tokenInfo;
80
81
findValidAccessType(const char * const access)82 static const char *findValidAccessType (const char *const access)
83 {
84 unsigned int i;
85 if (access == ACCESS_UNDEFINED)
86 return ACCESS_UNDEFINED; /* early out to save the for-loop if possible */
87 for (i = 0; i < ARRAY_SIZE(accessTypes); i++)
88 {
89 if (accessTypes[i] == ACCESS_UNDEFINED)
90 continue;
91 if (strcasecmp (access, accessTypes[i]) == 0)
92 return accessTypes[i];
93 i++;
94 }
95 return ACCESS_UNDEFINED;
96 }
97
/*
 * Fills tag entry E for TOKEN as a tag of kind KIND.
 *
 * The entry takes its name, line number and file position from TOKEN.
 * ACCESS, when not NULL, is stored as the access field.  When TOKEN carries a
 * non-empty scope, the scope name and the token's parent kind are recorded
 * as well.  E only borrows the strings; it does not copy them.
 */
static void initPowerShellEntry (tagEntryInfo *const e, const tokenInfo *const token,
								 const powerShellKind kind, const char *const access)
{
	initTagEntry (e, vStringValue (token->string), kind);

	e->filePosition = token->filePosition;
	e->lineNumber = token->lineNumber;

	if (access != NULL)
		e->extensionFields.access = access;

	/* nothing more to record for a token outside of any scope */
	if (vStringLength (token->scope) == 0)
		return;

	{
		int parentKind = token->parentKind;
		Assert (parentKind >= 0);

		e->extensionFields.scopeName = vStringValue (token->scope);
		e->extensionFields.scopeKindIndex = parentKind;
	}
}
117
makeSimplePowerShellTag(const tokenInfo * const token,const powerShellKind kind,const char * const access)118 static void makeSimplePowerShellTag (const tokenInfo *const token, const powerShellKind kind,
119 const char *const access)
120 {
121 if (PowerShellKinds[kind].enabled)
122 {
123 tagEntryInfo e;
124
125 initPowerShellEntry (&e, token, kind, access);
126 makeTagEntry (&e);
127 }
128 }
129
makeFunctionTag(const tokenInfo * const token,const vString * const arglist,const char * const access)130 static void makeFunctionTag (const tokenInfo *const token, const vString *const arglist,
131 const char *const access)
132 {
133 if (PowerShellKinds[K_FUNCTION].enabled)
134 {
135 tagEntryInfo e;
136
137 initPowerShellEntry (&e, token, K_FUNCTION, access);
138
139 if (arglist)
140 e.extensionFields.signature = vStringValue (arglist);
141
142 makeTagEntry (&e);
143 }
144 }
145
newToken(void)146 static tokenInfo *newToken (void)
147 {
148 tokenInfo *const token = xMalloc (1, tokenInfo);
149
150 token->type = TOKEN_UNDEFINED;
151 token->string = vStringNew ();
152 token->scope = vStringNew ();
153 token->lineNumber = getInputLineNumber ();
154 token->filePosition = getInputFilePosition ();
155 token->parentKind = KIND_GHOST_INDEX;
156
157 return token;
158 }
159
/* Releases TOKEN together with the two strings it owns. */
static void deleteToken (tokenInfo *const token)
{
	vStringDelete (token->scope);
	vStringDelete (token->string);
	eFree (token);
}
166
/*
 * Copies type, text, position and parent kind from SRC into DEST.
 * The scope string is copied only when SCOPE is true; otherwise DEST keeps
 * the scope it already has.
 */
static void copyToken (tokenInfo *const dest, const tokenInfo *const src,
					   bool scope)
{
	if (scope)
		vStringCopy (dest->scope, src->scope);

	vStringCopy (dest->string, src->string);
	dest->type = src->type;
	dest->parentKind = src->parentKind;
	dest->filePosition = src->filePosition;
	dest->lineNumber = src->lineNumber;
}
178
/*
 * Appends EXTRA to the scope of TOKEN, inserting SCOPE_SEPARATOR first when
 * the scope is not empty.
 */
static void addToScope (tokenInfo *const token, const vString *const extra)
{
	vString *const scope = token->scope;
	const bool needsSeparator = (vStringLength (scope) > 0);

	if (needsSeparator)
		vStringCatS (scope, SCOPE_SEPARATOR);
	vStringCatS (scope, vStringValue (extra));
}
185
/*
 * Tells whether C may be part of an identifier: an alphanumeric character,
 * one of ':', '_' and '-', or any value of 0x80 and above.
 */
static bool isIdentChar (const int c)
{
	switch (c)
	{
		case ':':
		case '_':
		case '-':
			return true;

		default:
			return (isalnum (c) != 0 || c >= 0x80);
	}
}
190
/*
 * Reads the rest of a string literal whose opening DELIMITER has already been
 * consumed and appends its contents to STRING.  Reading stops at the closing
 * delimiter (consumed, not stored) or at the end of the input.
 *
 * PowerShell uses the backtick as its escape character, and only inside
 * double-quoted strings; single-quoted strings are taken verbatim.  A
 * backslash is an ordinary character in both, so a literal like "C:\dir\"
 * ends at its second quote.  Treating the backslash as an escape would
 * swallow that quote and the code following it.
 */
static void parseString (vString *const string, const int delimiter)
{
	while (true)
	{
		int c = getcFromInputFile ();

		/* `x inside "..." stores x itself, even when x is the delimiter;
		 * a backtick right before the end of input falls through to the
		 * EOF test below */
		if (delimiter == '"' && c == '`' && (c = getcFromInputFile ()) != EOF)
			vStringPut (string, (char) c);
		else if (c == EOF || c == delimiter)
			break;
		else
			vStringPut (string, (char) c);
	}
}
205
/*
 * Appends FIRSTCHAR and every following identifier character from the input
 * to STRING.  The first character that is not part of the identifier is
 * pushed back to the input.
 */
static void parseIdentifier (vString *const string, const int firstChar)
{
	int next;

	vStringPut (string, (char) firstChar);
	while (isIdentChar (next = getcFromInputFile ()))
		vStringPut (string, (char) next);

	ungetcToInputFile (next);
}
216
/*
 * Tells whether NAME is one of the words introducing a function definition,
 * "function" or "filter", compared case-insensitively.
 */
static bool isTokenFunction (vString *const name)
{
	const char *const word = vStringValue (name);

	if (strcasecmp (word, "function") == 0)
		return true;
	return (strcasecmp (word, "filter") == 0);
}
222
/*
 * Tells whether C is a whitespace character: tab, space, vertical tab,
 * line feed, carriage return or form feed.
 */
static bool isSpace (int c)
{
	switch (c)
	{
		case '\t':
		case ' ':
		case '\v':
		case '\n':
		case '\r':
		case '\f':
			return true;

		default:
			return false;
	}
}
228
/*
 * Starting with the already read character C, consumes input for as long as
 * the current character is whitespace.  Returns the first character that is
 * not whitespace (which may be EOF).
 */
static int skipWhitespaces (int c)
{
	int current;

	for (current = c; isSpace (current); current = getcFromInputFile ())
		; /* nothing to do, just advance */

	return current;
}
235
/*
 * Consumes input up to and including the end of the current line.
 * A "\r\n" pair is consumed as a single line end.
 *
 * Returns the character that ended the scan: '\n', a lone '\r', or EOF.
 */
static int skipSingleComment (void)
{
	for (;;)
	{
		const int c = getcFromInputFile ();

		if (c == '\r')
		{
			const int next = getcFromInputFile ();

			if (next == '\n')
				return next;

			/* lone carriage return: what follows belongs to the next line */
			ungetcToInputFile (next);
			return c;
		}

		if (c == EOF || c == '\n')
			return c;
	}
}
253
/*
 * Reads the next token from the input into TOKEN.
 *
 * Whitespace, "#" line comments and "<# ... #>" block comments are skipped.
 * On return token->type is set; token->string holds the text for strings
 * (without the quotes), identifiers, keywords and variables (without the
 * leading '$') and is empty for every other token.  The line number and file
 * position of the token are recorded as well.  The scope and parent kind of
 * TOKEN are left untouched.
 */
static void readToken (tokenInfo *const token)
{
	int c;

	token->type		= TOKEN_UNDEFINED;
	vStringClear (token->string);

	/* re-entered after a comment has been skipped */
getNextChar:

	c = getcFromInputFile ();
	c = skipWhitespaces (c);

	token->lineNumber   = getInputLineNumber ();
	token->filePosition = getInputFilePosition ();

	switch (c)
	{
		case EOF: token->type = TOKEN_EOF;					break;
		case '(': token->type = TOKEN_OPEN_PAREN;			break;
		case ')': token->type = TOKEN_CLOSE_PAREN;			break;
		case ';': token->type = TOKEN_SEMICOLON;			break;
		case ',': token->type = TOKEN_COMMA;				break;
		case '.': token->type = TOKEN_PERIOD;				break;
		case ':': token->type = TOKEN_COLON;				break;
		case '{': token->type = TOKEN_OPEN_CURLY;			break;
		case '}': token->type = TOKEN_CLOSE_CURLY;			break;
		case '[': token->type = TOKEN_OPEN_SQUARE;			break;
		case ']': token->type = TOKEN_CLOSE_SQUARE;			break;
		case '=': token->type = TOKEN_EQUAL_SIGN;			break;

		case '\'':
		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			/* the position is taken again once the whole string has been read */
			token->lineNumber = getInputLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '<':
		{
			int d = getcFromInputFile ();
			if (d == '#')
			{
				/* <# ... #> multiline comment */
				do
				{
					c = skipToCharacterInInputFile ('#');
					if (c != EOF)
					{
						c = getcFromInputFile ();
						if (c == '>')
							break;
						else
							/* not the end yet; the character may itself be a
							 * '#' starting the terminator, so look at it again */
							ungetcToInputFile (c);
					}
				} while (c != EOF);
				goto getNextChar;
			}
			else
			{
				/* a plain '<' is not classified any further */
				ungetcToInputFile (d);
				token->type = TOKEN_UNDEFINED;
			}
			break;
		}

		case '#': /* comment running to the end of the line */
			skipSingleComment ();
			goto getNextChar;
			break;

		case '+':
		case '-':
		case '*':
		case '/':
		case '%':
		{
			/* a directly following '=' is consumed as part of the operator */
			int d = getcFromInputFile ();
			if (d != '=')
				ungetcToInputFile (d);
			token->type = TOKEN_OPERATOR;
			break;
		}

		case '$': /* variable start */
		{
			int d = getcFromInputFile ();
			if (! isIdentChar (d))
			{
				/* a '$' not followed by an identifier character is no variable */
				ungetcToInputFile (d);
				token->type = TOKEN_UNDEFINED;
			}
			else
			{
				/* the stored name starts after the '$' */
				parseIdentifier (token->string, d);
				token->type = TOKEN_VARIABLE;
			}
			break;
		}

		default:
			if (! isIdentChar (c))
				token->type = TOKEN_UNDEFINED;
			else
			{
				parseIdentifier (token->string, c);
				/* "function" and "filter" are the only keywords told apart */
				if (isTokenFunction (token->string))
					token->type = TOKEN_KEYWORD;
				else
					token->type = TOKEN_IDENTIFIER;
			}
			break;
	}
}
368
/* Declared ahead of its definition because parseFunction() and enterScope()
 * call each other. */
static void enterScope (tokenInfo *const parentToken,
						const vString *const extraScope,
						const int parentKind);
372
373 /* strip a possible PowerShell scope specification and convert it to accessType */
parsePowerShellScope(tokenInfo * const token)374 static const char *parsePowerShellScope (tokenInfo *const token)
375 {
376 const char *access = ACCESS_UNDEFINED;
377 const char *const tokenName = vStringValue (token->string);
378 const char *powershellScopeEnd;
379
380 powershellScopeEnd = strchr (tokenName, ':');
381 if (powershellScopeEnd)
382 {
383 size_t powershellScopeLen;
384 vString * powershellScope = vStringNew ();
385
386 powershellScopeLen = (size_t)(powershellScopeEnd - tokenName);
387 /* extract the scope */
388 vStringNCopyS (powershellScope, tokenName, powershellScopeLen);
389 /* cut the resulting scope string from the identifier */
390 memmove (vStringValue (token->string),
391 /* +1 to skip the leading colon */
392 vStringValue (token->string) + powershellScopeLen + 1,
393 /* +1 for the skipped leading colon and - 1 to include the trailing \0 byte */
394 token->string->length + 1 - powershellScopeLen - 1);
395 token->string->length -= powershellScopeLen + 1;
396
397 access = findValidAccessType (vStringValue (powershellScope));
398
399 vStringDelete (powershellScope);
400 }
401 return access;
402 }
403
404
/* parse a function or filter definition
 *
 * 	function myfunc($foo, $bar) {}
 *
 * Called with TOKEN holding the "function"/"filter" keyword.  The next token
 * has to be the name, from which a scope prefix such as "global:" is
 * stripped and turned into the access of the tag.  A tag is made when the
 * name is followed by a parenthesized argument list (recorded as signature)
 * or directly by the body.  The body, if any, is parsed as a nested scope
 * named after the function.
 *
 * Returns true if the caller has to read the next token, false if TOKEN
 * already holds a token that still needs to be processed.
 */
static bool parseFunction (tokenInfo *const token)
{
	bool readNext = true;
	tokenInfo *nameFree = NULL;
	const char *access;

	readToken (token);

	if (token->type != TOKEN_IDENTIFIER)
		return false;

	access = parsePowerShellScope (token);

	/* keep the name, TOKEN is reused for reading on */
	nameFree = newToken ();
	copyToken (nameFree, token, true);
	readToken (token);

	if (token->type == TOKEN_OPEN_PAREN)
	{
		vString *arglist = vStringNew ();
		int depth = 1;

		/* rebuild a normalized text of the argument list from the tokens up
		 * to the parenthesis matching the opening one */
		vStringPut (arglist, '(');
		do
		{
			readToken (token);

			switch (token->type)
			{
				case TOKEN_OPEN_PAREN:  depth++; break;
				case TOKEN_CLOSE_PAREN: depth--; break;
				default: break;
			}
			/* display part */
			switch (token->type)
			{
				case TOKEN_CLOSE_CURLY:		vStringPut (arglist, '}');		break;
				case TOKEN_CLOSE_PAREN:		vStringPut (arglist, ')');		break;
				case TOKEN_CLOSE_SQUARE:	vStringPut (arglist, ']');		break;
				case TOKEN_COLON:			vStringPut (arglist, ':');		break;
				case TOKEN_COMMA:			vStringCatS (arglist, ", ");	break;
				case TOKEN_EQUAL_SIGN:		vStringCatS (arglist, " = ");	break;
				case TOKEN_OPEN_CURLY:		vStringPut (arglist, '{');		break;
				case TOKEN_OPEN_PAREN:		vStringPut (arglist, '(');		break;
				case TOKEN_OPEN_SQUARE:		vStringPut (arglist, '[');		break;
				case TOKEN_PERIOD:			vStringPut (arglist, '.');		break;
				case TOKEN_SEMICOLON:		vStringPut (arglist, ';');		break;
				/* string contents are not shown in the signature */
				case TOKEN_STRING:			vStringCatS (arglist, "'...'");	break;

				case TOKEN_IDENTIFIER:
				case TOKEN_KEYWORD:
				case TOKEN_VARIABLE:
				{
					switch (vStringLast (arglist))
					{
						case 0:
						case ' ':
						case '{':
						case '(':
						case '[':
						case '.':
							/* no need for a space between those and the identifier */
							break;

						default:
							vStringPut (arglist, ' ');
							break;
					}
					/* the lexer dropped the '$' of variables, put it back */
					if (token->type == TOKEN_VARIABLE)
						vStringPut (arglist, '$');
					vStringCat (arglist, token->string);
					break;
				}

				default: break;
			}
		}
		while (token->type != TOKEN_EOF && depth > 0);

		makeFunctionTag (nameFree, arglist, access);
		vStringDelete (arglist);

		/* move on to what should be the opening curly of the body */
		readToken (token);
	}
	else if (token->type == TOKEN_OPEN_CURLY)
	{	/* no argument list in parentheses: the body follows the name directly */
		makeFunctionTag (nameFree, NULL, access);
	}

	if (token->type == TOKEN_OPEN_CURLY)
		enterScope (token, nameFree->string, K_FUNCTION);
	else
		/* not a body: leave the token for the caller to look at */
		readNext = false;

	if (nameFree)
		deleteToken (nameFree);

	return readNext;
}
508
509 /* parses declarations of the form
510 * $var = VALUE
511 */
parseVariable(tokenInfo * const token)512 static bool parseVariable (tokenInfo *const token)
513 {
514 tokenInfo *name;
515 bool readNext = true;
516 const char *access;
517
518 name = newToken ();
519 copyToken (name, token, true);
520
521 readToken (token);
522 if (token->type == TOKEN_EQUAL_SIGN)
523 {
524 if (token->parentKind != K_FUNCTION)
525 { /* ignore local variables (i.e. within a function) */
526 access = parsePowerShellScope (name);
527 makeSimplePowerShellTag (name, K_VARIABLE, access);
528 readNext = true;
529 }
530 }
531 else
532 readNext = false;
533
534 deleteToken (name);
535
536 return readNext;
537 }
538
/*
 * Parses the input up to the curly brace closing the current block, or up to
 * the end of the input, making tags for the functions and variables found.
 *
 * The block is read with a working copy of PARENTTOKEN.  If EXTRASCOPE is not
 * NULL it is appended to the scope of that copy and PARENTKIND becomes its
 * parent kind; otherwise scope and parent kind are inherited unchanged.
 * Nested blocks are handled recursively.
 *
 * On return PARENTTOKEN holds the token that ended the block; its scope and
 * parent kind are the ones it had on entry.
 */
static void enterScope (tokenInfo *const parentToken,
						const vString *const extraScope,
						const int parentKind)
{
	const int savedParentKind = parentToken->parentKind;
	tokenInfo *const current = newToken ();

	copyToken (current, parentToken, true);

	if (extraScope != NULL)
	{
		addToScope (current, extraScope);
		current->parentKind = parentKind;
	}

	readToken (current);
	for (;;)
	{
		bool advance = true;

		if (current->type == TOKEN_EOF || current->type == TOKEN_CLOSE_CURLY)
			break;

		if (current->type == TOKEN_OPEN_CURLY)
			enterScope (current, NULL, KIND_GHOST_INDEX);
		else if (current->type == TOKEN_KEYWORD)
			advance = parseFunction (current);
		else if (current->type == TOKEN_VARIABLE)
			advance = parseVariable (current);

		/* the sub-parsers may have left an unprocessed token behind */
		if (advance)
			readToken (current);
	}

	/* hand the terminating token back, but neither the inner scope
	 * nor the inner parent kind */
	copyToken (parentToken, current, false);
	parentToken->parentKind = savedParentKind;
	deleteToken (current);
}
585
findPowerShellTags(void)586 static void findPowerShellTags (void)
587 {
588 tokenInfo *const token = newToken ();
589
590 do
591 {
592 enterScope (token, NULL, KIND_GHOST_INDEX);
593 }
594 while (token->type != TOKEN_EOF); /* keep going even with unmatched braces */
595
596 deleteToken (token);
597 }
598
PowerShellParser(void)599 extern parserDefinition* PowerShellParser (void)
600 {
601 static const char *const extensions [] = { "ps1", "psm1", NULL };
602 parserDefinition* def = parserNew ("PowerShell");
603 def->kindTable = PowerShellKinds;
604 def->kindCount = ARRAY_SIZE (PowerShellKinds);
605 def->extensions = extensions;
606 def->parser = findPowerShellTags;
607 return def;
608 }
609