xref: /Universal-ctags/parsers/powershell.c (revision e852ee0e939802331dc3117fd85a31b243c3d04f)
1 /*
2 *   Copyright (c) 2015, Enrico Tröger <enrico.troeger@uvena.de>
3 *
4 *   Loosely based on the PHP tags parser since the syntax is somewhat similar
5 *   regarding variable and function definitions.
6 *
7 *   This source code is released for free distribution under the terms of the
8 *   GNU General Public License version 2 or (at your option) any later version.
9 *
10 *   This module contains code for generating tags for Windows PowerShell scripts
11 *   (https://en.wikipedia.org/wiki/PowerShell).
12 */
13 
14 /*
15 *   INCLUDE FILES
16 */
17 #include "general.h"  /* must always come first */
18 #include "debug.h"
19 #include "parse.h"
20 #include "read.h"
21 #include "vstring.h"
22 #include "keyword.h"
23 #include "entry.h"
24 #include "routines.h"
25 #include <string.h>
26 
/* Text inserted between the components of a nested scope name. */
#define SCOPE_SEPARATOR "::"
28 
29 
/* Value used when a tag carries no access information. */
#define ACCESS_UNDEFINED NULL

/*
 * Canonical spellings of the scope prefixes that are reported in the
 * "access" field of a tag.  Slot 0 holds the "no access" placeholder.
 */
static const char *const accessTypes[] = {
	ACCESS_UNDEFINED,	/* placeholder, never matched by name */
	"global",
	"local",
	"script",
	"private"
};
38 
/*
 * Tag kinds emitted by this parser.  The values double as indices into
 * PowerShellKinds[]; COUNT_KIND is the number of kinds.
 */
typedef enum {
	K_FUNCTION,		/* function or filter definition */
	K_VARIABLE,		/* variable assignment */
	COUNT_KIND
} powerShellKind;
44 
45 static kindDefinition PowerShellKinds[COUNT_KIND] = {
46 	{ true, 'f', "function",	"functions" },
47 	{ true, 'v', "variable",	"variables" }
48 };
49 
50 
/* Lexical categories produced by readToken(). */
typedef enum eTokenType {
	TOKEN_UNDEFINED,	/* anything the lexer does not classify */
	TOKEN_EOF,			/* end of input */
	TOKEN_CLOSE_PAREN,	/* ) */
	TOKEN_SEMICOLON,	/* ; */
	TOKEN_COLON,		/* : */
	TOKEN_COMMA,		/* , */
	TOKEN_KEYWORD,		/* "function" or "filter" */
	TOKEN_OPEN_PAREN,	/* ( */
	TOKEN_OPERATOR,		/* + - * / % and their "op=" forms */
	TOKEN_IDENTIFIER,	/* bare word that is not a keyword */
	TOKEN_STRING,		/* '...' or "..." */
	TOKEN_PERIOD,		/* . */
	TOKEN_OPEN_CURLY,	/* { */
	TOKEN_CLOSE_CURLY,	/* } */
	TOKEN_EQUAL_SIGN,	/* = */
	TOKEN_OPEN_SQUARE,	/* [ */
	TOKEN_CLOSE_SQUARE,	/* ] */
	TOKEN_VARIABLE		/* $name */
} tokenType;
71 
72 typedef struct {
73 	tokenType		type;
74 	vString *		string;
75 	vString *		scope;
76 	unsigned long	lineNumber;
77 	MIOPos			filePosition;
78 	int 			parentKind; /* KIND_GHOST_INDEX if none */
79 } tokenInfo;
80 
81 
findValidAccessType(const char * const access)82 static const char *findValidAccessType (const char *const access)
83 {
84 	unsigned int i;
85 	if (access == ACCESS_UNDEFINED)
86 		return ACCESS_UNDEFINED; /* early out to save the for-loop if possible */
87 	for (i = 0; i < ARRAY_SIZE(accessTypes); i++)
88 	{
89 		if (accessTypes[i] == ACCESS_UNDEFINED)
90 			continue;
91 		if (strcasecmp (access, accessTypes[i]) == 0)
92 			return accessTypes[i];
93 		i++;
94 	}
95 	return ACCESS_UNDEFINED;
96 }
97 
/*
 * Fill in tag entry E for TOKEN.
 *
 * The entry is set up through initTagEntry() with the token's text and
 * KIND, then takes over the token's line number and file position.  ACCESS
 * is recorded when it is not NULL, and the token's scope (name plus parent
 * kind) is recorded when the scope string is not empty.
 */
static void initPowerShellEntry (tagEntryInfo *const e, const tokenInfo *const token,
								 const powerShellKind kind, const char *const access)
{
	initTagEntry (e, vStringValue (token->string), kind);

	e->filePosition	= token->filePosition;
	e->lineNumber	= token->lineNumber;

	if (access != NULL)
		e->extensionFields.access = access;

	/* tokens read at the top level have no scope to report */
	if (vStringLength (token->scope) == 0)
		return;

	{
		int parentKind = token->parentKind;
		Assert (parentKind >= 0);

		e->extensionFields.scopeName = vStringValue (token->scope);
		e->extensionFields.scopeKindIndex = parentKind;
	}
}
117 
makeSimplePowerShellTag(const tokenInfo * const token,const powerShellKind kind,const char * const access)118 static void makeSimplePowerShellTag (const tokenInfo *const token, const powerShellKind kind,
119 									 const char *const access)
120 {
121 	if (PowerShellKinds[kind].enabled)
122 	{
123 		tagEntryInfo e;
124 
125 		initPowerShellEntry (&e, token, kind, access);
126 		makeTagEntry (&e);
127 	}
128 }
129 
makeFunctionTag(const tokenInfo * const token,const vString * const arglist,const char * const access)130 static void makeFunctionTag (const tokenInfo *const token, const vString *const arglist,
131 							 const char *const access)
132 {
133 	if (PowerShellKinds[K_FUNCTION].enabled)
134 	{
135 		tagEntryInfo e;
136 
137 		initPowerShellEntry (&e, token, K_FUNCTION, access);
138 
139 		if (arglist)
140 			e.extensionFields.signature = vStringValue (arglist);
141 
142 		makeTagEntry (&e);
143 	}
144 }
145 
newToken(void)146 static tokenInfo *newToken (void)
147 {
148 	tokenInfo *const token = xMalloc (1, tokenInfo);
149 
150 	token->type			= TOKEN_UNDEFINED;
151 	token->string		= vStringNew ();
152 	token->scope		= vStringNew ();
153 	token->lineNumber   = getInputLineNumber ();
154 	token->filePosition = getInputFilePosition ();
155 	token->parentKind	= KIND_GHOST_INDEX;
156 
157 	return token;
158 }
159 
/* Release TOKEN together with the two strings it owns. */
static void deleteToken (tokenInfo *const token)
{
	vStringDelete (token->scope);
	vStringDelete (token->string);
	eFree (token);
}
166 
/*
 * Copy SRC into DEST: type, text, position and parent kind are always
 * copied, the scope string only when SCOPE is true.
 */
static void copyToken (tokenInfo *const dest, const tokenInfo *const src,
					   bool scope)
{
	dest->type = src->type;
	dest->parentKind = src->parentKind;
	dest->lineNumber = src->lineNumber;
	dest->filePosition = src->filePosition;

	vStringCopy (dest->string, src->string);
	if (scope)
		vStringCopy (dest->scope, src->scope);
}
178 
/*
 * Append EXTRA to the scope of TOKEN.  SCOPE_SEPARATOR is inserted first
 * when the scope already has content.
 */
static void addToScope (tokenInfo *const token, const vString *const extra)
{
	const bool needsSeparator = (vStringLength (token->scope) > 0);

	if (needsSeparator)
		vStringCatS (token->scope, SCOPE_SEPARATOR);

	vStringCatS (token->scope, vStringValue (extra));
}
185 
/*
 * Tell whether C may appear in an identifier: ASCII letters and digits,
 * ':', '_', '-', and every value from 0x80 upwards.
 */
static bool isIdentChar (const int c)
{
	if (c >= 0x80)
		return true;

	switch (c)
	{
		case ':':
		case '_':
		case '-':
			return true;

		default:
			return isalnum (c) != 0;
	}
}
190 
/*
 * Read the rest of a quoted string into STRING.  The opening quote has
 * already been consumed by the caller; reading stops after the closing
 * DELIMITER, which is not stored, or at end of input.
 *
 * PowerShell escapes with the backtick, and only inside double-quoted
 * strings: there the character following a backtick is stored as is, so an
 * escaped quote does not end the string.  Single-quoted strings are read
 * literally.  A backslash is an ordinary character in both forms, so a
 * literal such as 'C:\' ends at its closing quote.
 *
 * A doubled delimiter is not treated specially: it ends this string, and
 * the next call made for the second quote starts another one.
 */
static void parseString (vString *const string, const int delimiter)
{
	const bool honoursBacktick = (delimiter == '"');

	while (true)
	{
		int c = getcFromInputFile ();

		if (honoursBacktick && c == '`')
		{
			/* store the escaped character, whatever it is */
			c = getcFromInputFile ();
			if (c == EOF)
				break;
			vStringPut (string, (char) c);
			continue;
		}

		if (c == EOF || c == delimiter)
			break;

		vStringPut (string, (char) c);
	}
}
205 
/*
 * Append an identifier to STRING: FIRSTCHAR, which the caller has already
 * read, followed by input characters for as long as isIdentChar() accepts
 * them.  The first rejected character is pushed back onto the input.
 */
static void parseIdentifier (vString *const string, const int firstChar)
{
	int c;

	vStringPut (string, (char) firstChar);
	while (isIdentChar (c = getcFromInputFile ()))
		vStringPut (string, (char) c);

	ungetcToInputFile (c);
}
216 
/*
 * Tell whether NAME is one of the words that introduce a function
 * definition, "function" or "filter", compared without regard to case.
 */
static bool isTokenFunction (vString *const name)
{
	const char *const word = vStringValue (name);

	if (strcasecmp (word, "function") == 0)
		return true;

	return strcasecmp (word, "filter") == 0;
}
222 
/*
 * Tell whether C is one of the six ASCII whitespace characters: tab,
 * space, vertical tab, newline, carriage return or form feed.
 */
static bool isSpace (int c)
{
	switch (c)
	{
		case '\t':
		case ' ':
		case '\v':
		case '\n':
		case '\r':
		case '\f':
			return true;

		default:
			return false;
	}
}
228 
/*
 * Starting with C, which the caller has already read, consume input for as
 * long as isSpace() holds and return the first character for which it does
 * not (possibly C itself, possibly EOF).
 */
static int skipWhitespaces (int c)
{
	for (; isSpace (c); c = getcFromInputFile ())
		;	/* nothing to do, the loop header advances */

	return c;
}
235 
/*
 * Consume the rest of the current line, including its terminator.
 *
 * "\n", "\r\n" and a lone "\r" all end the line.  Returns the last
 * character consumed: '\n', '\r' (lone carriage return) or EOF.
 */
static int skipSingleComment (void)
{
	for (;;)
	{
		const int c = getcFromInputFile ();

		if (c == EOF || c == '\n')
			return c;

		if (c == '\r')
		{
			/* swallow the '\n' of a "\r\n" pair, keep anything else */
			const int following = getcFromInputFile ();

			if (following == '\n')
				return following;

			ungetcToInputFile (following);
			return c;
		}
	}
}
253 
/*
 * Read the next token from the input file into TOKEN.
 *
 * token->string is cleared first and is only filled in for strings,
 * variables (without the leading '$') and identifiers/keywords.
 * Whitespace, '#' line comments and <# ... #> block comments are skipped.
 * token->lineNumber and token->filePosition are refreshed on every call;
 * token->scope and token->parentKind are not touched.
 */
static void readToken (tokenInfo *const token)
{
	token->type = TOKEN_UNDEFINED;
	vStringClear (token->string);

	for (;;)
	{
		int c = skipWhitespaces (getcFromInputFile ());

		token->lineNumber   = getInputLineNumber ();
		token->filePosition = getInputFilePosition ();

		if (c == '#')
		{
			/* line comment: drop it and look for a token again */
			skipSingleComment ();
			continue;
		}

		if (c == '<')
		{
			const int next = getcFromInputFile ();

			if (next != '#')
			{
				/* a plain '<' is not classified */
				ungetcToInputFile (next);
				token->type = TOKEN_UNDEFINED;
				return;
			}

			/* <# ... #> multiline comment: search for a '#' that is
			 * directly followed by '>' */
			for (;;)
			{
				c = skipToCharacterInInputFile ('#');
				if (c == EOF)
					break;

				c = getcFromInputFile ();
				if (c == '>')
					break;

				ungetcToInputFile (c);
				if (c == EOF)
					break;
			}
			continue;
		}

		switch (c)
		{
			case EOF: token->type = TOKEN_EOF;				break;
			case '(': token->type = TOKEN_OPEN_PAREN;		break;
			case ')': token->type = TOKEN_CLOSE_PAREN;		break;
			case '{': token->type = TOKEN_OPEN_CURLY;		break;
			case '}': token->type = TOKEN_CLOSE_CURLY;		break;
			case '[': token->type = TOKEN_OPEN_SQUARE;		break;
			case ']': token->type = TOKEN_CLOSE_SQUARE;		break;
			case ';': token->type = TOKEN_SEMICOLON;		break;
			case ':': token->type = TOKEN_COLON;			break;
			case ',': token->type = TOKEN_COMMA;			break;
			case '.': token->type = TOKEN_PERIOD;			break;
			case '=': token->type = TOKEN_EQUAL_SIGN;		break;

			case '\'':
			case '"':
				token->type = TOKEN_STRING;
				parseString (token->string, c);
				/* a string token is positioned where it ends */
				token->lineNumber = getInputLineNumber ();
				token->filePosition = getInputFilePosition ();
				break;

			case '+':
			case '-':
			case '*':
			case '/':
			case '%':
			{
				/* fold a following '=' into the operator */
				const int next = getcFromInputFile ();

				if (next != '=')
					ungetcToInputFile (next);
				token->type = TOKEN_OPERATOR;
				break;
			}

			case '$': /* variable start */
			{
				const int next = getcFromInputFile ();

				if (isIdentChar (next))
				{
					parseIdentifier (token->string, next);
					token->type = TOKEN_VARIABLE;
				}
				else
				{
					ungetcToInputFile (next);
					token->type = TOKEN_UNDEFINED;
				}
				break;
			}

			default:
				if (isIdentChar (c))
				{
					parseIdentifier (token->string, c);
					token->type = isTokenFunction (token->string)
						? TOKEN_KEYWORD
						: TOKEN_IDENTIFIER;
				}
				else
					token->type = TOKEN_UNDEFINED;
				break;
		}
		return;
	}
}
368 
369 static void enterScope (tokenInfo *const parentToken,
370 						const vString *const extraScope,
371 						const int parentKind);
372 
373 /* strip a possible PowerShell scope specification and convert it to accessType */
parsePowerShellScope(tokenInfo * const token)374 static const char *parsePowerShellScope (tokenInfo *const token)
375 {
376 	const char *access = ACCESS_UNDEFINED;
377 	const char *const tokenName = vStringValue (token->string);
378 	const char *powershellScopeEnd;
379 
380 	powershellScopeEnd = strchr (tokenName, ':');
381 	if (powershellScopeEnd)
382 	{
383 		size_t powershellScopeLen;
384 		vString * powershellScope = vStringNew ();
385 
386 		powershellScopeLen = (size_t)(powershellScopeEnd - tokenName);
387 		/* extract the scope */
388 		vStringNCopyS (powershellScope, tokenName, powershellScopeLen);
389 		/* cut the resulting scope string from the identifier */
390 		memmove (vStringValue (token->string),
391 				 /* +1 to skip the leading colon */
392 				 vStringValue (token->string) + powershellScopeLen + 1,
393 				 /* +1 for the skipped leading colon and - 1 to include the trailing \0 byte */
394 				 token->string->length + 1 - powershellScopeLen - 1);
395 		token->string->length -= powershellScopeLen + 1;
396 
397 		access = findValidAccessType (vStringValue (powershellScope));
398 
399 		vStringDelete (powershellScope);
400 	}
401 	return access;
402 }
403 
404 
405 /* parse a function
406  *
407  * 	function myfunc($foo, $bar) {}
408  */
parseFunction(tokenInfo * const token)409 static bool parseFunction (tokenInfo *const token)
410 {
411 	bool readNext = true;
412 	tokenInfo *nameFree = NULL;
413 	const char *access;
414 
415 	readToken (token);
416 
417 	if (token->type != TOKEN_IDENTIFIER)
418 		return false;
419 
420 	access = parsePowerShellScope (token);
421 
422 	nameFree = newToken ();
423 	copyToken (nameFree, token, true);
424 	readToken (token);
425 
426 	if (token->type == TOKEN_OPEN_PAREN)
427 	{
428 		vString *arglist = vStringNew ();
429 		int depth = 1;
430 
431 		vStringPut (arglist, '(');
432 		do
433 		{
434 			readToken (token);
435 
436 			switch (token->type)
437 			{
438 				case TOKEN_OPEN_PAREN:  depth++; break;
439 				case TOKEN_CLOSE_PAREN: depth--; break;
440 				default: break;
441 			}
442 			/* display part */
443 			switch (token->type)
444 			{
445 				case TOKEN_CLOSE_CURLY:		vStringPut (arglist, '}');		break;
446 				case TOKEN_CLOSE_PAREN:		vStringPut (arglist, ')');		break;
447 				case TOKEN_CLOSE_SQUARE:	vStringPut (arglist, ']');		break;
448 				case TOKEN_COLON:			vStringPut (arglist, ':');		break;
449 				case TOKEN_COMMA:			vStringCatS (arglist, ", ");	break;
450 				case TOKEN_EQUAL_SIGN:		vStringCatS (arglist, " = ");	break;
451 				case TOKEN_OPEN_CURLY:		vStringPut (arglist, '{');		break;
452 				case TOKEN_OPEN_PAREN:		vStringPut (arglist, '(');		break;
453 				case TOKEN_OPEN_SQUARE:		vStringPut (arglist, '[');		break;
454 				case TOKEN_PERIOD:			vStringPut (arglist, '.');		break;
455 				case TOKEN_SEMICOLON:		vStringPut (arglist, ';');		break;
456 				case TOKEN_STRING:			vStringCatS (arglist, "'...'");	break;
457 
458 				case TOKEN_IDENTIFIER:
459 				case TOKEN_KEYWORD:
460 				case TOKEN_VARIABLE:
461 				{
462 					switch (vStringLast (arglist))
463 					{
464 						case 0:
465 						case ' ':
466 						case '{':
467 						case '(':
468 						case '[':
469 						case '.':
470 							/* no need for a space between those and the identifier */
471 							break;
472 
473 						default:
474 							vStringPut (arglist, ' ');
475 							break;
476 					}
477 					if (token->type == TOKEN_VARIABLE)
478 						vStringPut (arglist, '$');
479 					vStringCat (arglist, token->string);
480 					break;
481 				}
482 
483 				default: break;
484 			}
485 		}
486 		while (token->type != TOKEN_EOF && depth > 0);
487 
488 		makeFunctionTag (nameFree, arglist, access);
489 		vStringDelete (arglist);
490 
491 		readToken (token);
492 	}
493 	else if (token->type == TOKEN_OPEN_CURLY)
494 	{	/* filters doesn't need to have an arglist */
495 		makeFunctionTag (nameFree, NULL, access);
496 	}
497 
498 	if (token->type == TOKEN_OPEN_CURLY)
499 		enterScope (token, nameFree->string, K_FUNCTION);
500 	else
501 		readNext = false;
502 
503 	if (nameFree)
504 		deleteToken (nameFree);
505 
506 	return readNext;
507 }
508 
509 /* parses declarations of the form
510  * 	$var = VALUE
511  */
parseVariable(tokenInfo * const token)512 static bool parseVariable (tokenInfo *const token)
513 {
514 	tokenInfo *name;
515 	bool readNext = true;
516 	const char *access;
517 
518 	name = newToken ();
519 	copyToken (name, token, true);
520 
521 	readToken (token);
522 	if (token->type == TOKEN_EQUAL_SIGN)
523 	{
524 		if (token->parentKind != K_FUNCTION)
525 		{	/* ignore local variables (i.e. within a function) */
526 			access = parsePowerShellScope (name);
527 			makeSimplePowerShellTag (name, K_VARIABLE, access);
528 			readNext = true;
529 		}
530 	}
531 	else
532 		readNext = false;
533 
534 	deleteToken (name);
535 
536 	return readNext;
537 }
538 
/*
 * Parse tokens until the closing '}' of the current block or end of input.
 *
 * Works on a private copy of PARENTTOKEN.  When EXTRASCOPE is not NULL it
 * is appended to the copy's scope and PARENTKIND becomes its parent kind,
 * so tags made inside the block are attributed to that scope.  Nested '{'
 * blocks recurse without adding a scope level.
 *
 * On return PARENTTOKEN holds the last token read (type, text and
 * position), while its own scope and parent kind are as they were on entry.
 */
static void enterScope (tokenInfo *const parentToken,
						const vString *const extraScope,
						const int parentKind)
{
	const int savedParentKind = parentToken->parentKind;
	tokenInfo *const inner = newToken ();

	copyToken (inner, parentToken, true);
	if (extraScope != NULL)
	{
		addToScope (inner, extraScope);
		inner->parentKind = parentKind;
	}

	readToken (inner);
	while (inner->type != TOKEN_EOF && inner->type != TOKEN_CLOSE_CURLY)
	{
		/* the sub-parsers report whether they left a token unprocessed */
		bool advance = true;

		if (inner->type == TOKEN_OPEN_CURLY)
			enterScope (inner, NULL, KIND_GHOST_INDEX);
		else if (inner->type == TOKEN_KEYWORD)
			advance = parseFunction (inner);
		else if (inner->type == TOKEN_VARIABLE)
			advance = parseVariable (inner);

		if (advance)
			readToken (inner);
	}

	/* hand the final token back, but not the scope of this block */
	copyToken (parentToken, inner, false);
	parentToken->parentKind = savedParentKind;

	deleteToken (inner);
}
585 
findPowerShellTags(void)586 static void findPowerShellTags (void)
587 {
588 	tokenInfo *const token = newToken ();
589 
590 	do
591 	{
592 		enterScope (token, NULL, KIND_GHOST_INDEX);
593 	}
594 	while (token->type != TOKEN_EOF); /* keep going even with unmatched braces */
595 
596 	deleteToken (token);
597 }
598 
PowerShellParser(void)599 extern parserDefinition* PowerShellParser (void)
600 {
601 	static const char *const extensions [] = { "ps1", "psm1", NULL };
602 	parserDefinition* def = parserNew ("PowerShell");
603 	def->kindTable  = PowerShellKinds;
604 	def->kindCount  = ARRAY_SIZE (PowerShellKinds);
605 	def->extensions = extensions;
606 	def->parser     = findPowerShellTags;
607 	return def;
608 }
609