xref: /Universal-ctags/parsers/vim.c (revision 750ebcec75bfca0d9bf0ddf958a1616cf76779c8)
1 /*
2 *   Copyright (c) 2000-2003, Darren Hiebert
3 *
4 *   This source code is released for free distribution under the terms of the
5 *   GNU General Public License version 2 or (at your option) any later version.
6 *
7 *   Thanks are due to Jay Glanville for significant improvements.
8 *
9 *   This module contains functions for generating tags for user-defined
10 *   functions for the Vim editor.
11 */
12 
13 /*
14  *  INCLUDE FILES
15  */
16 #include "general.h"  /* must always come first */
17 
18 #include <string.h>
19 #ifdef DEBUG
20 #include <stdio.h>
21 #endif
22 
23 #include "debug.h"
24 #include "entry.h"
25 #include "parse.h"
26 #include "read.h"
27 #include "routines.h"
28 #include "vstring.h"
29 
30 #if 0
31 typedef struct sLineInfo {
32 	tokenType type;
33 	keywordId keyword;
34 	vString *string;
35 	vString *scope;
36 	unsigned long lineNumber;
37 	MIOPos filePosition;
38 } lineInfo;
39 #endif
40 
41 /*
42  * DATA DEFINITIONS
43  */
/* Indices of the tag kinds produced by this parser.  The constants are
 * numbered consecutively from zero, in the same order as the rows of the
 * VimKinds table. */
typedef enum {
	K_AUGROUP  = 0,
	K_COMMAND  = 1,
	K_FUNCTION = 2,
	K_MAP      = 3,
	K_VARIABLE = 4,
	K_FILENAME = 5,
	K_CONST    = 6,
} vimKind;
53 
54 static kindDefinition VimKinds [] = {
55 	{ true,  'a', "augroup",  "autocommand groups" },
56 	{ true,  'c', "command",  "user-defined commands" },
57 	{ true,  'f', "function", "function definitions" },
58 	{ true,  'm', "map",      "maps" },
59 	{ true,  'v', "variable", "variable definitions" },
60 	{ true,  'n', "filename", "vimball filename" },
61 	{ true,  'C', "constant", "constant definitions" },
62 };
63 
64 /*
65  *  DATA DECLARATIONS
66  */
67 
68 #if 0
69 typedef enum eException {
70 	ExceptionNone, ExceptionEOF
71 } exception_t;
72 #endif
73 
74 /*
75  *  DATA DEFINITIONS
76  */
77 
78 #if 0
79 static jmp_buf Exception;
80 #endif
81 
82 /*
83  *  FUNCTION DEFINITIONS
84  */
85 
86 static bool parseVimLine (const unsigned char *line, int infunction);
87 
/* Inspects the start of `name` for a Vim scope marker.
 *
 * name:  NUL-terminated text starting at a function name; must not be NULL.
 * scope: optional output (may be NULL).  When given it is first reset to
 *        '\0' and then receives the detected scope character.
 *
 * Three cases are recognized:
 *   - "x:..." (text longer than 3 characters whose second character is a
 *     colon): scope is the first character and the returned pointer is just
 *     past the colon.
 *   - "<SID>..." (matched caselessly, text longer than 5 characters): scope
 *     is '<' and the returned pointer is just past the "<SID>".
 *   - anything else: the returned pointer is `name` itself.  The first
 *     character and the run of alphanumerics, '_', '.' and '#' following it
 *     are scanned; a '.' sets scope to 'd' (dictionary) and a '#' sets it to
 *     'a' (autoload).  If both occur, the one appearing last wins.
 */
static const unsigned char *skipPrefix (const unsigned char *name, int *scope)
{
	const unsigned char *result = name;
	const size_t length = strlen ((const char *) name);

	if (scope != NULL)
		*scope = '\0';

	if (length > 3 && name[1] == ':')
	{
		if (scope != NULL)
			*scope = *name;
		result = name + 2;
	}
	else if (length > 5 && strncasecmp ((const char *) name, "<SID>", (size_t) 5) == 0)
	{
		if (scope != NULL)
			*scope = *name;
		result = name + 5;
	}
	else if (length > 0 && scope != NULL)
	{
		/*
		 * Vim7 check for dictionaries or autoload function names.
		 *
		 * The scan only ever writes to *scope, so it is skipped when the
		 * caller passed NULL.  It is also skipped for an empty string:
		 * the do/while below always steps to index 1, which would lie
		 * beyond the terminator.
		 */
		size_t i = 0;

		do
		{
			switch (name[i])
			{
				case '.':
					/* Set the scope to d - Dictionary */
					*scope = 'd';
					break;
				case '#':
					/* Set the scope to a - autoload */
					*scope = 'a';
					break;
				default:
					break;
			}
			++i;
		} while (isalnum ((int) name[i]) ||
				name[i] == '_'           ||
				name[i] == '.'           ||
				name[i] == '#');
	}
	return result;
}
141 
/* Tells whether `c` counts as a word character here: an underscore or
 * anything isalnum() accepts. */
static bool isWordChar (const unsigned char c)
{
	if (c == '_')
		return true;

	return isalnum (c) != 0;
}
146 
/* Checks whether the text at `p` begins with an abbreviation of `word`.
 *
 * The leading characters of `p` are compared with `word` for as long as
 * they agree.  The result is true when at least `min_len` characters
 * agreed and the character following the agreeing run in `p` is not a word
 * character (so the word in `p` ends exactly there). */
static bool wordMatchLen (const unsigned char *p, const char *const word, size_t min_len)
{
	const unsigned char *const expected = (const unsigned char *) word;
	size_t matched = 0;

	while (p[matched] != '\0' && p[matched] == expected[matched])
		matched++;

	/* a word that keeps going past the agreeing run is a different word */
	return !isWordChar (p[matched]) && matched >= min_len;
}
166 
/* Returns a pointer to the first character at or after `p` that is not a
 * word character (possibly the terminating NUL). */
static const unsigned char *skipWord (const unsigned char *p)
{
	const unsigned char *cursor;

	for (cursor = p; *cursor != '\0' && isWordChar (*cursor); cursor++)
		;

	return cursor;
}
173 
/* Tells whether `line` starts with one of the map commands.
 *
 * There are many different short cuts for specifying a map; each table row
 * pairs the full command name with the minimum number of leading characters
 * that must be present for the abbreviation to be accepted. */
static bool isMap (const unsigned char *line)
{
	static const struct {
		const char *command;
		size_t      minLen;
	} mapCommands [] = {
		{ "map",      3 },
		{ "nmap",     2 },
		{ "vmap",     2 },
		{ "xmap",     2 },
		{ "smap",     4 },
		{ "omap",     2 },
		{ "imap",     2 },
		{ "lmap",     2 },
		{ "cmap",     2 },
		{ "noremap",  2 },
		{ "nnoremap", 2 },
		{ "vnoremap", 2 },
		{ "xnoremap", 2 },
		{ "snoremap", 4 },
		{ "onoremap", 3 },
		{ "inoremap", 3 },
		{ "lnoremap", 2 },
		{ "cnoremap", 3 },
	};
	const size_t count = sizeof mapCommands / sizeof mapCommands[0];
	size_t i;

	for (i = 0; i < count; i++)
	{
		if (wordMatchLen (line, mapCommands[i].command, mapCommands[i].minLen))
			return true;
	}

	return false;
}
199 
/* Reads input lines until one is found that is not a comment.
 *
 * Leading whitespace is skipped on every line; a line whose first
 * remaining character is a double quote is discarded as a comment.
 * Returns a pointer to the first non-blank character of the accepted line
 * (which may be the terminating NUL of a blank line), or NULL once
 * readLineFromInputFile() returns NULL. */
static const unsigned char *readVimLine (void)
{
	for (;;)
	{
		const unsigned char *cursor = readLineFromInputFile ();

		if (cursor == NULL)
			return NULL;

		while (isspace ((int) *cursor))
			++cursor;

		if ((int) *cursor != '"')
			return cursor;
		/* otherwise: comment line, fetch the next one */
	}
}
217 
/* Reads one raw line for the vimball parser.  Unlike readVimLine(), nothing
 * is skipped: the result of readLineFromInputFile() is handed back as is
 * (NULL included). */
static const unsigned char *readVimballLine (void)
{
	return readLineFromInputFile ();
}
229 
/* Collects the text of a function signature into a buffer.
 *
 * cp:  text to consume; must not be NULL.
 * e:   tag entry that receives the finished signature; must not be NULL.
 * buf: buffer holding the part collected so far, or NULL to start a new
 *      signature.  When starting, the first character of `cp` is stored
 *      unconditionally.
 *
 * Characters are appended up to and including the first ')'.  Whitespace is
 * dropped while the last stored character is a comma.
 *
 * Returns NULL when the closing ')' was reached; the collected text has then
 * been stored in e->extensionFields.signature via vStringDeleteUnwrap().
 * Otherwise returns the buffer, so that the caller can pass it in again
 * together with a continuation line. */
static vString *parseSignature (const unsigned char *cp,
								tagEntryInfo *e,
								vString *buf)
{
	/* TODO capture parameters */

	Assert (e);
	Assert (cp);

	if (buf == NULL)
	{
		buf = vStringNew ();
		vStringPut (buf, *cp);
		++cp;
	}

	for (; *cp != '\0'; ++cp)
	{
		/* squeeze out blanks that directly follow a comma */
		if (isspace ((int) *cp) && vStringLast (buf) == ',')
			continue;

		vStringPut (buf, *cp);

		if (*cp == ')')
		{
			/* signature complete: hand the text over to the tag entry */
			e->extensionFields.signature = vStringDeleteUnwrap (buf);
			return NULL;
		}
	}

	/* ran out of text before ')': the caller may continue on the next line */
	return buf;
}
268 
parseFunction(const unsigned char * line)269 static void parseFunction (const unsigned char *line)
270 {
271 	vString *name = vStringNew ();
272 	vString *signature = NULL;
273 	/* bool inFunction = false; */
274 	int scope;
275 	const unsigned char *cp = line;
276 	int index = CORK_NIL;
277 	tagEntryInfo *e = NULL;
278 
279 	if (*cp == '!')
280 		++cp;
281 	if (isspace ((int) *cp))
282 	{
283 		while (*cp && isspace ((int) *cp))
284 			++cp;
285 
286 		if (*cp)
287 		{
288 			cp = skipPrefix (cp, &scope);
289 			if (isupper ((int) *cp)  ||
290 					scope == 's'  ||  /* script scope */
291 					scope == '<'  ||  /* script scope */
292 					scope == 'g'  ||  /* global scope */
293 					scope == 'd'  ||  /* dictionary */
294 					scope == 'a')     /* autoload */
295 			{
296 				char prefix[3] = { [0] = (char)scope, [1] = ':', [2] = '\0' };
297 				if (scope == 's')
298 					vStringCatS (name, prefix);
299 
300 				do
301 				{
302 					vStringPut (name, (int) *cp);
303 					++cp;
304 				} while (isalnum ((int) *cp) || *cp == '_' || *cp == '.' || *cp == '#');
305 				index = makeSimpleTag (name, K_FUNCTION);
306 				vStringClear (name);
307 
308 				e = getEntryInCorkQueue (index);
309 				if (e && isFieldEnabled (FIELD_SIGNATURE))
310 				{
311 					while (*cp && isspace ((int) *cp))
312 						++cp;
313 					if (*cp == '(')
314 						signature = parseSignature (cp, e, NULL);
315 				}
316 			}
317 		}
318 	}
319 
320 	/* TODO - update struct to indicate inside function */
321 	while ((line = readVimLine ()) != NULL)
322 	{
323 		if (signature)
324 		{
325 			cp = line;
326 			while (*cp && isspace ((int) *cp))
327 				++cp;
328 			/* A backslash at the start of a line stands for a line continuation.
329 			 * https://vimhelp.org/repeat.txt.html#line-continuation */
330 			if (*cp == '\\')
331 				signature = parseSignature (++cp, e, signature);
332 		}
333 
334 		if (wordMatchLen (line, "endfunction", 4) || wordMatchLen (line, "enddef", 6))
335 		{
336 			if (e)
337 				e->extensionFields.endLine = getInputLineNumber ();
338 			break;
339 		}
340 
341 		parseVimLine (line, true);
342 	}
343 	if (signature)
344 		vStringDelete (signature);
345 	vStringDelete (name);
346 }
347 
parseAutogroup(const unsigned char * line)348 static void parseAutogroup (const unsigned char *line)
349 {
350 	vString *name = vStringNew ();
351 
352 	/* Found Autocommand Group (augroup) */
353 	const unsigned char *cp = line;
354 	if (isspace ((int) *cp))
355 	{
356 		while (*cp && isspace ((int) *cp))
357 			++cp;
358 
359 		if (*cp)
360 		{
361 			const unsigned char *end = skipWord (cp);
362 
363 			/* "end" (caseless) has a special meaning and should not generate a tag */
364 			if (end > cp && strncasecmp ((const char *) cp, "end", end - cp) != 0)
365 			{
366 				vStringNCatS (name, (const char *) cp, end - cp);
367 				makeSimpleTag (name, K_AUGROUP);
368 				vStringClear (name);
369 			}
370 		}
371 	}
372 	vStringDelete (name);
373 }
374 
parseCommand(const unsigned char * line)375 static bool parseCommand (const unsigned char *line)
376 {
377 	vString *name = vStringNew ();
378 	bool cmdProcessed = true;
379 
380 	/*
381 	 * Found a user-defined command
382 	 *
383 	 * They can have many options preceded by a dash
384 	 * command! -nargs=+ -complete Select  :call s:DB_execSql("select " . <q-args>)
385 	 * The name of the command should be the first word not preceded by a dash
386 	 *
387 	 */
388 	const unsigned char *cp = line;
389 
390 	if (cp && (*cp == '\\'))
391 	{
392 		/*
393 		 * We are recursively calling this function is the command
394 		 * has been continued on to the next line
395 		 *
396 		 * Vim statements can be continued onto a newline using a \
397 		 * to indicate the previous line is continuing.
398 		 *
399 		 * com -nargs=1 -bang -complete=customlist,EditFileComplete
400 		 *          \ EditFile edit<bang> <args>
401 		 *
402 		 * If the following lines do not have a line continuation
403 		 * the command must not be spanning multiple lines and should
404 		 * be syntactically incorrect.
405 		 */
406 		if (*cp == '\\')
407 			++cp;
408 
409 		while (*cp && isspace ((int) *cp))
410 			++cp;
411 	}
412 	else if (line && wordMatchLen (cp, "command", 3))
413 	{
414 		cp = skipWord (cp);
415 
416 		if (*cp == '!')
417 			++cp;
418 
419 		if (*cp != ' ')
420 		{
421 			/*
422 			 * :command must be followed by a space.  If it is not, it is
423 			 * not a valid command.
424 			 * Treat the line as processed and continue.
425 			 */
426 			cmdProcessed = true;
427 			goto cleanUp;
428 		}
429 
430 		while (*cp && isspace ((int) *cp))
431 			++cp;
432 	}
433 	else
434 	{
435 		/*
436 		 * We are recursively calling this function.  If it does not start
437 		 * with "com" or a line continuation character, we have moved off
438 		 * the command line and should let the other routines parse this file.
439 		 */
440 		cmdProcessed = false;
441 		goto cleanUp;
442 	}
443 
444 	/*
445 	 * Strip off any spaces and options which are part of the command.
446 	 * These should precede the command name.
447 	 */
448 	do
449 	{
450 		if (isspace ((int) *cp))
451 		{
452 			++cp;
453 		}
454 		else if (*cp == '-')
455 		{
456 			/*
457 			 * Read until the next space which separates options or the name
458 			 */
459 			while (*cp && !isspace ((int) *cp))
460 				++cp;
461 		}
462 		else if (!isalnum ((int) *cp))
463 		{
464 			/*
465 			 * Broken syntax: throw away this line
466 			 */
467 			cmdProcessed = true;
468 			goto cleanUp;
469 		}
470 	} while (*cp &&  !isalnum ((int) *cp));
471 
472 	if (!*cp)
473 	{
474 		/*
475 		 * We have reached the end of the line without finding the command name.
476 		 * Read the next line and continue processing it as a command.
477 		 */
478 		if ((line = readVimLine ()) != NULL)
479 			cmdProcessed = parseCommand (line);
480 		else
481 			cmdProcessed = false;
482 		goto cleanUp;
483 	}
484 
485 	do
486 	{
487 		vStringPut (name, (int) *cp);
488 		++cp;
489 	} while (isalnum ((int) *cp) || *cp == '_');
490 
491 	makeSimpleTag (name, K_COMMAND);
492 	vStringClear (name);
493 
494 cleanUp:
495 	vStringDelete (name);
496 
497 	return cmdProcessed;
498 }
499 
parseVariableOrConstant(const unsigned char * line,int infunction,int kindIndex)500 static void parseVariableOrConstant (const unsigned char *line, int infunction, int kindIndex)
501 {
502 	vString *name = vStringNew ();
503 
504 	const unsigned char *cp = line;
505 	const unsigned char *np = line;
506 	/* get the name */
507 	if (isspace ((int) *cp))
508 	{
509 		while (*cp && isspace ((int) *cp))
510 			++cp;
511 
512 		/*
513 		 * Ignore lets which set:
514 		 *    &  - local buffer vim settings
515 		 *    @  - registers
516 		 *    [  - Lists or Dictionaries
517 		 */
518 		if (!*cp || *cp == '&' || *cp == '@' || *cp == '[')
519 			goto cleanUp;
520 
521 		/*
522 		 * Ignore vim variables which are read only
523 		 *    v: - Vim variables.
524 		 */
525 		np = cp;
526 		++np;
527 		if (*cp == 'v' && *np == ':')
528 			goto cleanUp;
529 
530 		/* Skip non-global vars in functions */
531 		if (infunction && (*np != ':' || *cp != 'g'))
532 			goto cleanUp;
533 
534 		/* deal with spaces, $, @ and & */
535 		while (*cp && *cp != '$' && !isalnum ((int) *cp))
536 			++cp;
537 
538 		if (!*cp)
539 			goto cleanUp;
540 
541 		/* cp = skipPrefix (cp, &scope); */
542 		do
543 		{
544 			if (!*cp)
545 				break;
546 
547 			vStringPut (name, (int) *cp);
548 			++cp;
549 		} while (isalnum ((int) *cp) || *cp == '_' || *cp == '#' || *cp == ':' || *cp == '$');
550 		makeSimpleTag (name, kindIndex);
551 		vStringClear (name);
552 	}
553 
554 cleanUp:
555 	vStringDelete (name);
556 }
557 
parseMap(const unsigned char * line)558 static bool parseMap (const unsigned char *line)
559 {
560 	vString *name = vStringNew ();
561 	const unsigned char *cp = line;
562 
563 	if (*cp == '!')
564 		++cp;
565 
566 	/*
567 	 * Maps follow this basic format
568 	 *    map
569 	 *    nnoremap <silent> <F8> :Tlist<CR>
570 	 *    map <unique> <Leader>scdt <Plug>GetColumnDataType
571 	 *    inoremap ,,, <esc>diwi<<esc>pa><cr></<esc>pa><esc>kA
572 	 *    inoremap <buffer> ( <C-R>=PreviewFunctionSignature()<LF>
573 	 *
574 	 * The Vim help shows the various special arguments available to a map:
575 	 * 1.2 SPECIAL ARGUMENTS                    *:map-arguments*
576 	 *    <buffer>
577 	 *    <nowait>
578 	 *    <silent>
579 	 *    <script>
580 	 *    <unique>
581 	 *    <special>
582 	 *    <expr>
583 	 *
584 	 * Strip the special arguments from the map command, this should leave
585 	 * the map name which we will use as the "name".
586 	 */
587 
588 	do
589 	{
590 		while (*cp && isspace ((int) *cp))
591 			++cp;
592 
593 		if (strncmp ((const char *) cp, "<Leader>", (size_t) 8) == 0)
594 			break;
595 
596 		if (
597 				strncmp ((const char *) cp, "<buffer>", (size_t) 8) == 0 ||
598 				strncmp ((const char *) cp, "<nowait>", (size_t) 8) == 0 ||
599 				strncmp ((const char *) cp, "<silent>", (size_t) 8) == 0 ||
600 				strncmp ((const char *) cp, "<script>", (size_t) 8) == 0 ||
601 				strncmp ((const char *) cp, "<unique>", (size_t) 8) == 0
602 		   )
603 		{
604 			cp += 8;
605 			continue;
606 		}
607 
608 		if (strncmp ((const char *) cp, "<expr>", (size_t) 6) == 0)
609 		{
610 			cp += 6;
611 			continue;
612 		}
613 
614 		if (strncmp ((const char *) cp, "<special>", (size_t) 9) == 0)
615 		{
616 			cp += 9;
617 			continue;
618 		}
619 
620 		break;
621 	} while (*cp);
622 
623 	do
624 	{
625 		vStringPut (name, (int) *cp);
626 		++cp;
627 	} while (*cp && *cp != ' ');
628 
629 	makeSimpleTag (name, K_MAP);
630 	vStringClear (name);
631 
632 	vStringDelete (name);
633 
634 	return true;
635 }
636 
parseVimLine(const unsigned char * line,int infunction)637 static bool parseVimLine (const unsigned char *line, int infunction)
638 {
639 	bool readNextLine = true;
640 
641 	if (wordMatchLen (line, "command", 3))
642 	{
643 		readNextLine = parseCommand (line);
644 		/* TODO - Handle parseCommand returning false */
645 	}
646 
647 	else if (isMap (line))
648 	{
649 		parseMap (skipWord (line));
650 	}
651 
652 	else if (wordMatchLen (line, "function", 2) || wordMatchLen (line, "def", 3))
653 	{
654 		parseFunction (skipWord (line));
655 	}
656 
657 	else if (wordMatchLen (line, "augroup", 3))
658 	{
659 		parseAutogroup (skipWord (line));
660 	}
661 
662 	else if (wordMatchLen (line, "let", 3))
663 	{
664 		parseVariableOrConstant (skipWord (line), infunction, K_VARIABLE);
665 	}
666 	else if (wordMatchLen (line, "const", 4))
667 	{
668 		parseVariableOrConstant (skipWord (line), infunction, K_CONST);
669 	}
670 
671 	return readNextLine;
672 }
673 
/* Parses a regular Vim script, starting with the already read `line`.
 *
 * Each line is handed to parseVimLine().  A new line is fetched with
 * readVimLine() only when parseVimLine() returns true; otherwise the same
 * pointer is passed to parseVimLine() again.  The loop ends when
 * readVimLine() returns NULL. */
static void parseVimFile (const unsigned char *line)
{
	const unsigned char *current = line;

	while (current != NULL)
	{
		if (parseVimLine (current, false))
			current = readVimLine ();
	}
}
687 
parseVimBallFile(const unsigned char * line)688 static void parseVimBallFile (const unsigned char *line)
689 {
690 	vString *fname = vStringNew ();
691 	const unsigned char *cp;
692 	int file_line_count;
693 	int i;
694 
695 	/*
696 	 * Vimball Archives follow this format
697 	 *    " Vimball Archiver comment
698 	 *    UseVimball
699 	 *    finish
700 	 *    filename
701 	 *    line count (n) for filename
702 	 *    (n) lines
703 	 *    filename
704 	 *    line count (n) for filename
705 	 *    (n) lines
706 	 *    ...
707 	 */
708 
709 	/* Next line should be "finish" */
710 	line = readVimLine ();
711 
712 	while (line != NULL)
713 	{
714 		/* Next line should be a filename */
715 		line = readVimLine ();
716 		if (line == NULL)
717 		{
718 			goto cleanUp;
719 		}
720 		else
721 		{
722 			cp = line;
723 			do
724 			{
725 				vStringPut (fname, (int) *cp);
726 				++cp;
727 			} while (isalnum ((int) *cp) || *cp == '.' || *cp == '/' || *cp == '\\');
728 			makeSimpleTag (fname, K_FILENAME);
729 			vStringClear (fname);
730 		}
731 
732 		file_line_count = 0;
733 		/* Next line should be the line count of the file */
734 		line = readVimLine ();
735 		if (line == NULL)
736 		{
737 			goto cleanUp;
738 		}
739 		else
740 		{
741 			file_line_count = atoi ((const char *) line);
742 		}
743 
744 		/* Read all lines of the file */
745 		for (i = 0; i < file_line_count; i++)
746 		{
747 			line = readVimballLine ();
748 			if (line == NULL)
749 			{
750 				goto cleanUp;
751 			}
752 		}
753 	}
754 
755 cleanUp:
756 	vStringDelete (fname);
757 }
758 
/* Parser entry point (installed as def->parser in VimParser).  Reads the
 * first non-comment line and picks the vimball parser when it starts with
 * "UseVimball", the regular Vim script parser otherwise.  Does nothing
 * when there is no such line. */
static void findVimTags (void)
{
	/* TODO - change this into a structure */
	const unsigned char *first = readVimLine ();

	if (first == NULL)
		return;

	if (strncmp ((const char *) first, "UseVimball", (size_t) 10) != 0)
		parseVimFile (first);
	else
		parseVimBallFile (first);
}
780 
VimParser(void)781 extern parserDefinition *VimParser (void)
782 {
783 	static const char *const extensions [] = { "vim", "vba", NULL };
784 	static const char *const patterns [] = { "vimrc", "[._]vimrc", "gvimrc",
785 		"[._]gvimrc", NULL };
786 	parserDefinition *def = parserNew ("Vim");
787 	def->kindTable      = VimKinds;
788 	def->kindCount  = ARRAY_SIZE (VimKinds);
789 	def->extensions = extensions;
790 	def->patterns   = patterns;
791 	def->parser     = findVimTags;
792 	def->useCork    = CORK_QUEUE;
793 	return def;
794 }
795