xref: /Universal-ctags/parsers/python.c (revision 23ceb0b685e027153abaace7a550a98e42d8128e)
1 /*
2 *   Copyright (c) 2000-2003, Darren Hiebert
3 *   Copyright (c) 2014-2016, Colomban Wendling <ban@herbesfolles.org>
4 *
5 *   This source code is released for free distribution under the terms of the
6 *   GNU General Public License version 2 or (at your option) any later version.
7 *
8 *   This module contains functions for generating tags for Python language
9 *   files.
10 */
11 
12 #include "general.h"  /* must always come first */
13 
14 #include <string.h>
15 
16 #include "entry.h"
17 #include "nestlevel.h"
18 #include "read.h"
19 #include "parse.h"
20 #include "vstring.h"
21 #include "keyword.h"
22 #include "routines.h"
23 #include "debug.h"
24 #include "xtag.h"
25 #include "objpool.h"
26 #include "ptrarray.h"
27 
/* True when the character code `c` may appear in an identifier: an
 * underscore, any value of 0x80 and above, or anything isalnum() accepts.
 * NOTE: `c` is expanded more than once, so pass an expression without
 * side effects. */
#define isIdentifierChar(c) \
	((c) == '_' || (c) >= 0x80 || isalnum (c))

/* Tokens are taken from, and handed back to, the TokenPool object pool. */
#define newToken() (objPoolGet (TokenPool))
#define deleteToken(t) (objPoolPut (TokenPool, (t)))
32 
/* Identifiers of the keywords this parser reacts to.  The enumeration is
 * deliberately anonymous: keyword IDs are carried around as plain ints
 * (keywordId) so that a variable can also hold KEYWORD_NONE, which is not
 * one of the enumerators listed here. */
enum {
	KEYWORD_as = 0,
	KEYWORD_async,
	KEYWORD_cdef,
	KEYWORD_class,
	KEYWORD_cpdef,
	KEYWORD_def,
	KEYWORD_extern,
	KEYWORD_from,
	KEYWORD_import,
	KEYWORD_inline,
	KEYWORD_lambda,
	KEYWORD_pass,
	KEYWORD_return,
};
typedef int keywordId; /* to allow KEYWORD_NONE */
49 
/* Access levels reported for a tag; COUNT_ACCESS is the number of levels. */
typedef enum {
	ACCESS_PRIVATE,
	ACCESS_PROTECTED,
	ACCESS_PUBLIC,
	COUNT_ACCESS
} accessType;

/* Text stored in a tag's access field, indexed by accessType. */
static const char *const PythonAccesses[COUNT_ACCESS] = {
	[ACCESS_PRIVATE]   = "private",
	[ACCESS_PROTECTED] = "protected",
	[ACCESS_PUBLIC]    = "public",
};
62 
63 typedef enum {
64 	F_DECORATORS,
65 	F_NAMEREF,
66 	COUNT_FIELD
67 } pythonField;
68 
69 static fieldDefinition PythonFields[COUNT_FIELD] = {
70 	{ .name = "decorators",
71 	  .description = "decorators on functions and classes",
72 	  .enabled = false },
73 	{ .name = "nameref",
74 	  .description = "the original name for the tag",
75 	  .enabled = true },
76 };
77 
/* Tag kinds produced by this parser.  The values are used as indices into
 * the PythonKinds table, so the order here must match the order of the
 * entries there.  COUNT_KIND is the number of kinds. */
typedef enum {
	K_CLASS,            /* "class" */
	K_FUNCTION,         /* "function" */
	K_METHOD,           /* "member" */
	K_VARIABLE,         /* "variable" */
	K_NAMESPACE,        /* "namespace" */
	K_MODULE,           /* "module" */
	K_UNKNOWN,          /* "unknown" */
	K_PARAMETER,        /* "parameter" */
	K_LOCAL_VARIABLE,   /* "local" */
	COUNT_KIND
} pythonKind;
90 
/* Roles of the module kind; indices into PythonModuleRoles. */
typedef enum {
	PYTHON_MODULE_IMPORTED,
	PYTHON_MODULE_NAMESPACE,
	PYTHON_MODULE_INDIRECTLY_IMPORTED,
} pythonModuleRole;

/* Roles of the unknown kind; indices into PythonUnknownRoles. */
typedef enum {
	PYTHON_UNKNOWN_IMPORTED,
	PYTHON_UNKNOWN_INDIRECTLY_IMPORTED,
} pythonUnknownRole;
101 
102 /* Roles related to `import'
103  * ==========================
104  * import X              X = (kind:module, role:imported)
105  *
106  * import X as Y         X = (kind:module, role:indirectlyImported),
107  *                       Y = (kind:namespace, nameref:module:X)
108  *                       ------------------------------------------------
109  *                       Don't confuse the kind of Y with namespace role of module kind.
110  *
111  * from X import *       X = (kind:module,  role:namespace)
112  *
113  * from X import Y       X = (kind:module,  role:namespace),
114  *                       Y = (kind:unknown, role:imported, scope:module:X)
115  *
116  * from X import Y as Z  X = (kind:module,  role:namespace),
117  *                       Y = (kind:unknown, role:indirectlyImported, scope:module:X)
118  *                       Z = (kind:unknown, nameref:unknown:Y) */
119 
120 static roleDefinition PythonModuleRoles [] = {
121 	{ true, "imported",
122 	  "imported modules" },
123 	{ true, "namespace",
124 	  "namespace from where classes/variables/functions are imported" },
125 	{ true, "indirectlyImported",
126 	  "module imported in alternative name" },
127 };
128 
129 static roleDefinition PythonUnknownRoles [] = {
130 	{ true, "imported",   "imported from the other module" },
131 	{ true, "indirectlyImported",
132 	  "classes/variables/functions/modules imported in alternative name" },
133 };
134 
135 static kindDefinition PythonKinds[COUNT_KIND] = {
136 	{true, 'c', "class",    "classes"},
137 	{true, 'f', "function", "functions"},
138 	{true, 'm', "member",   "class members"},
139 	{true, 'v', "variable", "variables"},
140 	{true, 'I', "namespace", "name referring a module defined in other file"},
141 	{true, 'i', "module",    "modules",
142 	 .referenceOnly = true,  ATTACH_ROLES(PythonModuleRoles)},
143 	{true, 'x', "unknown",   "name referring a class/variable/function/module defined in other module",
144 	 .referenceOnly = false, ATTACH_ROLES(PythonUnknownRoles)},
145 	{false, 'z', "parameter", "function parameters" },
146 	{false, 'l', "local",    "local variables" },
147 };
148 
149 static const keywordTable PythonKeywordTable[] = {
150 	/* keyword			keyword ID */
151 	{ "as",				KEYWORD_as				},
152 	{ "async",			KEYWORD_async			},
153 	{ "cdef",			KEYWORD_cdef			},
154 	{ "cimport",		KEYWORD_import			},
155 	{ "class",			KEYWORD_class			},
156 	{ "cpdef",			KEYWORD_cpdef			},
157 	{ "def",			KEYWORD_def				},
158 	{ "extern",			KEYWORD_extern			},
159 	{ "from",			KEYWORD_from			},
160 	{ "import",			KEYWORD_import			},
161 	{ "inline",			KEYWORD_inline			},
162 	{ "lambda",			KEYWORD_lambda			},
163 	{ "pass",			KEYWORD_pass			},
164 	{ "return",			KEYWORD_return			},
165 };
166 
/* Token types.  A single-character token uses the character's own value
 * as its type, so the symbolic types start right after the byte range. */
typedef enum eTokenType {
	/* 0..255 are the byte's value */
	TOKEN_EOF = 256,
	TOKEN_UNDEFINED,
	TOKEN_INDENT,
	TOKEN_KEYWORD,
	TOKEN_OPERATOR,
	TOKEN_IDENTIFIER,
	TOKEN_STRING,
	TOKEN_ARROW,        /* -> */
	TOKEN_WHITESPACE,
} tokenType;
179 
180 typedef struct {
181 	int				type;
182 	keywordId		keyword;
183 	vString *		string;
184 	int				indent;
185 	unsigned long 	lineNumber;
186 	MIOPos			filePosition;
187 } tokenInfo;
188 
/* Per-nesting-level data kept by this parser. */
struct pythonNestingLevelUserData {
	int indentation;
};

/* Gives typed access to the user data of the nesting level `nl`. */
#define PY_NL(nl) \
	((struct pythonNestingLevelUserData *) nestingLevelGetUserData (nl))
193 
194 static langType Lang_python;
195 static unsigned int TokenContinuationDepth = 0;
196 static tokenInfo *NextToken = NULL;
197 static NestingLevels *PythonNestingLevels = NULL;
198 static objPool *TokenPool = NULL;
199 
200 
201 /* follows PEP-8, and always reports single-underscores as protected
202  * See:
203  * - http://www.python.org/dev/peps/pep-0008/#method-names-and-instance-variables
204  * - http://www.python.org/dev/peps/pep-0008/#designing-for-inheritance
205  */
accessFromIdentifier(const vString * const ident,pythonKind kind,int parentKind)206 static accessType accessFromIdentifier (const vString *const ident,
207                                         pythonKind kind, int parentKind)
208 {
209 	const char *const p = vStringValue (ident);
210 	const size_t len = vStringLength (ident);
211 
212 	/* inside a function/method, private */
213 	if (parentKind != -1 && parentKind != K_CLASS)
214 		return ACCESS_PRIVATE;
215 	/* not starting with "_", public */
216 	else if (len < 1 || p[0] != '_')
217 		return ACCESS_PUBLIC;
218 	/* "__...__": magic methods */
219 	else if (kind == K_FUNCTION && parentKind == K_CLASS &&
220 	         len > 3 && p[1] == '_' && p[len - 2] == '_' && p[len - 1] == '_')
221 		return ACCESS_PUBLIC;
222 	/* "__...": name mangling */
223 	else if (parentKind == K_CLASS && len > 1 && p[1] == '_')
224 		return ACCESS_PRIVATE;
225 	/* "_...": suggested as non-public, but easily accessible */
226 	else
227 		return ACCESS_PROTECTED;
228 }
229 
/* Fills in `e` for a tag named after `token` with the given kind.
 *
 * Besides the name, line number and file position, this sets the scope to
 * the current nesting level (if any), turns a function directly inside a
 * class into a method, and records the access level derived from the
 * name.  Private tags are additionally flagged as file-scoped. */
static void initPythonEntry (tagEntryInfo *const e, const tokenInfo *const token,
                             const pythonKind kind)
{
	int enclosingKind = -1;
	NestingLevel *scope;
	accessType level;

	initTagEntry (e, vStringValue (token->string), kind);

	e->lineNumber	= token->lineNumber;
	e->filePosition	= token->filePosition;

	scope = nestingLevelsGetCurrent (PythonNestingLevels);
	if (scope)
	{
		tagEntryInfo *const scopeEntry = getEntryOfNestingLevel (scope);

		e->extensionFields.scopeIndex = scope->corkIndex;

		/* scopeEntry can be NULL if a kind was disabled.  Disabled kinds
		 * arguably should still count for the hierarchy -- the result is
		 * wrong otherwise -- but with the cork queue there is no entry to
		 * look up in that case, so the parent kind stays unknown. */
		if (scopeEntry)
		{
			enclosingKind = scopeEntry->kindIndex;

			/* functions directly inside classes are methods, fix it up */
			if (enclosingKind == K_CLASS && kind == K_FUNCTION)
				e->kindIndex = K_METHOD;
		}
	}

	level = accessFromIdentifier (token->string, kind, enclosingKind);
	e->extensionFields.access = PythonAccesses[level];
	/* FIXME: should we really set isFileScope in addition to access? */
	if (level == ACCESS_PRIVATE)
		e->isFileScope = true;
}
269 
makeClassTag(const tokenInfo * const token,const vString * const inheritance,const vString * const decorators)270 static int makeClassTag (const tokenInfo *const token,
271                          const vString *const inheritance,
272                          const vString *const decorators)
273 {
274 	if (PythonKinds[K_CLASS].enabled)
275 	{
276 		tagEntryInfo e;
277 
278 		initPythonEntry (&e, token, K_CLASS);
279 
280 		e.extensionFields.inheritance = inheritance ? vStringValue (inheritance) : "";
281 		if (decorators && vStringLength (decorators) > 0)
282 		{
283 			attachParserField (&e, false, PythonFields[F_DECORATORS].ftype,
284 			                   vStringValue (decorators));
285 		}
286 
287 		return makeTagEntry (&e);
288 	}
289 
290 	return CORK_NIL;
291 }
292 
makeFunctionTag(const tokenInfo * const token,const vString * const arglist,const vString * const decorators)293 static int makeFunctionTag (const tokenInfo *const token,
294                             const vString *const arglist,
295                             const vString *const decorators)
296 {
297 	if (PythonKinds[K_FUNCTION].enabled)
298 	{
299 		tagEntryInfo e;
300 
301 		initPythonEntry (&e, token, K_FUNCTION);
302 
303 		if (arglist)
304 			e.extensionFields.signature = vStringValue (arglist);
305 		if (decorators && vStringLength (decorators) > 0)
306 		{
307 			attachParserField (&e, false, PythonFields[F_DECORATORS].ftype,
308 			                   vStringValue (decorators));
309 		}
310 
311 		return makeTagEntry (&e);
312 	}
313 
314 	return CORK_NIL;
315 }
316 
/* Emits a tag of the given kind named after `token`, with no extra fields.
 * Returns the value of makeTagEntry(), or CORK_NIL when the kind is
 * disabled. */
static int makeSimplePythonTag (const tokenInfo *const token, pythonKind const kind)
{
	tagEntryInfo tag;

	if (! PythonKinds[kind].enabled)
		return CORK_NIL;

	initPythonEntry (&tag, token, kind);
	return makeTagEntry (&tag);
}
329 
/* Emits a reference tag for `token` with the given kind and role.
 *
 * altName: name to use for the tag instead of the token's text, or NULL.
 * xtag:    extra bit to mark on the tag; XTAG_UNKNOWN marks nothing.
 *
 * Nothing is emitted, and CORK_NIL is returned, when reference tags are
 * not enabled or the role itself is disabled.  Otherwise the value of
 * makeTagEntry() is returned. */
static int makeSimplePythonRefTag (const tokenInfo *const token,
                                   const vString *const altName,
                                   pythonKind const kind,
                                   int roleIndex, xtagType xtag)
{
	tagEntryInfo ref;
	const vString *tagName;

	if (! isXtagEnabled (XTAG_REFERENCE_TAGS) ||
	    ! PythonKinds[kind].roles[roleIndex].enabled)
		return CORK_NIL;

	tagName = altName ? altName : token->string;
	initRefTagEntry (&ref, vStringValue (tagName), kind, roleIndex);

	ref.lineNumber	= token->lineNumber;
	ref.filePosition	= token->filePosition;

	if (xtag != XTAG_UNKNOWN)
		markTagExtraBit (&ref, xtag);

	return makeTagEntry (&ref);
}
354 
/* Allocates a token together with its string buffer.  The remaining
 * members are left unset here; clearPoolToken() is what assigns them. */
static void *newPoolToken (void *createArg CTAGS_ATTR_UNUSED)
{
	tokenInfo *const fresh = xMalloc (1, tokenInfo);

	fresh->string = vStringNew ();
	return fresh;
}
361 
deletePoolToken(void * data)362 static void deletePoolToken (void *data)
363 {
364 	tokenInfo *token = data;
365 	vStringDelete (token->string);
366 	eFree (token);
367 }
368 
clearPoolToken(void * data)369 static void clearPoolToken (void *data)
370 {
371 	tokenInfo *token = data;
372 
373 	token->type			= TOKEN_UNDEFINED;
374 	token->keyword		= KEYWORD_NONE;
375 	token->indent		= 0;
376 	token->lineNumber   = getInputLineNumber ();
377 	token->filePosition = getInputFilePosition ();
378 	vStringClear (token->string);
379 }
380 
/* Copies every member of `src` into `dest`.  The text is copied into
 * dest's own string buffer, so the two tokens stay independent. */
static void copyToken (tokenInfo *const dest, const tokenInfo *const src)
{
	dest->type = src->type;
	dest->keyword = src->keyword;
	dest->indent = src->indent;
	dest->lineNumber = src->lineNumber;
	dest->filePosition = src->filePosition;
	vStringCopy (dest->string, src->string);
}
390 
391 /* Skip a single or double quoted string. */
readString(vString * const string,const int delimiter)392 static void readString (vString *const string, const int delimiter)
393 {
394 	int escaped = 0;
395 	int c;
396 
397 	while ((c = getcFromInputFile ()) != EOF)
398 	{
399 		if (escaped)
400 		{
401 			vStringPut (string, c);
402 			escaped--;
403 		}
404 		else if (c == '\\')
405 			escaped++;
406 		else if (c == delimiter || c == '\n' || c == '\r')
407 		{
408 			if (c != delimiter)
409 				ungetcToInputFile (c);
410 			break;
411 		}
412 		else
413 			vStringPut (string, c);
414 	}
415 }
416 
417 /* Skip a single or double triple quoted string. */
readTripleString(vString * const string,const int delimiter)418 static void readTripleString (vString *const string, const int delimiter)
419 {
420 	int c;
421 	int escaped = 0;
422 	int n = 0;
423 	while ((c = getcFromInputFile ()) != EOF)
424 	{
425 		if (c == delimiter && ! escaped)
426 		{
427 			if (++n >= 3)
428 				break;
429 		}
430 		else
431 		{
432 			for (; n > 0; n--)
433 				vStringPut (string, delimiter);
434 			if (c != '\\' || escaped)
435 				vStringPut (string, c);
436 			n = 0;
437 		}
438 
439 		if (escaped)
440 			escaped--;
441 		else if (c == '\\')
442 			escaped++;
443 	}
444 }
445 
/* Appends an identifier to `string`: `firstChar` (already read by the
 * caller) followed by every subsequent identifier character from the
 * input.  The first character that does not belong to the identifier is
 * pushed back. */
static void readIdentifier (vString *const string, const int firstChar)
{
	vStringPut (string, (char) firstChar);

	for (;;)
	{
		const int next = getcFromInputFile ();

		if (! isIdentifierChar (next))
		{
			ungetcToInputFile (next);
			break;
		}
		vStringPut (string, (char) next);
	}
}
457 
/* Pushes a copy of `token` back so that the next read returns it.  Only
 * one token can be held back at a time. */
static void ungetToken (tokenInfo *const token)
{
	tokenInfo *held;

	Assert (NextToken == NULL);

	held = newToken ();
	copyToken (held, token);
	NextToken = held;
}
464 
/* Reads the next token from the input into `token`.
 *
 * If a token was pushed back with ungetToken(), that token is returned
 * and nothing is read from the input.
 *
 * inclWhitespaces: when true, a run of blanks that is not followed by a
 *                  newline is reported as one TOKEN_WHITESPACE token whose
 *                  text is a single space, and so is a line break inside
 *                  brackets.  When false, blanks are skipped silently.
 *
 * Outside of brackets, a line break (together with any following comments
 * and blank lines) yields a TOKEN_INDENT token carrying the indentation of
 * the next non-empty line.  Single characters that are not handled
 * specially become tokens whose type is the character itself. */
static void readTokenFull (tokenInfo *const token, bool inclWhitespaces)
{
	int c;
	int n;

	/* if we've got a token held back, emit it */
	if (NextToken)
	{
		copyToken (token, NextToken);
		deleteToken (NextToken);
		NextToken = NULL;
		return;
	}

	token->type		= TOKEN_UNDEFINED;
	token->keyword	= KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:

	/* skip blanks; n counts the characters read including the first
	 * non-blank one, so n > 1 means at least one blank was skipped */
	n = 0;
	do
	{
		c = getcFromInputFile ();
		n++;
	}
	while (c == ' ' || c == '\t' || c == '\f');

	token->lineNumber   = getInputLineNumber ();
	token->filePosition = getInputFilePosition ();

	/* report the skipped blanks as a single space, unless they only
	 * trail at the end of a line */
	if (inclWhitespaces && n > 1 && c != '\r' && c != '\n')
	{
		ungetcToInputFile (c);
		vStringPut (token->string, ' ');
		token->type = TOKEN_WHITESPACE;
		return;
	}

	switch (c)
	{
		case EOF:
			token->type = TOKEN_EOF;
			break;

		case '\'':
		case '"':
		{
			/* the token text gets one delimiter at each end, whether the
			 * string was single or triple quoted */
			int d = getcFromInputFile ();
			token->type = TOKEN_STRING;
			vStringPut (token->string, c);
			if (d != c)
			{
				ungetcToInputFile (d);
				readString (token->string, c);
			}
			else if ((d = getcFromInputFile ()) == c)
				readTripleString (token->string, c);
			else /* empty string */
				ungetcToInputFile (d);
			vStringPut (token->string, c);
			/* the position is taken again after the string was read */
			token->lineNumber = getInputLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;
		}

		case '=':
		{
			/* "==" is an operator, a lone "=" is a token of its own */
			int d = getcFromInputFile ();
			vStringPut (token->string, c);
			if (d == c)
			{
				vStringPut (token->string, d);
				token->type = TOKEN_OPERATOR;
			}
			else
			{
				ungetcToInputFile (d);
				token->type = c;
			}
			break;
		}

		case '-':
		{
			int d = getcFromInputFile ();
			if (d == '>')
			{
				vStringPut (token->string, c);
				vStringPut (token->string, d);
				token->type = TOKEN_ARROW;
				break;
			}
			/* not an arrow: handle '-' like the other operators below */
			ungetcToInputFile (d);
			/* fall through */
		}
		case '+':
		case '*':
		case '%':
		case '<':
		case '>':
		case '/':
		{
			/* followed by '=' these form a two-character operator,
			 * otherwise the character is a token of its own */
			int d = getcFromInputFile ();
			vStringPut (token->string, c);
			if (d != '=')
			{
				ungetcToInputFile (d);
				token->type = c;
			}
			else
			{
				vStringPut (token->string, d);
				token->type = TOKEN_OPERATOR;
			}
			break;
		}

		/* eats newline to implement line continuation  */
		case '\\':
		{
			int d = getcFromInputFile ();
			if (d == '\r')
				d = getcFromInputFile ();
			if (d != '\n')
				ungetcToInputFile (d);
			goto getNextChar;
		}

		case '#': /* comment */
		case '\r': /* newlines for indent */
		case '\n':
		{
			int indent = 0;
			do
			{
				/* drop the comment up to, but excluding, the line end */
				if (c == '#')
				{
					do
						c = getcFromInputFile ();
					while (c != EOF && c != '\r' && c != '\n');
				}
				/* treat "\r\n" as a single line break */
				if (c == '\r')
				{
					int d = getcFromInputFile ();
					if (d != '\n')
						ungetcToInputFile (d);
				}
				/* measure the indentation of the line that follows;
				 * a tab advances to the next multiple of 8 */
				indent = 0;
				while ((c = getcFromInputFile ()) == ' ' || c == '\t' || c == '\f')
				{
					if (c == '\t')
						indent += 8 - (indent % 8);
					else if (c == '\f') /* yeah, it's weird */
						indent = 0;
					else
						indent++;
				}
			} /* skip completely empty lines, so retry */
			while (c == '\r' || c == '\n' || c == '#');
			ungetcToInputFile (c);
			if (TokenContinuationDepth > 0)
			{
				/* inside brackets a line break is plain whitespace */
				if (inclWhitespaces)
				{
					vStringPut (token->string, ' ');
					token->type = TOKEN_WHITESPACE;
				}
				else
					goto getNextChar;
			}
			else
			{
				token->type = TOKEN_INDENT;
				token->indent = indent;
			}
			break;
		}

		default:
			if (! isIdentifierChar (c))
			{
				/* any other character is a token of its own */
				vStringPut (token->string, c);
				token->type = c;
			}
			else
			{
				/* FIXME: handle U, B, R and F string prefixes? */
				readIdentifier (token->string, c);
				token->keyword = lookupKeyword (vStringValue (token->string), Lang_python);
				if (token->keyword == KEYWORD_NONE)
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
			}
			break;
	}

	/* handle implicit continuation lines not to emit INDENT inside brackets
	 * https://docs.python.org/3.6/reference/lexical_analysis.html#implicit-line-joining */
	if (token->type == '(' ||
	    token->type == '{' ||
	    token->type == '[')
	{
		TokenContinuationDepth ++;
	}
	else if (TokenContinuationDepth > 0 &&
	         (token->type == ')' ||
	          token->type == '}' ||
	          token->type == ']'))
	{
		TokenContinuationDepth --;
	}
}
679 
/* Reads the next token, skipping whitespace instead of reporting it. */
static void readToken (tokenInfo *const token)
{
	const bool reportWhitespace = false;

	readTokenFull (token, reportWhitespace);
}
684 
685 /*================================= parsing =================================*/
686 
687 
/* Appends the textual representation of `token` to `repr`.
 *
 * Indent and whitespace tokens are represented by a single space, which
 * is only added if `repr` is non-empty and does not already end with one.
 * Every other token contributes its text unchanged. */
static void reprCat (vString *const repr, const tokenInfo *const token)
{
	const bool isSpacing = (token->type == TOKEN_INDENT ||
	                        token->type == TOKEN_WHITESPACE);

	if (! isSpacing)
	{
		vStringCat (repr, token->string);
		return;
	}

	if (vStringLength (repr) > 0 && vStringLast (repr) != ' ')
		vStringPut (repr, ' ');
}
700 
/* Skips a bracketed group, handling nested pairs of the same bracket.
 *
 * If `token` is of type tOpen, tokens are read until the matching tClose
 * or the end of input; otherwise nothing is read.
 *
 * repr:          when non-NULL, receives the representation of the
 *                tokens that are skipped.
 * reprOuterPair: whether the outermost tOpen and tClose are included in
 *                `repr` as well.
 *
 * Returns true if `token` ends up being a tClose token. */
static bool skipOverPair (tokenInfo *const token, int tOpen, int tClose,
                             vString *const repr, bool reprOuterPair)
{
	int nesting = 1;

	if (token->type != tOpen)
		return token->type == tClose;

	if (repr && reprOuterPair)
		reprCat (repr, token);

	for (;;)
	{
		readTokenFull (token, true);

		/* everything is represented except, on request, the closing
		 * bracket of the outermost pair */
		if (repr && (reprOuterPair || token->type != tClose || nesting > 1))
			reprCat (repr, token);

		if (token->type == tOpen)
			nesting++;
		else if (token->type == tClose)
			nesting--;

		if (token->type == TOKEN_EOF || nesting <= 0)
			break;
	}

	return token->type == tClose;
}
727 
/* Skips the argument list of a lambda, starting at `token`.
 *
 * Tokens are consumed until a ':' at the top level, an indent token, or
 * the end of input.  Bracketed groups are skipped as a whole, and lambdas
 * nested in default values are handled recursively.  When `repr` is
 * non-NULL it receives the representation of what was skipped.
 *
 * Always returns false. */
static bool skipLambdaArglist (tokenInfo *const token, vString *const repr)
{
	for (;;)
	{
		bool advance = true;

		/* TOKEN_INDENT is checked to avoid reading too much, just in case */
		if (token->type == TOKEN_EOF || token->type == ':' ||
		    token->type == TOKEN_INDENT)
			break;

		switch (token->type)
		{
			case '(':
				advance = skipOverPair (token, '(', ')', repr, true);
				break;

			case '[':
				advance = skipOverPair (token, '[', ']', repr, true);
				break;

			case '{':
				advance = skipOverPair (token, '{', '}', repr, true);
				break;

			default:
				if (token->keyword == KEYWORD_lambda)
				{ /* handle lambdas in a default value */
					if (repr)
						reprCat (repr, token);
					readTokenFull (token, true);
					advance = skipLambdaArglist (token, repr);
					if (token->type == ':')
						advance = true;
					if (advance && repr)
						reprCat (repr, token);
				}
				else if (repr)
				{
					reprCat (repr, token);
				}
				break;
		}

		if (advance)
			readTokenFull (token, true);
	}

	return false;
}
763 
/* Reads a possibly dotted name into `nameToken`.
 *
 * Consecutive identifier and '.' tokens are joined into a single
 * TOKEN_IDENTIFIER token that keeps the line number and position of the
 * last part; the token following the name is pushed back.  If the first
 * token read is neither an identifier nor a '.', it is left in
 * `nameToken` as is. */
static void readQualifiedName (tokenInfo *const nameToken)
{
	vString *joined;
	tokenInfo *lastPart;

	readToken (nameToken);

	if (nameToken->type != TOKEN_IDENTIFIER && nameToken->type != '.')
		return;

	joined = vStringNew ();
	lastPart = newToken ();

	do
	{
		vStringCat (joined, nameToken->string);
		copyToken (lastPart, nameToken);

		readToken (nameToken);
	}
	while (nameToken->type == TOKEN_IDENTIFIER || nameToken->type == '.');

	/* put the last, non-matching, token back */
	ungetToken (nameToken);

	copyToken (nameToken, lastPart);
	nameToken->type = TOKEN_IDENTIFIER;
	vStringCopy (nameToken->string, joined);

	deleteToken (lastPart);
	vStringDelete (joined);
}
793 
/* Reads the name declared by a cdef/cpdef statement into `token`.
 *
 * An "extern" or "import" keyword right after the statement keyword is
 * skipped, unless it is followed by "from", in which case false is
 * returned.  For a class declaration `*kind` is set to K_CLASS and the
 * next token is taken as the name.  Otherwise the optional type
 * declaration is skipped; if an identifier followed by "(" is found,
 * `*kind` is set to K_FUNCTION and that identifier becomes the name.
 * In all other cases `*kind` is left untouched.
 *
 * Returns true if `token` ends up being an identifier. */
static bool readCDefName (tokenInfo *const token, pythonKind *kind)
{
	tokenInfo *candidate;
	bool foundFunction = false;

	readToken (token);

	if (token->keyword == KEYWORD_extern ||
	    token->keyword == KEYWORD_import)
	{
		readToken (token);
		if (token->keyword == KEYWORD_from)
			return false;
	}

	if (token->keyword == KEYWORD_class)
	{
		*kind = K_CLASS;
		readToken (token);
		return token->type == TOKEN_IDENTIFIER;
	}

	/* skip the optional type declaration -- everything on the same line
	 * until an identifier followed by "(". */
	candidate = newToken ();

	while (! foundFunction &&
	       token->type != TOKEN_EOF &&
	       token->type != TOKEN_INDENT &&
	       token->type != '=' &&
	       token->type != ',' &&
	       token->type != ':')
	{
		switch (token->type)
		{
			case '[':
				if (skipOverPair (token, '[', ']', NULL, false))
					readToken (token);
				break;

			case '(':
				if (skipOverPair (token, '(', ')', NULL, false))
					readToken (token);
				break;

			case TOKEN_IDENTIFIER:
				copyToken (candidate, token);
				readToken (token);
				if (token->type == '(')
				{ /* okay, we really found a function, use this */
					*kind = K_FUNCTION;
					ungetToken (token);
					copyToken (token, candidate);
					foundFunction = true;
				}
				break;

			default:
				readToken (token);
				break;
		}
	}

	deleteToken (candidate);

	return token->type == TOKEN_IDENTIFIER;
}
854 
/* Parses the ": type" annotation that may follow a parameter name.
 *
 * If the next token is not ':', it is pushed back and NULL is returned.
 * Otherwise tokens are read up to the '=' or ',' that ends the annotation
 * at bracket depth zero, or up to the bracket closing the enclosing list;
 * that terminating token is pushed back.  Everything read before it is
 * appended to `arglist`, and the annotation itself, without whitespace
 * tokens, is returned in a newly allocated string.
 *
 * Returns NULL, after freeing the partial result, if the end of input is
 * reached first. */
static vString *parseParamTypeAnnotation (tokenInfo *const token,
										  vString *arglist)
{
	vString *annotation;
	int nesting = 0;

	readToken (token);
	if (token->type != ':')
	{
		ungetToken (token);
		return NULL;
	}

	reprCat (arglist, token);
	annotation = vStringNew ();

	for (readTokenFull (token, true);
		 token->type != TOKEN_EOF;
		 readTokenFull (token, true))
	{
		bool terminates;

		/* whitespace shows up in the argument list, not in the type */
		if (token->type == TOKEN_WHITESPACE)
		{
			reprCat (arglist, token);
			continue;
		}

		switch (token->type)
		{
			case '(':
			case '[':
			case '{':
				nesting++;
				break;

			case ')':
			case ']':
			case '}':
				nesting--;
				break;

			default:
				break;
		}

		terminates = (nesting < 0 ||
					  (nesting == 0 &&
					   (token->type == '=' || token->type == ',')));
		if (terminates)
		{
			ungetToken (token);
			return annotation;
		}

		reprCat (arglist, token);
		reprCat (annotation, token);
	}

	vStringDelete (annotation);
	return NULL;
}
901 
parseReturnTypeAnnotation(tokenInfo * const token)902 static vString *parseReturnTypeAnnotation (tokenInfo *const token)
903 {
904 	readToken (token);
905 	if (token->type != TOKEN_ARROW)
906 	{
907 		ungetToken (token);
908 		return NULL;
909 	}
910 
911 	int depth = 0;
912 	vString *t = vStringNew ();
913 	while (true)
914 	{
915 		readToken (token);
916 		if (token->type == TOKEN_EOF)
917 			break;
918 
919 		if (token->type == '(' ||
920 			token->type == '[' ||
921 			token->type == '{')
922 			depth ++;
923 		else if (token->type == ')' ||
924 				 token->type == ']' ||
925 				 token->type == '}')
926 			depth --;
927 		if (depth == 0 && token->type == ':')
928 		{
929 			ungetToken (token);
930 			return t;
931 		}
932 		else
933 			reprCat (t, token);
934 	}
935 	vStringDelete (t);
936 	return NULL;
937 }
938 
939 struct typedParam {
940 	tokenInfo *token;
941 	vString *type;
942 };
943 
/* Allocates a typedParam that refers to `token` and `type` directly; no
 * copies are made.  deleteTypedParam() later releases both of them along
 * with the structure. */
static struct typedParam *makeTypedParam (tokenInfo *token, vString *type)
{
	struct typedParam *const param = xMalloc (1, struct typedParam);

	param->type = type;
	param->token = token;
	return param;
}
951 
/* Allocates a typedParam holding private copies of `token` and, when it
 * is non-NULL, of `type`.  The arguments themselves are not retained. */
static struct typedParam *makeTypedParamWithCopy (const tokenInfo *token, const vString *type)
{
	tokenInfo *const tokenCopy = newToken ();
	vString *typeCopy = NULL;

	copyToken (tokenCopy, token);
	if (type)
		typeCopy = vStringNewCopy (type);

	return makeTypedParam (tokenCopy, typeCopy);
}
961 
deleteTypedParam(struct typedParam * p)962 static void deleteTypedParam (struct typedParam *p)
963 {
964 	deleteToken (p->token);
965 	vStringDelete (p->type);	/* NULL is acceptable. */
966 	eFree (p);
967 }
968 
/* Parses a Python argument list (or, for a class, its list of bases).
 *
 * `token` is the opening '(' on entry.  Tokens are read up to the
 * matching ')' or the end of input, and their representation is collected
 * in `arglist`; for a class the outermost parentheses are left out.
 *
 * For anything but a class, and only while the parameter kind is enabled,
 * each identifier at the top level that directly follows '(' or ','
 * (ignoring whitespace and '*') is added to `parameters`, together with
 * its type annotation if it has one. */
static void parseArglist (tokenInfo *const token, const int kind,
						  vString *const arglist, ptrArray *const parameters)
{
	const bool forClass = (kind == K_CLASS);
	int lastSignificantType = token->type;
	int nesting = 1;

	if (! forClass)
		reprCat (arglist, token);

	for (;;)
	{
		/* for easy `*args` and `**kwargs` support, we also ignore
		 * `*`, which anyway can't otherwise screw us up */
		if (token->type != TOKEN_WHITESPACE && token->type != '*')
			lastSignificantType = token->type;

		readTokenFull (token, true);
		if (! forClass || token->type != ')' || nesting > 1)
			reprCat (arglist, token);

		switch (token->type)
		{
			case '(':
			case '[':
			case '{':
				nesting++;
				break;

			case ')':
			case ']':
			case '}':
				nesting--;
				break;

			case TOKEN_IDENTIFIER:
				if (! forClass && nesting == 1 &&
					(lastSignificantType == '(' || lastSignificantType == ',') &&
					PythonKinds[K_PARAMETER].enabled)
				{
					tokenInfo *const parameterName = newToken ();
					vString *parameterType;

					copyToken (parameterName, token);
					parameterType = parseParamTypeAnnotation (token, arglist);

					ptrArrayAdd (parameters,
								 makeTypedParam (parameterName, parameterType));
				}
				break;

			default:
				break;
		}

		if (token->type == TOKEN_EOF || nesting <= 0)
			break;
	}
}
1019 
/* Records one C-style parameter once its end has been reached.
 *
 * `ptype` holds the text collected for the parameter, which includes the
 * parameter name at the end:
 * 1. Trim the parameter name at the end.
 * 2. Then, trim the white space at the end of the type string.
 * 3. If the type string is not empty,
 *    3.a append (the type string + ' ' + the parameter name) to arglist.
 *    3.b else just append the parameter name to arglist.
 *
 * FIXME:
 * This doesn't work well with an array and a function pointer.
 *
 *   f(..., int seq [dim], ...)
 *      in this case, dim is extracted as a parameter.
 *
 *   f(..., int (*fn)(int), ...)
 *      in this case, int is extracted as a parameter.
 */
static void appendCParameter (const tokenInfo *const pname, vString *const ptype,
							  vString *const arglist, ptrArray *const parameters)
{
	size_t typeLength;

	Assert (vStringLength (ptype) >= vStringLength (pname->string));
	typeLength = vStringLength (ptype) - vStringLength (pname->string);
	vStringTruncate (ptype, typeLength);

	if (vStringLength (ptype) > 0)
	{
		vStringStripTrailing (ptype);
		if (vStringLength (ptype) > 0)
		{
			vStringCat (arglist, ptype);
			vStringPut (arglist, ' ');
		}
	}
	vStringCat (arglist, pname->string);

	ptrArrayAdd (parameters,
				 makeTypedParamWithCopy (pname, vStringIsEmpty(ptype)? NULL: ptype));
}

/* Parses the C-style argument list of a cdef/cpdef function.
 *
 * `token` is the opening '(' on entry.  Tokens are read up to the
 * matching ')'.  A normalised representation of the list is built in
 * `arglist`, and every parameter that could be identified is added to
 * `parameters` with its type.  If the end of input is hit first, both
 * `arglist` and `parameters` are emptied.
 *
 * The last identifier of each parameter that does not end with a digit is
 * taken as the parameter name.  `kind` is not used. */
static void parseCArglist (tokenInfo *const token, const int kind,
						  vString *const arglist, ptrArray *const parameters)
{
	int nesting = 1;
	tokenInfo *pname = newToken ();
	vString *ptype = vStringNew ();

	vStringCat (arglist, token->string);	/* '(' */

	for (;;)
	{
		int type;

		readToken (token);
		type = token->type;

		if (type == TOKEN_EOF)
		{
			/* Unexpected input. */
			vStringClear (arglist);
			ptrArrayClear (parameters);
			break;
		}
		else if (nesting == 1 && (type == ',' || type == ')'))
		{
			/* end of one parameter */
			if (pname->type == TOKEN_IDENTIFIER)
				appendCParameter (pname, ptype, arglist, parameters);

			if (type == ')')
			{
				vStringPut (arglist, ')');
				break;
			}

			/* get ready for the next parameter */
			vStringCatS (arglist, ", ");
			vStringClear (ptype);
			pname->type = TOKEN_UNDEFINED;
		}
		else if (type == '(' || type == '[' || type == '{')
		{
			vStringPut (ptype, type);
			nesting++;
		}
		else if (type == ')' || type == ']' || type == '}')
		{
			vStringPut (ptype, type);
			nesting--;
		}
		else if (type == TOKEN_IDENTIFIER || type == TOKEN_KEYWORD)
		{
			/* separate words from what precedes them */
			if (vStringLength (ptype) > 0
				&& (isalnum ((unsigned char)vStringLast (ptype))
					|| vStringLast (ptype) == ','))
				vStringPut (ptype, ' ');
			vStringCat (ptype, token->string);

			/* remember the most recent candidate for the name */
			if (!isdigit ((unsigned char)vStringLast (token->string)))
				copyToken (pname, token);
		}
		else
			vStringCat (ptype, token->string);
	}

	vStringDelete (ptype);
	deleteToken (pname);
}
1129 
/* Parses a `class` or `def` statement, or a Cython `cdef`/`cpdef` one when
 * isCDef is set.  On entry, token is the introducing keyword.
 *
 * A tag is made for the definition and a nesting level is pushed for it.
 * When an opening parenthesis follows the name, the argument (or
 * inheritance) list is collected and a parameter tag is made for every
 * collected parameter.  For anything but a class, a return type annotation,
 * if one is found, is stored in the typeref field of the definition's tag.
 *
 * Returns false, without making any tag, when no name could be read, and
 * true otherwise.  The caller uses the result as its "read the next token"
 * flag. */
static bool parseClassOrDef (tokenInfo *const token,
                             const vString *const decorators,
                             pythonKind kind, bool isCDef)
{
	vString *arglist = NULL;
	ptrArray *parameters = NULL;

	/* Read the name.  readCDefName() is handed a pointer to kind, so the
	 * kind used below may differ from the one passed in. */
	if (! isCDef)
	{
		readToken (token);
		if (token->type != TOKEN_IDENTIFIER)
			return false;
	}
	else if (! readCDefName (token, &kind))
		return false;

	tokenInfo *const name = newToken ();
	copyToken (name, token);

	/* collect parameters or inheritance */
	readToken (token);
	if (token->type == '(')
	{
		const bool cStyleArglist = isCDef && kind != K_CLASS;

		arglist = vStringNew ();
		parameters = ptrArrayNew ((ptrArrayDeleteFunc)deleteTypedParam);

		if (cStyleArglist)
			parseCArglist (token, kind, arglist, parameters);
		else
			parseArglist (token, kind, arglist, parameters);
	}

	const int corkIndex = (kind == K_CLASS)
		? makeClassTag (name, arglist, decorators)
		: makeFunctionTag (name, arglist, decorators);

	NestingLevel *const level = nestingLevelsPush (PythonNestingLevels, corkIndex);
	PY_NL (level)->indentation = token->indent;

	/* the name and the signature are not needed once the tag is made */
	vStringDelete (arglist); /* NULL is acceptable. */
	deleteToken (name);

	if (parameters)
	{
		const unsigned int parameterCount = ptrArrayCount (parameters);
		unsigned int n;

		for (n = 0; n < parameterCount; n++)
		{
			struct typedParam *const param = ptrArrayItem (parameters, n);
			tagEntryInfo *const paramEntry =
				getEntryInCorkQueue (makeSimplePythonTag (param->token, K_PARAMETER));

			if (! paramEntry || ! param->type)
				continue;

			/* hand the type string over to the tag entry; clearing the
			 * pointer keeps the array's delete function away from it */
			paramEntry->extensionFields.typeRef [0] = eStrdup ("typename");
			paramEntry->extensionFields.typeRef [1] = vStringDeleteUnwrap (param->type);
			param->type = NULL;
		}
	}
	ptrArrayDelete (parameters); /* NULL is acceptable. */

	if (kind != K_CLASS)
	{
		/* only look for a return type annotation if there is an entry to
		 * attach it to */
		tagEntryInfo *const entry = getEntryInCorkQueue (corkIndex);
		vString *const returnType = entry ? parseReturnTypeAnnotation (token) : NULL;

		if (returnType)
		{
			entry->extensionFields.typeRef [0] = eStrdup ("typename");
			entry->extensionFields.typeRef [1] = vStringDeleteUnwrap (returnType);
		}
	}

	return true;
}
1210 
parseImport(tokenInfo * const token)1211 static bool parseImport (tokenInfo *const token)
1212 {
1213 	tokenInfo *fromModule = NULL;
1214 
1215 	if (token->keyword == KEYWORD_from)
1216 	{
1217 		readQualifiedName (token);
1218 		if (token->type == TOKEN_IDENTIFIER)
1219 		{
1220 			fromModule = newToken ();
1221 			copyToken (fromModule, token);
1222 			readToken (token);
1223 		}
1224 	}
1225 
1226 	if (token->keyword == KEYWORD_import)
1227 	{
1228 		bool parenthesized = false;
1229 		int moduleIndex;
1230 
1231 		if (fromModule)
1232 		{
1233 			/* from X import ...
1234 			 * --------------------
1235 			 * X = (kind:module, role:namespace) */
1236 			moduleIndex = makeSimplePythonRefTag (fromModule, NULL, K_MODULE,
1237 												  PYTHON_MODULE_NAMESPACE,
1238 												  XTAG_UNKNOWN);
1239 		}
1240 
1241 		do
1242 		{
1243 			readQualifiedName (token);
1244 
1245 			/* support for `from x import (...)` */
1246 			if (fromModule && ! parenthesized && token->type == '(')
1247 			{
1248 				parenthesized = true;
1249 				readQualifiedName (token);
1250 			}
1251 
1252 			if (token->type == TOKEN_IDENTIFIER)
1253 			{
1254 				tokenInfo *name = newToken ();
1255 
1256 				copyToken (name, token);
1257 				readToken (token);
1258 				/* if there is an "as", use it as the name */
1259 				if (token->keyword == KEYWORD_as)
1260 				{
1261 					readToken (token);
1262 					if (token->type == TOKEN_IDENTIFIER)
1263 					{
1264 						if (fromModule)
1265 						{
1266 							/* from x import Y as Z
1267 							 * ----------------------------
1268 							 * x = (kind:module,  role:namespace),
1269 							 * Y = (kind:unknown, role:indirectlyImported, scope:module:X),
1270 							 * Z = (kind:unknown, nameref:unknown:Y) */
1271 							int index;
1272 
1273 							/* Y */
1274 							index = makeSimplePythonRefTag (name, NULL, K_UNKNOWN,
1275 															PYTHON_UNKNOWN_INDIRECTLY_IMPORTED,
1276 															XTAG_UNKNOWN);
1277 							/* fill the scope field for Y */
1278 							tagEntryInfo *e = getEntryInCorkQueue (index);
1279 							if (e)
1280 								e->extensionFields.scopeIndex = moduleIndex;
1281 
1282 							/* Z */
1283 							index = makeSimplePythonTag (token, K_UNKNOWN);
1284 							/* fill the nameref filed for Y */
1285 							if (PythonFields[F_NAMEREF].enabled)
1286 							{
1287 								vString *nameref = vStringNewInit (PythonKinds [K_UNKNOWN].name);
1288 								vStringPut (nameref, ':');
1289 								vStringCat (nameref, name->string);
1290 								attachParserFieldToCorkEntry (index, PythonFields[F_NAMEREF].ftype,
1291 															  vStringValue (nameref));
1292 								vStringDelete (nameref);
1293 							}
1294 						}
1295 						else
1296 						{
1297 							/* import x as Y
1298 							 * ----------------------------
1299 							 * x = (kind:module, role:indirectlyImported)
1300 							 * Y = (kind:namespace, nameref:module:x)*/
1301 							/* x */
1302 							makeSimplePythonRefTag (name, NULL, K_MODULE,
1303 							                        PYTHON_MODULE_INDIRECTLY_IMPORTED,
1304 							                        XTAG_UNKNOWN);
1305 							/* Y */
1306 							int index = makeSimplePythonTag (token, K_NAMESPACE);
1307 							/* fill the nameref filed for Y */
1308 							if (PythonFields[F_NAMEREF].enabled)
1309 							{
1310 								vString *nameref = vStringNewInit (PythonKinds [K_MODULE].name);
1311 								vStringPut (nameref, ':');
1312 								vStringCat (nameref, name->string);
1313 								attachParserFieldToCorkEntry (index, PythonFields[F_NAMEREF].ftype,
1314 															  vStringValue (nameref));
1315 								vStringDelete (nameref);
1316 							}
1317 						}
1318 
1319 						copyToken (name, token);
1320 						readToken (token);
1321 					}
1322 				}
1323 				else
1324 				{
1325 					if (fromModule)
1326 					{
1327 						/* from x import Y
1328 						   --------------
1329 						   x = (kind:module,  role:namespace),
1330 						   Y = (kind:unknown, role:imported, scope:module:x) */
1331 						/* Y */
1332 						int index = makeSimplePythonRefTag (name, NULL, K_UNKNOWN,
1333 															PYTHON_UNKNOWN_IMPORTED,
1334 															XTAG_UNKNOWN);
1335 						/* fill the scope field for Y */
1336 						tagEntryInfo *e = getEntryInCorkQueue (index);
1337 						if (e)
1338 							e->extensionFields.scopeIndex = moduleIndex;
1339 					}
1340 					else
1341 					{
1342 						/* import X
1343 						   --------------
1344 						   X = (kind:module, role:imported) */
1345 						makeSimplePythonRefTag (name, NULL, K_MODULE,
1346 						                        PYTHON_MODULE_IMPORTED,
1347 						                        XTAG_UNKNOWN);
1348 					}
1349 				}
1350 
1351 				deleteToken (name);
1352 			}
1353 		}
1354 		while (token->type == ',');
1355 
1356 		if (parenthesized && token->type == ')')
1357 			readToken (token);
1358 	}
1359 
1360 	if (fromModule)
1361 		deleteToken (fromModule);
1362 
1363 	return false;
1364 }
1365 
1366 /* this only handles the most common cases, but an annotation can be any
1367  * expression in theory.
1368  * this function assumes there must be an annotation, and doesn't do any check
1369  * on the token on which it is called: the caller should do that part. */
skipVariableTypeAnnotation(tokenInfo * const token,vString * const repr)1370 static bool skipVariableTypeAnnotation (tokenInfo *const token, vString *const repr)
1371 {
1372 	bool readNext = true;
1373 
1374 	readToken (token);
1375 	switch (token->type)
1376 	{
1377 		case '[': readNext = skipOverPair (token, '[', ']', repr, true); break;
1378 		case '(': readNext = skipOverPair (token, '(', ')', repr, true); break;
1379 		case '{': readNext = skipOverPair (token, '{', '}', repr, true); break;
1380 		default: reprCat (repr, token);
1381 	}
1382 	if (readNext)
1383 		readToken (token);
1384 	/* skip subscripts and calls */
1385 	while (token->type == '[' || token->type == '(' || token->type == '.' || token->type == '|')
1386 	{
1387 		switch (token->type)
1388 		{
1389 			case '[': readNext = skipOverPair (token, '[', ']', repr, true); break;
1390 			case '(': readNext = skipOverPair (token, '(', ')', repr, true); break;
1391 			case '|':
1392 				reprCat (repr, token);
1393 				skipVariableTypeAnnotation (token, repr);
1394 				readNext = false;
1395 				break;
1396 			case '.':
1397 				reprCat (repr, token);
1398 				readToken (token);
1399 				readNext = token->type == TOKEN_IDENTIFIER;
1400 				if (readNext)
1401 					reprCat (repr, token);
1402 				break;
1403 			default:  readNext = false; break;
1404 		}
1405 		if (readNext)
1406 			readToken (token);
1407 	}
1408 
1409 	return false;
1410 }
1411 
parseVariable(tokenInfo * const token,const pythonKind kind)1412 static bool parseVariable (tokenInfo *const token, const pythonKind kind)
1413 {
1414 	/* In order to support proper tag type for lambdas in multiple
1415 	 * assignations, we first collect all the names, and then try and map
1416 	 * an assignation to it */
1417 	tokenInfo *nameTokens[8] = { NULL };
1418 	vString   *nameTypes [ARRAY_SIZE (nameTokens)] = { NULL };
1419 	unsigned int nameCount = 0;
1420 	vString *type = vStringNew();
1421 
1422 	/* first, collect variable name tokens */
1423 	while (token->type == TOKEN_IDENTIFIER &&
1424 	       nameCount < ARRAY_SIZE (nameTokens))
1425 	{
1426 		unsigned int i;
1427 		tokenInfo *name = newToken ();
1428 		copyToken (name, token);
1429 
1430 		readToken (token);
1431 		if (token->type == '.')
1432 		{
1433 			/* FIXME: what to do with dotted names?  We currently ignore them
1434 			 *        as we need to do something not to break the whole
1435 			 *        declaration, but the expected behavior is questionable */
1436 			deleteToken (name);
1437 			name = NULL;
1438 
1439 			do
1440 			{
1441 				readToken (token);
1442 			}
1443 			while (token->type == TOKEN_IDENTIFIER ||
1444 			       token->type == '.');
1445 		}
1446 
1447 		i = nameCount++;
1448 		nameTokens[i] = name;
1449 
1450 		/* (parse and) skip annotations.  we need not to be too permissive because we
1451 		 * aren't yet sure we're actually parsing a variable. */
1452 		if (token->type == ':' && skipVariableTypeAnnotation (token, type))
1453 			readToken (token);
1454 
1455 		if (vStringLength (type) > 0)
1456 		{
1457 			nameTypes[i] = type;
1458 			type = vStringNew ();
1459 		}
1460 
1461 		if (token->type == ',')
1462 			readToken (token);
1463 		else
1464 			break;
1465 	}
1466 	vStringDelete (type);
1467 
1468 	/* then, if it's a proper assignation, try and map assignations so that
1469 	 * we catch lambdas and alike */
1470 	if (token->type == '=')
1471 	{
1472 		unsigned int i = 0;
1473 
1474 		do
1475 		{
1476 			const tokenInfo *const nameToken = nameTokens[i];
1477 			vString **type = &(nameTypes[i++]);
1478 
1479 			readToken (token);
1480 
1481 			if (! nameToken)
1482 				/* nothing */;
1483 			else if (token->keyword != KEYWORD_lambda)
1484 			{
1485 				int index = makeSimplePythonTag (nameToken, kind);
1486 				tagEntryInfo *e = getEntryInCorkQueue (index);
1487 				if (e && *type)
1488 				{
1489 					e->extensionFields.typeRef [0] = eStrdup ("typename");
1490 					e->extensionFields.typeRef [1] = vStringDeleteUnwrap (*type);
1491 					*type = NULL;
1492 				}
1493 			}
1494 			else
1495 			{
1496 				tokenInfo *anon  = NULL;
1497 				vString *arglist = vStringNew ();
1498 				if (*type)
1499 				{
1500 					anon = newToken ();
1501 					copyToken (anon, token);
1502 				}
1503 				readToken (token);
1504 				vStringPut (arglist, '(');
1505 				skipLambdaArglist (token, arglist);
1506 				vStringPut (arglist, ')');
1507 				if (*type)
1508 				{
1509 					/* How to handle lambda assigned to a variable
1510 					 * --------------------------------------------
1511 					 *
1512 					 * input.py:
1513 					 *
1514 					 * 	  id = lambda var: var
1515 					 * 	  id_t: Callable[[int], int] = lambda var: var
1516 					 *
1517 					 * `id' is tagged as a function kind object like:
1518 					 *
1519 					 *    id	input.py	/^id = lambda var: var$/;"	function
1520 					 *
1521 					 * For `id_t' we cannot do the same as `id'.
1522 					 *
1523 					 * We should not store `Callable[[int], int]' to typeref
1524 					 * field of the tag of `id_t' if the tag has "function" as
1525 					 * its kind because users expect the typeref field of a
1526 					 * function kind represents a type for the value returned
1527 					 * from the function (return type).
1528 					 *
1529 					 * the unexpected tag:
1530 					 *
1531 					 *    id_t	input.py	/^id_t: Callable[[int], int] = lambda var: var$/;"	function \
1532 					 *                          typeref:typename:Callable[[int], int]
1533 					 *
1534 					 * If we make a tag for `id_t' as a function, we should
1535 					 * attach `typeref:typename:int' and `signature:(int)'. To
1536 					 * achieve this, we have to make ctags analyze
1537 					 * `Callable[[int], int]'.  However, we want to avoid the
1538 					 * level of analyzing.
1539 					 *
1540 					 * For recording `Callable[[int], int]', a valuable
1541 					 * information in the input, we use indirection.
1542 					 *
1543 					 *    id_t	input.py	/^id_t: Callable[[int], int] = lambda var: var$/;"	variable \
1544 					 *                          typeref:typename:Callable[[int], int]	nameref:function:anonFuncNNN
1545 					 *    anonFuncNNN	input.py	/^id_t: Callable[[int], int] = lambda var: var$/;"	function \
1546 					 *                          extras:anonymous
1547 					 */
1548 					int vindex = makeSimplePythonTag (nameToken, kind);
1549 					vStringClear (anon->string);
1550 					anonGenerate (anon->string, "anonFunc", K_FUNCTION);
1551 					int findex = makeFunctionTag (anon, arglist, NULL);
1552 					tagEntryInfo *fe = getEntryInCorkQueue (findex);
1553 					if (fe)
1554 						markTagExtraBit (fe, XTAG_ANONYMOUS);
1555 
1556 					tagEntryInfo *ve = getEntryInCorkQueue (vindex);
1557 					if (ve)
1558 					{
1559 						ve->extensionFields.typeRef [0] = eStrdup ("typename");
1560 						ve->extensionFields.typeRef [1] = vStringDeleteUnwrap (*type);
1561 						*type = NULL;
1562 						vString *nameref = vStringNewInit (PythonKinds [K_FUNCTION].name);
1563 						vStringPut (nameref, ':');
1564 						vStringCat (nameref, anon->string);
1565 						attachParserField (ve, true, PythonFields[F_NAMEREF].ftype,
1566 										   vStringValue (nameref));
1567 						vStringDelete (nameref);
1568 					}
1569 					if (anon)
1570 						deleteToken (anon);
1571 				}
1572 				else
1573 					makeFunctionTag (nameToken, arglist, NULL);
1574 				vStringDelete (arglist);
1575 			}
1576 
1577 			/* skip until next initializer */
1578 			while ((TokenContinuationDepth > 0 || token->type != ',') &&
1579 			       token->type != TOKEN_EOF &&
1580 			       token->type != ';' &&
1581 			       token->type != TOKEN_INDENT)
1582 			{
1583 				readToken (token);
1584 			}
1585 		}
1586 		while (token->type == ',' && i < nameCount);
1587 
1588 		/* if we got leftover to initialize, just make variables out of them.
1589 		 * This handles cases like `a, b, c = (c, d, e)` -- or worse */
1590 		for (; i < nameCount; i++)
1591 		{
1592 			if (nameTokens[i])
1593 				makeSimplePythonTag (nameTokens[i], kind);
1594 		}
1595 	}
1596 
1597 	while (nameCount > 0)
1598 	{
1599 		if (nameTokens[--nameCount])
1600 			deleteToken (nameTokens[nameCount]);
1601 		vStringDelete (nameTypes[nameCount]); /* NULL is acceptable. */
1602 	}
1603 
1604 	return false;
1605 }
1606 
1607 /* pops any level >= to indent */
setIndent(tokenInfo * const token)1608 static void setIndent (tokenInfo *const token)
1609 {
1610 	NestingLevel *lv = nestingLevelsGetCurrent (PythonNestingLevels);
1611 
1612 	while (lv && PY_NL (lv)->indentation >= token->indent)
1613 	{
1614 		tagEntryInfo *e = getEntryInCorkQueue (lv->corkIndex);
1615 		if (e)
1616 			e->extensionFields.endLine = token->lineNumber;
1617 
1618 		nestingLevelsPop (PythonNestingLevels);
1619 		lv = nestingLevelsGetCurrent (PythonNestingLevels);
1620 	}
1621 }
1622 
findPythonTags(void)1623 static void findPythonTags (void)
1624 {
1625 	tokenInfo *const token = newToken ();
1626 	vString *decorators = vStringNew ();
1627 	bool atStatementStart = true;
1628 
1629 	TokenContinuationDepth = 0;
1630 	NextToken = NULL;
1631 	PythonNestingLevels = nestingLevelsNew (sizeof (struct pythonNestingLevelUserData));
1632 
1633 	readToken (token);
1634 	while (token->type != TOKEN_EOF)
1635 	{
1636 		tokenType iterationTokenType = token->type;
1637 		bool readNext = true;
1638 
1639 		/* skip async keyword that confuses decorator parsing before a def */
1640 		if (token->keyword == KEYWORD_async)
1641 			readToken (token);
1642 
1643 		if (token->type == TOKEN_INDENT)
1644 			setIndent (token);
1645 		else if (token->keyword == KEYWORD_class ||
1646 		         token->keyword == KEYWORD_def)
1647 		{
1648 			pythonKind kind = token->keyword == KEYWORD_class ? K_CLASS : K_FUNCTION;
1649 
1650 			readNext = parseClassOrDef (token, decorators, kind, false);
1651 		}
1652 		else if (token->keyword == KEYWORD_cdef ||
1653 		         token->keyword == KEYWORD_cpdef)
1654 		{
1655 			readNext = parseClassOrDef (token, decorators, K_FUNCTION, true);
1656 		}
1657 		else if (token->keyword == KEYWORD_from ||
1658 		         token->keyword == KEYWORD_import)
1659 		{
1660 			readNext = parseImport (token);
1661 		}
1662 		else if (token->type == '(')
1663 		{ /* skip parentheses to avoid finding stuff inside them */
1664 			readNext = skipOverPair (token, '(', ')', NULL, false);
1665 		}
1666 		else if (token->type == TOKEN_IDENTIFIER && atStatementStart)
1667 		{
1668 			NestingLevel *lv = nestingLevelsGetCurrent (PythonNestingLevels);
1669 			tagEntryInfo *lvEntry = getEntryOfNestingLevel (lv);
1670 			pythonKind kind = K_VARIABLE;
1671 
1672 			if (lvEntry && lvEntry->kindIndex != K_CLASS)
1673 				kind = K_LOCAL_VARIABLE;
1674 
1675 			readNext = parseVariable (token, kind);
1676 		}
1677 		else if (token->type == '@' && atStatementStart &&
1678 		         PythonFields[F_DECORATORS].enabled)
1679 		{
1680 			/* collect decorators */
1681 			readQualifiedName (token);
1682 			if (token->type != TOKEN_IDENTIFIER)
1683 				readNext = false;
1684 			else
1685 			{
1686 				if (vStringLength (decorators) > 0)
1687 					vStringPut (decorators, ',');
1688 				vStringCat (decorators, token->string);
1689 				readToken (token);
1690 				readNext = skipOverPair (token, '(', ')', decorators, true);
1691 			}
1692 		}
1693 
1694 		/* clear collected decorators for any non-decorator tokens non-indent
1695 		 * token.  decorator collection takes care of skipping the possible
1696 		 * argument list, so we should never hit here parsing a decorator */
1697 		if (iterationTokenType != TOKEN_INDENT &&
1698 		    iterationTokenType != '@' &&
1699 		    PythonFields[F_DECORATORS].enabled)
1700 		{
1701 			vStringClear (decorators);
1702 		}
1703 
1704 		atStatementStart = (token->type == TOKEN_INDENT || token->type == ';');
1705 
1706 		if (readNext)
1707 			readToken (token);
1708 	}
1709 
1710 	nestingLevelsFree (PythonNestingLevels);
1711 	vStringDelete (decorators);
1712 	deleteToken (token);
1713 	Assert (NextToken == NULL);
1714 }
1715 
initialize(const langType language)1716 static void initialize (const langType language)
1717 {
1718 	Lang_python = language;
1719 
1720 	TokenPool = objPoolNew (16, newPoolToken, deletePoolToken, clearPoolToken, NULL);
1721 }
1722 
/* Parser finalization hook: deletes the token pool, which only exists if the
 * parser has been initialized. */
static void finalize (langType language CTAGS_ATTR_UNUSED, bool initialized)
{
	if (initialized)
		objPoolDelete (TokenPool);
}
1730 
PythonParser(void)1731 extern parserDefinition* PythonParser (void)
1732 {
1733 	static const char *const extensions[] = { "py", "pyx", "pxd", "pxi", "scons",
1734 											  "wsgi", NULL };
1735 	static const char *const aliases[] = { "python[23]*", "scons", NULL };
1736 	parserDefinition *def = parserNew ("Python");
1737 	def->kindTable = PythonKinds;
1738 	def->kindCount = ARRAY_SIZE (PythonKinds);
1739 	def->extensions = extensions;
1740 	def->aliases = aliases;
1741 	def->parser = findPythonTags;
1742 	def->initialize = initialize;
1743 	def->finalize = finalize;
1744 	def->keywordTable = PythonKeywordTable;
1745 	def->keywordCount = ARRAY_SIZE (PythonKeywordTable);
1746 	def->fieldTable = PythonFields;
1747 	def->fieldCount = ARRAY_SIZE (PythonFields);
1748 	def->useCork = CORK_QUEUE;
1749 	def->requestAutomaticFQTag = true;
1750 	return def;
1751 }
1752