1 /*
2 * Copyright (c) 2000-2003, Darren Hiebert
3 * Copyright (c) 2014-2016, Colomban Wendling <ban@herbesfolles.org>
4 *
5 * This source code is released for free distribution under the terms of the
6 * GNU General Public License version 2 or (at your option) any later version.
7 *
8 * This module contains functions for generating tags for Python language
9 * files.
10 */
11
12 #include "general.h" /* must always come first */
13
14 #include <string.h>
15
16 #include "entry.h"
17 #include "nestlevel.h"
18 #include "read.h"
19 #include "parse.h"
20 #include "vstring.h"
21 #include "keyword.h"
22 #include "routines.h"
23 #include "debug.h"
24 #include "xtag.h"
25 #include "objpool.h"
26 #include "ptrarray.h"
27
/* True when `c' may be part of an identifier: alphanumeric according to
 * isalnum(), an underscore, or any value >= 0x80.
 * `c' is evaluated more than once, so pass an expression without side
 * effects. */
#define isIdentifierChar(c) \
	(isalnum (c) || (c) == '_' || (c) >= 0x80)
/* Token allocation helpers: tokens are obtained from, and handed back to,
 * TokenPool instead of being allocated individually. */
#define newToken() (objPoolGet (TokenPool))
#define deleteToken(t) (objPoolPut (TokenPool, (t)))
32
/* Identifiers of the keywords this parser cares about; they are the values
 * stored in PythonKeywordTable. */
enum {
	KEYWORD_as,
	KEYWORD_async,
	KEYWORD_cdef,
	KEYWORD_class,
	KEYWORD_cpdef,
	KEYWORD_def,
	KEYWORD_extern,
	KEYWORD_from,
	KEYWORD_import,
	KEYWORD_inline,
	KEYWORD_lambda,
	KEYWORD_pass,
	KEYWORD_return,
};
typedef int keywordId; /* to allow KEYWORD_NONE */
49
/* Access levels reported for a tag.  The values index PythonAccesses;
 * COUNT_ACCESS is the number of levels, not a level itself. */
typedef enum {
	ACCESS_PRIVATE,
	ACCESS_PROTECTED,
	ACCESS_PUBLIC,
	COUNT_ACCESS
} accessType;
56
/* Textual form of each accessType, indexed by accessType; this is the string
 * stored in the "access" extension field of a tag. */
static const char *const PythonAccesses[COUNT_ACCESS] = {
	"private",
	"protected",
	"public"
};
62
/* Parser specific fields.  The values index PythonFields; COUNT_FIELD is the
 * number of fields. */
typedef enum {
	F_DECORATORS,
	F_NAMEREF,
	COUNT_FIELD
} pythonField;
68
/* Definitions of the parser specific fields, indexed by pythonField.
 * "decorators" starts out disabled, "nameref" starts out enabled. */
static fieldDefinition PythonFields[COUNT_FIELD] = {
	{ .name = "decorators",
	  .description = "decorators on functions and classes",
	  .enabled = false },
	{ .name = "nameref",
	  .description = "the original name for the tag",
	  .enabled = true },
};
77
/* Kinds of tags this parser can emit.  The values index PythonKinds, so the
 * order here must match the order of that table; COUNT_KIND is the number of
 * kinds. */
typedef enum {
	K_CLASS,
	K_FUNCTION,
	K_METHOD,
	K_VARIABLE,
	K_NAMESPACE,
	K_MODULE,
	K_UNKNOWN,
	K_PARAMETER,
	K_LOCAL_VARIABLE,
	COUNT_KIND
} pythonKind;
90
/* Roles of the module kind (K_MODULE).  The values index PythonModuleRoles. */
typedef enum {
	PYTHON_MODULE_IMPORTED,
	PYTHON_MODULE_NAMESPACE,
	PYTHON_MODULE_INDIRECTLY_IMPORTED,
} pythonModuleRole;
96
/* Roles of the unknown kind (K_UNKNOWN).  The values index
 * PythonUnknownRoles. */
typedef enum {
	PYTHON_UNKNOWN_IMPORTED,
	PYTHON_UNKNOWN_INDIRECTLY_IMPORTED,
} pythonUnknownRole;
101
102 /* Roles related to `import'
103 * ==========================
104 * import X X = (kind:module, role:imported)
105 *
106 * import X as Y X = (kind:module, role:indirectlyImported),
107 * Y = (kind:namespace, nameref:module:X)
108 * ------------------------------------------------
 * Don't confuse the kind of Y (namespace) with the `namespace' role of the module kind.
110 *
111 * from X import * X = (kind:module, role:namespace)
112 *
113 * from X import Y X = (kind:module, role:namespace),
114 * Y = (kind:unknown, role:imported, scope:module:X)
115 *
116 * from X import Y as Z X = (kind:module, role:namespace),
117 * Y = (kind:unknown, role:indirectlyImported, scope:module:X)
118 * Z = (kind:unknown, nameref:unknown:Y) */
119
/* Role definitions attached to the module kind, indexed by
 * pythonModuleRole. */
static roleDefinition PythonModuleRoles [] = {
	{ true, "imported",
	  "imported modules" },
	{ true, "namespace",
	  "namespace from where classes/variables/functions are imported" },
	{ true, "indirectlyImported",
	  "module imported in alternative name" },
};
128
/* Role definitions attached to the unknown kind, indexed by
 * pythonUnknownRole. */
static roleDefinition PythonUnknownRoles [] = {
	{ true, "imported",   "imported from the other module" },
	{ true, "indirectlyImported",
	  "classes/variables/functions/modules imported in alternative name" },
};
134
/* Kind definitions, indexed by pythonKind (the entries are in the same order
 * as that enumeration).  The module and unknown kinds carry roles; the
 * parameter and local kinds are the only ones whose first initializer is
 * false (presumably the initial "enabled" state -- the layout of
 * kindDefinition is declared elsewhere). */
static kindDefinition PythonKinds[COUNT_KIND] = {
	{true, 'c', "class",    "classes"},
	{true, 'f', "function", "functions"},
	{true, 'm', "member",   "class members"},
	{true, 'v', "variable", "variables"},
	{true, 'I', "namespace", "name referring a module defined in other file"},
	{true, 'i', "module",    "modules",
	 .referenceOnly = true,  ATTACH_ROLES(PythonModuleRoles)},
	{true, 'x', "unknown",   "name referring a class/variable/function/module defined in other module",
	 .referenceOnly = false, ATTACH_ROLES(PythonUnknownRoles)},
	{false, 'z', "parameter", "function parameters" },
	{false, 'l', "local",    "local variables" },
};
148
/* Maps keyword spellings to keyword identifiers.  Note that both "import"
 * and "cimport" map to KEYWORD_import. */
static const keywordTable PythonKeywordTable[] = {
	/* keyword			keyword ID */
	{ "as",				KEYWORD_as				},
	{ "async",			KEYWORD_async			},
	{ "cdef",			KEYWORD_cdef			},
	{ "cimport",		KEYWORD_import			},
	{ "class",			KEYWORD_class			},
	{ "cpdef",			KEYWORD_cpdef			},
	{ "def",			KEYWORD_def				},
	{ "extern",			KEYWORD_extern			},
	{ "from",			KEYWORD_from			},
	{ "import",			KEYWORD_import			},
	{ "inline",			KEYWORD_inline			},
	{ "lambda",			KEYWORD_lambda			},
	{ "pass",			KEYWORD_pass			},
	{ "return",			KEYWORD_return			},
};
166
/* Token types that are not single characters.  A token made of one
 * otherwise unclassified character uses that character's value as its type,
 * which is why the enumerators start at 256. */
typedef enum eTokenType {
	/* 0..255 are the byte's value */
	TOKEN_EOF = 256,
	TOKEN_UNDEFINED,
	TOKEN_INDENT,
	TOKEN_KEYWORD,
	TOKEN_OPERATOR,
	TOKEN_IDENTIFIER,
	TOKEN_STRING,
	TOKEN_ARROW,	/* -> */
	TOKEN_WHITESPACE,
} tokenType;
179
/* A lexical token produced by readTokenFull(). */
typedef struct {
	int				type;			/* a tokenType, or a character value */
	keywordId		keyword;		/* KEYWORD_NONE unless type is TOKEN_KEYWORD */
	vString *		string;			/* text of the token */
	int				indent;			/* indentation width, set for TOKEN_INDENT */
	unsigned long	lineNumber;		/* input line the token was read at */
	MIOPos			filePosition;	/* input position the token was read at */
} tokenInfo;
188
/* Per nesting level data: the indentation recorded when the level was
 * pushed. */
struct pythonNestingLevelUserData {
	int indentation;
};
/* Access the Python specific user data of nesting level `nl'. */
#define PY_NL(nl) ((struct pythonNestingLevelUserData *) nestingLevelGetUserData (nl))
193
/* Language identifier used for keyword lookups. */
static langType Lang_python;
/* Number of currently open '(', '[' and '{'; while it is non-zero line
 * breaks do not produce TOKEN_INDENT tokens. */
static unsigned int TokenContinuationDepth = 0;
/* Token held back by ungetToken(), returned by the next read; NULL if none. */
static tokenInfo *NextToken = NULL;
/* Stack of the scopes (classes, functions) currently being parsed. */
static NestingLevels *PythonNestingLevels = NULL;
/* Pool backing newToken() and deleteToken(). */
static objPool *TokenPool = NULL;
199
200
201 /* follows PEP-8, and always reports single-underscores as protected
202 * See:
203 * - http://www.python.org/dev/peps/pep-0008/#method-names-and-instance-variables
204 * - http://www.python.org/dev/peps/pep-0008/#designing-for-inheritance
205 */
accessFromIdentifier(const vString * const ident,pythonKind kind,int parentKind)206 static accessType accessFromIdentifier (const vString *const ident,
207 pythonKind kind, int parentKind)
208 {
209 const char *const p = vStringValue (ident);
210 const size_t len = vStringLength (ident);
211
212 /* inside a function/method, private */
213 if (parentKind != -1 && parentKind != K_CLASS)
214 return ACCESS_PRIVATE;
215 /* not starting with "_", public */
216 else if (len < 1 || p[0] != '_')
217 return ACCESS_PUBLIC;
218 /* "__...__": magic methods */
219 else if (kind == K_FUNCTION && parentKind == K_CLASS &&
220 len > 3 && p[1] == '_' && p[len - 2] == '_' && p[len - 1] == '_')
221 return ACCESS_PUBLIC;
222 /* "__...": name mangling */
223 else if (parentKind == K_CLASS && len > 1 && p[1] == '_')
224 return ACCESS_PRIVATE;
225 /* "_...": suggested as non-public, but easily accessible */
226 else
227 return ACCESS_PROTECTED;
228 }
229
/* Initialize tag entry `e' for the name held by `token'.
 *
 * Besides name, kind and position this fills in the scope (the current
 * nesting level, if any) and the access field.  A function whose enclosing
 * scope is a class is turned into a method.  Private tags are additionally
 * flagged as file scoped. */
static void initPythonEntry (tagEntryInfo *const e, const tokenInfo *const token,
							 const pythonKind kind)
{
	int parentKind = -1;

	initTagEntry (e, vStringValue (token->string), kind);
	e->lineNumber = token->lineNumber;
	e->filePosition = token->filePosition;

	NestingLevel *const scope = nestingLevelsGetCurrent (PythonNestingLevels);
	if (scope != NULL)
	{
		const tagEntryInfo *const scopeEntry = getEntryOfNestingLevel (scope);

		e->extensionFields.scopeIndex = scope->corkIndex;

		/* scopeEntry can be NULL if a kind was disabled.  Disabled kinds
		 * should arguably still count for the hierarchy, but with the cork
		 * queue there is no entry to look up, so the parent kind stays
		 * unknown in that case. */
		if (scopeEntry != NULL)
			parentKind = scopeEntry->kindIndex;
	}

	/* functions directly inside classes are methods, fix it up */
	if (kind == K_FUNCTION && parentKind == K_CLASS)
		e->kindIndex = K_METHOD;

	const accessType access = accessFromIdentifier (token->string, kind, parentKind);
	e->extensionFields.access = PythonAccesses[access];
	/* FIXME: should we really set isFileScope in addition to access? */
	if (access == ACCESS_PRIVATE)
		e->isFileScope = true;
}
269
makeClassTag(const tokenInfo * const token,const vString * const inheritance,const vString * const decorators)270 static int makeClassTag (const tokenInfo *const token,
271 const vString *const inheritance,
272 const vString *const decorators)
273 {
274 if (PythonKinds[K_CLASS].enabled)
275 {
276 tagEntryInfo e;
277
278 initPythonEntry (&e, token, K_CLASS);
279
280 e.extensionFields.inheritance = inheritance ? vStringValue (inheritance) : "";
281 if (decorators && vStringLength (decorators) > 0)
282 {
283 attachParserField (&e, false, PythonFields[F_DECORATORS].ftype,
284 vStringValue (decorators));
285 }
286
287 return makeTagEntry (&e);
288 }
289
290 return CORK_NIL;
291 }
292
makeFunctionTag(const tokenInfo * const token,const vString * const arglist,const vString * const decorators)293 static int makeFunctionTag (const tokenInfo *const token,
294 const vString *const arglist,
295 const vString *const decorators)
296 {
297 if (PythonKinds[K_FUNCTION].enabled)
298 {
299 tagEntryInfo e;
300
301 initPythonEntry (&e, token, K_FUNCTION);
302
303 if (arglist)
304 e.extensionFields.signature = vStringValue (arglist);
305 if (decorators && vStringLength (decorators) > 0)
306 {
307 attachParserField (&e, false, PythonFields[F_DECORATORS].ftype,
308 vStringValue (decorators));
309 }
310
311 return makeTagEntry (&e);
312 }
313
314 return CORK_NIL;
315 }
316
/* Emit a tag of kind `kind' for `token' without any extra field.
 * Returns the value of makeTagEntry(), or CORK_NIL when the kind is
 * disabled. */
static int makeSimplePythonTag (const tokenInfo *const token, pythonKind const kind)
{
	tagEntryInfo e;

	if (! PythonKinds[kind].enabled)
		return CORK_NIL;

	initPythonEntry (&e, token, kind);
	return makeTagEntry (&e);
}
329
/* Emit a reference tag with role `roleIndex' of kind `kind'.
 *
 * token:   provides the position, and the name when altName is NULL
 * altName: name to use instead of the token text, or NULL
 * xtag:    extra bit to mark on the tag; XTAG_UNKNOWN marks nothing
 *
 * Returns the value of makeTagEntry(), or CORK_NIL when reference tags or
 * the requested role are disabled. */
static int makeSimplePythonRefTag (const tokenInfo *const token,
								   const vString *const altName,
								   pythonKind const kind,
								   int roleIndex, xtagType xtag)
{
	tagEntryInfo e;

	if (! isXtagEnabled (XTAG_REFERENCE_TAGS) ||
		! PythonKinds[kind].roles[roleIndex].enabled)
		return CORK_NIL;

	const vString *const name = altName ? altName : token->string;
	initRefTagEntry (&e, vStringValue (name), kind, roleIndex);

	e.lineNumber = token->lineNumber;
	e.filePosition = token->filePosition;

	if (xtag != XTAG_UNKNOWN)
		markTagExtraBit (&e, xtag);

	return makeTagEntry (&e);
}
354
/* Create a token together with its string buffer.  The remaining members are
 * left uninitialized here (clearPoolToken() is the function that resets
 * them). */
static void *newPoolToken (void *createArg CTAGS_ATTR_UNUSED)
{
	tokenInfo *const fresh = xMalloc (1, tokenInfo);

	fresh->string = vStringNew ();

	return fresh;
}
361
deletePoolToken(void * data)362 static void deletePoolToken (void *data)
363 {
364 tokenInfo *token = data;
365 vStringDelete (token->string);
366 eFree (token);
367 }
368
clearPoolToken(void * data)369 static void clearPoolToken (void *data)
370 {
371 tokenInfo *token = data;
372
373 token->type = TOKEN_UNDEFINED;
374 token->keyword = KEYWORD_NONE;
375 token->indent = 0;
376 token->lineNumber = getInputLineNumber ();
377 token->filePosition = getInputFilePosition ();
378 vStringClear (token->string);
379 }
380
/* Copy every member of `src' into `dest'.  The text is copied into dest's
 * own string buffer, so the two tokens do not share storage. */
static void copyToken (tokenInfo *const dest, const tokenInfo *const src)
{
	dest->type = src->type;
	dest->keyword = src->keyword;
	dest->indent = src->indent;
	dest->lineNumber = src->lineNumber;
	dest->filePosition = src->filePosition;
	vStringCopy (dest->string, src->string);
}
390
391 /* Skip a single or double quoted string. */
readString(vString * const string,const int delimiter)392 static void readString (vString *const string, const int delimiter)
393 {
394 int escaped = 0;
395 int c;
396
397 while ((c = getcFromInputFile ()) != EOF)
398 {
399 if (escaped)
400 {
401 vStringPut (string, c);
402 escaped--;
403 }
404 else if (c == '\\')
405 escaped++;
406 else if (c == delimiter || c == '\n' || c == '\r')
407 {
408 if (c != delimiter)
409 ungetcToInputFile (c);
410 break;
411 }
412 else
413 vStringPut (string, c);
414 }
415 }
416
417 /* Skip a single or double triple quoted string. */
readTripleString(vString * const string,const int delimiter)418 static void readTripleString (vString *const string, const int delimiter)
419 {
420 int c;
421 int escaped = 0;
422 int n = 0;
423 while ((c = getcFromInputFile ()) != EOF)
424 {
425 if (c == delimiter && ! escaped)
426 {
427 if (++n >= 3)
428 break;
429 }
430 else
431 {
432 for (; n > 0; n--)
433 vStringPut (string, delimiter);
434 if (c != '\\' || escaped)
435 vStringPut (string, c);
436 n = 0;
437 }
438
439 if (escaped)
440 escaped--;
441 else if (c == '\\')
442 escaped++;
443 }
444 }
445
/* Append an identifier to `string'.  `firstChar' has already been read and
 * is always stored; further characters are read for as long as they are
 * identifier characters.  The first character that is not is pushed back to
 * the input. */
static void readIdentifier (vString *const string, const int firstChar)
{
	int ch = firstChar;

	for (;;)
	{
		vStringPut (string, (char) ch);
		ch = getcFromInputFile ();
		if (! isIdentifierChar (ch))
			break;
	}

	ungetcToInputFile (ch);
}
457
/* Hold back a copy of `token' so that the next read returns it again.
 * Only one token can be held back at a time. */
static void ungetToken (tokenInfo *const token)
{
	Assert (NextToken == NULL);

	tokenInfo *const held = newToken ();
	copyToken (held, token);
	NextToken = held;
}
464
/* Read the next token from the input into `token'.
 *
 * token:           receives the type, keyword, text and position
 * inclWhitespaces: when true, a run of blanks within a line (and a line
 *                  break inside brackets) is reported as a single
 *                  TOKEN_WHITESPACE whose text is " "; when false such
 *                  blanks are skipped silently
 *
 * A token held back by ungetToken() is returned first, as is.  Comments and
 * blank lines never produce a token of their own: outside of brackets a line
 * break yields a TOKEN_INDENT carrying the indentation of the next
 * non-empty line.  TokenContinuationDepth is maintained here for freshly
 * read tokens. */
static void readTokenFull (tokenInfo *const token, bool inclWhitespaces)
{
	int c;
	int n;

	/* if we've got a token held back, emit it */
	if (NextToken)
	{
		copyToken (token, NextToken);
		deleteToken (NextToken);
		NextToken = NULL;
		return;
	}

	token->type = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:

	/* skip blanks; n counts the characters read including the one that
	 * ended the run, so n > 1 means at least one blank was skipped */
	n = 0;
	do
	{
		c = getcFromInputFile ();
		n++;
	}
	while (c == ' ' || c == '\t' || c == '\f');

	token->lineNumber = getInputLineNumber ();
	token->filePosition = getInputFilePosition ();

	/* blanks that are not followed by a line break are reported as one
	 * whitespace token; the character ending the run is read again later */
	if (inclWhitespaces && n > 1 && c != '\r' && c != '\n')
	{
		ungetcToInputFile (c);
		vStringPut (token->string, ' ');
		token->type = TOKEN_WHITESPACE;
		return;
	}

	switch (c)
	{
		case EOF:
			token->type = TOKEN_EOF;
			break;

		case '\'':
		case '"':
		{
			/* the token text is the string content wrapped in one
			 * delimiter on each side, also for triple quoted strings */
			int d = getcFromInputFile ();
			token->type = TOKEN_STRING;
			vStringPut (token->string, c);
			if (d != c)
			{
				ungetcToInputFile (d);
				readString (token->string, c);
			}
			else if ((d = getcFromInputFile ()) == c)
				readTripleString (token->string, c);
			else /* empty string */
				ungetcToInputFile (d);
			vStringPut (token->string, c);
			/* the position recorded is the one reached after the string */
			token->lineNumber = getInputLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;
		}

		case '=':
		{
			/* "==" is an operator, a lone "=" is its own token type */
			int d = getcFromInputFile ();
			vStringPut (token->string, c);
			if (d == c)
			{
				vStringPut (token->string, d);
				token->type = TOKEN_OPERATOR;
			}
			else
			{
				ungetcToInputFile (d);
				token->type = c;
			}
			break;
		}

		case '-':
		{
			int d = getcFromInputFile ();
			if (d == '>')
			{
				vStringPut (token->string, c);
				vStringPut (token->string, d);
				token->type = TOKEN_ARROW;
				break;
			}
			/* not an arrow: push the character back so that the generic
			 * operator handling below reads it again */
			ungetcToInputFile (d);
			/* fall through */
		}
		case '+':
		case '*':
		case '%':
		case '<':
		case '>':
		case '/':
		{
			/* "X=" is an operator, a lone "X" is its own token type */
			int d = getcFromInputFile ();
			vStringPut (token->string, c);
			if (d != '=')
			{
				ungetcToInputFile (d);
				token->type = c;
			}
			else
			{
				vStringPut (token->string, d);
				token->type = TOKEN_OPERATOR;
			}
			break;
		}

		/* eats newline to implement line continuation */
		case '\\':
		{
			/* a following LF or CR LF is swallowed, anything else is
			 * pushed back; the backslash itself never yields a token */
			int d = getcFromInputFile ();
			if (d == '\r')
				d = getcFromInputFile ();
			if (d != '\n')
				ungetcToInputFile (d);
			goto getNextChar;
		}

		case '#': /* comment */
		case '\r': /* newlines for indent */
		case '\n':
		{
			int indent = 0;
			do
			{
				if (c == '#')
				{
					/* skip the comment up to the end of the line */
					do
						c = getcFromInputFile ();
					while (c != EOF && c != '\r' && c != '\n');
				}
				if (c == '\r')
				{
					/* treat CR LF as a single line break */
					int d = getcFromInputFile ();
					if (d != '\n')
						ungetcToInputFile (d);
				}
				/* measure the indentation of the next line: a tab advances
				 * to the next multiple of 8, a form feed resets the count */
				indent = 0;
				while ((c = getcFromInputFile ()) == ' ' || c == '\t' || c == '\f')
				{
					if (c == '\t')
						indent += 8 - (indent % 8);
					else if (c == '\f') /* yeah, it's weird */
						indent = 0;
					else
						indent++;
				}
			} /* skip completely empty lines, so retry */
			while (c == '\r' || c == '\n' || c == '#');
			ungetcToInputFile (c);
			if (TokenContinuationDepth > 0)
			{
				/* inside brackets a line break is plain whitespace */
				if (inclWhitespaces)
				{
					vStringPut (token->string, ' ');
					token->type = TOKEN_WHITESPACE;
				}
				else
					goto getNextChar;
			}
			else
			{
				token->type = TOKEN_INDENT;
				token->indent = indent;
			}
			break;
		}

		default:
			if (! isIdentifierChar (c))
			{
				/* any other character is a token of its own, typed by its
				 * value */
				vStringPut (token->string, c);
				token->type = c;
			}
			else
			{
				/* FIXME: handle U, B, R and F string prefixes? */
				readIdentifier (token->string, c);
				token->keyword = lookupKeyword (vStringValue (token->string), Lang_python);
				if (token->keyword == KEYWORD_NONE)
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
			}
			break;
	}

	/* handle implicit continuation lines not to emit INDENT inside brackets
	 * https://docs.python.org/3.6/reference/lexical_analysis.html#implicit-line-joining */
	if (token->type == '(' ||
		token->type == '{' ||
		token->type == '[')
	{
		TokenContinuationDepth ++;
	}
	else if (TokenContinuationDepth > 0 &&
			 (token->type == ')' ||
			  token->type == '}' ||
			  token->type == ']'))
	{
		TokenContinuationDepth --;
	}
}
679
/* Read the next token into `token', without reporting whitespace. */
static void readToken (tokenInfo *const token)
{
	const bool withWhitespaces = false;

	readTokenFull (token, withWhitespaces);
}
684
685 /*================================= parsing =================================*/
686
687
/* Append the textual representation of `token' to `repr'.
 * Indentation and whitespace tokens are represented by one space, which is
 * only added when `repr' is non-empty and does not already end with a
 * space; every other token contributes its text unchanged. */
static void reprCat (vString *const repr, const tokenInfo *const token)
{
	const bool isBlank = (token->type == TOKEN_INDENT ||
						  token->type == TOKEN_WHITESPACE);

	if (! isBlank)
	{
		vStringCat (repr, token->string);
		return;
	}

	if (vStringLength (repr) > 0 && vStringLast (repr) != ' ')
		vStringPut (repr, ' ');
}
700
/* Skip a balanced tOpen ... tClose group, `token' being the opening token.
 *
 * repr:          when non-NULL, receives the representation of the tokens
 *                that are skipped
 * reprOuterPair: whether the outermost tOpen and tClose are included in
 *                `repr'
 *
 * Nothing is read if `token' is not of type tOpen.  Returns whether `token'
 * is of type tClose on exit. */
static bool skipOverPair (tokenInfo *const token, int tOpen, int tClose,
						  vString *const repr, bool reprOuterPair)
{
	if (token->type != tOpen)
		return token->type == tClose;

	int nesting = 1;

	if (repr && reprOuterPair)
		reprCat (repr, token);

	for (;;)
	{
		readTokenFull (token, true);

		const bool closesOuter = (token->type == tClose && nesting <= 1);
		if (repr && (reprOuterPair || ! closesOuter))
			reprCat (repr, token);

		if (token->type == tOpen)
			nesting ++;
		else if (token->type == tClose)
			nesting --;

		if (token->type == TOKEN_EOF || nesting <= 0)
			break;
	}

	return token->type == tClose;
}
727
/* Skip the argument list of a lambda, starting at `token' and stopping at
 * the ':' that ends it (or at an indentation token or the end of input,
 * whichever comes first).
 *
 * repr: when non-NULL, receives the representation of the skipped tokens
 *
 * Bracketed groups are skipped as a whole and lambdas nested in default
 * values are handled recursively.  Always returns false. */
static bool skipLambdaArglist (tokenInfo *const token, vString *const repr)
{
	/* the TOKEN_INDENT test avoids reading too much, just in case */
	while (! (token->type == TOKEN_EOF ||
			  token->type == ':' ||
			  token->type == TOKEN_INDENT))
	{
		bool advance = true;

		switch (token->type)
		{
			case '(':
				advance = skipOverPair (token, '(', ')', repr, true);
				break;
			case '[':
				advance = skipOverPair (token, '[', ']', repr, true);
				break;
			case '{':
				advance = skipOverPair (token, '{', '}', repr, true);
				break;
			default:
				if (token->keyword == KEYWORD_lambda)
				{	/* handle lambdas in a default value */
					if (repr)
						reprCat (repr, token);
					readTokenFull (token, true);
					advance = skipLambdaArglist (token, repr);
					if (token->type == ':')
						advance = true;
					if (advance && repr)
						reprCat (repr, token);
				}
				else if (repr)
				{
					reprCat (repr, token);
				}
				break;
		}

		if (advance)
			readTokenFull (token, true);
	}

	return false;
}
763
/* Read a possibly dotted name (e.g. "a.b.c") into `nameToken'.
 *
 * When the next token is an identifier or a '.', all directly following
 * identifier and '.' tokens are joined into one TOKEN_IDENTIFIER token that
 * takes its other members from the last joined token; the token that
 * follows the name is held back.  Otherwise `nameToken' simply holds the
 * token that was read. */
static void readQualifiedName (tokenInfo *const nameToken)
{
	readToken (nameToken);

	if (nameToken->type != TOKEN_IDENTIFIER && nameToken->type != '.')
		return;

	vString *const dotted = vStringNew ();
	tokenInfo *const lastPart = newToken ();

	do
	{
		vStringCat (dotted, nameToken->string);
		copyToken (lastPart, nameToken);

		readToken (nameToken);
	}
	while (nameToken->type == TOKEN_IDENTIFIER || nameToken->type == '.');

	/* put the last, non-matching, token back */
	ungetToken (nameToken);

	copyToken (nameToken, lastPart);
	nameToken->type = TOKEN_IDENTIFIER;
	vStringCopy (nameToken->string, dotted);

	deleteToken (lastPart);
	vStringDelete (dotted);
}
793
/* Read the name declared by a cdef/cpdef statement into `token'.
 *
 * kind: set to K_CLASS for a "class" declaration and to K_FUNCTION when an
 *       identifier directly followed by "(" is found; left untouched
 *       otherwise
 *
 * An "extern" or "import" keyword following the cdef is skipped, unless it
 * is followed by "from", in which case false is returned.  Returns whether
 * `token' ends up being an identifier. */
static bool readCDefName (tokenInfo *const token, pythonKind *kind)
{
	readToken (token);

	if (token->keyword == KEYWORD_extern ||
		token->keyword == KEYWORD_import)
	{
		readToken (token);
		if (token->keyword == KEYWORD_from)
			return false;
	}

	if (token->keyword == KEYWORD_class)
	{
		*kind = K_CLASS;
		readToken (token);
		return token->type == TOKEN_IDENTIFIER;
	}

	/* skip the optional type declaration -- everything on the same line
	 * until an identifier followed by "(". */
	tokenInfo *const candidate = newToken ();
	bool found = false;

	while (! found &&
		   token->type != TOKEN_EOF &&
		   token->type != TOKEN_INDENT &&
		   token->type != '=' &&
		   token->type != ',' &&
		   token->type != ':')
	{
		switch (token->type)
		{
			case '[':
				if (skipOverPair (token, '[', ']', NULL, false))
					readToken (token);
				break;

			case '(':
				if (skipOverPair (token, '(', ')', NULL, false))
					readToken (token);
				break;

			case TOKEN_IDENTIFIER:
				copyToken (candidate, token);
				readToken (token);
				if (token->type == '(')
				{	/* okay, we really found a function, use this */
					*kind = K_FUNCTION;
					ungetToken (token);
					copyToken (token, candidate);
					found = true;
				}
				break;

			default:
				readToken (token);
				break;
		}
	}

	deleteToken (candidate);

	return token->type == TOKEN_IDENTIFIER;
}
854
/* Parse the ": type" annotation that may follow a parameter name.
 *
 * arglist: receives the representation of everything consumed, the ':'
 *          included
 *
 * The annotation ends at a '=' or ',' outside of any brackets, or at a
 * closing bracket that has no matching opening one; that token is held
 * back.  Returns a new string holding the annotation (owned by the caller),
 * or NULL when the next token is not ':' (it is held back) or when the end
 * of the input is reached. */
static vString *parseParamTypeAnnotation (tokenInfo *const token,
										  vString *arglist)
{
	readToken (token);
	if (token->type != ':')
	{
		ungetToken (token);
		return NULL;
	}

	reprCat (arglist, token);

	int nesting = 0;
	vString *const annotation = vStringNew ();

	for (;;)
	{
		readTokenFull (token, true);

		if (token->type == TOKEN_EOF)
		{
			vStringDelete (annotation);
			return NULL;
		}

		if (token->type == TOKEN_WHITESPACE)
		{
			/* whitespace only goes to the argument list */
			reprCat (arglist, token);
			continue;
		}

		switch (token->type)
		{
			case '(':
			case '[':
			case '{':
				nesting ++;
				break;
			case ')':
			case ']':
			case '}':
				nesting --;
				break;
			default:
				break;
		}

		const bool atSeparator = (nesting == 0 &&
								  (token->type == '=' || token->type == ','));
		if (nesting < 0 || atSeparator)
		{
			ungetToken (token);
			return annotation;
		}

		reprCat (arglist, token);
		reprCat (annotation, token);
	}
}
901
parseReturnTypeAnnotation(tokenInfo * const token)902 static vString *parseReturnTypeAnnotation (tokenInfo *const token)
903 {
904 readToken (token);
905 if (token->type != TOKEN_ARROW)
906 {
907 ungetToken (token);
908 return NULL;
909 }
910
911 int depth = 0;
912 vString *t = vStringNew ();
913 while (true)
914 {
915 readToken (token);
916 if (token->type == TOKEN_EOF)
917 break;
918
919 if (token->type == '(' ||
920 token->type == '[' ||
921 token->type == '{')
922 depth ++;
923 else if (token->type == ')' ||
924 token->type == ']' ||
925 token->type == '}')
926 depth --;
927 if (depth == 0 && token->type == ':')
928 {
929 ungetToken (token);
930 return t;
931 }
932 else
933 reprCat (t, token);
934 }
935 vStringDelete (t);
936 return NULL;
937 }
938
/* A function parameter together with its optional type. */
struct typedParam {
	tokenInfo *token;	/* name token of the parameter */
	vString *type;		/* type of the parameter, or NULL */
};
943
/* Allocate a typedParam referring to `token' and `type'.  Neither is
 * copied: deleteTypedParam() releases both along with the typedParam. */
static struct typedParam *makeTypedParam (tokenInfo *token, vString *type)
{
	struct typedParam *const param = xMalloc (1, struct typedParam);

	param->type = type;
	param->token = token;

	return param;
}
951
/* Like makeTypedParam(), but the new typedParam refers to copies of `token'
 * and of `type' (when not NULL), so the arguments stay owned by the
 * caller. */
static struct typedParam *makeTypedParamWithCopy (const tokenInfo *token, const vString *type)
{
	tokenInfo *const tokenDup = newToken ();
	copyToken (tokenDup, token);

	vString *typeDup = NULL;
	if (type)
		typeDup = vStringNewCopy (type);

	return makeTypedParam (tokenDup, typeDup);
}
961
deleteTypedParam(struct typedParam * p)962 static void deleteTypedParam (struct typedParam *p)
963 {
964 deleteToken (p->token);
965 vStringDelete (p->type); /* NULL is acceptable. */
966 eFree (p);
967 }
968
/* Parse a parenthesized list, `token' being its opening '('.
 *
 * kind:       kind of the tag the list belongs to; for K_CLASS the outer
 *             parentheses are left out of `arglist' and no parameter is
 *             collected
 * arglist:    receives the representation of the list
 * parameters: receives a typedParam for every identifier that directly
 *             follows the opening '(' or a ',' at the outermost level,
 *             provided the parameter kind is enabled
 *
 * Reading stops once the opening parenthesis is balanced or at the end of
 * the input. */
static void parseArglist (tokenInfo *const token, const int kind,
						  vString *const arglist, ptrArray *const parameters)
{
	const bool isClass = (kind == K_CLASS);
	int lastSignificant = token->type;
	int nesting = 1;

	if (! isClass)
		reprCat (arglist, token);

	for (;;)
	{
		/* for easy `*args` and `**kwargs` support, `*` is ignored like
		 * whitespace, which anyway can't otherwise screw us up */
		if (! (token->type == TOKEN_WHITESPACE || token->type == '*'))
			lastSignificant = token->type;

		readTokenFull (token, true);

		const bool closesOuter = (token->type == ')' && nesting <= 1);
		if (! (isClass && closesOuter))
			reprCat (arglist, token);

		switch (token->type)
		{
			case '(':
			case '[':
			case '{':
				nesting ++;
				break;

			case ')':
			case ']':
			case '}':
				nesting --;
				break;

			case TOKEN_IDENTIFIER:
				if (! isClass && nesting == 1 &&
					(lastSignificant == '(' || lastSignificant == ',') &&
					PythonKinds[K_PARAMETER].enabled)
				{
					tokenInfo *const paramName = newToken ();
					copyToken (paramName, token);

					vString *const paramType = parseParamTypeAnnotation (token, arglist);

					ptrArrayAdd (parameters, makeTypedParam (paramName, paramType));
				}
				break;

			default:
				break;
		}

		if (token->type == TOKEN_EOF || nesting <= 0)
			break;
	}
}
1019
/* Parse the C-style argument list of a cdef/cpdef function, `token' being
 * its opening '('.
 *
 * kind:       not used
 * arglist:    receives a normalized signature, "(type name, type name)"
 * parameters: receives a typedParam (name plus type, if any) for every
 *             parameter found
 *
 * Reading stops at the ')' closing the list.  If the end of the input is
 * reached first, `arglist' and `parameters' are emptied. */
static void parseCArglist (tokenInfo *const token, const int kind,
						   vString *const arglist, ptrArray *const parameters)
{
	int nesting = 1;
	tokenInfo *const paramName = newToken ();
	vString *const paramType = vStringNew ();

	vStringCat (arglist, token->string);	/* '(' */

	for (;;)
	{
		readToken (token);
		const int tt = token->type;

		if (tt == TOKEN_EOF)
		{
			/* Unexpected input. */
			vStringClear (arglist);
			ptrArrayClear (parameters);
			break;
		}
		else if (nesting == 1 && (tt == ',' || tt == ')'))
		{
			if (paramName->type == TOKEN_IDENTIFIER)
			{
				/*
				 * Clean up the type string.
				 * The type string includes the parameter name at the end.
				 * 1. Trim the parameter name at the end.
				 * 2. Then, trim the white space at the end of the type string.
				 * 3. If the type string is not empty,
				 *    3.a append (the type string + ' ' + the parameter name) to arglist.
				 *    3.b else just append the parameter name to arglist.
				 *
				 * FIXME:
				 * This doesn't work well with an array and a function pointer.
				 *
				 *   f(..., int seq [dim], ...)
				 *      in this case, dim is extracted as a parameter.
				 *
				 *   f(..., int (*fn)(int), ...)
				 *      in this case, int is extracted as a parameter.
				 */
				Assert (vStringLength (paramType) >= vStringLength (paramName->string));
				const size_t typeLen = vStringLength (paramType) - vStringLength (paramName->string);
				vStringTruncate (paramType, typeLen);

				if (vStringLength (paramType) > 0)
					vStringStripTrailing (paramType);
				if (vStringLength (paramType) > 0)
				{
					vStringCat (arglist, paramType);
					vStringPut (arglist, ' ');
				}
				vStringCat (arglist, paramName->string);

				struct typedParam *const p =
					makeTypedParamWithCopy (paramName,
											vStringIsEmpty (paramType) ? NULL : paramType);
				ptrArrayAdd (parameters, p);
			}

			if (tt == ')')
			{
				vStringPut (arglist, ')');
				break;
			}

			/* get ready for the next parameter */
			vStringCatS (arglist, ", ");
			vStringClear (paramType);
			paramName->type = TOKEN_UNDEFINED;
		}
		else if (tt == '(' || tt == '[' || tt == '{')
		{
			vStringPut (paramType, tt);
			nesting ++;
		}
		else if (tt == ')' || tt == ']' || tt == '}')
		{
			vStringPut (paramType, tt);
			nesting --;
		}
		else if (tt == TOKEN_IDENTIFIER || tt == TOKEN_KEYWORD)
		{
			/* separate the word from a preceding word or comma */
			if (vStringLength (paramType) > 0)
			{
				const int tail = vStringLast (paramType);
				if (isalnum ((unsigned char) tail) || tail == ',')
					vStringPut (paramType, ' ');
			}
			vStringCat (paramType, token->string);

			/* the last word not ending with a digit is taken as the name */
			if (! isdigit ((unsigned char) vStringLast (token->string)))
				copyToken (paramName, token);
		}
		else
		{
			vStringCat (paramType, token->string);
		}
	}

	vStringDelete (paramType);
	deleteToken (paramName);
}
1129
/* Parse a class or function definition, the introducing keyword having been
 * read already.
 *
 * token:      scratch token used for reading
 * decorators: decorators to attach to the tag, may be NULL
 * kind:       K_CLASS or K_FUNCTION; for a cdef it may be overridden by what
 *             readCDefName() finds
 * isCDef:     whether this is a cdef/cpdef declaration
 *
 * Emits the tag for the definition, pushes a nesting level for it (recording
 * the indent value carried by `token' at that point), emits the collected
 * parameters as tags, and finally records the return type annotation of a
 * function, if any.  Returns false when no name could be read, in which case
 * nothing is emitted; true otherwise. */
static bool parseClassOrDef (tokenInfo *const token,
							 const vString *const decorators,
							 pythonKind kind, bool isCDef)
{
	vString *arglist = NULL;
	ptrArray *parameters = NULL;

	if (isCDef)
	{
		if (! readCDefName (token, &kind))
			return false;
	}
	else
	{
		readToken (token);
		if (token->type != TOKEN_IDENTIFIER)
			return false;
	}

	tokenInfo *const name = newToken ();
	copyToken (name, token);

	readToken (token);
	/* collect parameters or inheritance */
	if (token->type == '(')
	{
		arglist = vStringNew ();
		parameters = ptrArrayNew ((ptrArrayDeleteFunc)deleteTypedParam);

		if (isCDef && kind != K_CLASS)
			parseCArglist (token, kind, arglist, parameters);
		else
			parseArglist (token, kind, arglist, parameters);
	}

	const int corkIndex = (kind == K_CLASS)
		? makeClassTag (name, arglist, decorators)
		: makeFunctionTag (name, arglist, decorators);

	NestingLevel *const level = nestingLevelsPush (PythonNestingLevels, corkIndex);
	PY_NL (level)->indentation = token->indent;

	deleteToken (name);
	vStringDelete (arglist);

	/* emit the parameters, now that their scope has been pushed */
	const unsigned int paramCount = parameters ? ptrArrayCount (parameters) : 0;
	for (unsigned int i = 0; i < paramCount; i++)
	{
		struct typedParam *const param = ptrArrayItem (parameters, i);
		const int paramIndex = makeSimplePythonTag (param->token, K_PARAMETER);
		tagEntryInfo *const paramEntry = getEntryInCorkQueue (paramIndex);

		if (paramEntry && param->type)
		{
			/* the tag entry takes over the type string */
			paramEntry->extensionFields.typeRef [0] = eStrdup ("typename");
			paramEntry->extensionFields.typeRef [1] = vStringDeleteUnwrap (param->type);
			param->type = NULL;
		}
	}
	ptrArrayDelete (parameters); /* NULL is acceptable. */

	if (kind != K_CLASS)
	{
		tagEntryInfo *const defEntry = getEntryInCorkQueue (corkIndex);

		if (defEntry)
		{
			vString *const returnType = parseReturnTypeAnnotation (token);

			if (returnType)
			{
				defEntry->extensionFields.typeRef [0] = eStrdup ("typename");
				defEntry->extensionFields.typeRef [1] = vStringDeleteUnwrap (returnType);
			}
		}
	}

	return true;
}
1210
/* Parses an `import ...` or `from X import ...` statement and emits the
 * corresponding reference tags.
 *
 * TOKEN is expected to be on the `from` or `import` keyword when called.
 * Handled forms:
 *
 *   import X              X = (kind:module, role:imported)
 *   import x as Y         x = (kind:module, role:indirectlyImported)
 *                         Y = (kind:namespace, nameref:module:x)
 *   from x import Y       x = (kind:module, role:namespace)
 *                         Y = (kind:unknown, role:imported, scope:module:x)
 *   from x import Y as Z  x = (kind:module, role:namespace)
 *                         Y = (kind:unknown, role:indirectlyImported,
 *                              scope:module:x)
 *                         Z = (kind:unknown, nameref:unknown:Y)
 *
 * Comma separated lists are supported, as is the parenthesized form
 * `from x import (...)`.
 *
 * Always returns false; findPythonTags() stores the result in its
 * "read next token" flag, so the token this function stops on is processed
 * by the main loop. */
static bool parseImport (tokenInfo *const token)
{
	tokenInfo *fromModule = NULL;

	/* `from X`: remember X, then move on to what should be `import` */
	if (token->keyword == KEYWORD_from)
	{
		readQualifiedName (token);
		if (token->type == TOKEN_IDENTIFIER)
		{
			fromModule = newToken ();
			copyToken (fromModule, token);
			readToken (token);
		}
	}

	if (token->keyword == KEYWORD_import)
	{
		bool insideParens = false;
		/* only assigned, and only read, when fromModule is set */
		int moduleIndex;

		if (fromModule)
		{
			/* the module named after `from` acts as a namespace */
			moduleIndex = makeSimplePythonRefTag (fromModule, NULL, K_MODULE,
												  PYTHON_MODULE_NAMESPACE,
												  XTAG_UNKNOWN);
		}

		do
		{
			tokenInfo *imported;

			readQualifiedName (token);

			/* `from x import (...)`: step over the opening parenthesis once */
			if (fromModule && ! insideParens && token->type == '(')
			{
				insideParens = true;
				readQualifiedName (token);
			}

			/* not a name: let the loop condition decide whether to go on */
			if (token->type != TOKEN_IDENTIFIER)
				continue;

			imported = newToken ();
			copyToken (imported, token);
			readToken (token);

			if (token->keyword != KEYWORD_as)
			{
				if (fromModule)
				{
					/* from x import Y: Y is scoped to module x */
					int importedIndex = makeSimplePythonRefTag (imported, NULL,
																K_UNKNOWN,
																PYTHON_UNKNOWN_IMPORTED,
																XTAG_UNKNOWN);
					tagEntryInfo *importedEntry = getEntryInCorkQueue (importedIndex);

					if (importedEntry)
						importedEntry->extensionFields.scopeIndex = moduleIndex;
				}
				else
				{
					/* import X */
					makeSimplePythonRefTag (imported, NULL, K_MODULE,
											PYTHON_MODULE_IMPORTED,
											XTAG_UNKNOWN);
				}
			}
			else
			{
				/* the alias following "as" becomes the defined name */
				readToken (token);
				if (token->type == TOKEN_IDENTIFIER)
				{
					pythonKind originalKind;
					int aliasIndex;

					if (fromModule)
					{
						/* from x import Y as Z: first Y, scoped to x ... */
						int originalIndex = makeSimplePythonRefTag (imported, NULL,
																	K_UNKNOWN,
																	PYTHON_UNKNOWN_INDIRECTLY_IMPORTED,
																	XTAG_UNKNOWN);
						tagEntryInfo *originalEntry = getEntryInCorkQueue (originalIndex);

						if (originalEntry)
							originalEntry->extensionFields.scopeIndex = moduleIndex;

						/* ... then Z */
						originalKind = K_UNKNOWN;
						aliasIndex = makeSimplePythonTag (token, K_UNKNOWN);
					}
					else
					{
						/* import x as Y: first x ... */
						makeSimplePythonRefTag (imported, NULL, K_MODULE,
												PYTHON_MODULE_INDIRECTLY_IMPORTED,
												XTAG_UNKNOWN);

						/* ... then Y */
						originalKind = K_MODULE;
						aliasIndex = makeSimplePythonTag (token, K_NAMESPACE);
					}

					/* link the alias back to the original name with a
					 * "<kind>:<name>" nameref field */
					if (PythonFields[F_NAMEREF].enabled)
					{
						vString *nameref = vStringNewInit (PythonKinds [originalKind].name);

						vStringPut (nameref, ':');
						vStringCat (nameref, imported->string);
						attachParserFieldToCorkEntry (aliasIndex,
													  PythonFields[F_NAMEREF].ftype,
													  vStringValue (nameref));
						vStringDelete (nameref);
					}

					copyToken (imported, token);
					readToken (token);
				}
			}

			deleteToken (imported);
		}
		while (token->type == ',');

		/* consume the parenthesis closing `from x import (...)` */
		if (insideParens && token->type == ')')
			readToken (token);
	}

	if (fromModule)
		deleteToken (fromModule);

	return false;
}
1365
1366 /* this only handles the most common cases, but an annotation can be any
1367 * expression in theory.
1368 * this function assumes there must be an annotation, and doesn't do any check
1369 * on the token on which it is called: the caller should do that part. */
skipVariableTypeAnnotation(tokenInfo * const token,vString * const repr)1370 static bool skipVariableTypeAnnotation (tokenInfo *const token, vString *const repr)
1371 {
1372 bool readNext = true;
1373
1374 readToken (token);
1375 switch (token->type)
1376 {
1377 case '[': readNext = skipOverPair (token, '[', ']', repr, true); break;
1378 case '(': readNext = skipOverPair (token, '(', ')', repr, true); break;
1379 case '{': readNext = skipOverPair (token, '{', '}', repr, true); break;
1380 default: reprCat (repr, token);
1381 }
1382 if (readNext)
1383 readToken (token);
1384 /* skip subscripts and calls */
1385 while (token->type == '[' || token->type == '(' || token->type == '.' || token->type == '|')
1386 {
1387 switch (token->type)
1388 {
1389 case '[': readNext = skipOverPair (token, '[', ']', repr, true); break;
1390 case '(': readNext = skipOverPair (token, '(', ')', repr, true); break;
1391 case '|':
1392 reprCat (repr, token);
1393 skipVariableTypeAnnotation (token, repr);
1394 readNext = false;
1395 break;
1396 case '.':
1397 reprCat (repr, token);
1398 readToken (token);
1399 readNext = token->type == TOKEN_IDENTIFIER;
1400 if (readNext)
1401 reprCat (repr, token);
1402 break;
1403 default: readNext = false; break;
1404 }
1405 if (readNext)
1406 readToken (token);
1407 }
1408
1409 return false;
1410 }
1411
/* Parses a (possibly multiple) variable assignment starting at TOKEN and
 * emits tags of kind KIND for the assigned names.
 *
 * To give lambdas in multiple assignments the proper tag kind, all target
 * names are collected first (at most ARRAY_SIZE (names) of them, together
 * with their optional type annotations) and are then matched one by one
 * against the initializers on the right-hand side of `=`.
 *
 * Nothing is tagged unless the collected names are followed by `=`.
 *
 * Always returns false; findPythonTags() stores the result in its
 * "read next token" flag. */
static bool parseVariable (tokenInfo *const token, const pythonKind kind)
{
	tokenInfo *names[8] = { NULL };
	vString *types [ARRAY_SIZE (names)] = { NULL };
	unsigned int count = 0;
	unsigned int k;
	vString *pending = vStringNew ();

	/* phase 1: gather the target names and their annotations */
	while (token->type == TOKEN_IDENTIFIER && count < ARRAY_SIZE (names))
	{
		const unsigned int slot = count;
		tokenInfo *candidate = newToken ();

		copyToken (candidate, token);
		readToken (token);

		if (token->type == '.')
		{
			/* FIXME: what to do with dotted names?  They are currently
			 * ignored (the slot stays NULL) so that the rest of the
			 * declaration is not broken, but the expected behavior is
			 * questionable */
			deleteToken (candidate);
			candidate = NULL;

			do
				readToken (token);
			while (token->type == TOKEN_IDENTIFIER || token->type == '.');
		}

		names[slot] = candidate;
		count++;

		/* (parse and) skip annotations.  This must not be too permissive
		 * because it is not yet certain that a variable is being parsed. */
		if (token->type == ':')
		{
			if (skipVariableTypeAnnotation (token, pending))
				readToken (token);
		}

		/* hand a non-empty annotation over to the slot and start afresh */
		if (vStringLength (pending) > 0)
		{
			types[slot] = pending;
			pending = vStringNew ();
		}

		if (token->type != ',')
			break;
		readToken (token);
	}
	vStringDelete (pending);

	/* phase 2: for a proper assignment, pair names with initializers so
	 * that lambdas and the like are caught */
	if (token->type == '=')
	{
		unsigned int next = 0;

		do
		{
			const tokenInfo *const target = names[next];
			vString **const typeSlot = &types[next];

			next++;
			readToken (token);

			if (target && token->keyword == KEYWORD_lambda)
			{
				vString *arglist = vStringNew ();
				tokenInfo *anon = NULL;

				/* an annotated lambda needs a token for its anonymous
				 * function tag; copy it while still on `lambda` */
				if (*typeSlot)
				{
					anon = newToken ();
					copyToken (anon, token);
				}

				readToken (token);
				vStringPut (arglist, '(');
				skipLambdaArglist (token, arglist);
				vStringPut (arglist, ')');

				if (! *typeSlot)
				{
					/* id = lambda var: var
					 *
					 * `id' is tagged directly as a function:
					 *
					 *   id  input.py  /^id = lambda var: var$/;"  function
					 */
					makeFunctionTag (target, arglist, NULL);
				}
				else
				{
					/* id_t: Callable[[int], int] = lambda var: var
					 *
					 * Tagging `id_t' as a function carrying
					 * `typeref:typename:Callable[[int], int]' would be
					 * misleading: users expect the typeref field of a
					 * function tag to be its return type.  Producing the
					 * right fields (`typeref:typename:int' and
					 * `signature:(int)') would require analyzing
					 * `Callable[[int], int]', a level of analysis to be
					 * avoided.
					 *
					 * To still record the annotation, indirection is
					 * used: the name is tagged with KIND and refers to an
					 * anonymous function tag.
					 *
					 *   id_t  input.py  /^id_t: ...$/;"  variable \
					 *     typeref:typename:Callable[[int], int] \
					 *     nameref:function:anonFuncNNN
					 *   anonFuncNNN  input.py  /^id_t: ...$/;"  function \
					 *     extras:anonymous
					 */
					int varIndex = makeSimplePythonTag (target, kind);
					int funcIndex;
					tagEntryInfo *funcEntry;
					tagEntryInfo *varEntry;

					vStringClear (anon->string);
					anonGenerate (anon->string, "anonFunc", K_FUNCTION);
					funcIndex = makeFunctionTag (anon, arglist, NULL);

					funcEntry = getEntryInCorkQueue (funcIndex);
					if (funcEntry)
						markTagExtraBit (funcEntry, XTAG_ANONYMOUS);

					varEntry = getEntryInCorkQueue (varIndex);
					if (varEntry)
					{
						vString *nameref;

						/* the tag takes ownership of the annotation text */
						varEntry->extensionFields.typeRef [0] = eStrdup ("typename");
						varEntry->extensionFields.typeRef [1] = vStringDeleteUnwrap (*typeSlot);
						*typeSlot = NULL;

						nameref = vStringNewInit (PythonKinds [K_FUNCTION].name);
						vStringPut (nameref, ':');
						vStringCat (nameref, anon->string);
						attachParserField (varEntry, true,
										   PythonFields[F_NAMEREF].ftype,
										   vStringValue (nameref));
						vStringDelete (nameref);
					}

					deleteToken (anon);
				}

				vStringDelete (arglist);
			}
			else if (target)
			{
				/* ordinary initializer: plain tag, plus annotation if any */
				int index = makeSimplePythonTag (target, kind);
				tagEntryInfo *entry = getEntryInCorkQueue (index);

				if (entry && *typeSlot)
				{
					entry->extensionFields.typeRef [0] = eStrdup ("typename");
					entry->extensionFields.typeRef [1] = vStringDeleteUnwrap (*typeSlot);
					*typeSlot = NULL;
				}
			}

			/* skip until the next initializer: stop at end of input, `;`
			 * or an indent token, or at a `,` outside any continuation */
			for (;;)
			{
				if (token->type == TOKEN_EOF ||
					token->type == ';' ||
					token->type == TOKEN_INDENT)
					break;
				if (! (TokenContinuationDepth > 0) && token->type == ',')
					break;
				readToken (token);
			}
		}
		while (token->type == ',' && next < count);

		/* names left without an initializer of their own simply become
		 * tags of KIND.  This handles cases like `a, b, c = (c, d, e)` --
		 * or worse */
		for (; next < count; next++)
		{
			if (names[next])
				makeSimplePythonTag (names[next], kind);
		}
	}

	/* release whatever was collected, last slot first */
	for (k = count; k > 0; k--)
	{
		if (names[k - 1])
			deleteToken (names[k - 1]);
		vStringDelete (types[k - 1]); /* NULL is acceptable. */
	}

	return false;
}
1606
1607 /* pops any level >= to indent */
setIndent(tokenInfo * const token)1608 static void setIndent (tokenInfo *const token)
1609 {
1610 NestingLevel *lv = nestingLevelsGetCurrent (PythonNestingLevels);
1611
1612 while (lv && PY_NL (lv)->indentation >= token->indent)
1613 {
1614 tagEntryInfo *e = getEntryInCorkQueue (lv->corkIndex);
1615 if (e)
1616 e->extensionFields.endLine = token->lineNumber;
1617
1618 nestingLevelsPop (PythonNestingLevels);
1619 lv = nestingLevelsGetCurrent (PythonNestingLevels);
1620 }
1621 }
1622
findPythonTags(void)1623 static void findPythonTags (void)
1624 {
1625 tokenInfo *const token = newToken ();
1626 vString *decorators = vStringNew ();
1627 bool atStatementStart = true;
1628
1629 TokenContinuationDepth = 0;
1630 NextToken = NULL;
1631 PythonNestingLevels = nestingLevelsNew (sizeof (struct pythonNestingLevelUserData));
1632
1633 readToken (token);
1634 while (token->type != TOKEN_EOF)
1635 {
1636 tokenType iterationTokenType = token->type;
1637 bool readNext = true;
1638
1639 /* skip async keyword that confuses decorator parsing before a def */
1640 if (token->keyword == KEYWORD_async)
1641 readToken (token);
1642
1643 if (token->type == TOKEN_INDENT)
1644 setIndent (token);
1645 else if (token->keyword == KEYWORD_class ||
1646 token->keyword == KEYWORD_def)
1647 {
1648 pythonKind kind = token->keyword == KEYWORD_class ? K_CLASS : K_FUNCTION;
1649
1650 readNext = parseClassOrDef (token, decorators, kind, false);
1651 }
1652 else if (token->keyword == KEYWORD_cdef ||
1653 token->keyword == KEYWORD_cpdef)
1654 {
1655 readNext = parseClassOrDef (token, decorators, K_FUNCTION, true);
1656 }
1657 else if (token->keyword == KEYWORD_from ||
1658 token->keyword == KEYWORD_import)
1659 {
1660 readNext = parseImport (token);
1661 }
1662 else if (token->type == '(')
1663 { /* skip parentheses to avoid finding stuff inside them */
1664 readNext = skipOverPair (token, '(', ')', NULL, false);
1665 }
1666 else if (token->type == TOKEN_IDENTIFIER && atStatementStart)
1667 {
1668 NestingLevel *lv = nestingLevelsGetCurrent (PythonNestingLevels);
1669 tagEntryInfo *lvEntry = getEntryOfNestingLevel (lv);
1670 pythonKind kind = K_VARIABLE;
1671
1672 if (lvEntry && lvEntry->kindIndex != K_CLASS)
1673 kind = K_LOCAL_VARIABLE;
1674
1675 readNext = parseVariable (token, kind);
1676 }
1677 else if (token->type == '@' && atStatementStart &&
1678 PythonFields[F_DECORATORS].enabled)
1679 {
1680 /* collect decorators */
1681 readQualifiedName (token);
1682 if (token->type != TOKEN_IDENTIFIER)
1683 readNext = false;
1684 else
1685 {
1686 if (vStringLength (decorators) > 0)
1687 vStringPut (decorators, ',');
1688 vStringCat (decorators, token->string);
1689 readToken (token);
1690 readNext = skipOverPair (token, '(', ')', decorators, true);
1691 }
1692 }
1693
1694 /* clear collected decorators for any non-decorator tokens non-indent
1695 * token. decorator collection takes care of skipping the possible
1696 * argument list, so we should never hit here parsing a decorator */
1697 if (iterationTokenType != TOKEN_INDENT &&
1698 iterationTokenType != '@' &&
1699 PythonFields[F_DECORATORS].enabled)
1700 {
1701 vStringClear (decorators);
1702 }
1703
1704 atStatementStart = (token->type == TOKEN_INDENT || token->type == ';');
1705
1706 if (readNext)
1707 readToken (token);
1708 }
1709
1710 nestingLevelsFree (PythonNestingLevels);
1711 vStringDelete (decorators);
1712 deleteToken (token);
1713 Assert (NextToken == NULL);
1714 }
1715
initialize(const langType language)1716 static void initialize (const langType language)
1717 {
1718 Lang_python = language;
1719
1720 TokenPool = objPoolNew (16, newPoolToken, deletePoolToken, clearPoolToken, NULL);
1721 }
1722
/* Parser finalization hook: destroys the token pool.  Nothing is done when
 * INITIALIZED is false. */
static void finalize (langType language CTAGS_ATTR_UNUSED, bool initialized)
{
	if (initialized)
		objPoolDelete (TokenPool);
}
1730
PythonParser(void)1731 extern parserDefinition* PythonParser (void)
1732 {
1733 static const char *const extensions[] = { "py", "pyx", "pxd", "pxi", "scons",
1734 "wsgi", NULL };
1735 static const char *const aliases[] = { "python[23]*", "scons", NULL };
1736 parserDefinition *def = parserNew ("Python");
1737 def->kindTable = PythonKinds;
1738 def->kindCount = ARRAY_SIZE (PythonKinds);
1739 def->extensions = extensions;
1740 def->aliases = aliases;
1741 def->parser = findPythonTags;
1742 def->initialize = initialize;
1743 def->finalize = finalize;
1744 def->keywordTable = PythonKeywordTable;
1745 def->keywordCount = ARRAY_SIZE (PythonKeywordTable);
1746 def->fieldTable = PythonFields;
1747 def->fieldCount = ARRAY_SIZE (PythonFields);
1748 def->useCork = CORK_QUEUE;
1749 def->requestAutomaticFQTag = true;
1750 return def;
1751 }
1752