/*
 * Copyright (c) 2013, Colomban Wendling
 *
 * This source code is released for free distribution under the terms of the
 * GNU General Public License version 2 or (at your option) any later version.
 *
 * This module contains code for generating tags for the PHP scripting
 * language.
 *
 * The language reference: http://php.net/manual/en/langref.php
 */

/*
 * INCLUDE FILES
 */
#include "general.h"  /* must always come first */

#include <string.h>   /* strcmp(), memmove() */
#include "parse.h"
#include "read.h"
#include "vstring.h"
#include "keyword.h"
#include "entry.h"
#include "routines.h"
#include "debug.h"
#include "objpool.h"
#include "promise.h"

/* Whether @c may appear inside an identifier: alphanumerics, '_' and any
 * value >= 0x80.  Note that @c is evaluated several times. */
#define isIdentChar(c) (isalnum (c) || (c) == '_' || (c) >= 0x80)
/* Tokens are recycled through TokenPool rather than allocated each time. */
#define newToken() (objPoolGet (TokenPool))
#define deleteToken(t) (objPoolPut (TokenPool, (t)))

/* Keyword IDs, bound to their spelling by PhpKeywordTable. */
enum {
	KEYWORD_abstract,
	KEYWORD_and,
	KEYWORD_as,
	KEYWORD_break,
	KEYWORD_callable,
	KEYWORD_case,
	KEYWORD_catch,
	KEYWORD_class,
	KEYWORD_clone,
	KEYWORD_const,
	KEYWORD_continue,
	KEYWORD_declare,
	KEYWORD_define,
	KEYWORD_default,
	KEYWORD_do,
	KEYWORD_echo,
	KEYWORD_else,
	KEYWORD_elif,
	KEYWORD_enddeclare,
	KEYWORD_endfor,
	KEYWORD_endforeach,
	KEYWORD_endif,
	KEYWORD_endswitch,
	KEYWORD_endwhile,
	KEYWORD_extends,
	KEYWORD_final,
	KEYWORD_finally,
	KEYWORD_for,
	KEYWORD_foreach,
	KEYWORD_function,
	KEYWORD_global,
	KEYWORD_goto,
	KEYWORD_if,
	KEYWORD_implements,
	KEYWORD_include,
	KEYWORD_include_once,
	KEYWORD_instanceof,
	KEYWORD_insteadof,
	KEYWORD_interface,
	KEYWORD_namespace,
	KEYWORD_new,
	KEYWORD_or,
	KEYWORD_print,
	KEYWORD_private,
	KEYWORD_protected,
	KEYWORD_public,
	KEYWORD_require,
	KEYWORD_require_once,
	KEYWORD_return,
	KEYWORD_static,
	KEYWORD_switch,
	KEYWORD_throw,
	KEYWORD_trait,
	KEYWORD_try,
	KEYWORD_use,
	KEYWORD_var,
	KEYWORD_while,
	KEYWORD_xor,
	KEYWORD_yield
};
typedef int keywordId; /* to allow KEYWORD_NONE */

/* Visibility attached to a statement; COUNT_ACCESS is the number of values. */
typedef enum {
	ACCESS_UNDEFINED,
	ACCESS_PRIVATE,
	ACCESS_PROTECTED,
	ACCESS_PUBLIC,
	COUNT_ACCESS
} accessType;

/* Implementation qualifier; COUNT_IMPL is the number of values. */
typedef enum {
	IMPL_UNDEFINED,
	IMPL_ABSTRACT,
	COUNT_IMPL
} implType;

/* Tag kinds.  The values index PhpKinds[], so both must be kept in the same
 * order. */
typedef enum {
	K_CLASS,
	K_DEFINE,
	K_FUNCTION,
	K_INTERFACE,
	K_LOCAL_VARIABLE,
	K_NAMESPACE,
	K_TRAIT,
	K_VARIABLE,
	K_ALIAS,
	COUNT_KIND
} phpKind;

#define NAMESPACE_SEPARATOR "\\"
/* Scope separators shared by every kind: NAMESPACE_SEPARATOR for the
 * K_NAMESPACE entry, "::" for the wildcard entry. */
static scopeSeparator PhpGenericSeparators [] = {
	{ K_NAMESPACE        , NAMESPACE_SEPARATOR },
	{ KIND_WILDCARD_INDEX, "::" },
};

/* Kind definitions, indexed by phpKind.  Only the 'l' (local variables)
 * entry starts with `false` (presumably the "enabled by default" flag). */
static kindDefinition PhpKinds[COUNT_KIND] = {
	{ true, 'c', "class",		"classes",
	  ATTACH_SEPARATORS(PhpGenericSeparators) },
	{ true, 'd', "define",		"constant definitions",
	  ATTACH_SEPARATORS(PhpGenericSeparators)},
	{ true, 'f', "function",	"functions",
	  ATTACH_SEPARATORS(PhpGenericSeparators)},
	{ true, 'i', "interface",	"interfaces",
	  ATTACH_SEPARATORS(PhpGenericSeparators)},
	{ false, 'l', "local",		"local variables",
	  ATTACH_SEPARATORS(PhpGenericSeparators)},
	{ true, 'n', "namespace",	"namespaces",
	  ATTACH_SEPARATORS(PhpGenericSeparators)},
	{ true, 't', "trait",		"traits",
	  ATTACH_SEPARATORS(PhpGenericSeparators)},
	{ true, 'v', "variable",	"variables",
	  ATTACH_SEPARATORS(PhpGenericSeparators)},
	{ true, 'a', "alias",		"aliases",
	  ATTACH_SEPARATORS(PhpGenericSeparators)},
};

/* Maps keyword spellings to keyword IDs.  Two spellings deliberately differ
 * from their ID name: "cfunction" (bound to KEYWORD_function) and "elseif"
 * (bound to KEYWORD_elif). */
static const keywordTable PhpKeywordTable[] = {
	/* keyword			keyword ID */
	{ "abstract",		KEYWORD_abstract		},
	{ "and",			KEYWORD_and				},
	{ "as",				KEYWORD_as				},
	{ "break",			KEYWORD_break			},
	{ "callable",		KEYWORD_callable		},
	{ "case",			KEYWORD_case			},
	{ "catch",			KEYWORD_catch			},
	{ "cfunction",		KEYWORD_function		}, /* nobody knows what the hell this is, but it seems to behave much like "function" so bind it to it */
	{ "class",			KEYWORD_class			},
	{ "clone",			KEYWORD_clone			},
	{ "const",			KEYWORD_const			},
	{ "continue",		KEYWORD_continue		},
	{ "declare",		KEYWORD_declare			},
	{ "define",			KEYWORD_define			}, /* this isn't really a keyword but we handle it so it's easier this way */
	{ "default",		KEYWORD_default			},
	{ "do",				KEYWORD_do				},
	{ "echo",			KEYWORD_echo			},
	{ "else",			KEYWORD_else			},
	{ "elseif",			KEYWORD_elif			},
	{ "enddeclare",		KEYWORD_enddeclare		},
	{ "endfor",			KEYWORD_endfor			},
	{ "endforeach",		KEYWORD_endforeach		},
	{ "endif",			KEYWORD_endif			},
	{ "endswitch",		KEYWORD_endswitch		},
	{ "endwhile",		KEYWORD_endwhile		},
	{ "extends",		KEYWORD_extends			},
	{ "final",			KEYWORD_final			},
	{ "finally",		KEYWORD_finally			},
	{ "for",			KEYWORD_for				},
	{ "foreach",		KEYWORD_foreach			},
	{ "function",		KEYWORD_function		},
	{ "global",			KEYWORD_global			},
	{ "goto",			KEYWORD_goto			},
	{ "if",				KEYWORD_if				},
	{ "implements",		KEYWORD_implements		},
	{ "include",		KEYWORD_include			},
	{ "include_once",	KEYWORD_include_once	},
	{ "instanceof",		KEYWORD_instanceof		},
	{ "insteadof",		KEYWORD_insteadof		},
	{ "interface",		KEYWORD_interface		},
	{ "namespace",		KEYWORD_namespace		},
	{ "new",			KEYWORD_new				},
	{ "or",				KEYWORD_or				},
	{ "print",			KEYWORD_print			},
	{ "private",		KEYWORD_private			},
	{ "protected",		KEYWORD_protected		},
	{ "public",			KEYWORD_public			},
	{ "require",		KEYWORD_require			},
	{ "require_once",	KEYWORD_require_once	},
	{ "return",			KEYWORD_return			},
	{ "static",			KEYWORD_static			},
	{ "switch",			KEYWORD_switch			},
	{ "throw",			KEYWORD_throw			},
	{ "trait",			KEYWORD_trait			},
	{ "try",			KEYWORD_try				},
	{ "use",			KEYWORD_use				},
	{ "var",			KEYWORD_var				},
	{ "while",			KEYWORD_while			},
	{ "xor",			KEYWORD_xor				},
	{ "yield",			KEYWORD_yield			}
};

/* Lexical token categories. */
typedef enum eTokenType {
	TOKEN_UNDEFINED,
	TOKEN_EOF,
	TOKEN_CHARACTER,
	TOKEN_CLOSE_PAREN,
	TOKEN_SEMICOLON,
	TOKEN_COLON,
	TOKEN_COMMA,
	TOKEN_KEYWORD,
	TOKEN_OPEN_PAREN,
	TOKEN_OPERATOR,
	TOKEN_IDENTIFIER,
	TOKEN_STRING,
	TOKEN_PERIOD,
	TOKEN_OPEN_CURLY,
	TOKEN_CLOSE_CURLY,
	TOKEN_EQUAL_SIGN,
	TOKEN_OPEN_SQUARE,
	TOKEN_CLOSE_SQUARE,
	TOKEN_VARIABLE,
	TOKEN_AMPERSAND,
	TOKEN_BACKSLASH,
	TOKEN_QMARK,
} tokenType;

/* A token together with the position and scope it was read in. */
typedef struct {
	tokenType		type;
	keywordId		keyword;
	vString *		string;
	vString *		scope;
	unsigned long	lineNumber;
	MIOPos			filePosition;
	int				parentKind; /* -1 if none */
	bool			anonymous;	/* true if token specifies
								 * an anonymous class */
} tokenInfo;

static langType Lang_php;
static langType Lang_zephir;

/* whether we are inside PHP code, i.e. between the opening and closing tags
 * (the tag names were lost from the original comment) */
static bool InPhp = false;
/* whether the next token may be a keyword, e.g.
not after "::" or "->" */
static bool MayBeKeyword = true;

/* current statement details */
static struct {
	accessType access;
	implType impl;
} CurrentStatement;

/* Current namespace
 * NOTE(review): the identifier is misspelled ("Namesapce"); it is kept as is
 * because it may be referenced from parts of the file not shown here. */
static vString *CurrentNamesapce;
/* Cache variable to build the tag's scope.  It has no real meaning outside
 * of initPhpEntry()'s scope. */
static vString *FullScope;
/* The class name specified at "extends" keyword in the current class
 * definition.  Used to resolve "parent" in return type. */
static vString *ParentClass;

/* Pool the newToken()/deleteToken() macros draw from and return to. */
static objPool *TokenPool = NULL;

/* Returns the separator to put between a scope of kind @upperScopeKind and a
 * nested element of kind @kind, as given by scopeSeparatorFor() for the
 * current input language. */
static const char *phpScopeSeparatorFor (int kind, int upperScopeKind)
{
	return scopeSeparatorFor (getInputLanguage(), kind, upperScopeKind);
}

/* Returns the name of @access ("undefined", "private", "protected" or
 * "public").  @access must be below COUNT_ACCESS. */
static const char *accessToString (const accessType access)
{
	static const char *const names[COUNT_ACCESS] = {
		"undefined",
		"private",
		"protected",
		"public"
	};

	Assert (access < COUNT_ACCESS);

	return names[access];
}

/* Returns the name of @impl ("undefined" or "abstract").  @impl must be
 * below COUNT_IMPL. */
static const char *implToString (const implType impl)
{
	static const char *const names[COUNT_IMPL] = {
		"undefined",
		"abstract"
	};

	Assert (impl < COUNT_IMPL);

	return names[impl];
}

/* Initializes @e as a tag of kind @kind named after @token.
 *
 * The scope is assembled in FullScope: the current namespace (if any),
 * followed by @token's own scope (if any), joined with the separator
 * phpScopeSeparatorFor() gives for the token's parent kind under a namespace.
 * The scope kind is the token's parent kind when the token has a scope,
 * K_NAMESPACE when only the namespace contributes.
 *
 * The scope name stored in @e points into FullScope, which is cleared by the
 * next call, so the entry has to be emitted before this function is called
 * again. */
static void initPhpEntry (tagEntryInfo *const e, const tokenInfo *const token,
						  const phpKind kind, const accessType access)
{
	int parentKind = -1;

	vStringClear (FullScope);

	if (vStringLength (CurrentNamesapce) > 0)
	{
		parentKind = K_NAMESPACE;
		vStringCat (FullScope, CurrentNamesapce);
	}

	initTagEntry (e, vStringValue (token->string), kind);

	/* report the position the token was read at */
	e->lineNumber	= token->lineNumber;
	e->filePosition	= token->filePosition;

	if (access != ACCESS_UNDEFINED)
		e->extensionFields.access = accessToString (access);
	if (vStringLength (token->scope) > 0)
	{
		parentKind = token->parentKind;

		/* a separator is only needed when a namespace was already added */
		if (vStringLength (FullScope) > 0)
		{
			const char* sep;

			sep = phpScopeSeparatorFor (parentKind,
										K_NAMESPACE);
			vStringCatS (FullScope, sep);
		}
		vStringCat (FullScope, token->scope);
	}
	if (vStringLength (FullScope) > 0)
	{
		Assert (parentKind >= 0);

		e->extensionFields.scopeKindIndex = parentKind;
		e->extensionFields.scopeName = vStringValue (FullScope);
	}

	if (token->anonymous)
		markTagExtraBit (e, XTAG_ANONYMOUS);
}

/* Emits @e through makeTagEntry() and then makeQualifiedTagEntry(). */
static void makePhpTagEntry (tagEntryInfo *const e)
{
	makeTagEntry (e);
	makeQualifiedTagEntry (e);
}

/* Fills the typeRef fields of @e from the type name @rtype:
 *  - "self", when @token has a scope: the token's scope, with the name of its
 *    parent kind ("unknown" if the parent kind is -1);
 *  - "parent", when ParentClass is set and not empty: "class" and ParentClass;
 *  - anything else: "unknown" and @rtype itself.
 * The fields point into @token, ParentClass or @rtype; no copy is made. */
static void fillTypeRefField (tagEntryInfo *const e,
							  const vString *const rtype, const tokenInfo *const token)
{
	if ((vStringLength (rtype) == 4)
		&& (strcmp (vStringValue (rtype), "self") == 0)
		&& vStringLength (token->scope) > 0)
	{
		if (token->parentKind == -1)
			e->extensionFields.typeRef [0] = "unknown";
		else
			e->extensionFields.typeRef [0] = PhpKinds [token->parentKind].name;
		e->extensionFields.typeRef [1] = vStringValue (token->scope);
	}
	else if ((vStringLength (rtype) == 6)
			 && (strcmp (vStringValue (rtype), "parent") == 0)
			 && (ParentClass && vStringLength (ParentClass) > 0))
	{
		e->extensionFields.typeRef [0] = "class";
		e->extensionFields.typeRef [1] = vStringValue (ParentClass);
	}
	else
	{
		e->extensionFields.typeRef [0] = "unknown";
		e->extensionFields.typeRef [1] = vStringValue (rtype);
	}
}

/* Emits a tag of kind @kind for @token if that kind is enabled.  When
 * @typeName is not NULL it is recorded through fillTypeRefField(). */
static void makeTypedPhpTag (const tokenInfo *const token, const phpKind kind,
							 const accessType access, vString* typeName)
{
	if (PhpKinds[kind].enabled)
	{
		tagEntryInfo e;

		initPhpEntry (&e, token, kind, access);
		if (typeName)
			fillTypeRefField (&e, typeName, token);
		makePhpTagEntry (&e);
	}
}

/* Same as makeTypedPhpTag() without a type name. */
static void makeSimplePhpTag (const tokenInfo *const token, const phpKind kind,
							  const accessType access)
{
	makeTypedPhpTag (token, kind, access, NULL);
}

/* Emits a namespace tag called @name, positioned at @token, if namespace
 * tags are enabled.  Unlike the other tag makers this does not go through
 * initPhpEntry(), so no scope is attached. */
static void makeNamespacePhpTag (const tokenInfo *const token, const vString *const name)
{
	if (PhpKinds[K_NAMESPACE].enabled)
	{
		tagEntryInfo e;

		initTagEntry (&e, vStringValue (name), K_NAMESPACE);

		e.lineNumber	= token->lineNumber;
		e.filePosition	= token->filePosition;

		makePhpTagEntry (&e);
	}
}

/* Emits a tag of kind @kind for @token if that kind is enabled, recording
 * @impl when it is not IMPL_UNDEFINED and @inheritance when it is not
 * empty.  @inheritance must not be NULL. */
static void makeClassOrIfaceTag (const phpKind kind, const tokenInfo *const token,
								 vString *const inheritance, const implType impl)
{
	if (PhpKinds[kind].enabled)
	{
		tagEntryInfo e;

		initPhpEntry (&e, token, kind, ACCESS_UNDEFINED);

		if (impl != IMPL_UNDEFINED)
			e.extensionFields.implementation = implToString (impl);
		if (vStringLength (inheritance) > 0)
			e.extensionFields.inheritance = vStringValue (inheritance);

		makePhpTagEntry (&e);
	}
}

/* Emits a function tag for @token if function tags are enabled.  @arglist
 * (signature) and @rtype (return type) are optional and may be NULL; @impl
 * is recorded when it is not IMPL_UNDEFINED. */
static void makeFunctionTag (const tokenInfo *const token,
							 const vString *const arglist,
							 const vString *const rtype,
							 const accessType access, const implType impl)
{
	if (PhpKinds[K_FUNCTION].enabled)
	{
		tagEntryInfo e;

		initPhpEntry (&e, token, K_FUNCTION, access);

		if (impl != IMPL_UNDEFINED)
			e.extensionFields.implementation = implToString (impl);
		if (arglist)
			e.extensionFields.signature = vStringValue (arglist);
		if (rtype)
			fillTypeRefField (&e, rtype, token);

		makePhpTagEntry (&e);
	}
}

/* Allocates a token and its two strings.  The remaining fields are not set
 * here; clearPoolToken() assigns them (presumably the pool calls it before
 * handing a token out -- confirm where TokenPool is created). */
static void *newPoolToken (void *createArg CTAGS_ATTR_UNUSED)
{
	tokenInfo *token = xMalloc (1, tokenInfo);

	token->string = vStringNew ();
	token->scope  = vStringNew ();
	return token;
}

/* Resets the token @data to a blank state: undefined type, no keyword, no
 * parent kind, not anonymous, empty strings, and the current input line and
 * file position. */
static void clearPoolToken (void *data)
{
	tokenInfo *token = data;

	token->type			= TOKEN_UNDEFINED;
	token->keyword		= KEYWORD_NONE;
	token->lineNumber   = getInputLineNumber ();
	token->filePosition = getInputFilePosition ();
	token->parentKind	= -1;
	token->anonymous	= false;
	vStringClear (token->string);
	vStringClear (token->scope);
}

/* Frees the token @data along with its strings. */
static void deletePoolToken (void *data)
{
	tokenInfo *token = data;
	vStringDelete (token->string);
	vStringDelete (token->scope);
	eFree (token);
}

/* Copies @src into @dest.  The scope string is only copied when @scope is
 * true; otherwise @dest keeps its own scope (its parentKind is overwritten
 * in both cases). */
static void copyToken (tokenInfo *const dest, const tokenInfo *const src,
					   bool scope)
{
	dest->lineNumber = src->lineNumber;
	dest->filePosition = src->filePosition;
	dest->type = src->type;
	dest->keyword = src->keyword;
	vStringCopy(dest->string, src->string);
	dest->parentKind = src->parentKind;
	if (scope)
		vStringCopy(dest->scope, src->scope);
	dest->anonymous = src->anonymous;
}

/* Debugging helpers, compiled out. */
#if 0
#include /* NOTE(review): the header name is missing here; presumably <stdio.h>, for fprintf() -- confirm before enabling this code */

/* Returns a printable name for @type.
 * NOTE(review): TOKEN_AMPERSAND, TOKEN_BACKSLASH and TOKEN_QMARK have no
 * case, so NULL is returned for them. */
static const char *tokenTypeName (const tokenType type)
{
	switch (type)
	{
		case TOKEN_UNDEFINED:		return "undefined";
		case TOKEN_EOF:				return "EOF";
		case TOKEN_CHARACTER:		return "character";
		case TOKEN_CLOSE_PAREN:		return "')'";
		case TOKEN_SEMICOLON:		return "';'";
		case TOKEN_COLON:			return "':'";
		case TOKEN_COMMA:			return "','";
		case TOKEN_OPEN_PAREN:		return "'('";
		case TOKEN_OPERATOR:		return "operator";
		case TOKEN_IDENTIFIER:		return "identifier";
		case TOKEN_KEYWORD:			return "keyword";
		case TOKEN_STRING:			return "string";
		case TOKEN_PERIOD:			return "'.'";
		case TOKEN_OPEN_CURLY:		return "'{'";
		case TOKEN_CLOSE_CURLY:		return "'}'";
		case TOKEN_EQUAL_SIGN:		return "'='";
		case TOKEN_OPEN_SQUARE:		return "'['";
		case TOKEN_CLOSE_SQUARE:	return "']'";
		case TOKEN_VARIABLE:		return "variable";
	}
	return NULL;
}

/* Dumps @token to stderr: address, type, line and scope, then the content
 * (identifiers, strings, variables) or the keyword spelling (keywords). */
static void printToken (const tokenInfo *const token)
{
	fprintf (stderr, "%p:\n\ttype:\t%s\n\tline:\t%lu\n\tscope:\t%s\n", (void *) token,
			 tokenTypeName (token->type),
			 token->lineNumber,
			 vStringValue (token->scope));
	switch (token->type)
	{
		case TOKEN_IDENTIFIER:
		case TOKEN_STRING:
		case TOKEN_VARIABLE:
			fprintf (stderr, "\tcontent:\t%s\n", vStringValue (token->string));
			break;

		case TOKEN_KEYWORD:
		{
			size_t n = ARRAY_SIZE (PhpKeywordTable);
			size_t i;

			fprintf (stderr, "\tkeyword:\t");
			/* reverse lookup of the keyword ID in the table */
			for (i = 0; i < n; i++)
			{
				if (PhpKeywordTable[i].id == token->keyword)
				{
					fprintf (stderr, "%s\n", PhpKeywordTable[i].name);
					break;
				}
			}
			if (i >= n)
				fprintf (stderr, "(unknown)\n");
		}
		/* no break here: falls through to the default case, which only
		 * breaks */

		default: break;
	}
}
#endif

/* Appends @extra to @token's scope.  When the scope is not empty, the
 * separator phpScopeSeparatorFor() gives for the token's parent kind under
 * @kindOfUpperScope is inserted first. */
static void addToScope (tokenInfo *const token, const vString *const extra,
						int kindOfUpperScope)
{
	if (vStringLength (token->scope) > 0)
	{
		const char* sep;

		sep = phpScopeSeparatorFor(token->parentKind,
								   kindOfUpperScope);
		vStringCatS (token->scope, sep);
	}
	vStringCat (token->scope, extra);
}

/* Reads input characters until @c or EOF is read, and returns the character
 * that ended the loop (@c or EOF).  At least one character is always read. */
static int skipToCharacter (const int c)
{
	int d;
	do
	{
		d = getcFromInputFile ();
	} while (d != EOF  &&  d != c);
	return d;
}

/* Reads a string body into @string, up to (and consuming) @delimiter or EOF.
 * A backslash makes the character after it be stored as is, without the
 * backslash, even if it is @delimiter; a backslash right before EOF ends the
 * string and is not stored. */
static void parseString (vString *const string, const int delimiter)
{
	while (true)
	{
		int c = getcFromInputFile ();

		if (c == '\\' && (c = getcFromInputFile ()) != EOF)
			vStringPut (string, (char) c);
		else if (c == EOF || c == delimiter)
			break;
		else
			vStringPut (string, (char) c);
	}
}

/* Strips @indent_len characters from lines in @string to get the correct
* string value for an indented heredoc (PHP 7.3+). * This doesn't handle invalid values specially and might yield surprising * results with them, but it doesn't really matter as it's invalid anyway. */ static void stripHeredocIndent (vString *const string, size_t indent_len) { char *str = vStringValue (string); size_t str_len = vStringLength (string); char *p = str; size_t new_len = str_len; bool at_line_start = true; while (*p) { if (at_line_start) { size_t p_len; size_t strip_len; p_len = str_len - (p - str); strip_len = p_len < indent_len ? p_len : indent_len; memmove (p, p + strip_len, p_len - strip_len); p += strip_len; new_len -= strip_len; } /* CRLF is already normalized as LF */ at_line_start = (*p == '\r' || *p == '\n'); p++; } vStringTruncate (string, new_len); } /* reads a PHP >= 7.3 HereDoc or a NowDoc (the part after the <<<). * <<<[ \t]*(ID|'ID'|"ID") * ... * [ \t]*ID[^:indent-char:];? * * note that: * 1) starting ID must be immediately followed by a newline; * 2) closing ID is the same as opening one; * 3) closing ID must not be immediately followed by an identifier character; * 4) optional indentation of the closing ID is stripped from body lines, * which lines must have the exact same prefix indentation. * * This is slightly relaxed from PHP < 7.3, where the closing ID had to be the * only thing on its line, with the only exception of a semicolon right after * the ID. 
* * Example of a single valid heredoc: * <<< FOO * something * something else * FOO_this is not an end * FOO; * # previous line was the end, but the semicolon wasn't required * * Another example using indentation and more code after the heredoc: * << 0) stripHeredocIndent (string, indent_len); break; } /* if we are here it wasn't a delimiter, so put everything in the * string */ vStringNCatS (string, delimiter, len); } } while (c != EOF); return; error: ungetcToInputFile (c); } static void parseIdentifier (vString *const string, const int firstChar) { int c = firstChar; do { vStringPut (string, (char) c); c = getcFromInputFile (); } while (isIdentChar (c)); ungetcToInputFile (c); } static bool isSpace (int c) { return (c == '\t' || c == ' ' || c == '\v' || c == '\n' || c == '\r' || c == '\f'); } static int skipWhitespaces (int c) { while (isSpace (c)) c = getcFromInputFile (); return c; } /* * * This is ugly, but the whole "