1 /*
2 *
3 * Copyright (c) 2011, Ivan Krasilnikov
4 *
5 * This source code is released for free distribution under the terms of the
6 * GNU General Public License version 2 or (at your option) any later version.
7 *
8 * This module implements parsing of protocol buffers definition files
9 * (http://code.google.com/apis/protocolbuffers/docs/proto.html)
10 */
11
12 /*
Masatake YAMATO took this from https://sourceforge.net/p/ctags/patches/74/
after getting the following approval:
15 ===============================================================================
16 Message-ID: <CALPttHe+hSa_kjwx6GoWS6CsDf_OG0bcmhmPahb4shnKb8tkWg@mail.gmail.com>
17 Subject: Re: your protobuf.patch
18 From: Ivan Krasilnikov <infnty@gmail.com>
19 To: m_yamato@users.sf.net
20 Date: Fri, 8 Jul 2016 15:37:07 +0200
21
22 Hi, yes, it's fine, no problem.
23
24 --
25 Ivan
26
27 On 8 July 2016 at 06:31, <m_yamato@users.sf.net> wrote:
28
29 > Hi,
30 >
31 > I am a developer of universal ctags(http://ctags.io).
32 >
33 > I would like to merge your patch for protobuf in *GPL v2 or later*.
34 >
35 > Is it o.k.?
36 > ------------------------------
37 >
38 > This message was sent to you via the SourceForge web mail form.
39 > You may reply to this message directly, or at
40 > https://sourceforge.net/u/userid-2121776/profile/send_message
41 >
42 ===============================================================================
43 */
44
45 /*
46 * INCLUDE FILES
47 */
48 #include "general.h" /* must always come first */
49
50 #include <string.h>
51 #include <ctype.h>
52
53 #include "cpreprocessor.h"
54
55 #include "entry.h"
56 #include "keyword.h"
57 #include "parse.h"
58 #include "read.h"
59 #include "vstring.h"
60
61 /*
62 * DATA DEFINITIONS
63 */
64 static langType Lang_protobuf;
65
66 typedef enum {
67 SYNTAX_UNKNOWN,
68 SYNTAX_PROTO2,
69 SYNTAX_PROTO3,
70 } protobufSyntax;
71 static protobufSyntax syntax = SYNTAX_UNKNOWN;
72
/* Kinds of tags this parser emits.  The values are used as indexes into
 * ProtobufKinds, so the enumerators must stay in the same order as the
 * entries of that table. */
typedef enum {
	PK_PACKAGE,		/* package */
	PK_MESSAGE,		/* message */
	PK_FIELD,		/* field */
	PK_ENUMERATOR,	/* enum constant */
	PK_ENUM,		/* enum type */
	PK_SERVICE,		/* service */
	PK_RPC,			/* rpc method */
	PK_ONEOF,		/* oneof name */
	PK_GROUP,		/* group */
	PK_PROTODEF,	/* .proto definition */
} protobufKind;
85
/* Roles of the message kind; indexes into ProtobufMessageRoles. */
typedef enum {
	R_MESSAGE_EXTENSION,	/* the message is named by an `extend' statement */
} protobufMessageRole;

/* Roles of the protodef kind; indexes into ProtobufProtodefRoles. */
typedef enum {
	R_PROTODEF_IMPORTED,	/* the .proto file is named by an `import' statement */
} protobufProtodefRole;
93
94 static roleDefinition ProtobufMessageRoles [] = {
95 { true, "extension", "extending the message" },
96 };
97
98 static roleDefinition ProtobufProtodefRoles [] = {
99 { true, "imported", "imported" },
100 };
101
102 static kindDefinition ProtobufKinds [] = {
103 { true, 'p', "package", "packages" },
104 { true, 'm', "message", "messages",
105 .referenceOnly = false, ATTACH_ROLES (ProtobufMessageRoles)},
106 { true, 'f', "field", "fields" },
107 { true, 'e', "enumerator", "enum constants" },
108 { true, 'g', "enum", "enum types" },
109 { true, 's', "service", "services" },
110 { true, 'r', "rpc", "RPC methods" },
111 { true, 'o', "oneof", "oneof names" },
112 { true, 'G', "group", "groups" },
113 { true, 'D', "protodef", ".proto definition",
114 .referenceOnly = true, ATTACH_ROLES (ProtobufProtodefRoles)},
115 };
116
/* Identifiers of the words listed in ProtobufKeywordTable. */
typedef enum eKeywordId {
	/* file level statements */
	KEYWORD_OPTION,
	KEYWORD_PACKAGE,
	/* type definitions */
	KEYWORD_MESSAGE,
	KEYWORD_ENUM,
	/* field labels */
	KEYWORD_REPEATED,
	KEYWORD_OPTIONAL,
	KEYWORD_REQUIRED,
	/* services */
	KEYWORD_SERVICE,
	KEYWORD_RPC,
	KEYWORD_STREAM,
	KEYWORD_RETURNS,
	/* message members */
	KEYWORD_EXTEND,
	KEYWORD_ONEOF,
	KEYWORD_MAP,
	KEYWORD_GROUP,
	/* imports */
	KEYWORD_IMPORT,
	KEYWORD_PUBLIC,
	KEYWORD_WEAK,
	/* dialect selection */
	KEYWORD_SYNTAX,
} keywordId;
138
139 static const keywordTable ProtobufKeywordTable [] = {
140 { "option", KEYWORD_OPTION },
141 { "package", KEYWORD_PACKAGE },
142 { "message", KEYWORD_MESSAGE },
143 { "enum", KEYWORD_ENUM },
144 { "repeated", KEYWORD_REPEATED },
145 { "optional", KEYWORD_OPTIONAL },
146 { "required", KEYWORD_REQUIRED },
147 { "service", KEYWORD_SERVICE },
148 { "rpc", KEYWORD_RPC },
149 { "stream", KEYWORD_STREAM },
150 { "returns", KEYWORD_RETURNS },
151 { "extend", KEYWORD_EXTEND },
152 { "oneof", KEYWORD_ONEOF },
153 { "map", KEYWORD_MAP },
154 { "group", KEYWORD_GROUP },
155 { "import", KEYWORD_IMPORT },
156 { "public", KEYWORD_PUBLIC },
157 { "weak", KEYWORD_WEAK },
158 { "syntax", KEYWORD_SYNTAX },
159 };
160
161 #define TOKEN_EOF 0
162 #define TOKEN_ID 'i'
163 #define TOKEN_STR 's'
164
165 static struct sTokenInfo {
166 int type; /* one of TOKEN_* constants or punctuation characters */
167 keywordId keyword;
168 vString *value;
169 } token;
170
171
172 /*
173 * FUNCTION DECLARATIONS
174 */
/* Declared ahead of its definition because parseOneofField () calls it
 * before the definition appears. */
static void findProtobufTags0 (bool oneshot, int originalScopeCorkIndex);
176
177
178 /*
179 * FUNCTION DEFINITIONS
180 */
181
nextTokenFull(bool expectingStringLiteral)182 static void nextTokenFull (bool expectingStringLiteral)
183 {
184 int c;
185
186 repeat:
187 /*
188 * .proto files may contain C and C++ style comments and
189 * quoted strings. cppGetc() takes care of them.
190 */
191 c = cppGetc ();
192
193 token.keyword = KEYWORD_NONE;
194 if (c <= 0)
195 token.type = TOKEN_EOF;
196 else if (c == '{' || c == '}' || c == ';' || c == '.' || c == '=' || c == ',' || c == '<' || c == '>')
197 token.type = c;
198 else if (cppIsalnum (c) || c == '_')
199 {
200 token.type = TOKEN_ID;
201 vStringClear (token.value);
202 while (c > 0 && (cppIsalnum (c) || c == '_')) {
203 vStringPut (token.value, c);
204 c = cppGetc ();
205 }
206 token.keyword = lookupCaseKeyword (vStringValue (token.value), Lang_protobuf);
207 cppUngetc (c);
208 }
209 else if (expectingStringLiteral && c == STRING_SYMBOL)
210 {
211 token.type = TOKEN_STR;
212 vStringCopy (token.value,
213 cppGetLastCharOrStringContents ());
214 }
215 else
216 goto repeat; /* anything else is not important for this parser */
217 }
218
/* Reads the next token, skipping string literals instead of returning them. */
static void nextToken (void)
{
	const bool expectingStringLiteral = false;

	nextTokenFull (expectingStringLiteral);
}
223
skipUntil(const char * punctuation)224 static void skipUntil (const char *punctuation)
225 {
226 while (token.type != TOKEN_EOF && strchr (punctuation, token.type) == NULL)
227 nextToken ();
228 }
229
/*
 * Appends a dot separated identifier (e.g. "foo.bar.Baz") to `buf', reading
 * tokens that FOLLOW the current one.
 *
 * `buf' may already hold the beginning of the name.  Identifiers and dots
 * are accepted only while they alternate: an identifier is taken when `buf'
 * is empty or ends with '.', a dot is taken when `buf' does not end with
 * '.'.  On return the current token is the first one that was not appended.
 */
static void parseFullQualifiedId (vString *buf)
{
	for (;;)
	{
		nextToken ();

		const bool empty = vStringIsEmpty (buf);
		const bool endsWithDot = !empty && vStringLast (buf) == '.';

		if (token.type == TOKEN_ID && (empty || endsWithDot))
			vStringCat (buf, token.value);
		else if (token.type == '.' && !endsWithDot)
			vStringPut (buf, '.');
		else
			return;
	}
}
254
/* Returns non-zero when the current token is an identifier that was
 * recognized as `keyword', zero otherwise. */
static int tokenIsKeyword(keywordId keyword)
{
	if (token.type != TOKEN_ID)
		return 0;

	return token.keyword == keyword;
}
259
createProtobufTagFull(const vString * name,int kind,int role,int scopeCorkIndex)260 static int createProtobufTagFull (const vString *name, int kind, int role, int scopeCorkIndex)
261 {
262 static tagEntryInfo tag;
263 int corkIndex = CORK_NIL;
264
265 if (ProtobufKinds [kind].enabled)
266 {
267 initRefTagEntry (&tag, vStringValue (name), kind, role);
268 tag.extensionFields.scopeIndex = scopeCorkIndex;
269 corkIndex = makeTagEntry (&tag);
270 }
271
272 return corkIndex;
273 }
274
createProtobufTag(const vString * name,int kind,int scopeCorkIndex)275 static int createProtobufTag (const vString *name, int kind, int scopeCorkIndex)
276 {
277 return createProtobufTagFull (name, kind, ROLE_DEFINITION_INDEX, scopeCorkIndex);
278 }
279
parseEnumConstants(int scopeCorkIndex)280 static void parseEnumConstants (int scopeCorkIndex)
281 {
282 if (token.type != '{')
283 return;
284 nextToken ();
285
286 while (token.type != TOKEN_EOF && token.type != '}')
287 {
288 if (token.type == TOKEN_ID && !tokenIsKeyword (KEYWORD_OPTION))
289 {
290 nextToken (); /* doesn't clear token.value if it's punctuation */
291 if (token.type == '=')
292 createProtobufTag (token.value, PK_ENUMERATOR, scopeCorkIndex);
293 }
294
295 skipUntil (";}");
296
297 if (token.type == ';')
298 nextToken ();
299 }
300 tagEntryInfo *e = getEntryInCorkQueue (scopeCorkIndex);
301 if (e)
302 e->extensionFields.endLine = getInputLineNumber ();
303 }
304
parseOneofField(int scopeCorkIndex)305 static void parseOneofField (int scopeCorkIndex)
306 {
307 if (tokenIsKeyword (KEYWORD_GROUP))
308 {
309 findProtobufTags0 (true, scopeCorkIndex);
310 return;
311 }
312
313 vString *type = vStringNewCopy (token.value);
314 parseFullQualifiedId (type);
315
316 if (token.type == TOKEN_ID)
317 {
318 int corkIndex = createProtobufTag (token.value, PK_FIELD, scopeCorkIndex);
319 tagEntryInfo *e = getEntryInCorkQueue (corkIndex);
320 if (e)
321 {
322 e->extensionFields.typeRef [0] = eStrdup ("typename"); /* As C++ parser does */
323 e->extensionFields.typeRef [1] = vStringDeleteUnwrap (type);
324 type = NULL;
325 }
326 }
327
328 skipUntil (";}");
329 vStringDelete (type); /* NULL is acceptable */
330 }
331
parseOneofFields(int scopeCorkIndex)332 static void parseOneofFields (int scopeCorkIndex)
333 {
334 if (token.type != '{')
335 return;
336 nextToken ();
337
338 while (token.type != TOKEN_EOF && token.type != '}')
339 {
340 if (token.type == TOKEN_ID || token.type == '.')
341 {
342 parseOneofField (scopeCorkIndex);
343 if (token.type == ';')
344 nextToken ();
345 }
346 else
347 break;
348 }
349
350 tagEntryInfo *e = getEntryInCorkQueue (scopeCorkIndex);
351 if (e)
352 e->extensionFields.endLine = getInputLineNumber ();
353 }
354
/*
 * Appends the text of the current and following tokens to VSTRING for as
 * long as CONDITION holds.  CONDITION is re-evaluated before every token
 * and must become false at end of input.
 *
 * Identifiers are appended verbatim and punctuation tokens as their
 * character.  The `stream' keyword is followed by a space so that it stays
 * separated from the type name after it.  Keywords are TOKEN_ID tokens,
 * so the keyword test has to be made inside the identifier branch.
 */
#define gatherTypeinfo(VSTRING,CONDITION)					\
	do														\
	{														\
		while (CONDITION)									\
		{													\
			if (token.type == TOKEN_ID)						\
			{												\
				vStringCat ((VSTRING), token.value);		\
				if (token.keyword == KEYWORD_STREAM)		\
					vStringPut ((VSTRING), ' ');			\
			}												\
			else											\
				vStringPut ((VSTRING), token.type);			\
			nextToken ();									\
		}													\
	}														\
	while (0)
369
parseRPCTypeinfos(int corkIndex)370 static void parseRPCTypeinfos (int corkIndex)
371 {
372 tagEntryInfo *e = getEntryInCorkQueue (corkIndex);
373 if (!e)
374 return;
375
376 vString *signature = vStringNew ();
377 gatherTypeinfo(signature,
378 (token.type != TOKEN_EOF
379 && token.type != '{' && token.type != ';'
380 && !tokenIsKeyword (KEYWORD_RETURNS)));
381 if (!vStringIsEmpty(signature))
382 e->extensionFields.signature = vStringDeleteUnwrap (signature);
383 else
384 vStringDelete (signature);
385
386 if (!tokenIsKeyword (KEYWORD_RETURNS))
387 return;
388 nextToken ();
389
390 vString *typeref = vStringNew ();
391 gatherTypeinfo(typeref, (token.type != EOF
392 && token.type != '{' && token.type != ';'));
393 if (!vStringIsEmpty(typeref))
394 {
395 e->extensionFields.typeRef [0] = eStrdup ("typename"); /* As C++ parser does */
396 e->extensionFields.typeRef [1] = vStringDeleteUnwrap (typeref);
397 }
398 else
399 vStringDelete (typeref);
400 }
401
parseStatementFull(int kind,int role,int scopeCorkIndex)402 static int parseStatementFull (int kind, int role, int scopeCorkIndex)
403 {
404 int corkIndex = CORK_NIL;
405 vString *fullName = NULL;
406 vString *fieldType = NULL;
407
408 if (kind == PK_FIELD)
409 {
410 fieldType = vStringNew ();
411
412 if (syntax == SYNTAX_PROTO3
413 && !tokenIsKeyword (KEYWORD_REPEATED))
414 {
415 if (token.type == TOKEN_ID)
416 vStringCat (fieldType, token.value);
417 else if (token.type == '.')
418 vStringPut (fieldType, '.');
419 }
420
421 parseFullQualifiedId (fieldType);
422 if (vStringIsEmpty (fieldType) || vStringLast (fieldType) == '.')
423 goto out;
424 }
425 else
426 nextToken ();
427
428 /* When extending message defined in the external package, the name
429 * becomes longer. */
430 if (kind == PK_MESSAGE && role == R_MESSAGE_EXTENSION)
431 {
432 if (token.type != TOKEN_ID)
433 goto out;
434
435 fullName = vStringNewCopy (token.value);
436 parseFullQualifiedId (fullName);
437 }
438 else if (token.type != TOKEN_ID)
439 goto out;
440
441 corkIndex = createProtobufTagFull (fullName? fullName: token.value,
442 kind, role, scopeCorkIndex);
443
444 if (!fullName)
445 nextToken ();
446
447 tagEntryInfo *e = getEntryInCorkQueue (corkIndex);
448 if (fieldType && e)
449 {
450 e->extensionFields.typeRef [0] = eStrdup ("typename"); /* As C++ parser does */
451 e->extensionFields.typeRef [1] = vStringDeleteUnwrap (fieldType);
452 fieldType = NULL;
453 }
454
455 if (kind == PK_RPC && corkIndex != CORK_NIL)
456 parseRPCTypeinfos (corkIndex);
457
458 if (kind == PK_ENUM)
459 parseEnumConstants (corkIndex);
460 else if (kind == PK_ONEOF)
461 parseOneofFields (corkIndex);
462
463 out:
464 vStringDelete (fieldType); /* NULL is acceptable. */
465 vStringDelete (fullName); /* NULL is acceptable. */
466 return corkIndex;
467 }
468
parseStatement(int kind,int scopeCorkIndex)469 static int parseStatement (int kind, int scopeCorkIndex)
470 {
471 return parseStatementFull (kind, ROLE_DEFINITION_INDEX, scopeCorkIndex);
472 }
473
parsePackage(void)474 static int parsePackage (void)
475 {
476 int corkIndex = CORK_NIL;
477
478 vString *pkg = vStringNew ();
479 parseFullQualifiedId (pkg);
480 if (vStringLength (pkg) > 0)
481 corkIndex = createProtobufTag (pkg, PK_PACKAGE, CORK_NIL);
482 vStringDelete (pkg);
483
484 return corkIndex;
485 }
486
parseMap(int scopeCorkIndex)487 static void parseMap (int scopeCorkIndex)
488 {
489 nextToken ();
490 if (token.type != '<')
491 return;
492
493 vString *typeref = vStringNewInit ("map<");
494
495 nextToken ();
496 if (token.type != TOKEN_ID)
497 goto out;
498
499 vStringCat (typeref, token.value);
500
501 nextToken ();
502 if (token.type != ',')
503 goto out;
504 vStringPut (typeref, ',');
505
506 vString *vtyperef = vStringNew ();
507 parseFullQualifiedId (vtyperef);
508 vStringCat (typeref, vtyperef);
509 vStringDelete (vtyperef);
510 if (vStringLast (typeref) == ',')
511 goto out;
512
513 if (token.type != '>')
514 goto out;
515 vStringPut (typeref, '>');
516
517 nextToken ();
518 if (token.type != TOKEN_ID)
519 goto out;
520
521 int corkIndex = createProtobufTag (token.value, PK_FIELD, scopeCorkIndex);
522 tagEntryInfo *e = getEntryInCorkQueue (corkIndex);
523 if (e)
524 {
525 e->extensionFields.typeRef [0] = eStrdup ("typename"); /* As C++ parser does */
526 e->extensionFields.typeRef [1] = vStringDeleteUnwrap (typeref);
527 typeref = NULL;
528 }
529
530 out:
531 vStringDelete (typeref);
532 }
533
parseImport(int scopeCorkIndex)534 static int parseImport (int scopeCorkIndex)
535 {
536 nextTokenFull (true);
537 if (token.type == TOKEN_ID)
538 {
539 if (tokenIsKeyword (KEYWORD_PUBLIC)
540 || tokenIsKeyword (KEYWORD_WEAK))
541 nextTokenFull (true);
542 else
543 return CORK_NIL; /* Unexpected */
544 }
545
546 if (token.type == TOKEN_STR)
547 return createProtobufTagFull (token.value,
548 PK_PROTODEF, R_PROTODEF_IMPORTED,
549 /* TODO: whether the package scope should be specified or not. */
550 scopeCorkIndex
551 );
552
553 return CORK_NIL;
554 }
555
parseSyntax(void)556 static void parseSyntax (void)
557 {
558 nextToken ();
559 if (token.type != '=')
560 return;
561
562 nextTokenFull (true);
563 if (token.type == TOKEN_STR)
564 {
565 const vString *proto = cppGetLastCharOrStringContents ();
566 if (strcmp (vStringValue (proto), "proto2") == 0)
567 syntax = SYNTAX_PROTO2;
568 else if (strcmp (vStringValue (proto), "proto3") == 0)
569 syntax = SYNTAX_PROTO3;
570 else
571 syntax = SYNTAX_UNKNOWN;
572 }
573 }
574
findProtobufTags0(bool oneshot,int originalScopeCorkIndex)575 static void findProtobufTags0 (bool oneshot, int originalScopeCorkIndex)
576 {
577 int scopeCorkIndex = originalScopeCorkIndex;
578 while (token.type != TOKEN_EOF)
579 {
580 int corkIndex = CORK_NIL;
581 bool dontChangeScope = false;
582
583 if (tokenIsKeyword (KEYWORD_SYNTAX) && originalScopeCorkIndex == CORK_NIL)
584 {
585 parseSyntax ();
586 dontChangeScope = true;
587 }
588 else if (tokenIsKeyword (KEYWORD_PACKAGE))
589 {
590 corkIndex = parsePackage ();
591 scopeCorkIndex = corkIndex;
592 }
593 else if (tokenIsKeyword (KEYWORD_MESSAGE))
594 corkIndex = parseStatement (PK_MESSAGE, scopeCorkIndex);
595 else if (tokenIsKeyword (KEYWORD_ENUM))
596 {
597 corkIndex = parseStatement (PK_ENUM, scopeCorkIndex);
598 dontChangeScope = true;
599 }
600 else if (tokenIsKeyword (KEYWORD_REPEATED) || tokenIsKeyword (KEYWORD_OPTIONAL) || tokenIsKeyword (KEYWORD_REQUIRED))
601 corkIndex = parseStatement (PK_FIELD, scopeCorkIndex);
602 else if (tokenIsKeyword (KEYWORD_SERVICE))
603 corkIndex = parseStatement (PK_SERVICE, scopeCorkIndex);
604 else if (tokenIsKeyword (KEYWORD_RPC))
605 corkIndex = parseStatement (PK_RPC, scopeCorkIndex);
606 else if (tokenIsKeyword (KEYWORD_EXTEND))
607 corkIndex = parseStatementFull (PK_MESSAGE, R_MESSAGE_EXTENSION, scopeCorkIndex);
608 else if (tokenIsKeyword (KEYWORD_ONEOF))
609 {
610 corkIndex = parseStatement (PK_ONEOF, scopeCorkIndex);
611 dontChangeScope = true;
612 }
613 else if (tokenIsKeyword (KEYWORD_MAP))
614 parseMap (scopeCorkIndex);
615 else if (tokenIsKeyword (KEYWORD_GROUP))
616 corkIndex = parseStatement (PK_GROUP, scopeCorkIndex);
617 else if (tokenIsKeyword (KEYWORD_IMPORT))
618 {
619 corkIndex = parseImport (scopeCorkIndex);
620 dontChangeScope = true;
621 }
622 else if (tokenIsKeyword (KEYWORD_OPTION))
623 dontChangeScope = true;
624 else if (syntax == SYNTAX_PROTO3
625 && (token.type == '.' || token.type == TOKEN_ID))
626 {
627 tagEntryInfo *e = getEntryInCorkQueue (scopeCorkIndex);
628 if (e && e->kindIndex == PK_MESSAGE)
629 corkIndex = parseStatement (PK_FIELD, scopeCorkIndex);
630 }
631
632 skipUntil (";{}");
633 if (!dontChangeScope && token.type == '{' && corkIndex != CORK_NIL)
634 {
635 /* Enter the new scope. */
636 scopeCorkIndex = corkIndex;
637 }
638 else if (!dontChangeScope && token.type == '}')
639 {
640 /* Return to the parent scope. */
641 tagEntryInfo *e = getEntryInCorkQueue (scopeCorkIndex);
642 if (e)
643 {
644 scopeCorkIndex = e->extensionFields.scopeIndex;
645 e->extensionFields.endLine = getInputLineNumber ();
646 }
647 }
648 nextToken ();
649
650 if (oneshot && scopeCorkIndex == originalScopeCorkIndex)
651 break;
652 }
653 }
654
findProtobufTags(void)655 static void findProtobufTags (void)
656 {
657 cppInit (false, false, false, false,
658 KIND_GHOST_INDEX, 0, 0,
659 KIND_GHOST_INDEX,
660 KIND_GHOST_INDEX, 0, 0,
661 FIELD_UNKNOWN);
662 token.value = vStringNew ();
663
664 syntax = SYNTAX_UNKNOWN;
665
666 nextToken ();
667 findProtobufTags0 (false, CORK_NIL);
668
669 vStringDelete (token.value);
670 cppTerminate ();
671 }
672
initialize(const langType language)673 static void initialize (const langType language)
674 {
675 Lang_protobuf = language;
676 }
677
ProtobufParser(void)678 extern parserDefinition* ProtobufParser (void)
679 {
680 static const char *const extensions [] = { "proto", NULL };
681 parserDefinition* def = parserNew ("Protobuf");
682
683 def->extensions = extensions;
684 def->kindTable = ProtobufKinds;
685 def->initialize = initialize;
686 def->kindCount = ARRAY_SIZE (ProtobufKinds);
687 def->parser = findProtobufTags;
688 def->keywordTable = ProtobufKeywordTable;
689 def->keywordCount = ARRAY_SIZE (ProtobufKeywordTable);
690
691 /* cpreprocessor wants corkQueue. */
692 def->useCork = CORK_QUEUE;
693
694 return def;
695 }
696