1 /*
2 * Copyright (c) 2009, Vincent Berthoux
3 *
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License version 2 or (at your option) any later version.
6 *
7 * This module contains functions for generating tags for Objective Caml
8 * language files.
9 */
10 /*
11 * INCLUDE FILES
12 */
13 #include "general.h" /* must always come first */
14
15 #include <string.h>
16
17 #include "debug.h"
18 #include "entry.h"
19 #include "keyword.h"
20 #include "options.h"
21 #include "parse.h"
22 #include "read.h"
23 #include "routines.h"
24 #include "vstring.h"
25
26 #define OCAML_MAX_STACK_SIZE 256
27
28 typedef enum {
29 K_CLASS, /* OCaml class, relatively rare */
30 K_METHOD, /* class method */
31 K_MODULE, /* OCaml module OR functor */
32 K_VARIABLE,
33 K_VAL,
34 K_TYPE, /* name of an OCaml type */
35 K_FUNCTION,
36 K_CONSTRUCTOR, /* Constructor of a sum type */
37 K_RECORDFIELD,
38 K_EXCEPTION,
39 } ocamlKind;
40
41 static kindDefinition OcamlKinds[] = {
42 {true, 'c', "class", "classes"},
43 {true, 'm', "method", "Object's method"},
44 {true, 'M', "module", "Module or functor"},
45 {true, 'v', "var", "Global variable"},
46 {true, 'p', "val", "Signature item"},
47 {true, 't', "type", "Type name"},
48 {true, 'f', "function", "A function"},
49 {true, 'C', "Constructor", "A constructor"},
50 {true, 'r', "RecordField", "A 'structure' field"},
51 {true, 'e', "Exception", "An exception"},
52 };
53
54 typedef enum {
55 OcaKEYWORD_and,
56 OcaKEYWORD_begin,
57 OcaKEYWORD_class,
58 OcaKEYWORD_do,
59 OcaKEYWORD_done,
60 OcaKEYWORD_else,
61 OcaKEYWORD_end,
62 OcaKEYWORD_exception,
63 OcaKEYWORD_for,
64 OcaKEYWORD_functor,
65 OcaKEYWORD_fun,
66 OcaKEYWORD_function,
67 OcaKEYWORD_if,
68 OcaKEYWORD_in,
69 OcaKEYWORD_let,
70 OcaKEYWORD_value,
71 OcaKEYWORD_match,
72 OcaKEYWORD_method,
73 OcaKEYWORD_module,
74 OcaKEYWORD_mutable,
75 OcaKEYWORD_object,
76 OcaKEYWORD_of,
77 OcaKEYWORD_rec,
78 OcaKEYWORD_sig,
79 OcaKEYWORD_struct,
80 OcaKEYWORD_then,
81 OcaKEYWORD_try,
82 OcaKEYWORD_type,
83 OcaKEYWORD_val,
84 OcaKEYWORD_virtual,
85 OcaKEYWORD_while,
86 OcaKEYWORD_with,
87
88 OcaIDENTIFIER,
89 Tok_PARL, /* '(' */
90 Tok_PARR, /* ')' */
91 Tok_BRL, /* '[' */
92 Tok_BRR, /* ']' */
93 Tok_CurlL, /* '{' */
94 Tok_CurlR, /* '}' */
95 Tok_Prime, /* '\'' */
96 Tok_Pipe, /* '|' */
97 Tok_EQ, /* '=' */
98 Tok_Val, /* string/number/poo */
99 Tok_Op, /* any operator recognized by the language */
100 Tok_semi, /* ';' */
101 Tok_comma, /* ',' */
102 Tok_To, /* '->' */
103 Tok_Of, /* ':' */
104 Tok_Sharp, /* '#' */
105 Tok_Backslash, /* '\\' */
106
107 Tok_EOF /* END of file */
108 } ocamlKeyword;
109
110 typedef struct sOcaKeywordDesc {
111 const char *name;
112 ocamlKeyword id;
113 } ocaKeywordDesc;
114
115 typedef ocamlKeyword ocaToken;
116
117 static const keywordTable OcamlKeywordTable[] = {
118 { "and" , OcaKEYWORD_and },
119 { "begin" , OcaKEYWORD_begin },
120 { "class" , OcaKEYWORD_class },
121 { "do" , OcaKEYWORD_do },
122 { "done" , OcaKEYWORD_done },
123 { "else" , OcaKEYWORD_else },
124 { "end" , OcaKEYWORD_end },
125 { "exception" , OcaKEYWORD_exception },
126 { "for" , OcaKEYWORD_for },
127 { "fun" , OcaKEYWORD_fun },
128 { "function" , OcaKEYWORD_fun },
129 { "functor" , OcaKEYWORD_functor },
130 { "if" , OcaKEYWORD_if },
131 { "in" , OcaKEYWORD_in },
132 { "let" , OcaKEYWORD_let },
133 { "match" , OcaKEYWORD_match },
134 { "method" , OcaKEYWORD_method },
135 { "module" , OcaKEYWORD_module },
136 { "mutable" , OcaKEYWORD_mutable },
137 { "object" , OcaKEYWORD_object },
138 { "of" , OcaKEYWORD_of },
139 { "rec" , OcaKEYWORD_rec },
140 { "sig" , OcaKEYWORD_sig },
141 { "struct" , OcaKEYWORD_struct },
142 { "then" , OcaKEYWORD_then },
143 { "try" , OcaKEYWORD_try },
144 { "type" , OcaKEYWORD_type },
145 { "val" , OcaKEYWORD_val },
146 { "value" , OcaKEYWORD_value }, /* just to handle revised syntax */
147 { "virtual" , OcaKEYWORD_virtual },
148 { "while" , OcaKEYWORD_while },
149 { "with" , OcaKEYWORD_with },
150
151 { "or" , Tok_Op },
152 { "mod " , Tok_Op },
153 { "land " , Tok_Op },
154 { "lor " , Tok_Op },
155 { "lxor " , Tok_Op },
156 { "lsl " , Tok_Op },
157 { "lsr " , Tok_Op },
158 { "asr" , Tok_Op },
159 { "->" , Tok_To },
160 { ":" , Tok_Of },
161 { "true" , Tok_Val },
162 { "false" , Tok_Val }
163 };
164
165 static langType Lang_Ocaml;
166
167 static bool exportLocalInfo = false;
168
169 /*//////////////////////////////////////////////////////////////////
170 //// lexingInit */
171 typedef struct _lexingState {
172 vString *name; /* current parsed identifier/operator */
173 const unsigned char *cp; /* position in stream */
174 } lexingState;
175
176 /* array of the size of all possible value for a char */
177 static bool isOperator[1 << (8 * sizeof (char))] = { false };
178
179 /* definition of all the operator in OCaml,
180 * /!\ certain operator get special treatment
181 * in regards of their role in OCaml grammar :
182 * '|' ':' '=' '~' and '?' */
initOperatorTable(void)183 static void initOperatorTable ( void )
184 {
185 isOperator['!'] = true;
186 isOperator['$'] = true;
187 isOperator['%'] = true;
188 isOperator['&'] = true;
189 isOperator['*'] = true;
190 isOperator['+'] = true;
191 isOperator['-'] = true;
192 isOperator['.'] = true;
193 isOperator['/'] = true;
194 isOperator[':'] = true;
195 isOperator['<'] = true;
196 isOperator['='] = true;
197 isOperator['>'] = true;
198 isOperator['?'] = true;
199 isOperator['@'] = true;
200 isOperator['^'] = true;
201 isOperator['~'] = true;
202 isOperator['|'] = true;
203 }
204
205 /*//////////////////////////////////////////////////////////////////////
206 //// Lexing */
isNum(char c)207 static bool isNum (char c)
208 {
209 return c >= '0' && c <= '9';
210 }
211
isLowerAlpha(char c)212 static bool isLowerAlpha (char c)
213 {
214 return c >= 'a' && c <= 'z';
215 }
216
isUpperAlpha(char c)217 static bool isUpperAlpha (char c)
218 {
219 return c >= 'A' && c <= 'Z';
220 }
221
isAlpha(char c)222 static bool isAlpha (char c)
223 {
224 return isLowerAlpha (c) || isUpperAlpha (c);
225 }
226
isIdent(char c)227 static bool isIdent (char c)
228 {
229 return isNum (c) || isAlpha (c) || c == '_' || c == '\'';
230 }
231
isSpace(char c)232 static bool isSpace (char c)
233 {
234 return c == ' ' || c == '\t' || c == '\r' || c == '\n';
235 }
236
eatWhiteSpace(lexingState * st)237 static void eatWhiteSpace (lexingState * st)
238 {
239 const unsigned char *cp = st->cp;
240 while (isSpace (*cp))
241 cp++;
242
243 st->cp = cp;
244 }
245
eatString(lexingState * st)246 static void eatString (lexingState * st)
247 {
248 bool lastIsBackSlash = false;
249 bool unfinished = true;
250 const unsigned char *c = st->cp + 1;
251
252 while (unfinished)
253 {
254 /* end of line should never happen.
255 * we tolerate it */
256 if (c == NULL || c[0] == '\0')
257 break;
258 else if (*c == '"' && !lastIsBackSlash)
259 unfinished = false;
260 else
261 lastIsBackSlash = *c == '\\';
262
263 c++;
264 }
265
266 st->cp = c;
267 }
268
eatComment(lexingState * st)269 static void eatComment (lexingState * st)
270 {
271 bool unfinished = true;
272 bool lastIsStar = false;
273 const unsigned char *c = st->cp + 2;
274
275 while (unfinished)
276 {
277 /* we've reached the end of the line..
278 * so we have to reload a line... */
279 if (c == NULL || *c == '\0')
280 {
281 st->cp = readLineFromInputFile ();
282 /* WOOPS... no more input...
283 * we return, next lexing read
284 * will be null and ok */
285 if (st->cp == NULL)
286 return;
287 c = st->cp;
288 }
289 /* we've reached the end of the comment */
290 else if (*c == ')' && lastIsStar)
291 {
292 unfinished = false;
293 c++;
294 }
295 /* here we deal with imbricated comment, which
296 * are allowed in OCaml */
297 else if (c[0] == '(' && c[1] == '*')
298 {
299 st->cp = c;
300 eatComment (st);
301
302 c = st->cp;
303 if (c == NULL)
304 return;
305
306 lastIsStar = false;
307 c++;
308 }
309 /* OCaml has a rule which says :
310 *
311 * "Comments do not occur inside string or character literals.
312 * Nested comments are handled correctly."
313 *
314 * So if we encounter a string beginning, we must parse it to
315 * get a good comment nesting (bug ID: 3117537)
316 */
317 else if (*c == '"')
318 {
319 st->cp = c;
320 eatString (st);
321 c = st->cp;
322 }
323 else
324 {
325 lastIsStar = '*' == *c;
326 c++;
327 }
328 }
329
330 st->cp = c;
331 }
332
readIdentifier(lexingState * st)333 static void readIdentifier (lexingState * st)
334 {
335 const unsigned char *p;
336 vStringClear (st->name);
337
338 /* first char is a simple letter */
339 if (isAlpha (*st->cp) || *st->cp == '_')
340 vStringPut (st->name, (int) *st->cp);
341
342 /* Go till you get identifier chars */
343 for (p = st->cp + 1; isIdent (*p); p++)
344 vStringPut (st->name, (int) *p);
345
346 st->cp = p;
347 }
348
eatNumber(lexingState * st)349 static ocamlKeyword eatNumber (lexingState * st)
350 {
351 while (isNum (*st->cp))
352 st->cp++;
353 return Tok_Val;
354 }
355
356 /* Operator can be defined in OCaml as a function
357 * so we must be ample enough to parse them normally */
eatOperator(lexingState * st)358 static ocamlKeyword eatOperator (lexingState * st)
359 {
360 int count = 0;
361 const unsigned char *root = st->cp;
362
363 vStringClear (st->name);
364
365 while (isOperator[st->cp[count]])
366 {
367 vStringPut (st->name, st->cp[count]);
368 count++;
369 }
370
371 st->cp += count;
372 if (count <= 1)
373 {
374 switch (root[0])
375 {
376 case '|':
377 return Tok_Pipe;
378 case '=':
379 return Tok_EQ;
380 case ':':
381 return Tok_Of;
382 default:
383 return Tok_Op;
384 }
385 }
386 else if (count == 2 && root[0] == '-' && root[1] == '>')
387 return Tok_To;
388 else if (count == 2 && root[0] == '|' && root[1] == '>')
389 return Tok_Op;
390 else
391 return Tok_Op;
392 }
393
394 /* The lexer is in charge of reading the file.
395 * Some of sub-lexer (like eatComment) also read file.
396 * lexing is finished when the lexer return Tok_EOF */
lex(lexingState * st)397 static ocamlKeyword lex (lexingState * st)
398 {
399 int retType;
400 /* handling data input here */
401 while (st->cp == NULL || st->cp[0] == '\0')
402 {
403 st->cp = readLineFromInputFile ();
404 if (st->cp == NULL)
405 return Tok_EOF;
406 }
407
408 if (isAlpha (*st->cp))
409 {
410 readIdentifier (st);
411 retType = lookupKeyword (vStringValue (st->name), Lang_Ocaml);
412
413 if (retType == -1) /* If it's not a keyword */
414 {
415 return OcaIDENTIFIER;
416 }
417 else
418 {
419 return retType;
420 }
421 }
422 else if (isNum (*st->cp))
423 return eatNumber (st);
424 else if (isSpace (*st->cp))
425 {
426 eatWhiteSpace (st);
427 return lex (st);
428 }
429 else if (*st->cp == '_')
430 { // special
431 readIdentifier (st);
432 return Tok_Val;
433 }
434
435 /* OCaml permit the definition of our own operators
436 * so here we check all the consecutive chars which
437 * are operators to discard them. */
438 else if (isOperator[*st->cp])
439 return eatOperator (st);
440 else
441 {
442 switch (*st->cp)
443 {
444 case '(':
445 if (st->cp[1] == '*') /* ergl, a comment */
446 {
447 eatComment (st);
448 return lex (st);
449 }
450 else
451 {
452 st->cp++;
453 return Tok_PARL;
454 }
455
456 case ')':
457 st->cp++;
458 return Tok_PARR;
459 case '[':
460 st->cp++;
461 return Tok_BRL;
462 case ']':
463 st->cp++;
464 return Tok_BRR;
465 case '{':
466 st->cp++;
467 return Tok_CurlL;
468 case '}':
469 st->cp++;
470 return Tok_CurlR;
471 case '\'':
472 st->cp++;
473 return Tok_Prime;
474 case ',':
475 st->cp++;
476 return Tok_comma;
477 case '=':
478 st->cp++;
479 return Tok_EQ;
480 case ';':
481 st->cp++;
482 return Tok_semi;
483 case '"':
484 eatString (st);
485 return Tok_Val;
486 case '#':
487 st->cp++;
488 return Tok_Sharp;
489 case '\\':
490 st->cp++;
491 return Tok_Backslash;
492 default:
493 st->cp++;
494 break;
495 }
496 }
497 /* default return if nothing is recognized,
498 * shouldn't happen, but at least, it will
499 * be handled without destroying the parsing. */
500 return Tok_Val;
501 }
502
503 /*//////////////////////////////////////////////////////////////////////
504 //// Parsing */
505 typedef void (*parseNext) (vString * const ident, ocaToken what, ocaToken whatNext);
506
507 /********** Helpers */
508 /* This variable hold the 'parser' which is going to
509 * handle the next token */
510 static parseNext toDoNext;
511
512 /* Special variable used by parser eater to
513 * determine which action to put after their
514 * job is finished. */
515 static parseNext comeAfter;
516
517 /* If a token put an end to current declaration/
518 * statement */
519 static ocaToken terminatingToken;
520
521 /* Token to be searched by the different
522 * parser eater. */
523 static ocaToken waitedToken;
524
525 /* name of the last class, used for
526 * context stacking. */
527 static vString *lastClass;
528
529 typedef enum _sContextKind {
530 ContextStrong,
531 ContextSoft
532 } contextKind;
533
534 typedef enum _sContextType {
535 ContextType,
536 ContextModule,
537 ContextClass,
538 ContextValue,
539 ContextFunction,
540 ContextMethod,
541 ContextBlock,
542 ContextMatch
543 } contextType;
544
545 typedef struct _sOcamlContext {
546 contextKind kind; /* well if the context is strong or not */
547 contextType type;
548 parseNext callback; /* what to do when a context is pop'd */
549 vString *contextName; /* name, if any, of the surrounding context */
550 } ocamlContext;
551
552 /* context stack, can be used to output scope information
553 * into the tag file. */
554 static ocamlContext stack[OCAML_MAX_STACK_SIZE];
555 /* current position in the tag */
556 static int stackIndex;
557
558 /* special function, often recalled, so putting it here */
559 static void globalScope (vString * const ident, ocaToken what, ocaToken whatNext);
560
561 /* Return : index of the last named context if one
562 * is found, -1 otherwise */
getLastNamedIndex(void)563 static int getLastNamedIndex ( void )
564 {
565 int i;
566
567 for (i = stackIndex - 1; i >= 0; --i)
568 {
569 if (vStringLength (stack[i].contextName) > 0)
570 {
571 return i;
572 }
573 }
574
575 return -1;
576 }
577
contextDescription(contextType t)578 static int contextDescription (contextType t)
579 {
580 switch (t)
581 {
582 case ContextFunction:
583 return K_FUNCTION;
584 case ContextMethod:
585 return K_METHOD;
586 case ContextValue:
587 return K_VAL;
588 case ContextModule:
589 return K_MODULE;
590 case ContextType:
591 return K_TYPE;
592 case ContextClass:
593 return K_CLASS;
594 default:
595 AssertNotReached();
596 return KIND_GHOST_INDEX;
597 }
598 }
599
contextTypeSuffix(contextType t)600 static char contextTypeSuffix (contextType t)
601 {
602 switch (t)
603 {
604 case ContextFunction:
605 case ContextMethod:
606 case ContextValue:
607 case ContextModule:
608 return '/';
609 case ContextType:
610 return '.';
611 case ContextClass:
612 return '#';
613 case ContextBlock:
614 return ' ';
615 case ContextMatch:
616 return '|';
617 default:
618 return '$';
619 }
620 }
621
622 /* Push a new context, handle null string */
pushContext(contextKind kind,contextType type,parseNext after,vString const * contextName)623 static void pushContext (contextKind kind, contextType type, parseNext after,
624 vString const *contextName)
625 {
626 int parentIndex;
627
628 if (stackIndex >= OCAML_MAX_STACK_SIZE)
629 {
630 verbose ("OCaml Maximum depth reached");
631 return;
632 }
633
634 stack[stackIndex].kind = kind;
635 stack[stackIndex].type = type;
636 stack[stackIndex].callback = after;
637
638 parentIndex = getLastNamedIndex ();
639 if (contextName == NULL)
640 {
641 vStringClear (stack[stackIndex++].contextName);
642 return;
643 }
644
645 if (parentIndex >= 0)
646 {
647 vStringCopy (stack[stackIndex].contextName,
648 stack[parentIndex].contextName);
649 vStringPut (stack[stackIndex].contextName,
650 contextTypeSuffix (stack[parentIndex].type));
651
652 vStringCat (stack[stackIndex].contextName, contextName);
653 }
654 else
655 vStringCopy (stack[stackIndex].contextName, contextName);
656
657 stackIndex++;
658 }
659
pushStrongContext(vString * name,contextType type)660 static void pushStrongContext (vString * name, contextType type)
661 {
662 pushContext (ContextStrong, type, &globalScope, name);
663 }
664
pushSoftContext(parseNext continuation,vString * name,contextType type)665 static void pushSoftContext (parseNext continuation,
666 vString * name, contextType type)
667 {
668 pushContext (ContextSoft, type, continuation, name);
669 }
670
pushEmptyContext(parseNext continuation)671 static void pushEmptyContext (parseNext continuation)
672 {
673 pushContext (ContextSoft, ContextValue, continuation, NULL);
674 }
675
676 /* unroll the stack until the last named context.
677 * then discard it. Used to handle the :
678 * let f x y = ...
679 * in ...
680 * where the context is reseted after the in. Context may have
681 * been really nested before that. */
popLastNamed(void)682 static void popLastNamed ( void )
683 {
684 int i = getLastNamedIndex ();
685
686 if (i >= 0)
687 {
688 stackIndex = i;
689 toDoNext = stack[i].callback;
690 vStringClear (stack[i].contextName);
691 }
692 else
693 {
694 /* ok, no named context found...
695 * (should not happen). */
696 stackIndex = 0;
697 toDoNext = &globalScope;
698 }
699 }
700
701 /* pop a context without regarding it's content
702 * (beside handling empty stack case) */
popSoftContext(void)703 static void popSoftContext ( void )
704 {
705 if (stackIndex <= 0)
706 {
707 toDoNext = &globalScope;
708 }
709 else
710 {
711 stackIndex--;
712 toDoNext = stack[stackIndex].callback;
713 vStringClear (stack[stackIndex].contextName);
714 }
715 }
716
717 /* Reset everything until the last global space.
718 * a strong context can be :
719 * - module
720 * - class definition
721 * - the initial global space
722 * - a _global_ declaration (let at global scope or in a module).
723 * Created to exit quickly deeply nested context */
popStrongContext(void)724 static contextType popStrongContext ( void )
725 {
726 int i;
727
728 for (i = stackIndex - 1; i >= 0; --i)
729 {
730 if (stack[i].kind == ContextStrong)
731 {
732 stackIndex = i;
733 toDoNext = stack[i].callback;
734 vStringClear (stack[i].contextName);
735 return stack[i].type;
736 }
737 }
738 /* ok, no strong context found... */
739 stackIndex = 0;
740 toDoNext = &globalScope;
741 return -1;
742 }
743
744 /* Reset everything before the last match. */
jumpToMatchContext(void)745 static void jumpToMatchContext ( void )
746 {
747 int i;
748 for (i = stackIndex - 1; i >= 0; --i)
749 {
750 if (stack[i].type == ContextMatch)
751 {
752 stackIndex = i + 1;
753 toDoNext = stack[i].callback; // this should always be
754 // matchPattern
755 stack[i + 1].callback = NULL;
756 vStringClear (stack[i + 1].contextName);
757 return;
758 }
759 }
760 }
761
762 /* Ignore everything till waitedToken and jump to comeAfter.
763 * If the "end" keyword is encountered break, doesn't remember
764 * why though. */
tillToken(vString * const ident CTAGS_ATTR_UNUSED,ocaToken what,ocaToken whatNext CTAGS_ATTR_UNUSED)765 static void tillToken (vString * const ident CTAGS_ATTR_UNUSED, ocaToken what, ocaToken whatNext CTAGS_ATTR_UNUSED)
766 {
767 if (what == waitedToken)
768 toDoNext = comeAfter;
769 else if (what == OcaKEYWORD_end)
770 {
771 popStrongContext ();
772 toDoNext = &globalScope;
773 }
774 }
775
776 /* Ignore everything till a waitedToken is seen, but
777 * take care of balanced parentheses/bracket use */
contextualTillToken(vString * const ident,ocaToken what,ocaToken whatNext)778 static void contextualTillToken (vString * const ident, ocaToken what, ocaToken whatNext)
779 {
780 static int parentheses = 0;
781 static int bracket = 0;
782 static int curly = 0;
783
784 switch (what)
785 {
786 case Tok_PARL:
787 parentheses--;
788 break;
789 case Tok_PARR:
790 parentheses++;
791 break;
792 case Tok_CurlL:
793 curly--;
794 break;
795 case Tok_CurlR:
796 curly++;
797 break;
798 case Tok_BRL:
799 bracket--;
800 break;
801 case Tok_BRR:
802 bracket++;
803 break;
804
805 default: /* other token are ignored */
806 break;
807 }
808
809 if (what == waitedToken && parentheses == 0 && bracket == 0 && curly == 0)
810 toDoNext = comeAfter;
811 else if (what == OcaKEYWORD_end)
812 globalScope (ident, what, whatNext);
813 }
814
815 /* Wait for waitedToken and jump to comeAfter or let
816 * the globalScope handle declarations */
tillTokenOrFallback(vString * const ident,ocaToken what,ocaToken whatNext)817 static void tillTokenOrFallback (vString * const ident, ocaToken what, ocaToken whatNext)
818 {
819 if (what == waitedToken)
820 toDoNext = comeAfter;
821 else
822 globalScope (ident, what, whatNext);
823 }
824
825 /* ignore token till waitedToken, or give up if find
826 * terminatingToken. Use globalScope to handle new
827 * declarations. */
tillTokenOrTerminatingOrFallback(vString * const ident,ocaToken what,ocaToken whatNext)828 static void tillTokenOrTerminatingOrFallback (vString * const ident, ocaToken what, ocaToken whatNext)
829 {
830 if (what == waitedToken)
831 toDoNext = comeAfter;
832 else if (what == terminatingToken)
833 toDoNext = globalScope;
834 else
835 globalScope (ident, what, whatNext);
836 }
837
838 /* ignore the next token in the stream and jump to the
839 * given comeAfter state */
ignoreToken(vString * const ident CTAGS_ATTR_UNUSED,ocaToken what CTAGS_ATTR_UNUSED,ocaToken whatNext CTAGS_ATTR_UNUSED)840 static void ignoreToken (vString * const ident CTAGS_ATTR_UNUSED, ocaToken what CTAGS_ATTR_UNUSED, ocaToken whatNext CTAGS_ATTR_UNUSED)
841 {
842 toDoNext = comeAfter;
843 }
844
845 /********** Grammar */
846 /* the purpose of each function is detailed near their
847 * implementation */
848
killCurrentState(void)849 static contextType killCurrentState ( void )
850 {
851 contextType popped = popStrongContext ();
852
853 /* Tracking the kind of previous strong
854 * context, if it doesn't match with a
855 * really strong entity, repop */
856 switch (popped)
857 {
858 case ContextValue:
859 popped = popStrongContext ();
860 break;
861 case ContextFunction:
862 popped = popStrongContext ();
863 break;
864 case ContextMethod:
865 popped = popStrongContext ();
866 break;
867 case ContextType:
868 popped = popStrongContext ();
869 break;
870 case ContextMatch:
871 popped = popStrongContext ();
872 break;
873 case ContextBlock:
874 break;
875 case ContextModule:
876 break;
877 case ContextClass:
878 break;
879 default:
880 /* nothing more */
881 break;
882 }
883 return popped;
884 }
885
886 /* Keep track of our _true_ line number and file pos,
887 * as the lookahead token gives us false values. */
888 static unsigned long ocaLineNumber;
889 static MIOPos ocaFilePosition;
890
891 /* Used to prepare an OCaml tag, just in case there is a need to
892 * add additional information to the tag. */
prepareTag(tagEntryInfo * tag,vString const * name,int kind)893 static void prepareTag (tagEntryInfo * tag, vString const *name, int kind)
894 {
895 int parentIndex;
896
897 initTagEntry (tag, vStringValue (name), kind);
898 /* Ripped out of read.h initTagEntry, because of line number
899 * shenanigans.
900 * Ugh. Lookahead is harder than I expected. */
901 tag->lineNumber = ocaLineNumber;
902 tag->filePosition = ocaFilePosition;
903
904 parentIndex = getLastNamedIndex ();
905 if (parentIndex >= 0)
906 {
907 tag->extensionFields.scopeKindIndex =
908 contextDescription (stack[parentIndex].type);
909 tag->extensionFields.scopeName =
910 vStringValue (stack[parentIndex].contextName);
911 }
912 }
913
914 /* Used to centralise tag creation, and be able to add
915 * more information to it in the future */
addTag(vString * const ident,int kind)916 static void addTag (vString * const ident, int kind)
917 {
918 if (OcamlKinds [kind].enabled && ident != NULL && vStringLength (ident) > 0)
919 {
920 tagEntryInfo toCreate;
921 prepareTag (&toCreate, ident, kind);
922 makeTagEntry (&toCreate);
923 }
924 }
925
926 static bool needStrongPoping = false;
requestStrongPoping(void)927 static void requestStrongPoping ( void )
928 {
929 needStrongPoping = true;
930 }
931
cleanupPreviousParser(void)932 static void cleanupPreviousParser ( void )
933 {
934 if (needStrongPoping)
935 {
936 needStrongPoping = false;
937 popStrongContext ();
938 }
939 }
940
941 /* Due to some circular dependencies, the following functions
942 * must be forward-declared. */
943 static void letParam (vString * const ident, ocaToken what, ocaToken whatNext);
944 static void localScope (vString * const ident, ocaToken what, ocaToken whatNext);
945 static void mayRedeclare (vString * const ident, ocaToken what, ocaToken whatNext);
946 static void typeSpecification (vString * const ident, ocaToken what, ocaToken whatNext);
947
948 /*
949 * Parse a record type
950 * type ident = // parsed previously
951 * {
952 * ident1: type1;
953 * ident2: type2;
954 * }
955 */
typeRecord(vString * const ident,ocaToken what,ocaToken whatNext CTAGS_ATTR_UNUSED)956 static void typeRecord (vString * const ident, ocaToken what, ocaToken whatNext CTAGS_ATTR_UNUSED)
957 {
958 switch (what)
959 {
960 case OcaIDENTIFIER:
961 addTag (ident, K_RECORDFIELD);
962 terminatingToken = Tok_CurlR;
963 waitedToken = Tok_semi;
964 comeAfter = &typeRecord;
965 toDoNext = &tillTokenOrTerminatingOrFallback;
966 break;
967
968 case OcaKEYWORD_mutable:
969 /* ignore it */
970 break;
971
972 case Tok_CurlR:
973 popStrongContext ();
974 // don't pop the module context when going to another expression
975 needStrongPoping = false;
976 toDoNext = &globalScope;
977 break;
978
979 default: /* don't care */
980 break;
981 }
982 }
983
984 /* handle :
985 * exception ExceptionName of ... */
exceptionDecl(vString * const ident,ocaToken what,ocaToken whatNext)986 static void exceptionDecl (vString * const ident, ocaToken what, ocaToken whatNext)
987 {
988 if (what == OcaIDENTIFIER)
989 {
990 addTag (ident, K_EXCEPTION);
991 }
992 else /* probably ill-formed, give back to global scope */
993 {
994 globalScope (ident, what, whatNext);
995 }
996 toDoNext = &globalScope;
997 }
998
999 static tagEntryInfo tempTag;
1000 static vString *tempIdent;
1001
1002 /* Ensure a constructor is not a type path beginning
1003 * with a module */
constructorValidation(vString * const ident,ocaToken what,ocaToken whatNext)1004 static void constructorValidation (vString * const ident, ocaToken what, ocaToken whatNext)
1005 {
1006 switch (what)
1007 {
1008 case Tok_Op: /* if we got a '.' which is an operator */
1009 toDoNext = &globalScope;
1010 popStrongContext ();
1011 needStrongPoping = false;
1012 break;
1013
1014 case OcaKEYWORD_of: /* OK, it must be a constructor :) */
1015 if (vStringLength (tempIdent) > 0)
1016 {
1017 makeTagEntry (&tempTag);
1018 vStringClear (tempIdent);
1019 }
1020 toDoNext = &tillTokenOrFallback;
1021 comeAfter = &typeSpecification;
1022 waitedToken = Tok_Pipe;
1023 break;
1024
1025 case Tok_Pipe: /* OK, it was a constructor :) */
1026 if (vStringLength (tempIdent) > 0)
1027 {
1028 makeTagEntry (&tempTag);
1029 vStringClear (tempIdent);
1030 }
1031 toDoNext = &typeSpecification;
1032 break;
1033
1034 default: /* and mean that we're not facing a module name */
1035 if (vStringLength (tempIdent) > 0)
1036 {
1037 makeTagEntry (&tempTag);
1038 vStringClear (tempIdent);
1039 }
1040 toDoNext = &tillTokenOrFallback;
1041 comeAfter = &typeSpecification;
1042 waitedToken = Tok_Pipe;
1043
1044 popStrongContext ();
1045
1046 // don't pop the module context when going to another expression
1047 needStrongPoping = false;
1048
1049 /* to be sure we use this token */
1050 globalScope (ident, what, whatNext);
1051 }
1052 }
1053
1054 /* Parse beginning of type definition
1055 * type 'avar ident =
1056 * or
1057 * type ('var1, 'var2) ident =
1058 */
typeDecl(vString * const ident,ocaToken what,ocaToken whatNext)1059 static void typeDecl (vString * const ident, ocaToken what, ocaToken whatNext)
1060 {
1061 switch (what)
1062 {
1063 /* parameterized */
1064 case Tok_Prime:
1065 comeAfter = &typeDecl;
1066 toDoNext = &ignoreToken;
1067 break;
1068 /* LOTS of parameters */
1069 case Tok_PARL:
1070 comeAfter = &typeDecl;
1071 waitedToken = Tok_PARR;
1072 toDoNext = &tillToken;
1073 break;
1074
1075 case OcaIDENTIFIER:
1076 addTag (ident, K_TYPE);
1077 // true type declaration
1078 if (whatNext == Tok_EQ)
1079 {
1080 pushStrongContext (ident, ContextType);
1081 requestStrongPoping ();
1082 toDoNext = &typeSpecification;
1083 }
1084 else // we're in a sig
1085 toDoNext = &globalScope;
1086 break;
1087
1088 default:
1089 globalScope (ident, what, whatNext);
1090 }
1091 }
1092
1093 /** handle 'val' signatures in sigs and .mli files
1094 * val ident : String.t -> Val.t
1095 * Eventually, this will do cool things to annotate
1096 * functions with their actual signatures. But for now,
1097 * it's basically globalLet */
val(vString * const ident,ocaToken what,ocaToken whatNext CTAGS_ATTR_UNUSED)1098 static void val (vString * const ident, ocaToken what, ocaToken whatNext CTAGS_ATTR_UNUSED)
1099 {
1100 switch (what)
1101 {
1102 case Tok_PARL:
1103 case OcaKEYWORD_rec:
1104 break;
1105
1106 case Tok_Op:
1107 /* we are defining a new operator, it's a
1108 * function definition */
1109 addTag (ident, K_VAL);
1110 toDoNext = &globalScope;
1111 break;
1112
1113 case Tok_Val: /* Can be a weiiird binding, or an '_' */
1114 case OcaIDENTIFIER:
1115 addTag (ident, K_VAL);
1116 toDoNext = &globalScope; // sig parser ?
1117 break;
1118
1119 default:
1120 toDoNext = &globalScope;
1121 break;
1122 }
1123 }
1124
1125 /* Parse type of kind
1126 * type bidule = Ctor1 of ...
1127 * | Ctor2
1128 * | Ctor3 of ...
1129 * or
1130 * type bidule = | Ctor1 of ... | Ctor2
1131 *
1132 * when type bidule = { ... } is detected,
1133 * let typeRecord handle it. */
typeSpecification(vString * const ident,ocaToken what,ocaToken whatNext CTAGS_ATTR_UNUSED)1134 static void typeSpecification (vString * const ident, ocaToken what, ocaToken whatNext CTAGS_ATTR_UNUSED)
1135 {
1136 switch (what)
1137 {
1138 case OcaIDENTIFIER:
1139 if (isUpperAlpha (vStringChar (ident, 0)))
1140 {
1141 /* here we handle type aliases of type
1142 * type foo = AnotherModule.bar
1143 * AnotherModule can mistakenly be took
1144 * for a constructor. */
1145 if (! OcamlKinds[K_CONSTRUCTOR].enabled)
1146 vStringClear (tempIdent);
1147 else
1148 {
1149 vStringCopy (tempIdent, ident);
1150 prepareTag (&tempTag, tempIdent, K_CONSTRUCTOR);
1151 }
1152 toDoNext = &constructorValidation;
1153 }
1154 else
1155 {
1156 toDoNext = &tillTokenOrFallback;
1157 comeAfter = &typeSpecification;
1158 waitedToken = Tok_Pipe;
1159 }
1160 break;
1161
1162 case OcaKEYWORD_and:
1163 toDoNext = &typeDecl;
1164 break;
1165
1166 case OcaKEYWORD_val:
1167 toDoNext = &val;
1168 break;
1169
1170 case Tok_BRL: /* the '[' & ']' are ignored to accommodate */
1171 case Tok_BRR: /* with the revised syntax */
1172 case Tok_Pipe:
1173 /* just ignore it */
1174 break;
1175
1176 case Tok_CurlL:
1177 toDoNext = &typeRecord;
1178 break;
1179
1180 default: /* don't care */
1181 break;
1182 }
1183 }
1184
1185
1186 static bool dirtySpecialParam = false;
1187
1188 /* parse the ~label and ~label:type parameter */
parseLabel(vString * const ident,ocaToken what,ocaToken whatNext)1189 static void parseLabel (vString * const ident, ocaToken what, ocaToken whatNext)
1190 {
1191 static int parCount = 0;
1192
1193 switch (what)
1194 {
1195 case OcaIDENTIFIER:
1196 if (!dirtySpecialParam)
1197 {
1198 if (exportLocalInfo)
1199 addTag (ident, K_VARIABLE);
1200
1201 dirtySpecialParam = true;
1202 }
1203 break;
1204
1205 case Tok_PARL:
1206 parCount++;
1207 break;
1208
1209 case Tok_PARR:
1210 parCount--;
1211 if (parCount == 0)
1212 toDoNext = &letParam;
1213 break;
1214
1215 case Tok_Op:
1216 if (vStringChar(ident, 0) == ':')
1217 {
1218 toDoNext = &ignoreToken;
1219 comeAfter = &letParam;
1220 }
1221 else if (parCount == 0 && dirtySpecialParam)
1222 {
1223 toDoNext = &letParam;
1224 letParam (ident, what, whatNext);
1225 }
1226 break;
1227
1228 default:
1229 if (parCount == 0 && dirtySpecialParam)
1230 {
1231 toDoNext = &letParam;
1232 letParam (ident, what, whatNext);
1233 }
1234 break;
1235 }
1236 }
1237
1238 /* Optional argument with syntax like this :
1239 * ?(foo = value) */
parseOptionnal(vString * const ident,ocaToken what,ocaToken whatNext CTAGS_ATTR_UNUSED)1240 static void parseOptionnal (vString * const ident, ocaToken what, ocaToken whatNext CTAGS_ATTR_UNUSED)
1241 {
1242 static int parCount = 0;
1243
1244 switch (what)
1245 {
1246 case OcaIDENTIFIER:
1247 if (!dirtySpecialParam)
1248 {
1249 if (exportLocalInfo)
1250 addTag (ident, K_VARIABLE);
1251
1252 dirtySpecialParam = true;
1253
1254 if (parCount == 0)
1255 toDoNext = &letParam;
1256 }
1257 break;
1258
1259 case Tok_PARL:
1260 parCount++;
1261 break;
1262
1263 case Tok_PARR:
1264 parCount--;
1265 if (parCount == 0)
1266 toDoNext = &letParam;
1267 break;
1268
1269 default: /* don't care */
1270 break;
1271 }
1272 }
1273
1274 /** handle let inside functions (so like it's name
1275 * say : local let */
localLet(vString * const ident,ocaToken what,ocaToken whatNext)1276 static void localLet (vString * const ident, ocaToken what, ocaToken whatNext)
1277 {
1278 switch (what)
1279 {
1280 case Tok_PARL:
1281 /* We ignore this token to be able to parse such
1282 * declarations :
1283 * let (ident : type) = ...
1284 */
1285 break;
1286
1287 case OcaKEYWORD_rec:
1288 /* just ignore to be able to parse such declarations:
1289 * let rec ident = ... */
1290 break;
1291
1292 case Tok_Op:
1293 /* we are defining a new operator, it's a
1294 * function definition */
1295 if (exportLocalInfo)
1296 addTag (ident, K_FUNCTION);
1297 pushSoftContext (mayRedeclare, ident, ContextFunction);
1298 toDoNext = &letParam;
1299 break;
1300
1301 case Tok_Val: /* Can be a weiiird binding, or an '_' */
1302 case OcaIDENTIFIER:
1303 // if we're an identifier, and the next token is too, then
1304 // we're definitely a function.
1305 if (whatNext == OcaIDENTIFIER || whatNext == Tok_PARL)
1306 {
1307 if (exportLocalInfo)
1308 addTag (ident, K_FUNCTION);
1309 pushSoftContext (mayRedeclare, ident, ContextFunction);
1310 }
1311 else
1312 {
1313 if (exportLocalInfo)
1314 addTag (ident, K_VARIABLE);
1315 pushSoftContext (mayRedeclare, ident, ContextValue);
1316 }
1317 toDoNext = &letParam;
1318 break;
1319
1320 case OcaKEYWORD_end:
1321 localScope (ident, what, whatNext);
1322 break;
1323
1324 default:
1325 toDoNext = &localScope;
1326 break;
1327 }
1328 }
1329
1330 /* parse :
1331 * | pattern pattern -> ...
1332 * or
1333 * pattern apttern apttern -> ...
1334 * we ignore all identifiers declared in the pattern,
1335 * because their scope is likely to be even more limited
1336 * than the let definitions.
1337 * Used after a match ... with, or a function ...
1338 * because their syntax is similar. */
matchPattern(vString * const ident,ocaToken what,ocaToken whatNext)1339 static void matchPattern (vString * const ident, ocaToken what, ocaToken whatNext)
1340 {
1341 /* keep track of [], as it
1342 * can be used in patterns and can
1343 * mean the end of match expression in
1344 * revised syntax */
1345 static int braceCount = 0;
1346
1347 switch (what)
1348 {
1349 case Tok_To:
1350 pushEmptyContext (&matchPattern);
1351 toDoNext = &mayRedeclare;
1352 break;
1353
1354 case Tok_BRL:
1355 braceCount++;
1356 break;
1357
1358 case OcaKEYWORD_value:
1359 popLastNamed ();
1360 case OcaKEYWORD_and:
1361 case OcaKEYWORD_end:
1362 // why was this global? matches only make sense in local scope
1363 localScope (ident, what, whatNext);
1364 break;
1365
1366 case OcaKEYWORD_in:
1367 popLastNamed ();
1368 break;
1369
1370 default:
1371 break;
1372 }
1373 }
1374
1375 /* Used at the beginning of a new scope (begin of a
1376 * definition, parenthesis...) to catch inner let
1377 * definition that may be in. */
mayRedeclare(vString * const ident,ocaToken what,ocaToken whatNext)1378 static void mayRedeclare (vString * const ident, ocaToken what, ocaToken whatNext)
1379 {
1380 switch (what)
1381 {
1382 case OcaKEYWORD_value:
1383 /* let globalScope handle it */
1384 globalScope (ident, what, whatNext);
1385
1386 case OcaKEYWORD_let:
1387 toDoNext = &localLet;
1388 break;
1389
1390 case OcaKEYWORD_val:
1391 toDoNext = &val;
1392 break;
1393
1394 case OcaKEYWORD_object:
1395 vStringClear (lastClass);
1396 pushContext (ContextStrong, ContextClass,
1397 &localScope, NULL);
1398 needStrongPoping = false;
1399 toDoNext = &globalScope;
1400 break;
1401
1402 case OcaKEYWORD_for:
1403 case OcaKEYWORD_while:
1404 toDoNext = &tillToken;
1405 waitedToken = OcaKEYWORD_do;
1406 comeAfter = &mayRedeclare;
1407 break;
1408
1409 case OcaKEYWORD_try:
1410 toDoNext = &mayRedeclare;
1411 pushSoftContext (&matchPattern, ident, ContextFunction);
1412 break;
1413
1414 case OcaKEYWORD_function:
1415 toDoNext = &matchPattern;
1416 pushSoftContext (&matchPattern, NULL, ContextMatch);
1417 break;
1418
1419 case OcaKEYWORD_fun:
1420 toDoNext = &letParam;
1421 break;
1422
1423 /* Handle the special ;; from the OCaml
1424 * Top level */
1425 case Tok_semi:
1426 default:
1427 toDoNext = &localScope;
1428 localScope (ident, what, whatNext);
1429 }
1430 }
1431
1432 /* parse :
1433 * p1 p2 ... pn = ...
1434 * or
1435 * ?(p1=v) p2 ~p3 ~pn:ja ... = ... */
letParam(vString * const ident,ocaToken what,ocaToken whatNext CTAGS_ATTR_UNUSED)1436 static void letParam (vString * const ident, ocaToken what, ocaToken whatNext CTAGS_ATTR_UNUSED)
1437 {
1438 switch (what)
1439 {
1440 case Tok_To:
1441 case Tok_EQ:
1442 toDoNext = &mayRedeclare;
1443 break;
1444
1445 case OcaIDENTIFIER:
1446 if (exportLocalInfo)
1447 addTag (ident, K_VARIABLE);
1448 break;
1449
1450 case Tok_Op:
1451 switch (vStringChar (ident, 0))
1452 {
1453 case ':':
1454 /*popSoftContext(); */
1455 /* we got a type signature */
1456 comeAfter = &mayRedeclare;
1457 toDoNext = &tillTokenOrFallback;
1458 waitedToken = Tok_EQ;
1459 break;
1460
1461 /* parse something like
1462 * ~varname:type
1463 * or
1464 * ~varname
1465 * or
1466 * ~(varname: long type) */
1467 case '~':
1468 toDoNext = &parseLabel;
1469 dirtySpecialParam = false;
1470 break;
1471
1472 /* Optional argument with syntax like this :
1473 * ?(bla = value)
1474 * or
1475 * ?bla */
1476 case '?':
1477 toDoNext = &parseOptionnal;
1478 dirtySpecialParam = false;
1479 break;
1480
1481 default:
1482 break;
1483 }
1484 break;
1485
1486 default: /* don't care */
1487 break;
1488 }
1489 }
1490
1491 /* parse object ...
1492 * used to be sure the class definition is not a type
1493 * alias */
classSpecif(vString * const ident CTAGS_ATTR_UNUSED,ocaToken what,ocaToken whatNext CTAGS_ATTR_UNUSED)1494 static void classSpecif (vString * const ident CTAGS_ATTR_UNUSED, ocaToken what, ocaToken whatNext CTAGS_ATTR_UNUSED)
1495 {
1496 switch (what)
1497 {
1498 case OcaKEYWORD_object:
1499 pushStrongContext (lastClass, ContextClass);
1500 toDoNext = &globalScope;
1501 break;
1502
1503 default:
1504 vStringClear (lastClass);
1505 toDoNext = &globalScope;
1506 }
1507 }
1508
1509 /* Handle a method ... class declaration.
1510 * nearly a copy/paste of globalLet. */
methodDecl(vString * const ident,ocaToken what,ocaToken whatNext)1511 static void methodDecl (vString * const ident, ocaToken what, ocaToken whatNext)
1512 {
1513 switch (what)
1514 {
1515 case Tok_PARL:
1516 /* We ignore this token to be able to parse such
1517 * declarations :
1518 * let (ident : type) = ... */
1519 break;
1520
1521 case OcaKEYWORD_mutable:
1522 case OcaKEYWORD_virtual:
1523 case OcaKEYWORD_rec:
1524 /* just ignore to be able to parse such declarations:
1525 * let rec ident = ... */
1526 break;
1527
1528 case OcaIDENTIFIER:
1529 addTag (ident, K_METHOD);
1530 /* Normal pushing to get good subs */
1531 pushStrongContext (ident, ContextMethod);
1532 /*pushSoftContext( globalScope, ident, ContextMethod ); */
1533 toDoNext = &letParam;
1534 break;
1535
1536 case OcaKEYWORD_end:
1537 localScope (ident, what, whatNext);
1538 break;
1539
1540 default:
1541 toDoNext = &globalScope;
1542 break;
1543 }
1544 }
1545
1546 /* name of the last module, used for
1547 * context stacking. */
1548 static vString *lastModule;
1549
1550 /* parse
1551 * ... struct (* new global scope *) end
1552 * or
1553 * ... sig (* new global scope *) end
1554 * or
1555 * functor ... -> moduleSpecif
1556 */
moduleSpecif(vString * const ident,ocaToken what,ocaToken whatNext)1557 static void moduleSpecif (vString * const ident, ocaToken what, ocaToken whatNext)
1558 {
1559 switch (what)
1560 {
1561 case OcaKEYWORD_functor:
1562 toDoNext = &contextualTillToken;
1563 waitedToken = Tok_To;
1564 comeAfter = &moduleSpecif;
1565 break;
1566
1567 case OcaKEYWORD_struct:
1568 case OcaKEYWORD_sig:
1569 pushStrongContext (lastModule, ContextModule);
1570 toDoNext = &globalScope;
1571 needStrongPoping = false;
1572 break;
1573
1574 case Tok_PARL: /* ( */
1575 toDoNext = &contextualTillToken;
1576 comeAfter = &globalScope;
1577 waitedToken = Tok_PARR;
1578 contextualTillToken (ident, what, whatNext);
1579 break;
1580
1581 case Tok_Of:
1582 case Tok_EQ:
1583 break;
1584
1585 default:
1586 vStringClear (lastModule);
1587 toDoNext = &globalScope;
1588 break;
1589 }
1590 }
1591
1592 /* parse :
1593 * module name = ...
1594 * then pass the token stream to moduleSpecif */
moduleDecl(vString * const ident,ocaToken what,ocaToken whatNext)1595 static void moduleDecl (vString * const ident, ocaToken what, ocaToken whatNext)
1596 {
1597 switch (what)
1598 {
1599 case OcaKEYWORD_rec:
1600 /* recursive modules are _weird_, but they happen */
1601 case OcaKEYWORD_type:
1602 /* this is technically a special type, but whatever */
1603 break;
1604
1605 case OcaIDENTIFIER:
1606 addTag (ident, K_MODULE);
1607 vStringCopy (lastModule, ident);
1608 if (whatNext == Tok_Of || whatNext == Tok_EQ)
1609 toDoNext = &moduleSpecif;
1610 else
1611 {
1612 // default to waiting on a '=' since
1613 // module M : sig ... end = struct ... end
1614 // is rarer
1615 waitedToken = Tok_EQ;
1616 comeAfter = &moduleSpecif;
1617 toDoNext = &contextualTillToken;
1618 }
1619 break;
1620
1621 default: /* don't care */
1622 break;
1623 }
1624 }
1625
1626 /* parse :
1627 * class name = ...
1628 * or
1629 * class virtual ['a,'b] classname = ... */
classDecl(vString * const ident,ocaToken what,ocaToken whatNext CTAGS_ATTR_UNUSED)1630 static void classDecl (vString * const ident, ocaToken what, ocaToken whatNext CTAGS_ATTR_UNUSED)
1631 {
1632 switch (what)
1633 {
1634 case OcaIDENTIFIER:
1635 addTag (ident, K_CLASS);
1636 vStringCopy (lastClass, ident);
1637 toDoNext = &contextualTillToken;
1638 waitedToken = Tok_EQ;
1639 comeAfter = &classSpecif;
1640 break;
1641
1642 case Tok_BRL:
1643 toDoNext = &tillToken;
1644 waitedToken = Tok_BRR;
1645 comeAfter = &classDecl;
1646 break;
1647
1648 default:
1649 break;
1650 }
1651 }
1652
1653 /* Handle a global
1654 * let ident ...
1655 * or
1656 * let rec ident ... */
globalLet(vString * const ident,ocaToken what,ocaToken whatNext)1657 static void globalLet (vString * const ident, ocaToken what, ocaToken whatNext)
1658 {
1659 switch (what)
1660 {
1661 case Tok_PARL:
1662 /* We ignore this token to be able to parse such
1663 * declarations :
1664 * let (ident : type) = ...
1665 * but () is the toplevel function name, so fake ourselves
1666 * as an ident and make a new function */
1667 if (whatNext == Tok_PARR)
1668 {
1669 vString *fakeIdent = vStringNewInit ("()");
1670 addTag (fakeIdent, K_FUNCTION);
1671 pushStrongContext (fakeIdent, ContextFunction);
1672 vStringDelete (fakeIdent);
1673 requestStrongPoping ();
1674 toDoNext = &letParam;
1675 }
1676 break;
1677
1678 case OcaKEYWORD_mutable:
1679 case OcaKEYWORD_virtual:
1680 case OcaKEYWORD_rec:
1681 /* just ignore to be able to parse such declarations:
1682 * let rec ident = ... */
1683 break;
1684
1685 case Tok_Op:
1686 /* we are defining a new operator, it's a
1687 * function definition */
1688 addTag (ident, K_FUNCTION);
1689 pushStrongContext (ident, ContextFunction);
1690 toDoNext = &letParam;
1691 break;
1692
1693 case Tok_Val:
1694 if (vStringValue (ident)[0] == '_')
1695 addTag (ident, K_FUNCTION);
1696 pushStrongContext (ident, ContextFunction);
1697 requestStrongPoping ();
1698 toDoNext = &letParam;
1699 break;
1700
1701 case OcaIDENTIFIER:
1702 // if we're an identifier, and the next token is too, then
1703 // we're definitely a function.
1704 if (whatNext == OcaIDENTIFIER || whatNext == Tok_PARL)
1705 {
1706 addTag (ident, K_FUNCTION);
1707 pushStrongContext (ident, ContextFunction);
1708 }
1709 else
1710 {
1711 addTag (ident, K_VARIABLE);
1712 pushStrongContext (ident, ContextValue);
1713 }
1714 requestStrongPoping ();
1715 toDoNext = &letParam;
1716 break;
1717
1718 case OcaKEYWORD_end:
1719 globalScope (ident, what, whatNext);
1720 break;
1721
1722 default:
1723 toDoNext = &globalScope;
1724 break;
1725 }
1726 }
1727
1728 /* Handle the "strong" top levels, all 'big' declarations
1729 * happen here */
globalScope(vString * const ident CTAGS_ATTR_UNUSED,ocaToken what,ocaToken whatNext)1730 static void globalScope (vString * const ident CTAGS_ATTR_UNUSED, ocaToken what, ocaToken whatNext)
1731 {
1732 /* Do not touch, this is used only by the global scope
1733 * to handle an 'and' */
1734 static parseNext previousParser = &globalScope;
1735
1736 switch (what)
1737 {
1738 case OcaKEYWORD_and:
1739 cleanupPreviousParser ();
1740 // deal with module M = struct ... end _and_ N = struct ... end
1741 toDoNext = previousParser;
1742 break;
1743
1744 case OcaKEYWORD_type:
1745 cleanupPreviousParser ();
1746 toDoNext = &typeDecl;
1747 previousParser = &typeDecl;
1748 break;
1749
1750 case OcaKEYWORD_class:
1751 cleanupPreviousParser ();
1752 toDoNext = &classDecl;
1753 previousParser = &classDecl;
1754 break;
1755
1756 case OcaKEYWORD_module:
1757 cleanupPreviousParser ();
1758 toDoNext = &moduleDecl;
1759 previousParser = &moduleDecl;
1760 break;
1761
1762 case OcaKEYWORD_end:;
1763 contextType popped = killCurrentState ();
1764
1765 /** so here, end can legally be followed by = or and in the
1766 * situation of
1767 * module M : sig ... end = struct ... end and
1768 * module M struct ... end and N = struct ... end
1769 * and we need to make sure we know we're still inside of a
1770 * struct */
1771 if (whatNext == Tok_EQ && popped == ContextModule)
1772 {
1773 previousParser = &moduleDecl;
1774 toDoNext = &moduleSpecif;
1775 }
1776 else if (whatNext == OcaKEYWORD_and && popped == ContextModule)
1777 toDoNext = &moduleDecl;
1778 needStrongPoping = false;
1779 break;
1780
1781 case OcaKEYWORD_method:
1782 cleanupPreviousParser ();
1783 toDoNext = &methodDecl;
1784 /* and is not allowed in methods */
1785 break;
1786
1787 case OcaKEYWORD_val:
1788 toDoNext = &val;
1789 /* and is not allowed in sigs */
1790 break;
1791
1792 case OcaKEYWORD_let:
1793 cleanupPreviousParser ();
1794 toDoNext = &globalLet;
1795 previousParser = &globalLet;
1796 break;
1797
1798 case OcaKEYWORD_exception:
1799 cleanupPreviousParser ();
1800 toDoNext = &exceptionDecl;
1801 previousParser = &globalScope;
1802 break;
1803
1804 /* must be a #line directive, discard the
1805 * whole line. */
1806 case Tok_Sharp:
1807 /* ignore */
1808 break;
1809
1810 default:
1811 /* we don't care */
1812 break;
1813 }
1814 }
1815
1816 /* Parse expression. Well ignore it is more the case,
1817 * ignore all tokens except "shocking" keywords */
localScope(vString * const ident,ocaToken what,ocaToken whatNext)1818 static void localScope (vString * const ident, ocaToken what, ocaToken whatNext)
1819 {
1820 switch (what)
1821 {
1822
1823 // we're probably in a match, so let's go to the last one
1824 case Tok_Pipe:
1825 jumpToMatchContext ();
1826 break;
1827
1828 case Tok_PARR:
1829 case Tok_BRR:
1830 case Tok_CurlR:
1831 popSoftContext ();
1832 break;
1833
1834 /* Everything that `begin` has an `end`
1835 * as end is overloaded and signal many end
1836 * of things, we add an empty strong context to
1837 * avoid problem with the end.
1838 */
1839 case OcaKEYWORD_begin:
1840 pushContext (ContextStrong, ContextBlock, &mayRedeclare, NULL);
1841 toDoNext = &mayRedeclare;
1842 break;
1843
1844 /* An in keyword signals the end of the previous context and the
1845 * start of a new one. */
1846 case OcaKEYWORD_in:
1847 popLastNamed ();
1848 pushEmptyContext (&localScope);
1849 toDoNext = &mayRedeclare;
1850 break;
1851
1852 /* Ok, we got a '{', which is much likely to create
1853 * a record. We cannot treat it like other [ && (,
1854 * because it may contain the 'with' keyword and screw
1855 * everything else. */
1856 case Tok_CurlL:
1857 toDoNext = &contextualTillToken;
1858 waitedToken = Tok_CurlR;
1859 comeAfter = &localScope;
1860 contextualTillToken (ident, what, whatNext);
1861 break;
1862
1863 /* Yeah imperative feature of OCaml,
1864 * a ';' like in C */
1865 case Tok_semi:
1866 /* ';;' case should end all scopes */
1867 if (whatNext == Tok_semi)
1868 {
1869 popStrongContext ();
1870 toDoNext = &globalScope;
1871 break;
1872 } /* else fallthrough */
1873
1874 /* Every standard operator has very high precedence
1875 * e.g. expr * expr needs no parentheses */
1876 case Tok_Op:
1877 toDoNext = &mayRedeclare;
1878 break;
1879
1880 case Tok_PARL:
1881 case Tok_BRL:
1882 pushEmptyContext (&localScope);
1883 toDoNext = &mayRedeclare;
1884 break;
1885
1886 case OcaKEYWORD_and:
1887 if (toDoNext == &mayRedeclare)
1888 {
1889 popSoftContext ();
1890 pushEmptyContext (localScope);
1891 toDoNext = &localLet;
1892 }
1893 else
1894 {
1895 /* a local 'and' keyword jumps up a context to the last
1896 * named. For ex
1897 * in `with let IDENT ... and IDENT2 ...` ident and
1898 * ident2 are on
1899 * same level, the same as `let IDENT ... in let IDENT2
1900 * ...`
1901 * a 'let' is the only 'and'-chainable construct allowed
1902 * locally
1903 * (thus we had to be one to get here), so we either go
1904 * to
1905 * globalLet or localLet depending on our scope. */
1906 popLastNamed ();
1907 toDoNext = stackIndex == 0 ? &globalLet : &localLet;
1908 }
1909 break;
1910
1911 case OcaKEYWORD_else:
1912 case OcaKEYWORD_then:
1913 popSoftContext ();
1914 pushEmptyContext (&localScope);
1915 toDoNext = &mayRedeclare;
1916 break;
1917
1918 case OcaKEYWORD_if:
1919 pushEmptyContext (&localScope);
1920 toDoNext = &mayRedeclare;
1921 break;
1922
1923 case OcaKEYWORD_match:
1924 pushEmptyContext (&localScope);
1925 toDoNext = &mayRedeclare;
1926 break;
1927
1928 case OcaKEYWORD_with:
1929 popSoftContext ();
1930 toDoNext = &matchPattern;
1931 pushSoftContext (&matchPattern, NULL, ContextMatch);
1932 break;
1933
1934 case OcaKEYWORD_fun:
1935 toDoNext = &letParam;
1936 break;
1937
1938 case OcaKEYWORD_done:
1939 /* doesn't care */
1940 break;
1941
1942 default:
1943 requestStrongPoping ();
1944 globalScope (ident, what, whatNext);
1945 break;
1946 }
1947 }
1948
1949 /*////////////////////////////////////////////////////////////////
1950 //// Deal with the system */
1951 /* in OCaml the file name is the module name used in the language
1952 * with it first letter put in upper case */
computeModuleName(void)1953 static void computeModuleName ( void )
1954 {
1955 /* in OCaml the file name define a module.
1956 * so we define a module if the file has
1957 * things in it. =)
1958 */
1959 const char *filename = getInputFileName ();
1960
1961 int beginIndex = 0;
1962 int endIndex = strlen (filename) - 1;
1963 vString *moduleName = vStringNew ();
1964
1965 while (filename[endIndex] != '.' && endIndex > 0)
1966 endIndex--;
1967
1968 /* avoid problem with path in front of filename */
1969 beginIndex = endIndex;
1970 while (beginIndex > 0)
1971 {
1972 if (filename[beginIndex] == '\\' || filename[beginIndex] == '/')
1973 {
1974 beginIndex++;
1975 break;
1976 }
1977
1978 beginIndex--;
1979 }
1980
1981 vStringNCopyS (moduleName, &filename[beginIndex], endIndex - beginIndex);
1982
1983 if (isLowerAlpha (vStringChar (moduleName, 0)))
1984 vStringChar (moduleName, 0) += ('A' - 'a');
1985
1986 addTag (moduleName, K_MODULE);
1987 vStringDelete (moduleName);
1988 }
1989
1990 /* Allocate all string of the context stack */
initStack(void)1991 static void initStack ( void )
1992 {
1993 int i;
1994 for (i = 0; i < OCAML_MAX_STACK_SIZE; ++i)
1995 stack[i].contextName = vStringNew ();
1996 stackIndex = 0;
1997 }
1998
clearStack(void)1999 static void clearStack ( void )
2000 {
2001 int i;
2002 for (i = 0; i < OCAML_MAX_STACK_SIZE; ++i)
2003 vStringDelete (stack[i].contextName);
2004 }
2005
findOcamlTags(void)2006 static void findOcamlTags (void)
2007 {
2008 lexingState st;
2009 ocaToken tok;
2010
2011 /* One-token lookahead gives us the ability to
2012 * do much more accurate analysis */
2013 lexingState nextSt;
2014 ocaToken nextTok;
2015
2016 initStack ();
2017
2018 tempIdent = vStringNew ();
2019 lastModule = vStringNew ();
2020 lastClass = vStringNew ();
2021 vString *temp_cp = vStringNew ();
2022
2023 nextSt.name = vStringNew ();
2024 nextSt.cp = readLineFromInputFile ();
2025 ocaLineNumber = getInputLineNumber();
2026 ocaFilePosition = getInputFilePosition();
2027 toDoNext = &globalScope;
2028 nextTok = lex (&nextSt);
2029
2030 if (nextTok != Tok_EOF)
2031 computeModuleName ();
2032
2033 /* prime the lookahead token */
2034 st = nextSt; // preserve the old state for our first token
2035 st.name = vStringNewCopy (st.name);
2036 st.cp = (const unsigned char *) vStringValue (temp_cp);
2037 tok = nextTok;
2038 ocaLineNumber = getInputLineNumber(); /* ??? getSourceLineNumber() */
2039 ocaFilePosition = getInputFilePosition();
2040 nextTok = lex (&nextSt);
2041
2042 /* main loop */
2043 while (tok != Tok_EOF)
2044 {
2045 (*toDoNext) (st.name, tok, nextTok);
2046
2047 tok = nextTok;
2048 ocaLineNumber = getInputLineNumber(); /* ??? */
2049 ocaFilePosition = getInputFilePosition();
2050
2051 if (nextTok != Tok_EOF)
2052 {
2053 vStringCopyS (temp_cp, (const char *) nextSt.cp);
2054 st.cp = (const unsigned char *) vStringValue (temp_cp);
2055 vStringCopy (st.name, nextSt.name);
2056 nextTok = lex (&nextSt);
2057 }
2058 else
2059 break;
2060 }
2061
2062 vStringDelete (st.name);
2063 vStringDelete (nextSt.name);
2064 vStringDelete (temp_cp);
2065 vStringDelete (tempIdent);
2066 vStringDelete (lastModule);
2067 vStringDelete (lastClass);
2068 clearStack ();
2069 }
2070
ocamlInitialize(const langType language)2071 static void ocamlInitialize (const langType language)
2072 {
2073 Lang_Ocaml = language;
2074
2075 initOperatorTable ();
2076 }
2077
OcamlParser(void)2078 extern parserDefinition *OcamlParser (void)
2079 {
2080 static const char *const extensions[] = { "ml", "mli", "aug", NULL };
2081 static const char *const aliases[] = { "tuareg", /* mode name of emacs */
2082 "caml", /* mode name of emacs */
2083 NULL };
2084 parserDefinition *def = parserNew ("OCaml");
2085 def->kindTable = OcamlKinds;
2086 def->kindCount = ARRAY_SIZE (OcamlKinds);
2087 def->extensions = extensions;
2088 def->aliases = aliases;
2089 def->parser = findOcamlTags;
2090 def->initialize = ocamlInitialize;
2091 def->keywordTable = OcamlKeywordTable;
2092 def->keywordCount = ARRAY_SIZE (OcamlKeywordTable);
2093 return def;
2094 }
2095