1 /*
2 * Copyright (c) 2000-2003, Darren Hiebert
3 *
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License version 2 or (at your option) any later version.
6 *
7 * This module contains functions for applying regular expression matching.
8 *
9 * The code for utilizing the Gnu regex package with regards to processing the
10 * regex option and checking for regex matches was adapted from routines in
11 * Gnu etags.
12 */
13
14 /*
15 * INCLUDE FILES
16 */
17 #include "general.h" /* must always come first */
18
19 #include <string.h>
20
21 #include <ctype.h>
22 #include <stddef.h>
23 #ifdef HAVE_SYS_TYPES_H
24 # include <sys/types.h> /* declare off_t (not known to regex.h on FreeBSD) */
25 #endif
26
27 #include <inttypes.h>
28
29 #include "debug.h"
30 #include "colprint_p.h"
31 #include "entry_p.h"
32 #include "field_p.h"
33 #include "flags_p.h"
34 #include "htable.h"
35 #include "kind.h"
36 #include "options.h"
37 #include "optscript.h"
38 #include "parse_p.h"
39 #include "promise.h"
40 #include "read.h"
41 #include "read_p.h"
42 #include "routines.h"
43 #include "routines_p.h"
44 #include "script_p.h"
45 #include "trace.h"
46 #include "trashbox.h"
47 #include "xtag_p.h"
48
/* NOTE(review): this flag is not referenced in the visible part of the file;
 * presumably it records whether regex support has been set up -- confirm
 * against the code that assigns it. */
static bool regexAvailable = false;
50
51 /*
52 * MACROS
53 */
54
/* The maximum depth of the stack used by the taction enter/leave actions. */
#define MTABLE_STACK_MAX_DEPTH 64

/* How many times ctags allows an mtable parser to
   stay at the same input position across table switching.

   The value is derived from MTABLE_STACK_MAX_DEPTH.
   There is no deep meaning in that; it is just for simplifying
   the Tmain cases. */
#define MTABLE_MOTIONLESS_MAX (MTABLE_STACK_MAX_DEPTH + 1)

/* NOTE(review): presumably the name of the regex backend used when none is
   requested explicitly -- confirm against the backend selection code. */
#define DEFAULT_REGEX_BACKEND "e"
67
68 /*
69 * DATA DECLARATIONS
70 */
71
/* Discriminator for the union `u' in regexPattern: PTRN_TAG selects the
 * `tag' member, PTRN_CALLBACK selects the `callback' member. */
enum pType { PTRN_TAG, PTRN_CALLBACK };

/* Bit flags produced by the {scope=ACTION} and {placeholder} flag handlers
 * (scope_ptrn_flag_eval and placeholder_ptrn_flag_eval):
 *   ref     -> SCOPE_REF
 *   push    -> SCOPE_PUSH | SCOPE_REF
 *   pop     -> SCOPE_POP
 *   clear   -> SCOPE_CLEAR
 *   set     -> SCOPE_CLEAR | SCOPE_PUSH
 *   replace -> SCOPE_POP | SCOPE_REF_AFTER_POP | SCOPE_PUSH
 *   {placeholder} -> SCOPE_PLACEHOLDER
 */
enum scopeAction {
	SCOPE_REF = 1UL << 0,
	SCOPE_POP = 1UL << 1,
	SCOPE_PUSH = 1UL << 2,
	SCOPE_CLEAR = 1UL << 3,
	SCOPE_REF_AFTER_POP = 1UL << 4,
	SCOPE_PLACEHOLDER = 1UL << 5,
};

/* Table-switching action attached to a multi-table pattern; the comment on
 * each constant shows the regex flag that selects it. */
enum tableAction {
	TACTION_NOP,
	TACTION_ENTER, /* {tenter=N} */
	TACTION_LEAVE, /* {tleave} */
	TACTION_JUMP, /* {tjump=N} */
	TACTION_RESET, /* {treset=N} */
	TACTION_QUIT, /* {tquit} */
};
91
/* One {_field=FIELD:VALUE} specification attached to a pattern. */
struct fieldPattern {
	/* the field resolved from the FIELD name for the owner language */
	fieldType ftype;
	/* private copy of the VALUE part; released by fieldPatternDelete() */
	const char *template;
};
96
/* One boundary (start or end) of the area named by the {_guest=...} flag. */
struct boundarySpec {
	/* number of the pattern group the boundary refers to */
	int patternGroup;
	/* true for the "start" suffix, false for the "end" suffix */
	bool fromStartOfGroup;
	/* true when the boundary field was left empty in the flag value */
	bool placeholder;
};
102
/* How the guest parser is chosen, as parsed from the first field of the
 * {_guest=...} flag value. */
struct guestLangSpec {
	enum guestLangSpecType {
		GUEST_LANG_UNKNOWN,
		GUEST_LANG_PLACEHOLDER, /* _ */
		GUEST_LANG_STATIC_LANGNAME, /* C, Python,... */
		GUEST_LANG_PTN_GROUP_FOR_LANGNAME, /* \1, \2, ..., \9 */
		GUEST_LANG_PTN_GROUP_FOR_FILEMAP, /* *1, *2, ... *9 */
	} type;
	union {
		/* set together with GUEST_LANG_STATIC_LANGNAME */
		langType lang;
		/* set together with the two GUEST_LANG_PTN_GROUP_* types */
		int patternGroup;
	} spec;
};
116
/* Complete result of parsing a {_guest=PARSERSPEC,N0[start|end],N1[start|end]}
 * flag: which parser to run and the two boundaries of the area. */
struct guestSpec {
	struct guestLangSpec lang;
	/* indexes into boundary[] */
#define BOUNDARY_START 0
#define BOUNDARY_END 1
	struct boundarySpec boundary[2];
};
123
/* Group numbers given with the multiline flags {mgroup=N} and
 * {_advanceTo=N[start|end]}. */
struct mGroupSpec {
	/* value meaning "no group given"; see initMgroup() */
#define NO_MULTILINE -1
	/* set by {mgroup=N} */
	int forLineNumberDetermination;
	/* set by {_advanceTo=N...} */
	int forNextScanning;
	/* true => start, false => end */
	bool nextFromStart;
};
131
/* Table action of a multi-table pattern together with the table(s) it
 * refers to. */
struct mTableActionSpec {
	enum tableAction action;
	/* NULL after initTaction(); actions such as {tleave} take no table */
	struct regexTable *table;

	/* used when action == TACTION_ENTER */
	struct regexTable *continuation_table;
};
139
/* A compiled pattern plus everything parsed from its flags.  Instances are
 * reference counted (see refPattern() and deletePattern()) so that several
 * table entries can share one pattern. */
typedef struct {
	/* compiled code and the backend that owns it */
	regexCompiledCode pattern;
	/* selects the valid member of `u' */
	enum pType type;
	/* NOTE(review): presumably set through the {exclusive} flag -- confirm */
	bool exclusive;
	bool accept_empty_name;
	union {
		struct {
			int kindIndex;
			/* accumulated by the {_role=ROLE} flag */
			roleBitsType roleBits;
			/* owned; freed by deletePattern() when type == PTRN_TAG */
			char *name_pattern;
		} tag;
		struct {
			regexCallback function;
			void *userData;
		} callback;
	} u;
	/* NOTE(review): presumably a combination of enum scopeAction bits --
	 * confirm against the code passing it to the scope flag handlers */
	unsigned int scopeActions;
	bool *disabled;

	/* parser type given to newPattern() */
	enum regexParserType regptype;
	/* initialised by newPattern() only for REG_PARSER_MULTI_LINE */
	struct mGroupSpec mgroup;
	struct guestSpec guest;
	/* initialised by newPattern() only for REG_PARSER_MULTI_TABLE */
	struct mTableActionSpec taction;

	/* XTAG_UNKNOWN unless set by the {_extra=EXTRA} flag */
	int xtagType;
	/* array of struct fieldPattern, created on the first {_field=...} flag */
	ptrArray *fieldPatterns;

	/* owned; freed by deletePattern() */
	char *pattern_string;

	/* owned copy of PREFIX from {_anonymous=PREFIX}, or NULL */
	char *anonymous_tag_prefix;

	/* filled in by the {fatal="..."} and {warning="..."} flags */
	struct {
		errorSelection selection;
		char *message_string;
	} message;

	/* both are released by deletePattern() when non-NULL */
	char *optscript_src;
	EsObject *optscript;

	/* number of owners; starts at 1 in newPattern() */
	int refcount;
} regexPattern;
181
182
/* One slot in a pattern list or table: a (possibly shared) pattern plus
 * counters private to this slot. */
typedef struct {
	/* the pattern can be shared among entries using a refcount */
	regexPattern *pattern;

	/* but the statistics are per-table-entry */
	struct {
		unsigned int match;
		unsigned int unmatch;
	} statistics;
} regexTableEntry;


#define TABLE_INDEX_UNUSED -1
/* A named table of regexTableEntry objects used by the multi-table parser.
 * Both members are released by deleteTable(). */
struct regexTable {
	char *name;
	ptrArray *entries;
};
200
/* One boundary of a guest request; `offset' is meaningful only when
 * `offset_set' is true. */
struct boundaryInRequest {
	bool offset_set;
	off_t offset;
};

/* A request to run a guest parser on an area.
 * NOTE(review): boundary[] presumably uses the BOUNDARY_START/BOUNDARY_END
 * indexes of struct guestSpec -- confirm against the code that fills it. */
struct guestRequest {
	bool lang_set;
	langType lang;

	struct boundaryInRequest boundary[2];
};
212
/* Match context handed to the optscript helpers (see the make_match_string()
 * and make_mloc() prototypes, which take a group number within a window).
 * NOTE(review): the exact meaning of `line', `start' and the advanceto
 * members is defined by code outside this excerpt -- confirm there. */
typedef struct {
	const char *line;
	const char *start;
	const regexPattern* const patbuf;
	const regmatch_t* const pmatch;
	int nmatch;
	struct mTableActionSpec taction;
	bool advanceto;
	unsigned int advanceto_delta;
} scriptWindow;
223
/* Per-parser state of the regex pseudo-parser; created by
 * allocLregexControlBlock() and destroyed by freeLregexControlBlock(). */
struct lregexControlBlock {
	int currentScope;
	/* regexTableEntry lists, indexed by REG_PARSER_SINGLE_LINE and
	 * REG_PARSER_MULTI_LINE */
	ptrArray *entries [2];

	/* struct regexTable objects (multi-table patterns are stored there) */
	ptrArray *tables;
	/* created without a delete function, so it does not own its items */
	ptrArray *tstack;

	struct guestRequest *guest_req;

	EsObject *local_dict;

	/* hook[] owns eFree-able items, hook_code[] owns EsObject references */
	ptrArray *hook[SCRIPT_HOOK_MAX];
	ptrArray *hook_code[SCRIPT_HOOK_MAX];

	/* id of the parser this block was allocated for */
	langType owner;

	scriptWindow *window;
};
242
243 /*
244 * DATA DEFINITIONS
245 */
/* Optscript state shared by this file. */
static OptVM *optvm;
static EsObject *lregex_dict = es_nil;

/*
* FUNCTION DEFINITIONS
*/

/* Forward declarations for helpers defined later in the file. */
static int getTableIndexForName (const struct lregexControlBlock *const lcb, const char *name);
static void deletePattern (regexPattern *p);
static int makePromiseForAreaSpecifiedWithOffsets (const char *parser,
						   off_t startOffset,
						   off_t endOffset);

/* Life cycle and use of struct guestRequest. */
static struct guestRequest *guestRequestNew (void);
static void guestRequestDelete (struct guestRequest *);
static bool guestRequestIsFilled(struct guestRequest *);
static void guestRequestClear (struct guestRequest *);
static void guestRequestSubmit (struct guestRequest *);

/* Optscript integration. */
static EsObject *scriptRead (OptVM *vm, const char *src);
static void scriptSetup (OptVM *vm, struct lregexControlBlock *lcb, int corkIndex, scriptWindow *window);
static EsObject* scriptEval (OptVM *vm, EsObject *optscript);
static void scriptEvalHook (OptVM *vm, struct lregexControlBlock *lcb, enum scriptHook hook);
static void scriptTeardown (OptVM *vm, struct lregexControlBlock *lcb);

/* Accessors for a match group of a scriptWindow. */
static char* make_match_string (scriptWindow *window, int group);
static matchLoc *make_mloc (scriptWindow *window, int group, bool start);
272
deleteTable(void * ptrn)273 static void deleteTable (void *ptrn)
274 {
275 struct regexTable *t = ptrn;
276
277 ptrArrayDelete (t->entries);
278 eFree (t->name);
279 eFree (t);
280 }
281
deleteTableEntry(void * ptrn)282 static void deleteTableEntry (void *ptrn)
283 {
284 regexTableEntry *e = ptrn;
285 Assert (e && e->pattern);
286 deletePattern (e->pattern);
287 eFree (e);
288 }
289
/* Drop one reference on P.  When the last reference goes away, release the
 * compiled code through its backend and free every string and array the
 * pattern owns, then the pattern itself. */
static void deletePattern (regexPattern *p)
{
	/* Still shared by another table entry: nothing more to do. */
	if (--p->refcount > 0)
		return;

	p->pattern.backend->delete_code (p->pattern.code);

	/* name_pattern exists only in the `tag' arm of the union. */
	if (p->type == PTRN_TAG)
	{
		eFree (p->u.tag.name_pattern);
		p->u.tag.name_pattern = NULL;
	}

	if (p->fieldPatterns != NULL)
	{
		ptrArrayDelete (p->fieldPatterns);
		p->fieldPatterns = NULL;
	}

	eFree (p->pattern_string);

	/* The remaining members are optional and may never have been set. */
	if (p->message.message_string != NULL)
	{
		eFree (p->message.message_string);
	}
	if (p->anonymous_tag_prefix != NULL)
	{
		eFree (p->anonymous_tag_prefix);
	}
	if (p->optscript != NULL)
	{
		es_object_unref (p->optscript);
	}
	if (p->optscript_src != NULL)
	{
		eFree (p->optscript_src);
	}

	eFree (p);
}
326
clearPatternSet(struct lregexControlBlock * lcb)327 static void clearPatternSet (struct lregexControlBlock *lcb)
328 {
329 ptrArrayClear (lcb->entries [REG_PARSER_SINGLE_LINE]);
330 ptrArrayClear (lcb->entries [REG_PARSER_MULTI_LINE]);
331 ptrArrayClear (lcb->tables);
332 }
333
/* Create the control block for PARSER: zero-filled, with empty pattern
 * lists, table list, table stack, guest request and per-hook arrays.  The
 * result must be released with freeLregexControlBlock(). */
extern struct lregexControlBlock* allocLregexControlBlock (parserDefinition *parser)
{
	struct lregexControlBlock *const block = xCalloc (1, struct lregexControlBlock);

	block->owner = parser->id;
	block->local_dict = es_nil;
	block->guest_req = guestRequestNew ();

	/* Both pattern lists own their entries; the table stack owns nothing. */
	block->entries[REG_PARSER_SINGLE_LINE] = ptrArrayNew(deleteTableEntry);
	block->entries[REG_PARSER_MULTI_LINE] = ptrArrayNew(deleteTableEntry);
	block->tables = ptrArrayNew(deleteTable);
	block->tstack = ptrArrayNew(NULL);

	int hook = 0;
	while (hook < SCRIPT_HOOK_MAX)
	{
		block->hook[hook] = ptrArrayNew (eFree);
		block->hook_code[hook] = ptrArrayNew ((ptrArrayDeleteFunc)es_object_unref);
		hook++;
	}

	return block;
}
354
freeLregexControlBlock(struct lregexControlBlock * lcb)355 extern void freeLregexControlBlock (struct lregexControlBlock* lcb)
356 {
357 clearPatternSet (lcb);
358
359 ptrArrayDelete (lcb->entries [REG_PARSER_SINGLE_LINE]);
360 lcb->entries [REG_PARSER_SINGLE_LINE] = NULL;
361 ptrArrayDelete (lcb->entries [REG_PARSER_MULTI_LINE]);
362 lcb->entries [REG_PARSER_MULTI_LINE] = NULL;
363
364 ptrArrayDelete (lcb->tables);
365 lcb->tables = NULL;
366
367 ptrArrayDelete (lcb->tstack);
368 lcb->tstack = NULL;
369
370 guestRequestDelete (lcb->guest_req);
371 lcb->guest_req = NULL;
372
373 es_object_unref (lcb->local_dict);
374 lcb->local_dict = es_nil;
375
376 for (int i = 0; i < SCRIPT_HOOK_MAX; i++)
377 {
378 ptrArrayDelete (lcb->hook[i]);
379 lcb->hook[i] = NULL;
380
381 ptrArrayDelete (lcb->hook_code[i]);
382 lcb->hook_code[i] = NULL;
383 }
384
385 eFree (lcb);
386 }
387
388 /*
389 * Regex pseudo-parser
390 */
391
/* Initialise E as a tag named NAME with the given kind, role and scope.
 * NAME may be empty only when PLACEHOLDER is non-zero.  A non-zero LINE
 * overrides the line number and file position (taken from *POS) of the
 * entry.  XTAG_TYPE, unless XTAG_UNKNOWN, is marked on the entry. */
static void initRegexTag (tagEntryInfo *e,
			  const char * name, int kindIndex, int roleIndex, int scopeIndex, int placeholder,
			  unsigned long line, MIOPos *pos, int xtag_type)
{
	Assert (name != NULL && ((name[0] != '\0') || placeholder));

	initRefTagEntry (e, name, kindIndex, roleIndex);
	e->extensionFields.scopeIndex = scopeIndex;
	e->placeholder = (placeholder != 0);

	if (line != 0)
	{
		e->lineNumber = line;
		e->filePosition = *pos;
	}

	if (xtag_type == XTAG_UNKNOWN)
		return;

	markTagExtraBit (e, xtag_type);
}
409
410 /*
411 * Regex pattern definition
412 */
413
/* Take a string like "/blah/" and turn it into "blah", working in place.
 *
 * The first character of NAME is the separator.  Scanning stops at the
 * first unquoted separator after it (or at the end of the string), and the
 * text scanned so far is written back starting at NAME[0] and then
 * NUL-terminated.
 *
 * Backslash handling:
 *   - "\<separator>" becomes the separator character itself,
 *   - "\t" becomes a Tab character,
 *   - "\n" becomes a Newline character, but only if MULTILINE is true,
 *   - any other quoted character is kept together with its backslash,
 *   - a lone backslash at the very end of the string is dropped.
 *
 * Returns a pointer to the terminating separator, or to the terminating
 * NUL if no closing separator was found.  An empty NAME is returned
 * unchanged.
 */
static char* scanSeparators (char* name, bool multiline)
{
	const char sep = name [0];
	char *copyto = name;
	bool quoted = false;

	/* An empty string has no separator; stepping over name[0] would read
	 * (and then write) past the terminator. */
	if (sep == '\0')
		return name;

	for (++name ; *name != '\0' ; ++name)
	{
		if (quoted)
		{
			if (*name == sep)
				*copyto++ = sep;
			else if (*name == 't')
				*copyto++ = '\t';
			else if (multiline && *name == 'n')
				*copyto++ = '\n';
			else
			{
				/* Something else is quoted, so preserve the quote. */
				*copyto++ = '\\';
				*copyto++ = *name;
			}
			quoted = false;
		}
		else if (*name == '\\')
			quoted = true;
		else if (*name == sep)
			break;
		else
			*copyto++ = *name;
	}
	*copyto = '\0';
	return name;
}
458
459 /* Parse `regexp', in form "/regex/name/[k,Kind/]flags" (where the separator
460 * character is whatever the first character of `regexp' is), by breaking it
461 * up into null terminated strings, removing the separators, and expanding
462 * '\t' into tabs. When complete, `regexp' points to the line matching
463 * pattern, a pointer to the name matching pattern is written to `name', a
464 * pointer to the kinds is written to `kinds' (possibly NULL), and a pointer
465 * to the trailing flags is written to `flags'. If the pattern is not in the
466 * correct format, a false value is returned.
467 */
parseTagRegex(enum regexParserType regptype,char * const regexp,char ** const name,char ** const kinds,char ** const flags)468 static bool parseTagRegex (
469 enum regexParserType regptype,
470 char* const regexp, char** const name,
471 char** const kinds, char** const flags)
472 {
473 bool result = false;
474 const int separator = (unsigned char) regexp [0];
475
476 *name = scanSeparators (regexp, (regptype == REG_PARSER_MULTI_LINE
477 || regptype == REG_PARSER_MULTI_TABLE));
478 if (*regexp == '\0')
479 error (WARNING, "empty regexp");
480 else if (**name != separator)
481 error (WARNING, "%s: incomplete regexp", regexp);
482 else
483 {
484 char* const third = scanSeparators (*name, false);
485 if (**name != '\0' && (*name) [strlen (*name) - 1] == '\\')
486 error (WARNING, "error in name pattern: \"%s\"", *name);
487 if (*third != separator)
488 error (WARNING, "%s: regexp missing final separator", regexp);
489 else
490 {
491 /*
492 * first----------V third------------V
493 * --regex-<LANG>=/regexp/replacement/[kind-spec/][flags][{{\n...\n}}]
494 * second----------------^ fourth---------------^
495 */
496
497 /*
498 * The following code assumes "{{\n" is never used in flags.
499 * If the input comes from the command line or an optlib file,
500 * this assumption is always correct; a new line character is never
501 * put at the middle (or end) of the input.
502 *
503 * TODO: How about the input comes from the source code translated
504 * by optlib2c?
505 */
506 char *script = strstr (third, "{{\n");
507 if (script)
508 {
509 /* The script part should not be unescaed by scanSeparators().
510 * By spitting the string, we can hide the script part from
511 * scanSeparators(). */
512 script [0] = '\0';
513 }
514
515 char* const fourth = scanSeparators (third, false);
516 if (*fourth == separator)
517 {
518 *kinds = third;
519 scanSeparators (fourth, false);
520 *flags = fourth;
521 }
522 else
523 {
524 *flags = third;
525 *kinds = NULL;
526 }
527
528 if (script)
529 {
530 Assert (*flags);
531
532 char *end = *flags + strlen (*flags);
533 script [0] = '{';
534 if (end != script)
535 {
536 size_t len = strlen (script);
537 memmove (end, script, len);
538 end [len] = '\0';
539 }
540 }
541
542 result = true;
543 }
544 }
545 return result;
546 }
547
548
/* Handler for the short flag 'x': stores true into the bool that DATA
 * points to. */
static void pre_ptrn_flag_exclusive_short (char c CTAGS_ATTR_UNUSED, void* data)
{
	*(bool *) data = true;
}
554
/* Handler for the long flag {exclusive}: same effect as the short flag 'x',
 * to whose handler it delegates. */
static void pre_ptrn_flag_exclusive_long (const char* const s CTAGS_ATTR_UNUSED, const char* const unused CTAGS_ATTR_UNUSED, void* data)
{
	const char short_equivalent = 'x';

	pre_ptrn_flag_exclusive_short (short_equivalent, data);
}
559
/* The "exclusive" flag, available both as the letter 'x' and as {exclusive}.
 * NOTE(review): the initializer appears to be laid out as { short letter,
 * long name, short handler, long handler, parameter name, description } --
 * confirm against the declaration of flagDefinition. */
static flagDefinition prePtrnFlagDef[] = {
	{ 'x', "exclusive", pre_ptrn_flag_exclusive_short, pre_ptrn_flag_exclusive_long ,
	  NULL, "skip testing the other patterns if a line is matched to this pattern"},
};
564
/* Handler for the long flag {scope=ACTION}.
 *
 * V is the ACTION text; DATA points to the unsigned int that collects
 * enum scopeAction bits.  The bits for the action are OR-ed into it:
 *   ref     -> SCOPE_REF
 *   push    -> SCOPE_PUSH | SCOPE_REF
 *   pop     -> SCOPE_POP
 *   clear   -> SCOPE_CLEAR
 *   set     -> SCOPE_CLEAR | SCOPE_PUSH
 *   replace -> SCOPE_POP | SCOPE_REF_AFTER_POP | SCOPE_PUSH
 * A missing or unknown ACTION is reported as a FATAL error.
 */
static void scope_ptrn_flag_eval (const char* const f CTAGS_ATTR_UNUSED,
				  const char* const v, void* data)
{
	unsigned int *bfields = data;

	/* "{scope}" without "=ACTION" arrives here with no value; it must not
	 * reach strcmp() or a "%s" conversion. */
	if (v == NULL)
	{
		error (FATAL, "No value is given for scope flag in regex definition");
		return;
	}

	if (strcmp (v, "ref") == 0)
		*bfields |= SCOPE_REF;
	else if (strcmp (v, "push") == 0)
		*bfields |= (SCOPE_PUSH | SCOPE_REF);
	else if (strcmp (v, "pop") == 0)
		*bfields |= SCOPE_POP;
	else if (strcmp (v, "clear") == 0)
		*bfields |= SCOPE_CLEAR;
	else if (strcmp (v, "set") == 0)
		*bfields |= (SCOPE_CLEAR | SCOPE_PUSH);
	else if (strcmp (v, "replace") == 0)
		*bfields |= (SCOPE_POP|SCOPE_REF_AFTER_POP|SCOPE_PUSH);
	else
		error (FATAL, "Unexpected value for scope flag in regex definition: scope=%s", v);
}
585
/* Handler for the long flag {placeholder}: adds SCOPE_PLACEHOLDER to the
 * scope action bits that DATA points to.  The flag takes no value. */
static void placeholder_ptrn_flag_eval (const char* const f CTAGS_ATTR_UNUSED,
					const char* const v CTAGS_ATTR_UNUSED, void* data)
{
	unsigned int *const actions = data;

	*actions = *actions | SCOPE_PLACEHOLDER;
}
592
/* Long-only flags that manipulate the scope action bits of a pattern:
 * {scope=ACTION} and {placeholder}. */
static flagDefinition scopePtrnFlagDef[] = {
	{ '\0', "scope", NULL, scope_ptrn_flag_eval,
	  "ACTION", "use scope stack: ACTION = ref|push|pop|clear|set|replace"},
	{ '\0', "placeholder", NULL, placeholder_ptrn_flag_eval,
	  NULL, "don't put this tag to tags file."},
};
599
kindNew(char letter,const char * name,const char * description)600 static kindDefinition *kindNew (char letter, const char *name, const char *description)
601 {
602 kindDefinition *kdef = xCalloc (1, kindDefinition);
603 kdef->letter = letter;
604 kdef->name = eStrdup (name);
605 kdef->description = eStrdup(description? description: kdef->name);
606 kdef->enabled = true;
607 return kdef;
608 }
609
/* Release a kind definition made by kindNew(), including its name and
 * description strings. */
static void kindFree (kindDefinition *kind)
{
	void *const owned_name = (void *)kind->name;
	void *const owned_description = (void *)kind->description;

	/* Scrub the members before the storage goes away. */
	kind->letter = '\0';
	kind->name = NULL;
	kind->description = NULL;

	eFree (owned_name);
	eFree (owned_description);
	eFree (kind);
}
619
initMgroup(struct mGroupSpec * mgroup)620 static void initMgroup(struct mGroupSpec *mgroup)
621 {
622 mgroup->forLineNumberDetermination = NO_MULTILINE;
623 mgroup->forNextScanning = NO_MULTILINE;
624 mgroup->nextFromStart = false;
625 }
626
initGuestSpec(struct guestSpec * guest)627 static void initGuestSpec (struct guestSpec *guest)
628 {
629 guest->lang.type = GUEST_LANG_UNKNOWN;
630 }
631
initTaction(struct mTableActionSpec * taction)632 static void initTaction(struct mTableActionSpec *taction)
633 {
634 taction->action = TACTION_NOP;
635 taction->table = NULL;
636 }
637
/* Take one more reference on PTRN and hand the same pointer back.  The
 * reference is given up again with deletePattern(). */
static regexPattern * refPattern (regexPattern * ptrn)
{
	ptrn->refcount += 1;
	return ptrn;
}
643
newPattern(regexCompiledCode * const pattern,enum regexParserType regptype)644 static regexPattern * newPattern (regexCompiledCode* const pattern,
645 enum regexParserType regptype)
646 {
647 regexPattern *ptrn = xCalloc(1, regexPattern);
648
649 ptrn->pattern.backend = pattern->backend;
650 ptrn->pattern.code = pattern->code;
651
652 ptrn->exclusive = false;
653 ptrn->accept_empty_name = false;
654 ptrn->regptype = regptype;
655 ptrn->xtagType = XTAG_UNKNOWN;
656
657 if (regptype == REG_PARSER_MULTI_LINE)
658 initMgroup(&ptrn->mgroup);
659 if (regptype == REG_PARSER_MULTI_TABLE)
660 initTaction(&ptrn->taction);
661 initGuestSpec (&ptrn->guest);
662
663 ptrn->u.tag.roleBits = 0;
664 ptrn->refcount = 1;
665
666 ptrn->optscript = NULL;
667 ptrn->optscript_src = NULL;
668
669 return ptrn;
670 }
671
/* Make a new table entry that shares the pattern of OTHER (one more
 * reference is taken on it).  The statistics of the new entry start at
 * zero. */
static regexTableEntry * newRefPatternEntry (regexTableEntry * other)
{
	regexTableEntry *const sharing = xCalloc (1, regexTableEntry);

	Assert (other && other->pattern);

	sharing->pattern = refPattern(other->pattern);

	return sharing;
}
681
newEntry(regexCompiledCode * const pattern,enum regexParserType regptype)682 static regexTableEntry * newEntry (regexCompiledCode* const pattern,
683 enum regexParserType regptype)
684 {
685 regexTableEntry *entry = xCalloc (1, regexTableEntry);
686 entry->pattern = newPattern (pattern, regptype);
687 return entry;
688 }
689
addCompiledTagCommon(struct lregexControlBlock * lcb,int table_index,regexCompiledCode * const pattern,enum regexParserType regptype)690 static regexPattern* addCompiledTagCommon (struct lregexControlBlock *lcb,
691 int table_index,
692 regexCompiledCode* const pattern,
693 enum regexParserType regptype)
694 {
695 regexTableEntry *entry = newEntry (pattern, regptype);
696
697 if (regptype == REG_PARSER_MULTI_TABLE)
698 {
699 struct regexTable *table = ptrArrayItem (lcb->tables, table_index);
700 Assert(table);
701
702 ptrArrayAdd (table->entries, entry);
703 }
704 else
705 ptrArrayAdd (lcb->entries[regptype], entry);
706
707 useRegexMethod(lcb->owner);
708
709 return entry->pattern;
710 }
711
pre_ptrn_flag_mgroup_long(const char * const s,const char * const v,void * data)712 static void pre_ptrn_flag_mgroup_long (const char* const s, const char* const v, void* data)
713 {
714 struct mGroupSpec *mgroup = data;
715 if (!v)
716 {
717 error (WARNING, "no value is given for: %s", s);
718 return;
719 }
720 if (!strToInt (v, 10, &mgroup->forLineNumberDetermination))
721 {
722 error (WARNING, "wrong %s specification: %s", s, v);
723 mgroup->forLineNumberDetermination = NO_MULTILINE;
724 }
725 else if (mgroup->forLineNumberDetermination < 0
726 || mgroup->forLineNumberDetermination >= BACK_REFERENCE_COUNT)
727 {
728 error (WARNING, "out of range(0 ~ %d) %s specification: %s",
729 (BACK_REFERENCE_COUNT - 1),
730 s, v);
731 mgroup->forLineNumberDetermination = NO_MULTILINE;
732 }
733 }
734
pre_ptrn_flag_advanceTo_long(const char * const s,const char * const v,void * data)735 static void pre_ptrn_flag_advanceTo_long (const char* const s, const char* const v, void* data)
736 {
737 struct mGroupSpec *mgroup = data;
738 char *vdup;
739 char *tmp;
740
741
742 if (!v)
743 {
744 error (WARNING, "no value is given for: %s", s);
745 return;
746 }
747
748 vdup = eStrdup (v);
749
750 mgroup->nextFromStart = false;
751 if ((tmp = strstr(vdup, "start")))
752 {
753 mgroup->nextFromStart = true;
754 *tmp = '\0';
755 }
756 else if ((tmp = strstr(vdup, "end")))
757 *tmp = '\0';
758
759 if (!strToInt (vdup, 10, &(mgroup->forNextScanning)))
760 {
761 error (WARNING, "wrong %s specification: %s", s, vdup);
762 mgroup->nextFromStart = false;
763 }
764 else if (mgroup->forNextScanning < 0 || mgroup->forNextScanning >= BACK_REFERENCE_COUNT)
765 {
766 error (WARNING, "out of range(0 ~ %d) %s specification: %s",
767 (BACK_REFERENCE_COUNT - 1), s, vdup);
768 mgroup->nextFromStart = false;
769 }
770
771 eFree (vdup);
772 }
773
/* Argument block read by pre_ptrn_flag_guest_long() through its `data'
 * pointer. */
struct guestPtrnFlagData {
	/* parser type of the pattern; REG_PARSER_MULTI_LINE restricts the use
	 * of placeholders */
	enum regexParserType type;
	/* where the parsed specification is stored */
	struct guestSpec *guest;
};
778
pre_ptrn_flag_guest_long(const char * const s,const char * const v,void * data)779 static void pre_ptrn_flag_guest_long (const char* const s, const char* const v, void* data)
780 {
781 struct guestPtrnFlagData *flagData = data;
782 enum regexParserType type = flagData->type;
783 struct guestSpec *guest = flagData->guest;
784 struct boundarySpec *current;
785
786 if (!v)
787 {
788 error (WARNING, "no value is given for: %s", s);
789 return;
790 }
791
792 char *tmp = strchr (v, ',');
793 if (tmp == NULL)
794 {
795 error (WARNING, "no terminator found for parser name: %s", s);
796 return;
797 }
798
799 if ((tmp - v) == 0)
800 {
801 if (type == REG_PARSER_MULTI_LINE)
802 {
803 error (WARNING,
804 "using placeholder for guest name field is not allowed in multiline regex spec: %s", v);
805 goto err;
806 }
807
808 guest->lang.type = GUEST_LANG_PLACEHOLDER;
809 }
810 else if (*v == '\\' || *v == '*')
811 {
812 const char *n_tmp = v + 1;
813 const char *n = n_tmp;
814 for (; isdigit (*n_tmp); n_tmp++);
815 char c = *n_tmp;
816 *(char *)n_tmp = '\0';
817 if (!strToInt (n, 10, &(guest->lang.spec.patternGroup)))
818 {
819 error (WARNING, "wrong guest name specification: %s", v);
820 goto err;
821 }
822 else if (guest->lang.spec.patternGroup >= BACK_REFERENCE_COUNT)
823 {
824 error (WARNING, "wrong guest name specification (back reference count is too large): %d",
825 guest->lang.spec.patternGroup);
826 goto err;
827 }
828
829 *(char *)n_tmp = c;
830 if (*n_tmp != ',')
831 {
832 error (WARNING, "wrong guest specification (garbage at the end of end guest spec): %s", v);
833 goto err;
834 }
835
836 guest->lang.type = (*v == '\\')
837 ? GUEST_LANG_PTN_GROUP_FOR_LANGNAME
838 : GUEST_LANG_PTN_GROUP_FOR_FILEMAP;
839 }
840 else
841 {
842 guest->lang.spec.lang = getNamedLanguageOrAlias (v, (tmp - v));
843 if (guest->lang.spec.lang == LANG_IGNORE)
844 {
845 error (WARNING, "no parser found for the guest spec: %s", v);
846 goto err;
847 }
848 guest->lang.type = GUEST_LANG_STATIC_LANGNAME;
849 }
850
851 tmp++;
852 if (*tmp == '\0')
853 {
854 error (WARNING, "no area spec found in the guest spec: %s", v);
855 goto err;
856 }
857
858 for (int i = 0; i < 2; i++)
859 {
860 current = guest->boundary + i;
861 const char *current_field_str = (i == BOUNDARY_START? "start": "end");
862
863 if (tmp [0] == ((i == BOUNDARY_START)? ',': '\0'))
864 {
865 if (type == REG_PARSER_MULTI_LINE)
866 error (WARNING,
867 "using placeholder for %s field is not allowed in multiline regex spec: %s",
868 current_field_str, v);
869
870 current->placeholder = true;
871 }
872 else
873 {
874 char *n = tmp;
875
876 for (; isdigit (*tmp); tmp++);
877 char c = *tmp;
878 *tmp = '\0';
879 if (!strToInt (n, 10, &(current->patternGroup)))
880 {
881 error (WARNING, "wrong guest area specification (patternGroup of %s, number expected): %s:%s",
882 current_field_str, v, n);
883 goto err;
884 }
885 *tmp = c;
886 if (*tmp == '\0')
887 {
888 error (WARNING, "wrong guest area specification (patternGroup of %s, nether start nor end given): %s",
889 current_field_str, v);
890 goto err;
891 }
892 else if (strncmp (tmp, "start", 5) == 0)
893 {
894 current->fromStartOfGroup = true;
895 tmp += 5;
896 }
897 else if (strncmp (tmp, "end", 3) == 0)
898 {
899 current->fromStartOfGroup = false;
900 tmp += 3;
901 }
902 else
903 {
904 error (WARNING, "wrong guest area specification (%s): %s",
905 current_field_str, v);
906 goto err;
907 }
908 }
909
910 if (i == 0)
911 {
912 if (*tmp != ',')
913 {
914 error (WARNING,
915 "wrong guest area specification (separator between start and end boundaries): %s", v);
916 goto err;
917 }
918 tmp++;
919 }
920 else if (i == 1 && (*tmp != '\0'))
921 {
922 error (WARNING, "wrong guest area specification (garbage at the end of end boundary spec): %s", v);
923 goto err;
924 }
925 }
926 return;
927 err:
928 guest->lang.type = GUEST_LANG_UNKNOWN;
929 }
930
/* Long-only flags that fill in the struct mGroupSpec of a pattern. */
static flagDefinition multilinePtrnFlagDef[] = {
	{ '\0', "mgroup", NULL, pre_ptrn_flag_mgroup_long,
	  "N", "a group in pattern determining the line number of tag"},
	{ '\0', "_advanceTo", NULL, pre_ptrn_flag_advanceTo_long,
	  "N[start|end]", "a group in pattern from where the next scan starts [0end]"},
};

/* Long-only flag that fills in the struct guestSpec of a pattern.  The
 * EXPERIMENTAL prefix gives the flag its leading underscore ("_guest"). */
static flagDefinition guestPtrnFlagDef[] = {
#define EXPERIMENTAL "_"
	{ '\0', EXPERIMENTAL "guest", NULL, pre_ptrn_flag_guest_long,
	  "PARSERSPEC,N0[start|end],N1[start|end]", "run guest parser on the area"},
};
943
hasMessage(const regexPattern * const ptrn)944 static bool hasMessage(const regexPattern *const ptrn)
945 {
946 return (ptrn->message.selection > 0 && ptrn->message.message_string);
947 }
948
/* Argument block read by the common_flag_*_long() handlers (and by
 * pre_ptrn_flag_mtable_long()) through their `data' pointer. */
struct commonFlagData {
	/* language used to look up extras, fields and roles by name */
	const langType owner;
	const struct lregexControlBlock *const lcb;
	/* the pattern the flags are applied to */
	regexPattern *ptrn;
};
954
common_flag_msg_long(const char * const s,const char * const v,void * data)955 static void common_flag_msg_long (const char* const s, const char* const v, void* data)
956 {
957 struct commonFlagData *cdata = data;
958 regexPattern *ptrn = cdata->ptrn;
959
960 Assert (ptrn);
961
962 if (hasMessage(ptrn))
963 {
964 error (WARNING, "only one message flag may be given per regex (already set to '%s')",
965 ptrn->message.message_string);
966 return;
967 }
968
969 if (strcmp (s, "fatal") == 0)
970 {
971 ptrn->message.selection = FATAL;
972 }
973 else if (strcmp (s, "warning") == 0)
974 {
975 ptrn->message.selection = WARNING;
976 }
977
978 Assert (ptrn->message.selection != 0);
979
980 if (!v || !*v)
981 {
982 error (WARNING, "no message value is given for {%s}", s);
983 return;
984 }
985
986 const char* begin = v;
987 const char* end = v + strlen (v);
988 --end;
989
990 if (*begin != '"' || *end != '"' || begin == end)
991 {
992 error (WARNING, "argument for {%s} must be in double-quotes", s);
993 return;
994 }
995
996 ++begin;
997
998 if (begin < end)
999 ptrn->message.message_string = eStrndup (begin, end - begin);
1000 }
1001
common_flag_extra_long(const char * const s,const char * const v,void * data)1002 static void common_flag_extra_long (const char* const s, const char* const v, void* data)
1003 {
1004 struct commonFlagData * cdata = data;
1005
1006 Assert (cdata->ptrn);
1007
1008 if (!v)
1009 {
1010 error (WARNING, "no value is given for: %s", s);
1011 return;
1012 }
1013
1014 cdata->ptrn->xtagType = getXtagTypeForNameAndLanguage (v, cdata->owner);
1015 if (cdata->ptrn->xtagType == XTAG_UNKNOWN)
1016 error (WARNING, "no such extra \"%s\" in %s", v, getLanguageName(cdata->owner));
1017 }
1018
1019
/* Allocate a field pattern for FTYPE holding a private copy of TEMPLATE.
 * Release it with fieldPatternDelete(). */
static struct fieldPattern * fieldPatternNew (fieldType ftype, const char *template)
{
	struct fieldPattern *const created = xMalloc(1, struct fieldPattern);

	created->ftype = ftype;
	created->template = eStrdup(template);

	return created;
}
1030
fieldPatternDelete(struct fieldPattern * fp)1031 static void fieldPatternDelete (struct fieldPattern *fp)
1032 {
1033 eFree ((void *)fp->template);
1034 eFree (fp);
1035 }
1036
common_flag_field_long(const char * const s,const char * const v,void * data)1037 static void common_flag_field_long (const char* const s, const char* const v, void* data)
1038 {
1039 struct commonFlagData * cdata = data;
1040 regexPattern *ptrn = cdata->ptrn;
1041
1042 Assert (ptrn);
1043
1044 struct fieldPattern *fp;
1045 fieldType ftype;
1046 char *fname;
1047 const char* template;
1048 char *tmp;
1049
1050 if (!v)
1051 {
1052 error (WARNING, "no value is given for: %s", s);
1053 return;
1054 }
1055
1056 tmp = strchr (v, ':');
1057 if (tmp == NULL || tmp == v)
1058 {
1059 error (WARNING, "no field name is given for: %s", s);
1060 return;
1061 }
1062
1063 fname = eStrndup (v, tmp - v);
1064 ftype = getFieldTypeForNameAndLanguage (fname, cdata->owner);
1065 if (ftype == FIELD_UNKNOWN)
1066 {
1067 error (WARNING, "no such field \"%s\" in %s", fname, getLanguageName(cdata->owner));
1068 eFree (fname);
1069 return;
1070 }
1071
1072 if (ptrn->fieldPatterns)
1073 {
1074 for (unsigned int i = 0; i < ptrArrayCount(ptrn->fieldPatterns); i++)
1075 {
1076 fp = ptrArrayItem(ptrn->fieldPatterns, i);
1077 if (fp->ftype == ftype)
1078 {
1079 error (WARNING, "duplicated field specification \"%s\" in %s", fname, getLanguageName(cdata->owner));
1080 eFree (fname);
1081 return;
1082 }
1083 }
1084 }
1085 eFree (fname);
1086
1087 template = tmp + 1;
1088 fp = fieldPatternNew (ftype, template);
1089
1090 if (ptrn->fieldPatterns == NULL)
1091 ptrn->fieldPatterns = ptrArrayNew((ptrArrayDeleteFunc)fieldPatternDelete);
1092 ptrArrayAdd(ptrn->fieldPatterns, fp);
1093 }
1094
common_flag_role_long(const char * const s,const char * const v,void * data)1095 static void common_flag_role_long (const char* const s, const char* const v, void* data)
1096 {
1097 struct commonFlagData * cdata = data;
1098 regexPattern *ptrn = cdata->ptrn;
1099 roleDefinition * role;
1100
1101 Assert (ptrn);
1102
1103 if (!v)
1104 {
1105 error (WARNING, "no value is given for: %s", s);
1106 return;
1107 }
1108
1109 role = getLanguageRoleForName(cdata->owner,
1110 ptrn->u.tag.kindIndex, v);
1111 if (!role)
1112 {
1113 error (WARNING, "no such role: %s", v);
1114 return;
1115 }
1116
1117 ptrn->u.tag.roleBits |= makeRoleBit(role->id);
1118 }
1119
common_flag_anonymous_long(const char * const s,const char * const v,void * data)1120 static void common_flag_anonymous_long (const char* const s, const char* const v, void* data)
1121 {
1122 struct commonFlagData * cdata = data;
1123 regexPattern *ptrn = cdata->ptrn;
1124
1125 Assert (ptrn);
1126
1127 if (ptrn->anonymous_tag_prefix)
1128 {
1129 error (WARNING, "an anonymous tag prefix for this pattern (%s) is already given: %s",
1130 ptrn->pattern_string? ptrn->pattern_string: "",
1131 ptrn->anonymous_tag_prefix);
1132 return;
1133 }
1134
1135 if (!v)
1136 {
1137 error (WARNING, "no PREFIX for anonymous regex flag is given (pattern == %s)",
1138 ptrn->pattern_string? ptrn->pattern_string: "");
1139 return;
1140 }
1141
1142 if (ptrn->u.tag.kindIndex == KIND_GHOST_INDEX)
1143 {
1144 error (WARNING, "use \"%s\" regex flag only with an explicitly defined kind", s);
1145 return;
1146 }
1147
1148 ptrn->anonymous_tag_prefix = eStrdup (v);
1149 }
1150
/*
 * Long flags evaluated by patternEvalFlags() for every tag pattern, whatever
 * its parser type.  The handlers receive a struct commonFlagData.
 *
 * Each entry appears to be laid out as
 *   { short letter, long name, short handler, long handler,
 *     parameter help text, description }
 * (inferred from how the tables in this file are filled in; see the
 * flagDefinition declaration for the authoritative layout).
 *
 * Names built with EXPERIMENTAL carry a leading "_".
 */
static flagDefinition commonSpecFlagDef[] = {
	{ '\0', "fatal", NULL, common_flag_msg_long ,
	  "\"MESSAGE\"", "print the given MESSAGE and exit"},
	{ '\0', "warning", NULL, common_flag_msg_long ,
	  "\"MESSAGE\"", "print the given MESSAGE at WARNING level"},
	/* Prefix prepended to the names of the flags below. */
#define EXPERIMENTAL "_"
	{ '\0', EXPERIMENTAL "extra", NULL, common_flag_extra_long ,
	  "EXTRA", "record the tag only when the extra is enabled"},
	{ '\0', EXPERIMENTAL "field", NULL, common_flag_field_long ,
	  "FIELD:VALUE", "record the matched string(VALUE) to parser own FIELD of the tag"},
	{ '\0', EXPERIMENTAL "role", NULL, common_flag_role_long,
	  "ROLE", "set the given ROLE to the roles field"},
	{ '\0', EXPERIMENTAL "anonymous", NULL, common_flag_anonymous_long,
	  "PREFIX", "make an anonymous tag with PREFIX"},
};
1166
1167
pre_ptrn_flag_mtable_long(const char * const s,const char * const v,void * data)1168 static void pre_ptrn_flag_mtable_long (const char* const s, const char* const v, void* data)
1169 {
1170 struct commonFlagData * cdata = data;
1171 regexPattern *ptrn = cdata->ptrn;
1172 struct mTableActionSpec *taction;
1173 bool taking_table = true;
1174
1175 Assert (ptrn);
1176 Assert (cdata->lcb);
1177
1178 taction = &ptrn->taction;
1179
1180 if (strcmp (s, "tenter") == 0)
1181 taction->action = TACTION_ENTER;
1182 else if (strcmp (s, "tleave") == 0)
1183 {
1184 taction->action = TACTION_LEAVE;
1185 taking_table = false;
1186 }
1187 else if (strcmp (s, "tjump") == 0)
1188 taction->action = TACTION_JUMP;
1189 else if (strcmp (s, "treset") == 0)
1190 taction->action = TACTION_RESET;
1191 else if (strcmp (s, "tquit") == 0)
1192 {
1193 taction->action = TACTION_QUIT;
1194 taking_table = false;
1195 }
1196
1197 if (taking_table)
1198 {
1199 int t;
1200 char *continuation = NULL;
1201
1202
1203 if (!v || (!*v))
1204 error (FATAL, "no table is given for table action: %s", s);
1205
1206 if (taction->action == TACTION_ENTER
1207 && (continuation = strchr (v, ',')))
1208 {
1209 char *tableEnterTo;
1210
1211 tableEnterTo = eStrndup (v, continuation - v);
1212 t = getTableIndexForName (cdata->lcb, tableEnterTo);
1213 if (t < 0)
1214 error (FATAL, "table is not defined: %s", tableEnterTo);
1215 taction->table = ptrArrayItem (cdata->lcb->tables, t);
1216 eFree (tableEnterTo);
1217
1218 if (!*(continuation + 1))
1219 error (FATAL, "no continuation table is given for: %s", v);
1220
1221 int t_cont = getTableIndexForName (cdata->lcb, continuation + 1);
1222 if (t_cont < 0)
1223 error (FATAL, "table for continuation is not defined: %s", continuation + 1);
1224 taction->continuation_table = ptrArrayItem (cdata->lcb->tables, t_cont);
1225 }
1226 else
1227 {
1228 t = getTableIndexForName (cdata->lcb, v);
1229 if (t < 0)
1230 error (FATAL, "table is not defined: %s", v);
1231 taction->table = ptrArrayItem (cdata->lcb->tables, t);
1232 taction->continuation_table = NULL;
1233 }
1234 }
1235 }
1236
/*
 * Table-action flags.  patternEvalFlags() evaluates them only for patterns
 * whose parser type is REG_PARSER_MULTI_TABLE.  All entries share one
 * handler, which tells them apart by the flag name.  Entries whose parameter
 * help is NULL (tleave, tquit) do not use a value.
 */
static flagDefinition multitablePtrnFlagDef[] = {
	{ '\0', "tenter", NULL, pre_ptrn_flag_mtable_long ,
	  "TABLE[,CONT]", "enter to given regext table (with specifying continuation)"},
	{ '\0', "tleave", NULL, pre_ptrn_flag_mtable_long ,
	  NULL, "leave from the current regext table"},
	{ '\0', "tjump", NULL, pre_ptrn_flag_mtable_long ,
	  "TABLE", "jump to another regext table(don't push the current table to state stack)"},
	{ '\0', "treset", NULL, pre_ptrn_flag_mtable_long ,
	  "TABLE", "clear the state stack and jump to given regex table"},
	{ '\0', "tquit", NULL, pre_ptrn_flag_mtable_long ,
	  NULL, "stop the parsing with this parser"},
};
1249
1250
/*
 * Decide which kind a tag pattern emits and store its index in
 * ptrn->u.tag.kindIndex.
 *
 * ptrn:        the pattern; its name_pattern must already be set.
 * owner:       the language the pattern belongs to.
 * kindLetter:  one-letter kind identifier.
 * kindName:    long kind name (must not be NULL).
 * description: kind description, handed to kindNew() when a kind is created.
 * kind_explicitly_defined: whether the user spelled out a letter and/or name.
 *
 * Three outcomes, tried in this order:
 *  1. The letter is already defined in the language: reuse that kind.  A
 *     warning is printed when the given name differs from the existing one,
 *     unless the given name is the default regex kind name.
 *  2. The name pattern is empty and the letter and name are both the regex
 *     defaults and nothing was given explicitly: use KIND_GHOST_INDEX.
 *  3. Otherwise a new kind is created and defined in the language.
 */
static void setKind(regexPattern * ptrn, const langType owner,
					const char kindLetter, const char* kindName,
					const char *const description,
					bool kind_explicitly_defined)
{
	Assert (ptrn);
	Assert (ptrn->u.tag.name_pattern);
	Assert (kindName);

	const bool default_name = (strcmp (kindName, KIND_REGEX_DEFAULT_NAME) == 0);
	kindDefinition *kdef = getLanguageKindForLetter (owner, kindLetter);

	if (kdef != NULL)
	{
		/* When using a same kind letter for multiple regex patterns, the name of kind
		   should be the same. */
		if (!default_name && strcmp (kdef->name, kindName) != 0)
			error (WARNING, "Don't reuse the kind letter `%c' in a language %s (old: \"%s\", new: \"%s\")",
				   kdef->letter, getLanguageName (owner),
				   kdef->name, kindName);
		ptrn->u.tag.kindIndex = kdef->id;
		return;
	}

	const bool nameless = (*ptrn->u.tag.name_pattern == '\0');
	const bool default_letter = (kindLetter == KIND_REGEX_DEFAULT_LETTER);

	if (nameless && default_letter && default_name && !kind_explicitly_defined)
	{
		ptrn->u.tag.kindIndex = KIND_GHOST_INDEX;
		return;
	}

	kdef = kindNew (kindLetter, kindName, description);
	defineLanguageKind (owner, kdef, kindFree);
	ptrn->u.tag.kindIndex = kdef->id;
}
1283
/*
 * Evaluate the flag string of a tag pattern against every flag table that
 * applies to its parser type, storing the results on the pattern.
 *
 * lcb:      control block the pattern belongs to (supplies the owner language).
 * ptrn:     the pattern being configured.
 * regptype: parser type of the pattern; selects which tables are consulted.
 * flags:    the flag string to evaluate.
 *
 * Tables are consulted in this fixed order:
 *   prePtrnFlagDef        (single line only)
 *   commonSpecFlagDef     (always; a non-NULL return value is read as an
 *                          optscript fragment and kept with its source text)
 *   scopePtrnFlagDef      (single line and multi table)
 *   multilinePtrnFlagDef  (multi line and multi table)
 *   guestPtrnFlagDef      (always)
 *   multitablePtrnFlagDef (multi table only)
 */
static void patternEvalFlags (struct lregexControlBlock *lcb,
							  regexPattern * ptrn,
							  enum regexParserType regptype,
							  const char* flags)
{
	const bool single_line = (regptype == REG_PARSER_SINGLE_LINE);
	const bool multi_line = (regptype == REG_PARSER_MULTI_LINE);
	const bool multi_table = (regptype == REG_PARSER_MULTI_TABLE);

	struct commonFlagData common = {
		.owner = lcb->owner,
		.lcb = lcb,
		.ptrn = ptrn
	};

	if (single_line)
		flagsEval (flags, prePtrnFlagDef, ARRAY_SIZE(prePtrnFlagDef), &ptrn->exclusive);

	const char *const script_src = flagsEval (flags, commonSpecFlagDef,
											  ARRAY_SIZE(commonSpecFlagDef), &common);
	if (script_src != NULL)
	{
		ptrn->optscript = scriptRead (optvm, script_src);
		ptrn->optscript_src = eStrdup (script_src);
	}

	if (single_line || multi_table)
	{
		flagsEval (flags, scopePtrnFlagDef, ARRAY_SIZE(scopePtrnFlagDef), &ptrn->scopeActions);

		/* Both bits set at once means "replace" was mixed with another scope action. */
		if ((ptrn->scopeActions & (SCOPE_REF|SCOPE_REF_AFTER_POP)) == (SCOPE_REF|SCOPE_REF_AFTER_POP))
			error (WARNING, "%s: don't combine \"replace\" with the other scope action.",
				   getLanguageName (lcb->owner));
	}

	if (multi_line || multi_table)
	{
		ptrn->mgroup.forNextScanning = 0;
		/* ptrn->mgroup.nextFromStart is initialized in initMgroup() already. */
		flagsEval (flags, multilinePtrnFlagDef, ARRAY_SIZE(multilinePtrnFlagDef), &ptrn->mgroup);
	}

	struct guestPtrnFlagData guest_data = {
		.type = regptype,
		.guest = &ptrn->guest,
	};
	flagsEval (flags, guestPtrnFlagDef, ARRAY_SIZE(guestPtrnFlagDef), &guest_data);

	if (multi_table)
		flagsEval (flags, multitablePtrnFlagDef, ARRAY_SIZE(multitablePtrnFlagDef), &common);
}
1329
addCompiledTagPattern(struct lregexControlBlock * lcb,int table_index,enum regexParserType regptype,regexCompiledCode * const pattern,const char * const name,char kindLetter,const char * kindName,char * const description,const char * flags,bool kind_explicitly_defined,bool * disabled)1330 static regexPattern *addCompiledTagPattern (struct lregexControlBlock *lcb,
1331 int table_index,
1332 enum regexParserType regptype, regexCompiledCode* const pattern,
1333 const char* const name, char kindLetter, const char* kindName,
1334 char *const description, const char* flags,
1335 bool kind_explicitly_defined,
1336 bool *disabled)
1337 {
1338 regexPattern * ptrn = addCompiledTagCommon(lcb, table_index, pattern, regptype);
1339
1340 ptrn->type = PTRN_TAG;
1341 ptrn->u.tag.name_pattern = eStrdup (name);
1342 ptrn->disabled = disabled;
1343
1344 setKind(ptrn, lcb->owner, kindLetter, kindName, description, kind_explicitly_defined);
1345 patternEvalFlags (lcb, ptrn, regptype, flags);
1346
1347 return ptrn;
1348 }
1349
addCompiledCallbackPattern(struct lregexControlBlock * lcb,regexCompiledCode * const pattern,const regexCallback callback,const char * flags,bool * disabled,void * userData)1350 static regexPattern *addCompiledCallbackPattern (struct lregexControlBlock *lcb, regexCompiledCode* const pattern,
1351 const regexCallback callback, const char* flags,
1352 bool *disabled,
1353 void *userData)
1354 {
1355 regexPattern * ptrn;
1356 bool exclusive = false;
1357 flagsEval (flags, prePtrnFlagDef, ARRAY_SIZE(prePtrnFlagDef), &exclusive);
1358 ptrn = addCompiledTagCommon(lcb, TABLE_INDEX_UNUSED, pattern, REG_PARSER_SINGLE_LINE);
1359 ptrn->type = PTRN_CALLBACK;
1360 ptrn->u.callback.function = callback;
1361 ptrn->u.callback.userData = userData;
1362 ptrn->exclusive = exclusive;
1363 ptrn->disabled = disabled;
1364 return ptrn;
1365 }
1366
#ifndef HAVE_PCRE2
/*
 * Stand-in handlers for the 'p' / {pcre2} backend flags, used when ctags is
 * built without pcre2.  They select no backend and only emit a warning.
 * None of the parameters are used.
 */
static void no_pcre2_regex_flag_short (char c CTAGS_ATTR_UNUSED, void* data CTAGS_ATTR_UNUSED)
{
	error (WARNING, "'p' flag is specified but pcre2 regex engine is not linked.");
}
static void no_pcre2_regex_flag_long (const char* const s CTAGS_ATTR_UNUSED, const char* const unused CTAGS_ATTR_UNUSED, void* data CTAGS_ATTR_UNUSED)
{
	error (WARNING, "{pcre2} flag is specified but pcre2 regex engine is not linked.");
}
#endif
1377
/*
 * Flags that pick the regex backend.  choose_backend() evaluates this table
 * with a struct flagDefsDescriptor as the handler data.  Each entry has both
 * a one-letter and a long form.  When ctags is built without pcre2 the 'p'
 * entry is still present but is wired to handlers that only warn.
 */
static flagDefinition backendFlagDefs[] = {
	{ 'b', "basic", basic_regex_flag_short, basic_regex_flag_long,
	  NULL, "interpreted as a Posix basic regular expression."},
	{ 'e', "extend", extend_regex_flag_short, extend_regex_flag_long,
	  NULL, "interpreted as a Posix extended regular expression (default)"},
#ifdef HAVE_PCRE2
	{ 'p', "pcre2", pcre2_regex_flag_short, pcre2_regex_flag_long,
	  NULL, "use pcre2 regex engine"},
#else
	{ 'p', "pcre2", no_pcre2_regex_flag_short, no_pcre2_regex_flag_long,
	  NULL, "pcre2 is NOT linked!"},
#endif
};
1391
/*
 * Short-flag handler for 'i': ask the backend recorded in the descriptor to
 * turn on its case-insensitive option in the descriptor's flag word.
 * data must be a struct flagDefsDescriptor whose backend is already chosen.
 */
static void regex_flag_icase_short (char c CTAGS_ATTR_UNUSED, void* data)
{
	struct flagDefsDescriptor *const descriptor = data;

	(*descriptor->backend->set_icase_flag) (&descriptor->flags);
}
1397
/*
 * Long-flag handler for {icase}: behaves exactly like the short 'i' flag by
 * delegating to regex_flag_icase_short().  The flag name and value are
 * ignored; data is passed through unchanged.
 */
static void regex_flag_icase_long (const char* s CTAGS_ATTR_UNUSED, const char* const unused CTAGS_ATTR_UNUSED, void* data)
{
	regex_flag_icase_short ('i', data);
}
1402
/*
 * Flags understood regardless of the backend in use.  compileRegex()
 * evaluates this table after the backend-specific flags, passing the
 * struct flagDefsDescriptor so the handler can reach the chosen backend.
 */
static flagDefinition backendCommonRegexFlagDefs[] = {
	{ 'i', "icase", regex_flag_icase_short, regex_flag_icase_long,
	  NULL, "applied in a case-insensitive manner"},
};
1407
1408
choose_backend(const char * flags,enum regexParserType regptype,bool error_if_no_backend)1409 static struct flagDefsDescriptor choose_backend (const char *flags, enum regexParserType regptype, bool error_if_no_backend)
1410 {
1411 struct flagDefsDescriptor desc = {
1412 .backend = NULL,
1413 .flags = 0,
1414 .regptype = regptype,
1415 };
1416
1417 if (flags)
1418 flagsEval (flags,
1419 backendFlagDefs,
1420 ARRAY_SIZE(backendFlagDefs),
1421 &desc);
1422
1423 /* Choose the default backend. */
1424 if (desc.backend == NULL)
1425 {
1426 if (flags && error_if_no_backend)
1427 error (FATAL, "No sunch backend for the name: \"%s\"", flags);
1428
1429 flagsEval (DEFAULT_REGEX_BACKEND,
1430 backendFlagDefs,
1431 ARRAY_SIZE(backendFlagDefs),
1432 &desc);
1433 }
1434 return desc;
1435 }
1436
compileRegex(enum regexParserType regptype,const char * const regexp,const char * const flags)1437 static regexCompiledCode compileRegex (enum regexParserType regptype,
1438 const char* const regexp, const char* const flags)
1439 {
1440 struct flagDefsDescriptor desc = choose_backend (flags, regptype, false);
1441
1442 /* Evaluate backend specific flags */
1443 flagsEval (flags,
1444 desc.backend->fdefs,
1445 desc.backend->fdef_count,
1446 &desc.flags);
1447
1448 flagsEval (flags,
1449 backendCommonRegexFlagDefs,
1450 ARRAY_SIZE (backendCommonRegexFlagDefs),
1451 &desc);
1452
1453 return desc.backend->compile (desc.backend, regexp, desc.flags);
1454 }
1455
1456
1457 /* If a letter and/or a name are defined in kindSpec, return true. */
parseKinds(const char * const kindSpec,char * const kindLetter,char ** const kindName,char ** description)1458 static bool parseKinds (
1459 const char* const kindSpec, char* const kindLetter, char** const kindName,
1460 char **description)
1461 {
1462 *description = NULL;
1463
1464 if (kindSpec == NULL || kindSpec [0] == '\0')
1465 {
1466 *kindLetter = KIND_REGEX_DEFAULT_LETTER;
1467 *kindName = eStrdup (KIND_REGEX_DEFAULT_NAME);
1468 return false;
1469 }
1470 else
1471 {
1472 bool explicitly_defined = false;
1473 const char* k = kindSpec;
1474
1475 if (k [0] != ',' && (k [1] == ',' || k [1] == '\0'))
1476 {
1477 *kindLetter = *k++;
1478 explicitly_defined = true;
1479 }
1480 else
1481 *kindLetter = KIND_REGEX_DEFAULT_LETTER;
1482
1483 if (*k == ',')
1484 ++k;
1485
1486 if (k [0] == '\0')
1487 *kindName = eStrdup (KIND_REGEX_DEFAULT_NAME);
1488 else
1489 {
1490 const char *const comma = strchr (k, ',');
1491
1492 if (comma == NULL)
1493 {
1494 if (strlen (k) == 0)
1495 *kindName = eStrdup (KIND_REGEX_DEFAULT_NAME);
1496 else
1497 {
1498 *kindName = eStrdup (k);
1499 explicitly_defined = true;
1500 }
1501 }
1502 else
1503 {
1504 if (comma - k == 0)
1505 *kindName = eStrdup (KIND_REGEX_DEFAULT_NAME);
1506 else
1507 {
1508 *kindName = eStrndup (k, comma - k );
1509 explicitly_defined = true;
1510 }
1511 k = comma + 1;
1512 if (k [0] != '\0')
1513 *description = eStrdup (k);
1514 }
1515 }
1516 return explicitly_defined;
1517 }
1518 }
1519
1520 /*
1521 * Regex pattern matching
1522 */
1523
1524
substitute(const char * const in,const char * out,const int nmatch,const regmatch_t * const pmatch)1525 static vString* substitute (
1526 const char* const in, const char* out,
1527 const int nmatch, const regmatch_t* const pmatch)
1528 {
1529 vString* result = vStringNew ();
1530 const char* p;
1531 for (p = out ; *p != '\0' ; p++)
1532 {
1533 if (*p == '\\' && isdigit ((int) *++p))
1534 {
1535 const int dig = *p - '0';
1536 if (0 < dig && dig < nmatch && pmatch [dig].rm_so != -1)
1537 {
1538 const int diglen = pmatch [dig].rm_eo - pmatch [dig].rm_so;
1539 vStringNCatS (result, in + pmatch [dig].rm_so, diglen);
1540 }
1541 }
1542 else if (*p != '\n' && *p != '\r')
1543 vStringPut (result, *p);
1544 }
1545 return result;
1546 }
1547
getInputLineNumberInRegPType(enum regexParserType regptype,off_t offset)1548 static unsigned long getInputLineNumberInRegPType (enum regexParserType regptype,
1549 off_t offset)
1550 {
1551 return (regptype == REG_PARSER_MULTI_LINE || regptype == REG_PARSER_MULTI_TABLE)
1552 ? getInputLineNumberForFileOffset (offset)
1553 : getInputLineNumber ();
1554 }
1555
fillEndLineFieldOfUpperScopes(struct lregexControlBlock * lcb,unsigned long endline)1556 static void fillEndLineFieldOfUpperScopes (struct lregexControlBlock *lcb, unsigned long endline)
1557 {
1558 tagEntryInfo *entry;
1559 int n = lcb->currentScope;
1560
1561 while ((entry = getEntryInCorkQueue (n))
1562 && (entry->extensionFields.endLine == 0))
1563 {
1564 entry->extensionFields.endLine = endline;
1565 n = entry->extensionFields.scopeIndex;
1566 }
1567 }
1568
hasNameSlot(const regexPattern * const patbuf)1569 static bool hasNameSlot (const regexPattern* const patbuf)
1570 {
1571 return (patbuf->u.tag.name_pattern[0] != '\0'
1572 || patbuf->anonymous_tag_prefix);
1573 }
1574
scopeActionRef(int currentScope)1575 static int scopeActionRef (int currentScope)
1576 {
1577 int scope = currentScope;
1578 tagEntryInfo *entry;
1579 while ((entry = getEntryInCorkQueue (scope)) && entry->placeholder)
1580 /* Look at parent */
1581 scope = entry->extensionFields.scopeIndex;
1582 return scope;
1583 }
1584
/*
 * Act on a successful match of a tag pattern: update the scope stack kept in
 * lcb, emit the tag (if it has a name or is a placeholder) and, when a
 * script window is supplied, run the pattern's optscript on the new tag.
 *
 * lcb:    control block holding the current scope.
 * line:   the text the pattern was matched against; pmatch offsets are
 *         relative to it.
 * patbuf: the matched pattern (must be a tag pattern).
 * pmatch: group offsets of the match (BACK_REFERENCE_COUNT entries).
 * offset: file offset of the match; consulted only for multi-line and
 *         multi-table patterns.
 * window: script window, or NULL when no optscript is to be run here.
 *
 * Scope actions are processed in the order REF, CLEAR, POP, REF_AFTER_POP
 * before the tag is made, and PUSH after it.
 */
static void matchTagPattern (struct lregexControlBlock *lcb,
		const char* line,
		const regexPattern* const patbuf,
		const regmatch_t* const pmatch,
			     off_t offset, scriptWindow *window)
{
	/* The tag name: the expanded name template if there is one, else a
	   generated anonymous name if a prefix is set, else an empty string. */
	vString *const name =
		(patbuf->u.tag.name_pattern[0] != '\0') ? substitute (line,
				patbuf->u.tag.name_pattern,
				BACK_REFERENCE_COUNT, pmatch):
		(patbuf->anonymous_tag_prefix) ? anonGenerateNew (patbuf->anonymous_tag_prefix,
				patbuf->u.tag.kindIndex):
		vStringNewInit ("");
	bool placeholder = !!((patbuf->scopeActions & SCOPE_PLACEHOLDER) == SCOPE_PLACEHOLDER);
	int scope = CORK_NIL;
	int n;

	vStringStripLeading (name);
	vStringStripTrailing (name);

	/* REF: the new tag is scoped under the current scope as it is now. */
	if (patbuf->scopeActions & SCOPE_REF)
		scope = scopeActionRef (lcb->currentScope);
	/* CLEAR: close every open scope and forget the current one. */
	if (patbuf->scopeActions & SCOPE_CLEAR)
	{
		unsigned long endline = getInputLineNumberInRegPType(patbuf->regptype, offset);

		/*
		 * SCOPE_CLEAR|SCOPE_PUSH implies that "set" was specified as the scope action.
		 * If the specified action is "set", getInputLineNumberInRegPType()
		 * returns the start line of the NEW scope. The cleared scopes are ended BEFORE
		 * the new scope. There is a gap. We must adjust the "end:" field here.
		 */
		/* NOTE(review): this guard is "> 0" while the matching adjustment in
		   the POP branch below uses "> 1" -- confirm whether the difference
		   is intended. */
		if (patbuf->scopeActions & SCOPE_PUSH && endline > 0)
			endline--;

		fillEndLineFieldOfUpperScopes (lcb, endline);
		lcb->currentScope = CORK_NIL;
	}
	/* POP: close the current scope (if not closed yet) and step to its parent. */
	if (patbuf->scopeActions & SCOPE_POP)
	{
		tagEntryInfo *entry = getEntryInCorkQueue (lcb->currentScope);

		if (entry && (entry->extensionFields.endLine == 0))
		{
			entry->extensionFields.endLine = getInputLineNumberInRegPType(patbuf->regptype, offset);

			/*
			 * SCOPE_POP|SCOPE_REF_AFTER_POP implies that "replace" was specified as the
			 * scope action. If the specified action is "replace", getInputLineNumberInRegPType()
			 * returns the start line of the NEW scope. The popped scope is ended BEFORE
			 * the new scope. There is a gap. We must adjust the "end:" field here.
			 */
			if ((patbuf->scopeActions & SCOPE_REF_AFTER_POP) &&
				entry->extensionFields.endLine > 1)
				entry->extensionFields.endLine--;
		}

		lcb->currentScope = entry? entry->extensionFields.scopeIndex: CORK_NIL;
	}
	/* REF_AFTER_POP: like REF, but resolved against the scope left after POP. */
	if (patbuf->scopeActions & SCOPE_REF_AFTER_POP)
		scope = scopeActionRef (lcb->currentScope);

	if (vStringLength (name) == 0 && (placeholder == false))
	{
		/* Nothing to emit.  Warn unless the pattern accepts empty names. */
		if (patbuf->accept_empty_name == false)
			error (WARNING, "%s:%lu: null expansion of name pattern \"%s\"",
			       getInputFileName (),
				   getInputLineNumberInRegPType(patbuf->regptype, offset),
			       patbuf->u.tag.name_pattern);
		n = CORK_NIL;
	}
	else
	{
		/* Holds the expanded field values until the tag entry has been made;
		   created on first use and emptied after every tag. */
		static TrashBox* field_trashbox;
		unsigned long ln = 0;
		MIOPos pos;
		tagEntryInfo e;
		int kind;
		roleBitsType roleBits;

		/* Only multi-line/multi-table patterns supply an explicit line and
		   position; otherwise ln stays 0 and a NULL position is passed. */
		if ((patbuf->regptype == REG_PARSER_MULTI_LINE)
			|| (patbuf->regptype == REG_PARSER_MULTI_TABLE))
		{
			ln = getInputLineNumberForFileOffset (offset);
			pos = getInputFilePositionForLine (ln);
		}

		n = CORK_NIL;
		kind = patbuf->u.tag.kindIndex;
		roleBits = patbuf->u.tag.roleBits;

		initRegexTag (&e, vStringValue (name), kind, ROLE_DEFINITION_INDEX, scope, placeholder,
					  ln, ln == 0? NULL: &pos, patbuf->xtagType);

		if (field_trashbox == NULL)
		{
			field_trashbox = trashBoxNew();
			DEFAULT_TRASH_BOX (field_trashbox, trashBoxDelete);
		}

		/* Expand and attach each enabled parser field of the pattern. */
		if (patbuf->fieldPatterns)
		{
			for (unsigned int i = 0; i < ptrArrayCount(patbuf->fieldPatterns); i++)
			{
				struct fieldPattern *fp = ptrArrayItem(patbuf->fieldPatterns, i);
				if (isFieldEnabled (fp->ftype))
				{
					vString * const value = substitute (line, fp->template,
														BACK_REFERENCE_COUNT, pmatch);
					attachParserField (&e, false, fp->ftype, vStringValue (value));
					/* The value is released later, when the trash box is emptied. */
					trashBoxPut (field_trashbox, value,
								 (TrashBoxDestroyItemProc)vStringDelete);
				}
			}
		}

		/* Assign every role whose bit is set in the pattern's role bits. */
		if (roleBits)
		{
			unsigned int roleIndex;

			for (roleIndex = 0;
				 roleIndex < countLanguageRoles(e.langType, kind);
				 roleIndex++)
			{
				if (roleBits & makeRoleBit(roleIndex))
					assignRole (&e, roleIndex);
			}
		}

		if (patbuf->anonymous_tag_prefix)
			markTagExtraBit (&e, XTAG_ANONYMOUS);

		n = makeTagEntry (&e);

		trashBoxMakeEmpty(field_trashbox);
	}

	/* PUSH: the new tag becomes the current scope.  When no tag was made,
	   n is CORK_NIL and the current scope is reset to that. */
	if (patbuf->scopeActions & SCOPE_PUSH)
		lcb->currentScope = n;

	/* Run the pattern's optscript on the tag just made. */
	if (n != CORK_NIL && window)
	{
		scriptSetup (optvm, lcb, n, window);
		EsObject *e = scriptEval (optvm, patbuf->optscript);
		if (es_error_p (e))
			error (WARNING, "error when evaluating: %s", patbuf->optscript_src);
		es_object_unref (e);
		scriptTeardown (optvm, lcb);
	}

	vStringDelete (name);
}
1737
matchCallbackPattern(const vString * const line,const regexPattern * const patbuf,const regmatch_t * const pmatch)1738 static bool matchCallbackPattern (
1739 const vString* const line, const regexPattern* const patbuf,
1740 const regmatch_t* const pmatch)
1741 {
1742 regexMatch matches [BACK_REFERENCE_COUNT];
1743 unsigned int count = 0;
1744 int i;
1745 for (i = 0 ; i < BACK_REFERENCE_COUNT ; ++i)
1746 {
1747 matches [i].start = pmatch [i].rm_so;
1748 matches [i].length = pmatch [i].rm_eo - pmatch [i].rm_so;
1749 /* a valid match may have both offsets == -1,
1750 * e.g. (foo)*(bar) matching "bar" - see CTags bug 271.
1751 * As POSIX regex doesn't seem to have a way to count matches,
1752 * we return the count up to the last non-empty match. */
1753 if (pmatch [i].rm_so != -1)
1754 count = i + 1;
1755 }
1756 return patbuf->u.callback.function (vStringValue (line), matches, count,
1757 patbuf->u.callback.userData);
1758 }
1759
1760
printMessage(const langType language,const regexPattern * const ptrn,const off_t offset,const char * const line,const regmatch_t * const pmatch)1761 static void printMessage(const langType language,
1762 const regexPattern *const ptrn,
1763 const off_t offset,
1764 const char *const line,
1765 const regmatch_t* const pmatch)
1766 {
1767 vString *msg;
1768
1769 Assert (ptrn);
1770 Assert (ptrn->message.selection > 0);
1771 Assert (ptrn->message.message_string);
1772
1773 msg = substitute (line, ptrn->message.message_string, BACK_REFERENCE_COUNT, pmatch);
1774
1775 error (ptrn->message.selection, "%sMessage from regex<%s>: %s (%s:%lu)",
1776 (ptrn->message.selection == FATAL ? "Fatal: " : ""),
1777 getLanguageName (language),
1778 vStringValue (msg),
1779 getInputFileName (),
1780 getInputLineNumberInRegPType (ptrn->regptype, offset));
1781
1782 vStringDelete (msg);
1783 }
1784
isGuestRequestConsistent(struct guestRequest * guest_req)1785 static bool isGuestRequestConsistent (struct guestRequest *guest_req)
1786 {
1787 return (guest_req->lang != LANG_IGNORE)
1788 && (guest_req->boundary[BOUNDARY_START].offset < guest_req->boundary[BOUNDARY_END].offset);
1789 }
1790
fillGuestRequest(const char * start,const char * current,regmatch_t pmatch[BACK_REFERENCE_COUNT],struct guestSpec * guest_spec,struct guestRequest * guest_req)1791 static bool fillGuestRequest (const char *start,
1792 const char *current,
1793 regmatch_t pmatch [BACK_REFERENCE_COUNT],
1794 struct guestSpec *guest_spec,
1795 struct guestRequest *guest_req)
1796 {
1797 if (guest_spec->lang.type == GUEST_LANG_UNKNOWN)
1798 return false;
1799 else if (guest_spec->lang.type == GUEST_LANG_PLACEHOLDER)
1800 ;
1801 else if (guest_spec->lang.type == GUEST_LANG_STATIC_LANGNAME)
1802 {
1803 guest_req->lang = guest_spec->lang.spec.lang;
1804 guest_req->lang_set = true;
1805 }
1806 else if (guest_spec->lang.type == GUEST_LANG_PTN_GROUP_FOR_LANGNAME)
1807 {
1808 const char * name = current + pmatch [guest_spec->lang.spec.patternGroup].rm_so;
1809 int size = pmatch [guest_spec->lang.spec.patternGroup].rm_eo
1810 - pmatch [guest_spec->lang.spec.patternGroup].rm_so;
1811 if (size > 0)
1812 {
1813 guest_req->lang = getNamedLanguageOrAlias (name, size);
1814 guest_req->lang_set = true;
1815 }
1816 }
1817 else if (guest_spec->lang.type == GUEST_LANG_PTN_GROUP_FOR_FILEMAP)
1818 {
1819 const char * name = current + pmatch [guest_spec->lang.spec.patternGroup].rm_so;
1820 int size = pmatch [guest_spec->lang.spec.patternGroup].rm_eo
1821 - pmatch [guest_spec->lang.spec.patternGroup].rm_so;
1822 char *fname = (size > 0)? eStrndup (name, size): NULL;
1823
1824 if (fname)
1825 {
1826 guest_req->lang = getLanguageForFilename (fname, LANG_AUTO);
1827 guest_req->lang_set = true;
1828 eFree (fname);
1829 }
1830 }
1831
1832 for (int i = 0; i < 2; i++)
1833 {
1834 struct boundarySpec *boundary_spec = guest_spec->boundary + i;
1835 struct boundaryInRequest *boundary = guest_req->boundary + i;
1836 if (!boundary_spec->placeholder)
1837 {
1838 boundary->offset = current - start + (boundary_spec->fromStartOfGroup
1839 ? pmatch [boundary_spec->patternGroup].rm_so
1840 : pmatch [boundary_spec->patternGroup].rm_eo);
1841 boundary->offset_set = true;
1842 }
1843 }
1844 return guestRequestIsFilled (guest_req);
1845 }
1846
/*
 * Try one single-line pattern against one input line.
 *
 * lcb:   control block of the running parser.
 * line:  the input line.
 * entry: table entry wrapping the pattern; its match/unmatch statistics are
 *        updated.
 *
 * A pattern whose "disabled" switch is on is skipped without touching the
 * statistics.  On a match, in this order:
 *   1. the optscript is run right away if the pattern cannot produce a name;
 *   2. the pattern's message, if any, is printed;
 *   3. a tag pattern emits its tag (running the optscript on it when the
 *      pattern can produce a name) and then submits a guest request if the
 *      pattern declares a guest; a callback pattern invokes its callback.
 *
 * Returns false when disabled or not matched; true for a matched tag
 * pattern; the callback's return value for a matched callback pattern.
 */
static bool matchRegexPattern (struct lregexControlBlock *lcb,
							   const vString* const line,
							   regexTableEntry *entry)
{
	regexPattern *const patbuf = entry->pattern;
	struct guestSpec *const guest = &patbuf->guest;
	regmatch_t pmatch [BACK_REFERENCE_COUNT];

	if (patbuf->disabled && *(patbuf->disabled))
		return false;

	const int status = patbuf->pattern.backend->match (patbuf->pattern.backend,
													   patbuf->pattern.code, vStringValue (line),
													   vStringLength (line),
													   pmatch);
	if (status != 0)
	{
		entry->statistics.unmatch++;
		return false;
	}

	bool result = true;
	entry->statistics.match++;

	scriptWindow window = {
		.line = vStringValue (line),
		.start = 0,
		.patbuf = patbuf,
		.pmatch = pmatch,
		.nmatch = BACK_REFERENCE_COUNT,
		.advanceto = false,
	};

	/* Without a name slot no tag will be made, so the script runs with no tag. */
	if (patbuf->optscript && (! hasNameSlot (patbuf)))
	{
		scriptSetup (optvm, lcb, CORK_NIL, &window);
		EsObject *e = scriptEval (optvm, patbuf->optscript);
		if (es_error_p (e))
			error (WARNING, "error when evaluating: %s", patbuf->optscript_src);
		es_object_unref (e);
		scriptTeardown (optvm, lcb);
	}

	if (hasMessage(patbuf))
		printMessage(lcb->owner, patbuf, 0, vStringValue (line), pmatch);

	switch (patbuf->type)
	{
	case PTRN_TAG:
		matchTagPattern (lcb, vStringValue (line), patbuf, pmatch, 0,
						 (patbuf->optscript && hasNameSlot (patbuf))? &window: NULL);

		if (guest->lang.type != GUEST_LANG_UNKNOWN)
		{
			/* Passing (line text - offset of this line) as the base makes the
			   boundary offsets computed by fillGuestRequest() file-relative. */
			unsigned long ln = getInputLineNumber ();
			long current = getInputFileOffsetForLine (ln);
			if (fillGuestRequest (vStringValue (line) - current,
								  vStringValue (line), pmatch, guest, lcb->guest_req))
			{
				Assert (lcb->guest_req->lang != LANG_AUTO);
				if (isGuestRequestConsistent(lcb->guest_req))
					guestRequestSubmit (lcb->guest_req);
				guestRequestClear (lcb->guest_req);
			}
		}
		break;

	case PTRN_CALLBACK:
		result = matchCallbackPattern (line, patbuf, pmatch);
		break;

	default:
		Assert ("invalid pattern type" == NULL);
		result = false;
		break;
	}

	return result;
}
1922
/*
 * Apply one multi-line pattern repeatedly over the whole input.
 *
 * lcb:      control block of the running parser.
 * allLines: the complete input text.
 * entry:    table entry wrapping the pattern; its match/unmatch statistics
 *           are updated.
 *
 * Starting at the beginning of the text, the pattern is matched at the
 * cursor; for every match the message (if any) is printed, the optscript is
 * run or the tag is emitted, and a guest request is submitted when the
 * match fills one.  The cursor then moves by the start or end offset of the
 * pattern's "next scanning" group.  The loop ends when the pattern no
 * longer matches, when the cursor reaches the end of the text, or when a
 * match would not move the cursor forward.
 *
 * A non-positive advance covers both an empty advance and a "next scanning"
 * group that did not take part in the match (its offsets are -1).  Both are
 * reported with a warning and end the scan, so the cursor never leaves the
 * buffer.
 *
 * A disabled pattern is skipped without touching the statistics.
 *
 * Returns true if at least one match emitted a tag.
 */
static bool matchMultilineRegexPattern (struct lregexControlBlock *lcb,
										const vString* const allLines,
										regexTableEntry *entry)
{
	const char *start;
	const char *current;
	off_t offset = 0;
	regexPattern* patbuf = entry->pattern;
	struct mGroupSpec *mgroup = &patbuf->mgroup;
	struct guestSpec *guest = &patbuf->guest;

	bool result = false;
	regmatch_t pmatch [BACK_REFERENCE_COUNT];
	int match = 0;

	Assert (patbuf);

	if (patbuf->disabled && *(patbuf->disabled))
		return false;

	current = start = vStringValue (allLines);
	do
	{
		match = patbuf->pattern.backend->match (patbuf->pattern.backend,
												patbuf->pattern.code, current,
												vStringLength (allLines) - (current - start),
												pmatch);

		if (match != 0)
		{
			entry->statistics.unmatch++;
			break;
		}

		if (hasMessage(patbuf))
			printMessage(lcb->owner, patbuf, (current + pmatch[0].rm_so) - start, current, pmatch);

		/* The tag's line number comes from the start of the designated group. */
		offset = (current + pmatch [mgroup->forLineNumberDetermination].rm_so)
			- start;

		entry->statistics.match++;
		scriptWindow window = {
			.line = current,
			.start = start,
			.patbuf = patbuf,
			.pmatch = pmatch,
			.nmatch = BACK_REFERENCE_COUNT,
			.advanceto = false,
		};

		/* Without a name slot no tag will be made, so the script runs with no tag. */
		if (patbuf->optscript && (! hasNameSlot (patbuf)))
		{
			scriptSetup (optvm, lcb, CORK_NIL, &window);
			EsObject *e = scriptEval (optvm, patbuf->optscript);
			if (es_error_p (e))
				error (WARNING, "error when evaluating: %s", patbuf->optscript_src);
			es_object_unref (e);
			scriptTeardown (optvm, lcb);
		}

		if (patbuf->type == PTRN_TAG)
		{
			matchTagPattern (lcb, current, patbuf, pmatch, offset,
							 (patbuf->optscript && hasNameSlot (patbuf))? &window: NULL);
			result = true;
		}
		else if (patbuf->type == PTRN_CALLBACK)
			;	/* Not implemented yet */
		else
		{
			Assert ("invalid pattern type" == NULL);
			result = false;
			break;
		}

		if (fillGuestRequest (start, current, pmatch, guest, lcb->guest_req))
		{
			Assert (lcb->guest_req->lang != LANG_AUTO);
			if (isGuestRequestConsistent(lcb->guest_req))
				guestRequestSubmit (lcb->guest_req);
			guestRequestClear (lcb->guest_req);
		}

		/* Keep the advance signed: the group offsets are -1 when the group
		   did not participate, and storing that in an unsigned variable
		   would turn it into a huge forward jump. */
		const ptrdiff_t delta = (mgroup->nextFromStart
								 ? pmatch [mgroup->forNextScanning].rm_so
								 : pmatch [mgroup->forNextScanning].rm_eo);
		if (delta <= 0)
		{
			unsigned int pos = current - start;
			error (WARNING,
				   "a multi line regex pattern doesn't advance the input cursor: %s",
				   patbuf->pattern_string);
			error (WARNING, "Language: %s, input file: %s, pos: %u",
				   getLanguageName (lcb->owner), getInputFileName(), pos);
			break;
		}
		current += delta;

	} while (current < start + vStringLength (allLines));

	return result;
}
2026
2027 /* PUBLIC INTERFACE */
2028
2029 /* Match against all patterns for specified language. Returns true if at least
2030 * on pattern matched.
2031 */
matchRegex(struct lregexControlBlock * lcb,const vString * const line)2032 extern bool matchRegex (struct lregexControlBlock *lcb, const vString* const line)
2033 {
2034 bool result = false;
2035 unsigned int i;
2036 for (i = 0 ; i < ptrArrayCount(lcb->entries[REG_PARSER_SINGLE_LINE]) ; ++i)
2037 {
2038 regexTableEntry *entry = ptrArrayItem(lcb->entries[REG_PARSER_SINGLE_LINE], i);
2039 regexPattern *ptrn = entry->pattern;
2040
2041 Assert (ptrn);
2042
2043 if ((ptrn->xtagType != XTAG_UNKNOWN)
2044 && (!isXtagEnabled (ptrn->xtagType)))
2045 continue;
2046
2047 if (matchRegexPattern (lcb, line, entry))
2048 {
2049 result = true;
2050 if (ptrn->exclusive)
2051 break;
2052 }
2053 }
2054 return result;
2055 }
2056
/*
 * Prepare the control block and the optscript VM for a new input file.
 *
 * Resets the current scope, the table stack and the pending guest request,
 * pushes the shared lregex dictionary and then the control block's local
 * dictionary (created on first use) onto the VM's dictionary stack,
 * registers the control block as the VM's application data, and finally
 * runs the prelude hook.
 */
extern void notifyRegexInputStart (struct lregexControlBlock *lcb)
{
	lcb->currentScope = CORK_NIL;

	ptrArrayClear (lcb->tstack);
	guestRequestClear (lcb->guest_req);

	opt_vm_dstack_push (optvm, lregex_dict);

	/* The local dictionary is created lazily, the first time an input starts. */
	if (es_null (lcb->local_dict))
		lcb->local_dict = opt_dict_new (23);
	opt_vm_dstack_push (optvm, lcb->local_dict);
	opt_vm_set_app_data (optvm, lcb);
	scriptEvalHook (optvm, lcb, SCRIPT_HOOK_PRELUDE);
}
2072
notifyRegexInputEnd(struct lregexControlBlock * lcb)2073 extern void notifyRegexInputEnd (struct lregexControlBlock *lcb)
2074 {
2075 scriptEvalHook (optvm, lcb, SCRIPT_HOOK_SEQUEL);
2076 opt_vm_set_app_data (optvm, NULL);
2077 opt_vm_clear (optvm);
2078 opt_dict_clear (lcb->local_dict);
2079 unsigned long endline = getInputLineNumber ();
2080 fillEndLineFieldOfUpperScopes (lcb, endline);
2081 }
2082
/* Call DRIVER repeatedly until it returns EOF.
 *
 * The loop itself does nothing with the values DRIVER returns; all the
 * work is expected to happen inside DRIVER.
 */
extern void findRegexTagsMainloop (int (* driver)(void))
{
	for (;;)
	{
		if (driver () == EOF)
			break;
	}
}
2089
/* Driver for findRegexTagsMainloop(): read one line from the input file.
 * Returns 1 while readLineFromInputFile() yields a line, EOF once it
 * returns NULL.
 */
static int fileReadLineDriver(void)
{
	if (readLineFromInputFile () != NULL)
		return 1;

	return EOF;
}
2094
findRegexTags(void)2095 extern void findRegexTags (void)
2096 {
2097 findRegexTagsMainloop (fileReadLineDriver);
2098 }
2099
/* Return true if any entry of ENTRIES has a pattern that declares scope
 * actions or carries an optscript; false otherwise (including when
 * ENTRIES is empty).
 */
static bool doesExpectCorkInRegex0(ptrArray *entries)
{
	unsigned int idx = 0;

	while (idx < ptrArrayCount (entries))
	{
		regexTableEntry *e = ptrArrayItem (entries, idx);
		Assert (e && e->pattern);

		if (e->pattern->scopeActions)
			return true;
		if (e->pattern->optscript)
			return true;

		idx++;
	}

	return false;
}
2113
doesExpectCorkInRegex(struct lregexControlBlock * lcb)2114 extern bool doesExpectCorkInRegex (struct lregexControlBlock *lcb)
2115 {
2116 ptrArray *entries;
2117
2118 entries = lcb->entries[REG_PARSER_SINGLE_LINE];
2119 if (doesExpectCorkInRegex0 (entries))
2120 return true;
2121
2122 entries = lcb->entries[REG_PARSER_MULTI_LINE];
2123 if (doesExpectCorkInRegex0 (entries))
2124 return true;
2125
2126 for (unsigned int i = 0; i < ptrArrayCount(lcb->tables); i++)
2127 {
2128 struct regexTable *table = ptrArrayItem(lcb->tables, i);
2129 if (doesExpectCorkInRegex0 (table->entries))
2130 return true;
2131 }
2132
2133 return false;
2134 }
2135
escapeRegexPattern(const char * pattern)2136 static char *escapeRegexPattern (const char* pattern)
2137 {
2138 vString *p = vStringNew ();
2139
2140 while (*pattern != '\0')
2141 {
2142 char c = *pattern;
2143 if (c == '\n')
2144 vStringCatS(p, "\\n");
2145 else if (c == '\t')
2146 vStringCatS(p, "\\t");
2147 else if (c == '\\')
2148 vStringCatS(p, "\\\\");
2149 else
2150 vStringPut(p, c);
2151
2152 pattern++;
2153 }
2154
2155 return vStringDeleteUnwrap (p);
2156 }
2157
addTagRegexInternal(struct lregexControlBlock * lcb,int table_index,enum regexParserType regptype,const char * const regex,const char * const name,const char * const kinds,const char * const flags,bool * disabled)2158 static regexPattern *addTagRegexInternal (struct lregexControlBlock *lcb,
2159 int table_index,
2160 enum regexParserType regptype,
2161 const char* const regex,
2162 const char* const name,
2163 const char* const kinds,
2164 const char* const flags,
2165 bool *disabled)
2166 {
2167 Assert (regex != NULL);
2168 Assert (name != NULL);
2169
2170 if (!regexAvailable)
2171 return NULL;
2172
2173 regexCompiledCode cp = compileRegex (regptype, regex, flags);
2174 if (cp.code == NULL)
2175 {
2176 error (WARNING, "pattern: %s", regex);
2177 if (table_index != TABLE_INDEX_UNUSED)
2178 {
2179 struct regexTable *table = ptrArrayItem (lcb->tables, table_index);
2180 error (WARNING, "table: %s[%u]", table->name, ptrArrayCount (table->entries));
2181 error (WARNING, "language: %s", getLanguageName (lcb->owner));
2182 }
2183 else
2184 error (WARNING, "language: %s[%u]", getLanguageName (lcb->owner),
2185 ptrArrayCount (lcb->entries[regptype]));
2186 return NULL;
2187 }
2188
2189 char kindLetter;
2190 char* kindName;
2191 char* description;
2192 kindDefinition* fileKind;
2193
2194 bool explictly_defined = parseKinds (kinds, &kindLetter, &kindName, &description);
2195 fileKind = getLanguageKind (lcb->owner, KIND_FILE_INDEX);
2196 if (kindLetter == fileKind->letter)
2197 error (FATAL,
2198 "Kind letter \'%c\' used in regex definition \"%s\" of %s language is reserved in ctags main",
2199 kindLetter,
2200 regex,
2201 getLanguageName (lcb->owner));
2202 else if (!isalpha ((unsigned char)kindLetter))
2203 error (FATAL,
2204 "Kind letter must be an alphabetical character: \"%c\"",
2205 kindLetter);
2206
2207 if (strcmp (kindName, fileKind->name) == 0)
2208 error (FATAL,
2209 "Kind name \"%s\" used in regex definition \"%s\" of %s language is reserved in ctags main",
2210 kindName,
2211 regex,
2212 getLanguageName (lcb->owner));
2213
2214 const char *option_bsae = (regptype == REG_PARSER_SINGLE_LINE? "regex" :
2215 regptype == REG_PARSER_MULTI_LINE ? "mline-regex" :
2216 regptype == REG_PARSER_MULTI_TABLE? "_mtable-regex":
2217 NULL);
2218 Assert (option_bsae);
2219
2220 for (const char * p = kindName; *p; p++)
2221 {
2222 if (p == kindName)
2223 {
2224 if (!isalpha(*p))
2225 error (FATAL,
2226 "A kind name doesn't start with an alphabetical character: "
2227 "'%s' in \"--%s-%s\" option",
2228 kindName,
2229 option_bsae,
2230 getLanguageName (lcb->owner));
2231 }
2232 else
2233 {
2234 /*
2235 * People may object to this error.
2236 * Searching github repositories, I found not a few .ctags files
2237 * in which Exuberant-ctags users define kind names with whitespaces.
2238 * "FATAL" error breaks the compatibility.
2239 */
2240 if (!isalnum(*p))
2241 error (/* regptype == REG_PARSER_SINGLE_LINE? WARNING: */ FATAL,
2242 "Non-alphanumeric char is used in kind name: "
2243 "'%s' in \"--%s-%s\" option",
2244 kindName,
2245 option_bsae,
2246 getLanguageName (lcb->owner));
2247
2248 }
2249 }
2250
2251 regexPattern *rptr = addCompiledTagPattern (lcb, table_index,
2252 regptype, &cp, name,
2253 kindLetter, kindName, description, flags,
2254 explictly_defined,
2255 disabled);
2256 rptr->pattern_string = escapeRegexPattern(regex);
2257
2258 eFree (kindName);
2259 if (description)
2260 eFree (description);
2261
2262 if (*name == '\0')
2263 {
2264 if (rptr->exclusive || rptr->scopeActions & SCOPE_PLACEHOLDER
2265 || rptr->anonymous_tag_prefix
2266 || regptype == REG_PARSER_MULTI_TABLE
2267 || rptr->guest.lang.type != GUEST_LANG_UNKNOWN
2268 || rptr->optscript
2269 )
2270 rptr->accept_empty_name = true;
2271 else
2272 error (WARNING, "%s: regexp missing name pattern", regex);
2273 }
2274
2275 return rptr;
2276 }
2277
addTagRegex(struct lregexControlBlock * lcb,const char * const regex,const char * const name,const char * const kinds,const char * const flags,bool * disabled)2278 extern void addTagRegex (struct lregexControlBlock *lcb,
2279 const char* const regex,
2280 const char* const name,
2281 const char* const kinds,
2282 const char* const flags,
2283 bool *disabled)
2284 {
2285 addTagRegexInternal (lcb, TABLE_INDEX_UNUSED,
2286 REG_PARSER_SINGLE_LINE, regex, name, kinds, flags, disabled);
2287 }
2288
addTagMultiLineRegex(struct lregexControlBlock * lcb,const char * const regex,const char * const name,const char * const kinds,const char * const flags,bool * disabled)2289 extern void addTagMultiLineRegex (struct lregexControlBlock *lcb, const char* const regex,
2290 const char* const name, const char* const kinds, const char* const flags,
2291 bool *disabled)
2292 {
2293 addTagRegexInternal (lcb, TABLE_INDEX_UNUSED,
2294 REG_PARSER_MULTI_LINE, regex, name, kinds, flags, disabled);
2295 }
2296
addTagMultiTableRegex(struct lregexControlBlock * lcb,const char * const table_name,const char * const regex,const char * const name,const char * const kinds,const char * const flags,bool * disabled)2297 extern void addTagMultiTableRegex(struct lregexControlBlock *lcb,
2298 const char* const table_name,
2299 const char* const regex,
2300 const char* const name, const char* const kinds, const char* const flags,
2301 bool *disabled)
2302 {
2303 int table_index = getTableIndexForName (lcb, table_name);
2304
2305 if (table_index < 0)
2306 error (FATAL, "unknown table name: %s", table_name);
2307
2308 addTagRegexInternal (lcb, table_index, REG_PARSER_MULTI_TABLE, regex, name, kinds, flags,
2309 disabled);
2310 }
2311
addCallbackRegex(struct lregexControlBlock * lcb,const char * const regex,const char * const flags,const regexCallback callback,bool * disabled,void * userData)2312 extern void addCallbackRegex (struct lregexControlBlock *lcb,
2313 const char* const regex,
2314 const char* const flags,
2315 const regexCallback callback,
2316 bool *disabled,
2317 void * userData)
2318 {
2319 Assert (regex != NULL);
2320
2321 if (!regexAvailable)
2322 return;
2323
2324
2325 regexCompiledCode cp = compileRegex (REG_PARSER_SINGLE_LINE, regex, flags);
2326 if (cp.code == NULL)
2327 {
2328 error (WARNING, "pattern: %s", regex);
2329 error (WARNING, "language: %s", getLanguageName (lcb->owner));
2330 return;
2331 }
2332
2333 regexPattern *rptr = addCompiledCallbackPattern (lcb, &cp, callback, flags,
2334 disabled, userData);
2335 rptr->pattern_string = escapeRegexPattern(regex);
2336 }
2337
addTagRegexOption(struct lregexControlBlock * lcb,enum regexParserType regptype,const char * const pattern)2338 static void addTagRegexOption (struct lregexControlBlock *lcb,
2339 enum regexParserType regptype,
2340 const char* const pattern)
2341 {
2342 if (!regexAvailable)
2343 return;
2344
2345 int table_index = TABLE_INDEX_UNUSED;
2346 char * regex_pat = NULL;
2347 char *name, *kinds, *flags;
2348
2349
2350 if (regptype == REG_PARSER_MULTI_TABLE)
2351 {
2352 const char *c;
2353 for (c = pattern; *c; c++)
2354 {
2355 if (! (isalnum(*c) || *c == '_'))
2356 {
2357 if (*c && (*(c + 1) != '^'))
2358 {
2359 vString *tmp = vStringNew ();
2360
2361 /* Put '^' as prefix for the pattern */
2362 vStringPut(tmp, *c);
2363 vStringPut(tmp, '^');
2364 vStringCatS(tmp, c + 1);
2365 regex_pat = vStringDeleteUnwrap(tmp);
2366 }
2367 else
2368 regex_pat = eStrdup (c);
2369 break;
2370 }
2371 }
2372
2373 if (regex_pat == NULL || *regex_pat == '\0')
2374 error (FATAL, "wrong mtable pattern specification: %s", pattern);
2375
2376 char *table_name = eStrndup(pattern, c - pattern);
2377 table_index = getTableIndexForName (lcb, table_name);
2378 if (table_index < 0)
2379 error (FATAL, "unknown table name: %s (in %s)", table_name, pattern);
2380 eFree(table_name);
2381 }
2382 else
2383 regex_pat = eStrdup (pattern);
2384
2385 if (parseTagRegex (regptype, regex_pat, &name, &kinds, &flags))
2386 addTagRegexInternal (lcb, table_index, regptype, regex_pat, name, kinds, flags,
2387 NULL);
2388
2389 eFree (regex_pat);
2390 }
2391
processTagRegexOption(struct lregexControlBlock * lcb,enum regexParserType regptype,const char * const parameter)2392 extern void processTagRegexOption (struct lregexControlBlock *lcb,
2393 enum regexParserType regptype,
2394 const char* const parameter)
2395 {
2396 if (parameter == NULL || parameter [0] == '\0')
2397 clearPatternSet (lcb);
2398 else if (parameter [0] != '@')
2399 addTagRegexOption (lcb, regptype, parameter);
2400 else if (! doesFileExist (parameter + 1))
2401 error (WARNING, "cannot open regex file");
2402 else
2403 {
2404 const char* regexfile = parameter + 1;
2405
2406 verbose ("open a regex file: %s\n", regexfile);
2407 MIO* const mio = mio_new_file (regexfile, "r");
2408 if (mio == NULL)
2409 error (WARNING | PERROR, "%s", regexfile);
2410 else
2411 {
2412 vString* const regex = vStringNew ();
2413 while (readLineRaw (regex, mio))
2414 {
2415 if (vStringLength (regex) > 1 && vStringValue (regex)[0] != '\n')
2416 addTagRegexOption (lcb, regptype, vStringValue (regex));
2417 }
2418 mio_unref (mio);
2419 vStringDelete (regex);
2420 }
2421 }
2422 }
2423
2424 /*
2425 * Regex option parsing
2426 */
2427
printRegexFlags(bool withListHeader,bool machinable,const char * flags,FILE * fp)2428 extern void printRegexFlags (bool withListHeader, bool machinable, const char *flags, FILE *fp)
2429 {
2430 struct colprintTable * table = flagsColprintTableNew ();
2431
2432 if (flags && *flags != '\0')
2433 {
2434 /* Print backend specific flags.
2435 * This code is just stub because there is no backend having a specific flag.
2436 * The help message for this option is not updated. */
2437 struct flagDefsDescriptor desc = choose_backend (flags, REG_PARSER_SINGLE_LINE, true);
2438 flagsColprintAddDefinitions (table, desc.backend->fdefs, desc.backend->fdef_count);
2439 }
2440 else
2441 {
2442 flagsColprintAddDefinitions (table, backendFlagDefs, ARRAY_SIZE(backendFlagDefs));
2443 flagsColprintAddDefinitions (table, backendCommonRegexFlagDefs, ARRAY_SIZE(backendCommonRegexFlagDefs));
2444 flagsColprintAddDefinitions (table, prePtrnFlagDef, ARRAY_SIZE (prePtrnFlagDef));
2445 flagsColprintAddDefinitions (table, guestPtrnFlagDef, ARRAY_SIZE (guestPtrnFlagDef));
2446 flagsColprintAddDefinitions (table, scopePtrnFlagDef, ARRAY_SIZE (scopePtrnFlagDef));
2447 flagsColprintAddDefinitions (table, commonSpecFlagDef, ARRAY_SIZE (commonSpecFlagDef));
2448 }
2449
2450 flagsColprintTablePrint (table, withListHeader, machinable, fp);
2451 colprintTableDelete(table);
2452 }
2453
printMultilineRegexFlags(bool withListHeader,bool machinable,const char * flags,FILE * fp)2454 extern void printMultilineRegexFlags (bool withListHeader, bool machinable, const char *flags, FILE *fp)
2455 {
2456 struct colprintTable * table = flagsColprintTableNew ();
2457
2458 if (flags && *flags != '\0')
2459 {
2460 /* Print backend specific flags.
2461 * This code is just stub because there is no backend having a specific flag.
2462 * The help message for this option is not updated. */
2463 struct flagDefsDescriptor desc = choose_backend (flags, REG_PARSER_MULTI_LINE, true);
2464 flagsColprintAddDefinitions (table, desc.backend->fdefs, desc.backend->fdef_count);
2465 }
2466 else
2467 {
2468 flagsColprintAddDefinitions (table, backendFlagDefs, ARRAY_SIZE(backendFlagDefs));
2469 flagsColprintAddDefinitions (table, backendCommonRegexFlagDefs, ARRAY_SIZE(backendCommonRegexFlagDefs));
2470 flagsColprintAddDefinitions (table, multilinePtrnFlagDef, ARRAY_SIZE (multilinePtrnFlagDef));
2471 flagsColprintAddDefinitions (table, guestPtrnFlagDef, ARRAY_SIZE (guestPtrnFlagDef));
2472 flagsColprintAddDefinitions (table, commonSpecFlagDef, ARRAY_SIZE (commonSpecFlagDef));
2473 }
2474
2475 flagsColprintTablePrint (table, withListHeader, machinable, fp);
2476 colprintTableDelete(table);
2477 }
2478
printMultitableRegexFlags(bool withListHeader,bool machinable,const char * flags,FILE * fp)2479 extern void printMultitableRegexFlags (bool withListHeader, bool machinable, const char *flags, FILE *fp)
2480 {
2481 struct colprintTable * table = flagsColprintTableNew ();
2482
2483 if (flags && *flags != '\0')
2484 {
2485 /* Print backend specific flags.
2486 * This code is just stub because there is no backend having a specific flag.
2487 * The help message for this option is not updated. */
2488 struct flagDefsDescriptor desc = choose_backend (flags, REG_PARSER_MULTI_TABLE, true);
2489 flagsColprintAddDefinitions (table, desc.backend->fdefs, desc.backend->fdef_count);
2490 }
2491 else
2492 {
2493 flagsColprintAddDefinitions (table, backendFlagDefs, ARRAY_SIZE(backendFlagDefs));
2494 flagsColprintAddDefinitions (table, backendCommonRegexFlagDefs, ARRAY_SIZE(backendCommonRegexFlagDefs));
2495 flagsColprintAddDefinitions (table, multilinePtrnFlagDef, ARRAY_SIZE (multilinePtrnFlagDef));
2496 flagsColprintAddDefinitions (table, multitablePtrnFlagDef, ARRAY_SIZE (multitablePtrnFlagDef));
2497 flagsColprintAddDefinitions (table, guestPtrnFlagDef, ARRAY_SIZE (guestPtrnFlagDef));
2498 flagsColprintAddDefinitions (table, scopePtrnFlagDef, ARRAY_SIZE (scopePtrnFlagDef));
2499 flagsColprintAddDefinitions (table, commonSpecFlagDef, ARRAY_SIZE (commonSpecFlagDef));
2500 }
2501
2502 flagsColprintTablePrint (table, withListHeader, machinable, fp);
2503 colprintTableDelete(table);
2504 }
2505
freeRegexResources(void)2506 extern void freeRegexResources (void)
2507 {
2508 es_object_unref (lregex_dict);
2509 opt_vm_delete (optvm);
2510 }
2511
regexNeedsMultilineBuffer(struct lregexControlBlock * lcb)2512 extern bool regexNeedsMultilineBuffer (struct lregexControlBlock *lcb)
2513 {
2514 if (ptrArrayCount(lcb->entries [REG_PARSER_MULTI_LINE]) > 0)
2515 return true;
2516 else if (ptrArrayCount(lcb->tables) > 0)
2517 return true;
2518 else
2519 return false;
2520 }
2521
matchMultilineRegex(struct lregexControlBlock * lcb,const vString * const allLines)2522 extern bool matchMultilineRegex (struct lregexControlBlock *lcb, const vString* const allLines)
2523 {
2524 bool result = false;
2525
2526 unsigned int i;
2527
2528 for (i = 0; i < ptrArrayCount(lcb->entries [REG_PARSER_MULTI_LINE]); ++i)
2529 {
2530 regexTableEntry *entry = ptrArrayItem(lcb->entries [REG_PARSER_MULTI_LINE], i);
2531 Assert (entry && entry->pattern);
2532
2533 if ((entry->pattern->xtagType != XTAG_UNKNOWN)
2534 && (!isXtagEnabled (entry->pattern->xtagType)))
2535 continue;
2536
2537 result = matchMultilineRegexPattern (lcb, allLines, entry) || result;
2538 }
2539 return result;
2540 }
2541
getTableIndexForName(const struct lregexControlBlock * const lcb,const char * name)2542 static int getTableIndexForName (const struct lregexControlBlock *const lcb, const char *name)
2543 {
2544 unsigned int i;
2545
2546 for (i = 0; i < ptrArrayCount(lcb->tables); i++)
2547 {
2548 struct regexTable *table = ptrArrayItem(lcb->tables, i);
2549 if (strcmp (table->name, name) == 0)
2550 return (int)i;
2551 }
2552
2553 return TABLE_INDEX_UNUSED;
2554 }
2555
addRegexTable(struct lregexControlBlock * lcb,const char * name)2556 extern void addRegexTable (struct lregexControlBlock *lcb, const char *name)
2557 {
2558 const char *c;
2559 for (c = name; *c; c++)
2560 if (! (isalnum(*c) || *c == '_'))
2561 error (FATAL, "`%c' in \"%s\" is not acceptable as part of table name", *c, name);
2562
2563 if (getTableIndexForName(lcb, name) >= 0)
2564 {
2565 error (WARNING, "regex table \"%s\" is already defined", name);
2566 return;
2567 }
2568
2569 struct regexTable *table = xCalloc(1, struct regexTable);
2570 table->name = eStrdup (name);
2571 table->entries = ptrArrayNew(deleteTableEntry);
2572
2573 ptrArrayAdd (lcb->tables, table);
2574 }
2575
dumpSstack(FILE * fp,int scope)2576 static void dumpSstack(FILE* fp, int scope)
2577 {
2578 tagEntryInfo *entry;
2579 fprintf (fp, "scope : ");
2580 while ((entry = getEntryInCorkQueue (scope)))
2581 {
2582 fprintf(fp, "%s", entry->name);
2583
2584 scope = entry->extensionFields.scopeIndex;
2585 if (scope != CORK_NIL)
2586 fprintf(fp, "%c", '/');
2587 }
2588 fprintf (fp, "\n");
2589 }
2590
/* Print the names of the tables in TSTACK to FP on one line, from the
 * last element down to the first one, separated with '/'.
 */
static void dumpTstack(FILE* fp, ptrArray *tstack)
{
	unsigned int depth = ptrArrayCount(tstack);

	while (depth > 0)
	{
		struct regexTable *t = ptrArrayItem(tstack, depth - 1);
		/* No separator after the bottom element. */
		const char *sep = (depth == 1) ? "" : "/";

		fprintf(fp, "%s%s", t->name, sep);
		depth--;
	}

	fprintf(fp, "\n");
}
2608
printInputLine(FILE * vfp,const char * c,const off_t offset)2609 static void printInputLine(FILE* vfp, const char *c, const off_t offset)
2610 {
2611 vString *v = vStringNew ();
2612
2613 for (; *c && (*c != '\n'); c++)
2614 vStringPut(v, *c);
2615
2616 if (vStringLength (v) == 0 && *c == '\n')
2617 vStringCatS (v, "\\n");
2618
2619 fprintf (vfp, "\ninput : \"%s\" L%lu\n",
2620 vStringValue (v),
2621 getInputLineNumberForFileOffset(offset));
2622 vStringDelete(v);
2623 }
2624
printMultitableMessage(const langType language,const char * const tableName,const unsigned int index,const regexPattern * const ptrn,const off_t offset,const char * const current,const regmatch_t * const pmatch)2625 static void printMultitableMessage(const langType language,
2626 const char *const tableName,
2627 const unsigned int index,
2628 const regexPattern *const ptrn,
2629 const off_t offset,
2630 const char *const current,
2631 const regmatch_t* const pmatch)
2632 {
2633 vString *msg;
2634
2635 Assert (ptrn);
2636 Assert (ptrn->message.selection > 0);
2637 Assert (ptrn->message.message_string);
2638
2639 msg = substitute (current, ptrn->message.message_string, BACK_REFERENCE_COUNT, pmatch);
2640
2641 error (ptrn->message.selection, "%sMessage from mtable<%s/%s[%2u]>: %s (%s:%lu)",
2642 (ptrn->message.selection == FATAL ? "Fatal: " : ""),
2643 getLanguageName (language),
2644 tableName,
2645 index,
2646 vStringValue (msg),
2647 getInputFileName (),
2648 getInputLineNumberForFileOffset (offset));
2649
2650 vStringDelete (msg);
2651 }
2652
/* Apply the patterns of TABLE to the input START, beginning at *OFFSET.
 *
 * The entries of TABLE are tried in order. When one matches, its side
 * effects (optscript, tag, message, guest request) are performed, *OFFSET
 * is advanced, and the table action decides how to go on:
 *
 *  - no next table is chosen: the scan restarts from the first entry of
 *    TABLE at the new position;
 *  - enter, jump, reset, or leave with a non-empty table stack: the
 *    chosen table is returned;
 *  - quit, or leave with an empty table stack: NULL is returned.
 *
 * When no entry matches, or *OFFSET is beyond the end of the input, the
 * table on top of lcb->tstack is popped and returned; NULL is returned
 * if that stack is empty.
 */
static struct regexTable * matchMultitableRegexTable (struct lregexControlBlock *lcb,
													  struct regexTable *table, const vString *const start, unsigned int *offset)
{
	struct regexTable *next = NULL;
	const char *current;
	regmatch_t pmatch [BACK_REFERENCE_COUNT];
	const char *cstart = vStringValue(start);
	unsigned int delta;


 restart:
	current = cstart + *offset;

	/* Accept the case *offset == vStringLength(start)
	   because we want an empty regex // still matches empty input. */
	if (*offset > vStringLength(start))
	{
		*offset = vStringLength(start);
		goto out;
	}

	BEGIN_VERBOSE(vfp);
	{
		printInputLine(vfp, current, *offset);
	}
	END_VERBOSE();

	for (unsigned int i = 0; i < ptrArrayCount(table->entries); i++)
	{
		regexTableEntry *entry = ptrArrayItem(table->entries, i);
		/* Patterns bound to a disabled extra are skipped. */
		if ((entry->pattern->xtagType != XTAG_UNKNOWN)
			&& (!isXtagEnabled (entry->pattern->xtagType)))
			continue;

		regexPattern *ptrn = entry->pattern;
		struct guestSpec *guest = &ptrn->guest;

		Assert (ptrn);

		BEGIN_VERBOSE(vfp);
		{
			/* Printable form of the character at the current position. */
			char s[3];
			if (*current == '\n')
			{
				s [0] = '\\';
				s [1] = 'n';
				s [2] = '\0';
			}
			else if (*current == '\t')
			{
				s [0] = '\\';
				s [1] = 't';
				s [2] = '\0';
			}
			else if (*current == '\\')
			{
				s [0] = '\\';
				s [1] = '\\';
				s [2] = '\0';
			}
			else
			{
				s[0] = *current;
				s[1] = '\0';
			}

			if (s[1] == '\0')
				fprintf (vfp, "match : '%s' %15s[%2u] /", s, table->name, i);
			else if (s[0] == '\0')
				fprintf (vfp, "match : '' %15s[%2u] /", table->name, i);
			else
				fprintf (vfp, "match :'%s' %15s[%2u] / ", s, table->name, i);
			fprintf (vfp, "%s/\n", ptrn->pattern_string);
		}
		END_VERBOSE();

		int match = 0;

		if (ptrn->disabled && *(ptrn->disabled))
			continue;

		/* Only the input from CURRENT to the end is given to the backend. */
		match = ptrn->pattern.backend->match (ptrn->pattern.backend,
											  ptrn->pattern.code, current,
											  vStringLength(start) - (current - cstart),
											  pmatch);
		/* The zero result is handled as a successful match. */
		if (match == 0)
		{
			entry->statistics.match++;
			/* Offset, from the beginning of the input, of the group used
			 * for determining the line number of the tag. */
			off_t offset_for_tag = (current
									+ pmatch [ptrn->mgroup.forLineNumberDetermination].rm_so)
				- cstart;
			scriptWindow window = {
				.line = current,
				.start = cstart,
				.patbuf = ptrn,
				.pmatch = pmatch,
				.nmatch = BACK_REFERENCE_COUNT,
				.advanceto = false,
			};
			initTaction (&window.taction);

			/* Without a name slot, the optscript is evaluated here with no
			 * tag; otherwise WINDOW is passed to matchTagPattern() below. */
			if (ptrn->optscript && (! hasNameSlot (ptrn)))
			{
				scriptSetup (optvm, lcb, CORK_NIL, &window);
				EsObject *e = scriptEval (optvm, ptrn->optscript);
				if (es_error_p (e))
					error (WARNING, "error when evaluating: %s", ptrn->optscript_src);
				es_object_unref (e);
				scriptTeardown (optvm, lcb);
			}

			if (ptrn->type == PTRN_TAG)
			{
				matchTagPattern (lcb, current, ptrn, pmatch, offset_for_tag,
								 (ptrn->optscript && hasNameSlot (ptrn))? &window: NULL);

				/* The action left in WINDOW, if it is not NOP, takes
				 * precedence over the one defined in the pattern. */
				struct mTableActionSpec *taction = (window.taction.action == TACTION_NOP)
					? &(ptrn->taction)
					: &window.taction;

				BEGIN_VERBOSE(vfp);
				{
					fprintf(vfp, "result: matched %d bytes\n", (int)(pmatch[0].rm_eo));
					dumpSstack (vfp, lcb->currentScope);
				}
				END_VERBOSE();

				if (hasMessage(ptrn))
					printMultitableMessage (lcb->owner, table->name, i, ptrn,
											*offset, current, pmatch);

				if (fillGuestRequest (cstart, current, pmatch, guest, lcb->guest_req))
				{
					Assert (lcb->guest_req->lang != LANG_AUTO);
					if (isGuestRequestConsistent(lcb->guest_req))
						guestRequestSubmit (lcb->guest_req);
					guestRequestClear (lcb->guest_req);
				}

				/* How far the input position advances: the value stored in
				 * WINDOW when its advanceto flag is set, else the start or
				 * the end of the group chosen for the next scanning. */
				if (window.advanceto)
					delta = window.advanceto_delta;
				else
					delta = (ptrn->mgroup.nextFromStart
							 ? pmatch [ptrn->mgroup.forNextScanning].rm_so
							 : pmatch [ptrn->mgroup.forNextScanning].rm_eo);
				*offset += delta;

				switch (taction->action)
				{
				case TACTION_NOP:
					BEGIN_VERBOSE(vfp);
					{
						fprintf(vfp, "action: NOP in {%s}, stack: /", table->name);
						dumpTstack(vfp, lcb->tstack);
					}
					END_VERBOSE();
					break;
				case TACTION_ENTER:
					/* TODO: Limit the depth of tstack. */
					/* Push the table to come back to: the continuation
					 * table if given, else the current one. */
					ptrArrayAdd (lcb->tstack,
								 taction->continuation_table
								 ? taction->continuation_table
								 : table);
					next = taction->table;
					BEGIN_VERBOSE(vfp);
					{
						if (taction->continuation_table)
							fprintf(vfp, "action: [enter] to {%s}, cont: {%s}, stack: /",
									next->name,
									taction->continuation_table->name);
						else
							fprintf(vfp, "action: [enter] to {%s}, stack: /", next->name);
						dumpTstack(vfp, lcb->tstack);
					}
					END_VERBOSE();
					break;
				case TACTION_LEAVE:
					BEGIN_VERBOSE(vfp);
					{
						fprintf(vfp, "action: [leave] from {%s}, stack: /", table->name);
						dumpTstack(vfp, lcb->tstack);
					}
					END_VERBOSE();
					if (ptrArrayCount (lcb->tstack) == 0)
					{
						error (WARNING, "leave is specified as regex table action but the table stack is empty");
						return NULL;
					}
					next = ptrArrayLast(lcb->tstack);
					ptrArrayRemoveLast (lcb->tstack);
					break;
				case TACTION_JUMP:
					next = taction->table;
					BEGIN_VERBOSE(vfp);
					{
						fprintf(vfp, "action: [jump] from {%s} to {%s}, stack: /", table->name, next->name);
						dumpTstack(vfp, lcb->tstack);
					}
					END_VERBOSE();

					break;
				case TACTION_RESET:
					next = taction->table;
					BEGIN_VERBOSE(vfp);
					{
						fprintf(vfp, "action: [reset] to {%s}, stack: /", next->name);
					}
					END_VERBOSE();

					ptrArrayClear (lcb->tstack);
					break;
				case TACTION_QUIT:
					BEGIN_VERBOSE(vfp);
					{
						fprintf(vfp, "action: [quit], stack: /");
						dumpTstack(vfp, lcb->tstack);
					}
					END_VERBOSE();
					return NULL;
				}

				/* A next table was chosen: leave the loop and return it. */
				if (next)
					break;

				/* Staying in this table without consuming any input would
				 * repeat the same match forever. */
				if (delta == 0)
				{
					error (WARNING, "Forcefully advance the input pos because");
					error (WARNING, "following conditions for entering infinite loop are satisfied:");
					error (WARNING, "+ matching the pattern succeeds,");
					error (WARNING, "+ the next table is not given, and");
					error (WARNING, "+ the input file pos doesn't advance.");
					error (WARNING, "Language: %s, input file: %s, pos: %u",
						   getLanguageName (lcb->owner), getInputFileName(), *offset);
					++*offset;
				}
			}
			else if (ptrn->type == PTRN_CALLBACK)
				;	/* Not implemented yet */
			else
			{
				Assert ("invalid pattern type" == NULL);
				break;
			}
			/* Try the entries of this table again at the new position. */
			goto restart;
		}
		else
			entry->statistics.unmatch++;
	}
 out:
	/* No next table was chosen: fall back to the table on top of the
	 * table stack, if any. */
	if (next == NULL && ptrArrayCount (lcb->tstack) > 0)
	{
		static int apop_count = 0;
		next = ptrArrayLast(lcb->tstack);
		verbose("result: no match - autopop<%d> from {%s} to {%s} @ %lu\n", apop_count++, table->name, next->name,
				getInputLineNumberForFileOffset(*offset));
		ptrArrayRemoveLast (lcb->tstack);
	}
	return next;
}
2912
extendRegexTable(struct lregexControlBlock * lcb,const char * src,const char * dist)2913 extern void extendRegexTable (struct lregexControlBlock *lcb, const char *src, const char *dist)
2914 {
2915
2916 int i;
2917 struct regexTable * src_table;
2918 struct regexTable * dist_table;
2919
2920 verbose ("extend regex table \"%s\" with \"%s\"\n", dist, src);
2921
2922 i = getTableIndexForName (lcb, src);
2923 if (i < 0)
2924 error (FATAL, "no such regex table in %s: %s", getLanguageName(lcb->owner), src);
2925 src_table = ptrArrayItem(lcb->tables, i);
2926
2927 i = getTableIndexForName (lcb, dist);
2928 if (i < 0)
2929 error (FATAL, "no such regex table in %s: %s", getLanguageName(lcb->owner), dist);
2930 dist_table = ptrArrayItem(lcb->tables, i);
2931
2932 for (i = 0; i < (int)ptrArrayCount(src_table->entries); i++)
2933 {
2934 regexTableEntry *entry = ptrArrayItem (src_table->entries, i);
2935 ptrArrayAdd(dist_table->entries, newRefPatternEntry(entry));
2936 }
2937 }
2938
printMultitableStatistics(struct lregexControlBlock * lcb)2939 extern void printMultitableStatistics (struct lregexControlBlock *lcb)
2940 {
2941 if (ptrArrayCount(lcb->tables) == 0)
2942 return;
2943
2944 fprintf(stderr, "\nMTABLE REGEX STATISTICS of %s\n", getLanguageName (lcb->owner));
2945 fputs("==============================================\n", stderr);
2946 for (unsigned int i = 0; i < ptrArrayCount(lcb->tables); i++)
2947 {
2948 struct regexTable *table = ptrArrayItem (lcb->tables, i);
2949 fprintf(stderr, "%s\n", table->name);
2950 fputs("-----------------------\n", stderr);
2951 for (unsigned int j = 0; j < ptrArrayCount(table->entries); j++)
2952 {
2953 regexTableEntry *entry = ptrArrayItem (table->entries, j);
2954 Assert (entry && entry->pattern);
2955 fprintf(stderr, "%10u/%-10u%-40s ref: %d\n",
2956 entry->statistics.match,
2957 entry->statistics.unmatch + entry->statistics.match,
2958 entry->pattern->pattern_string,
2959 entry->pattern->refcount);
2960 }
2961 fputc('\n', stderr);
2962 }
2963 }
2964
matchMultitableRegex(struct lregexControlBlock * lcb,const vString * const allLines)2965 extern bool matchMultitableRegex (struct lregexControlBlock *lcb, const vString* const allLines)
2966 {
2967 if (ptrArrayCount (lcb->tables) == 0)
2968 return false;
2969
2970 struct regexTable *table = ptrArrayItem (lcb->tables, 0);
2971 unsigned int offset = 0;
2972
2973 int motionless_counter = 0;
2974 unsigned int last_offset;
2975
2976
2977 while (table)
2978 {
2979 last_offset = offset;
2980 table = matchMultitableRegexTable(lcb, table, allLines, &offset);
2981
2982 if (last_offset == offset)
2983 motionless_counter++;
2984 else
2985 motionless_counter = 0;
2986
2987 if (motionless_counter > MTABLE_MOTIONLESS_MAX)
2988 {
2989 error (WARNING, "mtable<%s/%s>: the input cursor stays at %u in %s so long though the tables are switched",
2990 getLanguageName (lcb->owner),
2991 table->name, offset, getInputFileName ());
2992 break;
2993 }
2994
2995 if (table && (ptrArrayCount (lcb->tstack) > MTABLE_STACK_MAX_DEPTH))
2996 {
2997 unsigned int i;
2998 struct regexTable *t;
2999
3000 error (WARNING, "mtable<%s/%s>: the tenter/tleave stack overflows at %u in %s",
3001 getLanguageName (lcb->owner),
3002 table->name, offset, getInputFileName ());
3003 error (WARNING, "DUMP FROM THE TOP:");
3004 /* TODO: use dumpTstack */
3005 for (i = ptrArrayCount(lcb->tstack); 0 < i; --i)
3006 {
3007 t = ptrArrayItem (lcb->tstack, i - 1);
3008 error (WARNING, "%3u %s", i - 1, t->name);
3009 }
3010
3011 break;
3012 }
3013 }
3014
3015 return true;
3016 }
3017
/* Make a promise for PARSER covering the area of the input delimited by
 * the offsets STARTOFFSET and ENDOFFSET.
 *
 * makePromise() takes the area as pairs of a line number and an offset
 * within that line, so each offset is converted to its line number and
 * the offset of the beginning of that line is subtracted from it.
 *
 * Returns what makePromise() returns.
 */
static int makePromiseForAreaSpecifiedWithOffsets (const char *parser,
												   off_t startOffset,
												   off_t endOffset)
{
	unsigned long startLine = getInputLineNumberForFileOffset(startOffset);
	unsigned long endLine = getInputLineNumberForFileOffset(endOffset);
	unsigned long startLineOffset = getInputFileOffsetForLine (startLine);
	unsigned long endLineOffset = getInputFileOffsetForLine (endLine);

	/* NOTE(review): these compare an off_t with an unsigned long, so the
	 * usual arithmetic conversions apply -- confirm the intended types. */
	Assert(startOffset >= startLineOffset);
	Assert(endOffset >= endLineOffset);

	/* The last argument repeats the in-line offset of the start position;
	 * presumably it is the offset of the area within its source line --
	 * TODO confirm against makePromise(). */
	return makePromise (parser,
						startLine, startOffset - startLineOffset,
						endLine, endOffset - endLineOffset,
						startOffset - startLineOffset);
}
3035
guestRequestNew(void)3036 static struct guestRequest *guestRequestNew (void)
3037 {
3038 struct guestRequest *r = xMalloc (1, struct guestRequest);
3039
3040
3041 guestRequestClear (r);
3042 return r;
3043 }
3044
/* Release the memory of the guest request R with eFree(). */
static void guestRequestDelete (struct guestRequest *r)
{
	eFree (r);
}
3049
guestRequestIsFilled(struct guestRequest * r)3050 static bool guestRequestIsFilled(struct guestRequest *r)
3051 {
3052 return (r->lang_set && (r->boundary + 0)->offset_set && (r->boundary + 1)->offset_set);
3053 }
3054
guestRequestClear(struct guestRequest * r)3055 static void guestRequestClear (struct guestRequest *r)
3056 {
3057 r->lang_set = false;
3058 r->boundary[BOUNDARY_START].offset_set = false;
3059 r->boundary[BOUNDARY_END].offset_set = false;
3060 }
3061
guestRequestSubmit(struct guestRequest * r)3062 static void guestRequestSubmit (struct guestRequest *r)
3063 {
3064 const char *langName = getLanguageName (r->lang);
3065 verbose ("guestRequestSubmit: %s; "
3066 "range: %"PRId64" - %"PRId64"\n",
3067 langName,
3068 (int64_t)r->boundary[BOUNDARY_START].offset,
3069 (int64_t)r->boundary[BOUNDARY_END].offset);
3070 makePromiseForAreaSpecifiedWithOffsets (langName,
3071 r->boundary[BOUNDARY_START].offset,
3072 r->boundary[BOUNDARY_END].offset);
3073 }
3074
3075 /*
3076 * Script related functions
3077 */
3078
/* This function expects { code }} as input.
 * Be careful: the curly brackets must be unbalanced.
 */
scriptRead(OptVM * vm,const char * src)3082 static EsObject *scriptRead (OptVM *vm, const char *src)
3083 {
3084 size_t len = strlen (src);
3085 Assert (len > 2);
3086 Assert (src[len - 1] == '}');
3087 Assert (src[len - 2] == '}');
3088
3089 EsObject *obj = optscriptRead (vm, src + 1, len - 1 - 1);
3090 if (es_error_p (obj))
3091 error (FATAL, "failed in loading an optscript: %s", src);
3092 return obj;
3093 }
3094
/* Evaluate OPTSCRIPT on VM; a thin wrapper returning what
 * optscriptEval() returns. */
extern EsObject* scriptEval (OptVM *vm, EsObject *optscript)
{
	EsObject *result = optscriptEval (vm, optscript);

	return result;
}
3099
/* Evaluate every code fragment registered in LCB for HOOK on VM.
 *
 * The source strings in lcb->hook[HOOK] are read into code objects the
 * first time the hook runs (i.e. while lcb->hook_code[HOOK] is still
 * empty); later calls reuse the objects stored in lcb->hook_code[HOOK].
 * A failure to read a fragment is FATAL; a failure to evaluate one is
 * only reported as a WARNING, and the remaining fragments still run.
 */
static void scriptEvalHook (OptVM *vm, struct lregexControlBlock *lcb, enum scriptHook hook)
{
	if (ptrArrayCount (lcb->hook_code[hook]) == 0)
	{
		for (unsigned int i = 0; i < ptrArrayCount (lcb->hook[hook]); i++)
		{
			const char *src = ptrArrayItem (lcb->hook[hook], i);
			EsObject *code = scriptRead (vm, src);
			if (es_error_p (code))
				error (FATAL, "error when reading hook[%d] code: %s", hook, src);
			/* The array keeps its own reference; ours is dropped. */
			ptrArrayAdd (lcb->hook_code[hook], es_object_ref (code));
			es_object_unref (code);
		}
	}

	for (unsigned int i = 0; i < ptrArrayCount (lcb->hook_code[hook]); i++)
	{
		EsObject *code = ptrArrayItem (lcb->hook_code[hook], i);
		EsObject *e = optscriptEval (vm, code);
		if (es_error_p (e))
			/* The source text of the i-th fragment lives in the array
			 * for HOOK, at the same index as its code object. */
			error (WARNING, "error when evaluating hook[%d] code: %s",
				   hook, (char *)ptrArrayItem (lcb->hook[hook], i));
	}
}
3123
/* Prepare VM for running scripts of LCB: set up the VM with LCB's local
 * dictionary and CORKINDEX via optscriptSetup(), and make WINDOW the
 * current script window of LCB. */
static void scriptSetup (OptVM *vm, struct lregexControlBlock *lcb, int corkIndex, scriptWindow *window)
{
	lcb->window = window;

	optscriptSetup (vm, lcb->local_dict, corkIndex);
}
3129
/* Counterpart of scriptSetup(): tear down VM with LCB's local dictionary
 * via optscriptTeardown(), then detach the script window from LCB. */
static void scriptTeardown (OptVM *vm, struct lregexControlBlock *lcb)
{
	optscriptTeardown (vm, lcb->local_dict);

	lcb->window = NULL;
}
3135
addOptscriptToHook(struct lregexControlBlock * lcb,enum scriptHook hook,const char * code)3136 extern void addOptscriptToHook (struct lregexControlBlock *lcb, enum scriptHook hook, const char *code)
3137 {
3138 ptrArrayAdd (lcb->hook[hook], eStrdup (code));
3139 }
3140
3141 /* Return true if available. */
checkRegex(void)3142 extern bool checkRegex (void)
3143 {
3144 #if defined (CHECK_REGCOMP)
3145 {
3146 /* Check for broken regcomp() on Cygwin */
3147 regex_t patbuf;
3148 int errcode;
3149 if (regcomp (&patbuf, "/hello/", 0) != 0)
3150 error (WARNING, "Disabling broken regex");
3151 else
3152 regexAvailable = true;
3153 }
3154 #else
3155 /* We are using bundled regex engine. */
3156 regexAvailable = true;
3157 #endif
3158
3159 return regexAvailable;
3160 }
3161
3162 static EsObject *OPTSCRIPT_ERR_UNKNOWNKIND;
3163
3164 /* name:str kind:name loc _TAG tag
3165 * name:str kind:name _TAG tag */
lrop_make_tag(OptVM * vm,EsObject * name)3166 static EsObject* lrop_make_tag (OptVM *vm, EsObject *name)
3167 {
3168 matchLoc *loc;
3169
3170 if (opt_vm_ostack_count (vm) < 1)
3171 return OPT_ERR_UNDERFLOW;
3172
3173 int index;
3174 EsObject *top = opt_vm_ostack_top (vm);
3175 if (es_object_get_type (top) == OPT_TYPE_MATCHLOC)
3176 {
3177 if (opt_vm_ostack_count (vm) < 3)
3178 return OPT_ERR_UNDERFLOW;
3179 loc = es_pointer_get (top);
3180 index = 1;
3181 }
3182 else
3183 {
3184 struct lregexControlBlock *lcb = opt_vm_get_app_data (vm);
3185 if (lcb->window->patbuf->regptype != REG_PARSER_SINGLE_LINE)
3186 return OPT_ERR_TYPECHECK;
3187 if (opt_vm_ostack_count (vm) < 2)
3188 return OPT_ERR_UNDERFLOW;
3189 loc = NULL;
3190 index = 0;
3191 }
3192
3193 EsObject *kind = opt_vm_ostack_peek (vm, index++);
3194 if (es_object_get_type (kind) != OPT_TYPE_NAME)
3195 return OPT_ERR_TYPECHECK;
3196 EsObject *kind_sym = es_pointer_get (kind);
3197 const char *kind_str = es_symbol_get (kind_sym);
3198 kindDefinition* kind_def = getLanguageKindForName (getInputLanguage (),
3199 kind_str);
3200 if (!kind_def)
3201 return OPTSCRIPT_ERR_UNKNOWNKIND;
3202 int kind_index = kind_def->id;
3203
3204 EsObject *tname = opt_vm_ostack_peek (vm, index++);
3205 if (es_object_get_type (tname) != OPT_TYPE_STRING)
3206 return OPT_ERR_TYPECHECK;
3207 const char *n = opt_string_get_cstr (tname);
3208 if (n [0] == '\0')
3209 return OPT_ERR_RANGECHECK; /* TODO */
3210
3211 tagEntryInfo *e = xMalloc (1, tagEntryInfo);
3212 initRegexTag (e, eStrdup (n),
3213 kind_index, ROLE_DEFINITION_INDEX, CORK_NIL, 0,
3214 loc? loc->line: 0, loc? &loc->pos: NULL, XTAG_UNKNOWN);
3215 EsObject *obj = es_pointer_new (OPT_TYPE_TAG, e);
3216 if (es_error_p (obj))
3217 return obj;
3218
3219 while (index-- > 0)
3220 opt_vm_ostack_pop (vm);
3221
3222 opt_vm_ostack_push (vm, obj);
3223 es_object_unref (obj);
3224 return es_false;
3225 }
3226
3227 static EsObject *OPTSCRIPT_ERR_UNKNOWNROLE;
3228
lrop_make_reftag(OptVM * vm,EsObject * name)3229 static EsObject* lrop_make_reftag (OptVM *vm, EsObject *name)
3230 {
3231 matchLoc *loc;
3232
3233 if (opt_vm_ostack_count (vm) < 1)
3234 return OPT_ERR_UNDERFLOW;
3235
3236 int index;
3237 EsObject *top = opt_vm_ostack_top (vm);
3238 if (es_object_get_type (top) == OPT_TYPE_MATCHLOC)
3239 {
3240 if (opt_vm_ostack_count (vm) < 4)
3241 return OPT_ERR_UNDERFLOW;
3242 loc = es_pointer_get (top);
3243 index = 1;
3244 }
3245 else
3246 {
3247 struct lregexControlBlock *lcb = opt_vm_get_app_data (vm);
3248 if (lcb->window->patbuf->regptype != REG_PARSER_SINGLE_LINE)
3249 return OPT_ERR_TYPECHECK;
3250 if (opt_vm_ostack_count (vm) < 3)
3251 return OPT_ERR_UNDERFLOW;
3252 loc = NULL;
3253 index = 0;
3254 }
3255
3256 EsObject *role = opt_vm_ostack_peek (vm, index++);
3257 if (es_object_get_type (role) != OPT_TYPE_NAME)
3258 return OPT_ERR_TYPECHECK;
3259
3260 EsObject *kind = opt_vm_ostack_peek (vm, index++);
3261 if (es_object_get_type (kind) != OPT_TYPE_NAME)
3262 return OPT_ERR_TYPECHECK;
3263 EsObject *kind_sym = es_pointer_get (kind);
3264 const char *kind_str = es_symbol_get (kind_sym);
3265 langType lang = getInputLanguage ();
3266 kindDefinition* kind_def = getLanguageKindForName (lang, kind_str);
3267 if (!kind_def)
3268 return OPTSCRIPT_ERR_UNKNOWNKIND;
3269 int kind_index = kind_def->id;
3270
3271 EsObject *role_sym = es_pointer_get (role);
3272 const char *role_str = es_symbol_get (role_sym);
3273 roleDefinition* role_def = getLanguageRoleForName (lang, kind_index, role_str);
3274 if (!role_def)
3275 return OPTSCRIPT_ERR_UNKNOWNROLE;
3276 int role_index = role_def->id;
3277
3278 EsObject *tname = opt_vm_ostack_peek (vm, index++);
3279 if (es_object_get_type (tname) != OPT_TYPE_STRING)
3280 return OPT_ERR_TYPECHECK;
3281 const char *n = opt_string_get_cstr (tname);
3282 if (n [0] == '\0')
3283 return OPT_ERR_RANGECHECK; /* TODO */
3284
3285 tagEntryInfo *e = xMalloc (1, tagEntryInfo);
3286 initRegexTag (e, eStrdup (n),
3287 kind_index, role_index, CORK_NIL, 0,
3288 loc? loc->line: 0, loc? &loc->pos: NULL,
3289 role_index == ROLE_DEFINITION_INDEX
3290 ? XTAG_UNKNOWN
3291 : XTAG_REFERENCE_TAGS);
3292 EsObject *obj = es_pointer_new (OPT_TYPE_TAG, e);
3293 if (es_error_p (obj))
3294 return obj;
3295
3296 while (index-- > 0)
3297 opt_vm_ostack_pop (vm);
3298
3299 opt_vm_ostack_push (vm, obj);
3300 es_object_unref (obj);
3301 return es_false;
3302 }
3303
3304 /* tag COMMIT int */
lrop_commit_tag(OptVM * vm,EsObject * name)3305 static EsObject* lrop_commit_tag (OptVM *vm, EsObject *name)
3306 {
3307 EsObject *tag = opt_vm_ostack_top (vm);
3308 if (es_object_get_type (tag) != OPT_TYPE_TAG)
3309 return OPT_ERR_TYPECHECK;
3310
3311 tagEntryInfo *e = es_pointer_get (tag);
3312 int corkIndex = makeTagEntry (e);
3313 EsObject *n = es_integer_new (corkIndex);
3314 if (es_error_p (n))
3315 return n;
3316 opt_vm_ostack_pop (vm);
3317 opt_vm_ostack_push (vm, n);
3318 es_object_unref (n);
3319 return es_false;
3320 }
3321
/* group:int /start|/end _MATCHLOC matchloc
 * group:int             _MATCHLOC matchloc
 *
 * Push a matchloc object for the match group GROUP of the current script
 * window.  Without /start or /end, the start side is used.  GROUP must be
 * 1 or greater; a group for which make_mloc() yields nothing is reported
 * as a rangecheck error. */
static EsObject* lrop_get_match_loc (OptVM *vm, EsObject *name)
{
	if (opt_vm_ostack_count (vm) < 1)
		return OPT_ERR_UNDERFLOW;

	EsObject *top = opt_vm_ostack_top (vm);
	EsObject *groupObj;
	bool fromStart;
	int noperands;		/* how many operands are consumed on success */

	if (es_object_get_type (top) == ES_TYPE_INTEGER)
	{
		groupObj = top;
		fromStart = true;
		noperands = 1;
	}
	else
	{
		/* The two names are created once and kept for later calls. */
		static EsObject *start_name, *end_name;
		if (!start_name)
		{
			start_name = opt_name_new_from_cstr ("start");
			end_name = opt_name_new_from_cstr ("end");
		}

		if (es_object_equal (top, start_name))
			fromStart = true;
		else if (es_object_equal (top, end_name))
			fromStart = false;
		else
			return OPT_ERR_TYPECHECK;

		if (opt_vm_ostack_count (vm) < 2)
			return OPT_ERR_UNDERFLOW;

		groupObj = opt_vm_ostack_peek (vm, 1);
		if (es_object_get_type (groupObj) != ES_TYPE_INTEGER)
			return OPT_ERR_TYPECHECK;
		noperands = 2;
	}

	const int groupNumber = es_integer_get (groupObj);
	if (groupNumber < 1)
		return OPT_ERR_RANGECHECK;

	struct lregexControlBlock *lcb = opt_vm_get_app_data (vm);
	matchLoc *mloc = make_mloc (lcb->window, groupNumber, fromStart);
	if (mloc == NULL)
		return OPT_ERR_RANGECHECK;

	EsObject *mlocobj = es_pointer_new (OPT_TYPE_MATCHLOC, mloc);
	if (es_error_p (mlocobj))
	{
		eFree (mloc);
		return mlocobj;
	}

	while (noperands-- > 0)
		opt_vm_ostack_pop (vm);
	opt_vm_ostack_push (vm, mlocobj);
	es_object_unref (mlocobj);

	return es_false;
}
3389
/* matchloc _MATCHLOC2LINE int:line
 *
 * Replace the matchloc on top of the stack with its line number. */
static EsObject* ldrop_get_line_from_matchloc (OptVM *vm, EsObject *name)
{
	EsObject *top = opt_vm_ostack_top (vm);

	if (es_object_get_type (top) != OPT_TYPE_MATCHLOC)
		return OPT_ERR_TYPECHECK;

	const matchLoc *where = es_pointer_get (top);
	EsObject *lineObj = es_integer_new (where->line);
	if (es_error_p (lineObj))
		return lineObj;

	opt_vm_ostack_pop (vm);
	opt_vm_ostack_push (vm, lineObj);
	es_object_unref (lineObj);

	return es_false;
}
3406
make_mloc_from_tagEntryInfo(tagEntryInfo * e)3407 static matchLoc* make_mloc_from_tagEntryInfo(tagEntryInfo *e)
3408 {
3409 matchLoc *mloc = xMalloc (1, matchLoc);
3410 mloc->delta = 0;
3411 mloc->line = e->lineNumber;
3412 mloc->pos = e->filePosition;
3413
3414 return mloc;
3415 }
3416
/* index:int _TAGLOC matchloc
 *
 * Replace the cork index on top of the stack with a matchloc built from
 * the tag entry stored at that index.  The index must be greater than
 * CORK_NIL and smaller than the number of entries in the cork queue. */
static EsObject* lrop_get_tag_loc (OptVM *vm, EsObject *name)
{
	EsObject *indexObj = opt_vm_ostack_top (vm);

	if (es_object_get_type (indexObj) != ES_TYPE_INTEGER)
		return OPT_ERR_TYPECHECK;

	const int corkIndex = es_integer_get(indexObj);
	if (corkIndex <= CORK_NIL || corkIndex >= countEntryInCorkQueue())
		return OPT_ERR_RANGECHECK;

	tagEntryInfo *entry = getEntryInCorkQueue (corkIndex);
	if (entry == NULL)
		return OPT_ERR_TYPECHECK; /* ??? */

	matchLoc *where = make_mloc_from_tagEntryInfo (entry);
	EsObject *whereObj = es_pointer_new (OPT_TYPE_MATCHLOC, where);
	if (es_error_p (whereObj))
	{
		eFree (where);
		return whereObj;
	}

	opt_vm_ostack_pop (vm);
	opt_vm_ostack_push (vm, whereObj);
	es_object_unref (whereObj);

	return es_false;
}
3445
lrop_get_match_string_common(OptVM * vm,int i,int npop)3446 static EsObject* lrop_get_match_string_common (OptVM *vm, int i, int npop)
3447 {
3448 struct lregexControlBlock *lcb = opt_vm_get_app_data (vm);
3449 scriptWindow *window = lcb->window;
3450 const char *cstr = make_match_string (window, i);
3451 if (!cstr)
3452 {
3453 for (; npop > 0; npop--)
3454 opt_vm_ostack_pop (vm);
3455 opt_vm_ostack_push (vm, es_false);
3456 return es_false;
3457 }
3458 EsObject *str = opt_string_new_from_cstr (cstr);
3459 eFree ((void *)cstr);
3460
3461 for (; npop > 0; npop--)
3462 opt_vm_ostack_pop (vm);
3463
3464 opt_vm_ostack_push (vm, str);
3465 es_object_unref (str);
3466 return es_false;
3467 }
3468
3469 /* Handles \1, \2, ... */
lrop_get_match_string_named_group(OptVM * vm,EsObject * name)3470 static EsObject* lrop_get_match_string_named_group (OptVM *vm, EsObject *name)
3471 {
3472 void * data = es_symbol_get_data (name);
3473 int i = HT_PTR_TO_INT (data);
3474
3475 return lrop_get_match_string_common (vm, i, 0);
3476 }
3477
/* group:int _MATCHSTR string true
 * group:int _MATCHSTR false
 *
 * Replace the group number on top of the stack with the text of that
 * group followed by true, or with false alone when there is no text for
 * the group.  The group number must be 1 or greater. */
static EsObject* lrop_get_match_string_group_on_stack (OptVM *vm, EsObject *name)
{
	EsObject *groupObj = opt_vm_ostack_top (vm);

	if (!es_integer_p (groupObj))
		return OPT_ERR_TYPECHECK;

	const int groupNumber = es_integer_get (groupObj);
	if (groupNumber < 1)
		return OPT_ERR_RANGECHECK;

	EsObject *status = lrop_get_match_string_common (vm, groupNumber, 1);
	if (es_error_p (status))
		return status;

	/* The common body left either a string or false on the stack;
	 * only a string is followed by true. */
	EsObject *pushed = opt_vm_ostack_top (vm);
	if (es_object_get_type (pushed) == OPT_TYPE_STRING)
		opt_vm_ostack_push (vm, es_true);

	return es_false;
}
3497
/* Return a newly allocated copy of the text matched by GROUP in WINDOW.
 *
 * NULL is returned when WINDOW is NULL, when GROUP is not in the range
 * 1 .. nmatch - 1, or when the group has no match (rm_so == -1).
 * The caller frees the result. */
static char* make_match_string (scriptWindow *window, int group)
{
	if (window == NULL)
		return NULL;

	if (group <= 0 || group >= window->nmatch)
		return NULL;

	if (window->pmatch [group].rm_so == -1)
		return NULL;

	const char *head = window->line + window->pmatch [group].rm_so;
	const int length = window->pmatch [group].rm_eo - window->pmatch [group].rm_so;

	return eStrndup (head, length);
}
3511
make_mloc(scriptWindow * window,int group,bool start)3512 static matchLoc *make_mloc (scriptWindow *window, int group, bool start)
3513 {
3514 if (window == NULL
3515 || 0 > group
3516 || window->nmatch <= group
3517 || window->pmatch [group].rm_so == -1)
3518 return NULL;
3519
3520 matchLoc *mloc = xMalloc (1, matchLoc);
3521 if (window->patbuf->regptype == REG_PARSER_SINGLE_LINE)
3522 {
3523 mloc->delta = 0;
3524 mloc->line = getInputLineNumber ();
3525 mloc->pos = getInputFilePosition ();
3526 }
3527 else
3528 {
3529 mloc->delta = (start
3530 ? window->pmatch [group].rm_so
3531 : window->pmatch [group].rm_eo);
3532 off_t offset = (window->line + mloc->delta) - window->start;
3533 mloc->line = getInputLineNumberForFileOffset (offset);
3534 mloc->pos = getInputFilePositionForLine (mloc->line);
3535 }
3536 return mloc;
3537 }
3538
/* int _SCOPESET -
 *
 * Make the cork index on top of the stack the current scope of the
 * control block and pop it.  The index must be non-negative and smaller
 * than the number of entries in the cork queue. */
static EsObject* lrop_set_scope (OptVM *vm, EsObject *name)
{
	EsObject *indexObj = opt_vm_ostack_top (vm);

	if (!es_integer_p (indexObj))
		return OPT_ERR_TYPECHECK;

	const int corkIndex = es_integer_get (indexObj);
	if (corkIndex < 0 || corkIndex >= countEntryInCorkQueue())
		return OPT_ERR_RANGECHECK;

	struct lregexControlBlock *lcb = opt_vm_get_app_data (vm);
	lcb->currentScope = corkIndex;

	opt_vm_ostack_pop (vm);
	return es_false;
}
3559
/* - _SCOPEPOP -
 *
 * Move the current scope to the scope index recorded in the tag entry of
 * the current scope.  Nothing changes when the current scope is CORK_NIL
 * or has no entry in the cork queue. */
static EsObject* lrop_pop_scope (OptVM *vm, EsObject *name)
{
	struct lregexControlBlock *lcb = opt_vm_get_app_data (vm);

	if (lcb->currentScope == CORK_NIL)
		return es_false;

	tagEntryInfo *entry = getEntryInCorkQueue (lcb->currentScope);
	if (entry != NULL)
		lcb->currentScope = entry->extensionFields.scopeIndex;

	return es_false;
}
3571
/* - _SCOPECLEAR -
 *
 * Reset the current scope of the control block to CORK_NIL. */
static EsObject* lrop_clear_scope (OptVM *vm, EsObject *name)
{
	struct lregexControlBlock *controlBlock = opt_vm_get_app_data (vm);

	controlBlock->currentScope = CORK_NIL;

	return es_false;
}
3578
/* - _SCOPETOP int true
 * - _SCOPETOP false
 *
 * Push the cork index of the current scope followed by true, or false
 * alone when no scope is set (the current scope is CORK_NIL). */
static EsObject* lrop_ref0_scope (OptVM *vm, EsObject *name)
{
	struct lregexControlBlock *lcb = opt_vm_get_app_data (vm);

	/* "No scope" is spelled CORK_NIL, as in the other scope operators. */
	if (lcb->currentScope == CORK_NIL)
	{
		opt_vm_ostack_push (vm, es_false);
		return es_false;
	}

	EsObject *q = es_integer_new (lcb->currentScope);
	if (es_error_p (q))
		return q;

	opt_vm_ostack_push (vm, q);
	es_object_unref (q);
	opt_vm_ostack_push (vm, es_true);
	return es_false;
}
3599
/* index:int _SCOPENTH int
 *
 * Starting from the current scope, follow the scope index recorded in
 * each tag entry INDEX times, and replace INDEX on the stack with the
 * cork index reached.  The walk stops early when it reaches CORK_NIL or
 * an index without an entry in the cork queue. */
static EsObject* lrop_refN_scope (OptVM *vm, EsObject *name)
{
	EsObject *countObj = opt_vm_ostack_top (vm);

	if (!es_integer_p (countObj))
		return OPT_ERR_TYPECHECK;

	int remaining = es_integer_get(countObj);

	struct lregexControlBlock *lcb = opt_vm_get_app_data (vm);
	int scope = lcb->currentScope;

	for (; remaining != 0; remaining--)
	{
		if (scope == CORK_NIL)
			break;

		tagEntryInfo *entry = getEntryInCorkQueue (scope);
		if (entry == NULL)
			break;

		scope = entry->extensionFields.scopeIndex;
	}

	EsObject *scopeObj = es_integer_new (scope);
	if (es_error_p(scopeObj))
		return scopeObj;

	opt_vm_ostack_pop (vm);
	opt_vm_ostack_push (vm, scopeObj);
	es_object_unref (scopeObj);

	return es_false;
}
3632
/* - _SCOPEDEPTH int
 *
 * Push the depth of the current scope: the number of tag entries visited
 * when following the recorded scope indexes from the current scope until
 * CORK_NIL, or an index without an entry in the cork queue, is reached. */
static EsObject* lrop_get_scope_depth (OptVM *vm, EsObject *name)
{
	int depth = 0;

	struct lregexControlBlock *lcb = opt_vm_get_app_data (vm);
	int scope = lcb->currentScope;

	while (scope != CORK_NIL)
	{
		tagEntryInfo *e = getEntryInCorkQueue (scope);
		if (!e)
			break;

		scope = e->extensionFields.scopeIndex;
		depth++;
	}

	/* The result is the number of levels counted, not the cork index
	 * at which the walk stopped. */
	EsObject *q = es_integer_new (depth);
	if (es_error_p(q))
		return q;

	opt_vm_ostack_push (vm, q);
	es_object_unref (q);
	return es_false;
}
3658
/* - _repl -
 *
 * Run an interactive read-eval loop on VM.  A banner prompt is printed
 * once, then objects are read and evaluated until end of input or until
 * an evaluation returns an error; every error other than OPT_ERR_QUIT is
 * reported before leaving.  The previous prompt is restored at the end. */
static EsObject* lrop_repl (OptVM *vm, EsObject *name)
{
	char *saved_prompt = opt_vm_set_prompt (vm, "\n% type \"quit\" for exiting from repl\nOPT");

	opt_vm_print_prompt (vm);
	opt_vm_set_prompt (vm, "OPT");

	for (;;)
	{
		EsObject *form = opt_vm_read (vm, NULL);
		if (es_object_equal (form, ES_READER_EOF))
		{
			es_object_unref (form);
			break;
		}

		EsObject *result = opt_vm_eval (vm, form);
		es_object_unref (form);

		if (!es_error_p (result))
			continue;

		if (!es_object_equal (result, OPT_ERR_QUIT))
			opt_vm_report_error (vm, result, NULL);
		break;
	}

	opt_vm_set_prompt (vm, saved_prompt);
	return es_false;
}
3688
3689 static EsObject *OPTSCRIPT_ERR_UNKNOWNTABLE;
3690 static EsObject *OPTSCRIPT_ERR_NOTMTABLEPTRN;
3691
getRegexTableForOptscriptName(struct lregexControlBlock * lcb,EsObject * tableName)3692 static struct regexTable *getRegexTableForOptscriptName (struct lregexControlBlock *lcb,
3693 EsObject *tableName)
3694 {
3695 EsObject *table_sym = es_pointer_get (tableName);
3696 const char *table_str = es_symbol_get (table_sym);
3697 int n = getTableIndexForName (lcb, table_str);
3698 if (n < 0)
3699 return NULL;
3700 return ptrArrayItem (lcb->tables, n);
3701 }
3702
/* Shared body of the operators taking one table name: record ACTION
 * with the named table, and no continuation table, as the table action
 * of the current script window, then pop the name.
 *
 * Usable only while a multi-table pattern is being processed; otherwise
 * a warning is printed and OPTSCRIPT_ERR_NOTMTABLEPTRN is returned. */
static EsObject* lrop_tenter_common (OptVM *vm, EsObject *name, enum tableAction action)
{
	struct lregexControlBlock *lcb = opt_vm_get_app_data (vm);

	if (lcb->window->patbuf->regptype != REG_PARSER_MULTI_TABLE)
	{
		error (WARNING, "Use table related operators only with mtable regular expression");
		return OPTSCRIPT_ERR_NOTMTABLEPTRN;
	}

	EsObject *tableObj = opt_vm_ostack_top (vm);
	if (es_object_get_type (tableObj) != OPT_TYPE_NAME)
		return OPT_ERR_TYPECHECK;

	struct regexTable *target = getRegexTableForOptscriptName (lcb, tableObj);
	if (target == NULL)
		return OPTSCRIPT_ERR_UNKNOWNTABLE;

	struct mTableActionSpec spec = {
		.action = action,
		.table = target,
		.continuation_table = NULL,
	};
	lcb->window->taction = spec;

	opt_vm_ostack_pop (vm);
	return es_false;
}
3729
/* table:name _TENTER -
 *
 * Request the TACTION_ENTER action for the named table. */
static EsObject* lrop_tenter (OptVM *vm, EsObject *name)
{
	const enum tableAction action = TACTION_ENTER;

	return lrop_tenter_common (vm, name, action);
}
3734
/* table:name cont:name _TENTERCONT -
 *
 * Record a TACTION_ENTER action for TABLE with CONT as its continuation
 * table in the current script window, then pop both names.
 *
 * Usable only while a multi-table pattern is being processed; otherwise
 * a warning is printed and OPTSCRIPT_ERR_NOTMTABLEPTRN is returned. */
static EsObject* lrop_tenter_with_continuation (OptVM *vm, EsObject *name)
{
	struct lregexControlBlock *lcb = opt_vm_get_app_data (vm);

	if (lcb->window->patbuf->regptype != REG_PARSER_MULTI_TABLE)
	{
		error (WARNING, "Use table related operators only with mtable regular expression");
		return OPTSCRIPT_ERR_NOTMTABLEPTRN;
	}

	EsObject *contObj = opt_vm_ostack_top (vm);
	EsObject *tableObj = opt_vm_ostack_peek (vm, 1);

	if (es_object_get_type (tableObj) != OPT_TYPE_NAME)
		return OPT_ERR_TYPECHECK;
	if (es_object_get_type (contObj) != OPT_TYPE_NAME)
		return OPT_ERR_TYPECHECK;

	struct regexTable *target = getRegexTableForOptscriptName (lcb, tableObj);
	if (target == NULL)
		return OPTSCRIPT_ERR_UNKNOWNTABLE;

	struct regexTable *continuation = getRegexTableForOptscriptName (lcb, contObj);
	if (continuation == NULL)
		return OPTSCRIPT_ERR_UNKNOWNTABLE;

	struct mTableActionSpec spec = {
		.action = TACTION_ENTER,
		.table = target,
		.continuation_table = continuation,
	};
	lcb->window->taction = spec;

	opt_vm_ostack_pop (vm);
	opt_vm_ostack_pop (vm);
	return es_false;
}
3769
/* - _TLEAVE -
 *
 * Set the action of the current script window's table action to
 * TACTION_LEAVE; its other members are left as they are.
 *
 * Usable only while a multi-table pattern is being processed; otherwise
 * a warning is printed and OPTSCRIPT_ERR_NOTMTABLEPTRN is returned. */
static EsObject* lrop_tleave (OptVM *vm, EsObject *name)
{
	struct lregexControlBlock *lcb = opt_vm_get_app_data (vm);
	scriptWindow *window = lcb->window;

	if (window->patbuf->regptype == REG_PARSER_MULTI_TABLE)
	{
		window->taction.action = TACTION_LEAVE;
		return es_false;
	}

	error (WARNING, "Use table related operators only with mtable regular expression");
	return OPTSCRIPT_ERR_NOTMTABLEPTRN;
}
3782
/* table:name _TJUMP -
 *
 * Request the TACTION_JUMP action for the named table. */
static EsObject* lrop_tjump (OptVM *vm, EsObject *name)
{
	const enum tableAction action = TACTION_JUMP;

	return lrop_tenter_common (vm, name, action);
}
3787
/* table:name _TRESET -
 *
 * Request the TACTION_RESET action for the named table. */
static EsObject* lrop_treset (OptVM *vm, EsObject *name)
{
	const enum tableAction action = TACTION_RESET;

	return lrop_tenter_common (vm, name, action);
}
3792
/* - _TQUIT -
 *
 * Set the action of the current script window's table action to
 * TACTION_QUIT; its other members are left as they are.
 *
 * Usable only while a multi-table pattern is being processed; otherwise
 * a warning is printed and OPTSCRIPT_ERR_NOTMTABLEPTRN is returned. */
static EsObject* lrop_tquit (OptVM *vm, EsObject *name)
{
	struct lregexControlBlock *lcb = opt_vm_get_app_data (vm);
	scriptWindow *window = lcb->window;

	if (window->patbuf->regptype == REG_PARSER_MULTI_TABLE)
	{
		window->taction.action = TACTION_QUIT;
		return es_false;
	}

	error (WARNING, "Use table related operators only with mtable regular expression");
	return OPTSCRIPT_ERR_NOTMTABLEPTRN;
}
3805
/* - _TRACED true|false
 *
 * Push true when tracing support is compiled in (DO_TRACING) and
 * isLanguageTraced() reports true for the language of the current
 * input; push false otherwise. */
static EsObject* lrop_traced (OptVM *vm, EsObject *name)
{
#ifdef DO_TRACING
	langType lang = getInputLanguage ();
	if (isLanguageTraced (lang))
		opt_vm_ostack_push (vm, es_true);
	else
		opt_vm_ostack_push (vm, es_false);
#else
	opt_vm_ostack_push (vm, es_false);
#endif
	/* Operators report success with the es_false object, not with the
	 * C constant false (which would turn into a null pointer here). */
	return es_false;
}
3819
/* Error returned when a name does not designate a known extra. */
EsObject *OPTSCRIPT_ERR_UNKNOWNEXTRA;

/* extra:name     _extraenabled bool
 * language.extra _extraenabled bool
 *
 * Replace the extra name on top of the stack with true or false,
 * according to what isXtagEnabled() reports for that extra. */
static EsObject* lrop_extraenabled (OptVM *vm, EsObject *name)
{
	EsObject *extraObj = opt_vm_ostack_top (vm);

	if (es_object_get_type (extraObj) != OPT_TYPE_NAME)
		return OPT_ERR_TYPECHECK;

	xtagType xt = optscriptGetXtagType (extraObj);
	if (xt == XTAG_UNKNOWN)
		return OPTSCRIPT_ERR_UNKNOWNEXTRA;

	EsObject *answer;
	if (isXtagEnabled (xt))
		answer = es_true;
	else
		answer = es_false;

	opt_vm_ostack_pop (vm);
	opt_vm_ostack_push (vm, answer);
	return es_false;
}
3836
/* tag:int|tag:tag extra:name      _MARKEXTRA -
 * tag:int|tag:tag lang.extra:name _MARKEXTRA -
 *
 * Mark the tag with the extra bit named by EXTRA and pop both operands.
 * The tag is given either as a cork index (greater than CORK_NIL and
 * smaller than the number of entries in the cork queue) or as a tag
 * object.  When the extra has an owner language other than LANG_IGNORE
 * and the tag belongs to a different language, a warning is printed and
 * OPTSCRIPT_ERR_UNKNOWNEXTRA is returned. */
static EsObject *lrop_markextra (OptVM *vm, EsObject *name)
{
	EsObject *tagObj = opt_vm_ostack_peek (vm, 1);
	tagEntryInfo *entry;

	if (es_integer_p (tagObj))
	{
		const int corkIndex = es_integer_get (tagObj);
		if (corkIndex <= CORK_NIL || corkIndex >= countEntryInCorkQueue())
			return OPT_ERR_RANGECHECK;
		entry = getEntryInCorkQueue (corkIndex);
	}
	else
	{
		if (es_object_get_type (tagObj) != OPT_TYPE_TAG)
			return OPT_ERR_TYPECHECK;
		entry = es_pointer_get (tagObj);
	}

	if (entry == NULL)
		return OPTSCRIPT_ERR_NOTAGENTRY;

	EsObject *extraObj = opt_vm_ostack_top (vm);
	if (es_object_get_type (extraObj) != OPT_TYPE_NAME)
		return OPT_ERR_TYPECHECK;

	xtagType xt = optscriptGetXtagType (extraObj);
	if (xt == XTAG_UNKNOWN)
		return OPTSCRIPT_ERR_UNKNOWNEXTRA;

	langType owner = getXtagOwner (xt);
	if (owner != LANG_IGNORE && entry->langType != owner)
	{
		error (WARNING,
			   "mismatch in the language of the tag (%s) and the language of field (%s)",
			   getLanguageName (entry->langType), getLanguageName (owner));
		return OPTSCRIPT_ERR_UNKNOWNEXTRA;
	}

	markTagExtraBit (entry, xt);

	opt_vm_ostack_pop (vm);
	opt_vm_ostack_pop (vm);

	return es_false;
}
3880
/* matchloc _ADVANCETO -
 *
 * Record the delta of the matchloc on top of the stack as the
 * "advance to" request of the current script window.
 *
 * Not usable while a single-line pattern is being processed: a warning
 * is printed and OPTSCRIPT_ERR_NOTMTABLEPTRN is returned in that case.
 *
 * NOTE(review): unlike the other operators in this file, this one
 * returns es_true on success and does not pop its operand, although the
 * stack picture above shows it consumed -- confirm whether that is
 * intended. */
static EsObject *lrop_advanceto (OptVM *vm, EsObject *name)
{
	struct lregexControlBlock *lcb = opt_vm_get_app_data (vm);
	scriptWindow *window = lcb->window;

	if (window->patbuf->regptype == REG_PARSER_SINGLE_LINE)
	{
		error (WARNING, "don't use `%s' operator in --regex-<LANG> option",
			   es_symbol_get (name));
		return OPTSCRIPT_ERR_NOTMTABLEPTRN; /* TODO */
	}

	EsObject *top = opt_vm_ostack_top (vm);
	if (es_object_get_type (top) != OPT_TYPE_MATCHLOC)
		return OPT_ERR_TYPECHECK;

	const matchLoc *target = es_pointer_get (top);
	window->advanceto = true;
	window->advanceto_delta = target->delta;

	return es_true;
}
3901
/* tag:int _MARKPLACEHOLDER -
 *
 * Mark the tag entry at the cork index on top of the stack as a
 * placeholder and pop the index.  The index must be greater than
 * CORK_NIL and smaller than the number of entries in the cork queue. */
static EsObject *lrop_markplaceholder (OptVM *vm, EsObject *name)
{
	EsObject *indexObj = opt_vm_ostack_top (vm);

	if (!es_integer_p (indexObj))
		return OPT_ERR_TYPECHECK;

	const int corkIndex = es_integer_get (indexObj);
	if (corkIndex <= CORK_NIL || corkIndex >= countEntryInCorkQueue())
		return OPT_ERR_RANGECHECK;

	tagEntryInfo *entry = getEntryInCorkQueue (corkIndex);
	if (entry == NULL)
		return OPTSCRIPT_ERR_NOTAGENTRY;

	markTagPlaceholder (entry, true);

	opt_vm_ostack_pop (vm);
	return es_false;
}
3922
/* The operators this file adds to optscript.  initRegexOptscript() hands
 * the whole table to optscriptRegisterOperators().
 *
 * Each entry gives the operator name, the C function implementing it,
 * its arity and a help string showing the stack before and after the
 * operator.  The operators registered with arity -1 (_matchloc, _tag and
 * _reftag) accept more than one operand layout and check the stack depth
 * in their own bodies.
 *
 * NOTE(review): '%' in a help string appears to separate alternative
 * forms of one operator; the help strings of _reftag and _advanceto end
 * with a '%' that is followed by no alternative -- confirm how the help
 * printer treats it. */
static struct optscriptOperatorRegistration lropOperators [] = {
	{
		.name     = "_matchstr",
		.fn       = lrop_get_match_string_group_on_stack,
		.arity    = 1,
		.help_str = "group:int _MATCHSTR string true%"
		"group:int _MATCHSTR false",
	},
	{
		.name     = "_matchloc",
		.fn       = lrop_get_match_loc,
		.arity    = -1,
		.help_str = "group:int /start|/end _MATCHLOC matchloc%"
		"group:int _MATCHLOC matchloc",
	},
	{
		.name     = "_matchloc2line",
		.fn       = ldrop_get_line_from_matchloc,
		.arity    = 1,
		.help_str = "matchloc _MATCHLOC2LINE int:line",
	},
	{
		.name     = "_tagloc",
		.fn       = lrop_get_tag_loc,
		.arity    = 1,
		.help_str = "index:int _TAGLOC matchloc",
	},
	{
		.name     = "_tag",
		.fn       = lrop_make_tag,
		.arity    = -1,
		.help_str = "name:str kind:name matchloc _TAG tag%"
		"name:str kind:name _TAG tag",
	},
	{
		.name     = "_reftag",
		.fn       = lrop_make_reftag,
		.arity    = -1,
		.help_str = "name:str kind:name role:name matchloc _REFTAG tag%"
		"name:str kind:name role:name _REFTAG tag%",
	},
	{
		.name     = "_commit",
		.fn       = lrop_commit_tag,
		.arity    = 1,
		.help_str = "tag _COMMIT int",
	},
	{
		.name     = "_scopeset",
		.fn       = lrop_set_scope,
		.arity    = 1,
		.help_str = "int _SCOPESET -",
	},
	{
		.name     = "_scopepop",
		.fn       = lrop_pop_scope,
		.arity    = 0,
		.help_str = "- _SCOPEPOP -",
	},
	{
		.name     = "_scopeclear",
		.fn       = lrop_clear_scope,
		.arity    = 0,
		.help_str = "- _SCOPECLEAR -",
	},
	{
		.name     = "_scopetop",
		.fn       = lrop_ref0_scope,
		.arity    = 0,
		.help_str = "- _SCOPETOP int true%"
		"- _SCOPETOP false",
	},
	{
		.name     = "_scopeNth",
		.fn       = lrop_refN_scope,
		.arity    = 1,
		.help_str = "index:int _SCOPENTH int",
	},
	{
		.name     = "_scopedepth",
		.fn       = lrop_get_scope_depth,
		.arity    = 0,
		.help_str = "- _SCOPEDEPTH int",
	},
	{
		.name     = "_repl",
		.fn       = lrop_repl,
		.arity    = 0,
		.help_str = "- _repl -",
	},
	{
		.name     = "_tenter",
		.fn       = lrop_tenter,
		.arity    = 1,
		.help_str = "table:name _TENTER -",
	},
	{
		.name     = "_tentercont",
		.fn       = lrop_tenter_with_continuation,
		.arity    = 2,
		.help_str = "table:name cont:name _TENTERCONT -",
	},
	{
		.name     = "_tleave",
		.fn       = lrop_tleave,
		.arity    = 0,
		.help_str = "- _TLEAVE -",
	},
	{
		.name     = "_tjump",
		.fn       = lrop_tjump,
		.arity    = 1,
		.help_str = "table:name _TJUMP -",
	},
	{
		.name     = "_treset",
		.fn       = lrop_treset,
		.arity    = 1,
		.help_str = "table:name _TRESET -",
	},
	{
		.name     = "_tquit",
		.fn       = lrop_tquit,
		.arity    = 0,
		.help_str = "- _TQUIT -",
	},
	{
		.name     = "_extraenabled",
		.fn       = lrop_extraenabled,
		.arity    = 1,
		.help_str = "extra:name _extraenabled bool%"
		"language.extra _extraenabled bool",
	},
	{
		.name     = "_markextra",
		.fn       = lrop_markextra,
		.arity    = 2,
		.help_str = "tag:int|tag:tag extra:name _MARKEXTRA -%"
		"tag:int|tag:tag lang.extra:name _MARKEXTRA -",
	},
	{
		.name     = "_advanceto",
		.fn       = lrop_advanceto,
		.arity    = 1,
		.help_str = "matchloc _ADVANCETO -%"
	},
	{
		.name     = "_traced",
		.fn       = lrop_traced,
		.arity    = 0,
		.help_str = "- _TRACED true|false",
	},
	{
		.name     = "_markplaceholder",
		.fn       = lrop_markplaceholder,
		.arity    = 1,
		.help_str = "tag:int _MARKPLACEHOLDER -",
	}
};
4082
initRegexOptscript(void)4083 extern void initRegexOptscript (void)
4084 {
4085 if (!regexAvailable)
4086 return;
4087
4088 if (optvm)
4089 return;
4090
4091 optvm = optscriptInit ();
4092 lregex_dict = opt_dict_new (17);
4093
4094 OPTSCRIPT_ERR_UNKNOWNTABLE = es_error_intern ("unknowntable");
4095 OPTSCRIPT_ERR_NOTMTABLEPTRN = es_error_intern ("notmtableptrn");
4096 OPTSCRIPT_ERR_UNKNOWNEXTRA = es_error_intern ("unknownextra");
4097 OPTSCRIPT_ERR_UNKNOWNLANGUAGE = es_error_intern ("unknownlanguage");
4098 OPTSCRIPT_ERR_UNKNOWNKIND = es_error_intern ("unknownkind");
4099 OPTSCRIPT_ERR_UNKNOWNROLE = es_error_intern ("unknownrole");
4100
4101 optscriptInstallProcs (lregex_dict, lrop_get_match_string_named_group);
4102
4103 optscriptRegisterOperators (lregex_dict,
4104 lropOperators, ARRAY_SIZE(lropOperators));
4105
4106 extern const char ctagsCommonPrelude[];
4107 opt_vm_dstack_push (optvm, lregex_dict);
4108 MIO *mio = mio_new_memory ((unsigned char*)ctagsCommonPrelude, strlen (ctagsCommonPrelude), NULL, NULL);
4109 EsObject *e = optscriptLoad (optvm, mio);
4110 if (es_error_p (e))
4111 error (FATAL, "failed in loading built-in procedures");
4112 mio_unref (mio);
4113 opt_vm_dstack_pop (optvm);
4114 }
4115
listRegexOpscriptOperators(FILE * fp)4116 extern void listRegexOpscriptOperators (FILE *fp)
4117 {
4118 EsObject *procdocs;
4119 if (!opt_dict_known_and_get_cstr (lregex_dict,
4120 "__procdocs",
4121 &procdocs))
4122 procdocs = NULL;
4123
4124 opt_vm_dstack_push (optvm, lregex_dict);
4125 optscriptHelp (optvm, fp, procdocs);
4126 opt_vm_dstack_pop (optvm);
4127 }
4128