xref: /Universal-ctags/main/lregex.c (revision 6024deefc593abced0b42582f4cf1a8658aac96f)
1 /*
2 *   Copyright (c) 2000-2003, Darren Hiebert
3 *
4 *   This source code is released for free distribution under the terms of the
5 *   GNU General Public License version 2 or (at your option) any later version.
6 *
7 *   This module contains functions for applying regular expression matching.
8 *
9 *   The code for utilizing the Gnu regex package with regards to processing the
10 *   regex option and checking for regex matches was adapted from routines in
11 *   Gnu etags.
12 */
13 
14 /*
15 *   INCLUDE FILES
16 */
17 #include "general.h"  /* must always come first */
18 
19 #include <string.h>
20 
21 #include <ctype.h>
22 #include <stddef.h>
23 #ifdef HAVE_SYS_TYPES_H
24 # include <sys/types.h>  /* declare off_t (not known to regex.h on FreeBSD) */
25 #endif
26 
27 #include <inttypes.h>
28 
29 #include "debug.h"
30 #include "colprint_p.h"
31 #include "entry_p.h"
32 #include "field_p.h"
33 #include "flags_p.h"
34 #include "htable.h"
35 #include "kind.h"
36 #include "options.h"
37 #include "optscript.h"
38 #include "parse_p.h"
39 #include "promise.h"
40 #include "read.h"
41 #include "read_p.h"
42 #include "routines.h"
43 #include "routines_p.h"
44 #include "script_p.h"
45 #include "trace.h"
46 #include "trashbox.h"
47 #include "xtag_p.h"
48 
/* NOTE(review): presumably records whether a regex backend is usable; the
 * code reading and writing this flag lies outside this part of the file. */
static bool regexAvailable = false;
50 
51 /*
52 *   MACROS
53 */
54 
/* The max depth of taction=enter/leave stack */
#define MTABLE_STACK_MAX_DEPTH 64

/* How many times ctags allows an mtable parser to
   stay at the same input position across table switching.

   The value is derived from MTABLE_STACK_MAX_DEPTH.
   There is no deep meaning in that; it is just for simplifying
   Tmain cases. */
#define MTABLE_MOTIONLESS_MAX (MTABLE_STACK_MAX_DEPTH + 1)

/* Name of the default regex backend.
   NOTE(review): the place where this name is consumed is not in this
   part of the file. */
#define DEFAULT_REGEX_BACKEND "e"
67 
68 /*
69 *   DATA DECLARATIONS
70 */
71 
/* Discriminator of regexPattern: tells which member of its union `u'
 * (tag or callback) is in use. */
enum pType { PTRN_TAG, PTRN_CALLBACK };
73 
/* Bit flags describing how a pattern interacts with the scope stack.
 * They are OR-ed together by the {scope=ACTION} and {placeholder}
 * flag handlers. */
enum scopeAction {
	SCOPE_REF     = 1UL << 0,	/* set by scope=ref and scope=push */
	SCOPE_POP     = 1UL << 1,	/* set by scope=pop and scope=replace */
	SCOPE_PUSH    = 1UL << 2,	/* set by scope=push, scope=set and scope=replace */
	SCOPE_CLEAR   = 1UL << 3,	/* set by scope=clear and scope=set */
	SCOPE_REF_AFTER_POP = 1UL << 4,	/* set by scope=replace */
	SCOPE_PLACEHOLDER = 1UL << 5,	/* set by the {placeholder} flag */
};
82 
/* Table-switching action attached to a pattern; stored in
 * struct mTableActionSpec.  TACTION_NOP is what initTaction() assigns. */
enum tableAction {
	TACTION_NOP,
	TACTION_ENTER,				/* {tenter=N} */
	TACTION_LEAVE,				/* {tleave} */
	TACTION_JUMP,					/* {tjump=N} */
	TACTION_RESET,				/* {treset=N} */
	TACTION_QUIT,					/* {tquit} */
};
91 
/* A field type paired with the template text given for it in a field flag.
 * Created by fieldPatternNew() and destroyed by fieldPatternDelete(). */
struct fieldPattern {
	fieldType ftype;
	/* Heap copy made by fieldPatternNew(); freed by fieldPatternDelete(). */
	const char *template;
};
96 
/* One boundary (start or end) of a guest area, as parsed from the guest flag. */
struct boundarySpec {
	int patternGroup;		/* group number given in the flag */
	bool fromStartOfGroup;	/* true for "Nstart", false for "Nend" */
	bool placeholder;		/* true when the boundary field was left empty */
};
102 
/* How the guest parser is chosen for a guest area.  `type' tells which
 * member of `spec' (if any) is meaningful. */
struct guestLangSpec {
	enum guestLangSpecType {
		GUEST_LANG_UNKNOWN,				   /* not set, or the flag was rejected */
		GUEST_LANG_PLACEHOLDER,			   /* _ */
		GUEST_LANG_STATIC_LANGNAME,		   /* C, Python,... */
		GUEST_LANG_PTN_GROUP_FOR_LANGNAME, /* \1, \2, ..., \9 */
		GUEST_LANG_PTN_GROUP_FOR_FILEMAP, /* *1, *2, ... *9 */
	} type;
	union {
		langType lang;		/* filled for GUEST_LANG_STATIC_LANGNAME */
		int patternGroup;	/* filled for the two GUEST_LANG_PTN_GROUP_* types */
	} spec;
};
116 
/* Complete guest specification of a pattern: which parser to run and the
 * two boundaries of the area to run it on. */
struct guestSpec {
	struct guestLangSpec lang;
	/* Indices into boundary[]. */
#define BOUNDARY_START 0
#define BOUNDARY_END  1
	struct boundarySpec boundary[2];
};
123 
/* Group selections of a multi-line pattern, filled by the {mgroup=N} and
 * {_advanceTo=N[start|end]} flag handlers. */
struct mGroupSpec {
	/* Value meaning "no group selected"; assigned by initMgroup(). */
#define NO_MULTILINE -1
	int forLineNumberDetermination;	/* set by {mgroup=N} */
	int forNextScanning;			/* set by {_advanceTo=N...} */
	/* true => start, false => end */
	bool nextFromStart;
};
131 
/* Table action of a multi-table pattern together with the table(s)
 * the action refers to. */
struct mTableActionSpec {
	enum tableAction action;
	struct regexTable *table;

	/* used when action == TACTION_ENTER */
	struct regexTable *continuation_table;
};
139 
/* A compiled pattern plus everything attached to it by flags.
 * The object is reference counted: refPattern() adds a reference and
 * deletePattern() drops one, releasing the object when none is left. */
typedef struct {
	/* Backend and compiled code; released through backend->delete_code(). */
	regexCompiledCode pattern;
	/* Selects the active member of `u'. */
	enum pType type;
	bool exclusive;
	bool accept_empty_name;
	union {
		struct {
			int kindIndex;
			roleBitsType roleBits;	/* role bits OR-ed in by common_flag_role_long() */
			char *name_pattern;		/* freed by deletePattern() */
		} tag;
		struct {
			regexCallback function;
			void *userData;
		} callback;
	} u;
	unsigned int scopeActions;
	bool *disabled;

	enum regexParserType regptype;
	struct mGroupSpec mgroup;
	struct guestSpec guest;
	struct mTableActionSpec taction;

	/* XTAG_UNKNOWN unless changed by common_flag_extra_long(). */
	int   xtagType;
	/* Array of struct fieldPattern; created lazily by common_flag_field_long(). */
	ptrArray *fieldPatterns;

	/* freed by deletePattern() */
	char *pattern_string;

	/* freed by deletePattern() when set */
	char *anonymous_tag_prefix;

	struct {
		errorSelection selection;
		/* freed by deletePattern() when set */
		char *message_string;
	} message;

	/* Both released by deletePattern() when set. */
	char *optscript_src;
	EsObject *optscript;

	int refcount;
} regexPattern;
181 
182 
/* Element of a pattern array: a (possibly shared) pattern and the match
 * counters that belong to this particular entry. */
typedef struct {
	/* the pattern can be shared among entries using a refcount */
	regexPattern *pattern;

	/* but the statistics are per-table-entry */
	struct {
		unsigned int match;
		unsigned int unmatch;
	} statistics;
} regexTableEntry;
193 
194 
/* NOTE(review): presumably the table index passed for patterns that do
 * not belong to any table -- confirm at the call sites. */
#define TABLE_INDEX_UNUSED -1
/* A named table of a multi-table parser.  Both members are released by
 * deleteTable(). */
struct regexTable {
	char *name;
	ptrArray *entries;	/* regexTableEntry elements, added by addCompiledTagCommon() */
};
200 
/* One boundary of a guest request: an input offset plus a flag telling
 * whether the offset has been filled in. */
struct boundaryInRequest {
	bool offset_set;
	off_t offset;
};
205 
/* A request to run a guest parser on an area of the input.
 * It is managed by the guestRequest*() functions declared in this file. */
struct guestRequest {
	bool lang_set;
	langType lang;

	/* NOTE(review): presumably indexed by BOUNDARY_START / BOUNDARY_END -- confirm. */
	struct boundaryInRequest boundary[2];
};
212 
/* State describing the current match that is handed to the script
 * functions (scriptSetup(), make_match_string(), make_mloc()). */
typedef struct {
	const char *line;
	const char *start;
	const regexPattern* const patbuf;
	const regmatch_t* const pmatch;
	int nmatch;
	struct mTableActionSpec taction;
	bool advanceto;
	unsigned int advanceto_delta;
} scriptWindow;
223 
/* Per-parser state of the regex machinery.  Created by
 * allocLregexControlBlock() and destroyed by freeLregexControlBlock(). */
struct lregexControlBlock {
	int currentScope;
	/* Indexed by REG_PARSER_SINGLE_LINE and REG_PARSER_MULTI_LINE;
	 * elements are regexTableEntry. */
	ptrArray *entries [2];

	/* Elements are struct regexTable, released with deleteTable(). */
	ptrArray *tables;
	/* Created without an element destructor. */
	ptrArray *tstack;

	struct guestRequest *guest_req;

	EsObject *local_dict;

	/* hook[] elements are released with eFree(), hook_code[] elements
	 * with es_object_unref(). */
	ptrArray *hook[SCRIPT_HOOK_MAX];
	ptrArray *hook_code[SCRIPT_HOOK_MAX];

	/* Id of the parser given to allocLregexControlBlock(). */
	langType owner;

	scriptWindow *window;
};
242 
243 /*
244 *   DATA DEFINITIONS
245 */
/* File-wide optscript state.
 * NOTE(review): their initialization is not in this part of the file. */
static OptVM *optvm;
static EsObject *lregex_dict = es_nil;
248 
249 /*
250 *   FUNCTION DEFINITIONS
251 */
/* Prototypes of static functions that are used before their definitions. */
static int getTableIndexForName (const struct lregexControlBlock *const lcb, const char *name);
static void deletePattern (regexPattern *p);
static int  makePromiseForAreaSpecifiedWithOffsets (const char *parser,
													off_t startOffset,
													off_t endOffset);

/* Life cycle of struct guestRequest. */
static struct guestRequest *guestRequestNew (void);
static void   guestRequestDelete (struct guestRequest *);
static bool   guestRequestIsFilled(struct guestRequest *);
static void   guestRequestClear (struct guestRequest *);
static void   guestRequestSubmit (struct guestRequest *);

/* Optscript glue. */
static EsObject *scriptRead (OptVM *vm, const char *src);
static void scriptSetup (OptVM *vm, struct lregexControlBlock *lcb, int corkIndex, scriptWindow *window);
static EsObject* scriptEval (OptVM *vm, EsObject *optscript);
static void scriptEvalHook (OptVM *vm, struct lregexControlBlock *lcb, enum scriptHook hook);
static void scriptTeardown (OptVM *vm, struct lregexControlBlock *lcb);

/* Accessors working on the match held by a scriptWindow. */
static char* make_match_string (scriptWindow *window, int group);
static matchLoc *make_mloc (scriptWindow *window, int group, bool start);
272 
deleteTable(void * ptrn)273 static void deleteTable (void *ptrn)
274 {
275 	struct regexTable *t = ptrn;
276 
277 	ptrArrayDelete (t->entries);
278 	eFree (t->name);
279 	eFree (t);
280 }
281 
deleteTableEntry(void * ptrn)282 static void deleteTableEntry (void *ptrn)
283 {
284 	regexTableEntry *e = ptrn;
285 	Assert (e && e->pattern);
286 	deletePattern (e->pattern);
287 	eFree (e);
288 }
289 
/* Drop one reference to P.  When no reference is left, release the compiled
 * code, every string and array owned by the pattern, and P itself. */
static void deletePattern (regexPattern *p)
{
	p->refcount -= 1;
	if (p->refcount > 0)
		return;			/* still shared by another table entry */

	p->pattern.backend->delete_code (p->pattern.code);

	/* name_pattern lives in the union; it exists only for tag patterns. */
	if (p->type == PTRN_TAG)
	{
		eFree (p->u.tag.name_pattern);
		p->u.tag.name_pattern = NULL;
	}

	if (p->fieldPatterns != NULL)
	{
		ptrArrayDelete (p->fieldPatterns);
		p->fieldPatterns = NULL;
	}

	eFree (p->pattern_string);

	/* The remaining members are optional. */
	if (p->message.message_string != NULL)
		eFree (p->message.message_string);

	if (p->anonymous_tag_prefix != NULL)
		eFree (p->anonymous_tag_prefix);

	if (p->optscript != NULL)
		es_object_unref (p->optscript);
	if (p->optscript_src != NULL)
		eFree (p->optscript_src);

	eFree (p);
}
326 
clearPatternSet(struct lregexControlBlock * lcb)327 static void clearPatternSet (struct lregexControlBlock *lcb)
328 {
329 	ptrArrayClear (lcb->entries [REG_PARSER_SINGLE_LINE]);
330 	ptrArrayClear (lcb->entries [REG_PARSER_MULTI_LINE]);
331 	ptrArrayClear (lcb->tables);
332 }
333 
/* Allocate a control block for PARSER with all of its arrays created empty.
 * The result is to be released with freeLregexControlBlock(). */
extern struct lregexControlBlock* allocLregexControlBlock (parserDefinition *parser)
{
	struct lregexControlBlock *block = xCalloc (1, struct lregexControlBlock);
	int hook = 0;

	/* Pattern arrays own their entries; the table stack owns nothing. */
	block->entries[REG_PARSER_SINGLE_LINE] = ptrArrayNew(deleteTableEntry);
	block->entries[REG_PARSER_MULTI_LINE] = ptrArrayNew(deleteTableEntry);
	block->tables = ptrArrayNew(deleteTable);
	block->tstack = ptrArrayNew(NULL);
	block->guest_req = guestRequestNew ();
	block->local_dict = es_nil;

	while (hook < SCRIPT_HOOK_MAX)
	{
		block->hook[hook] = ptrArrayNew (eFree);
		block->hook_code[hook] = ptrArrayNew ((ptrArrayDeleteFunc)es_object_unref);
		hook++;
	}

	block->owner = parser->id;
	return block;
}
354 
freeLregexControlBlock(struct lregexControlBlock * lcb)355 extern void freeLregexControlBlock (struct lregexControlBlock* lcb)
356 {
357 	clearPatternSet (lcb);
358 
359 	ptrArrayDelete (lcb->entries [REG_PARSER_SINGLE_LINE]);
360 	lcb->entries [REG_PARSER_SINGLE_LINE] = NULL;
361 	ptrArrayDelete (lcb->entries [REG_PARSER_MULTI_LINE]);
362 	lcb->entries [REG_PARSER_MULTI_LINE] = NULL;
363 
364 	ptrArrayDelete (lcb->tables);
365 	lcb->tables = NULL;
366 
367 	ptrArrayDelete (lcb->tstack);
368 	lcb->tstack = NULL;
369 
370 	guestRequestDelete (lcb->guest_req);
371 	lcb->guest_req = NULL;
372 
373 	es_object_unref (lcb->local_dict);
374 	lcb->local_dict = es_nil;
375 
376 	for (int i = 0; i < SCRIPT_HOOK_MAX; i++)
377 	{
378 		ptrArrayDelete (lcb->hook[i]);
379 		lcb->hook[i] = NULL;
380 
381 		ptrArrayDelete (lcb->hook_code[i]);
382 		lcb->hook_code[i] = NULL;
383 	}
384 
385 	eFree (lcb);
386 }
387 
388 /*
389 *   Regex pseudo-parser
390 */
391 
/* Initialize E as a tag named NAME with the given kind, role and scope.
 * LINE and POS override the position of the tag only when LINE is not 0.
 * XTAG_TYPE is marked on the tag unless it is XTAG_UNKNOWN.
 * An empty NAME is accepted only for placeholder tags. */
static void initRegexTag (tagEntryInfo *e,
		const char * name, int kindIndex, int roleIndex, int scopeIndex, int placeholder,
		unsigned long line, MIOPos *pos, int xtag_type)
{
	Assert (name != NULL  &&  ((name[0] != '\0') || placeholder));

	initRefTagEntry (e, name, kindIndex, roleIndex);
	e->placeholder = (placeholder != 0);
	e->extensionFields.scopeIndex = scopeIndex;

	if (line != 0)
	{
		e->filePosition = *pos;
		e->lineNumber = line;
	}

	if (xtag_type == XTAG_UNKNOWN)
		return;

	markTagExtraBit (e, xtag_type);
}
409 
410 /*
411 *   Regex pattern definition
412 */
413 
/* Take a string like "/blah/" and turn it into "blah", making sure
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also turns "\t" into a Tab character.
 * Turns "\n" into a Newline character if MULTILINE is true.
 * Any other quoted character is kept together with its backslash.
 * A backslash that is the very last character of the input is dropped.
 *
 * Returns pointer to terminating separator (or to the terminating NUL
 * if no unquoted separator is found).  Works in place.  Null
 * terminates name string.
 *
 * An empty NAME is returned untouched.
 */
static char* scanSeparators (char* name, bool multiline)
{
	const char sep = name [0];
	char *copyto = name;
	bool quoted = false;

	/* There is nothing after the terminator of an empty string; stepping
	 * over it as if it were a separator would scan unrelated memory. */
	if (sep == '\0')
		return name;

	for (++name ; *name != '\0' ; ++name)
	{
		if (quoted)
		{
			if (*name == sep)
				*copyto++ = sep;
			else if (*name == 't')
				*copyto++ = '\t';
			else if (multiline && *name == 'n')
				*copyto++ = '\n';
			else
			{
				/* Something else is quoted, so preserve the quote. */
				*copyto++ = '\\';
				*copyto++ = *name;
			}
			quoted = false;
		}
		else if (*name == '\\')
			quoted = true;
		else if (*name == sep)
		{
			break;
		}
		else
			*copyto++ = *name;
	}
	*copyto = '\0';
	return name;
}
458 
459 /* Parse `regexp', in form "/regex/name/[k,Kind/]flags" (where the separator
460  * character is whatever the first character of `regexp' is), by breaking it
461  * up into null terminated strings, removing the separators, and expanding
462  * '\t' into tabs. When complete, `regexp' points to the line matching
463  * pattern, a pointer to the name matching pattern is written to `name', a
464  * pointer to the kinds is written to `kinds' (possibly NULL), and a pointer
465  * to the trailing flags is written to `flags'. If the pattern is not in the
466  * correct format, a false value is returned.
467  */
parseTagRegex(enum regexParserType regptype,char * const regexp,char ** const name,char ** const kinds,char ** const flags)468 static bool parseTagRegex (
469 		enum regexParserType regptype,
470 		char* const regexp, char** const name,
471 		char** const kinds, char** const flags)
472 {
473 	bool result = false;
474 	const int separator = (unsigned char) regexp [0];
475 
476 	*name = scanSeparators (regexp, (regptype == REG_PARSER_MULTI_LINE
477 									 || regptype == REG_PARSER_MULTI_TABLE));
478 	if (*regexp == '\0')
479 		error (WARNING, "empty regexp");
480 	else if (**name != separator)
481 		error (WARNING, "%s: incomplete regexp", regexp);
482 	else
483 	{
484 		char* const third = scanSeparators (*name, false);
485 		if (**name != '\0' && (*name) [strlen (*name) - 1] == '\\')
486 			error (WARNING, "error in name pattern: \"%s\"", *name);
487 		if (*third != separator)
488 			error (WARNING, "%s: regexp missing final separator", regexp);
489 		else
490 		{
491 			/*
492 			 * first----------V third------------V
493 			 * --regex-<LANG>=/regexp/replacement/[kind-spec/][flags][{{\n...\n}}]
494 			 * second----------------^ fourth---------------^
495 			 */
496 
497 			/*
498 			 * The following code assumes "{{\n" is never used in flags.
499 			 * If the input comes from the command line or an optlib file,
500 			 * this assumption is always correct; a new line character is never
501 			 * put at the middle (or end) of the input.
502 			 *
503 			 * TODO: How about the input comes from the source code translated
504 			 * by optlib2c?
505 			 */
506 			char *script = strstr (third, "{{\n");
507 			if (script)
508 			{
509 				/* The script part should not be unescaed by scanSeparators().
510 				 * By spitting the string, we can hide the script part from
511 				 * scanSeparators(). */
512 				script [0] = '\0';
513 			}
514 
515 			char* const fourth = scanSeparators (third, false);
516 			if (*fourth == separator)
517 			{
518 				*kinds = third;
519 				scanSeparators (fourth, false);
520 				*flags = fourth;
521 			}
522 			else
523 			{
524 				*flags = third;
525 				*kinds = NULL;
526 			}
527 
528 			if (script)
529 			{
530 				Assert (*flags);
531 
532 				char *end = *flags + strlen (*flags);
533 				script [0] = '{';
534 				if (end != script)
535 				{
536 					size_t len = strlen (script);
537 					memmove (end, script, len);
538 					end [len] = '\0';
539 				}
540 			}
541 
542 			result = true;
543 		}
544 	}
545 	return result;
546 }
547 
548 
/* Short-flag handler for 'x': sets the bool that DATA points to. */
static void pre_ptrn_flag_exclusive_short (char c CTAGS_ATTR_UNUSED, void* data)
{
	*(bool *) data = true;
}
554 
/* Long-flag handler for {exclusive}: has the same effect as the short
 * flag 'x', i.e. sets the bool that DATA points to. */
static void pre_ptrn_flag_exclusive_long (const char* const s CTAGS_ATTR_UNUSED, const char* const unused CTAGS_ATTR_UNUSED, void* data)
{
	bool *const exclusive = data;

	*exclusive = true;
}
559 
/* Definition of the "exclusive" flag, available as the short flag 'x' and
 * as the long flag {exclusive}.  Both handlers set a bool through their
 * data pointer. */
static flagDefinition prePtrnFlagDef[] = {
	{ 'x',  "exclusive", pre_ptrn_flag_exclusive_short, pre_ptrn_flag_exclusive_long ,
	  NULL, "skip testing the other patterns if a line is matched to this pattern"},
};
564 
/* Flag handler for {scope=ACTION}.
 *
 * DATA points to an unsigned int into which the enum scopeAction bits for
 * the action V are OR-ed:
 *
 *   ref     => SCOPE_REF
 *   push    => SCOPE_PUSH | SCOPE_REF
 *   pop     => SCOPE_POP
 *   clear   => SCOPE_CLEAR
 *   set     => SCOPE_CLEAR | SCOPE_PUSH
 *   replace => SCOPE_POP | SCOPE_REF_AFTER_POP | SCOPE_PUSH
 *
 * A missing or unknown action is reported as a FATAL error.
 */
static void scope_ptrn_flag_eval (const char* const f  CTAGS_ATTR_UNUSED,
				  const char* const v, void* data)
{
	unsigned int *bfields = data;

	/* The other flag handlers in this file all allow for a flag given
	 * without a value (v == NULL); passing NULL to strcmp() is undefined. */
	if (v == NULL)
	{
		error (FATAL, "No value is given for scope flag in regex definition");
		return;
	}

	if (strcmp (v, "ref") == 0)
		*bfields |= SCOPE_REF;
	else if (strcmp (v, "push") == 0)
		*bfields |= (SCOPE_PUSH | SCOPE_REF);
	else if (strcmp (v, "pop") == 0)
		*bfields |= SCOPE_POP;
	else if (strcmp (v, "clear") == 0)
		*bfields |= SCOPE_CLEAR;
	else if (strcmp (v, "set") == 0)
		*bfields |= (SCOPE_CLEAR | SCOPE_PUSH);
	else if (strcmp (v, "replace") == 0)
		*bfields |= (SCOPE_POP|SCOPE_REF_AFTER_POP|SCOPE_PUSH);
	else
		error (FATAL, "Unexpected value for scope flag in regex definition: scope=%s", v);
}
585 
/* Flag handler for {placeholder}: OR-s SCOPE_PLACEHOLDER into the
 * unsigned int that DATA points to. */
static void placeholder_ptrn_flag_eval (const char* const f  CTAGS_ATTR_UNUSED,
				     const char* const v  CTAGS_ATTR_UNUSED, void* data)
{
	unsigned int *const actions = data;

	*actions = *actions | SCOPE_PLACEHOLDER;
}
592 
/* Definitions of the long flags {scope=ACTION} and {placeholder}.
 * Both handlers OR enum scopeAction bits into an unsigned int. */
static flagDefinition scopePtrnFlagDef[] = {
	{ '\0', "scope",     NULL, scope_ptrn_flag_eval,
	  "ACTION", "use scope stack: ACTION = ref|push|pop|clear|set|replace"},
	{ '\0', "placeholder",  NULL, placeholder_ptrn_flag_eval,
	  NULL, "don't put this tag to tags file."},
};
599 
kindNew(char letter,const char * name,const char * description)600 static kindDefinition *kindNew (char letter, const char *name, const char *description)
601 {
602 	kindDefinition *kdef = xCalloc (1, kindDefinition);
603 	kdef->letter        = letter;
604 	kdef->name = eStrdup (name);
605 	kdef->description = eStrdup(description? description: kdef->name);
606 	kdef->enabled = true;
607 	return kdef;
608 }
609 
/* Release KIND together with its name and description strings. */
static void kindFree (kindDefinition *kind)
{
	void *const name = (void *)kind->name;
	void *const description = (void *)kind->description;

	kind->letter = '\0';

	eFree (name);
	kind->name = NULL;

	eFree (description);
	kind->description = NULL;

	eFree (kind);
}
619 
initMgroup(struct mGroupSpec * mgroup)620 static void initMgroup(struct mGroupSpec *mgroup)
621 {
622 	mgroup->forLineNumberDetermination = NO_MULTILINE;
623 	mgroup->forNextScanning = NO_MULTILINE;
624 	mgroup->nextFromStart = false;
625 }
626 
initGuestSpec(struct guestSpec * guest)627 static void initGuestSpec (struct guestSpec *guest)
628 {
629 	guest->lang.type = GUEST_LANG_UNKNOWN;
630 }
631 
initTaction(struct mTableActionSpec * taction)632 static void initTaction(struct mTableActionSpec *taction)
633 {
634 	taction->action = TACTION_NOP;
635 	taction->table = NULL;
636 }
637 
/* Take one more reference to PTRN and return it.  The reference is to be
 * dropped with deletePattern(). */
static regexPattern * refPattern (regexPattern * ptrn)
{
	ptrn->refcount += 1;
	return ptrn;
}
643 
newPattern(regexCompiledCode * const pattern,enum regexParserType regptype)644 static regexPattern * newPattern (regexCompiledCode* const pattern,
645 								  enum regexParserType regptype)
646 {
647 	regexPattern *ptrn = xCalloc(1, regexPattern);
648 
649 	ptrn->pattern.backend = pattern->backend;
650 	ptrn->pattern.code = pattern->code;
651 
652 	ptrn->exclusive = false;
653 	ptrn->accept_empty_name = false;
654 	ptrn->regptype = regptype;
655 	ptrn->xtagType = XTAG_UNKNOWN;
656 
657 	if (regptype == REG_PARSER_MULTI_LINE)
658 		initMgroup(&ptrn->mgroup);
659 	if (regptype == REG_PARSER_MULTI_TABLE)
660 		initTaction(&ptrn->taction);
661 	initGuestSpec (&ptrn->guest);
662 
663 	ptrn->u.tag.roleBits = 0;
664 	ptrn->refcount = 1;
665 
666 	ptrn->optscript = NULL;
667 	ptrn->optscript_src = NULL;
668 
669 	return ptrn;
670 }
671 
/* Create a table entry sharing the pattern of OTHER.  The pattern gains a
 * reference; the statistics of the new entry start from zero. */
static regexTableEntry * newRefPatternEntry (regexTableEntry * other)
{
	regexTableEntry *shared = xCalloc (1, regexTableEntry);

	Assert (other && other->pattern);

	regexPattern *const ptrn = refPattern(other->pattern);
	shared->pattern = ptrn;

	return shared;
}
681 
newEntry(regexCompiledCode * const pattern,enum regexParserType regptype)682 static regexTableEntry * newEntry (regexCompiledCode* const pattern,
683 								   enum regexParserType regptype)
684 {
685 	regexTableEntry *entry = xCalloc (1, regexTableEntry);
686 	entry->pattern = newPattern (pattern, regptype);
687 	return entry;
688 }
689 
addCompiledTagCommon(struct lregexControlBlock * lcb,int table_index,regexCompiledCode * const pattern,enum regexParserType regptype)690 static regexPattern* addCompiledTagCommon (struct lregexControlBlock *lcb,
691 										   int table_index,
692 										   regexCompiledCode* const pattern,
693 										   enum regexParserType regptype)
694 {
695 	regexTableEntry *entry = newEntry (pattern, regptype);
696 
697 	if (regptype == REG_PARSER_MULTI_TABLE)
698 	{
699 		struct regexTable *table = ptrArrayItem (lcb->tables, table_index);
700 		Assert(table);
701 
702 		ptrArrayAdd (table->entries, entry);
703 	}
704 	else
705 		ptrArrayAdd (lcb->entries[regptype], entry);
706 
707 	useRegexMethod(lcb->owner);
708 
709 	return entry->pattern;
710 }
711 
pre_ptrn_flag_mgroup_long(const char * const s,const char * const v,void * data)712 static void pre_ptrn_flag_mgroup_long (const char* const s, const char* const v, void* data)
713 {
714 	struct mGroupSpec *mgroup = data;
715 	if (!v)
716 	{
717 		error (WARNING, "no value is given for: %s", s);
718 		return;
719 	}
720 	if (!strToInt (v, 10, &mgroup->forLineNumberDetermination))
721 	{
722 		error (WARNING, "wrong %s specification: %s", s, v);
723 		mgroup->forLineNumberDetermination = NO_MULTILINE;
724 	}
725 	else if (mgroup->forLineNumberDetermination < 0
726 			 || mgroup->forLineNumberDetermination >= BACK_REFERENCE_COUNT)
727 	{
728 		error (WARNING, "out of range(0 ~ %d) %s specification: %s",
729 			   (BACK_REFERENCE_COUNT - 1),
730 			   s, v);
731 		mgroup->forLineNumberDetermination = NO_MULTILINE;
732 	}
733 }
734 
pre_ptrn_flag_advanceTo_long(const char * const s,const char * const v,void * data)735 static void pre_ptrn_flag_advanceTo_long (const char* const s, const char* const v, void* data)
736 {
737 	struct mGroupSpec *mgroup = data;
738 	char *vdup;
739 	char *tmp;
740 
741 
742 	if (!v)
743 	{
744 		error (WARNING, "no value is given for: %s", s);
745 		return;
746 	}
747 
748 	vdup = eStrdup (v);
749 
750 	mgroup->nextFromStart = false;
751 	if ((tmp = strstr(vdup, "start")))
752 	{
753 		mgroup->nextFromStart = true;
754 		*tmp = '\0';
755 	}
756 	else if ((tmp = strstr(vdup, "end")))
757 		*tmp = '\0';
758 
759 	if (!strToInt (vdup, 10, &(mgroup->forNextScanning)))
760 	{
761 		error (WARNING, "wrong %s specification: %s", s, vdup);
762 		mgroup->nextFromStart = false;
763 	}
764 	else if (mgroup->forNextScanning < 0 || mgroup->forNextScanning >= BACK_REFERENCE_COUNT)
765 	{
766 		error (WARNING, "out of range(0 ~ %d) %s specification: %s",
767 			   (BACK_REFERENCE_COUNT - 1), s, vdup);
768 		mgroup->nextFromStart = false;
769 	}
770 
771 	eFree (vdup);
772 }
773 
/* Data handed to the guest flag handler: the parser type of the pattern
 * being defined and the guest spec to fill. */
struct guestPtrnFlagData {
	enum regexParserType type;
	struct guestSpec *guest;
};
778 
pre_ptrn_flag_guest_long(const char * const s,const char * const v,void * data)779 static void pre_ptrn_flag_guest_long (const char* const s, const char* const v, void* data)
780 {
781 	struct guestPtrnFlagData *flagData = data;
782 	enum regexParserType type = flagData->type;
783 	struct guestSpec *guest = flagData->guest;
784 	struct boundarySpec *current;
785 
786 	if (!v)
787 	{
788 		error (WARNING, "no value is given for: %s", s);
789 		return;
790 	}
791 
792 	char *tmp = strchr (v, ',');
793 	if (tmp == NULL)
794 	{
795 		error (WARNING, "no terminator found for parser name: %s", s);
796 		return;
797 	}
798 
799 	if ((tmp - v) == 0)
800 	{
801 		if (type == REG_PARSER_MULTI_LINE)
802 		{
803 			error (WARNING,
804 				   "using placeholder for guest name field is not allowed in multiline regex spec: %s", v);
805 			goto err;
806 		}
807 
808 		guest->lang.type = GUEST_LANG_PLACEHOLDER;
809 	}
810 	else if (*v == '\\' || *v == '*')
811 	{
812 		const char *n_tmp = v + 1;
813 		const char *n = n_tmp;
814 		for (; isdigit (*n_tmp); n_tmp++);
815 		char c = *n_tmp;
816 		*(char *)n_tmp = '\0';
817 		if (!strToInt (n, 10, &(guest->lang.spec.patternGroup)))
818 		{
819 			error (WARNING, "wrong guest name specification: %s", v);
820 			goto err;
821 		}
822 		else if (guest->lang.spec.patternGroup >= BACK_REFERENCE_COUNT)
823 		{
824 			error (WARNING, "wrong guest name specification (back reference count is too large): %d",
825 				   guest->lang.spec.patternGroup);
826 			goto err;
827 		}
828 
829 		*(char *)n_tmp = c;
830 		if (*n_tmp != ',')
831 		{
832 			error (WARNING, "wrong guest specification (garbage at the end of end guest spec): %s", v);
833 			goto err;
834 		}
835 
836 		guest->lang.type = (*v == '\\')
837 			? GUEST_LANG_PTN_GROUP_FOR_LANGNAME
838 			: GUEST_LANG_PTN_GROUP_FOR_FILEMAP;
839 	}
840 	else
841 	{
842 		guest->lang.spec.lang = getNamedLanguageOrAlias (v, (tmp - v));
843 		if (guest->lang.spec.lang == LANG_IGNORE)
844 		{
845 			error (WARNING, "no parser found for the guest spec: %s", v);
846 			goto err;
847 		}
848 		guest->lang.type = GUEST_LANG_STATIC_LANGNAME;
849 	}
850 
851 	tmp++;
852 	if (*tmp == '\0')
853 	{
854 		error (WARNING, "no area spec found in the guest spec: %s", v);
855 		goto err;
856 	}
857 
858 	for (int i = 0; i < 2; i++)
859 	{
860 		current = guest->boundary + i;
861 		const char *current_field_str = (i == BOUNDARY_START? "start": "end");
862 
863 		if (tmp [0] == ((i == BOUNDARY_START)? ',': '\0'))
864 		{
865 			if (type == REG_PARSER_MULTI_LINE)
866 				error (WARNING,
867 					   "using placeholder for %s field is not allowed in multiline regex spec: %s",
868 					   current_field_str, v);
869 
870 			current->placeholder = true;
871 		}
872 		else
873 		{
874 			char *n = tmp;
875 
876 			for (; isdigit (*tmp); tmp++);
877 			char c = *tmp;
878 			*tmp = '\0';
879 			if (!strToInt (n, 10, &(current->patternGroup)))
880 			{
881 				error (WARNING, "wrong guest area specification (patternGroup of %s, number expected): %s:%s",
882 					   current_field_str, v, n);
883 				goto err;
884 			}
885 			*tmp = c;
886 			if (*tmp == '\0')
887 			{
888 				error (WARNING, "wrong guest area specification (patternGroup of %s, nether start nor end given): %s",
889 					   current_field_str, v);
890 				goto err;
891 			}
892 			else if (strncmp (tmp, "start", 5) == 0)
893 			{
894 				current->fromStartOfGroup = true;
895 				tmp += 5;
896 			}
897 			else if (strncmp (tmp, "end", 3) == 0)
898 			{
899 				current->fromStartOfGroup = false;
900 				tmp += 3;
901 			}
902 			else
903 			{
904 				error (WARNING, "wrong guest area specification (%s): %s",
905 					   current_field_str, v);
906 				goto err;
907 			}
908 		}
909 
910 		if (i == 0)
911 		{
912 			if (*tmp != ',')
913 			{
914 				error (WARNING,
915 					   "wrong guest area specification (separator between start and end boundaries): %s", v);
916 				goto err;
917 			}
918 			tmp++;
919 		}
920 		else if (i == 1 && (*tmp != '\0'))
921 		{
922 			error (WARNING, "wrong guest area specification (garbage at the end of end boundary spec): %s", v);
923 			goto err;
924 		}
925 	}
926 	return;
927  err:
928 	guest->lang.type = GUEST_LANG_UNKNOWN;
929 }
930 
/* Definitions of the long flags {mgroup=N} and {_advanceTo=N[start|end]}.
 * Both handlers fill a struct mGroupSpec. */
static flagDefinition multilinePtrnFlagDef[] = {
	{ '\0',  "mgroup", NULL, pre_ptrn_flag_mgroup_long,
	  "N", "a group in pattern determining the line number of tag"},
	{ '\0',  "_advanceTo", NULL, pre_ptrn_flag_advanceTo_long,
	  "N[start|end]", "a group in pattern from where the next scan starts [0end]"},
};
937 
/* Definition of the long flag {_guest=...}.  The handler fills the guest
 * spec reachable from a struct guestPtrnFlagData. */
static flagDefinition guestPtrnFlagDef[] = {
	/* Prefix put in front of the flag name. */
#define EXPERIMENTAL "_"
	{ '\0',  EXPERIMENTAL "guest", NULL, pre_ptrn_flag_guest_long,
	  "PARSERSPEC,N0[start|end],N1[start|end]", "run guest parser on the area"},
};
943 
hasMessage(const regexPattern * const ptrn)944 static bool hasMessage(const regexPattern *const ptrn)
945 {
946 	return (ptrn->message.selection > 0 && ptrn->message.message_string);
947 }
948 
/* Data handed to the common_flag_*() handlers. */
struct commonFlagData {
	/* Language used for looking up extras, fields and roles by name. */
	const langType owner;
	const struct lregexControlBlock *const lcb;
	/* The pattern the flags are applied to. */
	regexPattern *ptrn;
};
954 
common_flag_msg_long(const char * const s,const char * const v,void * data)955 static void common_flag_msg_long (const char* const s, const char* const v, void* data)
956 {
957 	struct commonFlagData *cdata = data;
958 	regexPattern *ptrn = cdata->ptrn;
959 
960 	Assert (ptrn);
961 
962 	if (hasMessage(ptrn))
963 	{
964 		error (WARNING, "only one message flag may be given per regex (already set to '%s')",
965 			   ptrn->message.message_string);
966 		return;
967 	}
968 
969 	if (strcmp (s, "fatal") == 0)
970 	{
971 		ptrn->message.selection = FATAL;
972 	}
973 	else if (strcmp (s, "warning") == 0)
974 	{
975 		ptrn->message.selection = WARNING;
976 	}
977 
978 	Assert (ptrn->message.selection != 0);
979 
980 	if (!v || !*v)
981 	{
982 		error (WARNING, "no message value is given for {%s}", s);
983 		return;
984 	}
985 
986 	const char* begin = v;
987 	const char* end = v + strlen (v);
988 	--end;
989 
990 	if (*begin != '"' || *end != '"' || begin == end)
991 	{
992 		error (WARNING, "argument for {%s} must be in double-quotes", s);
993 		return;
994 	}
995 
996 	++begin;
997 
998 	if (begin < end)
999 		ptrn->message.message_string = eStrndup (begin, end - begin);
1000 }
1001 
common_flag_extra_long(const char * const s,const char * const v,void * data)1002 static void common_flag_extra_long (const char* const s, const char* const v, void* data)
1003 {
1004 	struct commonFlagData * cdata = data;
1005 
1006 	Assert (cdata->ptrn);
1007 
1008 	if (!v)
1009 	{
1010 		error (WARNING, "no value is given for: %s", s);
1011 		return;
1012 	}
1013 
1014 	cdata->ptrn->xtagType = getXtagTypeForNameAndLanguage (v, cdata->owner);
1015 	if (cdata->ptrn->xtagType == XTAG_UNKNOWN)
1016 		error (WARNING, "no such extra \"%s\" in %s", v, getLanguageName(cdata->owner));
1017 }
1018 
1019 
/* Create a field pattern for FTYPE holding its own copy of TEMPLATE.
 * The result is to be released with fieldPatternDelete(). */
static struct fieldPattern * fieldPatternNew (fieldType ftype, const char *template)
{
	struct fieldPattern *created = xMalloc(1, struct fieldPattern);

	created->template = eStrdup(template);
	created->ftype = ftype;

	return created;
}
1030 
fieldPatternDelete(struct fieldPattern * fp)1031 static void fieldPatternDelete (struct fieldPattern *fp)
1032 {
1033 	eFree ((void *)fp->template);
1034 	eFree (fp);
1035 }
1036 
common_flag_field_long(const char * const s,const char * const v,void * data)1037 static void common_flag_field_long (const char* const s, const char* const v, void* data)
1038 {
1039 	struct commonFlagData * cdata = data;
1040 	regexPattern *ptrn = cdata->ptrn;
1041 
1042 	Assert (ptrn);
1043 
1044 	struct fieldPattern *fp;
1045 	fieldType ftype;
1046 	char *fname;
1047 	const char* template;
1048 	char *tmp;
1049 
1050 	if (!v)
1051 	{
1052 		error (WARNING, "no value is given for: %s", s);
1053 		return;
1054 	}
1055 
1056 	tmp = strchr (v, ':');
1057 	if (tmp == NULL || tmp == v)
1058 	{
1059 		error (WARNING, "no field name is given for: %s", s);
1060 		return;
1061 	}
1062 
1063 	fname = eStrndup (v, tmp - v);
1064 	ftype = getFieldTypeForNameAndLanguage (fname, cdata->owner);
1065 	if (ftype == FIELD_UNKNOWN)
1066 	{
1067 		error (WARNING, "no such field \"%s\" in %s", fname, getLanguageName(cdata->owner));
1068 		eFree (fname);
1069 		return;
1070 	}
1071 
1072 	if (ptrn->fieldPatterns)
1073 	{
1074 		for (unsigned int i = 0; i < ptrArrayCount(ptrn->fieldPatterns); i++)
1075 		{
1076 			fp = ptrArrayItem(ptrn->fieldPatterns, i);
1077 			if (fp->ftype == ftype)
1078 			{
1079 				error (WARNING, "duplicated field specification \"%s\" in %s", fname, getLanguageName(cdata->owner));
1080 				eFree (fname);
1081 				return;
1082 			}
1083 		}
1084 	}
1085 	eFree (fname);
1086 
1087 	template = tmp + 1;
1088 	fp = fieldPatternNew (ftype, template);
1089 
1090 	if (ptrn->fieldPatterns == NULL)
1091 		ptrn->fieldPatterns = ptrArrayNew((ptrArrayDeleteFunc)fieldPatternDelete);
1092 	ptrArrayAdd(ptrn->fieldPatterns, fp);
1093 }
1094 
common_flag_role_long(const char * const s,const char * const v,void * data)1095 static void common_flag_role_long (const char* const s, const char* const v, void* data)
1096 {
1097 	struct commonFlagData * cdata = data;
1098 	regexPattern *ptrn = cdata->ptrn;
1099 	roleDefinition * role;
1100 
1101 	Assert (ptrn);
1102 
1103 	if (!v)
1104 	{
1105 		error (WARNING, "no value is given for: %s", s);
1106 		return;
1107 	}
1108 
1109 	role = getLanguageRoleForName(cdata->owner,
1110 								  ptrn->u.tag.kindIndex, v);
1111 	if (!role)
1112 	{
1113 		error (WARNING, "no such role: %s", v);
1114 		return;
1115 	}
1116 
1117 	ptrn->u.tag.roleBits |= makeRoleBit(role->id);
1118 }
1119 
common_flag_anonymous_long(const char * const s,const char * const v,void * data)1120 static void common_flag_anonymous_long (const char* const s, const char* const v, void* data)
1121 {
1122 	struct commonFlagData * cdata = data;
1123 	regexPattern *ptrn = cdata->ptrn;
1124 
1125 	Assert (ptrn);
1126 
1127 	if (ptrn->anonymous_tag_prefix)
1128 	{
1129 		error (WARNING, "an anonymous tag prefix for this pattern (%s) is already given: %s",
1130 			   ptrn->pattern_string? ptrn->pattern_string: "",
1131 			   ptrn->anonymous_tag_prefix);
1132 		return;
1133 	}
1134 
1135 	if (!v)
1136 	{
1137 		error (WARNING, "no PREFIX for anonymous regex flag is given (pattern == %s)",
1138 			   ptrn->pattern_string? ptrn->pattern_string: "");
1139 		return;
1140 	}
1141 
1142 	if (ptrn->u.tag.kindIndex == KIND_GHOST_INDEX)
1143 	{
1144 		error (WARNING, "use \"%s\" regex flag only with an explicitly defined kind", s);
1145 		return;
1146 	}
1147 
1148 	ptrn->anonymous_tag_prefix = eStrdup (v);
1149 }
1150 
1151 static flagDefinition commonSpecFlagDef[] = {
1152 	{ '\0',  "fatal", NULL, common_flag_msg_long ,
1153 	  "\"MESSAGE\"", "print the given MESSAGE and exit"},
1154 	{ '\0',  "warning", NULL, common_flag_msg_long ,
1155 	  "\"MESSAGE\"", "print the given MESSAGE at WARNING level"},
1156 #define EXPERIMENTAL "_"
1157 	{ '\0',  EXPERIMENTAL "extra", NULL, common_flag_extra_long ,
1158 	  "EXTRA", "record the tag only when the extra is enabled"},
1159 	{ '\0',  EXPERIMENTAL "field", NULL, common_flag_field_long ,
1160 	  "FIELD:VALUE", "record the matched string(VALUE) to parser own FIELD of the tag"},
1161 	{ '\0',  EXPERIMENTAL "role", NULL, common_flag_role_long,
1162 	  "ROLE", "set the given ROLE to the roles field"},
1163 	{ '\0',  EXPERIMENTAL "anonymous", NULL, common_flag_anonymous_long,
1164 	  "PREFIX", "make an anonymous tag with PREFIX"},
1165 };
1166 
1167 
pre_ptrn_flag_mtable_long(const char * const s,const char * const v,void * data)1168 static void pre_ptrn_flag_mtable_long (const char* const s, const char* const v, void* data)
1169 {
1170 	struct commonFlagData * cdata = data;
1171 	regexPattern *ptrn = cdata->ptrn;
1172 	struct mTableActionSpec *taction;
1173 	bool taking_table = true;
1174 
1175 	Assert (ptrn);
1176 	Assert (cdata->lcb);
1177 
1178 	taction = &ptrn->taction;
1179 
1180 	if (strcmp (s, "tenter") == 0)
1181 		taction->action = TACTION_ENTER;
1182 	else if (strcmp (s, "tleave") == 0)
1183 	{
1184 		taction->action = TACTION_LEAVE;
1185 		taking_table = false;
1186 	}
1187 	else if (strcmp (s, "tjump") == 0)
1188 		taction->action = TACTION_JUMP;
1189 	else if (strcmp (s, "treset") == 0)
1190 		taction->action = TACTION_RESET;
1191 	else if (strcmp (s, "tquit") == 0)
1192 	{
1193 		taction->action = TACTION_QUIT;
1194 		taking_table = false;
1195 	}
1196 
1197 	if (taking_table)
1198 	{
1199 		int t;
1200 		char *continuation = NULL;
1201 
1202 
1203 		if (!v || (!*v))
1204 			error (FATAL, "no table is given for table action: %s", s);
1205 
1206 		if (taction->action == TACTION_ENTER
1207 			&& (continuation = strchr (v, ',')))
1208 		{
1209 			char *tableEnterTo;
1210 
1211 			tableEnterTo = eStrndup (v, continuation - v);
1212 			t = getTableIndexForName (cdata->lcb, tableEnterTo);
1213 			if (t < 0)
1214 				error (FATAL, "table is not defined: %s", tableEnterTo);
1215 			taction->table = ptrArrayItem (cdata->lcb->tables, t);
1216 			eFree (tableEnterTo);
1217 
1218 			if (!*(continuation + 1))
1219 				error (FATAL, "no continuation table is given for: %s", v);
1220 
1221 			int t_cont = getTableIndexForName (cdata->lcb, continuation + 1);
1222 			if (t_cont < 0)
1223 				error (FATAL, "table for continuation is not defined: %s", continuation + 1);
1224 			taction->continuation_table = ptrArrayItem (cdata->lcb->tables, t_cont);
1225 		}
1226 		else
1227 		{
1228 			t = getTableIndexForName (cdata->lcb, v);
1229 			if (t < 0)
1230 				error (FATAL, "table is not defined: %s", v);
1231 			taction->table = ptrArrayItem (cdata->lcb->tables, t);
1232 			taction->continuation_table = NULL;
1233 		}
1234 	}
1235 }
1236 
1237 static flagDefinition multitablePtrnFlagDef[] = {
1238 	{ '\0',  "tenter", NULL, pre_ptrn_flag_mtable_long ,
1239 	  "TABLE[,CONT]", "enter to given regext table (with specifying continuation)"},
1240 	{ '\0',  "tleave", NULL, pre_ptrn_flag_mtable_long ,
1241 	  NULL, "leave from the current regext table"},
1242 	{ '\0',  "tjump", NULL, pre_ptrn_flag_mtable_long ,
1243 	  "TABLE", "jump to another regext table(don't push the current table to state stack)"},
1244 	{ '\0',  "treset", NULL, pre_ptrn_flag_mtable_long ,
1245 	  "TABLE", "clear the state stack and jump to given regex table"},
1246 	{ '\0',  "tquit", NULL, pre_ptrn_flag_mtable_long ,
1247 	  NULL, "stop the parsing with this parser"},
1248 };
1249 
1250 
/* Decide which kind index tags made by PTRN carry.
 *
 * - If the language OWNER already has a kind for KINDLETTER, that kind is
 *   reused; a warning is issued when its name differs from KINDNAME and
 *   KINDNAME is not the regex default name.
 * - Otherwise, if the name pattern is empty and letter and name are the regex
 *   defaults that were not explicitly given, the ghost kind index is used.
 * - Otherwise a new kind is created from KINDLETTER, KINDNAME and
 *   DESCRIPTION and defined for OWNER.
 *
 * ptrn->u.tag.name_pattern must be set before calling. */
static void setKind(regexPattern * ptrn, const langType owner,
					const char kindLetter, const char* kindName,
					const char *const description,
					bool kind_explicitly_defined)
{
	Assert (ptrn);
	Assert (ptrn->u.tag.name_pattern);
	Assert (kindName);

	kindDefinition *const existing = getLanguageKindForLetter (owner, kindLetter);
	if (existing)
	{
		const bool differs_from_existing = (strcmp (existing->name, kindName) != 0);
		const bool is_default_name = (strcmp (kindName, KIND_REGEX_DEFAULT_NAME) == 0);

		/* When using a same kind letter for multiple regex patterns, the name of kind
		   should be the same. */
		if (differs_from_existing && !is_default_name)
			error  (WARNING, "Don't reuse the kind letter `%c' in a language %s (old: \"%s\", new: \"%s\")",
					existing->letter, getLanguageName (owner),
					existing->name, kindName);
		ptrn->u.tag.kindIndex = existing->id;
		return;
	}

	const bool nameless_default_kind =
		*ptrn->u.tag.name_pattern == '\0'
		&& kindLetter == KIND_REGEX_DEFAULT_LETTER
		&& strcmp (kindName, KIND_REGEX_DEFAULT_NAME) == 0
		&& !kind_explicitly_defined;
	if (nameless_default_kind)
	{
		ptrn->u.tag.kindIndex = KIND_GHOST_INDEX;
		return;
	}

	kindDefinition *const fresh = kindNew (kindLetter, kindName, description);
	defineLanguageKind (owner, fresh, kindFree);
	ptrn->u.tag.kindIndex = fresh->id;
}
1283 
/* Evaluate FLAGS against every flag table that applies to a pattern of parser
 * type REGPTYPE, storing the results in PTRN.
 *
 * Tables are evaluated in this order: pre-pattern flags (single-line only),
 * common flags, scope flags (single-line and multi-table), multi-line group
 * flags (multi-line and multi-table), guest flags, and table-action flags
 * (multi-table only).  A non-NULL result of the common-flag pass is read as
 * an optscript and a copy of its source text is kept. */
static void patternEvalFlags (struct lregexControlBlock *lcb,
							  regexPattern * ptrn,
							  enum regexParserType regptype,
							  const char* flags)
{
	const bool single_line = (regptype == REG_PARSER_SINGLE_LINE);
	const bool multi_line  = (regptype == REG_PARSER_MULTI_LINE);
	const bool multi_table = (regptype == REG_PARSER_MULTI_TABLE);

	struct commonFlagData common = {
		.owner = lcb->owner,
		.lcb = lcb,
		.ptrn = ptrn
	};
	struct guestPtrnFlagData guest_data = {
		.type = regptype,
		.guest = &ptrn->guest,
	};

	if (single_line)
		flagsEval (flags, prePtrnFlagDef, ARRAY_SIZE(prePtrnFlagDef), &ptrn->exclusive);

	const char *const script_src = flagsEval (flags, commonSpecFlagDef, ARRAY_SIZE(commonSpecFlagDef), &common);
	if (script_src != NULL)
	{
		ptrn->optscript = scriptRead (optvm, script_src);
		ptrn->optscript_src = eStrdup (script_src);
	}

	if (single_line || multi_table)
	{
		flagsEval (flags, scopePtrnFlagDef, ARRAY_SIZE(scopePtrnFlagDef), &ptrn->scopeActions);
		if ((ptrn->scopeActions & (SCOPE_REF|SCOPE_REF_AFTER_POP)) == (SCOPE_REF|SCOPE_REF_AFTER_POP))
			error (WARNING, "%s: don't combine \"replace\" with the other scope action.",
				   getLanguageName (lcb->owner));
	}

	if (multi_line || multi_table)
	{
		ptrn->mgroup.forNextScanning = 0;
		/* ptrn->mgroup.nextFromStart is initialized in initMgroup() already. */
		flagsEval (flags, multilinePtrnFlagDef, ARRAY_SIZE(multilinePtrnFlagDef), &ptrn->mgroup);
	}

	flagsEval (flags, guestPtrnFlagDef, ARRAY_SIZE(guestPtrnFlagDef), &guest_data);

	if (multi_table)
		flagsEval (flags, multitablePtrnFlagDef, ARRAY_SIZE(multitablePtrnFlagDef), &common);
}
1329 
addCompiledTagPattern(struct lregexControlBlock * lcb,int table_index,enum regexParserType regptype,regexCompiledCode * const pattern,const char * const name,char kindLetter,const char * kindName,char * const description,const char * flags,bool kind_explicitly_defined,bool * disabled)1330 static regexPattern *addCompiledTagPattern (struct lregexControlBlock *lcb,
1331 											int table_index,
1332 											enum regexParserType regptype, regexCompiledCode* const pattern,
1333 					    const char* const name, char kindLetter, const char* kindName,
1334 					    char *const description, const char* flags,
1335 					    bool kind_explicitly_defined,
1336 					    bool *disabled)
1337 {
1338 	regexPattern * ptrn = addCompiledTagCommon(lcb, table_index, pattern, regptype);
1339 
1340 	ptrn->type = PTRN_TAG;
1341 	ptrn->u.tag.name_pattern = eStrdup (name);
1342 	ptrn->disabled = disabled;
1343 
1344 	setKind(ptrn, lcb->owner, kindLetter, kindName, description, kind_explicitly_defined);
1345 	patternEvalFlags (lcb, ptrn, regptype, flags);
1346 
1347 	return ptrn;
1348 }
1349 
addCompiledCallbackPattern(struct lregexControlBlock * lcb,regexCompiledCode * const pattern,const regexCallback callback,const char * flags,bool * disabled,void * userData)1350 static regexPattern *addCompiledCallbackPattern (struct lregexControlBlock *lcb, regexCompiledCode* const pattern,
1351 					const regexCallback callback, const char* flags,
1352 					bool *disabled,
1353 					void *userData)
1354 {
1355 	regexPattern * ptrn;
1356 	bool exclusive = false;
1357 	flagsEval (flags, prePtrnFlagDef, ARRAY_SIZE(prePtrnFlagDef), &exclusive);
1358 	ptrn = addCompiledTagCommon(lcb, TABLE_INDEX_UNUSED, pattern, REG_PARSER_SINGLE_LINE);
1359 	ptrn->type    = PTRN_CALLBACK;
1360 	ptrn->u.callback.function = callback;
1361 	ptrn->u.callback.userData = userData;
1362 	ptrn->exclusive = exclusive;
1363 	ptrn->disabled = disabled;
1364 	return ptrn;
1365 }
1366 
#ifndef HAVE_PCRE2
/* Stand-in handlers for the pcre2 backend flags, compiled only when
 * HAVE_PCRE2 is not defined.  They ignore their arguments and just warn. */

/* Handler for the one-letter 'p' flag. */
static void no_pcre2_regex_flag_short (char c CTAGS_ATTR_UNUSED, void* data CTAGS_ATTR_UNUSED)
{
	error (WARNING, "'p' flag is specified but pcre2 regex engine is not linked.");
}

/* Handler for the long {pcre2} flag. */
static void no_pcre2_regex_flag_long (const char* const s CTAGS_ATTR_UNUSED,
									  const char* const unused CTAGS_ATTR_UNUSED,
									  void* data CTAGS_ATTR_UNUSED)
{
	error (WARNING, "{pcre2} flag is specified but pcre2 regex engine is not linked.");
}
#endif
1377 
1378 static flagDefinition backendFlagDefs[] = {
1379 	{ 'b', "basic",  basic_regex_flag_short,  basic_regex_flag_long,
1380 	  NULL, "interpreted as a Posix basic regular expression."},
1381 	{ 'e', "extend", extend_regex_flag_short, extend_regex_flag_long,
1382 	  NULL, "interpreted as a Posix extended regular expression (default)"},
1383 #ifdef HAVE_PCRE2
1384 	{ 'p', "pcre2",  pcre2_regex_flag_short, pcre2_regex_flag_long,
1385 	  NULL, "use pcre2 regex engine"},
1386 #else
1387 	{ 'p', "pcre2",  no_pcre2_regex_flag_short, no_pcre2_regex_flag_long,
1388 	  NULL, "pcre2 is NOT linked!"},
1389 #endif
1390 };
1391 
/* Handler for the one-letter 'i' flag: asks the backend recorded in the
 * struct flagDefsDescriptor passed as DATA to set its case-insensitivity
 * flag in the descriptor's flags. */
static void regex_flag_icase_short (char c CTAGS_ATTR_UNUSED, void* data)
{
	struct flagDefsDescriptor *const descriptor = data;

	descriptor->backend->set_icase_flag (&descriptor->flags);
}
1397 
/* Handler for the long {icase} flag.  It has the same effect as the
 * one-letter 'i' handler: the backend recorded in the struct
 * flagDefsDescriptor passed as DATA sets its case-insensitivity flag. */
static void regex_flag_icase_long (const char* s CTAGS_ATTR_UNUSED, const char* const unused CTAGS_ATTR_UNUSED, void* data)
{
	struct flagDefsDescriptor *const descriptor = data;

	descriptor->backend->set_icase_flag (&descriptor->flags);
}
1402 
1403 static flagDefinition backendCommonRegexFlagDefs[] = {
1404 	{ 'i', "icase",  regex_flag_icase_short,  regex_flag_icase_long,
1405 	  NULL, "applied in a case-insensitive manner"},
1406 };
1407 
1408 
choose_backend(const char * flags,enum regexParserType regptype,bool error_if_no_backend)1409 static struct flagDefsDescriptor choose_backend (const char *flags, enum regexParserType regptype, bool error_if_no_backend)
1410 {
1411 	struct flagDefsDescriptor desc = {
1412 		.backend  = NULL,
1413 		.flags = 0,
1414 		.regptype = regptype,
1415 	};
1416 
1417 	if (flags)
1418 		flagsEval (flags,
1419 				   backendFlagDefs,
1420 				   ARRAY_SIZE(backendFlagDefs),
1421 				   &desc);
1422 
1423 	/* Choose the default backend. */
1424 	if (desc.backend == NULL)
1425 	{
1426 		if (flags && error_if_no_backend)
1427 			error (FATAL, "No sunch backend for the name: \"%s\"", flags);
1428 
1429 		flagsEval (DEFAULT_REGEX_BACKEND,
1430 				   backendFlagDefs,
1431 				   ARRAY_SIZE(backendFlagDefs),
1432 				   &desc);
1433 	}
1434 	return desc;
1435 }
1436 
compileRegex(enum regexParserType regptype,const char * const regexp,const char * const flags)1437 static regexCompiledCode compileRegex (enum regexParserType regptype,
1438 									   const char* const regexp, const char* const flags)
1439 {
1440 	struct flagDefsDescriptor desc = choose_backend (flags, regptype, false);
1441 
1442 	/* Evaluate backend specific flags */
1443 	flagsEval (flags,
1444 			   desc.backend->fdefs,
1445 			   desc.backend->fdef_count,
1446 			   &desc.flags);
1447 
1448 	flagsEval (flags,
1449 			   backendCommonRegexFlagDefs,
1450 			   ARRAY_SIZE (backendCommonRegexFlagDefs),
1451 			   &desc);
1452 
1453 	return desc.backend->compile (desc.backend, regexp, desc.flags);
1454 }
1455 
1456 
1457 /* If a letter and/or a name are defined in kindSpec, return true. */
parseKinds(const char * const kindSpec,char * const kindLetter,char ** const kindName,char ** description)1458 static bool parseKinds (
1459 		const char* const kindSpec, char* const kindLetter, char** const kindName,
1460 		char **description)
1461 {
1462 	*description = NULL;
1463 
1464 	if (kindSpec == NULL  ||  kindSpec [0] == '\0')
1465 	{
1466 		*kindLetter = KIND_REGEX_DEFAULT_LETTER;
1467 		*kindName = eStrdup (KIND_REGEX_DEFAULT_NAME);
1468 		return false;
1469 	}
1470 	else
1471 	{
1472 		bool explicitly_defined = false;
1473 		const char* k = kindSpec;
1474 
1475 		if (k [0] != ','  &&  (k [1] == ','  ||  k [1] == '\0'))
1476 		{
1477 			*kindLetter = *k++;
1478 			explicitly_defined = true;
1479 		}
1480 		else
1481 			*kindLetter = KIND_REGEX_DEFAULT_LETTER;
1482 
1483 		if (*k == ',')
1484 			++k;
1485 
1486 		if (k [0] == '\0')
1487 			*kindName = eStrdup (KIND_REGEX_DEFAULT_NAME);
1488 		else
1489 		{
1490 			const char *const comma = strchr (k, ',');
1491 
1492 			if (comma == NULL)
1493 			{
1494 				if (strlen (k) == 0)
1495 					*kindName = eStrdup (KIND_REGEX_DEFAULT_NAME);
1496 				else
1497 				{
1498 					*kindName = eStrdup (k);
1499 					explicitly_defined = true;
1500 				}
1501 			}
1502 			else
1503 			{
1504 				if (comma - k == 0)
1505 					*kindName = eStrdup (KIND_REGEX_DEFAULT_NAME);
1506 				else
1507 				{
1508 					*kindName = eStrndup (k, comma - k );
1509 					explicitly_defined = true;
1510 				}
1511 				k = comma + 1;
1512 				if (k [0] != '\0')
1513 					*description = eStrdup (k);
1514 			}
1515 		}
1516 		return explicitly_defined;
1517 	}
1518 }
1519 
1520 /*
1521 *   Regex pattern matching
1522 */
1523 
1524 
substitute(const char * const in,const char * out,const int nmatch,const regmatch_t * const pmatch)1525 static vString* substitute (
1526 		const char* const in, const char* out,
1527 		const int nmatch, const regmatch_t* const pmatch)
1528 {
1529 	vString* result = vStringNew ();
1530 	const char* p;
1531 	for (p = out  ;  *p != '\0'  ;  p++)
1532 	{
1533 		if (*p == '\\'  &&  isdigit ((int) *++p))
1534 		{
1535 			const int dig = *p - '0';
1536 			if (0 < dig  &&  dig < nmatch  &&  pmatch [dig].rm_so != -1)
1537 			{
1538 				const int diglen = pmatch [dig].rm_eo - pmatch [dig].rm_so;
1539 				vStringNCatS (result, in + pmatch [dig].rm_so, diglen);
1540 			}
1541 		}
1542 		else if (*p != '\n'  &&  *p != '\r')
1543 			vStringPut (result, *p);
1544 	}
1545 	return result;
1546 }
1547 
getInputLineNumberInRegPType(enum regexParserType regptype,off_t offset)1548 static unsigned long getInputLineNumberInRegPType (enum regexParserType regptype,
1549 												   off_t offset)
1550 {
1551 	return (regptype == REG_PARSER_MULTI_LINE || regptype == REG_PARSER_MULTI_TABLE)
1552 		? getInputLineNumberForFileOffset (offset)
1553 		: getInputLineNumber ();
1554 }
1555 
fillEndLineFieldOfUpperScopes(struct lregexControlBlock * lcb,unsigned long endline)1556 static void fillEndLineFieldOfUpperScopes (struct lregexControlBlock *lcb, unsigned long endline)
1557 {
1558 	tagEntryInfo *entry;
1559 	int n = lcb->currentScope;
1560 
1561 	while ((entry = getEntryInCorkQueue (n))
1562 		   && (entry->extensionFields.endLine == 0))
1563 	{
1564 		entry->extensionFields.endLine = endline;
1565 		n = entry->extensionFields.scopeIndex;
1566 	}
1567 }
1568 
hasNameSlot(const regexPattern * const patbuf)1569 static bool hasNameSlot (const regexPattern* const patbuf)
1570 {
1571 	return (patbuf->u.tag.name_pattern[0] != '\0'
1572 			|| patbuf->anonymous_tag_prefix);
1573 }
1574 
scopeActionRef(int currentScope)1575 static int scopeActionRef (int currentScope)
1576 {
1577 	int scope = currentScope;
1578 	tagEntryInfo *entry;
1579 	while ((entry = getEntryInCorkQueue (scope)) && entry->placeholder)
1580 		/* Look at parent */
1581 		scope = entry->extensionFields.scopeIndex;
1582 	return scope;
1583 }
1584 
/* Make a tag for a successful match of the tag pattern PATBUF.
 *
 * LINE is the text the match offsets in PMATCH refer to.  OFFSET is used to
 * determine line numbers for multi-line and multi-table patterns.  When
 * WINDOW is non-NULL and a tag was made, the pattern's optscript is
 * evaluated with that tag.
 *
 * Steps, in order:
 *  1. Build the tag name: expansion of the name pattern, else a generated
 *     anonymous name, else the empty string; then strip surrounding
 *     whitespace.
 *  2. Apply the scope actions ref, clear, pop and ref-after-pop.
 *  3. Make the tag unless the name is empty and the pattern is not a
 *     placeholder.
 *  4. Apply the push scope action with the index of the tag just made
 *     (CORK_NIL if none was made).
 *  5. Run the optscript if requested. */
static void matchTagPattern (struct lregexControlBlock *lcb,
		const char* line,
		const regexPattern* const patbuf,
		const regmatch_t* const pmatch,
			     off_t offset, scriptWindow *window)
{
	const bool placeholder =
		((patbuf->scopeActions & SCOPE_PLACEHOLDER) == SCOPE_PLACEHOLDER);
	int scope = CORK_NIL;
	int corkIndex = CORK_NIL;
	vString *name;

	if (patbuf->u.tag.name_pattern[0] != '\0')
		name = substitute (line, patbuf->u.tag.name_pattern,
						   BACK_REFERENCE_COUNT, pmatch);
	else if (patbuf->anonymous_tag_prefix)
		name = anonGenerateNew (patbuf->anonymous_tag_prefix,
								patbuf->u.tag.kindIndex);
	else
		name = vStringNewInit ("");

	vStringStripLeading (name);
	vStringStripTrailing (name);

	if (patbuf->scopeActions & SCOPE_REF)
		scope = scopeActionRef (lcb->currentScope);

	if (patbuf->scopeActions & SCOPE_CLEAR)
	{
		unsigned long lastLine = getInputLineNumberInRegPType(patbuf->regptype, offset);

		/*
		 * SCOPE_CLEAR|SCOPE_PUSH implies that "set" was specified as the scope action.
		 * If the specified action is "set", getInputLineNumberInRegPType()
		 * returns the start line of the NEW scope. The cleared scopes are ended BEFORE
		 * the new scope. There is a gap. We must adjust the "end:" field here.
		 */
		if ((patbuf->scopeActions & SCOPE_PUSH) && lastLine > 0)
			lastLine--;

		fillEndLineFieldOfUpperScopes (lcb, lastLine);
		lcb->currentScope = CORK_NIL;
	}

	if (patbuf->scopeActions & SCOPE_POP)
	{
		tagEntryInfo *const popped = getEntryInCorkQueue (lcb->currentScope);

		if (popped == NULL)
			lcb->currentScope = CORK_NIL;
		else
		{
			if (popped->extensionFields.endLine == 0)
			{
				popped->extensionFields.endLine =
					getInputLineNumberInRegPType(patbuf->regptype, offset);

				/*
				 * SCOPE_POP|SCOPE_REF_AFTER_POP implies that "replace" was specified as the
				 * scope action. If the specified action is "replace", getInputLineNumberInRegPType()
				 * returns the start line of the NEW scope. The popped scope is ended BEFORE
				 * the new scope. There is a gap. We must adjust the "end:" field here.
				 */
				if ((patbuf->scopeActions & SCOPE_REF_AFTER_POP) &&
					popped->extensionFields.endLine > 1)
					popped->extensionFields.endLine--;
			}
			lcb->currentScope = popped->extensionFields.scopeIndex;
		}
	}

	if (patbuf->scopeActions & SCOPE_REF_AFTER_POP)
		scope = scopeActionRef (lcb->currentScope);

	if (vStringLength (name) != 0 || placeholder)
	{
		/* Holds the expanded field values until the tag has been made. */
		static TrashBox* field_trashbox;
		const int kind = patbuf->u.tag.kindIndex;
		const roleBitsType roleBits = patbuf->u.tag.roleBits;
		unsigned long lineNumber = 0;
		MIOPos filePosition;
		tagEntryInfo tag;

		/* filePosition is only set, and only passed on, when a line
		 * number was computed from OFFSET. */
		if ((patbuf->regptype == REG_PARSER_MULTI_LINE)
			|| (patbuf->regptype == REG_PARSER_MULTI_TABLE))
		{
			lineNumber = getInputLineNumberForFileOffset (offset);
			filePosition = getInputFilePositionForLine (lineNumber);
		}

		initRegexTag (&tag, vStringValue (name), kind, ROLE_DEFINITION_INDEX, scope, placeholder,
					  lineNumber, lineNumber == 0? NULL: &filePosition, patbuf->xtagType);

		if (field_trashbox == NULL)
		{
			field_trashbox = trashBoxNew();
			DEFAULT_TRASH_BOX (field_trashbox, trashBoxDelete);
		}

		if (patbuf->fieldPatterns)
		{
			for (unsigned int i = 0; i < ptrArrayCount(patbuf->fieldPatterns); i++)
			{
				struct fieldPattern *const fp = ptrArrayItem(patbuf->fieldPatterns, i);

				if (!isFieldEnabled (fp->ftype))
					continue;

				vString *const expanded = substitute (line, fp->template,
													  BACK_REFERENCE_COUNT, pmatch);
				attachParserField (&tag, false, fp->ftype, vStringValue (expanded));
				trashBoxPut (field_trashbox, expanded,
							 (TrashBoxDestroyItemProc)vStringDelete);
			}
		}

		if (roleBits)
		{
			for (unsigned int roleIndex = 0;
				 roleIndex < countLanguageRoles(tag.langType, kind);
				 roleIndex++)
			{
				if (roleBits & makeRoleBit(roleIndex))
					assignRole (&tag, roleIndex);
			}
		}

		if (patbuf->anonymous_tag_prefix)
			markTagExtraBit (&tag, XTAG_ANONYMOUS);

		corkIndex = makeTagEntry (&tag);

		trashBoxMakeEmpty(field_trashbox);
	}
	else if (!patbuf->accept_empty_name)
		error (WARNING, "%s:%lu: null expansion of name pattern \"%s\"",
		       getInputFileName (),
			   getInputLineNumberInRegPType(patbuf->regptype, offset),
		       patbuf->u.tag.name_pattern);

	if (patbuf->scopeActions & SCOPE_PUSH)
		lcb->currentScope = corkIndex;

	if (corkIndex != CORK_NIL && window)
	{
		scriptSetup (optvm, lcb, corkIndex, window);
		EsObject *const evaluated = scriptEval (optvm, patbuf->optscript);
		if (es_error_p (evaluated))
			error (WARNING, "error when evaluating: %s", patbuf->optscript_src);
		es_object_unref (evaluated);
		scriptTeardown (optvm, lcb);
	}

	vStringDelete (name);
}
1737 
matchCallbackPattern(const vString * const line,const regexPattern * const patbuf,const regmatch_t * const pmatch)1738 static bool matchCallbackPattern (
1739 		const vString* const line, const regexPattern* const patbuf,
1740 		const regmatch_t* const pmatch)
1741 {
1742 	regexMatch matches [BACK_REFERENCE_COUNT];
1743 	unsigned int count = 0;
1744 	int i;
1745 	for (i = 0  ;  i < BACK_REFERENCE_COUNT  ;  ++i)
1746 	{
1747 		matches [i].start  = pmatch [i].rm_so;
1748 		matches [i].length = pmatch [i].rm_eo - pmatch [i].rm_so;
1749 		/* a valid match may have both offsets == -1,
1750 		 * e.g. (foo)*(bar) matching "bar" - see CTags bug 271.
1751 		 * As POSIX regex doesn't seem to have a way to count matches,
1752 		 * we return the count up to the last non-empty match. */
1753 		if (pmatch [i].rm_so != -1)
1754 			count = i + 1;
1755 	}
1756 	return patbuf->u.callback.function (vStringValue (line), matches, count,
1757 				     patbuf->u.callback.userData);
1758 }
1759 
1760 
printMessage(const langType language,const regexPattern * const ptrn,const off_t offset,const char * const line,const regmatch_t * const pmatch)1761 static void printMessage(const langType language,
1762 						 const regexPattern *const ptrn,
1763 						 const off_t offset,
1764 						 const char *const line,
1765 						 const regmatch_t* const pmatch)
1766 {
1767 	vString *msg;
1768 
1769 	Assert (ptrn);
1770 	Assert (ptrn->message.selection > 0);
1771 	Assert (ptrn->message.message_string);
1772 
1773 	msg = substitute (line, ptrn->message.message_string, BACK_REFERENCE_COUNT, pmatch);
1774 
1775 	error (ptrn->message.selection, "%sMessage from regex<%s>: %s (%s:%lu)",
1776 		   (ptrn->message.selection == FATAL ? "Fatal: " : ""),
1777 		   getLanguageName (language),
1778 		   vStringValue (msg),
1779 		   getInputFileName (),
1780 		   getInputLineNumberInRegPType (ptrn->regptype, offset));
1781 
1782 	vStringDelete (msg);
1783 }
1784 
isGuestRequestConsistent(struct guestRequest * guest_req)1785 static bool isGuestRequestConsistent (struct guestRequest *guest_req)
1786 {
1787 	return (guest_req->lang != LANG_IGNORE)
1788 		&& (guest_req->boundary[BOUNDARY_START].offset < guest_req->boundary[BOUNDARY_END].offset);
1789 }
1790 
/* Fill GUEST_REQ from GUEST_SPEC and the match PMATCH.
 *
 * CURRENT is the text the offsets in PMATCH refer to; START is the base that
 * boundary offsets are measured from.  Depending on the spec, the language
 * is taken as given, looked up by name from a pattern group, or derived from
 * a file name captured by a pattern group; an empty group leaves the
 * language unset.  Boundaries that are not placeholders are set from the
 * start or end of their pattern group.
 *
 * Returns false at once when the spec's language type is
 * GUEST_LANG_UNKNOWN; otherwise returns guestRequestIsFilled(). */
static bool fillGuestRequest (const char *start,
							  const char *current,
							  regmatch_t pmatch [BACK_REFERENCE_COUNT],
							  struct guestSpec *guest_spec,
							  struct guestRequest *guest_req)
{
	switch (guest_spec->lang.type)
	{
	case GUEST_LANG_UNKNOWN:
		return false;

	case GUEST_LANG_PLACEHOLDER:
		break;

	case GUEST_LANG_STATIC_LANGNAME:
		guest_req->lang = guest_spec->lang.spec.lang;
		guest_req->lang_set = true;
		break;

	case GUEST_LANG_PTN_GROUP_FOR_LANGNAME:
	{
		const regmatch_t *const group = &pmatch [guest_spec->lang.spec.patternGroup];
		const char *const name = current + group->rm_so;
		const int size = group->rm_eo - group->rm_so;

		if (size > 0)
		{
			guest_req->lang = getNamedLanguageOrAlias (name, size);
			guest_req->lang_set = true;
		}
		break;
	}

	case GUEST_LANG_PTN_GROUP_FOR_FILEMAP:
	{
		const regmatch_t *const group = &pmatch [guest_spec->lang.spec.patternGroup];
		const char *const name = current + group->rm_so;
		const int size = group->rm_eo - group->rm_so;
		char *fname = NULL;

		if (size > 0)
			fname = eStrndup (name, size);

		if (fname)
		{
			guest_req->lang = getLanguageForFilename (fname, LANG_AUTO);
			guest_req->lang_set = true;
			eFree (fname);
		}
		break;
	}

	default:
		break;
	}

	/* Index 0 and 1 cover the two boundaries of the guest area. */
	for (int side = 0; side < 2; side++)
	{
		const struct boundarySpec *const spec = &guest_spec->boundary [side];
		struct boundaryInRequest *const target = &guest_req->boundary [side];

		if (spec->placeholder)
			continue;

		target->offset =  current - start + (spec->fromStartOfGroup
											 ? pmatch [spec->patternGroup].rm_so
											 : pmatch [spec->patternGroup].rm_eo);
		target->offset_set = true;
	}

	return guestRequestIsFilled (guest_req);
}
1846 
/* Try the pattern of ENTRY against the single line LINE.
 *
 * Returns false when the pattern is disabled or does not match.  On a match
 * the entry's match counter is increased and, in this order: an optscript
 * of a pattern without a name slot is evaluated, an attached message is
 * printed, and the pattern is dispatched by type.  A tag pattern makes its
 * tag, may submit a guest request, and yields true; a callback pattern
 * yields the callback's result. */
static bool matchRegexPattern (struct lregexControlBlock *lcb,
							   const vString* const line,
							   regexTableEntry *entry)
{
	regexPattern *const patbuf = entry->pattern;
	struct guestSpec *const guest = &patbuf->guest;
	regmatch_t pmatch [BACK_REFERENCE_COUNT];

	if (patbuf->disabled && *(patbuf->disabled))
		return false;

	const int status = patbuf->pattern.backend->match (patbuf->pattern.backend,
													   patbuf->pattern.code, vStringValue (line),
													   vStringLength (line),
													   pmatch);
	if (status != 0)
	{
		entry->statistics.unmatch++;
		return false;
	}

	entry->statistics.match++;

	scriptWindow window = {
		.line = vStringValue (line),
		.start = NULL,
		.patbuf = patbuf,
		.pmatch = pmatch,
		.nmatch = BACK_REFERENCE_COUNT,
		.advanceto = false,
	};

	/* hasNameSlot() reads the tag member of the pattern's union, so it is
	 * only consulted once an optscript is known to be attached. */
	if (patbuf->optscript && (! hasNameSlot (patbuf)))
	{
		scriptSetup (optvm, lcb, CORK_NIL, &window);
		EsObject *const evaluated = scriptEval (optvm, patbuf->optscript);
		if (es_error_p (evaluated))
			error (WARNING, "error when evaluating: %s", patbuf->optscript_src);
		es_object_unref (evaluated);
		scriptTeardown (optvm, lcb);
	}

	if (hasMessage(patbuf))
		printMessage(lcb->owner, patbuf, 0, vStringValue (line), pmatch);

	if (patbuf->type == PTRN_CALLBACK)
		return matchCallbackPattern (line, patbuf, pmatch);

	if (patbuf->type != PTRN_TAG)
	{
		Assert ("invalid pattern type" == NULL);
		return false;
	}

	/* With a name slot, the optscript runs inside matchTagPattern(). */
	matchTagPattern (lcb, vStringValue (line), patbuf, pmatch, 0,
					 (patbuf->optscript && hasNameSlot (patbuf))? &window: NULL);

	if (guest->lang.type != GUEST_LANG_UNKNOWN)
	{
		const unsigned long ln = getInputLineNumber ();
		const long current = getInputFileOffsetForLine (ln);

		if (fillGuestRequest (vStringValue (line) - current,
							  vStringValue (line), pmatch, guest, lcb->guest_req))
		{
			Assert (lcb->guest_req->lang != LANG_AUTO);
			if (isGuestRequestConsistent(lcb->guest_req))
				guestRequestSubmit (lcb->guest_req);
			guestRequestClear (lcb->guest_req);
		}
	}

	return true;
}
1922 
/*
 * Apply the multi-line pattern held by ENTRY repeatedly over the whole
 * input buffer ALLLINES.
 *
 * Scanning starts at the head of the buffer. After each successful match the
 * cursor moves forward by the start offset (mgroup->nextFromStart) or the
 * end offset of the group mgroup->forNextScanning, and the pattern is tried
 * again. The loop ends when the pattern no longer matches, when the cursor
 * cannot be advanced, or when the end of the buffer is reached.
 *
 * Match/unmatch counters of ENTRY are updated along the way.
 *
 * Returns true when the pattern is a tag pattern and matched at least once;
 * false otherwise (including when the pattern is disabled).
 */
static bool matchMultilineRegexPattern (struct lregexControlBlock *lcb,
										const vString* const allLines,
										regexTableEntry *entry)
{
	regexPattern* patbuf = entry->pattern;

	Assert (patbuf);

	if (patbuf->disabled && *(patbuf->disabled))
		return false;

	struct mGroupSpec *mgroup = &patbuf->mgroup;
	struct guestSpec  *guest = &patbuf->guest;
	const char *const start = vStringValue (allLines);
	const char *current = start;
	regmatch_t pmatch [BACK_REFERENCE_COUNT];
	bool result = false;

	do
	{
		/* Match against what is left of the buffer, starting at the cursor. */
		int match = patbuf->pattern.backend->match (patbuf->pattern.backend,
													patbuf->pattern.code, current,
													vStringLength (allLines) - (current - start),
													pmatch);

		if (match != 0)
		{
			entry->statistics.unmatch++;
			break;
		}

		if (hasMessage(patbuf))
			printMessage(lcb->owner, patbuf, (current + pmatch[0].rm_so) - start, current, pmatch);

		/* Offset, from the head of the buffer, of the group chosen for
		 * determining the line number of the tag. */
		off_t offset = (current + pmatch [mgroup->forLineNumberDetermination].rm_so)
			- start;

		entry->statistics.match++;
		scriptWindow window = {
			.line = current,
			.start = start,
			.patbuf = patbuf,
			.pmatch = pmatch,
			.nmatch = BACK_REFERENCE_COUNT,
			.advanceto = false,
		};

		/* A pattern without a name slot runs its script here; otherwise the
		 * window is handed to matchTagPattern () below. */
		if (patbuf->optscript && (! hasNameSlot (patbuf)))
		{
			scriptSetup (optvm, lcb, CORK_NIL, &window);
			EsObject *e = scriptEval (optvm, patbuf->optscript);
			if (es_error_p (e))
				error (WARNING, "error when evaluating: %s", patbuf->optscript_src);
			es_object_unref (e);
			scriptTeardown (optvm, lcb);
		}

		if (patbuf->type == PTRN_TAG)
		{
			matchTagPattern (lcb, current, patbuf, pmatch, offset,
							 (patbuf->optscript && hasNameSlot (patbuf))? &window: NULL);
			result = true;
		}
		else if (patbuf->type == PTRN_CALLBACK)
			;	/* Not implemented yet */
		else
		{
			Assert ("invalid pattern type" == NULL);
			result = false;
			break;
		}

		if (fillGuestRequest (start, current, pmatch, guest, lcb->guest_req))
		{
			Assert (lcb->guest_req->lang != LANG_AUTO);
			if (isGuestRequestConsistent(lcb->guest_req))
				guestRequestSubmit (lcb->guest_req);
			guestRequestClear (lcb->guest_req);
		}

		/* Keep the step signed: when the group used for advancing did not
		 * take part in the match its offsets are -1. Stored in an unsigned
		 * variable that value wraps around to a huge step and pushes
		 * `current' far outside the buffer. Treat it like a zero-length
		 * step: warn and stop scanning with this pattern. */
		const long delta = (long) (mgroup->nextFromStart
								   ? pmatch [mgroup->forNextScanning].rm_so
								   : pmatch [mgroup->forNextScanning].rm_eo);
		if (delta <= 0)
		{
			unsigned int pos = current - start;
			error (WARNING,
				   "a multi line regex pattern doesn't advance the input cursor: %s",
				   patbuf->pattern_string);
			error (WARNING, "Language: %s, input file: %s, pos: %u",
				   getLanguageName (lcb->owner), getInputFileName(), pos);
			break;
		}
		current += delta;

	} while (current < start + vStringLength (allLines));

	return result;
}
2026 
2027 /* PUBLIC INTERFACE */
2028 
2029 /* Match against all patterns for specified language. Returns true if at least
2030  * on pattern matched.
2031  */
matchRegex(struct lregexControlBlock * lcb,const vString * const line)2032 extern bool matchRegex (struct lregexControlBlock *lcb, const vString* const line)
2033 {
2034 	bool result = false;
2035 	unsigned int i;
2036 	for (i = 0  ;  i < ptrArrayCount(lcb->entries[REG_PARSER_SINGLE_LINE])  ;  ++i)
2037 	{
2038 		regexTableEntry *entry = ptrArrayItem(lcb->entries[REG_PARSER_SINGLE_LINE], i);
2039 		regexPattern *ptrn = entry->pattern;
2040 
2041 		Assert (ptrn);
2042 
2043 		if ((ptrn->xtagType != XTAG_UNKNOWN)
2044 			&& (!isXtagEnabled (ptrn->xtagType)))
2045 				continue;
2046 
2047 		if (matchRegexPattern (lcb, line, entry))
2048 		{
2049 			result = true;
2050 			if (ptrn->exclusive)
2051 				break;
2052 		}
2053 	}
2054 	return result;
2055 }
2056 
notifyRegexInputStart(struct lregexControlBlock * lcb)2057 extern void notifyRegexInputStart (struct lregexControlBlock *lcb)
2058 {
2059 	lcb->currentScope = CORK_NIL;
2060 
2061 	ptrArrayClear (lcb->tstack);
2062 	guestRequestClear (lcb->guest_req);
2063 
2064 	opt_vm_dstack_push (optvm, lregex_dict);
2065 
2066 	if (es_null (lcb->local_dict))
2067 		lcb->local_dict = opt_dict_new (23);
2068 	opt_vm_dstack_push (optvm, lcb->local_dict);
2069 	opt_vm_set_app_data (optvm, lcb);
2070 	scriptEvalHook (optvm, lcb, SCRIPT_HOOK_PRELUDE);
2071 }
2072 
notifyRegexInputEnd(struct lregexControlBlock * lcb)2073 extern void notifyRegexInputEnd (struct lregexControlBlock *lcb)
2074 {
2075 	scriptEvalHook (optvm, lcb, SCRIPT_HOOK_SEQUEL);
2076 	opt_vm_set_app_data (optvm, NULL);
2077 	opt_vm_clear (optvm);
2078 	opt_dict_clear (lcb->local_dict);
2079 	unsigned long endline = getInputLineNumber ();
2080 	fillEndLineFieldOfUpperScopes (lcb, endline);
2081 }
2082 
/* Call DRIVER over and over until it returns EOF. The return values other
 * than EOF are ignored; the driver is called only for its side effects
 * (merely reading all lines of the file). */
extern void findRegexTagsMainloop (int (* driver)(void))
{
	int status;

	do
		status = driver ();
	while (status != EOF);
}
2089 
/* Driver for findRegexTagsMainloop (): read one line from the input file.
 * Returns 1 while a line is available and EOF once
 * readLineFromInputFile () returns NULL. */
static int fileReadLineDriver(void)
{
	if (readLineFromInputFile () != NULL)
		return 1;

	return EOF;
}
2094 
findRegexTags(void)2095 extern void findRegexTags (void)
2096 {
2097 	findRegexTagsMainloop (fileReadLineDriver);
2098 }
2099 
/* Return true if any pattern in ENTRIES has scope actions or an optscript
 * attached; false if none does (or ENTRIES is empty). */
static bool doesExpectCorkInRegex0(ptrArray *entries)
{
	unsigned int idx = 0;

	while (idx < ptrArrayCount (entries))
	{
		regexTableEntry *entry = ptrArrayItem (entries, idx);
		Assert (entry && entry->pattern);

		if (entry->pattern->scopeActions)
			return true;
		if (entry->pattern->optscript)
			return true;

		idx++;
	}

	return false;
}
2113 
doesExpectCorkInRegex(struct lregexControlBlock * lcb)2114 extern bool doesExpectCorkInRegex (struct lregexControlBlock *lcb)
2115 {
2116 	ptrArray *entries;
2117 
2118 	entries = lcb->entries[REG_PARSER_SINGLE_LINE];
2119 	if (doesExpectCorkInRegex0 (entries))
2120 		return true;
2121 
2122 	entries = lcb->entries[REG_PARSER_MULTI_LINE];
2123 	if (doesExpectCorkInRegex0 (entries))
2124 		return true;
2125 
2126 	for (unsigned int i = 0; i < ptrArrayCount(lcb->tables); i++)
2127 	{
2128 		struct regexTable *table = ptrArrayItem(lcb->tables, i);
2129 		if (doesExpectCorkInRegex0 (table->entries))
2130 			return true;
2131 	}
2132 
2133 	return false;
2134 }
2135 
escapeRegexPattern(const char * pattern)2136 static char *escapeRegexPattern (const char* pattern)
2137 {
2138 	vString *p = vStringNew ();
2139 
2140 	while (*pattern != '\0')
2141 	{
2142 		char c = *pattern;
2143 		if (c == '\n')
2144 			vStringCatS(p, "\\n");
2145 		else if (c == '\t')
2146 			vStringCatS(p, "\\t");
2147 		else if (c == '\\')
2148 			vStringCatS(p, "\\\\");
2149 		else
2150 			vStringPut(p, c);
2151 
2152 		pattern++;
2153 	}
2154 
2155 	return vStringDeleteUnwrap (p);
2156 }
2157 
addTagRegexInternal(struct lregexControlBlock * lcb,int table_index,enum regexParserType regptype,const char * const regex,const char * const name,const char * const kinds,const char * const flags,bool * disabled)2158 static regexPattern *addTagRegexInternal (struct lregexControlBlock *lcb,
2159 										  int table_index,
2160 					  enum regexParserType regptype,
2161 					  const char* const regex,
2162 					  const char* const name,
2163 					  const char* const kinds,
2164 					  const char* const flags,
2165 					  bool *disabled)
2166 {
2167 	Assert (regex != NULL);
2168 	Assert (name != NULL);
2169 
2170 	if (!regexAvailable)
2171 		return NULL;
2172 
2173 	regexCompiledCode cp = compileRegex (regptype, regex, flags);
2174 	if (cp.code == NULL)
2175 	{
2176 		error (WARNING, "pattern: %s", regex);
2177 		if (table_index != TABLE_INDEX_UNUSED)
2178 		{
2179 			struct regexTable *table = ptrArrayItem (lcb->tables, table_index);
2180 			error (WARNING, "table: %s[%u]", table->name, ptrArrayCount (table->entries));
2181 			error (WARNING, "language: %s", getLanguageName (lcb->owner));
2182 		}
2183 		else
2184 			error (WARNING, "language: %s[%u]", getLanguageName (lcb->owner),
2185 				   ptrArrayCount (lcb->entries[regptype]));
2186 		return NULL;
2187 	}
2188 
2189 	char kindLetter;
2190 	char* kindName;
2191 	char* description;
2192 	kindDefinition* fileKind;
2193 
2194 	bool explictly_defined =  parseKinds (kinds, &kindLetter, &kindName, &description);
2195 	fileKind = getLanguageKind (lcb->owner, KIND_FILE_INDEX);
2196 	if (kindLetter == fileKind->letter)
2197 		error (FATAL,
2198 			   "Kind letter \'%c\' used in regex definition \"%s\" of %s language is reserved in ctags main",
2199 			   kindLetter,
2200 			   regex,
2201 			   getLanguageName (lcb->owner));
2202 	else if (!isalpha ((unsigned char)kindLetter))
2203 		error (FATAL,
2204 			   "Kind letter must be an alphabetical character: \"%c\"",
2205 			   kindLetter);
2206 
2207 	if (strcmp (kindName, fileKind->name) == 0)
2208 		error (FATAL,
2209 			   "Kind name \"%s\" used in regex definition \"%s\" of %s language is reserved in ctags main",
2210 			   kindName,
2211 			   regex,
2212 			   getLanguageName (lcb->owner));
2213 
2214 	const char *option_bsae = (regptype == REG_PARSER_SINGLE_LINE? "regex"        :
2215 							   regptype == REG_PARSER_MULTI_LINE ? "mline-regex"  :
2216 							   regptype == REG_PARSER_MULTI_TABLE? "_mtable-regex":
2217 							   NULL);
2218 	Assert (option_bsae);
2219 
2220 	for (const char * p = kindName; *p; p++)
2221 	{
2222 		if (p == kindName)
2223 		{
2224 			if (!isalpha(*p))
2225 				error (FATAL,
2226 					   "A kind name doesn't start with an alphabetical character: "
2227 					   "'%s' in \"--%s-%s\" option",
2228 					   kindName,
2229 					   option_bsae,
2230 					   getLanguageName (lcb->owner));
2231 		}
2232 		else
2233 		{
2234 			/*
2235 			 * People may object to this error.
2236 			 * Searching github repositories, I found not a few .ctags files
2237 			 * in which Exuberant-ctags users define kind names with whitespaces.
2238 			 * "FATAL" error breaks the compatibility.
2239 			 */
2240 			if (!isalnum(*p))
2241 				error (/* regptype == REG_PARSER_SINGLE_LINE? WARNING: */ FATAL,
2242 					   "Non-alphanumeric char is used in kind name: "
2243 					   "'%s' in \"--%s-%s\" option",
2244 					   kindName,
2245 					   option_bsae,
2246 					   getLanguageName (lcb->owner));
2247 
2248 		}
2249 	}
2250 
2251 	regexPattern *rptr = addCompiledTagPattern (lcb, table_index,
2252 												regptype, &cp, name,
2253 												kindLetter, kindName, description, flags,
2254 												explictly_defined,
2255 												disabled);
2256 	rptr->pattern_string = escapeRegexPattern(regex);
2257 
2258 	eFree (kindName);
2259 	if (description)
2260 		eFree (description);
2261 
2262 	if (*name == '\0')
2263 	{
2264 		if (rptr->exclusive || rptr->scopeActions & SCOPE_PLACEHOLDER
2265 			|| rptr->anonymous_tag_prefix
2266 			|| regptype == REG_PARSER_MULTI_TABLE
2267 			|| rptr->guest.lang.type != GUEST_LANG_UNKNOWN
2268 			|| rptr->optscript
2269 			)
2270 			rptr->accept_empty_name = true;
2271 		else
2272 			error (WARNING, "%s: regexp missing name pattern", regex);
2273 	}
2274 
2275 	return rptr;
2276 }
2277 
addTagRegex(struct lregexControlBlock * lcb,const char * const regex,const char * const name,const char * const kinds,const char * const flags,bool * disabled)2278 extern void addTagRegex (struct lregexControlBlock *lcb,
2279 			 const char* const regex,
2280 			 const char* const name,
2281 			 const char* const kinds,
2282 			 const char* const flags,
2283 			 bool *disabled)
2284 {
2285 	addTagRegexInternal (lcb, TABLE_INDEX_UNUSED,
2286 						 REG_PARSER_SINGLE_LINE, regex, name, kinds, flags, disabled);
2287 }
2288 
addTagMultiLineRegex(struct lregexControlBlock * lcb,const char * const regex,const char * const name,const char * const kinds,const char * const flags,bool * disabled)2289 extern void addTagMultiLineRegex (struct lregexControlBlock *lcb, const char* const regex,
2290 								  const char* const name, const char* const kinds, const char* const flags,
2291 								  bool *disabled)
2292 {
2293 	addTagRegexInternal (lcb, TABLE_INDEX_UNUSED,
2294 						 REG_PARSER_MULTI_LINE, regex, name, kinds, flags, disabled);
2295 }
2296 
addTagMultiTableRegex(struct lregexControlBlock * lcb,const char * const table_name,const char * const regex,const char * const name,const char * const kinds,const char * const flags,bool * disabled)2297 extern void addTagMultiTableRegex(struct lregexControlBlock *lcb,
2298 								  const char* const table_name,
2299 								  const char* const regex,
2300 								  const char* const name, const char* const kinds, const char* const flags,
2301 								  bool *disabled)
2302 {
2303 	int table_index = getTableIndexForName (lcb, table_name);
2304 
2305 	if (table_index < 0)
2306 		error (FATAL, "unknown table name: %s", table_name);
2307 
2308 	addTagRegexInternal (lcb, table_index, REG_PARSER_MULTI_TABLE, regex, name, kinds, flags,
2309 						 disabled);
2310 }
2311 
addCallbackRegex(struct lregexControlBlock * lcb,const char * const regex,const char * const flags,const regexCallback callback,bool * disabled,void * userData)2312 extern void addCallbackRegex (struct lregexControlBlock *lcb,
2313 			      const char* const regex,
2314 			      const char* const flags,
2315 			      const regexCallback callback,
2316 			      bool *disabled,
2317 			      void * userData)
2318 {
2319 	Assert (regex != NULL);
2320 
2321 	if (!regexAvailable)
2322 		return;
2323 
2324 
2325 	regexCompiledCode cp = compileRegex (REG_PARSER_SINGLE_LINE, regex, flags);
2326 	if (cp.code == NULL)
2327 	{
2328 		error (WARNING, "pattern: %s", regex);
2329 		error (WARNING, "language: %s", getLanguageName (lcb->owner));
2330 		return;
2331 	}
2332 
2333 	regexPattern *rptr = addCompiledCallbackPattern (lcb, &cp, callback, flags,
2334 													 disabled, userData);
2335 	rptr->pattern_string = escapeRegexPattern(regex);
2336 }
2337 
addTagRegexOption(struct lregexControlBlock * lcb,enum regexParserType regptype,const char * const pattern)2338 static void addTagRegexOption (struct lregexControlBlock *lcb,
2339 							   enum regexParserType regptype,
2340 							   const char* const pattern)
2341 {
2342 	if (!regexAvailable)
2343 		return;
2344 
2345 	int table_index = TABLE_INDEX_UNUSED;
2346 	char * regex_pat = NULL;
2347 	char *name, *kinds, *flags;
2348 
2349 
2350 	if (regptype == REG_PARSER_MULTI_TABLE)
2351 	{
2352 		const char *c;
2353 		for (c = pattern; *c; c++)
2354 		{
2355 			if (! (isalnum(*c) || *c == '_'))
2356 			{
2357 				if (*c &&  (*(c + 1) != '^'))
2358 				{
2359 					vString *tmp = vStringNew ();
2360 
2361 					/* Put '^' as prefix for the pattern */
2362 					vStringPut(tmp, *c);
2363 					vStringPut(tmp, '^');
2364 					vStringCatS(tmp, c + 1);
2365 					regex_pat = vStringDeleteUnwrap(tmp);
2366 				}
2367 				else
2368 					regex_pat = eStrdup (c);
2369 				break;
2370 			}
2371 		}
2372 
2373 		if (regex_pat == NULL || *regex_pat == '\0')
2374 			error (FATAL, "wrong mtable pattern specification: %s", pattern);
2375 
2376 		char *table_name = eStrndup(pattern, c - pattern);
2377 		table_index = getTableIndexForName (lcb, table_name);
2378 		if (table_index < 0)
2379 			error (FATAL, "unknown table name: %s (in %s)", table_name, pattern);
2380 		eFree(table_name);
2381 	}
2382 	else
2383 		regex_pat = eStrdup (pattern);
2384 
2385 	if (parseTagRegex (regptype, regex_pat, &name, &kinds, &flags))
2386 		addTagRegexInternal (lcb, table_index, regptype, regex_pat, name, kinds, flags,
2387 							 NULL);
2388 
2389 	eFree (regex_pat);
2390 }
2391 
processTagRegexOption(struct lregexControlBlock * lcb,enum regexParserType regptype,const char * const parameter)2392 extern void processTagRegexOption (struct lregexControlBlock *lcb,
2393 								   enum regexParserType regptype,
2394 								   const char* const parameter)
2395 {
2396 	if (parameter == NULL  ||  parameter [0] == '\0')
2397 		clearPatternSet (lcb);
2398 	else if (parameter [0] != '@')
2399 		addTagRegexOption (lcb, regptype, parameter);
2400 	else if (! doesFileExist (parameter + 1))
2401 		error (WARNING, "cannot open regex file");
2402 	else
2403 	{
2404 		const char* regexfile = parameter + 1;
2405 
2406 		verbose ("open a regex file: %s\n", regexfile);
2407 		MIO* const mio = mio_new_file (regexfile, "r");
2408 		if (mio == NULL)
2409 			error (WARNING | PERROR, "%s", regexfile);
2410 		else
2411 		{
2412 			vString* const regex = vStringNew ();
2413 			while (readLineRaw (regex, mio))
2414 			{
2415 				if (vStringLength (regex) > 1 && vStringValue (regex)[0] != '\n')
2416 					addTagRegexOption (lcb, regptype, vStringValue (regex));
2417 			}
2418 			mio_unref (mio);
2419 			vStringDelete (regex);
2420 		}
2421 	}
2422 }
2423 
2424 /*
2425 *   Regex option parsing
2426 */
2427 
printRegexFlags(bool withListHeader,bool machinable,const char * flags,FILE * fp)2428 extern void printRegexFlags (bool withListHeader, bool machinable, const char *flags, FILE *fp)
2429 {
2430 	struct colprintTable * table = flagsColprintTableNew ();
2431 
2432 	if (flags && *flags != '\0')
2433 	{
2434 		/* Print backend specific flags.
2435 		 * This code is just stub because there is no backend having a specific flag.
2436 		 * The help message for this option is not updated. */
2437 		struct flagDefsDescriptor desc = choose_backend (flags, REG_PARSER_SINGLE_LINE, true);
2438 		flagsColprintAddDefinitions (table, desc.backend->fdefs, desc.backend->fdef_count);
2439 	}
2440 	else
2441 	{
2442 		flagsColprintAddDefinitions (table, backendFlagDefs, ARRAY_SIZE(backendFlagDefs));
2443 		flagsColprintAddDefinitions (table, backendCommonRegexFlagDefs, ARRAY_SIZE(backendCommonRegexFlagDefs));
2444 		flagsColprintAddDefinitions (table, prePtrnFlagDef, ARRAY_SIZE (prePtrnFlagDef));
2445 		flagsColprintAddDefinitions (table, guestPtrnFlagDef, ARRAY_SIZE (guestPtrnFlagDef));
2446 		flagsColprintAddDefinitions (table, scopePtrnFlagDef, ARRAY_SIZE (scopePtrnFlagDef));
2447 		flagsColprintAddDefinitions (table, commonSpecFlagDef, ARRAY_SIZE (commonSpecFlagDef));
2448 	}
2449 
2450 	flagsColprintTablePrint (table, withListHeader, machinable, fp);
2451 	colprintTableDelete(table);
2452 }
2453 
printMultilineRegexFlags(bool withListHeader,bool machinable,const char * flags,FILE * fp)2454 extern void printMultilineRegexFlags (bool withListHeader, bool machinable, const char *flags, FILE *fp)
2455 {
2456 	struct colprintTable * table = flagsColprintTableNew ();
2457 
2458 	if (flags && *flags != '\0')
2459 	{
2460 		/* Print backend specific flags.
2461 		 * This code is just stub because there is no backend having a specific flag.
2462 		 * The help message for this option is not updated. */
2463 		struct flagDefsDescriptor desc = choose_backend (flags, REG_PARSER_MULTI_LINE, true);
2464 		flagsColprintAddDefinitions (table, desc.backend->fdefs, desc.backend->fdef_count);
2465 	}
2466 	else
2467 	{
2468 		flagsColprintAddDefinitions (table, backendFlagDefs, ARRAY_SIZE(backendFlagDefs));
2469 		flagsColprintAddDefinitions (table, backendCommonRegexFlagDefs, ARRAY_SIZE(backendCommonRegexFlagDefs));
2470 		flagsColprintAddDefinitions (table, multilinePtrnFlagDef, ARRAY_SIZE (multilinePtrnFlagDef));
2471 		flagsColprintAddDefinitions (table, guestPtrnFlagDef, ARRAY_SIZE (guestPtrnFlagDef));
2472 		flagsColprintAddDefinitions (table, commonSpecFlagDef, ARRAY_SIZE (commonSpecFlagDef));
2473 	}
2474 
2475 	flagsColprintTablePrint (table, withListHeader, machinable, fp);
2476 	colprintTableDelete(table);
2477 }
2478 
printMultitableRegexFlags(bool withListHeader,bool machinable,const char * flags,FILE * fp)2479 extern void printMultitableRegexFlags (bool withListHeader, bool machinable, const char *flags, FILE *fp)
2480 {
2481 	struct colprintTable * table = flagsColprintTableNew ();
2482 
2483 	if (flags && *flags != '\0')
2484 	{
2485 		/* Print backend specific flags.
2486 		 * This code is just stub because there is no backend having a specific flag.
2487 		 * The help message for this option is not updated. */
2488 		struct flagDefsDescriptor desc = choose_backend (flags, REG_PARSER_MULTI_TABLE, true);
2489 		flagsColprintAddDefinitions (table, desc.backend->fdefs, desc.backend->fdef_count);
2490 	}
2491 	else
2492 	{
2493 		flagsColprintAddDefinitions (table, backendFlagDefs, ARRAY_SIZE(backendFlagDefs));
2494 		flagsColprintAddDefinitions (table, backendCommonRegexFlagDefs, ARRAY_SIZE(backendCommonRegexFlagDefs));
2495 		flagsColprintAddDefinitions (table, multilinePtrnFlagDef, ARRAY_SIZE (multilinePtrnFlagDef));
2496 		flagsColprintAddDefinitions (table, multitablePtrnFlagDef, ARRAY_SIZE (multitablePtrnFlagDef));
2497 		flagsColprintAddDefinitions (table, guestPtrnFlagDef, ARRAY_SIZE (guestPtrnFlagDef));
2498 		flagsColprintAddDefinitions (table, scopePtrnFlagDef, ARRAY_SIZE (scopePtrnFlagDef));
2499 		flagsColprintAddDefinitions (table, commonSpecFlagDef, ARRAY_SIZE (commonSpecFlagDef));
2500 	}
2501 
2502 	flagsColprintTablePrint (table, withListHeader, machinable, fp);
2503 	colprintTableDelete(table);
2504 }
2505 
freeRegexResources(void)2506 extern void freeRegexResources (void)
2507 {
2508 	es_object_unref (lregex_dict);
2509 	opt_vm_delete (optvm);
2510 }
2511 
regexNeedsMultilineBuffer(struct lregexControlBlock * lcb)2512 extern bool regexNeedsMultilineBuffer (struct lregexControlBlock *lcb)
2513 {
2514 	if  (ptrArrayCount(lcb->entries [REG_PARSER_MULTI_LINE]) > 0)
2515 		return true;
2516 	else if (ptrArrayCount(lcb->tables) > 0)
2517 		return true;
2518 	else
2519 		return false;
2520 }
2521 
matchMultilineRegex(struct lregexControlBlock * lcb,const vString * const allLines)2522 extern bool matchMultilineRegex (struct lregexControlBlock *lcb, const vString* const allLines)
2523 {
2524 	bool result = false;
2525 
2526 	unsigned int i;
2527 
2528 	for (i = 0; i < ptrArrayCount(lcb->entries [REG_PARSER_MULTI_LINE]); ++i)
2529 	{
2530 		regexTableEntry *entry = ptrArrayItem(lcb->entries [REG_PARSER_MULTI_LINE], i);
2531 		Assert (entry && entry->pattern);
2532 
2533 		if ((entry->pattern->xtagType != XTAG_UNKNOWN)
2534 			&& (!isXtagEnabled (entry->pattern->xtagType)))
2535 			continue;
2536 
2537 		result = matchMultilineRegexPattern (lcb, allLines, entry) || result;
2538 	}
2539 	return result;
2540 }
2541 
getTableIndexForName(const struct lregexControlBlock * const lcb,const char * name)2542 static int getTableIndexForName (const struct lregexControlBlock *const lcb, const char *name)
2543 {
2544 	unsigned int i;
2545 
2546 	for (i = 0; i < ptrArrayCount(lcb->tables); i++)
2547 	{
2548 		struct regexTable *table = ptrArrayItem(lcb->tables, i);
2549 		if (strcmp (table->name, name) == 0)
2550 			return (int)i;
2551 	}
2552 
2553 	return TABLE_INDEX_UNUSED;
2554 }
2555 
addRegexTable(struct lregexControlBlock * lcb,const char * name)2556 extern void addRegexTable (struct lregexControlBlock *lcb, const char *name)
2557 {
2558 	const char *c;
2559 	for (c = name; *c; c++)
2560 		if (! (isalnum(*c) || *c == '_'))
2561 			error (FATAL, "`%c' in \"%s\" is not acceptable as part of table name", *c, name);
2562 
2563 	if (getTableIndexForName(lcb, name) >= 0)
2564 	{
2565 		error (WARNING, "regex table \"%s\" is already defined", name);
2566 		return;
2567 	}
2568 
2569 	struct regexTable *table = xCalloc(1, struct regexTable);
2570 	table->name = eStrdup (name);
2571 	table->entries = ptrArrayNew(deleteTableEntry);
2572 
2573 	ptrArrayAdd (lcb->tables, table);
2574 }
2575 
dumpSstack(FILE * fp,int scope)2576 static void dumpSstack(FILE* fp, int scope)
2577 {
2578 	tagEntryInfo *entry;
2579 	fprintf (fp, "scope : ");
2580 	while ((entry = getEntryInCorkQueue (scope)))
2581 	{
2582 		fprintf(fp, "%s", entry->name);
2583 
2584 		scope = entry->extensionFields.scopeIndex;
2585 		if (scope != CORK_NIL)
2586 			fprintf(fp, "%c", '/');
2587 	}
2588 	fprintf (fp, "\n");
2589 }
2590 
/* Print to FP the names of the tables held in TSTACK, from the last
 * element to the first, separated by '/', followed by a newline. */
static void dumpTstack(FILE* fp, ptrArray *tstack)
{
	unsigned int depth = ptrArrayCount (tstack);

	while (depth > 0)
	{
		struct regexTable *t = ptrArrayItem (tstack, depth - 1);
		/* No separator after the last name printed. */
		const char *separator = (depth == 1) ? "" : "/";

		fprintf(fp, "%s%s", t->name, separator);
		depth--;
	}

	fprintf(fp, "\n");
}
2608 
printInputLine(FILE * vfp,const char * c,const off_t offset)2609 static void printInputLine(FILE* vfp, const char *c, const off_t offset)
2610 {
2611 	vString *v = vStringNew ();
2612 
2613 	for (; *c && (*c != '\n'); c++)
2614 		vStringPut(v, *c);
2615 
2616 	if (vStringLength (v) == 0 && *c == '\n')
2617 		vStringCatS (v, "\\n");
2618 
2619 	fprintf (vfp, "\ninput : \"%s\" L%lu\n",
2620 			 vStringValue (v),
2621 			 getInputLineNumberForFileOffset(offset));
2622 	vStringDelete(v);
2623 }
2624 
printMultitableMessage(const langType language,const char * const tableName,const unsigned int index,const regexPattern * const ptrn,const off_t offset,const char * const current,const regmatch_t * const pmatch)2625 static void printMultitableMessage(const langType language,
2626 								   const char *const tableName,
2627 								   const unsigned int index,
2628 								   const regexPattern *const ptrn,
2629 								   const off_t offset,
2630 								   const char *const current,
2631 								   const regmatch_t* const pmatch)
2632 {
2633 	vString *msg;
2634 
2635 	Assert (ptrn);
2636 	Assert (ptrn->message.selection > 0);
2637 	Assert (ptrn->message.message_string);
2638 
2639 	msg = substitute (current, ptrn->message.message_string, BACK_REFERENCE_COUNT, pmatch);
2640 
2641 	error (ptrn->message.selection, "%sMessage from mtable<%s/%s[%2u]>: %s (%s:%lu)",
2642 		   (ptrn->message.selection == FATAL ? "Fatal: " : ""),
2643 		   getLanguageName (language),
2644 		   tableName,
2645 		   index,
2646 		   vStringValue (msg),
2647 		   getInputFileName (),
2648 		   getInputLineNumberForFileOffset (offset));
2649 
2650 	vStringDelete (msg);
2651 }
2652 
matchMultitableRegexTable(struct lregexControlBlock * lcb,struct regexTable * table,const vString * const start,unsigned int * offset)2653 static struct regexTable * matchMultitableRegexTable (struct lregexControlBlock *lcb,
2654 													  struct regexTable *table, const vString *const start, unsigned int *offset)
2655 {
2656 	struct regexTable *next = NULL;
2657 	const char *current;
2658 	regmatch_t pmatch [BACK_REFERENCE_COUNT];
2659 	const char *cstart = vStringValue(start);
2660 	unsigned int delta;
2661 
2662 
2663  restart:
2664 	current = cstart + *offset;
2665 
2666 	/* Accept the case *offset == vStringLength(start)
2667 	   because we want an empty regex // still matches empty input. */
2668 	if (*offset > vStringLength(start))
2669 	{
2670 		*offset = vStringLength(start);
2671 		goto out;
2672 	}
2673 
2674 	BEGIN_VERBOSE(vfp);
2675 	{
2676 		printInputLine(vfp, current, *offset);
2677 	}
2678 	END_VERBOSE();
2679 
2680 	for (unsigned int i = 0; i < ptrArrayCount(table->entries); i++)
2681 	{
2682 		regexTableEntry *entry = ptrArrayItem(table->entries, i);
2683 		if ((entry->pattern->xtagType != XTAG_UNKNOWN)
2684 			&& (!isXtagEnabled (entry->pattern->xtagType)))
2685 			continue;
2686 
2687 		regexPattern *ptrn = entry->pattern;
2688 		struct guestSpec  *guest = &ptrn->guest;
2689 
2690 		Assert (ptrn);
2691 
2692 		BEGIN_VERBOSE(vfp);
2693 		{
2694 			char s[3];
2695 			if (*current == '\n')
2696 			{
2697 				s [0] = '\\';
2698 				s [1] = 'n';
2699 				s [2] = '\0';
2700 			}
2701 			else if (*current == '\t')
2702 			{
2703 				s [0] = '\\';
2704 				s [1] = 't';
2705 				s [2] = '\0';
2706 			}
2707 			else if (*current == '\\')
2708 			{
2709 				s [0] = '\\';
2710 				s [1] = '\\';
2711 				s [2] = '\0';
2712 			}
2713 			else
2714 			{
2715 				s[0] = *current;
2716 				s[1] = '\0';
2717 			}
2718 
2719 			if (s[1] == '\0')
2720 				fprintf (vfp, "match : '%s' %15s[%2u] /", s, table->name, i);
2721 			else if (s[0] == '\0')
2722 				fprintf (vfp, "match :  '' %15s[%2u] /", table->name, i);
2723 			else
2724 				fprintf (vfp, "match :'%s' %15s[%2u] / ", s, table->name, i);
2725 			fprintf (vfp, "%s/\n", ptrn->pattern_string);
2726 		}
2727 		END_VERBOSE();
2728 
2729 		int match = 0;
2730 
2731 		if (ptrn->disabled && *(ptrn->disabled))
2732 			continue;
2733 
2734 		match = ptrn->pattern.backend->match (ptrn->pattern.backend,
2735 											  ptrn->pattern.code, current,
2736 											  vStringLength(start) - (current - cstart),
2737 											  pmatch);
2738 		if (match == 0)
2739 		{
2740 			entry->statistics.match++;
2741 			off_t offset_for_tag = (current
2742 									+ pmatch [ptrn->mgroup.forLineNumberDetermination].rm_so)
2743 				- cstart;
2744 			scriptWindow window = {
2745 				.line = current,
2746 				.start = cstart,
2747 				.patbuf = ptrn,
2748 				.pmatch = pmatch,
2749 				.nmatch = BACK_REFERENCE_COUNT,
2750 				.advanceto = false,
2751 			};
2752 			initTaction (&window.taction);
2753 
2754 			if (ptrn->optscript && (! hasNameSlot (ptrn)))
2755 			{
2756 				scriptSetup (optvm, lcb, CORK_NIL, &window);
2757 				EsObject *e = scriptEval (optvm, ptrn->optscript);
2758 				if (es_error_p (e))
2759 					error (WARNING, "error when evaluating: %s", ptrn->optscript_src);
2760 				es_object_unref (e);
2761 				scriptTeardown (optvm, lcb);
2762 			}
2763 
2764 			if (ptrn->type == PTRN_TAG)
2765 			{
2766 				matchTagPattern (lcb, current, ptrn, pmatch, offset_for_tag,
2767 								 (ptrn->optscript && hasNameSlot (ptrn))? &window: NULL);
2768 
2769 				struct mTableActionSpec *taction = (window.taction.action == TACTION_NOP)
2770 					? &(ptrn->taction)
2771 					: &window.taction;
2772 
2773 				BEGIN_VERBOSE(vfp);
2774 				{
2775 					fprintf(vfp, "result: matched %d bytes\n", (int)(pmatch[0].rm_eo));
2776 					dumpSstack (vfp, lcb->currentScope);
2777 				}
2778 				END_VERBOSE();
2779 
2780 				if (hasMessage(ptrn))
2781 					printMultitableMessage (lcb->owner, table->name, i, ptrn,
2782 											*offset, current, pmatch);
2783 
2784 				if (fillGuestRequest (cstart, current, pmatch, guest, lcb->guest_req))
2785 				{
2786 					Assert (lcb->guest_req->lang != LANG_AUTO);
2787 					if (isGuestRequestConsistent(lcb->guest_req))
2788 						guestRequestSubmit (lcb->guest_req);
2789 					guestRequestClear (lcb->guest_req);
2790 				}
2791 
2792 				if (window.advanceto)
2793 					delta = window.advanceto_delta;
2794 				else
2795 					delta = (ptrn->mgroup.nextFromStart
2796 							 ? pmatch [ptrn->mgroup.forNextScanning].rm_so
2797 							 : pmatch [ptrn->mgroup.forNextScanning].rm_eo);
2798 				*offset += delta;
2799 
2800 				switch (taction->action)
2801 				{
2802 				case TACTION_NOP:
2803 					BEGIN_VERBOSE(vfp);
2804 					{
2805 						fprintf(vfp, "action: NOP in {%s}, stack: /", table->name);
2806 						dumpTstack(vfp, lcb->tstack);
2807 					}
2808 					END_VERBOSE();
2809 					break;
2810 				case TACTION_ENTER:
2811 					/* TODO: Limit the depth of tstack.  */
2812 					ptrArrayAdd (lcb->tstack,
2813 								 taction->continuation_table
2814 								 ? taction->continuation_table
2815 								 : table);
2816 					next = taction->table;
2817 					BEGIN_VERBOSE(vfp);
2818 					{
2819 						if (taction->continuation_table)
2820 							fprintf(vfp, "action: [enter] to {%s}, cont: {%s}, stack: /",
2821 									next->name,
2822 									taction->continuation_table->name);
2823 						else
2824 							fprintf(vfp, "action: [enter] to {%s}, stack: /", next->name);
2825 						dumpTstack(vfp, lcb->tstack);
2826 					}
2827 					END_VERBOSE();
2828 					break;
2829 				case TACTION_LEAVE:
2830 					BEGIN_VERBOSE(vfp);
2831 					{
2832 						fprintf(vfp, "action: [leave] from {%s}, stack: /", table->name);
2833 						dumpTstack(vfp, lcb->tstack);
2834 					}
2835 					END_VERBOSE();
2836 					if (ptrArrayCount (lcb->tstack) == 0)
2837 					{
2838 						error (WARNING, "leave is specified as regex table action but the table stack is empty");
2839 						return NULL;
2840 					}
2841 					next = ptrArrayLast(lcb->tstack);
2842 					ptrArrayRemoveLast (lcb->tstack);
2843 					break;
2844 				case TACTION_JUMP:
2845 					next = taction->table;
2846 					BEGIN_VERBOSE(vfp);
2847 					{
2848 						fprintf(vfp, "action: [jump] from {%s} to {%s}, stack: /", table->name, next->name);
2849 						dumpTstack(vfp, lcb->tstack);
2850 					}
2851 					END_VERBOSE();
2852 
2853 					break;
2854 				case TACTION_RESET:
2855 					next = taction->table;
2856 					BEGIN_VERBOSE(vfp);
2857 					{
2858 						fprintf(vfp, "action: [reset] to {%s}, stack: /", next->name);
2859 					}
2860 					END_VERBOSE();
2861 
2862 					ptrArrayClear (lcb->tstack);
2863 					break;
2864 				case TACTION_QUIT:
2865 					BEGIN_VERBOSE(vfp);
2866 					{
2867 						fprintf(vfp, "action: [quit], stack: /");
2868 						dumpTstack(vfp, lcb->tstack);
2869 					}
2870 					END_VERBOSE();
2871 					return NULL;
2872 				}
2873 
2874 				if (next)
2875 					break;
2876 
2877 				if (delta == 0)
2878 				{
2879 					error (WARNING, "Forcefully advance the input pos because");
2880 					error (WARNING, "following conditions for entering infinite loop are satisfied:");
2881 					error (WARNING, "+ matching the pattern succeeds,");
2882 					error (WARNING, "+ the next table is not given, and");
2883 					error (WARNING, "+ the input file pos doesn't advance.");
2884 					error (WARNING, "Language: %s, input file: %s, pos: %u",
2885 						   getLanguageName (lcb->owner), getInputFileName(), *offset);
2886 					++*offset;
2887 				}
2888 			}
2889 			else if (ptrn->type == PTRN_CALLBACK)
2890 				;	/* Not implemented yet */
2891 			else
2892 			{
2893 				Assert ("invalid pattern type" == NULL);
2894 				break;
2895 			}
2896 			goto restart;
2897 		}
2898 		else
2899 			entry->statistics.unmatch++;
2900 	}
2901  out:
2902 	if (next == NULL && ptrArrayCount (lcb->tstack) > 0)
2903 	{
2904 		static int apop_count = 0;
2905 		next = ptrArrayLast(lcb->tstack);
2906 		verbose("result: no match - autopop<%d> from {%s} to {%s} @ %lu\n", apop_count++, table->name, next->name,
2907 				getInputLineNumberForFileOffset(*offset));
2908 		ptrArrayRemoveLast (lcb->tstack);
2909 	}
2910 	return next;
2911 }
2912 
extendRegexTable(struct lregexControlBlock * lcb,const char * src,const char * dist)2913 extern void extendRegexTable (struct lregexControlBlock *lcb, const char *src, const char *dist)
2914 {
2915 
2916 	int i;
2917 	struct regexTable * src_table;
2918 	struct regexTable * dist_table;
2919 
2920 	verbose ("extend regex table  \"%s\" with \"%s\"\n", dist, src);
2921 
2922 	i = getTableIndexForName (lcb, src);
2923 	if (i < 0)
2924 		error (FATAL, "no such regex table in %s: %s", getLanguageName(lcb->owner), src);
2925 	src_table = ptrArrayItem(lcb->tables, i);
2926 
2927 	i = getTableIndexForName (lcb, dist);
2928 	if (i < 0)
2929 		error (FATAL, "no such regex table in %s: %s", getLanguageName(lcb->owner), dist);
2930 	dist_table = ptrArrayItem(lcb->tables, i);
2931 
2932 	for (i = 0; i < (int)ptrArrayCount(src_table->entries); i++)
2933 	{
2934 		regexTableEntry *entry = ptrArrayItem (src_table->entries, i);
2935 		ptrArrayAdd(dist_table->entries, newRefPatternEntry(entry));
2936 	}
2937 }
2938 
printMultitableStatistics(struct lregexControlBlock * lcb)2939 extern void printMultitableStatistics (struct lregexControlBlock *lcb)
2940 {
2941 	if (ptrArrayCount(lcb->tables) == 0)
2942 		return;
2943 
2944 	fprintf(stderr, "\nMTABLE REGEX STATISTICS of %s\n", getLanguageName (lcb->owner));
2945 	fputs("==============================================\n", stderr);
2946 	for (unsigned int i = 0; i < ptrArrayCount(lcb->tables); i++)
2947 	{
2948 		struct regexTable *table = ptrArrayItem (lcb->tables, i);
2949 		fprintf(stderr, "%s\n", table->name);
2950 		fputs("-----------------------\n", stderr);
2951 		for (unsigned int j = 0; j < ptrArrayCount(table->entries); j++)
2952 		{
2953 			regexTableEntry *entry = ptrArrayItem (table->entries, j);
2954 			Assert (entry && entry->pattern);
2955 			fprintf(stderr, "%10u/%-10u%-40s ref: %d\n",
2956 					entry->statistics.match,
2957 					entry->statistics.unmatch + entry->statistics.match,
2958 					entry->pattern->pattern_string,
2959 					entry->pattern->refcount);
2960 		}
2961 		fputc('\n', stderr);
2962 	}
2963 }
2964 
matchMultitableRegex(struct lregexControlBlock * lcb,const vString * const allLines)2965 extern bool matchMultitableRegex (struct lregexControlBlock *lcb, const vString* const allLines)
2966 {
2967 	if (ptrArrayCount (lcb->tables) == 0)
2968 		return false;
2969 
2970 	struct regexTable *table = ptrArrayItem (lcb->tables, 0);
2971 	unsigned int offset = 0;
2972 
2973 	int motionless_counter = 0;
2974 	unsigned int last_offset;
2975 
2976 
2977 	while (table)
2978 	{
2979 		last_offset = offset;
2980 		table = matchMultitableRegexTable(lcb, table, allLines, &offset);
2981 
2982 		if (last_offset == offset)
2983 			motionless_counter++;
2984 		else
2985 			motionless_counter = 0;
2986 
2987 		if (motionless_counter > MTABLE_MOTIONLESS_MAX)
2988 		{
2989 			error (WARNING, "mtable<%s/%s>: the input cursor stays at %u in %s so long though the tables are switched",
2990 				   getLanguageName (lcb->owner),
2991 				   table->name, offset, getInputFileName ());
2992 			break;
2993 		}
2994 
2995 		if (table && (ptrArrayCount (lcb->tstack) > MTABLE_STACK_MAX_DEPTH))
2996 		{
2997 			unsigned int i;
2998 			struct regexTable *t;
2999 
3000 			error (WARNING, "mtable<%s/%s>: the tenter/tleave stack overflows at %u in %s",
3001 				   getLanguageName (lcb->owner),
3002 				   table->name, offset, getInputFileName ());
3003 			error (WARNING, "DUMP FROM THE TOP:");
3004 			/* TODO: use dumpTstack */
3005 			for (i = ptrArrayCount(lcb->tstack); 0 < i; --i)
3006 			{
3007 				t = ptrArrayItem (lcb->tstack, i - 1);
3008 				error (WARNING, "%3u %s", i - 1, t->name);
3009 			}
3010 
3011 			break;
3012 		}
3013 	}
3014 
3015 	return true;
3016 }
3017 
/* Make a promise for PARSER covering the area [startOffset, endOffset),
 * both given as file offsets.
 *
 * Each offset is translated into a line number plus the distance from the
 * beginning of that line, which is the form makePromise() takes.  The
 * value returned by makePromise() is passed back unchanged.
 */
static int  makePromiseForAreaSpecifiedWithOffsets (const char *parser,
													off_t startOffset,
													off_t endOffset)
{
	/* Start boundary: its line and the file offset where that line begins. */
	unsigned long startLine = getInputLineNumberForFileOffset(startOffset);
	/* End boundary, resolved the same way. */
	unsigned long endLine = getInputLineNumberForFileOffset(endOffset);
	unsigned long startLineOffset = getInputFileOffsetForLine (startLine);
	unsigned long endLineOffset = getInputFileOffsetForLine (endLine);

	/* An offset cannot lie before the beginning of its own line. */
	Assert(startOffset >= startLineOffset);
	Assert(endOffset >= endLineOffset);

	return makePromise (parser,
						startLine,
						startOffset - startLineOffset,
						endLine,
						endOffset - endLineOffset,
						startOffset - startLineOffset);
}
3035 
guestRequestNew(void)3036 static struct guestRequest *guestRequestNew (void)
3037 {
3038 	struct guestRequest *r = xMalloc (1, struct guestRequest);
3039 
3040 
3041 	guestRequestClear (r);
3042 	return r;
3043 }
3044 
/* Release the storage of R with eFree(). */
static void   guestRequestDelete (struct guestRequest *r)
{
	struct guestRequest *victim = r;

	eFree (victim);
}
3049 
guestRequestIsFilled(struct guestRequest * r)3050 static bool   guestRequestIsFilled(struct guestRequest *r)
3051 {
3052 	return (r->lang_set && (r->boundary + 0)->offset_set && (r->boundary + 1)->offset_set);
3053 }
3054 
guestRequestClear(struct guestRequest * r)3055 static void   guestRequestClear (struct guestRequest *r)
3056 {
3057 	r->lang_set = false;
3058 	r->boundary[BOUNDARY_START].offset_set = false;
3059 	r->boundary[BOUNDARY_END].offset_set = false;
3060 }
3061 
guestRequestSubmit(struct guestRequest * r)3062 static void   guestRequestSubmit (struct guestRequest *r)
3063 {
3064 	const char *langName = getLanguageName (r->lang);
3065 	verbose ("guestRequestSubmit: %s; "
3066 			 "range: %"PRId64" - %"PRId64"\n",
3067 			 langName,
3068 			 (int64_t)r->boundary[BOUNDARY_START].offset,
3069 			 (int64_t)r->boundary[BOUNDARY_END].offset);
3070 	makePromiseForAreaSpecifiedWithOffsets (langName,
3071 											r->boundary[BOUNDARY_START].offset,
3072 											r->boundary[BOUNDARY_END].offset);
3073 }
3074 
3075 /*
3076  * Script related functions
3077  */
3078 
3079 /* This functions expects { code }} as input.
3080  * Be care that curly brackets must be unbalanced.
3081  */
scriptRead(OptVM * vm,const char * src)3082 static EsObject *scriptRead (OptVM *vm, const char *src)
3083 {
3084 	size_t len = strlen (src);
3085 	Assert (len > 2);
3086 	Assert (src[len - 1] == '}');
3087 	Assert (src[len - 2] == '}');
3088 
3089 	EsObject *obj = optscriptRead (vm, src + 1, len - 1 - 1);
3090 	if (es_error_p (obj))
3091 		error (FATAL, "failed in loading an optscript: %s", src);
3092 	return obj;
3093 }
3094 
/* Evaluate OPTSCRIPT on VM; the result of optscriptEval() is returned
 * as is. */
extern EsObject* scriptEval (OptVM *vm, EsObject *optscript)
{
	EsObject *result = optscriptEval (vm, optscript);

	return result;
}
3099 
/* Evaluate every script registered for HOOK on VM.
 *
 * The source strings in lcb->hook[HOOK] are read into code objects the
 * first time this runs for HOOK (i.e. while lcb->hook_code[HOOK] is
 * empty); later calls reuse the stored code objects.  A source that cannot
 * be read is fatal; a failing evaluation only produces a warning that
 * quotes the source text of the failing script.
 */
static void scriptEvalHook (OptVM *vm, struct lregexControlBlock *lcb, enum scriptHook hook)
{
	if (ptrArrayCount (lcb->hook_code[hook]) == 0)
	{
		for (unsigned int i = 0; i < ptrArrayCount (lcb->hook[hook]); i++)
		{
			const char *src = ptrArrayItem (lcb->hook[hook], i);
			EsObject *code = scriptRead (vm, src);
			if (es_error_p (code))
				error (FATAL, "error when reading hook[%d] code: %s", hook, src);
			/* The array keeps its own reference; drop ours. */
			ptrArrayAdd (lcb->hook_code[hook], es_object_ref (code));
			es_object_unref (code);
		}
	}

	for (unsigned int i = 0; i < ptrArrayCount (lcb->hook_code[hook]); i++)
	{
		EsObject *code = ptrArrayItem (lcb->hook_code[hook], i);
		EsObject *e = optscriptEval (vm, code);
		if (es_error_p (e))
			/* The i-th code object was read from the i-th source of the
			 * same hook, so look the text up in hook[hook], not hook[i]. */
			error (WARNING, "error when evaluating hook[%d] code: %s",
				   hook, (char *)ptrArrayItem (lcb->hook[hook], i));
	}
}
3123 
/* Prepare VM for running scripts on behalf of LCB: set up the VM with
 * LCB's local dictionary and CORKINDEX, and record WINDOW in LCB. */
static void scriptSetup (OptVM *vm, struct lregexControlBlock *lcb, int corkIndex, scriptWindow *window)
{
	scriptWindow *const active = window;

	lcb->window = active;
	optscriptSetup (vm, lcb->local_dict, corkIndex);
}
3129 
/* Counterpart of scriptSetup(): tear down VM with LCB's local dictionary,
 * then forget the window recorded in LCB. */
static void scriptTeardown (OptVM *vm, struct lregexControlBlock *lcb)
{
	EsObject *dict = lcb->local_dict;

	optscriptTeardown (vm, dict);
	lcb->window = NULL;
}
3135 
addOptscriptToHook(struct lregexControlBlock * lcb,enum scriptHook hook,const char * code)3136 extern void	addOptscriptToHook (struct lregexControlBlock *lcb, enum scriptHook hook, const char *code)
3137 {
3138 	ptrArrayAdd (lcb->hook[hook], eStrdup (code));
3139 }
3140 
3141 /* Return true if available. */
checkRegex(void)3142 extern bool checkRegex (void)
3143 {
3144 #if defined (CHECK_REGCOMP)
3145 	{
3146 		/* Check for broken regcomp() on Cygwin */
3147 		regex_t patbuf;
3148 		int errcode;
3149 		if (regcomp (&patbuf, "/hello/", 0) != 0)
3150 			error (WARNING, "Disabling broken regex");
3151 		else
3152 			regexAvailable = true;
3153 	}
3154 #else
3155 	/* We are using bundled regex engine. */
3156 	regexAvailable = true;
3157 #endif
3158 
3159 	return regexAvailable;
3160 }
3161 
3162 static EsObject *OPTSCRIPT_ERR_UNKNOWNKIND;
3163 
3164 /* name:str kind:name loc _TAG tag
3165  * name:str kind:name     _TAG tag */
lrop_make_tag(OptVM * vm,EsObject * name)3166 static EsObject* lrop_make_tag (OptVM *vm, EsObject *name)
3167 {
3168 	matchLoc *loc;
3169 
3170 	if (opt_vm_ostack_count (vm) < 1)
3171 		return OPT_ERR_UNDERFLOW;
3172 
3173 	int index;
3174 	EsObject *top = opt_vm_ostack_top (vm);
3175 	if (es_object_get_type (top) == OPT_TYPE_MATCHLOC)
3176 	{
3177 		if (opt_vm_ostack_count (vm) < 3)
3178 			return OPT_ERR_UNDERFLOW;
3179 		loc = es_pointer_get (top);
3180 		index = 1;
3181 	}
3182 	else
3183 	{
3184 		struct lregexControlBlock *lcb = opt_vm_get_app_data (vm);
3185 		if (lcb->window->patbuf->regptype != REG_PARSER_SINGLE_LINE)
3186 			return OPT_ERR_TYPECHECK;
3187 		if (opt_vm_ostack_count (vm) < 2)
3188 			return OPT_ERR_UNDERFLOW;
3189 		loc = NULL;
3190 		index = 0;
3191 	}
3192 
3193 	EsObject *kind = opt_vm_ostack_peek (vm, index++);
3194 	if (es_object_get_type (kind) != OPT_TYPE_NAME)
3195 		return OPT_ERR_TYPECHECK;
3196 	EsObject *kind_sym = es_pointer_get (kind);
3197 	const char *kind_str = es_symbol_get (kind_sym);
3198 	kindDefinition* kind_def = getLanguageKindForName (getInputLanguage (),
3199 													   kind_str);
3200 	if (!kind_def)
3201 		return OPTSCRIPT_ERR_UNKNOWNKIND;
3202 	int kind_index = kind_def->id;
3203 
3204 	EsObject *tname = opt_vm_ostack_peek (vm, index++);
3205 	if (es_object_get_type (tname) != OPT_TYPE_STRING)
3206 		return OPT_ERR_TYPECHECK;
3207 	const char *n = opt_string_get_cstr (tname);
3208 	if (n [0] == '\0')
3209 		return OPT_ERR_RANGECHECK; /* TODO */
3210 
3211 	tagEntryInfo *e = xMalloc (1, tagEntryInfo);
3212 	initRegexTag (e, eStrdup (n),
3213 				  kind_index, ROLE_DEFINITION_INDEX, CORK_NIL, 0,
3214 				  loc? loc->line: 0, loc? &loc->pos: NULL, XTAG_UNKNOWN);
3215 	EsObject *obj = es_pointer_new (OPT_TYPE_TAG, e);
3216 	if (es_error_p (obj))
3217 		return obj;
3218 
3219 	while (index-- > 0)
3220 		opt_vm_ostack_pop (vm);
3221 
3222 	opt_vm_ostack_push (vm, obj);
3223 	es_object_unref (obj);
3224 	return es_false;
3225 }
3226 
3227 static EsObject *OPTSCRIPT_ERR_UNKNOWNROLE;
3228 
lrop_make_reftag(OptVM * vm,EsObject * name)3229 static EsObject* lrop_make_reftag (OptVM *vm, EsObject *name)
3230 {
3231 	matchLoc *loc;
3232 
3233 	if (opt_vm_ostack_count (vm) < 1)
3234 		return OPT_ERR_UNDERFLOW;
3235 
3236 	int index;
3237 	EsObject *top = opt_vm_ostack_top (vm);
3238 	if (es_object_get_type (top) == OPT_TYPE_MATCHLOC)
3239 	{
3240 		if (opt_vm_ostack_count (vm) < 4)
3241 			return OPT_ERR_UNDERFLOW;
3242 		loc = es_pointer_get (top);
3243 		index = 1;
3244 	}
3245 	else
3246 	{
3247 		struct lregexControlBlock *lcb = opt_vm_get_app_data (vm);
3248 		if (lcb->window->patbuf->regptype != REG_PARSER_SINGLE_LINE)
3249 			return OPT_ERR_TYPECHECK;
3250 		if (opt_vm_ostack_count (vm) < 3)
3251 			return OPT_ERR_UNDERFLOW;
3252 		loc = NULL;
3253 		index = 0;
3254 	}
3255 
3256 	EsObject *role = opt_vm_ostack_peek (vm, index++);
3257 	if (es_object_get_type (role) != OPT_TYPE_NAME)
3258 		return OPT_ERR_TYPECHECK;
3259 
3260 	EsObject *kind = opt_vm_ostack_peek (vm, index++);
3261 	if (es_object_get_type (kind) != OPT_TYPE_NAME)
3262 		return OPT_ERR_TYPECHECK;
3263 	EsObject *kind_sym = es_pointer_get (kind);
3264 	const char *kind_str = es_symbol_get (kind_sym);
3265 	langType lang = getInputLanguage ();
3266 	kindDefinition* kind_def = getLanguageKindForName (lang, kind_str);
3267 	if (!kind_def)
3268 		return OPTSCRIPT_ERR_UNKNOWNKIND;
3269 	int kind_index = kind_def->id;
3270 
3271 	EsObject *role_sym = es_pointer_get (role);
3272 	const char *role_str = es_symbol_get (role_sym);
3273 	roleDefinition* role_def = getLanguageRoleForName (lang, kind_index, role_str);
3274 	if (!role_def)
3275 		return OPTSCRIPT_ERR_UNKNOWNROLE;
3276 	int role_index = role_def->id;
3277 
3278 	EsObject *tname = opt_vm_ostack_peek (vm, index++);
3279 	if (es_object_get_type (tname) != OPT_TYPE_STRING)
3280 		return OPT_ERR_TYPECHECK;
3281 	const char *n = opt_string_get_cstr (tname);
3282 	if (n [0] == '\0')
3283 		return OPT_ERR_RANGECHECK; /* TODO */
3284 
3285 	tagEntryInfo *e = xMalloc (1, tagEntryInfo);
3286 	initRegexTag (e, eStrdup (n),
3287 				  kind_index, role_index, CORK_NIL, 0,
3288 				  loc? loc->line: 0, loc? &loc->pos: NULL,
3289 				  role_index == ROLE_DEFINITION_INDEX
3290 				  ? XTAG_UNKNOWN
3291 				  : XTAG_REFERENCE_TAGS);
3292 	EsObject *obj = es_pointer_new (OPT_TYPE_TAG, e);
3293 	if (es_error_p (obj))
3294 		return obj;
3295 
3296 	while (index-- > 0)
3297 		opt_vm_ostack_pop (vm);
3298 
3299 	opt_vm_ostack_push (vm, obj);
3300 	es_object_unref (obj);
3301 	return es_false;
3302 }
3303 
3304 /* tag COMMIT int */
lrop_commit_tag(OptVM * vm,EsObject * name)3305 static EsObject* lrop_commit_tag (OptVM *vm, EsObject *name)
3306 {
3307 	EsObject *tag = opt_vm_ostack_top (vm);
3308 	if (es_object_get_type (tag) != OPT_TYPE_TAG)
3309 		return OPT_ERR_TYPECHECK;
3310 
3311 	tagEntryInfo *e = es_pointer_get (tag);
3312 	int corkIndex = makeTagEntry (e);
3313 	EsObject *n = es_integer_new (corkIndex);
3314 	if (es_error_p (n))
3315 		return n;
3316 	opt_vm_ostack_pop (vm);
3317 	opt_vm_ostack_push (vm, n);
3318 	es_object_unref (n);
3319 	return es_false;
3320 }
3321 
/* Replace a group number, optionally followed by the name `start' or
 * `end', with a matchloc object for that group.
 *
 * When only the integer is given, the start of the group is used.
 * The group number must be 1 or greater, and make_mloc() must be able to
 * produce a location for it; otherwise a range-check error is returned.
 */
static EsObject* lrop_get_match_loc (OptVM *vm, EsObject *name)
{
	if (opt_vm_ostack_count (vm) < 1)
		return OPT_ERR_UNDERFLOW;

	EsObject *top = opt_vm_ostack_top (vm);
	EsObject *group = top;
	bool start = true;

	if (es_object_get_type (top) != ES_TYPE_INTEGER)
	{
		/* The names are created once and kept for later calls. */
		static EsObject *start_name, *end_name;
		if (!start_name)
		{
			start_name = opt_name_new_from_cstr ("start");
			end_name = opt_name_new_from_cstr ("end");
		}

		if (es_object_equal (top, start_name))
			start = true;
		else if (es_object_equal (top, end_name))
			start = false;
		else
			return OPT_ERR_TYPECHECK;

		if (opt_vm_ostack_count (vm) < 2)
			return OPT_ERR_UNDERFLOW;

		group = opt_vm_ostack_peek (vm, 1);
		if (es_object_get_type (group) != ES_TYPE_INTEGER)
			return OPT_ERR_TYPECHECK;
	}

	const int g = es_integer_get (group);
	if (g < 1)
		return OPT_ERR_RANGECHECK;

	struct lregexControlBlock *lcb = opt_vm_get_app_data (vm);
	matchLoc *mloc = make_mloc (lcb->window, g, start);
	if (mloc == NULL)
		return OPT_ERR_RANGECHECK;

	EsObject *mlocobj = es_pointer_new (OPT_TYPE_MATCHLOC, mloc);
	if (es_error_p (mlocobj))
	{
		eFree (mloc);
		return mlocobj;
	}

	/* One operand when the integer itself was on top, two otherwise. */
	int npop = (group == top) ? 1 : 2;
	while (npop-- > 0)
		opt_vm_ostack_pop (vm);

	opt_vm_ostack_push (vm, mlocobj);
	es_object_unref (mlocobj);
	return es_false;
}
3389 
/* Replace the matchloc object on top of the stack with its line number
 * as an integer object. */
static EsObject* ldrop_get_line_from_matchloc (OptVM *vm, EsObject *name)
{
	EsObject *top = opt_vm_ostack_top (vm);
	if (es_object_get_type (top) != OPT_TYPE_MATCHLOC)
		return OPT_ERR_TYPECHECK;

	const matchLoc *loc = es_pointer_get (top);
	EsObject *line = es_integer_new (loc->line);
	if (es_error_p (line))
		return line;

	opt_vm_ostack_pop (vm);
	opt_vm_ostack_push (vm, line);
	es_object_unref (line);

	return es_false;
}
3406 
make_mloc_from_tagEntryInfo(tagEntryInfo * e)3407 static matchLoc* make_mloc_from_tagEntryInfo(tagEntryInfo *e)
3408 {
3409 	matchLoc *mloc = xMalloc (1, matchLoc);
3410 	mloc->delta = 0;
3411 	mloc->line = e->lineNumber;
3412 	mloc->pos = e->filePosition;
3413 
3414 	return mloc;
3415 }
3416 
/* Replace the cork index on top of the stack with a matchloc object
 * describing the location of the tag entry at that index.
 *
 * The index must be greater than CORK_NIL and smaller than the number of
 * entries in the cork queue. */
static EsObject* lrop_get_tag_loc (OptVM *vm, EsObject *name)
{
	EsObject *indexobj = opt_vm_ostack_top (vm);
	if (es_object_get_type (indexobj) != ES_TYPE_INTEGER)
		return OPT_ERR_TYPECHECK;

	const int n = es_integer_get(indexobj);
	if (n <= CORK_NIL || n >= countEntryInCorkQueue())
		return OPT_ERR_RANGECHECK;

	tagEntryInfo *entry = getEntryInCorkQueue (n);
	if (entry == NULL)
		return OPT_ERR_TYPECHECK; /* ??? */

	matchLoc *loc = make_mloc_from_tagEntryInfo (entry);
	EsObject *locobj = es_pointer_new (OPT_TYPE_MATCHLOC, loc);
	if (es_error_p (locobj))
	{
		eFree (loc);
		return locobj;
	}

	opt_vm_ostack_pop (vm);
	opt_vm_ostack_push (vm, locobj);
	es_object_unref (locobj);

	return es_false;
}
3445 
lrop_get_match_string_common(OptVM * vm,int i,int npop)3446 static EsObject* lrop_get_match_string_common (OptVM *vm, int i, int npop)
3447 {
3448 	struct lregexControlBlock *lcb = opt_vm_get_app_data (vm);
3449 	scriptWindow *window = lcb->window;
3450 	const char *cstr = make_match_string (window, i);
3451 	if (!cstr)
3452 	{
3453 		for (; npop > 0; npop--)
3454 			opt_vm_ostack_pop (vm);
3455 		opt_vm_ostack_push (vm, es_false);
3456 		return es_false;
3457 	}
3458 	EsObject *str = opt_string_new_from_cstr (cstr);
3459 	eFree ((void *)cstr);
3460 
3461 	for (; npop > 0; npop--)
3462 		opt_vm_ostack_pop (vm);
3463 
3464 	opt_vm_ostack_push (vm, str);
3465 	es_object_unref (str);
3466 	return es_false;
3467 }
3468 
3469 /* Handles \1, \2, ... */
lrop_get_match_string_named_group(OptVM * vm,EsObject * name)3470 static EsObject* lrop_get_match_string_named_group (OptVM *vm, EsObject *name)
3471 {
3472 	void * data = es_symbol_get_data (name);
3473 	int i = HT_PTR_TO_INT (data);
3474 
3475 	return lrop_get_match_string_common (vm, i, 0);
3476 }
3477 
/* Take the group number from the top of the stack and replace it with the
 * matched text followed by true, or with just false when there is no text
 * for that group.  The group number must be 1 or greater. */
static EsObject* lrop_get_match_string_group_on_stack (OptVM *vm, EsObject *name)
{
	EsObject *top = opt_vm_ostack_top (vm);
	if (!es_integer_p (top))
		return OPT_ERR_TYPECHECK;

	const int group = es_integer_get (top);
	if (group < 1)
		return OPT_ERR_RANGECHECK;

	EsObject *status = lrop_get_match_string_common (vm, group, 1);
	if (es_error_p (status))
		return status;

	/* A string on top means the lookup succeeded: flag it with true. */
	EsObject *pushed = opt_vm_ostack_top (vm);
	const bool got_string = (es_object_get_type (pushed) == OPT_TYPE_STRING);
	if (got_string)
		opt_vm_ostack_push (vm, es_true);

	return es_false;
}
3497 
/* Return a newly allocated copy of the text matched by GROUP in WINDOW.
 *
 * NULL is returned when WINDOW is NULL, when GROUP is not in the range
 * 1 .. nmatch - 1, or when the group's rm_so is -1. */
static char* make_match_string (scriptWindow *window, int group)
{
	if (window == NULL)
		return NULL;
	if (group <= 0 || group >= window->nmatch)
		return NULL;
	if (window->pmatch [group].rm_so == -1)
		return NULL;

	const char *from = window->line + window->pmatch [group].rm_so;
	const int length = window->pmatch [group].rm_eo - window->pmatch [group].rm_so;

	return eStrndup (from, length);
}
3511 
make_mloc(scriptWindow * window,int group,bool start)3512 static matchLoc *make_mloc (scriptWindow *window, int group, bool start)
3513 {
3514 	if (window == NULL
3515 		|| 0 > group
3516 		|| window->nmatch <= group
3517 		|| window->pmatch [group].rm_so == -1)
3518 		return NULL;
3519 
3520 	matchLoc *mloc = xMalloc (1, matchLoc);
3521 	if (window->patbuf->regptype == REG_PARSER_SINGLE_LINE)
3522 	{
3523 		mloc->delta = 0;
3524 		mloc->line = getInputLineNumber ();
3525 		mloc->pos = getInputFilePosition ();
3526 	}
3527 	else
3528 	{
3529 		mloc->delta = (start
3530 					   ? window->pmatch [group].rm_so
3531 					   : window->pmatch [group].rm_eo);
3532 		off_t offset = (window->line + mloc->delta) - window->start;
3533 		mloc->line = getInputLineNumberForFileOffset (offset);
3534 		mloc->pos  = getInputFilePositionForLine (mloc->line);
3535 	}
3536 	return mloc;
3537 }
3538 
/* Pop a cork index from the stack and make it the current scope.
 * The index must be non-negative and smaller than the number of entries
 * in the cork queue. */
static EsObject* lrop_set_scope (OptVM *vm, EsObject *name)
{
	EsObject *top = opt_vm_ostack_top (vm);
	if (!es_integer_p (top))
		return OPT_ERR_TYPECHECK;

	const int index = es_integer_get (top);
	if (index < 0 || index >= countEntryInCorkQueue())
		return OPT_ERR_RANGECHECK;

	struct lregexControlBlock *lcb = opt_vm_get_app_data (vm);
	lcb->currentScope = index;
	opt_vm_ostack_pop (vm);

	return es_false;
}
3559 
/* Move the current scope one level up: it becomes the scope index stored
 * in the current scope's own tag entry.  Nothing changes when the current
 * scope is CORK_NIL or has no entry in the cork queue. */
static EsObject* lrop_pop_scope (OptVM *vm, EsObject *name)
{
	struct lregexControlBlock *lcb = opt_vm_get_app_data (vm);

	if (lcb->currentScope == CORK_NIL)
		return es_false;

	tagEntryInfo *current = getEntryInCorkQueue (lcb->currentScope);
	if (current != NULL)
		lcb->currentScope = current->extensionFields.scopeIndex;

	return es_false;
}
3571 
/* Reset the current scope to CORK_NIL. */
static EsObject* lrop_clear_scope (OptVM *vm, EsObject *name)
{
	struct lregexControlBlock *const lcb = opt_vm_get_app_data (vm);

	lcb->currentScope = CORK_NIL;

	return es_false;
}
3578 
/* Push the current scope index followed by true.  When the current scope
 * is 0, only false is pushed. */
static EsObject* lrop_ref0_scope (OptVM *vm, EsObject *name)
{
	struct lregexControlBlock *lcb = opt_vm_get_app_data (vm);
	const int scope = lcb->currentScope;

	if (scope != 0)
	{
		EsObject *scopeobj = es_integer_new (scope);
		if (es_error_p (scopeobj))
			return scopeobj;

		opt_vm_ostack_push (vm, scopeobj);
		es_object_unref (scopeobj);
		opt_vm_ostack_push (vm, es_true);
	}
	else
		opt_vm_ostack_push (vm, es_false);

	return es_false;
}
3599 
/* Replace the integer N on top of the stack with the scope index reached
 * by following scopeIndex links N times from the current scope.  The walk
 * stops early at CORK_NIL or at an index with no cork queue entry. */
static EsObject* lrop_refN_scope (OptVM *vm, EsObject *name)
{
	EsObject *top = opt_vm_ostack_top (vm);
	if (!es_integer_p (top))
		return OPT_ERR_TYPECHECK;

	struct lregexControlBlock *lcb = opt_vm_get_app_data (vm);
	int scope = lcb->currentScope;

	for (int remaining = es_integer_get(top); remaining != 0; remaining--)
	{
		if (scope == CORK_NIL)
			break;

		tagEntryInfo *entry = getEntryInCorkQueue (scope);
		if (entry == NULL)
			break;

		scope = entry->extensionFields.scopeIndex;
	}

	EsObject *scopeobj = es_integer_new (scope);
	if (es_error_p(scopeobj))
		return scopeobj;

	opt_vm_ostack_pop (vm);
	opt_vm_ostack_push (vm, scopeobj);
	es_object_unref (scopeobj);

	return es_false;
}
3632 
/* Push the depth of the current scope as an integer.
 *
 * The depth is the number of tag entries visited while following the
 * scopeIndex links from the current scope until CORK_NIL (or an index
 * without a cork queue entry) is reached.  With no current scope the
 * depth is 0. */
static EsObject* lrop_get_scope_depth (OptVM *vm, EsObject *name)
{
	int n = 0;

	struct lregexControlBlock *lcb = opt_vm_get_app_data (vm);
	int scope = lcb->currentScope;

	while (scope != CORK_NIL)
	{
		tagEntryInfo *e = getEntryInCorkQueue (scope);
		if (!e)
			break;

		scope = e->extensionFields.scopeIndex;
		n++;
	}

	/* Report the number of levels counted, not the index the walk ended
	 * on (which is just the terminating value of the chain). */
	EsObject *q = es_integer_new (n);
	if (es_error_p(q))
		return q;

	opt_vm_ostack_push (vm, q);
	es_object_unref (q);
	return es_false;
}
3658 
/* Run an interactive read-eval loop on VM.
 *
 * A banner prompt is printed once, then objects are read and evaluated
 * until the reader reports EOF or an evaluation returns an error.  Errors
 * other than OPT_ERR_QUIT are reported before leaving.  The prompt that
 * was active on entry is restored at the end. */
static EsObject* lrop_repl (OptVM *vm, EsObject *name)
{
	char *saved_prompt = opt_vm_set_prompt (vm, "\n% type \"quit\" for exiting from repl\nOPT");

	opt_vm_print_prompt (vm);
	opt_vm_set_prompt (vm, "OPT");

	for (;;)
	{
		EsObject *expr = opt_vm_read (vm, NULL);
		if (es_object_equal (expr, ES_READER_EOF))
		{
			es_object_unref (expr);
			break;
		}

		EsObject *result = opt_vm_eval (vm, expr);
		es_object_unref (expr);

		if (!es_error_p (result))
			continue;

		if (!es_object_equal (result, OPT_ERR_QUIT))
			opt_vm_report_error (vm, result, NULL);
		break;
	}

	opt_vm_set_prompt (vm, saved_prompt);
	return es_false;
}
3688 
3689 static EsObject *OPTSCRIPT_ERR_UNKNOWNTABLE;
3690 static EsObject *OPTSCRIPT_ERR_NOTMTABLEPTRN;
3691 
getRegexTableForOptscriptName(struct lregexControlBlock * lcb,EsObject * tableName)3692 static struct regexTable *getRegexTableForOptscriptName (struct lregexControlBlock *lcb,
3693 														 EsObject *tableName)
3694 {
3695 	EsObject *table_sym = es_pointer_get (tableName);
3696 	const char *table_str = es_symbol_get (table_sym);
3697 	int n = getTableIndexForName (lcb, table_str);
3698 	if (n < 0)
3699 		return NULL;
3700 	return ptrArrayItem (lcb->tables, n);
3701 }
3702 
/* Shared body of the table operators that take one table name.
 *
 * Pops the table name and stores ACTION, together with the named table
 * and no continuation table, as the table action of the current window.
 * Only valid while a multi-table pattern is being processed. */
static EsObject* lrop_tenter_common (OptVM *vm, EsObject *name, enum tableAction action)
{
	struct lregexControlBlock *lcb = opt_vm_get_app_data (vm);
	if (lcb->window->patbuf->regptype != REG_PARSER_MULTI_TABLE)
	{
		error (WARNING, "Use table related operators only with mtable regular expression");
		return OPTSCRIPT_ERR_NOTMTABLEPTRN;
	}

	EsObject *tablename = opt_vm_ostack_top (vm);
	if (es_object_get_type (tablename) != OPT_TYPE_NAME)
		return OPT_ERR_TYPECHECK;

	struct regexTable *target = getRegexTableForOptscriptName (lcb, tablename);
	if (target == NULL)
		return OPTSCRIPT_ERR_UNKNOWNTABLE;

	const struct mTableActionSpec spec = {
		.action             = action,
		.table              = target,
		.continuation_table = NULL,
	};
	lcb->window->taction = spec;

	opt_vm_ostack_pop (vm);
	return es_false;
}
3729 
/* Table operator requesting TACTION_ENTER for the table named on the
 * stack. */
static EsObject* lrop_tenter (OptVM *vm, EsObject *name)
{
	const enum tableAction requested = TACTION_ENTER;

	return lrop_tenter_common (vm, name, requested);
}
3734 
/* Table operator requesting TACTION_ENTER with a continuation table.
 *
 * The stack holds the table name and, on top of it, the continuation
 * table name; both are popped on success.  Only valid while a multi-table
 * pattern is being processed. */
static EsObject* lrop_tenter_with_continuation (OptVM *vm, EsObject *name)
{
	struct lregexControlBlock *lcb = opt_vm_get_app_data (vm);
	if (lcb->window->patbuf->regptype != REG_PARSER_MULTI_TABLE)
	{
		error (WARNING, "Use table related operators only with mtable regular expression");
		return OPTSCRIPT_ERR_NOTMTABLEPTRN;
	}

	EsObject *contname = opt_vm_ostack_top (vm);
	EsObject *tablename = opt_vm_ostack_peek (vm, 1);

	if (es_object_get_type (tablename) != OPT_TYPE_NAME)
		return OPT_ERR_TYPECHECK;
	if (es_object_get_type (contname) != OPT_TYPE_NAME)
		return OPT_ERR_TYPECHECK;

	struct regexTable *target = getRegexTableForOptscriptName (lcb, tablename);
	if (target == NULL)
		return OPTSCRIPT_ERR_UNKNOWNTABLE;

	struct regexTable *continuation = getRegexTableForOptscriptName (lcb, contname);
	if (continuation == NULL)
		return OPTSCRIPT_ERR_UNKNOWNTABLE;

	const struct mTableActionSpec spec = {
		.action             = TACTION_ENTER,
		.table              = target,
		.continuation_table = continuation,
	};
	lcb->window->taction = spec;

	/* Drop both name operands. */
	opt_vm_ostack_pop (vm);
	opt_vm_ostack_pop (vm);
	return es_false;
}
3769 
/* Table operator setting the action of the current window to
 * TACTION_LEAVE.  Only valid while a multi-table pattern is being
 * processed. */
static EsObject* lrop_tleave (OptVM *vm, EsObject *name)
{
	struct lregexControlBlock *lcb = opt_vm_get_app_data (vm);
	scriptWindow *win = lcb->window;

	if (win->patbuf->regptype == REG_PARSER_MULTI_TABLE)
	{
		win->taction.action = TACTION_LEAVE;
		return es_false;
	}

	error (WARNING, "Use table related operators only with mtable regular expression");
	return OPTSCRIPT_ERR_NOTMTABLEPTRN;
}
3782 
/* Table operator requesting TACTION_JUMP for the table named on the
 * stack. */
static EsObject* lrop_tjump (OptVM *vm, EsObject *name)
{
	const enum tableAction requested = TACTION_JUMP;

	return lrop_tenter_common (vm, name, requested);
}
3787 
/* Table operator requesting TACTION_RESET for the table named on the
 * stack. */
static EsObject* lrop_treset (OptVM *vm, EsObject *name)
{
	const enum tableAction requested = TACTION_RESET;

	return lrop_tenter_common (vm, name, requested);
}
3792 
/* Table operator setting the action of the current window to
 * TACTION_QUIT.  Only valid while a multi-table pattern is being
 * processed. */
static EsObject* lrop_tquit (OptVM *vm, EsObject *name)
{
	struct lregexControlBlock *lcb = opt_vm_get_app_data (vm);
	scriptWindow *win = lcb->window;

	if (win->patbuf->regptype == REG_PARSER_MULTI_TABLE)
	{
		win->taction.action = TACTION_QUIT;
		return es_false;
	}

	error (WARNING, "Use table related operators only with mtable regular expression");
	return OPTSCRIPT_ERR_NOTMTABLEPTRN;
}
3805 
/* Push true when tracing is enabled for the language of the current
 * input, false otherwise.  Without DO_TRACING, false is always pushed.
 *
 * Like the other operators in this file, success is signalled by
 * returning the es_false object (not the C constant `false', which would
 * be converted to a null pointer here). */
static EsObject* lrop_traced (OptVM *vm, EsObject *name)
{
#ifdef DO_TRACING
	langType lang = getInputLanguage ();
	opt_vm_ostack_push (vm, isLanguageTraced (lang) ? es_true : es_false);
#else
	opt_vm_ostack_push (vm, es_false);
#endif
	return es_false;
}
3819 
/* Error object returned when an operand does not name a known extra.
 * It is interned as "unknownextra" in initRegexOptscript(). */
EsObject *OPTSCRIPT_ERR_UNKNOWNEXTRA;

/*
 * Optscript operator `_extraenabled':
 *     extra:name _extraenabled bool
 *     language.extra _extraenabled bool
 *
 * Replaces the extra name on top of the operand stack with true if
 * that extra is enabled, or false if not.
 *
 * Errors (the operand stack is left unchanged):
 *   OPT_ERR_TYPECHECK           the operand is not a name
 *   OPTSCRIPT_ERR_UNKNOWNEXTRA  the name does not resolve to an extra
 *
 * Returns es_false on success.
 */
static EsObject* lrop_extraenabled (OptVM *vm, EsObject *name)
{
	EsObject *operand = opt_vm_ostack_top (vm);
	if (es_object_get_type (operand) != OPT_TYPE_NAME)
		return OPT_ERR_TYPECHECK;

	xtagType extraType = optscriptGetXtagType (operand);
	if (extraType == XTAG_UNKNOWN)
		return OPTSCRIPT_ERR_UNKNOWNEXTRA;

	/* Decide the answer before the operand is removed from the stack. */
	EsObject *answer;
	if (isXtagEnabled (extraType))
		answer = es_true;
	else
		answer = es_false;

	opt_vm_ostack_pop (vm);
	opt_vm_ostack_push (vm, answer);
	return es_false;
}
3836 
/*
 * Optscript operator `_markextra':
 *     tag:int|tag:tag extra:name _MARKEXTRA -
 *     tag:int|tag:tag lang.extra:name _MARKEXTRA -
 *
 * Marks the given tag entry with the given extra bit. The tag operand
 * is either a cork queue index (integer) or a tag object.
 *
 * Errors (the operand stack is left unchanged):
 *   OPT_ERR_RANGECHECK          the index is outside the cork queue
 *   OPT_ERR_TYPECHECK           the tag operand is neither an integer nor
 *                               a tag, or the extra operand is not a name
 *   OPTSCRIPT_ERR_NOTAGENTRY    no tag entry is available for the operand
 *   OPTSCRIPT_ERR_UNKNOWNEXTRA  the name does not resolve to an extra, or
 *                               the extra is owned by a language other
 *                               than the language of the tag
 *
 * Returns es_false on success, after popping both operands.
 */
static EsObject *lrop_markextra (OptVM *vm, EsObject *name)
{
	/* The tag operand is read from position 1; the extra name is on top. */
	EsObject *tag = opt_vm_ostack_peek (vm, 1);
	tagEntryInfo *e;
	if (es_integer_p (tag))
	{
		int n = es_integer_get (tag);
		if (! (CORK_NIL < n && n < countEntryInCorkQueue()))
			return OPT_ERR_RANGECHECK;
		e = getEntryInCorkQueue (n);
	}
	else if (es_object_get_type (tag) == OPT_TYPE_TAG)
		e = es_pointer_get (tag);
	else
		return OPT_ERR_TYPECHECK;

	if (e == NULL)
		return OPTSCRIPT_ERR_NOTAGENTRY;

	EsObject *extra = opt_vm_ostack_top (vm);
	if (es_object_get_type (extra) != OPT_TYPE_NAME)
		return OPT_ERR_TYPECHECK;

	xtagType xt = optscriptGetXtagType (extra);
	if (xt == XTAG_UNKNOWN)
		return OPTSCRIPT_ERR_UNKNOWNEXTRA;

	/* An extra owned by a specific language can be attached only to
	 * a tag of that language. */
	langType lang = getXtagOwner (xt);
	if (lang != LANG_IGNORE && e->langType != lang)
	{
		error (WARNING,
			   "mismatch in the language of the tag (%s) and the language of extra (%s)",
			   getLanguageName (e->langType), getLanguageName (lang));
		return OPTSCRIPT_ERR_UNKNOWNEXTRA;
	}

	markTagExtraBit (e, xt);

	opt_vm_ostack_pop (vm);
	opt_vm_ostack_pop (vm);

	return es_false;
}
3880 
/*
 * Optscript operator `_advanceto':
 *     matchloc _ADVANCETO -
 *
 * Records in the current window that the input position should be
 * advanced: sets window->advanceto and copies the delta of the given
 * matchloc into window->advanceto_delta.
 *
 * The operator is rejected (with a warning and
 * OPTSCRIPT_ERR_NOTMTABLEPTRN) for single line patterns, i.e. in
 * --regex-<LANG> options.
 *
 * Errors (the operand stack is left unchanged):
 *   OPTSCRIPT_ERR_NOTMTABLEPTRN  used with a single line pattern
 *   OPT_ERR_TYPECHECK            the operand is not a matchloc
 *
 * On success the matchloc operand is consumed, as the stack effect
 * above documents, and es_true is returned.
 *
 * NOTE(review): the operand used to be left on the stack; scripts that
 * compensated with an explicit `pop' after `_advanceto' must drop it.
 */
static EsObject *lrop_advanceto (OptVM *vm, EsObject *name)
{
	struct lregexControlBlock *lcb = opt_vm_get_app_data (vm);
	if (lcb->window->patbuf->regptype == REG_PARSER_SINGLE_LINE)
	{
		error (WARNING, "don't use `%s' operator in --regex-<LANG> option",
			   es_symbol_get (name));
		return OPTSCRIPT_ERR_NOTMTABLEPTRN; /* TODO */
	}

	EsObject *mlocobj = opt_vm_ostack_top (vm);
	if (es_object_get_type (mlocobj) != OPT_TYPE_MATCHLOC)
		return OPT_ERR_TYPECHECK;

	matchLoc *loc = es_pointer_get (mlocobj);
	lcb->window->advanceto = true;
	lcb->window->advanceto_delta = loc->delta;

	/* Pop only after the delta has been copied out: loc points into
	 * the operand being removed. */
	opt_vm_ostack_pop (vm);

	return es_true;
}
3901 
/*
 * Optscript operator `_markplaceholder':
 *     tag:int _MARKPLACEHOLDER -
 *
 * Marks the tag entry at the given cork queue index as a placeholder.
 *
 * Errors (the operand stack is left unchanged):
 *   OPT_ERR_TYPECHECK         the operand is not an integer
 *   OPT_ERR_RANGECHECK        the index is outside the cork queue
 *   OPTSCRIPT_ERR_NOTAGENTRY  no tag entry is stored at the index
 *
 * Returns es_false on success, after popping the operand.
 */
static EsObject *lrop_markplaceholder (OptVM *vm, EsObject *name)
{
	EsObject *indexObj = opt_vm_ostack_top (vm);
	if (!es_integer_p (indexObj))
		return OPT_ERR_TYPECHECK;

	int corkIndex = es_integer_get (indexObj);
	if (corkIndex <= CORK_NIL || corkIndex >= countEntryInCorkQueue())
		return OPT_ERR_RANGECHECK;

	tagEntryInfo *entry = getEntryInCorkQueue (corkIndex);
	if (entry == NULL)
		return OPTSCRIPT_ERR_NOTAGENTRY;

	markTagPlaceholder (entry, true);
	opt_vm_ostack_pop (vm);

	return es_false;
}
3922 
3923 static struct optscriptOperatorRegistration lropOperators [] = {
3924 	{
3925 		.name     = "_matchstr",
3926 		.fn       = lrop_get_match_string_group_on_stack,
3927 		.arity    = 1,
3928 		.help_str = "group:int _MATCHSTR string true%"
3929 		"group:int _MATCHSTR false",
3930 	},
3931 	{
3932 		.name     = "_matchloc",
3933 		.fn       = lrop_get_match_loc,
3934 		.arity    = -1,
3935 		.help_str = "group:int /start|/end _MATCHLOC matchloc%"
3936 		"group:int _MATCHLOC matchloc",
3937 	},
3938 	{
3939 		.name     = "_matchloc2line",
3940 		.fn       = ldrop_get_line_from_matchloc,
3941 		.arity    = 1,
3942 		.help_str = "matchloc _MATCHLOC2LINE int:line",
3943 	},
3944 	{
3945 		.name     = "_tagloc",
3946 		.fn       = lrop_get_tag_loc,
3947 		.arity    = 1,
3948 		.help_str = "index:int _TAGLOC matchloc",
3949 	},
3950 	{
3951 		.name     = "_tag",
3952 		.fn       = lrop_make_tag,
3953 		.arity    = -1,
3954 		.help_str = "name:str kind:name matchloc _TAG tag%"
3955 		"name:str kind:name _TAG tag",
3956 	},
3957 	{
3958 		.name     = "_reftag",
3959 		.fn       = lrop_make_reftag,
3960 		.arity    = -1,
3961 		.help_str = "name:str kind:name role:name matchloc _REFTAG tag%"
3962 		"name:str kind:name role:name _REFTAG tag%",
3963 	},
3964 	{
3965 		.name     = "_commit",
3966 		.fn       = lrop_commit_tag,
3967 		.arity    = 1,
3968 		.help_str = "tag _COMMIT int",
3969 	},
3970 	{
3971 		.name     = "_scopeset",
3972 		.fn       = lrop_set_scope,
3973 		.arity    = 1,
3974 		.help_str = "int _SCOPESET -",
3975 	},
3976 	{
3977 		.name     = "_scopepop",
3978 		.fn       = lrop_pop_scope,
3979 		.arity    = 0,
3980 		.help_str = "- _SCOPEPOP -",
3981 	},
3982 	{
3983 		.name     = "_scopeclear",
3984 		.fn       = lrop_clear_scope,
3985 		.arity    = 0,
3986 		.help_str = "- _SCOPECLEAR -",
3987 	},
3988 	{
3989 		.name     = "_scopetop",
3990 		.fn       = lrop_ref0_scope,
3991 		.arity    = 0,
3992 		.help_str = "- _SCOPETOP int true%"
3993 		"- _SCOPETOP false",
3994 	},
3995 	{
3996 		.name     = "_scopeNth",
3997 		.fn       = lrop_refN_scope,
3998 		.arity    = 1,
3999 		.help_str = "index:int _SCOPENTH int",
4000 	},
4001 	{
4002 		.name     = "_scopedepth",
4003 		.fn       = lrop_get_scope_depth,
4004 		.arity    = 0,
4005 		.help_str = "- _SCOPEDEPTH int",
4006 	},
4007 	{
4008 		.name     = "_repl",
4009 		.fn       = lrop_repl,
4010 		.arity    = 0,
4011 		.help_str = "- _repl -",
4012 	},
4013 	{
4014 		.name     = "_tenter",
4015 		.fn       = lrop_tenter,
4016 		.arity    = 1,
4017 		.help_str = "table:name _TENTER -",
4018 	},
4019 	{
4020 		.name     = "_tentercont",
4021 		.fn       = lrop_tenter_with_continuation,
4022 		.arity    = 2,
4023 		.help_str = "table:name cont:name _TENTERCONT -",
4024 	},
4025 	{
4026 		.name     = "_tleave",
4027 		.fn       = lrop_tleave,
4028 		.arity    = 0,
4029 		.help_str = "- _TLEAVE -",
4030 	},
4031 	{
4032 		.name     = "_tjump",
4033 		.fn       = lrop_tjump,
4034 		.arity    = 1,
4035 		.help_str = "table:name _TJUMP -",
4036 	},
4037 	{
4038 		.name     = "_treset",
4039 		.fn       = lrop_treset,
4040 		.arity    = 1,
4041 		.help_str = "table:name _TRESET -",
4042 	},
4043 	{
4044 		.name     = "_tquit",
4045 		.fn       = lrop_tquit,
4046 		.arity    = 0,
4047 		.help_str = "- _TQUIT -",
4048 	},
4049 	{
4050 		.name     = "_extraenabled",
4051 		.fn       = lrop_extraenabled,
4052 		.arity    = 1,
4053 		.help_str = "extra:name _extraenabled bool%"
4054 		"language.extra _extraenabled bool",
4055 	},
4056 	{
4057 		.name     = "_markextra",
4058 		.fn       = lrop_markextra,
4059 		.arity    = 2,
4060 		.help_str = "tag:int|tag:tag extra:name _MARKEXTRA -%"
4061 		"tag:int|tag:tag lang.extra:name _MARKEXTRA -",
4062 	},
4063 	{
4064 		.name     = "_advanceto",
4065 		.fn       = lrop_advanceto,
4066 		.arity    = 1,
4067 		.help_str = "matchloc _ADVANCETO -%"
4068 	},
4069 	{
4070 		.name     = "_traced",
4071 		.fn       = lrop_traced,
4072 		.arity    = 0,
4073 		.help_str = "- _TRACED true|false",
4074 	},
4075 	{
4076 		.name     = "_markplaceholder",
4077 		.fn       = lrop_markplaceholder,
4078 		.arity    = 1,
4079 		.help_str = "tag:int _MARKPLACEHOLDER -",
4080 	}
4081 };
4082 
initRegexOptscript(void)4083 extern void initRegexOptscript (void)
4084 {
4085 	if (!regexAvailable)
4086 		return;
4087 
4088 	if (optvm)
4089 		return;
4090 
4091 	optvm = optscriptInit ();
4092 	lregex_dict = opt_dict_new (17);
4093 
4094 	OPTSCRIPT_ERR_UNKNOWNTABLE = es_error_intern ("unknowntable");
4095 	OPTSCRIPT_ERR_NOTMTABLEPTRN = es_error_intern ("notmtableptrn");
4096 	OPTSCRIPT_ERR_UNKNOWNEXTRA = es_error_intern ("unknownextra");
4097 	OPTSCRIPT_ERR_UNKNOWNLANGUAGE = es_error_intern ("unknownlanguage");
4098 	OPTSCRIPT_ERR_UNKNOWNKIND = es_error_intern ("unknownkind");
4099 	OPTSCRIPT_ERR_UNKNOWNROLE = es_error_intern ("unknownrole");
4100 
4101 	optscriptInstallProcs (lregex_dict, lrop_get_match_string_named_group);
4102 
4103 	optscriptRegisterOperators (lregex_dict,
4104 								lropOperators, ARRAY_SIZE(lropOperators));
4105 
4106 	extern const char ctagsCommonPrelude[];
4107 	opt_vm_dstack_push (optvm, lregex_dict);
4108 	MIO *mio = mio_new_memory ((unsigned char*)ctagsCommonPrelude, strlen (ctagsCommonPrelude), NULL, NULL);
4109 	EsObject *e = optscriptLoad (optvm, mio);
4110 	if (es_error_p (e))
4111 		error (FATAL, "failed in loading built-in procedures");
4112 	mio_unref (mio);
4113 	opt_vm_dstack_pop (optvm);
4114 }
4115 
listRegexOpscriptOperators(FILE * fp)4116 extern void	listRegexOpscriptOperators (FILE *fp)
4117 {
4118 	EsObject *procdocs;
4119 	if (!opt_dict_known_and_get_cstr (lregex_dict,
4120 									  "__procdocs",
4121 									  &procdocs))
4122 		procdocs = NULL;
4123 
4124 	opt_vm_dstack_push (optvm, lregex_dict);
4125 	optscriptHelp (optvm, fp, procdocs);
4126 	opt_vm_dstack_pop (optvm);
4127 }
4128