xref: /Universal-ctags/main/lregex-pcre2.c (revision 6a8d5b709af994ff278390eed2a62d718f05a409)
1*6a8d5b70SMasatake YAMATO /*
2*6a8d5b70SMasatake YAMATO *  Copyright (c) 2021, Red Hat, Inc.
3*6a8d5b70SMasatake YAMATO *  Copyright (c) 2021, Masatake YAMATO
4*6a8d5b70SMasatake YAMATO *
5*6a8d5b70SMasatake YAMATO *   This source code is released for free distribution under the terms of the
6*6a8d5b70SMasatake YAMATO *   GNU General Public License version 2 or (at your option) any later version.
7*6a8d5b70SMasatake YAMATO *
8*6a8d5b70SMasatake YAMATO *   This module contains functions for applying regular expression matching.
9*6a8d5b70SMasatake YAMATO *
10*6a8d5b70SMasatake YAMATO *   The code for utilizing the Gnu regex package with regards to processing the
11*6a8d5b70SMasatake YAMATO *   regex option and checking for regex matches was adapted from routines in
12*6a8d5b70SMasatake YAMATO *   Gnu etags.
13*6a8d5b70SMasatake YAMATO */
14*6a8d5b70SMasatake YAMATO 
15*6a8d5b70SMasatake YAMATO /*
16*6a8d5b70SMasatake YAMATO *   INCLUDE FILES
17*6a8d5b70SMasatake YAMATO */
18*6a8d5b70SMasatake YAMATO #include "general.h"  /* must always come first */
19*6a8d5b70SMasatake YAMATO 
20*6a8d5b70SMasatake YAMATO #ifdef HAVE_PCRE2
21*6a8d5b70SMasatake YAMATO #define PCRE2_CODE_UNIT_WIDTH 8
22*6a8d5b70SMasatake YAMATO #include <pcre2.h>
23*6a8d5b70SMasatake YAMATO #endif
24*6a8d5b70SMasatake YAMATO 
25*6a8d5b70SMasatake YAMATO #include "lregex_p.h"
26*6a8d5b70SMasatake YAMATO #include "trashbox.h"
27*6a8d5b70SMasatake YAMATO 
28*6a8d5b70SMasatake YAMATO #include <string.h>
29*6a8d5b70SMasatake YAMATO 
30*6a8d5b70SMasatake YAMATO /*
31*6a8d5b70SMasatake YAMATO *    FUNCTION DECLARATIONS
32*6a8d5b70SMasatake YAMATO */
33*6a8d5b70SMasatake YAMATO static int match (struct regexBackend *backend,
34*6a8d5b70SMasatake YAMATO 				  void *code, const char *input, size_t size,
35*6a8d5b70SMasatake YAMATO 				  regmatch_t pmatch[BACK_REFERENCE_COUNT]);
36*6a8d5b70SMasatake YAMATO static regexCompiledCode compile (struct regexBackend *backend,
37*6a8d5b70SMasatake YAMATO 								  const char *const regexp,
38*6a8d5b70SMasatake YAMATO 								  int flags);
39*6a8d5b70SMasatake YAMATO static void delete_code (void *code);
40*6a8d5b70SMasatake YAMATO static void set_icase_flag (int *flags);
41*6a8d5b70SMasatake YAMATO 
42*6a8d5b70SMasatake YAMATO /*
43*6a8d5b70SMasatake YAMATO *    DATA DEFINITIONS
44*6a8d5b70SMasatake YAMATO */
45*6a8d5b70SMasatake YAMATO static struct regexBackend pcre2RegexBackend = {
46*6a8d5b70SMasatake YAMATO 	.fdefs = NULL,
47*6a8d5b70SMasatake YAMATO 	.fdef_count = 0,
48*6a8d5b70SMasatake YAMATO 	.set_icase_flag = set_icase_flag,
49*6a8d5b70SMasatake YAMATO 	.compile = compile,
50*6a8d5b70SMasatake YAMATO 	.match = match,
51*6a8d5b70SMasatake YAMATO 	.delete_code = delete_code,
52*6a8d5b70SMasatake YAMATO };
53*6a8d5b70SMasatake YAMATO 
54*6a8d5b70SMasatake YAMATO /*
55*6a8d5b70SMasatake YAMATO *    FUNCTOIN DEFINITIONS
56*6a8d5b70SMasatake YAMATO */
pcre2_regex_flag_short(char c,void * data)57*6a8d5b70SMasatake YAMATO extern void pcre2_regex_flag_short (char c, void* data)
58*6a8d5b70SMasatake YAMATO {
59*6a8d5b70SMasatake YAMATO 	struct flagDefsDescriptor *desc = data;
60*6a8d5b70SMasatake YAMATO 
61*6a8d5b70SMasatake YAMATO 	if (desc->backend)
62*6a8d5b70SMasatake YAMATO 		error (FATAL, "regex backed is specified twice: %c", c);
63*6a8d5b70SMasatake YAMATO 
64*6a8d5b70SMasatake YAMATO 	desc->backend = &pcre2RegexBackend;
65*6a8d5b70SMasatake YAMATO 	desc->flags   = (desc->regptype == REG_PARSER_MULTI_TABLE)? PCRE2_DOTALL: PCRE2_MULTILINE;
66*6a8d5b70SMasatake YAMATO }
67*6a8d5b70SMasatake YAMATO 
/*
 * Flag handler for the long form of the PCRE2 backend flag.
 *
 * S is the long flag name (used only in the error message), the second
 * argument is ignored, and DATA points to a struct flagDefsDescriptor.
 * The duplicate-backend check is done here first so that the error message
 * shows the long name; the actual setup is delegated to the short-flag
 * handler with the flag character 'p'.
 */
extern void pcre2_regex_flag_long (const char* const s, const char* const unused CTAGS_ATTR_UNUSED, void* data)
{
	const struct flagDefsDescriptor *descriptor = data;

	if (descriptor->backend != NULL)
		error (FATAL, "regex backed is specified twice: %s", s);

	pcre2_regex_flag_short ('p', data);
}
77*6a8d5b70SMasatake YAMATO 
delete_code(void * code)78*6a8d5b70SMasatake YAMATO static void delete_code (void *code)
79*6a8d5b70SMasatake YAMATO {
80*6a8d5b70SMasatake YAMATO 	pcre2_code_free (code);
81*6a8d5b70SMasatake YAMATO }
82*6a8d5b70SMasatake YAMATO 
compile(struct regexBackend * backend,const char * const regexp,int flags)83*6a8d5b70SMasatake YAMATO static regexCompiledCode compile (struct regexBackend *backend,
84*6a8d5b70SMasatake YAMATO 								  const char *const regexp,
85*6a8d5b70SMasatake YAMATO 								  int flags)
86*6a8d5b70SMasatake YAMATO {
87*6a8d5b70SMasatake YAMATO 	int errornumber;
88*6a8d5b70SMasatake YAMATO 	PCRE2_SIZE erroroffset;
89*6a8d5b70SMasatake YAMATO 	pcre2_code *regex_code = pcre2_compile((PCRE2_SPTR)regexp,
90*6a8d5b70SMasatake YAMATO 										   PCRE2_ZERO_TERMINATED,
91*6a8d5b70SMasatake YAMATO 										   (uint32_t) flags,
92*6a8d5b70SMasatake YAMATO 										   &errornumber,
93*6a8d5b70SMasatake YAMATO 										   &erroroffset,
94*6a8d5b70SMasatake YAMATO 										   NULL);
95*6a8d5b70SMasatake YAMATO 	if (regex_code == NULL)
96*6a8d5b70SMasatake YAMATO 	{
97*6a8d5b70SMasatake YAMATO 		PCRE2_UCHAR buffer[256];
98*6a8d5b70SMasatake YAMATO 		pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
99*6a8d5b70SMasatake YAMATO 		error (WARNING, "PCRE2 compilation failed at offset %d: %s", (int)erroroffset,
100*6a8d5b70SMasatake YAMATO 			   buffer);
101*6a8d5b70SMasatake YAMATO 		return (regexCompiledCode) { .backend = NULL, .code = NULL };
102*6a8d5b70SMasatake YAMATO 	}
103*6a8d5b70SMasatake YAMATO 	return (regexCompiledCode) { .backend = &pcre2RegexBackend, .code = regex_code };
104*6a8d5b70SMasatake YAMATO }
105*6a8d5b70SMasatake YAMATO 
/*
 * Match the compiled PCRE2 pattern CODE against the first SIZE bytes of
 * INPUT.  BACKEND is not used.
 *
 * On a match, PMATCH[0] receives the offsets of the whole match and
 * PMATCH[1..] the offsets of the capture groups; entries for groups that
 * did not take part in the match (or that do not exist) get -1 for both
 * rm_so and rm_eo.
 *
 * Returns 0 on a match and 1 otherwise (no match, a PCRE2 error, the match
 * data block could not be allocated, or the reported match start lies
 * after its end).
 */
static int match (struct regexBackend *backend,
				  void *code, const char *input, size_t size,
				  regmatch_t pmatch[BACK_REFERENCE_COUNT])
{
	/* One match data block is created lazily and reused by every call;
	 * it is handed to the default trash box for release. */
	static pcre2_match_data *match_data;
	if (match_data == NULL)
	{
		match_data = pcre2_match_data_create (BACK_REFERENCE_COUNT, NULL);
		if (match_data == NULL)
			return 1;	/* Nothing to register; retried on the next call. */
		DEFAULT_TRASH_BOX (match_data, pcre2_match_data_free);
	}

	int rc = pcre2_match (code, (PCRE2_SPTR)input, size,
						  0, 0, match_data, NULL);
	if (rc < 0)
		return 1;

	/* pcre2_match() returns 0 when the pattern matched but the ovector was
	 * too small to hold every capture group.  All BACK_REFERENCE_COUNT
	 * pairs are filled in that case, so it is still a successful match. */
	if (rc == 0)
		rc = BACK_REFERENCE_COUNT;

	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer (match_data);

	/* Reject a match whose start is reported after its end. */
	if (ovector[0] > ovector[1])
		return 1;

	memset (pmatch, 0, sizeof (pmatch[0]) * BACK_REFERENCE_COUNT);
	for (int i = 0; i < BACK_REFERENCE_COUNT; i++)
	{
		/* Groups that did not participate are marked PCRE2_UNSET. */
		if (i < rc && ovector[2 * i] != PCRE2_UNSET)
		{
			pmatch[i].rm_so = ovector[2 * i];
			pmatch[i].rm_eo = ovector[2 * i + 1];
		}
		else
		{
			pmatch[i].rm_so = -1;
			pmatch[i].rm_eo = -1;
		}
	}
	return 0;
}
136*6a8d5b70SMasatake YAMATO 
set_icase_flag(int * flags)137*6a8d5b70SMasatake YAMATO static void set_icase_flag (int *flags)
138*6a8d5b70SMasatake YAMATO {
139*6a8d5b70SMasatake YAMATO 	*flags |= PCRE2_CASELESS;
140*6a8d5b70SMasatake YAMATO }
141