xref: /Universal-ctags/main/lregex-pcre2.c (revision 6a8d5b709af994ff278390eed2a62d718f05a409)
1 /*
2 *  Copyright (c) 2021, Red Hat, Inc.
3 *  Copyright (c) 2021, Masatake YAMATO
4 *
5 *   This source code is released for free distribution under the terms of the
6 *   GNU General Public License version 2 or (at your option) any later version.
7 *
8 *   This module contains functions for applying regular expression matching.
9 *
10 *   The code for utilizing the Gnu regex package with regards to processing the
11 *   regex option and checking for regex matches was adapted from routines in
12 *   Gnu etags.
13 */
14 
15 /*
16 *   INCLUDE FILES
17 */
18 #include "general.h"  /* must always come first */
19 
20 #ifdef HAVE_PCRE2
21 #define PCRE2_CODE_UNIT_WIDTH 8
22 #include <pcre2.h>
23 #endif
24 
25 #include "lregex_p.h"
26 #include "trashbox.h"
27 
28 #include <string.h>
29 
30 /*
31 *    FUNCTION DECLARATIONS
32 */
33 static int match (struct regexBackend *backend,
34 				  void *code, const char *input, size_t size,
35 				  regmatch_t pmatch[BACK_REFERENCE_COUNT]);
36 static regexCompiledCode compile (struct regexBackend *backend,
37 								  const char *const regexp,
38 								  int flags);
39 static void delete_code (void *code);
40 static void set_icase_flag (int *flags);
41 
42 /*
43 *    DATA DEFINITIONS
44 */
45 static struct regexBackend pcre2RegexBackend = {
46 	.fdefs = NULL,
47 	.fdef_count = 0,
48 	.set_icase_flag = set_icase_flag,
49 	.compile = compile,
50 	.match = match,
51 	.delete_code = delete_code,
52 };
53 
54 /*
*    FUNCTION DEFINITIONS
56 */
pcre2_regex_flag_short(char c,void * data)57 extern void pcre2_regex_flag_short (char c, void* data)
58 {
59 	struct flagDefsDescriptor *desc = data;
60 
61 	if (desc->backend)
62 		error (FATAL, "regex backed is specified twice: %c", c);
63 
64 	desc->backend = &pcre2RegexBackend;
65 	desc->flags   = (desc->regptype == REG_PARSER_MULTI_TABLE)? PCRE2_DOTALL: PCRE2_MULTILINE;
66 }
67 
/*
 * Handler for the long (named) regex flag that selects the PCRE2 backend.
 *
 * s:      the flag name; it is used only in the diagnostic message.
 * unused: ignored.
 * data:   pointer to the struct flagDefsDescriptor being filled in.
 *
 * The "already selected" check is done here, before delegating, so that
 * the diagnostic shows the long flag name as the user wrote it instead of
 * the 'p' character passed to the short-flag handler.
 */
extern void pcre2_regex_flag_long (const char* const s, const char* const unused CTAGS_ATTR_UNUSED, void* data)
{
	struct flagDefsDescriptor *desc = data;

	if (desc->backend)
		error (FATAL, "regex backend is specified twice: %s", s);

	/* The actual setup is shared with the short flag handler. */
	pcre2_regex_flag_short ('p', data);
}
77 
delete_code(void * code)78 static void delete_code (void *code)
79 {
80 	pcre2_code_free (code);
81 }
82 
compile(struct regexBackend * backend,const char * const regexp,int flags)83 static regexCompiledCode compile (struct regexBackend *backend,
84 								  const char *const regexp,
85 								  int flags)
86 {
87 	int errornumber;
88 	PCRE2_SIZE erroroffset;
89 	pcre2_code *regex_code = pcre2_compile((PCRE2_SPTR)regexp,
90 										   PCRE2_ZERO_TERMINATED,
91 										   (uint32_t) flags,
92 										   &errornumber,
93 										   &erroroffset,
94 										   NULL);
95 	if (regex_code == NULL)
96 	{
97 		PCRE2_UCHAR buffer[256];
98 		pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
99 		error (WARNING, "PCRE2 compilation failed at offset %d: %s", (int)erroroffset,
100 			   buffer);
101 		return (regexCompiledCode) { .backend = NULL, .code = NULL };
102 	}
103 	return (regexCompiledCode) { .backend = &pcre2RegexBackend, .code = regex_code };
104 }
105 
/*
 * Backend callback: run a compiled PCRE2 pattern against INPUT.
 *
 * backend: not referenced.
 * code:    compiled pattern passed to pcre2_match().
 * input:   subject text; SIZE is its length as given to pcre2_match().
 * pmatch:  receives BACK_REFERENCE_COUNT offset pairs.  Entry 0 is the
 *          whole match; groups that did not take part in the match, or
 *          that were not reported, get rm_so == rm_eo == -1.
 *
 * Returns 0 when the pattern matched and 1 otherwise.  On a 1 return
 * PMATCH is left untouched.
 *
 * A single match data block with room for BACK_REFERENCE_COUNT pairs is
 * created on first use, kept in a function-local static, and registered
 * with the default trash box together with pcre2_match_data_free.
 */
static int match (struct regexBackend *backend,
				  void *code, const char *input, size_t size,
				  regmatch_t pmatch[BACK_REFERENCE_COUNT])
{
	static pcre2_match_data *match_data;
	if (match_data == NULL)
	{
		match_data = pcre2_match_data_create (BACK_REFERENCE_COUNT, NULL);
		if (match_data == NULL)
			/* Allocation failed: report "no match" (the same result the
			 * error return of pcre2_match() would produce) without
			 * registering a NULL pointer; creation is retried on the
			 * next call. */
			return 1;
		DEFAULT_TRASH_BOX (match_data, pcre2_match_data_free);
	}

	int rc = pcre2_match (code, (PCRE2_SPTR)input, size,
						  0, 0, match_data, NULL);
	if (rc < 0)
		return 1;				/* no match, or a matching error */

	/* A zero return still means the pattern matched: the pattern has more
	 * capture groups than the match data can hold, and PCRE2 has filled
	 * in as many pairs as fit, i.e. all BACK_REFERENCE_COUNT of them. */
	const int filled = (rc == 0)? BACK_REFERENCE_COUNT: rc;

	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer (match_data);

	/* A match whose start lies beyond its end is rejected. */
	if (ovector[0] > ovector[1])
		return 1;

	memset (pmatch, 0, sizeof(pmatch[0]) * BACK_REFERENCE_COUNT);
	for (int i = 0; i < BACK_REFERENCE_COUNT; i++)
	{
		/* PCRE2_UNSET marks a group that did not participate. */
		if (i < filled && ovector [2*i] != PCRE2_UNSET)
		{
			pmatch [i].rm_so = ovector [2*i];
			pmatch [i].rm_eo = ovector [2*i+1];
		}
		else
		{
			pmatch [i].rm_so = -1;
			pmatch [i].rm_eo = -1;
		}
	}
	return 0;
}
136 
set_icase_flag(int * flags)137 static void set_icase_flag (int *flags)
138 {
139 	*flags |= PCRE2_CASELESS;
140 }
141