1 /*
2 * Copyright (c) 2021, Red Hat, Inc.
3 * Copyright (c) 2021, Masatake YAMATO
4 *
5 * This source code is released for free distribution under the terms of the
6 * GNU General Public License version 2 or (at your option) any later version.
7 *
8 * This module contains functions for applying regular expression matching.
9 *
10 * The code for utilizing the Gnu regex package with regards to processing the
11 * regex option and checking for regex matches was adapted from routines in
12 * Gnu etags.
13 */
14
15 /*
16 * INCLUDE FILES
17 */
18 #include "general.h" /* must always come first */
19
20 #ifdef HAVE_PCRE2
21 #define PCRE2_CODE_UNIT_WIDTH 8
22 #include <pcre2.h>
23 #endif
24
25 #include "lregex_p.h"
26 #include "trashbox.h"
27
28 #include <string.h>
29
30 /*
31 * FUNCTION DECLARATIONS
32 */
33 static int match (struct regexBackend *backend,
34 void *code, const char *input, size_t size,
35 regmatch_t pmatch[BACK_REFERENCE_COUNT]);
36 static regexCompiledCode compile (struct regexBackend *backend,
37 const char *const regexp,
38 int flags);
39 static void delete_code (void *code);
40 static void set_icase_flag (int *flags);
41
42 /*
43 * DATA DEFINITIONS
44 */
45 static struct regexBackend pcre2RegexBackend = {
46 .fdefs = NULL,
47 .fdef_count = 0,
48 .set_icase_flag = set_icase_flag,
49 .compile = compile,
50 .match = match,
51 .delete_code = delete_code,
52 };
53
54 /*
* FUNCTION DEFINITIONS
56 */
pcre2_regex_flag_short(char c,void * data)57 extern void pcre2_regex_flag_short (char c, void* data)
58 {
59 struct flagDefsDescriptor *desc = data;
60
61 if (desc->backend)
62 error (FATAL, "regex backed is specified twice: %c", c);
63
64 desc->backend = &pcre2RegexBackend;
65 desc->flags = (desc->regptype == REG_PARSER_MULTI_TABLE)? PCRE2_DOTALL: PCRE2_MULTILINE;
66 }
67
/*
 * Handler for the long flag named S that selects the PCRE2 backend.
 *
 * DATA is used as a struct flagDefsDescriptor. If the descriptor already
 * has a backend, the condition is reported through error (FATAL, ...)
 * with the long flag name. The actual setup is delegated to
 * pcre2_regex_flag_short() with the letter 'p'. The second parameter is
 * not used.
 */
extern void pcre2_regex_flag_long (const char* const s, const char* const unused CTAGS_ATTR_UNUSED, void* data)
{
	const struct flagDefsDescriptor *const descriptor = data;

	if (descriptor->backend != NULL)
		error (FATAL, "regex backed is specified twice: %s", s);

	pcre2_regex_flag_short ('p', data);
}
77
delete_code(void * code)78 static void delete_code (void *code)
79 {
80 pcre2_code_free (code);
81 }
82
compile(struct regexBackend * backend,const char * const regexp,int flags)83 static regexCompiledCode compile (struct regexBackend *backend,
84 const char *const regexp,
85 int flags)
86 {
87 int errornumber;
88 PCRE2_SIZE erroroffset;
89 pcre2_code *regex_code = pcre2_compile((PCRE2_SPTR)regexp,
90 PCRE2_ZERO_TERMINATED,
91 (uint32_t) flags,
92 &errornumber,
93 &erroroffset,
94 NULL);
95 if (regex_code == NULL)
96 {
97 PCRE2_UCHAR buffer[256];
98 pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
99 error (WARNING, "PCRE2 compilation failed at offset %d: %s", (int)erroroffset,
100 buffer);
101 return (regexCompiledCode) { .backend = NULL, .code = NULL };
102 }
103 return (regexCompiledCode) { .backend = &pcre2RegexBackend, .code = regex_code };
104 }
105
/*
 * Run the compiled pattern CODE against the first SIZE bytes of INPUT.
 *
 * Returns 0 on a match and fills PMATCH: index 0 receives the offsets of
 * the whole match and the following indices those of the capture groups.
 * Entries beyond the number of offset pairs reported by pcre2_match() are
 * set to -1.
 *
 * Returns 1 when the pattern does not match, when pcre2_match() reports an
 * error, when the reported start of the match lies after its end, or when
 * the match data block cannot be allocated. PMATCH is left untouched in
 * these cases.
 *
 * BACKEND is not consulted.
 */
static int match (struct regexBackend *backend,
				  void *code, const char *input, size_t size,
				  regmatch_t pmatch[BACK_REFERENCE_COUNT])
{
	/* A single match data block is created lazily and reused by every call. */
	static pcre2_match_data *match_data;
	if (match_data == NULL)
	{
		match_data = pcre2_match_data_create (BACK_REFERENCE_COUNT, NULL);
		if (match_data == NULL)
			return 1;	/* Allocation failed: do not hand NULL to the
						 * trash box; the next call retries. */
		/* Presumably arranges for the block to be freed at shutdown --
		 * see trashbox.h. */
		DEFAULT_TRASH_BOX (match_data, pcre2_match_data_free);
	}

	int rc = pcre2_match (code, (PCRE2_SPTR)input, size,
						  0, 0, match_data, NULL);
	if (rc < 0)
		return 1;	/* No match, or a matching error. */

	/*
	 * pcre2_match() returns 0 when the pattern matched but the ovector was
	 * too small to hold every captured substring. The pairs that do fit are
	 * still filled in, so treat this as a match with all
	 * BACK_REFERENCE_COUNT slots valid instead of reporting "no match".
	 */
	if (rc == 0)
		rc = BACK_REFERENCE_COUNT;

	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer (match_data);
	if (ovector[0] > ovector[1])
		return 1;

	memset (pmatch, 0, sizeof (pmatch[0]) * BACK_REFERENCE_COUNT);
	for (int i = 0; i < BACK_REFERENCE_COUNT; i++)
	{
		if (i < rc)
		{
			pmatch[i].rm_so = ovector[2 * i];
			pmatch[i].rm_eo = ovector[2 * i + 1];
		}
		else
		{
			pmatch[i].rm_so = -1;
			pmatch[i].rm_eo = -1;
		}
	}
	return 0;
}
136
set_icase_flag(int * flags)137 static void set_icase_flag (int *flags)
138 {
139 *flags |= PCRE2_CASELESS;
140 }
141