1 /*
2 * PackCC: a packrat parser generator for C.
3 *
4 * Copyright (c) 2014, 2019-2022 Arihiro Yoshida. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 /*
26 * The algorithm is based on the paper "Packrat Parsers Can Support Left Recursion"
27 * authored by A. Warth, J. R. Douglass, and T. Millstein.
28 *
29 * The specification is determined by referring to peg/leg developed by Ian Piumarta.
30 */
31
32 #ifdef _MSC_VER
33 #define _CRT_SECURE_NO_WARNINGS
34 #ifdef _DEBUG
35 #define _CRTDBG_MAP_ALLOC
36 #include <crtdbg.h>
37 #endif
38 #endif
39
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <stdarg.h>
43 #include <string.h>
44 #include <limits.h>
45 #include <assert.h>
46
47 #ifndef _MSC_VER
48 #if defined __GNUC__ && defined _WIN32 /* MinGW */
49 #ifndef PCC_USE_SYSTEM_STRNLEN
50 #define strnlen(str, maxlen) strnlen_(str, maxlen)
/*
 * The substitute for strnlen() used with MinGW.
 * Returns the number of characters preceding the terminating null character,
 * or maxlen if no null character is found within the first maxlen characters.
 */
static size_t strnlen_(const char *str, size_t maxlen) {
    size_t n = 0;
    while (n < maxlen && str[n] != '\0') n++;
    return n;
}
56 #endif /* !PCC_USE_SYSTEM_STRNLEN */
57 #endif /* defined __GNUC__ && defined _WIN32 */
58 #endif /* !_MSC_VER */
59
60 #ifdef _MSC_VER
61 #define snprintf _snprintf
62 #define vsnprintf _vsnprintf
63 #define unlink _unlink
64 #else
65 #include <unistd.h> /* for unlink() */
66 #endif
67
68 #if !defined __has_attribute || defined _MSC_VER
69 #define __attribute__(x)
70 #endif
71
72 #undef TRUE /* to avoid macro definition conflicts with the system header file of IBM AIX */
73 #undef FALSE
74
/* The version string of this program. */
#define VERSION "1.8.0"

/* The initial capacity of a character array (see char_array__add()); can be overridden at compile time. */
#ifndef BUFFER_MIN_SIZE
#define BUFFER_MIN_SIZE 256
#endif
/* The initial capacity of the other growable arrays (ex. node_array__add()); can be overridden at compile time. */
#ifndef ARRAY_MIN_SIZE
#define ARRAY_MIN_SIZE 2
#endif

/* The size_t value with all bits set; used as a marker that means "no valid value". */
#define VOID_VALUE (~(size_t)0)
85
86 #ifdef _WIN64 /* 64-bit Windows including MSVC and MinGW-w64 */
87 #define FMT_LU "%llu"
88 typedef unsigned long long ulong_t;
89 /* NOTE: "%llu" and "long long" are not C89-compliant, but they are required to deal with a 64-bit integer value in 64-bit Windows. */
90 #else
91 #define FMT_LU "%lu"
92 typedef unsigned long ulong_t;
93 #endif
94 /* FMT_LU and ulong_t are used to print size_t values safely (ex. printf(FMT_LU "\n", (ulong_t)value);) */
95 /* NOTE: Neither "%z" nor <inttypes.h> is used since PackCC complies with the C89 standard as much as possible. */
96
/* The Boolean type of this program; FALSE is 0 and TRUE is 1. */
typedef enum bool_tag {
    FALSE = 0,
    TRUE
} bool_t;
101
/* A wrapper of a FILE stream that keeps track of the line number while text is written with the stream__*() functions. */
typedef struct stream_tag {
    FILE *file;       /* the stream; just a reference */
    const char *name; /* the file name */
    size_t line;      /* the current line number (0-based); line counting is disabled if VOID_VALUE */
} stream_t;
107
/* A growable array of characters; see char_array__init(), char_array__add(), and char_array__term(). */
typedef struct char_array_tag {
    char *buf;  /* the storage; NULL until the first character is added */
    size_t max; /* the number of characters the storage can hold */
    size_t len; /* the number of characters currently stored */
} char_array_t;
113
/* A piece of code text; see code_block__init() and code_block__term(). */
typedef struct code_block_tag {
    char *text;  /* the text; NULL after initialization; released by code_block__term() */
    size_t len;  /* 0 after initialization; presumably the length of the text (TODO: confirm against the code that fills it) */
    size_t line; /* VOID_VALUE after initialization; presumably the line number where the text was found (TODO: confirm) */
    size_t col;  /* VOID_VALUE after initialization; presumably the column number where the text was found (TODO: confirm) */
} code_block_t;

/* A growable array of code blocks; the entries are terminated together with the array by code_block_array__term(). */
typedef struct code_block_array_tag {
    code_block_t *buf; /* the storage; NULL until the first entry is created */
    size_t max;        /* the number of entries the storage can hold */
    size_t len;        /* the number of entries currently stored */
} code_block_array_t;
126
/*
 * The kinds of nodes; the type of node_t.type.
 * Each enumerator has a correspondingly named member in node_data_t.
 */
typedef enum node_type_tag {
    NODE_RULE = 0,
    NODE_REFERENCE,
    NODE_STRING,
    NODE_CHARCLASS,
    NODE_QUANTITY,
    NODE_PREDICATE,
    NODE_SEQUENCE,
    NODE_ALTERNATE,
    NODE_CAPTURE,
    NODE_EXPAND,
    NODE_ACTION,
    NODE_ERROR
} node_type_t;
141
/* The node type; declared here in advance since the structures below refer to it. */
typedef struct node_tag node_t;

/* A growable array of nodes; the stored nodes are destroyed together with the array by node_array__term(). */
typedef struct node_array_tag {
    node_t **buf; /* the storage; NULL until the first node is added */
    size_t max;   /* the number of nodes the storage can hold */
    size_t len;   /* the number of nodes currently stored */
} node_array_t;

/* An array of pointers to constant nodes; presumably just references without ownership (TODO: confirm against node_const_array__term()). */
typedef struct node_const_array_tag {
    const node_t **buf;
    size_t max;
    size_t len;
} node_const_array_t;

/* A hash table of pointers to constant nodes; the exact roles of max and mod are not visible in this part of the file. */
typedef struct node_hash_table_tag {
    const node_t **buf;
    size_t max;
    size_t mod;
} node_hash_table_t;
161
/* The data held in node_data_t.rule; presumably valid when node_t.type is NODE_RULE (TODO: confirm against the node creation code). */
typedef struct node_rule_tag {
    char *name;
    node_t *expr;
    int ref; /* mutable */
    node_const_array_t vars;
    node_const_array_t capts;
    node_const_array_t codes;
    size_t line;
    size_t col;
} node_rule_t;

/* The data held in node_data_t.reference; presumably valid when node_t.type is NODE_REFERENCE (TODO: confirm). */
typedef struct node_reference_tag {
    char *var; /* NULL if no variable name */
    size_t index;
    char *name;
    const node_t *rule;
    size_t line;
    size_t col;
} node_reference_t;
181
/*
 * The structures below are the types of the remaining members of node_data_t.
 * Presumably, each one is valid when node_t.type has the correspondingly named
 * enumerator of node_type_t (TODO: confirm against the node creation code).
 */

/* The type of node_data_t.string. */
typedef struct node_string_tag {
    char *value;
} node_string_t;

/* The type of node_data_t.charclass. */
typedef struct node_charclass_tag {
    char *value; /* NULL means any character */
} node_charclass_t;

/* The type of node_data_t.quantity. */
typedef struct node_quantity_tag {
    int min;
    int max;
    node_t *expr;
} node_quantity_t;

/* The type of node_data_t.predicate. */
typedef struct node_predicate_tag {
    bool_t neg;
    node_t *expr;
} node_predicate_t;

/* The type of node_data_t.sequence. */
typedef struct node_sequence_tag {
    node_array_t nodes;
} node_sequence_t;

/* The type of node_data_t.alternate. */
typedef struct node_alternate_tag {
    node_array_t nodes;
} node_alternate_t;

/* The type of node_data_t.capture. */
typedef struct node_capture_tag {
    node_t *expr;
    size_t index;
} node_capture_t;

/* The type of node_data_t.expand. */
typedef struct node_expand_tag {
    size_t index;
    size_t line;
    size_t col;
} node_expand_t;

/* The type of node_data_t.action. */
typedef struct node_action_tag {
    code_block_t code;
    size_t index;
    node_const_array_t vars;
    node_const_array_t capts;
} node_action_t;

/* The type of node_data_t.error. */
typedef struct node_error_tag {
    node_t *expr;
    code_block_t code;
    size_t index;
    node_const_array_t vars;
    node_const_array_t capts;
} node_error_t;
234
/* The union of the data of all node kinds; it has one member per enumerator of node_type_t. */
typedef union node_data_tag {
    node_rule_t rule;
    node_reference_t reference;
    node_string_t string;
    node_charclass_t charclass;
    node_quantity_t quantity;
    node_predicate_t predicate;
    node_sequence_t sequence;
    node_alternate_t alternate;
    node_capture_t capture;
    node_expand_t expand;
    node_action_t action;
    node_error_t error;
} node_data_t;

/* A node: a kind and the data; presumably, the kind tells which member of the data is valid (TODO: confirm). */
struct node_tag {
    node_type_t type;
    node_data_t data;
};
254
/* The set of the options; held in context_t.opts. */
typedef struct options_tag {
    bool_t ascii; /* UTF-8 support is disabled if true */
    bool_t lines; /* #line directives are output if true */
    bool_t debug; /* debug information is output if true */
} options_t;

/* The flags held in context_t.flags, which are described there as bitwise flags to control code generation. */
typedef enum code_flag_tag {
    CODE_FLAG__NONE = 0,
    CODE_FLAG__UTF8_CHARCLASS_USED = 1
} code_flag_t;
265
/* The context: the file names, the options, the state of PEG parsing, and the data collected for code generation. */
typedef struct context_tag {
    char *iname;  /* the path name of the PEG file being parsed */
    char *sname;  /* the path name of the C source file being generated */
    char *hname;  /* the path name of the C header file being generated */
    FILE *ifile;  /* the input stream of the PEG file */
    char *hid;    /* the macro name for the include guard of the C header file */
    char *vtype;  /* the type name of the data output by the parsing API function (NULL means the default) */
    char *atype;  /* the type name of the user-defined data passed to the parser creation API function (NULL means the default) */
    char *prefix; /* the prefix of the API function names (NULL means the default) */
    options_t opts;   /* the options */
    code_flag_t flags;   /* the bitwise flags to control code generation; updated during PEG parsing */
    size_t errnum;    /* the current number of PEG parsing errors */
    size_t linenum;   /* the current line number (0-based) */
    size_t charnum;   /* the number of characters in the current line that are already flushed (0-based, UTF-8 support if not disabled) */
    size_t linepos;   /* the beginning position in the PEG file of the current line */
    size_t bufpos;    /* the position in the PEG file of the first character currently buffered */
    size_t bufcur;    /* the current parsing position in the character buffer */
    char_array_t buffer;   /* the character buffer */
    node_array_t rules;    /* the PEG rules */
    node_hash_table_t rulehash; /* the hash table to accelerate access to desired PEG rules */
    code_block_array_t esource; /* the code blocks from %earlysource and %earlycommon directives to be added into the generated source file */
    code_block_array_t eheader; /* the code blocks from %earlyheader and %earlycommon directives to be added into the generated header file */
    code_block_array_t source;  /* the code blocks from %source and %common directives to be added into the generated source file */
    code_block_array_t header;  /* the code blocks from %header and %common directives to be added into the generated header file */
} context_t;
291
/* Presumably the state used while code is generated (TODO: confirm against the code generation functions). */
typedef struct generate_tag {
    stream_t *stream;
    const node_t *rule;
    int label;
    bool_t ascii;
} generate_t;

/* The values are distinct powers of two; presumably combined bitwise to specify how a string is checked (TODO: confirm against the users). */
typedef enum string_flag_tag {
    STRING_FLAG__NONE = 0,
    STRING_FLAG__NOTEMPTY = 1,
    STRING_FLAG__NOTVOID = 2,
    STRING_FLAG__IDENTIFIER = 4
} string_flag_t;

/* Presumably tells whether a piece of generated code can succeed only, fail only, or both (TODO: confirm against the users). */
typedef enum code_reach_tag {
    CODE_REACH__BOTH = 0,
    CODE_REACH__ALWAYS_SUCCEED = 1,
    CODE_REACH__ALWAYS_FAIL = -1
} code_reach_t;

/* The command name shown at the head of every message printed by print_error(). */
static const char *g_cmdname = "packcc"; /* replaced later with actual one */
313
314 __attribute__((format(printf, 1, 2)))
print_error(const char * format,...)315 static int print_error(const char *format, ...) {
316 int n;
317 va_list a;
318 va_start(a, format);
319 n = fprintf(stderr, "%s: ", g_cmdname);
320 if (n >= 0) {
321 const int k = vfprintf(stderr, format, a);
322 if (k < 0) n = k; else n += k;
323 }
324 va_end(a);
325 return n;
326 }
327
/* Opens a file to read in the binary mode; prints an error message and exits with the status 2 on failure. */
static FILE *fopen_rb_e(const char *path) {
    FILE *const fp = fopen(path, "rb");
    if (!fp) {
        print_error("Cannot open file '%s' to read\n", path);
        exit(2);
    }
    return fp;
}
336
/* Opens a file to write in the text mode; prints an error message and exits with the status 2 on failure. */
static FILE *fopen_wt_e(const char *path) {
    FILE *const fp = fopen(path, "wt");
    if (!fp) {
        print_error("Cannot open file '%s' to write\n", path);
        exit(2);
    }
    return fp;
}
345
/* Closes a stream; prints an error message and exits with the status 2 on failure. Returns the value from fclose(). */
static int fclose_e(FILE *stream) {
    const int result = fclose(stream);
    if (result != EOF) return result;
    print_error("File closing error\n");
    exit(2);
    return result; /* never reached */
}
354
/*
 * Reads one character from a stream; prints an error message and exits with the status 2 on a read error.
 * Returns the character read, or EOF at the end of the file.
 */
static int fgetc_e(FILE *stream) {
    const int ch = fgetc(stream);
    if (ch != EOF) return ch;
    if (ferror(stream)) {
        print_error("File read error\n");
        exit(2);
    }
    return ch; /* the end of the file */
}
363
/* Allocates memory; prints an error message and exits with the status 3 if malloc() returns NULL. */
static void *malloc_e(size_t size) {
    void *const block = malloc(size);
    if (!block) {
        print_error("Out of memory\n");
        exit(3);
    }
    return block;
}
372
/* Reallocates memory; prints an error message and exits with the status 3 if realloc() returns NULL. */
static void *realloc_e(void *ptr, size_t size) {
    void *const block = realloc(ptr, size);
    if (!block) {
        print_error("Out of memory\n");
        exit(3);
    }
    return block;
}
381
/* Duplicates a string into memory newly allocated with malloc_e(); the caller has to free() it. */
static char *strdup_e(const char *str) {
    const size_t size = strlen(str) + 1; /* including the terminating null character */
    char *const copy = (char *)malloc_e(size);
    memcpy(copy, str, size);
    return copy;
}
389
/*
 * Duplicates at most len characters of a string into memory newly allocated with malloc_e().
 * The result is always null-terminated; the caller has to free() it.
 */
static char *strndup_e(const char *str, size_t len) {
    const size_t count = strnlen(str, len);
    char *const copy = (char *)malloc_e(count + 1);
    copy[count] = '\0';
    memcpy(copy, str, count);
    return copy;
}
397
string_to_size_t(const char * str)398 static size_t string_to_size_t(const char *str) {
399 #define N (~(size_t)0 / 10)
400 #define M (~(size_t)0 - 10 * N)
401 size_t n = 0, i, k;
402 for (i = 0; str[i]; i++) {
403 const char c = str[i];
404 if (c < '0' || c > '9') return VOID_VALUE;
405 k = (size_t)(c - '0');
406 if (n >= N && k > M) return VOID_VALUE; /* overflow */
407 n = k + 10 * n;
408 }
409 return n;
410 #undef N
411 #undef M
412 }
413
/*
 * Scans one line of the text in the range [start, end).
 *
 * Returns the position just after the last character in the line that is not a blank
 * (space, \v, \f, or \t), or start if the line has no such character.
 * If next is not NULL, the position just after the line break (\n, \r, or \r\n) is stored in *next;
 * end is stored if the range has no line break.
 */
static size_t find_first_trailing_space(const char *str, size_t start, size_t end, size_t *next) {
    size_t tail = start; /* the position just after the last non-blank character found so far */
    size_t after = end;  /* the position where the next line begins */
    size_t pos = start;
    while (pos < end) {
        const char c = str[pos];
        if (c == '\n') {
            after = pos + 1;
            break;
        }
        if (c == '\r') {
            after = (pos + 1 < end && str[pos + 1] == '\n') ? pos + 2 : pos + 1;
            break;
        }
        pos++;
        if (c != ' ' && c != '\v' && c != '\f' && c != '\t') tail = pos;
    }
    if (next) *next = after;
    return tail;
}
437
/*
 * Measures the width of the indent at the head of the text in the range [start, end).
 *
 * A space, \v, and \f each count as 1; a tab advances the width to the next multiple of 8.
 * If next is not NULL, the position of the first character that is not a part of the indent
 * is stored in *next; end is stored if the range has no such character.
 */
static size_t count_indent_spaces(const char *str, size_t start, size_t end, size_t *next) {
    size_t width = 0;
    size_t pos;
    for (pos = start; pos < end; pos++) {
        const char c = str[pos];
        if (c == '\t') {
            width = (width + 8) & ~(size_t)7; /* the next tab stop */
        }
        else if (c == ' ' || c == '\v' || c == '\f') {
            width++;
        }
        else {
            break;
        }
    }
    if (next) *next = pos;
    return width;
}
458
is_filled_string(const char * str)459 static bool_t is_filled_string(const char *str) {
460 size_t i;
461 for (i = 0; str[i]; i++) {
462 if (
463 str[i] != ' ' &&
464 str[i] != '\v' &&
465 str[i] != '\f' &&
466 str[i] != '\t' &&
467 str[i] != '\n' &&
468 str[i] != '\r'
469 ) return TRUE;
470 }
471 return FALSE;
472 }
473
is_identifier_string(const char * str)474 static bool_t is_identifier_string(const char *str) {
475 size_t i;
476 if (!(
477 (str[0] >= 'a' && str[0] <= 'z') ||
478 (str[0] >= 'A' && str[0] <= 'Z') ||
479 str[0] == '_'
480 )) return FALSE;
481 for (i = 1; str[i]; i++) {
482 if (!(
483 (str[i] >= 'a' && str[i] <= 'z') ||
484 (str[i] >= 'A' && str[i] <= 'Z') ||
485 (str[i] >= '0' && str[i] <= '9') ||
486 str[i] == '_'
487 )) return FALSE;
488 }
489 return TRUE;
490 }
491
is_pointer_type(const char * str)492 static bool_t is_pointer_type(const char *str) {
493 const size_t n = strlen(str);
494 return (n > 0 && str[n - 1] == '*') ? TRUE : FALSE;
495 }
496
is_valid_utf8_string(const char * str)497 static bool_t is_valid_utf8_string(const char *str) {
498 int k = 0, n = 0, u = 0;
499 size_t i;
500 for (i = 0; str[i]; i++) {
501 const int c = (int)(unsigned char)str[i];
502 switch (k) {
503 case 0:
504 if (c >= 0x80) {
505 if ((c & 0xe0) == 0xc0) {
506 u = c & 0x1f;
507 n = k = 1;
508 }
509 else if ((c & 0xf0) == 0xe0) {
510 u = c & 0x0f;
511 n = k = 2;
512 }
513 else if ((c & 0xf8) == 0xf0) {
514 u = c & 0x07;
515 n = k = 3;
516 }
517 else {
518 return FALSE;
519 }
520 }
521 break;
522 case 1:
523 case 2:
524 case 3:
525 if ((c & 0xc0) == 0x80) {
526 u <<= 6;
527 u |= c & 0x3f;
528 k--;
529 if (k == 0) {
530 switch (n) {
531 case 1:
532 if (u < 0x80) return FALSE;
533 break;
534 case 2:
535 if (u < 0x800) return FALSE;
536 break;
537 case 3:
538 if (u < 0x10000 || u > 0x10ffff) return FALSE;
539 break;
540 default:
541 assert(((void)"unexpected control flow", 0));
542 return FALSE; /* never reached */
543 }
544 u = 0;
545 n = 0;
546 }
547 }
548 else {
549 return FALSE;
550 }
551 break;
552 default:
553 assert(((void)"unexpected control flow", 0));
554 return FALSE; /* never reached */
555 }
556 }
557 return (k == 0) ? TRUE : FALSE;
558 }
559
/*
 * Decodes the UTF-8 sequence at the head of a null-terminated string, without checking UTF-8 validity.
 *
 * The length of the sequence is decided only by the first byte: 0 for the terminating null character,
 * 2 to 4 for a lead byte of a multibyte sequence, and 1 for any other byte.
 * If out is not NULL, the decoded value is stored in *out.
 * No byte after the terminating null character is read even if the sequence is cut off by it;
 * the missing bytes are regarded as contributing 0.
 * Returns the length decided as above; NOTE that it can exceed the number of the remaining bytes
 * if the sequence is cut off.
 */
static size_t utf8_to_utf32(const char *seq, int *out) { /* without checking UTF-8 validity */
    const int c = (int)(unsigned char)seq[0];
    const size_t n =
        (c == 0) ? 0 : (c < 0x80) ? 1 :
        ((c & 0xe0) == 0xc0) ? 2 :
        ((c & 0xf0) == 0xe0) ? 3 :
        ((c & 0xf8) == 0xf0) ? 4 : 1;
    int u = 0;
    switch (n) {
    case 0:
    case 1:
        u = c;
        break;
    case 2:
        u = ((c & 0x1f) << 6) |
            ((int)(unsigned char)seq[1] & 0x3f);
        break;
    case 3:
        u = ((c & 0x0f) << 12) |
            (((int)(unsigned char)seq[1] & 0x3f) << 6) |
            (seq[1] ? ((int)(unsigned char)seq[2] & 0x3f) : 0);
        break;
    default:
        /* seq[2] must not be touched if seq[1] is the terminating null character */
        u = ((c & 0x07) << 18) |
            (((int)(unsigned char)seq[1] & 0x3f) << 12) |
            (seq[1] ? (((int)(unsigned char)seq[2] & 0x3f) << 6) : 0) |
            ((seq[1] && seq[2]) ? ((int)(unsigned char)seq[3] & 0x3f) : 0);
    }
    if (out) *out = u;
    return n;
}
591
/*
 * Resolves the escape sequences in a string in place.
 *
 * The following escape sequences are resolved:
 *   - \' \" \0 \a \b \f \n \r \t \v,
 *   - \xHH (2 hexadecimal digits),
 *   - \uHHHH (4 hexadecimal digits), stored in UTF-8; a surrogate pair written as
 *     two successive \uHHHH is stored as one UTF-8 sequence,
 *   - a backslash followed by a line break (\n, \r, or \r\n), which is just removed.
 * "\\" becomes one backslash if cls is FALSE, and is left as it is if cls is TRUE.
 * A backslash followed by any other character is left as it is without being regarded as an error.
 * A malformed \x or \u sequence (including a lone or unpaired surrogate code point) is left as it is,
 * and makes the return value FALSE; so does a backslash at the end of the string.
 *
 * Returns TRUE if the string has no malformed escape sequence, FALSE otherwise.
 * NOTE: The resolved string never gets longer than the original one.
 */
static bool_t unescape_string(char *str, bool_t cls) { /* cls: TRUE if used for character class matching */
    bool_t b = TRUE;
    size_t i, j;
    for (j = 0, i = 0; str[i]; i++) {
        if (str[i] == '\\') {
            i++;
            switch (str[i]) {
            case '\0': str[j++] = '\\'; str[j] = '\0'; return FALSE;
            case '\'': str[j++] = '\''; break;
            case '\"': str[j++] = '\"'; break;
            case '0': str[j++] = '\x00'; break;
            case 'a': str[j++] = '\x07'; break;
            case 'b': str[j++] = '\x08'; break;
            case 'f': str[j++] = '\x0c'; break;
            case 'n': str[j++] = '\x0a'; break;
            case 'r': str[j++] = '\x0d'; break;
            case 't': str[j++] = '\x09'; break;
            case 'v': str[j++] = '\x0b'; break;
            case 'x':
                {
                    int s = 0;
                    char c = '\0';
                    size_t k;
                    for (k = 0; k < 2; k++) {
                        int d; /* NOTE: Not char, since -1 is never negative if char is unsigned. */
                        c = str[i + k + 1];
                        d = (c >= '0' && c <= '9') ? c - '0' :
                            (c >= 'a' && c <= 'f') ? c - 'a' + 10 :
                            (c >= 'A' && c <= 'F') ? c - 'A' + 10 : -1;
                        if (d < 0) break;
                        s = (s << 4) | d;
                    }
                    if (k < 2) {
                        const size_t l = i + k;
                        str[j++] = '\\'; str[j++] = 'x';
                        while (i <= l) str[j++] = str[++i]; /* copies up to the offending character */
                        if (c == '\0') return FALSE;
                        b = FALSE;
                        continue;
                    }
                    str[j++] = (char)s;
                    i += 2;
                }
                break;
            case 'u':
                {
                    int s = 0, t = 0;
                    char c = '\0';
                    size_t k;
                    for (k = 0; k < 4; k++) {
                        int d; /* NOTE: Not char, since -1 is never negative if char is unsigned. */
                        c = str[i + k + 1];
                        d = (c >= '0' && c <= '9') ? c - '0' :
                            (c >= 'a' && c <= 'f') ? c - 'a' + 10 :
                            (c >= 'A' && c <= 'F') ? c - 'A' + 10 : -1;
                        if (d < 0) break;
                        s = (s << 4) | d;
                    }
                    if (k < 4 || (s & 0xfc00) == 0xdc00) { /* invalid character or invalid surrogate code point */
                        const size_t l = i + k;
                        str[j++] = '\\'; str[j++] = 'u';
                        while (i <= l) str[j++] = str[++i];
                        /* The last character copied can be the terminating null character even if all the 4 digits are valid. */
                        if (str[i] == '\0') return FALSE;
                        b = FALSE;
                        continue;
                    }
                    if ((s & 0xfc00) == 0xd800) { /* surrogate pair */
                        for (k = 4; k < 10; k++) {
                            c = str[i + k + 1];
                            if (k == 4) {
                                if (c != '\\') break;
                            }
                            else if (k == 5) {
                                if (c != 'u') break;
                            }
                            else {
                                const int d =
                                    (c >= '0' && c <= '9') ? c - '0' :
                                    (c >= 'a' && c <= 'f') ? c - 'a' + 10 :
                                    (c >= 'A' && c <= 'F') ? c - 'A' + 10 : -1;
                                if (d < 0) break;
                                t = (t << 4) | d;
                            }
                        }
                        if (k < 10 || (t & 0xfc00) != 0xdc00) { /* invalid character or invalid surrogate code point */
                            const size_t l = i + 4; /* NOTE: Not i + k to redo with recovery. */
                            str[j++] = '\\'; str[j++] = 'u';
                            while (i <= l) str[j++] = str[++i];
                            /* The last character copied is the terminating null character if the string ends just after the 4 digits. */
                            if (str[i] == '\0') return FALSE;
                            b = FALSE;
                            continue;
                        }
                    }
                    {
                        const int u = t ? ((((s & 0x03ff) + 0x0040) << 10) | (t & 0x03ff)) : s;
                        if (u < 0x0080) {
                            str[j++] = (char)u;
                        }
                        else if (u < 0x0800) {
                            str[j++] = (char)(0xc0 | (u >> 6));
                            str[j++] = (char)(0x80 | (u & 0x3f));
                        }
                        else if (u < 0x010000) {
                            str[j++] = (char)(0xe0 | (u >> 12));
                            str[j++] = (char)(0x80 | ((u >> 6) & 0x3f));
                            str[j++] = (char)(0x80 | (u & 0x3f));
                        }
                        else if (u < 0x110000) {
                            str[j++] = (char)(0xf0 | (u >> 18));
                            str[j++] = (char)(0x80 | ((u >> 12) & 0x3f));
                            str[j++] = (char)(0x80 | ((u >> 6) & 0x3f));
                            str[j++] = (char)(0x80 | (u & 0x3f));
                        }
                        else { /* never reached theoretically; in case */
                            const size_t l = i + 10;
                            str[j++] = '\\'; str[j++] = 'u';
                            while (i <= l) str[j++] = str[++i];
                            b = FALSE;
                            continue;
                        }
                    }
                    i += t ? 10 : 4;
                }
                break;
            case '\n': break;
            case '\r': if (str[i + 1] == '\n') i++; break;
            case '\\':
                if (cls) str[j++] = '\\'; /* left for character class matching (ex. considering [\^\]\\]) */
                str[j++] = '\\';
                break;
            default: str[j++] = '\\'; str[j++] = str[i];
            }
        }
        else {
            str[j++] = str[i];
        }
    }
    str[j] = '\0';
    return b;
}
730
/*
 * Makes the representation of a character that can be put in a C string literal.
 *
 * The control characters that have a simple escape sequence, the backslash, and the quotation marks
 * are written as the escape sequences; the other characters from 0x20 to 0x7e are written as they are;
 * every other character is written in the form of \xHH.
 * The result is stored in *buf as a null-terminated string. Returns *buf.
 */
static const char *escape_character(char ch, char (*buf)[5]) {
    const char *esc = NULL; /* the simple escape sequence, if any */
    switch (ch) {
    case '\x00': esc = "\\0"; break;
    case '\x07': esc = "\\a"; break;
    case '\x08': esc = "\\b"; break;
    case '\x0c': esc = "\\f"; break;
    case '\x0a': esc = "\\n"; break;
    case '\x0d': esc = "\\r"; break;
    case '\x09': esc = "\\t"; break;
    case '\x0b': esc = "\\v"; break;
    case '\\': esc = "\\\\"; break;
    case '\'': esc = "\\\'"; break;
    case '\"': esc = "\\\""; break;
    default: break;
    }
    if (esc != NULL) {
        strncpy(*buf, esc, 5);
    }
    else if (ch >= '\x20' && ch < '\x7f') {
        snprintf(*buf, 5, "%c", ch);
    }
    else {
        snprintf(*buf, 5, "\\x%02x", (int)(unsigned char)ch);
    }
    (*buf)[4] = '\0'; /* the termination is ensured regardless of how the buffer has been filled */
    return *buf;
}
753
/* Removes the spaces, \v, \f, \t, \n, and \r at the head of a string in place. */
static void remove_leading_blanks(char *str) {
    const char *src = str;
    char *dst = str;
    while (
        *src == ' ' ||
        *src == '\v' ||
        *src == '\f' ||
        *src == '\t' ||
        *src == '\n' ||
        *src == '\r'
    ) src++;
    while (*src != '\0') *dst++ = *src++;
    *dst = '\0';
}
771
/* Removes the spaces, \v, \f, \t, \n, and \r at the tail of a string in place. */
static void remove_trailing_blanks(char *str) {
    size_t end = strlen(str);
    while (end > 0) {
        const char c = str[end - 1];
        if (c != ' ' && c != '\v' && c != '\f' && c != '\t' && c != '\n' && c != '\r') break;
        end--;
    }
    str[end] = '\0';
}
786
/*
 * Returns the position where the spaces, \v, \f, \t, \n, and \r at the tail of a string begin,
 * i.e. the length of the string without them.
 */
static size_t find_trailing_blanks(const char *str) {
    size_t tail = 0;
    size_t pos;
    for (pos = 0; str[pos] != '\0'; pos++) {
        switch (str[pos]) {
        case ' ':
        case '\v':
        case '\f':
        case '\t':
        case '\n':
        case '\r':
            break;
        default:
            tail = pos + 1;
        }
    }
    return tail;
}
801
/*
 * Counts the characters in the range [start, end) of a string.
 * Counting stops at a null character if there is one in the range.
 */
static size_t count_characters(const char *str, size_t start, size_t end) {
    /* UTF-8 multibyte character support but without checking UTF-8 validity */
    size_t count = 0;
    size_t pos = start;
    while (pos < end) {
        const int lead = (int)(unsigned char)str[pos];
        size_t step = 1; /* for a single-byte character or an invalid code */
        if (lead == 0) break;
        if (lead >= 0x80) {
            if ((lead & 0xe0) == 0xc0) step = 2;
            else if ((lead & 0xf0) == 0xe0) step = 3;
            else if ((lead & 0xf8) == 0xf0) step = 4;
        }
        pos += step;
        count++;
    }
    return count;
}
813
/*
 * Converts a string in place: the ASCII lowercase letters are changed into the uppercase ones,
 * the ASCII uppercase letters and the decimal digits are kept, and every other character is replaced with '_'.
 */
static void make_header_identifier(char *str) {
    char *p;
    for (p = str; *p != '\0'; p++) {
        const char c = *p;
        if ((c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')) continue;
        *p = (c >= 'a' && c <= 'z') ? c - 'a' + 'A' : '_';
    }
}
822
stream__wrap(FILE * file,const char * name,size_t line)823 static stream_t stream__wrap(FILE *file, const char *name, size_t line) {
824 stream_t s;
825 s.file = file;
826 s.name = name;
827 s.line = line;
828 return s;
829 }
830
/*
 * Writes one character to a stream; prints an error message and exits with the status 2 on failure.
 * The line number is incremented when a line feed is written unless line counting is disabled.
 * Returns the value from fputc().
 */
static int stream__putc(stream_t *stream, int c) {
    const int written = fputc(c, stream->file);
    if (written == EOF) {
        print_error("File write error\n");
        exit(2);
    }
    if (c == '\n' && stream->line != VOID_VALUE) stream->line++;
    return written;
}
842
/*
 * Writes a string to a stream; prints an error message and exits with the status 2 on failure.
 * The line number is incremented by the number of the line feeds in the string unless line counting is disabled.
 * Returns the value from fputs().
 */
static int stream__puts(stream_t *stream, const char *s) {
    const int result = fputs(s, stream->file);
    if (result == EOF) {
        print_error("File write error\n");
        exit(2);
    }
    if (stream->line != VOID_VALUE) {
        const char *p;
        for (p = s; *p != '\0'; p++) {
            if (*p == '\n') stream->line++;
        }
    }
    return result;
}
857
858 __attribute__((format(printf, 2, 3)))
stream__printf(stream_t * stream,const char * format,...)859 static int stream__printf(stream_t *stream, const char *format, ...) {
860 if (stream->line != VOID_VALUE) {
861 #define M 1024
862 char s[M], *p = NULL;
863 int n = 0;
864 size_t l = 0;
865 {
866 va_list a;
867 va_start(a, format);
868 n = vsnprintf(NULL, 0, format, a);
869 va_end(a);
870 if (n < 0) {
871 print_error("Internal error\n");
872 exit(2);
873 }
874 l = (size_t)n + 1;
875 }
876 p = (l > M) ? (char *)malloc_e(l) : s;
877 {
878 va_list a;
879 va_start(a, format);
880 n = vsnprintf(p, l, format, a);
881 va_end(a);
882 if (n < 0 || (size_t)n >= l) {
883 print_error("Internal error\n");
884 exit(2);
885 }
886 }
887 stream__puts(stream, p);
888 if (p != s) free(p);
889 return n;
890 #undef M
891 }
892 else {
893 int n;
894 va_list a;
895 va_start(a, format);
896 n = vfprintf(stream->file, format, a);
897 va_end(a);
898 if (n < 0) {
899 print_error("File write error\n");
900 exit(2);
901 }
902 return n;
903 }
904 }
905
/* Writes a character to a stream len times; nothing is written if len is VOID_VALUE. */
static void stream__write_characters(stream_t *stream, char ch, size_t len) {
    if (len == VOID_VALUE) return; /* for safety */
    while (len > 0) {
        stream__putc(stream, ch);
        len--;
    }
}
911
/*
 * Writes the first len characters of the text to a stream, with every line break
 * written as \r\n or as \r alone replaced by a single \n.
 * Nothing is written if len is VOID_VALUE.
 */
static void stream__write_text(stream_t *stream, const char *ptr, size_t len) {
    size_t pos = 0;
    if (len == VOID_VALUE) return; /* for safety */
    while (pos < len) {
        char c = ptr[pos++];
        if (c == '\r') {
            if (pos < len && ptr[pos] == '\n') pos++; /* \r\n is regarded as one line break */
            c = '\n';
        }
        stream__putc(stream, c);
    }
}
925
/*
 * Writes the first len characters of the text to a stream, each one converted with escape_character().
 * Nothing is written if len is VOID_VALUE.
 */
static void stream__write_escaped_string(stream_t *stream, const char *ptr, size_t len) {
    char escaped[5];
    size_t pos = 0;
    if (len == VOID_VALUE) return; /* for safety */
    while (pos < len) {
        stream__puts(stream, escape_character(ptr[pos], &escaped));
        pos++;
    }
}
934
/*
 * Writes a #line directive to a stream.
 * The line number given is 0-based, and written as the 1-based one; the file name is written escaped.
 */
static void stream__write_line_directive(stream_t *stream, const char *fname, size_t lineno) {
    const ulong_t number = (ulong_t)(lineno + 1);
    const size_t length = strlen(fname);
    stream__printf(stream, "#line " FMT_LU " \"", number);
    stream__write_escaped_string(stream, fname, length);
    stream__puts(stream, "\"\n");
}
940
/*
 * Writes a block of code text to a stream with the indent adjusted.
 *
 * stream: the output stream
 * ptr, len: the text; nothing is written if len is VOID_VALUE
 * indent: the number of the spaces put at the head of every line, except the lines whose first non-blank character is '#'
 * fname, lineno: the file name and the line number (0-based) for the #line directive written just before the text
 *
 * The first line is written with its leading blanks removed. For the second and later lines,
 * the smallest indent among them is removed from each of them. The trailing blanks of every line are removed.
 * The #line directives are written only if line counting is enabled in the stream; in that case,
 * another #line directive that points back to the stream itself is written after the text.
 */
static void stream__write_code_block(stream_t *stream, const char *ptr, size_t len, size_t indent, const char *fname, size_t lineno) {
    bool_t b = FALSE; /* TRUE once a line with content has been found */
    size_t i, j, k;
    if (len == VOID_VALUE) return; /* for safety */
    /* j: the end of the content of the first line; k: the beginning of the second line */
    j = find_first_trailing_space(ptr, 0, len, &k);
    for (i = 0; i < j; i++) { /* skips the leading blanks of the first line */
        if (
            ptr[i] != ' ' &&
            ptr[i] != '\v' &&
            ptr[i] != '\f' &&
            ptr[i] != '\t'
        ) break;
    }
    if (i < j) { /* the first line has content */
        if (stream->line != VOID_VALUE)
            stream__write_line_directive(stream, fname, lineno);
        if (ptr[i] != '#')
            stream__write_characters(stream, ' ', indent);
        stream__write_text(stream, ptr + i, j - i);
        stream__putc(stream, '\n');
        b = TRUE;
    }
    else { /* the first line is blank; it is dropped, so the line number goes forward */
        lineno++;
    }
    if (k < len) {
        size_t m = VOID_VALUE; /* the smallest indent found; VOID_VALUE if not found yet */
        size_t h; /* the beginning of the next line */
        /* the 1st pass: finds the smallest indent, and drops the blank lines before the first line with content */
        for (i = k; i < len; i = h) {
            j = find_first_trailing_space(ptr, i, len, &h);
            if (i < j) {
                /* the #line directive has not been written yet if no line with content has been found so far */
                if (stream->line != VOID_VALUE && !b)
                    stream__write_line_directive(stream, fname, lineno);
                if (ptr[i] != '#') { /* NOTE: Here, ptr[i] is the first character of the line including the indent. */
                    const size_t l = count_indent_spaces(ptr, i, j, NULL);
                    if (m == VOID_VALUE || m > l) m = l;
                }
                b = TRUE;
            }
            else {
                if (!b) { /* a blank line before any line with content; the 2nd pass starts after it */
                    k = h;
                    lineno++;
                }
            }
        }
        /* the 2nd pass: writes the lines */
        for (i = k; i < len; i = h) {
            j = find_first_trailing_space(ptr, i, len, &h);
            if (i < j) {
                const size_t l = count_indent_spaces(ptr, i, j, &i); /* i is moved to the first character after the indent */
                if (ptr[i] != '#') {
                    assert(m != VOID_VALUE); /* m must have a valid value */
                    assert(l >= m);
                    stream__write_characters(stream, ' ', l - m + indent);
                }
                stream__write_text(stream, ptr + i, j - i);
                stream__putc(stream, '\n');
                b = TRUE;
            }
            else if (h < len) { /* a blank line is kept as an empty line unless it is the last line */
                stream__putc(stream, '\n');
            }
        }
    }
    /* makes the line information point back to the file being written */
    if (stream->line != VOID_VALUE && b)
        stream__write_line_directive(stream, stream->name, stream->line);
}
1008
/*
 * Returns the pointer to the part of the path after the last '/', '\\', or ':';
 * the path itself is returned if it has none of them.
 */
static const char *extract_filename(const char *path) {
    const char *p = path + strlen(path);
    while (p > path) {
        const char c = p[-1];
        if (c == '/' || c == '\\' || c == ':') break;
        p--;
    }
    return p;
}
1017
/*
 * Returns the pointer to the last '.' in the file name part of the path, i.e. the file extension with the period.
 * The pointer to the terminating null character is returned if the file name part has no '.'.
 */
static const char *extract_fileext(const char *path) {
    const char *const tail = path + strlen(path);
    const char *p = tail;
    while (p > path) {
        const char c = *--p;
        if (c == '.') return p;
        if (c == '/' || c == '\\' || c == ':') break;
    }
    return tail;
}
1028
replace_fileext(const char * path,const char * ext)1029 static char *replace_fileext(const char *path, const char *ext) {
1030 const char *const p = extract_fileext(path);
1031 const size_t m = p - path;
1032 const size_t n = strlen(ext);
1033 char *const s = (char *)malloc_e(m + n + 2);
1034 memcpy(s, path, m);
1035 s[m] = '.';
1036 memcpy(s + m + 1, ext, n + 1);
1037 return s;
1038 }
1039
/*
 * Makes a path with a file extension appended: the whole path, a period, and the given extension.
 * The result is newly allocated with malloc_e(); the caller has to free() it.
 */
static char *add_fileext(const char *path, const char *ext) {
    const size_t pathlen = strlen(path);
    const size_t extlen = strlen(ext);
    char *const result = (char *)malloc_e(pathlen + extlen + 2);
    char *p = result;
    memcpy(p, path, pathlen);
    p += pathlen;
    *p++ = '.';
    memcpy(p, ext, extlen + 1); /* including the terminating null character */
    return result;
}
1049
/* Computes a hash value of a string: starting with 0, the value is multiplied by 31 and a character is added, for every character. */
static size_t hash_string(const char *str) {
    size_t hash = 0;
    const char *p;
    for (p = str; *p != '\0'; p++) {
        hash = hash * 31 + *p;
    }
    return hash;
}
1057
/*
 * Sets all the bits below the highest bit set in a value.
 * Returns the smallest value in the form of 2^n - 1 that is not less than x; 0 results in 0.
 *
 * NOTE: The shift widths are derived from the actual width of size_t,
 *       so that the result does not depend on compiler-specific macros.
 */
static size_t populate_bits(size_t x) {
    size_t shift;
    for (shift = 1; shift < sizeof(size_t) * CHAR_BIT; shift <<= 1) {
        x |= x >> shift;
    }
    return x;
}
1069
column_number(const context_t * ctx)1070 static size_t column_number(const context_t *ctx) { /* 0-based */
1071 assert(ctx->bufpos + ctx->bufcur >= ctx->linepos);
1072 if (ctx->opts.ascii)
1073 return ctx->charnum + ctx->bufcur - ((ctx->linepos > ctx->bufpos) ? ctx->linepos - ctx->bufpos : 0);
1074 else
1075 return ctx->charnum + count_characters(ctx->buffer.buf, (ctx->linepos > ctx->bufpos) ? ctx->linepos - ctx->bufpos : 0, ctx->bufcur);
1076 }
1077
/* Initializes a character array to be empty with no storage. */
static void char_array__init(char_array_t *array) {
    array->buf = NULL;
    array->max = 0;
    array->len = 0;
}
1083
/*
 * Appends a character to a character array.
 * If the storage is full, its capacity is doubled repeatedly, beginning with BUFFER_MIN_SIZE
 * if there is no storage yet, until the new character fits.
 */
static void char_array__add(char_array_t *array, char ch) {
    if (array->len >= array->max) {
        const size_t required = array->len + 1;
        size_t capacity = (array->max != 0) ? array->max : BUFFER_MIN_SIZE;
        while (capacity != 0 && capacity < required) capacity <<= 1;
        if (capacity == 0) capacity = required; /* in case of shift overflow */
        array->buf = (char *)realloc_e(array->buf, capacity);
        array->max = capacity;
    }
    array->buf[array->len] = ch;
    array->len++;
}
1096
/* Releases the storage of a character array; the members are left as they are. */
static void char_array__term(char_array_t *array) {
    free(array->buf);
}
1100
/* Initializes a code block: no text, the length 0, and both the line and the column set to VOID_VALUE. */
static void code_block__init(code_block_t *code) {
    code->line = VOID_VALUE;
    code->col = VOID_VALUE;
    code->len = 0;
    code->text = NULL;
}
1107
/* Releases the text of a code block; the members are left as they are. */
static void code_block__term(code_block_t *code) {
    free(code->text);
}
1111
/* Initializes a code block array to be empty with no storage. */
static void code_block_array__init(code_block_array_t *array) {
    array->buf = NULL;
    array->max = 0;
    array->len = 0;
}
1117
code_block_array__create_entry(code_block_array_t * array)1118 static code_block_t *code_block_array__create_entry(code_block_array_t *array) {
1119 if (array->max <= array->len) {
1120 const size_t n = array->len + 1;
1121 size_t m = array->max;
1122 if (m == 0) m = ARRAY_MIN_SIZE;
1123 while (m < n && m != 0) m <<= 1;
1124 if (m == 0) m = n; /* in case of shift overflow */
1125 array->buf = (code_block_t *)realloc_e(array->buf, sizeof(code_block_t) * m);
1126 array->max = m;
1127 }
1128 code_block__init(&array->buf[array->len]);
1129 return &array->buf[array->len++];
1130 }
1131
/* Terminates every code block from last to first, then frees the array storage. */
static void code_block_array__term(code_block_array_t *array) {
    while (array->len > 0) {
        code_block__term(&array->buf[--array->len]);
    }
    free(array->buf);
}
1139
/* Puts a node array into the empty state: no storage, zero length and capacity. */
static void node_array__init(node_array_t *array) {
    array->buf = NULL;
    array->max = 0;
    array->len = 0;
}
1145
/*
 * Appends a node pointer to the array, growing the storage when it is full.
 * Capacity starts at ARRAY_MIN_SIZE and is doubled until it can hold the new
 * length; if doubling wraps around to 0, the exact required size is used.
 */
static void node_array__add(node_array_t *array, node_t *node) {
    if (array->len >= array->max) {
        const size_t needed = array->len + 1;
        size_t cap = (array->max != 0) ? array->max : ARRAY_MIN_SIZE;
        while (cap != 0 && cap < needed) cap <<= 1;
        if (cap == 0) cap = needed; /* in case of shift overflow */
        array->buf = (node_t **)realloc_e(array->buf, sizeof(node_t *) * cap);
        array->max = cap;
    }
    array->buf[array->len] = node;
    array->len++;
}
1158
1159 static void destroy_node(node_t *node);
1160
/* Destroys every node held by the array, last to first, then frees the array storage. */
static void node_array__term(node_array_t *array) {
    while (array->len > 0) {
        destroy_node(array->buf[--array->len]);
    }
    free(array->buf);
}
1168
/* Puts a const-node array into the empty state: no storage, zero length and capacity. */
static void node_const_array__init(node_const_array_t *array) {
    array->buf = NULL;
    array->max = 0;
    array->len = 0;
}
1174
/*
 * Appends a node pointer to the array without taking ownership of the node
 * (node_const_array__term() frees only the pointer storage).
 * Capacity starts at ARRAY_MIN_SIZE and is doubled until it can hold the new
 * length; if doubling wraps around to 0, the exact required size is used.
 */
static void node_const_array__add(node_const_array_t *array, const node_t *node) {
    if (array->len >= array->max) {
        const size_t needed = array->len + 1;
        size_t cap = (array->max != 0) ? array->max : ARRAY_MIN_SIZE;
        while (cap != 0 && cap < needed) cap <<= 1;
        if (cap == 0) cap = needed; /* in case of shift overflow */
        array->buf = (const node_t **)realloc_e((node_t **)array->buf, sizeof(const node_t *) * cap);
        array->max = cap;
    }
    array->buf[array->len] = node;
    array->len++;
}
1187
/* Empties the array logically; the allocated storage and capacity are kept for reuse. */
static void node_const_array__clear(node_const_array_t *array) {
    array->len = (size_t)0;
}
1191
/* Replaces the contents of array with the node pointers of src, in the same order. */
static void node_const_array__copy(node_const_array_t *array, const node_const_array_t *src) {
    size_t idx = 0;
    node_const_array__clear(array);
    while (idx < src->len) {
        node_const_array__add(array, src->buf[idx]);
        idx++;
    }
}
1199
/* Frees the pointer storage only; the referenced nodes are not destroyed here. */
static void node_const_array__term(node_const_array_t *array) {
    node_t **const storage = (node_t **)array->buf;
    free(storage);
}
1203
create_context(const char * iname,const char * oname,const options_t * opts)1204 static context_t *create_context(const char *iname, const char *oname, const options_t *opts) {
1205 context_t *const ctx = (context_t *)malloc_e(sizeof(context_t));
1206 ctx->iname = strdup_e((iname && iname[0]) ? iname : "-");
1207 ctx->sname = (oname && oname[0]) ? add_fileext(oname, "c") : replace_fileext(ctx->iname, "c");
1208 ctx->hname = (oname && oname[0]) ? add_fileext(oname, "h") : replace_fileext(ctx->iname, "h");
1209 ctx->ifile = (iname && iname[0]) ? fopen_rb_e(ctx->iname) : stdin;
1210 ctx->hid = strdup_e(ctx->hname); make_header_identifier(ctx->hid);
1211 ctx->vtype = NULL;
1212 ctx->atype = NULL;
1213 ctx->prefix = NULL;
1214 ctx->opts = *opts;
1215 ctx->flags = CODE_FLAG__NONE;
1216 ctx->errnum = 0;
1217 ctx->linenum = 0;
1218 ctx->charnum = 0;
1219 ctx->linepos = 0;
1220 ctx->bufpos = 0;
1221 ctx->bufcur = 0;
1222 char_array__init(&ctx->buffer);
1223 node_array__init(&ctx->rules);
1224 ctx->rulehash.mod = 0;
1225 ctx->rulehash.max = 0;
1226 ctx->rulehash.buf = NULL;
1227 code_block_array__init(&ctx->esource);
1228 code_block_array__init(&ctx->eheader);
1229 code_block_array__init(&ctx->source);
1230 code_block_array__init(&ctx->header);
1231 return ctx;
1232 }
1233
create_node(node_type_t type)1234 static node_t *create_node(node_type_t type) {
1235 node_t *const node = (node_t *)malloc_e(sizeof(node_t));
1236 node->type = type;
1237 switch (node->type) {
1238 case NODE_RULE:
1239 node->data.rule.name = NULL;
1240 node->data.rule.expr = NULL;
1241 node->data.rule.ref = 0;
1242 node_const_array__init(&node->data.rule.vars);
1243 node_const_array__init(&node->data.rule.capts);
1244 node_const_array__init(&node->data.rule.codes);
1245 node->data.rule.line = VOID_VALUE;
1246 node->data.rule.col = VOID_VALUE;
1247 break;
1248 case NODE_REFERENCE:
1249 node->data.reference.var = NULL;
1250 node->data.reference.index = VOID_VALUE;
1251 node->data.reference.name = NULL;
1252 node->data.reference.rule = NULL;
1253 node->data.reference.line = VOID_VALUE;
1254 node->data.reference.col = VOID_VALUE;
1255 break;
1256 case NODE_STRING:
1257 node->data.string.value = NULL;
1258 break;
1259 case NODE_CHARCLASS:
1260 node->data.charclass.value = NULL;
1261 break;
1262 case NODE_QUANTITY:
1263 node->data.quantity.min = node->data.quantity.max = 0;
1264 node->data.quantity.expr = NULL;
1265 break;
1266 case NODE_PREDICATE:
1267 node->data.predicate.neg = FALSE;
1268 node->data.predicate.expr = NULL;
1269 break;
1270 case NODE_SEQUENCE:
1271 node_array__init(&node->data.sequence.nodes);
1272 break;
1273 case NODE_ALTERNATE:
1274 node_array__init(&node->data.alternate.nodes);
1275 break;
1276 case NODE_CAPTURE:
1277 node->data.capture.expr = NULL;
1278 node->data.capture.index = VOID_VALUE;
1279 break;
1280 case NODE_EXPAND:
1281 node->data.expand.index = VOID_VALUE;
1282 node->data.expand.line = VOID_VALUE;
1283 node->data.expand.col = VOID_VALUE;
1284 break;
1285 case NODE_ACTION:
1286 code_block__init(&node->data.action.code);
1287 node->data.action.index = VOID_VALUE;
1288 node_const_array__init(&node->data.action.vars);
1289 node_const_array__init(&node->data.action.capts);
1290 break;
1291 case NODE_ERROR:
1292 node->data.error.expr = NULL;
1293 code_block__init(&node->data.error.code);
1294 node->data.error.index = VOID_VALUE;
1295 node_const_array__init(&node->data.error.vars);
1296 node_const_array__init(&node->data.error.capts);
1297 break;
1298 default:
1299 print_error("Internal error [%d]\n", __LINE__);
1300 exit(-1);
1301 }
1302 return node;
1303 }
1304
/*
 * Recursively destroys a node and everything it owns: child expressions,
 * strings, code blocks, and the storage of its pointer arrays.
 * Nodes referenced through const arrays (vars, capts, codes) are not destroyed
 * here; only the arrays themselves are released.
 * NULL is accepted and ignored. An unknown node type is reported as an
 * internal error and terminates the program.
 */
static void destroy_node(node_t *node) {
    if (!node) return;
    switch (node->type) {
    case NODE_RULE:
        destroy_node(node->data.rule.expr);
        free(node->data.rule.name);
        node_const_array__term(&node->data.rule.vars);
        node_const_array__term(&node->data.rule.capts);
        node_const_array__term(&node->data.rule.codes);
        break;
    case NODE_REFERENCE:
        free(node->data.reference.var);
        free(node->data.reference.name);
        break;
    case NODE_STRING:
        free(node->data.string.value);
        break;
    case NODE_CHARCLASS:
        free(node->data.charclass.value);
        break;
    case NODE_QUANTITY:
        destroy_node(node->data.quantity.expr);
        break;
    case NODE_PREDICATE:
        destroy_node(node->data.predicate.expr);
        break;
    case NODE_SEQUENCE:
        node_array__term(&node->data.sequence.nodes);
        break;
    case NODE_ALTERNATE:
        node_array__term(&node->data.alternate.nodes);
        break;
    case NODE_CAPTURE:
        destroy_node(node->data.capture.expr);
        break;
    case NODE_EXPAND:
        /* owns nothing besides the node itself */
        break;
    case NODE_ACTION:
        code_block__term(&node->data.action.code);
        node_const_array__term(&node->data.action.vars);
        node_const_array__term(&node->data.action.capts);
        break;
    case NODE_ERROR:
        destroy_node(node->data.error.expr);
        code_block__term(&node->data.error.code);
        node_const_array__term(&node->data.error.vars);
        node_const_array__term(&node->data.error.capts);
        break;
    default:
        print_error("Internal error [%d]\n", __LINE__);
        exit(-1);
    }
    free(node);
}
1359
/*
 * Releases everything owned by a context: code block arrays, the rule hash
 * table, the rule nodes, the input buffer, all owned strings, the input
 * stream, and finally the context itself. NULL is accepted and ignored.
 */
static void destroy_context(context_t *ctx) {
    if (!ctx) return;

    /* containers */
    code_block_array__term(&ctx->header);
    code_block_array__term(&ctx->source);
    code_block_array__term(&ctx->eheader);
    code_block_array__term(&ctx->esource);
    free((node_t **)ctx->rulehash.buf); /* the hash table only borrows the rule nodes */
    node_array__term(&ctx->rules);
    char_array__term(&ctx->buffer);

    /* strings set while parsing */
    free(ctx->prefix);
    free(ctx->atype);
    free(ctx->vtype);

    /* header identifier, input stream and file names */
    free(ctx->hid);
    fclose_e(ctx->ifile);
    free(ctx->hname);
    free(ctx->sname);
    free(ctx->iname);

    free(ctx);
}
1379
/*
 * Builds the rule-name hash table (open addressing, linear probing).
 * The table size is populate_bits(4 * number of rules) + 1, so "& mod"
 * works as the index mask.
 * The first rule of a given name is entered in the table; any later rule with
 * the same name is left out and flagged by setting its ref field to -1.
 */
static void make_rulehash(context_t *ctx) {
    size_t i, j;
    ctx->rulehash.mod = populate_bits(ctx->rules.len * 4);
    ctx->rulehash.max = ctx->rulehash.mod + 1;
    ctx->rulehash.buf = (const node_t **)realloc_e((node_t **)ctx->rulehash.buf, sizeof(const node_t *) * ctx->rulehash.max);
    for (i = 0; i < ctx->rulehash.max; i++) ctx->rulehash.buf[i] = NULL;
    for (i = 0; i < ctx->rules.len; i++) {
        node_t *const rule = ctx->rules.buf[i];
        assert(rule->type == NODE_RULE);
        /* probe until an empty slot or a slot holding the same name is reached */
        j = hash_string(rule->data.rule.name) & ctx->rulehash.mod;
        while (
            ctx->rulehash.buf[j] != NULL &&
            strcmp(rule->data.rule.name, ctx->rulehash.buf[j]->data.rule.name) != 0
        ) {
            j = (j + 1) & ctx->rulehash.mod;
        }
        if (ctx->rulehash.buf[j] == NULL) {
            ctx->rulehash.buf[j] = rule;
        }
        else { /* duplicate rule name */
            assert(rule->data.rule.ref == 0);
            assert(ctx->rulehash.buf[j]->data.rule.ref == 0);
            rule->data.rule.ref = -1;
        }
    }
}
1405
lookup_rulehash(const context_t * ctx,const char * name)1406 static const node_t *lookup_rulehash(const context_t *ctx, const char *name) {
1407 size_t j = hash_string(name) & ctx->rulehash.mod;
1408 while (ctx->rulehash.buf[j] != NULL && strcmp(name, ctx->rulehash.buf[j]->data.rule.name) != 0) {
1409 j = (j + 1) & ctx->rulehash.mod;
1410 }
1411 return (ctx->rulehash.buf[j] != NULL) ? ctx->rulehash.buf[j] : NULL;
1412 }
1413
/*
 * Walks an expression tree and resolves every reference node to its rule.
 * A resolved reference increments the rule's ref counter; an unresolved one is
 * reported with its source position and counted in ctx->errnum.
 * Rule nodes must not appear inside an expression; meeting one, or an unknown
 * node type, is an internal error that terminates the program.
 */
static void link_references(context_t *ctx, node_t *node) {
    size_t i;
    if (node == NULL) return;
    switch (node->type) {
    case NODE_RULE:
        print_error("Internal error [%d]\n", __LINE__);
        exit(-1);
    case NODE_REFERENCE:
        {
            const node_t *const rule = lookup_rulehash(ctx, node->data.reference.name);
            node->data.reference.rule = rule;
            if (rule != NULL) {
                assert(rule->type == NODE_RULE);
                ((node_t *)rule)->data.rule.ref++;
            }
            else {
                print_error("%s:" FMT_LU ":" FMT_LU ": No definition of rule '%s'\n",
                    ctx->iname, (ulong_t)(node->data.reference.line + 1), (ulong_t)(node->data.reference.col + 1),
                    node->data.reference.name);
                ctx->errnum++;
            }
        }
        break;
    case NODE_STRING:
    case NODE_CHARCLASS:
    case NODE_EXPAND:
    case NODE_ACTION:
        /* leaves without references */
        break;
    case NODE_QUANTITY:
        link_references(ctx, node->data.quantity.expr);
        break;
    case NODE_PREDICATE:
        link_references(ctx, node->data.predicate.expr);
        break;
    case NODE_SEQUENCE:
        for (i = 0; i < node->data.sequence.nodes.len; i++) {
            link_references(ctx, node->data.sequence.nodes.buf[i]);
        }
        break;
    case NODE_ALTERNATE:
        for (i = 0; i < node->data.alternate.nodes.len; i++) {
            link_references(ctx, node->data.alternate.nodes.buf[i]);
        }
        break;
    case NODE_CAPTURE:
        link_references(ctx, node->data.capture.expr);
        break;
    case NODE_ERROR:
        link_references(ctx, node->data.error.expr);
        break;
    default:
        print_error("Internal error [%d]\n", __LINE__);
        exit(-1);
    }
}
1474
/*
 * Walks an expression tree and records, for each action and error node, the
 * variables (named references) collected up to that point of the traversal.
 *
 * vars accumulates the references seen so far, one per distinct index. Passing
 * NULL makes the function use a temporary array of its own for the traversal.
 * For an alternation, each alternative starts from the variables known before
 * the alternation, and the variables introduced by any alternative are merged
 * into vars afterwards.
 */
static void verify_variables(context_t *ctx, node_t *node, node_const_array_t *vars) {
    node_const_array_t local;
    const bool_t own = (vars == NULL) ? TRUE : FALSE;
    size_t i;
    if (node == NULL) return;
    if (own) {
        node_const_array__init(&local);
        vars = &local;
    }
    switch (node->type) {
    case NODE_RULE:
        print_error("Internal error [%d]\n", __LINE__);
        exit(-1);
    case NODE_REFERENCE:
        if (node->data.reference.index == VOID_VALUE) break; /* reference without a variable */
        for (i = 0; i < vars->len; i++) {
            assert(vars->buf[i]->type == NODE_REFERENCE);
            if (vars->buf[i]->data.reference.index == node->data.reference.index) break;
        }
        if (i == vars->len) node_const_array__add(vars, node);
        break;
    case NODE_STRING:
    case NODE_CHARCLASS:
    case NODE_EXPAND:
        break;
    case NODE_QUANTITY:
        verify_variables(ctx, node->data.quantity.expr, vars);
        break;
    case NODE_PREDICATE:
        verify_variables(ctx, node->data.predicate.expr, vars);
        break;
    case NODE_SEQUENCE:
        for (i = 0; i < node->data.sequence.nodes.len; i++) {
            verify_variables(ctx, node->data.sequence.nodes.buf[i], vars);
        }
        break;
    case NODE_ALTERNATE:
        {
            const size_t base = vars->len; /* number of variables known before the alternation */
            size_t j, k;
            node_const_array_t scratch;
            node_const_array__init(&scratch);
            node_const_array__copy(&scratch, vars);
            for (i = 0; i < node->data.alternate.nodes.len; i++) {
                scratch.len = base; /* drop what the previous alternative added */
                verify_variables(ctx, node->data.alternate.nodes.buf[i], &scratch);
                for (j = base; j < scratch.len; j++) {
                    for (k = base; k < vars->len; k++) {
                        if (scratch.buf[j]->data.reference.index == vars->buf[k]->data.reference.index) break;
                    }
                    if (k == vars->len) node_const_array__add(vars, scratch.buf[j]);
                }
            }
            node_const_array__term(&scratch);
        }
        break;
    case NODE_CAPTURE:
        verify_variables(ctx, node->data.capture.expr, vars);
        break;
    case NODE_ACTION:
        node_const_array__copy(&node->data.action.vars, vars);
        break;
    case NODE_ERROR:
        node_const_array__copy(&node->data.error.vars, vars);
        verify_variables(ctx, node->data.error.expr, vars);
        break;
    default:
        print_error("Internal error [%d]\n", __LINE__);
        exit(-1);
    }
    if (own) {
        node_const_array__term(&local);
    }
}
1554
/*
 * Walks an expression tree and records, for each action and error node, the
 * captures collected up to that point of the traversal. It also checks that
 * every expand node refers to a capture already collected; a violation is
 * reported with its source position and counted in ctx->errnum.
 *
 * capts accumulates the capture nodes seen so far. Passing NULL makes the
 * function use a temporary array of its own for the traversal.
 * For an alternation, each alternative starts from the captures known before
 * the alternation, and the captures added by every alternative are appended
 * to capts afterwards.
 */
static void verify_captures(context_t *ctx, node_t *node, node_const_array_t *capts) {
    node_const_array_t local;
    const bool_t own = (capts == NULL) ? TRUE : FALSE;
    size_t i;
    if (node == NULL) return;
    if (own) {
        node_const_array__init(&local);
        capts = &local;
    }
    switch (node->type) {
    case NODE_RULE:
        print_error("Internal error [%d]\n", __LINE__);
        exit(-1);
    case NODE_REFERENCE:
    case NODE_STRING:
    case NODE_CHARCLASS:
        break;
    case NODE_QUANTITY:
        verify_captures(ctx, node->data.quantity.expr, capts);
        break;
    case NODE_PREDICATE:
        verify_captures(ctx, node->data.predicate.expr, capts);
        break;
    case NODE_SEQUENCE:
        for (i = 0; i < node->data.sequence.nodes.len; i++) {
            verify_captures(ctx, node->data.sequence.nodes.buf[i], capts);
        }
        break;
    case NODE_ALTERNATE:
        {
            const size_t base = capts->len; /* number of captures known before the alternation */
            size_t j;
            node_const_array_t scratch;
            node_const_array__init(&scratch);
            node_const_array__copy(&scratch, capts);
            for (i = 0; i < node->data.alternate.nodes.len; i++) {
                scratch.len = base; /* drop what the previous alternative added */
                verify_captures(ctx, node->data.alternate.nodes.buf[i], &scratch);
                for (j = base; j < scratch.len; j++) {
                    node_const_array__add(capts, scratch.buf[j]);
                }
            }
            node_const_array__term(&scratch);
        }
        break;
    case NODE_CAPTURE:
        /* the capture becomes available only after its own expression */
        verify_captures(ctx, node->data.capture.expr, capts);
        node_const_array__add(capts, node);
        break;
    case NODE_EXPAND:
        for (i = 0; i < capts->len; i++) {
            assert(capts->buf[i]->type == NODE_CAPTURE);
            if (node->data.expand.index == capts->buf[i]->data.capture.index) break;
        }
        if (i >= capts->len && node->data.expand.index != VOID_VALUE) {
            print_error("%s:" FMT_LU ":" FMT_LU ": Capture " FMT_LU " not available at this position\n",
                ctx->iname, (ulong_t)(node->data.expand.line + 1), (ulong_t)(node->data.expand.col + 1), (ulong_t)(node->data.expand.index + 1));
            ctx->errnum++;
        }
        break;
    case NODE_ACTION:
        node_const_array__copy(&node->data.action.capts, capts);
        break;
    case NODE_ERROR:
        node_const_array__copy(&node->data.error.capts, capts);
        verify_captures(ctx, node->data.error.expr, capts);
        break;
    default:
        print_error("Internal error [%d]\n", __LINE__);
        exit(-1);
    }
    if (own) {
        node_const_array__term(&local);
    }
}
1636
/*
 * Writes a string to stdout, passing each character through escape_character().
 * A NULL string is written as the word "null".
 */
static void dump_escaped_string(const char *str) {
    char s[5]; /* scratch buffer handed to escape_character() */
    const char *p;
    if (str == NULL) {
        fprintf(stdout, "null");
        return;
    }
    for (p = str; *p != '\0'; p++) {
        fprintf(stdout, "%s", escape_character(*p, &s));
    }
}
1647
dump_integer_value(size_t value)1648 static void dump_integer_value(size_t value) {
1649 if (value == VOID_VALUE) {
1650 fprintf(stdout, "void");
1651 }
1652 else {
1653 fprintf(stdout, FMT_LU, (ulong_t)value);
1654 }
1655 }
1656
/*
 * Writes a human-readable dump of a node and its subtree to stdout.
 *
 * ctx:    context, only passed on to recursive calls.
 * node:   node to dump; NULL is ignored.
 * indent: number of spaces put before each line of this node; children are
 *         indented by two more.
 *
 * Strings that may legitimately be absent (the variable name of a reference
 * and the name of an unresolved rule) are written as "(null)" explicitly,
 * because passing a null pointer to a %s conversion is undefined behavior.
 * An unknown node type is reported as an internal error and terminates the
 * program.
 */
static void dump_node(context_t *ctx, const node_t *node, const int indent) {
    if (node == NULL) return;
    switch (node->type) {
    case NODE_RULE:
        fprintf(stdout, "%*sRule(name:'%s', ref:%d, vars.len:" FMT_LU ", capts.len:" FMT_LU ", codes.len:" FMT_LU ") {\n",
            indent, "", node->data.rule.name, node->data.rule.ref,
            (ulong_t)node->data.rule.vars.len, (ulong_t)node->data.rule.capts.len, (ulong_t)node->data.rule.codes.len);
        dump_node(ctx, node->data.rule.expr, indent + 2);
        fprintf(stdout, "%*s}\n", indent, "");
        break;
    case NODE_REFERENCE:
        /* var is NULL for a reference without a variable */
        fprintf(stdout, "%*sReference(var:'%s', index:", indent, "",
            (node->data.reference.var != NULL) ? node->data.reference.var : "(null)");
        dump_integer_value(node->data.reference.index);
        /* rule is NULL until the reference has been linked to a rule */
        fprintf(stdout, ", name:'%s', rule:'%s')\n", node->data.reference.name,
            (node->data.reference.rule != NULL) ? node->data.reference.rule->data.rule.name : "(null)");
        break;
    case NODE_STRING:
        fprintf(stdout, "%*sString(value:'", indent, "");
        dump_escaped_string(node->data.string.value);
        fprintf(stdout, "')\n");
        break;
    case NODE_CHARCLASS:
        fprintf(stdout, "%*sCharclass(value:'", indent, "");
        dump_escaped_string(node->data.charclass.value);
        fprintf(stdout, "')\n");
        break;
    case NODE_QUANTITY:
        fprintf(stdout, "%*sQuantity(min:%d, max:%d) {\n", indent, "", node->data.quantity.min, node->data.quantity.max);
        dump_node(ctx, node->data.quantity.expr, indent + 2);
        fprintf(stdout, "%*s}\n", indent, "");
        break;
    case NODE_PREDICATE:
        fprintf(stdout, "%*sPredicate(neg:%d) {\n", indent, "", node->data.predicate.neg);
        dump_node(ctx, node->data.predicate.expr, indent + 2);
        fprintf(stdout, "%*s}\n", indent, "");
        break;
    case NODE_SEQUENCE:
        fprintf(stdout, "%*sSequence(max:" FMT_LU ", len:" FMT_LU ") {\n",
            indent, "", (ulong_t)node->data.sequence.nodes.max, (ulong_t)node->data.sequence.nodes.len);
        {
            size_t i;
            for (i = 0; i < node->data.sequence.nodes.len; i++) {
                dump_node(ctx, node->data.sequence.nodes.buf[i], indent + 2);
            }
        }
        fprintf(stdout, "%*s}\n", indent, "");
        break;
    case NODE_ALTERNATE:
        fprintf(stdout, "%*sAlternate(max:" FMT_LU ", len:" FMT_LU ") {\n",
            indent, "", (ulong_t)node->data.alternate.nodes.max, (ulong_t)node->data.alternate.nodes.len);
        {
            size_t i;
            for (i = 0; i < node->data.alternate.nodes.len; i++) {
                dump_node(ctx, node->data.alternate.nodes.buf[i], indent + 2);
            }
        }
        fprintf(stdout, "%*s}\n", indent, "");
        break;
    case NODE_CAPTURE:
        fprintf(stdout, "%*sCapture(index:", indent, "");
        dump_integer_value(node->data.capture.index);
        fprintf(stdout, ") {\n");
        dump_node(ctx, node->data.capture.expr, indent + 2);
        fprintf(stdout, "%*s}\n", indent, "");
        break;
    case NODE_EXPAND:
        fprintf(stdout, "%*sExpand(index:", indent, "");
        dump_integer_value(node->data.expand.index);
        fprintf(stdout, ")\n");
        break;
    case NODE_ACTION:
        fprintf(stdout, "%*sAction(index:", indent, "");
        dump_integer_value(node->data.action.index);
        fprintf(stdout, ", code:{");
        dump_escaped_string(node->data.action.code.text);
        fprintf(stdout, "}, vars:");
        if (node->data.action.vars.len + node->data.action.capts.len > 0) {
            size_t i;
            fprintf(stdout, "\n");
            for (i = 0; i < node->data.action.vars.len; i++) {
                fprintf(stdout, "%*s'%s'\n", indent + 2, "", node->data.action.vars.buf[i]->data.reference.var);
            }
            for (i = 0; i < node->data.action.capts.len; i++) {
                fprintf(stdout, "%*s$" FMT_LU "\n", indent + 2, "", (ulong_t)(node->data.action.capts.buf[i]->data.capture.index + 1));
            }
            fprintf(stdout, "%*s)\n", indent, "");
        }
        else {
            fprintf(stdout, "none)\n");
        }
        break;
    case NODE_ERROR:
        fprintf(stdout, "%*sError(index:", indent, "");
        dump_integer_value(node->data.error.index);
        fprintf(stdout, ", code:{");
        dump_escaped_string(node->data.error.code.text);
        fprintf(stdout, "}, vars:\n");
        {
            size_t i;
            for (i = 0; i < node->data.error.vars.len; i++) {
                fprintf(stdout, "%*s'%s'\n", indent + 2, "", node->data.error.vars.buf[i]->data.reference.var);
            }
            for (i = 0; i < node->data.error.capts.len; i++) {
                fprintf(stdout, "%*s$" FMT_LU "\n", indent + 2, "", (ulong_t)(node->data.error.capts.buf[i]->data.capture.index + 1));
            }
        }
        fprintf(stdout, "%*s) {\n", indent, "");
        dump_node(ctx, node->data.error.expr, indent + 2);
        fprintf(stdout, "%*s}\n", indent, "");
        break;
    default:
        print_error("%*sInternal error [%d]\n", indent, "", __LINE__);
        exit(-1);
    }
}
1772
/*
 * Makes sure that, if the input allows, at least num bytes are buffered from
 * the current position onward, reading the input one byte at a time.
 * Returns the number of bytes actually available from the current position,
 * which is less than num when the end of input is reached first.
 */
static size_t refill_buffer(context_t *ctx, size_t num) {
    const size_t wanted = ctx->bufcur + num; /* required buffer length */
    while (ctx->buffer.len < wanted) {
        const int c = fgetc_e(ctx->ifile);
        if (c == EOF) break;
        char_array__add(&ctx->buffer, (char)c);
    }
    return ctx->buffer.len - ctx->bufcur;
}
1782
/*
 * Discards the consumed part of the buffer (everything before the current
 * position) and moves the rest to the front.
 * When the current line began before the current position, the discarded part
 * is first added to ctx->charnum so that later column calculations still
 * account for it.
 */
static void commit_buffer(context_t *ctx) {
    const size_t consumed = ctx->bufcur;
    assert(ctx->buffer.len >= consumed);
    if (ctx->linepos < ctx->bufpos + consumed) {
        if (ctx->opts.ascii) {
            ctx->charnum += consumed;
        }
        else {
            ctx->charnum += count_characters(ctx->buffer.buf, 0, consumed);
        }
    }
    memmove(ctx->buffer.buf, ctx->buffer.buf + consumed, ctx->buffer.len - consumed);
    ctx->buffer.len -= consumed;
    ctx->bufpos += consumed;
    ctx->bufcur = 0;
}
1792
match_eof(context_t * ctx)1793 static bool_t match_eof(context_t *ctx) {
1794 return (refill_buffer(ctx, 1) < 1) ? TRUE : FALSE;
1795 }
1796
match_eol(context_t * ctx)1797 static bool_t match_eol(context_t *ctx) {
1798 if (refill_buffer(ctx, 1) >= 1) {
1799 switch (ctx->buffer.buf[ctx->bufcur]) {
1800 case '\n':
1801 ctx->bufcur++;
1802 ctx->linenum++;
1803 ctx->charnum = 0;
1804 ctx->linepos = ctx->bufpos + ctx->bufcur;
1805 return TRUE;
1806 case '\r':
1807 ctx->bufcur++;
1808 if (refill_buffer(ctx, 1) >= 1) {
1809 if (ctx->buffer.buf[ctx->bufcur] == '\n') ctx->bufcur++;
1810 }
1811 ctx->linenum++;
1812 ctx->charnum = 0;
1813 ctx->linepos = ctx->bufpos + ctx->bufcur;
1814 return TRUE;
1815 }
1816 }
1817 return FALSE;
1818 }
1819
match_character(context_t * ctx,char ch)1820 static bool_t match_character(context_t *ctx, char ch) {
1821 if (refill_buffer(ctx, 1) >= 1) {
1822 if (ctx->buffer.buf[ctx->bufcur] == ch) {
1823 ctx->bufcur++;
1824 return TRUE;
1825 }
1826 }
1827 return FALSE;
1828 }
1829
match_character_range(context_t * ctx,char min,char max)1830 static bool_t match_character_range(context_t *ctx, char min, char max) {
1831 if (refill_buffer(ctx, 1) >= 1) {
1832 const char c = ctx->buffer.buf[ctx->bufcur];
1833 if (c >= min && c <= max) {
1834 ctx->bufcur++;
1835 return TRUE;
1836 }
1837 }
1838 return FALSE;
1839 }
1840
match_character_set(context_t * ctx,const char * chs)1841 static bool_t match_character_set(context_t *ctx, const char *chs) {
1842 if (refill_buffer(ctx, 1) >= 1) {
1843 const char c = ctx->buffer.buf[ctx->bufcur];
1844 size_t i;
1845 for (i = 0; chs[i]; i++) {
1846 if (c == chs[i]) {
1847 ctx->bufcur++;
1848 return TRUE;
1849 }
1850 }
1851 }
1852 return FALSE;
1853 }
1854
match_character_any(context_t * ctx)1855 static bool_t match_character_any(context_t *ctx) {
1856 if (refill_buffer(ctx, 1) >= 1) {
1857 ctx->bufcur++;
1858 return TRUE;
1859 }
1860 return FALSE;
1861 }
1862
match_string(context_t * ctx,const char * str)1863 static bool_t match_string(context_t *ctx, const char *str) {
1864 const size_t n = strlen(str);
1865 if (refill_buffer(ctx, n) >= n) {
1866 if (strncmp(ctx->buffer.buf + ctx->bufcur, str, n) == 0) {
1867 ctx->bufcur += n;
1868 return TRUE;
1869 }
1870 }
1871 return FALSE;
1872 }
1873
match_blank(context_t * ctx)1874 static bool_t match_blank(context_t *ctx) {
1875 return match_character_set(ctx, " \t\v\f");
1876 }
1877
match_section_line_(context_t * ctx,const char * head)1878 static bool_t match_section_line_(context_t *ctx, const char *head) {
1879 if (match_string(ctx, head)) {
1880 while (!match_eol(ctx) && !match_eof(ctx)) match_character_any(ctx);
1881 return TRUE;
1882 }
1883 return FALSE;
1884 }
1885
match_section_line_continuable_(context_t * ctx,const char * head)1886 static bool_t match_section_line_continuable_(context_t *ctx, const char *head) {
1887 if (match_string(ctx, head)) {
1888 while (!match_eof(ctx)) {
1889 const size_t p = ctx->bufcur;
1890 if (match_eol(ctx)) {
1891 if (ctx->buffer.buf[p - 1] != '\\') break;
1892 }
1893 else {
1894 match_character_any(ctx);
1895 }
1896 }
1897 return TRUE;
1898 }
1899 return FALSE;
1900 }
1901
match_section_block_(context_t * ctx,const char * left,const char * right,const char * name)1902 static bool_t match_section_block_(context_t *ctx, const char *left, const char *right, const char *name) {
1903 const size_t l = ctx->linenum;
1904 const size_t m = column_number(ctx);
1905 if (match_string(ctx, left)) {
1906 while (!match_string(ctx, right)) {
1907 if (match_eof(ctx)) {
1908 print_error("%s:" FMT_LU ":" FMT_LU ": Premature EOF in %s\n", ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1), name);
1909 ctx->errnum++;
1910 break;
1911 }
1912 if (!match_eol(ctx)) match_character_any(ctx);
1913 }
1914 return TRUE;
1915 }
1916 return FALSE;
1917 }
1918
match_quotation_(context_t * ctx,const char * left,const char * right,const char * name)1919 static bool_t match_quotation_(context_t *ctx, const char *left, const char *right, const char *name) {
1920 const size_t l = ctx->linenum;
1921 const size_t m = column_number(ctx);
1922 if (match_string(ctx, left)) {
1923 while (!match_string(ctx, right)) {
1924 if (match_eof(ctx)) {
1925 print_error("%s:" FMT_LU ":" FMT_LU ": Premature EOF in %s\n", ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1), name);
1926 ctx->errnum++;
1927 break;
1928 }
1929 if (match_character(ctx, '\\')) {
1930 if (!match_eol(ctx)) match_character_any(ctx);
1931 }
1932 else {
1933 if (match_eol(ctx)) {
1934 print_error("%s:" FMT_LU ":" FMT_LU ": Premature EOL in %s\n", ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1), name);
1935 ctx->errnum++;
1936 break;
1937 }
1938 match_character_any(ctx);
1939 }
1940 }
1941 return TRUE;
1942 }
1943 return FALSE;
1944 }
1945
match_directive_c(context_t * ctx)1946 static bool_t match_directive_c(context_t *ctx) {
1947 return match_section_line_continuable_(ctx, "#");
1948 }
1949
match_comment(context_t * ctx)1950 static bool_t match_comment(context_t *ctx) {
1951 return match_section_line_(ctx, "#");
1952 }
1953
match_comment_c(context_t * ctx)1954 static bool_t match_comment_c(context_t *ctx) {
1955 return match_section_block_(ctx, "/*", "*/", "C comment");
1956 }
1957
match_comment_cxx(context_t * ctx)1958 static bool_t match_comment_cxx(context_t *ctx) {
1959 return match_section_line_(ctx, "//");
1960 }
1961
match_quotation_single(context_t * ctx)1962 static bool_t match_quotation_single(context_t *ctx) {
1963 return match_quotation_(ctx, "\'", "\'", "single quotation");
1964 }
1965
match_quotation_double(context_t * ctx)1966 static bool_t match_quotation_double(context_t *ctx) {
1967 return match_quotation_(ctx, "\"", "\"", "double quotation");
1968 }
1969
match_character_class(context_t * ctx)1970 static bool_t match_character_class(context_t *ctx) {
1971 return match_quotation_(ctx, "[", "]", "character class");
1972 }
1973
match_spaces(context_t * ctx)1974 static bool_t match_spaces(context_t *ctx) {
1975 size_t n = 0;
1976 while (match_blank(ctx) || match_eol(ctx) || match_comment(ctx)) n++;
1977 return (n > 0) ? TRUE : FALSE;
1978 }
1979
match_number(context_t * ctx)1980 static bool_t match_number(context_t *ctx) {
1981 if (match_character_range(ctx, '0', '9')) {
1982 while (match_character_range(ctx, '0', '9'));
1983 return TRUE;
1984 }
1985 return FALSE;
1986 }
1987
match_identifier(context_t * ctx)1988 static bool_t match_identifier(context_t *ctx) {
1989 if (
1990 match_character_range(ctx, 'a', 'z') ||
1991 match_character_range(ctx, 'A', 'Z') ||
1992 match_character(ctx, '_')
1993 ) {
1994 while (
1995 match_character_range(ctx, 'a', 'z') ||
1996 match_character_range(ctx, 'A', 'Z') ||
1997 match_character_range(ctx, '0', '9') ||
1998 match_character(ctx, '_')
1999 );
2000 return TRUE;
2001 }
2002 return FALSE;
2003 }
2004
match_code_block(context_t * ctx)2005 static bool_t match_code_block(context_t *ctx) {
2006 const size_t l = ctx->linenum;
2007 const size_t m = column_number(ctx);
2008 if (match_character(ctx, '{')) {
2009 int d = 1;
2010 for (;;) {
2011 if (match_eof(ctx)) {
2012 print_error("%s:" FMT_LU ":" FMT_LU ": Premature EOF in code block\n", ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1));
2013 ctx->errnum++;
2014 break;
2015 }
2016 if (
2017 match_directive_c(ctx) ||
2018 match_comment_c(ctx) ||
2019 match_comment_cxx(ctx) ||
2020 match_quotation_single(ctx) ||
2021 match_quotation_double(ctx)
2022 ) continue;
2023 if (match_character(ctx, '{')) {
2024 d++;
2025 }
2026 else if (match_character(ctx, '}')) {
2027 d--;
2028 if (d == 0) break;
2029 }
2030 else {
2031 if (!match_eol(ctx)) {
2032 if (match_character(ctx, '$')) {
2033 ctx->buffer.buf[ctx->bufcur - 1] = '_';
2034 }
2035 else {
2036 match_character_any(ctx);
2037 }
2038 }
2039 }
2040 }
2041 return TRUE;
2042 }
2043 return FALSE;
2044 }
2045
match_footer_start(context_t * ctx)2046 static bool_t match_footer_start(context_t *ctx) {
2047 return match_string(ctx, "%%");
2048 }
2049
2050 static node_t *parse_expression(context_t *ctx, node_t *rule);
2051
/*
 * Parses one primary expression of a rule body at the current position.
 *
 * The alternatives are tried in this order:
 *   [var ':'] identifier   NODE_REFERENCE; refused when "<-" follows
 *                          (presumably the identifier then names the next rule)
 *   '(' expression ')'     the node of the inner expression
 *   '<' expression '>'     NODE_CAPTURE, registered in rule->data.rule.capts
 *   '$' number             NODE_EXPAND (the stored index is the number minus 1)
 *   '.'                    NODE_CHARCLASS with a NULL value
 *   character class        NODE_CHARCLASS holding the unescaped class text
 *   quoted string          NODE_STRING holding the unescaped text
 *   code block             NODE_ACTION, registered in rule->data.rule.codes
 *
 * Recoverable problems (bad escape, invalid UTF-8, bad '$' number, ...) are
 * reported with print_error() and counted in ctx->errnum, but a node is still returned.
 *
 * Returns the created node, or NULL if no alternative matched; in the latter
 * case the buffer position and the line bookkeeping of ctx are restored.
 */
static node_t *parse_primary(context_t *ctx, node_t *rule) {
    const size_t p = ctx->bufcur;
    const size_t l = ctx->linenum;
    const size_t m = column_number(ctx);
    const size_t n = ctx->charnum;
    const size_t o = ctx->linepos;
    node_t *n_p = NULL;
    if (match_identifier(ctx)) {
        const size_t q = ctx->bufcur; /* end of the first identifier */
        size_t r = VOID_VALUE, s = VOID_VALUE; /* span of the identifier after ':', if any */
        match_spaces(ctx);
        if (match_character(ctx, ':')) {
            match_spaces(ctx);
            r = ctx->bufcur;
            if (!match_identifier(ctx)) goto EXCEPTION;
            s = ctx->bufcur;
            match_spaces(ctx);
        }
        if (match_string(ctx, "<-")) goto EXCEPTION;
        n_p = create_node(NODE_REFERENCE);
        if (r == VOID_VALUE) {
            /* plain rule reference without a variable */
            assert(q >= p);
            n_p->data.reference.var = NULL;
            n_p->data.reference.index = VOID_VALUE;
            n_p->data.reference.name = strndup_e(ctx->buffer.buf + p, q - p);
        }
        else {
            assert(s != VOID_VALUE); /* s should have a valid value when r has a valid value */
            assert(q >= p);
            n_p->data.reference.var = strndup_e(ctx->buffer.buf + p, q - p);
            if (n_p->data.reference.var[0] == '_') {
                print_error("%s:" FMT_LU ":" FMT_LU ": Leading underscore in variable name '%s'\n",
                    ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1), n_p->data.reference.var);
                ctx->errnum++;
            }
            {
                /* reuse the index of a variable with the same name, or register a new one */
                size_t i;
                for (i = 0; i < rule->data.rule.vars.len; i++) {
                    assert(rule->data.rule.vars.buf[i]->type == NODE_REFERENCE);
                    if (strcmp(n_p->data.reference.var, rule->data.rule.vars.buf[i]->data.reference.var) == 0) break;
                }
                if (i == rule->data.rule.vars.len) node_const_array__add(&rule->data.rule.vars, n_p);
                n_p->data.reference.index = i;
            }
            assert(s >= r);
            n_p->data.reference.name = strndup_e(ctx->buffer.buf + r, s - r);
        }
        n_p->data.reference.line = l;
        n_p->data.reference.col = m;
    }
    else if (match_character(ctx, '(')) {
        match_spaces(ctx);
        n_p = parse_expression(ctx, rule);
        if (n_p == NULL) goto EXCEPTION;
        if (!match_character(ctx, ')')) goto EXCEPTION;
        match_spaces(ctx);
    }
    else if (match_character(ctx, '<')) {
        match_spaces(ctx);
        n_p = create_node(NODE_CAPTURE);
        n_p->data.capture.index = rule->data.rule.capts.len;
        node_const_array__add(&rule->data.rule.capts, n_p);
        n_p->data.capture.expr = parse_expression(ctx, rule);
        if (n_p->data.capture.expr == NULL || !match_character(ctx, '>')) {
            /* drop this capture and any registered after it */
            rule->data.rule.capts.len = n_p->data.capture.index;
            goto EXCEPTION;
        }
        match_spaces(ctx);
    }
    else if (match_character(ctx, '$')) {
        size_t b; /* start of the number */
        match_spaces(ctx);
        b = ctx->bufcur;
        if (match_number(ctx)) {
            const size_t q = ctx->bufcur;
            char *t;
            match_spaces(ctx);
            n_p = create_node(NODE_EXPAND);
            assert(q >= b);
            t = strndup_e(ctx->buffer.buf + b, q - b);
            n_p->data.expand.index = string_to_size_t(t);
            if (n_p->data.expand.index == VOID_VALUE) {
                print_error("%s:" FMT_LU ":" FMT_LU ": Invalid unsigned number '%s'\n", ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1), t);
                ctx->errnum++;
            }
            else if (n_p->data.expand.index == 0) {
                print_error("%s:" FMT_LU ":" FMT_LU ": 0 not allowed\n", ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1));
                ctx->errnum++;
            }
            else if (t[0] == '0') {
                print_error("%s:" FMT_LU ":" FMT_LU ": 0-prefixed number not allowed\n", ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1));
                ctx->errnum++;
                n_p->data.expand.index = 0;
            }
            free(t);
            if (n_p->data.expand.index > 0 && n_p->data.expand.index != VOID_VALUE) {
                /* only a valid number gets its index converted to 0-based and its position recorded */
                n_p->data.expand.index--;
                n_p->data.expand.line = l;
                n_p->data.expand.col = m;
            }
        }
        else {
            goto EXCEPTION;
        }
    }
    else if (match_character(ctx, '.')) {
        match_spaces(ctx);
        n_p = create_node(NODE_CHARCLASS);
        n_p->data.charclass.value = NULL;
        if (!ctx->opts.ascii) {
            ctx->flags |= CODE_FLAG__UTF8_CHARCLASS_USED;
        }
    }
    else if (match_character_class(ctx)) {
        const size_t q = ctx->bufcur;
        /*
         * Length of the text between the two delimiters, clamped to 0 so that it cannot wrap around.
         * NOTE(review): match_character_class() is defined outside this view; the clamp changes
         * nothing if it always consumes both brackets.
         */
        const size_t k = (q - p >= 2) ? q - p - 2 : 0;
        match_spaces(ctx);
        n_p = create_node(NODE_CHARCLASS);
        n_p->data.charclass.value = strndup_e(ctx->buffer.buf + p + 1, k);
        if (!unescape_string(n_p->data.charclass.value, TRUE)) {
            print_error("%s:" FMT_LU ":" FMT_LU ": Illegal escape sequence\n", ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1));
            ctx->errnum++;
        }
        if (!ctx->opts.ascii && !is_valid_utf8_string(n_p->data.charclass.value)) {
            print_error("%s:" FMT_LU ":" FMT_LU ": Invalid UTF-8 string\n", ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1));
            ctx->errnum++;
        }
        if (!ctx->opts.ascii && n_p->data.charclass.value[0] != '\0') {
            ctx->flags |= CODE_FLAG__UTF8_CHARCLASS_USED;
        }
    }
    else if (match_quotation_single(ctx) || match_quotation_double(ctx)) {
        const size_t q = ctx->bufcur;
        /*
         * Same clamp as for character classes.
         * NOTE(review): the quotation matchers are defined outside this view; the clamp changes
         * nothing if they always consume both quotes.
         */
        const size_t k = (q - p >= 2) ? q - p - 2 : 0;
        match_spaces(ctx);
        n_p = create_node(NODE_STRING);
        n_p->data.string.value = strndup_e(ctx->buffer.buf + p + 1, k);
        if (!unescape_string(n_p->data.string.value, FALSE)) {
            print_error("%s:" FMT_LU ":" FMT_LU ": Illegal escape sequence\n", ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1));
            ctx->errnum++;
        }
        if (!ctx->opts.ascii && !is_valid_utf8_string(n_p->data.string.value)) {
            print_error("%s:" FMT_LU ":" FMT_LU ": Invalid UTF-8 string\n", ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1));
            ctx->errnum++;
        }
    }
    else if (match_code_block(ctx)) {
        const size_t q = ctx->bufcur;
        /*
         * match_code_block() also returns TRUE when EOF is hit before the closing brace,
         * possibly with only the opening '{' consumed. In that case "q - p - 2" would wrap
         * around to a huge size_t and strndup_e() would be told to scan far beyond the end
         * of the input buffer, so the length is clamped to 0.
         */
        const size_t k = (q - p >= 2) ? q - p - 2 : 0;
        match_spaces(ctx);
        n_p = create_node(NODE_ACTION);
        n_p->data.action.code.text = strndup_e(ctx->buffer.buf + p + 1, k);
        n_p->data.action.code.len = find_trailing_blanks(n_p->data.action.code.text);
        n_p->data.action.code.line = l;
        n_p->data.action.code.col = m;
        n_p->data.action.index = rule->data.rule.codes.len;
        node_const_array__add(&rule->data.rule.codes, n_p);
    }
    else {
        goto EXCEPTION;
    }
    return n_p;

EXCEPTION:;
    /* n_p is still NULL on several paths leading here */
    destroy_node(n_p);
    ctx->bufcur = p;
    ctx->linenum = l;
    ctx->charnum = n;
    ctx->linepos = o;
    return NULL;
}
2220
/*
 * Parses one term of a sequence:
 *
 *     ['&' | '!'] primary ['*' | '+' | '?'] ['~' code-block]
 *
 * The quantifier wraps the primary in a NODE_QUANTITY, the prefix wraps the
 * result in a NODE_PREDICATE, and the '~' code block wraps that in a
 * NODE_ERROR which is registered in rule->data.rule.codes.
 *
 * Returns the outermost node, or NULL if no primary could be parsed or if '~'
 * is not followed by a code block; in that case the buffer position and the
 * line bookkeeping of ctx are restored.
 */
static node_t *parse_term(context_t *ctx, node_t *rule) {
    const size_t p = ctx->bufcur;
    const size_t l = ctx->linenum;
    const size_t n = ctx->charnum;
    const size_t o = ctx->linepos;
    node_t *n_p = NULL; /* primary */
    node_t *n_q = NULL; /* primary with the quantifier applied */
    node_t *n_r = NULL; /* ... with the predicate applied */
    node_t *n_t = NULL; /* ... with the error action applied */
    const char t = match_character(ctx, '&') ? '&' : match_character(ctx, '!') ? '!' : '\0';
    if (t) match_spaces(ctx);
    n_p = parse_primary(ctx, rule);
    if (n_p == NULL) goto EXCEPTION;
    if (match_character(ctx, '*')) {
        match_spaces(ctx);
        n_q = create_node(NODE_QUANTITY);
        n_q->data.quantity.min = 0;
        n_q->data.quantity.max = -1; /* a negative maximum stands for "no upper limit" */
        n_q->data.quantity.expr = n_p;
    }
    else if (match_character(ctx, '+')) {
        match_spaces(ctx);
        n_q = create_node(NODE_QUANTITY);
        n_q->data.quantity.min = 1;
        n_q->data.quantity.max = -1;
        n_q->data.quantity.expr = n_p;
    }
    else if (match_character(ctx, '?')) {
        match_spaces(ctx);
        n_q = create_node(NODE_QUANTITY);
        n_q->data.quantity.min = 0;
        n_q->data.quantity.max = 1;
        n_q->data.quantity.expr = n_p;
    }
    else {
        n_q = n_p;
    }
    switch (t) {
    case '&':
        n_r = create_node(NODE_PREDICATE);
        n_r->data.predicate.neg = FALSE;
        n_r->data.predicate.expr = n_q;
        break;
    case '!':
        n_r = create_node(NODE_PREDICATE);
        n_r->data.predicate.neg = TRUE;
        n_r->data.predicate.expr = n_q;
        break;
    default:
        n_r = n_q;
    }
    if (match_character(ctx, '~')) {
        size_t b, bl, bm; /* start offset, line, and column of the code block */
        match_spaces(ctx);
        b = ctx->bufcur;
        bl = ctx->linenum;
        bm = column_number(ctx);
        if (match_code_block(ctx)) {
            const size_t q = ctx->bufcur;
            /*
             * match_code_block() also returns TRUE when EOF is hit before the closing brace,
             * possibly with only the opening '{' consumed. In that case "q - b - 2" would wrap
             * around to a huge size_t and strndup_e() would be told to scan far beyond the end
             * of the input buffer, so the length is clamped to 0.
             */
            const size_t k = (q - b >= 2) ? q - b - 2 : 0;
            match_spaces(ctx);
            n_t = create_node(NODE_ERROR);
            n_t->data.error.expr = n_r;
            n_t->data.error.code.text = strndup_e(ctx->buffer.buf + b + 1, k);
            n_t->data.error.code.len = find_trailing_blanks(n_t->data.error.code.text);
            n_t->data.error.code.line = bl;
            n_t->data.error.code.col = bm;
            n_t->data.error.index = rule->data.rule.codes.len;
            node_const_array__add(&rule->data.rule.codes, n_t);
        }
        else {
            goto EXCEPTION;
        }
    }
    else {
        n_t = n_r;
    }
    return n_t;

EXCEPTION:;
    /* n_r is NULL when the primary itself failed */
    destroy_node(n_r);
    ctx->bufcur = p;
    ctx->linenum = l;
    ctx->charnum = n;
    ctx->linepos = o;
    return NULL;
}
2307
/*
 * Parses one or more consecutive terms.
 * A single term is returned as is; two or more terms are collected, in order,
 * into a NODE_SEQUENCE.
 * Returns NULL when not even one term could be parsed; the buffer position and
 * the line bookkeeping of ctx are then set back to their values on entry.
 */
static node_t *parse_sequence(context_t *ctx, node_t *rule) {
    const size_t saved_cur = ctx->bufcur;
    const size_t saved_line = ctx->linenum;
    const size_t saved_char = ctx->charnum;
    const size_t saved_pos = ctx->linepos;
    node_array_t *terms;
    node_t *seq;
    node_t *next;
    node_t *const first = parse_term(ctx, rule);
    if (first == NULL) {
        ctx->bufcur = saved_cur;
        ctx->linenum = saved_line;
        ctx->charnum = saved_char;
        ctx->linepos = saved_pos;
        return NULL;
    }
    next = parse_term(ctx, rule);
    if (next == NULL) return first; /* a lone term needs no wrapper */
    seq = create_node(NODE_SEQUENCE);
    terms = &seq->data.sequence.nodes;
    node_array__add(terms, first);
    do {
        node_array__add(terms, next);
        next = parse_term(ctx, rule);
    } while (next != NULL);
    return seq;
}
2341
/*
 * Parses sequences separated by '/'.
 * A lone sequence is returned as is; otherwise all the sequences are collected,
 * in order, into a NODE_ALTERNATE.
 * Returns NULL if the first sequence, or a sequence following a '/', cannot be
 * parsed; a partially built alternate node is destroyed, and the buffer
 * position and the line bookkeeping of ctx are set back to their values on entry.
 */
static node_t *parse_expression(context_t *ctx, node_t *rule) {
    const size_t saved_cur = ctx->bufcur;
    const size_t saved_line = ctx->linenum;
    const size_t saved_char = ctx->charnum;
    const size_t saved_pos = ctx->linepos;
    size_t mark;
    node_array_t *alts = NULL;
    node_t *seq = NULL;
    node_t *alt = NULL;
    seq = parse_sequence(ctx, rule);
    if (seq == NULL) goto FAILURE;
    mark = ctx->bufcur;
    if (!match_character(ctx, '/')) return seq;
    /* that was only a peek: step back so that the loop below takes the '/' itself */
    ctx->bufcur = mark;
    alt = create_node(NODE_ALTERNATE);
    alts = &alt->data.alternate.nodes;
    node_array__add(alts, seq);
    while (match_character(ctx, '/')) {
        match_spaces(ctx);
        seq = parse_sequence(ctx, rule);
        if (seq == NULL) goto FAILURE;
        node_array__add(alts, seq);
    }
    return alt;

FAILURE:;
    /* alt is still NULL if the very first sequence failed */
    destroy_node(alt);
    ctx->bufcur = saved_cur;
    ctx->linenum = saved_line;
    ctx->charnum = saved_char;
    ctx->linepos = saved_pos;
    return NULL;
}
2379
parse_rule(context_t * ctx)2380 static node_t *parse_rule(context_t *ctx) {
2381 const size_t p = ctx->bufcur;
2382 const size_t l = ctx->linenum;
2383 const size_t m = column_number(ctx);
2384 const size_t n = ctx->charnum;
2385 const size_t o = ctx->linepos;
2386 size_t q;
2387 node_t *n_r = NULL;
2388 if (!match_identifier(ctx)) goto EXCEPTION;
2389 q = ctx->bufcur;
2390 match_spaces(ctx);
2391 if (!match_string(ctx, "<-")) goto EXCEPTION;
2392 match_spaces(ctx);
2393 n_r = create_node(NODE_RULE);
2394 n_r->data.rule.expr = parse_expression(ctx, n_r);
2395 if (n_r->data.rule.expr == NULL) goto EXCEPTION;
2396 assert(q >= p);
2397 n_r->data.rule.name = strndup_e(ctx->buffer.buf + p, q - p);
2398 n_r->data.rule.line = l;
2399 n_r->data.rule.col = m;
2400 return n_r;
2401
2402 EXCEPTION:;
2403 destroy_node(n_r);
2404 ctx->bufcur = p;
2405 ctx->linenum = l;
2406 ctx->charnum = n;
2407 ctx->linepos = o;
2408 return NULL;
2409 }
2410
/* Returns ctx->vtype, or "int" when it is NULL or an empty string. */
static const char *get_value_type(context_t *ctx) {
    if (ctx->vtype == NULL || ctx->vtype[0] == '\0') return "int";
    return ctx->vtype;
}
2414
/* Returns ctx->atype, or "void *" when it is NULL or an empty string. */
static const char *get_auxil_type(context_t *ctx) {
    if (ctx->atype == NULL || ctx->atype[0] == '\0') return "void *";
    return ctx->atype;
}
2418
/* Returns ctx->prefix, or "pcc" when it is NULL or an empty string. */
static const char *get_prefix(context_t *ctx) {
    if (ctx->prefix == NULL || ctx->prefix[0] == '\0') return "pcc";
    return ctx->prefix;
}
2422
/* Prints the effective value type, auxil type, and prefix to the standard output, one per line. */
static void dump_options(context_t *ctx) {
    printf("value_type: '%s'\n", get_value_type(ctx));
    printf("auxil_type: '%s'\n", get_auxil_type(ctx));
    printf("prefix: '%s'\n", get_prefix(ctx));
}
2428
parse_directive_include_(context_t * ctx,const char * name,code_block_array_t * output1,code_block_array_t * output2)2429 static bool_t parse_directive_include_(context_t *ctx, const char *name, code_block_array_t *output1, code_block_array_t *output2) {
2430 if (!match_string(ctx, name)) return FALSE;
2431 match_spaces(ctx);
2432 {
2433 const size_t p = ctx->bufcur;
2434 const size_t l = ctx->linenum;
2435 const size_t m = column_number(ctx);
2436 if (match_code_block(ctx)) {
2437 const size_t q = ctx->bufcur;
2438 match_spaces(ctx);
2439 if (output1 != NULL) {
2440 code_block_t *c = code_block_array__create_entry(output1);
2441 c->text = strndup_e(ctx->buffer.buf + p + 1, q - p - 2);
2442 c->len = q - p - 2;
2443 c->line = l;
2444 c->col = m;
2445 }
2446 if (output2 != NULL) {
2447 code_block_t *c = code_block_array__create_entry(output2);
2448 c->text = strndup_e(ctx->buffer.buf + p + 1, q - p - 2);
2449 c->len = q - p - 2;
2450 c->line = l;
2451 c->col = m;
2452 }
2453 }
2454 else {
2455 print_error("%s:" FMT_LU ":" FMT_LU ": Illegal %s syntax\n", ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1), name);
2456 ctx->errnum++;
2457 }
2458 }
2459 return TRUE;
2460 }
2461
parse_directive_string_(context_t * ctx,const char * name,char ** output,string_flag_t mode)2462 static bool_t parse_directive_string_(context_t *ctx, const char *name, char **output, string_flag_t mode) {
2463 const size_t l = ctx->linenum;
2464 const size_t m = column_number(ctx);
2465 if (!match_string(ctx, name)) return FALSE;
2466 match_spaces(ctx);
2467 {
2468 char *s = NULL;
2469 const size_t p = ctx->bufcur;
2470 const size_t lv = ctx->linenum;
2471 const size_t mv = column_number(ctx);
2472 size_t q;
2473 if (match_quotation_single(ctx) || match_quotation_double(ctx)) {
2474 q = ctx->bufcur;
2475 match_spaces(ctx);
2476 s = strndup_e(ctx->buffer.buf + p + 1, q - p - 2);
2477 if (!unescape_string(s, FALSE)) {
2478 print_error("%s:" FMT_LU ":" FMT_LU ": Illegal escape sequence\n", ctx->iname, (ulong_t)(lv + 1), (ulong_t)(mv + 1));
2479 ctx->errnum++;
2480 }
2481 }
2482 else {
2483 print_error("%s:" FMT_LU ":" FMT_LU ": Illegal %s syntax\n", ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1), name);
2484 ctx->errnum++;
2485 }
2486 if (s != NULL) {
2487 string_flag_t f = STRING_FLAG__NONE;
2488 bool_t b = TRUE;
2489 remove_leading_blanks(s);
2490 remove_trailing_blanks(s);
2491 assert((mode & ~7) == 0);
2492 if ((mode & STRING_FLAG__NOTEMPTY) && !is_filled_string(s)) {
2493 print_error("%s:" FMT_LU ":" FMT_LU ": Empty string\n", ctx->iname, (ulong_t)(lv + 1), (ulong_t)(mv + 1));
2494 ctx->errnum++;
2495 f |= STRING_FLAG__NOTEMPTY;
2496 }
2497 if ((mode & STRING_FLAG__NOTVOID) && strcmp(s, "void") == 0) {
2498 print_error("%s:" FMT_LU ":" FMT_LU ": 'void' not allowed\n", ctx->iname, (ulong_t)(lv + 1), (ulong_t)(mv + 1));
2499 ctx->errnum++;
2500 f |= STRING_FLAG__NOTVOID;
2501 }
2502 if ((mode & STRING_FLAG__IDENTIFIER) && !is_identifier_string(s)) {
2503 if (!(f & STRING_FLAG__NOTEMPTY)) {
2504 print_error("%s:" FMT_LU ":" FMT_LU ": Invalid identifier\n", ctx->iname, (ulong_t)(lv + 1), (ulong_t)(mv + 1));
2505 ctx->errnum++;
2506 }
2507 f |= STRING_FLAG__IDENTIFIER;
2508 }
2509 if (*output != NULL) {
2510 print_error("%s:" FMT_LU ":" FMT_LU ": Multiple %s definition\n", ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1), name);
2511 ctx->errnum++;
2512 b = FALSE;
2513 }
2514 if (f == STRING_FLAG__NONE && b) {
2515 *output = s;
2516 }
2517 else {
2518 free(s); s = NULL;
2519 }
2520 }
2521 }
2522 return TRUE;
2523 }
2524
parse(context_t * ctx)2525 static bool_t parse(context_t *ctx) {
2526 {
2527 bool_t b = TRUE;
2528 match_spaces(ctx);
2529 for (;;) {
2530 size_t l, m, n, o;
2531 if (match_eof(ctx) || match_footer_start(ctx)) break;
2532 l = ctx->linenum;
2533 m = column_number(ctx);
2534 n = ctx->charnum;
2535 o = ctx->linepos;
2536 if (
2537 parse_directive_include_(ctx, "%earlysource", &ctx->esource, NULL) ||
2538 parse_directive_include_(ctx, "%earlyheader", &ctx->eheader, NULL) ||
2539 parse_directive_include_(ctx, "%earlycommon", &ctx->esource, &ctx->eheader) ||
2540 parse_directive_include_(ctx, "%source", &ctx->source, NULL) ||
2541 parse_directive_include_(ctx, "%header", &ctx->header, NULL) ||
2542 parse_directive_include_(ctx, "%common", &ctx->source, &ctx->header) ||
2543 parse_directive_string_(ctx, "%value", &ctx->vtype, STRING_FLAG__NOTEMPTY | STRING_FLAG__NOTVOID) ||
2544 parse_directive_string_(ctx, "%auxil", &ctx->atype, STRING_FLAG__NOTEMPTY | STRING_FLAG__NOTVOID) ||
2545 parse_directive_string_(ctx, "%prefix", &ctx->prefix, STRING_FLAG__NOTEMPTY | STRING_FLAG__IDENTIFIER)
2546 ) {
2547 b = TRUE;
2548 }
2549 else if (match_character(ctx, '%')) {
2550 print_error("%s:" FMT_LU ":" FMT_LU ": Invalid directive\n", ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1));
2551 ctx->errnum++;
2552 match_identifier(ctx);
2553 match_spaces(ctx);
2554 b = TRUE;
2555 }
2556 else {
2557 node_t *const n_r = parse_rule(ctx);
2558 if (n_r == NULL) {
2559 if (b) {
2560 print_error("%s:" FMT_LU ":" FMT_LU ": Illegal rule syntax\n", ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1));
2561 ctx->errnum++;
2562 b = FALSE;
2563 }
2564 ctx->linenum = l;
2565 ctx->charnum = n;
2566 ctx->linepos = o;
2567 if (!match_identifier(ctx) && !match_spaces(ctx)) match_character_any(ctx);
2568 continue;
2569 }
2570 node_array__add(&ctx->rules, n_r);
2571 b = TRUE;
2572 }
2573 commit_buffer(ctx);
2574 }
2575 commit_buffer(ctx);
2576 }
2577 {
2578 size_t i;
2579 make_rulehash(ctx);
2580 for (i = 0; i < ctx->rules.len; i++) {
2581 link_references(ctx, ctx->rules.buf[i]->data.rule.expr);
2582 }
2583 for (i = 1; i < ctx->rules.len; i++) {
2584 if (ctx->rules.buf[i]->data.rule.ref == 0) {
2585 print_error("%s:" FMT_LU ":" FMT_LU ": Never used rule '%s'\n",
2586 ctx->iname,
2587 (ulong_t)(ctx->rules.buf[i]->data.rule.line + 1), (ulong_t)(ctx->rules.buf[i]->data.rule.col + 1),
2588 ctx->rules.buf[i]->data.rule.name);
2589 ctx->errnum++;
2590 }
2591 else if (ctx->rules.buf[i]->data.rule.ref < 0) {
2592 print_error("%s:" FMT_LU ":" FMT_LU ": Multiple definition of rule '%s'\n",
2593 ctx->iname,
2594 (ulong_t)(ctx->rules.buf[i]->data.rule.line + 1), (ulong_t)(ctx->rules.buf[i]->data.rule.col + 1),
2595 ctx->rules.buf[i]->data.rule.name);
2596 ctx->errnum++;
2597 }
2598 }
2599 }
2600 {
2601 size_t i;
2602 for (i = 0; i < ctx->rules.len; i++) {
2603 verify_variables(ctx, ctx->rules.buf[i]->data.rule.expr, NULL);
2604 verify_captures(ctx, ctx->rules.buf[i]->data.rule.expr, NULL);
2605 }
2606 }
2607 if (ctx->opts.debug) {
2608 size_t i;
2609 for (i = 0; i < ctx->rules.len; i++) {
2610 dump_node(ctx, ctx->rules.buf[i], 0);
2611 }
2612 dump_options(ctx);
2613 }
2614 return (ctx->errnum == 0) ? TRUE : FALSE;
2615 }
2616
generate_matching_string_code(generate_t * gen,const char * value,int onfail,size_t indent,bool_t bare)2617 static code_reach_t generate_matching_string_code(generate_t *gen, const char *value, int onfail, size_t indent, bool_t bare) {
2618 const size_t n = (value != NULL) ? strlen(value) : 0;
2619 if (n > 0) {
2620 char s[5];
2621 if (n > 1) {
2622 size_t i;
2623 stream__write_characters(gen->stream, ' ', indent);
2624 stream__puts(gen->stream, "if (\n");
2625 stream__write_characters(gen->stream, ' ', indent + 4);
2626 stream__printf(gen->stream, "pcc_refill_buffer(ctx, " FMT_LU ") < " FMT_LU " ||\n", (ulong_t)n, (ulong_t)n);
2627 for (i = 0; i < n - 1; i++) {
2628 stream__write_characters(gen->stream, ' ', indent + 4);
2629 stream__printf(gen->stream, "(ctx->buffer.buf + ctx->cur)[" FMT_LU "] != '%s' ||\n", (ulong_t)i, escape_character(value[i], &s));
2630 }
2631 stream__write_characters(gen->stream, ' ', indent + 4);
2632 stream__printf(gen->stream, "(ctx->buffer.buf + ctx->cur)[" FMT_LU "] != '%s'\n", (ulong_t)i, escape_character(value[i], &s));
2633 stream__write_characters(gen->stream, ' ', indent);
2634 stream__printf(gen->stream, ") goto L%04d;\n", onfail);
2635 stream__write_characters(gen->stream, ' ', indent);
2636 stream__printf(gen->stream, "ctx->cur += " FMT_LU ";\n", (ulong_t)n);
2637 return CODE_REACH__BOTH;
2638 }
2639 else {
2640 stream__write_characters(gen->stream, ' ', indent);
2641 stream__puts(gen->stream, "if (\n");
2642 stream__write_characters(gen->stream, ' ', indent + 4);
2643 stream__puts(gen->stream, "pcc_refill_buffer(ctx, 1) < 1 ||\n");
2644 stream__write_characters(gen->stream, ' ', indent + 4);
2645 stream__printf(gen->stream, "ctx->buffer.buf[ctx->cur] != '%s'\n", escape_character(value[0], &s));
2646 stream__write_characters(gen->stream, ' ', indent);
2647 stream__printf(gen->stream, ") goto L%04d;\n", onfail);
2648 stream__write_characters(gen->stream, ' ', indent);
2649 stream__puts(gen->stream, "ctx->cur++;\n");
2650 return CODE_REACH__BOTH;
2651 }
2652 }
2653 else {
2654 /* no code to generate */
2655 return CODE_REACH__ALWAYS_SUCCEED;
2656 }
2657 }
2658
generate_matching_charclass_code(generate_t * gen,const char * value,int onfail,size_t indent,bool_t bare)2659 static code_reach_t generate_matching_charclass_code(generate_t *gen, const char *value, int onfail, size_t indent, bool_t bare) {
2660 assert(gen->ascii);
2661 if (value != NULL) {
2662 const size_t n = strlen(value);
2663 if (n > 0) {
2664 char s[5], t[5];
2665 if (n > 1) {
2666 const bool_t a = (value[0] == '^') ? TRUE : FALSE;
2667 size_t i = a ? 1 : 0;
2668 if (i + 1 == n) { /* fulfilled only if a == TRUE */
2669 stream__write_characters(gen->stream, ' ', indent);
2670 stream__puts(gen->stream, "if (\n");
2671 stream__write_characters(gen->stream, ' ', indent + 4);
2672 stream__puts(gen->stream, "pcc_refill_buffer(ctx, 1) < 1 ||\n");
2673 stream__write_characters(gen->stream, ' ', indent + 4);
2674 stream__printf(gen->stream, "ctx->buffer.buf[ctx->cur] == '%s'\n", escape_character(value[i], &s));
2675 stream__write_characters(gen->stream, ' ', indent);
2676 stream__printf(gen->stream, ") goto L%04d;\n", onfail);
2677 stream__write_characters(gen->stream, ' ', indent);
2678 stream__puts(gen->stream, "ctx->cur++;\n");
2679 return CODE_REACH__BOTH;
2680 }
2681 else {
2682 if (!bare) {
2683 stream__write_characters(gen->stream, ' ', indent);
2684 stream__puts(gen->stream, "{\n");
2685 indent += 4;
2686 }
2687 stream__write_characters(gen->stream, ' ', indent);
2688 stream__puts(gen->stream, "char c;\n");
2689 stream__write_characters(gen->stream, ' ', indent);
2690 stream__printf(gen->stream, "if (pcc_refill_buffer(ctx, 1) < 1) goto L%04d;\n", onfail);
2691 stream__write_characters(gen->stream, ' ', indent);
2692 stream__puts(gen->stream, "c = ctx->buffer.buf[ctx->cur];\n");
2693 if (i + 3 == n && value[i] != '\\' && value[i + 1] == '-') {
2694 stream__write_characters(gen->stream, ' ', indent);
2695 stream__printf(gen->stream,
2696 a ? "if (c >= '%s' && c <= '%s') goto L%04d;\n"
2697 : "if (!(c >= '%s' && c <= '%s')) goto L%04d;\n",
2698 escape_character(value[i], &s), escape_character(value[i + 2], &t), onfail);
2699 }
2700 else {
2701 stream__write_characters(gen->stream, ' ', indent);
2702 stream__puts(gen->stream, a ? "if (\n" : "if (!(\n");
2703 for (; i < n; i++) {
2704 stream__write_characters(gen->stream, ' ', indent + 4);
2705 if (value[i] == '\\' && i + 1 < n) i++;
2706 if (i + 2 < n && value[i + 1] == '-') {
2707 stream__printf(gen->stream, "(c >= '%s' && c <= '%s')%s\n",
2708 escape_character(value[i], &s), escape_character(value[i + 2], &t), (i + 3 == n) ? "" : " ||");
2709 i += 2;
2710 }
2711 else {
2712 stream__printf(gen->stream, "c == '%s'%s\n",
2713 escape_character(value[i], &s), (i + 1 == n) ? "" : " ||");
2714 }
2715 }
2716 stream__write_characters(gen->stream, ' ', indent);
2717 stream__printf(gen->stream, a ? ") goto L%04d;\n" : ")) goto L%04d;\n", onfail);
2718 }
2719 stream__write_characters(gen->stream, ' ', indent);
2720 stream__puts(gen->stream, "ctx->cur++;\n");
2721 if (!bare) {
2722 indent -= 4;
2723 stream__write_characters(gen->stream, ' ', indent);
2724 stream__puts(gen->stream, "}\n");
2725 }
2726 return CODE_REACH__BOTH;
2727 }
2728 }
2729 else {
2730 stream__write_characters(gen->stream, ' ', indent);
2731 stream__puts(gen->stream, "if (\n");
2732 stream__write_characters(gen->stream, ' ', indent + 4);
2733 stream__puts(gen->stream, "pcc_refill_buffer(ctx, 1) < 1 ||\n");
2734 stream__write_characters(gen->stream, ' ', indent + 4);
2735 stream__printf(gen->stream, "ctx->buffer.buf[ctx->cur] != '%s'\n", escape_character(value[0], &s));
2736 stream__write_characters(gen->stream, ' ', indent);
2737 stream__printf(gen->stream, ") goto L%04d;\n", onfail);
2738 stream__write_characters(gen->stream, ' ', indent);
2739 stream__puts(gen->stream, "ctx->cur++;\n");
2740 return CODE_REACH__BOTH;
2741 }
2742 }
2743 else {
2744 stream__write_characters(gen->stream, ' ', indent);
2745 stream__printf(gen->stream, "goto L%04d;\n", onfail);
2746 return CODE_REACH__ALWAYS_FAIL;
2747 }
2748 }
2749 else {
2750 stream__write_characters(gen->stream, ' ', indent);
2751 stream__printf(gen->stream, "if (pcc_refill_buffer(ctx, 1) < 1) goto L%04d;\n", onfail);
2752 stream__write_characters(gen->stream, ' ', indent);
2753 stream__puts(gen->stream, "ctx->cur++;\n");
2754 return CODE_REACH__BOTH;
2755 }
2756 }
2757
generate_matching_utf8_charclass_code(generate_t * gen,const char * value,int onfail,size_t indent,bool_t bare)2758 static code_reach_t generate_matching_utf8_charclass_code(generate_t *gen, const char *value, int onfail, size_t indent, bool_t bare) {
2759 const size_t n = (value != NULL) ? strlen(value) : 0;
2760 if (value == NULL || n > 0) {
2761 const bool_t a = (n > 0 && value[0] == '^') ? TRUE : FALSE;
2762 size_t i = a ? 1 : 0;
2763 if (!bare) {
2764 stream__write_characters(gen->stream, ' ', indent);
2765 stream__puts(gen->stream, "{\n");
2766 indent += 4;
2767 }
2768 stream__write_characters(gen->stream, ' ', indent);
2769 stream__puts(gen->stream, "int u;\n");
2770 stream__write_characters(gen->stream, ' ', indent);
2771 stream__puts(gen->stream, "const size_t n = pcc_get_char_as_utf32(ctx, &u);\n");
2772 stream__write_characters(gen->stream, ' ', indent);
2773 stream__printf(gen->stream, "if (n == 0) goto L%04d;\n", onfail);
2774 if (value != NULL && !(a && n == 1)) { /* not '.' or '[^]' */
2775 int u0 = 0;
2776 bool_t r = FALSE;
2777 stream__write_characters(gen->stream, ' ', indent);
2778 stream__puts(gen->stream, a ? "if (\n" : "if (!(\n");
2779 while (i < n) {
2780 int u = 0;
2781 if (value[i] == '\\' && i + 1 < n) i++;
2782 i += utf8_to_utf32(value + i, &u);
2783 if (r) { /* character range */
2784 stream__write_characters(gen->stream, ' ', indent + 4);
2785 stream__printf(gen->stream, "(u >= 0x%06x && u <= 0x%06x)%s\n", u0, u, (i < n) ? " ||" : "");
2786 u0 = 0;
2787 r = FALSE;
2788 }
2789 else if (
2790 value[i] != '-' ||
2791 i == n - 1 /* the individual '-' character is valid when it is at the first or the last position */
2792 ) { /* single character */
2793 stream__write_characters(gen->stream, ' ', indent + 4);
2794 stream__printf(gen->stream, "u == 0x%06x%s\n", u, (i < n) ? " ||" : "");
2795 u0 = 0;
2796 r = FALSE;
2797 }
2798 else {
2799 assert(value[i] == '-');
2800 i++;
2801 u0 = u;
2802 r = TRUE;
2803 }
2804 }
2805 stream__write_characters(gen->stream, ' ', indent);
2806 stream__printf(gen->stream, a ? ") goto L%04d;\n" : ")) goto L%04d;\n", onfail);
2807 }
2808 stream__write_characters(gen->stream, ' ', indent);
2809 stream__puts(gen->stream, "ctx->cur += n;\n");
2810 if (!bare) {
2811 indent -= 4;
2812 stream__write_characters(gen->stream, ' ', indent);
2813 stream__puts(gen->stream, "}\n");
2814 }
2815 return CODE_REACH__BOTH;
2816 }
2817 else {
2818 stream__write_characters(gen->stream, ' ', indent);
2819 stream__printf(gen->stream, "goto L%04d;\n", onfail);
2820 return CODE_REACH__ALWAYS_FAIL;
2821 }
2822 }
2823
2824 static code_reach_t generate_code(generate_t *gen, const node_t *node, int onfail, size_t indent, bool_t bare);
2825
generate_quantifying_code(generate_t * gen,const node_t * expr,int min,int max,int onfail,size_t indent,bool_t bare)2826 static code_reach_t generate_quantifying_code(generate_t *gen, const node_t *expr, int min, int max, int onfail, size_t indent, bool_t bare) {
2827 if (max > 1 || max < 0) {
2828 code_reach_t r;
2829 if (!bare) {
2830 stream__write_characters(gen->stream, ' ', indent);
2831 stream__puts(gen->stream, "{\n");
2832 indent += 4;
2833 }
2834 if (min > 0) {
2835 stream__write_characters(gen->stream, ' ', indent);
2836 stream__puts(gen->stream, "const size_t p0 = ctx->cur;\n");
2837 stream__write_characters(gen->stream, ' ', indent);
2838 stream__puts(gen->stream, "const size_t n0 = chunk->thunks.len;\n");
2839 }
2840 stream__write_characters(gen->stream, ' ', indent);
2841 stream__puts(gen->stream, "int i;\n");
2842 stream__write_characters(gen->stream, ' ', indent);
2843 if (max < 0)
2844 stream__puts(gen->stream, "for (i = 0;; i++) {\n");
2845 else
2846 stream__printf(gen->stream, "for (i = 0; i < %d; i++) {\n", max);
2847 stream__write_characters(gen->stream, ' ', indent + 4);
2848 stream__puts(gen->stream, "const size_t p = ctx->cur;\n");
2849 stream__write_characters(gen->stream, ' ', indent + 4);
2850 stream__puts(gen->stream, "const size_t n = chunk->thunks.len;\n");
2851 {
2852 const int l = ++gen->label;
2853 r = generate_code(gen, expr, l, indent + 4, FALSE);
2854 stream__write_characters(gen->stream, ' ', indent + 4);
2855 stream__puts(gen->stream, "if (ctx->cur == p) break;\n");
2856 if (r != CODE_REACH__ALWAYS_SUCCEED) {
2857 stream__write_characters(gen->stream, ' ', indent + 4);
2858 stream__puts(gen->stream, "continue;\n");
2859 stream__write_characters(gen->stream, ' ', indent);
2860 stream__printf(gen->stream, "L%04d:;\n", l);
2861 stream__write_characters(gen->stream, ' ', indent + 4);
2862 stream__puts(gen->stream, "ctx->cur = p;\n");
2863 stream__write_characters(gen->stream, ' ', indent + 4);
2864 stream__puts(gen->stream, "pcc_thunk_array__revert(ctx->auxil, &chunk->thunks, n);\n");
2865 stream__write_characters(gen->stream, ' ', indent + 4);
2866 stream__puts(gen->stream, "break;\n");
2867 }
2868 }
2869 stream__write_characters(gen->stream, ' ', indent);
2870 stream__puts(gen->stream, "}\n");
2871 if (min > 0) {
2872 stream__write_characters(gen->stream, ' ', indent);
2873 stream__printf(gen->stream, "if (i < %d) {\n", min);
2874 stream__write_characters(gen->stream, ' ', indent + 4);
2875 stream__puts(gen->stream, "ctx->cur = p0;\n");
2876 stream__write_characters(gen->stream, ' ', indent + 4);
2877 stream__puts(gen->stream, "pcc_thunk_array__revert(ctx->auxil, &chunk->thunks, n0);\n");
2878 stream__write_characters(gen->stream, ' ', indent + 4);
2879 stream__printf(gen->stream, "goto L%04d;\n", onfail);
2880 stream__write_characters(gen->stream, ' ', indent);
2881 stream__puts(gen->stream, "}\n");
2882 }
2883 if (!bare) {
2884 indent -= 4;
2885 stream__write_characters(gen->stream, ' ', indent);
2886 stream__puts(gen->stream, "}\n");
2887 }
2888 return (min > 0) ? ((r == CODE_REACH__ALWAYS_FAIL) ? CODE_REACH__ALWAYS_FAIL : CODE_REACH__BOTH) : CODE_REACH__ALWAYS_SUCCEED;
2889 }
2890 else if (max == 1) {
2891 if (min > 0) {
2892 return generate_code(gen, expr, onfail, indent, bare);
2893 }
2894 else {
2895 if (!bare) {
2896 stream__write_characters(gen->stream, ' ', indent);
2897 stream__puts(gen->stream, "{\n");
2898 indent += 4;
2899 }
2900 stream__write_characters(gen->stream, ' ', indent);
2901 stream__puts(gen->stream, "const size_t p = ctx->cur;\n");
2902 stream__write_characters(gen->stream, ' ', indent);
2903 stream__puts(gen->stream, "const size_t n = chunk->thunks.len;\n");
2904 {
2905 const int l = ++gen->label;
2906 if (generate_code(gen, expr, l, indent, FALSE) != CODE_REACH__ALWAYS_SUCCEED) {
2907 const int m = ++gen->label;
2908 stream__write_characters(gen->stream, ' ', indent);
2909 stream__printf(gen->stream, "goto L%04d;\n", m);
2910 if (indent > 4) stream__write_characters(gen->stream, ' ', indent - 4);
2911 stream__printf(gen->stream, "L%04d:;\n", l);
2912 stream__write_characters(gen->stream, ' ', indent);
2913 stream__puts(gen->stream, "ctx->cur = p;\n");
2914 stream__write_characters(gen->stream, ' ', indent);
2915 stream__puts(gen->stream, "pcc_thunk_array__revert(ctx->auxil, &chunk->thunks, n);\n");
2916 if (indent > 4) stream__write_characters(gen->stream, ' ', indent - 4);
2917 stream__printf(gen->stream, "L%04d:;\n", m);
2918 }
2919 }
2920 if (!bare) {
2921 indent -= 4;
2922 stream__write_characters(gen->stream, ' ', indent);
2923 stream__puts(gen->stream, "}\n");
2924 }
2925 return CODE_REACH__ALWAYS_SUCCEED;
2926 }
2927 }
2928 else {
2929 /* no code to generate */
2930 return CODE_REACH__ALWAYS_SUCCEED;
2931 }
2932 }
2933
generate_predicating_code(generate_t * gen,const node_t * expr,bool_t neg,int onfail,size_t indent,bool_t bare)2934 static code_reach_t generate_predicating_code(generate_t *gen, const node_t *expr, bool_t neg, int onfail, size_t indent, bool_t bare) {
2935 code_reach_t r;
2936 if (!bare) {
2937 stream__write_characters(gen->stream, ' ', indent);
2938 stream__puts(gen->stream, "{\n");
2939 indent += 4;
2940 }
2941 stream__write_characters(gen->stream, ' ', indent);
2942 stream__puts(gen->stream, "const size_t p = ctx->cur;\n");
2943 if (neg) {
2944 const int l = ++gen->label;
2945 r = generate_code(gen, expr, l, indent, FALSE);
2946 if (r != CODE_REACH__ALWAYS_FAIL) {
2947 stream__write_characters(gen->stream, ' ', indent);
2948 stream__puts(gen->stream, "ctx->cur = p;\n");
2949 stream__write_characters(gen->stream, ' ', indent);
2950 stream__printf(gen->stream, "goto L%04d;\n", onfail);
2951 }
2952 if (r != CODE_REACH__ALWAYS_SUCCEED) {
2953 if (indent > 4) stream__write_characters(gen->stream, ' ', indent - 4);
2954 stream__printf(gen->stream, "L%04d:;\n", l);
2955 stream__write_characters(gen->stream, ' ', indent);
2956 stream__puts(gen->stream, "ctx->cur = p;\n");
2957 }
2958 switch (r) {
2959 case CODE_REACH__ALWAYS_SUCCEED: r = CODE_REACH__ALWAYS_FAIL; break;
2960 case CODE_REACH__ALWAYS_FAIL: r = CODE_REACH__ALWAYS_SUCCEED; break;
2961 case CODE_REACH__BOTH: break;
2962 }
2963 }
2964 else {
2965 const int l = ++gen->label;
2966 const int m = ++gen->label;
2967 r = generate_code(gen, expr, l, indent, FALSE);
2968 if (r != CODE_REACH__ALWAYS_FAIL) {
2969 stream__write_characters(gen->stream, ' ', indent);
2970 stream__puts(gen->stream, "ctx->cur = p;\n");
2971 }
2972 if (r == CODE_REACH__BOTH) {
2973 stream__write_characters(gen->stream, ' ', indent);
2974 stream__printf(gen->stream, "goto L%04d;\n", m);
2975 }
2976 if (r != CODE_REACH__ALWAYS_SUCCEED) {
2977 if (indent > 4) stream__write_characters(gen->stream, ' ', indent - 4);
2978 stream__printf(gen->stream, "L%04d:;\n", l);
2979 stream__write_characters(gen->stream, ' ', indent);
2980 stream__puts(gen->stream, "ctx->cur = p;\n");
2981 stream__write_characters(gen->stream, ' ', indent);
2982 stream__printf(gen->stream, "goto L%04d;\n", onfail);
2983 }
2984 if (r == CODE_REACH__BOTH) {
2985 if (indent > 4) stream__write_characters(gen->stream, ' ', indent - 4);
2986 stream__printf(gen->stream, "L%04d:;\n", m);
2987 }
2988 }
2989 if (!bare) {
2990 indent -= 4;
2991 stream__write_characters(gen->stream, ' ', indent);
2992 stream__puts(gen->stream, "}\n");
2993 }
2994 return r;
2995 }
2996
generate_sequential_code(generate_t * gen,const node_array_t * nodes,int onfail,size_t indent,bool_t bare)2997 static code_reach_t generate_sequential_code(generate_t *gen, const node_array_t *nodes, int onfail, size_t indent, bool_t bare) {
2998 bool_t b = FALSE;
2999 size_t i;
3000 for (i = 0; i < nodes->len; i++) {
3001 switch (generate_code(gen, nodes->buf[i], onfail, indent, FALSE)) {
3002 case CODE_REACH__ALWAYS_FAIL:
3003 if (i + 1 < nodes->len) {
3004 stream__write_characters(gen->stream, ' ', indent);
3005 stream__puts(gen->stream, "/* unreachable codes omitted */\n");
3006 }
3007 return CODE_REACH__ALWAYS_FAIL;
3008 case CODE_REACH__ALWAYS_SUCCEED:
3009 break;
3010 default:
3011 b = TRUE;
3012 }
3013 }
3014 return b ? CODE_REACH__BOTH : CODE_REACH__ALWAYS_SUCCEED;
3015 }
3016
generate_alternative_code(generate_t * gen,const node_array_t * nodes,int onfail,size_t indent,bool_t bare)3017 static code_reach_t generate_alternative_code(generate_t *gen, const node_array_t *nodes, int onfail, size_t indent, bool_t bare) {
3018 bool_t b = FALSE;
3019 int m = ++gen->label;
3020 size_t i;
3021 if (!bare) {
3022 stream__write_characters(gen->stream, ' ', indent);
3023 stream__puts(gen->stream, "{\n");
3024 indent += 4;
3025 }
3026 stream__write_characters(gen->stream, ' ', indent);
3027 stream__puts(gen->stream, "const size_t p = ctx->cur;\n");
3028 stream__write_characters(gen->stream, ' ', indent);
3029 stream__puts(gen->stream, "const size_t n = chunk->thunks.len;\n");
3030 for (i = 0; i < nodes->len; i++) {
3031 const bool_t c = (i + 1 < nodes->len) ? TRUE : FALSE;
3032 const int l = ++gen->label;
3033 switch (generate_code(gen, nodes->buf[i], l, indent, FALSE)) {
3034 case CODE_REACH__ALWAYS_SUCCEED:
3035 if (c) {
3036 stream__write_characters(gen->stream, ' ', indent);
3037 stream__puts(gen->stream, "/* unreachable codes omitted */\n");
3038 }
3039 if (b) {
3040 if (indent > 4) stream__write_characters(gen->stream, ' ', indent - 4);
3041 stream__printf(gen->stream, "L%04d:;\n", m);
3042 }
3043 if (!bare) {
3044 indent -= 4;
3045 stream__write_characters(gen->stream, ' ', indent);
3046 stream__puts(gen->stream, "}\n");
3047 }
3048 return CODE_REACH__ALWAYS_SUCCEED;
3049 case CODE_REACH__ALWAYS_FAIL:
3050 break;
3051 default:
3052 b = TRUE;
3053 stream__write_characters(gen->stream, ' ', indent);
3054 stream__printf(gen->stream, "goto L%04d;\n", m);
3055 }
3056 if (indent > 4) stream__write_characters(gen->stream, ' ', indent - 4);
3057 stream__printf(gen->stream, "L%04d:;\n", l);
3058 stream__write_characters(gen->stream, ' ', indent);
3059 stream__puts(gen->stream, "ctx->cur = p;\n");
3060 stream__write_characters(gen->stream, ' ', indent);
3061 stream__puts(gen->stream, "pcc_thunk_array__revert(ctx->auxil, &chunk->thunks, n);\n");
3062 if (!c) {
3063 stream__write_characters(gen->stream, ' ', indent);
3064 stream__printf(gen->stream, "goto L%04d;\n", onfail);
3065 }
3066 }
3067 if (b) {
3068 if (indent > 4) stream__write_characters(gen->stream, ' ', indent - 4);
3069 stream__printf(gen->stream, "L%04d:;\n", m);
3070 }
3071 if (!bare) {
3072 indent -= 4;
3073 stream__write_characters(gen->stream, ' ', indent);
3074 stream__puts(gen->stream, "}\n");
3075 }
3076 return b ? CODE_REACH__BOTH : CODE_REACH__ALWAYS_FAIL;
3077 }
3078
generate_capturing_code(generate_t * gen,const node_t * expr,size_t index,int onfail,size_t indent,bool_t bare)3079 static code_reach_t generate_capturing_code(generate_t *gen, const node_t *expr, size_t index, int onfail, size_t indent, bool_t bare) {
3080 code_reach_t r;
3081 if (!bare) {
3082 stream__write_characters(gen->stream, ' ', indent);
3083 stream__puts(gen->stream, "{\n");
3084 indent += 4;
3085 }
3086 stream__write_characters(gen->stream, ' ', indent);
3087 stream__puts(gen->stream, "const size_t p = ctx->cur;\n");
3088 stream__write_characters(gen->stream, ' ', indent);
3089 stream__puts(gen->stream, "size_t q;\n");
3090 r = generate_code(gen, expr, onfail, indent, FALSE);
3091 stream__write_characters(gen->stream, ' ', indent);
3092 stream__puts(gen->stream, "q = ctx->cur;\n");
3093 stream__write_characters(gen->stream, ' ', indent);
3094 stream__printf(gen->stream, "chunk->capts.buf[" FMT_LU "].range.start = p;\n", (ulong_t)index);
3095 stream__write_characters(gen->stream, ' ', indent);
3096 stream__printf(gen->stream, "chunk->capts.buf[" FMT_LU "].range.end = q;\n", (ulong_t)index);
3097 if (!bare) {
3098 indent -= 4;
3099 stream__write_characters(gen->stream, ' ', indent);
3100 stream__puts(gen->stream, "}\n");
3101 }
3102 return r;
3103 }
3104
generate_expanding_code(generate_t * gen,size_t index,int onfail,size_t indent,bool_t bare)3105 static code_reach_t generate_expanding_code(generate_t *gen, size_t index, int onfail, size_t indent, bool_t bare) {
3106 if (!bare) {
3107 stream__write_characters(gen->stream, ' ', indent);
3108 stream__puts(gen->stream, "{\n");
3109 indent += 4;
3110 }
3111 stream__write_characters(gen->stream, ' ', indent);
3112 stream__printf(gen->stream,
3113 "const size_t n = chunk->capts.buf[" FMT_LU "].range.end - chunk->capts.buf[" FMT_LU "].range.start;\n", (ulong_t)index, (ulong_t)index);
3114 stream__write_characters(gen->stream, ' ', indent);
3115 stream__printf(gen->stream, "if (pcc_refill_buffer(ctx, n) < n) goto L%04d;\n", onfail);
3116 stream__write_characters(gen->stream, ' ', indent);
3117 stream__puts(gen->stream, "if (n > 0) {\n");
3118 stream__write_characters(gen->stream, ' ', indent + 4);
3119 stream__puts(gen->stream, "const char *const p = ctx->buffer.buf + ctx->cur;\n");
3120 stream__write_characters(gen->stream, ' ', indent + 4);
3121 stream__printf(gen->stream, "const char *const q = ctx->buffer.buf + chunk->capts.buf[" FMT_LU "].range.start;\n", (ulong_t)index);
3122 stream__write_characters(gen->stream, ' ', indent + 4);
3123 stream__puts(gen->stream, "size_t i;\n");
3124 stream__write_characters(gen->stream, ' ', indent + 4);
3125 stream__puts(gen->stream, "for (i = 0; i < n; i++) {\n");
3126 stream__write_characters(gen->stream, ' ', indent + 8);
3127 stream__printf(gen->stream, "if (p[i] != q[i]) goto L%04d;\n", onfail);
3128 stream__write_characters(gen->stream, ' ', indent + 4);
3129 stream__puts(gen->stream, "}\n");
3130 stream__write_characters(gen->stream, ' ', indent + 4);
3131 stream__puts(gen->stream, "ctx->cur += n;\n");
3132 stream__write_characters(gen->stream, ' ', indent);
3133 stream__puts(gen->stream, "}\n");
3134 if (!bare) {
3135 indent -= 4;
3136 stream__write_characters(gen->stream, ' ', indent);
3137 stream__puts(gen->stream, "}\n");
3138 }
3139 return CODE_REACH__BOTH;
3140 }
3141
generate_thunking_action_code(generate_t * gen,size_t index,const node_const_array_t * vars,const node_const_array_t * capts,bool_t error,int onfail,size_t indent,bool_t bare)3142 static code_reach_t generate_thunking_action_code(
3143 generate_t *gen, size_t index, const node_const_array_t *vars, const node_const_array_t *capts, bool_t error, int onfail, size_t indent, bool_t bare
3144 ) {
3145 assert(gen->rule->type == NODE_RULE);
3146 if (!bare) {
3147 stream__write_characters(gen->stream, ' ', indent);
3148 stream__puts(gen->stream, "{\n");
3149 indent += 4;
3150 }
3151 if (error) {
3152 stream__write_characters(gen->stream, ' ', indent);
3153 stream__puts(gen->stream, "pcc_value_t null;\n");
3154 }
3155 stream__write_characters(gen->stream, ' ', indent);
3156 stream__printf(gen->stream, "pcc_thunk_t *const thunk = pcc_thunk__create_leaf(ctx->auxil, pcc_action_%s_" FMT_LU ", " FMT_LU ", " FMT_LU ");\n",
3157 gen->rule->data.rule.name, (ulong_t)index, (ulong_t)gen->rule->data.rule.vars.len, (ulong_t)gen->rule->data.rule.capts.len);
3158 {
3159 size_t i;
3160 for (i = 0; i < vars->len; i++) {
3161 assert(vars->buf[i]->type == NODE_REFERENCE);
3162 stream__write_characters(gen->stream, ' ', indent);
3163 stream__printf(gen->stream, "thunk->data.leaf.values.buf[" FMT_LU "] = &(chunk->values.buf[" FMT_LU "]);\n",
3164 (ulong_t)vars->buf[i]->data.reference.index, (ulong_t)vars->buf[i]->data.reference.index);
3165 }
3166 for (i = 0; i < capts->len; i++) {
3167 assert(capts->buf[i]->type == NODE_CAPTURE);
3168 stream__write_characters(gen->stream, ' ', indent);
3169 stream__printf(gen->stream, "thunk->data.leaf.capts.buf[" FMT_LU "] = &(chunk->capts.buf[" FMT_LU "]);\n",
3170 (ulong_t)capts->buf[i]->data.capture.index, (ulong_t)capts->buf[i]->data.capture.index);
3171 }
3172 stream__write_characters(gen->stream, ' ', indent);
3173 stream__puts(gen->stream, "thunk->data.leaf.capt0.range.start = chunk->pos;\n");
3174 stream__write_characters(gen->stream, ' ', indent);
3175 stream__puts(gen->stream, "thunk->data.leaf.capt0.range.end = ctx->cur;\n");
3176 }
3177 if (error) {
3178 stream__write_characters(gen->stream, ' ', indent);
3179 stream__puts(gen->stream, "memset(&null, 0, sizeof(pcc_value_t)); /* in case */\n");
3180 stream__write_characters(gen->stream, ' ', indent);
3181 stream__puts(gen->stream, "thunk->data.leaf.action(ctx, thunk, &null);\n");
3182 stream__write_characters(gen->stream, ' ', indent);
3183 stream__puts(gen->stream, "pcc_thunk__destroy(ctx->auxil, thunk);\n");
3184 }
3185 else {
3186 stream__write_characters(gen->stream, ' ', indent);
3187 stream__puts(gen->stream, "pcc_thunk_array__add(ctx->auxil, &chunk->thunks, thunk);\n");
3188 }
3189 if (!bare) {
3190 indent -= 4;
3191 stream__write_characters(gen->stream, ' ', indent);
3192 stream__puts(gen->stream, "}\n");
3193 }
3194 return CODE_REACH__ALWAYS_SUCCEED;
3195 }
3196
generate_thunking_error_code(generate_t * gen,const node_t * expr,size_t index,const node_const_array_t * vars,const node_const_array_t * capts,int onfail,size_t indent,bool_t bare)3197 static code_reach_t generate_thunking_error_code(
3198 generate_t *gen, const node_t *expr, size_t index, const node_const_array_t *vars, const node_const_array_t *capts, int onfail, size_t indent, bool_t bare
3199 ) {
3200 code_reach_t r;
3201 const int l = ++gen->label;
3202 const int m = ++gen->label;
3203 assert(gen->rule->type == NODE_RULE);
3204 if (!bare) {
3205 stream__write_characters(gen->stream, ' ', indent);
3206 stream__puts(gen->stream, "{\n");
3207 indent += 4;
3208 }
3209 r = generate_code(gen, expr, l, indent, TRUE);
3210 stream__write_characters(gen->stream, ' ', indent);
3211 stream__printf(gen->stream, "goto L%04d;\n", m);
3212 if (indent > 4) stream__write_characters(gen->stream, ' ', indent - 4);
3213 stream__printf(gen->stream, "L%04d:;\n", l);
3214 generate_thunking_action_code(gen, index, vars, capts, TRUE, l, indent, FALSE);
3215 stream__write_characters(gen->stream, ' ', indent);
3216 stream__printf(gen->stream, "goto L%04d;\n", onfail);
3217 if (indent > 4) stream__write_characters(gen->stream, ' ', indent - 4);
3218 stream__printf(gen->stream, "L%04d:;\n", m);
3219 if (!bare) {
3220 indent -= 4;
3221 stream__write_characters(gen->stream, ' ', indent);
3222 stream__puts(gen->stream, "}\n");
3223 }
3224 return r;
3225 }
3226
generate_code(generate_t * gen,const node_t * node,int onfail,size_t indent,bool_t bare)3227 static code_reach_t generate_code(generate_t *gen, const node_t *node, int onfail, size_t indent, bool_t bare) {
3228 if (node == NULL) {
3229 print_error("Internal error [%d]\n", __LINE__);
3230 exit(-1);
3231 }
3232 switch (node->type) {
3233 case NODE_RULE:
3234 print_error("Internal error [%d]\n", __LINE__);
3235 exit(-1);
3236 case NODE_REFERENCE:
3237 if (node->data.reference.index != VOID_VALUE) {
3238 stream__write_characters(gen->stream, ' ', indent);
3239 stream__printf(gen->stream, "if (!pcc_apply_rule(ctx, pcc_evaluate_rule_%s, &chunk->thunks, &(chunk->values.buf[" FMT_LU "]))) goto L%04d;\n",
3240 node->data.reference.name, (ulong_t)node->data.reference.index, onfail);
3241 }
3242 else {
3243 stream__write_characters(gen->stream, ' ', indent);
3244 stream__printf(gen->stream, "if (!pcc_apply_rule(ctx, pcc_evaluate_rule_%s, &chunk->thunks, NULL)) goto L%04d;\n",
3245 node->data.reference.name, onfail);
3246 }
3247 return CODE_REACH__BOTH;
3248 case NODE_STRING:
3249 return generate_matching_string_code(gen, node->data.string.value, onfail, indent, bare);
3250 case NODE_CHARCLASS:
3251 return gen->ascii ?
3252 generate_matching_charclass_code(gen, node->data.charclass.value, onfail, indent, bare) :
3253 generate_matching_utf8_charclass_code(gen, node->data.charclass.value, onfail, indent, bare);
3254 case NODE_QUANTITY:
3255 return generate_quantifying_code(gen, node->data.quantity.expr, node->data.quantity.min, node->data.quantity.max, onfail, indent, bare);
3256 case NODE_PREDICATE:
3257 return generate_predicating_code(gen, node->data.predicate.expr, node->data.predicate.neg, onfail, indent, bare);
3258 case NODE_SEQUENCE:
3259 return generate_sequential_code(gen, &node->data.sequence.nodes, onfail, indent, bare);
3260 case NODE_ALTERNATE:
3261 return generate_alternative_code(gen, &node->data.alternate.nodes, onfail, indent, bare);
3262 case NODE_CAPTURE:
3263 return generate_capturing_code(gen, node->data.capture.expr, node->data.capture.index, onfail, indent, bare);
3264 case NODE_EXPAND:
3265 return generate_expanding_code(gen, node->data.expand.index, onfail, indent, bare);
3266 case NODE_ACTION:
3267 return generate_thunking_action_code(
3268 gen, node->data.action.index, &node->data.action.vars, &node->data.action.capts, FALSE, onfail, indent, bare
3269 );
3270 case NODE_ERROR:
3271 return generate_thunking_error_code(
3272 gen, node->data.error.expr, node->data.error.index, &node->data.error.vars, &node->data.error.capts, onfail, indent, bare
3273 );
3274 default:
3275 print_error("Internal error [%d]\n", __LINE__);
3276 exit(-1);
3277 }
3278 }
3279
generate(context_t * ctx)3280 static bool_t generate(context_t *ctx) {
3281 const char *const vt = get_value_type(ctx);
3282 const char *const at = get_auxil_type(ctx);
3283 const bool_t vp = is_pointer_type(vt);
3284 const bool_t ap = is_pointer_type(at);
3285 stream_t sstream = stream__wrap(fopen_wt_e(ctx->sname), ctx->sname, ctx->opts.lines ? 0 : VOID_VALUE);
3286 stream_t hstream = stream__wrap(fopen_wt_e(ctx->hname), ctx->hname, ctx->opts.lines ? 0 : VOID_VALUE);
3287 stream__printf(&sstream, "/* A packrat parser generated by PackCC %s */\n\n", VERSION);
3288 stream__printf(&hstream, "/* A packrat parser generated by PackCC %s */\n\n", VERSION);
3289 {
3290 {
3291 size_t i;
3292 for (i = 0; i < ctx->eheader.len; i++) {
3293 stream__write_code_block(&hstream, ctx->eheader.buf[i].text, ctx->eheader.buf[i].len, 0, ctx->iname, ctx->eheader.buf[i].line);
3294 }
3295 }
3296 if (ctx->eheader.len > 0) stream__puts(&hstream, "\n");
3297 stream__printf(
3298 &hstream,
3299 "#ifndef PCC_INCLUDED_%s\n"
3300 "#define PCC_INCLUDED_%s\n"
3301 "\n",
3302 ctx->hid, ctx->hid
3303 );
3304 {
3305 size_t i;
3306 for (i = 0; i < ctx->header.len; i++) {
3307 stream__write_code_block(&hstream, ctx->header.buf[i].text, ctx->header.buf[i].len, 0, ctx->iname, ctx->header.buf[i].line);
3308 }
3309 }
3310 }
3311 {
3312 {
3313 size_t i;
3314 for (i = 0; i < ctx->esource.len; i++) {
3315 stream__write_code_block(&sstream, ctx->esource.buf[i].text, ctx->esource.buf[i].len, 0, ctx->iname, ctx->esource.buf[i].line);
3316 }
3317 }
3318 if (ctx->esource.len > 0) stream__puts(&sstream, "\n");
3319 stream__puts(
3320 &sstream,
3321 "#ifdef _MSC_VER\n"
3322 "#undef _CRT_SECURE_NO_WARNINGS\n"
3323 "#define _CRT_SECURE_NO_WARNINGS\n"
3324 "#endif /* _MSC_VER */\n"
3325 "#include <stdio.h>\n"
3326 "#include <stdlib.h>\n"
3327 "#include <string.h>\n"
3328 "\n"
3329 "#ifndef _MSC_VER\n"
3330 "#if defined __GNUC__ && defined _WIN32 /* MinGW */\n"
3331 "#ifndef PCC_USE_SYSTEM_STRNLEN\n"
3332 "#define strnlen(str, maxlen) pcc_strnlen(str, maxlen)\n"
3333 "static size_t pcc_strnlen(const char *str, size_t maxlen) {\n"
3334 " size_t i;\n"
3335 " for (i = 0; i < maxlen && str[i]; i++);\n"
3336 " return i;\n"
3337 "}\n"
3338 "#endif /* !PCC_USE_SYSTEM_STRNLEN */\n"
3339 "#endif /* defined __GNUC__ && defined _WIN32 */\n"
3340 "#endif /* !_MSC_VER */\n"
3341 "\n"
3342 );
3343 stream__printf(
3344 &sstream,
3345 "#include \"%s\"\n"
3346 "\n",
3347 ctx->hname
3348 );
3349 {
3350 size_t i;
3351 for (i = 0; i < ctx->source.len; i++) {
3352 stream__write_code_block(&sstream, ctx->source.buf[i].text, ctx->source.buf[i].len, 0, ctx->iname, ctx->source.buf[i].line);
3353 }
3354 }
3355 }
3356 {
3357 stream__puts(
3358 &sstream,
3359 "#if !defined __has_attribute || defined _MSC_VER\n"
3360 "#define __attribute__(x)\n"
3361 "#endif\n"
3362 "\n"
3363 "#ifdef _MSC_VER\n"
3364 "#define MARK_FUNC_AS_USED __pragma(warning(suppress:4505))\n"
3365 "#else\n"
3366 "#define MARK_FUNC_AS_USED __attribute__((__unused__))\n"
3367 "#endif\n"
3368 "\n"
3369 "#ifndef PCC_BUFFER_MIN_SIZE\n"
3370 "#define PCC_BUFFER_MIN_SIZE 256\n"
3371 "#endif /* !PCC_BUFFER_MIN_SIZE */\n"
3372 "\n"
3373 "#ifndef PCC_ARRAY_MIN_SIZE\n"
3374 "#define PCC_ARRAY_MIN_SIZE 2\n"
3375 "#endif /* !PCC_ARRAY_MIN_SIZE */\n"
3376 "\n"
3377 "#ifndef PCC_POOL_MIN_SIZE\n"
3378 "#define PCC_POOL_MIN_SIZE 65536\n"
3379 "#endif /* !PCC_POOL_MIN_SIZE */\n"
3380 "\n"
3381 "#define PCC_DBG_EVALUATE 0\n"
3382 "#define PCC_DBG_MATCH 1\n"
3383 "#define PCC_DBG_NOMATCH 2\n"
3384 "\n"
3385 "#define PCC_VOID_VALUE (~(size_t)0)\n"
3386 "\n"
3387 "typedef enum pcc_bool_tag {\n"
3388 " PCC_FALSE = 0,\n"
3389 " PCC_TRUE\n"
3390 "} pcc_bool_t;\n"
3391 "\n"
3392 "typedef struct pcc_char_array_tag {\n"
3393 " char *buf;\n"
3394 " size_t max;\n"
3395 " size_t len;\n"
3396 "} pcc_char_array_t;\n"
3397 "\n"
3398 "typedef struct pcc_range_tag {\n"
3399 " size_t start;\n"
3400 " size_t end;\n"
3401 "} pcc_range_t;\n"
3402 "\n"
3403 );
3404 stream__printf(
3405 &sstream,
3406 "typedef %s%spcc_value_t;\n"
3407 "\n",
3408 vt, vp ? "" : " "
3409 );
3410 stream__printf(
3411 &sstream,
3412 "typedef %s%spcc_auxil_t;\n"
3413 "\n",
3414 at, ap ? "" : " "
3415 );
3416 if (strcmp(get_prefix(ctx), "pcc") != 0) {
3417 stream__printf(
3418 &sstream,
3419 "typedef %s_context_t pcc_context_t;\n"
3420 "\n",
3421 get_prefix(ctx)
3422 );
3423 }
3424 stream__puts(
3425 &sstream,
3426 "typedef struct pcc_value_table_tag {\n"
3427 " pcc_value_t *buf;\n"
3428 " size_t max;\n"
3429 " size_t len;\n"
3430 "} pcc_value_table_t;\n"
3431 "\n"
3432 "typedef struct pcc_value_refer_table_tag {\n"
3433 " pcc_value_t **buf;\n"
3434 " size_t max;\n"
3435 " size_t len;\n"
3436 "} pcc_value_refer_table_t;\n"
3437 "\n"
3438 "typedef struct pcc_capture_tag {\n"
3439 " pcc_range_t range;\n"
3440 " char *string; /* mutable */\n"
3441 "} pcc_capture_t;\n"
3442 "\n"
3443 "typedef struct pcc_capture_table_tag {\n"
3444 " pcc_capture_t *buf;\n"
3445 " size_t max;\n"
3446 " size_t len;\n"
3447 "} pcc_capture_table_t;\n"
3448 "\n"
3449 "typedef struct pcc_capture_const_table_tag {\n"
3450 " const pcc_capture_t **buf;\n"
3451 " size_t max;\n"
3452 " size_t len;\n"
3453 "} pcc_capture_const_table_t;\n"
3454 "\n"
3455 "typedef struct pcc_thunk_tag pcc_thunk_t;\n"
3456 "typedef struct pcc_thunk_array_tag pcc_thunk_array_t;\n"
3457 "\n"
3458 "typedef void (*pcc_action_t)(pcc_context_t *, pcc_thunk_t *, pcc_value_t *);\n"
3459 "\n"
3460 );
3461 stream__puts(
3462 &sstream,
3463 "typedef enum pcc_thunk_type_tag {\n"
3464 " PCC_THUNK_LEAF,\n"
3465 " PCC_THUNK_NODE\n"
3466 "} pcc_thunk_type_t;\n"
3467 "\n"
3468 "typedef struct pcc_thunk_leaf_tag {\n"
3469 " pcc_value_refer_table_t values;\n"
3470 " pcc_capture_const_table_t capts;\n"
3471 " pcc_capture_t capt0;\n"
3472 " pcc_action_t action;\n"
3473 "} pcc_thunk_leaf_t;\n"
3474 "\n"
3475 "typedef struct pcc_thunk_node_tag {\n"
3476 " const pcc_thunk_array_t *thunks; /* just a reference */\n"
3477 " pcc_value_t *value; /* just a reference */\n"
3478 "} pcc_thunk_node_t;\n"
3479 "\n"
3480 "typedef union pcc_thunk_data_tag {\n"
3481 " pcc_thunk_leaf_t leaf;\n"
3482 " pcc_thunk_node_t node;\n"
3483 "} pcc_thunk_data_t;\n"
3484 "\n"
3485 "struct pcc_thunk_tag {\n"
3486 " pcc_thunk_type_t type;\n"
3487 " pcc_thunk_data_t data;\n"
3488 "};\n"
3489 "\n"
3490 "struct pcc_thunk_array_tag {\n"
3491 " pcc_thunk_t **buf;\n"
3492 " size_t max;\n"
3493 " size_t len;\n"
3494 "};\n"
3495 "\n"
3496 "typedef struct pcc_thunk_chunk_tag {\n"
3497 " pcc_value_table_t values;\n"
3498 " pcc_capture_table_t capts;\n"
3499 " pcc_thunk_array_t thunks;\n"
3500 " size_t pos; /* the starting position in the character buffer */\n"
3501 "} pcc_thunk_chunk_t;\n"
3502 "\n"
3503 "typedef struct pcc_lr_entry_tag pcc_lr_entry_t;\n"
3504 "\n"
3505 "typedef enum pcc_lr_answer_type_tag {\n"
3506 " PCC_LR_ANSWER_LR,\n"
3507 " PCC_LR_ANSWER_CHUNK\n"
3508 "} pcc_lr_answer_type_t;\n"
3509 "\n"
3510 "typedef union pcc_lr_answer_data_tag {\n"
3511 " pcc_lr_entry_t *lr;\n"
3512 " pcc_thunk_chunk_t *chunk;\n"
3513 "} pcc_lr_answer_data_t;\n"
3514 "\n"
3515 "typedef struct pcc_lr_answer_tag pcc_lr_answer_t;\n"
3516 "\n"
3517 "struct pcc_lr_answer_tag {\n"
3518 " pcc_lr_answer_type_t type;\n"
3519 " pcc_lr_answer_data_t data;\n"
3520 " size_t pos; /* the absolute position in the input */\n"
3521 " pcc_lr_answer_t *hold;\n"
3522 "};\n"
3523 "\n"
3524 );
3525 stream__puts(
3526 &sstream,
3527 "typedef pcc_thunk_chunk_t *(*pcc_rule_t)(pcc_context_t *);\n"
3528 "\n"
3529 "typedef struct pcc_rule_set_tag {\n"
3530 " pcc_rule_t *buf;\n"
3531 " size_t max;\n"
3532 " size_t len;\n"
3533 "} pcc_rule_set_t;\n"
3534 "\n"
3535 "typedef struct pcc_lr_head_tag pcc_lr_head_t;\n"
3536 "\n"
3537 "struct pcc_lr_head_tag {\n"
3538 " pcc_rule_t rule;\n"
3539 " pcc_rule_set_t invol;\n"
3540 " pcc_rule_set_t eval;\n"
3541 " pcc_lr_head_t *hold;\n"
3542 "};\n"
3543 "\n"
3544 "typedef struct pcc_lr_memo_tag {\n"
3545 " pcc_rule_t rule;\n"
3546 " pcc_lr_answer_t *answer;\n"
3547 "} pcc_lr_memo_t;\n"
3548 "\n"
3549 "typedef struct pcc_lr_memo_map_tag {\n"
3550 " pcc_lr_memo_t *buf;\n"
3551 " size_t max;\n"
3552 " size_t len;\n"
3553 "} pcc_lr_memo_map_t;\n"
3554 "\n"
3555 "typedef struct pcc_lr_table_entry_tag {\n"
3556 " pcc_lr_head_t *head; /* just a reference */\n"
3557 " pcc_lr_memo_map_t memos;\n"
3558 " pcc_lr_answer_t *hold_a;\n"
3559 " pcc_lr_head_t *hold_h;\n"
3560 "} pcc_lr_table_entry_t;\n"
3561 "\n"
3562 "typedef struct pcc_lr_table_tag {\n"
3563 " pcc_lr_table_entry_t **buf;\n"
3564 " size_t max;\n"
3565 " size_t len;\n"
3566 " size_t ofs;\n"
3567 "} pcc_lr_table_t;\n"
3568 "\n"
3569 "struct pcc_lr_entry_tag {\n"
3570 " pcc_rule_t rule;\n"
3571 " pcc_thunk_chunk_t *seed; /* just a reference */\n"
3572 " pcc_lr_head_t *head; /* just a reference */\n"
3573 "};\n"
3574 "\n"
3575 "typedef struct pcc_lr_stack_tag {\n"
3576 " pcc_lr_entry_t **buf;\n"
3577 " size_t max;\n"
3578 " size_t len;\n"
3579 "} pcc_lr_stack_t;\n"
3580 "\n"
3581 );
3582 stream__puts(
3583 &sstream,
3584 "typedef struct pcc_memory_entry_tag pcc_memory_entry_t;\n"
3585 "typedef struct pcc_memory_pool_tag pcc_memory_pool_t;\n"
3586 "\n"
3587 "struct pcc_memory_entry_tag {\n"
3588 " pcc_memory_entry_t *next;\n"
3589 "};\n"
3590 "\n"
3591 "struct pcc_memory_pool_tag {\n"
3592 " pcc_memory_pool_t *next;\n"
3593 " size_t allocated;\n"
3594 " size_t unused;\n"
3595 "};\n"
3596 "\n"
3597 "typedef struct pcc_memory_recycler_tag {\n"
3598 " pcc_memory_pool_t *pool_list;\n"
3599 " pcc_memory_entry_t *entry_list;\n"
3600 " size_t element_size;\n"
3601 "} pcc_memory_recycler_t;\n"
3602 "\n"
3603 );
3604 stream__printf(
3605 &sstream,
3606 "struct %s_context_tag {\n"
3607 " size_t pos; /* the position in the input of the first character currently buffered */\n"
3608 " size_t cur; /* the current parsing position in the character buffer */\n"
3609 " size_t level;\n"
3610 " pcc_char_array_t buffer;\n"
3611 " pcc_lr_table_t lrtable;\n"
3612 " pcc_lr_stack_t lrstack;\n"
3613 " pcc_thunk_array_t thunks;\n"
3614 " pcc_auxil_t auxil;\n"
3615 " pcc_memory_recycler_t thunk_chunk_recycler;\n"
3616 " pcc_memory_recycler_t lr_head_recycler;\n"
3617 " pcc_memory_recycler_t lr_answer_recycler;\n"
3618 "};\n"
3619 "\n",
3620 get_prefix(ctx)
3621 );
3622 stream__puts(
3623 &sstream,
3624 "#ifndef PCC_ERROR\n"
3625 "#define PCC_ERROR(auxil) pcc_error()\n"
3626 "MARK_FUNC_AS_USED\n"
3627 "static void pcc_error(void) {\n"
3628 " fprintf(stderr, \"Syntax error\\n\");\n"
3629 " exit(1);\n"
3630 "}\n"
3631 "#endif /* !PCC_ERROR */\n"
3632 "\n"
3633 "#ifndef PCC_GETCHAR\n"
3634 "#define PCC_GETCHAR(auxil) getchar()\n"
3635 "#endif /* !PCC_GETCHAR */\n"
3636 "\n"
3637 "#ifndef PCC_MALLOC\n"
3638 "#define PCC_MALLOC(auxil, size) pcc_malloc_e(size)\n"
3639 "static void *pcc_malloc_e(size_t size) {\n"
3640 " void *const p = malloc(size);\n"
3641 " if (p == NULL) {\n"
3642 " fprintf(stderr, \"Out of memory\\n\");\n"
3643 " exit(1);\n"
3644 " }\n"
3645 " return p;\n"
3646 "}\n"
3647 "#endif /* !PCC_MALLOC */\n"
3648 "\n"
3649 "#ifndef PCC_REALLOC\n"
3650 "#define PCC_REALLOC(auxil, ptr, size) pcc_realloc_e(ptr, size)\n"
3651 "static void *pcc_realloc_e(void *ptr, size_t size) {\n"
3652 " void *const p = realloc(ptr, size);\n"
3653 " if (p == NULL) {\n"
3654 " fprintf(stderr, \"Out of memory\\n\");\n"
3655 " exit(1);\n"
3656 " }\n"
3657 " return p;\n"
3658 "}\n"
3659 "#endif /* !PCC_REALLOC */\n"
3660 "\n"
3661 "#ifndef PCC_FREE\n"
3662 "#define PCC_FREE(auxil, ptr) free(ptr)\n"
3663 "#endif /* !PCC_FREE */\n"
3664 "\n"
3665 "#ifndef PCC_DEBUG\n"
3666 "#define PCC_DEBUG(auxil, event, rule, level, pos, buffer, length) ((void)0)\n"
3667 "#endif /* !PCC_DEBUG */\n"
3668 "\n"
3669 "static char *pcc_strndup_e(pcc_auxil_t auxil, const char *str, size_t len) {\n"
3670 " const size_t m = strnlen(str, len);\n"
3671 " char *const s = (char *)PCC_MALLOC(auxil, m + 1);\n"
3672 " memcpy(s, str, m);\n"
3673 " s[m] = '\\0';\n"
3674 " return s;\n"
3675 "}\n"
3676 "\n"
3677 );
3678 stream__puts(
3679 &sstream,
3680 "static void pcc_char_array__init(pcc_auxil_t auxil, pcc_char_array_t *array) {\n"
3681 " array->len = 0;\n"
3682 " array->max = 0;\n"
3683 " array->buf = NULL;\n"
3684 "}\n"
3685 "\n"
3686 "static void pcc_char_array__add(pcc_auxil_t auxil, pcc_char_array_t *array, char ch) {\n"
3687 " if (array->max <= array->len) {\n"
3688 " const size_t n = array->len + 1;\n"
3689 " size_t m = array->max;\n"
3690 " if (m == 0) m = PCC_BUFFER_MIN_SIZE;\n"
3691 " while (m < n && m != 0) m <<= 1;\n"
3692 " if (m == 0) m = n;\n"
3693 " array->buf = (char *)PCC_REALLOC(auxil, array->buf, m);\n"
3694 " array->max = m;\n"
3695 " }\n"
3696 " array->buf[array->len++] = ch;\n"
3697 "}\n"
3698 "\n"
3699 "static void pcc_char_array__term(pcc_auxil_t auxil, pcc_char_array_t *array) {\n"
3700 " PCC_FREE(auxil, array->buf);\n"
3701 "}\n"
3702 "\n"
3703 );
3704 stream__puts(
3705 &sstream,
3706 "static void pcc_value_table__init(pcc_auxil_t auxil, pcc_value_table_t *table) {\n"
3707 " table->len = 0;\n"
3708 " table->max = 0;\n"
3709 " table->buf = NULL;\n"
3710 "}\n"
3711 "\n"
3712 "MARK_FUNC_AS_USED\n"
3713 "static void pcc_value_table__resize(pcc_auxil_t auxil, pcc_value_table_t *table, size_t len) {\n"
3714 " if (table->max < len) {\n"
3715 " size_t m = table->max;\n"
3716 " if (m == 0) m = PCC_ARRAY_MIN_SIZE;\n"
3717 " while (m < len && m != 0) m <<= 1;\n"
3718 " if (m == 0) m = len;\n"
3719 " table->buf = (pcc_value_t *)PCC_REALLOC(auxil, table->buf, sizeof(pcc_value_t) * m);\n"
3720 " table->max = m;\n"
3721 " }\n"
3722 " table->len = len;\n"
3723 "}\n"
3724 "\n"
3725 "MARK_FUNC_AS_USED\n"
3726 "static void pcc_value_table__clear(pcc_auxil_t auxil, pcc_value_table_t *table) {\n"
3727 " memset(table->buf, 0, sizeof(pcc_value_t) * table->len);\n"
3728 "}\n"
3729 "\n"
3730 "static void pcc_value_table__term(pcc_auxil_t auxil, pcc_value_table_t *table) {\n"
3731 " PCC_FREE(auxil, table->buf);\n"
3732 "}\n"
3733 "\n"
3734 );
3735 stream__puts(
3736 &sstream,
3737 "static void pcc_value_refer_table__init(pcc_auxil_t auxil, pcc_value_refer_table_t *table) {\n"
3738 " table->len = 0;\n"
3739 " table->max = 0;\n"
3740 " table->buf = NULL;\n"
3741 "}\n"
3742 "\n"
3743 "static void pcc_value_refer_table__resize(pcc_auxil_t auxil, pcc_value_refer_table_t *table, size_t len) {\n"
3744 " size_t i;\n"
3745 " if (table->max < len) {\n"
3746 " size_t m = table->max;\n"
3747 " if (m == 0) m = PCC_ARRAY_MIN_SIZE;\n"
3748 " while (m < len && m != 0) m <<= 1;\n"
3749 " if (m == 0) m = len;\n"
3750 " table->buf = (pcc_value_t **)PCC_REALLOC(auxil, table->buf, sizeof(pcc_value_t *) * m);\n"
3751 " table->max = m;\n"
3752 " }\n"
3753 " for (i = table->len; i < len; i++) table->buf[i] = NULL;\n"
3754 " table->len = len;\n"
3755 "}\n"
3756 "\n"
3757 "static void pcc_value_refer_table__term(pcc_auxil_t auxil, pcc_value_refer_table_t *table) {\n"
3758 " PCC_FREE(auxil, table->buf);\n"
3759 "}\n"
3760 "\n"
3761 );
/*
 * Emit the pcc_capture_table_* helpers of the generated parser.
 * __resize frees the string of every entry at index >= len before shrinking,
 * grows the buffer when len exceeds max, and resets range/string of each newly
 * exposed entry. __term frees every entry's string and then the buffer.
 */
3762 stream__puts(
3763 &sstream,
3764 "static void pcc_capture_table__init(pcc_auxil_t auxil, pcc_capture_table_t *table) {\n"
3765 " table->len = 0;\n"
3766 " table->max = 0;\n"
3767 " table->buf = NULL;\n"
3768 "}\n"
3769 "\n"
3770 "MARK_FUNC_AS_USED\n"
3771 "static void pcc_capture_table__resize(pcc_auxil_t auxil, pcc_capture_table_t *table, size_t len) {\n"
3772 " size_t i;\n"
3773 " for (i = len; i < table->len; i++) PCC_FREE(auxil, table->buf[i].string);\n" /* only runs when shrinking */
3774 " if (table->max < len) {\n"
3775 " size_t m = table->max;\n"
3776 " if (m == 0) m = PCC_ARRAY_MIN_SIZE;\n"
3777 " while (m < len && m != 0) m <<= 1;\n"
3778 " if (m == 0) m = len;\n"
3779 " table->buf = (pcc_capture_t *)PCC_REALLOC(auxil, table->buf, sizeof(pcc_capture_t) * m);\n"
3780 " table->max = m;\n"
3781 " }\n"
3782 " for (i = table->len; i < len; i++) {\n"
3783 " table->buf[i].range.start = 0;\n"
3784 " table->buf[i].range.end = 0;\n"
3785 " table->buf[i].string = NULL;\n"
3786 " }\n"
3787 " table->len = len;\n"
3788 "}\n"
3789 "\n"
3790 "static void pcc_capture_table__term(pcc_auxil_t auxil, pcc_capture_table_t *table) {\n"
3791 " while (table->len > 0) {\n"
3792 " table->len--;\n"
3793 " PCC_FREE(auxil, table->buf[table->len].string);\n"
3794 " }\n"
3795 " PCC_FREE(auxil, table->buf);\n"
3796 "}\n"
3797 "\n"
3798 );
/*
 * Emit the pcc_capture_const_table_* helpers of the generated parser: a
 * growable array of `const pcc_capture_t *`. __resize NULL-fills the slots
 * between the old and the new length; __term frees only the pointer array,
 * not the captures it points to.
 */
3799 stream__puts(
3800 &sstream,
3801 "static void pcc_capture_const_table__init(pcc_auxil_t auxil, pcc_capture_const_table_t *table) {\n"
3802 " table->len = 0;\n"
3803 " table->max = 0;\n"
3804 " table->buf = NULL;\n"
3805 "}\n"
3806 "\n"
3807 "static void pcc_capture_const_table__resize(pcc_auxil_t auxil, pcc_capture_const_table_t *table, size_t len) {\n"
3808 " size_t i;\n"
3809 " if (table->max < len) {\n"
3810 " size_t m = table->max;\n"
3811 " if (m == 0) m = PCC_ARRAY_MIN_SIZE;\n"
3812 " while (m < len && m != 0) m <<= 1;\n"
3813 " if (m == 0) m = len;\n"
3814 " table->buf = (const pcc_capture_t **)PCC_REALLOC(auxil, (pcc_capture_t **)table->buf, sizeof(const pcc_capture_t *) * m);\n"
3815 " table->max = m;\n"
3816 " }\n"
3817 " for (i = table->len; i < len; i++) table->buf[i] = NULL;\n"
3818 " table->len = len;\n"
3819 "}\n"
3820 "\n"
3821 "static void pcc_capture_const_table__term(pcc_auxil_t auxil, pcc_capture_const_table_t *table) {\n"
3822 " PCC_FREE(auxil, (void *)table->buf);\n"
3823 "}\n"
3824 "\n"
3825 );
/*
 * Emit the pcc_thunk_* helpers of the generated parser.
 * __create_leaf allocates a PCC_THUNK_LEAF thunk with value/capture reference
 * tables sized to valuec/captc and an empty capt0; __create_node allocates a
 * PCC_THUNK_NODE thunk that stores the given thunk array and value pointers.
 * __destroy releases what a leaf owns (capt0.string and both tables); for a
 * node it frees only the thunk itself, not the array or value it points to.
 */
3826 stream__puts(
3827 &sstream,
3828 "MARK_FUNC_AS_USED\n"
3829 "static pcc_thunk_t *pcc_thunk__create_leaf(pcc_auxil_t auxil, pcc_action_t action, size_t valuec, size_t captc) {\n"
3830 " pcc_thunk_t *const thunk = (pcc_thunk_t *)PCC_MALLOC(auxil, sizeof(pcc_thunk_t));\n"
3831 " thunk->type = PCC_THUNK_LEAF;\n"
3832 " pcc_value_refer_table__init(auxil, &thunk->data.leaf.values);\n"
3833 " pcc_value_refer_table__resize(auxil, &thunk->data.leaf.values, valuec);\n"
3834 " pcc_capture_const_table__init(auxil, &thunk->data.leaf.capts);\n"
3835 " pcc_capture_const_table__resize(auxil, &thunk->data.leaf.capts, captc);\n"
3836 " thunk->data.leaf.capt0.range.start = 0;\n"
3837 " thunk->data.leaf.capt0.range.end = 0;\n"
3838 " thunk->data.leaf.capt0.string = NULL;\n"
3839 " thunk->data.leaf.action = action;\n"
3840 " return thunk;\n"
3841 "}\n"
3842 "\n"
3843 "static pcc_thunk_t *pcc_thunk__create_node(pcc_auxil_t auxil, const pcc_thunk_array_t *thunks, pcc_value_t *value) {\n"
3844 " pcc_thunk_t *const thunk = (pcc_thunk_t *)PCC_MALLOC(auxil, sizeof(pcc_thunk_t));\n"
3845 " thunk->type = PCC_THUNK_NODE;\n"
3846 " thunk->data.node.thunks = thunks;\n"
3847 " thunk->data.node.value = value;\n"
3848 " return thunk;\n"
3849 "}\n"
3850 "\n"
3851 "static void pcc_thunk__destroy(pcc_auxil_t auxil, pcc_thunk_t *thunk) {\n"
3852 " if (thunk == NULL) return;\n"
3853 " switch (thunk->type) {\n"
3854 " case PCC_THUNK_LEAF:\n"
3855 " PCC_FREE(auxil, thunk->data.leaf.capt0.string);\n"
3856 " pcc_capture_const_table__term(auxil, &thunk->data.leaf.capts);\n"
3857 " pcc_value_refer_table__term(auxil, &thunk->data.leaf.values);\n"
3858 " break;\n"
3859 " case PCC_THUNK_NODE:\n"
3860 " break;\n"
3861 " default: /* unknown */\n"
3862 " break;\n"
3863 " }\n"
3864 " PCC_FREE(auxil, thunk);\n"
3865 "}\n"
3866 "\n"
3867 );
/*
 * Emit the pcc_thunk_array_* helpers of the generated parser: a growable
 * array of thunk pointers. __add appends (growing the buffer when full),
 * __revert destroys thunks from the end until the length is back to `len`,
 * and __term destroys every remaining thunk and frees the buffer.
 */
3868 stream__puts(
3869 &sstream,
3870 "static void pcc_thunk_array__init(pcc_auxil_t auxil, pcc_thunk_array_t *array) {\n"
3871 " array->len = 0;\n"
3872 " array->max = 0;\n"
3873 " array->buf = NULL;\n"
3874 "}\n"
3875 "\n"
3876 "static void pcc_thunk_array__add(pcc_auxil_t auxil, pcc_thunk_array_t *array, pcc_thunk_t *thunk) {\n"
3877 " if (array->max <= array->len) {\n"
3878 " const size_t n = array->len + 1;\n"
3879 " size_t m = array->max;\n"
3880 " if (m == 0) m = PCC_ARRAY_MIN_SIZE;\n"
3881 " while (m < n && m != 0) m <<= 1;\n"
3882 " if (m == 0) m = n;\n"
3883 " array->buf = (pcc_thunk_t **)PCC_REALLOC(auxil, array->buf, sizeof(pcc_thunk_t *) * m);\n"
3884 " array->max = m;\n"
3885 " }\n"
3886 " array->buf[array->len++] = thunk;\n"
3887 "}\n"
3888 "\n"
3889 "static void pcc_thunk_array__revert(pcc_auxil_t auxil, pcc_thunk_array_t *array, size_t len) {\n"
3890 " while (array->len > len) {\n"
3891 " array->len--;\n"
3892 " pcc_thunk__destroy(auxil, array->buf[array->len]);\n"
3893 " }\n"
3894 "}\n"
3895 "\n"
3896 "static void pcc_thunk_array__term(pcc_auxil_t auxil, pcc_thunk_array_t *array) {\n"
3897 " while (array->len > 0) {\n"
3898 " array->len--;\n"
3899 " pcc_thunk__destroy(auxil, array->buf[array->len]);\n"
3900 " }\n"
3901 " PCC_FREE(auxil, array->buf);\n"
3902 "}\n"
3903 "\n"
3904 );
/*
 * Emit the pcc_memory_recycler_* helpers of the generated parser: a
 * fixed-element-size allocator built on a list of pools plus a list of
 * returned entries.
 * __supply hands out the most recently recycled entry if there is one;
 * otherwise it takes the next unused element of the newest pool, first
 * allocating a new pool (PCC_POOL_MIN_SIZE elements for the first one, twice
 * the previous pool's element count afterwards) when no pool exists or the
 * newest one is exhausted.
 * __recycle links ptr onto the entry list; __term frees every pool.
 */
3905 stream__puts(
3906 &sstream,
3907 "static void pcc_memory_recycler__init(pcc_auxil_t auxil, pcc_memory_recycler_t *recycler, size_t element_size) {\n"
3908 " recycler->pool_list = NULL;\n"
3909 " recycler->entry_list = NULL;\n"
3910 " recycler->element_size = element_size;\n"
3911 "}\n"
3912 "\n"
3913 "static void *pcc_memory_recycler__supply(pcc_auxil_t auxil, pcc_memory_recycler_t *recycler) {\n"
3914 " if (recycler->entry_list) {\n"
3915 " pcc_memory_entry_t *const tmp = recycler->entry_list;\n"
3916 " recycler->entry_list = tmp->next;\n"
3917 " return tmp;\n"
3918 " }\n"
3919 " if (!recycler->pool_list || recycler->pool_list->unused == 0) {\n"
3920 " size_t size = PCC_POOL_MIN_SIZE;\n"
3921 " if (recycler->pool_list) {\n"
3922 " size = recycler->pool_list->allocated << 1;\n"
3923 " if (size == 0) size = recycler->pool_list->allocated;\n" /* doubling wrapped to 0: reuse the previous count */
3924 " }\n"
3925 " {\n"
3926 " pcc_memory_pool_t *const pool = (pcc_memory_pool_t *)PCC_MALLOC(\n"
3927 " auxil, sizeof(pcc_memory_pool_t) + recycler->element_size * size\n"
3928 " );\n"
3929 " pool->allocated = size;\n"
3930 " pool->unused = size;\n"
3931 " pool->next = recycler->pool_list;\n"
3932 " recycler->pool_list = pool;\n"
3933 " }\n"
3934 " }\n"
3935 " recycler->pool_list->unused--;\n"
3936 " return (char *)recycler->pool_list + sizeof(pcc_memory_pool_t) + recycler->element_size * recycler->pool_list->unused;\n" /* element storage follows the pool header */
3937 "}\n"
3938 "\n"
3939 "static void pcc_memory_recycler__recycle(pcc_auxil_t auxil, pcc_memory_recycler_t *recycler, void *ptr) {\n"
3940 " pcc_memory_entry_t *const tmp = (pcc_memory_entry_t *)ptr;\n" /* the returned element's own storage holds the list link */
3941 " tmp->next = recycler->entry_list;\n"
3942 " recycler->entry_list = tmp;\n"
3943 "}\n"
3944 "\n"
3945 "static void pcc_memory_recycler__term(pcc_auxil_t auxil, pcc_memory_recycler_t *recycler) {\n"
3946 " while (recycler->pool_list) {\n"
3947 " pcc_memory_pool_t *const tmp = recycler->pool_list;\n"
3948 " recycler->pool_list = tmp->next;\n"
3949 " PCC_FREE(auxil, tmp);\n"
3950 " }\n"
3951 "}\n"
3952 "\n"
3953 );
/*
 * Emit pcc_thunk_chunk__create / __destroy of the generated parser.
 * A chunk is taken from ctx->thunk_chunk_recycler and starts with empty
 * value, capture and thunk tables and pos 0; destroying it terminates those
 * tables and hands the chunk back to the same recycler.
 */
3954 stream__puts(
3955 &sstream,
3956 "MARK_FUNC_AS_USED\n"
3957 "static pcc_thunk_chunk_t *pcc_thunk_chunk__create(pcc_context_t *ctx) {\n"
3958 " pcc_thunk_chunk_t *const chunk = (pcc_thunk_chunk_t *)pcc_memory_recycler__supply(ctx->auxil, &ctx->thunk_chunk_recycler);\n"
3959 " pcc_value_table__init(ctx->auxil, &chunk->values);\n"
3960 " pcc_capture_table__init(ctx->auxil, &chunk->capts);\n"
3961 " pcc_thunk_array__init(ctx->auxil, &chunk->thunks);\n"
3962 " chunk->pos = 0;\n"
3963 " return chunk;\n"
3964 "}\n"
3965 "\n"
3966 "static void pcc_thunk_chunk__destroy(pcc_context_t *ctx, pcc_thunk_chunk_t *chunk) {\n"
3967 " if (chunk == NULL) return;\n"
3968 " pcc_thunk_array__term(ctx->auxil, &chunk->thunks);\n"
3969 " pcc_capture_table__term(ctx->auxil, &chunk->capts);\n"
3970 " pcc_value_table__term(ctx->auxil, &chunk->values);\n"
3971 " pcc_memory_recycler__recycle(ctx->auxil, &ctx->thunk_chunk_recycler, chunk);\n"
3972 "}\n"
3973 "\n"
3974 );
/*
 * Emit the pcc_rule_set_* helpers of the generated parser: an array-backed
 * set of pcc_rule_t values searched linearly.
 * __index returns the position of `rule` or PCC_VOID_VALUE when absent;
 * __add appends only if the rule is not already present and reports whether
 * it was added; __remove reports whether the rule was found; __clear resets
 * the length; __copy clears `set` and re-adds every element of `src`;
 * __term frees the buffer.
 */
3975 stream__puts(
3976 &sstream,
3977 "static void pcc_rule_set__init(pcc_auxil_t auxil, pcc_rule_set_t *set) {\n"
3978 " set->len = 0;\n"
3979 " set->max = 0;\n"
3980 " set->buf = NULL;\n"
3981 "}\n"
3982 "\n"
3983 "static size_t pcc_rule_set__index(pcc_auxil_t auxil, const pcc_rule_set_t *set, pcc_rule_t rule) {\n"
3984 " size_t i;\n"
3985 " for (i = 0; i < set->len; i++) {\n"
3986 " if (set->buf[i] == rule) return i;\n"
3987 " }\n"
3988 " return PCC_VOID_VALUE;\n"
3989 "}\n"
3990 "\n"
3991 "static pcc_bool_t pcc_rule_set__add(pcc_auxil_t auxil, pcc_rule_set_t *set, pcc_rule_t rule) {\n"
3992 " const size_t i = pcc_rule_set__index(auxil, set, rule);\n"
3993 " if (i != PCC_VOID_VALUE) return PCC_FALSE;\n"
3994 " if (set->max <= set->len) {\n"
3995 " const size_t n = set->len + 1;\n"
3996 " size_t m = set->max;\n"
3997 " if (m == 0) m = PCC_ARRAY_MIN_SIZE;\n"
3998 " while (m < n && m != 0) m <<= 1;\n"
3999 " if (m == 0) m = n;\n"
4000 " set->buf = (pcc_rule_t *)PCC_REALLOC(auxil, set->buf, sizeof(pcc_rule_t) * m);\n"
4001 " set->max = m;\n"
4002 " }\n"
4003 " set->buf[set->len++] = rule;\n"
4004 " return PCC_TRUE;\n"
4005 "}\n"
4006 "\n"
4007 "static pcc_bool_t pcc_rule_set__remove(pcc_auxil_t auxil, pcc_rule_set_t *set, pcc_rule_t rule) {\n"
4008 " const size_t i = pcc_rule_set__index(auxil, set, rule);\n"
4009 " if (i == PCC_VOID_VALUE) return PCC_FALSE;\n"
4010 " memmove(set->buf + i, set->buf + (i + 1), sizeof(pcc_rule_t) * (set->len - (i + 1)));\n" /* NOTE(review): set->len is not decremented after this shift, so the last slot keeps its old value and stays visible to __index -- confirm whether pcc_apply_rule relies on this before changing it */
4011 " return PCC_TRUE;\n"
4012 "}\n"
4013 "\n"
4014 "static void pcc_rule_set__clear(pcc_auxil_t auxil, pcc_rule_set_t *set) {\n"
4015 " set->len = 0;\n"
4016 "}\n"
4017 "\n"
4018 "static void pcc_rule_set__copy(pcc_auxil_t auxil, pcc_rule_set_t *set, const pcc_rule_set_t *src) {\n"
4019 " size_t i;\n"
4020 " pcc_rule_set__clear(auxil, set);\n"
4021 " for (i = 0; i < src->len; i++) {\n"
4022 " pcc_rule_set__add(auxil, set, src->buf[i]);\n"
4023 " }\n"
4024 "}\n"
4025 "\n"
4026 "static void pcc_rule_set__term(pcc_auxil_t auxil, pcc_rule_set_t *set) {\n"
4027 " PCC_FREE(auxil, set->buf);\n"
4028 "}\n"
4029 "\n"
4030 );
/*
 * Emit pcc_lr_head__create / __destroy of the generated parser.
 * A head is taken from ctx->lr_head_recycler and starts with empty `invol`
 * and `eval` rule sets and no `hold` link. __destroy first destroys the head
 * reachable through `hold` (recursively), then terminates both rule sets and
 * returns the head to the recycler.
 */
4031 stream__puts(
4032 &sstream,
4033 "static pcc_lr_head_t *pcc_lr_head__create(pcc_context_t *ctx, pcc_rule_t rule) {\n"
4034 " pcc_lr_head_t *const head = (pcc_lr_head_t *)pcc_memory_recycler__supply(ctx->auxil, &ctx->lr_head_recycler);\n"
4035 " head->rule = rule;\n"
4036 " pcc_rule_set__init(ctx->auxil, &head->invol);\n"
4037 " pcc_rule_set__init(ctx->auxil, &head->eval);\n"
4038 " head->hold = NULL;\n"
4039 " return head;\n"
4040 "}\n"
4041 "\n"
4042 "static void pcc_lr_head__destroy(pcc_context_t *ctx, pcc_lr_head_t *head) {\n"
4043 " if (head == NULL) return;\n"
4044 " pcc_lr_head__destroy(ctx, head->hold);\n"
4045 " pcc_rule_set__term(ctx->auxil, &head->eval);\n"
4046 " pcc_rule_set__term(ctx->auxil, &head->invol);\n"
4047 " pcc_memory_recycler__recycle(ctx->auxil, &ctx->lr_head_recycler, head);\n"
4048 "}\n"
4049 "\n"
4050 );
/*
 * Emit the pcc_lr_answer_* helpers of the generated parser.
 * __create takes an answer from ctx->lr_answer_recycler and initialises it for
 * the given type/pos; for an unrecognised type it gives the answer back and
 * returns NULL.
 * __set_chunk turns `answer` into a PCC_LR_ANSWER_CHUNK answer holding `chunk`
 * while preserving its previous type/pos/data in a fresh answer that is pushed
 * onto answer->hold, so the old payload is still released by __destroy.
 * __destroy walks the hold chain, destroying each answer's lr entry or chunk
 * and returning the answer to the recycler.
 */
4051 stream__puts(
4052 &sstream,
4053 "static void pcc_lr_entry__destroy(pcc_auxil_t auxil, pcc_lr_entry_t *lr);\n"
4054 "\n"
4055 "static pcc_lr_answer_t *pcc_lr_answer__create(pcc_context_t *ctx, pcc_lr_answer_type_t type, size_t pos) {\n"
4056 " pcc_lr_answer_t *answer = (pcc_lr_answer_t *)pcc_memory_recycler__supply(ctx->auxil, &ctx->lr_answer_recycler);\n"
4057 " answer->type = type;\n"
4058 " answer->pos = pos;\n"
4059 " answer->hold = NULL;\n"
4060 " switch (answer->type) {\n"
4061 " case PCC_LR_ANSWER_LR:\n"
4062 " answer->data.lr = NULL;\n"
4063 " break;\n"
4064 " case PCC_LR_ANSWER_CHUNK:\n"
4065 " answer->data.chunk = NULL;\n"
4066 " break;\n"
4067 " default: /* unknown */\n"
4068 " pcc_memory_recycler__recycle(ctx->auxil, &ctx->lr_answer_recycler, answer);\n" /* the answer lives inside a recycler pool (or is a recycled entry), so it must go back to the recycler; it is not a pointer PCC_FREE may be given */
4069 " answer = NULL;\n"
4070 " }\n"
4071 " return answer;\n"
4072 "}\n"
4073 "\n"
4074 "static void pcc_lr_answer__set_chunk(pcc_context_t *ctx, pcc_lr_answer_t *answer, pcc_thunk_chunk_t *chunk) {\n"
4075 " pcc_lr_answer_t *const a = pcc_lr_answer__create(ctx, answer->type, answer->pos);\n"
4076 " switch (answer->type) {\n"
4077 " case PCC_LR_ANSWER_LR:\n"
4078 " a->data.lr = answer->data.lr;\n"
4079 " break;\n"
4080 " case PCC_LR_ANSWER_CHUNK:\n"
4081 " a->data.chunk = answer->data.chunk;\n"
4082 " break;\n"
4083 " default: /* unknown */\n"
4084 " break;\n"
4085 " }\n"
4086 " a->hold = answer->hold;\n"
4087 " answer->hold = a;\n"
4088 " answer->type = PCC_LR_ANSWER_CHUNK;\n"
4089 " answer->data.chunk = chunk;\n"
4090 "}\n"
4091 "\n"
4092 "static void pcc_lr_answer__destroy(pcc_context_t *ctx, pcc_lr_answer_t *answer) {\n"
4093 " while (answer != NULL) {\n"
4094 " pcc_lr_answer_t *const a = answer->hold;\n" /* read the link before the answer is recycled */
4095 " switch (answer->type) {\n"
4096 " case PCC_LR_ANSWER_LR:\n"
4097 " pcc_lr_entry__destroy(ctx->auxil, answer->data.lr);\n"
4098 " break;\n"
4099 " case PCC_LR_ANSWER_CHUNK:\n"
4100 " pcc_thunk_chunk__destroy(ctx, answer->data.chunk);\n"
4101 " break;\n"
4102 " default: /* unknown */\n"
4103 " break;\n"
4104 " }\n"
4105 " pcc_memory_recycler__recycle(ctx->auxil, &ctx->lr_answer_recycler, answer);\n"
4106 " answer = a;\n"
4107 " }\n"
4108 "}\n"
4109 "\n"
4110 );
/*
 * Emit the pcc_lr_memo_map_* helpers of the generated parser: an array of
 * (rule, answer) pairs searched linearly by rule.
 * __put replaces the answer of an existing rule (destroying the previous
 * answer) or appends a new pair; __get returns the stored answer or NULL;
 * __term destroys every stored answer and frees the buffer.
 */
4111 stream__puts(
4112 &sstream,
4113 "static void pcc_lr_memo_map__init(pcc_auxil_t auxil, pcc_lr_memo_map_t *map) {\n"
4114 " map->len = 0;\n"
4115 " map->max = 0;\n"
4116 " map->buf = NULL;\n"
4117 "}\n"
4118 "\n"
4119 "static size_t pcc_lr_memo_map__index(pcc_context_t *ctx, pcc_lr_memo_map_t *map, pcc_rule_t rule) {\n"
4120 " size_t i;\n"
4121 " for (i = 0; i < map->len; i++) {\n"
4122 " if (map->buf[i].rule == rule) return i;\n"
4123 " }\n"
4124 " return PCC_VOID_VALUE;\n"
4125 "}\n"
4126 "\n"
4127 "static void pcc_lr_memo_map__put(pcc_context_t *ctx, pcc_lr_memo_map_t *map, pcc_rule_t rule, pcc_lr_answer_t *answer) {\n"
4128 " const size_t i = pcc_lr_memo_map__index(ctx, map, rule);\n"
4129 " if (i != PCC_VOID_VALUE) {\n"
4130 " pcc_lr_answer__destroy(ctx, map->buf[i].answer);\n"
4131 " map->buf[i].answer = answer;\n"
4132 " }\n"
4133 " else {\n"
4134 " if (map->max <= map->len) {\n"
4135 " const size_t n = map->len + 1;\n"
4136 " size_t m = map->max;\n"
4137 " if (m == 0) m = PCC_ARRAY_MIN_SIZE;\n"
4138 " while (m < n && m != 0) m <<= 1;\n"
4139 " if (m == 0) m = n;\n"
4140 " map->buf = (pcc_lr_memo_t *)PCC_REALLOC(ctx->auxil, map->buf, sizeof(pcc_lr_memo_t) * m);\n"
4141 " map->max = m;\n"
4142 " }\n"
4143 " map->buf[map->len].rule = rule;\n"
4144 " map->buf[map->len].answer = answer;\n"
4145 " map->len++;\n"
4146 " }\n"
4147 "}\n"
4148 "\n"
4149 "static pcc_lr_answer_t *pcc_lr_memo_map__get(pcc_context_t *ctx, pcc_lr_memo_map_t *map, pcc_rule_t rule) {\n"
4150 " const size_t i = pcc_lr_memo_map__index(ctx, map, rule);\n"
4151 " return (i != PCC_VOID_VALUE) ? map->buf[i].answer : NULL;\n"
4152 "}\n"
4153 "\n"
4154 "static void pcc_lr_memo_map__term(pcc_context_t *ctx, pcc_lr_memo_map_t *map) {\n"
4155 " while (map->len > 0) {\n"
4156 " map->len--;\n"
4157 " pcc_lr_answer__destroy(ctx, map->buf[map->len].answer);\n"
4158 " }\n"
4159 " PCC_FREE(ctx->auxil, map->buf);\n"
4160 "}\n"
4161 "\n"
4162 );
/*
 * Emit pcc_lr_table_entry__create / __destroy of the generated parser.
 * An entry starts with no head, an empty memo map and empty hold_a / hold_h
 * chains. __destroy releases the held heads, the held answers and the memo
 * map, then the entry; the `head` field itself is not destroyed here.
 */
4163 stream__puts(
4164 &sstream,
4165 "static pcc_lr_table_entry_t *pcc_lr_table_entry__create(pcc_context_t *ctx) {\n"
4166 " pcc_lr_table_entry_t *const entry = (pcc_lr_table_entry_t *)PCC_MALLOC(ctx->auxil, sizeof(pcc_lr_table_entry_t));\n"
4167 " entry->head = NULL;\n"
4168 " pcc_lr_memo_map__init(ctx->auxil, &entry->memos);\n"
4169 " entry->hold_a = NULL;\n"
4170 " entry->hold_h = NULL;\n"
4171 " return entry;\n"
4172 "}\n"
4173 "\n"
4174 "static void pcc_lr_table_entry__destroy(pcc_context_t *ctx, pcc_lr_table_entry_t *entry) {\n"
4175 " if (entry == NULL) return;\n"
4176 " pcc_lr_head__destroy(ctx, entry->hold_h);\n"
4177 " pcc_lr_answer__destroy(ctx, entry->hold_a);\n"
4178 " pcc_lr_memo_map__term(ctx, &entry->memos);\n"
4179 " PCC_FREE(ctx->auxil, entry);\n"
4180 "}\n"
4181 "\n"
4182 );
/*
 * Emit the pcc_lr_table_* helpers of the generated parser: a growable array
 * of lazily created pcc_lr_table_entry_t pointers.
 * Every accessor adds table->ofs to the index it is given before touching
 * buf. The setters (__set_head, __hold_head, __set_answer, __hold_answer)
 * extend the table and create the entry on demand; the getters return NULL
 * for an index beyond len or an entry that was never created.
 * __shift destroys up to `count` entries starting at ofs by advancing ofs,
 * and moves the live entries back to the start of buf once ofs exceeds half
 * of max. __term destroys the entries from ofs up to len and frees buf.
 */
4183 stream__puts(
4184 &sstream,
4185 "static void pcc_lr_table__init(pcc_auxil_t auxil, pcc_lr_table_t *table) {\n"
4186 " table->ofs = 0;\n"
4187 " table->len = 0;\n"
4188 " table->max = 0;\n"
4189 " table->buf = NULL;\n"
4190 "}\n"
4191 "\n"
4192 "static void pcc_lr_table__resize(pcc_context_t *ctx, pcc_lr_table_t *table, size_t len) {\n"
4193 " size_t i;\n"
4194 " for (i = len; i < table->len; i++) pcc_lr_table_entry__destroy(ctx, table->buf[i]);\n" /* only runs when shrinking */
4195 " if (table->max < len) {\n"
4196 " size_t m = table->max;\n"
4197 " if (m == 0) m = PCC_ARRAY_MIN_SIZE;\n"
4198 " while (m < len && m != 0) m <<= 1;\n"
4199 " if (m == 0) m = len;\n"
4200 " table->buf = (pcc_lr_table_entry_t **)PCC_REALLOC(ctx->auxil, table->buf, sizeof(pcc_lr_table_entry_t *) * m);\n"
4201 " table->max = m;\n"
4202 " }\n"
4203 " for (i = table->len; i < len; i++) table->buf[i] = NULL;\n"
4204 " table->len = len;\n"
4205 "}\n"
4206 "\n"
4207 "static void pcc_lr_table__set_head(pcc_context_t *ctx, pcc_lr_table_t *table, size_t index, pcc_lr_head_t *head) {\n"
4208 " index += table->ofs;\n"
4209 " if (index >= table->len) pcc_lr_table__resize(ctx, table, index + 1);\n"
4210 " if (table->buf[index] == NULL) table->buf[index] = pcc_lr_table_entry__create(ctx);\n"
4211 " table->buf[index]->head = head;\n"
4212 "}\n"
4213 "\n"
4214 "static void pcc_lr_table__hold_head(pcc_context_t *ctx, pcc_lr_table_t *table, size_t index, pcc_lr_head_t *head) {\n"
4215 " index += table->ofs;\n"
4216 " if (index >= table->len) pcc_lr_table__resize(ctx, table, index + 1);\n"
4217 " if (table->buf[index] == NULL) table->buf[index] = pcc_lr_table_entry__create(ctx);\n"
4218 " head->hold = table->buf[index]->hold_h;\n"
4219 " table->buf[index]->hold_h = head;\n"
4220 "}\n"
4221 "\n"
4222 "static void pcc_lr_table__set_answer(pcc_context_t *ctx, pcc_lr_table_t *table, size_t index, pcc_rule_t rule, pcc_lr_answer_t *answer) {\n"
4223 " index += table->ofs;\n"
4224 " if (index >= table->len) pcc_lr_table__resize(ctx, table, index + 1);\n"
4225 " if (table->buf[index] == NULL) table->buf[index] = pcc_lr_table_entry__create(ctx);\n"
4226 " pcc_lr_memo_map__put(ctx, &table->buf[index]->memos, rule, answer);\n"
4227 "}\n"
4228 "\n"
4229 "static void pcc_lr_table__hold_answer(pcc_context_t *ctx, pcc_lr_table_t *table, size_t index, pcc_lr_answer_t *answer) {\n"
4230 " index += table->ofs;\n"
4231 " if (index >= table->len) pcc_lr_table__resize(ctx, table, index + 1);\n"
4232 " if (table->buf[index] == NULL) table->buf[index] = pcc_lr_table_entry__create(ctx);\n"
4233 " answer->hold = table->buf[index]->hold_a;\n"
4234 " table->buf[index]->hold_a = answer;\n"
4235 "}\n"
4236 "\n"
4237 "static pcc_lr_head_t *pcc_lr_table__get_head(pcc_context_t *ctx, pcc_lr_table_t *table, size_t index) {\n"
4238 " index += table->ofs;\n"
4239 " if (index >= table->len || table->buf[index] == NULL) return NULL;\n"
4240 " return table->buf[index]->head;\n"
4241 "}\n"
4242 "\n"
4243 "static pcc_lr_answer_t *pcc_lr_table__get_answer(pcc_context_t *ctx, pcc_lr_table_t *table, size_t index, pcc_rule_t rule) {\n"
4244 " index += table->ofs;\n"
4245 " if (index >= table->len || table->buf[index] == NULL) return NULL;\n"
4246 " return pcc_lr_memo_map__get(ctx, &table->buf[index]->memos, rule);\n"
4247 "}\n"
4248 "\n"
4249 "static void pcc_lr_table__shift(pcc_context_t *ctx, pcc_lr_table_t *table, size_t count) {\n"
4250 " size_t i;\n"
4251 " if (count > table->len - table->ofs) count = table->len - table->ofs;\n" /* never advance ofs past len */
4252 " for (i = 0; i < count; i++) pcc_lr_table_entry__destroy(ctx, table->buf[table->ofs++]);\n"
4253 " if (table->ofs > (table->max >> 1)) {\n"
4254 " memmove(table->buf, table->buf + table->ofs, sizeof(pcc_lr_table_entry_t *) * (table->len - table->ofs));\n"
4255 " table->len -= table->ofs;\n"
4256 " table->ofs = 0;\n"
4257 " }\n"
4258 "}\n"
4259 "\n"
4260 "static void pcc_lr_table__term(pcc_context_t *ctx, pcc_lr_table_t *table) {\n"
4261 " while (table->len > table->ofs) {\n"
4262 " table->len--;\n"
4263 " pcc_lr_table_entry__destroy(ctx, table->buf[table->len]);\n"
4264 " }\n"
4265 " PCC_FREE(ctx->auxil, table->buf);\n"
4266 "}\n"
4267 "\n"
4268 );
/*
 * Emit pcc_lr_entry__create / __destroy of the generated parser.
 * An entry records its rule and starts with NULL seed and head; __destroy
 * frees only the entry itself, not the seed or head it points to.
 */
4269 stream__puts(
4270 &sstream,
4271 "static pcc_lr_entry_t *pcc_lr_entry__create(pcc_auxil_t auxil, pcc_rule_t rule) {\n"
4272 " pcc_lr_entry_t *const lr = (pcc_lr_entry_t *)PCC_MALLOC(auxil, sizeof(pcc_lr_entry_t));\n"
4273 " lr->rule = rule;\n"
4274 " lr->seed = NULL;\n"
4275 " lr->head = NULL;\n"
4276 " return lr;\n"
4277 "}\n"
4278 "\n"
4279 "static void pcc_lr_entry__destroy(pcc_auxil_t auxil, pcc_lr_entry_t *lr) {\n"
4280 " PCC_FREE(auxil, lr);\n"
4281 "}\n"
4282 "\n"
4283 );
/*
 * Emit the pcc_lr_stack_* helpers of the generated parser: a growable stack
 * of pcc_lr_entry_t pointers. __push appends (growing the buffer when full),
 * __pop returns the top element, and __term frees only the buffer, not the
 * entries it references.
 */
4284 stream__puts(
4285 &sstream,
4286 "static void pcc_lr_stack__init(pcc_auxil_t auxil, pcc_lr_stack_t *stack) {\n"
4287 " stack->len = 0;\n"
4288 " stack->max = 0;\n"
4289 " stack->buf = NULL;\n"
4290 "}\n"
4291 "\n"
4292 "static void pcc_lr_stack__push(pcc_auxil_t auxil, pcc_lr_stack_t *stack, pcc_lr_entry_t *lr) {\n"
4293 " if (stack->max <= stack->len) {\n"
4294 " const size_t n = stack->len + 1;\n"
4295 " size_t m = stack->max;\n"
4296 " if (m == 0) m = PCC_ARRAY_MIN_SIZE;\n"
4297 " while (m < n && m != 0) m <<= 1;\n"
4298 " if (m == 0) m = n;\n"
4299 " stack->buf = (pcc_lr_entry_t **)PCC_REALLOC(auxil, stack->buf, sizeof(pcc_lr_entry_t *) * m);\n"
4300 " stack->max = m;\n"
4301 " }\n"
4302 " stack->buf[stack->len++] = lr;\n"
4303 "}\n"
4304 "\n"
4305 "static pcc_lr_entry_t *pcc_lr_stack__pop(pcc_auxil_t auxil, pcc_lr_stack_t *stack) {\n"
4306 " return stack->buf[--stack->len];\n" /* no emptiness check: the caller must have pushed first */
4307 "}\n"
4308 "\n"
4309 "static void pcc_lr_stack__term(pcc_auxil_t auxil, pcc_lr_stack_t *stack) {\n"
4310 " PCC_FREE(auxil, stack->buf);\n"
4311 "}\n"
4312 "\n"
4313 );
/*
 * Emit pcc_context__create() of the generated parser: allocates the context,
 * zeroes pos/cur/level, initialises the input buffer, LR table, LR stack and
 * thunk array, sets up one memory recycler each for thunk chunks, LR heads
 * and LR answers, and stores the user's auxil value.
 */
4314 stream__puts(
4315 &sstream,
4316 "static pcc_context_t *pcc_context__create(pcc_auxil_t auxil) {\n"
4317 " pcc_context_t *const ctx = (pcc_context_t *)PCC_MALLOC(auxil, sizeof(pcc_context_t));\n"
4318 " ctx->pos = 0;\n"
4319 " ctx->cur = 0;\n"
4320 " ctx->level = 0;\n"
4321 " pcc_char_array__init(auxil, &ctx->buffer);\n"
4322 " pcc_lr_table__init(auxil, &ctx->lrtable);\n"
4323 " pcc_lr_stack__init(auxil, &ctx->lrstack);\n"
4324 " pcc_thunk_array__init(auxil, &ctx->thunks);\n"
4325 " pcc_memory_recycler__init(auxil, &ctx->thunk_chunk_recycler, sizeof(pcc_thunk_chunk_t));\n"
4326 " pcc_memory_recycler__init(auxil, &ctx->lr_head_recycler, sizeof(pcc_lr_head_t));\n"
4327 " pcc_memory_recycler__init(auxil, &ctx->lr_answer_recycler, sizeof(pcc_lr_answer_t));\n"
4328 " ctx->auxil = auxil;\n"
4329 " return ctx;\n"
4330 "}\n"
4331 "\n"
4332 );
/*
 * Emit pcc_context__destroy() of the generated parser: a NULL-tolerant
 * teardown that terminates the thunk array, LR stack, LR table and input
 * buffer, then the three memory recyclers, and finally frees the context.
 * The recyclers are terminated after the LR table because destroying table
 * entries hands objects back to them.
 */
4333 stream__puts(
4334 &sstream,
4335 "static void pcc_context__destroy(pcc_context_t *ctx) {\n"
4336 " if (ctx == NULL) return;\n"
4337 " pcc_thunk_array__term(ctx->auxil, &ctx->thunks);\n"
4338 " pcc_lr_stack__term(ctx->auxil, &ctx->lrstack);\n"
4339 " pcc_lr_table__term(ctx, &ctx->lrtable);\n"
4340 " pcc_char_array__term(ctx->auxil, &ctx->buffer);\n"
4341 " pcc_memory_recycler__term(ctx->auxil, &ctx->thunk_chunk_recycler);\n"
4342 " pcc_memory_recycler__term(ctx->auxil, &ctx->lr_head_recycler);\n"
4343 " pcc_memory_recycler__term(ctx->auxil, &ctx->lr_answer_recycler);\n"
4344 " PCC_FREE(ctx->auxil, ctx);\n"
4345 "}\n"
4346 "\n"
4347 );
/*
 * Emit pcc_refill_buffer() of the generated parser: ensures that, if
 * possible, at least `num` characters are buffered from ctx->cur onwards by
 * appending characters obtained from PCC_GETCHAR until enough are present or
 * PCC_GETCHAR yields a negative value. Returns the number of characters
 * available from ctx->cur, which may be smaller than `num`.
 */
4348 stream__puts(
4349 &sstream,
4350 "static size_t pcc_refill_buffer(pcc_context_t *ctx, size_t num) {\n"
4351 " if (ctx->buffer.len >= ctx->cur + num) return ctx->buffer.len - ctx->cur;\n"
4352 " while (ctx->buffer.len < ctx->cur + num) {\n"
4353 " const int c = PCC_GETCHAR(ctx->auxil);\n"
4354 " if (c < 0) break;\n"
4355 " pcc_char_array__add(ctx->auxil, &ctx->buffer, (char)c);\n"
4356 " }\n"
4357 " return ctx->buffer.len - ctx->cur;\n"
4358 "}\n"
4359 "\n"
4360 );
/*
 * Emit pcc_commit_buffer() of the generated parser: discards the first
 * ctx->cur characters of the input buffer by moving the remainder to the
 * front, advances ctx->pos by the same amount, shifts the LR table by that
 * many positions, and resets ctx->cur to 0.
 */
4361 stream__puts(
4362 &sstream,
4363 "MARK_FUNC_AS_USED\n"
4364 "static void pcc_commit_buffer(pcc_context_t *ctx) {\n"
4365 " memmove(ctx->buffer.buf, ctx->buffer.buf + ctx->cur, ctx->buffer.len - ctx->cur);\n"
4366 " ctx->buffer.len -= ctx->cur;\n"
4367 " ctx->pos += ctx->cur;\n"
4368 " pcc_lr_table__shift(ctx, &ctx->lrtable, ctx->cur);\n"
4369 " ctx->cur = 0;\n"
4370 "}\n"
4371 "\n"
4372 );
/*
 * Emit pcc_get_capture_string() of the generated parser: returns the text of
 * a capture, creating it on first use by duplicating the buffer bytes in
 * [range.start, range.end) with pcc_strndup_e and caching the result in
 * capt->string (the const qualifier is cast away for that assignment).
 */
4373 stream__puts(
4374 &sstream,
4375 "MARK_FUNC_AS_USED\n"
4376 "static const char *pcc_get_capture_string(pcc_context_t *ctx, const pcc_capture_t *capt) {\n"
4377 " if (capt->string == NULL)\n"
4378 " ((pcc_capture_t *)capt)->string =\n"
4379 " pcc_strndup_e(ctx->auxil, ctx->buffer.buf + capt->range.start, capt->range.end - capt->range.start);\n"
4380 " return capt->string;\n"
4381 "}\n"
4382 "\n"
4383 );
/*
 * Emit pcc_get_char_as_utf32() only when the CODE_FLAG__UTF8_CHARCLASS_USED
 * flag is set in ctx->flags.
 * The emitted function decodes one UTF-8 sequence at ctx->cur without
 * consuming it: it returns the sequence length in bytes (1-4) and stores the
 * code point through `out` when `out` is non-NULL, or returns 0 when input is
 * exhausted or the sequence is invalid (bad lead byte, bad continuation byte,
 * overlong form, surrogate code point, or value above U+10FFFF).
 */
4384 if (ctx->flags & CODE_FLAG__UTF8_CHARCLASS_USED) {
4385 stream__puts(
4386 &sstream,
4387 "static size_t pcc_get_char_as_utf32(pcc_context_t *ctx, int *out) { /* with checking UTF-8 validity */\n"
4388 " int c, u;\n"
4389 " size_t n;\n"
4390 " if (pcc_refill_buffer(ctx, 1) < 1) return 0;\n"
4391 " c = (int)(unsigned char)ctx->buffer.buf[ctx->cur];\n"
4392 " n = (c < 0x80) ? 1 :\n"
4393 " ((c & 0xe0) == 0xc0) ? 2 :\n"
4394 " ((c & 0xf0) == 0xe0) ? 3 :\n"
4395 " ((c & 0xf8) == 0xf0) ? 4 : 0;\n"
4396 " if (n < 1) return 0;\n"
4397 " if (pcc_refill_buffer(ctx, n) < n) return 0;\n"
4398 " switch (n) {\n"
4399 " case 1:\n"
4400 " u = c;\n"
4401 " break;\n"
4402 " case 2:\n"
4403 " u = c & 0x1f;\n"
4404 " c = (int)(unsigned char)ctx->buffer.buf[ctx->cur + 1];\n"
4405 " if ((c & 0xc0) != 0x80) return 0;\n"
4406 " u <<= 6; u |= c & 0x3f;\n"
4407 " if (u < 0x80) return 0;\n" /* overlong 2-byte form */
4408 " break;\n"
4409 " case 3:\n"
4410 " u = c & 0x0f;\n"
4411 " c = (int)(unsigned char)ctx->buffer.buf[ctx->cur + 1];\n"
4412 " if ((c & 0xc0) != 0x80) return 0;\n"
4413 " u <<= 6; u |= c & 0x3f;\n"
4414 " c = (int)(unsigned char)ctx->buffer.buf[ctx->cur + 2];\n"
4415 " if ((c & 0xc0) != 0x80) return 0;\n"
4416 " u <<= 6; u |= c & 0x3f;\n"
4417 " if (u < 0x800 || (u >= 0xd800 && u <= 0xdfff)) return 0;\n" /* overlong 3-byte form, or a UTF-16 surrogate, which UTF-8 must not encode */
4418 " break;\n"
4419 " case 4:\n"
4420 " u = c & 0x07;\n"
4421 " c = (int)(unsigned char)ctx->buffer.buf[ctx->cur + 1];\n"
4422 " if ((c & 0xc0) != 0x80) return 0;\n"
4423 " u <<= 6; u |= c & 0x3f;\n"
4424 " c = (int)(unsigned char)ctx->buffer.buf[ctx->cur + 2];\n"
4425 " if ((c & 0xc0) != 0x80) return 0;\n"
4426 " u <<= 6; u |= c & 0x3f;\n"
4427 " c = (int)(unsigned char)ctx->buffer.buf[ctx->cur + 3];\n"
4428 " if ((c & 0xc0) != 0x80) return 0;\n"
4429 " u <<= 6; u |= c & 0x3f;\n"
4430 " if (u < 0x10000 || u > 0x10ffff) return 0;\n" /* overlong 4-byte form or beyond the Unicode range */
4431 " break;\n"
4432 " default:\n"
4433 " return 0;\n"
4434 " }\n"
4435 " if (out) *out = u;\n"
4436 " return n;\n"
4437 "}\n"
4438 "\n"
4439 );
4440 }
/*
 * Emit pcc_apply_rule() of the generated parser: applies `rule` at the
 * current input position with memoisation and left-recursion handling (the
 * file header attributes the algorithm to Warth, Douglass and Millstein).
 *
 * The emitted function looks up the memoised answer `a` and the LR head `h`
 * for position p = ctx->pos + ctx->cur, then:
 *   - with a head present, either fails immediately or re-evaluates the rule
 *     once it is found in h->eval, holding the fresh answer in the table;
 *   - otherwise reuses the memoised answer when there is one (setting up the
 *     head and marking the rules on the LR stack as involved when the answer
 *     is still of LR type);
 *   - otherwise evaluates the rule for the first time and, if it turned out
 *     to be the head of a left recursion, keeps re-running it from p for as
 *     long as each run ends further into the input.
 * On success it appends a node thunk referring to the resulting chunk's
 * thunks and to `value` (or to a shared static dummy when `value` is NULL)
 * and returns PCC_TRUE; when no chunk results it returns PCC_FALSE.
 */
4441 stream__puts(
4442 &sstream,
4443 "MARK_FUNC_AS_USED\n"
4444 "static pcc_bool_t pcc_apply_rule(pcc_context_t *ctx, pcc_rule_t rule, pcc_thunk_array_t *thunks, pcc_value_t *value) {\n"
4445 " static pcc_value_t null;\n"
4446 " pcc_thunk_chunk_t *c = NULL;\n"
4447 " const size_t p = ctx->pos + ctx->cur;\n"
4448 " pcc_bool_t b = PCC_TRUE;\n"
4449 " pcc_lr_answer_t *a = pcc_lr_table__get_answer(ctx, &ctx->lrtable, p, rule);\n"
4450 " pcc_lr_head_t *h = pcc_lr_table__get_head(ctx, &ctx->lrtable, p);\n"
4451 " if (h != NULL) {\n"
4452 " if (a == NULL && rule != h->rule && pcc_rule_set__index(ctx->auxil, &h->invol, rule) == PCC_VOID_VALUE) {\n" /* no memo, not the head rule, not in invol: fail without evaluating */
4453 " b = PCC_FALSE;\n"
4454 " c = NULL;\n"
4455 " }\n"
4456 " else if (pcc_rule_set__remove(ctx->auxil, &h->eval, rule)) {\n" /* rule was found in eval: evaluate it again */
4457 " b = PCC_FALSE;\n"
4458 " c = rule(ctx);\n"
4459 " a = pcc_lr_answer__create(ctx, PCC_LR_ANSWER_CHUNK, ctx->pos + ctx->cur);\n"
4460 " a->data.chunk = c;\n"
4461 " pcc_lr_table__hold_answer(ctx, &ctx->lrtable, p, a);\n" /* held (not memoised) so the table entry releases it later */
4462 " }\n"
4463 " }\n"
4464 " if (b) {\n"
4465 " if (a != NULL) {\n"
4466 " ctx->cur = a->pos - ctx->pos;\n"
4467 " switch (a->type) {\n"
4468 " case PCC_LR_ANSWER_LR:\n"
4469 " if (a->data.lr->head == NULL) {\n"
4470 " a->data.lr->head = pcc_lr_head__create(ctx, rule);\n"
4471 " pcc_lr_table__hold_head(ctx, &ctx->lrtable, p, a->data.lr->head);\n"
4472 " }\n"
4473 " {\n"
4474 " size_t i = ctx->lrstack.len;\n"
4475 " while (i > 0) {\n" /* walk the LR stack from the top down to the entry already owning this head */
4476 " i--;\n"
4477 " if (ctx->lrstack.buf[i]->head == a->data.lr->head) break;\n"
4478 " ctx->lrstack.buf[i]->head = a->data.lr->head;\n"
4479 " pcc_rule_set__add(ctx->auxil, &a->data.lr->head->invol, ctx->lrstack.buf[i]->rule);\n"
4480 " }\n"
4481 " }\n"
4482 " c = a->data.lr->seed;\n"
4483 " break;\n"
4484 " case PCC_LR_ANSWER_CHUNK:\n"
4485 " c = a->data.chunk;\n"
4486 " break;\n"
4487 " default: /* unknown */\n"
4488 " break;\n"
4489 " }\n"
4490 " }\n"
4491 " else {\n"
4492 " pcc_lr_entry_t *const e = pcc_lr_entry__create(ctx->auxil, rule);\n"
4493 " pcc_lr_stack__push(ctx->auxil, &ctx->lrstack, e);\n"
4494 " a = pcc_lr_answer__create(ctx, PCC_LR_ANSWER_LR, p);\n"
4495 " a->data.lr = e;\n"
4496 " pcc_lr_table__set_answer(ctx, &ctx->lrtable, p, rule, a);\n" /* memoise before evaluating so a recursive application at p finds this LR answer */
4497 " c = rule(ctx);\n"
4498 " pcc_lr_stack__pop(ctx->auxil, &ctx->lrstack);\n"
4499 " a->pos = ctx->pos + ctx->cur;\n"
4500 " if (e->head == NULL) {\n" /* no head was attached during evaluation: store the chunk as the answer */
4501 " pcc_lr_answer__set_chunk(ctx, a, c);\n"
4502 " }\n"
4503 " else {\n"
4504 " e->seed = c;\n"
4505 " h = a->data.lr->head;\n"
4506 " if (h->rule != rule) {\n"
4507 " c = a->data.lr->seed;\n"
4508 " a = pcc_lr_answer__create(ctx, PCC_LR_ANSWER_CHUNK, ctx->pos + ctx->cur);\n"
4509 " a->data.chunk = c;\n"
4510 " pcc_lr_table__hold_answer(ctx, &ctx->lrtable, p, a);\n"
4511 " }\n"
4512 " else {\n"
4513 " pcc_lr_answer__set_chunk(ctx, a, a->data.lr->seed);\n"
4514 " if (a->data.chunk == NULL) {\n"
4515 " c = NULL;\n"
4516 " }\n"
4517 " else {\n"
4518 " pcc_lr_table__set_head(ctx, &ctx->lrtable, p, h);\n"
4519 " for (;;) {\n" /* re-run the rule from p while each run succeeds and ends beyond a->pos */
4520 " ctx->cur = p - ctx->pos;\n"
4521 " pcc_rule_set__copy(ctx->auxil, &h->eval, &h->invol);\n"
4522 " c = rule(ctx);\n"
4523 " if (c == NULL || ctx->pos + ctx->cur <= a->pos) break;\n"
4524 " pcc_lr_answer__set_chunk(ctx, a, c);\n"
4525 " a->pos = ctx->pos + ctx->cur;\n"
4526 " }\n"
4527 " pcc_thunk_chunk__destroy(ctx, c);\n" /* drop the last run, which made no further progress */
4528 " pcc_lr_table__set_head(ctx, &ctx->lrtable, p, NULL);\n"
4529 " ctx->cur = a->pos - ctx->pos;\n"
4530 " c = a->data.chunk;\n"
4531 " }\n"
4532 " }\n"
4533 " }\n"
4534 " }\n"
4535 " }\n"
4536 " if (c == NULL) return PCC_FALSE;\n"
4537 " if (value == NULL) value = &null;\n"
4538 " memset(value, 0, sizeof(pcc_value_t)); /* in case */\n"
4539 " pcc_thunk_array__add(ctx->auxil, thunks, pcc_thunk__create_node(ctx->auxil, &c->thunks, value));\n"
4540 " return PCC_TRUE;\n"
4541 "}\n"
4542 "\n"
4543 );
/*
 * Emit pcc_do_action() of the generated parser: walks a thunk array in order,
 * calling the stored action of each leaf thunk with the current `value`, and
 * recursing into each node thunk with that node's own thunk array and value
 * pointer. Thunks of any other type are ignored.
 */
4544 stream__puts(
4545 &sstream,
4546 "MARK_FUNC_AS_USED\n"
4547 "static void pcc_do_action(pcc_context_t *ctx, const pcc_thunk_array_t *thunks, pcc_value_t *value) {\n"
4548 " size_t i;\n"
4549 " for (i = 0; i < thunks->len; i++) {\n"
4550 " pcc_thunk_t *const thunk = thunks->buf[i];\n"
4551 " switch (thunk->type) {\n"
4552 " case PCC_THUNK_LEAF:\n"
4553 " thunk->data.leaf.action(ctx, thunk, value);\n"
4554 " break;\n"
4555 " case PCC_THUNK_NODE:\n"
4556 " pcc_do_action(ctx, thunk->data.node.thunks, thunk->data.node.value);\n"
4557 " break;\n"
4558 " default: /* unknown */\n"
4559 " break;\n"
4560 " }\n"
4561 " }\n"
4562 "}\n"
4563 "\n"
4564 );
4565 {
4566 size_t i, j, k;
4567 for (i = 0; i < ctx->rules.len; i++) {
4568 const node_rule_t *const r = &ctx->rules.buf[i]->data.rule;
4569 for (j = 0; j < r->codes.len; j++) {
4570 const code_block_t *b;
4571 size_t d;
4572 const node_const_array_t *v, *c;
4573 switch (r->codes.buf[j]->type) {
4574 case NODE_ACTION:
4575 b = &r->codes.buf[j]->data.action.code;
4576 d = r->codes.buf[j]->data.action.index;
4577 v = &r->codes.buf[j]->data.action.vars;
4578 c = &r->codes.buf[j]->data.action.capts;
4579 break;
4580 case NODE_ERROR:
4581 b = &r->codes.buf[j]->data.error.code;
4582 d = r->codes.buf[j]->data.error.index;
4583 v = &r->codes.buf[j]->data.error.vars;
4584 c = &r->codes.buf[j]->data.error.capts;
4585 break;
4586 default:
4587 print_error("Internal error [%d]\n", __LINE__);
4588 exit(-1);
4589 }
4590 stream__printf(
4591 &sstream,
4592 "static void pcc_action_%s_" FMT_LU "(%s_context_t *__pcc_ctx, pcc_thunk_t *__pcc_in, pcc_value_t *__pcc_out) {\n",
4593 r->name, (ulong_t)d, get_prefix(ctx)
4594 );
4595 stream__puts(
4596 &sstream,
4597 "#define auxil (__pcc_ctx->auxil)\n"
4598 "#define __ (*__pcc_out)\n"
4599 );
4600 k = 0;
4601 while (k < v->len) {
4602 assert(v->buf[k]->type == NODE_REFERENCE);
4603 stream__printf(
4604 &sstream,
4605 "#define %s (*__pcc_in->data.leaf.values.buf[" FMT_LU "])\n",
4606 v->buf[k]->data.reference.var, (ulong_t)v->buf[k]->data.reference.index
4607 );
4608 k++;
4609 }
4610 stream__puts(
4611 &sstream,
4612 "#define _0 pcc_get_capture_string(__pcc_ctx, &__pcc_in->data.leaf.capt0)\n"
4613 "#define _0s ((const size_t)(__pcc_ctx->pos + __pcc_in->data.leaf.capt0.range.start))\n"
4614 "#define _0e ((const size_t)(__pcc_ctx->pos + __pcc_in->data.leaf.capt0.range.end))\n"
4615 );
4616 k = 0;
4617 while (k < c->len) {
4618 assert(c->buf[k]->type == NODE_CAPTURE);
4619 stream__printf(
4620 &sstream,
4621 "#define _" FMT_LU " pcc_get_capture_string(__pcc_ctx, __pcc_in->data.leaf.capts.buf[" FMT_LU "])\n",
4622 (ulong_t)(c->buf[k]->data.capture.index + 1), (ulong_t)c->buf[k]->data.capture.index
4623 );
4624 stream__printf(
4625 &sstream,
4626 "#define _" FMT_LU "s ((const size_t)(__pcc_ctx->pos + __pcc_in->data.leaf.capts.buf[" FMT_LU "]->range.start))\n",
4627 (ulong_t)(c->buf[k]->data.capture.index + 1), (ulong_t)c->buf[k]->data.capture.index
4628 );
4629 stream__printf(
4630 &sstream,
4631 "#define _" FMT_LU "e ((const size_t)(__pcc_ctx->pos + __pcc_in->data.leaf.capts.buf[" FMT_LU "]->range.end))\n",
4632 (ulong_t)(c->buf[k]->data.capture.index + 1), (ulong_t)c->buf[k]->data.capture.index
4633 );
4634 k++;
4635 }
4636 stream__write_code_block(&sstream, b->text, b->len, 4, ctx->iname, b->line);
4637 k = c->len;
4638 while (k > 0) {
4639 k--;
4640 assert(c->buf[k]->type == NODE_CAPTURE);
4641 stream__printf(
4642 &sstream,
4643 "#undef _" FMT_LU "e\n",
4644 (ulong_t)(c->buf[k]->data.capture.index + 1)
4645 );
4646 stream__printf(
4647 &sstream,
4648 "#undef _" FMT_LU "s\n",
4649 (ulong_t)(c->buf[k]->data.capture.index + 1)
4650 );
4651 stream__printf(
4652 &sstream,
4653 "#undef _" FMT_LU "\n",
4654 (ulong_t)(c->buf[k]->data.capture.index + 1)
4655 );
4656 }
4657 stream__puts(
4658 &sstream,
4659 "#undef _0e\n"
4660 "#undef _0s\n"
4661 "#undef _0\n"
4662 );
4663 k = v->len;
4664 while (k > 0) {
4665 k--;
4666 assert(v->buf[k]->type == NODE_REFERENCE);
4667 stream__printf(
4668 &sstream,
4669 "#undef %s\n",
4670 v->buf[k]->data.reference.var
4671 );
4672 }
4673 stream__puts(
4674 &sstream,
4675 "#undef __\n"
4676 "#undef auxil\n"
4677 );
4678 stream__puts(
4679 &sstream,
4680 "}\n"
4681 "\n"
4682 );
4683 }
4684 }
4685 }
4686 {
4687 size_t i;
4688 for (i = 0; i < ctx->rules.len; i++) {
4689 stream__printf(
4690 &sstream,
4691 "static pcc_thunk_chunk_t *pcc_evaluate_rule_%s(pcc_context_t *ctx);\n",
4692 ctx->rules.buf[i]->data.rule.name
4693 );
4694 }
4695 stream__puts(
4696 &sstream,
4697 "\n"
4698 );
4699 for (i = 0; i < ctx->rules.len; i++) {
4700 code_reach_t r;
4701 generate_t g;
4702 g.stream = &sstream;
4703 g.rule = ctx->rules.buf[i];
4704 g.label = 0;
4705 g.ascii = ctx->opts.ascii;
4706 stream__printf(
4707 &sstream,
4708 "static pcc_thunk_chunk_t *pcc_evaluate_rule_%s(pcc_context_t *ctx) {\n",
4709 ctx->rules.buf[i]->data.rule.name
4710 );
4711 stream__printf(
4712 &sstream,
4713 " pcc_thunk_chunk_t *const chunk = pcc_thunk_chunk__create(ctx);\n"
4714 " chunk->pos = ctx->cur;\n"
4715 " PCC_DEBUG(ctx->auxil, PCC_DBG_EVALUATE, \"%s\", ctx->level, chunk->pos, (ctx->buffer.buf + chunk->pos), (ctx->buffer.len - chunk->pos));\n"
4716 " ctx->level++;\n",
4717 ctx->rules.buf[i]->data.rule.name
4718 );
4719 stream__printf(
4720 &sstream,
4721 " pcc_value_table__resize(ctx->auxil, &chunk->values, " FMT_LU ");\n",
4722 (ulong_t)ctx->rules.buf[i]->data.rule.vars.len
4723 );
4724 stream__printf(
4725 &sstream,
4726 " pcc_capture_table__resize(ctx->auxil, &chunk->capts, " FMT_LU ");\n",
4727 (ulong_t)ctx->rules.buf[i]->data.rule.capts.len
4728 );
4729 if (ctx->rules.buf[i]->data.rule.vars.len > 0) {
4730 stream__puts(
4731 &sstream,
4732 " pcc_value_table__clear(ctx->auxil, &chunk->values);\n"
4733 );
4734 }
4735 r = generate_code(&g, ctx->rules.buf[i]->data.rule.expr, 0, 4, FALSE);
4736 stream__printf(
4737 &sstream,
4738 " ctx->level--;\n"
4739 " PCC_DEBUG(ctx->auxil, PCC_DBG_MATCH, \"%s\", ctx->level, chunk->pos, (ctx->buffer.buf + chunk->pos), (ctx->cur - chunk->pos));\n"
4740 " return chunk;\n",
4741 ctx->rules.buf[i]->data.rule.name
4742 );
4743 if (r != CODE_REACH__ALWAYS_SUCCEED) {
4744 stream__printf(
4745 &sstream,
4746 "L0000:;\n"
4747 " ctx->level--;\n"
4748 " PCC_DEBUG(ctx->auxil, PCC_DBG_NOMATCH, \"%s\", ctx->level, chunk->pos, (ctx->buffer.buf + chunk->pos), (ctx->cur - chunk->pos));\n"
4749 " pcc_thunk_chunk__destroy(ctx, chunk);\n"
4750 " return NULL;\n",
4751 ctx->rules.buf[i]->data.rule.name
4752 );
4753 }
4754 stream__puts(
4755 &sstream,
4756 "}\n"
4757 "\n"
4758 );
4759 }
4760 }
4761 stream__printf(
4762 &sstream,
4763 "%s_context_t *%s_create(%s%sauxil) {\n",
4764 get_prefix(ctx), get_prefix(ctx),
4765 at, ap ? "" : " "
4766 );
4767 stream__puts(
4768 &sstream,
4769 " return pcc_context__create(auxil);\n"
4770 "}\n"
4771 "\n"
4772 );
4773 stream__printf(
4774 &sstream,
4775 "int %s_parse(%s_context_t *ctx, %s%s*ret) {\n",
4776 get_prefix(ctx), get_prefix(ctx),
4777 vt, vp ? "" : " "
4778 );
4779 if (ctx->rules.len > 0) {
4780 stream__printf(
4781 &sstream,
4782 " if (pcc_apply_rule(ctx, pcc_evaluate_rule_%s, &ctx->thunks, ret))\n",
4783 ctx->rules.buf[0]->data.rule.name
4784 );
4785 stream__puts(
4786 &sstream,
4787 " pcc_do_action(ctx, &ctx->thunks, ret);\n"
4788 " else\n"
4789 " PCC_ERROR(ctx->auxil);\n"
4790 " pcc_commit_buffer(ctx);\n"
4791 );
4792 }
4793 stream__puts(
4794 &sstream,
4795 " pcc_thunk_array__revert(ctx->auxil, &ctx->thunks, 0);\n"
4796 " return pcc_refill_buffer(ctx, 1) >= 1;\n"
4797 "}\n"
4798 "\n"
4799 );
4800 stream__printf(
4801 &sstream,
4802 "void %s_destroy(%s_context_t *ctx) {\n",
4803 get_prefix(ctx), get_prefix(ctx)
4804 );
4805 stream__puts(
4806 &sstream,
4807 " pcc_context__destroy(ctx);\n"
4808 "}\n"
4809 );
4810 }
4811 {
4812 stream__puts(
4813 &hstream,
4814 "#ifdef __cplusplus\n"
4815 "extern \"C\" {\n"
4816 "#endif\n"
4817 "\n"
4818 );
4819 stream__printf(
4820 &hstream,
4821 "typedef struct %s_context_tag %s_context_t;\n"
4822 "\n",
4823 get_prefix(ctx), get_prefix(ctx)
4824 );
4825 stream__printf(
4826 &hstream,
4827 "%s_context_t *%s_create(%s%sauxil);\n",
4828 get_prefix(ctx), get_prefix(ctx),
4829 at, ap ? "" : " "
4830 );
4831 stream__printf(
4832 &hstream,
4833 "int %s_parse(%s_context_t *ctx, %s%s*ret);\n",
4834 get_prefix(ctx), get_prefix(ctx),
4835 vt, vp ? "" : " "
4836 );
4837 stream__printf(
4838 &hstream,
4839 "void %s_destroy(%s_context_t *ctx);\n",
4840 get_prefix(ctx), get_prefix(ctx)
4841 );
4842 stream__puts(
4843 &hstream,
4844 "\n"
4845 "#ifdef __cplusplus\n"
4846 "}\n"
4847 "#endif\n"
4848 );
4849 stream__printf(
4850 &hstream,
4851 "\n"
4852 "#endif /* !PCC_INCLUDED_%s */\n",
4853 ctx->hid
4854 );
4855 }
4856 {
4857 match_eol(ctx);
4858 if (!match_eof(ctx)) stream__putc(&sstream, '\n');
4859 commit_buffer(ctx);
4860 if (ctx->opts.lines && !match_eof(ctx))
4861 stream__write_line_directive(&sstream, ctx->iname, ctx->linenum);
4862 while (refill_buffer(ctx, ctx->buffer.max) > 0) {
4863 const size_t n = ctx->buffer.len;
4864 stream__write_text(&sstream, ctx->buffer.buf, (n > 0 && ctx->buffer.buf[n - 1] == '\r') ? n - 1 : n);
4865 ctx->bufcur = n;
4866 commit_buffer(ctx);
4867 }
4868 }
4869 fclose_e(hstream.file);
4870 fclose_e(sstream.file);
4871 if (ctx->errnum) {
4872 unlink(ctx->hname);
4873 unlink(ctx->sname);
4874 return FALSE;
4875 }
4876 return TRUE;
4877 }
4878
print_version(FILE * output)4879 static void print_version(FILE *output) {
4880 fprintf(output, "%s version %s\n", g_cmdname, VERSION);
4881 fprintf(output, "Copyright (c) 2014, 2019-2022 Arihiro Yoshida. All rights reserved.\n");
4882 }
4883
print_usage(FILE * output)4884 static void print_usage(FILE *output) {
4885 fprintf(output, "Usage: %s [OPTIONS] [FILE]\n", g_cmdname);
4886 fprintf(output, "Generates a packrat parser for C.\n");
4887 fprintf(output, "\n");
4888 fprintf(output, " -o BASENAME specify a base name of output source and header files\n");
4889 fprintf(output, " -a, --ascii disable UTF-8 support\n");
4890 fprintf(output, " -l, --lines add #line directives\n");
4891 fprintf(output, " -d, --debug with debug information\n");
4892 fprintf(output, " -h, --help print this help message and exit\n");
4893 fprintf(output, " -v, --version print the version and exit\n");
4894 }
4895
main(int argc,char ** argv)4896 int main(int argc, char **argv) {
4897 const char *iname = NULL;
4898 const char *oname = NULL;
4899 options_t opts;
4900 opts.ascii = FALSE;
4901 opts.lines = FALSE;
4902 opts.debug = FALSE;
4903 #ifdef _MSC_VER
4904 #ifdef _DEBUG
4905 _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
4906 _CrtSetReportMode(_CRT_WARN, _CRTDBG_MODE_FILE);
4907 _CrtSetReportFile(_CRT_WARN, _CRTDBG_FILE_STDERR);
4908 #endif
4909 #endif
4910 g_cmdname = extract_filename(argv[0]);
4911 {
4912 const char *fname = NULL;
4913 const char *opt_o = NULL;
4914 bool_t opt_a = FALSE;
4915 bool_t opt_l = FALSE;
4916 bool_t opt_d = FALSE;
4917 bool_t opt_h = FALSE;
4918 bool_t opt_v = FALSE;
4919 int i;
4920 for (i = 1; i < argc; i++) {
4921 if (argv[i][0] != '-') {
4922 break;
4923 }
4924 else if (strcmp(argv[i], "--") == 0) {
4925 i++; break;
4926 }
4927 else if (argv[i][1] == 'o') {
4928 const char *const o = (argv[i][2] != '\0') ? argv[i] + 2 : (++i < argc) ? argv[i] : NULL;
4929 if (o == NULL) {
4930 print_error("Output base name missing\n");
4931 fprintf(stderr, "\n");
4932 print_usage(stderr);
4933 exit(1);
4934 }
4935 if (opt_o != NULL) {
4936 print_error("Extra output base name '%s'\n", o);
4937 fprintf(stderr, "\n");
4938 print_usage(stderr);
4939 exit(1);
4940 }
4941 opt_o = o;
4942 }
4943 else if (strcmp(argv[i], "-a") == 0 || strcmp(argv[i], "--ascii") == 0) {
4944 opt_a = TRUE;
4945 }
4946 else if (strcmp(argv[i], "-l") == 0 || strcmp(argv[i], "--lines") == 0) {
4947 opt_l = TRUE;
4948 }
4949 else if (strcmp(argv[i], "-d") == 0 || strcmp(argv[i], "--debug") == 0) {
4950 opt_d = TRUE;
4951 }
4952 else if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) {
4953 opt_h = TRUE;
4954 }
4955 else if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--version") == 0) {
4956 opt_v = TRUE;
4957 }
4958 else {
4959 print_error("Invalid option '%s'\n", argv[i]);
4960 fprintf(stderr, "\n");
4961 print_usage(stderr);
4962 exit(1);
4963 }
4964 }
4965 switch (argc - i) {
4966 case 0:
4967 break;
4968 case 1:
4969 fname = argv[i];
4970 break;
4971 default:
4972 print_error("Multiple input files\n");
4973 fprintf(stderr, "\n");
4974 print_usage(stderr);
4975 exit(1);
4976 }
4977 if (opt_h || opt_v) {
4978 if (opt_v) print_version(stdout);
4979 if (opt_v && opt_h) fprintf(stdout, "\n");
4980 if (opt_h) print_usage(stdout);
4981 exit(0);
4982 }
4983 iname = (fname != NULL && fname[0] != '\0') ? fname : NULL;
4984 oname = (opt_o != NULL && opt_o[0] != '\0') ? opt_o : NULL;
4985 opts.ascii = opt_a;
4986 opts.lines = opt_l;
4987 opts.debug = opt_d;
4988 }
4989 {
4990 context_t *const ctx = create_context(iname, oname, &opts);
4991 const int b = parse(ctx) && generate(ctx);
4992 destroy_context(ctx);
4993 if (!b) exit(10);
4994 }
4995 return 0;
4996 }
4997