xref: /Universal-ctags/misc/packcc/src/packcc.c (revision 212cf26607535acfbbd5b96604cc099e08456838)
1 /*
2  * PackCC: a packrat parser generator for C.
3  *
4  * Copyright (c) 2014, 2019-2022 Arihiro Yoshida. All rights reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /*
26  * The algorithm is based on the paper "Packrat Parsers Can Support Left Recursion"
27  * authored by A. Warth, J. R. Douglass, and T. Millstein.
28  *
29  * The specification is determined by referring to peg/leg developed by Ian Piumarta.
30  */
31 
32 #ifdef _MSC_VER
33 #define _CRT_SECURE_NO_WARNINGS
34 #ifdef _DEBUG
35 #define _CRTDBG_MAP_ALLOC
36 #include <crtdbg.h>
37 #endif
38 #endif
39 
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <stdarg.h>
43 #include <string.h>
44 #include <limits.h>
45 #include <assert.h>
46 
47 #ifndef _MSC_VER
48 #if defined __GNUC__ && defined _WIN32 /* MinGW */
49 #ifndef PCC_USE_SYSTEM_STRNLEN
50 #define strnlen(str, maxlen) strnlen_(str, maxlen)
/* Substitute for strnlen(): the length of str, capped at maxlen; at most maxlen bytes are examined. */
static size_t strnlen_(const char *str, size_t maxlen) {
    size_t n = 0;
    while (n < maxlen && str[n] != '\0') n++;
    return n;
}
56 #endif /* !PCC_USE_SYSTEM_STRNLEN */
57 #endif /* defined __GNUC__ && defined _WIN32 */
58 #endif /* !_MSC_VER */
59 
60 #ifdef _MSC_VER
61 #define snprintf _snprintf
62 #define vsnprintf _vsnprintf
63 #define unlink _unlink
64 #else
65 #include <unistd.h> /* for unlink() */
66 #endif
67 
68 #if !defined __has_attribute || defined _MSC_VER
69 #define __attribute__(x)
70 #endif
71 
72 #undef TRUE  /* to avoid macro definition conflicts with the system header file of IBM AIX */
73 #undef FALSE
74 
75 #define VERSION "1.8.0"
76 
77 #ifndef BUFFER_MIN_SIZE
78 #define BUFFER_MIN_SIZE 256
79 #endif
80 #ifndef ARRAY_MIN_SIZE
81 #define ARRAY_MIN_SIZE 2
82 #endif
83 
84 #define VOID_VALUE (~(size_t)0)
85 
86 #ifdef _WIN64 /* 64-bit Windows including MSVC and MinGW-w64 */
87 #define FMT_LU "%llu"
88 typedef unsigned long long ulong_t;
89 /* NOTE: "%llu" and "long long" are not C89-compliant, but they are required to deal with a 64-bit integer value in 64-bit Windows. */
90 #else
91 #define FMT_LU "%lu"
92 typedef unsigned long ulong_t;
93 #endif
94 /* FMT_LU and ulong_t are used to print size_t values safely (ex. printf(FMT_LU "\n", (ulong_t)value);) */
95 /* NOTE: Neither "%z" nor <inttypes.h> is used since PackCC complies with the C89 standard as much as possible. */
96 
/* Boolean type used throughout this file. */
typedef enum bool_tag {
    FALSE = 0,
    TRUE = 1
} bool_t;
101 
/* An output stream bundled with its file name and an optional line counter. */
typedef struct stream_tag {
    FILE *file;       /* the underlying stream; only referenced, not owned */
    const char *name; /* the name of the file */
    size_t line;      /* the current 0-based line number; VOID_VALUE turns line counting off */
} stream_t;
107 
/* A character buffer (max and len are presumably the allocated capacity and the length in use). */
typedef struct char_array_tag {
    char *buf;  /* the characters */
    size_t max; /* presumably the capacity of buf */
    size_t len; /* presumably the number of characters stored */
} char_array_t;
113 
/* A piece of code text together with its length and a line/column pair (presumably its location in the PEG file). */
typedef struct code_block_tag {
    char *text;  /* the code text */
    size_t len;  /* the length of the text */
    size_t line; /* the line number */
    size_t col;  /* the column number */
} code_block_t;

/* An array of code blocks (max and len are presumably the capacity and the number of blocks in use). */
typedef struct code_block_array_tag {
    code_block_t *buf; /* the code blocks */
    size_t max;        /* presumably the capacity of buf */
    size_t len;        /* presumably the number of blocks stored */
} code_block_array_t;
126 
/* The kind of a node; it tells which member of node_data_t is in use. */
typedef enum node_type_tag {
    NODE_RULE      = 0,
    NODE_REFERENCE = 1,
    NODE_STRING    = 2,
    NODE_CHARCLASS = 3,
    NODE_QUANTITY  = 4,
    NODE_PREDICATE = 5,
    NODE_SEQUENCE  = 6,
    NODE_ALTERNATE = 7,
    NODE_CAPTURE   = 8,
    NODE_EXPAND    = 9,
    NODE_ACTION    = 10,
    NODE_ERROR     = 11
} node_type_t;
141 
typedef struct node_tag node_t; /* forward declaration; struct node_tag is completed after the node payload types */

/* An array of pointers to modifiable nodes. */
typedef struct node_array_tag {
    node_t **buf; /* the node pointers */
    size_t max;   /* presumably the capacity of buf */
    size_t len;   /* presumably the number of pointers stored */
} node_array_t;

/* An array of pointers to read-only nodes. */
typedef struct node_const_array_tag {
    const node_t **buf; /* the node pointers */
    size_t max;         /* presumably the capacity of buf */
    size_t len;         /* presumably the number of pointers stored */
} node_const_array_t;

/* A hash table of pointers to read-only nodes. */
typedef struct node_hash_table_tag {
    const node_t **buf; /* the table slots */
    size_t max;         /* presumably the number of slots */
    size_t mod;         /* presumably the modulus applied to hash values */
} node_hash_table_t;
161 
162 typedef struct node_rule_tag {
163     char *name;
164     node_t *expr;
165     int ref; /* mutable */
166     node_const_array_t vars;
167     node_const_array_t capts;
168     node_const_array_t codes;
169     size_t line;
170     size_t col;
171 } node_rule_t;
172 
173 typedef struct node_reference_tag {
174     char *var; /* NULL if no variable name */
175     size_t index;
176     char *name;
177     const node_t *rule;
178     size_t line;
179     size_t col;
180 } node_reference_t;
181 
182 typedef struct node_string_tag {
183     char *value;
184 } node_string_t;
185 
186 typedef struct node_charclass_tag {
187     char *value; /* NULL means any character */
188 } node_charclass_t;
189 
190 typedef struct node_quantity_tag {
191     int min;
192     int max;
193     node_t *expr;
194 } node_quantity_t;
195 
196 typedef struct node_predicate_tag {
197     bool_t neg;
198     node_t *expr;
199 } node_predicate_t;
200 
201 typedef struct node_sequence_tag {
202     node_array_t nodes;
203 } node_sequence_t;
204 
205 typedef struct node_alternate_tag {
206     node_array_t nodes;
207 } node_alternate_t;
208 
209 typedef struct node_capture_tag {
210     node_t *expr;
211     size_t index;
212 } node_capture_t;
213 
214 typedef struct node_expand_tag {
215     size_t index;
216     size_t line;
217     size_t col;
218 } node_expand_t;
219 
220 typedef struct node_action_tag {
221     code_block_t code;
222     size_t index;
223     node_const_array_t vars;
224     node_const_array_t capts;
225 } node_action_t;
226 
227 typedef struct node_error_tag {
228     node_t *expr;
229     code_block_t code;
230     size_t index;
231     node_const_array_t vars;
232     node_const_array_t capts;
233 } node_error_t;
234 
235 typedef union node_data_tag {
236     node_rule_t      rule;
237     node_reference_t reference;
238     node_string_t    string;
239     node_charclass_t charclass;
240     node_quantity_t  quantity;
241     node_predicate_t predicate;
242     node_sequence_t  sequence;
243     node_alternate_t alternate;
244     node_capture_t   capture;
245     node_expand_t    expand;
246     node_action_t    action;
247     node_error_t     error;
248 } node_data_t;
249 
250 struct node_tag {
251     node_type_t type;
252     node_data_t data;
253 };
254 
255 typedef struct options_tag {
256     bool_t ascii; /* UTF-8 support is disabled if true  */
257     bool_t lines; /* #line directives are output if true */
258     bool_t debug; /* debug information is output if true */
259 } options_t;
260 
261 typedef enum code_flag_tag {
262     CODE_FLAG__NONE = 0,
263     CODE_FLAG__UTF8_CHARCLASS_USED = 1
264 } code_flag_t;
265 
266 typedef struct context_tag {
267     char *iname;  /* the path name of the PEG file being parsed */
268     char *sname;  /* the path name of the C source file being generated */
269     char *hname;  /* the path name of the C header file being generated */
270     FILE *ifile;  /* the input stream of the PEG file */
271     char *hid;    /* the macro name for the include guard of the C header file */
272     char *vtype;  /* the type name of the data output by the parsing API function (NULL means the default) */
273     char *atype;  /* the type name of the user-defined data passed to the parser creation API function (NULL means the default) */
274     char *prefix; /* the prefix of the API function names (NULL means the default) */
275     options_t opts;      /* the options */
276     code_flag_t flags;   /* the bitwise flags to control code generation; updated during PEG parsing */
277     size_t errnum;       /* the current number of PEG parsing errors */
278     size_t linenum;      /* the current line number (0-based) */
279     size_t charnum;      /* the number of characters in the current line that are already flushed (0-based, UTF-8 support if not disabled) */
280     size_t linepos;      /* the beginning position in the PEG file of the current line */
281     size_t bufpos;       /* the position in the PEG file of the first character currently buffered */
282     size_t bufcur;       /* the current parsing position in the character buffer */
283     char_array_t buffer; /* the character buffer */
284     node_array_t rules;  /* the PEG rules */
285     node_hash_table_t rulehash; /* the hash table to accelerate access of desired PEG rules */
286     code_block_array_t esource; /* the code blocks from %earlysource and %earlycommon directives to be added into the generated source file */
287     code_block_array_t eheader; /* the code blocks from %earlyheader and %earlycommon directives to be added into the generated header file */
288     code_block_array_t source;  /* the code blocks from %source and %common directives to be added into the generated source file */
289     code_block_array_t header;  /* the code blocks from %header and %common directives to be added into the generated header file */
290 } context_t;
291 
292 typedef struct generate_tag {
293     stream_t *stream;
294     const node_t *rule;
295     int label;
296     bool_t ascii;
297 } generate_t;
298 
/* Bit flags for string checks; the values are distinct powers of two so that they can be combined. */
typedef enum string_flag_tag {
    STRING_FLAG__NONE       = 0,
    STRING_FLAG__NOTEMPTY   = 1,
    STRING_FLAG__NOTVOID    = 2,
    STRING_FLAG__IDENTIFIER = 4
} string_flag_t;

/* Three-state result: always succeeds (1), always fails (-1), or both are possible (0). */
typedef enum code_reach_tag {
    CODE_REACH__BOTH           = 0,
    CODE_REACH__ALWAYS_SUCCEED = 1,
    CODE_REACH__ALWAYS_FAIL    = -1
} code_reach_t;
311 
static const char *g_cmdname = "packcc"; /* replaced later with actual one */

/*
 * Prints "<g_cmdname>: " followed by the printf-style formatted message to stderr.
 * Returns the total number of characters written, or the negative value
 * reported by the output call that failed.
 */
__attribute__((format(printf, 1, 2)))
static int print_error(const char *format, ...) {
    va_list args;
    int total;
    va_start(args, format);
    total = fprintf(stderr, "%s: ", g_cmdname);
    if (total >= 0) {
        const int body = vfprintf(stderr, format, args);
        total = (body < 0) ? body : total + body;
    }
    va_end(args);
    return total;
}
327 
/* Opens the file at path in "rb" mode; on failure, reports the error and exits with status 2. */
static FILE *fopen_rb_e(const char *path) {
    FILE *const fp = fopen(path, "rb");
    if (!fp) {
        print_error("Cannot open file '%s' to read\n", path);
        exit(2);
    }
    return fp;
}
336 
/* Opens the file at path in "wt" mode; on failure, reports the error and exits with status 2. */
static FILE *fopen_wt_e(const char *path) {
    FILE *const fp = fopen(path, "wt");
    if (!fp) {
        print_error("Cannot open file '%s' to write\n", path);
        exit(2);
    }
    return fp;
}
345 
/* Closes the stream and returns the result of fclose(); if fclose() reports EOF, prints an error and exits with status 2. */
static int fclose_e(FILE *stream) {
    const int result = fclose(stream);
    if (result != EOF) return result;
    print_error("File closing error\n");
    exit(2);
    return result; /* never reached */
}
354 
/*
 * Reads one character from the stream.
 * Returns the character, or EOF at the end of the file; a read error is reported and ends the program with status 2.
 */
static int fgetc_e(FILE *stream) {
    const int ch = fgetc(stream);
    if (ch != EOF) return ch;
    if (ferror(stream)) { /* EOF caused by an error rather than by the end of the file */
        print_error("File read error\n");
        exit(2);
    }
    return ch;
}
363 
/* Allocates size bytes with malloc(); if malloc() returns NULL, reports "Out of memory" and exits with status 3. */
static void *malloc_e(size_t size) {
    void *const mem = malloc(size);
    if (!mem) {
        print_error("Out of memory\n");
        exit(3);
    }
    return mem;
}
372 
/* Resizes the memory at ptr to size bytes with realloc(); if realloc() returns NULL, reports "Out of memory" and exits with status 3. */
static void *realloc_e(void *ptr, size_t size) {
    void *const mem = realloc(ptr, size);
    if (!mem) {
        print_error("Out of memory\n");
        exit(3);
    }
    return mem;
}
381 
/* Returns a newly allocated copy of str; the allocation is done with malloc_e(). */
static char *strdup_e(const char *str) {
    const size_t size = strlen(str) + 1; /* the terminator is copied along with the text */
    char *const copy = (char *)malloc_e(size);
    memcpy(copy, str, size);
    return copy;
}
389 
/* Returns a newly allocated, NUL-terminated copy of at most len leading characters of str; the allocation is done with malloc_e(). */
static char *strndup_e(const char *str, size_t len) {
    const size_t n = strnlen(str, len);
    char *const copy = (char *)malloc_e(n + 1);
    copy[n] = '\0';
    memcpy(copy, str, n);
    return copy;
}
397 
string_to_size_t(const char * str)398 static size_t string_to_size_t(const char *str) {
399 #define N (~(size_t)0 / 10)
400 #define M (~(size_t)0 - 10 * N)
401     size_t n = 0, i, k;
402     for (i = 0; str[i]; i++) {
403         const char c = str[i];
404         if (c < '0' || c > '9') return VOID_VALUE;
405         k = (size_t)(c - '0');
406         if (n >= N && k > M) return VOID_VALUE; /* overflow */
407         n = k + 10 * n;
408     }
409     return n;
410 #undef N
411 #undef M
412 }
413 
/*
 * Scans one line of str in the range [start, end).
 * Returns the position just after the last character of the line other than ' ', '\v', '\f', and '\t'
 * (start if there is no such character), i.e. where the trailing spaces of the line begin.
 * If next is not NULL, *next receives the position following the line break ("\n", "\r", or "\r\n"),
 * or end if the range has no line break.
 */
static size_t find_first_trailing_space(const char *str, size_t start, size_t end, size_t *next) {
    size_t tail = start;    /* just after the last non-space character seen so far */
    size_t following = end; /* where the next line begins */
    size_t pos = start;
    while (pos < end) {
        const char c = str[pos];
        if (c == '\n') {
            following = pos + 1;
            break;
        }
        if (c == '\r') {
            following = (pos + 1 < end && str[pos + 1] == '\n') ? pos + 2 : pos + 1;
            break;
        }
        pos++;
        if (c != ' ' && c != '\v' && c != '\f' && c != '\t') tail = pos;
    }
    if (next) *next = following;
    return tail;
}
437 
/*
 * Measures the indent width at the head of str in the range [start, end).
 * ' ', '\v', and '\f' count as one column each; '\t' advances to the next multiple of 8.
 * If next is not NULL, *next receives the position of the first character that is not one of these,
 * or end if there is no such character in the range.
 */
static size_t count_indent_spaces(const char *str, size_t start, size_t end, size_t *next) {
    size_t width = 0;
    size_t stop = end;
    size_t pos;
    for (pos = start; pos < end; pos++) {
        const char c = str[pos];
        if (c == '\t') {
            width = (width + 8) & ~7;
        }
        else if (c == ' ' || c == '\v' || c == '\f') {
            width++;
        }
        else {
            stop = pos;
            break;
        }
    }
    if (next) *next = stop;
    return width;
}
458 
is_filled_string(const char * str)459 static bool_t is_filled_string(const char *str) {
460     size_t i;
461     for (i = 0; str[i]; i++) {
462         if (
463             str[i] != ' '  &&
464             str[i] != '\v' &&
465             str[i] != '\f' &&
466             str[i] != '\t' &&
467             str[i] != '\n' &&
468             str[i] != '\r'
469         ) return TRUE;
470     }
471     return FALSE;
472 }
473 
is_identifier_string(const char * str)474 static bool_t is_identifier_string(const char *str) {
475     size_t i;
476     if (!(
477         (str[0] >= 'a' && str[0] <= 'z') ||
478         (str[0] >= 'A' && str[0] <= 'Z') ||
479         str[0] == '_'
480     )) return FALSE;
481     for (i = 1; str[i]; i++) {
482         if (!(
483             (str[i] >= 'a' && str[i] <= 'z') ||
484             (str[i] >= 'A' && str[i] <= 'Z') ||
485             (str[i] >= '0' && str[i] <= '9') ||
486             str[i] == '_'
487         )) return FALSE;
488     }
489     return TRUE;
490 }
491 
is_pointer_type(const char * str)492 static bool_t is_pointer_type(const char *str) {
493     const size_t n = strlen(str);
494     return (n > 0 && str[n - 1] == '*') ? TRUE : FALSE;
495 }
496 
is_valid_utf8_string(const char * str)497 static bool_t is_valid_utf8_string(const char *str) {
498     int k = 0, n = 0, u = 0;
499     size_t i;
500     for (i = 0; str[i]; i++) {
501         const int c = (int)(unsigned char)str[i];
502         switch (k) {
503         case 0:
504             if (c >= 0x80) {
505                 if ((c & 0xe0) == 0xc0) {
506                     u = c & 0x1f;
507                     n = k = 1;
508                 }
509                 else if ((c & 0xf0) == 0xe0) {
510                     u = c & 0x0f;
511                     n = k = 2;
512                 }
513                 else if ((c & 0xf8) == 0xf0) {
514                     u = c & 0x07;
515                     n = k = 3;
516                 }
517                 else {
518                     return FALSE;
519                 }
520             }
521             break;
522         case 1:
523         case 2:
524         case 3:
525             if ((c & 0xc0) == 0x80) {
526                 u <<= 6;
527                 u |= c & 0x3f;
528                 k--;
529                 if (k == 0) {
530                     switch (n) {
531                     case 1:
532                         if (u < 0x80) return FALSE;
533                         break;
534                     case 2:
535                         if (u < 0x800) return FALSE;
536                         break;
537                     case 3:
538                         if (u < 0x10000 || u > 0x10ffff) return FALSE;
539                         break;
540                     default:
541                         assert(((void)"unexpected control flow", 0));
542                         return FALSE; /* never reached */
543                     }
544                     u = 0;
545                     n = 0;
546                 }
547             }
548             else {
549                 return FALSE;
550             }
551             break;
552         default:
553             assert(((void)"unexpected control flow", 0));
554             return FALSE; /* never reached */
555         }
556     }
557     return (k == 0) ? TRUE : FALSE;
558 }
559 
/*
 * Decodes the UTF-8 sequence at the head of seq, without checking UTF-8 validity.
 * Returns the sequence length deduced from the first byte only: 0 for the terminator,
 * 2 to 4 for a multibyte lead byte, and 1 for anything else (including an invalid lead byte).
 * If out is not NULL, *out receives the decoded code point.
 * No byte beyond the string terminator is read; the bits that would come from the missing bytes
 * of a truncated sequence are left as 0.
 */
static size_t utf8_to_utf32(const char *seq, int *out) { /* without checking UTF-8 validity */
    const unsigned char *const s = (const unsigned char *)seq;
    const int c = (int)s[0];
    size_t n;
    int u;
    if ((c & 0xe0) == 0xc0) {
        n = 2;
        u = ((c & 0x1f) << 6) | ((int)s[1] & 0x3f); /* s[1] is the terminator at worst */
    }
    else if ((c & 0xf0) == 0xe0) {
        n = 3;
        u = ((c & 0x0f) << 12) | (((int)s[1] & 0x3f) << 6);
        if (s[1]) u |= (int)s[2] & 0x3f;
    }
    else if ((c & 0xf8) == 0xf0) {
        n = 4;
        u = ((c & 0x07) << 18) | (((int)s[1] & 0x3f) << 12);
        if (s[1]) {
            u |= ((int)s[2] & 0x3f) << 6;
            if (s[2]) u |= (int)s[3] & 0x3f; /* s[3] is examined only if both s[1] and s[2] exist */
        }
    }
    else {
        n = (c == 0) ? 0 : 1;
        u = c;
    }
    if (out) *out = u;
    return n;
}
591 
/*
 * Replaces the escape sequences in str with the characters they stand for, in place.
 *   - \' \" \0 \a \b \f \n \r \t \v : the corresponding single character
 *   - \xHH                          : the byte with the hexadecimal value HH
 *   - \uHHHH                        : the UTF-8 encoding of the code point; a high surrogate must be
 *                                     followed immediately by a \uHHHH low surrogate, and the pair is combined
 *   - backslash + line break        : removed ("\n", "\r", or "\r\n")
 *   - \\                            : one backslash, or two if cls is TRUE
 *   - any other escape              : left as it is
 * An invalid \x or \u sequence is left in the string as it is.
 * Returns FALSE if the string contains an invalid \x or \u sequence or ends with a lone backslash;
 * returns TRUE otherwise. The string is NUL-terminated in either case.
 */
static bool_t unescape_string(char *str, bool_t cls) { /* cls: TRUE if used for character class matching */
    bool_t b = TRUE;
    size_t i, j; /* i: the reading position; j: the writing position (never beyond i) */
    for (j = 0, i = 0; str[i]; i++) {
        if (str[i] == '\\') {
            i++;
            switch (str[i]) {
            case '\0': str[j++] = '\\'; str[j] = '\0'; return FALSE;
            case '\'': str[j++] = '\''; break;
            case '\"': str[j++] = '\"'; break;
            case '0': str[j++] = '\x00'; break;
            case 'a': str[j++] = '\x07'; break;
            case 'b': str[j++] = '\x08'; break;
            case 'f': str[j++] = '\x0c'; break;
            case 'n': str[j++] = '\x0a'; break;
            case 'r': str[j++] = '\x0d'; break;
            case 't': str[j++] = '\x09'; break;
            case 'v': str[j++] = '\x0b'; break;
            case 'x':
                {
                    int s = 0;
                    size_t k;
                    for (k = 0; k < 2; k++) {
                        const char c = str[i + k + 1];
                        /* NOTE: The type is int, not char; plain char may be unsigned, and then -1 would never be seen as negative. */
                        const int d =
                            (c >= '0' && c <= '9') ? c - '0' :
                            (c >= 'a' && c <= 'f') ? c - 'a' + 10 :
                            (c >= 'A' && c <= 'F') ? c - 'A' + 10 : -1;
                        if (d < 0) break;
                        s = (s << 4) | d;
                    }
                    if (k < 2) { /* invalid character */
                        const size_t l = i + k;
                        str[j++] = '\\'; str[j++] = 'x';
                        while (i <= l) str[j++] = str[++i]; /* copies the valid digits and the offending character */
                        if (str[i] == '\0') return FALSE; /* the terminator has just been copied */
                        b = FALSE;
                        continue;
                    }
                    str[j++] = (char)s;
                    i += 2;
                }
                break;
            case 'u':
                {
                    int s = 0, t = 0; /* s: the first code unit; t: the low surrogate if any */
                    size_t k;
                    for (k = 0; k < 4; k++) {
                        const char c = str[i + k + 1];
                        const int d =
                            (c >= '0' && c <= '9') ? c - '0' :
                            (c >= 'a' && c <= 'f') ? c - 'a' + 10 :
                            (c >= 'A' && c <= 'F') ? c - 'A' + 10 : -1;
                        if (d < 0) break;
                        s = (s << 4) | d;
                    }
                    if (k < 4 || (s & 0xfc00) == 0xdc00) { /* invalid character or invalid surrogate code point */
                        const size_t l = i + k;
                        str[j++] = '\\'; str[j++] = 'u';
                        while (i <= l) str[j++] = str[++i]; /* copies the valid digits and the character after them */
                        /* NOTE: The last copied character is checked, since it is the terminator if a lone low surrogate ends the string. */
                        if (str[i] == '\0') return FALSE;
                        b = FALSE;
                        continue;
                    }
                    if ((s & 0xfc00) == 0xd800) { /* surrogate pair */
                        for (k = 4; k < 10; k++) {
                            const char c = str[i + k + 1];
                            if (k == 4) {
                                if (c != '\\') break;
                            }
                            else if (k == 5) {
                                if (c != 'u') break;
                            }
                            else {
                                const int d =
                                    (c >= '0' && c <= '9') ? c - '0' :
                                    (c >= 'a' && c <= 'f') ? c - 'a' + 10 :
                                    (c >= 'A' && c <= 'F') ? c - 'A' + 10 : -1;
                                if (d < 0) break;
                                t = (t << 4) | d;
                            }
                        }
                        if (k < 10 || (t & 0xfc00) != 0xdc00) { /* invalid character or invalid surrogate code point */
                            const size_t l = i + 4; /* NOTE: Not i + k to redo with recovery. */
                            str[j++] = '\\'; str[j++] = 'u';
                            while (i <= l) str[j++] = str[++i];
                            if (str[i] == '\0') return FALSE; /* a lone high surrogate ends the string; never step over the terminator */
                            b = FALSE;
                            continue;
                        }
                    }
                    {
                        const int u = t ? ((((s & 0x03ff) + 0x0040) << 10) | (t & 0x03ff)) : s;
                        if (u < 0x0080) {
                            str[j++] = (char)u;
                        }
                        else if (u < 0x0800) {
                            str[j++] = (char)(0xc0 | (u >> 6));
                            str[j++] = (char)(0x80 | (u & 0x3f));
                        }
                        else if (u < 0x010000) {
                            str[j++] = (char)(0xe0 | (u >> 12));
                            str[j++] = (char)(0x80 | ((u >> 6) & 0x3f));
                            str[j++] = (char)(0x80 | (u & 0x3f));
                        }
                        else if (u < 0x110000) {
                            str[j++] = (char)(0xf0 | (u >> 18));
                            str[j++] = (char)(0x80 | ((u >> 12) & 0x3f));
                            str[j++] = (char)(0x80 | ((u >>  6) & 0x3f));
                            str[j++] = (char)(0x80 | (u & 0x3f));
                        }
                        else { /* never reached theoretically; in case */
                            const size_t l = i + 10;
                            str[j++] = '\\'; str[j++] = 'u';
                            while (i <= l) str[j++] = str[++i];
                            if (str[i] == '\0') return FALSE;
                            b = FALSE;
                            continue;
                        }
                    }
                    i += t ? 10 : 4;
                }
                break;
            case '\n': break;
            case '\r': if (str[i + 1] == '\n') i++; break;
            case '\\':
                if (cls) str[j++] = '\\'; /* left for character class matching (ex. considering [\^\]\\]) */
                str[j++] = '\\';
                break;
            default: str[j++] = '\\'; str[j++] = str[i];
            }
        }
        else {
            str[j++] = str[i];
        }
    }
    str[j] = '\0';
    return b;
}
730 
/*
 * Writes the C-literal representation of ch into *buf and returns it.
 * The control characters having a simple escape, the backslash, and the quotes are written as two-character escapes;
 * the characters from 0x20 to 0x7e are written as they are; the others are written in the form "\xHH".
 */
static const char *escape_character(char ch, char (*buf)[5]) {
    const char *simple = NULL; /* the two-character escape if there is one */
    switch (ch) {
    case '\x00': simple = "\\0"; break;
    case '\x07': simple = "\\a"; break;
    case '\x08': simple = "\\b"; break;
    case '\x0c': simple = "\\f"; break;
    case '\x0a': simple = "\\n"; break;
    case '\x0d': simple = "\\r"; break;
    case '\x09': simple = "\\t"; break;
    case '\x0b': simple = "\\v"; break;
    case '\\':  simple = "\\\\"; break;
    case '\'':  simple = "\\\'"; break;
    case '\"':  simple = "\\\""; break;
    default: break;
    }
    if (simple != NULL) {
        strncpy(*buf, simple, 5);
    }
    else if (ch >= '\x20' && ch < '\x7f') {
        snprintf(*buf, 5, "%c", ch);
    }
    else {
        snprintf(*buf, 5, "\\x%02x", (int)(unsigned char)ch);
    }
    (*buf)[4] = '\0'; /* the buffer is terminated whatever the calls above did */
    return *buf;
}
753 
/* Removes the leading run of ' ', '\v', '\f', '\t', '\n', and '\r' from str by moving the rest to the head. */
static void remove_leading_blanks(char *str) {
    const size_t skip = strspn(str, " \v\f\t\n\r");
    if (skip == 0) return; /* nothing to remove */
    memmove(str, str + skip, strlen(str + skip) + 1); /* including the terminator */
}
771 
/* Cuts str just after its last character other than ' ', '\v', '\f', '\t', '\n', and '\r'. */
static void remove_trailing_blanks(char *str) {
    size_t len = strlen(str);
    while (len > 0 && strchr(" \v\f\t\n\r", str[len - 1]) != NULL) len--;
    str[len] = '\0';
}
786 
/*
 * Returns the position where the trailing run of ' ', '\v', '\f', '\t', '\n', and '\r' begins in str,
 * i.e. the position just after the last character that is none of them (0 if there is no such character).
 */
static size_t find_trailing_blanks(const char *str) {
    size_t pos = strlen(str);
    while (pos > 0) {
        const char c = str[pos - 1];
        if (c != ' ' && c != '\v' && c != '\f' && c != '\t' && c != '\n' && c != '\r') break;
        pos--;
    }
    return pos;
}
801 
/*
 * Counts the characters of str that begin in the range [start, end), stopping early at a NUL byte.
 * A UTF-8 multibyte sequence counts as one character; its length is taken from the lead byte alone,
 * without checking UTF-8 validity, and a byte that is not a valid lead byte counts as one character.
 */
static size_t count_characters(const char *str, size_t start, size_t end) {
    size_t count = 0;
    size_t pos = start;
    while (pos < end && str[pos] != '\0') {
        const int lead = (int)(unsigned char)str[pos];
        size_t step = 1; /* an ASCII character or an invalid code */
        if ((lead & 0xe0) == 0xc0) step = 2;
        else if ((lead & 0xf0) == 0xe0) step = 3;
        else if ((lead & 0xf8) == 0xf0) step = 4;
        pos += step;
        count++;
    }
    return count;
}
813 
/*
 * Rewrites str in place: lowercase ASCII letters are made uppercase, uppercase ASCII letters and
 * decimal digits are kept, and every other character is replaced with '_'.
 */
static void make_header_identifier(char *str) {
    char *p;
    for (p = str; *p != '\0'; p++) {
        const char c = *p;
        if ((c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')) continue; /* kept as it is */
        *p = (c >= 'a' && c <= 'z') ? (char)(c - 'a' + 'A') : '_';
    }
}
822 
stream__wrap(FILE * file,const char * name,size_t line)823 static stream_t stream__wrap(FILE *file, const char *name, size_t line) {
824     stream_t s;
825     s.file = file;
826     s.name = name;
827     s.line = line;
828     return s;
829 }
830 
/*
 * Writes one character to the stream and returns the result of fputc().
 * A write error is reported and ends the program with status 2.
 * The line counter is advanced on '\n' unless line counting is disabled.
 */
static int stream__putc(stream_t *stream, int c) {
    const int result = fputc(c, stream->file);
    if (result == EOF) {
        print_error("File write error\n");
        exit(2);
    }
    if (c == '\n' && stream->line != VOID_VALUE) stream->line++;
    return result;
}
842 
/*
 * Writes a string to the stream and returns the result of fputs().
 * A write error is reported and ends the program with status 2.
 * The line counter is advanced once per '\n' in the string unless line counting is disabled.
 */
static int stream__puts(stream_t *stream, const char *s) {
    const int result = fputs(s, stream->file);
    if (result == EOF) {
        print_error("File write error\n");
        exit(2);
    }
    if (stream->line != VOID_VALUE) {
        const char *p;
        for (p = s; *p != '\0'; p++) {
            if (*p == '\n') stream->line++;
        }
    }
    return result;
}
857 
858 __attribute__((format(printf, 2, 3)))
stream__printf(stream_t * stream,const char * format,...)859 static int stream__printf(stream_t *stream, const char *format, ...) {
860     if (stream->line != VOID_VALUE) {
861 #define M 1024
862         char s[M], *p = NULL;
863         int n = 0;
864         size_t l = 0;
865         {
866             va_list a;
867             va_start(a, format);
868             n = vsnprintf(NULL, 0, format, a);
869             va_end(a);
870             if (n < 0) {
871                 print_error("Internal error\n");
872                 exit(2);
873             }
874             l = (size_t)n + 1;
875         }
876         p = (l > M) ? (char *)malloc_e(l) : s;
877         {
878             va_list a;
879             va_start(a, format);
880             n = vsnprintf(p, l, format, a);
881             va_end(a);
882             if (n < 0 || (size_t)n >= l) {
883                 print_error("Internal error\n");
884                 exit(2);
885             }
886         }
887         stream__puts(stream, p);
888         if (p != s) free(p);
889         return n;
890 #undef M
891     }
892     else {
893         int n;
894         va_list a;
895         va_start(a, format);
896         n = vfprintf(stream->file, format, a);
897         va_end(a);
898         if (n < 0) {
899             print_error("File write error\n");
900             exit(2);
901         }
902         return n;
903     }
904 }
905 
/* Writes the character ch to the stream len times; nothing is written if len is VOID_VALUE. */
static void stream__write_characters(stream_t *stream, char ch, size_t len) {
    if (len == VOID_VALUE) return; /* for safety */
    while (len-- > 0) stream__putc(stream, ch);
}
911 
/*
 * Writes len characters starting at ptr to the stream, normalizing line
 * endings: both a lone '\r' and a "\r\n" pair are written as a single '\n'.
 * Nothing is written when len is VOID_VALUE.
 */
static void stream__write_text(stream_t *stream, const char *ptr, size_t len) {
    size_t pos = 0;
    if (len == VOID_VALUE) return; /* for safety */
    while (pos < len) {
        const char c = ptr[pos++];
        if (c != '\r') {
            stream__putc(stream, c);
            continue;
        }
        /* swallow the '\n' of a CR-LF pair so that only one newline is written */
        if (pos < len && ptr[pos] == '\n') pos++;
        stream__putc(stream, '\n');
    }
}
925 
/*
 * Writes len characters starting at ptr to the stream, passing each one
 * through escape_character() first (presumably yielding its escaped textual
 * form; that helper is defined elsewhere in this file).
 * Nothing is written when len is VOID_VALUE.
 */
static void stream__write_escaped_string(stream_t *stream, const char *ptr, size_t len) {
    char escaped[5]; /* scratch buffer handed to escape_character() */
    size_t pos = 0;
    if (len == VOID_VALUE) return; /* for safety */
    while (pos < len) {
        stream__puts(stream, escape_character(ptr[pos], &escaped));
        pos++;
    }
}
934 
/*
 * Writes a '#line <number> "<file>"' directive followed by a newline.
 * lineno is 0-based here and written as lineno + 1; the file name is written
 * through stream__write_escaped_string().
 */
static void stream__write_line_directive(stream_t *stream, const char *fname, size_t lineno) {
    const ulong_t shown_line = (ulong_t)(lineno + 1);
    const size_t fname_len = strlen(fname);
    stream__printf(stream, "#line " FMT_LU " \"", shown_line);
    stream__write_escaped_string(stream, fname, fname_len);
    stream__puts(stream, "\"\n");
}
940 
/*
 * Writes the code block ptr[0..len) to the stream, re-indented by `indent`
 * spaces.
 *
 * - The first line is written with its leading blanks removed.
 * - For the remaining lines the smallest indentation among lines that do not
 *   begin with '#' is determined first; each line is then written with that
 *   common indentation replaced by `indent`.
 * - A line whose first non-blank character is '#' is written without any
 *   indentation.
 * - Blank lines before the first written line are dropped (lineno is advanced
 *   for each of them); later blank lines are kept as empty lines, except a
 *   final one that ends exactly at len.
 * - When line tracking is enabled (stream->line != VOID_VALUE), a #line
 *   directive pointing at fname/lineno precedes the first written line, and a
 *   directive pointing back at the output file (stream->name, stream->line)
 *   follows the block if anything was written.
 *
 * Nothing is written when len is VOID_VALUE.
 *
 * NOTE(review): find_first_trailing_space() and count_indent_spaces() are
 * defined elsewhere; from their use here the former appears to return the end
 * of the current line's content and to store the start of the next line, and
 * the latter to return the indentation width and optionally store the position
 * of the first non-blank character -- confirm against their definitions.
 */
static void stream__write_code_block(stream_t *stream, const char *ptr, size_t len, size_t indent, const char *fname, size_t lineno) {
    bool_t emitted = FALSE; /* TRUE once a non-blank line has been seen */
    size_t pos, end, rest;
    if (len == VOID_VALUE) return; /* for safety */

    /* first line: skip its leading blanks */
    end = find_first_trailing_space(ptr, 0, len, &rest);
    pos = 0;
    while (pos < end) {
        const char c = ptr[pos];
        if (!(c == ' ' || c == '\v' || c == '\f' || c == '\t')) break;
        pos++;
    }
    if (pos >= end) {
        /* the first line is blank: drop it */
        lineno++;
    }
    else {
        if (stream->line != VOID_VALUE) {
            stream__write_line_directive(stream, fname, lineno);
        }
        if (ptr[pos] != '#') {
            stream__write_characters(stream, ' ', indent);
        }
        stream__write_text(stream, ptr + pos, end - pos);
        stream__putc(stream, '\n');
        emitted = TRUE;
    }

    if (rest < len) {
        size_t min_indent = VOID_VALUE; /* smallest indentation found so far */
        size_t next;

        /* pass 1: find the common indentation and skip leading blank lines */
        pos = rest;
        while (pos < len) {
            end = find_first_trailing_space(ptr, pos, len, &next);
            if (pos < end) {
                if (stream->line != VOID_VALUE && !emitted) {
                    stream__write_line_directive(stream, fname, lineno);
                }
                if (ptr[pos] != '#') {
                    const size_t depth = count_indent_spaces(ptr, pos, end, NULL);
                    if (min_indent == VOID_VALUE || depth < min_indent) min_indent = depth;
                }
                emitted = TRUE;
            }
            else if (!emitted) {
                /* still before the first written line: drop this blank line */
                rest = next;
                lineno++;
            }
            pos = next;
        }

        /* pass 2: write the lines with the adjusted indentation */
        pos = rest;
        while (pos < len) {
            end = find_first_trailing_space(ptr, pos, len, &next);
            if (pos < end) {
                const size_t depth = count_indent_spaces(ptr, pos, end, &pos);
                if (ptr[pos] != '#') {
                    assert(min_indent != VOID_VALUE); /* min_indent must have a valid value */
                    assert(depth >= min_indent);
                    stream__write_characters(stream, ' ', depth - min_indent + indent);
                }
                stream__write_text(stream, ptr + pos, end - pos);
                stream__putc(stream, '\n');
                emitted = TRUE;
            }
            else if (next < len) {
                stream__putc(stream, '\n');
            }
            pos = next;
        }
    }

    /* switch the line information back to the generated file itself */
    if (stream->line != VOID_VALUE && emitted) {
        stream__write_line_directive(stream, stream->name, stream->line);
    }
}
1008 
/*
 * Returns a pointer into path just after the last '/', '\\' or ':' character,
 * or path itself when none of them occurs.
 */
static const char *extract_filename(const char *path) {
    const char *p = path + strlen(path);
    while (p != path) {
        const char c = p[-1];
        if (c == '/' || c == '\\' || c == ':') return p;
        p--;
    }
    return path;
}
1017 
/*
 * Returns a pointer to the last '.' in the final path component (the part
 * after the last '/', '\\' or ':'), i.e. the extension including its dot.
 * When that component contains no '.', a pointer to the terminating NUL of
 * path is returned.
 */
static const char *extract_fileext(const char *path) {
    const char *const end = path + strlen(path);
    const char *p;
    for (p = end; p != path; p--) {
        const char c = p[-1];
        if (c == '.') return p - 1;
        if (c == '/' || c == '\\' || c == ':') break;
    }
    return end;
}
1028 
replace_fileext(const char * path,const char * ext)1029 static char *replace_fileext(const char *path, const char *ext) {
1030     const char *const p = extract_fileext(path);
1031     const size_t m = p - path;
1032     const size_t n = strlen(ext);
1033     char *const s = (char *)malloc_e(m + n + 2);
1034     memcpy(s, path, m);
1035     s[m] = '.';
1036     memcpy(s + m + 1, ext, n + 1);
1037     return s;
1038 }
1039 
/*
 * Returns a newly allocated string consisting of path, a '.', and ext
 * (ext is given without the dot).
 * The buffer comes from malloc_e(); the caller releases it with free().
 */
static char *add_fileext(const char *path, const char *ext) {
    const size_t path_len = strlen(path);
    const size_t ext_len = strlen(ext);
    char *const result = (char *)malloc_e(path_len + 1 + ext_len + 1);
    char *out = result;
    memcpy(out, path, path_len);
    out += path_len;
    *out++ = '.';
    memcpy(out, ext, ext_len);
    out[ext_len] = '\0';
    return result;
}
1049 
/*
 * Computes a multiplicative hash (factor 31) of the NUL-terminated string str.
 * Arithmetic is done in size_t and wraps around on overflow.
 *
 * Each byte is read as unsigned char so that the result does not depend on
 * whether plain char is signed on the target platform (a signed char with the
 * high bit set would otherwise be sign-extended before being added).
 */
static size_t hash_string(const char *str) {
    const unsigned char *p;
    size_t h = 0;
    for (p = (const unsigned char *)str; *p != '\0'; p++) {
        h = h * 31 + *p;
    }
    return h;
}
1057 
/*
 * Sets every bit below the most significant set bit of x, e.g. 0x100 becomes
 * 0x1FF; 0 stays 0. The result is usable as a bit mask of the form 2^k - 1.
 *
 * The shift amounts are derived from the actual width of size_t instead of
 * compiler-specific macros, so all bits are covered on every platform and no
 * shift ever reaches the width of the type.
 */
static size_t populate_bits(size_t x) {
    size_t shift;
    for (shift = 1; shift < sizeof(size_t) * CHAR_BIT; shift <<= 1) {
        x |= x >> shift;
    }
    return x;
}
1069 
column_number(const context_t * ctx)1070 static size_t column_number(const context_t *ctx) { /* 0-based */
1071     assert(ctx->bufpos + ctx->bufcur >= ctx->linepos);
1072     if (ctx->opts.ascii)
1073         return ctx->charnum + ctx->bufcur - ((ctx->linepos > ctx->bufpos) ? ctx->linepos - ctx->bufpos : 0);
1074     else
1075         return ctx->charnum + count_characters(ctx->buffer.buf, (ctx->linepos > ctx->bufpos) ? ctx->linepos - ctx->bufpos : 0, ctx->bufcur);
1076 }
1077 
/* Puts the character array into its empty state: no storage, no elements. */
static void char_array__init(char_array_t *array) {
    array->buf = NULL;
    array->max = 0;
    array->len = 0;
}
1083 
/*
 * Appends ch to the array. When the array is full, the capacity starts at
 * BUFFER_MIN_SIZE and is doubled until the new element fits; the storage is
 * resized with realloc_e().
 */
static void char_array__add(char_array_t *array, char ch) {
    if (array->len >= array->max) {
        const size_t required = array->len + 1;
        size_t capacity = (array->max != 0) ? array->max : BUFFER_MIN_SIZE;
        while (capacity != 0 && capacity < required) capacity <<= 1;
        if (capacity == 0) capacity = required; /* in case of shift overflow */
        array->buf = (char *)realloc_e(array->buf, capacity);
        array->max = capacity;
    }
    array->buf[array->len] = ch;
    array->len++;
}
1096 
/* Releases the storage owned by the character array. */
static void char_array__term(char_array_t *array) {
    char *const storage = array->buf;
    free(storage);
}
1100 
/*
 * Puts the code block into its empty state: no text, zero length, and
 * line/column set to VOID_VALUE.
 */
static void code_block__init(code_block_t *code) {
    code->line = VOID_VALUE;
    code->col = VOID_VALUE;
    code->len = 0;
    code->text = NULL;
}
1107 
/* Releases the text owned by the code block; the block itself is not freed. */
static void code_block__term(code_block_t *code) {
    free(code->text);
}
1111 
/* Puts the code block array into its empty state: no storage, no elements. */
static void code_block_array__init(code_block_array_t *array) {
    array->buf = NULL;
    array->max = 0;
    array->len = 0;
}
1117 
code_block_array__create_entry(code_block_array_t * array)1118 static code_block_t *code_block_array__create_entry(code_block_array_t *array) {
1119     if (array->max <= array->len) {
1120         const size_t n = array->len + 1;
1121         size_t m = array->max;
1122         if (m == 0) m = ARRAY_MIN_SIZE;
1123         while (m < n && m != 0) m <<= 1;
1124         if (m == 0) m = n; /* in case of shift overflow */
1125         array->buf = (code_block_t *)realloc_e(array->buf, sizeof(code_block_t) * m);
1126         array->max = m;
1127     }
1128     code_block__init(&array->buf[array->len]);
1129     return &array->buf[array->len++];
1130 }
1131 
/*
 * Terminates every code block in the array, last element first, and then
 * releases the array's storage.
 */
static void code_block_array__term(code_block_array_t *array) {
    while (array->len != 0) {
        const size_t last = --array->len;
        code_block__term(&array->buf[last]);
    }
    free(array->buf);
}
1139 
/* Puts the node array into its empty state: no storage, no elements. */
static void node_array__init(node_array_t *array) {
    array->buf = NULL;
    array->max = 0;
    array->len = 0;
}
1145 
/*
 * Appends node to the array. When the array is full, the capacity starts at
 * ARRAY_MIN_SIZE and is doubled until the new element fits; the storage is
 * resized with realloc_e(). node_array__term() later destroys the stored
 * nodes, so the array takes over the node.
 */
static void node_array__add(node_array_t *array, node_t *node) {
    if (array->len >= array->max) {
        const size_t required = array->len + 1;
        size_t capacity = (array->max != 0) ? array->max : ARRAY_MIN_SIZE;
        while (capacity != 0 && capacity < required) capacity <<= 1;
        if (capacity == 0) capacity = required; /* in case of shift overflow */
        array->buf = (node_t **)realloc_e(array->buf, sizeof(node_t *) * capacity);
        array->max = capacity;
    }
    array->buf[array->len] = node;
    array->len++;
}
1158 
1159 static void destroy_node(node_t *node);
1160 
/*
 * Destroys every node stored in the array, last element first, and then
 * releases the array's storage.
 */
static void node_array__term(node_array_t *array) {
    while (array->len != 0) {
        const size_t last = --array->len;
        destroy_node(array->buf[last]);
    }
    free(array->buf);
}
1168 
/* Puts the array of const node pointers into its empty state. */
static void node_const_array__init(node_const_array_t *array) {
    array->buf = NULL;
    array->max = 0;
    array->len = 0;
}
1174 
/*
 * Appends node to the array. When the array is full, the capacity starts at
 * ARRAY_MIN_SIZE and is doubled until the new element fits; the storage is
 * resized with realloc_e(). Only the pointer is stored:
 * node_const_array__term() frees the storage but not the nodes.
 */
static void node_const_array__add(node_const_array_t *array, const node_t *node) {
    if (array->len >= array->max) {
        const size_t required = array->len + 1;
        size_t capacity = (array->max != 0) ? array->max : ARRAY_MIN_SIZE;
        while (capacity != 0 && capacity < required) capacity <<= 1;
        if (capacity == 0) capacity = required; /* in case of shift overflow */
        array->buf = (const node_t **)realloc_e((node_t **)array->buf, sizeof(const node_t *) * capacity);
        array->max = capacity;
    }
    array->buf[array->len] = node;
    array->len++;
}
1187 
/* Removes all elements from the array; the allocated storage is kept. */
static void node_const_array__clear(node_const_array_t *array) {
    array->len = 0;
}
1191 
/*
 * Makes array hold the same node pointers as src, in the same order.
 * Previous elements of array are discarded first.
 */
static void node_const_array__copy(node_const_array_t *array, const node_const_array_t *src) {
    size_t i = 0;
    node_const_array__clear(array);
    while (i < src->len) {
        node_const_array__add(array, src->buf[i]);
        i++;
    }
}
1199 
/*
 * Releases the array's storage. The referenced nodes are not destroyed.
 * The cast only strips the const qualifier of the element type for free().
 */
static void node_const_array__term(node_const_array_t *array) {
    node_t **const storage = (node_t **)array->buf;
    free(storage);
}
1203 
create_context(const char * iname,const char * oname,const options_t * opts)1204 static context_t *create_context(const char *iname, const char *oname, const options_t *opts) {
1205     context_t *const ctx = (context_t *)malloc_e(sizeof(context_t));
1206     ctx->iname = strdup_e((iname && iname[0]) ? iname : "-");
1207     ctx->sname = (oname && oname[0]) ? add_fileext(oname, "c") : replace_fileext(ctx->iname, "c");
1208     ctx->hname = (oname && oname[0]) ? add_fileext(oname, "h") : replace_fileext(ctx->iname, "h");
1209     ctx->ifile = (iname && iname[0]) ? fopen_rb_e(ctx->iname) : stdin;
1210     ctx->hid = strdup_e(ctx->hname); make_header_identifier(ctx->hid);
1211     ctx->vtype = NULL;
1212     ctx->atype = NULL;
1213     ctx->prefix = NULL;
1214     ctx->opts = *opts;
1215     ctx->flags = CODE_FLAG__NONE;
1216     ctx->errnum = 0;
1217     ctx->linenum = 0;
1218     ctx->charnum = 0;
1219     ctx->linepos = 0;
1220     ctx->bufpos = 0;
1221     ctx->bufcur = 0;
1222     char_array__init(&ctx->buffer);
1223     node_array__init(&ctx->rules);
1224     ctx->rulehash.mod = 0;
1225     ctx->rulehash.max = 0;
1226     ctx->rulehash.buf = NULL;
1227     code_block_array__init(&ctx->esource);
1228     code_block_array__init(&ctx->eheader);
1229     code_block_array__init(&ctx->source);
1230     code_block_array__init(&ctx->header);
1231     return ctx;
1232 }
1233 
create_node(node_type_t type)1234 static node_t *create_node(node_type_t type) {
1235     node_t *const node = (node_t *)malloc_e(sizeof(node_t));
1236     node->type = type;
1237     switch (node->type) {
1238     case NODE_RULE:
1239         node->data.rule.name = NULL;
1240         node->data.rule.expr = NULL;
1241         node->data.rule.ref = 0;
1242         node_const_array__init(&node->data.rule.vars);
1243         node_const_array__init(&node->data.rule.capts);
1244         node_const_array__init(&node->data.rule.codes);
1245         node->data.rule.line = VOID_VALUE;
1246         node->data.rule.col = VOID_VALUE;
1247         break;
1248     case NODE_REFERENCE:
1249         node->data.reference.var = NULL;
1250         node->data.reference.index = VOID_VALUE;
1251         node->data.reference.name = NULL;
1252         node->data.reference.rule = NULL;
1253         node->data.reference.line = VOID_VALUE;
1254         node->data.reference.col = VOID_VALUE;
1255         break;
1256     case NODE_STRING:
1257         node->data.string.value = NULL;
1258         break;
1259     case NODE_CHARCLASS:
1260         node->data.charclass.value = NULL;
1261         break;
1262     case NODE_QUANTITY:
1263         node->data.quantity.min = node->data.quantity.max = 0;
1264         node->data.quantity.expr = NULL;
1265         break;
1266     case NODE_PREDICATE:
1267         node->data.predicate.neg = FALSE;
1268         node->data.predicate.expr = NULL;
1269         break;
1270     case NODE_SEQUENCE:
1271         node_array__init(&node->data.sequence.nodes);
1272         break;
1273     case NODE_ALTERNATE:
1274         node_array__init(&node->data.alternate.nodes);
1275         break;
1276     case NODE_CAPTURE:
1277         node->data.capture.expr = NULL;
1278         node->data.capture.index = VOID_VALUE;
1279         break;
1280     case NODE_EXPAND:
1281         node->data.expand.index = VOID_VALUE;
1282         node->data.expand.line = VOID_VALUE;
1283         node->data.expand.col = VOID_VALUE;
1284         break;
1285     case NODE_ACTION:
1286         code_block__init(&node->data.action.code);
1287         node->data.action.index = VOID_VALUE;
1288         node_const_array__init(&node->data.action.vars);
1289         node_const_array__init(&node->data.action.capts);
1290         break;
1291     case NODE_ERROR:
1292         node->data.error.expr = NULL;
1293         code_block__init(&node->data.error.code);
1294         node->data.error.index = VOID_VALUE;
1295         node_const_array__init(&node->data.error.vars);
1296         node_const_array__init(&node->data.error.capts);
1297         break;
1298     default:
1299         print_error("Internal error [%d]\n", __LINE__);
1300         exit(-1);
1301     }
1302     return node;
1303 }
1304 
/*
 * Releases a node created by create_node() together with everything it owns:
 * strings, code blocks, child expressions (recursively) and the storage of
 * its arrays. Arrays of const node pointers only lose their storage; the
 * nodes they refer to are not destroyed through them.
 * A NULL node is ignored. An unknown node type is reported as an internal
 * error and terminates the program.
 */
static void destroy_node(node_t *node) {
    if (node == NULL) return;
    switch (node->type) {
    case NODE_RULE:
        free(node->data.rule.name);
        destroy_node(node->data.rule.expr);
        node_const_array__term(&node->data.rule.vars);
        node_const_array__term(&node->data.rule.capts);
        node_const_array__term(&node->data.rule.codes);
        break;
    case NODE_REFERENCE:
        free(node->data.reference.var);
        free(node->data.reference.name);
        break;
    case NODE_STRING:
        free(node->data.string.value);
        break;
    case NODE_CHARCLASS:
        free(node->data.charclass.value);
        break;
    case NODE_QUANTITY:
        destroy_node(node->data.quantity.expr);
        break;
    case NODE_PREDICATE:
        destroy_node(node->data.predicate.expr);
        break;
    case NODE_SEQUENCE:
        node_array__term(&node->data.sequence.nodes);
        break;
    case NODE_ALTERNATE:
        node_array__term(&node->data.alternate.nodes);
        break;
    case NODE_CAPTURE:
        destroy_node(node->data.capture.expr);
        break;
    case NODE_EXPAND:
        /* nothing owned */
        break;
    case NODE_ACTION:
        code_block__term(&node->data.action.code);
        node_const_array__term(&node->data.action.vars);
        node_const_array__term(&node->data.action.capts);
        break;
    case NODE_ERROR:
        destroy_node(node->data.error.expr);
        code_block__term(&node->data.error.code);
        node_const_array__term(&node->data.error.vars);
        node_const_array__term(&node->data.error.capts);
        break;
    default:
        print_error("Internal error [%d]\n", __LINE__);
        exit(-1);
    }
    free(node);
}
1359 
/*
 * Releases a context created by create_context(): the code block arrays, the
 * rule hash table storage, the rules, the input buffer, all owned strings,
 * the input file (through fclose_e()) and finally the context itself.
 * A NULL context is ignored.
 */
static void destroy_context(context_t *ctx) {
    if (ctx == NULL) return;

    /* containers */
    code_block_array__term(&ctx->header);
    code_block_array__term(&ctx->source);
    code_block_array__term(&ctx->eheader);
    code_block_array__term(&ctx->esource);
    free((node_t **)ctx->rulehash.buf); /* the table does not own the rules */
    node_array__term(&ctx->rules);
    char_array__term(&ctx->buffer);

    /* settings */
    free(ctx->prefix);
    free(ctx->atype);
    free(ctx->vtype);

    /* input stream */
    fclose_e(ctx->ifile);

    /* names */
    free(ctx->hid);
    free(ctx->hname);
    free(ctx->sname);
    free(ctx->iname);

    free(ctx);
}
1379 
/*
 * (Re)builds the open-addressing hash table that maps rule names to rule
 * nodes. The table size is a power of two derived from four times the number
 * of rules, and rulehash.mod is the matching bit mask. Collisions are resolved
 * by probing the following slots.
 * A rule whose name is already in the table is not inserted; its reference
 * count is set to -1 instead, which marks it as a duplicate definition.
 */
static void make_rulehash(context_t *ctx) {
    size_t i;
    ctx->rulehash.mod = populate_bits(ctx->rules.len * 4);
    ctx->rulehash.max = ctx->rulehash.mod + 1;
    ctx->rulehash.buf = (const node_t **)realloc_e((node_t **)ctx->rulehash.buf, sizeof(const node_t *) * ctx->rulehash.max);
    for (i = 0; i < ctx->rulehash.max; i++) {
        ctx->rulehash.buf[i] = NULL;
    }
    for (i = 0; i < ctx->rules.len; i++) {
        node_t *const rule = ctx->rules.buf[i];
        bool_t duplicated = FALSE;
        size_t slot;
        assert(rule->type == NODE_RULE);
        slot = hash_string(rule->data.rule.name) & ctx->rulehash.mod;
        while (ctx->rulehash.buf[slot] != NULL) {
            const node_t *const other = ctx->rulehash.buf[slot];
            if (strcmp(rule->data.rule.name, other->data.rule.name) == 0) {
                assert(rule->data.rule.ref == 0);
                assert(other->data.rule.ref == 0);
                rule->data.rule.ref = -1; /* mark as a duplicate */
                duplicated = TRUE;
                break;
            }
            slot = (slot + 1) & ctx->rulehash.mod;
        }
        if (!duplicated) {
            ctx->rulehash.buf[slot] = rule;
        }
    }
}
1405 
lookup_rulehash(const context_t * ctx,const char * name)1406 static const node_t *lookup_rulehash(const context_t *ctx, const char *name) {
1407     size_t j = hash_string(name) & ctx->rulehash.mod;
1408     while (ctx->rulehash.buf[j] != NULL && strcmp(name, ctx->rulehash.buf[j]->data.rule.name) != 0) {
1409         j = (j + 1) & ctx->rulehash.mod;
1410     }
1411     return (ctx->rulehash.buf[j] != NULL) ? ctx->rulehash.buf[j] : NULL;
1412 }
1413 
/*
 * Walks the expression tree below node and resolves every rule reference
 * through lookup_rulehash(). A resolved reference increments the reference
 * count of its rule; an unresolved one is reported with its position and
 * counted in ctx->errnum.
 * A NULL node is ignored. Rule nodes must not appear inside an expression;
 * meeting one (or an unknown node type) is reported as an internal error and
 * terminates the program.
 */
static void link_references(context_t *ctx, node_t *node) {
    size_t i;
    if (node == NULL) return;
    switch (node->type) {
    case NODE_RULE:
        print_error("Internal error [%d]\n", __LINE__);
        exit(-1);
    case NODE_REFERENCE:
        {
            const node_t *const rule = lookup_rulehash(ctx, node->data.reference.name);
            node->data.reference.rule = rule;
            if (rule != NULL) {
                assert(rule->type == NODE_RULE);
                ((node_t *)rule)->data.rule.ref++;
            }
            else {
                print_error("%s:" FMT_LU ":" FMT_LU ": No definition of rule '%s'\n",
                    ctx->iname, (ulong_t)(node->data.reference.line + 1), (ulong_t)(node->data.reference.col + 1),
                    node->data.reference.name);
                ctx->errnum++;
            }
        }
        break;
    case NODE_STRING:
    case NODE_CHARCLASS:
    case NODE_EXPAND:
    case NODE_ACTION:
        /* these nodes contain no sub-expression */
        break;
    case NODE_QUANTITY:
        link_references(ctx, node->data.quantity.expr);
        break;
    case NODE_PREDICATE:
        link_references(ctx, node->data.predicate.expr);
        break;
    case NODE_SEQUENCE:
        for (i = 0; i < node->data.sequence.nodes.len; i++) {
            link_references(ctx, node->data.sequence.nodes.buf[i]);
        }
        break;
    case NODE_ALTERNATE:
        for (i = 0; i < node->data.alternate.nodes.len; i++) {
            link_references(ctx, node->data.alternate.nodes.buf[i]);
        }
        break;
    case NODE_CAPTURE:
        link_references(ctx, node->data.capture.expr);
        break;
    case NODE_ERROR:
        link_references(ctx, node->data.error.expr);
        break;
    default:
        print_error("Internal error [%d]\n", __LINE__);
        exit(-1);
    }
}
1474 
/*
 * Walks the expression tree below node and determines, for every action and
 * error node, which variable references precede it.
 *
 * vars accumulates the references met so far (one entry per distinct index);
 * passing NULL makes the function use a temporary array of its own. Each
 * action / error node receives a copy of the array as it is when the node is
 * reached. Every branch of an alternation starts from the set valid before
 * the alternation; afterwards the variables introduced by any branch are
 * merged into vars.
 *
 * A NULL node is ignored. A rule node or an unknown node type is reported as
 * an internal error and terminates the program.
 */
static void verify_variables(context_t *ctx, node_t *node, node_const_array_t *vars) {
    node_const_array_t local;
    const bool_t is_root = (vars == NULL) ? TRUE : FALSE;
    size_t i;
    if (node == NULL) return;
    if (is_root) {
        node_const_array__init(&local);
        vars = &local;
    }
    switch (node->type) {
    case NODE_RULE:
        print_error("Internal error [%d]\n", __LINE__);
        exit(-1);
    case NODE_REFERENCE:
        if (node->data.reference.index != VOID_VALUE) {
            bool_t known = FALSE;
            for (i = 0; i < vars->len; i++) {
                assert(vars->buf[i]->type == NODE_REFERENCE);
                if (node->data.reference.index == vars->buf[i]->data.reference.index) {
                    known = TRUE;
                    break;
                }
            }
            if (!known) node_const_array__add(vars, node);
        }
        break;
    case NODE_STRING:
    case NODE_CHARCLASS:
    case NODE_EXPAND:
        /* nothing to record */
        break;
    case NODE_QUANTITY:
        verify_variables(ctx, node->data.quantity.expr, vars);
        break;
    case NODE_PREDICATE:
        verify_variables(ctx, node->data.predicate.expr, vars);
        break;
    case NODE_SEQUENCE:
        for (i = 0; i < node->data.sequence.nodes.len; i++) {
            verify_variables(ctx, node->data.sequence.nodes.buf[i], vars);
        }
        break;
    case NODE_ALTERNATE:
        {
            const size_t base = vars->len; /* number of variables valid before the alternation */
            node_const_array_t branch;
            size_t j, k;
            node_const_array__init(&branch);
            node_const_array__copy(&branch, vars);
            for (i = 0; i < node->data.alternate.nodes.len; i++) {
                branch.len = base; /* forget the variables of the previous branch */
                verify_variables(ctx, node->data.alternate.nodes.buf[i], &branch);
                for (j = base; j < branch.len; j++) {
                    const node_t *const added = branch.buf[j];
                    bool_t known = FALSE;
                    for (k = base; k < vars->len; k++) {
                        if (added->data.reference.index == vars->buf[k]->data.reference.index) {
                            known = TRUE;
                            break;
                        }
                    }
                    if (!known) node_const_array__add(vars, added);
                }
            }
            node_const_array__term(&branch);
        }
        break;
    case NODE_CAPTURE:
        verify_variables(ctx, node->data.capture.expr, vars);
        break;
    case NODE_ACTION:
        node_const_array__copy(&node->data.action.vars, vars);
        break;
    case NODE_ERROR:
        node_const_array__copy(&node->data.error.vars, vars);
        verify_variables(ctx, node->data.error.expr, vars);
        break;
    default:
        print_error("Internal error [%d]\n", __LINE__);
        exit(-1);
    }
    if (is_root) {
        node_const_array__term(&local);
    }
}
1554 
/*
 * Walks the expression tree below node and determines, for every action and
 * error node, which captures precede it; it also checks that every capture
 * expansion refers to a capture that is available at its position.
 *
 * capts accumulates the capture nodes met so far; passing NULL makes the
 * function use a temporary array of its own. Each action / error node
 * receives a copy of the array as it is when the node is reached. Every
 * branch of an alternation starts from the set valid before the alternation;
 * afterwards the captures of all branches are appended to capts.
 * An expansion whose index is not VOID_VALUE and matches no recorded capture
 * is reported with its position and counted in ctx->errnum.
 *
 * A NULL node is ignored. A rule node or an unknown node type is reported as
 * an internal error and terminates the program.
 */
static void verify_captures(context_t *ctx, node_t *node, node_const_array_t *capts) {
    node_const_array_t local;
    const bool_t is_root = (capts == NULL) ? TRUE : FALSE;
    size_t i;
    if (node == NULL) return;
    if (is_root) {
        node_const_array__init(&local);
        capts = &local;
    }
    switch (node->type) {
    case NODE_RULE:
        print_error("Internal error [%d]\n", __LINE__);
        exit(-1);
    case NODE_REFERENCE:
    case NODE_STRING:
    case NODE_CHARCLASS:
        /* nothing to record */
        break;
    case NODE_QUANTITY:
        verify_captures(ctx, node->data.quantity.expr, capts);
        break;
    case NODE_PREDICATE:
        verify_captures(ctx, node->data.predicate.expr, capts);
        break;
    case NODE_SEQUENCE:
        for (i = 0; i < node->data.sequence.nodes.len; i++) {
            verify_captures(ctx, node->data.sequence.nodes.buf[i], capts);
        }
        break;
    case NODE_ALTERNATE:
        {
            const size_t base = capts->len; /* number of captures valid before the alternation */
            node_const_array_t branch;
            size_t j;
            node_const_array__init(&branch);
            node_const_array__copy(&branch, capts);
            for (i = 0; i < node->data.alternate.nodes.len; i++) {
                branch.len = base; /* forget the captures of the previous branch */
                verify_captures(ctx, node->data.alternate.nodes.buf[i], &branch);
                for (j = base; j < branch.len; j++) {
                    node_const_array__add(capts, branch.buf[j]);
                }
            }
            node_const_array__term(&branch);
        }
        break;
    case NODE_CAPTURE:
        /* a capture becomes available only after its own expression */
        verify_captures(ctx, node->data.capture.expr, capts);
        node_const_array__add(capts, node);
        break;
    case NODE_EXPAND:
        {
            bool_t found = FALSE;
            for (i = 0; i < capts->len; i++) {
                assert(capts->buf[i]->type == NODE_CAPTURE);
                if (node->data.expand.index == capts->buf[i]->data.capture.index) {
                    found = TRUE;
                    break;
                }
            }
            if (!found && node->data.expand.index != VOID_VALUE) {
                print_error("%s:" FMT_LU ":" FMT_LU ": Capture " FMT_LU " not available at this position\n",
                    ctx->iname, (ulong_t)(node->data.expand.line + 1), (ulong_t)(node->data.expand.col + 1), (ulong_t)(node->data.expand.index + 1));
                ctx->errnum++;
            }
        }
        break;
    case NODE_ACTION:
        node_const_array__copy(&node->data.action.capts, capts);
        break;
    case NODE_ERROR:
        node_const_array__copy(&node->data.error.capts, capts);
        verify_captures(ctx, node->data.error.expr, capts);
        break;
    default:
        print_error("Internal error [%d]\n", __LINE__);
        exit(-1);
    }
    if (is_root) {
        node_const_array__term(&local);
    }
}
1636 
/*
 * Prints str to stdout, passing every character through escape_character().
 * A NULL pointer is printed as the word "null".
 */
static void dump_escaped_string(const char *str) {
    char escaped[5]; /* scratch buffer handed to escape_character() */
    const char *p;
    if (str == NULL) {
        fprintf(stdout, "null");
        return;
    }
    for (p = str; *p != '\0'; p++) {
        fprintf(stdout, "%s", escape_character(*p, &escaped));
    }
}
1647 
dump_integer_value(size_t value)1648 static void dump_integer_value(size_t value) {
1649     if (value == VOID_VALUE) {
1650         fprintf(stdout, "void");
1651     }
1652     else {
1653         fprintf(stdout, FMT_LU, (ulong_t)value);
1654     }
1655 }
1656 
dump_node(context_t * ctx,const node_t * node,const int indent)1657 static void dump_node(context_t *ctx, const node_t *node, const int indent) {
1658     if (node == NULL) return;
1659     switch (node->type) {
1660     case NODE_RULE:
1661         fprintf(stdout, "%*sRule(name:'%s', ref:%d, vars.len:" FMT_LU ", capts.len:" FMT_LU ", codes.len:" FMT_LU ") {\n",
1662             indent, "", node->data.rule.name, node->data.rule.ref,
1663             (ulong_t)node->data.rule.vars.len, (ulong_t)node->data.rule.capts.len, (ulong_t)node->data.rule.codes.len);
1664         dump_node(ctx, node->data.rule.expr, indent + 2);
1665         fprintf(stdout, "%*s}\n", indent, "");
1666         break;
1667     case NODE_REFERENCE:
1668         fprintf(stdout, "%*sReference(var:'%s', index:", indent, "", node->data.reference.var);
1669         dump_integer_value(node->data.reference.index);
1670         fprintf(stdout, ", name:'%s', rule:'%s')\n", node->data.reference.name,
1671             (node->data.reference.rule) ? node->data.reference.rule->data.rule.name : NULL);
1672         break;
1673     case NODE_STRING:
1674         fprintf(stdout, "%*sString(value:'", indent, "");
1675         dump_escaped_string(node->data.string.value);
1676         fprintf(stdout, "')\n");
1677         break;
1678     case NODE_CHARCLASS:
1679         fprintf(stdout, "%*sCharclass(value:'", indent, "");
1680         dump_escaped_string(node->data.charclass.value);
1681         fprintf(stdout, "')\n");
1682         break;
1683     case NODE_QUANTITY:
1684         fprintf(stdout, "%*sQuantity(min:%d, max%d) {\n", indent, "", node->data.quantity.min, node->data.quantity.max);
1685         dump_node(ctx, node->data.quantity.expr, indent + 2);
1686         fprintf(stdout, "%*s}\n", indent, "");
1687         break;
1688     case NODE_PREDICATE:
1689         fprintf(stdout, "%*sPredicate(neg:%d) {\n", indent, "", node->data.predicate.neg);
1690         dump_node(ctx, node->data.predicate.expr, indent + 2);
1691         fprintf(stdout, "%*s}\n", indent, "");
1692         break;
1693     case NODE_SEQUENCE:
1694         fprintf(stdout, "%*sSequence(max:" FMT_LU ", len:" FMT_LU ") {\n",
1695             indent, "", (ulong_t)node->data.sequence.nodes.max, (ulong_t)node->data.sequence.nodes.len);
1696         {
1697             size_t i;
1698             for (i = 0; i < node->data.sequence.nodes.len; i++) {
1699                 dump_node(ctx, node->data.sequence.nodes.buf[i], indent + 2);
1700             }
1701         }
1702         fprintf(stdout, "%*s}\n", indent, "");
1703         break;
1704     case NODE_ALTERNATE:
1705         fprintf(stdout, "%*sAlternate(max:" FMT_LU ", len:" FMT_LU ") {\n",
1706             indent, "", (ulong_t)node->data.alternate.nodes.max, (ulong_t)node->data.alternate.nodes.len);
1707         {
1708             size_t i;
1709             for (i = 0; i < node->data.alternate.nodes.len; i++) {
1710                 dump_node(ctx, node->data.alternate.nodes.buf[i], indent + 2);
1711             }
1712         }
1713         fprintf(stdout, "%*s}\n", indent, "");
1714         break;
1715     case NODE_CAPTURE:
1716         fprintf(stdout, "%*sCapture(index:", indent, "");
1717         dump_integer_value(node->data.capture.index);
1718         fprintf(stdout, ") {\n");
1719         dump_node(ctx, node->data.capture.expr, indent + 2);
1720         fprintf(stdout, "%*s}\n", indent, "");
1721         break;
1722     case NODE_EXPAND:
1723         fprintf(stdout, "%*sExpand(index:", indent, "");
1724         dump_integer_value(node->data.expand.index);
1725         fprintf(stdout, ")\n");
1726         break;
1727     case NODE_ACTION:
1728         fprintf(stdout, "%*sAction(index:", indent, "");
1729         dump_integer_value(node->data.action.index);
1730         fprintf(stdout, ", code:{");
1731         dump_escaped_string(node->data.action.code.text);
1732         fprintf(stdout, "}, vars:");
1733         if (node->data.action.vars.len + node->data.action.capts.len > 0) {
1734             size_t i;
1735             fprintf(stdout, "\n");
1736             for (i = 0; i < node->data.action.vars.len; i++) {
1737                 fprintf(stdout, "%*s'%s'\n", indent + 2, "", node->data.action.vars.buf[i]->data.reference.var);
1738             }
1739             for (i = 0; i < node->data.action.capts.len; i++) {
1740                 fprintf(stdout, "%*s$" FMT_LU "\n", indent + 2, "", (ulong_t)(node->data.action.capts.buf[i]->data.capture.index + 1));
1741             }
1742             fprintf(stdout, "%*s)\n", indent, "");
1743         }
1744         else {
1745             fprintf(stdout, "none)\n");
1746         }
1747         break;
1748     case NODE_ERROR:
1749         fprintf(stdout, "%*sError(index:", indent, "");
1750         dump_integer_value(node->data.error.index);
1751         fprintf(stdout, ", code:{");
1752         dump_escaped_string(node->data.error.code.text);
1753         fprintf(stdout, "}, vars:\n");
1754         {
1755             size_t i;
1756             for (i = 0; i < node->data.error.vars.len; i++) {
1757                 fprintf(stdout, "%*s'%s'\n", indent + 2, "", node->data.error.vars.buf[i]->data.reference.var);
1758             }
1759             for (i = 0; i < node->data.error.capts.len; i++) {
1760                 fprintf(stdout, "%*s$" FMT_LU "\n", indent + 2, "", (ulong_t)(node->data.error.capts.buf[i]->data.capture.index + 1));
1761             }
1762         }
1763         fprintf(stdout, "%*s) {\n", indent, "");
1764         dump_node(ctx, node->data.error.expr, indent + 2);
1765         fprintf(stdout, "%*s}\n", indent, "");
1766         break;
1767     default:
1768         print_error("%*sInternal error [%d]\n", indent, "", __LINE__);
1769         exit(-1);
1770     }
1771 }
1772 
/*
 * Tries to make at least 'num' bytes available in the buffer from the cursor
 * onward, pulling further bytes from the input file until that is satisfied
 * or EOF is reached.
 * Returns the number of bytes actually available at the cursor, which is less
 * than 'num' only when the input ran out.
 */
static size_t refill_buffer(context_t *ctx, size_t num) {
    const size_t wanted = ctx->bufcur + num;
    while (ctx->buffer.len < wanted) {
        const int ch = fgetc_e(ctx->ifile);
        if (ch == EOF) break;
        char_array__add(&ctx->buffer, (char)ch);
    }
    return ctx->buffer.len - ctx->bufcur;
}
1782 
/*
 * Discards everything before the cursor: the remaining bytes are moved to the
 * head of the buffer, the absolute buffer position is advanced, and the cursor
 * is reset to 0.
 * The character counter is advanced first, but only when the start of the
 * current line lies before the absolute cursor position.
 */
static void commit_buffer(context_t *ctx) {
    const size_t consumed = ctx->bufcur;
    assert(ctx->buffer.len >= consumed);
    if (ctx->linepos < ctx->bufpos + consumed) {
        if (ctx->opts.ascii) {
            ctx->charnum += consumed; /* one byte is one character */
        }
        else {
            ctx->charnum += count_characters(ctx->buffer.buf, 0, consumed);
        }
    }
    memmove(ctx->buffer.buf, ctx->buffer.buf + consumed, ctx->buffer.len - consumed);
    ctx->buffer.len -= consumed;
    ctx->bufpos += consumed;
    ctx->bufcur = 0;
}
1792 
match_eof(context_t * ctx)1793 static bool_t match_eof(context_t *ctx) {
1794     return (refill_buffer(ctx, 1) < 1) ? TRUE : FALSE;
1795 }
1796 
match_eol(context_t * ctx)1797 static bool_t match_eol(context_t *ctx) {
1798     if (refill_buffer(ctx, 1) >= 1) {
1799         switch (ctx->buffer.buf[ctx->bufcur]) {
1800         case '\n':
1801             ctx->bufcur++;
1802             ctx->linenum++;
1803             ctx->charnum = 0;
1804             ctx->linepos = ctx->bufpos + ctx->bufcur;
1805             return TRUE;
1806         case '\r':
1807             ctx->bufcur++;
1808             if (refill_buffer(ctx, 1) >= 1) {
1809                 if (ctx->buffer.buf[ctx->bufcur] == '\n') ctx->bufcur++;
1810             }
1811             ctx->linenum++;
1812             ctx->charnum = 0;
1813             ctx->linepos = ctx->bufpos + ctx->bufcur;
1814             return TRUE;
1815         }
1816     }
1817     return FALSE;
1818 }
1819 
match_character(context_t * ctx,char ch)1820 static bool_t match_character(context_t *ctx, char ch) {
1821     if (refill_buffer(ctx, 1) >= 1) {
1822         if (ctx->buffer.buf[ctx->bufcur] == ch) {
1823             ctx->bufcur++;
1824             return TRUE;
1825         }
1826     }
1827     return FALSE;
1828 }
1829 
match_character_range(context_t * ctx,char min,char max)1830 static bool_t match_character_range(context_t *ctx, char min, char max) {
1831     if (refill_buffer(ctx, 1) >= 1) {
1832         const char c = ctx->buffer.buf[ctx->bufcur];
1833         if (c >= min && c <= max) {
1834             ctx->bufcur++;
1835             return TRUE;
1836         }
1837     }
1838     return FALSE;
1839 }
1840 
match_character_set(context_t * ctx,const char * chs)1841 static bool_t match_character_set(context_t *ctx, const char *chs) {
1842     if (refill_buffer(ctx, 1) >= 1) {
1843         const char c = ctx->buffer.buf[ctx->bufcur];
1844         size_t i;
1845         for (i = 0; chs[i]; i++) {
1846             if (c == chs[i]) {
1847                 ctx->bufcur++;
1848                 return TRUE;
1849             }
1850         }
1851     }
1852     return FALSE;
1853 }
1854 
match_character_any(context_t * ctx)1855 static bool_t match_character_any(context_t *ctx) {
1856     if (refill_buffer(ctx, 1) >= 1) {
1857         ctx->bufcur++;
1858         return TRUE;
1859     }
1860     return FALSE;
1861 }
1862 
match_string(context_t * ctx,const char * str)1863 static bool_t match_string(context_t *ctx, const char *str) {
1864     const size_t n = strlen(str);
1865     if (refill_buffer(ctx, n) >= n) {
1866         if (strncmp(ctx->buffer.buf + ctx->bufcur, str, n) == 0) {
1867             ctx->bufcur += n;
1868             return TRUE;
1869         }
1870     }
1871     return FALSE;
1872 }
1873 
match_blank(context_t * ctx)1874 static bool_t match_blank(context_t *ctx) {
1875     return match_character_set(ctx, " \t\v\f");
1876 }
1877 
match_section_line_(context_t * ctx,const char * head)1878 static bool_t match_section_line_(context_t *ctx, const char *head) {
1879     if (match_string(ctx, head)) {
1880         while (!match_eol(ctx) && !match_eof(ctx)) match_character_any(ctx);
1881         return TRUE;
1882     }
1883     return FALSE;
1884 }
1885 
match_section_line_continuable_(context_t * ctx,const char * head)1886 static bool_t match_section_line_continuable_(context_t *ctx, const char *head) {
1887     if (match_string(ctx, head)) {
1888         while (!match_eof(ctx)) {
1889             const size_t p = ctx->bufcur;
1890             if (match_eol(ctx)) {
1891                 if (ctx->buffer.buf[p - 1] != '\\') break;
1892             }
1893             else {
1894                 match_character_any(ctx);
1895             }
1896         }
1897         return TRUE;
1898     }
1899     return FALSE;
1900 }
1901 
match_section_block_(context_t * ctx,const char * left,const char * right,const char * name)1902 static bool_t match_section_block_(context_t *ctx, const char *left, const char *right, const char *name) {
1903     const size_t l = ctx->linenum;
1904     const size_t m = column_number(ctx);
1905     if (match_string(ctx, left)) {
1906         while (!match_string(ctx, right)) {
1907             if (match_eof(ctx)) {
1908                 print_error("%s:" FMT_LU ":" FMT_LU ": Premature EOF in %s\n", ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1), name);
1909                 ctx->errnum++;
1910                 break;
1911             }
1912             if (!match_eol(ctx)) match_character_any(ctx);
1913         }
1914         return TRUE;
1915     }
1916     return FALSE;
1917 }
1918 
match_quotation_(context_t * ctx,const char * left,const char * right,const char * name)1919 static bool_t match_quotation_(context_t *ctx, const char *left, const char *right, const char *name) {
1920     const size_t l = ctx->linenum;
1921     const size_t m = column_number(ctx);
1922     if (match_string(ctx, left)) {
1923         while (!match_string(ctx, right)) {
1924             if (match_eof(ctx)) {
1925                 print_error("%s:" FMT_LU ":" FMT_LU ": Premature EOF in %s\n", ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1), name);
1926                 ctx->errnum++;
1927                 break;
1928             }
1929             if (match_character(ctx, '\\')) {
1930                 if (!match_eol(ctx)) match_character_any(ctx);
1931             }
1932             else {
1933                 if (match_eol(ctx)) {
1934                     print_error("%s:" FMT_LU ":" FMT_LU ": Premature EOL in %s\n", ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1), name);
1935                     ctx->errnum++;
1936                     break;
1937                 }
1938                 match_character_any(ctx);
1939             }
1940         }
1941         return TRUE;
1942     }
1943     return FALSE;
1944 }
1945 
match_directive_c(context_t * ctx)1946 static bool_t match_directive_c(context_t *ctx) {
1947     return match_section_line_continuable_(ctx, "#");
1948 }
1949 
match_comment(context_t * ctx)1950 static bool_t match_comment(context_t *ctx) {
1951     return match_section_line_(ctx, "#");
1952 }
1953 
match_comment_c(context_t * ctx)1954 static bool_t match_comment_c(context_t *ctx) {
1955     return match_section_block_(ctx, "/*", "*/", "C comment");
1956 }
1957 
match_comment_cxx(context_t * ctx)1958 static bool_t match_comment_cxx(context_t *ctx) {
1959     return match_section_line_(ctx, "//");
1960 }
1961 
match_quotation_single(context_t * ctx)1962 static bool_t match_quotation_single(context_t *ctx) {
1963     return match_quotation_(ctx, "\'", "\'", "single quotation");
1964 }
1965 
match_quotation_double(context_t * ctx)1966 static bool_t match_quotation_double(context_t *ctx) {
1967     return match_quotation_(ctx, "\"", "\"", "double quotation");
1968 }
1969 
match_character_class(context_t * ctx)1970 static bool_t match_character_class(context_t *ctx) {
1971     return match_quotation_(ctx, "[", "]", "character class");
1972 }
1973 
match_spaces(context_t * ctx)1974 static bool_t match_spaces(context_t *ctx) {
1975     size_t n = 0;
1976     while (match_blank(ctx) || match_eol(ctx) || match_comment(ctx)) n++;
1977     return (n > 0) ? TRUE : FALSE;
1978 }
1979 
match_number(context_t * ctx)1980 static bool_t match_number(context_t *ctx) {
1981     if (match_character_range(ctx, '0', '9')) {
1982         while (match_character_range(ctx, '0', '9'));
1983         return TRUE;
1984     }
1985     return FALSE;
1986 }
1987 
match_identifier(context_t * ctx)1988 static bool_t match_identifier(context_t *ctx) {
1989     if (
1990         match_character_range(ctx, 'a', 'z') ||
1991         match_character_range(ctx, 'A', 'Z') ||
1992         match_character(ctx, '_')
1993     ) {
1994         while (
1995             match_character_range(ctx, 'a', 'z') ||
1996             match_character_range(ctx, 'A', 'Z') ||
1997             match_character_range(ctx, '0', '9') ||
1998             match_character(ctx, '_')
1999         );
2000         return TRUE;
2001     }
2002     return FALSE;
2003 }
2004 
match_code_block(context_t * ctx)2005 static bool_t match_code_block(context_t *ctx) {
2006     const size_t l = ctx->linenum;
2007     const size_t m = column_number(ctx);
2008     if (match_character(ctx, '{')) {
2009         int d = 1;
2010         for (;;) {
2011             if (match_eof(ctx)) {
2012                 print_error("%s:" FMT_LU ":" FMT_LU ": Premature EOF in code block\n", ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1));
2013                 ctx->errnum++;
2014                 break;
2015             }
2016             if (
2017                 match_directive_c(ctx) ||
2018                 match_comment_c(ctx) ||
2019                 match_comment_cxx(ctx) ||
2020                 match_quotation_single(ctx) ||
2021                 match_quotation_double(ctx)
2022             ) continue;
2023             if (match_character(ctx, '{')) {
2024                 d++;
2025             }
2026             else if (match_character(ctx, '}')) {
2027                 d--;
2028                 if (d == 0) break;
2029             }
2030             else {
2031                 if (!match_eol(ctx)) {
2032                     if (match_character(ctx, '$')) {
2033                         ctx->buffer.buf[ctx->bufcur - 1] = '_';
2034                     }
2035                     else {
2036                         match_character_any(ctx);
2037                     }
2038                 }
2039             }
2040         }
2041         return TRUE;
2042     }
2043     return FALSE;
2044 }
2045 
match_footer_start(context_t * ctx)2046 static bool_t match_footer_start(context_t *ctx) {
2047     return match_string(ctx, "%%");
2048 }
2049 
2050 static node_t *parse_expression(context_t *ctx, node_t *rule);
2051 
/*
 * Parses one primary expression at the cursor and returns a node for it:
 *   - a rule reference, optionally prefixed with "var:"  (NODE_REFERENCE)
 *   - a parenthesized expression                          (the inner node)
 *   - a capture "< ... >"                                 (NODE_CAPTURE)
 *   - a capture expansion "$N"                            (NODE_EXPAND)
 *   - the any-character "."                               (NODE_CHARCLASS with NULL value)
 *   - a character class "[...]"                           (NODE_CHARCLASS)
 *   - a single- or double-quoted string                   (NODE_STRING)
 *   - a code block "{...}"                                (NODE_ACTION)
 * Variables, captures and actions are also registered in 'rule'.
 * Returns NULL when no primary is found; in that case the cursor, line number,
 * character number and line position are restored to their values on entry.
 *
 * The matchers for character classes, quotations and code blocks return TRUE
 * even when the input ends right after the opening delimiter (they only report
 * the error). The text between the delimiters is therefore extracted with a
 * length clamped to 0 so that "q - p - 2" cannot wrap around.
 */
static node_t *parse_primary(context_t *ctx, node_t *rule) {
    const size_t p = ctx->bufcur;
    const size_t l = ctx->linenum;
    const size_t m = column_number(ctx);
    const size_t n = ctx->charnum;
    const size_t o = ctx->linepos;
    node_t *n_p = NULL;
    if (match_identifier(ctx)) {
        const size_t q = ctx->bufcur;
        size_t r = VOID_VALUE, s = VOID_VALUE;
        match_spaces(ctx);
        if (match_character(ctx, ':')) {
            match_spaces(ctx);
            r = ctx->bufcur;
            if (!match_identifier(ctx)) goto EXCEPTION;
            s = ctx->bufcur;
            match_spaces(ctx);
        }
        if (match_string(ctx, "<-")) goto EXCEPTION; /* this identifier begins the next rule definition */
        n_p = create_node(NODE_REFERENCE);
        if (r == VOID_VALUE) {
            assert(q >= p);
            n_p->data.reference.var = NULL;
            n_p->data.reference.index = VOID_VALUE;
            n_p->data.reference.name = strndup_e(ctx->buffer.buf + p, q - p);
        }
        else {
            assert(s != VOID_VALUE); /* s should have a valid value when r has a valid value */
            assert(q >= p);
            n_p->data.reference.var = strndup_e(ctx->buffer.buf + p, q - p);
            if (n_p->data.reference.var[0] == '_') {
                print_error("%s:" FMT_LU ":" FMT_LU ": Leading underscore in variable name '%s'\n",
                    ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1), n_p->data.reference.var);
                ctx->errnum++;
            }
            {
                /* reuse the index of an already known variable of the same name, otherwise register it */
                size_t i;
                for (i = 0; i < rule->data.rule.vars.len; i++) {
                    assert(rule->data.rule.vars.buf[i]->type == NODE_REFERENCE);
                    if (strcmp(n_p->data.reference.var, rule->data.rule.vars.buf[i]->data.reference.var) == 0) break;
                }
                if (i == rule->data.rule.vars.len) node_const_array__add(&rule->data.rule.vars, n_p);
                n_p->data.reference.index = i;
            }
            assert(s >= r);
            n_p->data.reference.name = strndup_e(ctx->buffer.buf + r, s - r);
        }
        n_p->data.reference.line = l;
        n_p->data.reference.col = m;
    }
    else if (match_character(ctx, '(')) {
        match_spaces(ctx);
        n_p = parse_expression(ctx, rule);
        if (n_p == NULL) goto EXCEPTION;
        if (!match_character(ctx, ')')) goto EXCEPTION;
        match_spaces(ctx);
    }
    else if (match_character(ctx, '<')) {
        match_spaces(ctx);
        n_p = create_node(NODE_CAPTURE);
        n_p->data.capture.index = rule->data.rule.capts.len;
        node_const_array__add(&rule->data.rule.capts, n_p);
        n_p->data.capture.expr = parse_expression(ctx, rule);
        if (n_p->data.capture.expr == NULL || !match_character(ctx, '>')) {
            /* drop this capture (and any registered after it) from the rule again */
            rule->data.rule.capts.len = n_p->data.capture.index;
            goto EXCEPTION;
        }
        match_spaces(ctx);
    }
    else if (match_character(ctx, '$')) {
        size_t b; /* start of the digits */
        match_spaces(ctx);
        b = ctx->bufcur;
        if (match_number(ctx)) {
            const size_t q = ctx->bufcur;
            char *s;
            match_spaces(ctx);
            n_p = create_node(NODE_EXPAND);
            assert(q >= b);
            s = strndup_e(ctx->buffer.buf + b, q - b);
            n_p->data.expand.index = string_to_size_t(s);
            if (n_p->data.expand.index == VOID_VALUE) {
                print_error("%s:" FMT_LU ":" FMT_LU ": Invalid unsigned number '%s'\n", ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1), s);
                ctx->errnum++;
            }
            else if (n_p->data.expand.index == 0) {
                print_error("%s:" FMT_LU ":" FMT_LU ": 0 not allowed\n", ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1));
                ctx->errnum++;
            }
            else if (s[0] == '0') {
                print_error("%s:" FMT_LU ":" FMT_LU ": 0-prefixed number not allowed\n", ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1));
                ctx->errnum++;
                n_p->data.expand.index = 0;
            }
            free(s);
            if (n_p->data.expand.index > 0 && n_p->data.expand.index != VOID_VALUE) {
                n_p->data.expand.index--; /* the grammar counts from 1, the stored index from 0 */
                n_p->data.expand.line = l;
                n_p->data.expand.col = m;
            }
        }
        else {
            goto EXCEPTION;
        }
    }
    else if (match_character(ctx, '.')) {
        match_spaces(ctx);
        n_p = create_node(NODE_CHARCLASS);
        n_p->data.charclass.value = NULL;
        if (!ctx->opts.ascii) {
            ctx->flags |= CODE_FLAG__UTF8_CHARCLASS_USED;
        }
    }
    else if (match_character_class(ctx)) {
        const size_t q = ctx->bufcur;
        const size_t len = (q - p >= 2) ? q - p - 2 : 0; /* text between '[' and ']' */
        match_spaces(ctx);
        n_p = create_node(NODE_CHARCLASS);
        n_p->data.charclass.value = strndup_e(ctx->buffer.buf + p + 1, len);
        if (!unescape_string(n_p->data.charclass.value, TRUE)) {
            print_error("%s:" FMT_LU ":" FMT_LU ": Illegal escape sequence\n", ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1));
            ctx->errnum++;
        }
        if (!ctx->opts.ascii && !is_valid_utf8_string(n_p->data.charclass.value)) {
            print_error("%s:" FMT_LU ":" FMT_LU ": Invalid UTF-8 string\n", ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1));
            ctx->errnum++;
        }
        if (!ctx->opts.ascii && n_p->data.charclass.value[0] != '\0') {
            ctx->flags |= CODE_FLAG__UTF8_CHARCLASS_USED;
        }
    }
    else if (match_quotation_single(ctx) || match_quotation_double(ctx)) {
        const size_t q = ctx->bufcur;
        const size_t len = (q - p >= 2) ? q - p - 2 : 0; /* text between the quotes */
        match_spaces(ctx);
        n_p = create_node(NODE_STRING);
        n_p->data.string.value = strndup_e(ctx->buffer.buf + p + 1, len);
        if (!unescape_string(n_p->data.string.value, FALSE)) {
            print_error("%s:" FMT_LU ":" FMT_LU ": Illegal escape sequence\n", ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1));
            ctx->errnum++;
        }
        if (!ctx->opts.ascii && !is_valid_utf8_string(n_p->data.string.value)) {
            print_error("%s:" FMT_LU ":" FMT_LU ": Invalid UTF-8 string\n", ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1));
            ctx->errnum++;
        }
    }
    else if (match_code_block(ctx)) {
        const size_t q = ctx->bufcur;
        const size_t len = (q - p >= 2) ? q - p - 2 : 0; /* text between '{' and '}' */
        match_spaces(ctx);
        n_p = create_node(NODE_ACTION);
        n_p->data.action.code.text = strndup_e(ctx->buffer.buf + p + 1, len);
        n_p->data.action.code.len = find_trailing_blanks(n_p->data.action.code.text);
        n_p->data.action.code.line = l;
        n_p->data.action.code.col = m;
        n_p->data.action.index = rule->data.rule.codes.len;
        node_const_array__add(&rule->data.rule.codes, n_p);
    }
    else {
        goto EXCEPTION;
    }
    return n_p;

EXCEPTION:;
    destroy_node(n_p);
    ctx->bufcur = p;
    ctx->linenum = l;
    ctx->charnum = n;
    ctx->linepos = o;
    return NULL;
}
2220 
/*
 * Parses one term: an optional predicate prefix ('&' or '!'), a primary, an
 * optional quantifier ('*', '+' or '?'), and an optional error action "~{...}".
 * The resulting nodes wrap each other in that order: primary, NODE_QUANTITY,
 * NODE_PREDICATE, NODE_ERROR. An error action is also registered in the code
 * list of 'rule'.
 * Returns the outermost node, or NULL when no term is found; in that case the
 * nodes built so far are destroyed and the cursor, line number, character
 * number and line position are restored to their values on entry.
 *
 * match_code_block() returns TRUE even when the input ends right after the
 * opening brace (it only reports the error), so the code text is extracted
 * with a length clamped to 0 to keep the subtraction from wrapping around.
 */
static node_t *parse_term(context_t *ctx, node_t *rule) {
    const size_t p = ctx->bufcur;
    const size_t l = ctx->linenum;
    const size_t n = ctx->charnum;
    const size_t o = ctx->linepos;
    node_t *n_p = NULL;
    node_t *n_q = NULL;
    node_t *n_r = NULL;
    node_t *n_t = NULL;
    const char t = match_character(ctx, '&') ? '&' : match_character(ctx, '!') ? '!' : '\0';
    if (t) match_spaces(ctx);
    n_p = parse_primary(ctx, rule);
    if (n_p == NULL) goto EXCEPTION;
    if (match_character(ctx, '*')) {
        match_spaces(ctx);
        n_q = create_node(NODE_QUANTITY);
        n_q->data.quantity.min = 0;
        n_q->data.quantity.max = -1;
        n_q->data.quantity.expr = n_p;
    }
    else if (match_character(ctx, '+')) {
        match_spaces(ctx);
        n_q = create_node(NODE_QUANTITY);
        n_q->data.quantity.min = 1;
        n_q->data.quantity.max = -1;
        n_q->data.quantity.expr = n_p;
    }
    else if (match_character(ctx, '?')) {
        match_spaces(ctx);
        n_q = create_node(NODE_QUANTITY);
        n_q->data.quantity.min = 0;
        n_q->data.quantity.max = 1;
        n_q->data.quantity.expr = n_p;
    }
    else {
        n_q = n_p;
    }
    switch (t) {
    case '&':
        n_r = create_node(NODE_PREDICATE);
        n_r->data.predicate.neg = FALSE;
        n_r->data.predicate.expr = n_q;
        break;
    case '!':
        n_r = create_node(NODE_PREDICATE);
        n_r->data.predicate.neg = TRUE;
        n_r->data.predicate.expr = n_q;
        break;
    default:
        n_r = n_q;
    }
    if (match_character(ctx, '~')) {
        /* position of the code block; named apart from p and l, which the failure path restores */
        size_t cp, cl, cm;
        match_spaces(ctx);
        cp = ctx->bufcur;
        cl = ctx->linenum;
        cm = column_number(ctx);
        if (match_code_block(ctx)) {
            const size_t q = ctx->bufcur;
            const size_t len = (q - cp >= 2) ? q - cp - 2 : 0; /* text between '{' and '}' */
            match_spaces(ctx);
            n_t = create_node(NODE_ERROR);
            n_t->data.error.expr = n_r;
            n_t->data.error.code.text = strndup_e(ctx->buffer.buf + cp + 1, len);
            n_t->data.error.code.len = find_trailing_blanks(n_t->data.error.code.text);
            n_t->data.error.code.line = cl;
            n_t->data.error.code.col = cm;
            n_t->data.error.index = rule->data.rule.codes.len;
            node_const_array__add(&rule->data.rule.codes, n_t);
        }
        else {
            goto EXCEPTION;
        }
    }
    else {
        n_t = n_r;
    }
    return n_t;

EXCEPTION:;
    destroy_node(n_r);
    ctx->bufcur = p;
    ctx->linenum = l;
    ctx->charnum = n;
    ctx->linepos = o;
    return NULL;
}
2307 
/*
 * Parses one or more consecutive terms.
 * A single term is returned as is; two or more are collected, in order, into a
 * NODE_SEQUENCE node. Returns NULL when not even one term is found; in that
 * case the cursor, line number, character number and line position are
 * restored to their values on entry.
 */
static node_t *parse_sequence(context_t *ctx, node_t *rule) {
    const size_t cur0 = ctx->bufcur;
    const size_t line0 = ctx->linenum;
    const size_t char0 = ctx->charnum;
    const size_t lpos0 = ctx->linepos;
    node_array_t *items;
    node_t *first, *second, *seq;
    first = parse_term(ctx, rule);
    if (first == NULL) {
        ctx->bufcur = cur0;
        ctx->linenum = line0;
        ctx->charnum = char0;
        ctx->linepos = lpos0;
        return NULL;
    }
    second = parse_term(ctx, rule);
    if (second == NULL) return first; /* a lone term needs no wrapper node */
    seq = create_node(NODE_SEQUENCE);
    items = &seq->data.sequence.nodes;
    node_array__add(items, first);
    node_array__add(items, second);
    for (;;) {
        node_t *const next = parse_term(ctx, rule);
        if (next == NULL) break;
        node_array__add(items, next);
    }
    return seq;
}
2341 
/*
 * Parses one or more sequences separated by '/'.
 * A single sequence is returned as is; otherwise the sequences are collected,
 * in order, into a NODE_ALTERNATE node. Returns NULL when the first sequence is
 * missing or when a '/' is not followed by a sequence; in that case the nodes
 * built so far are destroyed and the cursor, line number, character number and
 * line position are restored to their values on entry.
 */
static node_t *parse_expression(context_t *ctx, node_t *rule) {
    const size_t cur0 = ctx->bufcur;
    const size_t line0 = ctx->linenum;
    const size_t char0 = ctx->charnum;
    const size_t lpos0 = ctx->linepos;
    size_t mark;
    node_array_t *branches = NULL;
    node_t *seq = NULL;
    node_t *alt = NULL;
    seq = parse_sequence(ctx, rule);
    if (seq == NULL) goto EXCEPTION;
    mark = ctx->bufcur;
    if (!match_character(ctx, '/')) return seq;
    ctx->bufcur = mark; /* only peeked; the loop below consumes the slash */
    alt = create_node(NODE_ALTERNATE);
    branches = &alt->data.alternate.nodes;
    node_array__add(branches, seq);
    while (match_character(ctx, '/')) {
        match_spaces(ctx);
        seq = parse_sequence(ctx, rule);
        if (seq == NULL) goto EXCEPTION;
        node_array__add(branches, seq);
    }
    return alt;

EXCEPTION:;
    destroy_node(alt);
    ctx->bufcur = cur0;
    ctx->linenum = line0;
    ctx->charnum = char0;
    ctx->linepos = lpos0;
    return NULL;
}
2379 
parse_rule(context_t * ctx)2380 static node_t *parse_rule(context_t *ctx) {
2381     const size_t p = ctx->bufcur;
2382     const size_t l = ctx->linenum;
2383     const size_t m = column_number(ctx);
2384     const size_t n = ctx->charnum;
2385     const size_t o = ctx->linepos;
2386     size_t q;
2387     node_t *n_r = NULL;
2388     if (!match_identifier(ctx)) goto EXCEPTION;
2389     q = ctx->bufcur;
2390     match_spaces(ctx);
2391     if (!match_string(ctx, "<-")) goto EXCEPTION;
2392     match_spaces(ctx);
2393     n_r = create_node(NODE_RULE);
2394     n_r->data.rule.expr = parse_expression(ctx, n_r);
2395     if (n_r->data.rule.expr == NULL) goto EXCEPTION;
2396     assert(q >= p);
2397     n_r->data.rule.name = strndup_e(ctx->buffer.buf + p, q - p);
2398     n_r->data.rule.line = l;
2399     n_r->data.rule.col = m;
2400     return n_r;
2401 
2402 EXCEPTION:;
2403     destroy_node(n_r);
2404     ctx->bufcur = p;
2405     ctx->linenum = l;
2406     ctx->charnum = n;
2407     ctx->linepos = o;
2408     return NULL;
2409 }
2410 
/* Returns the configured value type, or "int" when it is unset or empty. */
static const char *get_value_type(context_t *ctx) {
    if (ctx->vtype == NULL || ctx->vtype[0] == '\0') return "int";
    return ctx->vtype;
}
2414 
/* Returns the configured auxiliary type, or "void *" when it is unset or empty. */
static const char *get_auxil_type(context_t *ctx) {
    if (ctx->atype == NULL || ctx->atype[0] == '\0') return "void *";
    return ctx->atype;
}
2418 
/* Returns the configured prefix, or "pcc" when it is unset or empty. */
static const char *get_prefix(context_t *ctx) {
    if (ctx->prefix == NULL || ctx->prefix[0] == '\0') return "pcc";
    return ctx->prefix;
}
2422 
/* Prints the effective value type, auxiliary type and prefix to stdout, one per line. */
static void dump_options(context_t *ctx) {
    const char *const value_type = get_value_type(ctx);
    const char *const auxil_type = get_auxil_type(ctx);
    const char *const prefix = get_prefix(ctx);
    fprintf(stdout, "value_type: '%s'\n", value_type);
    fprintf(stdout, "auxil_type: '%s'\n", auxil_type);
    fprintf(stdout, "prefix: '%s'\n", prefix);
}
2428 
parse_directive_include_(context_t * ctx,const char * name,code_block_array_t * output1,code_block_array_t * output2)2429 static bool_t parse_directive_include_(context_t *ctx, const char *name, code_block_array_t *output1, code_block_array_t *output2) {
2430     if (!match_string(ctx, name)) return FALSE;
2431     match_spaces(ctx);
2432     {
2433         const size_t p = ctx->bufcur;
2434         const size_t l = ctx->linenum;
2435         const size_t m = column_number(ctx);
2436         if (match_code_block(ctx)) {
2437             const size_t q = ctx->bufcur;
2438             match_spaces(ctx);
2439             if (output1 != NULL) {
2440                 code_block_t *c = code_block_array__create_entry(output1);
2441                 c->text = strndup_e(ctx->buffer.buf + p + 1, q - p - 2);
2442                 c->len = q - p - 2;
2443                 c->line = l;
2444                 c->col = m;
2445             }
2446             if (output2 != NULL) {
2447                 code_block_t *c = code_block_array__create_entry(output2);
2448                 c->text = strndup_e(ctx->buffer.buf + p + 1, q - p - 2);
2449                 c->len = q - p - 2;
2450                 c->line = l;
2451                 c->col = m;
2452             }
2453         }
2454         else {
2455             print_error("%s:" FMT_LU ":" FMT_LU ": Illegal %s syntax\n", ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1), name);
2456             ctx->errnum++;
2457         }
2458     }
2459     return TRUE;
2460 }
2461 
parse_directive_string_(context_t * ctx,const char * name,char ** output,string_flag_t mode)2462 static bool_t parse_directive_string_(context_t *ctx, const char *name, char **output, string_flag_t mode) {
2463     const size_t l = ctx->linenum;
2464     const size_t m = column_number(ctx);
2465     if (!match_string(ctx, name)) return FALSE;
2466     match_spaces(ctx);
2467     {
2468         char *s = NULL;
2469         const size_t p = ctx->bufcur;
2470         const size_t lv = ctx->linenum;
2471         const size_t mv = column_number(ctx);
2472         size_t q;
2473         if (match_quotation_single(ctx) || match_quotation_double(ctx)) {
2474             q = ctx->bufcur;
2475             match_spaces(ctx);
2476             s = strndup_e(ctx->buffer.buf + p + 1, q - p - 2);
2477             if (!unescape_string(s, FALSE)) {
2478                 print_error("%s:" FMT_LU ":" FMT_LU ": Illegal escape sequence\n", ctx->iname, (ulong_t)(lv + 1), (ulong_t)(mv + 1));
2479                 ctx->errnum++;
2480             }
2481         }
2482         else {
2483             print_error("%s:" FMT_LU ":" FMT_LU ": Illegal %s syntax\n", ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1), name);
2484             ctx->errnum++;
2485         }
2486         if (s != NULL) {
2487             string_flag_t f = STRING_FLAG__NONE;
2488             bool_t b = TRUE;
2489             remove_leading_blanks(s);
2490             remove_trailing_blanks(s);
2491             assert((mode & ~7) == 0);
2492             if ((mode & STRING_FLAG__NOTEMPTY) && !is_filled_string(s)) {
2493                 print_error("%s:" FMT_LU ":" FMT_LU ": Empty string\n", ctx->iname, (ulong_t)(lv + 1), (ulong_t)(mv + 1));
2494                 ctx->errnum++;
2495                 f |= STRING_FLAG__NOTEMPTY;
2496             }
2497             if ((mode & STRING_FLAG__NOTVOID) && strcmp(s, "void") == 0) {
2498                 print_error("%s:" FMT_LU ":" FMT_LU ": 'void' not allowed\n", ctx->iname, (ulong_t)(lv + 1), (ulong_t)(mv + 1));
2499                 ctx->errnum++;
2500                 f |= STRING_FLAG__NOTVOID;
2501             }
2502             if ((mode & STRING_FLAG__IDENTIFIER) && !is_identifier_string(s)) {
2503                 if (!(f & STRING_FLAG__NOTEMPTY)) {
2504                     print_error("%s:" FMT_LU ":" FMT_LU ": Invalid identifier\n", ctx->iname, (ulong_t)(lv + 1), (ulong_t)(mv + 1));
2505                     ctx->errnum++;
2506                 }
2507                 f |= STRING_FLAG__IDENTIFIER;
2508             }
2509             if (*output != NULL) {
2510                 print_error("%s:" FMT_LU ":" FMT_LU ": Multiple %s definition\n", ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1), name);
2511                 ctx->errnum++;
2512                 b = FALSE;
2513             }
2514             if (f == STRING_FLAG__NONE && b) {
2515                 *output = s;
2516             }
2517             else {
2518                 free(s); s = NULL;
2519             }
2520         }
2521     }
2522     return TRUE;
2523 }
2524 
parse(context_t * ctx)2525 static bool_t parse(context_t *ctx) {
2526     {
2527         bool_t b = TRUE;
2528         match_spaces(ctx);
2529         for (;;) {
2530             size_t l, m, n, o;
2531             if (match_eof(ctx) || match_footer_start(ctx)) break;
2532             l = ctx->linenum;
2533             m = column_number(ctx);
2534             n = ctx->charnum;
2535             o = ctx->linepos;
2536             if (
2537                 parse_directive_include_(ctx, "%earlysource", &ctx->esource, NULL) ||
2538                 parse_directive_include_(ctx, "%earlyheader", &ctx->eheader, NULL) ||
2539                 parse_directive_include_(ctx, "%earlycommon", &ctx->esource, &ctx->eheader) ||
2540                 parse_directive_include_(ctx, "%source", &ctx->source, NULL) ||
2541                 parse_directive_include_(ctx, "%header", &ctx->header, NULL) ||
2542                 parse_directive_include_(ctx, "%common", &ctx->source, &ctx->header) ||
2543                 parse_directive_string_(ctx, "%value", &ctx->vtype, STRING_FLAG__NOTEMPTY | STRING_FLAG__NOTVOID) ||
2544                 parse_directive_string_(ctx, "%auxil", &ctx->atype, STRING_FLAG__NOTEMPTY | STRING_FLAG__NOTVOID) ||
2545                 parse_directive_string_(ctx, "%prefix", &ctx->prefix, STRING_FLAG__NOTEMPTY | STRING_FLAG__IDENTIFIER)
2546             ) {
2547                 b = TRUE;
2548             }
2549             else if (match_character(ctx, '%')) {
2550                 print_error("%s:" FMT_LU ":" FMT_LU ": Invalid directive\n", ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1));
2551                 ctx->errnum++;
2552                 match_identifier(ctx);
2553                 match_spaces(ctx);
2554                 b = TRUE;
2555             }
2556             else {
2557                 node_t *const n_r = parse_rule(ctx);
2558                 if (n_r == NULL) {
2559                     if (b) {
2560                         print_error("%s:" FMT_LU ":" FMT_LU ": Illegal rule syntax\n", ctx->iname, (ulong_t)(l + 1), (ulong_t)(m + 1));
2561                         ctx->errnum++;
2562                         b = FALSE;
2563                     }
2564                     ctx->linenum = l;
2565                     ctx->charnum = n;
2566                     ctx->linepos = o;
2567                     if (!match_identifier(ctx) && !match_spaces(ctx)) match_character_any(ctx);
2568                     continue;
2569                 }
2570                 node_array__add(&ctx->rules, n_r);
2571                 b = TRUE;
2572             }
2573             commit_buffer(ctx);
2574         }
2575         commit_buffer(ctx);
2576     }
2577     {
2578         size_t i;
2579         make_rulehash(ctx);
2580         for (i = 0; i < ctx->rules.len; i++) {
2581             link_references(ctx, ctx->rules.buf[i]->data.rule.expr);
2582         }
2583         for (i = 1; i < ctx->rules.len; i++) {
2584             if (ctx->rules.buf[i]->data.rule.ref == 0) {
2585                 print_error("%s:" FMT_LU ":" FMT_LU ": Never used rule '%s'\n",
2586                     ctx->iname,
2587                     (ulong_t)(ctx->rules.buf[i]->data.rule.line + 1), (ulong_t)(ctx->rules.buf[i]->data.rule.col + 1),
2588                     ctx->rules.buf[i]->data.rule.name);
2589                 ctx->errnum++;
2590             }
2591             else if (ctx->rules.buf[i]->data.rule.ref < 0) {
2592                 print_error("%s:" FMT_LU ":" FMT_LU ": Multiple definition of rule '%s'\n",
2593                     ctx->iname,
2594                     (ulong_t)(ctx->rules.buf[i]->data.rule.line + 1), (ulong_t)(ctx->rules.buf[i]->data.rule.col + 1),
2595                     ctx->rules.buf[i]->data.rule.name);
2596                 ctx->errnum++;
2597             }
2598         }
2599     }
2600     {
2601         size_t i;
2602         for (i = 0; i < ctx->rules.len; i++) {
2603             verify_variables(ctx, ctx->rules.buf[i]->data.rule.expr, NULL);
2604             verify_captures(ctx, ctx->rules.buf[i]->data.rule.expr, NULL);
2605         }
2606     }
2607     if (ctx->opts.debug) {
2608         size_t i;
2609         for (i = 0; i < ctx->rules.len; i++) {
2610             dump_node(ctx, ctx->rules.buf[i], 0);
2611         }
2612         dump_options(ctx);
2613     }
2614     return (ctx->errnum == 0) ? TRUE : FALSE;
2615 }
2616 
generate_matching_string_code(generate_t * gen,const char * value,int onfail,size_t indent,bool_t bare)2617 static code_reach_t generate_matching_string_code(generate_t *gen, const char *value, int onfail, size_t indent, bool_t bare) {
2618     const size_t n = (value != NULL) ? strlen(value) : 0;
2619     if (n > 0) {
2620         char s[5];
2621         if (n > 1) {
2622             size_t i;
2623             stream__write_characters(gen->stream, ' ', indent);
2624             stream__puts(gen->stream, "if (\n");
2625             stream__write_characters(gen->stream, ' ', indent + 4);
2626             stream__printf(gen->stream, "pcc_refill_buffer(ctx, " FMT_LU ") < " FMT_LU " ||\n", (ulong_t)n, (ulong_t)n);
2627             for (i = 0; i < n - 1; i++) {
2628                 stream__write_characters(gen->stream, ' ', indent + 4);
2629                 stream__printf(gen->stream, "(ctx->buffer.buf + ctx->cur)[" FMT_LU "] != '%s' ||\n", (ulong_t)i, escape_character(value[i], &s));
2630             }
2631             stream__write_characters(gen->stream, ' ', indent + 4);
2632             stream__printf(gen->stream, "(ctx->buffer.buf + ctx->cur)[" FMT_LU "] != '%s'\n", (ulong_t)i, escape_character(value[i], &s));
2633             stream__write_characters(gen->stream, ' ', indent);
2634             stream__printf(gen->stream, ") goto L%04d;\n", onfail);
2635             stream__write_characters(gen->stream, ' ', indent);
2636             stream__printf(gen->stream, "ctx->cur += " FMT_LU ";\n", (ulong_t)n);
2637             return CODE_REACH__BOTH;
2638         }
2639         else {
2640             stream__write_characters(gen->stream, ' ', indent);
2641             stream__puts(gen->stream, "if (\n");
2642             stream__write_characters(gen->stream, ' ', indent + 4);
2643             stream__puts(gen->stream, "pcc_refill_buffer(ctx, 1) < 1 ||\n");
2644             stream__write_characters(gen->stream, ' ', indent + 4);
2645             stream__printf(gen->stream, "ctx->buffer.buf[ctx->cur] != '%s'\n", escape_character(value[0], &s));
2646             stream__write_characters(gen->stream, ' ', indent);
2647             stream__printf(gen->stream, ") goto L%04d;\n", onfail);
2648             stream__write_characters(gen->stream, ' ', indent);
2649             stream__puts(gen->stream, "ctx->cur++;\n");
2650             return CODE_REACH__BOTH;
2651         }
2652     }
2653     else {
2654         /* no code to generate */
2655         return CODE_REACH__ALWAYS_SUCCEED;
2656     }
2657 }
2658 
generate_matching_charclass_code(generate_t * gen,const char * value,int onfail,size_t indent,bool_t bare)2659 static code_reach_t generate_matching_charclass_code(generate_t *gen, const char *value, int onfail, size_t indent, bool_t bare) {
2660     assert(gen->ascii);
2661     if (value != NULL) {
2662         const size_t n = strlen(value);
2663         if (n > 0) {
2664             char s[5], t[5];
2665             if (n > 1) {
2666                 const bool_t a = (value[0] == '^') ? TRUE : FALSE;
2667                 size_t i = a ? 1 : 0;
2668                 if (i + 1 == n) { /* fulfilled only if a == TRUE */
2669                     stream__write_characters(gen->stream, ' ', indent);
2670                     stream__puts(gen->stream, "if (\n");
2671                     stream__write_characters(gen->stream, ' ', indent + 4);
2672                     stream__puts(gen->stream, "pcc_refill_buffer(ctx, 1) < 1 ||\n");
2673                     stream__write_characters(gen->stream, ' ', indent + 4);
2674                     stream__printf(gen->stream, "ctx->buffer.buf[ctx->cur] == '%s'\n", escape_character(value[i], &s));
2675                     stream__write_characters(gen->stream, ' ', indent);
2676                     stream__printf(gen->stream, ") goto L%04d;\n", onfail);
2677                     stream__write_characters(gen->stream, ' ', indent);
2678                     stream__puts(gen->stream, "ctx->cur++;\n");
2679                     return CODE_REACH__BOTH;
2680                 }
2681                 else {
2682                     if (!bare) {
2683                         stream__write_characters(gen->stream, ' ', indent);
2684                         stream__puts(gen->stream, "{\n");
2685                         indent += 4;
2686                     }
2687                     stream__write_characters(gen->stream, ' ', indent);
2688                     stream__puts(gen->stream, "char c;\n");
2689                     stream__write_characters(gen->stream, ' ', indent);
2690                     stream__printf(gen->stream, "if (pcc_refill_buffer(ctx, 1) < 1) goto L%04d;\n", onfail);
2691                     stream__write_characters(gen->stream, ' ', indent);
2692                     stream__puts(gen->stream, "c = ctx->buffer.buf[ctx->cur];\n");
2693                     if (i + 3 == n && value[i] != '\\' && value[i + 1] == '-') {
2694                         stream__write_characters(gen->stream, ' ', indent);
2695                         stream__printf(gen->stream,
2696                             a ? "if (c >= '%s' && c <= '%s') goto L%04d;\n"
2697                               : "if (!(c >= '%s' && c <= '%s')) goto L%04d;\n",
2698                             escape_character(value[i], &s), escape_character(value[i + 2], &t), onfail);
2699                     }
2700                     else {
2701                         stream__write_characters(gen->stream, ' ', indent);
2702                         stream__puts(gen->stream, a ? "if (\n" : "if (!(\n");
2703                         for (; i < n; i++) {
2704                             stream__write_characters(gen->stream, ' ', indent + 4);
2705                             if (value[i] == '\\' && i + 1 < n) i++;
2706                             if (i + 2 < n && value[i + 1] == '-') {
2707                                 stream__printf(gen->stream, "(c >= '%s' && c <= '%s')%s\n",
2708                                     escape_character(value[i], &s), escape_character(value[i + 2], &t), (i + 3 == n) ? "" : " ||");
2709                                 i += 2;
2710                             }
2711                             else {
2712                                 stream__printf(gen->stream, "c == '%s'%s\n",
2713                                     escape_character(value[i], &s), (i + 1 == n) ? "" : " ||");
2714                             }
2715                         }
2716                         stream__write_characters(gen->stream, ' ', indent);
2717                         stream__printf(gen->stream, a ? ") goto L%04d;\n" : ")) goto L%04d;\n", onfail);
2718                     }
2719                     stream__write_characters(gen->stream, ' ', indent);
2720                     stream__puts(gen->stream, "ctx->cur++;\n");
2721                     if (!bare) {
2722                         indent -= 4;
2723                         stream__write_characters(gen->stream, ' ', indent);
2724                         stream__puts(gen->stream, "}\n");
2725                     }
2726                     return CODE_REACH__BOTH;
2727                 }
2728             }
2729             else {
2730                 stream__write_characters(gen->stream, ' ', indent);
2731                 stream__puts(gen->stream, "if (\n");
2732                 stream__write_characters(gen->stream, ' ', indent + 4);
2733                 stream__puts(gen->stream, "pcc_refill_buffer(ctx, 1) < 1 ||\n");
2734                 stream__write_characters(gen->stream, ' ', indent + 4);
2735                 stream__printf(gen->stream, "ctx->buffer.buf[ctx->cur] != '%s'\n", escape_character(value[0], &s));
2736                 stream__write_characters(gen->stream, ' ', indent);
2737                 stream__printf(gen->stream, ") goto L%04d;\n", onfail);
2738                 stream__write_characters(gen->stream, ' ', indent);
2739                 stream__puts(gen->stream, "ctx->cur++;\n");
2740                 return CODE_REACH__BOTH;
2741             }
2742         }
2743         else {
2744             stream__write_characters(gen->stream, ' ', indent);
2745             stream__printf(gen->stream, "goto L%04d;\n", onfail);
2746             return CODE_REACH__ALWAYS_FAIL;
2747         }
2748     }
2749     else {
2750         stream__write_characters(gen->stream, ' ', indent);
2751         stream__printf(gen->stream, "if (pcc_refill_buffer(ctx, 1) < 1) goto L%04d;\n", onfail);
2752         stream__write_characters(gen->stream, ' ', indent);
2753         stream__puts(gen->stream, "ctx->cur++;\n");
2754         return CODE_REACH__BOTH;
2755     }
2756 }
2757 
generate_matching_utf8_charclass_code(generate_t * gen,const char * value,int onfail,size_t indent,bool_t bare)2758 static code_reach_t generate_matching_utf8_charclass_code(generate_t *gen, const char *value, int onfail, size_t indent, bool_t bare) {
2759     const size_t n = (value != NULL) ? strlen(value) : 0;
2760     if (value == NULL || n > 0) {
2761         const bool_t a = (n > 0 && value[0] == '^') ? TRUE : FALSE;
2762         size_t i = a ? 1 : 0;
2763         if (!bare) {
2764             stream__write_characters(gen->stream, ' ', indent);
2765             stream__puts(gen->stream, "{\n");
2766             indent += 4;
2767         }
2768         stream__write_characters(gen->stream, ' ', indent);
2769         stream__puts(gen->stream, "int u;\n");
2770         stream__write_characters(gen->stream, ' ', indent);
2771         stream__puts(gen->stream, "const size_t n = pcc_get_char_as_utf32(ctx, &u);\n");
2772         stream__write_characters(gen->stream, ' ', indent);
2773         stream__printf(gen->stream, "if (n == 0) goto L%04d;\n", onfail);
2774         if (value != NULL && !(a && n == 1)) { /* not '.' or '[^]' */
2775             int u0 = 0;
2776             bool_t r = FALSE;
2777             stream__write_characters(gen->stream, ' ', indent);
2778             stream__puts(gen->stream, a ? "if (\n" : "if (!(\n");
2779             while (i < n) {
2780                 int u = 0;
2781                 if (value[i] == '\\' && i + 1 < n) i++;
2782                 i += utf8_to_utf32(value + i, &u);
2783                 if (r) { /* character range */
2784                     stream__write_characters(gen->stream, ' ', indent + 4);
2785                     stream__printf(gen->stream, "(u >= 0x%06x && u <= 0x%06x)%s\n", u0, u, (i < n) ? " ||" : "");
2786                     u0 = 0;
2787                     r = FALSE;
2788                 }
2789                 else if (
2790                     value[i] != '-' ||
2791                     i == n - 1 /* the individual '-' character is valid when it is at the first or the last position */
2792                 ) { /* single character */
2793                     stream__write_characters(gen->stream, ' ', indent + 4);
2794                     stream__printf(gen->stream, "u == 0x%06x%s\n", u, (i < n) ? " ||" : "");
2795                     u0 = 0;
2796                     r = FALSE;
2797                 }
2798                 else {
2799                     assert(value[i] == '-');
2800                     i++;
2801                     u0 = u;
2802                     r = TRUE;
2803                 }
2804             }
2805             stream__write_characters(gen->stream, ' ', indent);
2806             stream__printf(gen->stream, a ? ") goto L%04d;\n" : ")) goto L%04d;\n", onfail);
2807         }
2808         stream__write_characters(gen->stream, ' ', indent);
2809         stream__puts(gen->stream, "ctx->cur += n;\n");
2810         if (!bare) {
2811             indent -= 4;
2812             stream__write_characters(gen->stream, ' ', indent);
2813             stream__puts(gen->stream, "}\n");
2814         }
2815         return CODE_REACH__BOTH;
2816     }
2817     else {
2818         stream__write_characters(gen->stream, ' ', indent);
2819         stream__printf(gen->stream, "goto L%04d;\n", onfail);
2820         return CODE_REACH__ALWAYS_FAIL;
2821     }
2822 }
2823 
2824 static code_reach_t generate_code(generate_t *gen, const node_t *node, int onfail, size_t indent, bool_t bare);
2825 
generate_quantifying_code(generate_t * gen,const node_t * expr,int min,int max,int onfail,size_t indent,bool_t bare)2826 static code_reach_t generate_quantifying_code(generate_t *gen, const node_t *expr, int min, int max, int onfail, size_t indent, bool_t bare) {
2827     if (max > 1 || max < 0) {
2828         code_reach_t r;
2829         if (!bare) {
2830             stream__write_characters(gen->stream, ' ', indent);
2831             stream__puts(gen->stream, "{\n");
2832             indent += 4;
2833         }
2834         if (min > 0) {
2835             stream__write_characters(gen->stream, ' ', indent);
2836             stream__puts(gen->stream, "const size_t p0 = ctx->cur;\n");
2837             stream__write_characters(gen->stream, ' ', indent);
2838             stream__puts(gen->stream, "const size_t n0 = chunk->thunks.len;\n");
2839         }
2840         stream__write_characters(gen->stream, ' ', indent);
2841         stream__puts(gen->stream, "int i;\n");
2842         stream__write_characters(gen->stream, ' ', indent);
2843         if (max < 0)
2844             stream__puts(gen->stream, "for (i = 0;; i++) {\n");
2845         else
2846             stream__printf(gen->stream, "for (i = 0; i < %d; i++) {\n", max);
2847         stream__write_characters(gen->stream, ' ', indent + 4);
2848         stream__puts(gen->stream, "const size_t p = ctx->cur;\n");
2849         stream__write_characters(gen->stream, ' ', indent + 4);
2850         stream__puts(gen->stream, "const size_t n = chunk->thunks.len;\n");
2851         {
2852             const int l = ++gen->label;
2853             r = generate_code(gen, expr, l, indent + 4, FALSE);
2854             stream__write_characters(gen->stream, ' ', indent + 4);
2855             stream__puts(gen->stream, "if (ctx->cur == p) break;\n");
2856             if (r != CODE_REACH__ALWAYS_SUCCEED) {
2857                 stream__write_characters(gen->stream, ' ', indent + 4);
2858                 stream__puts(gen->stream, "continue;\n");
2859                 stream__write_characters(gen->stream, ' ', indent);
2860                 stream__printf(gen->stream, "L%04d:;\n", l);
2861                 stream__write_characters(gen->stream, ' ', indent + 4);
2862                 stream__puts(gen->stream, "ctx->cur = p;\n");
2863                 stream__write_characters(gen->stream, ' ', indent + 4);
2864                 stream__puts(gen->stream, "pcc_thunk_array__revert(ctx->auxil, &chunk->thunks, n);\n");
2865                 stream__write_characters(gen->stream, ' ', indent + 4);
2866                 stream__puts(gen->stream, "break;\n");
2867             }
2868         }
2869         stream__write_characters(gen->stream, ' ', indent);
2870         stream__puts(gen->stream, "}\n");
2871         if (min > 0) {
2872             stream__write_characters(gen->stream, ' ', indent);
2873             stream__printf(gen->stream, "if (i < %d) {\n", min);
2874             stream__write_characters(gen->stream, ' ', indent + 4);
2875             stream__puts(gen->stream, "ctx->cur = p0;\n");
2876             stream__write_characters(gen->stream, ' ', indent + 4);
2877             stream__puts(gen->stream, "pcc_thunk_array__revert(ctx->auxil, &chunk->thunks, n0);\n");
2878             stream__write_characters(gen->stream, ' ', indent + 4);
2879             stream__printf(gen->stream, "goto L%04d;\n", onfail);
2880             stream__write_characters(gen->stream, ' ', indent);
2881             stream__puts(gen->stream, "}\n");
2882         }
2883         if (!bare) {
2884             indent -= 4;
2885             stream__write_characters(gen->stream, ' ', indent);
2886             stream__puts(gen->stream, "}\n");
2887         }
2888         return (min > 0) ? ((r == CODE_REACH__ALWAYS_FAIL) ? CODE_REACH__ALWAYS_FAIL : CODE_REACH__BOTH) : CODE_REACH__ALWAYS_SUCCEED;
2889     }
2890     else if (max == 1) {
2891         if (min > 0) {
2892             return generate_code(gen, expr, onfail, indent, bare);
2893         }
2894         else {
2895             if (!bare) {
2896                 stream__write_characters(gen->stream, ' ', indent);
2897                 stream__puts(gen->stream, "{\n");
2898                 indent += 4;
2899             }
2900             stream__write_characters(gen->stream, ' ', indent);
2901             stream__puts(gen->stream, "const size_t p = ctx->cur;\n");
2902             stream__write_characters(gen->stream, ' ', indent);
2903             stream__puts(gen->stream, "const size_t n = chunk->thunks.len;\n");
2904             {
2905                 const int l = ++gen->label;
2906                 if (generate_code(gen, expr, l, indent, FALSE) != CODE_REACH__ALWAYS_SUCCEED) {
2907                     const int m = ++gen->label;
2908                     stream__write_characters(gen->stream, ' ', indent);
2909                     stream__printf(gen->stream, "goto L%04d;\n", m);
2910                     if (indent > 4) stream__write_characters(gen->stream, ' ', indent - 4);
2911                     stream__printf(gen->stream, "L%04d:;\n", l);
2912                     stream__write_characters(gen->stream, ' ', indent);
2913                     stream__puts(gen->stream, "ctx->cur = p;\n");
2914                     stream__write_characters(gen->stream, ' ', indent);
2915                     stream__puts(gen->stream, "pcc_thunk_array__revert(ctx->auxil, &chunk->thunks, n);\n");
2916                     if (indent > 4) stream__write_characters(gen->stream, ' ', indent - 4);
2917                     stream__printf(gen->stream, "L%04d:;\n", m);
2918                 }
2919             }
2920             if (!bare) {
2921                 indent -= 4;
2922                 stream__write_characters(gen->stream, ' ', indent);
2923                 stream__puts(gen->stream, "}\n");
2924             }
2925             return CODE_REACH__ALWAYS_SUCCEED;
2926         }
2927     }
2928     else {
2929         /* no code to generate */
2930         return CODE_REACH__ALWAYS_SUCCEED;
2931     }
2932 }
2933 
generate_predicating_code(generate_t * gen,const node_t * expr,bool_t neg,int onfail,size_t indent,bool_t bare)2934 static code_reach_t generate_predicating_code(generate_t *gen, const node_t *expr, bool_t neg, int onfail, size_t indent, bool_t bare) {
2935     code_reach_t r;
2936     if (!bare) {
2937         stream__write_characters(gen->stream, ' ', indent);
2938         stream__puts(gen->stream, "{\n");
2939         indent += 4;
2940     }
2941     stream__write_characters(gen->stream, ' ', indent);
2942     stream__puts(gen->stream, "const size_t p = ctx->cur;\n");
2943     if (neg) {
2944         const int l = ++gen->label;
2945         r = generate_code(gen, expr, l, indent, FALSE);
2946         if (r != CODE_REACH__ALWAYS_FAIL) {
2947             stream__write_characters(gen->stream, ' ', indent);
2948             stream__puts(gen->stream, "ctx->cur = p;\n");
2949             stream__write_characters(gen->stream, ' ', indent);
2950             stream__printf(gen->stream, "goto L%04d;\n", onfail);
2951         }
2952         if (r != CODE_REACH__ALWAYS_SUCCEED) {
2953             if (indent > 4) stream__write_characters(gen->stream, ' ', indent - 4);
2954             stream__printf(gen->stream, "L%04d:;\n", l);
2955             stream__write_characters(gen->stream, ' ', indent);
2956             stream__puts(gen->stream, "ctx->cur = p;\n");
2957         }
2958         switch (r) {
2959         case CODE_REACH__ALWAYS_SUCCEED: r = CODE_REACH__ALWAYS_FAIL; break;
2960         case CODE_REACH__ALWAYS_FAIL: r = CODE_REACH__ALWAYS_SUCCEED; break;
2961         case CODE_REACH__BOTH: break;
2962         }
2963     }
2964     else {
2965         const int l = ++gen->label;
2966         const int m = ++gen->label;
2967         r = generate_code(gen, expr, l, indent, FALSE);
2968         if (r != CODE_REACH__ALWAYS_FAIL) {
2969             stream__write_characters(gen->stream, ' ', indent);
2970             stream__puts(gen->stream, "ctx->cur = p;\n");
2971         }
2972         if (r == CODE_REACH__BOTH) {
2973             stream__write_characters(gen->stream, ' ', indent);
2974             stream__printf(gen->stream, "goto L%04d;\n", m);
2975         }
2976         if (r != CODE_REACH__ALWAYS_SUCCEED) {
2977             if (indent > 4) stream__write_characters(gen->stream, ' ', indent - 4);
2978             stream__printf(gen->stream, "L%04d:;\n", l);
2979             stream__write_characters(gen->stream, ' ', indent);
2980             stream__puts(gen->stream, "ctx->cur = p;\n");
2981             stream__write_characters(gen->stream, ' ', indent);
2982             stream__printf(gen->stream, "goto L%04d;\n", onfail);
2983         }
2984         if (r == CODE_REACH__BOTH) {
2985             if (indent > 4) stream__write_characters(gen->stream, ' ', indent - 4);
2986             stream__printf(gen->stream, "L%04d:;\n", m);
2987         }
2988     }
2989     if (!bare) {
2990         indent -= 4;
2991         stream__write_characters(gen->stream, ' ', indent);
2992         stream__puts(gen->stream, "}\n");
2993     }
2994     return r;
2995 }
2996 
generate_sequential_code(generate_t * gen,const node_array_t * nodes,int onfail,size_t indent,bool_t bare)2997 static code_reach_t generate_sequential_code(generate_t *gen, const node_array_t *nodes, int onfail, size_t indent, bool_t bare) {
2998     bool_t b = FALSE;
2999     size_t i;
3000     for (i = 0; i < nodes->len; i++) {
3001         switch (generate_code(gen, nodes->buf[i], onfail, indent, FALSE)) {
3002         case CODE_REACH__ALWAYS_FAIL:
3003             if (i + 1 < nodes->len) {
3004                 stream__write_characters(gen->stream, ' ', indent);
3005                 stream__puts(gen->stream, "/* unreachable codes omitted */\n");
3006             }
3007             return CODE_REACH__ALWAYS_FAIL;
3008         case CODE_REACH__ALWAYS_SUCCEED:
3009             break;
3010         default:
3011             b = TRUE;
3012         }
3013     }
3014     return b ? CODE_REACH__BOTH : CODE_REACH__ALWAYS_SUCCEED;
3015 }
3016 
generate_alternative_code(generate_t * gen,const node_array_t * nodes,int onfail,size_t indent,bool_t bare)3017 static code_reach_t generate_alternative_code(generate_t *gen, const node_array_t *nodes, int onfail, size_t indent, bool_t bare) {
3018     bool_t b = FALSE;
3019     int m = ++gen->label;
3020     size_t i;
3021     if (!bare) {
3022         stream__write_characters(gen->stream, ' ', indent);
3023         stream__puts(gen->stream, "{\n");
3024         indent += 4;
3025     }
3026     stream__write_characters(gen->stream, ' ', indent);
3027     stream__puts(gen->stream, "const size_t p = ctx->cur;\n");
3028     stream__write_characters(gen->stream, ' ', indent);
3029     stream__puts(gen->stream, "const size_t n = chunk->thunks.len;\n");
3030     for (i = 0; i < nodes->len; i++) {
3031         const bool_t c = (i + 1 < nodes->len) ? TRUE : FALSE;
3032         const int l = ++gen->label;
3033         switch (generate_code(gen, nodes->buf[i], l, indent, FALSE)) {
3034         case CODE_REACH__ALWAYS_SUCCEED:
3035             if (c) {
3036                 stream__write_characters(gen->stream, ' ', indent);
3037                 stream__puts(gen->stream, "/* unreachable codes omitted */\n");
3038             }
3039             if (b) {
3040                 if (indent > 4) stream__write_characters(gen->stream, ' ', indent - 4);
3041                 stream__printf(gen->stream, "L%04d:;\n", m);
3042             }
3043             if (!bare) {
3044                 indent -= 4;
3045                 stream__write_characters(gen->stream, ' ', indent);
3046                 stream__puts(gen->stream, "}\n");
3047             }
3048             return CODE_REACH__ALWAYS_SUCCEED;
3049         case CODE_REACH__ALWAYS_FAIL:
3050             break;
3051         default:
3052             b = TRUE;
3053             stream__write_characters(gen->stream, ' ', indent);
3054             stream__printf(gen->stream, "goto L%04d;\n", m);
3055         }
3056         if (indent > 4) stream__write_characters(gen->stream, ' ', indent - 4);
3057         stream__printf(gen->stream, "L%04d:;\n", l);
3058         stream__write_characters(gen->stream, ' ', indent);
3059         stream__puts(gen->stream, "ctx->cur = p;\n");
3060         stream__write_characters(gen->stream, ' ', indent);
3061         stream__puts(gen->stream, "pcc_thunk_array__revert(ctx->auxil, &chunk->thunks, n);\n");
3062         if (!c) {
3063             stream__write_characters(gen->stream, ' ', indent);
3064             stream__printf(gen->stream, "goto L%04d;\n", onfail);
3065         }
3066     }
3067     if (b) {
3068         if (indent > 4) stream__write_characters(gen->stream, ' ', indent - 4);
3069         stream__printf(gen->stream, "L%04d:;\n", m);
3070     }
3071     if (!bare) {
3072         indent -= 4;
3073         stream__write_characters(gen->stream, ' ', indent);
3074         stream__puts(gen->stream, "}\n");
3075     }
3076     return b ? CODE_REACH__BOTH : CODE_REACH__ALWAYS_FAIL;
3077 }
3078 
generate_capturing_code(generate_t * gen,const node_t * expr,size_t index,int onfail,size_t indent,bool_t bare)3079 static code_reach_t generate_capturing_code(generate_t *gen, const node_t *expr, size_t index, int onfail, size_t indent, bool_t bare) {
3080     code_reach_t r;
3081     if (!bare) {
3082         stream__write_characters(gen->stream, ' ', indent);
3083         stream__puts(gen->stream, "{\n");
3084         indent += 4;
3085     }
3086     stream__write_characters(gen->stream, ' ', indent);
3087     stream__puts(gen->stream, "const size_t p = ctx->cur;\n");
3088     stream__write_characters(gen->stream, ' ', indent);
3089     stream__puts(gen->stream, "size_t q;\n");
3090     r = generate_code(gen, expr, onfail, indent, FALSE);
3091     stream__write_characters(gen->stream, ' ', indent);
3092     stream__puts(gen->stream, "q = ctx->cur;\n");
3093     stream__write_characters(gen->stream, ' ', indent);
3094     stream__printf(gen->stream, "chunk->capts.buf[" FMT_LU "].range.start = p;\n", (ulong_t)index);
3095     stream__write_characters(gen->stream, ' ', indent);
3096     stream__printf(gen->stream, "chunk->capts.buf[" FMT_LU "].range.end = q;\n", (ulong_t)index);
3097     if (!bare) {
3098         indent -= 4;
3099         stream__write_characters(gen->stream, ' ', indent);
3100         stream__puts(gen->stream, "}\n");
3101     }
3102     return r;
3103 }
3104 
generate_expanding_code(generate_t * gen,size_t index,int onfail,size_t indent,bool_t bare)3105 static code_reach_t generate_expanding_code(generate_t *gen, size_t index, int onfail, size_t indent, bool_t bare) {
3106     if (!bare) {
3107         stream__write_characters(gen->stream, ' ', indent);
3108         stream__puts(gen->stream, "{\n");
3109         indent += 4;
3110     }
3111     stream__write_characters(gen->stream, ' ', indent);
3112     stream__printf(gen->stream,
3113         "const size_t n = chunk->capts.buf[" FMT_LU "].range.end - chunk->capts.buf[" FMT_LU "].range.start;\n", (ulong_t)index, (ulong_t)index);
3114     stream__write_characters(gen->stream, ' ', indent);
3115     stream__printf(gen->stream, "if (pcc_refill_buffer(ctx, n) < n) goto L%04d;\n", onfail);
3116     stream__write_characters(gen->stream, ' ', indent);
3117     stream__puts(gen->stream, "if (n > 0) {\n");
3118     stream__write_characters(gen->stream, ' ', indent + 4);
3119     stream__puts(gen->stream, "const char *const p = ctx->buffer.buf + ctx->cur;\n");
3120     stream__write_characters(gen->stream, ' ', indent + 4);
3121     stream__printf(gen->stream, "const char *const q = ctx->buffer.buf + chunk->capts.buf[" FMT_LU "].range.start;\n", (ulong_t)index);
3122     stream__write_characters(gen->stream, ' ', indent + 4);
3123     stream__puts(gen->stream, "size_t i;\n");
3124     stream__write_characters(gen->stream, ' ', indent + 4);
3125     stream__puts(gen->stream, "for (i = 0; i < n; i++) {\n");
3126     stream__write_characters(gen->stream, ' ', indent + 8);
3127     stream__printf(gen->stream, "if (p[i] != q[i]) goto L%04d;\n", onfail);
3128     stream__write_characters(gen->stream, ' ', indent + 4);
3129     stream__puts(gen->stream, "}\n");
3130     stream__write_characters(gen->stream, ' ', indent + 4);
3131     stream__puts(gen->stream, "ctx->cur += n;\n");
3132     stream__write_characters(gen->stream, ' ', indent);
3133     stream__puts(gen->stream, "}\n");
3134     if (!bare) {
3135         indent -= 4;
3136         stream__write_characters(gen->stream, ' ', indent);
3137         stream__puts(gen->stream, "}\n");
3138     }
3139     return CODE_REACH__BOTH;
3140 }
3141 
generate_thunking_action_code(generate_t * gen,size_t index,const node_const_array_t * vars,const node_const_array_t * capts,bool_t error,int onfail,size_t indent,bool_t bare)3142 static code_reach_t generate_thunking_action_code(
3143     generate_t *gen, size_t index, const node_const_array_t *vars, const node_const_array_t *capts, bool_t error, int onfail, size_t indent, bool_t bare
3144 ) {
3145     assert(gen->rule->type == NODE_RULE);
3146     if (!bare) {
3147         stream__write_characters(gen->stream, ' ', indent);
3148         stream__puts(gen->stream, "{\n");
3149         indent += 4;
3150     }
3151     if (error) {
3152         stream__write_characters(gen->stream, ' ', indent);
3153         stream__puts(gen->stream, "pcc_value_t null;\n");
3154     }
3155     stream__write_characters(gen->stream, ' ', indent);
3156     stream__printf(gen->stream, "pcc_thunk_t *const thunk = pcc_thunk__create_leaf(ctx->auxil, pcc_action_%s_" FMT_LU ", " FMT_LU ", " FMT_LU ");\n",
3157         gen->rule->data.rule.name, (ulong_t)index, (ulong_t)gen->rule->data.rule.vars.len, (ulong_t)gen->rule->data.rule.capts.len);
3158     {
3159         size_t i;
3160         for (i = 0; i < vars->len; i++) {
3161             assert(vars->buf[i]->type == NODE_REFERENCE);
3162             stream__write_characters(gen->stream, ' ', indent);
3163             stream__printf(gen->stream, "thunk->data.leaf.values.buf[" FMT_LU "] = &(chunk->values.buf[" FMT_LU "]);\n",
3164                 (ulong_t)vars->buf[i]->data.reference.index, (ulong_t)vars->buf[i]->data.reference.index);
3165         }
3166         for (i = 0; i < capts->len; i++) {
3167             assert(capts->buf[i]->type == NODE_CAPTURE);
3168             stream__write_characters(gen->stream, ' ', indent);
3169             stream__printf(gen->stream, "thunk->data.leaf.capts.buf[" FMT_LU "] = &(chunk->capts.buf[" FMT_LU "]);\n",
3170                 (ulong_t)capts->buf[i]->data.capture.index, (ulong_t)capts->buf[i]->data.capture.index);
3171         }
3172         stream__write_characters(gen->stream, ' ', indent);
3173         stream__puts(gen->stream, "thunk->data.leaf.capt0.range.start = chunk->pos;\n");
3174         stream__write_characters(gen->stream, ' ', indent);
3175         stream__puts(gen->stream, "thunk->data.leaf.capt0.range.end = ctx->cur;\n");
3176     }
3177     if (error) {
3178         stream__write_characters(gen->stream, ' ', indent);
3179         stream__puts(gen->stream, "memset(&null, 0, sizeof(pcc_value_t)); /* in case */\n");
3180         stream__write_characters(gen->stream, ' ', indent);
3181         stream__puts(gen->stream, "thunk->data.leaf.action(ctx, thunk, &null);\n");
3182         stream__write_characters(gen->stream, ' ', indent);
3183         stream__puts(gen->stream, "pcc_thunk__destroy(ctx->auxil, thunk);\n");
3184     }
3185     else {
3186         stream__write_characters(gen->stream, ' ', indent);
3187         stream__puts(gen->stream, "pcc_thunk_array__add(ctx->auxil, &chunk->thunks, thunk);\n");
3188     }
3189     if (!bare) {
3190         indent -= 4;
3191         stream__write_characters(gen->stream, ' ', indent);
3192         stream__puts(gen->stream, "}\n");
3193     }
3194     return CODE_REACH__ALWAYS_SUCCEED;
3195 }
3196 
generate_thunking_error_code(generate_t * gen,const node_t * expr,size_t index,const node_const_array_t * vars,const node_const_array_t * capts,int onfail,size_t indent,bool_t bare)3197 static code_reach_t generate_thunking_error_code(
3198     generate_t *gen, const node_t *expr, size_t index, const node_const_array_t *vars, const node_const_array_t *capts, int onfail, size_t indent, bool_t bare
3199 ) {
3200     code_reach_t r;
3201     const int l = ++gen->label;
3202     const int m = ++gen->label;
3203     assert(gen->rule->type == NODE_RULE);
3204     if (!bare) {
3205         stream__write_characters(gen->stream, ' ', indent);
3206         stream__puts(gen->stream, "{\n");
3207         indent += 4;
3208     }
3209     r = generate_code(gen, expr, l, indent, TRUE);
3210     stream__write_characters(gen->stream, ' ', indent);
3211     stream__printf(gen->stream, "goto L%04d;\n", m);
3212     if (indent > 4) stream__write_characters(gen->stream, ' ', indent - 4);
3213     stream__printf(gen->stream, "L%04d:;\n", l);
3214     generate_thunking_action_code(gen, index, vars, capts, TRUE, l, indent, FALSE);
3215     stream__write_characters(gen->stream, ' ', indent);
3216     stream__printf(gen->stream, "goto L%04d;\n", onfail);
3217     if (indent > 4) stream__write_characters(gen->stream, ' ', indent - 4);
3218     stream__printf(gen->stream, "L%04d:;\n", m);
3219     if (!bare) {
3220         indent -= 4;
3221         stream__write_characters(gen->stream, ' ', indent);
3222         stream__puts(gen->stream, "}\n");
3223     }
3224     return r;
3225 }
3226 
generate_code(generate_t * gen,const node_t * node,int onfail,size_t indent,bool_t bare)3227 static code_reach_t generate_code(generate_t *gen, const node_t *node, int onfail, size_t indent, bool_t bare) {
3228     if (node == NULL) {
3229         print_error("Internal error [%d]\n", __LINE__);
3230         exit(-1);
3231     }
3232     switch (node->type) {
3233     case NODE_RULE:
3234         print_error("Internal error [%d]\n", __LINE__);
3235         exit(-1);
3236     case NODE_REFERENCE:
3237         if (node->data.reference.index != VOID_VALUE) {
3238             stream__write_characters(gen->stream, ' ', indent);
3239             stream__printf(gen->stream, "if (!pcc_apply_rule(ctx, pcc_evaluate_rule_%s, &chunk->thunks, &(chunk->values.buf[" FMT_LU "]))) goto L%04d;\n",
3240                 node->data.reference.name, (ulong_t)node->data.reference.index, onfail);
3241         }
3242         else {
3243             stream__write_characters(gen->stream, ' ', indent);
3244             stream__printf(gen->stream, "if (!pcc_apply_rule(ctx, pcc_evaluate_rule_%s, &chunk->thunks, NULL)) goto L%04d;\n",
3245                 node->data.reference.name, onfail);
3246         }
3247         return CODE_REACH__BOTH;
3248     case NODE_STRING:
3249         return generate_matching_string_code(gen, node->data.string.value, onfail, indent, bare);
3250     case NODE_CHARCLASS:
3251         return gen->ascii ?
3252                generate_matching_charclass_code(gen, node->data.charclass.value, onfail, indent, bare) :
3253                generate_matching_utf8_charclass_code(gen, node->data.charclass.value, onfail, indent, bare);
3254     case NODE_QUANTITY:
3255         return generate_quantifying_code(gen, node->data.quantity.expr, node->data.quantity.min, node->data.quantity.max, onfail, indent, bare);
3256     case NODE_PREDICATE:
3257         return generate_predicating_code(gen, node->data.predicate.expr, node->data.predicate.neg, onfail, indent, bare);
3258     case NODE_SEQUENCE:
3259         return generate_sequential_code(gen, &node->data.sequence.nodes, onfail, indent, bare);
3260     case NODE_ALTERNATE:
3261         return generate_alternative_code(gen, &node->data.alternate.nodes, onfail, indent, bare);
3262     case NODE_CAPTURE:
3263         return generate_capturing_code(gen, node->data.capture.expr, node->data.capture.index, onfail, indent, bare);
3264     case NODE_EXPAND:
3265         return generate_expanding_code(gen, node->data.expand.index, onfail, indent, bare);
3266     case NODE_ACTION:
3267         return generate_thunking_action_code(
3268             gen, node->data.action.index, &node->data.action.vars, &node->data.action.capts, FALSE, onfail, indent, bare
3269         );
3270     case NODE_ERROR:
3271         return generate_thunking_error_code(
3272             gen, node->data.error.expr, node->data.error.index, &node->data.error.vars, &node->data.error.capts, onfail, indent, bare
3273         );
3274     default:
3275         print_error("Internal error [%d]\n", __LINE__);
3276         exit(-1);
3277     }
3278 }
3279 
generate(context_t * ctx)3280 static bool_t generate(context_t *ctx) {
3281     const char *const vt = get_value_type(ctx);
3282     const char *const at = get_auxil_type(ctx);
3283     const bool_t vp = is_pointer_type(vt);
3284     const bool_t ap = is_pointer_type(at);
3285     stream_t sstream = stream__wrap(fopen_wt_e(ctx->sname), ctx->sname, ctx->opts.lines ? 0 : VOID_VALUE);
3286     stream_t hstream = stream__wrap(fopen_wt_e(ctx->hname), ctx->hname, ctx->opts.lines ? 0 : VOID_VALUE);
3287     stream__printf(&sstream, "/* A packrat parser generated by PackCC %s */\n\n", VERSION);
3288     stream__printf(&hstream, "/* A packrat parser generated by PackCC %s */\n\n", VERSION);
3289     {
3290         {
3291             size_t i;
3292             for (i = 0; i < ctx->eheader.len; i++) {
3293                 stream__write_code_block(&hstream, ctx->eheader.buf[i].text, ctx->eheader.buf[i].len, 0, ctx->iname, ctx->eheader.buf[i].line);
3294             }
3295         }
3296         if (ctx->eheader.len > 0) stream__puts(&hstream, "\n");
3297         stream__printf(
3298             &hstream,
3299             "#ifndef PCC_INCLUDED_%s\n"
3300             "#define PCC_INCLUDED_%s\n"
3301             "\n",
3302             ctx->hid, ctx->hid
3303         );
3304         {
3305             size_t i;
3306             for (i = 0; i < ctx->header.len; i++) {
3307                 stream__write_code_block(&hstream, ctx->header.buf[i].text, ctx->header.buf[i].len, 0, ctx->iname, ctx->header.buf[i].line);
3308             }
3309         }
3310     }
3311     {
3312         {
3313             size_t i;
3314             for (i = 0; i < ctx->esource.len; i++) {
3315                 stream__write_code_block(&sstream, ctx->esource.buf[i].text, ctx->esource.buf[i].len, 0, ctx->iname, ctx->esource.buf[i].line);
3316             }
3317         }
3318         if (ctx->esource.len > 0) stream__puts(&sstream, "\n");
3319         stream__puts(
3320             &sstream,
3321             "#ifdef _MSC_VER\n"
3322             "#undef _CRT_SECURE_NO_WARNINGS\n"
3323             "#define _CRT_SECURE_NO_WARNINGS\n"
3324             "#endif /* _MSC_VER */\n"
3325             "#include <stdio.h>\n"
3326             "#include <stdlib.h>\n"
3327             "#include <string.h>\n"
3328             "\n"
3329             "#ifndef _MSC_VER\n"
3330             "#if defined __GNUC__ && defined _WIN32 /* MinGW */\n"
3331             "#ifndef PCC_USE_SYSTEM_STRNLEN\n"
3332             "#define strnlen(str, maxlen) pcc_strnlen(str, maxlen)\n"
3333             "static size_t pcc_strnlen(const char *str, size_t maxlen) {\n"
3334             "    size_t i;\n"
3335             "    for (i = 0; i < maxlen && str[i]; i++);\n"
3336             "    return i;\n"
3337             "}\n"
3338             "#endif /* !PCC_USE_SYSTEM_STRNLEN */\n"
3339             "#endif /* defined __GNUC__ && defined _WIN32 */\n"
3340             "#endif /* !_MSC_VER */\n"
3341             "\n"
3342         );
3343         stream__printf(
3344             &sstream,
3345             "#include \"%s\"\n"
3346             "\n",
3347             ctx->hname
3348         );
3349         {
3350             size_t i;
3351             for (i = 0; i < ctx->source.len; i++) {
3352                 stream__write_code_block(&sstream, ctx->source.buf[i].text, ctx->source.buf[i].len, 0, ctx->iname, ctx->source.buf[i].line);
3353             }
3354         }
3355     }
3356     {
3357         stream__puts(
3358             &sstream,
3359             "#if !defined __has_attribute || defined _MSC_VER\n"
3360             "#define __attribute__(x)\n"
3361             "#endif\n"
3362             "\n"
3363             "#ifdef _MSC_VER\n"
3364             "#define MARK_FUNC_AS_USED __pragma(warning(suppress:4505))\n"
3365             "#else\n"
3366             "#define MARK_FUNC_AS_USED __attribute__((__unused__))\n"
3367             "#endif\n"
3368             "\n"
3369             "#ifndef PCC_BUFFER_MIN_SIZE\n"
3370             "#define PCC_BUFFER_MIN_SIZE 256\n"
3371             "#endif /* !PCC_BUFFER_MIN_SIZE */\n"
3372             "\n"
3373             "#ifndef PCC_ARRAY_MIN_SIZE\n"
3374             "#define PCC_ARRAY_MIN_SIZE 2\n"
3375             "#endif /* !PCC_ARRAY_MIN_SIZE */\n"
3376             "\n"
3377             "#ifndef PCC_POOL_MIN_SIZE\n"
3378             "#define PCC_POOL_MIN_SIZE 65536\n"
3379             "#endif /* !PCC_POOL_MIN_SIZE */\n"
3380             "\n"
3381             "#define PCC_DBG_EVALUATE 0\n"
3382             "#define PCC_DBG_MATCH    1\n"
3383             "#define PCC_DBG_NOMATCH  2\n"
3384             "\n"
3385             "#define PCC_VOID_VALUE (~(size_t)0)\n"
3386             "\n"
3387             "typedef enum pcc_bool_tag {\n"
3388             "    PCC_FALSE = 0,\n"
3389             "    PCC_TRUE\n"
3390             "} pcc_bool_t;\n"
3391             "\n"
3392             "typedef struct pcc_char_array_tag {\n"
3393             "    char *buf;\n"
3394             "    size_t max;\n"
3395             "    size_t len;\n"
3396             "} pcc_char_array_t;\n"
3397             "\n"
3398             "typedef struct pcc_range_tag {\n"
3399             "    size_t start;\n"
3400             "    size_t end;\n"
3401             "} pcc_range_t;\n"
3402             "\n"
3403         );
3404         stream__printf(
3405             &sstream,
3406             "typedef %s%spcc_value_t;\n"
3407             "\n",
3408             vt, vp ? "" : " "
3409         );
3410         stream__printf(
3411             &sstream,
3412             "typedef %s%spcc_auxil_t;\n"
3413             "\n",
3414             at, ap ? "" : " "
3415         );
3416         if (strcmp(get_prefix(ctx), "pcc") != 0) {
3417             stream__printf(
3418                 &sstream,
3419                 "typedef %s_context_t pcc_context_t;\n"
3420                 "\n",
3421                 get_prefix(ctx)
3422             );
3423         }
3424         stream__puts(
3425             &sstream,
3426             "typedef struct pcc_value_table_tag {\n"
3427             "    pcc_value_t *buf;\n"
3428             "    size_t max;\n"
3429             "    size_t len;\n"
3430             "} pcc_value_table_t;\n"
3431             "\n"
3432             "typedef struct pcc_value_refer_table_tag {\n"
3433             "    pcc_value_t **buf;\n"
3434             "    size_t max;\n"
3435             "    size_t len;\n"
3436             "} pcc_value_refer_table_t;\n"
3437             "\n"
3438             "typedef struct pcc_capture_tag {\n"
3439             "    pcc_range_t range;\n"
3440             "    char *string; /* mutable */\n"
3441             "} pcc_capture_t;\n"
3442             "\n"
3443             "typedef struct pcc_capture_table_tag {\n"
3444             "    pcc_capture_t *buf;\n"
3445             "    size_t max;\n"
3446             "    size_t len;\n"
3447             "} pcc_capture_table_t;\n"
3448             "\n"
3449             "typedef struct pcc_capture_const_table_tag {\n"
3450             "    const pcc_capture_t **buf;\n"
3451             "    size_t max;\n"
3452             "    size_t len;\n"
3453             "} pcc_capture_const_table_t;\n"
3454             "\n"
3455             "typedef struct pcc_thunk_tag pcc_thunk_t;\n"
3456             "typedef struct pcc_thunk_array_tag pcc_thunk_array_t;\n"
3457             "\n"
3458             "typedef void (*pcc_action_t)(pcc_context_t *, pcc_thunk_t *, pcc_value_t *);\n"
3459             "\n"
3460         );
3461         stream__puts(
3462             &sstream,
3463             "typedef enum pcc_thunk_type_tag {\n"
3464             "    PCC_THUNK_LEAF,\n"
3465             "    PCC_THUNK_NODE\n"
3466             "} pcc_thunk_type_t;\n"
3467             "\n"
3468             "typedef struct pcc_thunk_leaf_tag {\n"
3469             "    pcc_value_refer_table_t values;\n"
3470             "    pcc_capture_const_table_t capts;\n"
3471             "    pcc_capture_t capt0;\n"
3472             "    pcc_action_t action;\n"
3473             "} pcc_thunk_leaf_t;\n"
3474             "\n"
3475             "typedef struct pcc_thunk_node_tag {\n"
3476             "    const pcc_thunk_array_t *thunks; /* just a reference */\n"
3477             "    pcc_value_t *value; /* just a reference */\n"
3478             "} pcc_thunk_node_t;\n"
3479             "\n"
3480             "typedef union pcc_thunk_data_tag {\n"
3481             "    pcc_thunk_leaf_t leaf;\n"
3482             "    pcc_thunk_node_t node;\n"
3483             "} pcc_thunk_data_t;\n"
3484             "\n"
3485             "struct pcc_thunk_tag {\n"
3486             "    pcc_thunk_type_t type;\n"
3487             "    pcc_thunk_data_t data;\n"
3488             "};\n"
3489             "\n"
3490             "struct pcc_thunk_array_tag {\n"
3491             "    pcc_thunk_t **buf;\n"
3492             "    size_t max;\n"
3493             "    size_t len;\n"
3494             "};\n"
3495             "\n"
3496             "typedef struct pcc_thunk_chunk_tag {\n"
3497             "    pcc_value_table_t values;\n"
3498             "    pcc_capture_table_t capts;\n"
3499             "    pcc_thunk_array_t thunks;\n"
3500             "    size_t pos; /* the starting position in the character buffer */\n"
3501             "} pcc_thunk_chunk_t;\n"
3502             "\n"
3503             "typedef struct pcc_lr_entry_tag pcc_lr_entry_t;\n"
3504             "\n"
3505             "typedef enum pcc_lr_answer_type_tag {\n"
3506             "    PCC_LR_ANSWER_LR,\n"
3507             "    PCC_LR_ANSWER_CHUNK\n"
3508             "} pcc_lr_answer_type_t;\n"
3509             "\n"
3510             "typedef union pcc_lr_answer_data_tag {\n"
3511             "    pcc_lr_entry_t *lr;\n"
3512             "    pcc_thunk_chunk_t *chunk;\n"
3513             "} pcc_lr_answer_data_t;\n"
3514             "\n"
3515             "typedef struct pcc_lr_answer_tag pcc_lr_answer_t;\n"
3516             "\n"
3517             "struct pcc_lr_answer_tag {\n"
3518             "    pcc_lr_answer_type_t type;\n"
3519             "    pcc_lr_answer_data_t data;\n"
3520             "    size_t pos; /* the absolute position in the input */\n"
3521             "    pcc_lr_answer_t *hold;\n"
3522             "};\n"
3523             "\n"
3524         );
3525         stream__puts(
3526             &sstream,
3527             "typedef pcc_thunk_chunk_t *(*pcc_rule_t)(pcc_context_t *);\n"
3528             "\n"
3529             "typedef struct pcc_rule_set_tag {\n"
3530             "    pcc_rule_t *buf;\n"
3531             "    size_t max;\n"
3532             "    size_t len;\n"
3533             "} pcc_rule_set_t;\n"
3534             "\n"
3535             "typedef struct pcc_lr_head_tag pcc_lr_head_t;\n"
3536             "\n"
3537             "struct pcc_lr_head_tag {\n"
3538             "    pcc_rule_t rule;\n"
3539             "    pcc_rule_set_t invol;\n"
3540             "    pcc_rule_set_t eval;\n"
3541             "    pcc_lr_head_t *hold;\n"
3542             "};\n"
3543             "\n"
3544             "typedef struct pcc_lr_memo_tag {\n"
3545             "    pcc_rule_t rule;\n"
3546             "    pcc_lr_answer_t *answer;\n"
3547             "} pcc_lr_memo_t;\n"
3548             "\n"
3549             "typedef struct pcc_lr_memo_map_tag {\n"
3550             "    pcc_lr_memo_t *buf;\n"
3551             "    size_t max;\n"
3552             "    size_t len;\n"
3553             "} pcc_lr_memo_map_t;\n"
3554             "\n"
3555             "typedef struct pcc_lr_table_entry_tag {\n"
3556             "    pcc_lr_head_t *head; /* just a reference */\n"
3557             "    pcc_lr_memo_map_t memos;\n"
3558             "    pcc_lr_answer_t *hold_a;\n"
3559             "    pcc_lr_head_t *hold_h;\n"
3560             "} pcc_lr_table_entry_t;\n"
3561             "\n"
3562             "typedef struct pcc_lr_table_tag {\n"
3563             "    pcc_lr_table_entry_t **buf;\n"
3564             "    size_t max;\n"
3565             "    size_t len;\n"
3566             "    size_t ofs;\n"
3567             "} pcc_lr_table_t;\n"
3568             "\n"
3569             "struct pcc_lr_entry_tag {\n"
3570             "    pcc_rule_t rule;\n"
3571             "    pcc_thunk_chunk_t *seed; /* just a reference */\n"
3572             "    pcc_lr_head_t *head; /* just a reference */\n"
3573             "};\n"
3574             "\n"
3575             "typedef struct pcc_lr_stack_tag {\n"
3576             "    pcc_lr_entry_t **buf;\n"
3577             "    size_t max;\n"
3578             "    size_t len;\n"
3579             "} pcc_lr_stack_t;\n"
3580             "\n"
3581         );
3582         stream__puts(
3583             &sstream,
3584             "typedef struct pcc_memory_entry_tag pcc_memory_entry_t;\n"
3585             "typedef struct pcc_memory_pool_tag pcc_memory_pool_t;\n"
3586             "\n"
3587             "struct pcc_memory_entry_tag {\n"
3588             "    pcc_memory_entry_t *next;\n"
3589             "};\n"
3590             "\n"
3591             "struct pcc_memory_pool_tag {\n"
3592             "    pcc_memory_pool_t *next;\n"
3593             "    size_t allocated;\n"
3594             "    size_t unused;\n"
3595             "};\n"
3596             "\n"
3597             "typedef struct pcc_memory_recycler_tag {\n"
3598             "    pcc_memory_pool_t *pool_list;\n"
3599             "    pcc_memory_entry_t *entry_list;\n"
3600             "    size_t element_size;\n"
3601             "} pcc_memory_recycler_t;\n"
3602             "\n"
3603         );
3604         stream__printf(
3605             &sstream,
3606             "struct %s_context_tag {\n"
3607             "    size_t pos; /* the position in the input of the first character currently buffered */\n"
3608             "    size_t cur; /* the current parsing position in the character buffer */\n"
3609             "    size_t level;\n"
3610             "    pcc_char_array_t buffer;\n"
3611             "    pcc_lr_table_t lrtable;\n"
3612             "    pcc_lr_stack_t lrstack;\n"
3613             "    pcc_thunk_array_t thunks;\n"
3614             "    pcc_auxil_t auxil;\n"
3615             "    pcc_memory_recycler_t thunk_chunk_recycler;\n"
3616             "    pcc_memory_recycler_t lr_head_recycler;\n"
3617             "    pcc_memory_recycler_t lr_answer_recycler;\n"
3618             "};\n"
3619             "\n",
3620             get_prefix(ctx)
3621         );
3622         stream__puts(
3623             &sstream,
3624             "#ifndef PCC_ERROR\n"
3625             "#define PCC_ERROR(auxil) pcc_error()\n"
3626             "MARK_FUNC_AS_USED\n"
3627             "static void pcc_error(void) {\n"
3628             "    fprintf(stderr, \"Syntax error\\n\");\n"
3629             "    exit(1);\n"
3630             "}\n"
3631             "#endif /* !PCC_ERROR */\n"
3632             "\n"
3633             "#ifndef PCC_GETCHAR\n"
3634             "#define PCC_GETCHAR(auxil) getchar()\n"
3635             "#endif /* !PCC_GETCHAR */\n"
3636             "\n"
3637             "#ifndef PCC_MALLOC\n"
3638             "#define PCC_MALLOC(auxil, size) pcc_malloc_e(size)\n"
3639             "static void *pcc_malloc_e(size_t size) {\n"
3640             "    void *const p = malloc(size);\n"
3641             "    if (p == NULL) {\n"
3642             "        fprintf(stderr, \"Out of memory\\n\");\n"
3643             "        exit(1);\n"
3644             "    }\n"
3645             "    return p;\n"
3646             "}\n"
3647             "#endif /* !PCC_MALLOC */\n"
3648             "\n"
3649             "#ifndef PCC_REALLOC\n"
3650             "#define PCC_REALLOC(auxil, ptr, size) pcc_realloc_e(ptr, size)\n"
3651             "static void *pcc_realloc_e(void *ptr, size_t size) {\n"
3652             "    void *const p = realloc(ptr, size);\n"
3653             "    if (p == NULL) {\n"
3654             "        fprintf(stderr, \"Out of memory\\n\");\n"
3655             "        exit(1);\n"
3656             "    }\n"
3657             "    return p;\n"
3658             "}\n"
3659             "#endif /* !PCC_REALLOC */\n"
3660             "\n"
3661             "#ifndef PCC_FREE\n"
3662             "#define PCC_FREE(auxil, ptr) free(ptr)\n"
3663             "#endif /* !PCC_FREE */\n"
3664             "\n"
3665             "#ifndef PCC_DEBUG\n"
3666             "#define PCC_DEBUG(auxil, event, rule, level, pos, buffer, length) ((void)0)\n"
3667             "#endif /* !PCC_DEBUG */\n"
3668             "\n"
3669             "static char *pcc_strndup_e(pcc_auxil_t auxil, const char *str, size_t len) {\n"
3670             "    const size_t m = strnlen(str, len);\n"
3671             "    char *const s = (char *)PCC_MALLOC(auxil, m + 1);\n"
3672             "    memcpy(s, str, m);\n"
3673             "    s[m] = '\\0';\n"
3674             "    return s;\n"
3675             "}\n"
3676             "\n"
3677         );
3678         stream__puts(
3679             &sstream,
3680             "static void pcc_char_array__init(pcc_auxil_t auxil, pcc_char_array_t *array) {\n"
3681             "    array->len = 0;\n"
3682             "    array->max = 0;\n"
3683             "    array->buf = NULL;\n"
3684             "}\n"
3685             "\n"
3686             "static void pcc_char_array__add(pcc_auxil_t auxil, pcc_char_array_t *array, char ch) {\n"
3687             "    if (array->max <= array->len) {\n"
3688             "        const size_t n = array->len + 1;\n"
3689             "        size_t m = array->max;\n"
3690             "        if (m == 0) m = PCC_BUFFER_MIN_SIZE;\n"
3691             "        while (m < n && m != 0) m <<= 1;\n"
3692             "        if (m == 0) m = n;\n"
3693             "        array->buf = (char *)PCC_REALLOC(auxil, array->buf, m);\n"
3694             "        array->max = m;\n"
3695             "    }\n"
3696             "    array->buf[array->len++] = ch;\n"
3697             "}\n"
3698             "\n"
3699             "static void pcc_char_array__term(pcc_auxil_t auxil, pcc_char_array_t *array) {\n"
3700             "    PCC_FREE(auxil, array->buf);\n"
3701             "}\n"
3702             "\n"
3703         );
3704         stream__puts(
3705             &sstream,
3706             "static void pcc_value_table__init(pcc_auxil_t auxil, pcc_value_table_t *table) {\n"
3707             "    table->len = 0;\n"
3708             "    table->max = 0;\n"
3709             "    table->buf = NULL;\n"
3710             "}\n"
3711             "\n"
3712             "MARK_FUNC_AS_USED\n"
3713             "static void pcc_value_table__resize(pcc_auxil_t auxil, pcc_value_table_t *table, size_t len) {\n"
3714             "    if (table->max < len) {\n"
3715             "        size_t m = table->max;\n"
3716             "        if (m == 0) m = PCC_ARRAY_MIN_SIZE;\n"
3717             "        while (m < len && m != 0) m <<= 1;\n"
3718             "        if (m == 0) m = len;\n"
3719             "        table->buf = (pcc_value_t *)PCC_REALLOC(auxil, table->buf, sizeof(pcc_value_t) * m);\n"
3720             "        table->max = m;\n"
3721             "    }\n"
3722             "    table->len = len;\n"
3723             "}\n"
3724             "\n"
3725             "MARK_FUNC_AS_USED\n"
3726             "static void pcc_value_table__clear(pcc_auxil_t auxil, pcc_value_table_t *table) {\n"
3727             "    memset(table->buf, 0, sizeof(pcc_value_t) * table->len);\n"
3728             "}\n"
3729             "\n"
3730             "static void pcc_value_table__term(pcc_auxil_t auxil, pcc_value_table_t *table) {\n"
3731             "    PCC_FREE(auxil, table->buf);\n"
3732             "}\n"
3733             "\n"
3734         );
3735         stream__puts(
3736             &sstream,
3737             "static void pcc_value_refer_table__init(pcc_auxil_t auxil, pcc_value_refer_table_t *table) {\n"
3738             "    table->len = 0;\n"
3739             "    table->max = 0;\n"
3740             "    table->buf = NULL;\n"
3741             "}\n"
3742             "\n"
3743             "static void pcc_value_refer_table__resize(pcc_auxil_t auxil, pcc_value_refer_table_t *table, size_t len) {\n"
3744             "    size_t i;\n"
3745             "    if (table->max < len) {\n"
3746             "        size_t m = table->max;\n"
3747             "        if (m == 0) m = PCC_ARRAY_MIN_SIZE;\n"
3748             "        while (m < len && m != 0) m <<= 1;\n"
3749             "        if (m == 0) m = len;\n"
3750             "        table->buf = (pcc_value_t **)PCC_REALLOC(auxil, table->buf, sizeof(pcc_value_t *) * m);\n"
3751             "        table->max = m;\n"
3752             "    }\n"
3753             "    for (i = table->len; i < len; i++) table->buf[i] = NULL;\n"
3754             "    table->len = len;\n"
3755             "}\n"
3756             "\n"
3757             "static void pcc_value_refer_table__term(pcc_auxil_t auxil, pcc_value_refer_table_t *table) {\n"
3758             "    PCC_FREE(auxil, table->buf);\n"
3759             "}\n"
3760             "\n"
3761         );
3762         stream__puts(
             /*
              * Emit the generated parser's pcc_capture_table helpers
              * (a growable array of pcc_capture_t held by value):
              *   __init   - resets len/max to 0 and buf to NULL.
              *   __resize - first frees the `string` of every entry in
              *              [len, old len) (the entries dropped by a shrink),
              *              then grows buf when the capacity is below len
              *              (capacity starts at PCC_ARRAY_MIN_SIZE and doubles;
              *              falls back to len if the doubling wraps to 0),
              *              and finally initializes the entries in
              *              [old len, len) with a zero range and a NULL string.
              *              The emitted text marks it with MARK_FUNC_AS_USED.
              *   __term   - frees the `string` of every entry, then buf.
              */
3763             &sstream,
3764             "static void pcc_capture_table__init(pcc_auxil_t auxil, pcc_capture_table_t *table) {\n"
3765             "    table->len = 0;\n"
3766             "    table->max = 0;\n"
3767             "    table->buf = NULL;\n"
3768             "}\n"
3769             "\n"
3770             "MARK_FUNC_AS_USED\n"
3771             "static void pcc_capture_table__resize(pcc_auxil_t auxil, pcc_capture_table_t *table, size_t len) {\n"
3772             "    size_t i;\n"
3773             "    for (i = len; i < table->len; i++) PCC_FREE(auxil, table->buf[i].string);\n"
3774             "    if (table->max < len) {\n"
3775             "        size_t m = table->max;\n"
3776             "        if (m == 0) m = PCC_ARRAY_MIN_SIZE;\n"
3777             "        while (m < len && m != 0) m <<= 1;\n"
3778             "        if (m == 0) m = len;\n"
3779             "        table->buf = (pcc_capture_t *)PCC_REALLOC(auxil, table->buf, sizeof(pcc_capture_t) * m);\n"
3780             "        table->max = m;\n"
3781             "    }\n"
3782             "    for (i = table->len; i < len; i++) {\n"
3783             "        table->buf[i].range.start = 0;\n"
3784             "        table->buf[i].range.end = 0;\n"
3785             "        table->buf[i].string = NULL;\n"
3786             "    }\n"
3787             "    table->len = len;\n"
3788             "}\n"
3789             "\n"
3790             "static void pcc_capture_table__term(pcc_auxil_t auxil, pcc_capture_table_t *table) {\n"
3791             "    while (table->len > 0) {\n"
3792             "        table->len--;\n"
3793             "        PCC_FREE(auxil, table->buf[table->len].string);\n"
3794             "    }\n"
3795             "    PCC_FREE(auxil, table->buf);\n"
3796             "}\n"
3797             "\n"
3798         );
3799         stream__puts(
             /*
              * Emit the generated parser's pcc_capture_const_table helpers
              * (a growable array of `const pcc_capture_t *`):
              *   __init   - resets len/max to 0 and buf to NULL.
              *   __resize - grows buf when the capacity is below len (capacity
              *              starts at PCC_ARRAY_MIN_SIZE and doubles; falls
              *              back to len if the doubling wraps to 0), sets the
              *              slots in [old len, len) to NULL and updates len.
              *              buf is cast to a non-const pointer type before it
              *              is handed to PCC_REALLOC.
              *   __term   - frees buf only (through a void * cast); the
              *              captures the slots point to are not freed here.
              */
3800             &sstream,
3801             "static void pcc_capture_const_table__init(pcc_auxil_t auxil, pcc_capture_const_table_t *table) {\n"
3802             "    table->len = 0;\n"
3803             "    table->max = 0;\n"
3804             "    table->buf = NULL;\n"
3805             "}\n"
3806             "\n"
3807             "static void pcc_capture_const_table__resize(pcc_auxil_t auxil, pcc_capture_const_table_t *table, size_t len) {\n"
3808             "    size_t i;\n"
3809             "    if (table->max < len) {\n"
3810             "        size_t m = table->max;\n"
3811             "        if (m == 0) m = PCC_ARRAY_MIN_SIZE;\n"
3812             "        while (m < len && m != 0) m <<= 1;\n"
3813             "        if (m == 0) m = len;\n"
3814             "        table->buf = (const pcc_capture_t **)PCC_REALLOC(auxil, (pcc_capture_t **)table->buf, sizeof(const pcc_capture_t *) * m);\n"
3815             "        table->max = m;\n"
3816             "    }\n"
3817             "    for (i = table->len; i < len; i++) table->buf[i] = NULL;\n"
3818             "    table->len = len;\n"
3819             "}\n"
3820             "\n"
3821             "static void pcc_capture_const_table__term(pcc_auxil_t auxil, pcc_capture_const_table_t *table) {\n"
3822             "    PCC_FREE(auxil, (void *)table->buf);\n"
3823             "}\n"
3824             "\n"
3825         );
3826         stream__puts(
             /*
              * Emit the generated parser's thunk constructors and destructor:
              *   pcc_thunk__create_leaf - allocates a thunk with PCC_MALLOC,
              *       tags it PCC_THUNK_LEAF, sizes its value-reference table to
              *       valuec and its capture-reference table to captc (all
              *       slots NULL), zeroes capt0 and stores the action.  The
              *       emitted text marks it with MARK_FUNC_AS_USED.
              *   pcc_thunk__create_node - allocates a thunk tagged
              *       PCC_THUNK_NODE that stores the given thunk-array pointer
              *       and value pointer as-is.
              *   pcc_thunk__destroy     - NULL-tolerant; for a leaf it frees
              *       capt0.string and terminates both tables; for a node it
              *       releases nothing beyond the thunk itself; finally frees
              *       the thunk with PCC_FREE.
              */
3827             &sstream,
3828             "MARK_FUNC_AS_USED\n"
3829             "static pcc_thunk_t *pcc_thunk__create_leaf(pcc_auxil_t auxil, pcc_action_t action, size_t valuec, size_t captc) {\n"
3830             "    pcc_thunk_t *const thunk = (pcc_thunk_t *)PCC_MALLOC(auxil, sizeof(pcc_thunk_t));\n"
3831             "    thunk->type = PCC_THUNK_LEAF;\n"
3832             "    pcc_value_refer_table__init(auxil, &thunk->data.leaf.values);\n"
3833             "    pcc_value_refer_table__resize(auxil, &thunk->data.leaf.values, valuec);\n"
3834             "    pcc_capture_const_table__init(auxil, &thunk->data.leaf.capts);\n"
3835             "    pcc_capture_const_table__resize(auxil, &thunk->data.leaf.capts, captc);\n"
3836             "    thunk->data.leaf.capt0.range.start = 0;\n"
3837             "    thunk->data.leaf.capt0.range.end = 0;\n"
3838             "    thunk->data.leaf.capt0.string = NULL;\n"
3839             "    thunk->data.leaf.action = action;\n"
3840             "    return thunk;\n"
3841             "}\n"
3842             "\n"
3843             "static pcc_thunk_t *pcc_thunk__create_node(pcc_auxil_t auxil, const pcc_thunk_array_t *thunks, pcc_value_t *value) {\n"
3844             "    pcc_thunk_t *const thunk = (pcc_thunk_t *)PCC_MALLOC(auxil, sizeof(pcc_thunk_t));\n"
3845             "    thunk->type = PCC_THUNK_NODE;\n"
3846             "    thunk->data.node.thunks = thunks;\n"
3847             "    thunk->data.node.value = value;\n"
3848             "    return thunk;\n"
3849             "}\n"
3850             "\n"
3851             "static void pcc_thunk__destroy(pcc_auxil_t auxil, pcc_thunk_t *thunk) {\n"
3852             "    if (thunk == NULL) return;\n"
3853             "    switch (thunk->type) {\n"
3854             "    case PCC_THUNK_LEAF:\n"
3855             "        PCC_FREE(auxil, thunk->data.leaf.capt0.string);\n"
3856             "        pcc_capture_const_table__term(auxil, &thunk->data.leaf.capts);\n"
3857             "        pcc_value_refer_table__term(auxil, &thunk->data.leaf.values);\n"
3858             "        break;\n"
3859             "    case PCC_THUNK_NODE:\n"
3860             "        break;\n"
3861             "    default: /* unknown */\n"
3862             "        break;\n"
3863             "    }\n"
3864             "    PCC_FREE(auxil, thunk);\n"
3865             "}\n"
3866             "\n"
3867         );
3868         stream__puts(
             /*
              * Emit the generated parser's pcc_thunk_array helpers
              * (a growable array of pcc_thunk_t pointers):
              *   __init   - resets len/max to 0 and buf to NULL.
              *   __add    - appends a thunk, growing buf when it is full
              *              (capacity starts at PCC_ARRAY_MIN_SIZE and doubles;
              *              falls back to len + 1 if the doubling wraps to 0).
              *   __revert - destroys thunks from the end until the array
              *              length is no greater than the given len.
              *   __term   - destroys every remaining thunk, then frees buf.
              */
3869             &sstream,
3870             "static void pcc_thunk_array__init(pcc_auxil_t auxil, pcc_thunk_array_t *array) {\n"
3871             "    array->len = 0;\n"
3872             "    array->max = 0;\n"
3873             "    array->buf = NULL;\n"
3874             "}\n"
3875             "\n"
3876             "static void pcc_thunk_array__add(pcc_auxil_t auxil, pcc_thunk_array_t *array, pcc_thunk_t *thunk) {\n"
3877             "    if (array->max <= array->len) {\n"
3878             "        const size_t n = array->len + 1;\n"
3879             "        size_t m = array->max;\n"
3880             "        if (m == 0) m = PCC_ARRAY_MIN_SIZE;\n"
3881             "        while (m < n && m != 0) m <<= 1;\n"
3882             "        if (m == 0) m = n;\n"
3883             "        array->buf = (pcc_thunk_t **)PCC_REALLOC(auxil, array->buf, sizeof(pcc_thunk_t *) * m);\n"
3884             "        array->max = m;\n"
3885             "    }\n"
3886             "    array->buf[array->len++] = thunk;\n"
3887             "}\n"
3888             "\n"
3889             "static void pcc_thunk_array__revert(pcc_auxil_t auxil, pcc_thunk_array_t *array, size_t len) {\n"
3890             "    while (array->len > len) {\n"
3891             "        array->len--;\n"
3892             "        pcc_thunk__destroy(auxil, array->buf[array->len]);\n"
3893             "    }\n"
3894             "}\n"
3895             "\n"
3896             "static void pcc_thunk_array__term(pcc_auxil_t auxil, pcc_thunk_array_t *array) {\n"
3897             "    while (array->len > 0) {\n"
3898             "        array->len--;\n"
3899             "        pcc_thunk__destroy(auxil, array->buf[array->len]);\n"
3900             "    }\n"
3901             "    PCC_FREE(auxil, array->buf);\n"
3902             "}\n"
3903             "\n"
3904         );
3905         stream__puts(
             /*
              * Emit the generated parser's pcc_memory_recycler helpers, a
              * fixed-element-size allocator built from a list of pools plus a
              * list of recycled entries:
              *   __init    - empties both lists and records element_size.
              *   __supply  - returns the head of entry_list when one exists.
              *               Otherwise it takes an element from the newest
              *               pool, first allocating a new pool (PCC_MALLOC of
              *               the pool header plus element_size * size bytes)
              *               when there is no pool or the newest one has no
              *               unused element.  The first pool holds
              *               PCC_POOL_MIN_SIZE elements; each later pool holds
              *               twice the previous pool's `allocated` count, or
              *               the same count if the shift yields 0.  Elements
              *               are addressed right after the pool header and are
              *               handed out from the highest index downwards.
              *   __recycle - pushes ptr onto entry_list for reuse; nothing is
              *               freed.
              *   __term    - frees every pool with PCC_FREE.
              */
3906             &sstream,
3907             "static void pcc_memory_recycler__init(pcc_auxil_t auxil, pcc_memory_recycler_t *recycler, size_t element_size) {\n"
3908             "    recycler->pool_list = NULL;\n"
3909             "    recycler->entry_list = NULL;\n"
3910             "    recycler->element_size = element_size;\n"
3911             "}\n"
3912             "\n"
3913             "static void *pcc_memory_recycler__supply(pcc_auxil_t auxil, pcc_memory_recycler_t *recycler) {\n"
3914             "    if (recycler->entry_list) {\n"
3915             "        pcc_memory_entry_t *const tmp = recycler->entry_list;\n"
3916             "        recycler->entry_list = tmp->next;\n"
3917             "        return tmp;\n"
3918             "    }\n"
3919             "    if (!recycler->pool_list || recycler->pool_list->unused == 0) {\n"
3920             "        size_t size = PCC_POOL_MIN_SIZE;\n"
3921             "        if (recycler->pool_list) {\n"
3922             "            size = recycler->pool_list->allocated << 1;\n"
3923             "            if (size == 0) size = recycler->pool_list->allocated;\n"
3924             "        }\n"
3925             "        {\n"
3926             "            pcc_memory_pool_t *const pool = (pcc_memory_pool_t *)PCC_MALLOC(\n"
3927             "                auxil, sizeof(pcc_memory_pool_t) + recycler->element_size * size\n"
3928             "            );\n"
3929             "            pool->allocated = size;\n"
3930             "            pool->unused = size;\n"
3931             "            pool->next = recycler->pool_list;\n"
3932             "            recycler->pool_list = pool;\n"
3933             "        }\n"
3934             "    }\n"
3935             "    recycler->pool_list->unused--;\n"
3936             "    return (char *)recycler->pool_list + sizeof(pcc_memory_pool_t) + recycler->element_size * recycler->pool_list->unused;\n"
3937             "}\n"
3938             "\n"
3939             "static void pcc_memory_recycler__recycle(pcc_auxil_t auxil, pcc_memory_recycler_t *recycler, void *ptr) {\n"
3940             "    pcc_memory_entry_t *const tmp = (pcc_memory_entry_t *)ptr;\n"
3941             "    tmp->next = recycler->entry_list;\n"
3942             "    recycler->entry_list = tmp;\n"
3943             "}\n"
3944             "\n"
3945             "static void pcc_memory_recycler__term(pcc_auxil_t auxil, pcc_memory_recycler_t *recycler) {\n"
3946             "    while (recycler->pool_list) {\n"
3947             "        pcc_memory_pool_t *const tmp = recycler->pool_list;\n"
3948             "        recycler->pool_list = tmp->next;\n"
3949             "        PCC_FREE(auxil, tmp);\n"
3950             "    }\n"
3951             "}\n"
3952             "\n"
3953         );
3954         stream__puts(
             /*
              * Emit the generated parser's thunk-chunk lifecycle functions:
              *   pcc_thunk_chunk__create  - takes a chunk from
              *       ctx->thunk_chunk_recycler, initializes its value table,
              *       capture table and thunk array, and sets pos to 0.  The
              *       emitted text marks it with MARK_FUNC_AS_USED.
              *   pcc_thunk_chunk__destroy - NULL-tolerant; terminates the
              *       thunk array, capture table and value table, then hands
              *       the chunk back to the recycler instead of freeing it.
              */
3955             &sstream,
3956             "MARK_FUNC_AS_USED\n"
3957             "static pcc_thunk_chunk_t *pcc_thunk_chunk__create(pcc_context_t *ctx) {\n"
3958             "    pcc_thunk_chunk_t *const chunk = (pcc_thunk_chunk_t *)pcc_memory_recycler__supply(ctx->auxil, &ctx->thunk_chunk_recycler);\n"
3959             "    pcc_value_table__init(ctx->auxil, &chunk->values);\n"
3960             "    pcc_capture_table__init(ctx->auxil, &chunk->capts);\n"
3961             "    pcc_thunk_array__init(ctx->auxil, &chunk->thunks);\n"
3962             "    chunk->pos = 0;\n"
3963             "    return chunk;\n"
3964             "}\n"
3965             "\n"
3966             "static void pcc_thunk_chunk__destroy(pcc_context_t *ctx, pcc_thunk_chunk_t *chunk) {\n"
3967             "    if (chunk == NULL) return;\n"
3968             "    pcc_thunk_array__term(ctx->auxil, &chunk->thunks);\n"
3969             "    pcc_capture_table__term(ctx->auxil, &chunk->capts);\n"
3970             "    pcc_value_table__term(ctx->auxil, &chunk->values);\n"
3971             "    pcc_memory_recycler__recycle(ctx->auxil, &ctx->thunk_chunk_recycler, chunk);\n"
3972             "}\n"
3973             "\n"
3974         );
3975         stream__puts(
             /*
              * Emit the generated parser's pcc_rule_set helpers (an unordered
              * set of pcc_rule_t stored in a growable array):
              *   __init   - resets len/max to 0 and buf to NULL.
              *   __index  - linear search; returns the position of rule or
              *              PCC_VOID_VALUE when it is absent.
              *   __add    - returns PCC_FALSE if rule is already present;
              *              otherwise appends it (capacity starts at
              *              PCC_ARRAY_MIN_SIZE and doubles; falls back to
              *              len + 1 if the doubling wraps to 0) and returns
              *              PCC_TRUE.
              *   __remove - returns PCC_FALSE if rule is absent; otherwise
              *              closes the gap with memmove, decrements len and
              *              returns PCC_TRUE.  The decrement is essential:
              *              without it the set never shrinks, and removing
              *              the last element would leave the rule in place.
              *   __clear  - sets len to 0, keeping the buffer.
              *   __copy   - clears set, then adds every element of src.
              *   __term   - frees buf.
              */
3976             &sstream,
3977             "static void pcc_rule_set__init(pcc_auxil_t auxil, pcc_rule_set_t *set) {\n"
3978             "    set->len = 0;\n"
3979             "    set->max = 0;\n"
3980             "    set->buf = NULL;\n"
3981             "}\n"
3982             "\n"
3983             "static size_t pcc_rule_set__index(pcc_auxil_t auxil, const pcc_rule_set_t *set, pcc_rule_t rule) {\n"
3984             "    size_t i;\n"
3985             "    for (i = 0; i < set->len; i++) {\n"
3986             "        if (set->buf[i] == rule) return i;\n"
3987             "    }\n"
3988             "    return PCC_VOID_VALUE;\n"
3989             "}\n"
3990             "\n"
3991             "static pcc_bool_t pcc_rule_set__add(pcc_auxil_t auxil, pcc_rule_set_t *set, pcc_rule_t rule) {\n"
3992             "    const size_t i = pcc_rule_set__index(auxil, set, rule);\n"
3993             "    if (i != PCC_VOID_VALUE) return PCC_FALSE;\n"
3994             "    if (set->max <= set->len) {\n"
3995             "        const size_t n = set->len + 1;\n"
3996             "        size_t m = set->max;\n"
3997             "        if (m == 0) m = PCC_ARRAY_MIN_SIZE;\n"
3998             "        while (m < n && m != 0) m <<= 1;\n"
3999             "        if (m == 0) m = n;\n"
4000             "        set->buf = (pcc_rule_t *)PCC_REALLOC(auxil, set->buf, sizeof(pcc_rule_t) * m);\n"
4001             "        set->max = m;\n"
4002             "    }\n"
4003             "    set->buf[set->len++] = rule;\n"
4004             "    return PCC_TRUE;\n"
4005             "}\n"
4006             "\n"
4007             "static pcc_bool_t pcc_rule_set__remove(pcc_auxil_t auxil, pcc_rule_set_t *set, pcc_rule_t rule) {\n"
4008             "    const size_t i = pcc_rule_set__index(auxil, set, rule);\n"
4009             "    if (i == PCC_VOID_VALUE) return PCC_FALSE;\n"
4010             "    memmove(set->buf + i, set->buf + (i + 1), sizeof(pcc_rule_t) * (set->len - (i + 1)));\n"
                 /* Drop the now-duplicated tail slot so the element is really gone. */
                 "    set->len--;\n"
4011             "    return PCC_TRUE;\n"
4012             "}\n"
4013             "\n"
4014             "static void pcc_rule_set__clear(pcc_auxil_t auxil, pcc_rule_set_t *set) {\n"
4015             "    set->len = 0;\n"
4016             "}\n"
4017             "\n"
4018             "static void pcc_rule_set__copy(pcc_auxil_t auxil, pcc_rule_set_t *set, const pcc_rule_set_t *src) {\n"
4019             "    size_t i;\n"
4020             "    pcc_rule_set__clear(auxil, set);\n"
4021             "    for (i = 0; i < src->len; i++) {\n"
4022             "        pcc_rule_set__add(auxil, set, src->buf[i]);\n"
4023             "    }\n"
4024             "}\n"
4025             "\n"
4026             "static void pcc_rule_set__term(pcc_auxil_t auxil, pcc_rule_set_t *set) {\n"
4027             "    PCC_FREE(auxil, set->buf);\n"
4028             "}\n"
4029             "\n"
4030         );
4031         stream__puts(
             /*
              * Emit the generated parser's left-recursion head lifecycle
              * functions:
              *   pcc_lr_head__create  - takes a head from ctx->lr_head_recycler,
              *       stores rule, initializes the `invol` and `eval` rule sets
              *       as empty and sets hold to NULL.
              *   pcc_lr_head__destroy - NULL-tolerant; recursively destroys the
              *       chain linked through head->hold, terminates both rule
              *       sets and returns the head to the recycler.
              */
4032             &sstream,
4033             "static pcc_lr_head_t *pcc_lr_head__create(pcc_context_t *ctx, pcc_rule_t rule) {\n"
4034             "    pcc_lr_head_t *const head = (pcc_lr_head_t *)pcc_memory_recycler__supply(ctx->auxil, &ctx->lr_head_recycler);\n"
4035             "    head->rule = rule;\n"
4036             "    pcc_rule_set__init(ctx->auxil, &head->invol);\n"
4037             "    pcc_rule_set__init(ctx->auxil, &head->eval);\n"
4038             "    head->hold = NULL;\n"
4039             "    return head;\n"
4040             "}\n"
4041             "\n"
4042             "static void pcc_lr_head__destroy(pcc_context_t *ctx, pcc_lr_head_t *head) {\n"
4043             "    if (head == NULL) return;\n"
4044             "    pcc_lr_head__destroy(ctx, head->hold);\n"
4045             "    pcc_rule_set__term(ctx->auxil, &head->eval);\n"
4046             "    pcc_rule_set__term(ctx->auxil, &head->invol);\n"
4047             "    pcc_memory_recycler__recycle(ctx->auxil, &ctx->lr_head_recycler, head);\n"
4048             "}\n"
4049             "\n"
4050         );
4051         stream__puts(
             /*
              * Emit the generated parser's left-recursion answer functions
              * (preceded by a forward declaration of pcc_lr_entry__destroy):
              *   pcc_lr_answer__create    - takes an answer from
              *       ctx->lr_answer_recycler, stores type and pos, clears hold
              *       and the payload matching the type.  For an unknown type
              *       the answer is handed back to the recycler and NULL is
              *       returned.  It must not be released with PCC_FREE: memory
              *       supplied by the recycler is either a recycled entry or an
              *       interior pointer into a pool, never an individual
              *       allocation.
              *   pcc_lr_answer__set_chunk - saves the answer's current type
              *       and payload in a new answer pushed onto its hold chain,
              *       then turns the answer into a PCC_LR_ANSWER_CHUNK that
              *       refers to chunk.
              *   pcc_lr_answer__destroy   - walks the hold chain; destroys
              *       each answer's payload according to its type and returns
              *       the answer to the recycler.
              */
4052             &sstream,
4053             "static void pcc_lr_entry__destroy(pcc_auxil_t auxil, pcc_lr_entry_t *lr);\n"
4054             "\n"
4055             "static pcc_lr_answer_t *pcc_lr_answer__create(pcc_context_t *ctx, pcc_lr_answer_type_t type, size_t pos) {\n"
4056             "    pcc_lr_answer_t *answer = (pcc_lr_answer_t *)pcc_memory_recycler__supply(ctx->auxil, &ctx->lr_answer_recycler);\n"
4057             "    answer->type = type;\n"
4058             "    answer->pos = pos;\n"
4059             "    answer->hold = NULL;\n"
4060             "    switch (answer->type) {\n"
4061             "    case PCC_LR_ANSWER_LR:\n"
4062             "        answer->data.lr = NULL;\n"
4063             "        break;\n"
4064             "    case PCC_LR_ANSWER_CHUNK:\n"
4065             "        answer->data.chunk = NULL;\n"
4066             "        break;\n"
4067             "    default: /* unknown */\n"
4068             "        pcc_memory_recycler__recycle(ctx->auxil, &ctx->lr_answer_recycler, answer);\n"
4069             "        answer = NULL;\n"
4070             "    }\n"
4071             "    return answer;\n"
4072             "}\n"
4073             "\n"
4074             "static void pcc_lr_answer__set_chunk(pcc_context_t *ctx, pcc_lr_answer_t *answer, pcc_thunk_chunk_t *chunk) {\n"
4075             "    pcc_lr_answer_t *const a = pcc_lr_answer__create(ctx, answer->type, answer->pos);\n"
4076             "    switch (answer->type) {\n"
4077             "    case PCC_LR_ANSWER_LR:\n"
4078             "        a->data.lr = answer->data.lr;\n"
4079             "        break;\n"
4080             "    case PCC_LR_ANSWER_CHUNK:\n"
4081             "        a->data.chunk = answer->data.chunk;\n"
4082             "        break;\n"
4083             "    default: /* unknown */\n"
4084             "        break;\n"
4085             "    }\n"
4086             "    a->hold = answer->hold;\n"
4087             "    answer->hold = a;\n"
4088             "    answer->type = PCC_LR_ANSWER_CHUNK;\n"
4089             "    answer->data.chunk = chunk;\n"
4090             "}\n"
4091             "\n"
4092             "static void pcc_lr_answer__destroy(pcc_context_t *ctx, pcc_lr_answer_t *answer) {\n"
4093             "    while (answer != NULL) {\n"
4094             "        pcc_lr_answer_t *const a = answer->hold;\n"
4095             "        switch (answer->type) {\n"
4096             "        case PCC_LR_ANSWER_LR:\n"
4097             "            pcc_lr_entry__destroy(ctx->auxil, answer->data.lr);\n"
4098             "            break;\n"
4099             "        case PCC_LR_ANSWER_CHUNK:\n"
4100             "            pcc_thunk_chunk__destroy(ctx, answer->data.chunk);\n"
4101             "            break;\n"
4102             "        default: /* unknown */\n"
4103             "            break;\n"
4104             "        }\n"
4105             "        pcc_memory_recycler__recycle(ctx->auxil, &ctx->lr_answer_recycler, answer);\n"
4106             "        answer = a;\n"
4107             "    }\n"
4108             "}\n"
4109             "\n"
4110         );
4111         stream__puts(
             /*
              * Emit the generated parser's pcc_lr_memo_map helpers (a growable
              * array of {rule, answer} pairs searched linearly by rule):
              *   __init  - resets len/max to 0 and buf to NULL.
              *   __index - returns the position of the pair for rule, or
              *             PCC_VOID_VALUE when there is none.
              *   __put   - if rule already has a pair, destroys its old answer
              *             and stores the new one; otherwise appends a new
              *             pair (capacity starts at PCC_ARRAY_MIN_SIZE and
              *             doubles; falls back to len + 1 if the doubling
              *             wraps to 0).
              *   __get   - returns the answer stored for rule, or NULL.
              *   __term  - destroys every stored answer, then frees buf.
              */
4112             &sstream,
4113             "static void pcc_lr_memo_map__init(pcc_auxil_t auxil, pcc_lr_memo_map_t *map) {\n"
4114             "    map->len = 0;\n"
4115             "    map->max = 0;\n"
4116             "    map->buf = NULL;\n"
4117             "}\n"
4118             "\n"
4119             "static size_t pcc_lr_memo_map__index(pcc_context_t *ctx, pcc_lr_memo_map_t *map, pcc_rule_t rule) {\n"
4120             "    size_t i;\n"
4121             "    for (i = 0; i < map->len; i++) {\n"
4122             "        if (map->buf[i].rule == rule) return i;\n"
4123             "    }\n"
4124             "    return PCC_VOID_VALUE;\n"
4125             "}\n"
4126             "\n"
4127             "static void pcc_lr_memo_map__put(pcc_context_t *ctx, pcc_lr_memo_map_t *map, pcc_rule_t rule, pcc_lr_answer_t *answer) {\n"
4128             "    const size_t i = pcc_lr_memo_map__index(ctx, map, rule);\n"
4129             "    if (i != PCC_VOID_VALUE) {\n"
4130             "        pcc_lr_answer__destroy(ctx, map->buf[i].answer);\n"
4131             "        map->buf[i].answer = answer;\n"
4132             "    }\n"
4133             "    else {\n"
4134             "        if (map->max <= map->len) {\n"
4135             "            const size_t n = map->len + 1;\n"
4136             "            size_t m = map->max;\n"
4137             "            if (m == 0) m = PCC_ARRAY_MIN_SIZE;\n"
4138             "            while (m < n && m != 0) m <<= 1;\n"
4139             "            if (m == 0) m = n;\n"
4140             "            map->buf = (pcc_lr_memo_t *)PCC_REALLOC(ctx->auxil, map->buf, sizeof(pcc_lr_memo_t) * m);\n"
4141             "            map->max = m;\n"
4142             "        }\n"
4143             "        map->buf[map->len].rule = rule;\n"
4144             "        map->buf[map->len].answer = answer;\n"
4145             "        map->len++;\n"
4146             "    }\n"
4147             "}\n"
4148             "\n"
4149             "static pcc_lr_answer_t *pcc_lr_memo_map__get(pcc_context_t *ctx, pcc_lr_memo_map_t *map, pcc_rule_t rule) {\n"
4150             "    const size_t i = pcc_lr_memo_map__index(ctx, map, rule);\n"
4151             "    return (i != PCC_VOID_VALUE) ? map->buf[i].answer : NULL;\n"
4152             "}\n"
4153             "\n"
4154             "static void pcc_lr_memo_map__term(pcc_context_t *ctx, pcc_lr_memo_map_t *map) {\n"
4155             "    while (map->len > 0) {\n"
4156             "        map->len--;\n"
4157             "        pcc_lr_answer__destroy(ctx, map->buf[map->len].answer);\n"
4158             "    }\n"
4159             "    PCC_FREE(ctx->auxil, map->buf);\n"
4160             "}\n"
4161             "\n"
4162         );
4163         stream__puts(
             /*
              * Emit the generated parser's LR table entry lifecycle functions:
              *   pcc_lr_table_entry__create  - allocates an entry with
              *       PCC_MALLOC, sets head, hold_a and hold_h to NULL and
              *       initializes the memo map as empty.
              *   pcc_lr_table_entry__destroy - NULL-tolerant; destroys the
              *       hold_h head chain, the hold_a answer chain and the memo
              *       map, then frees the entry.  entry->head is not destroyed
              *       by this function.
              */
4164             &sstream,
4165             "static pcc_lr_table_entry_t *pcc_lr_table_entry__create(pcc_context_t *ctx) {\n"
4166             "    pcc_lr_table_entry_t *const entry = (pcc_lr_table_entry_t *)PCC_MALLOC(ctx->auxil, sizeof(pcc_lr_table_entry_t));\n"
4167             "    entry->head = NULL;\n"
4168             "    pcc_lr_memo_map__init(ctx->auxil, &entry->memos);\n"
4169             "    entry->hold_a = NULL;\n"
4170             "    entry->hold_h = NULL;\n"
4171             "    return entry;\n"
4172             "}\n"
4173             "\n"
4174             "static void pcc_lr_table_entry__destroy(pcc_context_t *ctx, pcc_lr_table_entry_t *entry) {\n"
4175             "    if (entry == NULL) return;\n"
4176             "    pcc_lr_head__destroy(ctx, entry->hold_h);\n"
4177             "    pcc_lr_answer__destroy(ctx, entry->hold_a);\n"
4178             "    pcc_lr_memo_map__term(ctx, &entry->memos);\n"
4179             "    PCC_FREE(ctx->auxil, entry);\n"
4180             "}\n"
4181             "\n"
4182         );
4183         stream__puts(
             /*
              * Emit the generated parser's pcc_lr_table helpers: a growable
              * array of pcc_lr_table_entry_t pointers.  The public `index`
              * arguments are relative; every accessor adds table->ofs before
              * touching buf.
              *   __init        - resets ofs/len/max to 0 and buf to NULL.
              *   __resize      - destroys the entries in [len, old len), grows
              *                   buf when the capacity is below len (capacity
              *                   starts at PCC_ARRAY_MIN_SIZE and doubles;
              *                   falls back to len if the doubling wraps to
              *                   0), NULL-fills [old len, len) and sets len.
              *                   Here len is an absolute buf length.
              *   __set_head, __hold_head, __set_answer, __hold_answer -
              *                   extend the table to cover the slot and create
              *                   its entry on demand, then respectively: store
              *                   the head; push the head onto the entry's
              *                   hold_h chain; put the answer into the entry's
              *                   memo map under rule; push the answer onto the
              *                   entry's hold_a chain.
              *   __get_head, __get_answer - return NULL when the slot is out
              *                   of range or has no entry; otherwise the
              *                   entry's head / the memoized answer for rule.
              *   __shift       - destroys up to count entries starting at ofs
              *                   (clamped to the entries available) and
              *                   advances ofs past them; once ofs exceeds half
              *                   of max, moves the live entries to the front
              *                   of buf and resets ofs to 0.
              *   __term        - destroys the entries in [ofs, len) and frees
              *                   buf.
              */
4184             &sstream,
4185             "static void pcc_lr_table__init(pcc_auxil_t auxil, pcc_lr_table_t *table) {\n"
4186             "    table->ofs = 0;\n"
4187             "    table->len = 0;\n"
4188             "    table->max = 0;\n"
4189             "    table->buf = NULL;\n"
4190             "}\n"
4191             "\n"
4192             "static void pcc_lr_table__resize(pcc_context_t *ctx, pcc_lr_table_t *table, size_t len) {\n"
4193             "    size_t i;\n"
4194             "    for (i = len; i < table->len; i++) pcc_lr_table_entry__destroy(ctx, table->buf[i]);\n"
4195             "    if (table->max < len) {\n"
4196             "        size_t m = table->max;\n"
4197             "        if (m == 0) m = PCC_ARRAY_MIN_SIZE;\n"
4198             "        while (m < len && m != 0) m <<= 1;\n"
4199             "        if (m == 0) m = len;\n"
4200             "        table->buf = (pcc_lr_table_entry_t **)PCC_REALLOC(ctx->auxil, table->buf, sizeof(pcc_lr_table_entry_t *) * m);\n"
4201             "        table->max = m;\n"
4202             "    }\n"
4203             "    for (i = table->len; i < len; i++) table->buf[i] = NULL;\n"
4204             "    table->len = len;\n"
4205             "}\n"
4206             "\n"
4207             "static void pcc_lr_table__set_head(pcc_context_t *ctx, pcc_lr_table_t *table, size_t index, pcc_lr_head_t *head) {\n"
4208             "    index += table->ofs;\n"
4209             "    if (index >= table->len) pcc_lr_table__resize(ctx, table, index + 1);\n"
4210             "    if (table->buf[index] == NULL) table->buf[index] = pcc_lr_table_entry__create(ctx);\n"
4211             "    table->buf[index]->head = head;\n"
4212             "}\n"
4213             "\n"
4214             "static void pcc_lr_table__hold_head(pcc_context_t *ctx, pcc_lr_table_t *table, size_t index, pcc_lr_head_t *head) {\n"
4215             "    index += table->ofs;\n"
4216             "    if (index >= table->len) pcc_lr_table__resize(ctx, table, index + 1);\n"
4217             "    if (table->buf[index] == NULL) table->buf[index] = pcc_lr_table_entry__create(ctx);\n"
4218             "    head->hold = table->buf[index]->hold_h;\n"
4219             "    table->buf[index]->hold_h = head;\n"
4220             "}\n"
4221             "\n"
4222             "static void pcc_lr_table__set_answer(pcc_context_t *ctx, pcc_lr_table_t *table, size_t index, pcc_rule_t rule, pcc_lr_answer_t *answer) {\n"
4223             "    index += table->ofs;\n"
4224             "    if (index >= table->len) pcc_lr_table__resize(ctx, table, index + 1);\n"
4225             "    if (table->buf[index] == NULL) table->buf[index] = pcc_lr_table_entry__create(ctx);\n"
4226             "    pcc_lr_memo_map__put(ctx, &table->buf[index]->memos, rule, answer);\n"
4227             "}\n"
4228             "\n"
4229             "static void pcc_lr_table__hold_answer(pcc_context_t *ctx, pcc_lr_table_t *table, size_t index, pcc_lr_answer_t *answer) {\n"
4230             "    index += table->ofs;\n"
4231             "    if (index >= table->len) pcc_lr_table__resize(ctx, table, index + 1);\n"
4232             "    if (table->buf[index] == NULL) table->buf[index] = pcc_lr_table_entry__create(ctx);\n"
4233             "    answer->hold = table->buf[index]->hold_a;\n"
4234             "    table->buf[index]->hold_a = answer;\n"
4235             "}\n"
4236             "\n"
4237             "static pcc_lr_head_t *pcc_lr_table__get_head(pcc_context_t *ctx, pcc_lr_table_t *table, size_t index) {\n"
4238             "    index += table->ofs;\n"
4239             "    if (index >= table->len || table->buf[index] == NULL) return NULL;\n"
4240             "    return table->buf[index]->head;\n"
4241             "}\n"
4242             "\n"
4243             "static pcc_lr_answer_t *pcc_lr_table__get_answer(pcc_context_t *ctx, pcc_lr_table_t *table, size_t index, pcc_rule_t rule) {\n"
4244             "    index += table->ofs;\n"
4245             "    if (index >= table->len || table->buf[index] == NULL) return NULL;\n"
4246             "    return pcc_lr_memo_map__get(ctx, &table->buf[index]->memos, rule);\n"
4247             "}\n"
4248             "\n"
4249             "static void pcc_lr_table__shift(pcc_context_t *ctx, pcc_lr_table_t *table, size_t count) {\n"
4250             "    size_t i;\n"
4251             "    if (count > table->len - table->ofs) count = table->len - table->ofs;\n"
4252             "    for (i = 0; i < count; i++) pcc_lr_table_entry__destroy(ctx, table->buf[table->ofs++]);\n"
4253             "    if (table->ofs > (table->max >> 1)) {\n"
4254             "        memmove(table->buf, table->buf + table->ofs, sizeof(pcc_lr_table_entry_t *) * (table->len - table->ofs));\n"
4255             "        table->len -= table->ofs;\n"
4256             "        table->ofs = 0;\n"
4257             "    }\n"
4258             "}\n"
4259             "\n"
4260             "static void pcc_lr_table__term(pcc_context_t *ctx, pcc_lr_table_t *table) {\n"
4261             "    while (table->len > table->ofs) {\n"
4262             "        table->len--;\n"
4263             "        pcc_lr_table_entry__destroy(ctx, table->buf[table->len]);\n"
4264             "    }\n"
4265             "    PCC_FREE(ctx->auxil, table->buf);\n"
4266             "}\n"
4267             "\n"
4268         );
4269         stream__puts(
             /*
              * Emit the generated parser's LR entry lifecycle functions:
              *   pcc_lr_entry__create  - allocates an entry with PCC_MALLOC,
              *       stores rule and sets seed and head to NULL.
              *   pcc_lr_entry__destroy - frees the entry itself with PCC_FREE;
              *       seed and head are not released here.
              */
4270             &sstream,
4271             "static pcc_lr_entry_t *pcc_lr_entry__create(pcc_auxil_t auxil, pcc_rule_t rule) {\n"
4272             "    pcc_lr_entry_t *const lr = (pcc_lr_entry_t *)PCC_MALLOC(auxil, sizeof(pcc_lr_entry_t));\n"
4273             "    lr->rule = rule;\n"
4274             "    lr->seed = NULL;\n"
4275             "    lr->head = NULL;\n"
4276             "    return lr;\n"
4277             "}\n"
4278             "\n"
4279             "static void pcc_lr_entry__destroy(pcc_auxil_t auxil, pcc_lr_entry_t *lr) {\n"
4280             "    PCC_FREE(auxil, lr);\n"
4281             "}\n"
4282             "\n"
4283         );
4284         stream__puts(
             /*
              * Emit the generated parser's pcc_lr_stack helpers (a growable
              * stack of pcc_lr_entry_t pointers):
              *   __init - resets len/max to 0 and buf to NULL.
              *   __push - appends lr, growing buf when it is full (capacity
              *            starts at PCC_ARRAY_MIN_SIZE and doubles; falls back
              *            to len + 1 if the doubling wraps to 0).
              *   __pop  - decrements len and returns the entry at the new top;
              *            it performs no emptiness check, so the stack must be
              *            non-empty when it is called.
              *   __term - frees buf only; the entries are not destroyed here.
              */
4285             &sstream,
4286             "static void pcc_lr_stack__init(pcc_auxil_t auxil, pcc_lr_stack_t *stack) {\n"
4287             "    stack->len = 0;\n"
4288             "    stack->max = 0;\n"
4289             "    stack->buf = NULL;\n"
4290             "}\n"
4291             "\n"
4292             "static void pcc_lr_stack__push(pcc_auxil_t auxil, pcc_lr_stack_t *stack, pcc_lr_entry_t *lr) {\n"
4293             "    if (stack->max <= stack->len) {\n"
4294             "        const size_t n = stack->len + 1;\n"
4295             "        size_t m = stack->max;\n"
4296             "        if (m == 0) m = PCC_ARRAY_MIN_SIZE;\n"
4297             "        while (m < n && m != 0) m <<= 1;\n"
4298             "        if (m == 0) m = n;\n"
4299             "        stack->buf = (pcc_lr_entry_t **)PCC_REALLOC(auxil, stack->buf, sizeof(pcc_lr_entry_t *) * m);\n"
4300             "        stack->max = m;\n"
4301             "    }\n"
4302             "    stack->buf[stack->len++] = lr;\n"
4303             "}\n"
4304             "\n"
4305             "static pcc_lr_entry_t *pcc_lr_stack__pop(pcc_auxil_t auxil, pcc_lr_stack_t *stack) {\n"
4306             "    return stack->buf[--stack->len];\n"
4307             "}\n"
4308             "\n"
4309             "static void pcc_lr_stack__term(pcc_auxil_t auxil, pcc_lr_stack_t *stack) {\n"
4310             "    PCC_FREE(auxil, stack->buf);\n"
4311             "}\n"
4312             "\n"
4313         );
4314         stream__puts(
             /*
              * Emit pcc_context__create: allocates the parser context with
              * PCC_MALLOC, zeroes pos/cur/level, initializes the character
              * buffer, LR table, LR stack and thunk array, sets up one memory
              * recycler each for thunk chunks, LR heads and LR answers (sized
              * by the respective struct), stores auxil and returns the context.
              */
4315             &sstream,
4316             "static pcc_context_t *pcc_context__create(pcc_auxil_t auxil) {\n"
4317             "    pcc_context_t *const ctx = (pcc_context_t *)PCC_MALLOC(auxil, sizeof(pcc_context_t));\n"
4318             "    ctx->pos = 0;\n"
4319             "    ctx->cur = 0;\n"
4320             "    ctx->level = 0;\n"
4321             "    pcc_char_array__init(auxil, &ctx->buffer);\n"
4322             "    pcc_lr_table__init(auxil, &ctx->lrtable);\n"
4323             "    pcc_lr_stack__init(auxil, &ctx->lrstack);\n"
4324             "    pcc_thunk_array__init(auxil, &ctx->thunks);\n"
4325             "    pcc_memory_recycler__init(auxil, &ctx->thunk_chunk_recycler, sizeof(pcc_thunk_chunk_t));\n"
4326             "    pcc_memory_recycler__init(auxil, &ctx->lr_head_recycler, sizeof(pcc_lr_head_t));\n"
4327             "    pcc_memory_recycler__init(auxil, &ctx->lr_answer_recycler, sizeof(pcc_lr_answer_t));\n"
4328             "    ctx->auxil = auxil;\n"
4329             "    return ctx;\n"
4330             "}\n"
4331             "\n"
4332         );
4333         stream__puts(
             /*
              * Emit pcc_context__destroy: NULL-tolerant teardown of the parser
              * context.  The thunk array, LR stack, LR table and character
              * buffer are terminated first; the three memory recyclers are
              * terminated only afterwards, since the earlier steps hand
              * objects back to them; finally the context itself is freed.
              */
4334             &sstream,
4335             "static void pcc_context__destroy(pcc_context_t *ctx) {\n"
4336             "    if (ctx == NULL) return;\n"
4337             "    pcc_thunk_array__term(ctx->auxil, &ctx->thunks);\n"
4338             "    pcc_lr_stack__term(ctx->auxil, &ctx->lrstack);\n"
4339             "    pcc_lr_table__term(ctx, &ctx->lrtable);\n"
4340             "    pcc_char_array__term(ctx->auxil, &ctx->buffer);\n"
4341             "    pcc_memory_recycler__term(ctx->auxil, &ctx->thunk_chunk_recycler);\n"
4342             "    pcc_memory_recycler__term(ctx->auxil, &ctx->lr_head_recycler);\n"
4343             "    pcc_memory_recycler__term(ctx->auxil, &ctx->lr_answer_recycler);\n"
4344             "    PCC_FREE(ctx->auxil, ctx);\n"
4345             "}\n"
4346             "\n"
4347         );
4348         stream__puts(
             /*
              * Emit pcc_refill_buffer: tries to make at least num characters
              * available in ctx->buffer starting at ctx->cur.  If enough are
              * already buffered it returns immediately; otherwise it appends
              * characters obtained from PCC_GETCHAR until enough are present
              * or PCC_GETCHAR returns a negative value.  The return value is
              * the number of characters available from ctx->cur, which may be
              * less than num when the input ran out.
              */
4349             &sstream,
4350             "static size_t pcc_refill_buffer(pcc_context_t *ctx, size_t num) {\n"
4351             "    if (ctx->buffer.len >= ctx->cur + num) return ctx->buffer.len - ctx->cur;\n"
4352             "    while (ctx->buffer.len < ctx->cur + num) {\n"
4353             "        const int c = PCC_GETCHAR(ctx->auxil);\n"
4354             "        if (c < 0) break;\n"
4355             "        pcc_char_array__add(ctx->auxil, &ctx->buffer, (char)c);\n"
4356             "    }\n"
4357             "    return ctx->buffer.len - ctx->cur;\n"
4358             "}\n"
4359             "\n"
4360         );
4361         stream__puts(
4362             &sstream,
4363             "MARK_FUNC_AS_USED\n"
4364             "static void pcc_commit_buffer(pcc_context_t *ctx) {\n"
4365             "    memmove(ctx->buffer.buf, ctx->buffer.buf + ctx->cur, ctx->buffer.len - ctx->cur);\n"
4366             "    ctx->buffer.len -= ctx->cur;\n"
4367             "    ctx->pos += ctx->cur;\n"
4368             "    pcc_lr_table__shift(ctx, &ctx->lrtable, ctx->cur);\n"
4369             "    ctx->cur = 0;\n"
4370             "}\n"
4371             "\n"
4372         );
4373         stream__puts(
4374             &sstream,
4375             "MARK_FUNC_AS_USED\n"
4376             "static const char *pcc_get_capture_string(pcc_context_t *ctx, const pcc_capture_t *capt) {\n"
4377             "    if (capt->string == NULL)\n"
4378             "        ((pcc_capture_t *)capt)->string =\n"
4379             "            pcc_strndup_e(ctx->auxil, ctx->buffer.buf + capt->range.start, capt->range.end - capt->range.start);\n"
4380             "    return capt->string;\n"
4381             "}\n"
4382             "\n"
4383         );
4384         if (ctx->flags & CODE_FLAG__UTF8_CHARCLASS_USED) {
4385             stream__puts(
4386                 &sstream,
4387                 "static size_t pcc_get_char_as_utf32(pcc_context_t *ctx, int *out) { /* with checking UTF-8 validity */\n"
4388                 "    int c, u;\n"
4389                 "    size_t n;\n"
4390                 "    if (pcc_refill_buffer(ctx, 1) < 1) return 0;\n"
4391                 "    c = (int)(unsigned char)ctx->buffer.buf[ctx->cur];\n"
4392                 "    n = (c < 0x80) ? 1 :\n"
4393                 "        ((c & 0xe0) == 0xc0) ? 2 :\n"
4394                 "        ((c & 0xf0) == 0xe0) ? 3 :\n"
4395                 "        ((c & 0xf8) == 0xf0) ? 4 : 0;\n"
4396                 "    if (n < 1) return 0;\n"
4397                 "    if (pcc_refill_buffer(ctx, n) < n) return 0;\n"
4398                 "    switch (n) {\n"
4399                 "    case 1:\n"
4400                 "        u = c;\n"
4401                 "        break;\n"
4402                 "    case 2:\n"
4403                 "        u = c & 0x1f;\n"
4404                 "        c = (int)(unsigned char)ctx->buffer.buf[ctx->cur + 1];\n"
4405                 "        if ((c & 0xc0) != 0x80) return 0;\n"
4406                 "        u <<= 6; u |= c & 0x3f;\n"
4407                 "        if (u < 0x80) return 0;\n"
4408                 "        break;\n"
4409                 "    case 3:\n"
4410                 "        u = c & 0x0f;\n"
4411                 "        c = (int)(unsigned char)ctx->buffer.buf[ctx->cur + 1];\n"
4412                 "        if ((c & 0xc0) != 0x80) return 0;\n"
4413                 "        u <<= 6; u |= c & 0x3f;\n"
4414                 "        c = (int)(unsigned char)ctx->buffer.buf[ctx->cur + 2];\n"
4415                 "        if ((c & 0xc0) != 0x80) return 0;\n"
4416                 "        u <<= 6; u |= c & 0x3f;\n"
4417                 "        if (u < 0x800) return 0;\n"
4418                 "        break;\n"
4419                 "    case 4:\n"
4420                 "        u = c & 0x07;\n"
4421                 "        c = (int)(unsigned char)ctx->buffer.buf[ctx->cur + 1];\n"
4422                 "        if ((c & 0xc0) != 0x80) return 0;\n"
4423                 "        u <<= 6; u |= c & 0x3f;\n"
4424                 "        c = (int)(unsigned char)ctx->buffer.buf[ctx->cur + 2];\n"
4425                 "        if ((c & 0xc0) != 0x80) return 0;\n"
4426                 "        u <<= 6; u |= c & 0x3f;\n"
4427                 "        c = (int)(unsigned char)ctx->buffer.buf[ctx->cur + 3];\n"
4428                 "        if ((c & 0xc0) != 0x80) return 0;\n"
4429                 "        u <<= 6; u |= c & 0x3f;\n"
4430                 "        if (u < 0x10000 || u > 0x10ffff) return 0;\n"
4431                 "        break;\n"
4432                 "    default:\n"
4433                 "        return 0;\n"
4434                 "    }\n"
4435                 "    if (out) *out = u;\n"
4436                 "    return n;\n"
4437                 "}\n"
4438                 "\n"
4439             );
4440         }
4441         stream__puts(
4442             &sstream,
4443             "MARK_FUNC_AS_USED\n"
4444             "static pcc_bool_t pcc_apply_rule(pcc_context_t *ctx, pcc_rule_t rule, pcc_thunk_array_t *thunks, pcc_value_t *value) {\n"
4445             "    static pcc_value_t null;\n"
4446             "    pcc_thunk_chunk_t *c = NULL;\n"
4447             "    const size_t p = ctx->pos + ctx->cur;\n"
4448             "    pcc_bool_t b = PCC_TRUE;\n"
4449             "    pcc_lr_answer_t *a = pcc_lr_table__get_answer(ctx, &ctx->lrtable, p, rule);\n"
4450             "    pcc_lr_head_t *h = pcc_lr_table__get_head(ctx, &ctx->lrtable, p);\n"
4451             "    if (h != NULL) {\n"
4452             "        if (a == NULL && rule != h->rule && pcc_rule_set__index(ctx->auxil, &h->invol, rule) == PCC_VOID_VALUE) {\n"
4453             "            b = PCC_FALSE;\n"
4454             "            c = NULL;\n"
4455             "        }\n"
4456             "        else if (pcc_rule_set__remove(ctx->auxil, &h->eval, rule)) {\n"
4457             "            b = PCC_FALSE;\n"
4458             "            c = rule(ctx);\n"
4459             "            a = pcc_lr_answer__create(ctx, PCC_LR_ANSWER_CHUNK, ctx->pos + ctx->cur);\n"
4460             "            a->data.chunk = c;\n"
4461             "            pcc_lr_table__hold_answer(ctx, &ctx->lrtable, p, a);\n"
4462             "        }\n"
4463             "    }\n"
4464             "    if (b) {\n"
4465             "        if (a != NULL) {\n"
4466             "            ctx->cur = a->pos - ctx->pos;\n"
4467             "            switch (a->type) {\n"
4468             "            case PCC_LR_ANSWER_LR:\n"
4469             "                if (a->data.lr->head == NULL) {\n"
4470             "                    a->data.lr->head = pcc_lr_head__create(ctx, rule);\n"
4471             "                    pcc_lr_table__hold_head(ctx, &ctx->lrtable, p, a->data.lr->head);\n"
4472             "                }\n"
4473             "                {\n"
4474             "                    size_t i = ctx->lrstack.len;\n"
4475             "                    while (i > 0) {\n"
4476             "                        i--;\n"
4477             "                        if (ctx->lrstack.buf[i]->head == a->data.lr->head) break;\n"
4478             "                        ctx->lrstack.buf[i]->head = a->data.lr->head;\n"
4479             "                        pcc_rule_set__add(ctx->auxil, &a->data.lr->head->invol, ctx->lrstack.buf[i]->rule);\n"
4480             "                    }\n"
4481             "                }\n"
4482             "                c = a->data.lr->seed;\n"
4483             "                break;\n"
4484             "            case PCC_LR_ANSWER_CHUNK:\n"
4485             "                c = a->data.chunk;\n"
4486             "                break;\n"
4487             "            default: /* unknown */\n"
4488             "                break;\n"
4489             "            }\n"
4490             "        }\n"
4491             "        else {\n"
4492             "            pcc_lr_entry_t *const e = pcc_lr_entry__create(ctx->auxil, rule);\n"
4493             "            pcc_lr_stack__push(ctx->auxil, &ctx->lrstack, e);\n"
4494             "            a = pcc_lr_answer__create(ctx, PCC_LR_ANSWER_LR, p);\n"
4495             "            a->data.lr = e;\n"
4496             "            pcc_lr_table__set_answer(ctx, &ctx->lrtable, p, rule, a);\n"
4497             "            c = rule(ctx);\n"
4498             "            pcc_lr_stack__pop(ctx->auxil, &ctx->lrstack);\n"
4499             "            a->pos = ctx->pos + ctx->cur;\n"
4500             "            if (e->head == NULL) {\n"
4501             "                pcc_lr_answer__set_chunk(ctx, a, c);\n"
4502             "            }\n"
4503             "            else {\n"
4504             "                e->seed = c;\n"
4505             "                h = a->data.lr->head;\n"
4506             "                if (h->rule != rule) {\n"
4507             "                    c = a->data.lr->seed;\n"
4508             "                    a = pcc_lr_answer__create(ctx, PCC_LR_ANSWER_CHUNK, ctx->pos + ctx->cur);\n"
4509             "                    a->data.chunk = c;\n"
4510             "                    pcc_lr_table__hold_answer(ctx, &ctx->lrtable, p, a);\n"
4511             "                }\n"
4512             "                else {\n"
4513             "                    pcc_lr_answer__set_chunk(ctx, a, a->data.lr->seed);\n"
4514             "                    if (a->data.chunk == NULL) {\n"
4515             "                        c = NULL;\n"
4516             "                    }\n"
4517             "                    else {\n"
4518             "                        pcc_lr_table__set_head(ctx, &ctx->lrtable, p, h);\n"
4519             "                        for (;;) {\n"
4520             "                            ctx->cur = p - ctx->pos;\n"
4521             "                            pcc_rule_set__copy(ctx->auxil, &h->eval, &h->invol);\n"
4522             "                            c = rule(ctx);\n"
4523             "                            if (c == NULL || ctx->pos + ctx->cur <= a->pos) break;\n"
4524             "                            pcc_lr_answer__set_chunk(ctx, a, c);\n"
4525             "                            a->pos = ctx->pos + ctx->cur;\n"
4526             "                        }\n"
4527             "                        pcc_thunk_chunk__destroy(ctx, c);\n"
4528             "                        pcc_lr_table__set_head(ctx, &ctx->lrtable, p, NULL);\n"
4529             "                        ctx->cur = a->pos - ctx->pos;\n"
4530             "                        c = a->data.chunk;\n"
4531             "                    }\n"
4532             "                }\n"
4533             "            }\n"
4534             "        }\n"
4535             "    }\n"
4536             "    if (c == NULL) return PCC_FALSE;\n"
4537             "    if (value == NULL) value = &null;\n"
4538             "    memset(value, 0, sizeof(pcc_value_t)); /* in case */\n"
4539             "    pcc_thunk_array__add(ctx->auxil, thunks, pcc_thunk__create_node(ctx->auxil, &c->thunks, value));\n"
4540             "    return PCC_TRUE;\n"
4541             "}\n"
4542             "\n"
4543         );
4544         stream__puts(
4545             &sstream,
4546             "MARK_FUNC_AS_USED\n"
4547             "static void pcc_do_action(pcc_context_t *ctx, const pcc_thunk_array_t *thunks, pcc_value_t *value) {\n"
4548             "    size_t i;\n"
4549             "    for (i = 0; i < thunks->len; i++) {\n"
4550             "        pcc_thunk_t *const thunk = thunks->buf[i];\n"
4551             "        switch (thunk->type) {\n"
4552             "        case PCC_THUNK_LEAF:\n"
4553             "            thunk->data.leaf.action(ctx, thunk, value);\n"
4554             "            break;\n"
4555             "        case PCC_THUNK_NODE:\n"
4556             "            pcc_do_action(ctx, thunk->data.node.thunks, thunk->data.node.value);\n"
4557             "            break;\n"
4558             "        default: /* unknown */\n"
4559             "            break;\n"
4560             "        }\n"
4561             "    }\n"
4562             "}\n"
4563             "\n"
4564         );
4565         {
4566             size_t i, j, k;
4567             for (i = 0; i < ctx->rules.len; i++) {
4568                 const node_rule_t *const r = &ctx->rules.buf[i]->data.rule;
4569                 for (j = 0; j < r->codes.len; j++) {
4570                     const code_block_t *b;
4571                     size_t d;
4572                     const node_const_array_t *v, *c;
4573                     switch (r->codes.buf[j]->type) {
4574                     case NODE_ACTION:
4575                         b = &r->codes.buf[j]->data.action.code;
4576                         d = r->codes.buf[j]->data.action.index;
4577                         v = &r->codes.buf[j]->data.action.vars;
4578                         c = &r->codes.buf[j]->data.action.capts;
4579                         break;
4580                     case NODE_ERROR:
4581                         b = &r->codes.buf[j]->data.error.code;
4582                         d = r->codes.buf[j]->data.error.index;
4583                         v = &r->codes.buf[j]->data.error.vars;
4584                         c = &r->codes.buf[j]->data.error.capts;
4585                         break;
4586                     default:
4587                         print_error("Internal error [%d]\n", __LINE__);
4588                         exit(-1);
4589                     }
4590                     stream__printf(
4591                         &sstream,
4592                         "static void pcc_action_%s_" FMT_LU "(%s_context_t *__pcc_ctx, pcc_thunk_t *__pcc_in, pcc_value_t *__pcc_out) {\n",
4593                         r->name, (ulong_t)d, get_prefix(ctx)
4594                     );
4595                     stream__puts(
4596                         &sstream,
4597                         "#define auxil (__pcc_ctx->auxil)\n"
4598                         "#define __ (*__pcc_out)\n"
4599                     );
4600                     k = 0;
4601                     while (k < v->len) {
4602                         assert(v->buf[k]->type == NODE_REFERENCE);
4603                         stream__printf(
4604                             &sstream,
4605                             "#define %s (*__pcc_in->data.leaf.values.buf[" FMT_LU "])\n",
4606                             v->buf[k]->data.reference.var, (ulong_t)v->buf[k]->data.reference.index
4607                         );
4608                         k++;
4609                     }
4610                     stream__puts(
4611                         &sstream,
4612                         "#define _0 pcc_get_capture_string(__pcc_ctx, &__pcc_in->data.leaf.capt0)\n"
4613                         "#define _0s ((const size_t)(__pcc_ctx->pos + __pcc_in->data.leaf.capt0.range.start))\n"
4614                         "#define _0e ((const size_t)(__pcc_ctx->pos + __pcc_in->data.leaf.capt0.range.end))\n"
4615                     );
4616                     k = 0;
4617                     while (k < c->len) {
4618                         assert(c->buf[k]->type == NODE_CAPTURE);
4619                         stream__printf(
4620                             &sstream,
4621                             "#define _" FMT_LU " pcc_get_capture_string(__pcc_ctx, __pcc_in->data.leaf.capts.buf[" FMT_LU "])\n",
4622                             (ulong_t)(c->buf[k]->data.capture.index + 1), (ulong_t)c->buf[k]->data.capture.index
4623                         );
4624                         stream__printf(
4625                             &sstream,
4626                             "#define _" FMT_LU "s ((const size_t)(__pcc_ctx->pos + __pcc_in->data.leaf.capts.buf[" FMT_LU "]->range.start))\n",
4627                             (ulong_t)(c->buf[k]->data.capture.index + 1), (ulong_t)c->buf[k]->data.capture.index
4628                         );
4629                         stream__printf(
4630                             &sstream,
4631                             "#define _" FMT_LU "e ((const size_t)(__pcc_ctx->pos + __pcc_in->data.leaf.capts.buf[" FMT_LU "]->range.end))\n",
4632                             (ulong_t)(c->buf[k]->data.capture.index + 1), (ulong_t)c->buf[k]->data.capture.index
4633                         );
4634                         k++;
4635                     }
4636                     stream__write_code_block(&sstream, b->text, b->len, 4, ctx->iname, b->line);
4637                     k = c->len;
4638                     while (k > 0) {
4639                         k--;
4640                         assert(c->buf[k]->type == NODE_CAPTURE);
4641                         stream__printf(
4642                             &sstream,
4643                             "#undef _" FMT_LU "e\n",
4644                             (ulong_t)(c->buf[k]->data.capture.index + 1)
4645                         );
4646                         stream__printf(
4647                             &sstream,
4648                             "#undef _" FMT_LU "s\n",
4649                             (ulong_t)(c->buf[k]->data.capture.index + 1)
4650                         );
4651                         stream__printf(
4652                             &sstream,
4653                             "#undef _" FMT_LU "\n",
4654                             (ulong_t)(c->buf[k]->data.capture.index + 1)
4655                         );
4656                     }
4657                     stream__puts(
4658                         &sstream,
4659                         "#undef _0e\n"
4660                         "#undef _0s\n"
4661                         "#undef _0\n"
4662                     );
4663                     k = v->len;
4664                     while (k > 0) {
4665                         k--;
4666                         assert(v->buf[k]->type == NODE_REFERENCE);
4667                         stream__printf(
4668                             &sstream,
4669                             "#undef %s\n",
4670                             v->buf[k]->data.reference.var
4671                         );
4672                     }
4673                     stream__puts(
4674                         &sstream,
4675                         "#undef __\n"
4676                         "#undef auxil\n"
4677                     );
4678                     stream__puts(
4679                         &sstream,
4680                         "}\n"
4681                         "\n"
4682                     );
4683                 }
4684             }
4685         }
4686         {
4687             size_t i;
4688             for (i = 0; i < ctx->rules.len; i++) {
4689                 stream__printf(
4690                     &sstream,
4691                     "static pcc_thunk_chunk_t *pcc_evaluate_rule_%s(pcc_context_t *ctx);\n",
4692                     ctx->rules.buf[i]->data.rule.name
4693                 );
4694             }
4695             stream__puts(
4696                 &sstream,
4697                 "\n"
4698             );
4699             for (i = 0; i < ctx->rules.len; i++) {
4700                 code_reach_t r;
4701                 generate_t g;
4702                 g.stream = &sstream;
4703                 g.rule = ctx->rules.buf[i];
4704                 g.label = 0;
4705                 g.ascii = ctx->opts.ascii;
4706                 stream__printf(
4707                     &sstream,
4708                     "static pcc_thunk_chunk_t *pcc_evaluate_rule_%s(pcc_context_t *ctx) {\n",
4709                     ctx->rules.buf[i]->data.rule.name
4710                 );
4711                 stream__printf(
4712                     &sstream,
4713                     "    pcc_thunk_chunk_t *const chunk = pcc_thunk_chunk__create(ctx);\n"
4714                     "    chunk->pos = ctx->cur;\n"
4715                     "    PCC_DEBUG(ctx->auxil, PCC_DBG_EVALUATE, \"%s\", ctx->level, chunk->pos, (ctx->buffer.buf + chunk->pos), (ctx->buffer.len - chunk->pos));\n"
4716                     "    ctx->level++;\n",
4717                     ctx->rules.buf[i]->data.rule.name
4718                 );
4719                 stream__printf(
4720                     &sstream,
4721                     "    pcc_value_table__resize(ctx->auxil, &chunk->values, " FMT_LU ");\n",
4722                     (ulong_t)ctx->rules.buf[i]->data.rule.vars.len
4723                 );
4724                 stream__printf(
4725                     &sstream,
4726                     "    pcc_capture_table__resize(ctx->auxil, &chunk->capts, " FMT_LU ");\n",
4727                     (ulong_t)ctx->rules.buf[i]->data.rule.capts.len
4728                 );
4729                 if (ctx->rules.buf[i]->data.rule.vars.len > 0) {
4730                     stream__puts(
4731                         &sstream,
4732                         "    pcc_value_table__clear(ctx->auxil, &chunk->values);\n"
4733                     );
4734                 }
4735                 r = generate_code(&g, ctx->rules.buf[i]->data.rule.expr, 0, 4, FALSE);
4736                 stream__printf(
4737                     &sstream,
4738                     "    ctx->level--;\n"
4739                     "    PCC_DEBUG(ctx->auxil, PCC_DBG_MATCH, \"%s\", ctx->level, chunk->pos, (ctx->buffer.buf + chunk->pos), (ctx->cur - chunk->pos));\n"
4740                     "    return chunk;\n",
4741                     ctx->rules.buf[i]->data.rule.name
4742                 );
4743                 if (r != CODE_REACH__ALWAYS_SUCCEED) {
4744                     stream__printf(
4745                         &sstream,
4746                         "L0000:;\n"
4747                         "    ctx->level--;\n"
4748                         "    PCC_DEBUG(ctx->auxil, PCC_DBG_NOMATCH, \"%s\", ctx->level, chunk->pos, (ctx->buffer.buf + chunk->pos), (ctx->cur - chunk->pos));\n"
4749                         "    pcc_thunk_chunk__destroy(ctx, chunk);\n"
4750                         "    return NULL;\n",
4751                         ctx->rules.buf[i]->data.rule.name
4752                     );
4753                 }
4754                 stream__puts(
4755                     &sstream,
4756                     "}\n"
4757                     "\n"
4758                 );
4759             }
4760         }
4761         stream__printf(
4762             &sstream,
4763             "%s_context_t *%s_create(%s%sauxil) {\n",
4764             get_prefix(ctx), get_prefix(ctx),
4765             at, ap ? "" : " "
4766         );
4767         stream__puts(
4768             &sstream,
4769             "    return pcc_context__create(auxil);\n"
4770             "}\n"
4771             "\n"
4772         );
4773         stream__printf(
4774             &sstream,
4775             "int %s_parse(%s_context_t *ctx, %s%s*ret) {\n",
4776             get_prefix(ctx), get_prefix(ctx),
4777             vt, vp ? "" : " "
4778         );
4779         if (ctx->rules.len > 0) {
4780             stream__printf(
4781                 &sstream,
4782                 "    if (pcc_apply_rule(ctx, pcc_evaluate_rule_%s, &ctx->thunks, ret))\n",
4783                 ctx->rules.buf[0]->data.rule.name
4784             );
4785             stream__puts(
4786                 &sstream,
4787                 "        pcc_do_action(ctx, &ctx->thunks, ret);\n"
4788                 "    else\n"
4789                 "        PCC_ERROR(ctx->auxil);\n"
4790                 "    pcc_commit_buffer(ctx);\n"
4791             );
4792         }
4793         stream__puts(
4794             &sstream,
4795             "    pcc_thunk_array__revert(ctx->auxil, &ctx->thunks, 0);\n"
4796             "    return pcc_refill_buffer(ctx, 1) >= 1;\n"
4797             "}\n"
4798             "\n"
4799         );
4800         stream__printf(
4801             &sstream,
4802             "void %s_destroy(%s_context_t *ctx) {\n",
4803             get_prefix(ctx), get_prefix(ctx)
4804         );
4805         stream__puts(
4806             &sstream,
4807             "    pcc_context__destroy(ctx);\n"
4808             "}\n"
4809         );
4810     }
4811     {
4812         stream__puts(
4813             &hstream,
4814             "#ifdef __cplusplus\n"
4815             "extern \"C\" {\n"
4816             "#endif\n"
4817             "\n"
4818         );
4819         stream__printf(
4820             &hstream,
4821             "typedef struct %s_context_tag %s_context_t;\n"
4822             "\n",
4823             get_prefix(ctx), get_prefix(ctx)
4824         );
4825         stream__printf(
4826             &hstream,
4827             "%s_context_t *%s_create(%s%sauxil);\n",
4828             get_prefix(ctx), get_prefix(ctx),
4829             at, ap ? "" : " "
4830         );
4831         stream__printf(
4832             &hstream,
4833             "int %s_parse(%s_context_t *ctx, %s%s*ret);\n",
4834             get_prefix(ctx), get_prefix(ctx),
4835             vt, vp ? "" : " "
4836         );
4837         stream__printf(
4838             &hstream,
4839             "void %s_destroy(%s_context_t *ctx);\n",
4840             get_prefix(ctx), get_prefix(ctx)
4841         );
4842         stream__puts(
4843             &hstream,
4844             "\n"
4845             "#ifdef __cplusplus\n"
4846             "}\n"
4847             "#endif\n"
4848         );
4849         stream__printf(
4850             &hstream,
4851             "\n"
4852             "#endif /* !PCC_INCLUDED_%s */\n",
4853             ctx->hid
4854         );
4855     }
4856     {
4857         match_eol(ctx);
4858         if (!match_eof(ctx)) stream__putc(&sstream, '\n');
4859         commit_buffer(ctx);
4860         if (ctx->opts.lines && !match_eof(ctx))
4861             stream__write_line_directive(&sstream, ctx->iname, ctx->linenum);
4862         while (refill_buffer(ctx, ctx->buffer.max) > 0) {
4863             const size_t n = ctx->buffer.len;
4864             stream__write_text(&sstream, ctx->buffer.buf, (n > 0 && ctx->buffer.buf[n - 1] == '\r') ? n - 1 : n);
4865             ctx->bufcur = n;
4866             commit_buffer(ctx);
4867         }
4868     }
4869     fclose_e(hstream.file);
4870     fclose_e(sstream.file);
4871     if (ctx->errnum) {
4872         unlink(ctx->hname);
4873         unlink(ctx->sname);
4874         return FALSE;
4875     }
4876     return TRUE;
4877 }
4878 
print_version(FILE * output)4879 static void print_version(FILE *output) {
4880     fprintf(output, "%s version %s\n", g_cmdname, VERSION);
4881     fprintf(output, "Copyright (c) 2014, 2019-2022 Arihiro Yoshida. All rights reserved.\n");
4882 }
4883 
print_usage(FILE * output)4884 static void print_usage(FILE *output) {
4885     fprintf(output, "Usage: %s [OPTIONS] [FILE]\n", g_cmdname);
4886     fprintf(output, "Generates a packrat parser for C.\n");
4887     fprintf(output, "\n");
4888     fprintf(output, "  -o BASENAME    specify a base name of output source and header files\n");
4889     fprintf(output, "  -a, --ascii    disable UTF-8 support\n");
4890     fprintf(output, "  -l, --lines    add #line directives\n");
4891     fprintf(output, "  -d, --debug    with debug information\n");
4892     fprintf(output, "  -h, --help     print this help message and exit\n");
4893     fprintf(output, "  -v, --version  print the version and exit\n");
4894 }
4895 
main(int argc,char ** argv)4896 int main(int argc, char **argv) {
4897     const char *iname = NULL;
4898     const char *oname = NULL;
4899     options_t opts;
4900     opts.ascii = FALSE;
4901     opts.lines = FALSE;
4902     opts.debug = FALSE;
4903 #ifdef _MSC_VER
4904 #ifdef _DEBUG
4905     _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
4906     _CrtSetReportMode(_CRT_WARN, _CRTDBG_MODE_FILE);
4907     _CrtSetReportFile(_CRT_WARN, _CRTDBG_FILE_STDERR);
4908 #endif
4909 #endif
4910     g_cmdname = extract_filename(argv[0]);
4911     {
4912         const char *fname = NULL;
4913         const char *opt_o = NULL;
4914         bool_t opt_a = FALSE;
4915         bool_t opt_l = FALSE;
4916         bool_t opt_d = FALSE;
4917         bool_t opt_h = FALSE;
4918         bool_t opt_v = FALSE;
4919         int i;
4920         for (i = 1; i < argc; i++) {
4921             if (argv[i][0] != '-') {
4922                 break;
4923             }
4924             else if (strcmp(argv[i], "--") == 0) {
4925                 i++; break;
4926             }
4927             else if (argv[i][1] == 'o') {
4928                 const char *const o = (argv[i][2] != '\0') ? argv[i] + 2 : (++i < argc) ?  argv[i] : NULL;
4929                 if (o == NULL) {
4930                     print_error("Output base name missing\n");
4931                     fprintf(stderr, "\n");
4932                     print_usage(stderr);
4933                     exit(1);
4934                 }
4935                 if (opt_o != NULL) {
4936                     print_error("Extra output base name '%s'\n", o);
4937                     fprintf(stderr, "\n");
4938                     print_usage(stderr);
4939                     exit(1);
4940                 }
4941                 opt_o = o;
4942             }
4943             else if (strcmp(argv[i], "-a") == 0 || strcmp(argv[i], "--ascii") == 0) {
4944                 opt_a = TRUE;
4945             }
4946             else if (strcmp(argv[i], "-l") == 0 || strcmp(argv[i], "--lines") == 0) {
4947                 opt_l = TRUE;
4948             }
4949             else if (strcmp(argv[i], "-d") == 0 || strcmp(argv[i], "--debug") == 0) {
4950                 opt_d = TRUE;
4951             }
4952             else if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) {
4953                 opt_h = TRUE;
4954             }
4955             else if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--version") == 0) {
4956                 opt_v = TRUE;
4957             }
4958             else {
4959                 print_error("Invalid option '%s'\n", argv[i]);
4960                 fprintf(stderr, "\n");
4961                 print_usage(stderr);
4962                 exit(1);
4963             }
4964         }
4965         switch (argc - i) {
4966         case 0:
4967             break;
4968         case 1:
4969             fname = argv[i];
4970             break;
4971         default:
4972             print_error("Multiple input files\n");
4973             fprintf(stderr, "\n");
4974             print_usage(stderr);
4975             exit(1);
4976         }
4977         if (opt_h || opt_v) {
4978             if (opt_v) print_version(stdout);
4979             if (opt_v && opt_h) fprintf(stdout, "\n");
4980             if (opt_h) print_usage(stdout);
4981             exit(0);
4982         }
4983         iname = (fname != NULL && fname[0] != '\0') ? fname : NULL;
4984         oname = (opt_o != NULL && opt_o[0] != '\0') ? opt_o : NULL;
4985         opts.ascii = opt_a;
4986         opts.lines = opt_l;
4987         opts.debug = opt_d;
4988     }
4989     {
4990         context_t *const ctx = create_context(iname, oname, &opts);
4991         const int b = parse(ctx) && generate(ctx);
4992         destroy_context(ctx);
4993         if (!b) exit(10);
4994     }
4995     return 0;
4996 }
4997