xref: /Universal-ctags/misc/badinput.c (revision 7963e4b9c75211d36155a586e9b7d9663443009a)
1 /* badinput.c: do bisect-quest to find minimal input which breaks the target command execution
2 
3    Copyright (C) 2014 Masatake YAMATO
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 2 of the License, or
8    (at your option) any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 
18    Build
19    =======================================================================
20 
21 	$ gcc -Wall badinput.c -o badinput
22 
23    Usage
24    =======================================================================
25 
26 	$ badinput CMDLINE_TEMPLATE INPUT OUTPUT
27 
28    Description
29    =======================================================================
30 
31    Consider a situation in which a process started from CMDLINE_TEMPLATE crashes or
32    enters an infinite loop when it processes the INPUT file.
33 
34    This program truncates the tail and then the head of the INPUT file, writes
35    each candidate to OUTPUT and runs CMDLINE_TEMPLATE (%s is replaced with OUTPUT)
36    repeatedly; it reports the shortest input found which still makes the process exit abnormally (!= 0).
37 
38    Here is an example:
39 
40 	$ misc/badinput "timeout 1 ./ctags -o - --language-force=Ada %s > /dev/null" Test/1880687.js /tmp/output.txt
41 
42    Ada parser of ctags enters an infinite-loop when Test/1880687.js is given.
43    The size of original Test/1880687.js is 2258 bytes.
44 
45 	$ misc/badinput "timeout 1 ./ctags -o - --language-force=Ada %s > /dev/null" Test/1880687.js /tmp/output.txt
46 	[0, 2448]...31744
47 	[0, 0]...0
48 	step(end): 0 [0, 2448]...31744
49 	step(end): 1 [0, 1224]...31744
50 	step(end): 2 [0, 612]...0
51 	step(end): 3 [0, 918]...0
52 	step(end): 4 [0, 1071]...0
53 	step(end): 5 [0, 1147]...31744
54 	step(end): 6 [0, 1109]...0
55 	step(end): 7 [0, 1128]...31744
56 	step(end): 8 [0, 1119]...0
57 	step(end): 9 [0, 1123]...31744
58 	step(end): 10 [0, 1121]...0
59 	step(end): 11 [0, 1122]...31744
60 	step(start): 0 [0, 1122]...31744
61 	step(start): 1 [561, 1122]...31744
62 	step(start): 2 [841, 1122]...31744
63 	step(start): 3 [981, 1122]...31744
64 	step(start): 4 [1051, 1122]...31744
65 	step(start): 5 [1086, 1122]...0
66 	step(start): 6 [1069, 1122]...31744
67 	step(start): 7 [1077, 1122]...31744
68 	step(start): 8 [1081, 1122]...31744
69 	step(start): 9 [1083, 1122]...0
70 	step(start): 10 [1082, 1122]...0
71 	Minimal bad input:
72 	function baz() {
73 	    }
74 	}
75 
76 	function g(
77 	$
78 
79    A new, shorter input (only 38 bytes) that still reproduces the issue is reported at the end.
80    This new input is useful for debugging.
81 
82    The result is shown in stdout and is recorded to the file specified as OUTPUT. */
83 
84 #define _GNU_SOURCE
85 #include <string.h>
86 #include <stdio.h>
87 #include <stdlib.h>
88 #include <errno.h>
89 
90 #include <sys/types.h>
91 #include <sys/stat.h>
92 #include <fcntl.h>
93 #include <unistd.h>
94 
95 
/* Write the usage summary for PROG to FP and terminate the process
   with exit code STATUS.  This function never returns. */
static void
print_help(const char *prog, FILE *fp, int status)
{
	/* The header line has no conversions, so fputs is sufficient. */
	fputs("Usage:\n", fp);

	/* One line per accepted invocation form. */
	fprintf(fp, "	%s --help|-h\n", prog);
	fprintf(fp, "	%s CMDLINE_TEMPLATE INPUT OUTPUT\n", prog);

	exit(status);
}
104 
/* Read the whole of INPUT_FILE into a freshly malloc'ed buffer.

   On return *INPUT points to the buffer (the caller owns it) and *LEN
   holds its size in bytes as reported by fstat.  Any failure is
   reported on stderr and terminates the process with status 1. */
static void
load (const char* input_file, char** input, size_t* len)
{
	int input_fd;
	struct stat stat_buf;
	size_t total;

	input_fd = open (input_file, O_RDONLY);
	if (input_fd < 0)
	{
		perror ("open(input)");
		exit(1);
	}

	if (fstat(input_fd, &stat_buf) < 0)
	{
		perror ("fstat");
		exit(1);
	}

	*len = stat_buf.st_size;
	/* malloc(0) is allowed to return NULL; ask for at least one byte so
	   that an empty input file is not misreported as memory exhaustion. */
	*input = malloc (*len ? *len : 1);
	if (!*input)
	{
		fprintf(stderr, "memory exhausted\n");
		exit (1);
	}

	/* read may return fewer bytes than requested; keep reading until the
	   whole file has been transferred. */
	total = 0;
	while (total < *len)
	{
		ssize_t got = read (input_fd, *input + total, *len - total);

		if (got < 0)
		{
			if (errno == EINTR)
				continue;
			perror ("read");
			exit (1);
		}
		if (got == 0)
		{
			/* The file is shorter than fstat reported. */
			fprintf(stderr, "read: unexpected end of file\n");
			exit (1);
		}
		total += (size_t) got;
	}

	/* The descriptor is not needed once the contents are in memory. */
	close (input_fd);
}
138 
/* Replace the contents of the file behind OUTPUT_FD with the LEN bytes
   starting at INPUT.

   The descriptor is rewound to offset 0, the file is truncated to zero
   length, and the bytes are then written.  Any failure is reported on
   stderr and terminates the process with status 1. */
static void
prepare(int output_fd, char * input, size_t len)
{
	size_t written;

	/* The comparison with -1 applies to the result of lseek, not to the
	   whence argument. */
	if (lseek (output_fd, 0, SEEK_SET) == (off_t) -1)
	{
		perror("lseek");
		exit (1);
	}

	if (ftruncate(output_fd, 0) == -1)
	{
		perror ("truncate");
		exit (1);
	}

	/* write may transfer fewer bytes than requested; loop until all of
	   the data has been written. */
	written = 0;
	while (written < len)
	{
		ssize_t put = write (output_fd, input + written, len - written);

		if (put < 0)
		{
			if (errno == EINTR)
				continue;
			perror ("write");
			exit (1);
		}
		written += (size_t) put;
	}
}
160 
/* Run CMDLINE once against the LEN bytes of INPUT beginning at offset
   START.

   The candidate bytes are first stored in the file behind OUTPUT_FD via
   prepare(); the tested range and the command's result are logged to
   stderr.  Returns the value returned by system(): 0 means the command
   succeeded, anything else is treated as a failure by the callers. */
static int
test (char* cmdline, char * input, off_t start, size_t len, int output_fd)
{
	int r;

	prepare (output_fd, input + start, len);
	/* off_t is not guaranteed to be unsigned long; convert explicitly so
	   the arguments match the %lu conversions. */
	fprintf (stderr, "[%lu, %lu]...",
		 (unsigned long) start, (unsigned long) (start + len));
	r = system(cmdline);
	fprintf(stderr, "%d\n", r);

	return r;
}
173 
/* Shrink the LEN bytes at INPUT to a smaller range that still makes
   CMDLINE fail (i.e. test() returns non-zero).

   Phase 1 keeps the start fixed at 0 and moves the end of the range;
   phase 2 keeps the end found by phase 1 and moves the start.  In both
   phases the position is moved by a step that is halved on every
   iteration (never below 1), toward the failing side after a success
   and toward the successful side after a failure, until a successful
   and a failing position are adjacent.

   The resulting range is printed to stdout and left in the file behind
   OUTPUT_FD.  Always returns 0. */
static int
bisect(char* cmdline, char * input, size_t len, int output_fd)
{
	off_t end;
	off_t start;

	unsigned int step;
	/* Step width.  It is halved on each iteration instead of being
	   computed as len >> (step + 1): that shift count is unbounded and
	   becomes undefined once it reaches the width of the type.  Using
	   off_t also avoids truncating large lengths. */
	off_t delta;

	off_t failed;
	off_t successful;

	/* Phase 1: find the shortest failing prefix [0, end). */
	end = len;
	failed = len;
	successful = 0;
	delta = len;
	for (step = 0; 1; step++)
	{
		fprintf(stderr, "step(end): %u ", step);
		delta >>= 1;
		if (delta == 0)
			delta = 1;

		if (test (cmdline, input, 0, end, output_fd) == 0)
		{
			/* The command succeeded: the prefix is too short. */
			successful = end;
			if (end + 1 == failed)
			{
				end = failed;
				break;
			}
			else
				end += delta;
		}
		else
		{
			/* The command still fails: try a shorter prefix. */
			failed = end;
			if (successful + 1 == end)
				break;
			else
				end -= delta;
		}
	}

	/* Phase 2: with END fixed, find the latest failing START. */
	start = 0;
	failed = 0;
	successful = end;
	delta = end;
	for (step = 0; 1; step++)
	{
		fprintf(stderr, "step(start): %u ", step);
		delta >>= 1;
		if (delta == 0)
			delta = 1;
		if (test (cmdline, input, start, end - start, output_fd) == 0)
		{
			/* The command succeeded: too much of the head was cut. */
			successful = start;
			if (start - 1 == failed)
			{
				start--;
				break;
			}
			else
				start -= delta;
		}
		else
		{
			/* The command still fails: cut more of the head. */
			failed = start;
			if (successful - 1 == start)
				break;
			else
				start += delta;
		}

	}

	len = end - start;
	fprintf(stderr, "Minimal bad input:\n");
	fwrite(input + start, 1, len, stdout);
	/* Leave the reduced input in the output file as well. */
	prepare (output_fd, input + start, len);
	printf("\n");

	return 0;
}
257 
/* Entry point: badinput CMDLINE_TEMPLATE INPUT OUTPUT

   Loads INPUT, builds the command line by substituting OUTPUT for the
   "%s" in CMDLINE_TEMPLATE, checks that the command fails on the whole
   input and succeeds on the empty input, and then hands over to
   bisect().  Returns the value of bisect(); every error path exits
   with status 1. */
int
main(int argc, char** argv)
{
	char* cmdline_template;
	char* input_file;
	char* output_file;

	char* cmdline;
	char * input;
	size_t len;
	int output_fd;


	/* With argc == 2 the only argument is argv[1]; argv[2] is NULL. */
	if (argc == 2
	    && ((!strcmp(argv[1], "--help"))
		|| (!strcmp(argv[1], "-h"))))
		print_help(argv[0], stdout, 0);
	else if (argc != 4)
	{
		fprintf(stderr,"wrong number of arguments\n");
		exit (1);
	}

	cmdline_template = argv[1];
	input_file = argv[2];
	output_file = argv[3];

	if (!strstr (cmdline_template, "%s"))
	{
		fprintf(stderr, "no %%s is found in command line template\n");
		exit (1);
	}

	load (input_file, &input, &len);

	output_fd = open (output_file, O_WRONLY|O_CREAT, 0666);
	if (output_fd < 0)
	{
		perror ("open(output)");
		exit (1);
	}

	/* NOTE: the template is used directly as a printf format, so it is
	   expected to contain a single "%s" and no other conversions. */
	if (asprintf (&cmdline, cmdline_template, output_file) == -1)
	{
		fprintf(stderr, "error in asprintf\n");
		exit (1);
	}

	/* Bisecting only makes sense if the full input fails ... */
	if (test (cmdline, input, 0, len, output_fd) == 0)
	{
		fprintf(stderr, "the target command line exits normally against the original input\n");
		exit (1);
	}

	/* ... and the empty input succeeds. */
	if (test (cmdline, input, 0, 0, output_fd) != 0)
	{
		fprintf(stderr, "the target command line fails even against the empty input\n");
		exit (1);
	}

	return bisect(cmdline, input, len, output_fd);
}
320