xref: /Universal-ctags/parsers/haskell.c (revision e64d7b88423cb8f499e9b3fd101a24155747b961)
1 /*
2 * Copyright (c) 2003, Peter Strand <peter@zarquon.se>
3 *
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License version 2 or (at your opinion) any later version.
6 *
7 * This module contains functions for generating tags for Haskell language
8 * files (https://en.wikipedia.org/wiki/Haskell_(programming_language)).
9 *
10 * Does not handle operators or infix definitions like:
11 * a `f` b = ...
12 *
13 */
14 
15 
16 /*
17 *   INCLUDE FILES
18 */
19 
20 #include "general.h"    /* must always come first */
21 
22 #include <string.h>
23 
24 #include "parse.h"
25 #include "read.h"
26 #include "vstring.h"
27 #include "routines.h"
28 
29 
30 /*
31 *   DATA DEFINITIONS
32 */
33 typedef enum {
34 	K_TYPE, K_CONSTRUCTOR, K_FUNCTION, K_MODULE
35 } haskellKind;
36 
37 static kindDefinition HaskellKinds [] = {
38 	{ true, 't', "type", "types" },
39 	{ true, 'c', "constructor", "type constructors" },
40 	{ true, 'f', "function", "functions" },
41 	{ true, 'm', "module", "modules"}
42 };
43 
44 
45 typedef const unsigned char *custr;
46 
47 /*
48 *   FUNCTION DEFINITIONS
49 */
50 
51 
skip_rest_of_line(void)52 static void skip_rest_of_line(void)
53 {
54 	int c;
55 	do {
56 		c = getcFromInputFile();
57 	} while (c != EOF && c != '\n');
58 }
59 
get_line(char * buf)60 static int get_line(char *buf)
61 {
62 	int i = 0;
63 	int c;
64 	do {
65 		c = getcFromInputFile();
66 		buf[i++] = c;
67 	} while (c != EOF && c != '\n' && i < 1000);
68 	buf[i] = '\0';
69 	return i;
70 }
71 
get_next_char(void)72 static int get_next_char(void)
73 {
74 	int c, nxt;
75 	c = getcFromInputFile();
76 	if (c == EOF)
77 		return c;
78 	nxt = getcFromInputFile();
79 	if (nxt == EOF)
80 		return c;
81 	ungetcToInputFile(nxt);
82 
83 	if (c == '-' && nxt == '-') {
84 		skip_rest_of_line();
85 		return get_next_char();
86 	}
87 	if (c == '{' && nxt == '-') {
88 		int last = '\0';
89 		do {
90 			last = c;
91 			c = get_next_char();
92 		} while (! (c == EOF || (last == '-' && c == '}')));
93 		return get_next_char();
94 	}
95 	return c;
96 }
97 
add_tag(const char * token,haskellKind kind,vString * name)98 static void add_tag(const char *token, haskellKind kind, vString *name)
99 {
100 	int i;
101 	for (i = 0; token[i] != '\0'; ++i)
102 		vStringPut(name, token[i]);
103 
104 	makeSimpleTag(name, kind);
105 	vStringClear(name);
106 }
107 
isident(char c)108 static int isident(char c)
109 {
110 	return isalnum(c) || c == '_' || c == '\'' || c == '$';
111 }
112 
get_token(char * token,int n)113 static int get_token(char *token, int n)
114 {
115 	int c = getcFromInputFile();
116 	int i = n;
117 	while (c != EOF && isident(c) && i < 1000) {
118 		token[i] = c;
119 		i++;
120 		c = getcFromInputFile();
121 	}
122 	token[i] = '\0';
123 	if (c == EOF)
124 		return 0;
125 	if (i != n) {
126 		ungetcToInputFile(c);
127 		return 1;
128 	} else {
129 		return 0;
130 	}
131 }
132 
133 enum Find_State { Find_Eq, Find_Constr, Get_Extr, Find_Extr, Find_Bar };
134 
inside_datatype(vString * name)135 static int inside_datatype(vString *name)
136 {
137 	enum Find_State st = Find_Eq;
138 	int c;
139 	char token[1001];
140 
141 	while (1) {
142 		if (st == Find_Eq)
143 		{
144 			do {
145 				c = get_next_char();
146 				if (c == '\n') {
147 					c = get_next_char();
148 					if (! (c == ' ' || c == '\t')) {
149 						return c;
150 					}
151 				}
152 			} while (c != EOF && c != '=');
153 			st = Find_Constr;
154 		}
155 		else if (st == Find_Constr)
156 		{
157 			do {
158 				c = get_next_char();
159 			} while (isspace(c));
160 			if (!isupper(c)) {
161 				skip_rest_of_line();
162 				return '\n';
163 			}
164 			token[0] = c;
165 			if (!get_token(token, 1))
166 				return '\n';
167 			add_tag(token, K_CONSTRUCTOR, name);
168 			st = Find_Extr;
169 		}
170 		else if (st == Find_Extr)
171 		{
172 			c = get_next_char();
173 			if (c == '{')
174 				st = Get_Extr;
175 			else if (c == '|')
176 				st = Find_Constr;
177 			else if (c == '\n') {
178 				c = get_next_char();
179 				if (! (c == ' ' || c == '\t')) {
180 					return c;
181 				}
182 			}
183 			else if (!isspace(c))
184 				st = Find_Bar;
185 		}
186 		else if (st == Get_Extr)
187 		{
188 			do {
189 				c = get_next_char();
190 			} while (isspace(c));
191 			if (c == EOF)
192 				return c;
193 			token[0] = c;
194 			get_token(token, 1);
195 			add_tag(token, K_FUNCTION, name);
196 			do {
197 				c = get_next_char();
198 				if (c == '}') {
199 					st = Find_Bar;
200 					break;
201 				}
202 			} while (c != EOF && c != ',');
203 		}
204 		else if (st == Find_Bar)
205 		{
206 			do {
207 				c = get_next_char();
208 				if (c == '\n') {
209 					c = get_next_char();
210 					if (! (c == ' ' || c == '\t')) {
211 						return c;
212 					}
213 				}
214 			} while (c != EOF && c != '|');
215 			st = Find_Constr;
216 		}
217 	}
218 	return '\n';
219 }
220 
findHaskellTags(int is_literate)221 static void findHaskellTags (int is_literate)
222 {
223 	vString *name = vStringNew ();
224 	char token[1001], arg[1001];
225 	int c;
226 	int in_tex_lit_code = 0;
227 	c = get_next_char();
228 
229 	while (c != EOF)
230 	{
231 		if (c == '\n') {
232 			c = get_next_char();
233 			continue;
234 		}
235 
236 		if (isspace(c)) {
237 			skip_rest_of_line();
238 			c = get_next_char();
239 			continue;
240 		}
241 		if (is_literate && !in_tex_lit_code) {
242 			if (c == '>') {
243 				c = getcFromInputFile();
244 				if (c == ' ') {
245 					c = get_next_char();
246 					if (!isident(c)) {
247 						skip_rest_of_line();
248 						c = get_next_char();
249 						continue;
250 					}
251 				} else {
252 					skip_rest_of_line();
253 					c = get_next_char();
254 					continue;
255 				}
256 			} else if (c == '\\') {
257 				int n = get_line(token);
258 				if (strncmp(token, "begin{code}", 11) == 0) {
259 					in_tex_lit_code = 1;
260 					c = get_next_char();
261 					continue;
262 				} else {
263 					if (n > 0 && token[n-1] != '\n')
264 						skip_rest_of_line();
265 					else
266 						c = get_next_char();
267 				}
268 				continue;
269 			} else {
270 				skip_rest_of_line();
271 				c = get_next_char();
272 				continue;
273 			}
274 		}
275 		if (is_literate && in_tex_lit_code && c == '\\') {
276 			get_line(token);
277 			if (strncmp(token, "end{code}", 9) == 0) {
278 				in_tex_lit_code = 0;
279 				c = get_next_char();
280 				continue;
281 			}
282 		}
283 		token[0] = c;
284 		if (!isident(c)) {
285 			skip_rest_of_line();
286 			c = get_next_char();
287 			continue;
288 		}
289 		if (!get_token(token, 1)) {
290 			c = get_next_char();
291 			continue;
292 		}
293 		do {
294 			if ((c = getcFromInputFile()) == EOF)
295 				return;
296 		} while (c == ' ' || c == '\t');
297 		arg[0] = c;
298 		get_token(arg, 1);
299 		if (strcmp(token, "data") == 0 || strcmp(token, "newtype") == 0) {
300 			add_tag(arg, K_TYPE, name);
301 			c = inside_datatype(name);
302 			continue;
303 		}
304 		if (strcmp(token, "type") == 0)
305 			add_tag(arg, K_TYPE, name);
306 		else if (strcmp(token, "module") == 0)
307 			add_tag(arg, K_MODULE, name);
308 		else if (strcmp(token, "instance") == 0 ||
309 				 strcmp(token, "foreign") == 0 ||
310 				 strcmp(token, "import") == 0)
311 			;
312 		else {
313 			if (arg[0] != ':')
314 				add_tag(token, K_FUNCTION, name);
315 		}
316 		skip_rest_of_line();
317 		c = get_next_char();
318 	}
319 	vStringDelete(name);
320 }
321 
findNormalHaskellTags(void)322 static void findNormalHaskellTags (void)
323 {
324 	findHaskellTags (0);
325 }
326 
findLiterateHaskellTags(void)327 static void findLiterateHaskellTags (void)
328 {
329 	findHaskellTags (1);
330 }
331 
HaskellParser(void)332 extern parserDefinition* HaskellParser (void)
333 {
334 	static const char *const extensions [] = { "hs", NULL };
335 	parserDefinition* def  = parserNew ("Haskell");
336 
337 	def->kindTable  = HaskellKinds;
338 	def->kindCount  = ARRAY_SIZE(HaskellKinds);
339 	def->extensions = extensions;
340 	def->parser     = findNormalHaskellTags;
341 	return def;
342 }
343 
LiterateHaskellParser(void)344 extern parserDefinition* LiterateHaskellParser (void)
345 {
346 	static const char *const extensions [] = { "lhs", NULL };
347 	parserDefinition* def = parserNew ("LiterateHaskell");
348 	def->kindTable  = HaskellKinds;
349 	def->kindCount  = ARRAY_SIZE(HaskellKinds);
350 	def->extensions = extensions;
351 	def->parser     = findLiterateHaskellTags;
352 	return def;
353 }
354