1 /*
2 * Copyright (c) 2003, Peter Strand <peter@zarquon.se>
3 *
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License version 2 or (at your opinion) any later version.
6 *
7 * This module contains functions for generating tags for Haskell language
8 * files (https://en.wikipedia.org/wiki/Haskell_(programming_language)).
9 *
10 * Does not handle operators or infix definitions like:
11 * a `f` b = ...
12 *
13 */
14
15
16 /*
17 * INCLUDE FILES
18 */
19
20 #include "general.h" /* must always come first */
21
22 #include <string.h>
23
24 #include "parse.h"
25 #include "read.h"
26 #include "vstring.h"
27 #include "routines.h"
28
29
30 /*
31 * DATA DEFINITIONS
32 */
33 typedef enum {
34 K_TYPE, K_CONSTRUCTOR, K_FUNCTION, K_MODULE
35 } haskellKind;
36
37 static kindDefinition HaskellKinds [] = {
38 { true, 't', "type", "types" },
39 { true, 'c', "constructor", "type constructors" },
40 { true, 'f', "function", "functions" },
41 { true, 'm', "module", "modules"}
42 };
43
44
45 typedef const unsigned char *custr;
46
47 /*
48 * FUNCTION DEFINITIONS
49 */
50
51
skip_rest_of_line(void)52 static void skip_rest_of_line(void)
53 {
54 int c;
55 do {
56 c = getcFromInputFile();
57 } while (c != EOF && c != '\n');
58 }
59
get_line(char * buf)60 static int get_line(char *buf)
61 {
62 int i = 0;
63 int c;
64 do {
65 c = getcFromInputFile();
66 buf[i++] = c;
67 } while (c != EOF && c != '\n' && i < 1000);
68 buf[i] = '\0';
69 return i;
70 }
71
get_next_char(void)72 static int get_next_char(void)
73 {
74 int c, nxt;
75 c = getcFromInputFile();
76 if (c == EOF)
77 return c;
78 nxt = getcFromInputFile();
79 if (nxt == EOF)
80 return c;
81 ungetcToInputFile(nxt);
82
83 if (c == '-' && nxt == '-') {
84 skip_rest_of_line();
85 return get_next_char();
86 }
87 if (c == '{' && nxt == '-') {
88 int last = '\0';
89 do {
90 last = c;
91 c = get_next_char();
92 } while (! (c == EOF || (last == '-' && c == '}')));
93 return get_next_char();
94 }
95 return c;
96 }
97
add_tag(const char * token,haskellKind kind,vString * name)98 static void add_tag(const char *token, haskellKind kind, vString *name)
99 {
100 int i;
101 for (i = 0; token[i] != '\0'; ++i)
102 vStringPut(name, token[i]);
103
104 makeSimpleTag(name, kind);
105 vStringClear(name);
106 }
107
isident(char c)108 static int isident(char c)
109 {
110 return isalnum(c) || c == '_' || c == '\'' || c == '$';
111 }
112
get_token(char * token,int n)113 static int get_token(char *token, int n)
114 {
115 int c = getcFromInputFile();
116 int i = n;
117 while (c != EOF && isident(c) && i < 1000) {
118 token[i] = c;
119 i++;
120 c = getcFromInputFile();
121 }
122 token[i] = '\0';
123 if (c == EOF)
124 return 0;
125 if (i != n) {
126 ungetcToInputFile(c);
127 return 1;
128 } else {
129 return 0;
130 }
131 }
132
133 enum Find_State { Find_Eq, Find_Constr, Get_Extr, Find_Extr, Find_Bar };
134
inside_datatype(vString * name)135 static int inside_datatype(vString *name)
136 {
137 enum Find_State st = Find_Eq;
138 int c;
139 char token[1001];
140
141 while (1) {
142 if (st == Find_Eq)
143 {
144 do {
145 c = get_next_char();
146 if (c == '\n') {
147 c = get_next_char();
148 if (! (c == ' ' || c == '\t')) {
149 return c;
150 }
151 }
152 } while (c != EOF && c != '=');
153 st = Find_Constr;
154 }
155 else if (st == Find_Constr)
156 {
157 do {
158 c = get_next_char();
159 } while (isspace(c));
160 if (!isupper(c)) {
161 skip_rest_of_line();
162 return '\n';
163 }
164 token[0] = c;
165 if (!get_token(token, 1))
166 return '\n';
167 add_tag(token, K_CONSTRUCTOR, name);
168 st = Find_Extr;
169 }
170 else if (st == Find_Extr)
171 {
172 c = get_next_char();
173 if (c == '{')
174 st = Get_Extr;
175 else if (c == '|')
176 st = Find_Constr;
177 else if (c == '\n') {
178 c = get_next_char();
179 if (! (c == ' ' || c == '\t')) {
180 return c;
181 }
182 }
183 else if (!isspace(c))
184 st = Find_Bar;
185 }
186 else if (st == Get_Extr)
187 {
188 do {
189 c = get_next_char();
190 } while (isspace(c));
191 if (c == EOF)
192 return c;
193 token[0] = c;
194 get_token(token, 1);
195 add_tag(token, K_FUNCTION, name);
196 do {
197 c = get_next_char();
198 if (c == '}') {
199 st = Find_Bar;
200 break;
201 }
202 } while (c != EOF && c != ',');
203 }
204 else if (st == Find_Bar)
205 {
206 do {
207 c = get_next_char();
208 if (c == '\n') {
209 c = get_next_char();
210 if (! (c == ' ' || c == '\t')) {
211 return c;
212 }
213 }
214 } while (c != EOF && c != '|');
215 st = Find_Constr;
216 }
217 }
218 return '\n';
219 }
220
findHaskellTags(int is_literate)221 static void findHaskellTags (int is_literate)
222 {
223 vString *name = vStringNew ();
224 char token[1001], arg[1001];
225 int c;
226 int in_tex_lit_code = 0;
227 c = get_next_char();
228
229 while (c != EOF)
230 {
231 if (c == '\n') {
232 c = get_next_char();
233 continue;
234 }
235
236 if (isspace(c)) {
237 skip_rest_of_line();
238 c = get_next_char();
239 continue;
240 }
241 if (is_literate && !in_tex_lit_code) {
242 if (c == '>') {
243 c = getcFromInputFile();
244 if (c == ' ') {
245 c = get_next_char();
246 if (!isident(c)) {
247 skip_rest_of_line();
248 c = get_next_char();
249 continue;
250 }
251 } else {
252 skip_rest_of_line();
253 c = get_next_char();
254 continue;
255 }
256 } else if (c == '\\') {
257 int n = get_line(token);
258 if (strncmp(token, "begin{code}", 11) == 0) {
259 in_tex_lit_code = 1;
260 c = get_next_char();
261 continue;
262 } else {
263 if (n > 0 && token[n-1] != '\n')
264 skip_rest_of_line();
265 else
266 c = get_next_char();
267 }
268 continue;
269 } else {
270 skip_rest_of_line();
271 c = get_next_char();
272 continue;
273 }
274 }
275 if (is_literate && in_tex_lit_code && c == '\\') {
276 get_line(token);
277 if (strncmp(token, "end{code}", 9) == 0) {
278 in_tex_lit_code = 0;
279 c = get_next_char();
280 continue;
281 }
282 }
283 token[0] = c;
284 if (!isident(c)) {
285 skip_rest_of_line();
286 c = get_next_char();
287 continue;
288 }
289 if (!get_token(token, 1)) {
290 c = get_next_char();
291 continue;
292 }
293 do {
294 if ((c = getcFromInputFile()) == EOF)
295 return;
296 } while (c == ' ' || c == '\t');
297 arg[0] = c;
298 get_token(arg, 1);
299 if (strcmp(token, "data") == 0 || strcmp(token, "newtype") == 0) {
300 add_tag(arg, K_TYPE, name);
301 c = inside_datatype(name);
302 continue;
303 }
304 if (strcmp(token, "type") == 0)
305 add_tag(arg, K_TYPE, name);
306 else if (strcmp(token, "module") == 0)
307 add_tag(arg, K_MODULE, name);
308 else if (strcmp(token, "instance") == 0 ||
309 strcmp(token, "foreign") == 0 ||
310 strcmp(token, "import") == 0)
311 ;
312 else {
313 if (arg[0] != ':')
314 add_tag(token, K_FUNCTION, name);
315 }
316 skip_rest_of_line();
317 c = get_next_char();
318 }
319 vStringDelete(name);
320 }
321
findNormalHaskellTags(void)322 static void findNormalHaskellTags (void)
323 {
324 findHaskellTags (0);
325 }
326
findLiterateHaskellTags(void)327 static void findLiterateHaskellTags (void)
328 {
329 findHaskellTags (1);
330 }
331
HaskellParser(void)332 extern parserDefinition* HaskellParser (void)
333 {
334 static const char *const extensions [] = { "hs", NULL };
335 parserDefinition* def = parserNew ("Haskell");
336
337 def->kindTable = HaskellKinds;
338 def->kindCount = ARRAY_SIZE(HaskellKinds);
339 def->extensions = extensions;
340 def->parser = findNormalHaskellTags;
341 return def;
342 }
343
LiterateHaskellParser(void)344 extern parserDefinition* LiterateHaskellParser (void)
345 {
346 static const char *const extensions [] = { "lhs", NULL };
347 parserDefinition* def = parserNew ("LiterateHaskell");
348 def->kindTable = HaskellKinds;
349 def->kindCount = ARRAY_SIZE(HaskellKinds);
350 def->extensions = extensions;
351 def->parser = findLiterateHaskellTags;
352 return def;
353 }
354