1*e64d7b88SJiří Techet /*
2*e64d7b88SJiří Techet * Copyright (c) 2003, Peter Strand <peter@zarquon.se>
3*e64d7b88SJiří Techet *
4*e64d7b88SJiří Techet * This source code is released for free distribution under the terms of the
5*e64d7b88SJiří Techet * GNU General Public License version 2 or (at your opinion) any later version.
6*e64d7b88SJiří Techet *
7*e64d7b88SJiří Techet * This module contains functions for generating tags for Haskell language
8*e64d7b88SJiří Techet * files (https://en.wikipedia.org/wiki/Haskell_(programming_language)).
9*e64d7b88SJiří Techet *
10*e64d7b88SJiří Techet * Does not handle operators or infix definitions like:
11*e64d7b88SJiří Techet * a `f` b = ...
12*e64d7b88SJiří Techet *
13*e64d7b88SJiří Techet */
14*e64d7b88SJiří Techet
15*e64d7b88SJiří Techet
16*e64d7b88SJiří Techet /*
17*e64d7b88SJiří Techet * INCLUDE FILES
18*e64d7b88SJiří Techet */
19*e64d7b88SJiří Techet
20*e64d7b88SJiří Techet #include "general.h" /* must always come first */
21*e64d7b88SJiří Techet
22*e64d7b88SJiří Techet #include <string.h>
23*e64d7b88SJiří Techet
24*e64d7b88SJiří Techet #include "parse.h"
25*e64d7b88SJiří Techet #include "read.h"
26*e64d7b88SJiří Techet #include "vstring.h"
27*e64d7b88SJiří Techet #include "routines.h"
28*e64d7b88SJiří Techet
29*e64d7b88SJiří Techet
30*e64d7b88SJiří Techet /*
31*e64d7b88SJiří Techet * DATA DEFINITIONS
32*e64d7b88SJiří Techet */
33*e64d7b88SJiří Techet typedef enum {
34*e64d7b88SJiří Techet K_TYPE, K_CONSTRUCTOR, K_FUNCTION, K_MODULE
35*e64d7b88SJiří Techet } haskellKind;
36*e64d7b88SJiří Techet
37*e64d7b88SJiří Techet static kindDefinition HaskellKinds [] = {
38*e64d7b88SJiří Techet { true, 't', "type", "types" },
39*e64d7b88SJiří Techet { true, 'c', "constructor", "type constructors" },
40*e64d7b88SJiří Techet { true, 'f', "function", "functions" },
41*e64d7b88SJiří Techet { true, 'm', "module", "modules"}
42*e64d7b88SJiří Techet };
43*e64d7b88SJiří Techet
44*e64d7b88SJiří Techet
45*e64d7b88SJiří Techet typedef const unsigned char *custr;
46*e64d7b88SJiří Techet
47*e64d7b88SJiří Techet /*
48*e64d7b88SJiří Techet * FUNCTION DEFINITIONS
49*e64d7b88SJiří Techet */
50*e64d7b88SJiří Techet
51*e64d7b88SJiří Techet
skip_rest_of_line(void)52*e64d7b88SJiří Techet static void skip_rest_of_line(void)
53*e64d7b88SJiří Techet {
54*e64d7b88SJiří Techet int c;
55*e64d7b88SJiří Techet do {
56*e64d7b88SJiří Techet c = getcFromInputFile();
57*e64d7b88SJiří Techet } while (c != EOF && c != '\n');
58*e64d7b88SJiří Techet }
59*e64d7b88SJiří Techet
get_line(char * buf)60*e64d7b88SJiří Techet static int get_line(char *buf)
61*e64d7b88SJiří Techet {
62*e64d7b88SJiří Techet int i = 0;
63*e64d7b88SJiří Techet int c;
64*e64d7b88SJiří Techet do {
65*e64d7b88SJiří Techet c = getcFromInputFile();
66*e64d7b88SJiří Techet buf[i++] = c;
67*e64d7b88SJiří Techet } while (c != EOF && c != '\n' && i < 1000);
68*e64d7b88SJiří Techet buf[i] = '\0';
69*e64d7b88SJiří Techet return i;
70*e64d7b88SJiří Techet }
71*e64d7b88SJiří Techet
get_next_char(void)72*e64d7b88SJiří Techet static int get_next_char(void)
73*e64d7b88SJiří Techet {
74*e64d7b88SJiří Techet int c, nxt;
75*e64d7b88SJiří Techet c = getcFromInputFile();
76*e64d7b88SJiří Techet if (c == EOF)
77*e64d7b88SJiří Techet return c;
78*e64d7b88SJiří Techet nxt = getcFromInputFile();
79*e64d7b88SJiří Techet if (nxt == EOF)
80*e64d7b88SJiří Techet return c;
81*e64d7b88SJiří Techet ungetcToInputFile(nxt);
82*e64d7b88SJiří Techet
83*e64d7b88SJiří Techet if (c == '-' && nxt == '-') {
84*e64d7b88SJiří Techet skip_rest_of_line();
85*e64d7b88SJiří Techet return get_next_char();
86*e64d7b88SJiří Techet }
87*e64d7b88SJiří Techet if (c == '{' && nxt == '-') {
88*e64d7b88SJiří Techet int last = '\0';
89*e64d7b88SJiří Techet do {
90*e64d7b88SJiří Techet last = c;
91*e64d7b88SJiří Techet c = get_next_char();
92*e64d7b88SJiří Techet } while (! (c == EOF || (last == '-' && c == '}')));
93*e64d7b88SJiří Techet return get_next_char();
94*e64d7b88SJiří Techet }
95*e64d7b88SJiří Techet return c;
96*e64d7b88SJiří Techet }
97*e64d7b88SJiří Techet
add_tag(const char * token,haskellKind kind,vString * name)98*e64d7b88SJiří Techet static void add_tag(const char *token, haskellKind kind, vString *name)
99*e64d7b88SJiří Techet {
100*e64d7b88SJiří Techet int i;
101*e64d7b88SJiří Techet for (i = 0; token[i] != '\0'; ++i)
102*e64d7b88SJiří Techet vStringPut(name, token[i]);
103*e64d7b88SJiří Techet
104*e64d7b88SJiří Techet makeSimpleTag(name, kind);
105*e64d7b88SJiří Techet vStringClear(name);
106*e64d7b88SJiří Techet }
107*e64d7b88SJiří Techet
isident(char c)108*e64d7b88SJiří Techet static int isident(char c)
109*e64d7b88SJiří Techet {
110*e64d7b88SJiří Techet return isalnum(c) || c == '_' || c == '\'' || c == '$';
111*e64d7b88SJiří Techet }
112*e64d7b88SJiří Techet
get_token(char * token,int n)113*e64d7b88SJiří Techet static int get_token(char *token, int n)
114*e64d7b88SJiří Techet {
115*e64d7b88SJiří Techet int c = getcFromInputFile();
116*e64d7b88SJiří Techet int i = n;
117*e64d7b88SJiří Techet while (c != EOF && isident(c) && i < 1000) {
118*e64d7b88SJiří Techet token[i] = c;
119*e64d7b88SJiří Techet i++;
120*e64d7b88SJiří Techet c = getcFromInputFile();
121*e64d7b88SJiří Techet }
122*e64d7b88SJiří Techet token[i] = '\0';
123*e64d7b88SJiří Techet if (c == EOF)
124*e64d7b88SJiří Techet return 0;
125*e64d7b88SJiří Techet if (i != n) {
126*e64d7b88SJiří Techet ungetcToInputFile(c);
127*e64d7b88SJiří Techet return 1;
128*e64d7b88SJiří Techet } else {
129*e64d7b88SJiří Techet return 0;
130*e64d7b88SJiří Techet }
131*e64d7b88SJiří Techet }
132*e64d7b88SJiří Techet
133*e64d7b88SJiří Techet enum Find_State { Find_Eq, Find_Constr, Get_Extr, Find_Extr, Find_Bar };
134*e64d7b88SJiří Techet
inside_datatype(vString * name)135*e64d7b88SJiří Techet static int inside_datatype(vString *name)
136*e64d7b88SJiří Techet {
137*e64d7b88SJiří Techet enum Find_State st = Find_Eq;
138*e64d7b88SJiří Techet int c;
139*e64d7b88SJiří Techet char token[1001];
140*e64d7b88SJiří Techet
141*e64d7b88SJiří Techet while (1) {
142*e64d7b88SJiří Techet if (st == Find_Eq)
143*e64d7b88SJiří Techet {
144*e64d7b88SJiří Techet do {
145*e64d7b88SJiří Techet c = get_next_char();
146*e64d7b88SJiří Techet if (c == '\n') {
147*e64d7b88SJiří Techet c = get_next_char();
148*e64d7b88SJiří Techet if (! (c == ' ' || c == '\t')) {
149*e64d7b88SJiří Techet return c;
150*e64d7b88SJiří Techet }
151*e64d7b88SJiří Techet }
152*e64d7b88SJiří Techet } while (c != EOF && c != '=');
153*e64d7b88SJiří Techet st = Find_Constr;
154*e64d7b88SJiří Techet }
155*e64d7b88SJiří Techet else if (st == Find_Constr)
156*e64d7b88SJiří Techet {
157*e64d7b88SJiří Techet do {
158*e64d7b88SJiří Techet c = get_next_char();
159*e64d7b88SJiří Techet } while (isspace(c));
160*e64d7b88SJiří Techet if (!isupper(c)) {
161*e64d7b88SJiří Techet skip_rest_of_line();
162*e64d7b88SJiří Techet return '\n';
163*e64d7b88SJiří Techet }
164*e64d7b88SJiří Techet token[0] = c;
165*e64d7b88SJiří Techet if (!get_token(token, 1))
166*e64d7b88SJiří Techet return '\n';
167*e64d7b88SJiří Techet add_tag(token, K_CONSTRUCTOR, name);
168*e64d7b88SJiří Techet st = Find_Extr;
169*e64d7b88SJiří Techet }
170*e64d7b88SJiří Techet else if (st == Find_Extr)
171*e64d7b88SJiří Techet {
172*e64d7b88SJiří Techet c = get_next_char();
173*e64d7b88SJiří Techet if (c == '{')
174*e64d7b88SJiří Techet st = Get_Extr;
175*e64d7b88SJiří Techet else if (c == '|')
176*e64d7b88SJiří Techet st = Find_Constr;
177*e64d7b88SJiří Techet else if (c == '\n') {
178*e64d7b88SJiří Techet c = get_next_char();
179*e64d7b88SJiří Techet if (! (c == ' ' || c == '\t')) {
180*e64d7b88SJiří Techet return c;
181*e64d7b88SJiří Techet }
182*e64d7b88SJiří Techet }
183*e64d7b88SJiří Techet else if (!isspace(c))
184*e64d7b88SJiří Techet st = Find_Bar;
185*e64d7b88SJiří Techet }
186*e64d7b88SJiří Techet else if (st == Get_Extr)
187*e64d7b88SJiří Techet {
188*e64d7b88SJiří Techet do {
189*e64d7b88SJiří Techet c = get_next_char();
190*e64d7b88SJiří Techet } while (isspace(c));
191*e64d7b88SJiří Techet if (c == EOF)
192*e64d7b88SJiří Techet return c;
193*e64d7b88SJiří Techet token[0] = c;
194*e64d7b88SJiří Techet get_token(token, 1);
195*e64d7b88SJiří Techet add_tag(token, K_FUNCTION, name);
196*e64d7b88SJiří Techet do {
197*e64d7b88SJiří Techet c = get_next_char();
198*e64d7b88SJiří Techet if (c == '}') {
199*e64d7b88SJiří Techet st = Find_Bar;
200*e64d7b88SJiří Techet break;
201*e64d7b88SJiří Techet }
202*e64d7b88SJiří Techet } while (c != EOF && c != ',');
203*e64d7b88SJiří Techet }
204*e64d7b88SJiří Techet else if (st == Find_Bar)
205*e64d7b88SJiří Techet {
206*e64d7b88SJiří Techet do {
207*e64d7b88SJiří Techet c = get_next_char();
208*e64d7b88SJiří Techet if (c == '\n') {
209*e64d7b88SJiří Techet c = get_next_char();
210*e64d7b88SJiří Techet if (! (c == ' ' || c == '\t')) {
211*e64d7b88SJiří Techet return c;
212*e64d7b88SJiří Techet }
213*e64d7b88SJiří Techet }
214*e64d7b88SJiří Techet } while (c != EOF && c != '|');
215*e64d7b88SJiří Techet st = Find_Constr;
216*e64d7b88SJiří Techet }
217*e64d7b88SJiří Techet }
218*e64d7b88SJiří Techet return '\n';
219*e64d7b88SJiří Techet }
220*e64d7b88SJiří Techet
findHaskellTags(int is_literate)221*e64d7b88SJiří Techet static void findHaskellTags (int is_literate)
222*e64d7b88SJiří Techet {
223*e64d7b88SJiří Techet vString *name = vStringNew ();
224*e64d7b88SJiří Techet char token[1001], arg[1001];
225*e64d7b88SJiří Techet int c;
226*e64d7b88SJiří Techet int in_tex_lit_code = 0;
227*e64d7b88SJiří Techet c = get_next_char();
228*e64d7b88SJiří Techet
229*e64d7b88SJiří Techet while (c != EOF)
230*e64d7b88SJiří Techet {
231*e64d7b88SJiří Techet if (c == '\n') {
232*e64d7b88SJiří Techet c = get_next_char();
233*e64d7b88SJiří Techet continue;
234*e64d7b88SJiří Techet }
235*e64d7b88SJiří Techet
236*e64d7b88SJiří Techet if (isspace(c)) {
237*e64d7b88SJiří Techet skip_rest_of_line();
238*e64d7b88SJiří Techet c = get_next_char();
239*e64d7b88SJiří Techet continue;
240*e64d7b88SJiří Techet }
241*e64d7b88SJiří Techet if (is_literate && !in_tex_lit_code) {
242*e64d7b88SJiří Techet if (c == '>') {
243*e64d7b88SJiří Techet c = getcFromInputFile();
244*e64d7b88SJiří Techet if (c == ' ') {
245*e64d7b88SJiří Techet c = get_next_char();
246*e64d7b88SJiří Techet if (!isident(c)) {
247*e64d7b88SJiří Techet skip_rest_of_line();
248*e64d7b88SJiří Techet c = get_next_char();
249*e64d7b88SJiří Techet continue;
250*e64d7b88SJiří Techet }
251*e64d7b88SJiří Techet } else {
252*e64d7b88SJiří Techet skip_rest_of_line();
253*e64d7b88SJiří Techet c = get_next_char();
254*e64d7b88SJiří Techet continue;
255*e64d7b88SJiří Techet }
256*e64d7b88SJiří Techet } else if (c == '\\') {
257*e64d7b88SJiří Techet int n = get_line(token);
258*e64d7b88SJiří Techet if (strncmp(token, "begin{code}", 11) == 0) {
259*e64d7b88SJiří Techet in_tex_lit_code = 1;
260*e64d7b88SJiří Techet c = get_next_char();
261*e64d7b88SJiří Techet continue;
262*e64d7b88SJiří Techet } else {
263*e64d7b88SJiří Techet if (n > 0 && token[n-1] != '\n')
264*e64d7b88SJiří Techet skip_rest_of_line();
265*e64d7b88SJiří Techet else
266*e64d7b88SJiří Techet c = get_next_char();
267*e64d7b88SJiří Techet }
268*e64d7b88SJiří Techet continue;
269*e64d7b88SJiří Techet } else {
270*e64d7b88SJiří Techet skip_rest_of_line();
271*e64d7b88SJiří Techet c = get_next_char();
272*e64d7b88SJiří Techet continue;
273*e64d7b88SJiří Techet }
274*e64d7b88SJiří Techet }
275*e64d7b88SJiří Techet if (is_literate && in_tex_lit_code && c == '\\') {
276*e64d7b88SJiří Techet get_line(token);
277*e64d7b88SJiří Techet if (strncmp(token, "end{code}", 9) == 0) {
278*e64d7b88SJiří Techet in_tex_lit_code = 0;
279*e64d7b88SJiří Techet c = get_next_char();
280*e64d7b88SJiří Techet continue;
281*e64d7b88SJiří Techet }
282*e64d7b88SJiří Techet }
283*e64d7b88SJiří Techet token[0] = c;
284*e64d7b88SJiří Techet if (!isident(c)) {
285*e64d7b88SJiří Techet skip_rest_of_line();
286*e64d7b88SJiří Techet c = get_next_char();
287*e64d7b88SJiří Techet continue;
288*e64d7b88SJiří Techet }
289*e64d7b88SJiří Techet if (!get_token(token, 1)) {
290*e64d7b88SJiří Techet c = get_next_char();
291*e64d7b88SJiří Techet continue;
292*e64d7b88SJiří Techet }
293*e64d7b88SJiří Techet do {
294*e64d7b88SJiří Techet if ((c = getcFromInputFile()) == EOF)
295*e64d7b88SJiří Techet return;
296*e64d7b88SJiří Techet } while (c == ' ' || c == '\t');
297*e64d7b88SJiří Techet arg[0] = c;
298*e64d7b88SJiří Techet get_token(arg, 1);
299*e64d7b88SJiří Techet if (strcmp(token, "data") == 0 || strcmp(token, "newtype") == 0) {
300*e64d7b88SJiří Techet add_tag(arg, K_TYPE, name);
301*e64d7b88SJiří Techet c = inside_datatype(name);
302*e64d7b88SJiří Techet continue;
303*e64d7b88SJiří Techet }
304*e64d7b88SJiří Techet if (strcmp(token, "type") == 0)
305*e64d7b88SJiří Techet add_tag(arg, K_TYPE, name);
306*e64d7b88SJiří Techet else if (strcmp(token, "module") == 0)
307*e64d7b88SJiří Techet add_tag(arg, K_MODULE, name);
308*e64d7b88SJiří Techet else if (strcmp(token, "instance") == 0 ||
309*e64d7b88SJiří Techet strcmp(token, "foreign") == 0 ||
310*e64d7b88SJiří Techet strcmp(token, "import") == 0)
311*e64d7b88SJiří Techet ;
312*e64d7b88SJiří Techet else {
313*e64d7b88SJiří Techet if (arg[0] != ':')
314*e64d7b88SJiří Techet add_tag(token, K_FUNCTION, name);
315*e64d7b88SJiří Techet }
316*e64d7b88SJiří Techet skip_rest_of_line();
317*e64d7b88SJiří Techet c = get_next_char();
318*e64d7b88SJiří Techet }
319*e64d7b88SJiří Techet vStringDelete(name);
320*e64d7b88SJiří Techet }
321*e64d7b88SJiří Techet
findNormalHaskellTags(void)322*e64d7b88SJiří Techet static void findNormalHaskellTags (void)
323*e64d7b88SJiří Techet {
324*e64d7b88SJiří Techet findHaskellTags (0);
325*e64d7b88SJiří Techet }
326*e64d7b88SJiří Techet
findLiterateHaskellTags(void)327*e64d7b88SJiří Techet static void findLiterateHaskellTags (void)
328*e64d7b88SJiří Techet {
329*e64d7b88SJiří Techet findHaskellTags (1);
330*e64d7b88SJiří Techet }
331*e64d7b88SJiří Techet
HaskellParser(void)332*e64d7b88SJiří Techet extern parserDefinition* HaskellParser (void)
333*e64d7b88SJiří Techet {
334*e64d7b88SJiří Techet static const char *const extensions [] = { "hs", NULL };
335*e64d7b88SJiří Techet parserDefinition* def = parserNew ("Haskell");
336*e64d7b88SJiří Techet
337*e64d7b88SJiří Techet def->kindTable = HaskellKinds;
338*e64d7b88SJiří Techet def->kindCount = ARRAY_SIZE(HaskellKinds);
339*e64d7b88SJiří Techet def->extensions = extensions;
340*e64d7b88SJiří Techet def->parser = findNormalHaskellTags;
341*e64d7b88SJiří Techet return def;
342*e64d7b88SJiří Techet }
343*e64d7b88SJiří Techet
LiterateHaskellParser(void)344*e64d7b88SJiří Techet extern parserDefinition* LiterateHaskellParser (void)
345*e64d7b88SJiří Techet {
346*e64d7b88SJiří Techet static const char *const extensions [] = { "lhs", NULL };
347*e64d7b88SJiří Techet parserDefinition* def = parserNew ("LiterateHaskell");
348*e64d7b88SJiří Techet def->kindTable = HaskellKinds;
349*e64d7b88SJiří Techet def->kindCount = ARRAY_SIZE(HaskellKinds);
350*e64d7b88SJiří Techet def->extensions = extensions;
351*e64d7b88SJiří Techet def->parser = findLiterateHaskellTags;
352*e64d7b88SJiří Techet return def;
353*e64d7b88SJiří Techet }
354