xref: /Universal-ctags/parsers/cobol.c (revision bcc2ead480f5ab6b9974f8441d08fd601d36d4bf)
1 /*
2 *   Copyright (c) 2000-2003, Darren Hiebert
3 *
4 *   This source code is released for free distribution under the terms of the
5 *   GNU General Public License version 2 or (at your option) any later version.
6 *
7 *   This module contains functions for generating tags for COBOL language
8 *   files.
9 */
10 
11 /* Some references:
12  * - https://www.cs.vu.nl/grammarware/browsable/cobol/
13  * - https://www.cs.vu.nl/grammarware/browsable/vs-cobol-ii/
14  * - https://open-cobol.sourceforge.io/guides/grammar.pdf
15  * - http://mapage.noos.fr/~bpinon/a_cobol_parser.htm
16  * - https://en.wikipedia.org/wiki/COBOL
17  */
18 
19 /*
20 *   INCLUDE FILES
21 */
22 #include "general.h"	/* must always come first */
23 #include "debug.h"
24 #include "entry.h"
25 #include "keyword.h"
26 #include "nestlevel.h"
27 #include "parse.h"
28 #include "read.h"
29 #include "routines.h"
30 
/*
*   Tag kinds produced by the COBOL parsers.
*
*   The enumerators are used as indices into CobolKinds[], so the order here
*   must match the order of the entries in that table.
*/
typedef enum {
	K_FILE,			/* file description entry (FD, SD, RD) */
	K_GROUP,		/* group item */
	K_PROGRAM,		/* PROGRAM-ID */
	K_SECTION,		/* section header */
	K_DIVISION,		/* division header */
	K_PARAGRAPH,	/* paragraph name */
	K_DATA,			/* data item */
	K_SOURCEFILE,	/* source file named by a COPY statement */
} cobolKind;
41 
/* Roles that a K_SOURCEFILE reference tag can carry; the value is passed as
 * the role argument when a COPY target is tagged. */
typedef enum {
	COBOL_SOURCEFILE_COPIED,	/* file pulled in through COPY */
} cobolSourcefileRole;
45 
46 static roleDefinition CobolSourcefileRoles [] = {
47 	{ true, "copied", "copied in source file" },
48 };
49 
50 static kindDefinition CobolKinds[] = {
51 	{ true, 'f', "fd", "file descriptions (FD, SD, RD)" },
52 	{ true, 'g', "group", "group items" },
53 	{ true, 'P', "program", "program ids" },
54 	{ true, 's', "section", "sections" },
55 	{ true, 'D', "division", "divisions" },
56 	{ true, 'p', "paragraph", "paragraphs" },
57 	{ true, 'd', "data", "data items"      },
58 	{ true, 'S', "sourcefile", "source code file",
59 	  .referenceOnly = true, ATTACH_ROLES(CobolSourcefileRoles)},
60 };
61 
62 static langType Lang_cobol;
63 
/*
*   Keyword ids stored in cobolKeywordTable and returned by the keyword
*   lookups done while scanning a line.
*/
enum {
	/* file description entries */
	KEYWORD_FD,
	KEYWORD_SD,
	KEYWORD_RD,

	/* structural headers */
	KEYWORD_SECTION,
	KEYWORD_DIVISION,

	/* words that must not be mistaken for paragraph names */
	KEYWORD_CONTINUE,
	KEYWORD_END_EXEC,
	KEYWORD_FILLER,

	/* clauses that may follow the name in a data description entry */
	KEYWORD_BLANK,
	KEYWORD_OCCURS,
	KEYWORD_IS,
	KEYWORD_JUST,
	KEYWORD_PIC,
	KEYWORD_REDEFINES,
	KEYWORD_RENAMES,
	KEYWORD_SIGN,
	KEYWORD_SYNC,
	KEYWORD_USAGE,
	KEYWORD_VALUE,

	/* miscellaneous statements */
	KEYWORD_PROGRAM_ID,
	KEYWORD_EXIT,
	KEYWORD_COPY,
};
88 
89 static const keywordTable cobolKeywordTable[] = {
90 #define DEFINE_KEYWORD(n) { #n, KEYWORD_##n }
91 	DEFINE_KEYWORD (FD),
92 	DEFINE_KEYWORD (SD),
93 	DEFINE_KEYWORD (RD),
94 	DEFINE_KEYWORD (SECTION),
95 	DEFINE_KEYWORD (DIVISION),
96 	DEFINE_KEYWORD (CONTINUE),
97 	{ "END-EXEC", KEYWORD_END_EXEC },
98 	DEFINE_KEYWORD (EXIT),
99 	DEFINE_KEYWORD (FILLER),
100 	DEFINE_KEYWORD (BLANK),
101 	DEFINE_KEYWORD (OCCURS),
102 	DEFINE_KEYWORD (IS),
103 	DEFINE_KEYWORD (JUST),
104 	DEFINE_KEYWORD (PIC),
105 	{ "PICTURE", KEYWORD_PIC },
106 	DEFINE_KEYWORD (REDEFINES),
107 	DEFINE_KEYWORD (RENAMES),
108 	DEFINE_KEYWORD (SIGN),
109 	DEFINE_KEYWORD (SYNC),
110 	DEFINE_KEYWORD (USAGE),
111 	DEFINE_KEYWORD (VALUE),
112 	{ "VALUES", KEYWORD_VALUE },
113 	{ "PROGRAM-ID", KEYWORD_PROGRAM_ID },
114 	DEFINE_KEYWORD (COPY),
115 };
116 
/* 1-based column holding the indicator character ('*', '/', '-', ...) in
 * fixed and variable formats. */
#define INDICATOR_COLUMN 7
/* 1-based column at which the program name area starts in fixed format;
 * text from this column onwards is not part of the program. */
#define PROGRAM_NAME_AREA_COLUMN 73

/* True when c may appear inside a COBOL word.  The argument is converted to
 * unsigned char before being handed to isalnum(): passing a negative plain
 * char (any byte >= 0x80 where char is signed) is undefined behaviour.
 * Note that c is evaluated twice. */
#define isIdentifierChar(c) (isalnum ((unsigned char) (c)) || (c) == '-')
/* True when c opens or closes a literal (single or double quote). */
#define isQuote(c) ((c) == '\'' || (c) == '"')
122 
/*
*   Source layout understood by the line preprocessor.
*
*   The values are bit flags: code that only cares whether the program text
*   starts after the indicator column tests for FORMAT_FIXED with '&', which
*   is true for both the fixed and the variable layout.
*/
typedef enum {
	/* Fixed: program starts at column 8, ends at column 72 */
	FORMAT_FIXED	= 0x1,
	/* Free: program starts at column 1, no specific end */
	FORMAT_FREE		= 0x2,
	/* Variable: program starts at column 8, no specific end */
	FORMAT_VARIABLE	= FORMAT_FIXED | FORMAT_FREE
} CobolFormat;
131 
132 static struct {
133 	vString *line;
134 	unsigned long int lineNumber;
135 	MIOPos filePosition;
136 	const char *nextLine;
137 	CobolFormat format;
138 } CblInputState;
139 
cblppInit(const CobolFormat format)140 static void cblppInit (const CobolFormat format)
141 {
142 	CblInputState.line = vStringNew ();
143 	CblInputState.lineNumber = 0;
144 	CblInputState.nextLine = NULL;
145 	CblInputState.format = format;
146 }
147 
cblppDeinit(void)148 static void cblppDeinit (void)
149 {
150 	vStringDelete (CblInputState.line);
151 }
152 
/*
*   Locates a (1-based) column in a physical source line.
*
*   Characters are counted from the start of line; a tab counts as 8 columns,
*   every other character as 1.  Returns a pointer to the first character at
*   which the running count reaches or exceeds column, or NULL when the line
*   ends before that.
*/
static const char *cblppGetColumn (const char *line,
								   const unsigned int column)
{
	unsigned int reached = 0;
	const char *cursor = line;

	while (*cursor != '\0')
	{
		if (*cursor == '\t')
			reached += 8;
		else
			reached += 1;

		if (reached >= column)
			return cursor;

		cursor++;
	}

	return NULL;
}
167 
/*
*   Appends the program text of one physical source line to buffer.
*
*   Free format: the whole line is appended unless it starts with '*' or '/'.
*
*   Fixed and variable formats: the character in INDICATOR_COLUMN decides
*   what happens.  Lines that are too short to have an indicator, or whose
*   indicator is '*' or '/', contribute nothing.  For a '-' indicator
*   (continuation line) trailing whitespace is first stripped from buffer and
*   the leading whitespace of the continued text is skipped.  The text
*   starts right after the indicator; in fixed format it stops before
*   PROGRAM_NAME_AREA_COLUMN, otherwise it runs to the end of the line.
*/
static void cblppAppendLine (vString *buffer,
							 const char *line)
{
	const char *indicator;
	const char *lineStart;
	const char *lineEnd;

	if (! (CblInputState.format & FORMAT_FIXED))
	{
		if (line[0] != '*' && line[0] != '/')
			vStringCatS (buffer, line);
		return;
	}

	indicator = cblppGetColumn (line, INDICATOR_COLUMN);
	if (! indicator || ! *indicator || *indicator == '*' || *indicator == '/')
		return;

	lineStart = indicator + 1;
	lineEnd = cblppGetColumn (line, PROGRAM_NAME_AREA_COLUMN);

	if (*indicator == '-')
	{
		vStringStripTrailing (buffer);
		/* cast: isspace() is undefined for negative plain char values */
		while (isspace ((unsigned char) *lineStart))
			lineStart++;
	}

	if (CblInputState.format != FORMAT_FIXED || lineEnd == NULL)
	{
		/* No right margin applies, or the line ends before it: take the
		 * rest of the line.  Subtracting from a NULL lineEnd would be
		 * undefined. */
		vStringCatS (buffer, lineStart);
	}
	else if (lineEnd > lineStart)
		vStringNCatS (buffer, lineStart, (size_t) (lineEnd - lineStart));
	/* else: skipping the blanks of a continuation line moved lineStart to or
	 * past the right margin, so there is no program text on this line */
}
196 
197 /* TODO: skip *> comments */
cblppGetLine(void)198 static const char *cblppGetLine (void)
199 {
200 	const char *line;
201 
202 	if (CblInputState.nextLine)
203 	{
204 		line = CblInputState.nextLine;
205 		CblInputState.nextLine = NULL;
206 	}
207 	else
208 		line = (const char *) readLineFromInputFile ();
209 
210 	CblInputState.lineNumber = getInputLineNumber ();
211 	CblInputState.filePosition = getInputFilePosition ();
212 
213 	if (!line)
214 		return NULL;
215 
216 	vStringClear (CblInputState.line);
217 	cblppAppendLine (CblInputState.line, line);
218 
219 	/* check for continuation lines */
220 	if (CblInputState.format & FORMAT_FIXED)
221 	{
222 		while (true)
223 		{
224 			const char *indicator;
225 			line = (const char *) readLineFromInputFile ();
226 			if (! line)
227 				break;
228 			indicator = cblppGetColumn (line, INDICATOR_COLUMN);
229 			if (indicator && *indicator == '-')
230 				cblppAppendLine (CblInputState.line, line);
231 			else
232 				break;
233 		}
234 
235 		CblInputState.nextLine = line;
236 	}
237 
238 	return vStringValue (CblInputState.line);
239 }
240 
/*
*   Initializes e as a tag for name with the given kind and role, located at
*   the start of the current logical line.
*
*   The position kept in CblInputState is used instead of the one set by
*   initRefTagEntry(), since the preprocessor may already have read past the
*   line being parsed.
*/
static void initCOBOLRefTagEntry (tagEntryInfo *e, const char *name,
								  const cobolKind kind, const int role)
{
	initRefTagEntry (e, name, kind, role);

	e->filePosition = CblInputState.filePosition;
	e->lineNumber = CblInputState.lineNumber;
}
248 
/* Same as initCOBOLRefTagEntry(), for a definition tag
 * (role ROLE_DEFINITION_INDEX). */
static void initCOBOLTagEntry (tagEntryInfo *e, const char *name, const cobolKind kind)
{
	const int definitionRole = ROLE_DEFINITION_INDEX;

	initCOBOLRefTagEntry (e, name, kind, definitionRole);
}
253 
makeCOBOLRefTag(const char * name,const cobolKind kind,const int role)254 static int makeCOBOLRefTag (const char *name, const cobolKind kind, const int role)
255 {
256 	if (CobolKinds[kind].enabled)
257 	{
258 		tagEntryInfo e;
259 
260 		initCOBOLRefTagEntry (&e, name, kind, role);
261 
262 		return makeTagEntry (&e);
263 	}
264 
265 	return CORK_NIL;
266 }
267 
makeCOBOLTag(const char * name,const cobolKind kind)268 static int makeCOBOLTag (const char *name, const cobolKind kind)
269 {
270 	return makeCOBOLRefTag (name, kind, ROLE_DEFINITION_INDEX);
271 }
272 
273 #define CBL_NL(nl) (*((unsigned int *) (nestingLevelGetUserData (nl))))
274 
popNestingLevelsToLevelNumber(NestingLevels * levels,const unsigned int levelNumber)275 static NestingLevel *popNestingLevelsToLevelNumber (NestingLevels *levels, const unsigned int levelNumber)
276 {
277 	NestingLevel *nl;
278 
279 	while (true)
280 	{
281 		nl = nestingLevelsGetCurrent (levels);
282 		if (! nl || CBL_NL (nl) < levelNumber)
283 			break;
284 		nestingLevelsPop (levels);
285 	}
286 
287 	return nl;
288 }
289 
/*
*   Tells whether nptr consists solely of decimal digits.
*
*   On success the value is stored in *num, unless num is NULL.  Returns
*   false for the empty string and for anything containing a non-digit.
*
*   The first character is checked by hand because strtoul() on its own
*   accepts leading blanks and a sign: "-1" would otherwise be reported as
*   numeric with a huge wrapped-around value.  '-' is a valid identifier
*   character for this parser, so such words can reach this function.
*/
static bool isNumeric (const char *nptr, unsigned long int *num)
{
	char *endptr;
	unsigned long int v;

	if (*nptr < '0' || *nptr > '9')
		return false;

	/* the leading digit guarantees that at least one character is consumed */
	v = strtoul (nptr, &endptr, 10);
	if (*endptr != '\0')
		return false;

	if (num)
		*num = v;
	return true;
}
304 
findCOBOLTags(const CobolFormat format)305 static void findCOBOLTags (const CobolFormat format)
306 {
307 	NestingLevels *levels;
308 	const char *line;
309 
310 	cblppInit (format);
311 
312 	levels = nestingLevelsNew (sizeof (unsigned int));
313 
314 	while ((line = cblppGetLine ()) != NULL)
315 	{
316 		char word[64];
317 		int keyword;
318 		unsigned long int levelNumber;
319 
320 #define READ_WHILE(word, cond) \
321 	do { \
322 		unsigned int i; \
323 		for (i = 0; i < (ARRAY_SIZE (word) - 1) && *line && (cond); line++) \
324 			word[i++] = *line; \
325 		word[i] = 0; \
326 	} while (0)
327 #define READ_LITERAL(word) \
328 	do { \
329 		const char READ_LITERAL__q = isQuote (*line) ? *line++ : 0; \
330 		READ_WHILE (word, (READ_LITERAL__q && READ_LITERAL__q != *line) || \
331 		                   isIdentifierChar (*line)); \
332 		if (READ_LITERAL__q && READ_LITERAL__q == *line) \
333 			line++; \
334 		keyword = lookupCaseKeyword (word, Lang_cobol); \
335 	} while (0)
336 #define READ_WORD(word, keyword) \
337 	do { \
338 		READ_WHILE (word, isIdentifierChar (*line)); \
339 		keyword = lookupCaseKeyword (word, Lang_cobol); \
340 	} while (0)
341 #define READ_KEYWORD(keyword) \
342 	do { \
343 		char READ_KEYWORD__word[64]; \
344 		READ_WORD (READ_KEYWORD__word, keyword); \
345 	} while (0)
346 #define SKIP_SPACES() do { while (isspace (*line)) line++; } while (0)
347 
348 		SKIP_SPACES ();
349 		READ_WORD (word, keyword);
350 		SKIP_SPACES ();
351 
352 		switch (keyword)
353 		{
354 		case KEYWORD_FD:
355 		case KEYWORD_SD:
356 		case KEYWORD_RD:
357 			READ_WORD (word, keyword);
358 			SKIP_SPACES ();
359 			if (*word && *line == '.')
360 				makeCOBOLTag (word, K_FILE);
361 			break;
362 
363 		case KEYWORD_PROGRAM_ID:
364 			if (*line == '.')
365 			{
366 				line++;
367 				SKIP_SPACES ();
368 			}
369 			READ_LITERAL (word);
370 			if (*word)
371 				makeCOBOLTag (word, K_PROGRAM);
372 			break;
373 
374 		case KEYWORD_COPY:
375 			READ_WORD (word, keyword); // FIXME: also allow LITERAL
376 			if (*word)
377 				makeCOBOLRefTag (word, K_SOURCEFILE, COBOL_SOURCEFILE_COPIED);
378 			break;
379 
380 		case KEYWORD_CONTINUE:
381 		case KEYWORD_END_EXEC:
382 		case KEYWORD_EXIT:
383 		case KEYWORD_FILLER:
384 			/* nothing, just ignore those in following cases */;
385 			break;
386 
387 		default:
388 			if (isNumeric (word, &levelNumber))
389 			{
390 				READ_WORD (word, keyword);
391 				SKIP_SPACES ();
392 
393 				if (*word && keyword != KEYWORD_FILLER)
394 				{
395 					int kind = KIND_GHOST_INDEX;
396 
397 					if (*line == '.')
398 						kind = K_GROUP;
399 					else
400 					{
401 						int keyword2;
402 
403 						READ_KEYWORD (keyword2);
404 						switch (keyword2)
405 						{
406 						case KEYWORD_BLANK:
407 						case KEYWORD_OCCURS:
408 						case KEYWORD_IS:
409 						case KEYWORD_JUST:
410 						case KEYWORD_PIC:
411 						case KEYWORD_REDEFINES:
412 						case KEYWORD_RENAMES:
413 						case KEYWORD_SIGN:
414 						case KEYWORD_SYNC:
415 						case KEYWORD_USAGE:
416 						case KEYWORD_VALUE:
417 							kind = K_DATA;
418 						}
419 					}
420 
421 					if (kind != KIND_GHOST_INDEX)
422 					{
423 						NestingLevel *nl;
424 						tagEntryInfo entry;
425 						int r;
426 						unsigned int nestingLevelNumber;
427 
428 						/* for nesting purposes, level 77 is identical to 1,
429 						 * and 66 to 2 */
430 						switch (levelNumber)
431 						{
432 						default: nestingLevelNumber = levelNumber; break;
433 						case 77: nestingLevelNumber = 1; break;
434 						case 66: nestingLevelNumber = 2; break;
435 						}
436 
437 						nl = popNestingLevelsToLevelNumber (levels, nestingLevelNumber);
438 						initCOBOLTagEntry (&entry, word, kind);
439 						if (nl && CBL_NL (nl) < nestingLevelNumber)
440 							entry.extensionFields.scopeIndex = nl->corkIndex;
441 						r = makeTagEntry (&entry);
442 						if (levelNumber < 50 /* exclude special levels */)
443 						{
444 							nl = nestingLevelsPush (levels, r);
445 							CBL_NL (nl) = levelNumber;
446 						}
447 					}
448 				}
449 			}
450 			else if (*word && *line == '.')
451 				makeCOBOLTag (word, K_PARAGRAPH);
452 			else
453 			{
454 				int keyword2;
455 
456 				READ_KEYWORD (keyword2);
457 				SKIP_SPACES ();
458 
459 				if (keyword2 == KEYWORD_DIVISION && *line == '.')
460 					makeCOBOLTag (word, K_DIVISION);
461 				else if (keyword2 == KEYWORD_SECTION && *line == '.')
462 					makeCOBOLTag (word, K_SECTION);
463 			}
464 		}
465 	}
466 
467 	nestingLevelsFree (levels);
468 	cblppDeinit ();
469 }
470 
findCOBOLFixedTags(void)471 static void findCOBOLFixedTags (void)
472 {
473 	findCOBOLTags (FORMAT_FIXED);
474 }
475 
findCOBOLFreeTags(void)476 static void findCOBOLFreeTags (void)
477 {
478 	findCOBOLTags (FORMAT_FREE);
479 }
480 
findCOBOLVariableTags(void)481 static void findCOBOLVariableTags (void)
482 {
483 	findCOBOLTags (FORMAT_VARIABLE);
484 }
485 
/* Parser initialization hook: remembers the language id, which the keyword
 * lookups in findCOBOLTags() need. */
static void initializeCobolParser (langType language)
{
	Lang_cobol = language;
}
490 
commonCobolParserDefinition(const char * name,simpleParser parser)491 static parserDefinition* commonCobolParserDefinition (const char *name,
492 													  simpleParser parser)
493 {
494 	parserDefinition* def = parserNew (name);
495 	def->initialize = initializeCobolParser;
496 	def->parser = parser;
497 	def->kindTable = CobolKinds;
498 	def->kindCount = ARRAY_SIZE(CobolKinds);
499 	def->keywordTable = cobolKeywordTable;
500 	def->keywordCount = ARRAY_SIZE(cobolKeywordTable);
501 	def->useCork = CORK_QUEUE;
502 	return def;
503 }
504 
CobolParser(void)505 extern parserDefinition* CobolParser (void)
506 {
507 	static const char *const extensions [] = {
508 			"cbl", "cob", "CBL", "COB", NULL };
509 	parserDefinition* def = commonCobolParserDefinition ("Cobol",
510 														 findCOBOLFixedTags);
511 	def->extensions = extensions;
512 	return def;
513 }
514 
CobolFreeParser(void)515 extern parserDefinition* CobolFreeParser (void)
516 {
517 	return commonCobolParserDefinition ("CobolFree", findCOBOLFreeTags);
518 }
519 
CobolVariableParser(void)520 extern parserDefinition* CobolVariableParser (void)
521 {
522 	return commonCobolParserDefinition ("CobolVariable", findCOBOLVariableTags);
523 }
524