1 /*
2 * Copyright (c) 2000-2003, Darren Hiebert
3 *
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License version 2 or (at your option) any later version.
6 *
7 * This module contains functions for generating tags for COBOL language
8 * files.
9 */
10
11 /* Some references:
12 * - https://www.cs.vu.nl/grammarware/browsable/cobol/
13 * - https://www.cs.vu.nl/grammarware/browsable/vs-cobol-ii/
14 * - https://open-cobol.sourceforge.io/guides/grammar.pdf
15 * - http://mapage.noos.fr/~bpinon/a_cobol_parser.htm
16 * - https://en.wikipedia.org/wiki/COBOL
17 */
18
19 /*
20 * INCLUDE FILES
21 */
22 #include "general.h" /* must always come first */
23 #include "debug.h"
24 #include "entry.h"
25 #include "keyword.h"
26 #include "nestlevel.h"
27 #include "parse.h"
28 #include "read.h"
29 #include "routines.h"
30
/*
 * Tag kinds emitted by the COBOL parsers.  The values are used as indices
 * into CobolKinds[], so both lists must be kept in the same order.
 */
typedef enum {
	K_FILE = 0,     /* FD/SD/RD file description */
	K_GROUP,        /* group item */
	K_PROGRAM,      /* PROGRAM-ID */
	K_SECTION,      /* section */
	K_DIVISION,     /* division */
	K_PARAGRAPH,    /* paragraph */
	K_DATA,         /* data item */
	K_SOURCEFILE,   /* source file referenced by COPY */
} cobolKind;
41
/* Roles a K_SOURCEFILE reference tag can have; indexes CobolSourcefileRoles[]. */
typedef enum {
	COBOL_SOURCEFILE_COPIED = 0,   /* file named in a COPY statement */
} cobolSourcefileRole;
45
46 static roleDefinition CobolSourcefileRoles [] = {
47 { true, "copied", "copied in source file" },
48 };
49
50 static kindDefinition CobolKinds[] = {
51 { true, 'f', "fd", "file descriptions (FD, SD, RD)" },
52 { true, 'g', "group", "group items" },
53 { true, 'P', "program", "program ids" },
54 { true, 's', "section", "sections" },
55 { true, 'D', "division", "divisions" },
56 { true, 'p', "paragraph", "paragraphs" },
57 { true, 'd', "data", "data items" },
58 { true, 'S', "sourcefile", "source code file",
59 .referenceOnly = true, ATTACH_ROLES(CobolSourcefileRoles)},
60 };
61
62 static langType Lang_cobol;
63
/* Keyword identifiers returned by the keyword lookup for cobolKeywordTable. */
enum {
	KEYWORD_FD = 0,
	KEYWORD_SD,
	KEYWORD_RD,
	KEYWORD_SECTION,
	KEYWORD_DIVISION,
	KEYWORD_CONTINUE,
	KEYWORD_END_EXEC,
	KEYWORD_FILLER,
	/* clauses that may follow a data item name */
	KEYWORD_BLANK,
	KEYWORD_OCCURS,
	KEYWORD_IS,
	KEYWORD_JUST,
	KEYWORD_PIC,
	KEYWORD_REDEFINES,
	KEYWORD_RENAMES,
	KEYWORD_SIGN,
	KEYWORD_SYNC,
	KEYWORD_USAGE,
	KEYWORD_VALUE,
	/* statements handled on their own */
	KEYWORD_PROGRAM_ID,
	KEYWORD_EXIT,
	KEYWORD_COPY,
};
88
89 static const keywordTable cobolKeywordTable[] = {
90 #define DEFINE_KEYWORD(n) { #n, KEYWORD_##n }
91 DEFINE_KEYWORD (FD),
92 DEFINE_KEYWORD (SD),
93 DEFINE_KEYWORD (RD),
94 DEFINE_KEYWORD (SECTION),
95 DEFINE_KEYWORD (DIVISION),
96 DEFINE_KEYWORD (CONTINUE),
97 { "END-EXEC", KEYWORD_END_EXEC },
98 DEFINE_KEYWORD (EXIT),
99 DEFINE_KEYWORD (FILLER),
100 DEFINE_KEYWORD (BLANK),
101 DEFINE_KEYWORD (OCCURS),
102 DEFINE_KEYWORD (IS),
103 DEFINE_KEYWORD (JUST),
104 DEFINE_KEYWORD (PIC),
105 { "PICTURE", KEYWORD_PIC },
106 DEFINE_KEYWORD (REDEFINES),
107 DEFINE_KEYWORD (RENAMES),
108 DEFINE_KEYWORD (SIGN),
109 DEFINE_KEYWORD (SYNC),
110 DEFINE_KEYWORD (USAGE),
111 DEFINE_KEYWORD (VALUE),
112 { "VALUES", KEYWORD_VALUE },
113 { "PROGRAM-ID", KEYWORD_PROGRAM_ID },
114 DEFINE_KEYWORD (COPY),
115 };
116
/* 1-based column of the fixed-format indicator area */
#define INDICATOR_COLUMN 7
/* 1-based column at which the fixed-format program name area begins */
#define PROGRAM_NAME_AREA_COLUMN 73

/* True if c may appear in a COBOL word (letters, digits and '-').
 * The argument is converted to unsigned char before being handed to
 * isalnum(): passing a negative plain char is undefined behaviour.
 * Note that c is evaluated more than once. */
#define isIdentifierChar(c) (isalnum((unsigned char) (c)) || (c) == '-')
/* True if c opens or closes a literal (single or double quote). */
#define isQuote(c) ((c) == '\'' || (c) == '"')
122
/*
 * Source reference formats.  The values are bit flags: FORMAT_VARIABLE has
 * both bits set, so "format & FORMAT_FIXED" is true for every format whose
 * program text starts at column 8.
 */
typedef enum {
	FORMAT_FIXED    = 0x1,  /* program starts at column 8, ends at column 72 */
	FORMAT_FREE     = 0x2,  /* program starts at column 1, no specific end */
	FORMAT_VARIABLE = FORMAT_FIXED | FORMAT_FREE
	                        /* program starts at column 8, no specific end */
} CobolFormat;
131
132 static struct {
133 vString *line;
134 unsigned long int lineNumber;
135 MIOPos filePosition;
136 const char *nextLine;
137 CobolFormat format;
138 } CblInputState;
139
cblppInit(const CobolFormat format)140 static void cblppInit (const CobolFormat format)
141 {
142 CblInputState.line = vStringNew ();
143 CblInputState.lineNumber = 0;
144 CblInputState.nextLine = NULL;
145 CblInputState.format = format;
146 }
147
cblppDeinit(void)148 static void cblppDeinit (void)
149 {
150 vStringDelete (CblInputState.line);
151 }
152
/*
 * Return a pointer to the character of `line` that occupies (or first
 * reaches) the 1-based display column `column`, or NULL if the line ends
 * before that column.  A tab counts for 8 columns, any other character
 * for 1.
 */
static const char *cblppGetColumn (const char *line,
                                   const unsigned int column)
{
	unsigned int width = 0;
	const char *p = line;

	while (*p != '\0')
	{
		if (*p == '\t')
			width += 8;
		else
			width += 1;

		if (width >= column)
			return p;

		p++;
	}

	return NULL;
}
167
/*
 * Append the program text of the raw source line `line` to `buffer`,
 * according to the current reference format.
 *
 * Fixed and variable formats: nothing is appended when the line has no
 * indicator column or when the indicator is '*' or '/'.  Otherwise the text
 * after the indicator is appended; for a continuation line (indicator '-')
 * trailing blanks are first stripped from `buffer` and leading blanks of
 * the continuation are skipped.  In strict fixed format the text is cut
 * before the program name area (column 73).
 *
 * Free format: the whole line is appended unless it starts with '*' or '/'.
 */
static void cblppAppendLine (vString *buffer,
                             const char *line)
{
	if (CblInputState.format & FORMAT_FIXED)
	{
		const char *indicator = cblppGetColumn (line, INDICATOR_COLUMN);

		if (indicator && *indicator && *indicator != '*' && *indicator != '/')
		{
			const char *lineStart = indicator + 1;

			if (*indicator == '-')
			{
				vStringStripTrailing (buffer);
				/* cast: isspace() is undefined for negative char values */
				while (isspace ((unsigned char) *lineStart))
					lineStart++;
			}

			if (CblInputState.format == FORMAT_FIXED)
			{
				const char *lineEnd = cblppGetColumn (line, PROGRAM_NAME_AREA_COLUMN);

				if (! lineEnd)
				{
					/* line ends before column 73: take all of it, without
					 * doing pointer arithmetic on a NULL pointer */
					vStringCatS (buffer, lineStart);
				}
				else if (lineEnd > lineStart)
					vStringNCatS (buffer, lineStart, (size_t) (lineEnd - lineStart));
				/* else: skipping blanks moved past column 72, so there is
				 * no program text left on this line */
			}
			else
				vStringCatS (buffer, lineStart);
		}
	}
	else if (line[0] != '*' && line[0] != '/')
		vStringCatS (buffer, line);
}
196
197 /* TODO: skip *> comments */
cblppGetLine(void)198 static const char *cblppGetLine (void)
199 {
200 const char *line;
201
202 if (CblInputState.nextLine)
203 {
204 line = CblInputState.nextLine;
205 CblInputState.nextLine = NULL;
206 }
207 else
208 line = (const char *) readLineFromInputFile ();
209
210 CblInputState.lineNumber = getInputLineNumber ();
211 CblInputState.filePosition = getInputFilePosition ();
212
213 if (!line)
214 return NULL;
215
216 vStringClear (CblInputState.line);
217 cblppAppendLine (CblInputState.line, line);
218
219 /* check for continuation lines */
220 if (CblInputState.format & FORMAT_FIXED)
221 {
222 while (true)
223 {
224 const char *indicator;
225 line = (const char *) readLineFromInputFile ();
226 if (! line)
227 break;
228 indicator = cblppGetColumn (line, INDICATOR_COLUMN);
229 if (indicator && *indicator == '-')
230 cblppAppendLine (CblInputState.line, line);
231 else
232 break;
233 }
234
235 CblInputState.nextLine = line;
236 }
237
238 return vStringValue (CblInputState.line);
239 }
240
/* Initialise `e` as a tag for `name` with the given kind and role, then
 * replace its location with the one recorded for the current logical line
 * (the reader may already be positioned past it because of read-ahead). */
static void initCOBOLRefTagEntry (tagEntryInfo *e, const char *name,
                                  const cobolKind kind, const int role)
{
	initRefTagEntry (e, name, kind, role);

	e->filePosition = CblInputState.filePosition;
	e->lineNumber = CblInputState.lineNumber;
}
248
/* Initialise `e` as a definition tag for `name`, located at the current
 * logical line. */
static void initCOBOLTagEntry (tagEntryInfo *e, const char *name, const cobolKind kind)
{
	const int definitionRole = ROLE_DEFINITION_INDEX;

	initCOBOLRefTagEntry (e, name, kind, definitionRole);
}
253
makeCOBOLRefTag(const char * name,const cobolKind kind,const int role)254 static int makeCOBOLRefTag (const char *name, const cobolKind kind, const int role)
255 {
256 if (CobolKinds[kind].enabled)
257 {
258 tagEntryInfo e;
259
260 initCOBOLRefTagEntry (&e, name, kind, role);
261
262 return makeTagEntry (&e);
263 }
264
265 return CORK_NIL;
266 }
267
makeCOBOLTag(const char * name,const cobolKind kind)268 static int makeCOBOLTag (const char *name, const cobolKind kind)
269 {
270 return makeCOBOLRefTag (name, kind, ROLE_DEFINITION_INDEX);
271 }
272
273 #define CBL_NL(nl) (*((unsigned int *) (nestingLevelGetUserData (nl))))
274
popNestingLevelsToLevelNumber(NestingLevels * levels,const unsigned int levelNumber)275 static NestingLevel *popNestingLevelsToLevelNumber (NestingLevels *levels, const unsigned int levelNumber)
276 {
277 NestingLevel *nl;
278
279 while (true)
280 {
281 nl = nestingLevelsGetCurrent (levels);
282 if (! nl || CBL_NL (nl) < levelNumber)
283 break;
284 nestingLevelsPop (levels);
285 }
286
287 return nl;
288 }
289
/*
 * Tell whether `nptr` consists solely of decimal digits (at least one).
 * On success the value is stored in `*num` when `num` is not NULL.
 *
 * strtoul() on its own also accepts leading white space and a sign, so a
 * word such as "-5" would be reported as numeric with a wrapped-around,
 * huge value; requiring a leading digit rules that out.
 */
static bool isNumeric (const char *nptr, unsigned long int *num)
{
	char *endptr;
	unsigned long int v;

	if (*nptr < '0' || *nptr > '9')
		return false;

	v = strtoul (nptr, &endptr, 10);
	if (nptr != endptr && *endptr == 0)
	{
		if (num)
			*num = v;
		return true;
	}
	return false;
}
304
findCOBOLTags(const CobolFormat format)305 static void findCOBOLTags (const CobolFormat format)
306 {
307 NestingLevels *levels;
308 const char *line;
309
310 cblppInit (format);
311
312 levels = nestingLevelsNew (sizeof (unsigned int));
313
314 while ((line = cblppGetLine ()) != NULL)
315 {
316 char word[64];
317 int keyword;
318 unsigned long int levelNumber;
319
320 #define READ_WHILE(word, cond) \
321 do { \
322 unsigned int i; \
323 for (i = 0; i < (ARRAY_SIZE (word) - 1) && *line && (cond); line++) \
324 word[i++] = *line; \
325 word[i] = 0; \
326 } while (0)
327 #define READ_LITERAL(word) \
328 do { \
329 const char READ_LITERAL__q = isQuote (*line) ? *line++ : 0; \
330 READ_WHILE (word, (READ_LITERAL__q && READ_LITERAL__q != *line) || \
331 isIdentifierChar (*line)); \
332 if (READ_LITERAL__q && READ_LITERAL__q == *line) \
333 line++; \
334 keyword = lookupCaseKeyword (word, Lang_cobol); \
335 } while (0)
336 #define READ_WORD(word, keyword) \
337 do { \
338 READ_WHILE (word, isIdentifierChar (*line)); \
339 keyword = lookupCaseKeyword (word, Lang_cobol); \
340 } while (0)
341 #define READ_KEYWORD(keyword) \
342 do { \
343 char READ_KEYWORD__word[64]; \
344 READ_WORD (READ_KEYWORD__word, keyword); \
345 } while (0)
346 #define SKIP_SPACES() do { while (isspace (*line)) line++; } while (0)
347
348 SKIP_SPACES ();
349 READ_WORD (word, keyword);
350 SKIP_SPACES ();
351
352 switch (keyword)
353 {
354 case KEYWORD_FD:
355 case KEYWORD_SD:
356 case KEYWORD_RD:
357 READ_WORD (word, keyword);
358 SKIP_SPACES ();
359 if (*word && *line == '.')
360 makeCOBOLTag (word, K_FILE);
361 break;
362
363 case KEYWORD_PROGRAM_ID:
364 if (*line == '.')
365 {
366 line++;
367 SKIP_SPACES ();
368 }
369 READ_LITERAL (word);
370 if (*word)
371 makeCOBOLTag (word, K_PROGRAM);
372 break;
373
374 case KEYWORD_COPY:
375 READ_WORD (word, keyword); // FIXME: also allow LITERAL
376 if (*word)
377 makeCOBOLRefTag (word, K_SOURCEFILE, COBOL_SOURCEFILE_COPIED);
378 break;
379
380 case KEYWORD_CONTINUE:
381 case KEYWORD_END_EXEC:
382 case KEYWORD_EXIT:
383 case KEYWORD_FILLER:
384 /* nothing, just ignore those in following cases */;
385 break;
386
387 default:
388 if (isNumeric (word, &levelNumber))
389 {
390 READ_WORD (word, keyword);
391 SKIP_SPACES ();
392
393 if (*word && keyword != KEYWORD_FILLER)
394 {
395 int kind = KIND_GHOST_INDEX;
396
397 if (*line == '.')
398 kind = K_GROUP;
399 else
400 {
401 int keyword2;
402
403 READ_KEYWORD (keyword2);
404 switch (keyword2)
405 {
406 case KEYWORD_BLANK:
407 case KEYWORD_OCCURS:
408 case KEYWORD_IS:
409 case KEYWORD_JUST:
410 case KEYWORD_PIC:
411 case KEYWORD_REDEFINES:
412 case KEYWORD_RENAMES:
413 case KEYWORD_SIGN:
414 case KEYWORD_SYNC:
415 case KEYWORD_USAGE:
416 case KEYWORD_VALUE:
417 kind = K_DATA;
418 }
419 }
420
421 if (kind != KIND_GHOST_INDEX)
422 {
423 NestingLevel *nl;
424 tagEntryInfo entry;
425 int r;
426 unsigned int nestingLevelNumber;
427
428 /* for nesting purposes, level 77 is identical to 1,
429 * and 66 to 2 */
430 switch (levelNumber)
431 {
432 default: nestingLevelNumber = levelNumber; break;
433 case 77: nestingLevelNumber = 1; break;
434 case 66: nestingLevelNumber = 2; break;
435 }
436
437 nl = popNestingLevelsToLevelNumber (levels, nestingLevelNumber);
438 initCOBOLTagEntry (&entry, word, kind);
439 if (nl && CBL_NL (nl) < nestingLevelNumber)
440 entry.extensionFields.scopeIndex = nl->corkIndex;
441 r = makeTagEntry (&entry);
442 if (levelNumber < 50 /* exclude special levels */)
443 {
444 nl = nestingLevelsPush (levels, r);
445 CBL_NL (nl) = levelNumber;
446 }
447 }
448 }
449 }
450 else if (*word && *line == '.')
451 makeCOBOLTag (word, K_PARAGRAPH);
452 else
453 {
454 int keyword2;
455
456 READ_KEYWORD (keyword2);
457 SKIP_SPACES ();
458
459 if (keyword2 == KEYWORD_DIVISION && *line == '.')
460 makeCOBOLTag (word, K_DIVISION);
461 else if (keyword2 == KEYWORD_SECTION && *line == '.')
462 makeCOBOLTag (word, K_SECTION);
463 }
464 }
465 }
466
467 nestingLevelsFree (levels);
468 cblppDeinit ();
469 }
470
findCOBOLFixedTags(void)471 static void findCOBOLFixedTags (void)
472 {
473 findCOBOLTags (FORMAT_FIXED);
474 }
475
findCOBOLFreeTags(void)476 static void findCOBOLFreeTags (void)
477 {
478 findCOBOLTags (FORMAT_FREE);
479 }
480
findCOBOLVariableTags(void)481 static void findCOBOLVariableTags (void)
482 {
483 findCOBOLTags (FORMAT_VARIABLE);
484 }
485
/* Parser initialisation hook: remember the language handle so that keyword
 * lookups can be made against it while parsing.  All three COBOL parser
 * definitions install this hook and therefore share Lang_cobol. */
static void initializeCobolParser (langType language)
{
	Lang_cobol = language;
}
490
commonCobolParserDefinition(const char * name,simpleParser parser)491 static parserDefinition* commonCobolParserDefinition (const char *name,
492 simpleParser parser)
493 {
494 parserDefinition* def = parserNew (name);
495 def->initialize = initializeCobolParser;
496 def->parser = parser;
497 def->kindTable = CobolKinds;
498 def->kindCount = ARRAY_SIZE(CobolKinds);
499 def->keywordTable = cobolKeywordTable;
500 def->keywordCount = ARRAY_SIZE(cobolKeywordTable);
501 def->useCork = CORK_QUEUE;
502 return def;
503 }
504
CobolParser(void)505 extern parserDefinition* CobolParser (void)
506 {
507 static const char *const extensions [] = {
508 "cbl", "cob", "CBL", "COB", NULL };
509 parserDefinition* def = commonCobolParserDefinition ("Cobol",
510 findCOBOLFixedTags);
511 def->extensions = extensions;
512 return def;
513 }
514
CobolFreeParser(void)515 extern parserDefinition* CobolFreeParser (void)
516 {
517 return commonCobolParserDefinition ("CobolFree", findCOBOLFreeTags);
518 }
519
CobolVariableParser(void)520 extern parserDefinition* CobolVariableParser (void)
521 {
522 return commonCobolParserDefinition ("CobolVariable", findCOBOLVariableTags);
523 }
524