1 /*
2 * Copyright (c) 2016, Masatake YAMATO
3 * Copyright (c) 2016, Red Hat, Inc.
4 *
5 * This source code is released for free distribution under the terms of the
6 * GNU General Public License version 2 or (at your option) any later version.
7 *
 * This module contains functions for generating tags for DTD (document type
 * definition), as explained in https://www.w3.org/TR/REC-xml/#sec-physical-struct
10 *
11 */
12
13 #include "general.h"
14 #include "tokeninfo.h"
15
16 #include "debug.h"
17 #include "entry.h"
18 #include "keyword.h"
19 #include "parse.h"
20 #include "read.h"
21 #include "xtag.h"
22
23
24 static scopeSeparator DtdParameterEntrySeparators [] = {
25 { KIND_WILDCARD_INDEX, "/%" },
26 };
27
28 static scopeSeparator DtdAttSeparators [] = {
29 { KIND_WILDCARD_INDEX, "/@" },
30 };
31
32 typedef enum {
33 DTD_PARAMETER_ENTITY_ELEMENT_NAME,
34 DTD_PARAMETER_ENTITY_CONDITION,
35 DTD_PARAMETER_ENTITY_PART_OF_ATT_DEF,
36 } dtdEntityRole;
37
38 static roleDefinition DtdEntityRoles [] = {
39 { true, "elementName", "element names" },
40 { true, "condition", "conditions" },
41 { true, "partOfAttDef", "part of attribute definition" },
42 };
43
44 typedef enum {
45 DTD_ELEMENT_ATT_OWNER,
46 } dtdElementRole;
47
48 static roleDefinition DtdElementRoles [] = {
49 { true, "attOwner", "attributes owner" },
50 };
51
52 typedef enum {
53 K_ENTITY,
54 K_PARAMETER_ENTITY,
55 // K_EXTERNAL_ENTITY,
56 // K_UNPARSED_ENTITY,
57 K_ELEMENT,
58 K_ATTRIBUTE,
59 K_NOTATION,
60 } dtdKind;
61
62 static kindDefinition DtdKinds [] = {
63 { true, 'E', "entity", "entities" },
64 { true, 'p', "parameterEntity", "parameter entities",
65 .referenceOnly = false, ATTACH_ROLES(DtdEntityRoles),
66 ATTACH_SEPARATORS(DtdParameterEntrySeparators),
67 },
68 // { true, 'X', "externalEntity", "external entities" },
69 // { true, 'U', "unparsedEntity", "unparsed entities" },
70 { true, 'e', "element", "elements",
71 .referenceOnly = false, ATTACH_ROLES(DtdElementRoles) },
72 { true, 'a', "attribute", "attributes",
73 ATTACH_SEPARATORS(DtdAttSeparators), },
74 { true, 'n', "notation", "notations" },
75
76 };
77
78 enum {
79 KEYWORD_ENTITY,
80 KEYWORD_ELEMENT,
81 KEYWORD_ATTLIST,
82 KEYWORD_INCLUDE,
83 KEYWORD_IGNORE,
84 // KEYWORD_PUBLIC,
85 // KEYWORD_SYSTEM,
86 KEYWORD_NOTATION,
87 KEYWORD_FIXED,
88 KEYWORD_ATTR_TYPES,
89 KEYWORD_ATTR_DEFAULT_DECLS,
90 };
91
92 typedef int keywordId;
93
94 static const keywordTable DtdKeywordTable[] = {
95 { "ENTITY", KEYWORD_ENTITY },
96 { "ELEMENT", KEYWORD_ELEMENT },
97 { "ATTLIST", KEYWORD_ATTLIST },
98 { "INCLUDE", KEYWORD_INCLUDE },
99 { "IGNORE", KEYWORD_IGNORE },
100 // { "PUBLIC", KEYWORD_PUBLIC },
101 // { "SYSTEM", KEYWORD_SYSTEM },
102 { "NOTATION", KEYWORD_NOTATION },
103 { "FIXED", KEYWORD_FIXED },
104 { "CDATA", KEYWORD_ATTR_TYPES },
105 { "ID", KEYWORD_ATTR_TYPES },
106 { "IDREF", KEYWORD_ATTR_TYPES },
107 { "IDREFS", KEYWORD_ATTR_TYPES },
108 { "ENTITIES", KEYWORD_ATTR_TYPES },
109 { "NMTOKEN", KEYWORD_ATTR_TYPES },
110 { "NMTOKENS", KEYWORD_ATTR_TYPES },
111 { "REQUIRED", KEYWORD_ATTR_DEFAULT_DECLS },
112 { "IMPLIED", KEYWORD_ATTR_DEFAULT_DECLS },
113 };
114
115 enum eTokenType {
116 /* 0..255 are the byte's value */
117 TOKEN_CLOSE = '>',
118 TOKEN_EOF = 256,
119 TOKEN_UNDEFINED,
120 TOKEN_KEYWORD,
121 TOKEN_IDENTIFIER,
122 TOKEN_OPEN, /* <! */
123 TOKEN_STRING,
124 };
125
126 static void readToken (tokenInfo *const token, void *data CTAGS_ATTR_UNUSED);
127 static void clearToken (tokenInfo *token);
128 static void copyToken (tokenInfo *dest, tokenInfo *src, void *data CTAGS_ATTR_UNUSED);
129
130 typedef struct sDtdToken {
131 tokenInfo base;
132 int scopeIndex;
133 } dtdToken;
134
135 #define DTD(TOKEN) ((dtdToken *)TOKEN)
136
137 static struct tokenInfoClass dtdTokenInfoClass = {
138 .nPreAlloc = 16,
139 .typeForUndefined = TOKEN_UNDEFINED,
140 .keywordNone = KEYWORD_NONE,
141 .typeForKeyword = TOKEN_KEYWORD,
142 .typeForEOF = TOKEN_EOF,
143 .extraSpace = sizeof (dtdToken) - sizeof (tokenInfo),
144 .read = readToken,
145 .clear = clearToken,
146 .copy = copyToken,
147 };
148
149 static langType Lang_dtd;
150
/*
 * True when `c' can be part of a name: an alphanumeric character or one
 * of '-', '_', '.', ':'.  `c' is evaluated more than once, so it must not
 * have side effects.  Every use of the argument is parenthesized so that
 * an expression argument keeps its meaning.
 */
#define isIdentifierChar(c) (isalnum (c) || (c) == '-' || (c) == '_' \
							 || (c) == '.' || (c) == ':')
153
newDtdToken(void)154 static tokenInfo *newDtdToken (void)
155 {
156 return newToken (&dtdTokenInfoClass);
157 }
158
/* `clear' callback of dtdTokenInfoClass: reset the DTD private part of
 * the token, i.e. forget the scope. */
static void clearToken (tokenInfo *token)
{
	dtdToken *const self = DTD (token);

	self->scopeIndex = CORK_NIL;
}
163
/* `copy' callback of dtdTokenInfoClass: duplicate the DTD private part
 * (the scope index) from `src' into `dest'.  `data' is not used. */
static void copyToken (tokenInfo *dest, tokenInfo *src, void *data CTAGS_ATTR_UNUSED)
{
	const dtdToken *const from = DTD (src);
	dtdToken *const to = DTD (dest);

	to->scopeIndex = from->scopeIndex;
}
168
/*
 * Consume the rest of a comment whose opening "--" has already been read.
 *
 * Returns true when a closing "--" was seen, or when the input ended
 * right after a single '-'.  Returns false when the end of input was
 * reached at any other point.
 */
static bool skipDtdComment (void)
{
	int ch;

	while ((ch = getcFromInputFile ()) != EOF)
	{
		if (ch != '-')
			continue;

		ch = getcFromInputFile ();
		if (ch == '-' || ch == EOF)
			return true;
	}
	return false;
}

/*
 * `read' callback of dtdTokenInfoClass: read the next token from the
 * input file into `token'.  `data' is not used.
 *
 * Produces
 *   - TOKEN_EOF at the end of input,
 *   - TOKEN_OPEN for "<!",
 *   - TOKEN_STRING for a text quoted with " or ' (quotes not stored),
 *   - TOKEN_KEYWORD / TOKEN_IDENTIFIER for a run of identifier
 *     characters, depending on whether it is a registered keyword,
 *   - the character itself as the type for anything else.
 * Spaces, tabs, form feeds, newlines and "-- ... --" comments are
 * skipped.  A comment cut short by the end of input leaves the type as
 * TOKEN_UNDEFINED.
 */
static void readToken (tokenInfo *const token, void *data CTAGS_ATTR_UNUSED)
{
	token->type = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

	for (;;)
	{
		int ch;
		int next;

		/* Skip white space in front of the token. */
		do
			ch = getcFromInputFile ();
		while (ch == ' ' || ch == '\t' || ch == '\f' || ch == '\n');

		token->lineNumber = getInputLineNumber ();
		token->filePosition = getInputFilePosition ();

		if (ch == EOF)
		{
			token->type = TOKEN_EOF;
			return;
		}

		if (ch == '<')
		{
			next = getcFromInputFile ();
			if (next == '!')
				token->type = TOKEN_OPEN;
			else
			{
				ungetcToInputFile (next);
				token->type = ch;
			}
			return;
		}

		/* Must be tested before the identifier case: '-' is also an
		 * identifier character. */
		if (ch == '-')
		{
			next = getcFromInputFile ();
			if (next != '-')
			{
				ungetcToInputFile (next);
				token->type = ch;
				return;
			}

			if (skipDtdComment ())
				continue;	/* look for a token after the comment */
			return;
		}

		if (ch == '"' || ch == '\'')
		{
			token->type = TOKEN_STRING;
			/* Collect up to the matching quote; a NUL byte or the end of
			 * input also terminates the string. */
			for (next = getcFromInputFile ();
				 next != 0 && next != EOF && next != ch;
				 next = getcFromInputFile ())
				tokenPutc (token, next);
			return;
		}

		if (isIdentifierChar (ch))
		{
			tokenPutc (token, ch);
			for (next = getcFromInputFile ();
				 next != 0;
				 next = getcFromInputFile ())
			{
				if (!isIdentifierChar (next))
				{
					ungetcToInputFile (next);
					break;
				}
				tokenPutc (token, next);
			}

			token->keyword = lookupKeyword (vStringValue (token->string),
											Lang_dtd);
			token->type = (token->keyword == KEYWORD_NONE)
				? TOKEN_IDENTIFIER
				: TOKEN_KEYWORD;
			return;
		}

		/* Punctuation and every other byte: the character is the type. */
		token->type = ch;
		return;
	}
}
278
/*
 * Make a tag for `token' if the requested kind/role is enabled.
 *
 * e:     storage for the tag entry; filled in here.
 * token: supplies the name, line number, file position and scope index.
 * kind:  index into DtdKinds.
 * role:  ROLE_DEFINITION_INDEX for a definition tag, otherwise an index
 *        into the role table of `kind'.
 *
 * A definition tag requires the kind to be enabled.  A reference tag
 * requires the reference extra (XTAG_REFERENCE_TAGS) and the role to be
 * enabled.
 *
 * Returns CORK_NIL when nothing is emitted, otherwise the value returned
 * by makeTagEntry().
 */
static int makeDtdTagMaybe (tagEntryInfo *const e, tokenInfo *const token,
							int kind, int role)
{
	const kindDefinition *const kdef = DtdKinds + kind;
	const bool wanted = (role == ROLE_DEFINITION_INDEX)
		? kdef->enabled
		: (isXtagEnabled (XTAG_REFERENCE_TAGS) && kdef->roles[role].enabled);

	if (!wanted)
		return CORK_NIL;

	initRefTagEntry (e, tokenString (token), kind, role);
	e->lineNumber = token->lineNumber;
	e->filePosition = token->filePosition;
	e->extensionFields.scopeIndex = DTD (token)->scopeIndex;

	return makeTagEntry (e);
}
300
backpatchEndField(int index,unsigned long lineNumber)301 static void backpatchEndField (int index, unsigned long lineNumber)
302 {
303 tagEntryInfo *ep = getEntryInCorkQueue (index);
304
305 if (ep)
306 ep->extensionFields.endLine = lineNumber;
307 }
308
/*
 * Parse the rest of an entity declaration; the ENTITY keyword has just
 * been read.  A '%' in front of the name makes it a parameter entity,
 * otherwise it is a plain entity.  When the closing '>' is found, its
 * line is recorded as the end line of the tag.
 */
static void parseEntity (tokenInfo *const token)
{
	tagEntryInfo tag;
	int kind = K_ENTITY;
	int corkIndex = CORK_NIL;

	tokenRead (token);
	if (token->type == '%')
	{
		kind = K_PARAMETER_ENTITY;
		tokenRead (token);
	}

	if (tokenIsType (token, IDENTIFIER))
		corkIndex = makeDtdTagMaybe (&tag, token, kind, ROLE_DEFINITION_INDEX);

	if (!tokenSkipToType (token, TOKEN_CLOSE))
		return;

	if (corkIndex != CORK_NIL)
		backpatchEndField (corkIndex, token->lineNumber);
}
329
/*
 * Read the "name;" part of a parameter entity reference; the leading '%'
 * has already been read by the caller.
 *
 * Returns a newly made copy of the name token when the name is followed
 * by ';' -- the caller must release it with tokenDelete().  Returns NULL
 * when the input does not have that shape.  In both cases `token' holds
 * the last token read.
 */
static tokenInfo *parserParameterEntityRef (tokenInfo *const token)
{
	tokenInfo *name;

	tokenRead (token);
	if (!tokenIsType (token, IDENTIFIER))
		return NULL;

	name = newTokenByCopying (token);

	tokenRead (token);
	if (token->type != ';')
	{
		tokenDelete (name);
		name = NULL;
	}

	return name;
}
349
/*
 * Parse the name part of an element declaration.
 *
 * The name can be a plain identifier (tagged as an element definition),
 * a parameter entity reference (tagged as a reference with the
 * elementName role) or a parenthesized group, which is handled by
 * calling this function again for each member.
 *
 * skipToClose: true for the outermost call.  In that case the function
 * skips to the closing '>' and, when it is found, stores its line as the
 * end line of every tag queued since the call started.
 */
static void parseElement (tokenInfo *const token, bool skipToClose)
{
	tagEntryInfo tag;
	const int firstIndex = skipToClose ? (int) countEntryInCorkQueue () : 0;
	int endIndex;
	int i;

	tokenRead (token);
	if (tokenIsType (token, IDENTIFIER))
		makeDtdTagMaybe (&tag, token, K_ELEMENT, ROLE_DEFINITION_INDEX);
	else if (token->type == '(')
	{
		do
			parseElement (token, false);
		while ((!tokenIsEOF (token)) && (token->type != ')'));
	}
	else if (token->type == '%')
	{
		tokenInfo *const ref = parserParameterEntityRef (token);

		if (ref != NULL)
		{
			makeDtdTagMaybe (&tag, ref,
							 K_PARAMETER_ENTITY,
							 DTD_PARAMETER_ENTITY_ELEMENT_NAME);
			tokenDelete (ref);
		}
	}

	if (!skipToClose)
		return;

	/* Take the count before skipping, as the original code does. */
	endIndex = (int) countEntryInCorkQueue ();
	if (!tokenSkipToType (token, TOKEN_CLOSE))
		return;

	for (i = firstIndex; i < endIndex; i++)
		backpatchEndField (i, token->lineNumber);
}
391
/*
 * Parse the attribute definitions of an attribute list declaration:
 *
 *   [53] AttDef ::= S Name S AttType S DefaultDecl
 *
 * Every plain identifier is tagged as an attribute definition; the scope
 * comes from the scope index the caller stored in `token'.  Parameter
 * entity references are tagged with the partOfAttDef role.  Attribute
 * types, default declarations and strings are only consumed.
 *
 * The loop ends at the end of input or at '>'.  The '>' is pushed back
 * with tokenUnread() after the scope index of the token has been reset.
 */
static void parseAttDefs (tokenInfo *const token)
{
	bool finished = false;

	while (!finished)
	{
		tokenRead (token);

		if (tokenIsType (token, CLOSE))
		{
			DTD (token)->scopeIndex = CORK_NIL;
			tokenUnread (token);
			break;
		}

		if (tokenIsType (token, IDENTIFIER))
		{
			/* Name */
			tagEntryInfo attr;

			makeDtdTagMaybe (&attr, token,
							 K_ATTRIBUTE, ROLE_DEFINITION_INDEX);
		}
		else if (tokenIsKeyword (token, NOTATION))
		{
			/* AttType: NOTATION optionally followed by ( ... ) */
			tokenRead (token);
			if (token->type == '(')
				tokenSkipToType (token, ')');
		}
		else if (token->type == '(')
		{
			/* AttType, TODO: Enumerated members can be tagged. */
			tokenSkipToType (token, ')');
		}
		else if (token->type == '#')
		{
			/* DefaultDecl: #FIXED takes one more token, the other
			 * declarations are complete after the keyword. */
			tokenRead (token);
			if (tokenIsKeyword (token, FIXED))
				tokenRead (token);
		}
		else if (token->type == '%')
		{
			tokenInfo *const ref = parserParameterEntityRef (token);

			if (ref != NULL)
			{
				tagEntryInfo refTag;

				makeDtdTagMaybe (&refTag, ref,
								 K_PARAMETER_ENTITY,
								 DTD_PARAMETER_ENTITY_PART_OF_ATT_DEF);
				tokenDelete (ref);
			}
		}
		/* Anything else (attribute type keywords, the ENTITY keyword,
		 * default value strings, ...) is just consumed. */

		finished = tokenIsEOF (token);
	}
}
455
/*
 * Parse the rest of an attribute list declaration; the ATTLIST keyword
 * has just been read.
 *
 * The owner of the attributes is either
 *   - a parameter entity reference "%name;", tagged as a reference to
 *     the parameter entity with the elementName role, or
 *   - an element name, tagged as a reference to the element with the
 *     attOwner role.
 * The owner's cork index is stored in the token as the scope while the
 * attribute definitions are parsed, so attributes are scoped to it.
 *
 * Finally the input is skipped to '>' and the line of the current token
 * is recorded as the end line of the owner tag (nothing is recorded when
 * no owner tag was queued).
 */
static void parseAttlist (tokenInfo *const token)
{
	tagEntryInfo e;
	int index = CORK_NIL;

	tokenRead (token);
	if (token->type == '%')
	{
		/* parserParameterEntityRef() reads the name and the ';' itself,
		 * so the name must not be consumed here first.  This matches the
		 * way parseElement() and parseAttDefs() handle '%'. */
		tokenInfo *identifier = parserParameterEntityRef (token);

		if (identifier)
		{
			/* The role belongs to the parameter entity kind; K_ENTITY
			 * has no role table attached in DtdKinds. */
			index = makeDtdTagMaybe (&e, identifier,
									 K_PARAMETER_ENTITY,
									 DTD_PARAMETER_ENTITY_ELEMENT_NAME);
			tokenDelete (identifier);

			DTD (token)->scopeIndex = index;
			parseAttDefs (token);
			DTD (token)->scopeIndex = CORK_NIL;
		}
	}
	else if (tokenIsType (token, IDENTIFIER))
	{
		/* The token itself carries everything makeDtdTagMaybe() reads,
		 * so no temporary copy is needed. */
		index = makeDtdTagMaybe (&e, token,
								 K_ELEMENT, DTD_ELEMENT_ATT_OWNER);

		DTD (token)->scopeIndex = index;
		parseAttDefs (token);
		DTD (token)->scopeIndex = CORK_NIL;
	}

	tokenSkipToType (token, TOKEN_CLOSE);
	backpatchEndField (index, token->lineNumber);
}
497
/*
 * Parse the rest of a notation declaration; the NOTATION keyword has
 * just been read.  The name is tagged as a notation definition, the
 * input is skipped to '>' and the line of the current token is recorded
 * as the end line of the tag.
 */
static void parseNotation (tokenInfo *const token)
{
	tagEntryInfo tag;
	int corkIndex;

	tokenRead (token);
	corkIndex = tokenIsType (token, IDENTIFIER)
		? makeDtdTagMaybe (&tag, token, K_NOTATION, ROLE_DEFINITION_INDEX)
		: CORK_NIL;

	tokenSkipToType (token, TOKEN_CLOSE);
	backpatchEndField (corkIndex, token->lineNumber);
}
511
512
513 static void parseSection (tokenInfo *const token);
514
parseDtdTag1(tokenInfo * const token)515 static void parseDtdTag1 (tokenInfo *const token)
516 {
517 if (tokenIsType(token, OPEN))
518 {
519 tokenRead (token);
520 if (tokenIsKeyword (token, ELEMENT))
521 parseElement(token, true);
522 else if (tokenIsKeyword (token, ATTLIST))
523 parseAttlist(token);
524 else if (tokenIsKeyword (token, ENTITY))
525 parseEntity(token);
526 else if (tokenIsKeyword (token, NOTATION))
527 parseNotation(token);
528 else if (token->type == '[')
529 {
530 tokenRead (token);
531 parseSection (token);
532 tokenSkipToType (token, ']');
533 }
534 else if (!tokenIsType(token, CLOSE))
535 tokenSkipToType (token, TOKEN_CLOSE);
536 }
537 }
538
/*
 * Parse the declarations in the body of a conditional section; the '['
 * opening the body has just been read.  Stops at the first ']' token or
 * at the end of input.
 *
 * Returns true when the body was ended by ']'.
 */
static bool parseSectionBody (tokenInfo *const token)
{
	do {
		tokenRead (token);
		parseDtdTag1 (token);
	} while ((!tokenIsEOF (token))
			 && (token->type != ']'));

	return token->type == ']';
}

/*
 * Parse a conditional section.  `token' is the token following "<![".
 *
 *   IGNORE    the input is skipped to the next ']'.
 *   INCLUDE   the declarations in the body are parsed and tagged.
 *   %name;    the reference is tagged with the condition role and the
 *             body is parsed; when the closing ']' is found its line is
 *             recorded as the end line of that reference tag.
 *
 * Anything else is left to the caller, which skips to the next ']'.
 */
static void parseSection (tokenInfo *const token)
{
	if (tokenIsKeyword (token, IGNORE))
	{
		tokenSkipToType (token, ']');
		return;
	}

	if (tokenIsKeyword (token, INCLUDE))
	{
		tokenRead (token);
		/* The content of an INCLUDE section is part of the DTD, so its
		 * declarations must be parsed, not merely skipped. */
		if (token->type == '[')
			parseSectionBody (token);
		return;
	}

	if (token->type == '%')
	{
		tokenInfo *const condition = parserParameterEntityRef (token);

		if (condition)
		{
			tagEntryInfo e;
			int index = makeDtdTagMaybe (&e, condition,
										 K_PARAMETER_ENTITY,
										 DTD_PARAMETER_ENTITY_CONDITION);
			tokenDelete (condition);

			tokenRead (token);
			if (token->type == '[' && parseSectionBody (token))
				backpatchEndField (index, token->lineNumber);
		}
	}
}
581
findDtdTags(void)582 static void findDtdTags (void)
583 {
584 tokenInfo *const token = newDtdToken ();
585
586 do {
587 tokenRead (token);
588 parseDtdTag1 (token);
589 } while (!tokenIsEOF (token));
590
591 tokenDelete (token);
592
593 flashTokenBacklog (&dtdTokenInfoClass);
594 }
595
initialize(const langType language)596 static void initialize (const langType language)
597 {
598 Lang_dtd = language;
599 }
600
DtdParser(void)601 extern parserDefinition* DtdParser (void)
602 {
603 parserDefinition* def = parserNew ("DTD");
604
605 /* File name patters are picked from Linux kernel. */
606 static const char *const extensions [] = {
607 "dtd",
608 "mod",
609 NULL
610 };
611
612 def->initialize = initialize;
613 def->parser = findDtdTags;
614
615 def->kindTable = DtdKinds;
616 def->kindCount = ARRAY_SIZE (DtdKinds);
617 def->extensions = extensions;
618
619 def->keywordTable = DtdKeywordTable;
620 def->keywordCount = ARRAY_SIZE (DtdKeywordTable);
621
622 def->useCork = CORK_QUEUE;
623 def->requestAutomaticFQTag = true;
624
625 return def;
626 }
627