xref: /Universal-ctags/parsers/dtd.c (revision 3afb54752e930d14181e28896dcb669d3e4323b9)
1 /*
2  *   Copyright (c) 2016, Masatake YAMATO
3  *   Copyright (c) 2016, Red Hat, Inc.
4  *
5  *   This source code is released for free distribution under the terms of the
6  *   GNU General Public License version 2 or (at your option) any later version.
7  *
8  *   This module contains functions for generating tags for DTD, document type
9  *   definition explained in https://www.w3.org/TR/REC-xml/#sec-physical-struct
10  *
11  */
12 
13 #include "general.h"
14 #include "tokeninfo.h"
15 
16 #include "debug.h"
17 #include "entry.h"
18 #include "keyword.h"
19 #include "parse.h"
20 #include "read.h"
21 #include "xtag.h"
22 
23 
24 static scopeSeparator DtdParameterEntrySeparators [] = {
25 	{ KIND_WILDCARD_INDEX, "/%" },
26 };
27 
28 static scopeSeparator DtdAttSeparators [] = {
29 	{ KIND_WILDCARD_INDEX, "/@" },
30 };
31 
/* Roles of a reference to a parameter entity.  The values are used as
 * indices into DtdEntityRoles, so both must list the roles in the same
 * order. */
typedef enum {
	DTD_PARAMETER_ENTITY_ELEMENT_NAME    = 0,	/* used where an element name is expected */
	DTD_PARAMETER_ENTITY_CONDITION       = 1,	/* used as the keyword of a conditional section */
	DTD_PARAMETER_ENTITY_PART_OF_ATT_DEF = 2,	/* used inside attribute definitions */
} dtdEntityRole;
37 
38 static roleDefinition DtdEntityRoles [] = {
39 	{ true, "elementName", "element names" },
40 	{ true, "condition",    "conditions" },
41 	{ true, "partOfAttDef", "part of attribute definition" },
42 };
43 
/* Roles of a reference to an element.  The values are used as indices
 * into DtdElementRoles. */
typedef enum {
	DTD_ELEMENT_ATT_OWNER = 0,	/* element named at the head of an ATTLIST */
} dtdElementRole;
47 
48 static roleDefinition DtdElementRoles [] = {
49 	{ true, "attOwner", "attributes owner" },
50 };
51 
/* Kinds of tags emitted for DTD input.  The values are used as indices
 * into DtdKinds.
 *
 * K_EXTERNAL_ENTITY and K_UNPARSED_ENTITY are not enabled; if they are
 * ever added, the matching rows in DtdKinds must be added at the same
 * position. */
typedef enum {
	K_ENTITY           = 0,
	K_PARAMETER_ENTITY = 1,
	K_ELEMENT          = 2,
	K_ATTRIBUTE        = 3,
	K_NOTATION         = 4,
} dtdKind;
61 
62 static kindDefinition DtdKinds [] = {
63 	{ true, 'E', "entity",    "entities" },
64 	{ true, 'p', "parameterEntity", "parameter entities",
65 	  .referenceOnly = false, ATTACH_ROLES(DtdEntityRoles),
66 	  ATTACH_SEPARATORS(DtdParameterEntrySeparators),
67 	},
68 	// { true, 'X', "externalEntity", "external entities" },
69 	// { true, 'U', "unparsedEntity", "unparsed entities" },
70 	{ true, 'e', "element",   "elements",
71 	  .referenceOnly = false, ATTACH_ROLES(DtdElementRoles) },
72 	{ true, 'a', "attribute", "attributes",
73 	  ATTACH_SEPARATORS(DtdAttSeparators), },
74 	{ true, 'n', "notation", "notations" },
75 
76 };
77 
/* Keyword ids stored in tokenInfo::keyword by readToken().  Several
 * spellings share one id (see DtdKeywordTable): all attribute type names
 * map to KEYWORD_ATTR_TYPES, and REQUIRED/IMPLIED map to
 * KEYWORD_ATTR_DEFAULT_DECLS.
 *
 * KEYWORD_PUBLIC and KEYWORD_SYSTEM are not enabled. */
enum {
	KEYWORD_ENTITY             = 0,
	KEYWORD_ELEMENT            = 1,
	KEYWORD_ATTLIST            = 2,
	KEYWORD_INCLUDE            = 3,
	KEYWORD_IGNORE             = 4,
	KEYWORD_NOTATION           = 5,
	KEYWORD_FIXED              = 6,
	KEYWORD_ATTR_TYPES         = 7,
	KEYWORD_ATTR_DEFAULT_DECLS = 8,
};

/* Integer type used for the keyword ids above. */
typedef int keywordId;
93 
94 static const keywordTable DtdKeywordTable[] = {
95 	{ "ENTITY",    KEYWORD_ENTITY   },
96 	{ "ELEMENT",   KEYWORD_ELEMENT  },
97 	{ "ATTLIST",   KEYWORD_ATTLIST },
98 	{ "INCLUDE",   KEYWORD_INCLUDE  },
99 	{ "IGNORE",    KEYWORD_IGNORE   },
100 	// { "PUBLIC",    KEYWORD_PUBLIC   },
101 	// { "SYSTEM",    KEYWORD_SYSTEM   },
102 	{ "NOTATION",  KEYWORD_NOTATION },
103 	{ "FIXED",     KEYWORD_FIXED    },
104 	{ "CDATA",     KEYWORD_ATTR_TYPES },
105 	{ "ID",        KEYWORD_ATTR_TYPES },
106 	{ "IDREF",     KEYWORD_ATTR_TYPES },
107 	{ "IDREFS",    KEYWORD_ATTR_TYPES },
108 	{ "ENTITIES",  KEYWORD_ATTR_TYPES },
109 	{ "NMTOKEN",   KEYWORD_ATTR_TYPES },
110 	{ "NMTOKENS",  KEYWORD_ATTR_TYPES },
111 	{ "REQUIRED",  KEYWORD_ATTR_DEFAULT_DECLS },
112 	{ "IMPLIED",   KEYWORD_ATTR_DEFAULT_DECLS },
113 };
114 
/* Token types produced by readToken().  A single-character token uses
 * the character's own value (0..255) as its type; the named types that
 * are not a character start at 256. */
enum eTokenType {
	TOKEN_CLOSE      = '>',
	TOKEN_EOF        = 256,
	TOKEN_UNDEFINED  = 257,
	TOKEN_KEYWORD    = 258,
	TOKEN_IDENTIFIER = 259,
	TOKEN_OPEN       = 260,		/* <! */
	TOKEN_STRING     = 261,
};
125 
126 static void readToken (tokenInfo *const token, void *data CTAGS_ATTR_UNUSED);
127 static void clearToken (tokenInfo *token);
128 static void copyToken (tokenInfo *dest, tokenInfo *src, void *data CTAGS_ATTR_UNUSED);
129 
130 typedef struct sDtdToken {
131 	tokenInfo base;
132 	int scopeIndex;
133 } dtdToken;
134 
/* View a tokenInfo pointer as the enclosing dtdToken.  The argument is
 * parenthesized so that the cast applies to the whole expression passed
 * in, not only to its first operand. */
#define DTD(TOKEN) ((dtdToken *)(TOKEN))
136 
137 static struct tokenInfoClass dtdTokenInfoClass = {
138 	.nPreAlloc = 16,
139 	.typeForUndefined = TOKEN_UNDEFINED,
140 	.keywordNone      = KEYWORD_NONE,
141 	.typeForKeyword   = TOKEN_KEYWORD,
142 	.typeForEOF       = TOKEN_EOF,
143 	.extraSpace       = sizeof (dtdToken) - sizeof (tokenInfo),
144 	.read             = readToken,
145 	.clear            = clearToken,
146 	.copy             = copyToken,
147 };
148 
149 static langType Lang_dtd;
150 
/* True when C can be part of a name: an alphanumeric character or one
 * of '-', '_', '.', ':'.  C is expected to be a value returned by
 * getcFromInputFile().  The argument is parenthesized so that an
 * expression argument is tested as a whole; note that it is still
 * evaluated more than once, so it must not have side effects. */
#define isIdentifierChar(c) (isalnum (c) || (c) == '-' || (c) == '_' \
							 || (c) == '.' || (c) == ':')
153 
newDtdToken(void)154 static tokenInfo *newDtdToken (void)
155 {
156 	return newToken (&dtdTokenInfoClass);
157 }
158 
/* `clear' callback of dtdTokenInfoClass: reset the parser specific part
 * of TOKEN, i.e. forget its scope. */
static void clearToken (tokenInfo *token)
{
	dtdToken *const dtd = DTD (token);

	dtd->scopeIndex = CORK_NIL;
}
163 
/* `copy' callback of dtdTokenInfoClass: copy the parser specific part
 * (the scope index) from SRC to DEST.  DATA is not used. */
static void copyToken (tokenInfo *dest, tokenInfo *src, void *data CTAGS_ATTR_UNUSED)
{
	const dtdToken *const from = DTD (src);
	dtdToken *const to = DTD (dest);

	to->scopeIndex = from->scopeIndex;
}
168 
/* `read' callback of dtdTokenInfoClass: read the next token from the
 * input file into TOKEN.  DATA is not used.
 *
 * - White space is skipped.  Carriage return is skipped as well, since
 *   it is white space in XML.
 * - "<!" becomes TOKEN_OPEN; a quoted string becomes TOKEN_STRING with
 *   the text between the quotes as its string; a run of name characters
 *   becomes TOKEN_KEYWORD or TOKEN_IDENTIFIER depending on
 *   lookupKeyword().
 * - "-- ... --" is a comment: it is skipped and the following token is
 *   returned instead.  When the input ends inside a comment the token is
 *   TOKEN_EOF.
 * - Any other character is returned with the character itself as the
 *   token type.
 *
 * scopeIndex of the DTD token is not touched here. */
static void readToken (tokenInfo *const token, void *data CTAGS_ATTR_UNUSED)
{
	int c, c0;

	token->type		= TOKEN_UNDEFINED;
	token->keyword	= KEYWORD_NONE;
	vStringClear (token->string);

 retry:
	do {
		c = getcFromInputFile ();
	} while (c == ' ' || c == '\t' || c == '\f' || c == '\n' || c == '\r');

	/* Recorded after the white space (and after any skipped comment,
	 * because of the jump to `retry') so they describe the token itself. */
	token->lineNumber   = getInputLineNumber ();
	token->filePosition = getInputFilePosition ();

	switch (c)
	{
	case EOF:
		token->type = TOKEN_EOF;
		break;
	case ';':
	case '&':
	case '%':
	case '>':
	case '#':
	case '?':
	case '[':
	case ']':
	case '|':
	case ',':
	case '(':
	case ')':
	case '+':
		token->type = c;
		break;
	case '<':
		c0 = getcFromInputFile();
		if (c0 == '!')
			token->type = TOKEN_OPEN;
		else
		{
			ungetcToInputFile (c0);
			token->type = c;
		}
		break;
	case '-':
		c0 = getcFromInputFile();
		if (c0 == '-')
		{
			/* Comment: consume everything up to the closing "--".
			 * A '-' followed by another character does not close the
			 * comment; that character is consumed as comment text. */
			for (;;)
			{
				int c1 = getcFromInputFile();

				if (c1 == EOF)
					break;
				if (c1 == '-')
				{
					int c2 = getcFromInputFile();
					if (c2 == '-' || c2 == EOF)
						break;
				}
			}
			/* Read the token after the comment.  If the input ended
			 * inside the comment, the next read yields EOF again and
			 * the token becomes TOKEN_EOF. */
			goto retry;
		}
		else
		{
			ungetcToInputFile (c0);
			token->type = c;
		}
		break;
	case '"':
	case '\'':
		token->type = TOKEN_STRING;
		/* Collect up to the matching quote.  The string also ends at
		 * EOF and at a NUL byte. */
		for (;;)
		{
			c0 = getcFromInputFile ();
			if (c0 == '\0' || c0 == EOF || c0 == c)
				break;
			tokenPutc(token, c0);
		}
		break;
	default:
		if (isIdentifierChar(c))
		{
			tokenPutc(token, c);
			/* A NUL byte ends the word and is dropped; any other
			 * non-name character is pushed back for the next read. */
			while ((c = getcFromInputFile ()) != '\0')
			{
				if (!isIdentifierChar(c))
				{
					ungetcToInputFile (c);
					break;
				}
				tokenPutc(token, c);
			}
			token->keyword = lookupKeyword (vStringValue (token->string),
											Lang_dtd);
			if (token->keyword == KEYWORD_NONE)
				token->type = TOKEN_IDENTIFIER;
			else
				token->type = TOKEN_KEYWORD;
		}
		else
			token->type = c;
		break;
	}
}
278 
/* Make a tag for TOKEN with KIND and ROLE if that kind/role is enabled.
 *
 * A definition tag (ROLE == ROLE_DEFINITION_INDEX) requires the kind to
 * be enabled.  A reference tag requires both XTAG_REFERENCE_TAGS and the
 * role itself to be enabled.
 *
 * E is filled from TOKEN: name, line number, file position, and the scope
 * index carried by the DTD token.
 *
 * Returns the value of makeTagEntry(), or CORK_NIL when no tag is made. */
static int makeDtdTagMaybe (tagEntryInfo *const e, tokenInfo *const token,
							int kind, int role)
{
	bool wanted;

	if (role == ROLE_DEFINITION_INDEX)
		wanted = DtdKinds[kind].enabled;
	else
		wanted = (isXtagEnabled (XTAG_REFERENCE_TAGS)
				  && DtdKinds[kind].roles[role].enabled);

	if (!wanted)
		return CORK_NIL;

	initRefTagEntry (e, tokenString (token), kind, role);
	e->lineNumber = token->lineNumber;
	e->filePosition = token->filePosition;
	e->extensionFields.scopeIndex = DTD (token)->scopeIndex;

	return makeTagEntry (e);
}
300 
backpatchEndField(int index,unsigned long lineNumber)301 static void backpatchEndField (int index, unsigned long lineNumber)
302 {
303 	tagEntryInfo *ep = getEntryInCorkQueue (index);
304 
305 	if (ep)
306 		ep->extensionFields.endLine = lineNumber;
307 }
308 
/* Parse the rest of an ENTITY declaration; TOKEN holds the ENTITY
 * keyword on entry.
 *
 * "% name" makes a parameterEntity tag, a plain name makes an entity
 * tag.  The rest of the declaration is skipped up to '>'; when '>' is
 * found and a tag was made, its end line is set to the line of '>'. */
static void parseEntity (tokenInfo *const token)
{
	tagEntryInfo e;
	int kind = K_ENTITY;
	int index = CORK_NIL;

	tokenRead (token);
	if (token->type == '%')
	{
		/* parameter entity: the name follows the '%' */
		kind = K_PARAMETER_ENTITY;
		tokenRead (token);
	}

	if (tokenIsType(token, IDENTIFIER))
		index = makeDtdTagMaybe (&e, token, kind, ROLE_DEFINITION_INDEX);

	if (!tokenSkipToType (token, TOKEN_CLOSE))
		return;

	if (index != CORK_NIL)
		backpatchEndField (index, token->lineNumber);
}
329 
/* Read the "name;" part of a parameter entity reference; TOKEN holds
 * the leading '%' on entry.
 *
 * Returns a new token holding a copy of the name when an identifier
 * directly followed by ';' is read; the caller must release it with
 * tokenDelete().  Returns NULL otherwise.  In both cases TOKEN is left
 * on the last token read. */
static tokenInfo *parserParameterEntityRef (tokenInfo *const token)
{
	tokenInfo *name;

	tokenRead (token);
	if (!tokenIsType(token, IDENTIFIER))
		return NULL;

	name = newTokenByCopying (token);

	tokenRead (token);
	if (token->type != ';')
	{
		tokenDelete (name);
		name = NULL;
	}

	return name;
}
349 
/* Parse the name part of an ELEMENT declaration; TOKEN holds the token
 * preceding the name on entry.
 *
 * - "%name;" makes a reference tag (parameterEntity, role elementName).
 * - An identifier makes an element definition tag.
 * - '(' starts a group: the function calls itself for each following
 *   token until ')' or EOF.
 *
 * With SKIPTOCLOSE true (the outermost call) the rest of the declaration
 * is skipped up to '>', and when '>' is found every cork queue entry
 * added during this call gets the line of '>' as its end line. */
static void parseElement (tokenInfo *const token, bool skipToClose)
{
	tagEntryInfo e;
	/* Cork queue size before anything is tagged; only meaningful (and
	 * only computed) when skipToClose is set. */
	const int firstIndex = skipToClose ? (int)countEntryInCorkQueue () : 0;

	tokenRead (token);
	switch (token->type)
	{
	case '%':
	{
		tokenInfo *const ref = parserParameterEntityRef (token);
		if (ref != NULL)
		{
			makeDtdTagMaybe (&e, ref,
							 K_PARAMETER_ENTITY,
							 DTD_PARAMETER_ENTITY_ELEMENT_NAME);
			tokenDelete (ref);
		}
		break;
	}
	case TOKEN_IDENTIFIER:
		makeDtdTagMaybe (&e, token, K_ELEMENT, ROLE_DEFINITION_INDEX);
		break;
	case '(':
		do
			parseElement (token, false);
		while (!tokenIsEOF (token) && token->type != ')');
		break;
	default:
		break;
	}

	if (!skipToClose)
		return;

	const int endIndex = (int)countEntryInCorkQueue ();

	if (!tokenSkipToType (token, TOKEN_CLOSE))
		return;

	for (int i = firstIndex; i < endIndex; i++)
		backpatchEndField (i, token->lineNumber);
}
391 
/* Parse the attribute definitions of an ATTLIST declaration.
 *
 *   [53]   	AttDef	   ::=   	S Name S AttType S DefaultDecl
 *
 * Tokens are read one by one until '>' or EOF:
 * - an identifier is tagged as an attribute definition,
 * - "%name;" is tagged as a reference (parameterEntity, role
 *   partOfAttDef),
 * - the parts of AttType and DefaultDecl are consumed without tagging.
 *
 * On '>' the scope index of TOKEN is reset to CORK_NIL and the token is
 * pushed back with tokenUnread() so that the caller reads it again. */
static void parseAttDefs (tokenInfo *const token)
{
	for (;;)
	{
		tokenRead (token);

		if (tokenIsType(token, CLOSE))
		{
			DTD (token)->scopeIndex = CORK_NIL;
			tokenUnread (token);
			return;
		}

		if (tokenIsType(token, IDENTIFIER))
		{
			/* Name */
			tagEntryInfo e;
			makeDtdTagMaybe (&e, token,
							 K_ATTRIBUTE, ROLE_DEFINITION_INDEX);
		}
		else if (tokenIsKeyword(token, NOTATION))
		{
			/* AttType: NOTATION optionally followed by "(...)" */
			tokenRead (token);
			if (token->type == '(')
				tokenSkipToType (token, ')');
		}
		else if (token->type == '(')
		{
			/* AttType, TODO: Enumerated members can be tagged. */
			tokenSkipToType (token, ')');
		}
		else if (token->type == '#')
		{
			/* DefaultDecl: after #FIXED one more token is consumed;
			 * any other word after '#' needs nothing more. */
			tokenRead (token);
			if (tokenIsKeyword(token, FIXED))
				tokenRead (token);
		}
		else if (token->type == '%')
		{
			tokenInfo *const ref = parserParameterEntityRef (token);
			if (ref != NULL)
			{
				tagEntryInfo e;
				makeDtdTagMaybe (&e, ref,
								 K_PARAMETER_ENTITY,
								 DTD_PARAMETER_ENTITY_PART_OF_ATT_DEF);
				tokenDelete (ref);
			}
		}
		/* Everything else, including attribute type keywords, the
		 * ENTITY keyword and strings, is consumed without any action. */

		if (tokenIsEOF (token))
			return;
	}
}
455 
/* Parse the rest of an ATTLIST declaration; TOKEN holds the ATTLIST
 * keyword on entry.
 *
 * The owner of the attributes is either an element name, tagged as a
 * reference to the element (role attOwner), or "%name;", tagged as a
 * reference to the parameter entity (role elementName).  The attribute
 * definitions that follow are parsed with the owner's cork index as
 * their scope.
 *
 * When the closing '>' is found and an owner tag was made, the end line
 * of the owner tag is set to the line of '>'. */
static void parseAttlist (tokenInfo *const token)
{
	tagEntryInfo e;
	int index = CORK_NIL;
	bool ownerFound = false;

	tokenRead (token);
	if (token->type == '%')
	{
		/* parserParameterEntityRef() reads the name and the ';' by
		 * itself, so it must be called while TOKEN is still '%'. */
		tokenInfo *identifier = parserParameterEntityRef (token);
		if (identifier)
		{
			/* The roles are attached to the parameterEntity kind;
			 * the entity kind has no role table. */
			index = makeDtdTagMaybe (&e, identifier,
									 K_PARAMETER_ENTITY,
									 DTD_PARAMETER_ENTITY_ELEMENT_NAME);
			tokenDelete (identifier);
			ownerFound = true;
		}
	}
	else if (tokenIsType(token, IDENTIFIER))
	{
		index = makeDtdTagMaybe (&e, token,
								 K_ELEMENT, DTD_ELEMENT_ATT_OWNER);
		ownerFound = true;
	}

	if (ownerFound)
	{
		/* Attributes tagged by parseAttDefs() take their scope from
		 * the token, so the owner is set for the duration of the call. */
		DTD (token)->scopeIndex = index;
		parseAttDefs (token);
		DTD (token)->scopeIndex = CORK_NIL;
	}

	/* Same guard as parseEntity(): record the end line only when the
	 * declaration is closed and there is a tag to update. */
	if (tokenSkipToType (token, TOKEN_CLOSE) && (index != CORK_NIL))
		backpatchEndField (index, token->lineNumber);
}
497 
/* Parse the rest of a NOTATION declaration; TOKEN holds the NOTATION
 * keyword on entry.
 *
 * The identifier following the keyword is tagged as a notation.  The
 * rest of the declaration is skipped up to '>'; when '>' is found and a
 * tag was made, its end line is set to the line of '>'. */
static void parseNotation (tokenInfo *const token)
{
	int index = CORK_NIL;
	tagEntryInfo e;

	tokenRead (token);
	if (tokenIsType(token, IDENTIFIER))
		index = makeDtdTagMaybe (&e, token,
								 K_NOTATION, ROLE_DEFINITION_INDEX);

	/* Same guard as parseEntity(): no end line for an unterminated
	 * declaration, and no update when no tag was made. */
	if (tokenSkipToType (token, TOKEN_CLOSE) && (index != CORK_NIL))
		backpatchEndField (index, token->lineNumber);
}
511 
512 
513 static void parseSection (tokenInfo *const token);
514 
/* Parse one markup declaration if TOKEN is its opening "<!"; do nothing
 * for any other token.
 *
 * The token after "<!" selects the handler: ELEMENT, ATTLIST, ENTITY and
 * NOTATION have their own parsers, '[' starts a conditional section, '>'
 * ends an empty declaration, and anything else is skipped up to '>'. */
static void parseDtdTag1 (tokenInfo *const token)
{
	if (!tokenIsType(token, OPEN))
		return;

	tokenRead (token);

	if (token->type == '[')
	{
		/* "<![" keyword "[" ... "]" */
		tokenRead (token);
		parseSection (token);
		tokenSkipToType (token, ']');
		return;
	}

	if (tokenIsType(token, CLOSE))
		return;

	if (tokenIsKeyword (token, ELEMENT))
		parseElement(token, true);
	else if (tokenIsKeyword (token, ATTLIST))
		parseAttlist(token);
	else if (tokenIsKeyword (token, ENTITY))
		parseEntity(token);
	else if (tokenIsKeyword (token, NOTATION))
		parseNotation(token);
	else
		tokenSkipToType (token, TOKEN_CLOSE);
}
538 
/* Parse the declarations inside a conditional section; TOKEN holds the
 * '[' that opens the section body on entry.  Tokens are read and handed
 * to parseDtdTag1() until ']' or EOF. */
static void parseSectionBody (tokenInfo *const token)
{
	do {
		tokenRead (token);
		parseDtdTag1 (token);
	} while ((!tokenIsEOF (token))
			 && (token->type != ']'));
}

/* Parse a conditional section; TOKEN holds the token following "<![" on
 * entry.
 *
 * - IGNORE: the section is skipped up to the first ']'.
 * - INCLUDE: the declarations in the section are parsed like top-level
 *   declarations, because an included section is part of the DTD.
 * - "%name;": a reference tag (parameterEntity, role condition) is made
 *   for the name, and the declarations in the section are parsed.  When
 *   the section ends with ']', the line of ']' is recorded as the end
 *   line of the reference tag. */
static void parseSection (tokenInfo *const token)
{
	if (tokenIsKeyword(token, IGNORE))
	{
		tokenSkipToType (token, ']');
		return;
	}

	if (tokenIsKeyword (token, INCLUDE))
	{
		tokenRead (token);
		if (token->type == '[')
			parseSectionBody (token);
	}
	else if (token->type == '%')
	{
		tokenInfo *const condition = parserParameterEntityRef (token);
		if (condition)
		{
			tagEntryInfo e;
			int index = makeDtdTagMaybe (&e, condition,
										 K_PARAMETER_ENTITY,
										 DTD_PARAMETER_ENTITY_CONDITION);
			tokenDelete (condition);
			tokenRead (token);
			if (token->type == '[')
			{
				parseSectionBody (token);
				if (token->type== ']')
					backpatchEndField (index, token->lineNumber);
			}
		}
	}
}
581 
findDtdTags(void)582 static void findDtdTags (void)
583 {
584 	tokenInfo *const token = newDtdToken ();
585 
586 	do {
587 		tokenRead (token);
588 		parseDtdTag1 (token);
589 	} while (!tokenIsEOF (token));
590 
591 	tokenDelete (token);
592 
593 	flashTokenBacklog (&dtdTokenInfoClass);
594 }
595 
initialize(const langType language)596 static void initialize (const langType language)
597 {
598 	Lang_dtd = language;
599 }
600 
DtdParser(void)601 extern parserDefinition* DtdParser (void)
602 {
603 	parserDefinition* def = parserNew ("DTD");
604 
605 	/* File name patters are picked from Linux kernel. */
606 	static const char *const extensions [] = {
607 		"dtd",
608 		"mod",
609 		NULL
610 	};
611 
612 	def->initialize = initialize;
613 	def->parser     = findDtdTags;
614 
615 	def->kindTable      = DtdKinds;
616 	def->kindCount  = ARRAY_SIZE (DtdKinds);
617 	def->extensions = extensions;
618 
619 	def->keywordTable = DtdKeywordTable;
620 	def->keywordCount = ARRAY_SIZE (DtdKeywordTable);
621 
622 	def->useCork    = CORK_QUEUE;
623 	def->requestAutomaticFQTag = true;
624 
625 	return def;
626 }
627